ThompsonSampler

AuthorEthan
Submission date2019-03-24 00:30:22.387233
Rating4504
Matches played241
Win rate45.64

Use rpsrunner.py to play unranked matches on your computer.

Source code:

#Uses Thompson Sampling: https://arxiv.org/pdf/1707.02038.pdf
#   Treat action as next throw with parameters of beta distribution mapped to opponent's past n moves

import random

moves = {"R": 0, "P": 1, "S": 2}
moves_inv = {0: "R", 1: "P", 2: "S"}

#(opponent, bot move) = +1(win) / -1(loss)
winner = {(0,0): 0, (0, 1): 1, (0,2): -1, (1, 0): -1, (1,1): 0, (1,2): 1, (2,0): 1, (2,1): -1, (2,2): 0}

def initialize_beta_parameters(num_past_moves):
    if num_past_moves == 0:
        #indexed by `moves` ... entry: [alpha1, beta1, alpha2, beta2] ... parameters of two beta distributions
            #alpha1 = # of wins & beta1 = # of loss/ties
            #alpha2 = # of losses & beta2 = # of ties
        return [[1.0,2.0,1.0,1.0], [1.0,2.0,1.0,1.0], [1.0,2.0,1.0,1.0]]
    else:
        #create tree where arr[a][b][c] encodes result after move a,b,c
        return [initialize_beta_parameters(num_past_moves - 1)] * 3

def approx_beta(x, alpha, beta):
    if x < 0.0 or x > 1.0:
        return 0.0
    return x**(alpha - 1.0) * (1.0 - x)**(beta - 1.0) 

#Sample from beta distribution using Metropolis-Hastings Algorithm
def sample_beta(alpha, beta):
    x = random.random()
    n = 100
    for _ in range(0, n):
        cand = random.random()

        beta_x = approx_beta(x, alpha, beta)
        beta_cand = approx_beta(cand, alpha, beta)
        if beta_cand >= beta_x:
            x = cand
        else:
            acceptance = random.random()
            ratio = beta_cand / beta_x
            if acceptance <= ratio:
                x = cand
    return x


def reward(theta1, theta2):
    #theta1 is probability of winning
    #(1 - theta1) * theta2 is probability of losing
    #1 - theta1 - (1 - theta1) * theta2 is probability of a tie
    return 1.0 * theta1 + -5.0 * (1.0 - theta1) * theta2

class Bot:
    def __init__(self, num_past_moves):
        self.p = initialize_beta_parameters(num_past_moves)
        self.num_past_moves = num_past_moves
        self.past_moves = []

    def next_move(self):
        #Begin training if there are enough past moves
        if len(self.past_moves) < self.num_past_moves:
            return random.choice(["R", "P", "S"])

        #Unwrap the parameters by performing array retrievals 
        parameters = self.p
        for move in self.past_moves:
            parameters = parameters[move]

        theta_R1 = sample_beta(parameters[0][0], parameters[0][1])
        theta_R2 = sample_beta(parameters[0][2], parameters[0][3])
        
        theta_P1 = sample_beta(parameters[1][0], parameters[1][1])
        theta_P2 = sample_beta(parameters[1][2], parameters[1][3])

        theta_S1 = sample_beta(parameters[2][0], parameters[2][1])
        theta_S2 = sample_beta(parameters[2][2], parameters[2][3])

        expected_reward = [reward(theta_R1, theta_R2), reward(theta_P1, theta_P2), reward(theta_S1, theta_S2)]
        max_r = 0
        for r in range(0, 3):
            if expected_reward[r] > expected_reward[max_r]:
                max_r = r
        
        #arbitrarily break ties
        return moves_inv[max_r]
    
    #Moves are using the numerical representation not string representation
    def update(self, opponent_move, bot_move):
        if len(self.past_moves) == self.num_past_moves:    
            parameters = self.p
            for move in self.past_moves:
                parameters = parameters[move]
            
            win = winner[(opponent_move, bot_move)]
            if win == 1:
                #Update parameters with win
                parameters[bot_move][0] += 1
            elif win == 0:
                #Update parameters with tie
                parameters[bot_move][1] += 1
                parameters[bot_move][3] += 1
            elif win == -1:
                #Update parameters with loss
                parameters[bot_move][1] += 1
                parameters[bot_move][2] += 1

            #Update past_moves
            self.past_moves.pop(0)
            self.past_moves.append(opponent_move)
        else:
            self.past_moves.append(opponent_move)

if input == "":
    bot1 = Bot(4)
    output = str(bot1.next_move())
else:
    bot1.update(moves[str(input)], moves[str(output)])
    output = str(bot1.next_move())