# ThompsonSampler

 Author Ethan Submission date 2019-03-24 00:30:22.387233 Rating 4397 Matches played 214 Win rate 45.33

Use rpsrunner.py to play unranked matches on your computer.

## Source code:

``````#Uses Thompson Sampling: https://arxiv.org/pdf/1707.02038.pdf
#   Treat action as next throw with parameters of beta distribution mapped to opponent's past n moves

import random

moves = {"R": 0, "P": 1, "S": 2}
moves_inv = {0: "R", 1: "P", 2: "S"}

#(opponent, bot move) = +1(win) / -1(loss)
winner = {(0,0): 0, (0, 1): 1, (0,2): -1, (1, 0): -1, (1,1): 0, (1,2): 1, (2,0): 1, (2,1): -1, (2,2): 0}

def initialize_beta_parameters(num_past_moves):
if num_past_moves == 0:
#indexed by `moves` ... entry: [alpha1, beta1, alpha2, beta2] ... parameters of two beta distributions
#alpha1 = # of wins & beta1 = # of loss/ties
#alpha2 = # of losses & beta2 = # of ties
return [[1.0,2.0,1.0,1.0], [1.0,2.0,1.0,1.0], [1.0,2.0,1.0,1.0]]
else:
#create tree where arr[a][b][c] encodes result after move a,b,c
return [initialize_beta_parameters(num_past_moves - 1)] * 3

def approx_beta(x, alpha, beta):
if x < 0.0 or x > 1.0:
return 0.0
return x**(alpha - 1.0) * (1.0 - x)**(beta - 1.0)

#Sample from beta distribution using Metropolis-Hastings Algorithm
def sample_beta(alpha, beta):
x = random.random()
n = 100
for _ in range(0, n):
cand = random.random()

beta_x = approx_beta(x, alpha, beta)
beta_cand = approx_beta(cand, alpha, beta)
if beta_cand >= beta_x:
x = cand
else:
acceptance = random.random()
ratio = beta_cand / beta_x
if acceptance <= ratio:
x = cand
return x

def reward(theta1, theta2):
#theta1 is probability of winning
#(1 - theta1) * theta2 is probability of losing
#1 - theta1 - (1 - theta1) * theta2 is probability of a tie
return 1.0 * theta1 + -5.0 * (1.0 - theta1) * theta2

class Bot:
def __init__(self, num_past_moves):
self.p = initialize_beta_parameters(num_past_moves)
self.num_past_moves = num_past_moves
self.past_moves = []

def next_move(self):
#Begin training if there are enough past moves
if len(self.past_moves) < self.num_past_moves:
return random.choice(["R", "P", "S"])

#Unwrap the parameters by performing array retrievals
parameters = self.p
for move in self.past_moves:
parameters = parameters[move]

theta_R1 = sample_beta(parameters[0][0], parameters[0][1])
theta_R2 = sample_beta(parameters[0][2], parameters[0][3])

theta_P1 = sample_beta(parameters[1][0], parameters[1][1])
theta_P2 = sample_beta(parameters[1][2], parameters[1][3])

theta_S1 = sample_beta(parameters[2][0], parameters[2][1])
theta_S2 = sample_beta(parameters[2][2], parameters[2][3])

expected_reward = [reward(theta_R1, theta_R2), reward(theta_P1, theta_P2), reward(theta_S1, theta_S2)]
max_r = 0
for r in range(0, 3):
if expected_reward[r] > expected_reward[max_r]:
max_r = r

#arbitrarily break ties
return moves_inv[max_r]

#Moves are using the numerical representation not string representation
def update(self, opponent_move, bot_move):
if len(self.past_moves) == self.num_past_moves:
parameters = self.p
for move in self.past_moves:
parameters = parameters[move]

win = winner[(opponent_move, bot_move)]
if win == 1:
#Update parameters with win
parameters[bot_move][0] += 1
elif win == 0:
#Update parameters with tie
parameters[bot_move][1] += 1
parameters[bot_move][3] += 1
elif win == -1:
#Update parameters with loss
parameters[bot_move][1] += 1
parameters[bot_move][2] += 1

#Update past_moves
self.past_moves.pop(0)
self.past_moves.append(opponent_move)
else:
self.past_moves.append(opponent_move)

if input == "":
bot1 = Bot(4)
output = str(bot1.next_move())
else:
bot1.update(moves[str(input)], moves[str(output)])
output = str(bot1.next_move())``````