# QLearner3

- Author: Ethan
- Submission date: 2019-03-15 00:50:20.148263
- Rating: 7107
- Matches played: 212
- Win rate: 72.64

Use rpsrunner.py to play unranked matches on your computer.

## Source code:

```python
import random

# Move letters and their integer codes (rock, paper, scissors).
moves = {"R": 0, "P": 1, "S": 2}
# Reverse lookup: integer code back to move letter.
moves_inv = {code: letter for letter, code in moves.items()}

# reward[(opponent move, bot move)] is the bot's payoff:
# +1 for a win, 0 for a draw, -1 for a loss.
reward = {
    # opponent played rock
    (0, 0): 0, (0, 1): 1, (0, 2): -1,
    # opponent played paper
    (1, 0): -1, (1, 1): 0, (1, 2): 1,
    # opponent played scissors
    (2, 0): 1, (2, 1): -1, (2, 2): 0,
}

def zero_array(shape):
    """Return a nested list of zeros with the given dimensions.

    shape is a non-empty sequence of ints: shape[0] is the length of the
    outermost list and the remaining entries describe the nested levels.
    Every sub-list is a distinct object, so writing to one cell never
    affects another.
    """
    # Iterate over the size of the first dimension, not the shape itself.
    size = shape[0]
    if len(shape) == 1:
        return [0] * size
    return [zero_array(shape[1:]) for _ in range(size)]

class Bot:
    """Tabular Q-learning rock-paper-scissors player.

    The state is the opponent's most recent ``num_past_moves`` moves and the
    action is the bot's own move. Moves are the integer codes 0, 1, 2 from
    the module-level ``moves`` mapping.
    """

    def __init__(self, alpha, gamma, num_past_moves):
        """Create a bot with an all-zero Q table.

        alpha is the learning rate, gamma the discount factor, and
        num_past_moves the number of opponent moves that make up a state.
        """
        self.past_moves = []
        # Q[a][b]...[d]: value of action d after the opponent played a, b, ...
        # One axis of size 3 per remembered move, plus one for the action.
        self.Q = zero_array([3] * (num_past_moves + 1))
        self.alpha = alpha
        self.gamma = gamma
        self.num_past_moves = num_past_moves

    def next_move(self):
        """Return the bot's next move as a letter ("R", "P" or "S")."""
        # Play randomly until enough opponent history exists to form a state.
        if len(self.past_moves) < self.num_past_moves:
            return moves_inv[random.randint(0, 2)]

        # Walk down the table to the row of action values for this state.
        current_Q = self.Q
        for prev_move in self.past_moves:
            current_Q = current_Q[prev_move]

        # Collect each best-valued action exactly once so ties are broken
        # uniformly (no action is listed twice).
        best_value = max(current_Q)
        ties = [a for a, q in enumerate(current_Q) if q == best_value]
        return moves_inv[random.choice(ties)]

    def update(self, opponent_move, current_move):
        """Learn from one finished round and record the opponent's move.

        Both arguments are integer move codes, not letters.
        """
        if len(self.past_moves) == self.num_past_moves:
            reward_outcome = reward[(opponent_move, current_move)]

            # Locate the row of action values for the state we acted in.
            current_Q = self.Q
            for prev_move in self.past_moves:
                current_Q = current_Q[prev_move]
            # The bootstrap term is the best value in this same state's row.
            # NOTE(review): textbook Q-learning would use the successor
            # state's row here -- left unchanged; confirm this is intended.
            current_Q[current_move] = (
                (1 - self.alpha) * current_Q[current_move]
                + self.alpha * (reward_outcome + self.gamma * max(current_Q))
            )

        # Slide the history window: add the newest move, drop the oldest once
        # the window is over-full (also correct when num_past_moves is 0).
        self.past_moves.append(opponent_move)
        if len(self.past_moves) > self.num_past_moves:
            self.past_moves.pop(0)

# Per-round driver code.
# NOTE(review): assumes the match runner (rpsrunner.py) re-executes this code
# every round with `input` bound to the opponent's previous move ("" on the
# first round) and keeps `bot1` and `output` alive between rounds -- confirm.
if input == "":
    # First round: create the learner (alpha=0.5, gamma=0.99, 5-move state).
    bot1 = Bot(0.5, 0.99, 5)
else:
    # Learn from the previous round before choosing the next move.
    bot1.update(moves[str(input)], moves[str(output)])
output = str(bot1.next_move())