QLearner2

Author: Ethan
Submission date: 2019-03-14 23:47:50.246317
Rating: 4654
Matches played: 144
Win rate: 47.92%

Use rpsrunner.py to play unranked matches on your computer.

Source code:

import random

# Map between string moves ("R"/"P"/"S") and their numeric encodings (0/1/2).
moves = {"R": 0, "P": 1, "S": 2}
moves_inv = {0: "R", 1: "P", 2: "S"}

# reward[(opponent_move, bot_move)] -> payoff for the bot: +1 win, 0 tie, -1 loss.
reward = {(0,0): 0, (0, 1): 1, (0,2): -1, (1, 0): -1, (1,1): 0, (1,2): 1, (2,0): 1, (2,1): -1, (2,2): 0}

def zero_array(shape):
    """Build a nested list of zeros whose dimensions are given by *shape*.

    e.g. zero_array([2, 3]) -> [[0, 0, 0], [0, 0, 0]]. Inner lists are
    distinct objects, so mutating one cell never aliases another.
    """
    if len(shape) == 1:
        return [0] * shape[0]
    # Recurse on the remaining dimensions once per slot of the first one.
    return [zero_array(shape[1:]) for _ in range(shape[0])]

class Bot:
    """Tabular Q-learner over the opponent's last `num_past_moves` moves.

    State = the opponent's most recent `num_past_moves` moves (each 0/1/2);
    action = our next move. Q is a nested list indexed as
    Q[m1][m2]...[mk][action].
    """

    def __init__(self, alpha, gamma, num_past_moves):
        # Sliding window of the opponent's most recent moves (numeric 0/1/2).
        self.past_moves = []
        # Q[a][b]...[d]: value of action d given opponent history a, b, ...
        self.Q = zero_array([3] * (num_past_moves + 1))
        self.alpha = alpha  # learning rate
        self.gamma = gamma  # discount factor
        self.num_past_moves = num_past_moves

    def next_move(self):
        """Return "R"/"P"/"S": greedy w.r.t. Q once history is full, else random."""
        # Play randomly until we have seen enough opponent moves to index Q.
        if len(self.past_moves) < self.num_past_moves:
            return moves_inv[random.randint(0, 2)]

        # Descend into the Q table along the opponent's recent history.
        current_Q = self.Q
        for prev_move in self.past_moves:
            current_Q = current_Q[prev_move]

        # Collect all actions tied for the best Q value. Start the scan at
        # a = 1: action 0 already seeds `ties`, and re-comparing it against
        # itself would append it a second time, biasing tie-breaks toward Rock.
        best = 0
        ties = [0]
        for a in range(1, 3):
            if current_Q[a] > current_Q[best]:
                best = a
                ties = [a]
            elif current_Q[a] == current_Q[best]:
                ties.append(a)

        # Break ties uniformly at random.
        return moves_inv[random.choice(ties)]

    def update(self, opponent_move, current_move):
        """Record one round and update Q.

        Both arguments are numeric moves (0=R, 1=P, 2=S), not strings.
        """
        if len(self.past_moves) == self.num_past_moves:
            reward_outcome = reward[(opponent_move, current_move)]

            # Q-learning update for the action taken in the current state.
            # NOTE(review): `max(current_Q)` bootstraps from the *same* state's
            # values rather than the successor state's — kept as-is, since
            # changing it would alter the bot's learned behavior.
            current_Q = self.Q
            for prev_move in self.past_moves:
                current_Q = current_Q[prev_move]
            current_Q[current_move] = (1 - self.alpha) * current_Q[current_move] + self.alpha * (reward_outcome + self.gamma * max(current_Q))
            # Slide the history window forward.
            self.past_moves.pop(0)

        # Append this round's opponent move to the history.
        self.past_moves.append(opponent_move)
        
        
# rpscontest interface: the runner re-executes this whole source each round in
# a persistent globals dict. `input` holds the opponent's previous move
# ("" on the first round) and `output` must be set to our move.
if input == "":
    # First round only: create the learner. Re-creating it unconditionally on
    # every round (as before) would discard everything learned so far.
    bot1 = Bot(0.5, 0.99, 5)
else:
    # Learn from the move we actually played last round (persisted in
    # `last_move`), not from a freshly sampled, never-played move.
    bot1.update(moves[input], moves[last_move])

output = bot1.next_move()
last_move = output