thirdtry

Author	ADL
Submission date	2019-09-20 23:56:57.809841
Rating	5150
Matches played	224
Win rate	52.68
Use rpsrunner.py to play unranked matches on your computer.
Source code:

import random as rnd
import math


# Probability tables for the opponent's next move.
#   prob0        - uniform distribution over the three moves
#   prob         - unconditional estimate (rebound by decision())
#   prob1[a]     - estimate given the opponent's last move a
#   prob11[a][b] - estimate given the opponent's last move a and our last move b
# Every row is built as its own list.  Reusing a single list object for all
# rows would make a write to prob11[a][b] also overwrite prob1[b] and every
# other row of prob11.
prob0 = [1. / 3., 1. / 3., 1. / 3.]
prob = list(prob0)
prob1 = [list(prob0), list(prob0), list(prob0)]
prob11 = [
    [list(prob0), list(prob0), list(prob0)],
    [list(prob0), list(prob0), list(prob0)],
    [list(prob0), list(prob0), list(prob0)],
]

# Moves are handled internally as integers: R=0, P=1, S=2.
convdic = {'R': 0, 'P': 1, 'S': 2}
# Inverse mapping, indexed by the integer code.
invconv = ['R', 'P', 'S']

# beats[X] is the move that beats X; isbeatenby[X] is the move that X beats.
beats = [1, 2, 0]
isbeatenby = [2, 0, 1]


# This script is executed again for every round and relies on module-level
# names surviving between executions: the else branch appends to lists that
# were created by an earlier run.  `input` is the opponent's previous move
# and `output` is our previous move ('' / unset on the very first round).
if input=='':
    # First round of a match: create the state that must persist.
    inhistory=[]    # opponent's moves so far, as integers 0..2
    outhistory=[]   # our own moves so far, as integers 0..2
    # One [expected gain, uncertainty] pair per round.  It has to be created
    # here and not on every execution, otherwise the gainlist[-lastrounds:]
    # window used by the alarm logic could never hold more than two entries.
    gainlist=[[0.,math.sqrt(1./6.)]]
else:
    inhistory.append(convdic.get(input))
    outhistory.append(convdic.get(output))

# Per-round values, deliberately reset on every execution.
score=[0,0,0]                 # not read anywhere in the visible script
lastrounds = 6                # size of the window used by the alarm logic
maxmax=0.                     # default expected gain while decision() plays randomly
unc=[math.sqrt(1./6.)]        # default uncertainty while decision() plays randomly
alarmlevel = 0.               # stays 0 until enough rounds have been played
norm=1.

def _model_choice(counts):
    """Choose the best reply to one frequency model of the opponent's next move.

    counts -- three occurrence counts, indexed by the opponent's move (0..2).

    Returns a tuple (move, best_gain, best_unc, probs):
      move      -- reply with the highest expected gain; ties are broken by
                   the smallest uncertainty, remaining ties at random
      best_gain -- that highest expected gain
      best_unc  -- the smallest uncertainty among the highest-gain replies
      probs     -- the add-one smoothed probabilities derived from counts
    """
    total = sum(counts)
    probs = [(c + 1.) / (total + 3.) for c in counts]

    gains = []
    spreads = []
    for reply in range(3):
        # Moves are ordered so that each one beats its predecessor, cyclically.
        p_win = probs[(reply - 1) % 3]    # opponent plays the move `reply` beats
        p_lose = probs[(reply + 1) % 3]   # opponent plays the move that beats `reply`
        gains.append(p_win - p_lose)
        # Uncertainty on the expected gain: single-round variance of the
        # gain, shrunk by the number of observations (total + 4).
        spreads.append(math.sqrt(
            (p_win + p_lose - (p_win - p_lose) ** 2) / (total + 4.)))

    best_gain = max(gains)
    top = [i for i, g in enumerate(gains) if g == best_gain]
    top_unc = [spreads[i] for i in top]
    best_unc = min(top_unc)
    finalists = [i for i, u in zip(top, top_unc) if u == best_unc]
    # randint is called even for a single finalist so that the random
    # stream is consumed once per model on every round.
    move = finalists[rnd.randint(0, len(finalists) - 1)]
    return move, best_gain, best_unc, probs


def decision(inh,outh):
    """Return the next move (0..2) suggested by three frequency models.

    inh  -- opponent's moves so far, as integers 0..2
    outh -- our own moves so far, as integers 0..2

    With fewer than two recorded rounds a uniformly random move is returned.
    Otherwise three models of the opponent's next move are evaluated:
      * unconditional frequencies of the opponent's moves,
      * frequencies conditioned on the opponent's last move,
      * frequencies conditioned on the opponent's last move and our last move.
    The model promising the highest expected gain wins; ties go to the model
    with the smaller uncertainty, then to the earlier model in that list.

    Side effects on module globals: `pl` is set to len(inh) + 1 on every
    call; when the models are evaluated, `prob`, `prob1[last opponent move]`
    and `prob11[last opponent move][last own move]` receive the estimated
    distributions, `maxmax` the winning expected gain and `unc` the list of
    uncertainties of the models that reached it.
    """
    global prob, prob1, prob11, pl, maxmax, unc

    pl = len(inh) + 1
    if pl < 3:
        return rnd.randint(0, 2)

    count = [0, 0, 0]      # unconditional
    count1 = [0, 0, 0]     # given the opponent's last move
    count11 = [0, 0, 0]    # given the opponent's and our last move
    current1 = inh[pl - 2]     # the last play of the opponent
    current11 = outh[pl - 2]   # the last play of the bot
    for n, (seen, following) in enumerate(zip(inh[:-1], inh[1:])):
        count[seen] += 1
        if seen == current1:
            # What the opponent played right after the same situation.
            count1[following] += 1
            if outh[n] == current11:
                count11[following] += 1
    count[current1] += 1   # the last play also counts unconditionally

    move, gain, spread, prob = _model_choice(count)

    move1, gain1, spread1, cond1 = _model_choice(count1)
    prob1[current1] = cond1

    move11, gain11, spread11, cond11 = _model_choice(count11)
    prob11[current1][current11] = cond11

    movelist = [move, move1, move11]
    shortlist = [gain, gain1, gain11]
    shortlistunc = [spread, spread1, spread11]
    maxmax = max(shortlist)
    maxpos = [i for i, g in enumerate(shortlist) if g == maxmax]
    unc = [shortlistunc[i] for i in maxpos]
    # First model among the best ones that has the smallest uncertainty.
    return movelist[maxpos[unc.index(min(unc))]]
    
      
# Ask the frequency models for a move and log what they promised:
# [expected gain, smallest uncertainty among the winning models].
naivedecision = decision(inhistory, outhistory)
gainlist.append([maxmax, min(unc)])

if pl > lastrounds + 1:
    recent = gainlist[-lastrounds:]
    # Inverse-variance weighted mean of the logged expected gains.
    norm = sum(1. / entry[1] ** 2 for entry in recent)
    lastexpgain = sum(entry[0] / entry[1] ** 2 for entry in recent) / norm
    # Logged uncertainties combined in quadrature.
    tolerance = math.sqrt(sum(entry[1] ** 2 for entry in recent))
    # Net result of the last `lastrounds` rounds: +1 per win, -1 per loss,
    # 0 per draw.
    actualgain = sum(
        (1 + outhistory[-back] - inhistory[-back]) % 3 - 1
        for back in range(1, lastrounds + 1)
    )
    alarmlevel = (lastexpgain - actualgain) / tolerance

# When the real results lag the promised gain by more than one tolerance,
# play the move that beats the counter to the naive choice; otherwise play
# the naive choice itself.
output = invconv[
    beats[beats[naivedecision]] if alarmlevel > 1 else naivedecision
]