freq_bot

Author: jlubea
Submission date: 2019-11-15 19:27:40.466664
Rating: 7515
Matches played: 212
Win rate: 74.06

Use rpsrunner.py to play unranked matches on your computer.

Source code:

# Hybrid of two strong players with a first attempt at a markov strategy switcher
# 

# The rpscontest judge executes this source once per round with the globals
# `input` (opponent's previous move, "" on the very first round) and `output`
# (our reply).  All state is therefore built exactly once, on round one.
if input == "":
    import random
    import math
    import operator
    from operator import add
    from operator import truediv
    from operator import mul
    from collections import defaultdict
    from itertools import combinations

    rps = ['R', 'P', 'S']
    beatBy = {'R':'P', 'P':'S', 'S':'R'}  # move -> move that beats it
    values = { 'R': 0, 'P': 1, 'S': 2 }   # move -> index
    chars = { 0: 'R', 1: 'P', 2: 'S' }    # index -> move
    output = random.choice(rps)           # opening move is uniform random

    def choice(arr, count, p):
        """Sample `count` items from `arr` (with replacement) according to
        the probability list `p`, via an inline cumulative distribution."""
        cumulative = p[:]
        for i in range(1, len(cumulative)):
            cumulative[i] += cumulative[i - 1]

        picks = [0] * count
        for slot in range(count):
            # Uniform draw on [0, 1] with 1e-5 resolution.
            r = random.randrange(0, 100001) / 100000.0
            for idx, threshold in enumerate(cumulative):
                if r <= threshold:
                    picks[slot] = arr[idx]
                    break

        return picks

    # def flatten(l):
    #     flat_list = []
    #     for sublist in l:
    #         for item in sublist:
    #             flat_list.append(item)
    #     return flat_list

    def normalize(counts):
        """Scale `counts` into a probability distribution; an all-zero vector
        is treated as uniform."""
        total = float(sum(counts))
        if total == 0:
            # Flat input: pretend every bucket was seen once.
            counts = [1] * len(counts)
            total = float(len(counts))
        return [x / total for x in counts]

    def rotate(l, by):
        """Cyclically shift list `l` left by `by` positions."""
        head, tail = l[:by], l[by:]
        return tail + head

    def distance(l1, l2):
        """Euclidean distance between two equal-length numeric vectors.

        Fixed: the original accumulated into a local named `sum`, shadowing
        the builtin; rewritten with a non-shadowing accumulator and zip.
        """
        total = 0.0
        for a, b in zip(l1, l2):
            d = a - b
            total += d * d
        return math.sqrt(total)

    def squash_to_bounds(counts, bounds, default):
        """Affinely rescale `counts` into [bounds[0], bounds[1]]; when every
        value is identical, each entry becomes `default` instead."""
        lo, hi = bounds[0], bounds[1]
        minimum = min(counts)
        spread = float(max(counts) - minimum)

        if spread == 0:
            return [default] * len(counts)

        span = hi - lo
        return [(span * (v - minimum)) / spread + lo for v in counts]

    class ScoreAccumulator:
        """Per-context win/loss tallies used to rank the three actions.

        `scores` counts outright wins per action index, while
        `scores_for_cond_hist` shifts credit/blame one rotation along the
        R->P->S cycle; cond_hist() squashes the latter into a [-1, 1]
        confidence vector.
        """

        def __init__(self, actions):
            self.actions = actions
            self.scores = [0] * len(self.actions)                # wins per action index
            self.scores_for_cond_hist = [0] * len(self.actions)  # signed, rotated credit
            self.obs_count = 0       # total observations, ties included
            self.modified_count = 0  # observations that changed a tally

        def update(self, me, opp):
            """Record one round; `me` and `opp` are 'R'/'P'/'S'."""
            self.obs_count += 1

            if opp == me:
                # Tie: nothing to credit or blame.
                return
            elif beatBy[opp] == me:
                # Won: credit my move, and credit its beater in the rotated histogram.
                self.scores[values[me]] += 1
                self.scores_for_cond_hist[values[beatBy[me]]] += 1
                self.modified_count += 1
            elif beatBy[me] == opp:
                # Lost: debit the double-rotated slot.
                self.scores_for_cond_hist[values[beatBy[beatBy[me]]]] -= 1
                self.modified_count += 1

        def cond_hist(self):
            """Rotated credit squashed into [-1, 1] (all zeros when flat)."""
            return squash_to_bounds(self.scores_for_cond_hist, [-1.0, 1.0], 0)

        def wins_hist(self):
            """Raw win counts squashed into [-1, 1] (all zeros when flat)."""
            return squash_to_bounds(self.scores, [-1.0, 1.0], 0)

        def select_max_action(self):
            """Return the move character of the most-confident action, ties
            broken uniformly at random."""
            confidence = self.cond_hist()
            selected_action = random.choice([i for i, c in enumerate(confidence) if max(confidence) == c])
            return self.actions[selected_action].evaluate()

        def clone(self):
            """Independent copy of this accumulator.

            Fixed: the original instantiated an undefined class
            (`OppRewardCounter`), sliced the integer counters (a TypeError),
            and never copied `scores_for_cond_hist`.
            """
            c = ScoreAccumulator(self.actions)
            c.scores = self.scores[:]
            c.scores_for_cond_hist = self.scores_for_cond_hist[:]
            c.obs_count = self.obs_count
            c.modified_count = self.modified_count
            return c

        def __repr__(self):
            return self.scores.__repr__()

    class Rock:
        """Degenerate strategy: always throws rock."""

        def reset(self, actions):
            # Stateless; nothing to rebuild.
            return self

        def evaluate(self):
            return rps[0]

        def learn(self, me, opp):
            # Nothing to learn from history.
            pass

        def clone(self):
            # Immutable, so the instance itself can be shared.
            return self

        def cond_hist(self, me = None):
            # Constant confidence vector over (R, P, S).
            return [0, -1, 1]

    class Paper:
        """Degenerate strategy: always throws paper."""

        def reset(self, actions):
            # Stateless; nothing to rebuild.
            return self

        def evaluate(self):
            return rps[1]

        def learn(self, me, opp):
            # Nothing to learn from history.
            pass

        def clone(self):
            # Immutable, so the instance itself can be shared.
            return self

        def cond_hist(self, me = None):
            # Constant confidence vector over (R, P, S).
            return [1, 0, -1]

    class Scissors:
        """Degenerate strategy: always throws scissors."""

        def reset(self, actions):
            # Stateless; nothing to rebuild.
            return self

        def evaluate(self):
            return rps[2]

        def learn(self, me, opp):
            # Nothing to learn from history.
            pass

        def clone(self):
            # Immutable, so the instance itself can be shared.
            return self

        def cond_hist(self, me = None):
            # Constant confidence vector over (R, P, S).
            return [-1, 1, 0]

    class Freq:
        """Order-N frequency predictor.

        Keeps one ScoreAccumulator per tuple of the last N opponent moves
        and, once a context has data, derives a counter-move from that
        accumulator's best-scoring action.
        """

        def __init__(self, N, actions):
            self.N = N              # context length, in opponent moves
            self.actions = actions  # shared Rock/Paper/Scissors objects

            self.history = []       # opponent moves observed so far
            self.accumulators = {}  # context tuple -> ScoreAccumulator
            self.rewards = None     # cleared on every learn(); not read here
            self.total_obs = 0      # rounds that updated an accumulator

            self.empty_acc = ScoreAccumulator(self.actions)

        def evaluate(self):
            """Return the next move ('R'/'P'/'S'); random until the current
            context has accumulated data."""
            if (len(self.history) >= self.N):
                key = tuple(self.history[-self.N:])
                if not (key in self.accumulators):
                    # Unseen context: lazily create its accumulator and guess.
                    self.accumulators.update({key: ScoreAccumulator(self.actions)})
                    e = random.choice(self.actions)
                else:
                    prediction = self.actions[values[self.accumulators[key].select_max_action()]]
                    # Double rotation via beatBy(beatBy(.)) picks the hand the
                    # prediction beats; presumably the shifted credit scheme in
                    # ScoreAccumulator.update is calibrated for this -- confirm
                    # before changing either side.
                    e = self.actions[values[beatBy[beatBy[prediction.evaluate()]]]]
                    # print key, self.accumulators[key].cond_hist(), self.accumulators[key].scores, self.accumulators[key].scores_for_cond_hist
            else:
                e = random.choice(self.actions)

            self.last_action = action = e.evaluate()
            # print "playing action ", action

            return action

        def learn(self, me, opp):
            """Record one round: extend the history and update the accumulator
            of the context that preceded the opponent's move."""
            # print "actual ", me, opp
            self.rewards = None
            self.history.append(opp)

            if (len(self.history) > self.N):
                # The context deliberately excludes the move just appended.
                key = tuple(self.history[-self.N-1:-1])
                if not (key in self.accumulators):
                    self.accumulators.update({key: ScoreAccumulator(self.actions)})

                self.total_obs += 1
                # print me, opp
                self.accumulators[key].update(me, opp)
                # print self.accumulators
                # print key, self.evaluate()

            return

        def clone(self):
            """Copy history, per-context accumulators and the counter."""
            c = Freq(self.N, self.actions)
            c.history = self.history[:]
            for key in self.accumulators.keys():
                c.accumulators[key] = self.accumulators[key].clone()
            c.total_obs = self.total_obs
            return c

        def cond_hist(self, me = None):
            # Note: evaluate() has side effects (may insert an accumulator and
            # sets last_action) even when called only for the histogram.
            action = self.evaluate()
            return self.actions[values[action]].cond_hist()

        def __repr__(self):
            return self.accumulators.__repr__()


    class Bayes:
        """Naive-Bayes-flavoured pattern matcher (a modification of the public
        'bayes13' rpscontest bot).

        Several context views -- joint history, opponent-only and self-only
        patterns, each with a second-order (pattern-of-patterns) variant --
        vote multiplicatively on the opponent's next hand; counter_prob()
        then converts the vote into our own play.
        """

        name = "Bayes13-Modified"

        def __init__(self, lookback):
            # score: payoff of the two-character pair "my hand + opp hand".
            self.score = {'RR': 0, 'PP': 0, 'SS': 0, 'PR': 1, 'RS': 1, 'SP': 1,'RP': -1, 'SR': -1, 'PS': -1,}
            # cscore: classifies a pair as 'b'(eat) / 'c'(ede) / 'r'(epeat).
            self.cscore = {'RR': 'r', 'PP': 'r', 'SS': 'r', 'PR': 'b', 'RS': 'b', 'SP': 'b','RP': 'c', 'SR': 'c', 'PS': 'c',}
            self.beat = {'P': 'S', 'S': 'R', 'R': 'P'}
            self.cede = {'P': 'R', 'S': 'P', 'R': 'S'}

            self.lookback = lookback  # max pattern length, in characters

            self.reset()

        def reset(self):
            """(Re)initialise all counters.  Every count starts at 1 via the
            defaultdict factories, giving Laplace smoothing."""
            self.played_probs = defaultdict(lambda: 1)
            # First-order counts of the opponent's next hand, conditioned on
            # the joint-history pattern prediction (my / opp / both).
            self.opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.my_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.both_probs = defaultdict(lambda: defaultdict(lambda: 1))

            # Same, conditioned on the opponent-only pattern prediction.
            self.singleopp_opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singleopp_my_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singleopp_both_probs = defaultdict(lambda: defaultdict(lambda: 1))

            # Same, conditioned on the self-only pattern prediction.
            self.singlemy_opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singlemy_my_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singlemy_both_probs = defaultdict(lambda: defaultdict(lambda: 1))

            # Second-order (pattern-of-patterns) variants of the above.
            self.opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.both2_probs = defaultdict(lambda: defaultdict(lambda: 1))

            self.singleopp_opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singleopp_my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singleopp_both2_probs = defaultdict(lambda: defaultdict(lambda: 1))

            self.singlemy_opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singlemy_my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singlemy_both2_probs = defaultdict(lambda: defaultdict(lambda: 1))

            # Strategy-transition counts, split by last round's outcome.
            self.win_probs = defaultdict(lambda: 1)
            self.lose_probs = defaultdict(lambda: 1)
            self.tie_probs = defaultdict(lambda: 1)

            # How the opponent answers each prediction (beat/cede/repeat).
            self.opp_answers = {'c': 1, 'b': 1, 'r': 1}
            self.my_answers = {'c': 1, 'b': 1, 'r': 1}

            self.opp2_answers = {'c': 1, 'b': 1, 'r': 1}
            self.my2_answers = {'c': 1, 'b': 1, 'r': 1}

            self.singleopp_opp_answers = {'c': 1, 'b': 1, 'r': 1}
            self.singleopp_my_answers = {'c': 1, 'b': 1, 'r': 1}

            self.singleopp_opp2_answers = {'c': 1, 'b': 1, 'r': 1}
            self.singleopp_my2_answers = {'c': 1, 'b': 1, 'r': 1}

            self.singlemy_opp_answers = {'c': 1, 'b': 1, 'r': 1}
            self.singlemy_my_answers = {'c': 1, 'b': 1, 'r': 1}

            self.singlemy_opp2_answers = {'c': 1, 'b': 1, 'r': 1}
            self.singlemy_my2_answers = {'c': 1, 'b': 1, 'r': 1}

            # Pattern dictionaries: context string -> concatenation of the
            # "output+input" pairs that followed it.
            self.patterndict = defaultdict(str)
            self.patterndict2 = defaultdict(str)
            self.opppatterndict = defaultdict(str)
            self.opppatterndict2 = defaultdict(str)
            self.mypatterndict = defaultdict(str)
            self.mypatterndict2 = defaultdict(str)

            self.csu = [0] * 6 # consecutive strategy usage
            self.csc = []  # consecutive strategy candidates

            # NOTE(review): this assigns a method-local `output` that is never
            # read; it does not touch the global opening move.
            output = random.choice(["R", "P", "S"])
            self.hist = ""     # interleaved my+opp move history
            self.myhist = ""   # my moves only
            self.opphist = ""  # opponent moves only

            # Current predictions from each view ("" = no prediction yet).
            self.my = self.opp = self.my2 = self.opp2 =  ""
            self.singleopp_my = self.singleopp_opp = self.singleopp_my2 = self.singleopp_opp2 = ""
            self.singlemy_my = self.singlemy_opp = self.singlemy_my2 = self.singlemy_opp2 = ""

            self.sc = 0           # last round's payoff for us (-1/0/1)
            self.opp_strats = []  # indices of best-matching candidate strategies

        def counter_prob(self, probs):
            """Return the hand maximising expected payoff under `probs`.

            NOTE(review): the append sits inside the inner loop, so partial
            weighted sums are also candidates for max(); the original bayes13
            appends once per hand -- confirm the difference is intentional.
            """
            weighted_list = []
            for h in ['R', 'P', 'S']:
                weighted = 0
                for p in probs.keys():
                    points = self.score[h+p]
                    prob = probs[p]
                    weighted += points * prob
                    weighted_list.append((h, weighted))

            return max(weighted_list, key=operator.itemgetter(1))[0]

        def learn(self, output, input):
            """Fold one finished round (our `output`, opponent's `input`) into
            every counter table and recompute the per-view predictions."""
            self.previous_opp_strats = self.opp_strats[:]
            self.previous_sc = self.sc

            self.sc = self.score[output + input]
            # Track how long each candidate guess from the previous round has
            # kept matching the opponent's actual play.
            for i, c in enumerate(self.csc):
                if c == input:
                    self.csu[i] += 1
                else:
                    self.csu[i] = 0

            m = max(self.csu)
            self.opp_strats = [i for i, c in enumerate(self.csc) if self.csu[i] == m]

            # Strategy-transition counts keyed by last round's outcome; the
            # chr(i) values are just compact one-character dict keys.
            if self.previous_sc == 1:
                for s1 in self.previous_opp_strats:
                    for s2 in self.opp_strats:
                        self.win_probs[chr(s1)+chr(s2)] += 1

            if self.previous_sc == 0:
                for s1 in self.previous_opp_strats:
                    for s2 in self.opp_strats:
                        self.tie_probs[chr(s1)+chr(s2)] += 1

            if self.previous_sc == -1:
                for s1 in self.previous_opp_strats:
                    for s2 in self.opp_strats:
                        self.lose_probs[chr(s1)+chr(s2)] += 1

            # Count how the opponent answered each view's prediction.
            if self.my and self.opp:
                self.opp_answers[self.cscore[input+self.opp]] += 1
                self.my_answers[self.cscore[input+self.my]] += 1
            if self.my2 and self.opp2:
                self.opp2_answers[self.cscore[input+self.opp2]] += 1
                self.my2_answers[self.cscore[input+self.my2]] += 1

            if self.singleopp_my and self.singleopp_opp:
                self.singleopp_opp_answers[self.cscore[input+self.singleopp_opp]] += 1
                self.singleopp_my_answers[self.cscore[input+self.singleopp_my]] += 1
            if self.singleopp_my2 and self.singleopp_opp2:
                self.singleopp_opp2_answers[self.cscore[input+self.singleopp_opp2]] += 1
                self.singleopp_my2_answers[self.cscore[input+self.singleopp_my2]] += 1

            if self.singlemy_my and self.singlemy_opp:
                self.singlemy_opp_answers[self.cscore[input+self.singlemy_opp]] += 1
                self.singlemy_my_answers[self.cscore[input+self.singlemy_my]] += 1
            if self.singlemy_my2 and self.singlemy_opp2:
                self.singlemy_opp2_answers[self.cscore[input+self.singlemy_opp2]] += 1
                self.singlemy_my2_answers[self.cscore[input+self.singlemy_my2]] += 1

            # Append this round's continuation to every matching joint pattern
            # (step -2: the joint history holds two chars per round).
            for length in range(min(self.lookback, len(self.hist)), 0, -2):
                pattern = self.patterndict[self.hist[-length:]]
                if pattern:
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        self.patterndict2[pattern[-length2:]] += output + input
                self.patterndict[self.hist[-length:]] += output + input

            # singleopp
            # NOTE: integer division under Python 2 -- single-sided patterns
            # use half the joint lookback.
            for length in range(min(self.lookback / 2, len(self.opphist)), 0, -1):
                pattern = self.opppatterndict[self.opphist[-length:]]
                if pattern:
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        self.opppatterndict2[pattern[-length2:]] += output + input
                self.opppatterndict[self.opphist[-length:]] += output + input

            # singlemy
            for length in range(min(self.lookback / 2, len(self.myhist)), 0, -1):
                pattern = self.mypatterndict[self.myhist[-length:]]
                if pattern:
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        self.mypatterndict2[pattern[-length2:]] += output + input
                self.mypatterndict[self.myhist[-length:]] += output + input

            # Conditional counts keyed by the predictions made before this round.
            self.played_probs[input] += 1
            self.opp_probs[self.opp][input] += 1
            self.my_probs[self.my][input] += 1
            self.both_probs[self.my+self.opp][input] += 1

            self.opp2_probs[self.opp2][input] += 1
            self.my2_probs[self.my2][input] += 1
            self.both2_probs[self.my2+self.opp2][input] += 1

            self.hist += output + input
            self.myhist += output
            self.opphist += input

            # Recompute the predictions for next round, longest match first.
            self.my = self.opp = self.my2 = self.opp2 = ""
            self.singleopp_my = self.singleopp_opp = self.singleopp_my2 = self.singleopp_opp2 = ""
            self.singlemy_my = self.singlemy_opp = self.singlemy_my2 = self.singlemy_opp2 = ""

            for length in range(min(self.lookback, len(self.hist)), 0, -2):
                pattern = self.patterndict[self.hist[-length:]]
                if pattern != "":
                    # Last recorded continuation: my move then opponent's.
                    self.my = pattern[-2]
                    self.opp = pattern[-1]
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        pattern2 = self.patterndict2[pattern[-length2:]]
                        if pattern2 != "":
                            self.my2 = pattern2[-2]
                            self.opp2 = pattern2[-1]
                            break
                    break

            # singleopp
            for length in range(min(self.lookback / 2, len(self.opphist)), 0, -1):
                pattern = self.opppatterndict[self.opphist[-length:]]
                if pattern != "":
                    self.singleopp_my = pattern[-2]
                    self.singleopp_opp = pattern[-1]
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        pattern2 = self.opppatterndict2[pattern[-length2:]]
                        if pattern2 != "":
                            self.singleopp_my2 = pattern2[-2]
                            self.singleopp_opp2 = pattern2[-1]
                            break
                    break

            # singlemy
            for length in range(min(self.lookback / 2, len(self.myhist)), 0, -1):
                pattern = self.mypatterndict[self.myhist[-length:]]
                if pattern != "":
                    self.singlemy_my = pattern[-2]
                    self.singlemy_opp = pattern[-1]
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        pattern2 = self.mypatterndict2[pattern[-length2:]]
                        if pattern2 != "":
                            self.singlemy_my2 = pattern2[-2]
                            self.singlemy_opp2 = pattern2[-1]
                            break
                    break

        def evaluate(self):
            """Multiply the votes of every available context view into a score
            per opponent hand, then counter the resulting distribution."""
            probs = {}
            for hand in rps:
                probs[hand] = self.played_probs[hand]

            if self.my and self.opp:
                # Joint-history view plus the strategy-transition evidence.
                for hand in rps:
                    probs[hand] *= self.opp_probs[self.opp][hand] * self.my_probs[self.my][hand] * self.both_probs[self.my+self.opp][hand]
                    probs[hand] *= self.opp_answers[self.cscore[hand+self.opp]] * self.my_answers[self.cscore[hand+self.my]]

                self.csc = [self.opp, self.beat[self.opp], self.cede[self.opp], self.my, self.cede[self.my], self.beat[self.my]]

                strats_for_hand = {'R': [], 'P': [], 'S': []}
                for i, c in enumerate(self.csc):
                    strats_for_hand[c].append(i)

                # Pick the transition table matching last round's outcome.
                if self.sc == 1:
                    pr = self.win_probs
                if self.sc == 0:
                    pr = self.tie_probs
                if self.sc == -1:
                    pr = self.lose_probs

                for hand in rps:
                    for s1 in self.opp_strats:
                        for s2 in strats_for_hand[hand]:
                            probs[hand] *= pr[chr(s1)+chr(s2)]
            else:
                self.csc = []

            # Opponent-only view.
            if self.singleopp_my and self.singleopp_opp:
                for hand in rps:
                    probs[hand] *= self.singleopp_opp_probs[self.singleopp_opp][hand] * \
                                    self.singleopp_my_probs[self.singleopp_my][hand] * \
                                    self.singleopp_both_probs[self.singleopp_my+self.singleopp_opp][hand]
                    probs[hand] *= self.singleopp_opp_answers[self.cscore[hand+self.singleopp_opp]] * self.singleopp_my_answers[self.cscore[hand+self.singleopp_my]]

            # Self-only view.
            if self.singlemy_my and self.singlemy_opp:
                for hand in rps:
                    probs[hand] *= self.singlemy_opp_probs[self.singlemy_opp][hand] * \
                                    self.singlemy_my_probs[self.singlemy_my][hand] * \
                                    self.singlemy_both_probs[self.singlemy_my+self.singlemy_opp][hand]
                    probs[hand] *= self.singlemy_opp_answers[self.cscore[hand+self.singlemy_opp]] * self.singlemy_my_answers[self.cscore[hand+self.singlemy_my]]

            # Second-order views.
            if self.my2 and self.opp2:
                for hand in rps:
                    probs[hand] *= self.opp2_probs[self.opp2][hand] * self.my2_probs[self.my2][hand] * self.both2_probs[self.my2+self.opp2][hand]
                    probs[hand] *= self.opp2_answers[self.cscore[hand+self.opp2]] * self.my2_answers[self.cscore[hand+self.my2]]

            if self.singleopp_my2 and self.singleopp_opp2:
                for hand in rps:
                    probs[hand] *= self.singleopp_opp2_probs[self.singleopp_opp2][hand] *\
                                    self.singleopp_my2_probs[self.singleopp_my2][hand] *\
                                    self.singleopp_both2_probs[self.singleopp_my2+self.singleopp_opp2][hand]
                    probs[hand] *= self.singleopp_opp2_answers[self.cscore[hand+self.singleopp_opp2]] * \
                                    self.singleopp_my2_answers[self.cscore[hand+self.singleopp_my2]]

            if self.singlemy_my2 and self.singlemy_opp2:
                for hand in rps:
                    probs[hand] *= self.singlemy_opp2_probs[self.singlemy_opp2][hand] *\
                                    self.singlemy_my2_probs[self.singlemy_my2][hand] *\
                                    self.singlemy_both2_probs[self.singlemy_my2+self.singlemy_opp2][hand]
                    probs[hand] *= self.singlemy_opp2_answers[self.cscore[hand+self.singlemy_opp2]] * \
                                    self.singlemy_my2_answers[self.cscore[hand+self.singlemy_my2]]

            output = self.counter_prob(probs)
            return output

    class Bayes13Core:
        """Adapter exposing the Bayes(10) model through the common
        evaluate/learn/clone/cond_hist strategy interface."""

        def __init__(self):
            self.model = Bayes(10)
            self.payoff_accumulator = ScoreAccumulator(actions)

        def learn(self, me, opp):
            """Feed the finished round to the payoff tracker and the model."""
            self.payoff_accumulator.update(me, opp)
            self.model.learn(me, opp)

        def evaluate(self):
            """Next move comes straight from the Bayes model."""
            return self.model.evaluate()

        def cond_hist(self, me = None):
            return self.payoff_accumulator.cond_hist()

        def clone(self):
            # don't actually clone since it's expensive
            return self

    class MarkovCore:
        """Ensemble of Markov predictors (orders 0..max_markov_order) plus a
        random expert, blended by exponentially-decayed scores."""

        def __init__(self, max_markov_order):
            self.debug = 0

            self.USE_RANDOM = 1  # include the random-optimal expert
            self.USE_MARKOW = 1  # include the Markov experts

            self.LAST_ROUND = 1000  # round at which debug stats would print
            self.ROUND = 1

            self.max_markov_order = max_markov_order

            self.history = []  # list of (my move, opponent move) pairs
            self.moves = ["R","P","S"]
            self.beatedBy = {"R":"P", "P":"S", "S":"R"}
            # result[mine][theirs] -> +1 win / 0 tie / -1 loss
            self.result = {"R":{"R":0, "P":-1, "S":1}, "P":{"R":1, "P":0, "S":-1}, "S":{"R":-1, "P":1, "S":0}}

            self.alpha = 0.01  # learning rate for the expert weights
            self.M = 0         # total number of experts

            if self.USE_RANDOM == 1:
                self.M += 1

            if self.USE_MARKOW == 1:
                self.markov_orders = []
                for m in range(0, max_markov_order + 1):
                    self.markov_orders.append(m)
                self.historyCount = {}
                # Six experts per order: my-move and opp-move predictors,
                # each with three meta rotations.
                self.M += 6 * len(self.markov_orders)


            self.weight = [1] * self.M
            self.decay = [0.85] * self.M

            self.score = [0] * self.M
            self.selected = [0] * self.M
            self.move = [random.choice(self.moves) for i in range(self.M)]

            self.last = output  # seeded from the global opening move

        def selectBest(self, s):
            """Index of the maximal entry, ties broken uniformly at random."""
            return random.choice([i for i in range(len(s)) if max(s) == s[i]])

        def selectBestDict(self, s):
            """Move with maximal expected payoff against the counted
            distribution `s`, ties broken uniformly at random."""
            ew = {i:s[self.beatedBy[self.beatedBy[i]]] - s[self.beatedBy[i]] for i in s.keys()};
            return random.choice([i for i in ew.keys() if max(ew.values()) == ew[i]])

        def learn(self, output, input):
            """Score every expert against the opponent's move `input`, then
            refresh each expert's proposal for the next round."""
            if not input:
                return

            self.ROUND += 1
            self.history += [(self.last,input)]
            # Decayed per-expert payoff, plus a slow weight adaptation.
            self.score = [ self.decay[i] * self.score[i] + self.weight[i] * self.result[self.move[i]][input] for i in range(self.M)]
            self.weight = [ self.weight[i] + self.alpha * self.result[self.move[i]][input] for i in range(self.M)]

            index = 0
            # random optimal
            if self.USE_RANDOM == 1:
                self.move[index] = random.choice(self.moves)
                # adjust random optimal score to zero
                self.score[index] = 0
                index += 1

            first_meta_index = index

            if self.USE_MARKOW == 1:
                # markow with meta strategies
                # Count the continuation that followed each order-m context.
                for m in self.markov_orders:
                    if len(self.history) > m:
                        key = tuple(self.history[-m-1:-1])
                        if not (key in self.historyCount):
                            self.historyCount[key] = [{"R":0,"P":0,"S":0},{"R":0,"P":0,"S":0}]
                        self.historyCount[key][0][self.history[-1][0]] += 1
                        self.historyCount[key][1][self.history[-1][1]] += 1

                # Propose from the current order-m context: slot `index` uses
                # my-move counts, slot `index+3` the opponent's.
                for m in self.markov_orders:
                    if len(self.history) >= m:
                        key = tuple(self.history[-m:])
                        if key in self.historyCount:
                            self.move[index]   = self.selectBestDict(self.historyCount[key][0])
                            self.move[index+3] = self.selectBestDict(self.historyCount[key][1])
                        else:
                            self.move[index]   = random.choice(self.moves)
                            self.move[index+3] = random.choice(self.moves)
                    else:
                        self.move[index]   = random.choice(self.moves)
                        self.move[index+3] = random.choice(self.moves)
                    index += 6

            # set other meta strategies
            for i in range(first_meta_index, self.M, 3):
                self.move[i+1] = self.beatedBy[self.move[i]]
                self.move[i+2] = self.beatedBy[self.move[i+1]]

        def evaluate(self):
            """Play the proposal of the currently best-scoring expert."""
            best = self.selectBest(self.score)
            self.selected[best] += 1
            output = self.move[best]
            self.last = output

            if self.debug == 1:
                if self.ROUND == self.LAST_ROUND:
                    print "score =", self.score
                    print "selected =", self.selected

            return output

        def clone(self):
            # A fresh instance; learned state is intentionally not copied.
            return MarkovCore(self.max_markov_order)

    class Beats:
        """Meta-strategy wrapper: predicts with the inner model, then plays
        the hand that beats that prediction."""

        def __init__(self, model, inner_name, depth = 0):
            self.model = model
            self.depth = depth            # how many Beats layers deep we are
            self.inner_name = inner_name  # name of the innermost model

        def evaluate(self):
            """Counter the wrapped model's prediction."""
            return beatBy[self.model.evaluate()]

        def learn(self, me, opp):
            self.model.learn(me, opp)

        def clone(self):
            return Beats(self.model.clone(), self.inner_name, self.depth)

        def cond_hist(self, me = None):
            # The inner histogram shifted one step along the R->P->S cycle.
            return rotate(self.model.cond_hist(), 1)

    class Strategy:
        """Named wrapper pairing a predictive model with bookkeeping about the
        shared strategy universe; identity is the universe index."""

        def __init__(self, name, myIndex, strategy_universe, universe_size, actions, inner_model):
            self.name = name
            self.myIndex = myIndex
            self.strategy_universe = strategy_universe
            self.universe_size = universe_size
            self.actions = actions
            self.model = inner_model
            self.beaten_by = None  # lazily-resolved countering strategy

        def evaluate(self):
            """Delegate move selection to the wrapped model."""
            return self.model.evaluate()

        def learn(self, me, opp):
            self.model.learn(me, opp)

        def cond_hist(self, me = None):
            return self.model.cond_hist()

        def find_strategy(self, target_name):
            """Linear search of the universe by name; None if absent."""
            matches = [s for s in self.strategy_universe if s.name == target_name]
            return matches[0] if matches else None

        def find_beat_by(self):
            """Return the strategy that counters this one, reusing an existing
            universe entry when possible and building a Beats wrapper otherwise."""
            target_name = "B(" + self.name + ")"
            existing = self.find_strategy(target_name)
            if existing is not None:
                return existing

            depth = self.model.depth if isinstance(self.model, Beats) else 1

            # print "new ", target_name
            return Strategy(
                target_name,
                (self.myIndex + self.universe_size) % (self.universe_size * 3),
                self.strategy_universe,
                self.universe_size,
                self.actions,
                Beats(self.model.clone(), self.name, depth + 1)
            )

        def get_beat_by(self):
            # A full cycle of Beats wrappers collapses back to the inner model.
            if isinstance(self.model, Beats) and (self.model.depth + 1) % len(self.actions) == 0:
                return self.find_strategy(self.model.inner_name)
            return self.find_beat_by()

        def beatenBy(self):
            """Cached accessor for the countering strategy."""
            if not self.beaten_by:
                self.beaten_by = self.get_beat_by()
            return self.beaten_by

        def clone(self):
            return Strategy(self.name, self.myIndex, self.strategy_universe, self.universe_size, self.actions, self.model.clone())

        def __hash__(self):
            # NOTE: in the online learning version the pd can change, so we can't use it in the hash result
            return self.myIndex

        def __eq__(self, other):
            return self.myIndex == other.myIndex

        def __ne__(self, other):
            return not self.__eq__(other)

        def __repr__(self):
            return self.name

    class FreqStrategySelector:
        """Meta-selector: runs every Strategy in the universe in parallel and
        plays the staged move of the one with the best decayed score."""

        def __init__(self, strategy_universe):
            self.debug = 0

            self.ROUND = 1
            self.LAST_ROUND = 1000  # round at which debug stats would print

            self.strategy_universe = strategy_universe
            self.N = len(self.strategy_universe)
            self.last_action = [None] * self.N  # each strategy's staged move

            # Initial scores are biased toward earlier (lower-index)
            # strategies via a cubic ramp.
            self.score = [0] * self.N
            for i in range(self.N):
                scalar = (self.N-1 - i) / float(self.N)
                self.score[i] = scalar * scalar * scalar

            # print self.score

            self.weight = [1] * self.N
            self.decay = [0.85] * self.N
            self.selected = [0] * self.N  # per-strategy pick counts (debug)
            # self.alpha = 0.005
            self.alpha = 0.009  # weight learning rate
            # print self.alpha

        def learn(self, me, opp):
            """Score every strategy's previous proposal against `opp`, then let
            each strategy learn the round and stage its next proposal."""
            self.ROUND += 1

            if self.last_action[0]:
                # update scores and weights
                self.score = [ self.decay[i] * self.score[i] + self.weight[i] * (1 if self.last_action[i] == beatBy[opp] else -1 if beatBy[self.last_action[i]] == opp else 0) for i in range(self.N)]
                self.weight = [ self.weight[i] + self.alpha * (1 if self.last_action[i] == beatBy[opp] else -1 if beatBy[self.last_action[i]] == opp else 0) for i in range(self.N)]

            for i,s in enumerate(self.strategy_universe):
                s.learn(me, opp)
                self.last_action[i] = s.evaluate()

        def evaluate(self):
            """Return the staged move of the best-scoring strategy; ties are
            broken uniformly at random."""
            # # try pd
            # pd = normalize(self.score)
            # sel = choice([i for i in range(len(self.strategy_universe))], 1, pd)[0]

            # try max
            sel = random.choice([i for i in range(len(self.strategy_universe)) if max(self.score) == self.score[i]])

            self.selected[sel] += 1

            if self.debug == 1:
                if self.ROUND == self.LAST_ROUND:
                    print "score ", self.score
                    print "weight ", self.weight
                    print "selected ", self.selected
            return self.last_action[sel]

    # ---- build the strategy universe (runs once, on round one) ----
    # Three shared constant action objects.
    actions = [Rock(), Paper(), Scissors()]
    for a in actions:
        a.reset(actions)

    strategy_universe = []

    # Base opponent models: one Markov ensemble plus Freq predictors with
    # context lengths 10 down to 1 (the Bayes13 model is disabled).
    opponent_models = [
        ("M", MarkovCore(6)),
        # ("B", Bayes13Core()),
        ("F10", Freq(10, actions)),
        ("F9", Freq(9, actions)),
        ("F8", Freq(8, actions)),
        ("F7", Freq(7, actions)),
        ("F6", Freq(6, actions)),
        ("F5", Freq(5, actions)),
        ("F4", Freq(4, actions)),
        ("F3", Freq(3, actions)),
        ("F2", Freq(2, actions)),
        ("F1", Freq(1, actions)),
    ]

    universe_size = len(opponent_models) * 3

    # Tier 0: play each model's raw move.
    length = len(strategy_universe)
    for i,model in enumerate(opponent_models):
        strategy_universe.append(Strategy(model[0], i, None, universe_size, actions, model[1]))

    # Tier 1: play the move that beats each model's move.
    length = len(strategy_universe)
    for i,model in enumerate(opponent_models):
        strategy_universe.append(Strategy("B(" + model[0] + ")", length + i, None, universe_size, actions, Beats(model[1].clone(), model[0], 1)))

    # Tier 2: beat the beater.
    length = len(strategy_universe)
    for i,model in enumerate(opponent_models):
        strategy_universe.append(Strategy("B(B(" + model[0] + "))", length + i, None, universe_size, actions, Beats(Beats(model[1].clone(), model[0], 1), model[0], 2)))

    # Wire every strategy back to the shared universe list.
    for s in strategy_universe:
        s.strategy_universe = strategy_universe

    strategy_set = FreqStrategySelector(strategy_universe)

else:
    # print "actual ", input, output

    # Later rounds: fold last round's (our move, opponent's move) into every
    # model, then emit the best-scoring strategy's staged move.
    strategy_set.learn(output, input)
    output = strategy_set.evaluate()