combo_markovselector_impr

This program has been disqualified.


Authorjlubea
Submission date2019-11-13 21:03:00.685142
Rating6699
Matches played23
Win rate73.91

Source code:

# Hybrid of two strong players with a first attempt at a markov strategy switcher
# 

if input == "":
    import random
    import operator
    from operator import add
    from operator import truediv
    from collections import defaultdict

    rps = ['R', 'P', 'S']
    fondness = {'R':[0,-1,0], 'P':[0,0,-1], 'S':[-1,0,0]}
    regrets = {'R':[0,0,1], 'P':[1,0,0], 'S':[0,1,0]}
    beats = {'R':'P', 'P':'S', 'S':'R'}
    values = { 'R': 0, 'P': 1, 'S': 2 }

    output = random.choice(rps)

    class MarkovPredictor:
        def __init__(self, max_markov_order):
            self.debug = 0

            self.USE_RANDOM = 1
            self.USE_MARKOW = 1

            self.LAST_ROUND = 1000
            self.ROUND = 1

            self.history = []
            self.moves = ["R","P","S"]
            self.beatedBy = {"R":"P", "P":"S", "S":"R"}
            self.result = {"R":{"R":0, "P":-1, "S":1}, "P":{"R":1, "P":0, "S":-1}, "S":{"R":-1, "P":1, "S":0}}

            self.alpha = 0.01
            self.M = 0

            if self.USE_RANDOM == 1:
                self.M += 1

            if self.USE_MARKOW == 1:
                self.markov_orders = []
                for m in range(0, max_markov_order + 1):
                    self.markov_orders.append(m)
                self.historyCount = {}
                self.M += 6 * len(self.markov_orders)


            self.weight = [1] * self.M
            self.decay = [0.85] * self.M

            self.score = [0] * self.M
            self.selected = [0] * self.M
            self.move = [random.choice(self.moves) for i in range(self.M)]

            self.last = output

        def selectBest(self, s):
            return random.choice([i for i in range(len(s)) if max(s) == s[i]])

        def selectBestDict(self, s):
            ew = {i:s[self.beatedBy[self.beatedBy[i]]] - s[self.beatedBy[i]] for i in s.keys()};
            return random.choice([i for i in ew.keys() if max(ew.values()) == ew[i]])

        def learn(self, output, input):
            if not input:
                return

            self.ROUND += 1
            self.history += [(self.last,input)]
            self.score = [ self.decay[i] * self.score[i] + self.weight[i] * self.result[self.move[i]][input] for i in range(self.M)]
            self.weight = [ self.weight[i] + self.alpha * self.result[self.move[i]][input] for i in range(self.M)]

            index = 0
            # random optimal
            if self.USE_RANDOM == 1:
                self.move[index] = random.choice(self.moves)
                # adjust random optimal score to zero
                self.score[index] = 0
                index += 1

            first_meta_index = index

            if self.USE_MARKOW == 1:
                # markow with meta strategies
                for m in self.markov_orders:
                    if len(self.history) > m:
                        key = tuple(self.history[-m-1:-1])
                        if not (key in self.historyCount):
                            self.historyCount[key] = [{"R":0,"P":0,"S":0},{"R":0,"P":0,"S":0}]
                            self.historyCount[key][0][self.history[-1][0]] += 1
                            self.historyCount[key][1][self.history[-1][1]] += 1

                for m in self.markov_orders:
                    if len(self.history) >= m:
                        key = tuple(self.history[-m:])
                        if key in self.historyCount:
                            self.move[index]   = self.selectBestDict(self.historyCount[key][0])
                            self.move[index+3] = self.selectBestDict(self.historyCount[key][1])
                        else:
                            self.move[index]   = random.choice(self.moves)
                            self.move[index+3] = random.choice(self.moves)
                    else:
                        self.move[index]   = random.choice(self.moves)
                        self.move[index+3] = random.choice(self.moves)
                    index += 6

            # set other meta strategies
            for i in range(first_meta_index, self.M, 3):
                self.move[i+1] = self.beatedBy[self.move[i]]
                self.move[i+2] = self.beatedBy[self.move[i+1]]

        def evaluate(self):
            best = self.selectBest(self.score)
            self.selected[best] += 1
            output = self.move[best]
            self.last = output

            if self.debug == 1:
                if self.ROUND == self.LAST_ROUND:
                    print "score =", self.score
                    print "selected =", self.selected            
            
            return output

    class Bayes:
        name = "Bayes13-Modified"

        def __init__(self, lookback):
            self.score = {'RR': 0, 'PP': 0, 'SS': 0, 'PR': 1, 'RS': 1, 'SP': 1,'RP': -1, 'SR': -1, 'PS': -1,}
            self.cscore = {'RR': 'r', 'PP': 'r', 'SS': 'r', 'PR': 'b', 'RS': 'b', 'SP': 'b','RP': 'c', 'SR': 'c', 'PS': 'c',}
            self.beat = {'P': 'S', 'S': 'R', 'R': 'P'}
            self.cede = {'P': 'R', 'S': 'P', 'R': 'S'}

            self.lookback = lookback

            self.reset()

        def reset(self):
            self.played_probs = defaultdict(lambda: 1)
            self.opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.my_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.both_probs = defaultdict(lambda: defaultdict(lambda: 1))

            self.singleopp_opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singleopp_my_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singleopp_both_probs = defaultdict(lambda: defaultdict(lambda: 1))

            self.singlemy_opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singlemy_my_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singlemy_both_probs = defaultdict(lambda: defaultdict(lambda: 1))

            self.opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.both2_probs = defaultdict(lambda: defaultdict(lambda: 1))

            self.singleopp_opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singleopp_my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singleopp_both2_probs = defaultdict(lambda: defaultdict(lambda: 1))

            self.singlemy_opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singlemy_my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
            self.singlemy_both2_probs = defaultdict(lambda: defaultdict(lambda: 1))

            self.win_probs = defaultdict(lambda: 1)
            self.lose_probs = defaultdict(lambda: 1)
            self.tie_probs = defaultdict(lambda: 1)

            self.opp_answers = {'c': 1, 'b': 1, 'r': 1}
            self.my_answers = {'c': 1, 'b': 1, 'r': 1}

            self.opp2_answers = {'c': 1, 'b': 1, 'r': 1}
            self.my2_answers = {'c': 1, 'b': 1, 'r': 1}  

            self.singleopp_opp_answers = {'c': 1, 'b': 1, 'r': 1}
            self.singleopp_my_answers = {'c': 1, 'b': 1, 'r': 1}

            self.singleopp_opp2_answers = {'c': 1, 'b': 1, 'r': 1}
            self.singleopp_my2_answers = {'c': 1, 'b': 1, 'r': 1}  

            self.singlemy_opp_answers = {'c': 1, 'b': 1, 'r': 1}
            self.singlemy_my_answers = {'c': 1, 'b': 1, 'r': 1}

            self.singlemy_opp2_answers = {'c': 1, 'b': 1, 'r': 1}
            self.singlemy_my2_answers = {'c': 1, 'b': 1, 'r': 1}

            self.patterndict = defaultdict(str)
            self.patterndict2 = defaultdict(str)
            self.opppatterndict = defaultdict(str)
            self.opppatterndict2 = defaultdict(str)
            self.mypatterndict = defaultdict(str)
            self.mypatterndict2 = defaultdict(str)

            self.csu = [0] * 6 # consecutive strategy usage
            self.csc = []  # consecutive strategy candidates

            output = random.choice(["R", "P", "S"])
            self.hist = "" 
            self.myhist = "" 
            self.opphist = "" 

            self.my = self.opp = self.my2 = self.opp2 =  ""
            self.singleopp_my = self.singleopp_opp = self.singleopp_my2 = self.singleopp_opp2 = ""
            self.singlemy_my = self.singlemy_opp = self.singlemy_my2 = self.singlemy_opp2 = ""

            self.sc = 0
            self.opp_strats = []

        def counter_prob(self, probs):
            weighted_list = []
            for h in ['R', 'P', 'S']:
                weighted = 0
                for p in probs.keys():
                    points = self.score[h+p]
                    prob = probs[p]
                    weighted += points * prob
                    weighted_list.append((h, weighted))

            return max(weighted_list, key=operator.itemgetter(1))[0]

        def learn(self, output, input):
            self.previous_opp_strats = self.opp_strats[:]
            self.previous_sc = self.sc

            self.sc = self.score[output + input]
            for i, c in enumerate(self.csc):
                if c == input:
                    self.csu[i] += 1
                else:
                    self.csu[i] = 0

            m = max(self.csu)
            self.opp_strats = [i for i, c in enumerate(self.csc) if self.csu[i] == m]
            
            if self.previous_sc == 1:
                for s1 in self.previous_opp_strats:
                    for s2 in self.opp_strats:
                        self.win_probs[chr(s1)+chr(s2)] += 1
            
            if self.previous_sc == 0:
                for s1 in self.previous_opp_strats:
                    for s2 in self.opp_strats:
                        self.tie_probs[chr(s1)+chr(s2)] += 1

            if self.previous_sc == -1:
                for s1 in self.previous_opp_strats:
                    for s2 in self.opp_strats:
                        self.lose_probs[chr(s1)+chr(s2)] += 1

            if self.my and self.opp:
                self.opp_answers[self.cscore[input+self.opp]] += 1
                self.my_answers[self.cscore[input+self.my]] += 1
            if self.my2 and self.opp2:
                self.opp2_answers[self.cscore[input+self.opp2]] += 1
                self.my2_answers[self.cscore[input+self.my2]] += 1

            if self.singleopp_my and self.singleopp_opp:
                self.singleopp_opp_answers[self.cscore[input+self.singleopp_opp]] += 1
                self.singleopp_my_answers[self.cscore[input+self.singleopp_my]] += 1
            if self.singleopp_my2 and self.singleopp_opp2:
                self.singleopp_opp2_answers[self.cscore[input+self.singleopp_opp2]] += 1
                self.singleopp_my2_answers[self.cscore[input+self.singleopp_my2]] += 1

            if self.singlemy_my and self.singlemy_opp:
                self.singlemy_opp_answers[self.cscore[input+self.singlemy_opp]] += 1
                self.singlemy_my_answers[self.cscore[input+self.singlemy_my]] += 1
            if self.singlemy_my2 and self.singlemy_opp2:
                self.singlemy_opp2_answers[self.cscore[input+self.singlemy_opp2]] += 1
                self.singlemy_my2_answers[self.cscore[input+self.singlemy_my2]] += 1

            for length in range(min(self.lookback, len(self.hist)), 0, -2):
                pattern = self.patterndict[self.hist[-length:]]
                if pattern:
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        self.patterndict2[pattern[-length2:]] += output + input
                self.patterndict[self.hist[-length:]] += output + input

            # singleopp
            for length in range(min(self.lookback / 2, len(self.opphist)), 0, -1):
                pattern = self.opppatterndict[self.opphist[-length:]]
                if pattern:
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        self.opppatterndict2[pattern[-length2:]] += output + input
                self.opppatterndict[self.opphist[-length:]] += output + input

            # singlemy
            for length in range(min(self.lookback / 2, len(self.myhist)), 0, -1):
                pattern = self.mypatterndict[self.myhist[-length:]]
                if pattern:
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        self.mypatterndict2[pattern[-length2:]] += output + input
                self.mypatterndict[self.myhist[-length:]] += output + input

            self.played_probs[input] += 1
            self.opp_probs[self.opp][input] += 1
            self.my_probs[self.my][input] += 1
            self.both_probs[self.my+self.opp][input] += 1

            self.opp2_probs[self.opp2][input] += 1
            self.my2_probs[self.my2][input] += 1
            self.both2_probs[self.my2+self.opp2][input] += 1

            self.hist += output + input
            self.myhist += output
            self.opphist += input

            self.my = self.opp = self.my2 = self.opp2 = ""
            self.singleopp_my = self.singleopp_opp = self.singleopp_my2 = self.singleopp_opp2 = ""
            self.singlemy_my = self.singlemy_opp = self.singlemy_my2 = self.singlemy_opp2 = ""

            for length in range(min(self.lookback, len(self.hist)), 0, -2):
                pattern = self.patterndict[self.hist[-length:]]
                if pattern != "":
                    self.my = pattern[-2]
                    self.opp = pattern[-1]
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        pattern2 = self.patterndict2[pattern[-length2:]]
                        if pattern2 != "":
                            self.my2 = pattern2[-2]
                            self.opp2 = pattern2[-1]
                            break
                    break

            # singleopp
            for length in range(min(self.lookback / 2, len(self.opphist)), 0, -1):
                pattern = self.opppatterndict[self.opphist[-length:]]
                if pattern != "":
                    self.singleopp_my = pattern[-2]
                    self.singleopp_opp = pattern[-1]
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        pattern2 = self.opppatterndict2[pattern[-length2:]]
                        if pattern2 != "":
                            self.singleopp_my2 = pattern2[-2]
                            self.singleopp_opp2 = pattern2[-1]
                            break
                    break

            # singlemy
            for length in range(min(self.lookback / 2, len(self.myhist)), 0, -1):
                pattern = self.mypatterndict[self.myhist[-length:]]
                if pattern != "":
                    self.singlemy_my = pattern[-2]
                    self.singlemy_opp = pattern[-1]
                    for length2 in range(min(self.lookback, len(pattern)), 0, -2):
                        pattern2 = self.mypatterndict2[pattern[-length2:]]
                        if pattern2 != "":
                            self.singlemy_my2 = pattern2[-2]
                            self.singlemy_opp2 = pattern2[-1]
                            break
                    break

        def evaluate(self):
            probs = {}
            for hand in rps:
                probs[hand] = self.played_probs[hand]
                
            if self.my and self.opp:
                for hand in rps:
                    probs[hand] *= self.opp_probs[self.opp][hand] * self.my_probs[self.my][hand] * self.both_probs[self.my+self.opp][hand]
                    probs[hand] *= self.opp_answers[self.cscore[hand+self.opp]] * self.my_answers[self.cscore[hand+self.my]]

                self.csc = [self.opp, self.beat[self.opp], self.cede[self.opp], self.my, self.cede[self.my], self.beat[self.my]]

                strats_for_hand = {'R': [], 'P': [], 'S': []}
                for i, c in enumerate(self.csc):
                    strats_for_hand[c].append(i)

                if self.sc == 1:
                    pr = self.win_probs
                if self.sc == 0:
                    pr = self.tie_probs
                if self.sc == -1:
                    pr = self.lose_probs

                for hand in rps:
                    for s1 in self.opp_strats:
                        for s2 in strats_for_hand[hand]:
                            probs[hand] *= pr[chr(s1)+chr(s2)]
            else:
                self.csc = []

            if self.singleopp_my and self.singleopp_opp:
                for hand in rps:
                    probs[hand] *= self.singleopp_opp_probs[self.singleopp_opp][hand] * \
                                    self.singleopp_my_probs[self.singleopp_my][hand] * \
                                    self.singleopp_both_probs[self.singleopp_my+self.singleopp_opp][hand]
                    probs[hand] *= self.singleopp_opp_answers[self.cscore[hand+self.singleopp_opp]] * self.singleopp_my_answers[self.cscore[hand+self.singleopp_my]]

            if self.singlemy_my and self.singlemy_opp:
                for hand in rps:
                    probs[hand] *= self.singlemy_opp_probs[self.singlemy_opp][hand] * \
                                    self.singlemy_my_probs[self.singlemy_my][hand] * \
                                    self.singlemy_both_probs[self.singlemy_my+self.singlemy_opp][hand]
                    probs[hand] *= self.singlemy_opp_answers[self.cscore[hand+self.singlemy_opp]] * self.singlemy_my_answers[self.cscore[hand+self.singlemy_my]]
                        
            if self.my2 and self.opp2:
                for hand in rps:
                    probs[hand] *= self.opp2_probs[self.opp2][hand] * self.my2_probs[self.my2][hand] * self.both2_probs[self.my2+self.opp2][hand]
                    probs[hand] *= self.opp2_answers[self.cscore[hand+self.opp2]] * self.my2_answers[self.cscore[hand+self.my2]]

            if self.singleopp_my2 and self.singleopp_opp2:
                for hand in rps:
                    probs[hand] *= self.singleopp_opp2_probs[self.singleopp_opp2][hand] *\
                                    self.singleopp_my2_probs[self.singleopp_my2][hand] *\
                                    self.singleopp_both2_probs[self.singleopp_my2+self.singleopp_opp2][hand]
                    probs[hand] *= self.singleopp_opp2_answers[self.cscore[hand+self.singleopp_opp2]] * \
                                    self.singleopp_my2_answers[self.cscore[hand+self.singleopp_my2]]

            if self.singlemy_my2 and self.singlemy_opp2:
                for hand in rps:
                    probs[hand] *= self.singlemy_opp2_probs[self.singlemy_opp2][hand] *\
                                    self.singlemy_my2_probs[self.singlemy_my2][hand] *\
                                    self.singlemy_both2_probs[self.singlemy_my2+self.singlemy_opp2][hand]
                    probs[hand] *= self.singlemy_opp2_answers[self.cscore[hand+self.singlemy_opp2]] * \
                                    self.singlemy_my2_answers[self.cscore[hand+self.singlemy_my2]]
            
            output = self.counter_prob(probs)
            return output

    class MarkovStrategySet:
        def __init__(self, history_start, history_length, step, min_markov_order, max_markov_order, markov_step):
            self.debug = 0

            self.USE_RANDOM = 1
            self.USE_MARKOW = 1

            self.LAST_ROUND = 999
            self.ROUND = 1

            self.history = []

            self.strategies = []
            for hl in range(history_start, history_length, step):
                basic_strat = Bayes(hl)
                self.strategies.append(basic_strat)

            for hl in range(min_markov_order, max_markov_order + 1, markov_step):
                basic_strat = MarkovPredictor(hl)
                self.strategies.append(basic_strat)

            self.last_plays = [-1] * len(self.strategies)
            self.wins = [0] * len(self.strategies)
            self.losses = [0] * len(self.strategies)
            self.ties = [0] * len(self.strategies)

            self.indices = []
            for i in range(0, len(self.strategies)):
                self.indices.append(i)
            
            self.beatedBy = {"R":"P", "P":"S", "S":"R"}
            self.result = {"R":{"R":0, "P":-1, "S":1}, "P":{"R":1, "P":0, "S":-1}, "S":{"R":-1, "P":1, "S":0}}

            self.alpha = 0.01
            self.M = 0
            self.M_tuple_size = 1

            if self.USE_RANDOM == 1:
                self.M += 1

            if self.USE_MARKOW == 1:
                self.markov_orders = []
                for m in range(0, 4):
                    self.markov_orders.append(m)
                self.historyCount = {}
                self.M += len(self.strategies) * 2 * len(self.markov_orders)

            self.weight = [1] * self.M
            self.decay = [0.85] * self.M

            self.score = [0] * self.M
            self.selected = [0] * self.M
            # self.strategy = [[random.choice(self.indices) for k in range(self.M_tuple_size)] for i in range(self.M)]
            self.strategy = [[random.choice(self.indices)] for i in range(self.M)]

            self.last = random.choice(self.indices)
            self.last_chosen_strat = self.last
        
        def selectBest(self, s):
            return random.choice([i for i in range(len(s)) if max(s) == s[i]])

        def selectBestDict(self, h, v):
            # ew = {i:h[i][v][0][self.beatedBy[self.beatedBy[self.last_plays[random.choice(self.strategy[i])]]]] - h[i][v][0][self.beatedBy[self.last_plays[random.choice(self.strategy[i])]]] for i in h.keys()};
            ew = {}
            for i in h.keys():
                rotated = self.rotate_strat(self.strategy[i])
                rotated_2 = self.rotate_strat(rotated)
                ew.update({i:h[i][v][0][random.choice(rotated_2)] - h[i][v][0][random.choice(rotated)]});

            return self.strategy[random.choice([i for i in ew.keys() if max(ew.values()) == ew[i]])]

        def normalize(self, counts):
            c = counts[:]
            total = sum(counts)
            if total == 0:
                c = [1] * len(c)
                total = sum(c)
            c = list(map(truediv, c, [total * 1.0] * len(c)))
            return c

        def max_index(self, arr):
            max_idx = 0
            for i in range(len(arr)):
                if (arr[i] > arr[max_idx]):
                    max_idx = i
            return max_idx

        def learn(self, me, opp):
            if self.last_plays[0] < 0:
                return
            
            self.ROUND += 1
            self.history.append(self.last)
            # print "result = ", [self.result[self.last_plays[self.strategy[i]]][input] for i in range(self.M)]
            
            # decay all alternative strategies
            for i in range(self.M):
                for s in range(len(self.strategy[i])):
                    self.score[i] = self.decay[i] * self.score[i] + self.weight[i] * self.result[self.last_plays[self.strategy[i][s]]][input]
                    self.weight[i] = self.weight[i] + self.alpha * self.result[self.last_plays[self.strategy[i][s]]][input]

            index = 0
            # random optimal
            if self.USE_RANDOM == 1:
                # self.strategy[index] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
                self.strategy[index] = [random.choice(self.indices)]
                # adjust random optimal score to zero
                self.score[index] = 0
                index += 1

            first_meta_index = index

            if self.USE_MARKOW == 1:
                # markow with meta strategies
                for m in self.markov_orders:
                    if len(self.history) > m:
                        key = self.history[-m]
                        if not (key in self.historyCount):
                            my_counts = [{}, {}, {}]
                            opp_counts = [{}, {}, {}]
                            for i in self.indices:
                                my_counts[0].update({i: 0})
                                my_counts[1].update({i: 0})
                                my_counts[2].update({i: 0})
                                opp_counts[0].update({i: 0})
                                opp_counts[1].update({i: 0})
                                opp_counts[2].update({i: 0})
                            
                            self.historyCount[key] = [my_counts, opp_counts]

                        # update my counts with reality
                        for i in range(len(self.strategies)):
                            a = self.last_plays[self.last_chosen_strat]
                            if self.last_plays[i] == a:
                                self.historyCount[key][0][0][i] += 1
                            elif self.beatedBy[a] == self.last_plays[i]:
                                self.historyCount[key][0][1][i] += 1
                            else:
                                self.historyCount[key][0][2][i] += 1

                        # update opp counts with reality
                        for i in range(len(self.strategies)):
                            a = opp
                            if self.last_plays[i] == a:
                                self.historyCount[key][1][0][i] += 1
                            elif self.beatedBy[a] == self.last_plays[i]:
                                self.historyCount[key][1][1][i] += 1
                            else:
                                self.historyCount[key][1][2][i] += 1

                for m in self.markov_orders:
                    if len(self.history) >= m:
                        key = self.history[-m]
                        if key in self.historyCount:
                            # self.strategy[index] = self.selectBestDict(self.historyCount[key][0][0])
                            # self.strategy[index + len(self.strategies)] = self.selectBestDict(self.historyCount[key][1][0])
                            self.strategy[index] = self.selectBestDict(self.historyCount, 0)
                            self.strategy[index + len(self.strategies)] = self.selectBestDict(self.historyCount, 1)
                        else:
                            # self.strategy[index] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
                            # self.strategy[index + len(self.strategies)] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
                            self.strategy[index] = [random.choice(self.indices)]
                            self.strategy[index + len(self.strategies)] = [random.choice(self.indices)]

                    else:
                        # self.strategy[index] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
                        # self.strategy[index + len(self.strategies)] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
                        self.strategy[index] = [random.choice(self.indices)]
                        self.strategy[index + len(self.strategies)] = [random.choice(self.indices)]

                    index += len(self.strategies)

            # set other meta strategies
            for i in range(first_meta_index, self.M, len(self.strategies)):
                # initial value
                rotation_strats = self.strategy[i]

                # compute the next sequence of rotations
                for offset in range(1, len(self.strategies)):
                    rotation_strats = self.rotate_strat(rotation_strats)
                    self.strategy[i+offset] = rotation_strats

            for si in range(0, len(self.strategies)):
                # lp = self.last_plays[si]
                self.strategies[si].learn(me, opp)

        def rotate_strat(self, rotation_strats):
            # get the strategy to pivot on
            strat = random.choice(rotation_strats)
            # print strat, len(self.last_plays)
            # ask the pivot strategy what it did last round
            rotation = self.beatedBy[self.last_plays[strat]]
            
            # build a set of strategies that match the pivot's response
            rotation_strats = []
            for si in range(len(self.strategies)):
                if self.last_plays[si] == rotation:
                    rotation_strats.append(si)

            # randomize to fill the rotation_strats
            while len(rotation_strats) < self.M_tuple_size:
                rotation_strats.append(random.choice(self.indices))
            while len(rotation_strats) > self.M_tuple_size:
                del rotation_strats[random.choice(range(len(rotation_strats)))]

            return rotation_strats

        def choice(self, arr, count, p):
            result = [0] * count
            accum = arr[:]
            for i in range(1, len(accum)):
                accum[i] += accum[i - 1]

            for c in range(count):
                r = random.uniform(0, 1)
                for i in range(0, len(accum)):
                    if i == len(accum) - 1:
                        result[c] = len(accum) - 1
                        break;
                    elif r >= accum[i] and r < accum[i + 1]:
                        result[c] = i
                        break;
                
            return result

        def evaluate(self):
            for si in range(0, len(self.strategies)):
                self.last_plays[si] = self.strategies[si].evaluate()

            best = self.selectBest(self.score)
            self.selected[best] += 1

            strat = None

            # test random strat choice
            strat = random.choice(self.strategy[best])
            # elif sel == 1:
            #     # todo: test pd strat choice
            #     possible_strats = self.strategy[best]
            #     pd = [0] * len(possible_strats)
            #     for ps in range(len(possible_strats)):
            #         key = possible_strats[ps]
            #         if key in self.historyCount:
            #             strat_tuple = self.strategy[key]
            #             for i in strat_tuple:
            #                 pd[ps] += self.historyCount[key][0][0][i] \
            #                         - self.historyCount[key][0][1][i] \
            #                         - self.historyCount[key][0][2][i] / 2.0
                
            #     # print "=======", pd
            #     pd = [x if x > 0 else 0 for x in pd]
            #     pd = self.normalize(pd)

            #     strat = self.choice(possible_strats, 1, pd)[0]

            #     # print "AAAAAAA", possible_strats, pd, strat
            #     # if strat in self.historyCount:
            #     #     print self.historyCount[strat]

            # elif sel == 2:
            #     # todo: test max strat choice
            #     possible_strats = self.strategy[best]
            #     pd = [0] * len(possible_strats)
            #     for ps in range(len(possible_strats)):
            #         key = possible_strats[ps]
            #         if key in self.historyCount:
            #             strat_tuple = self.strategy[key]
            #             for i in strat_tuple:
            #                 pd[ps] += self.historyCount[key][0][0][i] \
            #                         - self.historyCount[key][0][1][i] \
            #                         - self.historyCount[key][0][2][i] / 2.0

            #     strat = possible_strats[self.max_index(pd)];

            output = self.last_plays[strat]
            self.last_chosen_strat = strat
            self.last = best

            if self.debug == 1:
                # print self.ROUND, self.LAST_ROUND
                if self.ROUND == self.LAST_ROUND:
                    print "strats =", self.strategy
                    print "score =", self.score
                    print "selected =", self.selected            
            
            return output

    strategy_set = MarkovStrategySet(16, 17, 5, 6, 6, 2)
else:
    strategy_set.learn(output, input)
    output = strategy_set.evaluate()