greedy-agent.py
from copy import deepcopy
from random import randrange

import board as b
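
# NOTE: `board` is the project-local game module; b.Board is assumed to
# provide pieces, turn_count, get_all_actions, do_action and
# get_opponent_team, as used below.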


class Board2(b.Board):
    """Board subclass with additional evaluation methods."""

    def value_board(self, team):
        """Determine how favourable this board state is for `team`.

        Basic version: only considers the number of pieces on each team.

        :param team: the team that just moved
        :return: integer describing how favourable the state is for that
            team. Higher values are more favourable.
        """
        # simple version: just count the number of pieces on each side
        ally = team
        enemy = Board2.get_opponent_team(ally)
        counts = {ally: 0, enemy: 0}
        for piece in self.pieces:
            if piece is not None:
                counts[piece['team']] += 1
        return counts[ally] - counts[enemy]
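
    # Worked example (illustrative numbers): with 7 allied and 4 enemy pieces
    # on the board, value_board returns 7 - 4 = 3; the mirrored position
    # scores -3, so the greedy agent favours states that widen the piece gap.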


class Player(object):
    """
    An agent that can play Watch Your Back.

    This agent evaluates the board state that results from each available
    action and greedily picks among the best-scoring actions.
    """

    def __init__(self, colour):
        """
        :param colour: either 'white' or 'black'
        """
        # set up a new board
        self.board = Board2()
        # set up team allegiances
        if colour == 'white':
            self.team = b.WHITE
        elif colour == 'black':
            self.team = b.BLACK
        else:
            raise ValueError("colour must be 'white' or 'black'")
        self.enemy_team = b.Board.get_opponent_team(self.team)

    def action(self, turns):
        """
        Called by the referee to request an action from the player.

        :param turns: number of turns that have taken place since the start
            of the current game phase
        :return: the chosen action, or None to forfeit the turn
        """
        assert turns == self.board.turn_count
        # get the list of all available actions
        actions = self.board.get_all_actions(self.team)
        our_action = None  # will forfeit the turn if there are no actions

        # calculate how favourable the state is after applying each action,
        # keeping track of the best value seen so far
        # TODO consider undoing or reusing instead of instantiating more boards
        next_values = [0] * len(actions)
        best_value = float('-inf')
        for i in range(len(actions)):
            board = deepcopy(self.board)
            board.do_action(actions[i], self.team)
            next_values[i] = board.value_board(self.team)
            best_value = max(best_value, next_values[i])

        # filter out the actions leading to less-favourable states
        best_actions = [actions[i] for i in range(len(actions))
                        if next_values[i] == best_value]
        print("best, len-best, len-all:", best_value, len(best_actions), len(actions))
        if best_actions:
            # choose a random action among the most favourable states
            our_action = best_actions[randrange(len(best_actions))]

        # update the board with our action
        self.board.do_action(our_action, self.team)
        return our_action
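
    # Hedged sketch for the TODO above: if the board exposed an undo_action
    # method (hypothetical -- no such method exists in this codebase), the
    # per-action deepcopy could be avoided:
    #
    #     self.board.do_action(a, self.team)
    #     value = self.board.value_board(self.team)
    #     self.board.undo_action(a, self.team)  # hypothetical helper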

    def update(self, action):
        """
        Inform the player about the opponent's most recent move.

        :param action: the opponent's action
        :return: nothing
        """
        # update our board with the opponent's action
        self.board.do_action(action, self.enemy_team)
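

if __name__ == "__main__":
    # Minimal self-play smoke test -- a sketch, not the referee protocol.
    # It assumes Board2() starts with turn_count == 0 and that do_action
    # advances turn_count by one per move, as the assert in action() implies.
    white = Player('white')
    black = Player('black')
    for turn in range(4):
        mover, waiter = (white, black) if turn % 2 == 0 else (black, white)
        move = mover.action(turn)
        waiter.update(move)
        print("turn", turn, "->", move)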