oldagents.py
import os
import random

import numpy as np

import utils

OLD_AGENT_LEARN_RATE = 0.15


def old_agent_probs(folder: str):
    """Return a softmax distribution over the archived ("old") agents."""
    old_agents_reward_history = utils.load_json(
        f"{folder}/models/old/reward_history.json", default=[]
    )
    logits: dict[str, float] = {}
    for entry in old_agents_reward_history:
        agent = entry["agent_name"]
        if agent not in logits:
            # An agent seen for the first time starts at the current maximum logit.
            if not logits:
                logits[agent] = 0
            else:
                logits[agent] = max([*logits.values()])
        if not entry["won"]:
            # For a game that was not won, lower this agent's logit, scaled
            # inversely by its current selection probability.
            prob = np.exp(logits[agent]) / np.sum(np.exp([*logits.values()]))
            logits[agent] -= OLD_AGENT_LEARN_RATE / (len(logits) * prob)
    # If there is an agent that hasn't been played yet, initialize it to the max chance
    available_old_agents = list_old_agents(folder)
    for agent in available_old_agents:
        if agent not in logits:
            if not logits:
                logits[agent] = 0
            else:
                logits[agent] = max([*logits.values()])
    # Softmax over the logits.
    total_prob = np.sum(np.exp([*logits.values()]))
    probs = {agent: np.exp(logits[agent]) / total_prob for agent in logits}
    return probs

def list_old_agents(folder: str) -> list[str]:
    """List the .zip model backups stored under <folder>/models/old."""
    directory = os.path.join(folder, "models", "old")
    if not os.path.exists(directory):
        return []
    backups = os.listdir(directory)
    backups = [backup for backup in backups if backup.endswith(".zip")]
    return backups

def select_old_agent(folder: str):
    """Sample one old agent according to old_agent_probs, or return None if there are none."""
    probs = old_agent_probs(folder)
    if not probs:
        return None
    agent = random.choices(list(probs.keys()), list(probs.values()))[0]
    return agent

def win_loss_ratio(folder: str):
    """Count total wins and losses recorded in the reward history."""
    old_agents_reward_history = utils.load_json(
        f"{folder}/models/old/reward_history.json", default=[]
    )
    wins = 0
    losses = 0
    for entry in old_agents_reward_history:
        if entry["won"]:
            wins += 1
        else:
            losses += 1
    return {"wins": wins, "losses": losses}

def win_loss_ratio_per_agent(folder: str):
    """Count wins and losses separately for each old agent."""
    old_agents_reward_history = utils.load_json(
        f"{folder}/models/old/reward_history.json", default=[]
    )
    agents = {}
    for entry in old_agents_reward_history:
        agent = entry["agent_name"]
        if agent not in agents:
            agents[agent] = {"losses": 0, "wins": 0}
        if entry["won"]:
            agents[agent]["wins"] += 1
        else:
            agents[agent]["losses"] += 1
    return agents
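

# --- Usage sketch (not part of the original module) ---
# A minimal, hypothetical example of how these helpers might be wired together.
# The run directory name below is an assumption; the expected layout is
# "<folder>/models/old/*.zip" plus "<folder>/models/old/reward_history.json",
# as used by the functions above.
if __name__ == "__main__":
    run_folder = "runs/example_run"  # hypothetical run directory

    opponent = select_old_agent(run_folder)
    if opponent is None:
        # No reward history and no archived .zip models were found.
        print("No old agents available for this run.")
    else:
        print("Selected old agent:", opponent)
        print("Selection probabilities:", old_agent_probs(run_folder))
        print("Win/loss per agent:", win_loss_ratio_per_agent(run_folder))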