Compare commits
32 Commits
4120c4e7fb
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
4ca6dfc572
|
|||
|
ef2e551085
|
|||
|
b5e73cbbcf
|
|||
|
aeca76fda6
|
|||
|
111ce718da
|
|||
|
9342760d82
|
|||
|
967f1f6efb
|
|||
|
94831b720b
|
|||
|
e966a3ac9a
|
|||
|
c50892d79a
|
|||
|
6b9516a510
|
|||
|
eedc80e46a
|
|||
|
189a5aef58
|
|||
|
c54f3a7493
|
|||
|
c3c07fcb28
|
|||
|
2586b43294
|
|||
|
14254e44ba
|
|||
|
3ad74c90b7
|
|||
|
84b175d844
|
|||
|
57dba8ad1e
|
|||
|
66be2185f9
|
|||
|
e8d05e0f9d
|
|||
|
23e7ae2822
|
|||
|
c9fda80c81
|
|||
|
1ebfeb9f65
|
|||
|
ff7cf6cb0d
|
|||
|
693b5c1f7c
|
|||
|
9fff000746
|
|||
|
cf21f981e5
|
|||
|
0d81dca8df
|
|||
|
87c386c9a4
|
|||
|
386c8a342e
|
113
src/elo.rs
113
src/elo.rs
@@ -2,14 +2,14 @@ use crate::{
|
||||
agent::{Agent, RandomAgent},
|
||||
complexagent::ComplexAgent,
|
||||
game_inner::GameInner,
|
||||
logic::{ChildrenEvalMethod, FutureMoveConfig},
|
||||
logic::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves},
|
||||
repr::{Board, Piece, Winner},
|
||||
};
|
||||
use indicatif::{ParallelProgressIterator, ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use rand::seq::SliceRandom;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use skillratings::{
|
||||
elo::{elo, EloConfig, EloRating},
|
||||
glicko2::{confidence_interval, glicko2, Glicko2Rating},
|
||||
Outcomes, Rating,
|
||||
};
|
||||
use std::num::NonZero;
|
||||
@@ -18,18 +18,19 @@ type AgentMaker = Box<dyn Fn(Piece) -> Box<dyn Agent>>;
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn run() {
|
||||
let total_memory = 30_000_000_000; // 30 GB
|
||||
let num_threads = std::thread::available_parallelism()
|
||||
.map(NonZero::get)
|
||||
.expect("unable to get number of threads");
|
||||
let mem_per_thread = total_memory / num_threads;
|
||||
|
||||
let fmv_base = FutureMoveConfig {
|
||||
max_depth: 20,
|
||||
min_arena_depth: 14,
|
||||
top_k_children: 2,
|
||||
up_to_minus: 10,
|
||||
max_arena_size: usize::MAX,
|
||||
do_prune: false,
|
||||
max_arena_size: mem_per_thread / FutureMoves::ARENA_ENTRY_SIZE,
|
||||
print: false,
|
||||
children_eval_method: Default::default(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let configs = [6]
|
||||
let configs = [2, 3, 4, 5, 6, 7, 8]
|
||||
.into_iter()
|
||||
.map(move |d| FutureMoveConfig {
|
||||
max_depth: d,
|
||||
@@ -71,7 +72,7 @@ pub fn run() {
|
||||
.to_vec()
|
||||
})
|
||||
.flat_map(move |prev_c| {
|
||||
[ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxFlat].map(move |method| {
|
||||
[ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxProb].map(move |method| {
|
||||
FutureMoveConfig {
|
||||
children_eval_method: method,
|
||||
..prev_c
|
||||
@@ -119,33 +120,45 @@ pub fn run() {
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
if false {
|
||||
vec.push((
|
||||
"RandomAgent".to_string(),
|
||||
Box::new(move |piece| Box::new(RandomAgent::new(piece))),
|
||||
));
|
||||
}
|
||||
|
||||
let mut arena = PlayerArena::new(vec);
|
||||
|
||||
arena.prop_arena(100);
|
||||
arena.prop_arena(500);
|
||||
|
||||
println!("{}", arena);
|
||||
}
|
||||
|
||||
pub struct PlayerArena {
|
||||
/// Name, Creator Function, Rating
|
||||
players: Vec<(String, AgentMaker, EloRating)>,
|
||||
players: Vec<(String, AgentMaker, Glicko2Rating)>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for PlayerArena {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let mut players_i: Vec<usize> = (0..self.players.len()).collect();
|
||||
players_i.sort_by_key(|&i| -(self.players[i].2.rating() * 100.0) as i64);
|
||||
|
||||
players_i.sort_by(|&a, &b| {
|
||||
self.players[b]
|
||||
.2
|
||||
.rating()
|
||||
.total_cmp(&self.players[a].2.rating())
|
||||
});
|
||||
|
||||
for i in players_i {
|
||||
let conf_interval = confidence_interval(&self.players[i].2);
|
||||
|
||||
writeln!(
|
||||
f,
|
||||
"({:.2}): {}",
|
||||
"({:.2}[+/-{:.2}]): {}",
|
||||
self.players[i].2.rating(),
|
||||
conf_interval.1 - self.players[i].2.rating(),
|
||||
self.players[i].0
|
||||
)?;
|
||||
}
|
||||
@@ -159,9 +172,8 @@ impl PlayerArena {
|
||||
Self {
|
||||
players: players
|
||||
.into_iter()
|
||||
.zip([EloRating::new()].into_iter().cycle())
|
||||
// flatten tuple
|
||||
.map(|((a, b), c)| (a, b, c))
|
||||
// All starting ratings should be the default
|
||||
.map(|(a, b)| (a, b, Default::default()))
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
@@ -172,7 +184,10 @@ impl PlayerArena {
|
||||
.map(|&(i, j)| {
|
||||
(
|
||||
(i, j),
|
||||
Self::create_agents(&self.players[i].1, &self.players[j].1),
|
||||
(
|
||||
(self.players[i].1)(Piece::Black),
|
||||
(self.players[j].1)(Piece::White),
|
||||
),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
@@ -189,7 +204,6 @@ impl PlayerArena {
|
||||
// Spawn parallel processing in a dedicated thread
|
||||
let processing_thread = {
|
||||
let sender = sender.clone();
|
||||
let term = term.clone();
|
||||
|
||||
std::thread::spawn(move || {
|
||||
rayon::ThreadPoolBuilder::new()
|
||||
@@ -203,22 +217,6 @@ impl PlayerArena {
|
||||
|
||||
created_pairs
|
||||
.into_par_iter()
|
||||
.progress_with({
|
||||
let a = ProgressBar::new(num as u64).with_style(
|
||||
ProgressStyle::with_template(
|
||||
"[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}",
|
||||
)
|
||||
.expect("invalid ProgressStyle"),
|
||||
);
|
||||
a.set_draw_target(ProgressDrawTarget::term(term, 5));
|
||||
a
|
||||
})
|
||||
.progress_with_style(
|
||||
ProgressStyle::with_template(
|
||||
"[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}",
|
||||
)
|
||||
.expect("invalid ProgressStyle"),
|
||||
)
|
||||
.map(|((i, j), (p1, p2))| (i, j, Self::play_two_inner(p1, p2)))
|
||||
.for_each(|(i, j, o)| {
|
||||
sender.send((i, j, o)).expect("Failed to send result");
|
||||
@@ -231,17 +229,29 @@ impl PlayerArena {
|
||||
|
||||
// Process results on main thread as they arrive
|
||||
let mut received_num = 0;
|
||||
let p = ProgressBar::new(num as u64).with_style(
|
||||
ProgressStyle::with_template("[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}")
|
||||
.expect("invalid ProgressStyle"),
|
||||
);
|
||||
|
||||
while let Ok((i, j, o)) = receiver.recv() {
|
||||
self.process_outcome(i, j, &o);
|
||||
received_num += 1;
|
||||
|
||||
term.clear_last_lines(self.players.len())
|
||||
if received_num > 0 {
|
||||
term.clear_last_lines(self.players.len() + 1)
|
||||
.expect("unable to clear prev lines");
|
||||
}
|
||||
term.write_str(format!("{}", self).as_str())
|
||||
.expect("unable to write leaderboard");
|
||||
|
||||
received_num += 1;
|
||||
p.inc(1);
|
||||
// add extra newline after progressbar
|
||||
println!();
|
||||
|
||||
// break if all pairs were received
|
||||
if received_num == num {
|
||||
drop(receiver);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -253,35 +263,25 @@ impl PlayerArena {
|
||||
}
|
||||
|
||||
fn prop_arena(&mut self, n: usize) {
|
||||
self.play(
|
||||
&(0..self.players.len())
|
||||
let mut games = (0..self.players.len())
|
||||
.flat_map(|i| {
|
||||
(0..self.players.len())
|
||||
.map(move |j| (i, j))
|
||||
.filter(|(i, j)| i != j)
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.repeat(n),
|
||||
);
|
||||
.repeat(n);
|
||||
games.shuffle(&mut rand::rng());
|
||||
self.play(&games);
|
||||
}
|
||||
|
||||
fn process_outcome(&mut self, player1: usize, player2: usize, outcome: &Outcomes) {
|
||||
let (np1, np2) = elo(
|
||||
(self.players[player1].2, self.players[player2].2) = glicko2(
|
||||
&self.players[player1].2,
|
||||
&self.players[player2].2,
|
||||
outcome,
|
||||
&EloConfig::new(),
|
||||
&Default::default(),
|
||||
);
|
||||
self.players[player1].2 = np1;
|
||||
self.players[player2].2 = np2;
|
||||
}
|
||||
|
||||
fn create_agents(
|
||||
player_1_fn: &AgentMaker,
|
||||
player_2_fn: &AgentMaker,
|
||||
) -> (Box<dyn Agent>, Box<dyn Agent>) {
|
||||
(player_1_fn(Piece::Black), player_2_fn(Piece::White))
|
||||
}
|
||||
|
||||
fn play_two_inner(player_1: Box<dyn Agent>, player_2: Box<dyn Agent>) -> Outcomes {
|
||||
@@ -289,7 +289,8 @@ impl PlayerArena {
|
||||
player_1,
|
||||
player_2,
|
||||
false,
|
||||
Board::random(rand::random_range(3..=7)),
|
||||
// Board::random(rand::random_range(4..=15)),
|
||||
Board::STARTING_POSITION,
|
||||
)
|
||||
.expect("unable to create game")
|
||||
.loop_until_result();
|
||||
|
||||
@@ -33,10 +33,4 @@ impl BoardValueMap {
|
||||
];
|
||||
Self(PosMap::from(POSITION_VALUES))
|
||||
}
|
||||
|
||||
pub const fn flat() -> Self {
|
||||
Self(PosMap::from(
|
||||
[[1; Board::SIZE as usize]; Board::SIZE as usize],
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use super::r#move::{MoveCoord, MoveValueConfig};
|
||||
use crate::{
|
||||
logic::r#move::Move,
|
||||
repr::{Board, Piece, Winner},
|
||||
@@ -15,8 +16,6 @@ use std::{
|
||||
},
|
||||
};
|
||||
|
||||
use super::r#move::MoveCoord;
|
||||
|
||||
#[derive(Allocative)]
|
||||
pub struct FutureMoves {
|
||||
/// Arena containing all [`Move`]
|
||||
@@ -25,9 +24,6 @@ pub struct FutureMoves {
|
||||
/// Index of the [`Move`] tree's root node
|
||||
current_root: Option<usize>,
|
||||
|
||||
/// Current generated depth of the Arena
|
||||
current_depth: usize,
|
||||
|
||||
/// Color of the agent that move values are computed with respect to
|
||||
agent_color: Piece,
|
||||
|
||||
@@ -38,7 +34,7 @@ pub struct FutureMoves {
|
||||
board: Board,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Allocative)]
|
||||
#[derive(Copy, Clone, Allocative, Default)]
|
||||
pub struct FutureMoveConfig {
|
||||
/// Max depth that we should try to traverse
|
||||
pub max_depth: usize,
|
||||
@@ -91,24 +87,15 @@ impl std::fmt::Display for FutureMoveConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Allocative)]
|
||||
#[derive(Debug, Clone, Copy, Allocative, Default)]
|
||||
#[allow(dead_code)]
|
||||
pub enum ChildrenEvalMethod {
|
||||
Average,
|
||||
AverageDivDepth,
|
||||
|
||||
MinAvgDivDepth,
|
||||
|
||||
/// Best so far?
|
||||
// #[default]
|
||||
MinMax,
|
||||
|
||||
MinMaxFlat,
|
||||
}
|
||||
|
||||
impl Default for ChildrenEvalMethod {
|
||||
fn default() -> Self {
|
||||
Self::MinMax
|
||||
}
|
||||
#[default]
|
||||
MinMaxProb,
|
||||
}
|
||||
|
||||
impl FutureMoves {
|
||||
@@ -116,13 +103,15 @@ impl FutureMoves {
|
||||
Self {
|
||||
arena: Vec::new(),
|
||||
current_root: None,
|
||||
current_depth: 0,
|
||||
agent_color,
|
||||
config,
|
||||
board: Board::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub const ARENA_ENTRY_SIZE: usize =
|
||||
size_of::<Move>() + size_of::<usize>() * (Board::AREA.0 as usize / 4);
|
||||
|
||||
/// Return the length of the Arena
|
||||
pub fn arena_len(&self) -> usize {
|
||||
self.arena.len()
|
||||
@@ -145,39 +134,34 @@ impl FutureMoves {
|
||||
indexes
|
||||
}
|
||||
|
||||
/// Find the current depth of the arena by
|
||||
/// looking at leaf moves and finding the smallest value
|
||||
fn determine_current_depth(&self) -> Option<usize> {
|
||||
/// Return the current depth of the tree
|
||||
fn current_depth(&self) -> usize {
|
||||
// leaf_moves is sorted from min to max depth
|
||||
self.leaf_moves().first().map(|&i| self.depth_of(i))
|
||||
self.leaf_moves()
|
||||
.first()
|
||||
.map(|&i| self.depth_of(i))
|
||||
.unwrap_or(0) // handle empty trees
|
||||
}
|
||||
|
||||
/// Generate children for all children of `nodes`
|
||||
/// only `pub` for the sake of benchmarking
|
||||
pub fn extend_layers(&mut self) {
|
||||
// recover from partial tree extension
|
||||
if let Some(current_depth) = self.determine_current_depth() {
|
||||
self.current_depth = current_depth;
|
||||
}
|
||||
let mut leafs = self.leaf_moves().into_iter().collect::<Vec<usize>>();
|
||||
|
||||
for _ in self.current_depth..self.config.max_depth {
|
||||
for _ in self.current_depth()..self.config.max_depth {
|
||||
let pstyle_inner = if cfg!(test) || !self.config.print {
|
||||
""
|
||||
} else {
|
||||
&format!(
|
||||
"Generating children (depth: {}/{}): ({{pos}}/{{len}}) {{per_sec}}",
|
||||
self.current_depth + 1,
|
||||
self.current_depth() + 1,
|
||||
self.config.max_depth
|
||||
)
|
||||
};
|
||||
|
||||
let allowed_size = self.config.max_arena_size - self.arena.len();
|
||||
let curr_size = Arc::new(AtomicUsize::new(0));
|
||||
let got = self
|
||||
.leaf_moves()
|
||||
.into_iter()
|
||||
.filter(|&i| self.depth_of(i) == self.current_depth)
|
||||
.collect::<Vec<usize>>()
|
||||
leafs = leafs
|
||||
.into_par_iter()
|
||||
.progress_with_style(ProgressStyle::with_template(pstyle_inner).unwrap())
|
||||
.map(|parent_idx| (parent_idx, self.generate_children_raw(parent_idx)))
|
||||
@@ -189,21 +173,22 @@ impl FutureMoves {
|
||||
true
|
||||
}
|
||||
})
|
||||
.collect::<Vec<(usize, Vec<Move>)>>();
|
||||
|
||||
// get total # of generated boards
|
||||
let got_len = curr_size.load(Ordering::Acquire);
|
||||
|
||||
got.into_iter().for_each(|(parent_idx, moves)| {
|
||||
.collect::<Vec<(usize, Vec<Move>)>>()
|
||||
.into_iter()
|
||||
.flat_map(|(parent_idx, moves)| {
|
||||
let start_idx = self.arena.len();
|
||||
self.arena.extend(moves);
|
||||
|
||||
let new_indices = start_idx..self.arena.len();
|
||||
self.arena[parent_idx].children.extend(new_indices);
|
||||
});
|
||||
self.arena[parent_idx].children.extend(new_indices.clone());
|
||||
new_indices
|
||||
})
|
||||
.collect();
|
||||
|
||||
// get total # of generated boards
|
||||
let got_len = curr_size.load(Ordering::Acquire);
|
||||
|
||||
self.prune_bad_children();
|
||||
self.current_depth += 1;
|
||||
if got_len == allowed_size {
|
||||
// arena has hit the upper limit of size permitted
|
||||
break;
|
||||
@@ -228,16 +213,7 @@ impl FutureMoves {
|
||||
}
|
||||
|
||||
fn create_move(&self, coord: MoveCoord, board: Board, color: Piece) -> Move {
|
||||
Move::new(
|
||||
coord,
|
||||
board,
|
||||
color,
|
||||
self.agent_color,
|
||||
!matches!(
|
||||
self.config.children_eval_method,
|
||||
ChildrenEvalMethod::MinMaxFlat
|
||||
),
|
||||
)
|
||||
Move::new(coord, board, color, self.agent_color, MoveValueConfig {})
|
||||
}
|
||||
|
||||
fn generate_children_raw(&self, parent_idx: usize) -> Vec<Move> {
|
||||
@@ -313,64 +289,44 @@ impl FutureMoves {
|
||||
let by_depth_vec = self.by_depth(indexes);
|
||||
|
||||
// reversed so we build up the value of the closest (in time) moves from the future
|
||||
for (depth, nodes) in by_depth_vec.into_iter().rev() {
|
||||
for (_depth, nodes) in by_depth_vec.into_iter().rev() {
|
||||
for idx in nodes {
|
||||
let children_values = self.arena[idx]
|
||||
.children
|
||||
.iter()
|
||||
.map(|&child| self.arena[child].value)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let children_value = match self.config.children_eval_method {
|
||||
ChildrenEvalMethod::Average => children_values
|
||||
.into_iter()
|
||||
.sum::<i32>()
|
||||
.checked_div(self.arena[idx].children.len() as i32),
|
||||
|
||||
ChildrenEvalMethod::AverageDivDepth => children_values
|
||||
.into_iter()
|
||||
.sum::<i32>()
|
||||
.checked_div(self.arena[idx].children.len() as i32)
|
||||
.and_then(|x| x.checked_div(depth as i32)),
|
||||
ChildrenEvalMethod::MinAvgDivDepth => {
|
||||
if self.arena[idx].color == self.agent_color {
|
||||
let child_value = if self.arena[idx].color == self.agent_color {
|
||||
// get best (for the adversary) enemy play
|
||||
// this assumes the adversary is playing optimally
|
||||
|
||||
children_values.into_iter().min()
|
||||
children_values.iter().min()
|
||||
} else {
|
||||
children_values
|
||||
.into_iter()
|
||||
.sum::<i32>()
|
||||
.checked_div(self.arena[idx].children.len() as i32)
|
||||
.and_then(|x| x.checked_div(depth as i32))
|
||||
}
|
||||
children_values.iter().max()
|
||||
}
|
||||
.cloned()
|
||||
.unwrap_or(Default::default());
|
||||
self.arena[idx].value = self.arena[idx].self_value;
|
||||
|
||||
ChildrenEvalMethod::MinMax | ChildrenEvalMethod::MinMaxFlat => {
|
||||
if self.arena[idx].color == self.agent_color {
|
||||
// get best (for the adversary) enemy play
|
||||
// this assumes the adversary is playing optimally
|
||||
match self.config.children_eval_method {
|
||||
ChildrenEvalMethod::MinMax => {
|
||||
self.arena[idx].value.value += child_value.value;
|
||||
self.arena[idx].value.set_state(child_value.state());
|
||||
}
|
||||
ChildrenEvalMethod::MinMaxProb => {
|
||||
self.arena[idx]
|
||||
.value
|
||||
.populate_self_from_children(&children_values);
|
||||
|
||||
children_values.into_iter().min()
|
||||
} else {
|
||||
children_values.into_iter().max()
|
||||
self.arena[idx].value.value += child_value.value;
|
||||
}
|
||||
}
|
||||
}
|
||||
.unwrap_or(0);
|
||||
|
||||
// we use `depth` and divided `self_value` by it, idk if this is worth it
|
||||
// we should really setup some sort of ELO rating for each commit, playing them against
|
||||
// each other or something, could be cool to benchmark these more subjective things, not
|
||||
// just performance (cycles/time wise)
|
||||
self.arena[idx].value = self.arena[idx].self_value as i32 + children_value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn move_history(&self, idx: usize) -> Option<Vec<(MoveCoord, Piece)>> {
|
||||
if let Some(root) = self.current_root {
|
||||
self.current_root.and_then(|root| {
|
||||
let mut hist = Vec::new();
|
||||
|
||||
let mut current = Some(idx);
|
||||
@@ -390,34 +346,33 @@ impl FutureMoves {
|
||||
}
|
||||
|
||||
Some(hist)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn get_board_from_idx(&self, idx: usize) -> Option<Board> {
|
||||
if let Some(hist) = self.move_history(idx) {
|
||||
self.move_history(idx).and_then(|hist| {
|
||||
let mut board = self.board;
|
||||
for (m, c) in hist {
|
||||
if let Some(m) = m {
|
||||
board.place(m, c).expect("move would not propegate");
|
||||
board.place(m, c).ok()?;
|
||||
}
|
||||
}
|
||||
Some(board)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the best move which is a child of `self.current_root`
|
||||
pub fn best_move(&self) -> Option<MoveCoord> {
|
||||
self.current_root
|
||||
.and_then(|x| {
|
||||
self.arena[x]
|
||||
.and_then(|x| match self.config.children_eval_method {
|
||||
ChildrenEvalMethod::MinMax => self.arena[x]
|
||||
.children
|
||||
.iter()
|
||||
// this would be considered `minimax`
|
||||
.max_by_key(|&&idx| self.arena[idx].value)
|
||||
.max_by_key(|&&idx| self.arena[idx].value),
|
||||
ChildrenEvalMethod::MinMaxProb => self.arena[x]
|
||||
.children
|
||||
.iter()
|
||||
.max_by_key(|&&idx| self.arena[idx].value),
|
||||
})
|
||||
.inspect(|&&x| {
|
||||
assert_eq!(
|
||||
@@ -461,7 +416,6 @@ impl FutureMoves {
|
||||
fn rebuild_from_board(&mut self, board: Board) {
|
||||
self.arena = vec![self.create_move(None, board, !self.agent_color)];
|
||||
self.current_root = Some(0);
|
||||
self.current_depth = 0;
|
||||
self.board = board;
|
||||
}
|
||||
|
||||
@@ -469,7 +423,6 @@ impl FutureMoves {
|
||||
let board = self
|
||||
.get_board_from_idx(idx)
|
||||
.expect("unable to get board at idx");
|
||||
self.current_depth -= self.depth_of(idx);
|
||||
self.current_root = Some(idx);
|
||||
self.board = board;
|
||||
self.refocus_tree();
|
||||
@@ -521,7 +474,7 @@ impl FutureMoves {
|
||||
}
|
||||
|
||||
fn prune_bad_children(&mut self) {
|
||||
if self.current_depth < self.config.min_arena_depth || !self.config.do_prune {
|
||||
if self.current_depth() < self.config.min_arena_depth || !self.config.do_prune {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -530,7 +483,7 @@ impl FutureMoves {
|
||||
|
||||
for (depth, indexes) in self.by_depth(0..self.arena.len()) {
|
||||
// TODO! maybe update by_depth every iteration or something?
|
||||
if depth > self.current_depth.saturating_sub(self.config.up_to_minus) {
|
||||
if depth > self.current_depth().saturating_sub(self.config.up_to_minus) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -580,6 +533,8 @@ impl FutureMoves {
|
||||
/// Rebuilds the Arena based on `self.current_root`, prunes unrelated nodes
|
||||
fn refocus_tree(&mut self) {
|
||||
let Some(root) = self.current_root else {
|
||||
// handle current_root being empty (clear arena and return)
|
||||
self.arena.clear();
|
||||
return;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
mod board_value;
|
||||
mod future_moves;
|
||||
mod r#move;
|
||||
mod mvs;
|
||||
pub use future_moves::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves};
|
||||
pub use r#move::MoveCoord;
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
use super::board_value::BoardValueMap;
|
||||
use super::{
|
||||
board_value::BoardValueMap,
|
||||
mvs::{MVSGameState, MoveValueStats},
|
||||
};
|
||||
use crate::repr::{Board, CoordPair, Piece, Winner};
|
||||
use allocative::Allocative;
|
||||
|
||||
@@ -23,10 +26,10 @@ pub struct Move {
|
||||
pub children: Vec<usize>,
|
||||
|
||||
/// Value of this move (including children)
|
||||
pub value: i32,
|
||||
pub value: MoveValueStats,
|
||||
|
||||
/// The inherent value of this move (not including children)
|
||||
pub self_value: i16,
|
||||
pub self_value: MoveValueStats,
|
||||
|
||||
/// Which color made a move on this move?
|
||||
pub color: Piece,
|
||||
@@ -35,53 +38,47 @@ pub struct Move {
|
||||
pub is_trimmed: bool,
|
||||
}
|
||||
|
||||
pub struct MoveValueConfig {}
|
||||
|
||||
impl Move {
|
||||
pub fn new(
|
||||
coord: MoveCoord,
|
||||
board: Board,
|
||||
color: Piece,
|
||||
agent_color: Piece,
|
||||
use_weighted_bvm: bool,
|
||||
_: MoveValueConfig,
|
||||
) -> Self {
|
||||
let mut m = Move {
|
||||
coord,
|
||||
winner: board.game_winner(),
|
||||
parent: None,
|
||||
children: Vec::new(),
|
||||
value: i32::MIN,
|
||||
value: Default::default(),
|
||||
color,
|
||||
is_trimmed: false,
|
||||
self_value: 0,
|
||||
self_value: Default::default(),
|
||||
};
|
||||
m.self_value = m.compute_self_value(agent_color, &board, use_weighted_bvm);
|
||||
|
||||
// set wins/losses values appropriately
|
||||
match m.winner {
|
||||
Winner::Player(piece) => {
|
||||
if piece == agent_color {
|
||||
m.self_value.set_state(Some(MVSGameState::Win));
|
||||
} else {
|
||||
m.self_value.set_state(Some(MVSGameState::Loss));
|
||||
}
|
||||
}
|
||||
Winner::Tie => {
|
||||
m.self_value.set_state(Some(MVSGameState::Tie));
|
||||
}
|
||||
Winner::None => {}
|
||||
}
|
||||
|
||||
m.self_value.value =
|
||||
const { BoardValueMap::weighted() }.board_value(&board, agent_color) as i32;
|
||||
m
|
||||
}
|
||||
|
||||
fn compute_self_value(&self, agent_color: Piece, board: &Board, use_weighted_bvm: bool) -> i16 {
|
||||
if self.winner == Winner::Player(!agent_color) {
|
||||
// if this board results in the opponent winning, MAJORLY negatively weigh this move
|
||||
// NOTE! this branch isn't completely deleted because if so, the bot wouldn't make a move.
|
||||
// We shouldn't prune branches because we still need to always react to the opponent's moves
|
||||
return i16::MIN + 1;
|
||||
} else if self.winner == Winner::Player(agent_color) {
|
||||
// results in a win for the agent
|
||||
return i16::MAX - 1;
|
||||
}
|
||||
// else if self.winner == Winner::Tie {
|
||||
// // idk what a Tie should be valued?
|
||||
// return 0;
|
||||
// }
|
||||
|
||||
// I guess ignore Ties here, don't give them an explicit value,
|
||||
// because even in the case of ties, we want to have a higher score
|
||||
|
||||
match use_weighted_bvm {
|
||||
true => const { BoardValueMap::weighted() },
|
||||
false => const { BoardValueMap::flat() },
|
||||
}
|
||||
.board_value(board, agent_color)
|
||||
}
|
||||
|
||||
/// Sort children of the [`Move`] by their self_value in `arena`
|
||||
pub fn sort_children(&mut self, arena: &[Move]) {
|
||||
self.children.sort_by(|&a, &b| {
|
||||
|
||||
164
src/logic/mvs.rs
Normal file
164
src/logic/mvs.rs
Normal file
@@ -0,0 +1,164 @@
|
||||
use allocative::Allocative;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Allocative, Debug, PartialOrd, Ord)]
|
||||
pub enum MVSGameState {
|
||||
Win = 1,
|
||||
Tie = 0,
|
||||
Loss = -1,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
|
||||
pub struct MoveValueStats {
|
||||
state: Option<MVSGameState>,
|
||||
wins: u16,
|
||||
losses: u16,
|
||||
ties: u16,
|
||||
pub value: i32,
|
||||
}
|
||||
|
||||
impl MoveValueStats {
|
||||
#[cfg(test)]
|
||||
pub fn new_from_outcomes(wins: u16, losses: u16, ties: u16) -> Self {
|
||||
Self {
|
||||
wins,
|
||||
losses,
|
||||
ties,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn new_from_value(value: i32) -> Self {
|
||||
Self {
|
||||
value,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn new_from_state(state: Option<MVSGameState>) -> Self {
|
||||
Self {
|
||||
state,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn chance_win(&self) -> Option<f32> {
|
||||
let sum = self.losses + self.wins + self.ties;
|
||||
if 20 > sum {
|
||||
return None;
|
||||
}
|
||||
Some(self.wins as f32 / sum as f32)
|
||||
}
|
||||
|
||||
pub const fn set_state(&mut self, state: Option<MVSGameState>) {
|
||||
self.state = state;
|
||||
}
|
||||
|
||||
pub const fn state(&self) -> Option<MVSGameState> {
|
||||
self.state
|
||||
}
|
||||
|
||||
pub fn populate_self_from_children(&mut self, others: &[Self]) {
|
||||
(self.wins, self.losses, self.ties) =
|
||||
others.iter().fold((0, 0, 0), |(wins, losses, ties), x| {
|
||||
(
|
||||
wins + x.wins + (x.state == Some(MVSGameState::Win)) as u16,
|
||||
losses + x.losses + (x.state == Some(MVSGameState::Loss)) as u16,
|
||||
ties + x.ties + (x.state == Some(MVSGameState::Tie)) as u16,
|
||||
)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for MoveValueStats {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for MoveValueStats {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
if self.state.is_some() || other.state.is_some() {
|
||||
return self.state.cmp(&other.state);
|
||||
}
|
||||
|
||||
let (s_cw, o_cw) = (self.chance_win(), other.chance_win());
|
||||
if s_cw.is_some() || o_cw.is_some() {
|
||||
if s_cw > o_cw {
|
||||
return Ordering::Greater;
|
||||
} else if o_cw > s_cw {
|
||||
return Ordering::Less;
|
||||
}
|
||||
}
|
||||
|
||||
self.value.cmp(&other.value)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn two_prob() {
|
||||
let one = MoveValueStats::new_from_outcomes(100, 40, 0);
|
||||
|
||||
let two = MoveValueStats::new_from_outcomes(40, 60, 0);
|
||||
assert!(one > two);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn one_prob_one_non() {
|
||||
let one = MoveValueStats::new_from_outcomes(100, 4, 0);
|
||||
let two = MoveValueStats::new_from_value(10);
|
||||
assert!(one > two);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn one_prob_one_win() {
|
||||
let one = MoveValueStats::new_from_outcomes(100, 4, 0);
|
||||
let two = MoveValueStats::new_from_state(Some(MVSGameState::Win));
|
||||
assert!(one < two);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn two_prob_zero() {
|
||||
let one = MoveValueStats::new_from_outcomes(100, 0, 0);
|
||||
let two = MoveValueStats::new_from_outcomes(0, 60, 0);
|
||||
assert!(one > two);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_children_pop() {
|
||||
let mut a = MoveValueStats::new_from_value(0);
|
||||
|
||||
let children = vec![
|
||||
MoveValueStats::new_from_outcomes(1, 0, 0),
|
||||
MoveValueStats::new_from_outcomes(0, 2, 0),
|
||||
MoveValueStats::new_from_outcomes(0, 0, 3),
|
||||
];
|
||||
a.populate_self_from_children(&children);
|
||||
assert_eq!(a.wins, 1, "Wins should be 1");
|
||||
assert_eq!(a.losses, 2, "Losses should be 2");
|
||||
assert_eq!(a.ties, 3, "Ties should be 3");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_children_pop_state() {
|
||||
let mut a = MoveValueStats::new_from_value(0);
|
||||
|
||||
let children = vec![
|
||||
MoveValueStats::new_from_state(Some(MVSGameState::Win)),
|
||||
MoveValueStats::new_from_state(Some(MVSGameState::Win)),
|
||||
MoveValueStats::new_from_state(Some(MVSGameState::Loss)),
|
||||
MoveValueStats::new_from_state(Some(MVSGameState::Tie)),
|
||||
MoveValueStats::new_from_state(Some(MVSGameState::Tie)),
|
||||
];
|
||||
a.populate_self_from_children(&children);
|
||||
assert_eq!(a.wins, 2, "Wins should be 2");
|
||||
assert_eq!(a.losses, 1, "Losses should be 1");
|
||||
assert_eq!(a.ties, 2, "Ties should be 2");
|
||||
}
|
||||
}
|
||||
@@ -39,7 +39,7 @@ fn main() {
|
||||
min_arena_depth: 14,
|
||||
top_k_children: 2,
|
||||
up_to_minus: 10,
|
||||
max_arena_size: 200_000_000,
|
||||
max_arena_size: 50_000_000,
|
||||
do_prune: false,
|
||||
print: true,
|
||||
children_eval_method: Default::default(),
|
||||
|
||||
@@ -3,10 +3,11 @@ use allocative::Allocative;
|
||||
use rand::seq::IteratorRandom;
|
||||
use std::{cmp::Ordering, fmt};
|
||||
|
||||
#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative)]
|
||||
#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative, Default)]
|
||||
pub enum Winner {
|
||||
Player(Piece),
|
||||
Tie,
|
||||
#[default]
|
||||
None,
|
||||
}
|
||||
|
||||
|
||||
@@ -14,12 +14,12 @@ impl<T: Copy> PosMap<T> {
|
||||
Self(MaybeUninit::zeroed().assume_init())
|
||||
}
|
||||
|
||||
pub const fn from(v: [[T; Board::SIZE as usize]; Board::SIZE as usize]) -> Self {
|
||||
pub const fn from(mut v: [[T; Board::SIZE as usize]; Board::SIZE as usize]) -> Self {
|
||||
let mut n = unsafe { Self::uninit() };
|
||||
|
||||
const_for!(i in 0..Board::SIZE => {
|
||||
const_for!(j in 0..Board::SIZE => {
|
||||
n.set(CoordPair::from_axes(i, j), v[i as usize][j as usize]);
|
||||
std::mem::swap(n.get_mut(CoordPair::from_axes(i, j)), &mut v[i as usize][j as usize]);
|
||||
});
|
||||
});
|
||||
n
|
||||
|
||||
Reference in New Issue
Block a user