Compare commits
1 Commits
main
...
8146c11d48
| Author | SHA1 | Date | |
|---|---|---|---|
|
8146c11d48
|
111
src/elo.rs
111
src/elo.rs
@@ -2,14 +2,14 @@ use crate::{
|
|||||||
agent::{Agent, RandomAgent},
|
agent::{Agent, RandomAgent},
|
||||||
complexagent::ComplexAgent,
|
complexagent::ComplexAgent,
|
||||||
game_inner::GameInner,
|
game_inner::GameInner,
|
||||||
logic::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves},
|
logic::{ChildrenEvalMethod, FutureMoveConfig},
|
||||||
repr::{Board, Piece, Winner},
|
repr::{Board, Piece, Winner},
|
||||||
};
|
};
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ParallelProgressIterator, ProgressBar, ProgressDrawTarget, ProgressStyle};
|
||||||
use rand::seq::SliceRandom;
|
use rand::seq::SliceRandom;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||||
use skillratings::{
|
use skillratings::{
|
||||||
glicko2::{confidence_interval, glicko2, Glicko2Rating},
|
elo::{elo, EloConfig, EloRating},
|
||||||
Outcomes, Rating,
|
Outcomes, Rating,
|
||||||
};
|
};
|
||||||
use std::num::NonZero;
|
use std::num::NonZero;
|
||||||
@@ -18,19 +18,18 @@ type AgentMaker = Box<dyn Fn(Piece) -> Box<dyn Agent>>;
|
|||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn run() {
|
pub fn run() {
|
||||||
let total_memory = 30_000_000_000; // 30 GB
|
|
||||||
let num_threads = std::thread::available_parallelism()
|
|
||||||
.map(NonZero::get)
|
|
||||||
.expect("unable to get number of threads");
|
|
||||||
let mem_per_thread = total_memory / num_threads;
|
|
||||||
|
|
||||||
let fmv_base = FutureMoveConfig {
|
let fmv_base = FutureMoveConfig {
|
||||||
max_arena_size: mem_per_thread / FutureMoves::ARENA_ENTRY_SIZE,
|
max_depth: 20,
|
||||||
|
min_arena_depth: 14,
|
||||||
|
top_k_children: 2,
|
||||||
|
up_to_minus: 10,
|
||||||
|
max_arena_size: usize::MAX,
|
||||||
|
do_prune: false,
|
||||||
print: false,
|
print: false,
|
||||||
..Default::default()
|
children_eval_method: Default::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let configs = [2, 3, 4, 5, 6, 7, 8]
|
let configs = [6]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(move |d| FutureMoveConfig {
|
.map(move |d| FutureMoveConfig {
|
||||||
max_depth: d,
|
max_depth: d,
|
||||||
@@ -120,45 +119,33 @@ pub fn run() {
|
|||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
if false {
|
|
||||||
vec.push((
|
vec.push((
|
||||||
"RandomAgent".to_string(),
|
"RandomAgent".to_string(),
|
||||||
Box::new(move |piece| Box::new(RandomAgent::new(piece))),
|
Box::new(move |piece| Box::new(RandomAgent::new(piece))),
|
||||||
));
|
));
|
||||||
}
|
|
||||||
|
|
||||||
let mut arena = PlayerArena::new(vec);
|
let mut arena = PlayerArena::new(vec);
|
||||||
|
|
||||||
arena.prop_arena(500);
|
arena.prop_arena(100);
|
||||||
|
|
||||||
println!("{}", arena);
|
println!("{}", arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct PlayerArena {
|
pub struct PlayerArena {
|
||||||
/// Name, Creator Function, Elo
|
/// Name, Creator Function, Elo
|
||||||
players: Vec<(String, AgentMaker, Glicko2Rating)>,
|
players: Vec<(String, AgentMaker, EloRating)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for PlayerArena {
|
impl std::fmt::Display for PlayerArena {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
let mut players_i: Vec<usize> = (0..self.players.len()).collect();
|
let mut players_i: Vec<usize> = (0..self.players.len()).collect();
|
||||||
|
players_i.sort_by_key(|&i| -(self.players[i].2.rating() * 100.0) as i64);
|
||||||
players_i.sort_by(|&a, &b| {
|
|
||||||
self.players[b]
|
|
||||||
.2
|
|
||||||
.rating()
|
|
||||||
.total_cmp(&self.players[a].2.rating())
|
|
||||||
});
|
|
||||||
|
|
||||||
for i in players_i {
|
for i in players_i {
|
||||||
let conf_interval = confidence_interval(&self.players[i].2);
|
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
f,
|
f,
|
||||||
"({:.2}[+/-{:.2}]): {}",
|
"({:.2}): {}",
|
||||||
self.players[i].2.rating(),
|
self.players[i].2.rating(),
|
||||||
conf_interval.1 - self.players[i].2.rating(),
|
|
||||||
self.players[i].0
|
self.players[i].0
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
@@ -172,8 +159,9 @@ impl PlayerArena {
|
|||||||
Self {
|
Self {
|
||||||
players: players
|
players: players
|
||||||
.into_iter()
|
.into_iter()
|
||||||
// All starting ratings should be the default
|
.zip([EloRating::new()].into_iter().cycle())
|
||||||
.map(|(a, b)| (a, b, Default::default()))
|
// flatten tuple
|
||||||
|
.map(|((a, b), c)| (a, b, c))
|
||||||
.collect(),
|
.collect(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -184,10 +172,7 @@ impl PlayerArena {
|
|||||||
.map(|&(i, j)| {
|
.map(|&(i, j)| {
|
||||||
(
|
(
|
||||||
(i, j),
|
(i, j),
|
||||||
(
|
Self::create_agents(&self.players[i].1, &self.players[j].1),
|
||||||
(self.players[i].1)(Piece::Black),
|
|
||||||
(self.players[j].1)(Piece::White),
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
@@ -204,6 +189,7 @@ impl PlayerArena {
|
|||||||
// Spawn parallel processing in a dedicated thread
|
// Spawn parallel processing in a dedicated thread
|
||||||
let processing_thread = {
|
let processing_thread = {
|
||||||
let sender = sender.clone();
|
let sender = sender.clone();
|
||||||
|
let term = term.clone();
|
||||||
|
|
||||||
std::thread::spawn(move || {
|
std::thread::spawn(move || {
|
||||||
rayon::ThreadPoolBuilder::new()
|
rayon::ThreadPoolBuilder::new()
|
||||||
@@ -217,6 +203,22 @@ impl PlayerArena {
|
|||||||
|
|
||||||
created_pairs
|
created_pairs
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
|
.progress_with({
|
||||||
|
let a = ProgressBar::new(num as u64).with_style(
|
||||||
|
ProgressStyle::with_template(
|
||||||
|
"[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}",
|
||||||
|
)
|
||||||
|
.expect("invalid ProgressStyle"),
|
||||||
|
);
|
||||||
|
a.set_draw_target(ProgressDrawTarget::term(term, 5));
|
||||||
|
a
|
||||||
|
})
|
||||||
|
.progress_with_style(
|
||||||
|
ProgressStyle::with_template(
|
||||||
|
"[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}",
|
||||||
|
)
|
||||||
|
.expect("invalid ProgressStyle"),
|
||||||
|
)
|
||||||
.map(|((i, j), (p1, p2))| (i, j, Self::play_two_inner(p1, p2)))
|
.map(|((i, j), (p1, p2))| (i, j, Self::play_two_inner(p1, p2)))
|
||||||
.for_each(|(i, j, o)| {
|
.for_each(|(i, j, o)| {
|
||||||
sender.send((i, j, o)).expect("Failed to send result");
|
sender.send((i, j, o)).expect("Failed to send result");
|
||||||
@@ -229,29 +231,17 @@ impl PlayerArena {
|
|||||||
|
|
||||||
// Process results on main thread as they arrive
|
// Process results on main thread as they arrive
|
||||||
let mut received_num = 0;
|
let mut received_num = 0;
|
||||||
let p = ProgressBar::new(num as u64).with_style(
|
|
||||||
ProgressStyle::with_template("[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}")
|
|
||||||
.expect("invalid ProgressStyle"),
|
|
||||||
);
|
|
||||||
|
|
||||||
while let Ok((i, j, o)) = receiver.recv() {
|
while let Ok((i, j, o)) = receiver.recv() {
|
||||||
self.process_outcome(i, j, &o);
|
self.process_outcome(i, j, &o);
|
||||||
|
received_num += 1;
|
||||||
|
|
||||||
if received_num > 0 {
|
term.clear_last_lines(self.players.len())
|
||||||
term.clear_last_lines(self.players.len() + 1)
|
|
||||||
.expect("unable to clear prev lines");
|
.expect("unable to clear prev lines");
|
||||||
}
|
|
||||||
term.write_str(format!("{}", self).as_str())
|
term.write_str(format!("{}", self).as_str())
|
||||||
.expect("unable to write leaderboard");
|
.expect("unable to write leaderboard");
|
||||||
|
|
||||||
received_num += 1;
|
|
||||||
p.inc(1);
|
|
||||||
// add extra newline after progressbar
|
|
||||||
println!();
|
|
||||||
|
|
||||||
// break if all pairs were received
|
// break if all pairs were received
|
||||||
if received_num == num {
|
if received_num == num {
|
||||||
drop(receiver);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -263,25 +253,35 @@ impl PlayerArena {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn prop_arena(&mut self, n: usize) {
|
fn prop_arena(&mut self, n: usize) {
|
||||||
let mut games = (0..self.players.len())
|
self.play(
|
||||||
|
&(0..self.players.len())
|
||||||
.flat_map(|i| {
|
.flat_map(|i| {
|
||||||
(0..self.players.len())
|
(0..self.players.len())
|
||||||
.map(move |j| (i, j))
|
.map(move |j| (i, j))
|
||||||
.filter(|(i, j)| i != j)
|
.filter(|(i, j)| i != j)
|
||||||
|
.collect::<Vec<_>>()
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.repeat(n);
|
.repeat(n),
|
||||||
games.shuffle(&mut rand::rng());
|
);
|
||||||
self.play(&games);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn process_outcome(&mut self, player1: usize, player2: usize, outcome: &Outcomes) {
|
fn process_outcome(&mut self, player1: usize, player2: usize, outcome: &Outcomes) {
|
||||||
(self.players[player1].2, self.players[player2].2) = glicko2(
|
let (np1, np2) = elo(
|
||||||
&self.players[player1].2,
|
&self.players[player1].2,
|
||||||
&self.players[player2].2,
|
&self.players[player2].2,
|
||||||
outcome,
|
outcome,
|
||||||
&Default::default(),
|
&EloConfig::new(),
|
||||||
);
|
);
|
||||||
|
self.players[player1].2 = np1;
|
||||||
|
self.players[player2].2 = np2;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_agents(
|
||||||
|
player_1_fn: &AgentMaker,
|
||||||
|
player_2_fn: &AgentMaker,
|
||||||
|
) -> (Box<dyn Agent>, Box<dyn Agent>) {
|
||||||
|
(player_1_fn(Piece::Black), player_2_fn(Piece::White))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn play_two_inner(player_1: Box<dyn Agent>, player_2: Box<dyn Agent>) -> Outcomes {
|
fn play_two_inner(player_1: Box<dyn Agent>, player_2: Box<dyn Agent>) -> Outcomes {
|
||||||
@@ -289,8 +289,7 @@ impl PlayerArena {
|
|||||||
player_1,
|
player_1,
|
||||||
player_2,
|
player_2,
|
||||||
false,
|
false,
|
||||||
// Board::random(rand::random_range(4..=15)),
|
Board::random(rand::random_range(3..=7)),
|
||||||
Board::STARTING_POSITION,
|
|
||||||
)
|
)
|
||||||
.expect("unable to create game")
|
.expect("unable to create game")
|
||||||
.loop_until_result();
|
.loop_until_result();
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use super::r#move::{MoveCoord, MoveValueConfig};
|
use super::r#move::{MoveCoord, MoveValueConfig, MoveValueStats};
|
||||||
use crate::{
|
use crate::{
|
||||||
logic::r#move::Move,
|
logic::r#move::Move,
|
||||||
repr::{Board, Piece, Winner},
|
repr::{Board, Piece, Winner},
|
||||||
@@ -34,7 +34,7 @@ pub struct FutureMoves {
|
|||||||
board: Board,
|
board: Board,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Allocative, Default)]
|
#[derive(Copy, Clone, Allocative)]
|
||||||
pub struct FutureMoveConfig {
|
pub struct FutureMoveConfig {
|
||||||
/// Max depth that we should try to traverse
|
/// Max depth that we should try to traverse
|
||||||
pub max_depth: usize,
|
pub max_depth: usize,
|
||||||
@@ -87,17 +87,21 @@ impl std::fmt::Display for FutureMoveConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Allocative, Default)]
|
#[derive(Debug, Clone, Copy, Allocative)]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub enum ChildrenEvalMethod {
|
pub enum ChildrenEvalMethod {
|
||||||
/// Best so far?
|
/// Best so far?
|
||||||
// #[default]
|
|
||||||
MinMax,
|
MinMax,
|
||||||
|
|
||||||
#[default]
|
|
||||||
MinMaxProb,
|
MinMaxProb,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for ChildrenEvalMethod {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::MinMax
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl FutureMoves {
|
impl FutureMoves {
|
||||||
pub const fn new(agent_color: Piece, config: FutureMoveConfig) -> Self {
|
pub const fn new(agent_color: Piece, config: FutureMoveConfig) -> Self {
|
||||||
Self {
|
Self {
|
||||||
@@ -109,9 +113,6 @@ impl FutureMoves {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const ARENA_ENTRY_SIZE: usize =
|
|
||||||
size_of::<Move>() + size_of::<usize>() * (Board::AREA.0 as usize / 4);
|
|
||||||
|
|
||||||
/// Return the length of the Arena
|
/// Return the length of the Arena
|
||||||
pub fn arena_len(&self) -> usize {
|
pub fn arena_len(&self) -> usize {
|
||||||
self.arena.len()
|
self.arena.len()
|
||||||
@@ -296,7 +297,33 @@ impl FutureMoves {
|
|||||||
.iter()
|
.iter()
|
||||||
.map(|&child| self.arena[child].value)
|
.map(|&child| self.arena[child].value)
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
let child_value = if self.arena[idx].color == self.agent_color {
|
|
||||||
|
match self.config.children_eval_method {
|
||||||
|
ChildrenEvalMethod::MinMax => {
|
||||||
|
let children_value = if self.arena[idx].color == self.agent_color {
|
||||||
|
// get best (for the adversary) enemy play
|
||||||
|
// this assumes the adversary is playing optimally
|
||||||
|
|
||||||
|
children_values
|
||||||
|
.into_iter()
|
||||||
|
.min_by_key(|x| x.value)
|
||||||
|
.map(|x| x.value)
|
||||||
|
} else {
|
||||||
|
children_values
|
||||||
|
.into_iter()
|
||||||
|
.max_by_key(|x| x.value)
|
||||||
|
.map(|x| x.value)
|
||||||
|
}
|
||||||
|
.unwrap_or(0);
|
||||||
|
// we use `depth` and divided `self_value` by it, idk if this is worth it
|
||||||
|
// we should really setup some sort of ELO rating for each commit, playing them against
|
||||||
|
// each other or something, could be cool to benchmark these more subjective things, not
|
||||||
|
// just performance (cycles/time wise)
|
||||||
|
self.arena[idx].value.value =
|
||||||
|
self.arena[idx].self_value as i32 + children_value;
|
||||||
|
}
|
||||||
|
ChildrenEvalMethod::MinMaxProb => {
|
||||||
|
let children_value = if self.arena[idx].color == self.agent_color {
|
||||||
// get best (for the adversary) enemy play
|
// get best (for the adversary) enemy play
|
||||||
// this assumes the adversary is playing optimally
|
// this assumes the adversary is playing optimally
|
||||||
|
|
||||||
@@ -306,19 +333,20 @@ impl FutureMoves {
|
|||||||
}
|
}
|
||||||
.cloned()
|
.cloned()
|
||||||
.unwrap_or(Default::default());
|
.unwrap_or(Default::default());
|
||||||
self.arena[idx].value = self.arena[idx].self_value;
|
// we use `depth` and divided `self_value` by it, idk if this is worth it
|
||||||
|
// we should really setup some sort of ELO rating for each commit, playing them against
|
||||||
|
// each other or something, could be cool to benchmark these more subjective things, not
|
||||||
|
// just performance (cycles/time wise)
|
||||||
|
|
||||||
match self.config.children_eval_method {
|
let wins = children_values.iter().map(|x| x.wins).sum();
|
||||||
ChildrenEvalMethod::MinMax => {
|
let losses = children_values.iter().map(|x| x.losses).sum();
|
||||||
self.arena[idx].value.value += child_value.value;
|
|
||||||
self.arena[idx].value.set_state(child_value.state());
|
|
||||||
}
|
|
||||||
ChildrenEvalMethod::MinMaxProb => {
|
|
||||||
self.arena[idx]
|
|
||||||
.value
|
|
||||||
.populate_self_from_children(&children_values);
|
|
||||||
|
|
||||||
self.arena[idx].value.value += child_value.value;
|
let final_value = MoveValueStats {
|
||||||
|
wins,
|
||||||
|
losses,
|
||||||
|
value: self.arena[idx].self_value as i32 + children_value.value,
|
||||||
|
};
|
||||||
|
self.arena[idx].value = final_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -364,15 +392,12 @@ impl FutureMoves {
|
|||||||
/// Return the best move which is a child of `self.current_root`
|
/// Return the best move which is a child of `self.current_root`
|
||||||
pub fn best_move(&self) -> Option<MoveCoord> {
|
pub fn best_move(&self) -> Option<MoveCoord> {
|
||||||
self.current_root
|
self.current_root
|
||||||
.and_then(|x| match self.config.children_eval_method {
|
.and_then(|x| {
|
||||||
ChildrenEvalMethod::MinMax => self.arena[x]
|
self.arena[x]
|
||||||
.children
|
.children
|
||||||
.iter()
|
.iter()
|
||||||
.max_by_key(|&&idx| self.arena[idx].value),
|
// this would be considered `minimax`
|
||||||
ChildrenEvalMethod::MinMaxProb => self.arena[x]
|
.max_by_key(|&&idx| self.arena[idx].value)
|
||||||
.children
|
|
||||||
.iter()
|
|
||||||
.max_by_key(|&&idx| self.arena[idx].value),
|
|
||||||
})
|
})
|
||||||
.inspect(|&&x| {
|
.inspect(|&&x| {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
mod board_value;
|
mod board_value;
|
||||||
mod future_moves;
|
mod future_moves;
|
||||||
mod r#move;
|
mod r#move;
|
||||||
mod mvs;
|
|
||||||
pub use future_moves::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves};
|
pub use future_moves::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves};
|
||||||
pub use r#move::MoveCoord;
|
pub use r#move::MoveCoord;
|
||||||
|
|||||||
@@ -1,12 +1,48 @@
|
|||||||
use super::{
|
use std::cmp::Ordering;
|
||||||
board_value::BoardValueMap,
|
|
||||||
mvs::{MVSGameState, MoveValueStats},
|
use super::board_value::BoardValueMap;
|
||||||
};
|
|
||||||
use crate::repr::{Board, CoordPair, Piece, Winner};
|
use crate::repr::{Board, CoordPair, Piece, Winner};
|
||||||
use allocative::Allocative;
|
use allocative::Allocative;
|
||||||
|
|
||||||
pub type MoveCoord = Option<CoordPair>;
|
pub type MoveCoord = Option<CoordPair>;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
|
||||||
|
pub struct MoveValueStats {
|
||||||
|
pub wins: u16,
|
||||||
|
pub losses: u16,
|
||||||
|
pub value: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MoveValueStats {
|
||||||
|
pub fn chance_win(&self) -> f32 {
|
||||||
|
self.wins as f32 / (self.losses + self.wins) as f32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for MoveValueStats {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
Some(self.cmp(other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ord for MoveValueStats {
|
||||||
|
fn cmp(&self, other: &Self) -> Ordering {
|
||||||
|
if self.wins != 0 || self.losses != 0 || other.wins != 0 || other.losses != 0 {
|
||||||
|
let s_cw = self.chance_win();
|
||||||
|
let o_cw = other.chance_win();
|
||||||
|
if s_cw > o_cw {
|
||||||
|
Ordering::Greater
|
||||||
|
} else if o_cw > s_cw {
|
||||||
|
Ordering::Less
|
||||||
|
} else {
|
||||||
|
Ordering::Equal
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.value.cmp(&other.value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Allocative)]
|
#[derive(Clone, Debug, Allocative)]
|
||||||
pub struct Move {
|
pub struct Move {
|
||||||
/// Coordinates (i, j) of the move (if it exists)
|
/// Coordinates (i, j) of the move (if it exists)
|
||||||
@@ -29,7 +65,7 @@ pub struct Move {
|
|||||||
pub value: MoveValueStats,
|
pub value: MoveValueStats,
|
||||||
|
|
||||||
/// What is the inherent value of this move (not including children)
|
/// What is the inherent value of this move (not including children)
|
||||||
pub self_value: MoveValueStats,
|
pub self_value: i16,
|
||||||
|
|
||||||
/// Which color made a move on this move?
|
/// Which color made a move on this move?
|
||||||
pub color: Piece,
|
pub color: Piece,
|
||||||
@@ -46,39 +82,42 @@ impl Move {
|
|||||||
board: Board,
|
board: Board,
|
||||||
color: Piece,
|
color: Piece,
|
||||||
agent_color: Piece,
|
agent_color: Piece,
|
||||||
_: MoveValueConfig,
|
mvc: MoveValueConfig,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let mut m = Move {
|
let mut m = Move {
|
||||||
coord,
|
coord,
|
||||||
winner: board.game_winner(),
|
winner: board.game_winner(),
|
||||||
parent: None,
|
parent: None,
|
||||||
children: Vec::new(),
|
children: Vec::new(),
|
||||||
value: Default::default(),
|
value: MoveValueStats {
|
||||||
|
wins: 0,
|
||||||
|
losses: 0,
|
||||||
|
value: 0,
|
||||||
|
},
|
||||||
color,
|
color,
|
||||||
is_trimmed: false,
|
is_trimmed: false,
|
||||||
self_value: Default::default(),
|
self_value: 0,
|
||||||
};
|
};
|
||||||
|
m.self_value = m.compute_self_value(agent_color, &board, mvc);
|
||||||
// set wins/losses values appropriately
|
|
||||||
match m.winner {
|
|
||||||
Winner::Player(piece) => {
|
|
||||||
if piece == agent_color {
|
|
||||||
m.self_value.set_state(Some(MVSGameState::Win));
|
|
||||||
} else {
|
|
||||||
m.self_value.set_state(Some(MVSGameState::Loss));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Winner::Tie => {
|
|
||||||
m.self_value.set_state(Some(MVSGameState::Tie));
|
|
||||||
}
|
|
||||||
Winner::None => {}
|
|
||||||
}
|
|
||||||
|
|
||||||
m.self_value.value =
|
|
||||||
const { BoardValueMap::weighted() }.board_value(&board, agent_color) as i32;
|
|
||||||
m
|
m
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn compute_self_value(&self, agent_color: Piece, board: &Board, _mvc: MoveValueConfig) -> i16 {
|
||||||
|
if self.winner == Winner::Player(!agent_color) {
|
||||||
|
// if this board results in the opponent winning, MAJORLY negatively weigh this move
|
||||||
|
// NOTE! this branch isn't completely deleted because if so, the bot wouldn't make a move.
|
||||||
|
// We shouldn't prune branches because we still need to always react to the opponent's moves
|
||||||
|
return i16::MIN + 1;
|
||||||
|
} else if self.winner == Winner::Player(agent_color) {
|
||||||
|
// results in a win for the agent
|
||||||
|
return i16::MAX - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// I guess ignore Ties here, don't give them an explicit value,
|
||||||
|
|
||||||
|
const { BoardValueMap::weighted() }.board_value(board, agent_color)
|
||||||
|
}
|
||||||
|
|
||||||
/// Sort children of the [`Move`] by their self_value in `arena`
|
/// Sort children of the [`Move`] by their self_value in `arena`
|
||||||
pub fn sort_children(&mut self, arena: &[Move]) {
|
pub fn sort_children(&mut self, arena: &[Move]) {
|
||||||
self.children.sort_by(|&a, &b| {
|
self.children.sort_by(|&a, &b| {
|
||||||
|
|||||||
164
src/logic/mvs.rs
164
src/logic/mvs.rs
@@ -1,164 +0,0 @@
|
|||||||
use allocative::Allocative;
|
|
||||||
use std::cmp::Ordering;
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq, Eq, Allocative, Debug, PartialOrd, Ord)]
|
|
||||||
pub enum MVSGameState {
|
|
||||||
Win = 1,
|
|
||||||
Tie = 0,
|
|
||||||
Loss = -1,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
|
|
||||||
pub struct MoveValueStats {
|
|
||||||
state: Option<MVSGameState>,
|
|
||||||
wins: u16,
|
|
||||||
losses: u16,
|
|
||||||
ties: u16,
|
|
||||||
pub value: i32,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MoveValueStats {
|
|
||||||
#[cfg(test)]
|
|
||||||
pub fn new_from_outcomes(wins: u16, losses: u16, ties: u16) -> Self {
|
|
||||||
Self {
|
|
||||||
wins,
|
|
||||||
losses,
|
|
||||||
ties,
|
|
||||||
..Default::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub fn new_from_value(value: i32) -> Self {
|
|
||||||
Self {
|
|
||||||
value,
|
|
||||||
..Default::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub fn new_from_state(state: Option<MVSGameState>) -> Self {
|
|
||||||
Self {
|
|
||||||
state,
|
|
||||||
..Default::default()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chance_win(&self) -> Option<f32> {
|
|
||||||
let sum = self.losses + self.wins + self.ties;
|
|
||||||
if 20 > sum {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Some(self.wins as f32 / sum as f32)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub const fn set_state(&mut self, state: Option<MVSGameState>) {
|
|
||||||
self.state = state;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub const fn state(&self) -> Option<MVSGameState> {
|
|
||||||
self.state
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn populate_self_from_children(&mut self, others: &[Self]) {
|
|
||||||
(self.wins, self.losses, self.ties) =
|
|
||||||
others.iter().fold((0, 0, 0), |(wins, losses, ties), x| {
|
|
||||||
(
|
|
||||||
wins + x.wins + (x.state == Some(MVSGameState::Win)) as u16,
|
|
||||||
losses + x.losses + (x.state == Some(MVSGameState::Loss)) as u16,
|
|
||||||
ties + x.ties + (x.state == Some(MVSGameState::Tie)) as u16,
|
|
||||||
)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PartialOrd for MoveValueStats {
|
|
||||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
|
||||||
Some(self.cmp(other))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Ord for MoveValueStats {
|
|
||||||
fn cmp(&self, other: &Self) -> Ordering {
|
|
||||||
if self.state.is_some() || other.state.is_some() {
|
|
||||||
return self.state.cmp(&other.state);
|
|
||||||
}
|
|
||||||
|
|
||||||
let (s_cw, o_cw) = (self.chance_win(), other.chance_win());
|
|
||||||
if s_cw.is_some() || o_cw.is_some() {
|
|
||||||
if s_cw > o_cw {
|
|
||||||
return Ordering::Greater;
|
|
||||||
} else if o_cw > s_cw {
|
|
||||||
return Ordering::Less;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.value.cmp(&other.value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn two_prob() {
|
|
||||||
let one = MoveValueStats::new_from_outcomes(100, 40, 0);
|
|
||||||
|
|
||||||
let two = MoveValueStats::new_from_outcomes(40, 60, 0);
|
|
||||||
assert!(one > two);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn one_prob_one_non() {
|
|
||||||
let one = MoveValueStats::new_from_outcomes(100, 4, 0);
|
|
||||||
let two = MoveValueStats::new_from_value(10);
|
|
||||||
assert!(one > two);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn one_prob_one_win() {
|
|
||||||
let one = MoveValueStats::new_from_outcomes(100, 4, 0);
|
|
||||||
let two = MoveValueStats::new_from_state(Some(MVSGameState::Win));
|
|
||||||
assert!(one < two);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn two_prob_zero() {
|
|
||||||
let one = MoveValueStats::new_from_outcomes(100, 0, 0);
|
|
||||||
let two = MoveValueStats::new_from_outcomes(0, 60, 0);
|
|
||||||
assert!(one > two);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_children_pop() {
|
|
||||||
let mut a = MoveValueStats::new_from_value(0);
|
|
||||||
|
|
||||||
let children = vec![
|
|
||||||
MoveValueStats::new_from_outcomes(1, 0, 0),
|
|
||||||
MoveValueStats::new_from_outcomes(0, 2, 0),
|
|
||||||
MoveValueStats::new_from_outcomes(0, 0, 3),
|
|
||||||
];
|
|
||||||
a.populate_self_from_children(&children);
|
|
||||||
assert_eq!(a.wins, 1, "Wins should be 1");
|
|
||||||
assert_eq!(a.losses, 2, "Losses should be 2");
|
|
||||||
assert_eq!(a.ties, 3, "Ties should be 3");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_children_pop_state() {
|
|
||||||
let mut a = MoveValueStats::new_from_value(0);
|
|
||||||
|
|
||||||
let children = vec![
|
|
||||||
MoveValueStats::new_from_state(Some(MVSGameState::Win)),
|
|
||||||
MoveValueStats::new_from_state(Some(MVSGameState::Win)),
|
|
||||||
MoveValueStats::new_from_state(Some(MVSGameState::Loss)),
|
|
||||||
MoveValueStats::new_from_state(Some(MVSGameState::Tie)),
|
|
||||||
MoveValueStats::new_from_state(Some(MVSGameState::Tie)),
|
|
||||||
];
|
|
||||||
a.populate_self_from_children(&children);
|
|
||||||
assert_eq!(a.wins, 2, "Wins should be 2");
|
|
||||||
assert_eq!(a.losses, 1, "Losses should be 1");
|
|
||||||
assert_eq!(a.ties, 2, "Ties should be 2");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -39,7 +39,7 @@ fn main() {
|
|||||||
min_arena_depth: 14,
|
min_arena_depth: 14,
|
||||||
top_k_children: 2,
|
top_k_children: 2,
|
||||||
up_to_minus: 10,
|
up_to_minus: 10,
|
||||||
max_arena_size: 50_000_000,
|
max_arena_size: 200_000_000,
|
||||||
do_prune: false,
|
do_prune: false,
|
||||||
print: true,
|
print: true,
|
||||||
children_eval_method: Default::default(),
|
children_eval_method: Default::default(),
|
||||||
|
|||||||
@@ -3,11 +3,10 @@ use allocative::Allocative;
|
|||||||
use rand::seq::IteratorRandom;
|
use rand::seq::IteratorRandom;
|
||||||
use std::{cmp::Ordering, fmt};
|
use std::{cmp::Ordering, fmt};
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative, Default)]
|
#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative)]
|
||||||
pub enum Winner {
|
pub enum Winner {
|
||||||
Player(Piece),
|
Player(Piece),
|
||||||
Tie,
|
Tie,
|
||||||
#[default]
|
|
||||||
None,
|
None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,12 +14,12 @@ impl<T: Copy> PosMap<T> {
|
|||||||
Self(MaybeUninit::zeroed().assume_init())
|
Self(MaybeUninit::zeroed().assume_init())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const fn from(mut v: [[T; Board::SIZE as usize]; Board::SIZE as usize]) -> Self {
|
pub const fn from(v: [[T; Board::SIZE as usize]; Board::SIZE as usize]) -> Self {
|
||||||
let mut n = unsafe { Self::uninit() };
|
let mut n = unsafe { Self::uninit() };
|
||||||
|
|
||||||
const_for!(i in 0..Board::SIZE => {
|
const_for!(i in 0..Board::SIZE => {
|
||||||
const_for!(j in 0..Board::SIZE => {
|
const_for!(j in 0..Board::SIZE => {
|
||||||
std::mem::swap(n.get_mut(CoordPair::from_axes(i, j)), &mut v[i as usize][j as usize]);
|
n.set(CoordPair::from_axes(i, j), v[i as usize][j as usize]);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
n
|
n
|
||||||
|
|||||||
Reference in New Issue
Block a user