From 8146c11d484fe363d584d23603cd50aec3b118b7 Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Wed, 23 Apr 2025 10:46:19 -0400 Subject: [PATCH] initial implementation of MoveValueStats --- src/elo.rs | 8 ++++-- src/logic/future_moves.rs | 58 +++++++++++++++++++++++++++++++-------- src/logic/move.rs | 47 +++++++++++++++++++++++++++++-- 3 files changed, 96 insertions(+), 17 deletions(-) diff --git a/src/elo.rs b/src/elo.rs index 0303113..582d87b 100644 --- a/src/elo.rs +++ b/src/elo.rs @@ -71,9 +71,11 @@ pub fn run() { .to_vec() }) .flat_map(move |prev_c| { - [ChildrenEvalMethod::MinMax].map(move |method| FutureMoveConfig { - children_eval_method: method, - ..prev_c + [ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxProb].map(move |method| { + FutureMoveConfig { + children_eval_method: method, + ..prev_c + } }) }) .flat_map(move |prev_c| { diff --git a/src/logic/future_moves.rs b/src/logic/future_moves.rs index 93d8e9a..ff18a11 100644 --- a/src/logic/future_moves.rs +++ b/src/logic/future_moves.rs @@ -1,4 +1,4 @@ -use super::r#move::{MoveCoord, MoveValueConfig}; +use super::r#move::{MoveCoord, MoveValueConfig, MoveValueStats}; use crate::{ logic::r#move::Move, repr::{Board, Piece, Winner}, @@ -92,6 +92,8 @@ impl std::fmt::Display for FutureMoveConfig { pub enum ChildrenEvalMethod { /// Best so far? MinMax, + + MinMaxProb, } impl Default for ChildrenEvalMethod { @@ -296,25 +298,57 @@ impl FutureMoves { .map(|&child| self.arena[child].value) .collect::>(); - let children_value = match self.config.children_eval_method { + match self.config.children_eval_method { ChildrenEvalMethod::MinMax => { - if self.arena[idx].color == self.agent_color { + let children_value = if self.arena[idx].color == self.agent_color { // get best (for the adversary) enemy play // this assumes the adversary is playing optimally - children_values.into_iter().min() + children_values + .into_iter() + .min_by_key(|x| x.value) + .map(|x| x.value) } else { - children_values.into_iter().max() + children_values + .into_iter() + .max_by_key(|x| x.value) + .map(|x| x.value) } + .unwrap_or(0); + // we use `depth` and divided `self_value` by it, idk if this is worth it + // we should really setup some sort of ELO rating for each commit, playing them against + // each other or something, could be cool to benchmark these more subjective things, not + // just performance (cycles/time wise) + self.arena[idx].value.value = + self.arena[idx].self_value as i32 + children_value; + } + ChildrenEvalMethod::MinMaxProb => { + let children_value = if self.arena[idx].color == self.agent_color { + // get best (for the adversary) enemy play + // this assumes the adversary is playing optimally + + children_values.iter().min() + } else { + children_values.iter().max() + } + .cloned() + .unwrap_or(Default::default()); + // we use `depth` and divided `self_value` by it, idk if this is worth it + // we should really setup some sort of ELO rating for each commit, playing them against + // each other or something, could be cool to benchmark these more subjective things, not + // just performance (cycles/time wise) + + let wins = children_values.iter().map(|x| x.wins).sum(); + let losses = children_values.iter().map(|x| x.losses).sum(); + + let final_value = MoveValueStats { + wins, + losses, + value: self.arena[idx].self_value as i32 + children_value.value, + }; + self.arena[idx].value = final_value; } } - .unwrap_or(0); - - // we use `depth` and divided `self_value` by it, idk if this is worth it - // we should really setup some sort of ELO rating for each commit, playing them against - // each other or something, could be cool to benchmark these more subjective things, not - // just performance (cycles/time wise) - self.arena[idx].value = self.arena[idx].self_value as i32 + children_value; } } } diff --git a/src/logic/move.rs b/src/logic/move.rs index 08f65cf..66f80f7 100644 --- a/src/logic/move.rs +++ b/src/logic/move.rs @@ -1,9 +1,48 @@ +use std::cmp::Ordering; + use super::board_value::BoardValueMap; use crate::repr::{Board, CoordPair, Piece, Winner}; use allocative::Allocative; pub type MoveCoord = Option; +#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)] +pub struct MoveValueStats { + pub wins: u16, + pub losses: u16, + pub value: i32, +} + +impl MoveValueStats { + pub fn chance_win(&self) -> f32 { + self.wins as f32 / (self.losses + self.wins) as f32 + } +} + +impl PartialOrd for MoveValueStats { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for MoveValueStats { + fn cmp(&self, other: &Self) -> Ordering { + if self.wins != 0 || self.losses != 0 || other.wins != 0 || other.losses != 0 { + let s_cw = self.chance_win(); + let o_cw = other.chance_win(); + if s_cw > o_cw { + Ordering::Greater + } else if o_cw > s_cw { + Ordering::Less + } else { + Ordering::Equal + } + } else { + self.value.cmp(&other.value) + } + } +} + #[derive(Clone, Debug, Allocative)] pub struct Move { /// Coordinates (i, j) of the move (if it exists) @@ -23,7 +62,7 @@ pub struct Move { pub children: Vec, /// Value of this move (including children) - pub value: i32, + pub value: MoveValueStats, /// What is the inherit value of this move (not including children) pub self_value: i16, @@ -50,7 +89,11 @@ impl Move { winner: board.game_winner(), parent: None, children: Vec::new(), - value: i32::MIN, + value: MoveValueStats { + wins: 0, + losses: 0, + value: 0, + }, color, is_trimmed: false, self_value: 0,