initial implementation of MoveValueStats

2025-04-23 10:46:19 -04:00
8 changed files with 190 additions and 293 deletions
--- a/src/elo.rs
+++ b/src/elo.rs
@@ -2,14 +2,14 @@ use crate::{
    agent::{Agent, RandomAgent},
    complexagent::ComplexAgent,
    game_inner::GameInner,
-    logic::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves},
+    logic::{ChildrenEvalMethod, FutureMoveConfig},
    repr::{Board, Piece, Winner},
 };
-use indicatif::{ProgressBar, ProgressStyle};
+use indicatif::{ParallelProgressIterator, ProgressBar, ProgressDrawTarget, ProgressStyle};
 use rand::seq::SliceRandom;
 use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use skillratings::{
-    glicko2::{confidence_interval, glicko2, Glicko2Rating},
+    elo::{elo, EloConfig, EloRating},
    Outcomes, Rating,
 };
 use std::num::NonZero;
@@ -18,19 +18,18 @@ type AgentMaker = Box<dyn Fn(Piece) -> Box<dyn Agent>>;

 #[allow(dead_code)]
 pub fn run() {
-    let total_memory = 30_000_000_000; // 30 GB
-    let num_threads = std::thread::available_parallelism()
-        .map(NonZero::get)
-        .expect("unable to get number of threads");
-    let mem_per_thread = total_memory / num_threads;
-
    let fmv_base = FutureMoveConfig {
-        max_arena_size: mem_per_thread / FutureMoves::ARENA_ENTRY_SIZE,
+        max_depth: 20,
+        min_arena_depth: 14,
+        top_k_children: 2,
+        up_to_minus: 10,
+        max_arena_size: usize::MAX,
+        do_prune: false,
        print: false,
-        ..Default::default()
+        children_eval_method: Default::default(),
    };

-    let configs = [2, 3, 4, 5, 6, 7, 8]
+    let configs = [6]
        .into_iter()
        .map(move |d| FutureMoveConfig {
            max_depth: d,
@@ -120,45 +119,33 @@ pub fn run() {
            )
        })
        .collect();
-
-    if false {
    vec.push((
        "RandomAgent".to_string(),
        Box::new(move |piece| Box::new(RandomAgent::new(piece))),
    ));
-    }

    let mut arena = PlayerArena::new(vec);

-    arena.prop_arena(500);
+    arena.prop_arena(100);

    println!("{}", arena);
 }

 pub struct PlayerArena {
    /// Name, Creator Function, Elo
-    players: Vec<(String, AgentMaker, Glicko2Rating)>,
+    players: Vec<(String, AgentMaker, EloRating)>,
 }

 impl std::fmt::Display for PlayerArena {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut players_i: Vec<usize> = (0..self.players.len()).collect();
-
-        players_i.sort_by(|&a, &b| {
-            self.players[b]
-                .2
-                .rating()
-                .total_cmp(&self.players[a].2.rating())
-        });
+        players_i.sort_by_key(|&i| -(self.players[i].2.rating() * 100.0) as i64);

        for i in players_i {
-            let conf_interval = confidence_interval(&self.players[i].2);
-
            writeln!(
                f,
-                "({:.2}[+/-{:.2}]): {}",
+                "({:.2}): {}",
                self.players[i].2.rating(),
-                conf_interval.1 - self.players[i].2.rating(),
                self.players[i].0
            )?;
        }
@@ -172,8 +159,9 @@ impl PlayerArena {
        Self {
            players: players
                .into_iter()
-                // All starting ratings should be the default
-                .map(|(a, b)| (a, b, Default::default()))
+                .zip([EloRating::new()].into_iter().cycle())
+                // flatten tuple
+                .map(|((a, b), c)| (a, b, c))
                .collect(),
        }
    }
@@ -184,10 +172,7 @@ impl PlayerArena {
            .map(|&(i, j)| {
                (
                    (i, j),
-                    (
-                        (self.players[i].1)(Piece::Black),
-                        (self.players[j].1)(Piece::White),
-                    ),
+                    Self::create_agents(&self.players[i].1, &self.players[j].1),
                )
            })
            .collect::<Vec<_>>();
@@ -204,6 +189,7 @@ impl PlayerArena {
        // Spawn parallel processing in a dedicated thread
        let processing_thread = {
            let sender = sender.clone();
+            let term = term.clone();

            std::thread::spawn(move || {
                rayon::ThreadPoolBuilder::new()
@@ -217,6 +203,22 @@ impl PlayerArena {

                created_pairs
                    .into_par_iter()
+                    .progress_with({
+                        let a = ProgressBar::new(num as u64).with_style(
+                            ProgressStyle::with_template(
+                                "[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}",
+                            )
+                            .expect("invalid ProgressStyle"),
+                        );
+                        a.set_draw_target(ProgressDrawTarget::term(term, 5));
+                        a
+                    })
+                    .progress_with_style(
+                        ProgressStyle::with_template(
+                            "[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}",
+                        )
+                        .expect("invalid ProgressStyle"),
+                    )
                    .map(|((i, j), (p1, p2))| (i, j, Self::play_two_inner(p1, p2)))
                    .for_each(|(i, j, o)| {
                        sender.send((i, j, o)).expect("Failed to send result");
@@ -229,29 +231,17 @@ impl PlayerArena {

        // Process results on main thread as they arrive
        let mut received_num = 0;
-        let p = ProgressBar::new(num as u64).with_style(
-            ProgressStyle::with_template("[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}")
-                .expect("invalid ProgressStyle"),
-        );
-
        while let Ok((i, j, o)) = receiver.recv() {
            self.process_outcome(i, j, &o);
+            received_num += 1;

-            if received_num > 0 {
-                term.clear_last_lines(self.players.len() + 1)
+            term.clear_last_lines(self.players.len())
                .expect("unable to clear prev lines");
-            }
            term.write_str(format!("{}", self).as_str())
                .expect("unable to write leaderboard");

-            received_num += 1;
-            p.inc(1);
-            // add extra newline after progressbar
-            println!();
-
            // break if all pairs were recieved
            if received_num == num {
-                drop(receiver);
                break;
            }
        }
@@ -263,25 +253,35 @@ impl PlayerArena {
    }

    fn prop_arena(&mut self, n: usize) {
-        let mut games = (0..self.players.len())
+        self.play(
+            &(0..self.players.len())
                .flat_map(|i| {
                    (0..self.players.len())
                        .map(move |j| (i, j))
                        .filter(|(i, j)| i != j)
+                        .collect::<Vec<_>>()
                })
                .collect::<Vec<_>>()
-            .repeat(n);
-        games.shuffle(&mut rand::rng());
-        self.play(&games);
+                .repeat(n),
+        );
    }

    fn process_outcome(&mut self, player1: usize, player2: usize, outcome: &Outcomes) {
-        (self.players[player1].2, self.players[player2].2) = glicko2(
+        let (np1, np2) = elo(
            &self.players[player1].2,
            &self.players[player2].2,
            outcome,
-            &Default::default(),
+            &EloConfig::new(),
        );
+        self.players[player1].2 = np1;
+        self.players[player2].2 = np2;
+    }
+
+    fn create_agents(
+        player_1_fn: &AgentMaker,
+        player_2_fn: &AgentMaker,
+    ) -> (Box<dyn Agent>, Box<dyn Agent>) {
+        (player_1_fn(Piece::Black), player_2_fn(Piece::White))
    }

    fn play_two_inner(player_1: Box<dyn Agent>, player_2: Box<dyn Agent>) -> Outcomes {
@@ -289,8 +289,7 @@ impl PlayerArena {
            player_1,
            player_2,
            false,
-            // Board::random(rand::random_range(4..=15)),
-            Board::STARTING_POSITION,
+            Board::random(rand::random_range(3..=7)),
        )
        .expect("unable to create game")
        .loop_until_result();
--- a/src/logic/future_moves.rs
+++ b/src/logic/future_moves.rs
@@ -1,4 +1,4 @@
-use super::r#move::{MoveCoord, MoveValueConfig};
+use super::r#move::{MoveCoord, MoveValueConfig, MoveValueStats};
 use crate::{
    logic::r#move::Move,
    repr::{Board, Piece, Winner},
@@ -34,7 +34,7 @@ pub struct FutureMoves {
    board: Board,
 }

-#[derive(Copy, Clone, Allocative, Default)]
+#[derive(Copy, Clone, Allocative)]
 pub struct FutureMoveConfig {
    /// Max depth of that we should try and traverse
    pub max_depth: usize,
@@ -87,17 +87,21 @@ impl std::fmt::Display for FutureMoveConfig {
    }
 }

-#[derive(Debug, Clone, Copy, Allocative, Default)]
+#[derive(Debug, Clone, Copy, Allocative)]
 #[allow(dead_code)]
 pub enum ChildrenEvalMethod {
    /// Best so far?
-    // #[default]
    MinMax,

-    #[default]
    MinMaxProb,
 }

+impl Default for ChildrenEvalMethod {
+    fn default() -> Self {
+        Self::MinMax
+    }
+}
+
 impl FutureMoves {
    pub const fn new(agent_color: Piece, config: FutureMoveConfig) -> Self {
        Self {
@@ -109,9 +113,6 @@ impl FutureMoves {
        }
    }

-    pub const ARENA_ENTRY_SIZE: usize =
-        size_of::<Move>() + size_of::<usize>() * (Board::AREA.0 as usize / 4);
-
    /// Return the length of the Arena
    pub fn arena_len(&self) -> usize {
        self.arena.len()
@@ -296,7 +297,33 @@ impl FutureMoves {
                    .iter()
                    .map(|&child| self.arena[child].value)
                    .collect::<Vec<_>>();
-                let child_value = if self.arena[idx].color == self.agent_color {
+
+                match self.config.children_eval_method {
+                    ChildrenEvalMethod::MinMax => {
+                        let children_value = if self.arena[idx].color == self.agent_color {
+                            // get best (for the adversary) enemy play
+                            // this assumes the adversary is playing optimally
+
+                            children_values
+                                .into_iter()
+                                .min_by_key(|x| x.value)
+                                .map(|x| x.value)
+                        } else {
+                            children_values
+                                .into_iter()
+                                .max_by_key(|x| x.value)
+                                .map(|x| x.value)
+                        }
+                        .unwrap_or(0);
+                        // we use `depth` and divided `self_value` by it, idk if this is worth it
+                        // we should really setup some sort of ELO rating for each commit, playing them against
+                        // each other or something, could be cool to benchmark these more subjective things, not
+                        // just performance (cycles/time wise)
+                        self.arena[idx].value.value =
+                            self.arena[idx].self_value as i32 + children_value;
+                    }
+                    ChildrenEvalMethod::MinMaxProb => {
+                        let children_value = if self.arena[idx].color == self.agent_color {
                            // get best (for the adversary) enemy play
                            // this assumes the adversary is playing optimally

@@ -306,19 +333,20 @@ impl FutureMoves {
                        }
                        .cloned()
                        .unwrap_or(Default::default());
-                self.arena[idx].value = self.arena[idx].self_value;
+                        // we use `depth` and divided `self_value` by it, idk if this is worth it
+                        // we should really setup some sort of ELO rating for each commit, playing them against
+                        // each other or something, could be cool to benchmark these more subjective things, not
+                        // just performance (cycles/time wise)

-                match self.config.children_eval_method {
-                    ChildrenEvalMethod::MinMax => {
-                        self.arena[idx].value.value += child_value.value;
-                        self.arena[idx].value.set_state(child_value.state());
-                    }
-                    ChildrenEvalMethod::MinMaxProb => {
-                        self.arena[idx]
-                            .value
-                            .populate_self_from_children(&children_values);
+                        let wins = children_values.iter().map(|x| x.wins).sum();
+                        let losses = children_values.iter().map(|x| x.losses).sum();

-                        self.arena[idx].value.value += child_value.value;
+                        let final_value = MoveValueStats {
+                            wins,
+                            losses,
+                            value: self.arena[idx].self_value as i32 + children_value.value,
+                        };
+                        self.arena[idx].value = final_value;
                    }
                }
            }
@@ -364,15 +392,12 @@ impl FutureMoves {
    /// Return the best move which is a child of `self.current_root`
    pub fn best_move(&self) -> Option<MoveCoord> {
        self.current_root
-            .and_then(|x| match self.config.children_eval_method {
-                ChildrenEvalMethod::MinMax => self.arena[x]
+            .and_then(|x| {
+                self.arena[x]
                    .children
                    .iter()
-                    .max_by_key(|&&idx| self.arena[idx].value),
-                ChildrenEvalMethod::MinMaxProb => self.arena[x]
-                    .children
-                    .iter()
-                    .max_by_key(|&&idx| self.arena[idx].value),
+                    // this would be considered `minimax`
+                    .max_by_key(|&&idx| self.arena[idx].value)
            })
            .inspect(|&&x| {
                assert_eq!(
--- a/src/logic/mod.rs
+++ b/src/logic/mod.rs
@@ -1,6 +1,5 @@
 mod board_value;
 mod future_moves;
 mod r#move;
-mod mvs;
 pub use future_moves::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves};
 pub use r#move::MoveCoord;
--- a/src/logic/move.rs
+++ b/src/logic/move.rs
@@ -1,12 +1,48 @@
-use super::{
-    board_value::BoardValueMap,
-    mvs::{MVSGameState, MoveValueStats},
-};
+use std::cmp::Ordering;
+
+use super::board_value::BoardValueMap;
 use crate::repr::{Board, CoordPair, Piece, Winner};
 use allocative::Allocative;

 pub type MoveCoord = Option<CoordPair>;

+/// Win/loss tallies plus a heuristic score, stored as the `value` of a `Move`.
+#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
+pub struct MoveValueStats {
+    pub wins: u16,
+    pub losses: u16,
+    pub value: i32,
+}
+
+impl MoveValueStats {
+    /// Share of recorded outcomes that are wins; `0.0` when nothing has been recorded.
+    pub fn chance_win(&self) -> f32 {
+        // Sum in u32 so two large u16 tallies cannot overflow; `.max(1)` avoids 0/0 = NaN.
+        self.wins as f32 / (u32::from(self.wins) + u32::from(self.losses)).max(1) as f32
+    }
+}
+
+impl PartialOrd for MoveValueStats {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for MoveValueStats {
+    /// Total order: recorded outcomes beat none, then exact win rate, `value`, raw tallies.
+    fn cmp(&self, other: &Self) -> Ordering {
+        let total = |m: &Self| u64::from(m.wins) + u64::from(m.losses);
+        // Cross-multiplying compares wins/total exactly, with no float rounding and no NaN.
+        let lhs = u64::from(self.wins) * total(other);
+        let rhs = u64::from(other.wins) * total(self);
+        (total(self) != 0)
+            .cmp(&(total(other) != 0))
+            .then(lhs.cmp(&rhs))
+            .then(self.value.cmp(&other.value))
+            .then((self.wins, self.losses).cmp(&(other.wins, other.losses)))
+    }
+}
+
 #[derive(Clone, Debug, Allocative)]
 pub struct Move {
    /// Coordinates (i, j) of the move (if it exists)
@@ -29,7 +65,7 @@ pub struct Move {
    pub value: MoveValueStats,

    /// What is the inherit value of this move (not including children)
-    pub self_value: MoveValueStats,
+    pub self_value: i16,

    /// Which color made a move on this move?
    pub color: Piece,
@@ -46,39 +82,42 @@ impl Move {
        board: Board,
        color: Piece,
        agent_color: Piece,
-        _: MoveValueConfig,
+        mvc: MoveValueConfig,
    ) -> Self {
        let mut m = Move {
            coord,
            winner: board.game_winner(),
            parent: None,
            children: Vec::new(),
-            value: Default::default(),
+            value: MoveValueStats {
+                wins: 0,
+                losses: 0,
+                value: 0,
+            },
            color,
            is_trimmed: false,
-            self_value: Default::default(),
+            self_value: 0,
        };
-
-        // set wins/losses values appropriately
-        match m.winner {
-            Winner::Player(piece) => {
-                if piece == agent_color {
-                    m.self_value.set_state(Some(MVSGameState::Win));
-                } else {
-                    m.self_value.set_state(Some(MVSGameState::Loss));
-                }
-            }
-            Winner::Tie => {
-                m.self_value.set_state(Some(MVSGameState::Tie));
-            }
-            Winner::None => {}
-        }
-
-        m.self_value.value =
-            const { BoardValueMap::weighted() }.board_value(&board, agent_color) as i32;
+        m.self_value = m.compute_self_value(agent_color, &board, mvc);
        m
    }

+    /// Value of this move on its own (children excluded), from `agent_color`'s point of view.
+    fn compute_self_value(&self, agent_color: Piece, board: &Board, _mvc: MoveValueConfig) -> i16 {
+        let decided_for = |side: Piece| self.winner == Winner::Player(side);
+        if decided_for(!agent_color) {
+            // The opponent wins here, so weigh the move as negatively as possible. The node is
+            // kept rather than pruned so the agent can still respond to the opponent's moves.
+            i16::MIN + 1
+        } else if decided_for(agent_color) {
+            // The agent wins here.
+            i16::MAX - 1
+        } else {
+            // Ties get no explicit value; they use the board heuristic like undecided games.
+            const { BoardValueMap::weighted() }.board_value(board, agent_color)
+        }
+    }
+
    /// Sort children of the [`Move`] by their self_value in `arena`
    pub fn sort_children(&mut self, arena: &[Move]) {
        self.children.sort_by(|&a, &b| {
--- a/src/logic/mvs.rs
+++ b/src/logic/mvs.rs
@@ -1,164 +0,0 @@
-use allocative::Allocative;
-use std::cmp::Ordering;
-
-#[derive(Clone, Copy, PartialEq, Eq, Allocative, Debug, PartialOrd, Ord)]
-pub enum MVSGameState {
-    Win = 1,
-    Tie = 0,
-    Loss = -1,
-}
-
-#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
-pub struct MoveValueStats {
-    state: Option<MVSGameState>,
-    wins: u16,
-    losses: u16,
-    ties: u16,
-    pub value: i32,
-}
-
-impl MoveValueStats {
-    #[cfg(test)]
-    pub fn new_from_outcomes(wins: u16, losses: u16, ties: u16) -> Self {
-        Self {
-            wins,
-            losses,
-            ties,
-            ..Default::default()
-        }
-    }
-
-    #[cfg(test)]
-    pub fn new_from_value(value: i32) -> Self {
-        Self {
-            value,
-            ..Default::default()
-        }
-    }
-
-    #[cfg(test)]
-    pub fn new_from_state(state: Option<MVSGameState>) -> Self {
-        Self {
-            state,
-            ..Default::default()
-        }
-    }
-
-    fn chance_win(&self) -> Option<f32> {
-        let sum = self.losses + self.wins + self.ties;
-        if 20 > sum {
-            return None;
-        }
-        Some(self.wins as f32 / sum as f32)
-    }
-
-    pub const fn set_state(&mut self, state: Option<MVSGameState>) {
-        self.state = state;
-    }
-
-    pub const fn state(&self) -> Option<MVSGameState> {
-        self.state
-    }
-
-    pub fn populate_self_from_children(&mut self, others: &[Self]) {
-        (self.wins, self.losses, self.ties) =
-            others.iter().fold((0, 0, 0), |(wins, losses, ties), x| {
-                (
-                    wins + x.wins + (x.state == Some(MVSGameState::Win)) as u16,
-                    losses + x.losses + (x.state == Some(MVSGameState::Loss)) as u16,
-                    ties + x.ties + (x.state == Some(MVSGameState::Tie)) as u16,
-                )
-            });
-    }
-}
-
-impl PartialOrd for MoveValueStats {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for MoveValueStats {
-    fn cmp(&self, other: &Self) -> Ordering {
-        if self.state.is_some() || other.state.is_some() {
-            return self.state.cmp(&other.state);
-        }
-
-        let (s_cw, o_cw) = (self.chance_win(), other.chance_win());
-        if s_cw.is_some() || o_cw.is_some() {
-            if s_cw > o_cw {
-                return Ordering::Greater;
-            } else if o_cw > s_cw {
-                return Ordering::Less;
-            }
-        }
-
-        self.value.cmp(&other.value)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn two_prob() {
-        let one = MoveValueStats::new_from_outcomes(100, 40, 0);
-
-        let two = MoveValueStats::new_from_outcomes(40, 60, 0);
-        assert!(one > two);
-    }
-
-    #[test]
-    fn one_prob_one_non() {
-        let one = MoveValueStats::new_from_outcomes(100, 4, 0);
-        let two = MoveValueStats::new_from_value(10);
-        assert!(one > two);
-    }
-
-    #[test]
-    fn one_prob_one_win() {
-        let one = MoveValueStats::new_from_outcomes(100, 4, 0);
-        let two = MoveValueStats::new_from_state(Some(MVSGameState::Win));
-        assert!(one < two);
-    }
-
-    #[test]
-    fn two_prob_zero() {
-        let one = MoveValueStats::new_from_outcomes(100, 0, 0);
-        let two = MoveValueStats::new_from_outcomes(0, 60, 0);
-        assert!(one > two);
-    }
-
-    #[test]
-    fn test_children_pop() {
-        let mut a = MoveValueStats::new_from_value(0);
-
-        let children = vec![
-            MoveValueStats::new_from_outcomes(1, 0, 0),
-            MoveValueStats::new_from_outcomes(0, 2, 0),
-            MoveValueStats::new_from_outcomes(0, 0, 3),
-        ];
-        a.populate_self_from_children(&children);
-        assert_eq!(a.wins, 1, "Wins should be 1");
-        assert_eq!(a.losses, 2, "Losses should be 2");
-        assert_eq!(a.ties, 3, "Ties should be 3");
-    }
-
-    #[test]
-    fn test_children_pop_state() {
-        let mut a = MoveValueStats::new_from_value(0);
-
-        let children = vec![
-            MoveValueStats::new_from_state(Some(MVSGameState::Win)),
-            MoveValueStats::new_from_state(Some(MVSGameState::Win)),
-            MoveValueStats::new_from_state(Some(MVSGameState::Loss)),
-            MoveValueStats::new_from_state(Some(MVSGameState::Tie)),
-            MoveValueStats::new_from_state(Some(MVSGameState::Tie)),
-        ];
-        a.populate_self_from_children(&children);
-        assert_eq!(a.wins, 2, "Wins should be 2");
-        assert_eq!(a.losses, 1, "Losses should be 1");
-        assert_eq!(a.ties, 2, "Ties should be 2");
-    }
-}
--- a/src/main.rs
+++ b/src/main.rs
@@ -39,7 +39,7 @@ fn main() {
        min_arena_depth: 14,
        top_k_children: 2,
        up_to_minus: 10,
-        max_arena_size: 50_000_000,
+        max_arena_size: 200_000_000,
        do_prune: false,
        print: true,
        children_eval_method: Default::default(),
--- a/src/repr/board.rs
+++ b/src/repr/board.rs
@@ -3,11 +3,10 @@ use allocative::Allocative;
 use rand::seq::IteratorRandom;
 use std::{cmp::Ordering, fmt};

-#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative, Default)]
+#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative)]
 pub enum Winner {
    Player(Piece),
    Tie,
-    #[default]
    None,
 }

--- a/src/repr/pos_map.rs
+++ b/src/repr/pos_map.rs
@@ -14,12 +14,12 @@ impl<T: Copy> PosMap<T> {
        Self(MaybeUninit::zeroed().assume_init())
    }

-    pub const fn from(mut v: [[T; Board::SIZE as usize]; Board::SIZE as usize]) -> Self {
+    pub const fn from(v: [[T; Board::SIZE as usize]; Board::SIZE as usize]) -> Self {
        let mut n = unsafe { Self::uninit() };

        const_for!(i in 0..Board::SIZE => {
            const_for!(j in 0..Board::SIZE => {
-                std::mem::swap(n.get_mut(CoordPair::from_axes(i, j)), &mut v[i as usize][j as usize]);
+                n.set(CoordPair::from_axes(i, j), v[i as usize][j as usize]);
            });
        });
        n