initial implementation of MoveValueStats

always use weighted board values
refocus_tree: clear arena on no root
2025-04-23 10:46:19 -04:00 · 2025-04-23 10:18:42 -04:00 · 2025-04-22 15:55:21 -04:00 · 2025-04-22 15:53:50 -04:00 · 2025-04-22 15:52:17 -04:00 · 2025-04-22 15:51:10 -04:00
4 changed files with 122 additions and 112 deletions
--- a/src/elo.rs
+++ b/src/elo.rs
@@ -71,7 +71,7 @@ pub fn run() {
            .to_vec()
        })
        .flat_map(move |prev_c| {
-            [ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxFlat].map(move |method| {
+            [ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxProb].map(move |method| {
                FutureMoveConfig {
                    children_eval_method: method,
                    ..prev_c
--- a/src/logic/board_value.rs
+++ b/src/logic/board_value.rs
@@ -33,10 +33,4 @@ impl BoardValueMap {
        ];
        Self(PosMap::from(POSITION_VALUES))
    }
-
-    pub const fn flat() -> Self {
-        Self(PosMap::from(
-            [[1; Board::SIZE as usize]; Board::SIZE as usize],
-        ))
-    }
 }
--- a/src/logic/future_moves.rs
+++ b/src/logic/future_moves.rs
@@ -1,3 +1,4 @@
+use super::r#move::{MoveCoord, MoveValueConfig, MoveValueStats};
 use crate::{
    logic::r#move::Move,
    repr::{Board, Piece, Winner},
@@ -15,8 +16,6 @@ use std::{
    },
 };

-use super::r#move::MoveCoord;
-
 #[derive(Allocative)]
 pub struct FutureMoves {
    /// Arena containing all [`Move`]
@@ -25,9 +24,6 @@ pub struct FutureMoves {
    /// Index of the [`Move`] tree's root node
    current_root: Option<usize>,

-    /// Current generated depth of the Arena
-    current_depth: usize,
-
    /// Color w.r.t
    agent_color: Piece,

@@ -94,15 +90,10 @@ impl std::fmt::Display for FutureMoveConfig {
 #[derive(Debug, Clone, Copy, Allocative)]
 #[allow(dead_code)]
 pub enum ChildrenEvalMethod {
-    Average,
-    AverageDivDepth,
-
-    MinAvgDivDepth,
-
    /// Best so far?
    MinMax,

-    MinMaxFlat,
+    MinMaxProb,
 }

 impl Default for ChildrenEvalMethod {
@@ -116,7 +107,6 @@ impl FutureMoves {
        Self {
            arena: Vec::new(),
            current_root: None,
-            current_depth: 0,
            agent_color,
            config,
            board: Board::new(),
@@ -145,39 +135,34 @@ impl FutureMoves {
        indexes
    }

-    /// Find the current depth of the arena by
-    /// looking at leaf moves and finding the smallest value
-    fn determine_current_depth(&self) -> Option<usize> {
+    /// Return the current depth of the tree
+    fn current_depth(&self) -> usize {
        // leaf_moves is sorted from min to max depth
-        self.leaf_moves().first().map(|&i| self.depth_of(i))
+        self.leaf_moves()
+            .first()
+            .map(|&i| self.depth_of(i))
+            .unwrap_or(0) // handle empty trees
    }

    /// Generate children for all children of `nodes`
    /// only `pub` for the sake of benchmarking
    pub fn extend_layers(&mut self) {
-        // recover from partial tree extention
-        if let Some(current_depth) = self.determine_current_depth() {
-            self.current_depth = current_depth;
-        }
+        let mut leafs = self.leaf_moves().into_iter().collect::<Vec<usize>>();

-        for _ in self.current_depth..self.config.max_depth {
+        for _ in self.current_depth()..self.config.max_depth {
            let pstyle_inner = if cfg!(test) || !self.config.print {
                ""
            } else {
                &format!(
                    "Generating children (depth: {}/{}): ({{pos}}/{{len}}) {{per_sec}}",
-                    self.current_depth + 1,
+                    self.current_depth() + 1,
                    self.config.max_depth
                )
            };

            let allowed_size = self.config.max_arena_size - self.arena.len();
            let curr_size = Arc::new(AtomicUsize::new(0));
-            let got = self
-                .leaf_moves()
-                .into_iter()
-                .filter(|&i| self.depth_of(i) == self.current_depth)
-                .collect::<Vec<usize>>()
+            leafs = leafs
                .into_par_iter()
                .progress_with_style(ProgressStyle::with_template(pstyle_inner).unwrap())
                .map(|parent_idx| (parent_idx, self.generate_children_raw(parent_idx)))
@@ -189,21 +174,22 @@ impl FutureMoves {
                        true
                    }
                })
-                .collect::<Vec<(usize, Vec<Move>)>>();
+                .collect::<Vec<(usize, Vec<Move>)>>()
+                .into_iter()
+                .flat_map(|(parent_idx, moves)| {
+                    let start_idx = self.arena.len();
+                    self.arena.extend(moves);
+
+                    let new_indices = start_idx..self.arena.len();
+                    self.arena[parent_idx].children.extend(new_indices.clone());
+                    new_indices
+                })
+                .collect();

            // get total # of generated boards
            let got_len = curr_size.load(Ordering::Acquire);

-            got.into_iter().for_each(|(parent_idx, moves)| {
-                let start_idx = self.arena.len();
-                self.arena.extend(moves);
-
-                let new_indices = start_idx..self.arena.len();
-                self.arena[parent_idx].children.extend(new_indices);
-            });
-
            self.prune_bad_children();
-            self.current_depth += 1;
            if got_len == allowed_size {
                // arena has hit the upper limit of size permitted
                break;
@@ -228,16 +214,7 @@ impl FutureMoves {
    }

    fn create_move(&self, coord: MoveCoord, board: Board, color: Piece) -> Move {
-        Move::new(
-            coord,
-            board,
-            color,
-            self.agent_color,
-            !matches!(
-                self.config.children_eval_method,
-                ChildrenEvalMethod::MinMaxFlat
-            ),
-        )
+        Move::new(coord, board, color, self.agent_color, MoveValueConfig {})
    }

    fn generate_children_raw(&self, parent_idx: usize) -> Vec<Move> {
@@ -313,7 +290,7 @@ impl FutureMoves {
        let by_depth_vec = self.by_depth(indexes);

        // reversed so we build up the value of the closest (in time) moves from the future
-        for (depth, nodes) in by_depth_vec.into_iter().rev() {
+        for (_depth, nodes) in by_depth_vec.into_iter().rev() {
            for idx in nodes {
                let children_values = self.arena[idx]
                    .children
@@ -321,56 +298,63 @@ impl FutureMoves {
                    .map(|&child| self.arena[child].value)
                    .collect::<Vec<_>>();

-                let children_value = match self.config.children_eval_method {
-                    ChildrenEvalMethod::Average => children_values
-                        .into_iter()
-                        .sum::<i32>()
-                        .checked_div(self.arena[idx].children.len() as i32),
-
-                    ChildrenEvalMethod::AverageDivDepth => children_values
-                        .into_iter()
-                        .sum::<i32>()
-                        .checked_div(self.arena[idx].children.len() as i32)
-                        .and_then(|x| x.checked_div(depth as i32)),
-                    ChildrenEvalMethod::MinAvgDivDepth => {
-                        if self.arena[idx].color == self.agent_color {
+                match self.config.children_eval_method {
+                    ChildrenEvalMethod::MinMax => {
+                        let children_value = if self.arena[idx].color == self.agent_color {
                            // get best (for the adversary) enemy play
                            // this assumes the adversary is playing optimally

-                            children_values.into_iter().min()
+                            children_values
+                                .into_iter()
+                                .min_by_key(|x| x.value)
+                                .map(|x| x.value)
                        } else {
                            children_values
                                .into_iter()
-                                .sum::<i32>()
-                                .checked_div(self.arena[idx].children.len() as i32)
-                                .and_then(|x| x.checked_div(depth as i32))
+                                .max_by_key(|x| x.value)
+                                .map(|x| x.value)
                        }
+                        .unwrap_or(0);
+                        // we previously used `depth` and divided `self_value` by it; unclear whether that was worth it
+                        // we should really set up some sort of Elo rating for each commit, playing them against
+                        // each other or something; it could be cool to benchmark these more subjective things, not
+                        // just performance (cycles/time-wise)
+                        self.arena[idx].value.value =
+                            self.arena[idx].self_value as i32 + children_value;
                    }
-
-                    ChildrenEvalMethod::MinMax | ChildrenEvalMethod::MinMaxFlat => {
-                        if self.arena[idx].color == self.agent_color {
+                    ChildrenEvalMethod::MinMaxProb => {
+                        let children_value = if self.arena[idx].color == self.agent_color {
                            // get best (for the adversary) enemy play
                            // this assumes the adversary is playing optimally

-                            children_values.into_iter().min()
+                            children_values.iter().min()
                        } else {
-                            children_values.into_iter().max()
+                            children_values.iter().max()
                        }
+                        .cloned()
+                        .unwrap_or(Default::default());
+                        // we previously used `depth` and divided `self_value` by it; unclear whether that was worth it
+                        // we should really set up some sort of Elo rating for each commit, playing them against
+                        // each other or something; it could be cool to benchmark these more subjective things, not
+                        // just performance (cycles/time-wise)
+
+                        let wins = children_values.iter().map(|x| x.wins).sum();
+                        let losses = children_values.iter().map(|x| x.losses).sum();
+
+                        let final_value = MoveValueStats {
+                            wins,
+                            losses,
+                            value: self.arena[idx].self_value as i32 + children_value.value,
+                        };
+                        self.arena[idx].value = final_value;
                    }
                }
-                .unwrap_or(0);
-
-                // we use `depth` and divided `self_value` by it, idk if this is worth it
-                // we should really setup some sort of ELO rating for each commit, playing them against
-                // each other or something, could be cool to benchmark these more subjective things, not
-                // just performance (cycles/time wise)
-                self.arena[idx].value = self.arena[idx].self_value as i32 + children_value;
            }
        }
    }

    fn move_history(&self, idx: usize) -> Option<Vec<(MoveCoord, Piece)>> {
-        if let Some(root) = self.current_root {
+        self.current_root.and_then(|root| {
            let mut hist = Vec::new();

            let mut current = Some(idx);
@@ -390,23 +374,19 @@ impl FutureMoves {
            }

            Some(hist)
-        } else {
-            None
-        }
+        })
    }

    fn get_board_from_idx(&self, idx: usize) -> Option<Board> {
-        if let Some(hist) = self.move_history(idx) {
+        self.move_history(idx).and_then(|hist| {
            let mut board = self.board;
            for (m, c) in hist {
                if let Some(m) = m {
-                    board.place(m, c).expect("move would not propegate");
+                    board.place(m, c).ok()?;
                }
            }
            Some(board)
-        } else {
-            None
-        }
+        })
    }

    /// Return the best move which is a child of `self.current_root`
@@ -461,7 +441,6 @@ impl FutureMoves {
    fn rebuild_from_board(&mut self, board: Board) {
        self.arena = vec![self.create_move(None, board, !self.agent_color)];
        self.current_root = Some(0);
-        self.current_depth = 0;
        self.board = board;
    }

@@ -469,7 +448,6 @@ impl FutureMoves {
        let board = self
            .get_board_from_idx(idx)
            .expect("unable to get board at idx");
-        self.current_depth -= self.depth_of(idx);
        self.current_root = Some(idx);
        self.board = board;
        self.refocus_tree();
@@ -521,7 +499,7 @@ impl FutureMoves {
    }

    fn prune_bad_children(&mut self) {
-        if self.current_depth < self.config.min_arena_depth || !self.config.do_prune {
+        if self.current_depth() < self.config.min_arena_depth || !self.config.do_prune {
            return;
        }

@@ -530,7 +508,7 @@ impl FutureMoves {

        for (depth, indexes) in self.by_depth(0..self.arena.len()) {
            // TODO! maybe update by_depth every iteration or something?
-            if depth > self.current_depth.saturating_sub(self.config.up_to_minus) {
+            if depth > self.current_depth().saturating_sub(self.config.up_to_minus) {
                return;
            }

@@ -580,6 +558,8 @@ impl FutureMoves {
    /// Rebuilds the Arena based on `self.current_root`, prunes unrelated nodes
    fn refocus_tree(&mut self) {
        let Some(root) = self.current_root else {
+            // handle current_root being empty (clear arena and return)
+            self.arena.clear();
            return;
        };

--- a/src/logic/move.rs
+++ b/src/logic/move.rs
@@ -1,9 +1,48 @@
+use std::cmp::Ordering;
+
 use super::board_value::BoardValueMap;
 use crate::repr::{Board, CoordPair, Piece, Winner};
 use allocative::Allocative;

 pub type MoveCoord = Option<CoordPair>;

+#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
+pub struct MoveValueStats {
+    pub wins: u16,
+    pub losses: u16,
+    pub value: i32,
+}
+
+impl MoveValueStats {
+    pub fn chance_win(&self) -> f32 {
+        self.wins as f32 / (self.losses + self.wins) as f32
+    }
+}
+
+impl PartialOrd for MoveValueStats {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for MoveValueStats {
+    fn cmp(&self, other: &Self) -> Ordering {
+        if self.wins != 0 || self.losses != 0 || other.wins != 0 || other.losses != 0 {
+            let s_cw = self.chance_win();
+            let o_cw = other.chance_win();
+            if s_cw > o_cw {
+                Ordering::Greater
+            } else if o_cw > s_cw {
+                Ordering::Less
+            } else {
+                Ordering::Equal
+            }
+        } else {
+            self.value.cmp(&other.value)
+        }
+    }
+}
+
 #[derive(Clone, Debug, Allocative)]
 pub struct Move {
    /// Coordinates (i, j) of the move (if it exists)
@@ -23,7 +62,7 @@ pub struct Move {
    pub children: Vec<usize>,

    /// Value of this move (including children)
-    pub value: i32,
+    pub value: MoveValueStats,

    /// What is the inherit value of this move (not including children)
    pub self_value: i16,
@@ -35,29 +74,35 @@ pub struct Move {
    pub is_trimmed: bool,
 }

+pub struct MoveValueConfig {}
+
 impl Move {
    pub fn new(
        coord: MoveCoord,
        board: Board,
        color: Piece,
        agent_color: Piece,
-        use_weighted_bvm: bool,
+        mvc: MoveValueConfig,
    ) -> Self {
        let mut m = Move {
            coord,
            winner: board.game_winner(),
            parent: None,
            children: Vec::new(),
-            value: i32::MIN,
+            value: MoveValueStats {
+                wins: 0,
+                losses: 0,
+                value: 0,
+            },
            color,
            is_trimmed: false,
            self_value: 0,
        };
-        m.self_value = m.compute_self_value(agent_color, &board, use_weighted_bvm);
+        m.self_value = m.compute_self_value(agent_color, &board, mvc);
        m
    }

-    fn compute_self_value(&self, agent_color: Piece, board: &Board, use_weighted_bvm: bool) -> i16 {
+    fn compute_self_value(&self, agent_color: Piece, board: &Board, _mvc: MoveValueConfig) -> i16 {
        if self.winner == Winner::Player(!agent_color) {
            // if this board results in the opponent winning, MAJORLY negatively weigh this move
            // NOTE! this branch isn't completely deleted because if so, the bot wouldn't make a move.
@@ -67,19 +112,10 @@ impl Move {
            // results in a win for the agent
            return i16::MAX - 1;
        }
-        // else if self.winner == Winner::Tie {
-        //     // idk what a Tie should be valued?
-        //     return 0;
-        // }

        // I guess ignore Ties here, don't give them an explicit value,
-        // because even in the case of ties, we want to have a higher score

-        match use_weighted_bvm {
-            true => const { BoardValueMap::weighted() },
-            false => const { BoardValueMap::flat() },
-        }
-        .board_value(board, agent_color)
+        const { BoardValueMap::weighted() }.board_value(board, agent_color)
    }

    /// Sort children of the [`Move`] by their self_value in `arena`
Author	SHA1	Message	Date
Simon Gardling	8146c11d48	initial implementation of MoveValueStats	2025-04-23 10:46:19 -04:00
Simon Gardling	c9fda80c81	always use weighted board values	2025-04-23 10:18:42 -04:00
Simon Gardling	1ebfeb9f65	refocus_tree: clear arena on no root	2025-04-22 15:55:21 -04:00
Simon Gardling	ff7cf6cb0d	improve error handling in FutureMoves::get_board_from_idx	2025-04-22 15:53:50 -04:00
Simon Gardling	693b5c1f7c	simplify FutureMoves::move_history	2025-04-22 15:52:17 -04:00
Simon Gardling	9fff000746	simplify FutureMoves::get_board_from_idx	2025-04-22 15:51:10 -04:00
Simon Gardling	cf21f981e5	change handling of current_depth	2025-04-22 15:49:39 -04:00
Simon Gardling	0d81dca8df	improve leaf handling	2025-04-22 15:43:13 -04:00
Simon Gardling	87c386c9a4	format import	2025-04-22 15:30:11 -04:00
Simon Gardling	386c8a342e	trim some ChildrenEvalMethod options	2025-04-20 01:57:57 -04:00