initial implementation of MoveValueStats

always use weighted board values
refocus_tree: clear arena on no root
2025-04-23 10:46:19 -04:00 · 2025-04-23 10:18:42 -04:00 · 2025-04-22 15:55:21 -04:00 · 2025-04-22 15:53:50 -04:00 · 2025-04-22 15:52:17 -04:00 · 2025-04-22 15:51:10 -04:00
4 changed files with 122 additions and 112 deletions
--- a/src/elo.rs
+++ b/src/elo.rs
@@ -71,7 +71,7 @@ pub fn run() {
            .to_vec()
        })
        .flat_map(move |prev_c| {
-            [ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxFlat].map(move |method| {
+            [ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxProb].map(move |method| {
                FutureMoveConfig {
                    children_eval_method: method,
                    ..prev_c
--- a/src/logic/board_value.rs
+++ b/src/logic/board_value.rs
@@ -33,10 +33,4 @@ impl BoardValueMap {
        ];
        Self(PosMap::from(POSITION_VALUES))
    }
    pub const fn flat() -> Self {
        Self(PosMap::from(
            [[1; Board::SIZE as usize]; Board::SIZE as usize],
        ))
    }
 }
--- a/src/logic/future_moves.rs
+++ b/src/logic/future_moves.rs
@@ -1,3 +1,4 @@
 use super::r#move::{MoveCoord, MoveValueConfig, MoveValueStats};
 use crate::{
    logic::r#move::Move,
    repr::{Board, Piece, Winner},
@@ -15,8 +16,6 @@ use std::{
    },
 };
 use super::r#move::MoveCoord;
 #[derive(Allocative)]
 pub struct FutureMoves {
    /// Arena containing all [`Move`]
@@ -25,9 +24,6 @@ pub struct FutureMoves {
    /// Index of the [`Move`] tree's root node
    current_root: Option<usize>,
    /// Current generated depth of the Arena
    current_depth: usize,
    /// Color w.r.t
    agent_color: Piece,
@@ -94,15 +90,10 @@ impl std::fmt::Display for FutureMoveConfig {
 #[derive(Debug, Clone, Copy, Allocative)]
 #[allow(dead_code)]
 pub enum ChildrenEvalMethod {
    Average,
    AverageDivDepth,
    MinAvgDivDepth,
    /// Best so far?
    MinMax,
-    MinMaxFlat,
+    MinMaxProb,
 }
 impl Default for ChildrenEvalMethod {
@@ -116,7 +107,6 @@ impl FutureMoves {
        Self {
            arena: Vec::new(),
            current_root: None,
            current_depth: 0,
            agent_color,
            config,
            board: Board::new(),
@@ -145,39 +135,34 @@ impl FutureMoves {
        indexes
    }
-    /// Find the current depth of the arena by
+    /// Return the current depth of the tree
-    /// looking at leaf moves and finding the smallest value
+    fn current_depth(&self) -> usize {
    fn determine_current_depth(&self) -> Option<usize> {
        // leaf_moves is sorted from min to max depth
-        self.leaf_moves().first().map(|&i| self.depth_of(i))
+        self.leaf_moves()
            .first()
            .map(|&i| self.depth_of(i))
            .unwrap_or(0) // handle empty trees
    }
    /// Generate children for all children of `nodes`
    /// only `pub` for the sake of benchmarking
    pub fn extend_layers(&mut self) {
-        // recover from partial tree extention
+        let mut leafs = self.leaf_moves().into_iter().collect::<Vec<usize>>();
        if let Some(current_depth) = self.determine_current_depth() {
            self.current_depth = current_depth;
        }
-        for _ in self.current_depth..self.config.max_depth {
+        for _ in self.current_depth()..self.config.max_depth {
            let pstyle_inner = if cfg!(test) || !self.config.print {
                ""
            } else {
                &format!(
                    "Generating children (depth: {}/{}): ({{pos}}/{{len}}) {{per_sec}}",
-                    self.current_depth + 1,
+                    self.current_depth() + 1,
                    self.config.max_depth
                )
            };
            let allowed_size = self.config.max_arena_size - self.arena.len();
            let curr_size = Arc::new(AtomicUsize::new(0));
-            let got = self
+            leafs = leafs
                .leaf_moves()
                .into_iter()
                .filter(|&i| self.depth_of(i) == self.current_depth)
                .collect::<Vec<usize>>()
                .into_par_iter()
                .progress_with_style(ProgressStyle::with_template(pstyle_inner).unwrap())
                .map(|parent_idx| (parent_idx, self.generate_children_raw(parent_idx)))
@@ -189,21 +174,22 @@ impl FutureMoves {
                        true
                    }
                })
-                .collect::<Vec<(usize, Vec<Move>)>>();
+                .collect::<Vec<(usize, Vec<Move>)>>()
                .into_iter()
                .flat_map(|(parent_idx, moves)| {
                    let start_idx = self.arena.len();
                    self.arena.extend(moves);
                    let new_indices = start_idx..self.arena.len();
                    self.arena[parent_idx].children.extend(new_indices.clone());
                    new_indices
                })
                .collect();
            // get total # of generated boards
            let got_len = curr_size.load(Ordering::Acquire);
            got.into_iter().for_each(|(parent_idx, moves)| {
                let start_idx = self.arena.len();
                self.arena.extend(moves);
                let new_indices = start_idx..self.arena.len();
                self.arena[parent_idx].children.extend(new_indices);
            });
            self.prune_bad_children();
            self.current_depth += 1;
            if got_len == allowed_size {
                // arena has hit the upper limit of size permitted
                break;
@@ -228,16 +214,7 @@ impl FutureMoves {
    }
    fn create_move(&self, coord: MoveCoord, board: Board, color: Piece) -> Move {
-        Move::new(
+        Move::new(coord, board, color, self.agent_color, MoveValueConfig {})
            coord,
            board,
            color,
            self.agent_color,
            !matches!(
                self.config.children_eval_method,
                ChildrenEvalMethod::MinMaxFlat
            ),
        )
    }
    fn generate_children_raw(&self, parent_idx: usize) -> Vec<Move> {
@@ -313,7 +290,7 @@ impl FutureMoves {
        let by_depth_vec = self.by_depth(indexes);
        // reversed so we build up the value of the closest (in time) moves from the future
-        for (depth, nodes) in by_depth_vec.into_iter().rev() {
+        for (_depth, nodes) in by_depth_vec.into_iter().rev() {
            for idx in nodes {
                let children_values = self.arena[idx]
                    .children
@@ -321,56 +298,63 @@ impl FutureMoves {
                    .map(|&child| self.arena[child].value)
                    .collect::<Vec<_>>();
-                let children_value = match self.config.children_eval_method {
+                match self.config.children_eval_method {
-                    ChildrenEvalMethod::Average => children_values
+                    ChildrenEvalMethod::MinMax => {
-                        .into_iter()
+                        let children_value = if self.arena[idx].color == self.agent_color {
                        .sum::<i32>()
                        .checked_div(self.arena[idx].children.len() as i32),
                    ChildrenEvalMethod::AverageDivDepth => children_values
                        .into_iter()
                        .sum::<i32>()
                        .checked_div(self.arena[idx].children.len() as i32)
                        .and_then(|x| x.checked_div(depth as i32)),
                    ChildrenEvalMethod::MinAvgDivDepth => {
                        if self.arena[idx].color == self.agent_color {
                            // get best (for the adversary) enemy play
                            // this assumes the adversary is playing optimally
-                            children_values.into_iter().min()
+                            children_values
                                .into_iter()
                                .min_by_key(|x| x.value)
                                .map(|x| x.value)
                        } else {
                            children_values
                                .into_iter()
-                                .sum::<i32>()
+                                .max_by_key(|x| x.value)
-                                .checked_div(self.arena[idx].children.len() as i32)
+                                .map(|x| x.value)
                                .and_then(|x| x.checked_div(depth as i32))
                        }
                        .unwrap_or(0);
                        // we use `depth` and divided `self_value` by it, idk if this is worth it
                        // we should really setup some sort of ELO rating for each commit, playing them against
                        // each other or something, could be cool to benchmark these more subjective things, not
                        // just performance (cycles/time wise)
                        self.arena[idx].value.value =
                            self.arena[idx].self_value as i32 + children_value;
                    }
-
+                    ChildrenEvalMethod::MinMaxProb => {
-                    ChildrenEvalMethod::MinMax | ChildrenEvalMethod::MinMaxFlat => {
+                        let children_value = if self.arena[idx].color == self.agent_color {
                        if self.arena[idx].color == self.agent_color {
                            // get best (for the adversary) enemy play
                            // this assumes the adversary is playing optimally
-                            children_values.into_iter().min()
+                            children_values.iter().min()
                        } else {
-                            children_values.into_iter().max()
+                            children_values.iter().max()
                        }
                        .cloned()
                        .unwrap_or(Default::default());
                        // we use `depth` and divided `self_value` by it, idk if this is worth it
                        // we should really setup some sort of ELO rating for each commit, playing them against
                        // each other or something, could be cool to benchmark these more subjective things, not
                        // just performance (cycles/time wise)
                        let wins = children_values.iter().map(|x| x.wins).sum();
                        let losses = children_values.iter().map(|x| x.losses).sum();
                        let final_value = MoveValueStats {
                            wins,
                            losses,
                            value: self.arena[idx].self_value as i32 + children_value.value,
                        };
                        self.arena[idx].value = final_value;
                    }
                }
                .unwrap_or(0);
                // we use `depth` and divided `self_value` by it, idk if this is worth it
                // we should really setup some sort of ELO rating for each commit, playing them against
                // each other or something, could be cool to benchmark these more subjective things, not
                // just performance (cycles/time wise)
                self.arena[idx].value = self.arena[idx].self_value as i32 + children_value;
            }
        }
    }
    fn move_history(&self, idx: usize) -> Option<Vec<(MoveCoord, Piece)>> {
-        if let Some(root) = self.current_root {
+        self.current_root.and_then(|root| {
            let mut hist = Vec::new();
            let mut current = Some(idx);
@@ -390,23 +374,19 @@ impl FutureMoves {
            }
            Some(hist)
-        } else {
+        })
            None
        }
    }
    fn get_board_from_idx(&self, idx: usize) -> Option<Board> {
-        if let Some(hist) = self.move_history(idx) {
+        self.move_history(idx).and_then(|hist| {
            let mut board = self.board;
            for (m, c) in hist {
                if let Some(m) = m {
-                    board.place(m, c).expect("move would not propegate");
+                    board.place(m, c).ok()?;
                }
            }
            Some(board)
-        } else {
+        })
            None
        }
    }
    /// Return the best move which is a child of `self.current_root`
@@ -461,7 +441,6 @@ impl FutureMoves {
    fn rebuild_from_board(&mut self, board: Board) {
        self.arena = vec![self.create_move(None, board, !self.agent_color)];
        self.current_root = Some(0);
        self.current_depth = 0;
        self.board = board;
    }
@@ -469,7 +448,6 @@ impl FutureMoves {
        let board = self
            .get_board_from_idx(idx)
            .expect("unable to get board at idx");
        self.current_depth -= self.depth_of(idx);
        self.current_root = Some(idx);
        self.board = board;
        self.refocus_tree();
@@ -521,7 +499,7 @@ impl FutureMoves {
    }
    fn prune_bad_children(&mut self) {
-        if self.current_depth < self.config.min_arena_depth || !self.config.do_prune {
+        if self.current_depth() < self.config.min_arena_depth || !self.config.do_prune {
            return;
        }
@@ -530,7 +508,7 @@ impl FutureMoves {
        for (depth, indexes) in self.by_depth(0..self.arena.len()) {
            // TODO! maybe update by_depth every iteration or something?
-            if depth > self.current_depth.saturating_sub(self.config.up_to_minus) {
+            if depth > self.current_depth().saturating_sub(self.config.up_to_minus) {
                return;
            }
@@ -580,6 +558,8 @@ impl FutureMoves {
    /// Rebuilds the Arena based on `self.current_root`, prunes unrelated nodes
    fn refocus_tree(&mut self) {
        let Some(root) = self.current_root else {
            // handle current_root being empty (clear arena and return)
            self.arena.clear();
            return;
        };
--- a/src/logic/move.rs
+++ b/src/logic/move.rs
@@ -1,9 +1,48 @@
 use std::cmp::Ordering;
 use super::board_value::BoardValueMap;
 use crate::repr::{Board, CoordPair, Piece, Winner};
 use allocative::Allocative;
 pub type MoveCoord = Option<CoordPair>;
 /// Win/loss tallies and a numeric score describing how good a move is.
 #[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
 pub struct MoveValueStats {
    /// Number of wins counted for this move (numerator of `chance_win`).
    pub wins: u16,
    /// Number of losses counted for this move.
    pub losses: u16,
    /// Numeric score of the move; `Ord` falls back to it when neither
    /// operand has any recorded wins or losses.
    pub value: i32,
 }
 impl MoveValueStats {
    /// Fraction of recorded outcomes that are wins: `wins / (wins + losses)`.
    ///
    /// Both counters are widened to `f32` *before* they are added, so a
    /// combined total above `u16::MAX` cannot overflow the integer sum
    /// (which would panic in debug builds and wrap in release builds).
    ///
    /// Returns `NaN` when no wins and no losses have been recorded
    /// (`0.0 / 0.0`); callers comparing results must account for that.
    pub fn chance_win(&self) -> f32 {
        let wins = f32::from(self.wins);
        let losses = f32::from(self.losses);
        wins / (wins + losses)
    }
 }
 impl PartialOrd for MoveValueStats {
    /// Defers to the [`Ord`] implementation, so the result is always `Some`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
 }
 impl Ord for MoveValueStats {
    /// Total order over move statistics.
    ///
    /// Operands are ranked by, in order:
    /// 1. win ratio `wins / (wins + losses)`, compared exactly via integer
    ///    cross-multiplication (no float rounding, no `NaN`),
    /// 2. `value`,
    /// 3. `wins`, then `losses`, so that `cmp` returns `Equal` only for
    ///    values that are also `==` under the derived `PartialEq`.
    ///
    /// A value with no recorded wins or losses is ranked as a neutral 1/2
    /// ratio. This keeps the order transitive when such a value is compared
    /// against one that has statistics; when neither operand has statistics
    /// the ratios tie and `value` decides.
    /// NOTE(review): the neutral 1/2 placement is a policy choice — confirm
    /// it matches how moves without statistics should rank.
    fn cmp(&self, other: &Self) -> Ordering {
        // Win ratio as an exact (numerator, denominator) pair; the
        // denominator is always positive so cross-multiplication is valid.
        let ratio = |stats: &Self| -> (u64, u64) {
            let wins = u64::from(stats.wins);
            let games = wins + u64::from(stats.losses);
            if games == 0 {
                (1, 2)
            } else {
                (wins, games)
            }
        };

        let (s_num, s_den) = ratio(self);
        let (o_num, o_den) = ratio(other);

        // s_num / s_den <=> o_num / o_den  is  s_num * o_den <=> o_num * s_den.
        // The largest possible product is far below `u64::MAX`.
        (s_num * o_den)
            .cmp(&(o_num * s_den))
            .then_with(|| self.value.cmp(&other.value))
            .then_with(|| self.wins.cmp(&other.wins))
            .then_with(|| self.losses.cmp(&other.losses))
    }
 }
 #[derive(Clone, Debug, Allocative)]
 pub struct Move {
    /// Coordinates (i, j) of the move (if it exists)
@@ -23,7 +62,7 @@ pub struct Move {
    pub children: Vec<usize>,
    /// Value of this move (including children)
-    pub value: i32,
+    pub value: MoveValueStats,
    /// What is the inherit value of this move (not including children)
    pub self_value: i16,
@@ -35,29 +74,35 @@ pub struct Move {
    pub is_trimmed: bool,
 }
 /// Configuration handed to `Move::new` and forwarded to `compute_self_value`.
 /// It currently has no fields.
 pub struct MoveValueConfig {}
 impl Move {
    pub fn new(
        coord: MoveCoord,
        board: Board,
        color: Piece,
        agent_color: Piece,
-        use_weighted_bvm: bool,
+        mvc: MoveValueConfig,
    ) -> Self {
        let mut m = Move {
            coord,
            winner: board.game_winner(),
            parent: None,
            children: Vec::new(),
-            value: i32::MIN,
+            value: MoveValueStats {
                wins: 0,
                losses: 0,
                value: 0,
            },
            color,
            is_trimmed: false,
            self_value: 0,
        };
-        m.self_value = m.compute_self_value(agent_color, &board, use_weighted_bvm);
+        m.self_value = m.compute_self_value(agent_color, &board, mvc);
        m
    }
-    fn compute_self_value(&self, agent_color: Piece, board: &Board, use_weighted_bvm: bool) -> i16 {
+    fn compute_self_value(&self, agent_color: Piece, board: &Board, _mvc: MoveValueConfig) -> i16 {
        if self.winner == Winner::Player(!agent_color) {
            // if this board results in the opponent winning, MAJORLY negatively weigh this move
            // NOTE! this branch isn't completely deleted because if so, the bot wouldn't make a move.
@@ -67,19 +112,10 @@ impl Move {
            // results in a win for the agent
            return i16::MAX - 1;
        }
        // else if self.winner == Winner::Tie {
        //     // idk what a Tie should be valued?
        //     return 0;
        // }
        // I guess ignore Ties here, don't give them an explicit value,
        // because even in the case of ties, we want to have a higher score
-        match use_weighted_bvm {
+        const { BoardValueMap::weighted() }.board_value(board, agent_color)
            true => const { BoardValueMap::weighted() },
            false => const { BoardValueMap::flat() },
        }
        .board_value(board, agent_color)
    }
    /// Sort children of the [`Move`] by their self_value in `arena`
Author	SHA1	Message	Date
Simon Gardling	8146c11d48	initial implementation of MoveValueStats	2025-04-23 10:46:19 -04:00
Simon Gardling	c9fda80c81	always use weighted board values	2025-04-23 10:18:42 -04:00
Simon Gardling	1ebfeb9f65	refocus_tree: clear arena on no root	2025-04-22 15:55:21 -04:00
Simon Gardling	ff7cf6cb0d	improve error handling in FutureMoves::get_board_from_idx	2025-04-22 15:53:50 -04:00
Simon Gardling	693b5c1f7c	simplify FutureMoves::move_history	2025-04-22 15:52:17 -04:00
Simon Gardling	9fff000746	simplify FutureMoves::get_board_from_idx	2025-04-22 15:51:10 -04:00
Simon Gardling	cf21f981e5	change handling of current_depth	2025-04-22 15:49:39 -04:00
Simon Gardling	0d81dca8df	improve leaf handling	2025-04-22 15:43:13 -04:00
Simon Gardling	87c386c9a4	format import	2025-04-22 15:30:11 -04:00
Simon Gardling	386c8a342e	trim some ChildrenEvalMethod options	2025-04-20 01:57:57 -04:00