initial implementation of MoveValueStats

2025-04-23 10:46:19 -04:00 · 2025-04-23 10:46:19 -04:00 · 23e7ae2822
commit 23e7ae2822
parent c9fda80c81
4 changed files with 166 additions and 30 deletions
--- a/src/elo.rs
+++ b/src/elo.rs
@ -1,5 +1,5 @@
 use crate::{
-    agent::{Agent, RandomAgent},
+    agent::Agent,
    complexagent::ComplexAgent,
    game_inner::GameInner,
    logic::{ChildrenEvalMethod, FutureMoveConfig},
@ -29,7 +29,7 @@ pub fn run() {
        children_eval_method: Default::default(),
    };

-    let configs = [6]
+    let configs = [4, 5, 6]
        .into_iter()
        .map(move |d| FutureMoveConfig {
            max_depth: d,
@ -71,9 +71,11 @@ pub fn run() {
            .to_vec()
        })
        .flat_map(move |prev_c| {
-            [ChildrenEvalMethod::MinMax].map(move |method| FutureMoveConfig {
-                children_eval_method: method,
-                ..prev_c
+            [ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxProb].map(move |method| {
+                FutureMoveConfig {
+                    children_eval_method: method,
+                    ..prev_c
+                }
            })
        })
        .flat_map(move |prev_c| {
@ -108,7 +110,7 @@ pub fn run() {
                .collect()
        });

-    let mut vec: Vec<(String, AgentMaker)> = configs
+    let vec: Vec<(String, AgentMaker)> = configs
        .into_iter()
        .map(move |config| -> (String, AgentMaker) {
            (
@ -117,14 +119,14 @@ pub fn run() {
            )
        })
        .collect();
-    vec.push((
-        "RandomAgent".to_string(),
-        Box::new(move |piece| Box::new(RandomAgent::new(piece))),
-    ));
+    // vec.push((
+    //     "RandomAgent".to_string(),
+    //     Box::new(move |piece| Box::new(RandomAgent::new(piece))),
+    // ));

    let mut arena = PlayerArena::new(vec);

-    arena.prop_arena(100);
+    arena.prop_arena(1000);

    println!("{}", arena);
 }
@ -287,7 +289,7 @@ impl PlayerArena {
            player_1,
            player_2,
            false,
-            Board::random(rand::random_range(3..=7)),
+            Board::random(rand::random_range(1..=15)),
        )
        .expect("unable to create game")
        .loop_until_result();
--- a/src/logic/future_moves.rs
+++ b/src/logic/future_moves.rs
@ -92,6 +92,8 @@ impl std::fmt::Display for FutureMoveConfig {
 pub enum ChildrenEvalMethod {
    /// Best so far?
    MinMax,
+
+    MinMaxProb,
 }

 impl Default for ChildrenEvalMethod {
@ -212,7 +214,18 @@ impl FutureMoves {
    }

    fn create_move(&self, coord: MoveCoord, board: Board, color: Piece) -> Move {
-        Move::new(coord, board, color, self.agent_color, MoveValueConfig {})
+        Move::new(
+            coord,
+            board,
+            color,
+            self.agent_color,
+            MoveValueConfig {
+                self_value_raw: matches!(
+                    self.config.children_eval_method,
+                    ChildrenEvalMethod::MinMaxProb
+                ),
+            },
+        )
    }

    fn generate_children_raw(&self, parent_idx: usize) -> Vec<Move> {
@ -296,25 +309,53 @@ impl FutureMoves {
                    .map(|&child| self.arena[child].value)
                    .collect::<Vec<_>>();

-                let children_value = match self.config.children_eval_method {
+                match self.config.children_eval_method {
                    ChildrenEvalMethod::MinMax => {
-                        if self.arena[idx].color == self.agent_color {
+                        let child_value = if self.arena[idx].color == self.agent_color {
                            // get best (for the adversary) enemy play
                            // this assumes the adversary is playing optimally

-                            children_values.into_iter().min()
+                            children_values
+                                .into_iter()
+                                .min_by_key(|x| x.value)
+                                .map(|x| x.value)
                        } else {
-                            children_values.into_iter().max()
+                            children_values
+                                .into_iter()
+                                .max_by_key(|x| x.value)
+                                .map(|x| x.value)
                        }
+                        .unwrap_or(0);
+                        // we use `depth` and divided `self_value` by it, idk if this is worth it
+                        // we should really setup some sort of ELO rating for each commit, playing them against
+                        // each other or something, could be cool to benchmark these more subjective things, not
+                        // just performance (cycles/time wise)
+                        self.arena[idx].value.value =
+                            self.arena[idx].self_value as i32 + child_value;
+                    }
+                    ChildrenEvalMethod::MinMaxProb => {
+                        let child_value = if self.arena[idx].color == self.agent_color {
+                            // get best (for the adversary) enemy play
+                            // this assumes the adversary is playing optimally
+
+                            children_values.iter().min()
+                        } else {
+                            children_values.iter().max()
+                        }
+                        .cloned()
+                        .unwrap_or(Default::default());
+                        // we use `depth` and divided `self_value` by it, idk if this is worth it
+                        // we should really setup some sort of ELO rating for each commit, playing them against
+                        // each other or something, could be cool to benchmark these more subjective things, not
+                        // just performance (cycles/time wise)
+                        self.arena[idx]
+                            .value
+                            .populate_self_from_children(&children_values);
+
+                        self.arena[idx].value.value =
+                            self.arena[idx].self_value as i32 + child_value.value;
                    }
                }
-                .unwrap_or(0);
-
-                // we use `depth` and divided `self_value` by it, idk if this is worth it
-                // we should really setup some sort of ELO rating for each commit, playing them against
-                // each other or something, could be cool to benchmark these more subjective things, not
-                // just performance (cycles/time wise)
-                self.arena[idx].value = self.arena[idx].self_value as i32 + children_value;
            }
        }
    }
--- a/src/logic/move.rs
+++ b/src/logic/move.rs
@ -1,9 +1,78 @@
+use std::cmp::Ordering;
+
 use super::board_value::BoardValueMap;
 use crate::repr::{Board, CoordPair, Piece, Winner};
 use allocative::Allocative;

 pub type MoveCoord = Option<CoordPair>;

+/// Decided outcome of a game, from the agent's point of view.
+///
+/// The derived `Ord` compares variants by their discriminants, so the
+/// discriminants are chosen to give `Loss < Tie < Win`: a tie must rank
+/// above a loss and below a win when move outcomes are compared.
+#[derive(Clone, Copy, PartialEq, Eq, Allocative, Debug, PartialOrd, Ord)]
+pub enum MVSGameState {
+    /// The agent's color won the game.
+    Win = 1,
+    /// The game ended without a winner.
+    Tie = 0,
+    /// The other color won the game.
+    Loss = -1,
+}
+
+/// Evaluation attached to a move: an optional decided outcome, win/loss
+/// tallies, and a numeric score.
+///
+/// `Default` yields no outcome, zero tallies and a score of `0`.
+#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
+pub struct MoveValueStats {
+    /// Decided outcome of the game at this move, or `None` if undecided.
+    state: Option<MVSGameState>,
+    /// Win tally; overwritten by `populate_self_from_children`.
+    wins: u16,
+    /// Loss tally; overwritten by `populate_self_from_children`.
+    losses: u16,
+    /// Numeric score of the move; the last criterion consulted when two
+    /// `MoveValueStats` are ordered.
+    pub value: i32,
+}
+
+impl MoveValueStats {
+    /// Fraction of the recorded decisive outcomes that are wins, i.e.
+    /// `wins / (wins + losses)`.
+    ///
+    /// Returns `None` when neither a win nor a loss has been recorded.
+    fn chance_win(&self) -> Option<f32> {
+        // Add in `u32`: two `u16` tallies together can exceed `u16::MAX`.
+        let total = u32::from(self.wins) + u32::from(self.losses);
+        if total == 0 {
+            return None;
+        }
+        Some(f32::from(self.wins) / total as f32)
+    }
+
+    /// Replaces this value's win/loss tallies with totals gathered from
+    /// `others`.
+    ///
+    /// Every child contributes its own `wins`/`losses`. A child whose
+    /// `state` is a decided `Win`/`Loss` contributes at least one to the
+    /// matching tally, but is not counted a second time when its own tally
+    /// already records that outcome. Totals saturate at `u16::MAX` instead
+    /// of overflowing. `state` and `value` of `self` are left untouched.
+    pub fn populate_self_from_children(&mut self, others: &[Self]) {
+        let (wins, losses) = others
+            .iter()
+            .fold((0u16, 0u16), |(wins, losses), child| {
+                let decided_win = u16::from(child.state == Some(MVSGameState::Win));
+                let decided_loss = u16::from(child.state == Some(MVSGameState::Loss));
+                (
+                    wins.saturating_add(child.wins.max(decided_win)),
+                    losses.saturating_add(child.losses.max(decided_loss)),
+                )
+            });
+
+        self.wins = wins;
+        self.losses = losses;
+    }
+}
+
+/// Partial ordering that always defers to the total order defined by `Ord`.
+impl PartialOrd for MoveValueStats {
+    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
+        Some(Ord::cmp(self, rhs))
+    }
+}
+
+/// Total order used to pick the best/worst child evaluation.
+///
+/// Criteria are applied in sequence, each one only breaking ties left by
+/// the previous one:
+/// 1. the decided game state, when both sides have one;
+/// 2. the win chance, when both sides have one;
+/// 3. the numeric `value`.
+impl Ord for MoveValueStats {
+    fn cmp(&self, other: &Self) -> Ordering {
+        // A criterion that is missing on either side is treated as a tie so
+        // the next criterion decides.
+        let by_state = match (self.state, other.state) {
+            (Some(mine), Some(theirs)) => mine.cmp(&theirs),
+            _ => Ordering::Equal,
+        };
+
+        by_state
+            .then_with(|| match (self.chance_win(), other.chance_win()) {
+                // `chance_win` never yields NaN (the divisor is non-zero),
+                // so `total_cmp` agrees with the usual float comparison.
+                (Some(mine), Some(theirs)) => mine.total_cmp(&theirs),
+                _ => Ordering::Equal,
+            })
+            .then_with(|| self.value.cmp(&other.value))
+    }
+}
+
 #[derive(Clone, Debug, Allocative)]
 pub struct Move {
    /// Coordinates (i, j) of the move (if it exists)
@ -23,7 +92,7 @@ pub struct Move {
    pub children: Vec<usize>,

    /// Value of this move (including children)
-    pub value: i32,
+    pub value: MoveValueStats,

    /// The inherent value of this move (not including children)
    pub self_value: i16,
@ -35,7 +104,9 @@ pub struct Move {
    pub is_trimmed: bool,
 }

-pub struct MoveValueConfig {}
+pub struct MoveValueConfig {
+    pub self_value_raw: bool,
+}

 impl Move {
    pub fn new(
@ -50,12 +121,34 @@ impl Move {
            winner: board.game_winner(),
            parent: None,
            children: Vec::new(),
-            value: i32::MIN,
+            value: Default::default(),
            color,
            is_trimmed: false,
            self_value: 0,
        };
-        m.self_value = m.compute_self_value(agent_color, &board, mvc);
+
+        // set wins/losses values appropriately
+        match m.winner {
+            Winner::Player(piece) => {
+                if piece == agent_color {
+                    m.value.wins += 1;
+                    m.value.state = Some(MVSGameState::Win);
+                } else {
+                    m.value.losses += 1;
+                    m.value.state = Some(MVSGameState::Loss);
+                }
+            }
+            Winner::Tie => {
+                m.value.state = Some(MVSGameState::Tie);
+            }
+            Winner::None => {}
+        }
+
+        if !mvc.self_value_raw {
+            m.self_value = m.compute_self_value(agent_color, &board, mvc);
+        } else {
+            m.self_value = const { BoardValueMap::weighted() }.board_value(&board, agent_color);
+        }
        m
    }

@ -71,7 +164,6 @@ impl Move {
        }

        // I guess ignore Ties here, don't give them an explicit value,
-
        const { BoardValueMap::weighted() }.board_value(board, agent_color)
    }

--- a/src/repr/board.rs
+++ b/src/repr/board.rs
@ -3,10 +3,11 @@ use allocative::Allocative;
 use rand::seq::IteratorRandom;
 use std::{cmp::Ordering, fmt};

-#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative)]
+#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative, Default)]
 pub enum Winner {
    Player(Piece),
    Tie,
+    #[default]
    None,
 }