diff --git a/src/elo.rs b/src/elo.rs index 0303113..8f32845 100644 --- a/src/elo.rs +++ b/src/elo.rs @@ -1,5 +1,5 @@ use crate::{ - agent::{Agent, RandomAgent}, + agent::Agent, complexagent::ComplexAgent, game_inner::GameInner, logic::{ChildrenEvalMethod, FutureMoveConfig}, @@ -29,7 +29,7 @@ pub fn run() { children_eval_method: Default::default(), }; - let configs = [6] + let configs = [4, 5, 6] .into_iter() .map(move |d| FutureMoveConfig { max_depth: d, @@ -71,9 +71,11 @@ pub fn run() { .to_vec() }) .flat_map(move |prev_c| { - [ChildrenEvalMethod::MinMax].map(move |method| FutureMoveConfig { - children_eval_method: method, - ..prev_c + [ChildrenEvalMethod::MinMax, ChildrenEvalMethod::MinMaxProb].map(move |method| { + FutureMoveConfig { + children_eval_method: method, + ..prev_c + } }) }) .flat_map(move |prev_c| { @@ -108,7 +110,7 @@ pub fn run() { .collect() }); - let mut vec: Vec<(String, AgentMaker)> = configs + let vec: Vec<(String, AgentMaker)> = configs .into_iter() .map(move |config| -> (String, AgentMaker) { ( @@ -117,14 +119,14 @@ pub fn run() { ) }) .collect(); - vec.push(( - "RandomAgent".to_string(), - Box::new(move |piece| Box::new(RandomAgent::new(piece))), - )); + // vec.push(( + // "RandomAgent".to_string(), + // Box::new(move |piece| Box::new(RandomAgent::new(piece))), + // )); let mut arena = PlayerArena::new(vec); - arena.prop_arena(100); + arena.prop_arena(1000); println!("{}", arena); } @@ -287,7 +289,7 @@ impl PlayerArena { player_1, player_2, false, - Board::random(rand::random_range(3..=7)), + Board::random(rand::random_range(1..=15)), ) .expect("unable to create game") .loop_until_result(); diff --git a/src/logic/future_moves.rs b/src/logic/future_moves.rs index 93d8e9a..ac03c31 100644 --- a/src/logic/future_moves.rs +++ b/src/logic/future_moves.rs @@ -92,6 +92,8 @@ impl std::fmt::Display for FutureMoveConfig { pub enum ChildrenEvalMethod { /// Best so far? MinMax, + + MinMaxProb, } impl Default for ChildrenEvalMethod { @@ -212,7 +214,18 @@ impl FutureMoves { } fn create_move(&self, coord: MoveCoord, board: Board, color: Piece) -> Move { - Move::new(coord, board, color, self.agent_color, MoveValueConfig {}) + Move::new( + coord, + board, + color, + self.agent_color, + MoveValueConfig { + self_value_raw: matches!( + self.config.children_eval_method, + ChildrenEvalMethod::MinMaxProb + ), + }, + ) } fn generate_children_raw(&self, parent_idx: usize) -> Vec { @@ -296,25 +309,53 @@ impl FutureMoves { .map(|&child| self.arena[child].value) .collect::>(); - let children_value = match self.config.children_eval_method { + match self.config.children_eval_method { ChildrenEvalMethod::MinMax => { - if self.arena[idx].color == self.agent_color { + let child_value = if self.arena[idx].color == self.agent_color { // get best (for the adversary) enemy play // this assumes the adversary is playing optimally - children_values.into_iter().min() + children_values + .into_iter() + .min_by_key(|x| x.value) + .map(|x| x.value) } else { - children_values.into_iter().max() + children_values + .into_iter() + .max_by_key(|x| x.value) + .map(|x| x.value) } + .unwrap_or(0); + // we use `depth` and divided `self_value` by it, idk if this is worth it + // we should really setup some sort of ELO rating for each commit, playing them against + // each other or something, could be cool to benchmark these more subjective things, not + // just performance (cycles/time wise) + self.arena[idx].value.value = + self.arena[idx].self_value as i32 + child_value; + } + ChildrenEvalMethod::MinMaxProb => { + let child_value = if self.arena[idx].color == self.agent_color { + // get best (for the adversary) enemy play + // this assumes the adversary is playing optimally + + children_values.iter().min() + } else { + children_values.iter().max() + } + .cloned() + .unwrap_or(Default::default()); + // we use `depth` and divided `self_value` by it, idk if this is worth it + // we should really setup some sort of ELO rating for each commit, playing them against + // each other or something, could be cool to benchmark these more subjective things, not + // just performance (cycles/time wise) + self.arena[idx] + .value + .populate_self_from_children(&children_values); + + self.arena[idx].value.value = + self.arena[idx].self_value as i32 + child_value.value; } } - .unwrap_or(0); - - // we use `depth` and divided `self_value` by it, idk if this is worth it - // we should really setup some sort of ELO rating for each commit, playing them against - // each other or something, could be cool to benchmark these more subjective things, not - // just performance (cycles/time wise) - self.arena[idx].value = self.arena[idx].self_value as i32 + children_value; } } } diff --git a/src/logic/move.rs b/src/logic/move.rs index 08f65cf..9a1effa 100644 --- a/src/logic/move.rs +++ b/src/logic/move.rs @@ -1,9 +1,78 @@ +use std::cmp::Ordering; + use super::board_value::BoardValueMap; use crate::repr::{Board, CoordPair, Piece, Winner}; use allocative::Allocative; pub type MoveCoord = Option; +#[derive(Clone, Copy, PartialEq, Eq, Allocative, Debug, PartialOrd, Ord)] +pub enum MVSGameState { + Win = 1, + Loss = 0, + Tie = -1, +} + +#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)] +pub struct MoveValueStats { + state: Option, + wins: u16, + losses: u16, + pub value: i32, +} + +impl MoveValueStats { + fn chance_win(&self) -> Option { + let sum = self.losses + self.wins; + if sum == 0 { + return None; + } + Some(self.wins as f32 / sum as f32) + } + + pub fn populate_self_from_children(&mut self, others: &[Self]) { + let wins = others.iter().map(|x| x.wins).sum::() + + others + .iter() + .filter(|x| x.state == Some(MVSGameState::Win)) + .count() as u16; + let losses = others.iter().map(|x| x.losses).sum::() + + others + .iter() + .filter(|x| x.state == Some(MVSGameState::Loss)) + .count() as u16; + + self.wins = wins; + self.losses = losses; + } +} + +impl PartialOrd for MoveValueStats { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for MoveValueStats { + fn cmp(&self, other: &Self) -> Ordering { + if self.state.is_some() && other.state.is_some() { + return self.state.cmp(&other.state); + } + + let s_cw = self.chance_win(); + let o_cw = other.chance_win(); + if s_cw.is_some() && o_cw.is_some() { + if s_cw > o_cw { + return Ordering::Greater; + } else if o_cw > s_cw { + return Ordering::Less; + } + } + + self.value.cmp(&other.value) + } +} + #[derive(Clone, Debug, Allocative)] pub struct Move { /// Coordinates (i, j) of the move (if it exists) @@ -23,7 +92,7 @@ pub struct Move { pub children: Vec, /// Value of this move (including children) - pub value: i32, + pub value: MoveValueStats, /// What is the inherit value of this move (not including children) pub self_value: i16, @@ -35,7 +104,9 @@ pub struct Move { pub is_trimmed: bool, } -pub struct MoveValueConfig {} +pub struct MoveValueConfig { + pub self_value_raw: bool, +} impl Move { pub fn new( @@ -50,12 +121,34 @@ impl Move { winner: board.game_winner(), parent: None, children: Vec::new(), - value: i32::MIN, + value: Default::default(), color, is_trimmed: false, self_value: 0, }; - m.self_value = m.compute_self_value(agent_color, &board, mvc); + + // set wins/losses values appropriately + match m.winner { + Winner::Player(piece) => { + if piece == agent_color { + m.value.wins += 1; + m.value.state = Some(MVSGameState::Win); + } else { + m.value.losses += 1; + m.value.state = Some(MVSGameState::Loss); + } + } + Winner::Tie => { + m.value.state = Some(MVSGameState::Tie); + } + Winner::None => {} + } + + if !mvc.self_value_raw { + m.self_value = m.compute_self_value(agent_color, &board, mvc); + } else { + m.self_value = const { BoardValueMap::weighted() }.board_value(&board, agent_color); + } m } @@ -71,7 +164,6 @@ impl Move { } // I guess ignore Ties here, don't give them an explicit value, - const { BoardValueMap::weighted() }.board_value(board, agent_color) } diff --git a/src/repr/board.rs b/src/repr/board.rs index df1815b..1380766 100644 --- a/src/repr/board.rs +++ b/src/repr/board.rs @@ -3,10 +3,11 @@ use allocative::Allocative; use rand::seq::IteratorRandom; use std::{cmp::Ordering, fmt}; -#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative)] +#[derive(PartialEq, Eq, Copy, Clone, Debug, Allocative, Default)] pub enum Winner { Player(Piece), Tie, + #[default] None, }