Compare commits

..

22 Commits

Author SHA1 Message Date
4ca6dfc572 commit old stuff 2025-07-24 23:19:09 -07:00
ef2e551085 posmap: use std::mem::swap 2025-04-30 18:08:40 -04:00
b5e73cbbcf elo: improve code quality and add confidence intervals 2025-04-30 13:52:19 -04:00
aeca76fda6 fix typo 2025-04-29 18:43:38 -04:00
111ce718da cleanup minmax impl 2025-04-29 18:32:44 -04:00
9342760d82 simplify populate_self_from_children 2025-04-29 18:20:53 -04:00
967f1f6efb elo: fix display 2025-04-28 20:00:43 -04:00
94831b720b elo: simplify config 2025-04-28 19:45:47 -04:00
e966a3ac9a elo: starting board pos 2025-04-28 19:11:26 -04:00
c50892d79a elo: edit settings 2025-04-28 19:02:40 -04:00
6b9516a510 fix logic and minmax 2025-04-28 18:44:37 -04:00
eedc80e46a fixes 2025-04-28 01:43:24 -04:00
189a5aef58 fix 2025-04-28 01:18:46 -04:00
c54f3a7493 elo test 2025-04-28 01:17:28 -04:00
c3c07fcb28 probabilistic progress 2025-04-28 01:12:15 -04:00
2586b43294 fix mvs 2025-04-24 13:20:07 -04:00
14254e44ba elo: fix display 2 2025-04-24 13:12:02 -04:00
3ad74c90b7 elo: fix display 2025-04-24 13:06:08 -04:00
84b175d844 elo: shuffle prop_arena 2025-04-24 13:02:49 -04:00
57dba8ad1e elo: reduce k value 2025-04-24 13:00:06 -04:00
66be2185f9 add tests for MoveValueStats 2025-04-24 12:58:06 -04:00
e8d05e0f9d test 2025-04-24 11:10:11 -04:00
7 changed files with 279 additions and 233 deletions

View File

@@ -1,15 +1,15 @@
use crate::{
agent::Agent,
agent::{Agent, RandomAgent},
complexagent::ComplexAgent,
game_inner::GameInner,
logic::{ChildrenEvalMethod, FutureMoveConfig},
logic::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves},
repr::{Board, Piece, Winner},
};
use indicatif::{ParallelProgressIterator, ProgressBar, ProgressDrawTarget, ProgressStyle};
use indicatif::{ProgressBar, ProgressStyle};
use rand::seq::SliceRandom;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use skillratings::{
elo::{elo, EloConfig, EloRating},
glicko2::{confidence_interval, glicko2, Glicko2Rating},
Outcomes, Rating,
};
use std::num::NonZero;
@@ -18,18 +18,19 @@ type AgentMaker = Box<dyn Fn(Piece) -> Box<dyn Agent>>;
#[allow(dead_code)]
pub fn run() {
let total_memory = 30_000_000_000; // 30 GB
let num_threads = std::thread::available_parallelism()
.map(NonZero::get)
.expect("unable to get number of threads");
let mem_per_thread = total_memory / num_threads;
let fmv_base = FutureMoveConfig {
max_depth: 20,
min_arena_depth: 14,
top_k_children: 2,
up_to_minus: 10,
max_arena_size: usize::MAX,
do_prune: false,
max_arena_size: mem_per_thread / FutureMoves::ARENA_ENTRY_SIZE,
print: false,
children_eval_method: Default::default(),
..Default::default()
};
let configs = [4, 5, 6]
let configs = [2, 3, 4, 5, 6, 7, 8]
.into_iter()
.map(move |d| FutureMoveConfig {
max_depth: d,
@@ -110,7 +111,7 @@ pub fn run() {
.collect()
});
let vec: Vec<(String, AgentMaker)> = configs
let mut vec: Vec<(String, AgentMaker)> = configs
.into_iter()
.map(move |config| -> (String, AgentMaker) {
(
@@ -119,33 +120,45 @@ pub fn run() {
)
})
.collect();
// vec.push((
// "RandomAgent".to_string(),
// Box::new(move |piece| Box::new(RandomAgent::new(piece))),
// ));
if false {
vec.push((
"RandomAgent".to_string(),
Box::new(move |piece| Box::new(RandomAgent::new(piece))),
));
}
let mut arena = PlayerArena::new(vec);
arena.prop_arena(1000);
arena.prop_arena(500);
println!("{}", arena);
}
pub struct PlayerArena {
/// Name, Creator Function, Elo
players: Vec<(String, AgentMaker, EloRating)>,
players: Vec<(String, AgentMaker, Glicko2Rating)>,
}
impl std::fmt::Display for PlayerArena {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut players_i: Vec<usize> = (0..self.players.len()).collect();
players_i.sort_by_key(|&i| -(self.players[i].2.rating() * 100.0) as i64);
players_i.sort_by(|&a, &b| {
self.players[b]
.2
.rating()
.total_cmp(&self.players[a].2.rating())
});
for i in players_i {
let conf_interval = confidence_interval(&self.players[i].2);
writeln!(
f,
"({:.2}): {}",
"({:.2}[+/-{:.2}]): {}",
self.players[i].2.rating(),
conf_interval.1 - self.players[i].2.rating(),
self.players[i].0
)?;
}
@@ -159,9 +172,8 @@ impl PlayerArena {
Self {
players: players
.into_iter()
.zip([EloRating::new()].into_iter().cycle())
// flatten tuple
.map(|((a, b), c)| (a, b, c))
// All starting ratings should be the default
.map(|(a, b)| (a, b, Default::default()))
.collect(),
}
}
@@ -172,7 +184,10 @@ impl PlayerArena {
.map(|&(i, j)| {
(
(i, j),
Self::create_agents(&self.players[i].1, &self.players[j].1),
(
(self.players[i].1)(Piece::Black),
(self.players[j].1)(Piece::White),
),
)
})
.collect::<Vec<_>>();
@@ -189,7 +204,6 @@ impl PlayerArena {
// Spawn parallel processing in a dedicated thread
let processing_thread = {
let sender = sender.clone();
let term = term.clone();
std::thread::spawn(move || {
rayon::ThreadPoolBuilder::new()
@@ -203,22 +217,6 @@ impl PlayerArena {
created_pairs
.into_par_iter()
.progress_with({
let a = ProgressBar::new(num as u64).with_style(
ProgressStyle::with_template(
"[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}",
)
.expect("invalid ProgressStyle"),
);
a.set_draw_target(ProgressDrawTarget::term(term, 5));
a
})
.progress_with_style(
ProgressStyle::with_template(
"[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}",
)
.expect("invalid ProgressStyle"),
)
.map(|((i, j), (p1, p2))| (i, j, Self::play_two_inner(p1, p2)))
.for_each(|(i, j, o)| {
sender.send((i, j, o)).expect("Failed to send result");
@@ -231,17 +229,29 @@ impl PlayerArena {
// Process results on main thread as they arrive
let mut received_num = 0;
let p = ProgressBar::new(num as u64).with_style(
ProgressStyle::with_template("[{elapsed_precise}] {pos:>7}/{len:7} ETA: {eta}")
.expect("invalid ProgressStyle"),
);
while let Ok((i, j, o)) = receiver.recv() {
self.process_outcome(i, j, &o);
received_num += 1;
term.clear_last_lines(self.players.len())
.expect("unable to clear prev lines");
if received_num > 0 {
term.clear_last_lines(self.players.len() + 1)
.expect("unable to clear prev lines");
}
term.write_str(format!("{}", self).as_str())
.expect("unable to write leaderboard");
received_num += 1;
p.inc(1);
// add extra newline after progressbar
println!();
// break if all pairs were received
if received_num == num {
drop(receiver);
break;
}
}
@@ -253,35 +263,25 @@ impl PlayerArena {
}
fn prop_arena(&mut self, n: usize) {
self.play(
&(0..self.players.len())
.flat_map(|i| {
(0..self.players.len())
.map(move |j| (i, j))
.filter(|(i, j)| i != j)
.collect::<Vec<_>>()
})
.collect::<Vec<_>>()
.repeat(n),
);
let mut games = (0..self.players.len())
.flat_map(|i| {
(0..self.players.len())
.map(move |j| (i, j))
.filter(|(i, j)| i != j)
})
.collect::<Vec<_>>()
.repeat(n);
games.shuffle(&mut rand::rng());
self.play(&games);
}
fn process_outcome(&mut self, player1: usize, player2: usize, outcome: &Outcomes) {
let (np1, np2) = elo(
(self.players[player1].2, self.players[player2].2) = glicko2(
&self.players[player1].2,
&self.players[player2].2,
outcome,
&EloConfig::new(),
&Default::default(),
);
self.players[player1].2 = np1;
self.players[player2].2 = np2;
}
fn create_agents(
player_1_fn: &AgentMaker,
player_2_fn: &AgentMaker,
) -> (Box<dyn Agent>, Box<dyn Agent>) {
(player_1_fn(Piece::Black), player_2_fn(Piece::White))
}
fn play_two_inner(player_1: Box<dyn Agent>, player_2: Box<dyn Agent>) -> Outcomes {
@@ -289,7 +289,8 @@ impl PlayerArena {
player_1,
player_2,
false,
Board::random(rand::random_range(1..=15)),
// Board::random(rand::random_range(4..=15)),
Board::STARTING_POSITION,
)
.expect("unable to create game")
.loop_until_result();

View File

@@ -34,7 +34,7 @@ pub struct FutureMoves {
board: Board,
}
#[derive(Copy, Clone, Allocative)]
#[derive(Copy, Clone, Allocative, Default)]
pub struct FutureMoveConfig {
/// Max depth that we should try to traverse
pub max_depth: usize,
@@ -87,21 +87,17 @@ impl std::fmt::Display for FutureMoveConfig {
}
}
#[derive(Debug, Clone, Copy, Allocative)]
#[derive(Debug, Clone, Copy, Allocative, Default)]
#[allow(dead_code)]
pub enum ChildrenEvalMethod {
/// Best so far?
// #[default]
MinMax,
#[default]
MinMaxProb,
}
impl Default for ChildrenEvalMethod {
fn default() -> Self {
Self::MinMax
}
}
impl FutureMoves {
pub const fn new(agent_color: Piece, config: FutureMoveConfig) -> Self {
Self {
@@ -113,6 +109,9 @@ impl FutureMoves {
}
}
pub const ARENA_ENTRY_SIZE: usize =
size_of::<Move>() + size_of::<usize>() * (Board::AREA.0 as usize / 4);
/// Return the length of the Arena
pub fn arena_len(&self) -> usize {
self.arena.len()
@@ -214,18 +213,7 @@ impl FutureMoves {
}
fn create_move(&self, coord: MoveCoord, board: Board, color: Piece) -> Move {
Move::new(
coord,
board,
color,
self.agent_color,
MoveValueConfig {
self_value_raw: matches!(
self.config.children_eval_method,
ChildrenEvalMethod::MinMaxProb
),
},
)
Move::new(coord, board, color, self.agent_color, MoveValueConfig {})
}
fn generate_children_raw(&self, parent_idx: usize) -> Vec<Move> {
@@ -308,52 +296,29 @@ impl FutureMoves {
.iter()
.map(|&child| self.arena[child].value)
.collect::<Vec<_>>();
let child_value = if self.arena[idx].color == self.agent_color {
// get best (for the adversary) enemy play
// this assumes the adversary is playing optimally
children_values.iter().min()
} else {
children_values.iter().max()
}
.cloned()
.unwrap_or(Default::default());
self.arena[idx].value = self.arena[idx].self_value;
match self.config.children_eval_method {
ChildrenEvalMethod::MinMax => {
let child_value = if self.arena[idx].color == self.agent_color {
// get best (for the adversary) enemy play
// this assumes the adversary is playing optimally
children_values
.into_iter()
.min_by_key(|x| x.value)
.map(|x| x.value)
} else {
children_values
.into_iter()
.max_by_key(|x| x.value)
.map(|x| x.value)
}
.unwrap_or(0);
// we use `depth` and divided `self_value` by it, idk if this is worth it
// we should really setup some sort of ELO rating for each commit, playing them against
// each other or something, could be cool to benchmark these more subjective things, not
// just performance (cycles/time wise)
self.arena[idx].value.value =
self.arena[idx].self_value as i32 + child_value;
self.arena[idx].value.value += child_value.value;
self.arena[idx].value.set_state(child_value.state());
}
ChildrenEvalMethod::MinMaxProb => {
let child_value = if self.arena[idx].color == self.agent_color {
// get best (for the adversary) enemy play
// this assumes the adversary is playing optimally
children_values.iter().min()
} else {
children_values.iter().max()
}
.cloned()
.unwrap_or(Default::default());
// we use `depth` and divided `self_value` by it, idk if this is worth it
// we should really setup some sort of ELO rating for each commit, playing them against
// each other or something, could be cool to benchmark these more subjective things, not
// just performance (cycles/time wise)
self.arena[idx]
.value
.populate_self_from_children(&children_values);
self.arena[idx].value.value =
self.arena[idx].self_value as i32 + child_value.value;
self.arena[idx].value.value += child_value.value;
}
}
}
@@ -399,12 +364,15 @@ impl FutureMoves {
/// Return the best move which is a child of `self.current_root`
pub fn best_move(&self) -> Option<MoveCoord> {
self.current_root
.and_then(|x| {
self.arena[x]
.and_then(|x| match self.config.children_eval_method {
ChildrenEvalMethod::MinMax => self.arena[x]
.children
.iter()
// this would be considered `minimax`
.max_by_key(|&&idx| self.arena[idx].value)
.max_by_key(|&&idx| self.arena[idx].value),
ChildrenEvalMethod::MinMaxProb => self.arena[x]
.children
.iter()
.max_by_key(|&&idx| self.arena[idx].value),
})
.inspect(|&&x| {
assert_eq!(

View File

@@ -1,5 +1,6 @@
mod board_value;
mod future_moves;
mod r#move;
mod mvs;
pub use future_moves::{ChildrenEvalMethod, FutureMoveConfig, FutureMoves};
pub use r#move::MoveCoord;

View File

@@ -1,78 +1,12 @@
use std::cmp::Ordering;
use super::board_value::BoardValueMap;
use super::{
board_value::BoardValueMap,
mvs::{MVSGameState, MoveValueStats},
};
use crate::repr::{Board, CoordPair, Piece, Winner};
use allocative::Allocative;
pub type MoveCoord = Option<CoordPair>;
#[derive(Clone, Copy, PartialEq, Eq, Allocative, Debug, PartialOrd, Ord)]
pub enum MVSGameState {
Win = 1,
Loss = 0,
Tie = -1,
}
#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
pub struct MoveValueStats {
state: Option<MVSGameState>,
wins: u16,
losses: u16,
pub value: i32,
}
impl MoveValueStats {
fn chance_win(&self) -> Option<f32> {
let sum = self.losses + self.wins;
if sum == 0 {
return None;
}
Some(self.wins as f32 / sum as f32)
}
pub fn populate_self_from_children(&mut self, others: &[Self]) {
let wins = others.iter().map(|x| x.wins).sum::<u16>()
+ others
.iter()
.filter(|x| x.state == Some(MVSGameState::Win))
.count() as u16;
let losses = others.iter().map(|x| x.losses).sum::<u16>()
+ others
.iter()
.filter(|x| x.state == Some(MVSGameState::Loss))
.count() as u16;
self.wins = wins;
self.losses = losses;
}
}
impl PartialOrd for MoveValueStats {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for MoveValueStats {
fn cmp(&self, other: &Self) -> Ordering {
if self.state.is_some() && other.state.is_some() {
return self.state.cmp(&other.state);
}
let s_cw = self.chance_win();
let o_cw = other.chance_win();
if s_cw.is_some() && o_cw.is_some() {
if s_cw > o_cw {
return Ordering::Greater;
} else if o_cw > s_cw {
return Ordering::Less;
}
}
self.value.cmp(&other.value)
}
}
#[derive(Clone, Debug, Allocative)]
pub struct Move {
/// Coordinates (i, j) of the move (if it exists)
@@ -95,7 +29,7 @@ pub struct Move {
pub value: MoveValueStats,
/// What is the inherent value of this move (not including children)
pub self_value: i16,
pub self_value: MoveValueStats,
/// Which color made a move on this move?
pub color: Piece,
@@ -104,9 +38,7 @@ pub struct Move {
pub is_trimmed: bool,
}
pub struct MoveValueConfig {
pub self_value_raw: bool,
}
pub struct MoveValueConfig {}
impl Move {
pub fn new(
@@ -114,7 +46,7 @@ impl Move {
board: Board,
color: Piece,
agent_color: Piece,
mvc: MoveValueConfig,
_: MoveValueConfig,
) -> Self {
let mut m = Move {
coord,
@@ -124,49 +56,29 @@ impl Move {
value: Default::default(),
color,
is_trimmed: false,
self_value: 0,
self_value: Default::default(),
};
// set wins/losses values appropriately
match m.winner {
Winner::Player(piece) => {
if piece == agent_color {
m.value.wins += 1;
m.value.state = Some(MVSGameState::Win);
m.self_value.set_state(Some(MVSGameState::Win));
} else {
m.value.losses += 1;
m.value.state = Some(MVSGameState::Loss);
m.self_value.set_state(Some(MVSGameState::Loss));
}
}
Winner::Tie => {
m.value.state = Some(MVSGameState::Tie);
m.self_value.set_state(Some(MVSGameState::Tie));
}
Winner::None => {}
}
if !mvc.self_value_raw {
m.self_value = m.compute_self_value(agent_color, &board, mvc);
} else {
m.self_value = const { BoardValueMap::weighted() }.board_value(&board, agent_color);
}
m.self_value.value =
const { BoardValueMap::weighted() }.board_value(&board, agent_color) as i32;
m
}
fn compute_self_value(&self, agent_color: Piece, board: &Board, _mvc: MoveValueConfig) -> i16 {
if self.winner == Winner::Player(!agent_color) {
// if this board results in the opponent winning, MAJORLY negatively weigh this move
// NOTE! this branch isn't completely deleted because if so, the bot wouldn't make a move.
// We shouldn't prune branches because we still need to always react to the opponent's moves
return i16::MIN + 1;
} else if self.winner == Winner::Player(agent_color) {
// results in a win for the agent
return i16::MAX - 1;
}
// I guess ignore Ties here, don't give them an explicit value,
const { BoardValueMap::weighted() }.board_value(board, agent_color)
}
/// Sort children of the [`Move`] by their self_value in `arena`
pub fn sort_children(&mut self, arena: &[Move]) {
self.children.sort_by(|&a, &b| {

164
src/logic/mvs.rs Normal file
View File

@@ -0,0 +1,164 @@
use allocative::Allocative;
use std::cmp::Ordering;
/// Terminal outcome attached to a [`MoveValueStats`].
///
/// The explicit discriminants matter: the derived `PartialOrd`/`Ord` compare
/// enum variants by discriminant, so the resulting order is
/// `Loss < Tie < Win`.
// NOTE(review): presumably outcomes are expressed from the searching agent's
// point of view — confirm against the code that calls `set_state`.
#[derive(Clone, Copy, PartialEq, Eq, Allocative, Debug, PartialOrd, Ord)]
pub enum MVSGameState {
/// Highest-ranked outcome (discriminant `1`).
Win = 1,
/// Ranked between `Loss` and `Win` (discriminant `0`).
Tie = 0,
/// Lowest-ranked outcome (discriminant `-1`).
Loss = -1,
}
/// Value of a move: an optional terminal state, aggregated outcome counters
/// and a plain integer score.
///
/// `Default` yields no state, all counters at zero and a `value` of `0`.
/// Note that the derived `PartialEq` compares every field, while the manual
/// `Ord` implementation in this file only looks at `state`, the win ratio
/// and `value`.
#[derive(Clone, Copy, Debug, Allocative, PartialEq, Eq, Default)]
pub struct MoveValueStats {
/// Terminal outcome, if one has been recorded via `set_state`.
state: Option<MVSGameState>,
/// Number of winning outcomes counted by `populate_self_from_children`.
wins: u16,
/// Number of losing outcomes counted by `populate_self_from_children`.
losses: u16,
/// Number of tied outcomes counted by `populate_self_from_children`.
ties: u16,
/// Integer score; used as the final tie-breaker when comparing two values.
pub value: i32,
}
impl MoveValueStats {
    /// Minimum number of recorded outcomes (wins + losses + ties) required
    /// before [`Self::chance_win`] reports a ratio. Below this the sample is
    /// treated as too small to be meaningful.
    const MIN_OUTCOMES_FOR_CHANCE: u32 = 20;

    /// Test-only constructor: sets the outcome counters, everything else is
    /// left at its default.
    #[cfg(test)]
    pub fn new_from_outcomes(wins: u16, losses: u16, ties: u16) -> Self {
        Self {
            wins,
            losses,
            ties,
            ..Default::default()
        }
    }

    /// Test-only constructor: sets `value`, everything else is left at its
    /// default.
    #[cfg(test)]
    pub fn new_from_value(value: i32) -> Self {
        Self {
            value,
            ..Default::default()
        }
    }

    /// Test-only constructor: sets the terminal `state`, everything else is
    /// left at its default.
    #[cfg(test)]
    pub fn new_from_state(state: Option<MVSGameState>) -> Self {
        Self {
            state,
            ..Default::default()
        }
    }

    /// Fraction of recorded outcomes that are wins.
    ///
    /// Returns `None` when fewer than [`Self::MIN_OUTCOMES_FOR_CHANCE`]
    /// outcomes have been recorded.
    fn chance_win(&self) -> Option<f32> {
        // Sum in `u32`: three `u16` counters can exceed `u16::MAX`, which
        // would panic in debug builds and wrap in release builds.
        let total = u32::from(self.wins) + u32::from(self.losses) + u32::from(self.ties);
        if total < Self::MIN_OUTCOMES_FOR_CHANCE {
            return None;
        }

        // `total` is at most 3 * u16::MAX, which `f32` represents exactly.
        Some(f32::from(self.wins) / total as f32)
    }

    /// Overwrites the terminal state.
    pub const fn set_state(&mut self, state: Option<MVSGameState>) {
        self.state = state;
    }

    /// Returns the terminal state, if any.
    pub const fn state(&self) -> Option<MVSGameState> {
        self.state
    }

    /// Replaces this value's outcome counters with the totals over `others`.
    ///
    /// Each element contributes its own counters plus one extra count for its
    /// terminal state (if set). `state` and `value` of `self` are untouched.
    /// Counters saturate at `u16::MAX` instead of overflowing.
    pub fn populate_self_from_children(&mut self, others: &[Self]) {
        // Adds a child's counter plus one if the child itself ended in `hit`.
        let tally = |acc: u16, counter: u16, child: &Self, hit: MVSGameState| {
            acc.saturating_add(counter)
                .saturating_add(u16::from(child.state == Some(hit)))
        };

        (self.wins, self.losses, self.ties) = others.iter().fold(
            (0u16, 0u16, 0u16),
            |(wins, losses, ties), child| {
                (
                    tally(wins, child.wins, child, MVSGameState::Win),
                    tally(losses, child.losses, child, MVSGameState::Loss),
                    tally(ties, child.ties, child, MVSGameState::Tie),
                )
            },
        );
    }
}
/// The partial order is exactly the total order provided by the `Ord`
/// implementation, so a comparison always yields `Some`.
impl PartialOrd for MoveValueStats {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for MoveValueStats {
fn cmp(&self, other: &Self) -> Ordering {
if self.state.is_some() || other.state.is_some() {
return self.state.cmp(&other.state);
}
let (s_cw, o_cw) = (self.chance_win(), other.chance_win());
if s_cw.is_some() || o_cw.is_some() {
if s_cw > o_cw {
return Ordering::Greater;
} else if o_cw > s_cw {
return Ordering::Less;
}
}
self.value.cmp(&other.value)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Aggregates `children` into a fresh zero-valued parent and returns it.
    fn aggregate(children: &[MoveValueStats]) -> MoveValueStats {
        let mut parent = MoveValueStats::new_from_value(0);
        parent.populate_self_from_children(children);
        parent
    }

    /// Two values with enough samples are ordered by win ratio.
    #[test]
    fn two_prob() {
        let better = MoveValueStats::new_from_outcomes(100, 40, 0);
        let worse = MoveValueStats::new_from_outcomes(40, 60, 0);

        assert!(better > worse);
    }

    /// A value with a win ratio beats one that only carries a score.
    #[test]
    fn one_prob_one_non() {
        let sampled = MoveValueStats::new_from_outcomes(100, 4, 0);
        let score_only = MoveValueStats::new_from_value(10);

        assert!(sampled > score_only);
    }

    /// A decided win beats any win ratio.
    #[test]
    fn one_prob_one_win() {
        let sampled = MoveValueStats::new_from_outcomes(100, 4, 0);
        let decided_win = MoveValueStats::new_from_state(Some(MVSGameState::Win));

        assert!(sampled < decided_win);
    }

    /// All-wins beats all-losses.
    #[test]
    fn two_prob_zero() {
        let only_wins = MoveValueStats::new_from_outcomes(100, 0, 0);
        let only_losses = MoveValueStats::new_from_outcomes(0, 60, 0);

        assert!(only_wins > only_losses);
    }

    /// Children counters are summed per category.
    #[test]
    fn test_children_pop() {
        let parent = aggregate(&[
            MoveValueStats::new_from_outcomes(1, 0, 0),
            MoveValueStats::new_from_outcomes(0, 2, 0),
            MoveValueStats::new_from_outcomes(0, 0, 3),
        ]);

        assert_eq!(parent.wins, 1, "Wins should be 1");
        assert_eq!(parent.losses, 2, "Losses should be 2");
        assert_eq!(parent.ties, 3, "Ties should be 3");
    }

    /// Children terminal states each count as one outcome.
    #[test]
    fn test_children_pop_state() {
        let parent = aggregate(&[
            MoveValueStats::new_from_state(Some(MVSGameState::Win)),
            MoveValueStats::new_from_state(Some(MVSGameState::Win)),
            MoveValueStats::new_from_state(Some(MVSGameState::Loss)),
            MoveValueStats::new_from_state(Some(MVSGameState::Tie)),
            MoveValueStats::new_from_state(Some(MVSGameState::Tie)),
        ]);

        assert_eq!(parent.wins, 2, "Wins should be 2");
        assert_eq!(parent.losses, 1, "Losses should be 1");
        assert_eq!(parent.ties, 2, "Ties should be 2");
    }
}

View File

@@ -39,7 +39,7 @@ fn main() {
min_arena_depth: 14,
top_k_children: 2,
up_to_minus: 10,
max_arena_size: 200_000_000,
max_arena_size: 50_000_000,
do_prune: false,
print: true,
children_eval_method: Default::default(),

View File

@@ -14,12 +14,12 @@ impl<T: Copy> PosMap<T> {
Self(MaybeUninit::zeroed().assume_init())
}
pub const fn from(v: [[T; Board::SIZE as usize]; Board::SIZE as usize]) -> Self {
pub const fn from(mut v: [[T; Board::SIZE as usize]; Board::SIZE as usize]) -> Self {
let mut n = unsafe { Self::uninit() };
const_for!(i in 0..Board::SIZE => {
const_for!(j in 0..Board::SIZE => {
n.set(CoordPair::from_axes(i, j), v[i as usize][j as usize]);
std::mem::swap(n.get_mut(CoordPair::from_axes(i, j)), &mut v[i as usize][j as usize]);
});
});
n