From f76c9f7401f79caa96758267c9b6bfbb2c28f990 Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Tue, 30 Mar 2021 12:54:57 -0400 Subject: [PATCH] sin/cos optimizations, cleanup, and optimizations --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + Notes.md | 17 +++++++++++++++++ TODO.md | 5 ++++- src/lib.rs | 3 ++- src/main.rs | 6 +++--- src/math.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/model.rs | 45 +++++++++++++++++++-------------------------- src/util.rs | 4 ++-- 9 files changed, 99 insertions(+), 33 deletions(-) create mode 100644 Notes.md create mode 100644 src/math.rs diff --git a/Cargo.lock b/Cargo.lock index 581d453..379f573 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -248,6 +248,12 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "fastapprox" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0031c93f37b5d18272de2d932ebff6a7eb32d4bc3bab6751a9af42da7d1a424" + [[package]] name = "getrandom" version = "0.2.2" @@ -488,6 +494,7 @@ name = "physarum" version = "0.1.0" dependencies = [ "criterion", + "fastapprox", "image", "indicatif", "itertools 0.10.0", diff --git a/Cargo.toml b/Cargo.toml index 9317f8d..c5f506d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ rand = "0.8.3" rand_distr = "0.4" #rayon = "1.5" rayon = {git = "https://github.com/rayon-rs/rayon.git"} +fastapprox = "0.3.0" [dev-dependencies] criterion = "0.3.4" diff --git a/Notes.md b/Notes.md new file mode 100644 index 0000000..ea61c1f --- /dev/null +++ b/Notes.md @@ -0,0 +1,17 @@ +## sin and cos optimizations: +### Setup/Info: +- measured in ms/agent ticked +- 2048 iterations +- average of 3 trials +- 256x256 grid +- 1 << 20 particles +- 1 population +### Results: +- normal sin + normal cos: + - 0.000018192ms +- old sin + old cos: + - 0.000019803ms (8.85% slower) +- fastapprox::fast::sin + 
fastapprox::fast::cos + - 0.000018658ms (2.56% slower) +- fastapprox::faster::sin + fastapprox::faster::cos + - 0.000015878ms (14.57% faster) \ No newline at end of file diff --git a/TODO.md b/TODO.md index 5f4b695..59b1f74 100644 --- a/TODO.md +++ b/TODO.md @@ -2,4 +2,7 @@ - Auto create a mp4 from generate images - Instead of using the command `ffmpeg -r 20 -i tmp/out_%d.png -vcodec libx265 -crf 25 -s 512x512 test.mp4` maybe use a rust library to do the same (more research needed) - GPU compute - - Tried [ArrayFire-rust](https://github.com/arrayfire/arrayfire-rust) didn't work well, looking for another library \ No newline at end of file + - Tried [ArrayFire-rust](https://github.com/arrayfire/arrayfire-rust) didn't work well, looking for another library + - Try using [emu](https://github.com/calebwin/emu) (seems to be a very good option) +- sin and cos optimizations + - sin/cos table? \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 4709868..f14a416 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,4 +3,5 @@ mod grid; pub mod model; mod palette; mod util; -mod imgdata; // for storing image data \ No newline at end of file +mod imgdata; // for storing image data +mod math; // for math things \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 0e66e22..fd90602 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,19 +2,19 @@ use physarum::model; fn main() { // # of iterations to go through - let n_iterations = 512; + let n_iterations = 2048; // Size of grid and pictures let (width, height) = (256, 256); // # of agents - let n_particles = 1 << 24; + let n_particles = 1 << 20; println!("n_particles: {}", n_particles); let diffusivity = 1; // `n_populations` is the # of types of agents - let n_populations = 2; + let n_populations = 1; // let n_populations = 1 + rng.gen_range(1..4); // make # of populations between 2 and 5 diff --git a/src/math.rs b/src/math.rs new file mode 100644 index 0000000..b1fb4e3 --- /dev/null +++ 
b/src/math.rs @@ -0,0 +1,44 @@ +#[inline(always)] +fn to_radians(x: f32) -> f32 { + x * (std::f32::consts::PI / 180.0) +} + +/// Previously from trig.rs +/// From https://bits.stephan-brumme.com/absFloat.html +#[allow(dead_code)] +#[inline(always)] +fn abs(x: f32) -> f32 { + return f32::from_bits(x.to_bits() & 0x7FFF_FFFF); +} + +/// Previously from trig.rs +/// Branchless floor implementation +#[allow(dead_code)] +#[inline(always)] +fn floor(x: f32) -> f32 { + let mut x_trunc = (x as i32) as f32; + x_trunc -= (x < x_trunc) as i32 as f32; + return x_trunc; +} + +/// Previously from trig.rs +/// Approximates `cos(x)` in radians with the maximum error of `0.002` +/// https://stackoverflow.com/posts/28050328/revisions +#[allow(dead_code)] +#[inline(always)] +pub fn cos(mut x: f32) -> f32 { + const ALPHA: f32 = 0.5 * std::f32::consts::FRAC_1_PI; + x *= ALPHA; + x -= 0.25_f32 + floor(x + 0.25_f32); + x *= 16.0_f32 * (abs(x) - 0.5_f32); + x += 0.225_f32 * x * (abs(x) - 1.0_f32); + return x; +} + +/// Previously from trig.rs +/// Approximates `sin(x)` in radians with the maximum error of `0.002` +#[allow(dead_code)] +#[inline(always)] +pub fn sin(x: f32) -> f32 { + return cos(x - std::f32::consts::FRAC_PI_2); +} \ No newline at end of file diff --git a/src/model.rs b/src/model.rs index 87a4b74..5d9b518 100644 --- a/src/model.rs +++ b/src/model.rs @@ -2,6 +2,7 @@ use crate::{ grid::{combine, Grid, PopulationConfig}, palette::{random_palette, Palette}, imgdata::ImgData, + util::wrap, }; use rand::{seq::SliceRandom, Rng}; @@ -13,7 +14,6 @@ use std::time::{Instant}; use rayon::iter::{ParallelIterator,}; use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle}; use std::path::Path; -use crate::util::wrap; /// A single Physarum agent. The x and y positions are continuous, hence we use floating point /// numbers instead of integers. 
@@ -39,21 +39,7 @@ impl Agent { } } - fn get_sensor_coords(&mut self, x: f32, y: f32, sensor_distance: f32, sensor_angle: f32, angle: f32) -> (f32, f32, f32, f32, f32, f32) { - let xc = x + self.angle.cos() * sensor_distance; - let yc = y + self.angle.sin() * sensor_distance; - - let agent_add_sens = angle + sensor_angle; - let agent_sub_sens = angle - sensor_angle; - - let xl = x + agent_sub_sens.cos() * sensor_distance; - let yl = y + agent_sub_sens.sin() * sensor_distance; - let xr = x + agent_add_sens.cos() * sensor_distance; - let yr = y + agent_add_sens.sin() * sensor_distance; - - return (xc, yc, xl, yl, xr, yr); - } - + #[inline] pub fn tick(&mut self, grid: &Grid) { let (width, height) = (grid.width, grid.height); let PopulationConfig { @@ -64,7 +50,16 @@ impl Agent { .. } = grid.config; - let (xc, yc, xl, yl, xr, yr) = Self::get_sensor_coords(self, self.x, self.y, sensor_distance, sensor_angle, self.angle); + let xc = self.x + fastapprox::faster::cos(self.angle) * sensor_distance; + let yc = self.y + fastapprox::faster::sin(self.angle) * sensor_distance; + + let agent_add_sens = self.angle + sensor_angle; + let agent_sub_sens = self.angle - sensor_angle; + + let xl = self.x + fastapprox::faster::cos(agent_sub_sens) * sensor_distance; + let yl = self.y + fastapprox::faster::sin(agent_sub_sens) * sensor_distance; + let xr = self.x + fastapprox::faster::cos(agent_add_sens) * sensor_distance; + let yr = self.y + fastapprox::faster::sin(agent_add_sens) * sensor_distance; // We sense from the buffer because this is where we previously combined data from all the grid. 
let center = grid.get_buf(xc, yc); @@ -86,9 +81,10 @@ impl Agent { } let delta_angle = rotation_angle * direction; + self.angle = wrap(self.angle + delta_angle, TAU); - self.x = wrap(self.x + step_distance * self.angle.cos(), width as f32); - self.y = wrap(self.y + step_distance * self.angle.sin(), height as f32); + self.x = wrap(self.x + step_distance * fastapprox::faster::cos(self.angle), width as f32); + self.y = wrap(self.y + step_distance * fastapprox::faster::sin(self.angle), height as f32); } } @@ -195,7 +191,7 @@ impl Model { /// Simulates `steps` # of steps - #[inline(always)] + #[inline] pub fn run(&mut self, steps: usize) { let debug: bool = false; @@ -221,11 +217,10 @@ impl Model { // Tick agents self.agents.par_iter_mut().for_each(|agent| { - let grid = &grids[agent.population_id]; - agent.tick(grid); + agent.tick(&grids[agent.population_id]); }); - // Deposit + // Deposit // TODO - Make this parallel for agent in self.agents.iter() { self.grids[agent.population_id].deposit(agent.x, agent.y); } @@ -243,9 +238,7 @@ impl Model { time_per_agent_list.push(ms_per_agent); time_per_step_list.push(agents_tick_elapsed); - if debug { - println!("Finished tick for all agents. took {}ms\nTime per agent: {}ms\n", agents_tick_elapsed, ms_per_agent); - } + if debug {println!("Finished tick for all agents. took {}ms\nTime per agent: {}ms\n", agents_tick_elapsed, ms_per_agent)}; self.iteration += 1; pb.set_position(i as u64); diff --git a/src/util.rs b/src/util.rs index e99d574..3a099c6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,4 +1,4 @@ #[inline(always)] pub fn wrap(x: f32, max: f32) -> f32 { - x - max * ((x > max) as i32 as f32 - (x < 0.0_f32) as i32 as f32) -} + return x - max * ((x > max) as i32 as f32 - (x < 0.0_f32) as i32 as f32); +} \ No newline at end of file