From f76c9f7401f79caa96758267c9b6bfbb2c28f990 Mon Sep 17 00:00:00 2001
From: Simon Gardling <titaniumtown@gmail.com>
Date: Tue, 30 Mar 2021 12:54:57 -0400
Subject: [PATCH] sin/cos optimizations, cleanup, and other optimizations

---
 Cargo.lock   |  7 +++++++
 Cargo.toml   |  1 +
 Notes.md     | 17 +++++++++++++++++
 TODO.md      |  5 ++++-
 src/lib.rs   |  3 ++-
 src/main.rs  |  6 +++---
 src/math.rs  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 src/model.rs | 45 +++++++++++++++++++--------------------------
 src/util.rs  |  4 ++--
 9 files changed, 99 insertions(+), 33 deletions(-)
 create mode 100644 Notes.md
 create mode 100644 src/math.rs

diff --git a/Cargo.lock b/Cargo.lock
index 581d453..379f573 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -248,6 +248,12 @@ version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 
+[[package]]
+name = "fastapprox"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0031c93f37b5d18272de2d932ebff6a7eb32d4bc3bab6751a9af42da7d1a424"
+
 [[package]]
 name = "getrandom"
 version = "0.2.2"
@@ -488,6 +494,7 @@ name = "physarum"
 version = "0.1.0"
 dependencies = [
  "criterion",
+ "fastapprox",
  "image",
  "indicatif",
  "itertools 0.10.0",
diff --git a/Cargo.toml b/Cargo.toml
index 9317f8d..c5f506d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ rand = "0.8.3"
 rand_distr = "0.4"
 #rayon = "1.5"
 rayon = {git = "https://github.com/rayon-rs/rayon.git"}
+fastapprox = "0.3.0"
 
 [dev-dependencies]
 criterion = "0.3.4"
diff --git a/Notes.md b/Notes.md
new file mode 100644
index 0000000..ea61c1f
--- /dev/null
+++ b/Notes.md
@@ -0,0 +1,17 @@
+## sin and cos optimizations:
+### Setup/Info:
+- measured in ms/agent ticked
+- 2048 iterations
+- average of 3 trials
+- 256x256 grid
+- 1 << 20 particles
+- 1 population
+### Results:
+- normal sin + normal cos (std `f32::sin` / `f32::cos`):
+    - 0.000018192ms
+- old sin + old cos (the approximations now in `src/math.rs`):
+    - 0.000019803ms (8.85% slower)
+- `fastapprox::fast::sin` + `fastapprox::fast::cos`:
+    - 0.000018658ms (2.56% slower)
+- `fastapprox::faster::sin` + `fastapprox::faster::cos`:
+    - 0.000015878ms (14.57% faster)
\ No newline at end of file
diff --git a/TODO.md b/TODO.md
index 5f4b695..59b1f74 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,4 +2,7 @@
 - Auto create a mp4 from generate images
     - Instead of using the command `ffmpeg -r 20 -i tmp/out_%d.png -vcodec libx265 -crf 25 -s 512x512 test.mp4` maybe use a rust library to do the same (more research needed)
 - GPU compute 
-    - Tried [ArrayFire-rust](https://github.com/arrayfire/arrayfire-rust) didn't work well, looking for another library
\ No newline at end of file
+    - Tried [ArrayFire-rust](https://github.com/arrayfire/arrayfire-rust) didn't work well, looking for another library
+    - Try using [emu](https://github.com/calebwin/emu) (seems to be a very good option)
+- sin and cos optimizations
+    - sin/cos table?
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 4709868..f14a416 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,4 +3,5 @@ mod grid;
 pub mod model;
 mod palette;
 mod util;
-mod imgdata; // for storing image data
\ No newline at end of file
+mod imgdata; // for storing image data
+mod math; // for math things
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index 0e66e22..fd90602 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,19 +2,19 @@ use physarum::model;
 
 fn main() {
     // # of iterations to go through
-    let n_iterations = 512;
+    let n_iterations = 2048;
 
     // Size of grid and pictures
     let (width, height) = (256, 256);
 
     // # of agents
-    let n_particles = 1 << 24;
+    let n_particles = 1 << 20;
     println!("n_particles: {}", n_particles);
 
     let diffusivity = 1;
 
     // `n_populations` is the # of types of agents
-    let n_populations = 2;
+    let n_populations = 1;
     // let n_populations = 1 + rng.gen_range(1..4); // make # of populations between 2 and 5
     
 
diff --git a/src/math.rs b/src/math.rs
new file mode 100644
index 0000000..b1fb4e3
--- /dev/null
+++ b/src/math.rs
@@ -0,0 +1,44 @@
+#[inline(always)] // NOTE(review): no caller is visible in this patch — confirm it is still needed
+fn to_radians(x: f32) -> f32 { // converts an angle given in degrees to radians
+    x * (std::f32::consts::PI / 180.0) // radians = degrees * (pi / 180)
+}
+
+/// Returns the absolute value of `x` by clearing its sign bit (previously in trig.rs).
+/// Technique from https://bits.stephan-brumme.com/absFloat.html
+#[allow(dead_code)]
+#[inline(always)]
+fn abs(x: f32) -> f32 {
+    return f32::from_bits(x.to_bits() & 0x7FFF_FFFF); // mask keeps every bit except bit 31, the sign
+}
+
+/// Branchless floor (previously in trig.rs): truncate toward zero, then fix up negative inputs.
+/// Goes through an `i32` cast, so inputs outside the `i32` range are not floored correctly.
+#[allow(dead_code)]
+#[inline(always)]
+fn floor(x: f32) -> f32 {
+    let mut x_trunc = (x as i32) as f32; // the integer cast drops the fractional part
+    x_trunc -= (x < x_trunc) as i32 as f32; // subtract 1.0 only when truncation landed above `x`
+    return x_trunc;
+}
+
+/// Approximates `cos(x)` for `x` in radians; stated maximum error is `0.002` (previously in trig.rs).
+/// Rescales `x` to turns, wraps it into one period, fits a parabola and applies one correction.
+/// Source: https://stackoverflow.com/posts/28050328/revisions
+#[allow(dead_code)]
+#[inline(always)]
+pub fn cos(mut x: f32) -> f32 {
+    const ALPHA: f32 = 0.5 * std::f32::consts::FRAC_1_PI; // 1 / (2 * pi)
+    x *= ALPHA; // radians -> fraction of a full turn
+    x -= 0.25_f32 + floor(x + 0.25_f32); // wrap into a single period, shifted by a quarter turn
+    x *= 16.0_f32 * (abs(x) - 0.5_f32); // parabola: 0 at the zero crossings, +/-1 at the extrema
+    x += 0.225_f32 * x * (abs(x) - 1.0_f32); // correction term; vanishes where x is 0 or +/-1
+    return x;
+}
+
+/// Approximates `sin(x)` for `x` in radians (previously in trig.rs) by delegating to [`cos`]
+/// through the identity `sin(x) = cos(x - pi/2)`; stated maximum error is `0.002`.
+#[allow(dead_code)]
+#[inline(always)]
+pub fn sin(x: f32) -> f32 {
+    return cos(x - std::f32::consts::FRAC_PI_2); // shift by a quarter period
+}
\ No newline at end of file
diff --git a/src/model.rs b/src/model.rs
index 87a4b74..5d9b518 100644
--- a/src/model.rs
+++ b/src/model.rs
@@ -2,6 +2,7 @@ use crate::{
     grid::{combine, Grid, PopulationConfig},
     palette::{random_palette, Palette},
     imgdata::ImgData,
+    util::wrap,
 };
 
 use rand::{seq::SliceRandom, Rng};
@@ -13,7 +14,6 @@ use std::time::{Instant};
 use rayon::iter::{ParallelIterator,};
 use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle};
 use std::path::Path;
-use crate::util::wrap;
 
 /// A single Physarum agent. The x and y positions are continuous, hence we use floating point
 /// numbers instead of integers.
@@ -39,21 +39,7 @@ impl Agent {
         }
     }
 
-    fn get_sensor_coords(&mut self, x: f32, y: f32, sensor_distance: f32, sensor_angle: f32, angle: f32) -> (f32, f32, f32, f32, f32, f32) {
-        let xc = x + self.angle.cos() * sensor_distance;
-        let yc = y + self.angle.sin() * sensor_distance;
-        
-        let agent_add_sens = angle + sensor_angle;
-        let agent_sub_sens = angle - sensor_angle;
-
-        let xl = x + agent_sub_sens.cos() * sensor_distance;
-        let yl = y + agent_sub_sens.sin() * sensor_distance;
-        let xr = x + agent_add_sens.cos() * sensor_distance;
-        let yr = y + agent_add_sens.sin() * sensor_distance;
-
-        return (xc, yc, xl, yl, xr, yr);
-    }
-
+    #[inline]
     pub fn tick(&mut self, grid: &Grid) {        
         let (width, height) = (grid.width, grid.height);
         let PopulationConfig {
@@ -64,7 +50,16 @@ impl Agent {
             ..
         } = grid.config;
 
-        let (xc, yc, xl, yl, xr, yr) = Self::get_sensor_coords(self, self.x, self.y, sensor_distance, sensor_angle, self.angle);
+        let xc = self.x + fastapprox::faster::cos(self.angle) * sensor_distance;
+        let yc = self.y + fastapprox::faster::sin(self.angle) * sensor_distance;
+        
+        let agent_add_sens = self.angle + sensor_angle;
+        let agent_sub_sens = self.angle - sensor_angle;
+
+        let xl = self.x + fastapprox::faster::cos(agent_sub_sens) * sensor_distance;
+        let yl = self.y + fastapprox::faster::sin(agent_sub_sens) * sensor_distance;
+        let xr = self.x + fastapprox::faster::cos(agent_add_sens) * sensor_distance;
+        let yr = self.y + fastapprox::faster::sin(agent_add_sens) * sensor_distance;
 
         // We sense from the buffer because this is where we previously combined data from all the grid.
         let center = grid.get_buf(xc, yc);
@@ -86,9 +81,10 @@ impl Agent {
         }
 
         let delta_angle = rotation_angle * direction;
+
         self.angle = wrap(self.angle + delta_angle, TAU);
-        self.x = wrap(self.x + step_distance * self.angle.cos(), width as f32);
-        self.y = wrap(self.y + step_distance * self.angle.sin(), height as f32);
+        self.x = wrap(self.x + step_distance * fastapprox::faster::cos(self.angle), width as f32);
+        self.y = wrap(self.y + step_distance * fastapprox::faster::sin(self.angle), height as f32);
     }
 }
 
@@ -195,7 +191,7 @@ impl Model {
 
 
     /// Simulates `steps` # of steps
-    #[inline(always)]
+    #[inline]
     pub fn run(&mut self, steps: usize) {
         let debug: bool = false;
 
@@ -221,11 +217,10 @@ impl Model {
 
             // Tick agents
             self.agents.par_iter_mut().for_each(|agent| {
-                let grid = &grids[agent.population_id];
-                agent.tick(grid);
+                agent.tick(&grids[agent.population_id]);
             });
 
-            // Deposit
+            // Deposit // TODO - Make this parallel
             for agent in self.agents.iter() {
                 self.grids[agent.population_id].deposit(agent.x, agent.y);
             }
@@ -243,9 +238,7 @@ impl Model {
             time_per_agent_list.push(ms_per_agent);
             time_per_step_list.push(agents_tick_elapsed);
 
-            if debug {
-                println!("Finished tick for all agents. took {}ms\nTime per agent: {}ms\n", agents_tick_elapsed, ms_per_agent);
-            }
+            if debug {println!("Finished tick for all agents. took {}ms\nTime per agent: {}ms\n", agents_tick_elapsed, ms_per_agent)};
 
             self.iteration += 1;
             pb.set_position(i as u64);
diff --git a/src/util.rs b/src/util.rs
index e99d574..3a099c6 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,4 +1,4 @@
 #[inline(always)]
 pub fn wrap(x: f32, max: f32) -> f32 {
-    x - max * ((x > max) as i32 as f32 - (x < 0.0_f32) as i32 as f32)
-}
+    return x - max * ((x > max) as i32 as f32 - (x < 0.0_f32) as i32 as f32);
+}
\ No newline at end of file