sin/cos optimizations and cleanup

This commit is contained in:
Simon Gardling 2021-03-30 12:54:57 -04:00
parent 6e2b5c3d2a
commit f76c9f7401
9 changed files with 99 additions and 33 deletions

7
Cargo.lock generated
View File

@ -248,6 +248,12 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "fastapprox"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0031c93f37b5d18272de2d932ebff6a7eb32d4bc3bab6751a9af42da7d1a424"
[[package]]
name = "getrandom"
version = "0.2.2"
@ -488,6 +494,7 @@ name = "physarum"
version = "0.1.0"
dependencies = [
"criterion",
"fastapprox",
"image",
"indicatif",
"itertools 0.10.0",

View File

@ -12,6 +12,7 @@ rand = "0.8.3"
rand_distr = "0.4"
#rayon = "1.5"
rayon = {git = "https://github.com/rayon-rs/rayon.git"}
fastapprox = "0.3.0"
[dev-dependencies]
criterion = "0.3.4"

17
Notes.md Normal file
View File

@ -0,0 +1,17 @@
## sin and cos optimizations:
### Setup/Info:
- measured in ms/agent ticked
- 2048 iterations
- average of 3 trials
- 256x256 grid
- 1 << 20 particles
- 1 population
### Results:
- normal sin + normal cos:
- 0.000018192ms
- old sin + old cos:
- 0.000019803ms (8.85% slower)
- fastapprox::fast::sin + fastapprox::fast::cos:
- 0.000018658ms (2.56% slower)
- fastapprox::faster::sin + fastapprox::faster::cos:
- 0.000015878ms (14.57% faster)

View File

@ -2,4 +2,7 @@
- Automatically create an mp4 from the generated images
- Instead of using the command `ffmpeg -r 20 -i tmp/out_%d.png -vcodec libx265 -crf 25 -s 512x512 test.mp4` maybe use a rust library to do the same (more research needed)
- GPU compute
- Tried [ArrayFire-rust](https://github.com/arrayfire/arrayfire-rust) didn't work well, looking for another library
- Tried [ArrayFire-rust](https://github.com/arrayfire/arrayfire-rust) didn't work well, looking for another library
- Try using [emu](https://github.com/calebwin/emu) (seems to be a very good option)
- sin and cos optimizations
- sin/cos table?

View File

@ -3,4 +3,5 @@ mod grid;
pub mod model;
mod palette;
mod util;
mod imgdata; // for storing image data
mod imgdata; // for storing image data
mod math; // for math things

View File

@ -2,19 +2,19 @@ use physarum::model;
fn main() {
// # of iterations to go through
let n_iterations = 512;
let n_iterations = 2048;
// Size of grid and pictures
let (width, height) = (256, 256);
// # of agents
let n_particles = 1 << 24;
let n_particles = 1 << 20;
println!("n_particles: {}", n_particles);
let diffusivity = 1;
// `n_populations` is the # of types of agents
let n_populations = 2;
let n_populations = 1;
// let n_populations = 1 + rng.gen_range(1..4); // make # of populations between 2 and 5

44
src/math.rs Normal file
View File

@ -0,0 +1,44 @@
/// Converts an angle expressed in degrees into radians.
///
/// The conversion is a single multiplication by the factor `PI / 180`.
#[inline(always)]
fn to_radians(x: f32) -> f32 {
    const RADIANS_PER_DEGREE: f32 = std::f32::consts::PI / 180.0;
    x * RADIANS_PER_DEGREE
}
/// Previously from trig.rs
/// From https://bits.stephan-brumme.com/absFloat.html
///
/// Returns the magnitude of `x` by clearing the most significant (sign) bit of its
/// bit pattern. The remaining 31 bits are passed through unchanged.
#[allow(dead_code)]
#[inline(always)]
fn abs(x: f32) -> f32 {
    // Every bit set except the top one.
    const CLEAR_SIGN_BIT: u32 = 0x7FFF_FFFF;
    let magnitude_bits = x.to_bits() & CLEAR_SIGN_BIT;
    f32::from_bits(magnitude_bits)
}
/// Previously from trig.rs
/// Branchless floor implementation
///
/// Truncates `x` toward zero through an `i32` cast, then subtracts `1.0` whenever the
/// truncated value ended up above `x` (the case for negative non-integers).
///
/// Because the value passes through `i32`, the result is only a true floor for inputs
/// whose truncation fits in an `i32`; `as` casts saturate outside that range and turn
/// NaN into `0`.
#[allow(dead_code)]
#[inline(always)]
fn floor(x: f32) -> f32 {
    let truncated = (x as i32) as f32;
    // 1.0 when truncation rounded upward, 0.0 otherwise.
    let correction = (x < truncated) as i32 as f32;
    truncated - correction
}
/// Previously from trig.rs
/// Approximates `cos(x)` in radians with the maximum error of `0.002`
/// https://stackoverflow.com/posts/28050328/revisions
///
/// The angle is first scaled by `1 / (2 * PI)` so that one full period has length `1`,
/// then reduced with the local `floor` helper, and finally passed through a parabolic
/// approximation followed by a correction term.
#[allow(dead_code)]
#[inline(always)]
pub fn cos(x: f32) -> f32 {
    // 1 / (2 * PI): converts radians to whole turns.
    const INV_TAU: f32 = 0.5 * std::f32::consts::FRAC_1_PI;

    let turns = x * INV_TAU;
    // Shift by a quarter turn and drop the integer part of the shifted value.
    let reduced = turns - (0.25_f32 + floor(turns + 0.25_f32));
    // First-pass parabolic estimate.
    let estimate = reduced * (16.0_f32 * (abs(reduced) - 0.5_f32));
    // Refinement term that is added on top of the parabolic estimate.
    estimate + 0.225_f32 * estimate * (abs(estimate) - 1.0_f32)
}
/// Previously from trig.rs
/// Approximates `sin(x)` in radians with the maximum error of `0.002`
///
/// Implemented by delegating to the local `cos` approximation with the argument
/// shifted by a quarter period.
#[allow(dead_code)]
#[inline(always)]
pub fn sin(x: f32) -> f32 {
    // sin(x) == cos(x - PI / 2)
    let shifted = x - std::f32::consts::FRAC_PI_2;
    cos(shifted)
}

View File

@ -2,6 +2,7 @@ use crate::{
grid::{combine, Grid, PopulationConfig},
palette::{random_palette, Palette},
imgdata::ImgData,
util::wrap,
};
use rand::{seq::SliceRandom, Rng};
@ -13,7 +14,6 @@ use std::time::{Instant};
use rayon::iter::{ParallelIterator,};
use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle};
use std::path::Path;
use crate::util::wrap;
/// A single Physarum agent. The x and y positions are continuous, hence we use floating point
/// numbers instead of integers.
@ -39,21 +39,7 @@ impl Agent {
}
}
fn get_sensor_coords(&mut self, x: f32, y: f32, sensor_distance: f32, sensor_angle: f32, angle: f32) -> (f32, f32, f32, f32, f32, f32) {
let xc = x + self.angle.cos() * sensor_distance;
let yc = y + self.angle.sin() * sensor_distance;
let agent_add_sens = angle + sensor_angle;
let agent_sub_sens = angle - sensor_angle;
let xl = x + agent_sub_sens.cos() * sensor_distance;
let yl = y + agent_sub_sens.sin() * sensor_distance;
let xr = x + agent_add_sens.cos() * sensor_distance;
let yr = y + agent_add_sens.sin() * sensor_distance;
return (xc, yc, xl, yl, xr, yr);
}
#[inline]
pub fn tick(&mut self, grid: &Grid) {
let (width, height) = (grid.width, grid.height);
let PopulationConfig {
@ -64,7 +50,16 @@ impl Agent {
..
} = grid.config;
let (xc, yc, xl, yl, xr, yr) = Self::get_sensor_coords(self, self.x, self.y, sensor_distance, sensor_angle, self.angle);
let xc = self.x + fastapprox::faster::cos(self.angle) * sensor_distance;
let yc = self.y + fastapprox::faster::sin(self.angle) * sensor_distance;
let agent_add_sens = self.angle + sensor_angle;
let agent_sub_sens = self.angle - sensor_angle;
let xl = self.x + fastapprox::faster::cos(agent_sub_sens) * sensor_distance;
let yl = self.y + fastapprox::faster::sin(agent_sub_sens) * sensor_distance;
let xr = self.x + fastapprox::faster::cos(agent_add_sens) * sensor_distance;
let yr = self.y + fastapprox::faster::sin(agent_add_sens) * sensor_distance;
// We sense from the buffer because this is where we previously combined data from all the grid.
let center = grid.get_buf(xc, yc);
@ -86,9 +81,10 @@ impl Agent {
}
let delta_angle = rotation_angle * direction;
self.angle = wrap(self.angle + delta_angle, TAU);
self.x = wrap(self.x + step_distance * self.angle.cos(), width as f32);
self.y = wrap(self.y + step_distance * self.angle.sin(), height as f32);
self.x = wrap(self.x + step_distance * fastapprox::faster::cos(self.angle), width as f32);
self.y = wrap(self.y + step_distance * fastapprox::faster::sin(self.angle), height as f32);
}
}
@ -195,7 +191,7 @@ impl Model {
/// Simulates `steps` # of steps
#[inline(always)]
#[inline]
pub fn run(&mut self, steps: usize) {
let debug: bool = false;
@ -221,11 +217,10 @@ impl Model {
// Tick agents
self.agents.par_iter_mut().for_each(|agent| {
let grid = &grids[agent.population_id];
agent.tick(grid);
agent.tick(&grids[agent.population_id]);
});
// Deposit
// Deposit // TODO - Make this parallel
for agent in self.agents.iter() {
self.grids[agent.population_id].deposit(agent.x, agent.y);
}
@ -243,9 +238,7 @@ impl Model {
time_per_agent_list.push(ms_per_agent);
time_per_step_list.push(agents_tick_elapsed);
if debug {
println!("Finished tick for all agents. took {}ms\nTime per agent: {}ms\n", agents_tick_elapsed, ms_per_agent);
}
if debug {println!("Finished tick for all agents. took {}ms\nTime per agent: {}ms\n", agents_tick_elapsed, ms_per_agent)};
self.iteration += 1;
pb.set_position(i as u64);

View File

@ -1,4 +1,4 @@
/// Shifts `x` by one multiple of `max` to bring it back toward the range `0..max`.
///
/// Adds `max` when `x < 0.0`, subtracts `max` when `x > max`, and otherwise returns `x`
/// unchanged. The upper comparison is strict, so `x == max` is returned as-is, and a value
/// lying more than one `max` outside the range is only moved by a single `max`.
#[inline(always)]
pub fn wrap(x: f32, max: f32) -> f32 {
x - max * ((x > max) as i32 as f32 - (x < 0.0_f32) as i32 as f32)
}
return x - max * ((x > max) as i32 as f32 - (x < 0.0_f32) as i32 as f32);
}