sin/cos optimizations, cleanup, and optimizations
This commit is contained in:
parent
6e2b5c3d2a
commit
f76c9f7401
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -248,6 +248,12 @@ version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
|
||||
[[package]]
|
||||
name = "fastapprox"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0031c93f37b5d18272de2d932ebff6a7eb32d4bc3bab6751a9af42da7d1a424"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.2"
|
||||
@ -488,6 +494,7 @@ name = "physarum"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"fastapprox",
|
||||
"image",
|
||||
"indicatif",
|
||||
"itertools 0.10.0",
|
||||
|
||||
@ -12,6 +12,7 @@ rand = "0.8.3"
|
||||
rand_distr = "0.4"
|
||||
#rayon = "1.5"
|
||||
rayon = {git = "https://github.com/rayon-rs/rayon.git"}
|
||||
fastapprox = "0.3.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3.4"
|
||||
|
||||
17
Notes.md
Normal file
17
Notes.md
Normal file
@ -0,0 +1,17 @@
|
||||
## sin and cos optimizations:
|
||||
### Setup/Info:
|
||||
- measured in ms/agent ticked
|
||||
- 2048 iterations
|
||||
- average of 3 trials
|
||||
- 256x256 grid
|
||||
- 1 << 20 particles
|
||||
- 1 population
|
||||
### Results:
|
||||
- normal sin + normal cos:
|
||||
- 0.000018192ms
|
||||
- old sin + old cos:
|
||||
- 0.000019803ms (8.85% slower)
|
||||
- fastapprox::fast::sin + fastapprox::fast::cos
|
||||
- 0.000018658ms (2.56% slower)
|
||||
- fastapprox::faster::sin + fastapprox::faster::cos
|
||||
- 0.000015878ms (14.57% faster)
|
||||
5
TODO.md
5
TODO.md
@ -2,4 +2,7 @@
|
||||
- Auto-create an mp4 from generated images
|
||||
- Instead of using the command `ffmpeg -r 20 -i tmp/out_%d.png -vcodec libx265 -crf 25 -s 512x512 test.mp4` maybe use a rust library to do the same (more research needed)
|
||||
- GPU compute
|
||||
- Tried [ArrayFire-rust](https://github.com/arrayfire/arrayfire-rust) didn't work well, looking for another library
|
||||
- Tried [ArrayFire-rust](https://github.com/arrayfire/arrayfire-rust) didn't work well, looking for another library
|
||||
- Try using [emu](https://github.com/calebwin/emu) (seems to be a very good option)
|
||||
- sin and cos optimizations
|
||||
- sin/cos table?
|
||||
@ -3,4 +3,5 @@ mod grid;
|
||||
pub mod model;
|
||||
mod palette;
|
||||
mod util;
|
||||
mod imgdata; // for storing image data
|
||||
mod imgdata; // for storing image data
|
||||
mod math; // for math things
|
||||
@ -2,19 +2,19 @@ use physarum::model;
|
||||
|
||||
fn main() {
|
||||
// # of iterations to go through
|
||||
let n_iterations = 512;
|
||||
let n_iterations = 2048;
|
||||
|
||||
// Size of grid and pictures
|
||||
let (width, height) = (256, 256);
|
||||
|
||||
// # of agents
|
||||
let n_particles = 1 << 24;
|
||||
let n_particles = 1 << 20;
|
||||
println!("n_particles: {}", n_particles);
|
||||
|
||||
let diffusivity = 1;
|
||||
|
||||
// `n_populations` is the # of types of agents
|
||||
let n_populations = 2;
|
||||
let n_populations = 1;
|
||||
// let n_populations = 1 + rng.gen_range(1..4); // make # of populations between 2 and 5
|
||||
|
||||
|
||||
|
||||
44
src/math.rs
Normal file
44
src/math.rs
Normal file
@ -0,0 +1,44 @@
|
||||
/// Converts an angle given in degrees to radians.
///
/// The input is multiplied by the constant factor `PI / 180`.
#[inline(always)]
fn to_radians(x: f32) -> f32 {
    // Conversion factor: how many radians one degree corresponds to.
    const RADIANS_PER_DEGREE: f32 = std::f32::consts::PI / 180.0;
    x * RADIANS_PER_DEGREE
}
|
||||
|
||||
/// Previously from trig.rs
/// Returns the absolute value of `x` by clearing the sign bit of its bit pattern.
/// From https://bits.stephan-brumme.com/absFloat.html
#[allow(dead_code)]
#[inline(always)]
fn abs(x: f32) -> f32 {
    // Keeps every bit except the topmost (sign) bit.
    const MAGNITUDE_MASK: u32 = 0x7FFF_FFFF;
    let magnitude_bits = x.to_bits() & MAGNITUDE_MASK;
    f32::from_bits(magnitude_bits)
}
|
||||
|
||||
/// Previously from trig.rs
/// Floor implementation built on an integer cast instead of a libm call.
///
/// Returns the largest integral value that is less than or equal to `x`.
/// The value is truncated toward zero via an `i32` round-trip and then
/// decremented by one when truncation rounded up (negative non-integers).
///
/// Inputs whose magnitude is at least 2^23 are returned unchanged: every
/// finite `f32` of that size is already integral, and the `i32` cast would
/// otherwise saturate for magnitudes of 2^31 and above. NaN and infinities
/// are passed through unchanged for the same reason.
#[allow(dead_code)]
#[inline(always)]
fn floor(x: f32) -> f32 {
    // From 2^23 upwards an f32 has no fractional bits left.
    const INTEGRAL_THRESHOLD: f32 = 8_388_608.0;

    // The cast saturates (and maps NaN to 0), so it is always safe to
    // evaluate; its result is simply discarded for out-of-range inputs.
    let truncated = (x as i32) as f32;
    // Truncation rounds toward zero; subtract 1 when that overshot `x`.
    let floored = truncated - (x < truncated) as i32 as f32;

    // The comparison is false for NaN, so NaN falls through to `x`.
    if x.abs() < INTEGRAL_THRESHOLD {
        floored
    } else {
        x
    }
}
|
||||
|
||||
/// Previously from trig.rs
|
||||
/// Approximates `cos(x)` in radians with the maximum error of `0.002`
|
||||
/// https://stackoverflow.com/posts/28050328/revisions
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub fn cos(mut x: f32) -> f32 {
|
||||
const ALPHA: f32 = 0.5 * std::f32::consts::FRAC_1_PI;
|
||||
x *= ALPHA;
|
||||
x -= 0.25_f32 + floor(x + 0.25_f32);
|
||||
x *= 16.0_f32 * (abs(x) - 0.5_f32);
|
||||
x += 0.225_f32 * x * (abs(x) - 1.0_f32);
|
||||
return x;
|
||||
}
|
||||
|
||||
/// Previously from trig.rs
|
||||
/// Approximates `sin(x)` in radians with the maximum error of `0.002`
|
||||
#[allow(dead_code)]
|
||||
#[inline(always)]
|
||||
pub fn sin(x: f32) -> f32 {
|
||||
return cos(x - std::f32::consts::FRAC_PI_2);
|
||||
}
|
||||
45
src/model.rs
45
src/model.rs
@ -2,6 +2,7 @@ use crate::{
|
||||
grid::{combine, Grid, PopulationConfig},
|
||||
palette::{random_palette, Palette},
|
||||
imgdata::ImgData,
|
||||
util::wrap,
|
||||
};
|
||||
|
||||
use rand::{seq::SliceRandom, Rng};
|
||||
@ -13,7 +14,6 @@ use std::time::{Instant};
|
||||
use rayon::iter::{ParallelIterator,};
|
||||
use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle};
|
||||
use std::path::Path;
|
||||
use crate::util::wrap;
|
||||
|
||||
/// A single Physarum agent. The x and y positions are continuous, hence we use floating point
|
||||
/// numbers instead of integers.
|
||||
@ -39,21 +39,7 @@ impl Agent {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_sensor_coords(&mut self, x: f32, y: f32, sensor_distance: f32, sensor_angle: f32, angle: f32) -> (f32, f32, f32, f32, f32, f32) {
|
||||
let xc = x + self.angle.cos() * sensor_distance;
|
||||
let yc = y + self.angle.sin() * sensor_distance;
|
||||
|
||||
let agent_add_sens = angle + sensor_angle;
|
||||
let agent_sub_sens = angle - sensor_angle;
|
||||
|
||||
let xl = x + agent_sub_sens.cos() * sensor_distance;
|
||||
let yl = y + agent_sub_sens.sin() * sensor_distance;
|
||||
let xr = x + agent_add_sens.cos() * sensor_distance;
|
||||
let yr = y + agent_add_sens.sin() * sensor_distance;
|
||||
|
||||
return (xc, yc, xl, yl, xr, yr);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn tick(&mut self, grid: &Grid) {
|
||||
let (width, height) = (grid.width, grid.height);
|
||||
let PopulationConfig {
|
||||
@ -64,7 +50,16 @@ impl Agent {
|
||||
..
|
||||
} = grid.config;
|
||||
|
||||
let (xc, yc, xl, yl, xr, yr) = Self::get_sensor_coords(self, self.x, self.y, sensor_distance, sensor_angle, self.angle);
|
||||
let xc = self.x + fastapprox::faster::cos(self.angle) * sensor_distance;
|
||||
let yc = self.y + fastapprox::faster::sin(self.angle) * sensor_distance;
|
||||
|
||||
let agent_add_sens = self.angle + sensor_angle;
|
||||
let agent_sub_sens = self.angle - sensor_angle;
|
||||
|
||||
let xl = self.x + fastapprox::faster::cos(agent_sub_sens) * sensor_distance;
|
||||
let yl = self.y + fastapprox::faster::sin(agent_sub_sens) * sensor_distance;
|
||||
let xr = self.x + fastapprox::faster::cos(agent_add_sens) * sensor_distance;
|
||||
let yr = self.y + fastapprox::faster::sin(agent_add_sens) * sensor_distance;
|
||||
|
||||
// We sense from the buffer because this is where we previously combined data from all the grid.
|
||||
let center = grid.get_buf(xc, yc);
|
||||
@ -86,9 +81,10 @@ impl Agent {
|
||||
}
|
||||
|
||||
let delta_angle = rotation_angle * direction;
|
||||
|
||||
self.angle = wrap(self.angle + delta_angle, TAU);
|
||||
self.x = wrap(self.x + step_distance * self.angle.cos(), width as f32);
|
||||
self.y = wrap(self.y + step_distance * self.angle.sin(), height as f32);
|
||||
self.x = wrap(self.x + step_distance * fastapprox::faster::cos(self.angle), width as f32);
|
||||
self.y = wrap(self.y + step_distance * fastapprox::faster::sin(self.angle), height as f32);
|
||||
}
|
||||
}
|
||||
|
||||
@ -195,7 +191,7 @@ impl Model {
|
||||
|
||||
|
||||
/// Simulates `steps` # of steps
|
||||
#[inline(always)]
|
||||
#[inline]
|
||||
pub fn run(&mut self, steps: usize) {
|
||||
let debug: bool = false;
|
||||
|
||||
@ -221,11 +217,10 @@ impl Model {
|
||||
|
||||
// Tick agents
|
||||
self.agents.par_iter_mut().for_each(|agent| {
|
||||
let grid = &grids[agent.population_id];
|
||||
agent.tick(grid);
|
||||
agent.tick(&grids[agent.population_id]);
|
||||
});
|
||||
|
||||
// Deposit
|
||||
// Deposit // TODO - Make this parallel
|
||||
for agent in self.agents.iter() {
|
||||
self.grids[agent.population_id].deposit(agent.x, agent.y);
|
||||
}
|
||||
@ -243,9 +238,7 @@ impl Model {
|
||||
time_per_agent_list.push(ms_per_agent);
|
||||
time_per_step_list.push(agents_tick_elapsed);
|
||||
|
||||
if debug {
|
||||
println!("Finished tick for all agents. took {}ms\nTime per agent: {}ms\n", agents_tick_elapsed, ms_per_agent);
|
||||
}
|
||||
if debug {println!("Finished tick for all agents. took {}ms\nTime per agent: {}ms\n", agents_tick_elapsed, ms_per_agent)};
|
||||
|
||||
self.iteration += 1;
|
||||
pb.set_position(i as u64);
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#[inline(always)]
|
||||
pub fn wrap(x: f32, max: f32) -> f32 {
|
||||
x - max * ((x > max) as i32 as f32 - (x < 0.0_f32) as i32 as f32)
|
||||
}
|
||||
return x - max * ((x > max) as i32 as f32 - (x < 0.0_f32) as i32 as f32);
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user