(bad) opencl/cuda implementation (slower than cpu atm)

This commit is contained in:
Simon Gardling 2021-03-26 19:36:24 +00:00
parent 7965d55de9
commit a479181882
5 changed files with 252 additions and 36 deletions

65
Cargo.lock generated
View File

@ -14,6 +14,21 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
[[package]]
name = "arrayfire"
version = "3.8.0"
source = "git+https://github.com/arrayfire/arrayfire-rust.git#89528902409a849ee92c982d19024789112e510c"
dependencies = [
"half",
"lazy_static",
"libc",
"num",
"rustc_version",
"serde",
"serde_derive",
"serde_json",
]
[[package]]
name = "atty"
version = "0.2.14"
@ -330,7 +345,7 @@ dependencies = [
"gif",
"jpeg-decoder",
"num-iter",
"num-rational",
"num-rational 0.3.2",
"num-traits",
"png",
"scoped_threadpool",
@ -464,6 +479,41 @@ dependencies = [
"autocfg",
]
[[package]]
name = "num"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8536030f9fea7127f841b45bb6243b27255787fb4eb83958aa1ef9d2fdc0c36"
dependencies = [
"num-bigint",
"num-complex",
"num-integer",
"num-iter",
"num-rational 0.2.4",
"num-traits",
]
[[package]]
name = "num-bigint"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "090c7f9998ee0ff65aa5b723e4009f7b217707f1fb5ea551329cc4d6231fb304"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-complex"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-integer"
version = "0.1.44"
@ -485,6 +535,18 @@ dependencies = [
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c000134b5dbf44adc5cb772486d335293351644b801551abe8f75c84cfa4aef"
dependencies = [
"autocfg",
"num-bigint",
"num-integer",
"num-traits",
]
[[package]]
name = "num-rational"
version = "0.3.2"
@ -532,6 +594,7 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
name = "physarum"
version = "0.1.0"
dependencies = [
"arrayfire",
"chrono",
"criterion",
"image",

View File

@ -12,6 +12,8 @@ itertools = "0.10"
rand = "0.8.3"
rand_distr = "0.4"
rayon = "1.5"
arrayfire = {git = "https://github.com/arrayfire/arrayfire-rust.git"}
#arrayfire = "3.8.0"
[dev-dependencies]
criterion = "0.3"

View File

@ -4,27 +4,6 @@ use crate::{
};
use rayon::iter::{ParallelIterator, IntoParallelIterator};
// for file stuff
use std::fs;
use std::io::{BufRead, Write, BufReader};
use std::fs::File;
use std::path::Path;
use std::fs::OpenOptions;
/*
fn get_resumed_primes(file_path: &str) -> Vec<i32> {
let path = Path::new(file_path);
let lines = lines_from_file(path);
let resumed_primes = lines.par_iter().map(|x| {
return str::replace(&str::replace(x, "Invalid: ", ""), "Prime: ", "").parse::<i32>().unwrap();
}).collect();
return resumed_primes;
}
*/
// Class for storing data that will be used to create images
pub struct ImgData {
pub grids: Vec<Grid>,

View File

@ -2,17 +2,25 @@ use chrono::{DateTime, Utc};
use indicatif::{ProgressBar, ProgressStyle};
use physarum::model;
use rand::Rng;
use arrayfire as af;
fn main() {
backend_man();
// af::set_backend(af::Backend::CPU);
af::set_device(0);
af::info();
// let n_iterations = 16384;
let n_iterations = 4096;
let n_iterations = 2024;
// let n_iterations = 10;
// let (width, height) = (512, 512);
let (width, height) = (1024, 1024);
let (width, height) = (512, 512);
// let (width, height) = (1024, 1024);
// let (width, height) = (2048, 2048);
let n_particles = 1 << 22;
// let n_particles = 1 << 22;
let n_particles = 1 << 24;
// let n_particles = 100;
println!("n_particles: {}", n_particles);
let diffusivity = 1;
let mut rng = rand::thread_rng();
@ -26,12 +34,15 @@ fn main() {
.progress_chars("#>-"),
);
let n_populations = 1 + rng.gen_range(1..4);
// let n_populations = 1 + rng.gen_range(1..4);
let n_populations = 2;
let mut model = model::Model::new(width, height, n_particles, n_populations, diffusivity);
model.print_configurations();
// let dims = af::Dim4::new(&[n_particles as u64, 1, 1, 1]);
for i in 0..n_iterations {
model.step();
// model.step_cl(dims);
pb.set_position(i);
}
pb.finish();
@ -41,3 +52,31 @@ fn main() {
model.flush_image_data();
println!("Done!");
}
/// Picks the ArrayFire backend used for the rest of the run.
///
/// CUDA is preferred when ArrayFire reports it as available; otherwise the CPU
/// backend is selected. The OpenCL backend is intentionally not considered.
/// If neither CUDA nor CPU is reported as available, the active backend is
/// left untouched and nothing is printed.
fn backend_man() {
    let backends = af::get_available_backends();

    if backends.contains(&af::Backend::CUDA) {
        println!("Evaluating CUDA Backend...");
        af::set_backend(af::Backend::CUDA);
        println!("There are {} CUDA compute devices", af::device_count());
    } else if backends.contains(&af::Backend::CPU) {
        println!("Evaluating CPU Backend...");
        af::set_backend(af::Backend::CPU);
        println!("There are {} CPU compute devices", af::device_count());
    }
}

View File

@ -4,20 +4,15 @@ use crate::{
imgdata::ImgData,
};
use rand::{seq::SliceRandom, Rng};
use rand_distr::{Distribution, Normal};
use rayon::prelude::*;
use itertools::multizip;
use std::f32::consts::TAU;
use std::time::{Duration, Instant};
use rayon::iter::{ParallelIterator, IntoParallelIterator};
use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle};
use arrayfire as af;
use std::path::Path;
/// A single Physarum agent. The x and y positions are continuous, hence we use floating point
@ -28,17 +23,19 @@ struct Agent {
y: f32,
angle: f32,
population_id: usize,
i: usize,
}
impl Agent {
/// Construct a new agent with random parameters.
fn new<R: Rng + ?Sized>(width: usize, height: usize, id: usize, rng: &mut R) -> Self {
fn new<R: Rng + ?Sized>(width: usize, height: usize, id: usize, rng: &mut R, i: usize) -> Self {
let (x, y, angle) = rng.gen::<(f32, f32, f32)>();
Agent {
x: x * width as f32,
y: y * height as f32,
angle: angle * TAU,
population_id: id,
i: i,
}
}
@ -59,6 +56,25 @@ impl Agent {
}
}
/// Field-by-field copy of an agent; every field is a plain scalar.
impl Clone for Agent {
    fn clone(&self) -> Agent {
        // Destructure once so each field is named exactly one time on each side.
        let Agent {
            x,
            y,
            angle,
            population_id,
            i,
        } = *self;

        Agent {
            x,
            y,
            angle,
            population_id,
            i,
        }
    }
}
/// Two agents are equal when position, heading, population and index all match.
///
/// The floating-point fields use ordinary `f32` equality, so an agent holding a
/// NaN in `x`, `y` or `angle` never compares equal — not even to itself.
impl PartialEq for Agent {
    fn eq(&self, other: &Self) -> bool {
        // Tuple comparison checks the members left to right and stops at the
        // first mismatch, the same order as a chained `&&`.
        (self.x, self.y, self.angle, self.population_id, self.i)
            == (other.x, other.y, other.angle, other.population_id, other.i)
    }
}
/// Top-level simulation class.
pub struct Model {
// Physarum agents.
@ -127,7 +143,7 @@ impl Model {
Model {
agents: (0..n_particles)
.map(|i| Agent::new(width, height, i / particles_per_grid, &mut rng))
.map(|i| Agent::new(width, height, i / particles_per_grid, &mut rng, i))
.collect(),
grids: (0..n_populations)
.map(|_| Grid::new(width, height, &mut rng))
@ -160,7 +176,7 @@ impl Model {
combine(grids, &self.attraction_table);
// println!("Starting tick for all agents...");
let agents_tick_time = Instant::now();
// let agents_tick_time = Instant::now();
self.agents.par_iter_mut().for_each(|agent| {
let grid = &grids[agent.population_id];
let PopulationConfig {
@ -215,6 +231,123 @@ impl Model {
self.iteration += 1;
}
pub fn step_cl(&mut self, dims: af::Dim4) {
// Combine grids
let grids = &mut self.grids;
combine(grids, &self.attraction_table);
println!("Starting tick for all agents...");
let agents_tick_time = Instant::now();
let agents_list = &*self.agents.clone();
let agent_num: usize = self.agents.len() as usize;
// let dims = af::Dim4::new(&[self.agents.len() as u64, 1, 1, 1]);
let agent_angles_list: Vec<f32> = agents_list.iter().map(|agent| agent.angle).collect();
let agent_x_list: Vec<f32> = agents_list.iter().map(|agent| agent.x).collect();
let agent_y_list: Vec<f32> = agents_list.iter().map(|agent| agent.y).collect();
let mut sensor_distance_list: Vec<f32> = Vec::new();
let mut sensor_angle_list: Vec<f32> = Vec::new();
let mut rotation_angle_list: Vec<f32> = Vec::new();
let mut step_distance_list: Vec<f32> = Vec::new();
for agent in &*self.agents.clone() {
let grid = &grids[agent.population_id];
let PopulationConfig {
sensor_distance,
sensor_angle,
rotation_angle,
step_distance,
..
} = grid.config;
sensor_distance_list.push(sensor_distance);
sensor_angle_list.push(sensor_angle);
rotation_angle_list.push(rotation_angle);
step_distance_list.push(step_distance);
}
let sensor_distance = af::Array::new(&sensor_distance_list, dims);
let sensor_angle = af::Array::new(&sensor_angle_list, dims);
let agent_angles = af::Array::new(&agent_angles_list, dims);
let agent_x = af::Array::new(&agent_x_list, dims);
let agent_y = af::Array::new(&agent_y_list, dims);
let cos_angles = af::cos(&agent_angles);
let sin_angles = af::sin(&agent_angles);
let cos_angle_dis = af::mul(&cos_angles, &sensor_distance, false);
let sin_angle_dis = af::mul(&sin_angles, &sensor_distance, false);
let xc = Self::to_vec(&af::add(&agent_x, &cos_angle_dis, false));
let yc = Self::to_vec(&af::add(&agent_y, &sin_angle_dis, false));
let agent_add_sens = &agent_angles + &sensor_angle;
let agent_sub_sens = &agent_angles - &sensor_angle;
let agent_add_sens_mul = af::mul(&agent_add_sens, &sensor_distance, false);
let agent_sub_sens_mul = af::mul(&agent_sub_sens, &sensor_distance, false);
let xl = Self::to_vec(&af::add(&agent_x, &af::sin(&agent_sub_sens_mul), false));
let yl = Self::to_vec(&af::add(&agent_y, &af::sin(&agent_sub_sens_mul), false));
let xr = Self::to_vec(&af::add(&agent_x, &af::sin(&agent_add_sens_mul), false));
let yr = Self::to_vec(&af::add(&agent_y, &af::sin(&agent_add_sens_mul), false));
self.agents.par_iter_mut().for_each(|agent| {
let i: usize = agent.i;
let rotation_angle = rotation_angle_list[i];
let step_distance = rotation_angle_list[i];
let xc = xc[i];
let xl = xl[i];
let xr = xr[i];
let yc = yc[i];
let yl = yl[i];
let yr = yr[i];
let grid = &grids[agent.population_id];
let (width, height) = (grid.width, grid.height);
let trail_c = grid.get_buf(xc, yc);
let trail_l = grid.get_buf(xl, yl);
let trail_r = grid.get_buf(xr, yr);
let mut rng = rand::thread_rng();
let direction = Model::pick_direction(trail_c, trail_l, trail_r, &mut rng);
agent.rotate_and_move(direction, rotation_angle, step_distance, width, height);
});
// /*
let agents_tick_elapsed = agents_tick_time.elapsed().as_millis();
let ms_per_agent: f64 = (agents_tick_elapsed as f64) / (self.agents.len() as f64);
println!("Finished tick for all agents. took {}ms\nTime peragent: {}ms", agents_tick_time.elapsed().as_millis(), ms_per_agent);
// */
// Deposit
for agent in self.agents.iter() {
self.grids[agent.population_id].deposit(agent.x, agent.y);
}
// Diffuse + Decay
let diffusivity = self.diffusivity;
self.grids.par_iter_mut().for_each(|grid| {
grid.diffuse(diffusivity);
});
self.save_image_data();
self.iteration += 1;
}
/// Copies the contents of an ArrayFire array into a freshly allocated `Vec`.
///
/// The vector is created with `array.elements()` default-initialised entries
/// and then filled by `Array::host`.
fn to_vec<T: af::HasAfEnum + Default + Clone>(array: &af::Array<T>) -> Vec<T> {
    let element_count = array.elements();
    let mut host_buffer = vec![T::default(); element_count];
    array.host(&mut host_buffer);
    host_buffer
}
fn save_image_data(&mut self) {
let grids = self.grids.clone();