physarum/src/blur.rs

use itertools::multizip;
use rayon::prelude::*;

#[derive(Debug)]
pub struct Blur {
    row_buffer: Vec<f32>,
}

impl Clone for Blur {
    fn clone(&self) -> Blur {
        return Blur {
            row_buffer: self.row_buffer.clone(),
        }
    }
}

impl Blur {
    pub fn new(width: usize) -> Self {
        Blur {
            row_buffer: vec![0.0; width],
        }
    }

    /// Blur an image with 2 box filter passes. The result will be written to the src slice, while
    /// the buf slice is used as a scratch space.
    pub fn run(
        &mut self,
        src: &mut [f32],
        buf: &mut [f32],
        width: usize,
        height: usize,
        sigma: f32,
        decay: f32,
    ) {
        let boxes = Blur::boxes_for_gaussian::<2>(sigma);
        self.box_blur(src, buf, width, height, boxes[0], 1.0);
        self.box_blur(src, buf, width, height, boxes[1], decay);
    }

    /// Approximate 1D Gaussian filter of standard deviation sigma with N box filter passes. Each
    /// element in the output array contains the radius of the box filter for the corresponding
    /// pass.
    fn boxes_for_gaussian<const N: usize>(sigma: f32) -> ([usize; N]) {
        let w_ideal = (12.0 * sigma * sigma / N as f32 + 1.0).sqrt();
        let mut w = w_ideal as usize;
        w -= 1 - (w & 1);
        let mut m = 0.25 * (N * (w + 3)) as f32;
        m -= 3.0 * sigma * sigma / (w + 1) as f32;
        let m = m.round() as usize;

        let mut result = [0; N];
        for (i, value) in result.iter_mut().enumerate() {
            *value = (if i < m { w - 1 } else { w + 1 }) / 2;
        }
        result
    }

    /// Perform one pass of the 2D box filter of the given radius. The result will be written to the
    /// src slice, while the buf slice is used as a scratch space.
    fn box_blur(
        &mut self,
        src: &mut [f32],
        buf: &mut [f32],
        width: usize,
        height: usize,
        radius: usize,
        decay: f32,
    ) {
        self.box_blur_h(src, buf, width, radius);
        self.box_blur_v(buf, src, width, height, radius, decay);
    }

    /// Perform one pass of the 1D box filter of the given radius along x axis.
    fn box_blur_h(&mut self, src: &[f32], dst: &mut [f32], width: usize, radius: usize) {
        let weight = 1.0 / (2 * radius + 1) as f32;

        src.par_chunks_exact(width)
            .zip(dst.par_chunks_exact_mut(width))
            .for_each(|(src_row, dst_row)| {
                // First we build a value for the beginning of each row. We assume periodic boundary
                // conditions, so we need to push the left index to the opposite side of the row.
                let mut value = src_row[width - radius - 1];
                for j in 0..radius {
                    value += src_row[width - radius + j] + src_row[j];
                }

                for (i, dst_elem) in dst_row.iter_mut().enumerate() {
                    let left = (i + width - radius - 1) & (width - 1);
                    let right = (i + radius) & (width - 1);
                    value += src_row[right] - src_row[left];
                    *dst_elem = value * weight;
                }
            })
    }

    /// Perform one pass of the 1D box filter of the given radius along y axis. Applies the decay
    /// factor to the destination buffer.
    fn box_blur_v(
        &mut self,
        src: &[f32],
        dst: &mut [f32],
        width: usize,
        height: usize,
        radius: usize,
        decay: f32,
    ) {
        let weight = decay / (2 * radius + 1) as f32;

        // We don't replicate the horizontal filter logic because of the cache-unfriendly memory
        // access patterns of sequential iteration over individual columns. Instead, we iterate over
        // rows via loop interchange.
        let offset = (height - radius - 1) * width;
        self.row_buffer
            .copy_from_slice(&src[offset..offset + width]);

        for j in 0..radius {
            let bottom_off = (height - radius + j) * width;
            let bottom_row = &src[bottom_off..bottom_off + width];
            let top_off = j * width;
            let top_row = &src[top_off..top_off + width];

            for (buf, bottom, top) in multizip((&mut self.row_buffer, bottom_row, top_row)) {
                *buf += bottom + top;
            }
        }

        // The outer loop cannot be parallelized because we need to use the buffer sequentially.
        for (i, dst_row) in dst.chunks_exact_mut(width).enumerate() {
            let bottom_off = ((i + height - radius - 1) & (height - 1)) * width;
            let bottom_row = &src[bottom_off..bottom_off + width];
            let top_off = ((i + radius) & (height - 1)) * width;
            let top_row = &src[top_off..top_off + width];

            for (dst, buf, bottom, top) in
                multizip((dst_row, &mut self.row_buffer, bottom_row, top_row))
            {
                *buf += top - bottom;
                *dst = *buf * weight;
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blur() {
        // The values from this test were obtained using Python's code:
        // a = np.random.rand(8, 8)
        // ndimage.uniform_filter1d(a, size=3, axis = 1, mode='wrap') # horizontal blur
        // ndimage.uniform_filter1d(a, size=3, axis = 0, mode='wrap') # vertical blur
        // ndimage.uniform_filter(a, size=3, mode='wrap') # 2D blur

        let mut src: Vec<f32> = vec![
            0.32352856, 0.06571674, 0.01939427, 0.06352045, 0.70852702, 0.61722172, 0.16638431,
            0.62840077, 0.55489392, 0.24007676, 0.32500995, 0.08515139, 0.67984092, 0.6975669,
            0.73623422, 0.55053085, 0.69222768, 0.22727048, 0.13594262, 0.10002105, 0.16099514,
            0.07719103, 0.23984282, 0.9083058, 0.64222744, 0.96893419, 0.74662715, 0.71562013,
            0.73654653, 0.70610344, 0.22101117, 0.75572186, 0.69195882, 0.83741404, 0.27583158,
            0.57257051, 0.68160597, 0.39237339, 0.33524343, 0.89396836, 0.60296932, 0.17130112,
            0.1733834, 0.77127819, 0.99537134, 0.91504964, 0.49312109, 0.43035202, 0.70297265,
            0.36734178, 0.4551964, 0.47104315, 0.60374777, 0.73872683, 0.5630592, 0.97440224,
            0.63368284, 0.84109297, 0.24447136, 0.750384, 0.16893725, 0.54225663, 0.43560783,
            0.41497124,
        ];
        let (width, height) = (8, 8);
        let mut dst = vec![0.0; width * height];
        let mut blur = Blur::new(width);

        blur.box_blur_h(&src, &mut dst, width, 1);
        let mut sol: Vec<f32> = vec![
            0.33921536, 0.13621319, 0.04954382, 0.26381392, 0.46308973, 0.49737768, 0.47066893,
            0.37277121, 0.44850051, 0.37332688, 0.21674603, 0.36333409, 0.48751974, 0.70454735,
            0.66144399, 0.61388633, 0.60926799, 0.35181359, 0.15441138, 0.1323196, 0.11273574,
            0.159343, 0.40844655, 0.61345877, 0.78896116, 0.78592959, 0.81039382, 0.73293127,
            0.71942337, 0.55455371, 0.56094549, 0.53965349, 0.80778041, 0.60173482, 0.56193871,
            0.51000269, 0.54884996, 0.46974093, 0.54052839, 0.64039021, 0.40154082, 0.31588461,
            0.37198757, 0.64667764, 0.89389972, 0.80118069, 0.61284092, 0.50881414, 0.68157222,
            0.50850361, 0.43119378, 0.50999577, 0.60450592, 0.63517794, 0.75872942, 0.74681136,
            0.62991568, 0.57308239, 0.61198277, 0.38793087, 0.48719263, 0.38226724, 0.46427857,
            0.49475397,
        ];
        for (v1, v2) in dst.iter().zip(sol) {
            assert!((v1 - v2).abs() < 1e-6);
        }

        blur.box_blur_v(&src, &mut dst, width, height, 1, 1.0);
        sol = vec![
            0.50403511, 0.38229549, 0.19629186, 0.29968528, 0.51910173, 0.61901508, 0.44607546,
            0.53130095, 0.52355005, 0.177688, 0.16011561, 0.08289763, 0.51645436, 0.46399322,
            0.38082045, 0.69574581, 0.62978301, 0.47876048, 0.40252657, 0.30026419, 0.5257942,
            0.49362046, 0.3990294, 0.73818617, 0.67547131, 0.67787291, 0.38613379, 0.46273723,
            0.52638255, 0.39188929, 0.26536581, 0.85266534, 0.64571853, 0.65921645, 0.39861405,
            0.68648961, 0.80450795, 0.67117549, 0.34979189, 0.69334741, 0.66596693, 0.45868565,
            0.30147046, 0.60496395, 0.76024169, 0.68204996, 0.46380791, 0.76624087, 0.6465416,
            0.45991196, 0.29101705, 0.66423511, 0.58935212, 0.73201103, 0.49726271, 0.60657516,
            0.55339468, 0.42471716, 0.23968734, 0.42831587, 0.49373735, 0.63273506, 0.38835045,
            0.67259142,
        ];
        for (v1, v2) in dst.iter().zip(sol) {
            assert!((v1 - v2).abs() < 1e-6);
        }

        blur.box_blur(&mut src, &mut dst, width, height, 1, 1.0);
        sol = vec![
            0.47254385, 0.36087415, 0.29275754, 0.33835963, 0.47926736, 0.52806409, 0.5321305,
            0.49380384, 0.46566129, 0.28711789, 0.14023375, 0.25315587, 0.3544484, 0.45375601,
            0.51351983, 0.5333721, 0.61557655, 0.50369002, 0.39385041, 0.40952832, 0.43989295,
            0.47281469, 0.54361201, 0.58899953, 0.73533652, 0.579826, 0.50891464, 0.45841785,
            0.46033635, 0.39454588, 0.50330681, 0.59783415, 0.66609413, 0.56784968, 0.58144004,
            0.62987053, 0.72072435, 0.60849178, 0.57143827, 0.56295261, 0.63029782, 0.47537435,
            0.45504002, 0.5555587, 0.68241853, 0.63536652, 0.63736624, 0.63200524, 0.57100958,
            0.46582354, 0.47172137, 0.5148681, 0.66186609, 0.60620862, 0.61194964, 0.58345982,
            0.55023442, 0.40593306, 0.36424013, 0.38724685, 0.51826276, 0.50494095, 0.56455897,
            0.53811218,
        ];
        for (v1, v2) in src.iter().zip(sol) {
            assert!((v1 - v2).abs() < 1e-6);
        }
    }

    #[test]
    fn test_boxes_for_gaussian() {
        let boxes = Blur::boxes_for_gaussian::<3>(1.5);
        assert_eq!(boxes, [1, 1, 1]);

        let boxes = Blur::boxes_for_gaussian::<3>(1.8);
        assert_eq!(boxes, [1, 1, 2]);

        let boxes = Blur::boxes_for_gaussian::<3>(2.5);
        assert_eq!(boxes, [2, 2, 2]);
    }
}