// physarum/src/blur.rs

use itertools::multizip;
use rayon::prelude::*;

/// Reusable state for the box-filter based blur.
///
/// `Clone` is derived: a clone owns an independent copy of the row buffer.
#[derive(Debug, Clone)]
pub struct Blur {
    /// Scratch row holding one running sum per image column; used by the vertical pass.
    row_buffer: Vec<f32>,
}

impl Blur {
    pub fn new(width: usize) -> Self {
        Blur {
            row_buffer: vec![0.0; width],
        }
    }

    // Blur an image with 2 box filter passes. The result will be written to the src slice, while the buf slice is used as a scratch space.
    pub fn run(
        &mut self,
        src: &mut [f32],
        buf: &mut [f32],
        width: usize,
        height: usize,
        sigma: f32,
        decay: f32,
    ) {
        let boxes = Blur::boxes_for_gaussian::<2>(sigma);
        self.box_blur(src, buf, width, height, boxes[0], 1.0);
        self.box_blur(src, buf, width, height, boxes[1], decay);
    }

    /// Approximate a 1D Gaussian filter of standard deviation `sigma` with `N` box filter passes.
    ///
    /// Returns the radius of the box filter for each pass. The first `m` passes use the narrower
    /// box (width `w`, the largest odd integer not exceeding the ideal width) and the remaining
    /// passes use the next wider odd box (width `w + 2`).
    ///
    /// NOTE(review): `sigma` is expected to be finite. A NaN `sigma` casts to an ideal width of
    /// zero, which makes the odd-rounding step below underflow.
    fn boxes_for_gaussian<const N: usize>(sigma: f32) -> [usize; N] {
        let w_ideal = (12.0 * sigma * sigma / N as f32 + 1.0).sqrt();

        // Round the ideal width down to the nearest odd integer.
        let mut w = w_ideal as usize;
        if w % 2 == 0 {
            w -= 1;
        }

        // Number of passes that should use the narrower box.
        let m_ideal = 0.25 * (N * (w + 3)) as f32 - 3.0 * sigma * sigma / (w + 1) as f32;
        let m = m_ideal.round() as usize;

        let narrow_radius = (w - 1) / 2;
        let wide_radius = (w + 1) / 2;

        let mut radii = [wide_radius; N];
        for radius in radii.iter_mut().take(m) {
            *radius = narrow_radius;
        }
        radii
    }

    /// Perform one pass of the 2D box filter of the given radius.
    ///
    /// The pass is separable: a horizontal blur from `src` into `buf`, followed by a vertical
    /// blur from `buf` back into `src`. The result therefore lands in `src`, with `buf` serving
    /// as scratch space. `decay` is forwarded to the vertical step.
    fn box_blur(
        &mut self,
        src: &mut [f32],
        buf: &mut [f32],
        width: usize,
        height: usize,
        radius: usize,
        decay: f32,
    ) {
        // Horizontal: src -> buf.
        self.box_blur_h(&*src, buf, width, radius);
        // Vertical (with decay): buf -> src.
        self.box_blur_v(&*buf, src, width, height, radius, decay);
    }

    /// Perform one pass of the 1D box filter of the given radius along the x axis.
    ///
    /// `src` and `dst` are treated as consecutive rows of `width` elements. Each source row is
    /// filtered independently (rows are processed in parallel) into the matching destination
    /// row, using a running sum over a window of `2 * radius + 1` samples. Boundaries are
    /// periodic: the window wraps around to the opposite end of the row. Wrapping is done by
    /// cycling over the row, so `width` does not have to be a power of two.
    ///
    /// `radius` must be smaller than `width`; otherwise the index arithmetic below underflows or
    /// indexes out of bounds. `self` is not used by this pass.
    fn box_blur_h(&mut self, src: &[f32], dst: &mut [f32], width: usize, radius: usize) {
        let weight = 1.0 / (2 * radius + 1) as f32;

        src.par_chunks_exact(width)
            .zip(dst.par_chunks_exact_mut(width))
            .for_each(|(src_row, dst_row)| {
                // Leftmost sample of the window centred on x = -1, wrapped to the row's end.
                let first_leaving = width - radius - 1;

                // Seed the running sum with the window centred on x = -1: the last
                // `radius + 1` samples of the row plus its first `radius` samples.
                let mut value = src_row[first_leaving];
                let tail = &src_row[first_leaving + 1..];
                let head = &src_row[..radius];
                for (&from_tail, &from_head) in tail.iter().zip(head) {
                    value += from_tail + from_head;
                }

                // Slide the window one sample at a time. `cycle` provides the periodic
                // wrap-around for both the sample entering on the right and the one leaving
                // on the left.
                let leaving = src_row.iter().cycle().skip(first_leaving);
                let entering = src_row.iter().cycle().skip(radius);
                for ((dst_elem, &old), &new) in dst_row.iter_mut().zip(leaving).zip(entering) {
                    value += new - old;
                    *dst_elem = value * weight;
                }
            })
    }

    /// Perform one pass of the 1D box filter of the given radius along the y axis.
    ///
    /// `src` and `dst` are treated as `height` consecutive rows of `width` elements. The decay
    /// factor is folded into the filter weight, so every value written to `dst` is already
    /// scaled by `decay`. Boundaries are periodic, and row indices wrap with a true modulo, so
    /// `height` does not have to be a power of two.
    ///
    /// `self.row_buffer` holds one running sum per column and must have exactly `width`
    /// elements (`copy_from_slice` panics otherwise). `radius` must be smaller than `height`.
    fn box_blur_v(
        &mut self,
        src: &[f32],
        dst: &mut [f32],
        width: usize,
        height: usize,
        radius: usize,
        decay: f32,
    ) {
        let weight = decay / (2 * radius + 1) as f32;

        // We don't replicate the horizontal filter logic because of the cache-unfriendly memory
        // access patterns of sequential iteration over individual columns. Instead, we iterate
        // over rows via loop interchange: the row buffer keeps the running sum of every column.
        let height_sub_radius = height - radius;

        // Seed the sums with the window centred on y = -1: start from its topmost row ...
        let seed_start = (height_sub_radius - 1) * width;
        self.row_buffer
            .copy_from_slice(&src[seed_start..seed_start + width]);

        // ... then add the remaining wrapped rows from the end of the image together with the
        // first `radius` rows.
        for j in 0..radius {
            let wrapped_start = (height_sub_radius + j) * width;
            let wrapped_row = &src[wrapped_start..wrapped_start + width];
            let leading_start = j * width;
            let leading_row = &src[leading_start..leading_start + width];

            for (acc, wrapped, leading) in
                multizip((&mut self.row_buffer, wrapped_row, leading_row))
            {
                *acc += wrapped + leading;
            }
        }

        // The outer loop cannot be parallelized because each row's sums are derived from the
        // previous row's sums stored in the buffer.
        for (i, dst_row) in dst.chunks_exact_mut(width).enumerate() {
            // Row that drops out of the window when it moves down by one.
            let leaving_start = ((i + height_sub_radius - 1) % height) * width;
            let leaving_row = &src[leaving_start..leaving_start + width];
            // Row that enters the window.
            let entering_start = ((i + radius) % height) * width;
            let entering_row = &src[entering_start..entering_start + width];

            for (out, acc, leaving, entering) in
                multizip((dst_row, &mut self.row_buffer, leaving_row, entering_row))
            {
                *acc += entering - leaving;
                *out = *acc * weight;
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blur() {
        // The reference values in this test were generated with the following Python code:
        // a = np.random.rand(8, 8)
        // ndimage.uniform_filter1d(a, size=3, axis = 1, mode='wrap') # horizontal blur
        // ndimage.uniform_filter1d(a, size=3, axis = 0, mode='wrap') # vertical blur
        // ndimage.uniform_filter(a, size=3, mode='wrap') # 2D blur

        // Element-wise comparison with an absolute tolerance of 1e-6.
        fn assert_all_close(actual: &[f32], expected: &[f32]) {
            for (v1, v2) in actual.iter().zip(expected) {
                assert!((v1 - v2).abs() < 1e-6);
            }
        }

        let mut src: Vec<f32> = vec![
            0.32352856, 0.06571674, 0.01939427, 0.06352045, 0.708_527, 0.617_221_7, 0.16638431,
            0.628_400_74, 0.554_893_9, 0.240_076_77, 0.325_009_94, 0.08515139, 0.679_840_9, 0.6975669,
            0.736_234_25, 0.55053085, 0.692_227_66, 0.22727048, 0.13594262, 0.10002105, 0.16099514,
            0.07719103, 0.23984282, 0.9083058, 0.642_227_4, 0.968_934_2, 0.74662715, 0.715_620_1,
            0.736_546_5, 0.70610344, 0.221_011_18, 0.755_721_87, 0.691_958_84, 0.837_414, 0.27583158,
            0.572_570_5, 0.681_606, 0.392_373_38, 0.33524343, 0.893_968_34, 0.602_969_35, 0.171_301_13,
            0.1733834, 0.771_278_2, 0.99537134, 0.915_049_6, 0.493_121_1, 0.430_352_03, 0.70297265,
            0.367_341_8, 0.4551964, 0.471_043_14, 0.603_747_8, 0.738_726_85, 0.5630592, 0.974_402_25,
            0.633_682_85, 0.841_092_94, 0.24447136, 0.750384, 0.16893725, 0.542_256_65, 0.435_607_82,
            0.414_971_23,
        ];
        let (width, height) = (8, 8);
        let mut dst = vec![0.0; width * height];
        let mut blur = Blur::new(width);

        // Horizontal pass only.
        let expected_horizontal: Vec<f32> = vec![
            0.339_215_37, 0.136_213_18, 0.04954382, 0.263_813_9, 0.46308973, 0.497_377_7, 0.470_668_94,
            0.372_771_2, 0.448_500_5, 0.373_326_87, 0.21674603, 0.363_334_1, 0.48751974, 0.70454735,
            0.661_444, 0.613_886_36, 0.609_268, 0.351_813_58, 0.15441138, 0.1323196, 0.11273574,
            0.159343, 0.40844655, 0.613_458_75, 0.788_961_2, 0.785_929_56, 0.810_393_8, 0.732_931_26,
            0.719_423_35, 0.554_553_7, 0.560_945_5, 0.539_653_5, 0.807_780_4, 0.601_734_8, 0.561_938_7,
            0.510_002_7, 0.548_849_94, 0.46974093, 0.540_528_4, 0.640_390_2, 0.40154082, 0.315_884_62,
            0.371_987_58, 0.646_677_6, 0.893_899_74, 0.801_180_66, 0.612_840_9, 0.508_814_16, 0.681_572_2,
            0.508_503_6, 0.431_193_77, 0.509_995_76, 0.604_505_9, 0.635_177_9, 0.758_729_4, 0.746_811_33,
            0.629_915_65, 0.573_082_4, 0.611_982_76, 0.38793087, 0.48719263, 0.38226724, 0.464_278_58,
            0.494_753_96,
        ];
        blur.box_blur_h(&src, &mut dst, width, 1);
        assert_all_close(&dst, &expected_horizontal);

        // Vertical pass only (no decay).
        let expected_vertical: Vec<f32> = vec![
            0.504_035_1, 0.382_295_5, 0.19629186, 0.299_685_27, 0.519_101_74, 0.619_015_1, 0.446_075_47,
            0.531_300_96, 0.523_550_03, 0.177688, 0.16011561, 0.08289763, 0.516_454_34, 0.46399322,
            0.38082045, 0.695_745_8, 0.629_783_03, 0.47876048, 0.402_526_56, 0.300_264_18, 0.5257942,
            0.49362046, 0.3990294, 0.738_186_2, 0.675_471_3, 0.677_872_9, 0.386_133_8, 0.46273723,
            0.526_382_57, 0.391_889_3, 0.265_365_8, 0.852_665_36, 0.645_718_5, 0.659_216_46, 0.39861405,
            0.686_489_6, 0.804_508, 0.671_175_5, 0.349_791_88, 0.693_347_4, 0.665_966_9, 0.458_685_64,
            0.30147046, 0.604_963_96, 0.760_241_7, 0.682_05, 0.463_807_9, 0.766_240_9, 0.6465416,
            0.459_911_97, 0.291_017_06, 0.664_235_1, 0.589_352_13, 0.732_011, 0.497_262_72, 0.606_575_13,
            0.553_394_7, 0.42471716, 0.23968734, 0.428_315_88, 0.493_737_34, 0.632_735_1, 0.388_350_46,
            0.672_591_45,
        ];
        blur.box_blur_v(&src, &mut dst, width, height, 1, 1.0);
        assert_all_close(&dst, &expected_vertical);

        // Full 2D pass; the result is written back into `src`.
        let expected_2d: Vec<f32> = vec![
            0.472_543_84, 0.36087415, 0.29275754, 0.338_359_62, 0.47926736, 0.528_064_1, 0.5321305,
            0.493_803_83, 0.465_661_3, 0.287_117_9, 0.140_233_76, 0.253_155_86, 0.3544484, 0.453_756,
            0.513_519_8, 0.5333721, 0.615_576_57, 0.503_69, 0.393_850_42, 0.40952832, 0.43989295,
            0.472_814_68, 0.543_612, 0.588_999_5, 0.735_336_54, 0.579826, 0.508_914_65, 0.458_417_86,
            0.460_336_36, 0.39454588, 0.503_306_8, 0.597_834_17, 0.666_094_1, 0.567_849_7, 0.581_440_03,
            0.62987053, 0.720_724_34, 0.608_491_8, 0.571_438_25, 0.562_952_64, 0.630_297_84, 0.475_374_34,
            0.455_04, 0.5555587, 0.682_418_5, 0.635_366_5, 0.63736624, 0.632_005_2, 0.571_009_6,
            0.465_823_53, 0.471_721_38, 0.5148681, 0.661_866_07, 0.606_208_6, 0.611_949_6, 0.583_459_8,
            0.550_234_44, 0.405_933_05, 0.364_240_14, 0.38724685, 0.518_262_74, 0.504_940_9, 0.564_559,
            0.538_112_16,
        ];
        blur.box_blur(&mut src, &mut dst, width, height, 1, 1.0);
        assert_all_close(&src, &expected_2d);
    }

    #[test]
    fn test_boxes_for_gaussian() {
        // (sigma, expected radii of the three box passes)
        let cases: [(f32, [usize; 3]); 3] = [
            (1.5, [1, 1, 1]),
            (1.8, [1, 1, 2]),
            (2.5, [2, 2, 2]),
        ];

        for &(sigma, expected) in cases.iter() {
            let boxes = Blur::boxes_for_gaussian::<3>(sigma);
            assert_eq!(boxes, expected);
        }
    }
}