use itertools::multizip; use rayon::prelude::*; #[derive(Debug)] pub struct Blur { row_buffer: Vec, } impl Clone for Blur { fn clone(&self) -> Blur { Blur { row_buffer: self.row_buffer.clone(), } } } impl Blur { pub fn new(width: usize) -> Self { Blur { row_buffer: vec![0.0; width], } } // Blur an image with 2 box filter passes. The result will be written to the src slice, while the buf slice is used as a scratch space. pub fn run( &mut self, src: &mut [f32], buf: &mut [f32], width: usize, height: usize, sigma: f32, decay: f32, ) { let boxes = Blur::boxes_for_gaussian::<2>(sigma); self.box_blur(src, buf, width, height, boxes[0], 1.0); self.box_blur(src, buf, width, height, boxes[1], decay); } // Approximate 1D Gaussian filter of standard deviation sigma with N box filter passes. Each element in the output array contains the radius of the box filter for the corresponding pass. fn boxes_for_gaussian(sigma: f32) -> ([usize; N]) { let w_ideal = (12.0 * sigma * sigma / N as f32 + 1.0).sqrt(); let mut w = w_ideal as usize; w -= 1 - (w & 1); let mut m = 0.25 * (N * (w + 3)) as f32; m -= 3.0 * sigma * sigma / (w + 1) as f32; let m = m.round() as usize; let mut result = [0; N]; for (i, value) in result.iter_mut().enumerate() { *value = (if i < m { w - 1 } else { w + 1 }) / 2; } result } // Perform one pass of the 2D box filter of the given radius. The result will be written to the src slice, while the buf slice is used as a scratch space. fn box_blur( &mut self, src: &mut [f32], buf: &mut [f32], width: usize, height: usize, radius: usize, decay: f32, ) { self.box_blur_h(src, buf, width, radius); self.box_blur_v(buf, src, width, height, radius, decay); } // Perform one pass of the 1D box filter of the given radius along x axis. fn box_blur_h(&mut self, src: &[f32], dst: &mut [f32], width: usize, radius: usize) { let weight = 1.0 / (2 * radius + 1) as f32; src.par_chunks_exact(width) .zip(dst.par_chunks_exact_mut(width)) .for_each(|(src_row, dst_row)| { // First we build a value for the beginning of each row. We assume periodic boundary conditions, so we need to push the left index to the opposite side of the row. let width_sub_radius = width - radius; let mut value = src_row[width - radius - 1]; for j in 0..radius { value += src_row[width_sub_radius + j] + src_row[j]; } for (i, dst_elem) in dst_row.iter_mut().enumerate() { let left = (i + width_sub_radius - 1) & (width - 1); let right = (i + radius) & (width - 1); value += src_row[right] - src_row[left]; *dst_elem = value * weight; } }) } // Perform one pass of the 1D box filter of the given radius along y axis. Applies the decay factor to the destination buffer. fn box_blur_v( &mut self, src: &[f32], dst: &mut [f32], width: usize, height: usize, radius: usize, decay: f32, ) { let weight = decay / (2 * radius + 1) as f32; // We don't replicate the horizontal filter logic because of the cache-unfriendly memory access patterns of sequential iteration over individual columns. Instead, we iterate over rows via loop interchange. let height_sub_radius = height - radius; let offset = (height_sub_radius - 1) * width; self.row_buffer .copy_from_slice(&src[offset..offset + width]); for j in 0..radius { let bottom_off = (height_sub_radius + j) * width; let bottom_row = &src[bottom_off..bottom_off + width]; let top_off = j * width; let top_row = &src[top_off..top_off + width]; for (buf, bottom, top) in multizip((&mut self.row_buffer, bottom_row, top_row)) { *buf += bottom + top; } } // The outer loop cannot be parallelized because we need to use the buffer sequentially. for (i, dst_row) in dst.chunks_exact_mut(width).enumerate() { let bottom_off = ((i + height_sub_radius - 1) & (height - 1)) * width; let bottom_row = &src[bottom_off..bottom_off + width]; let top_off = ((i + radius) & (height - 1)) * width; let top_row = &src[top_off..top_off + width]; for (dst, buf, bottom, top) in multizip((dst_row, &mut self.row_buffer, bottom_row, top_row)) { *buf += top - bottom; *dst = *buf * weight; } } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_blur() { // The values from this test were obtained using Python's code: // a = np.random.rand(8, 8) // ndimage.uniform_filter1d(a, size=3, axis = 1, mode='wrap') # horizontal blur // ndimage.uniform_filter1d(a, size=3, axis = 0, mode='wrap') # vertical blur // ndimage.uniform_filter(a, size=3, mode='wrap') # 2D blur let mut src: Vec = vec![ 0.32352856, 0.06571674, 0.01939427, 0.06352045, 0.708_527, 0.617_221_7, 0.16638431, 0.628_400_74, 0.554_893_9, 0.240_076_77, 0.325_009_94, 0.08515139, 0.679_840_9, 0.6975669, 0.736_234_25, 0.55053085, 0.692_227_66, 0.22727048, 0.13594262, 0.10002105, 0.16099514, 0.07719103, 0.23984282, 0.9083058, 0.642_227_4, 0.968_934_2, 0.74662715, 0.715_620_1, 0.736_546_5, 0.70610344, 0.221_011_18, 0.755_721_87, 0.691_958_84, 0.837_414, 0.27583158, 0.572_570_5, 0.681_606, 0.392_373_38, 0.33524343, 0.893_968_34, 0.602_969_35, 0.171_301_13, 0.1733834, 0.771_278_2, 0.99537134, 0.915_049_6, 0.493_121_1, 0.430_352_03, 0.70297265, 0.367_341_8, 0.4551964, 0.471_043_14, 0.603_747_8, 0.738_726_85, 0.5630592, 0.974_402_25, 0.633_682_85, 0.841_092_94, 0.24447136, 0.750384, 0.16893725, 0.542_256_65, 0.435_607_82, 0.414_971_23, ]; let (width, height) = (8, 8); let mut dst = vec![0.0; width * height]; let mut blur = Blur::new(width); blur.box_blur_h(&src, &mut dst, width, 1); let mut sol: Vec = vec![ 0.339_215_37, 0.136_213_18, 0.04954382, 0.263_813_9, 0.46308973, 0.497_377_7, 0.470_668_94, 0.372_771_2, 0.448_500_5, 0.373_326_87, 0.21674603, 0.363_334_1, 0.48751974, 0.70454735, 0.661_444, 0.613_886_36, 0.609_268, 0.351_813_58, 0.15441138, 0.1323196, 0.11273574, 0.159343, 0.40844655, 0.613_458_75, 0.788_961_2, 0.785_929_56, 0.810_393_8, 0.732_931_26, 0.719_423_35, 0.554_553_7, 0.560_945_5, 0.539_653_5, 0.807_780_4, 0.601_734_8, 0.561_938_7, 0.510_002_7, 0.548_849_94, 0.46974093, 0.540_528_4, 0.640_390_2, 0.40154082, 0.315_884_62, 0.371_987_58, 0.646_677_6, 0.893_899_74, 0.801_180_66, 0.612_840_9, 0.508_814_16, 0.681_572_2, 0.508_503_6, 0.431_193_77, 0.509_995_76, 0.604_505_9, 0.635_177_9, 0.758_729_4, 0.746_811_33, 0.629_915_65, 0.573_082_4, 0.611_982_76, 0.38793087, 0.48719263, 0.38226724, 0.464_278_58, 0.494_753_96, ]; for (v1, v2) in dst.iter().zip(sol) { assert!((v1 - v2).abs() < 1e-6); } blur.box_blur_v(&src, &mut dst, width, height, 1, 1.0); sol = vec![ 0.504_035_1, 0.382_295_5, 0.19629186, 0.299_685_27, 0.519_101_74, 0.619_015_1, 0.446_075_47, 0.531_300_96, 0.523_550_03, 0.177688, 0.16011561, 0.08289763, 0.516_454_34, 0.46399322, 0.38082045, 0.695_745_8, 0.629_783_03, 0.47876048, 0.402_526_56, 0.300_264_18, 0.5257942, 0.49362046, 0.3990294, 0.738_186_2, 0.675_471_3, 0.677_872_9, 0.386_133_8, 0.46273723, 0.526_382_57, 0.391_889_3, 0.265_365_8, 0.852_665_36, 0.645_718_5, 0.659_216_46, 0.39861405, 0.686_489_6, 0.804_508, 0.671_175_5, 0.349_791_88, 0.693_347_4, 0.665_966_9, 0.458_685_64, 0.30147046, 0.604_963_96, 0.760_241_7, 0.682_05, 0.463_807_9, 0.766_240_9, 0.6465416, 0.459_911_97, 0.291_017_06, 0.664_235_1, 0.589_352_13, 0.732_011, 0.497_262_72, 0.606_575_13, 0.553_394_7, 0.42471716, 0.23968734, 0.428_315_88, 0.493_737_34, 0.632_735_1, 0.388_350_46, 0.672_591_45, ]; for (v1, v2) in dst.iter().zip(sol) { assert!((v1 - v2).abs() < 1e-6); } blur.box_blur(&mut src, &mut dst, width, height, 1, 1.0); sol = vec![ 0.472_543_84, 0.36087415, 0.29275754, 0.338_359_62, 0.47926736, 0.528_064_1, 0.5321305, 0.493_803_83, 0.465_661_3, 0.287_117_9, 0.140_233_76, 0.253_155_86, 0.3544484, 0.453_756, 0.513_519_8, 0.5333721, 0.615_576_57, 0.503_69, 0.393_850_42, 0.40952832, 0.43989295, 0.472_814_68, 0.543_612, 0.588_999_5, 0.735_336_54, 0.579826, 0.508_914_65, 0.458_417_86, 0.460_336_36, 0.39454588, 0.503_306_8, 0.597_834_17, 0.666_094_1, 0.567_849_7, 0.581_440_03, 0.62987053, 0.720_724_34, 0.608_491_8, 0.571_438_25, 0.562_952_64, 0.630_297_84, 0.475_374_34, 0.455_04, 0.5555587, 0.682_418_5, 0.635_366_5, 0.63736624, 0.632_005_2, 0.571_009_6, 0.465_823_53, 0.471_721_38, 0.5148681, 0.661_866_07, 0.606_208_6, 0.611_949_6, 0.583_459_8, 0.550_234_44, 0.405_933_05, 0.364_240_14, 0.38724685, 0.518_262_74, 0.504_940_9, 0.564_559, 0.538_112_16, ]; for (v1, v2) in src.iter().zip(sol) { assert!((v1 - v2).abs() < 1e-6); } } #[test] fn test_boxes_for_gaussian() { let boxes = Blur::boxes_for_gaussian::<3>(1.5); assert_eq!(boxes, [1, 1, 1]); let boxes = Blur::boxes_for_gaussian::<3>(1.8); assert_eq!(boxes, [1, 1, 2]); let boxes = Blur::boxes_for_gaussian::<3>(2.5); assert_eq!(boxes, [2, 2, 2]); } }