+#define BLUR(w, h, pstep, sstep) /* sliding-window average of 2*rad+1 samples along each of h lines of w pixels; pstep = step between pixels, sstep = step added after each line */ \
+ for(i=0; i<(h); i++) { /* relies on i, j, sptr, dptr, rad and scale being in scope where the macro is expanded */ \
+ int r, g, b; \
+ int rsum = UNPACK_R16(sptr[0]) * (rad + 1); /* the first pixel stands in for the rad+1 samples that lie before the line start */ \
+ int gsum = UNPACK_G16(sptr[0]) * (rad + 1); \
+ int bsum = UNPACK_B16(sptr[0]) * (rad + 1); \
+ int count = (rad * 2 + 1) << 8; /* window size times 256; NOTE(review): presumably scale is 8-bit fixed point - confirm */ \
+ int midsize = (w) - rad * 2; /* NOTE(review): the passes below touch at least 2*rad+1 pixels per line, so the caller presumably guarantees w > 2*rad - confirm */ \
+ int rfirstpix = UNPACK_R16(sptr[0]); \
+ int rlastpix = UNPACK_R16(sptr[(pstep) * ((w) - 1)]); /* edge pixels are captured up front and reused as clamped samples */ \
+ int gfirstpix = UNPACK_G16(sptr[0]); \
+ int glastpix = UNPACK_G16(sptr[(pstep) * ((w) - 1)]); \
+ int bfirstpix = UNPACK_B16(sptr[0]); \
+ int blastpix = UNPACK_B16(sptr[(pstep) * ((w) - 1)]); \
+ /* prime the sums with the window centred on pixel -1: add pixels 0..rad-1 */ \
+ for(j=0; j<rad; j++) { \
+ rsum += UNPACK_R16(sptr[(pstep) * j]); \
+ gsum += UNPACK_G16(sptr[(pstep) * j]); \
+ bsum += UNPACK_B16(sptr[(pstep) * j]); \
+ } \
+ /* first rad+1 outputs: add sptr[rad], subtract the clamped first pixel */ \
+ for(j=0; j<=rad; j++) { \
+ rsum += UNPACK_R16((int)sptr[(pstep) * rad]) - rfirstpix; \
+ gsum += UNPACK_G16((int)sptr[(pstep) * rad]) - gfirstpix; \
+ bsum += UNPACK_B16((int)sptr[(pstep) * rad]) - bfirstpix; \
+ sptr += (pstep); \
+ r = scale * rsum / count; \
+ g = scale * gsum / count; \
+ b = scale * bsum / count; \
+ *dptr = PACK_RGB16(r, g, b); \
+ dptr += (pstep); \
+ } \
+ /* middle midsize-1 outputs: add sptr[rad], subtract sptr[-(rad+1)] */ \
+ for(j=1; j<midsize; j++) { \
+ rsum += UNPACK_R16((int)sptr[(pstep) * rad]) - UNPACK_R16((int)sptr[-(rad + 1) * (pstep)]); \
+ gsum += UNPACK_G16((int)sptr[(pstep) * rad]) - UNPACK_G16((int)sptr[-(rad + 1) * (pstep)]); \
+ bsum += UNPACK_B16((int)sptr[(pstep) * rad]) - UNPACK_B16((int)sptr[-(rad + 1) * (pstep)]); \
+ sptr += (pstep); \
+ r = scale * rsum / count; \
+ g = scale * gsum / count; \
+ b = scale * bsum / count; \
+ *dptr = PACK_RGB16(r, g, b); \
+ dptr += (pstep); \
+ } \
+ /* last rad outputs: add the clamped last pixel, subtract sptr[-(rad+1)] */ \
+ for(j=0; j<rad; j++) { \
+ rsum += rlastpix - UNPACK_R16((int)sptr[-(rad + 1) * (pstep)]); \
+ gsum += glastpix - UNPACK_G16((int)sptr[-(rad + 1) * (pstep)]); \
+ bsum += blastpix - UNPACK_B16((int)sptr[-(rad + 1) * (pstep)]); \
+ sptr += (pstep); \
+ r = scale * rsum / count; \
+ g = scale * gsum / count; \
+ b = scale * bsum / count; \
+ *dptr = PACK_RGB16(r, g, b); \
+ dptr += (pstep); \
+ } \
+ sptr += (sstep); /* both pointers have advanced w pixels by now; sstep carries them to the start of the next line */ \
+ dptr += (sstep); \
+ }
+