/************************************************************************** * * * This code has been developed by John Funnell. This software is an * * implementation of a part of one or more MPEG-4 Video tools as * * specified in ISO/IEC 14496-2 standard. Those intending to use this * * software module in hardware or software products are advised that its * * use may infringe existing patents or copyrights, and any such use * * would be at such party's own risk. The original developer of this * * software module and his/her company, and subsequent editors and their * * companies (including Project Mayo), will have no liability for use of * * this software or modifications or derivatives thereof. * * * * Project Mayo gives users of the Codec a license to this software * * module or modifications thereof for use in hardware or software * * products claiming conformance to the MPEG-4 Video Standard as * * described in the Open DivX license. * * * * The complete Open DivX license can be found at * * http://www.projectmayo.com/opendivx/license.php * * * **************************************************************************/ /** * Copyright (C) 2001 - Project Mayo * * John Funnell * * DivX Advanced Research Center **/ // postprocess.c // /* Currently this contains only the deblocking filter. The vertical */ /* deblocking filter operates over eight pixel-wide columns at once. The */ /* horizontal deblocking filter works on four horizontals row at a time. */ #include "postprocess.h" #define ABS(a) ( (a)>0 ? (a) : -(a) ) #define SIGN(a) ( (a)<0 ? -1 : 1 ) #define MIN(a, b) ( (a)<(b) ? (a) : (b) ) #define MAX(a, b) ( (a)>(b) ? (a) : (b) ) /***** H O R I Z O N T A L D E B L O C K I N G F I L T E R *****/ /* decide DC mode or default mode for the horizontal filter */ static inline int deblock_horiz_useDC(uint8_t *v, int stride) { register int eq_cnt=0; int x=0, y; for (y=0; y<4; y++) { register unsigned int a; a=v[1+x]-v[2+x]+1;if(a<3) eq_cnt++; a=v[2+x]-v[3+x]+1;if(a<3) eq_cnt++; a=v[3+x]-v[4+x]+1;if(a<3) eq_cnt++; a=v[4+x]-v[5+x]+1;if(a<3) eq_cnt++; a=v[5+x]-v[6+x]+1;if(a<3) eq_cnt++; a=v[6+x]-v[7+x]+1;if(a<3) eq_cnt++; a=v[7+x]-v[8+x]+1;if(a<3) eq_cnt++; x+=stride; } return (eq_cnt >= DEBLOCK_HORIZ_USEDC_THR); } /* decide whether the DC filter should be turned on accoding to QP */ static inline int deblock_horiz_DC_on(uint8_t *v, int stride, int QP) { /* 99% of the time, this test turns out the same as the |max-min| strategy in the standard */ return (ABS(v[1]-v[8]) < 2*QP); } /* The 9-tap low pass filter used in "DC" regions */ static inline void deblock_horiz_lpf9(uint8_t *v, int stride, int QP) { int y; // putchar('D'); for (y=0; y<4; y++) { register int psum; register uint8_t *vv; uint8_t vnew[8]; int x=y*stride; int p1 = (ABS(v[0+x]-v[1+x]) < QP ) ? v[0+x] : v[1+x]; int p2 = (ABS(v[8+x]-v[9+x]) < QP ) ? v[9+x] : v[8+x]; /* C implementation of horizontal LPF */ vv = &(v[x]); psum = p1 + p1 + p1 + vv[1] + vv[2] + vv[3] + vv[4] + 4; vnew[0] = (((psum + vv[1]) << 1) - (vv[4] - vv[5])) >> 4; psum += vv[5] - p1; vnew[1] = (((psum + vv[2]) << 1) - (vv[5] - vv[6])) >> 4; psum += vv[6] - p1; vnew[2] = (((psum + vv[3]) << 1) - (vv[6] - vv[7])) >> 4; psum += vv[7] - p1; vnew[3] = (((psum + vv[4]) << 1) + p1 - vv[1] - (vv[7] - vv[8])) >> 4; psum += vv[8] - vv[1]; vnew[4] = (((psum + vv[5]) << 1) + (vv[1] - vv[2]) - vv[8] + p2) >> 4; psum += p2 - vv[2]; vnew[5] = (((psum + vv[6]) << 1) + (vv[2] - vv[3])) >> 4; psum += p2 - vv[3]; vnew[6] = (((psum + vv[7]) << 1) + (vv[3] - vv[4])) >> 4; psum += p2 - vv[4]; vnew[7] = (((psum + vv[8]) << 1) + (vv[4] - vv[5])) >> 4; // for (x=1; x<=8; x++) vv[x] = vnew[x]; *((int*)&vv[1])=*((int*)&vnew[0]); *((int*)&vv[5])=*((int*)&vnew[4]); } } /* horizontal deblocking filter used in default (non-DC) mode */ static inline void deblock_horiz_default_filter(uint8_t *v, int stride, int QP) { int y; // putchar('A'); for (y=0; y<4; y++) { int q1 = v[4] - v[5]; int q = q1 / 2; if (q) { int a3_0 = 2*(v[3]-v[6]) - 5*q1; /* apply the 'delta' function first and check there is a difference to avoid wasting time */ if (ABS(a3_0) < 8*QP) { int a3_1 = 2*(v[1]-v[4]) + 5*(v[3]-v[2]); int a3_2 = 2*(v[5]-v[8]) + 5*(v[7]-v[8]); int d = ABS(a3_0) - MIN(ABS(a3_1), ABS(a3_2)); if (d > 0) { /* energy across boundary is greater than in one or both of the blocks */ d += d<<2; d = (d + 32) >> 6; if (d > 0) { d *= SIGN(-a3_0); /* clip d in the range 0 ... q */ if (q > 0) { d = d<0 ? 0 : d; d = d>q ? q : d; } else { d = d>0 ? 0 : d; d = d DEBLOCK_VERT_USEDC_THR); } /* decide whether the DC filter should be turned on accoding to QP */ static inline int deblock_vert_DC_on(uint8_t *v, int stride, int QP) { int DC_on, x; /* C implementation of vertical DC_on */ DC_on = 1; for (x=0; x<8; x++) { if (ABS(v[x+1*stride]-v[x+8*stride]) > 2 *QP) DC_on = 0; } return DC_on; } /* Vertical 9-tap low-pass filter for use in "DC" regions of the picture */ void deblock_vert_lpf9(uint64_t *v_local, uint64_t *p1p2, uint8_t *v, int stride, int QP) { int x, y; int p1, p2, psum; uint8_t *vv, vnew[9]; /* define semi-constants to enable us to move up and down the picture easily... */ int l1 = 1 * stride; int l2 = 2 * stride; int l3 = 3 * stride; int l4 = 4 * stride; int l5 = 5 * stride; int l6 = 6 * stride; int l7 = 7 * stride; int l8 = 8 * stride; /* simple C implementation of vertical default filter */ for (x=0; x<8; x++) { /* loop left->right */ vv = &(v[x]); p1 = (ABS(vv[0*stride]-vv[1*stride]) < QP ) ? vv[0*stride] : vv[1*stride]; p2 = (ABS(vv[8*stride]-vv[9*stride]) < QP ) ? vv[9*stride] : vv[8*stride]; /* the above may well be endian-fussy */ psum = p1 + p1 + p1 + vv[l1] + vv[l2] + vv[l3] + vv[l4] + 4; vnew[1] = (((psum + vv[l1]) << 1) - (vv[l4] - vv[l5])) >> 4; psum += vv[l5] - p1; vnew[2] = (((psum + vv[l2]) << 1) - (vv[l5] - vv[l6])) >> 4; psum += vv[l6] - p1; vnew[3] = (((psum + vv[l3]) << 1) - (vv[l6] - vv[l7])) >> 4; psum += vv[l7] - p1; vnew[4] = (((psum + vv[l4]) << 1) + p1 - vv[l1] - (vv[l7] - vv[l8])) >> 4; psum += vv[l8] - vv[l1]; vnew[5] = (((psum + vv[l5]) << 1) + (vv[l1] - vv[l2]) - vv[l8] + p2) >> 4; psum += p2 - vv[l2]; vnew[6] = (((psum + vv[l6]) << 1) + (vv[l2] - vv[l3])) >> 4; psum += p2 - vv[l3]; vnew[7] = (((psum + vv[l7]) << 1) + (vv[l3] - vv[l4])) >> 4; psum += p2 - vv[l4]; vnew[8] = (((psum + vv[l8]) << 1) + (vv[l4] - vv[l5])) >> 4; for (y=1; y<=8; y++) { vv[y*stride] = vnew[y]; } } } /* Vertical deblocking filter for use in non-flat picture regions */ static void deblock_vert_default_filter(uint8_t *v, int stride, int QP) { int x; /* define semi-constants to enable us to move up and down the picture easily... */ int l1 = 1 * stride; int l2 = 2 * stride; int l3 = 3 * stride; int l4 = 4 * stride; int l5 = 5 * stride; int l6 = 6 * stride; int l7 = 7 * stride; int l8 = 8 * stride; /* simple C implementation of vertical default filter */ for (x=0; x<8; x++) { int a3_0 = 2*v[l3+x] - 5*v[l4+x] + 5*v[l5+x] - 2*v[l6+x]; int a3_1 = 2*v[l1+x] - 5*v[l2+x] + 5*v[l3+x] - 2*v[l4+x]; int a3_2 = 2*v[l5+x] - 5*v[l6+x] + 5*v[l7+x] - 2*v[l8+x]; int q = (v[l4+x] - v[l5+x]) / 2; if (ABS(a3_0) < 8*QP) { register int d = ABS(a3_0) - MIN(ABS(a3_1), ABS(a3_2)); if (d < 0) d=0; d = (5*d + 32) >> 6; d *= SIGN(-a3_0); //printf("d[%d] preclip=%d\n", x, d); /* clip d in the range 0 ... q */ if (q > 0) { d = d<0 ? 0 : d; d = d>q ? q : d; } else { d = d>0 ? 0 : d; d = d max ? b8x8[stride*v + h] : max; } } /* Threshold detirmination - compute threshold and dynamic range */ thr = (max + min + 1) / 2; range = max - min; /* Threshold rearrangement not implemented yet */ /* Index aquisition */ for (j=0; j<10; j++) { indicesP[j] = 0; for (i=0; i<10; i++) { if (b10x10[j*stride+i] >= thr) indicesP[j] |= (2 << i); } indicesN[j] = ~indicesP[j]; } /* Adaptive filtering */ /* need to identify 3x3 blocks of '1's in indicesP and indicesN */ for (j=0; j<10; j++) { indicesP[j] = (indicesP[j]<<1) & indicesP[j] & (indicesP[j]>>1); indicesN[j] = (indicesN[j]<<1) & indicesN[j] & (indicesN[j]>>1); } for (j=1; j<9; j++) { indices3x3[j-1] = indicesP[j-1] & indicesP[j] & indicesP[j+1]; indices3x3[j-1] |= indicesN[j-1] & indicesN[j] & indicesN[j+1]; } for (v=0; v<8; v++) { sr = 4; for (h=0; h<8; h++) { if (indices3x3[v] & sr) { b8x8filtered[8*v + h] = ( 8 + 1 * b10x10[stride*(v+0) + (h+0)] + 2 * b10x10[stride*(v+0) + (h+1)] + 1 * b10x10[stride*(v+0) + (h+2)] + 2 * b10x10[stride*(v+1) + (h+0)] + 4 * b10x10[stride*(v+1) + (h+1)] + 2 * b10x10[stride*(v+1) + (h+2)] + 1 * b10x10[stride*(v+2) + (h+0)] + 2 * b10x10[stride*(v+2) + (h+1)] + 1 * b10x10[stride*(v+2) + (h+2)] ) / 16; } sr <<= 1; } } /* Clipping */ max_diff = QP/2; for (v=0; v<8; v++) { sr = 4; for (h=0; h<8; h++) { if (indices3x3[v] & sr) { if (b8x8filtered[8*v + h] - b8x8[stride*v + h] > max_diff) { b8x8[stride*v + h] = b8x8[stride*v + h] + max_diff; } else if (b8x8filtered[8*v + h] - b8x8[stride*v + h] < -max_diff) { b8x8[stride*v + h] = b8x8[stride*v + h] - max_diff; } else { b8x8[stride*v + h] = b8x8filtered[8*v + h]; } } sr <<= 1; } } } // } } /* This function is more or less what Andrea wanted: */ void postprocess_orig(unsigned char * src[], int src_stride, unsigned char * dst[], int dst_stride, int horizontal_size, int vertical_size, QP_STORE_T *QP_store, int QP_stride, int mode) { uint8_t *Y, *U, *V; int x, y; if (!(mode & PP_DONT_COPY)) { /* First copy source to destination... */ /* luma */ for (y=0; y>4)*QP_stride]); deblock_horiz(puc_flt, horizontal_size, 4, dst_stride, QP_ptr, QP_stride, 0); } if (mode & PP_DEBLOCK_Y_V) { if ( ((y&7)) && ((y-4)>=8) ) { puc_flt = &((dst[0])[(y-4)*dst_stride]); QP_ptr = &(QP_store[((y-4)>>4)*QP_stride]); deblock_vert( puc_flt, horizontal_size, 4, dst_stride, QP_ptr, QP_stride, 0); } } if (mode & PP_DERING_Y) { if ( (y%8) && (y-4)>5 ) { puc_flt = &((dst[0])[y*dst_stride]); QP_ptr = &(QP_store[(y>>4)*QP_stride]); dering( puc_flt, horizontal_size, 4, dst_stride, QP_ptr, QP_stride, 0); } // dering( puc_flt, horizontal_size, 4, dst_stride, QP_ptr, QP_stride, 0); } } /* for loop */ /* now we're going to do U and V assuming 4:2:0 */ horizontal_size >>= 1; vertical_size >>= 1; src_stride >>= 1; dst_stride >>= 1; /* loop U then V */ for (i=1; i<=2; i++) { for (y=0; y>3)*QP_stride]); deblock_horiz(puc_flt, horizontal_size, 4, dst_stride, QP_ptr, QP_stride, 1); } if (mode & PP_DEBLOCK_C_V) { if ( ((y&7)) && ((y-4)>=8) ) { puc_flt = &((dst[0])[(y-4)*dst_stride]); QP_ptr = &(QP_store[((y-4)>>4)*QP_stride]); deblock_vert( puc_flt, horizontal_size, 4, dst_stride, QP_ptr, QP_stride, 1); } } if (mode & PP_DERING_C) { if ( (y%8) && (y-4)>5 ) { puc_flt = &((dst[i])[y*dst_stride]); QP_ptr = &(QP_store[(y>>4)*QP_stride]); dering( puc_flt, horizontal_size, 4, dst_stride, QP_ptr, QP_stride, 1); } // dering( puc_flt, horizontal_size, 4, dst_stride, QP_ptr, QP_stride, 1); } } /* stripe loop */ } /* U,V loop */ }