694 lines
17 KiB
C
694 lines
17 KiB
C
/****************************************************************************
|
|
*
|
|
* Module Title : preproc.c
|
|
*
|
|
* Description : Simple pre-processor.
|
|
*
|
|
****************************************************************************/
|
|
|
|
/****************************************************************************
|
|
* Header Files
|
|
****************************************************************************/
|
|
|
|
#include <stddef.h>
#include "memory.h"
#include "preproc.h"
|
|
|
|
/****************************************************************************
|
|
* Macros
|
|
****************************************************************************/
|
|
/* Number of history slots kept per pixel by the temporal filters. */
#define FRAMECOUNT  7

/*
 * Round an address (or integer) up to the next multiple of 32.
 *
 * The value is converted to size_t and masked with ~31 computed in size_t,
 * so no address bits are lost when pointers are wider than 32 bits (a
 * 0xFFFFFFE0 mask would clear the upper half of a 64-bit address).
 * The argument is parenthesised so any expression may be passed.
 */
#define ROUNDUP32(X) ( ( ( (size_t) (X) ) + 31 ) & ~( (size_t) 31 ) )
|
|
|
|
/****************************************************************************
|
|
* Imports
|
|
****************************************************************************/
|
|
/* Defined outside this file. InitPreProc passes the addresses of three ints
 * and afterwards tests WmtEnabled and MmxEnabled to choose which tempFilter
 * implementation to install; XmmEnabled is not examined in this file. */
extern void GetProcessorFlags (int *MmxEnabled, int *XmmEnabled, int *WmtEnabled );
|
|
|
|
/****************************************************************************
|
|
* Exported Global Variables
|
|
****************************************************************************/
|
|
/* Temporal filter entry point. InitPreProc assigns it to tempFilter_wmt,
 * tempFilter_mmx or tempFilter_c; it has no initializer of its own. */
void (*tempFilter)( PreProcInstance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength );
|
|
|
|
#ifndef MAPCA
|
|
/****************************************************************************
|
|
*
|
|
* ROUTINE : spatialFilter_wmt
|
|
*
|
|
* INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
|
|
* unsigned char *s : Pointer to source frame.
|
|
* unsigned char *d : Pointer to destination frame.
|
|
* int width : WIdth of images.
|
|
* int height : Height of images.
|
|
* int pitch : Stride of images.
|
|
* int strength : Strength of filter to apply.
|
|
*
|
|
* OUTPUTS : None.
|
|
*
|
|
* RETURNS : void
|
|
*
|
|
* FUNCTION : Performs a closesness adjusted temporarl blur
|
|
*
|
|
* SPECIAL NOTES : Destination frame can be same as source frame.
|
|
*
|
|
****************************************************************************/
|
|
void spatialFilter_wmt
|
|
(
|
|
PreProcInstance *ppi,
|
|
unsigned char *s,
|
|
unsigned char *d,
|
|
int width,
|
|
int height,
|
|
int pitch,
|
|
int strength
|
|
)
|
|
{
|
|
int i;
|
|
int row = 1;
|
|
int PixelOffsets[] =
|
|
{
|
|
-pitch-1, -pitch, -pitch+1,
|
|
-1, 0, +1,
|
|
pitch-1, pitch, pitch+1
|
|
};
|
|
unsigned char *frameptr = ppi->frameBuffer;
|
|
|
|
__declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3, 3, 3, 3, 3};
|
|
__declspec(align(16)) unsigned short sixteens[]= {16,16,16,16,16,16,16,16};
|
|
|
|
memcpy ( d, s, width );
|
|
|
|
d += pitch;
|
|
s += pitch;
|
|
|
|
do
|
|
{
|
|
// NOTE: By doing it this way I am ensuring that pixels will always be unaligned!!!
|
|
int col = 1;
|
|
d[0] = s[0];
|
|
d[width - 1] = s[width - 1];
|
|
do
|
|
{
|
|
__declspec(align(16)) unsigned short counts[8];
|
|
__declspec(align(16)) unsigned short sums[8];
|
|
_asm
|
|
{
|
|
mov esi, s // get the source line
|
|
add esi, col // add the column offset
|
|
pxor xmm1,xmm1 // accumulator
|
|
pxor xmm2,xmm2 // count
|
|
pxor xmm7,xmm7 // 0s for use with unpack
|
|
|
|
movq xmm3, QWORD PTR [esi] // get 8 pixels
|
|
punpcklbw xmm3, xmm7 // unpack to shorts
|
|
xor eax, eax // neighbor iterator
|
|
|
|
NextNeighbor:
|
|
mov ecx, [PixelOffsets+eax*4] // get eax index pixel neighbor offset
|
|
movq xmm4, QWORD PTR [esi + ecx] // get ecx index neighbor values
|
|
punpcklbw xmm4, xmm7 // xmm4 unpacked neighbor values
|
|
movdqa xmm6, xmm4 // save the pixel values
|
|
psubsw xmm4, xmm3 // subtracted pixel values
|
|
pmullw xmm4, xmm4 // square xmm4
|
|
movd xmm5, strength
|
|
psrlw xmm4, xmm5 // should be strength
|
|
pmullw xmm4, threes // 3 * modifier
|
|
movdqa xmm5, sixteens // 16s
|
|
psubusw xmm5, xmm4 // 16 - modifiers
|
|
movdqa xmm4, xmm5 // save the modifiers
|
|
pmullw xmm4, xmm6 // multiplier values
|
|
paddusw xmm1, xmm4 // accumulator
|
|
paddusw xmm2, xmm5 // count
|
|
inc eax // next neighbor
|
|
cmp eax,9 // there are nine neigbors
|
|
jne NextNeighbor
|
|
|
|
movdqa counts, xmm2
|
|
psrlw xmm2,1 // divide count by 2 for rounding
|
|
paddusw xmm1,xmm2 // rounding added in
|
|
|
|
mov frameptr,esi
|
|
|
|
movdqa sums, xmm1
|
|
}
|
|
|
|
for ( i=0; i<8; i++ )
|
|
{
|
|
int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
|
|
blurvalue >>= 16;
|
|
d[col+i] = blurvalue;
|
|
}
|
|
col += 8;
|
|
|
|
} while ( col<width-1 );
|
|
|
|
d += pitch;
|
|
s += pitch;
|
|
++row;
|
|
} while ( row<height-1 );
|
|
|
|
memcpy ( d, s, width );
|
|
__asm emms
|
|
}
|
|
#endif
|
|
/****************************************************************************
|
|
*
|
|
* ROUTINE : tempFilter_c
|
|
*
|
|
* INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
|
|
* unsigned char *s : Pointer to source frame.
|
|
* unsigned char *d : Pointer to destination frame.
|
|
* int bytes : Number of bytes to filter.
|
|
* int strength : Strength of filter to apply.
|
|
*
|
|
* OUTPUTS : None.
|
|
*
|
|
* RETURNS : void
|
|
*
|
|
* FUNCTION : Performs a closesness adjusted temporarl blur
|
|
*
|
|
* SPECIAL NOTES : Destination frame can be same as source frame.
|
|
*
|
|
****************************************************************************/
|
|
void tempFilter_c
|
|
(
|
|
PreProcInstance *ppi,
|
|
unsigned char *s,
|
|
unsigned char *d,
|
|
int bytes,
|
|
int strength
|
|
)
|
|
{
|
|
int byte = 0;
|
|
unsigned char *frameptr = ppi->frameBuffer;
|
|
|
|
if ( ppi->frame == 0 )
|
|
{
|
|
do
|
|
{
|
|
int frame = 0;
|
|
do
|
|
{
|
|
*frameptr = s[byte];
|
|
++frameptr;
|
|
++frame;
|
|
} while ( frame < FRAMECOUNT );
|
|
|
|
d[byte] = s[byte];
|
|
|
|
++byte;
|
|
} while ( byte < bytes );
|
|
}
|
|
else
|
|
{
|
|
int modifier;
|
|
int offset = (ppi->frame % FRAMECOUNT);
|
|
|
|
do
|
|
{
|
|
int accumulator = 0;
|
|
int count = 0;
|
|
int frame = 0;
|
|
|
|
frameptr[offset] = s[byte];
|
|
|
|
do
|
|
{
|
|
int pixelValue = *frameptr;
|
|
|
|
modifier = s[byte];
|
|
modifier -= pixelValue;
|
|
modifier *= modifier;
|
|
modifier >>= strength;
|
|
modifier *= 3;
|
|
|
|
if(modifier > 16)
|
|
modifier = 16;
|
|
|
|
modifier = 16 - modifier;
|
|
|
|
accumulator += modifier * pixelValue;
|
|
|
|
count += modifier;
|
|
|
|
frameptr++;
|
|
|
|
++frame;
|
|
} while ( frame < FRAMECOUNT );
|
|
|
|
accumulator += (count >> 1);
|
|
accumulator *= ppi->fixedDivide[count];
|
|
accumulator >>= 16;
|
|
|
|
d[byte] = accumulator;
|
|
|
|
++byte;
|
|
} while ( byte < bytes );
|
|
}
|
|
++ppi->frame;
|
|
}
|
|
#ifndef MAPCA
|
|
/****************************************************************************
|
|
*
|
|
* ROUTINE : tempFilter_wmt
|
|
*
|
|
* INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
|
|
* unsigned char *s : Pointer to source frame.
|
|
* unsigned char *d : Pointer to destination frame.
|
|
* int bytes : Number of bytes to filter.
|
|
* int strength : Strength of filter to apply.
|
|
*
|
|
* OUTPUTS : None.
|
|
*
|
|
* RETURNS : void
|
|
*
|
|
* FUNCTION : Performs a closesness adjusted temporarl blur
|
|
*
|
|
* SPECIAL NOTES : Destination frame can be same as source frame.
|
|
*
|
|
****************************************************************************/
|
|
void tempFilter_wmt
|
|
(
|
|
PreProcInstance *ppi,
|
|
unsigned char *s,
|
|
unsigned char *d,
|
|
int bytes,
|
|
int strength
|
|
)
|
|
{
|
|
int byte = 0;
|
|
unsigned char * frameptr = ppi->frameBuffer;
|
|
|
|
__declspec(align(16)) unsigned short threes[] ={ 3, 3, 3, 3, 3, 3, 3, 3};
|
|
__declspec(align(16)) unsigned short sixteens[]={16,16,16,16,16,16,16,16};
|
|
|
|
if ( ppi->frame == 0 )
|
|
{
|
|
do
|
|
{
|
|
int i;
|
|
int frame = 0;
|
|
|
|
do
|
|
{
|
|
for ( i=0; i<8; i++ )
|
|
{
|
|
*frameptr = s[byte+i];
|
|
++frameptr;
|
|
}
|
|
++frame;
|
|
} while ( frame < FRAMECOUNT );
|
|
|
|
for ( i=0; i<8; i++ )
|
|
d[byte+i] = s[byte+i];
|
|
|
|
byte += 8;
|
|
|
|
} while ( byte < bytes );
|
|
}
|
|
else
|
|
{
|
|
int i;
|
|
int offset2 = (ppi->frame % FRAMECOUNT);
|
|
|
|
do
|
|
{
|
|
__declspec(align(16)) unsigned short counts[8];
|
|
__declspec(align(16)) unsigned short sums[8];
|
|
int accumulator = 0;
|
|
int count = 0;
|
|
int frame = 0;
|
|
_asm
|
|
{
|
|
mov eax,offset2
|
|
mov edi,s // source pixels
|
|
pxor xmm1,xmm1 // accumulator
|
|
|
|
pxor xmm7,xmm7
|
|
|
|
mov esi,frameptr // accumulator
|
|
pxor xmm2,xmm2 // count
|
|
|
|
movq xmm3, QWORD PTR [edi]
|
|
|
|
movq QWORD PTR [esi+8*eax],xmm3
|
|
|
|
punpcklbw xmm3, xmm2 // xmm3 source pixels
|
|
mov ecx, FRAMECOUNT
|
|
|
|
NextFrame:
|
|
movq xmm4, QWORD PTR [esi] // get frame buffer values
|
|
punpcklbw xmm4, xmm7 // xmm4 frame buffer pixels
|
|
movdqa xmm6, xmm4 // save the pixel values
|
|
psubsw xmm4, xmm3 // subtracted pixel values
|
|
pmullw xmm4, xmm4 // square xmm4
|
|
movd xmm5, strength
|
|
psrlw xmm4, xmm5 // should be strength
|
|
pmullw xmm4, threes // 3 * modifier
|
|
movdqa xmm5, sixteens // 16s
|
|
psubusw xmm5, xmm4 // 16 - modifiers
|
|
movdqa xmm4, xmm5 // save the modifiers
|
|
pmullw xmm4, xmm6 // multiplier values
|
|
paddusw xmm1, xmm4 // accumulator
|
|
paddusw xmm2, xmm5 // count
|
|
add esi, 8 // next frame
|
|
dec ecx // next set of eight pixels
|
|
jnz NextFrame
|
|
|
|
movdqa counts, xmm2
|
|
psrlw xmm2,1 // divide count by 2 for rounding
|
|
paddusw xmm1,xmm2 // rounding added in
|
|
|
|
mov frameptr,esi
|
|
|
|
movdqa sums, xmm1
|
|
}
|
|
|
|
for ( i=0; i<8; i++ )
|
|
{
|
|
int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
|
|
blurvalue >>= 16;
|
|
d[i] = blurvalue;
|
|
}
|
|
s += 8;
|
|
d += 8;
|
|
byte += 8;
|
|
} while ( byte < bytes );
|
|
}
|
|
++ppi->frame;
|
|
__asm emms
|
|
}
|
|
|
|
/****************************************************************************
|
|
*
|
|
* ROUTINE : tempFilter_mmx
|
|
*
|
|
* INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
|
|
* unsigned char *s : Pointer to source frame.
|
|
* unsigned char *d : Pointer to destination frame.
|
|
* int bytes : Number of bytes to filter.
|
|
* int strength : Strength of filter to apply.
|
|
*
|
|
* OUTPUTS : None.
|
|
*
|
|
* RETURNS : void
|
|
*
|
|
* FUNCTION : Performs a closesness adjusted temporarl blur
|
|
*
|
|
* SPECIAL NOTES : Destination frame can be same as source frame.
|
|
*
|
|
****************************************************************************/
|
|
void tempFilter_mmx
|
|
(
|
|
PreProcInstance *ppi,
|
|
unsigned char *s,
|
|
unsigned char *d,
|
|
int bytes,
|
|
int strength
|
|
)
|
|
{
|
|
int byte = 0;
|
|
unsigned char *frameptr = ppi->frameBuffer;
|
|
|
|
__declspec(align(16)) unsigned short threes[] ={ 3, 3, 3, 3};
|
|
__declspec(align(16)) unsigned short sixteens[]={16,16,16,16};
|
|
|
|
if ( ppi->frame == 0 )
|
|
{
|
|
do
|
|
{
|
|
int i;
|
|
int frame = 0;
|
|
|
|
do
|
|
{
|
|
for ( i=0; i<4; i++ )
|
|
{
|
|
*frameptr = s[byte+i];
|
|
++frameptr;
|
|
}
|
|
++frame;
|
|
} while ( frame < FRAMECOUNT );
|
|
|
|
for ( i=0; i<4; i++ )
|
|
d[byte+i] = s[byte+i];
|
|
|
|
byte += 4;
|
|
|
|
} while ( byte < bytes );
|
|
}
|
|
else
|
|
{
|
|
int i;
|
|
int offset2 = (ppi->frame % FRAMECOUNT);
|
|
do
|
|
{
|
|
__declspec(align(16)) unsigned short counts[8];
|
|
__declspec(align(16)) unsigned short sums[8];
|
|
int accumulator = 0;
|
|
int count = 0;
|
|
int frame = 0;
|
|
_asm
|
|
{
|
|
|
|
mov eax,offset2
|
|
mov edi,s // source pixels
|
|
pxor mm1,mm1 // accumulator
|
|
pxor mm7,mm7
|
|
|
|
mov esi,frameptr // accumulator
|
|
pxor mm2,mm2 // count
|
|
|
|
movd mm3, DWORD PTR [edi]
|
|
movd DWORD PTR [esi+4*eax],mm3
|
|
|
|
punpcklbw mm3, mm2 // mm3 source pixels
|
|
mov ecx, FRAMECOUNT
|
|
|
|
NextFrame:
|
|
movd mm4, DWORD PTR [esi] // get frame buffer values
|
|
punpcklbw mm4, mm7 // mm4 frame buffer pixels
|
|
movq mm6, mm4 // save the pixel values
|
|
psubsw mm4, mm3 // subtracted pixel values
|
|
pmullw mm4, mm4 // square mm4
|
|
movd mm5, strength
|
|
psrlw mm4, mm5 // should be strength
|
|
pmullw mm4, threes // 3 * modifier
|
|
movq mm5, sixteens // 16s
|
|
psubusw mm5, mm4 // 16 - modifiers
|
|
movq mm4, mm5 // save the modifiers
|
|
pmullw mm4, mm6 // multiplier values
|
|
paddusw mm1, mm4 // accumulator
|
|
paddusw mm2, mm5 // count
|
|
add esi, 4 // next frame
|
|
dec ecx // next set of eight pixels
|
|
jnz NextFrame
|
|
|
|
movq counts, mm2
|
|
psrlw mm2,1 // divide count by 2 for rounding
|
|
paddusw mm1,mm2 // rounding added in
|
|
|
|
mov frameptr,esi
|
|
|
|
movq sums, mm1
|
|
|
|
}
|
|
|
|
for ( i=0; i<4; i++ )
|
|
{
|
|
int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
|
|
blurvalue >>= 16;
|
|
d[i] = blurvalue;
|
|
}
|
|
s += 4;
|
|
d += 4;
|
|
byte += 4;
|
|
} while ( byte < bytes );
|
|
}
|
|
++ppi->frame;
|
|
__asm emms
|
|
}
|
|
#endif
|
|
/****************************************************************************
|
|
*
|
|
* ROUTINE : DeletePreProc
|
|
*
|
|
* INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
|
|
*
|
|
* OUTPUTS : None.
|
|
*
|
|
* RETURNS : void
|
|
*
|
|
* FUNCTION : Deletes a pre-processing instance.
|
|
*
|
|
* SPECIAL NOTES : None.
|
|
*
|
|
****************************************************************************/
|
|
void DeletePreProc ( PreProcInstance *ppi )
|
|
{
|
|
if ( ppi->frameBufferAlloc )
|
|
duck_free ( ppi->frameBufferAlloc );
|
|
ppi->frameBufferAlloc = 0;
|
|
ppi->frameBuffer = 0;
|
|
|
|
if( ppi->fixedDivideAlloc )
|
|
duck_free ( ppi->fixedDivideAlloc );
|
|
ppi->fixedDivideAlloc = 0;
|
|
ppi->fixedDivide = 0;
|
|
}
|
|
|
|
/****************************************************************************
|
|
*
|
|
* ROUTINE : InitPreProc
|
|
*
|
|
* INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
|
|
* int FrameSize : Number of bytes in one frame.
|
|
*
|
|
* OUTPUTS : None.
|
|
*
|
|
* RETURNS : int: 1 if successful, 0 if failed.
|
|
*
|
|
* FUNCTION : Initializes prepprocessor instance.
|
|
*
|
|
* SPECIAL NOTES : None.
|
|
*
|
|
****************************************************************************/
|
|
int InitPreProc ( PreProcInstance *ppi, int FrameSize )
|
|
{
|
|
int i;
|
|
int MmxEnabled;
|
|
int XmmEnabled;
|
|
int WmtEnabled;
|
|
#ifndef MAPCA
|
|
GetProcessorFlags ( &MmxEnabled, &XmmEnabled, &WmtEnabled );
|
|
|
|
if ( WmtEnabled )
|
|
tempFilter = tempFilter_wmt;
|
|
else if ( MmxEnabled )
|
|
tempFilter = tempFilter_mmx;
|
|
else
|
|
#endif
|
|
tempFilter = tempFilter_c;
|
|
|
|
DeletePreProc ( ppi );
|
|
|
|
ppi->frameBufferAlloc = duck_malloc ( 32+FrameSize*7*sizeof(unsigned char), DMEM_GENERAL );
|
|
if ( !ppi->frameBufferAlloc ) { DeletePreProc( ppi ); return 0; }
|
|
ppi->frameBuffer = (unsigned char *) ROUNDUP32( ppi->frameBufferAlloc );
|
|
|
|
ppi->fixedDivideAlloc = duck_malloc ( 32+255*sizeof(unsigned int), DMEM_GENERAL );
|
|
if ( !ppi->fixedDivideAlloc ) { DeletePreProc( ppi ); return 0; }
|
|
ppi->fixedDivide = (unsigned int *) ROUNDUP32( ppi->fixedDivideAlloc );
|
|
|
|
for ( i=1; i<255; i++ )
|
|
ppi->fixedDivide[i] = 0x10000 / i;
|
|
return 1;
|
|
}
|
|
|
|
/****************************************************************************
|
|
*
|
|
* ROUTINE : spatialFilter_c
|
|
*
|
|
* INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
|
|
* unsigned char *s : Pointer to source frame.
|
|
* unsigned char *d : Pointer to destination frame.
|
|
* int width : Width of images.
|
|
* int height : Height of images.
|
|
* int pitch : Stride of images.
|
|
* int strength : Strength of filter to apply.
|
|
*
|
|
* OUTPUTS : None.
|
|
*
|
|
* RETURNS : void
|
|
*
|
|
* FUNCTION : Performs a closesness adjusted temporal blur.
|
|
*
|
|
* SPECIAL NOTES : None.
|
|
*
|
|
****************************************************************************/
|
|
void spatialFilter_c
|
|
(
|
|
PreProcInstance *ppi,
|
|
unsigned char *s,
|
|
unsigned char *d,
|
|
int width,
|
|
int height,
|
|
int pitch,
|
|
int strength
|
|
)
|
|
{
|
|
int modifier;
|
|
int byte = 0;
|
|
int row = 1;
|
|
int PixelOffsets[9];
|
|
|
|
|
|
PixelOffsets[0] = -pitch - 1;
|
|
PixelOffsets[1] = -pitch;
|
|
PixelOffsets[2] = -pitch + 1;
|
|
PixelOffsets[3] = - 1;
|
|
PixelOffsets[4] = 0;
|
|
PixelOffsets[5] = + 1;
|
|
PixelOffsets[6] = pitch - 1;
|
|
PixelOffsets[7] = pitch ;
|
|
PixelOffsets[8] = pitch + 1;
|
|
|
|
memcpy ( d, s, width );
|
|
|
|
d += pitch;
|
|
s += pitch;
|
|
|
|
do
|
|
{
|
|
int col = 1;
|
|
|
|
d[0] = s[0];
|
|
d[width - 1] = s[width - 1];
|
|
|
|
do
|
|
{
|
|
int accumulator = 0;
|
|
int count = 0;
|
|
int neighbor = 0;
|
|
|
|
do
|
|
{
|
|
int pixelValue = s[ col + PixelOffsets[neighbor] ];
|
|
|
|
modifier = s[col];
|
|
modifier -= pixelValue;
|
|
modifier *= modifier;
|
|
modifier >>= strength;
|
|
modifier *= 3;
|
|
|
|
if(modifier > 16)
|
|
modifier = 16;
|
|
|
|
modifier = 16 - modifier;
|
|
|
|
accumulator += modifier * pixelValue;
|
|
|
|
count += modifier;
|
|
|
|
neighbor++;
|
|
} while ( neighbor < sizeof(PixelOffsets)/sizeof(int) );
|
|
|
|
accumulator += (count >> 1);
|
|
accumulator *= ppi->fixedDivide[count];
|
|
accumulator >>= 16;
|
|
|
|
d[col] = accumulator;
|
|
|
|
++col;
|
|
|
|
} while ( col < width-1 );
|
|
|
|
d += pitch;
|
|
s += pitch;
|
|
|
|
++row;
|
|
|
|
} while ( row < height-1 );
|
|
|
|
memcpy ( d, s, width );
|
|
}
|