winamp/Src/h264dec/ldecod/src/intra16x16_pred.c

429 lines
13 KiB
C

/*!
*************************************************************************************
* \file intra16x16_pred.c
*
* \brief
* Functions for intra 8x8 prediction
*
* \author
* Main contributors (see contributors.h for copyright,
* address and affiliation details)
* - Yuri Vatis
* - Jan Muenster
* - Alexis Michael Tourapis <alexismt@ieee.org>
*
*************************************************************************************
*/
#include "global.h"
#include "intra16x16_pred.h"
#include "mb_access.h"
#include "image.h"
#include <emmintrin.h>
static void memset_16x16(h264_imgpel_macroblock_row_t *mb_pred, int pred)
{
if (sse2_flag)
{
__m128i xmm_pred = _mm_set1_epi8(pred);
int i;
__m128i *xmm_macroblock = (__m128i *)mb_pred;
for (i=0;i<16;i++)
{
_mm_store_si128(xmm_macroblock++, xmm_pred);
}
}
#ifdef _M_IX86
else
{
__m64 mmx_pred = _mm_set1_pi8(pred);
int i;
__m64 *mmx_macroblock = (__m64 *)mb_pred;
for (i=0;i<16;i++)
{
*mmx_macroblock++ = mmx_pred;
*mmx_macroblock++ = mmx_pred;
}
}
#else
else
{
int ii, jj;
for (jj = 0; jj < MB_BLOCK_SIZE; jj++)
{
for (ii = 0; ii < MB_BLOCK_SIZE; ii++)
{
mb_pred[jj][ii]=(imgpel) pred;
}
}
}
#endif
}
/*!
***********************************************************************
* \brief
* makes and returns 16x16 DC prediction mode
*
* \return
* DECODING_OK decoding of intraprediction mode was sucessfull \n
*
***********************************************************************
*/
static inline int intra16x16_dc_pred(Macroblock *currMB,
ColorPlane pl)
{
Slice *currSlice = currMB->p_Slice;
VideoParameters *p_Vid = currMB->p_Vid;
int s0 = 0, s1 = 0, s2 = 0;
int i;
imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
PixelPos up; //!< pixel position p(0,-1)
PixelPos left[17]; //!< pixel positions p(-1, -1..15)
int up_avail, left_avail, left_up_avail;
s1=s2=0;
p_Vid->getNeighbourNXLuma(currMB, -1, &left[0]);
p_Vid->getNeighbourLeftLuma(currMB, &left[1]);
p_Vid->getNeighbourNPLumaNB(currMB, 2-1, &left[2]);
p_Vid->getNeighbourNPLumaNB(currMB, 3-1, &left[3]);
p_Vid->getNeighbourNPLumaNB(currMB, 4-1, &left[4]);
p_Vid->getNeighbourNPLumaNB(currMB, 5-1, &left[5]);
p_Vid->getNeighbourNPLumaNB(currMB, 6-1, &left[6]);
p_Vid->getNeighbourNPLumaNB(currMB, 7-1, &left[7]);
p_Vid->getNeighbourNPLumaNB(currMB, 8-1, &left[8]);
p_Vid->getNeighbourNPLumaNB(currMB, 9-1, &left[9]);
p_Vid->getNeighbourNPLumaNB(currMB, 10-1, &left[10]);
p_Vid->getNeighbourNPLumaNB(currMB, 11-1, &left[11]);
p_Vid->getNeighbourNPLumaNB(currMB, 12-1, &left[12]);
p_Vid->getNeighbourNPLumaNB(currMB, 13-1, &left[13]);
p_Vid->getNeighbourNPLumaNB(currMB, 14-1, &left[14]);
p_Vid->getNeighbourNPLumaNB(currMB, 15-1, &left[15]);
p_Vid->getNeighbourNPLumaNB(currMB, 16-1, &left[16]);
p_Vid->getNeighbourUpLuma(currMB, &up);
if (!p_Vid->active_pps->constrained_intra_pred_flag)
{
up_avail = up.available;
left_avail = left[1].available;
left_up_avail = left[0].available;
}
else
{
up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0;
for (i = 1, left_avail = 1; i < 17; ++i)
left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0;
left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0;
}
if (up_avail)
{
s1 += imgY[up.pos_y][up.pos_x+0]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+1]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+2]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+3]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+4]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+5]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+6]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+7]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+8]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+9]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+10]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+11]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+12]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+13]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+14]; // sum hor pix
s1 += imgY[up.pos_y][up.pos_x+15]; // sum hor pix
}
if (left_avail)
{
s2 += imgY[left[0 + 1].pos_y][left[0 + 1].pos_x]; // sum vert pix
s2 += imgY[left[1 + 1].pos_y][left[1 + 1].pos_x]; // sum vert pix
s2 += imgY[left[2 + 1].pos_y][left[2 + 1].pos_x]; // sum vert pix
s2 += imgY[left[3 + 1].pos_y][left[3 + 1].pos_x]; // sum vert pix
s2 += imgY[left[4 + 1].pos_y][left[4 + 1].pos_x]; // sum vert pix
s2 += imgY[left[5 + 1].pos_y][left[5 + 1].pos_x]; // sum vert pix
s2 += imgY[left[6 + 1].pos_y][left[6 + 1].pos_x]; // sum vert pix
s2 += imgY[left[7 + 1].pos_y][left[7 + 1].pos_x]; // sum vert pix
s2 += imgY[left[8 + 1].pos_y][left[8 + 1].pos_x]; // sum vert pix
s2 += imgY[left[9 + 1].pos_y][left[9 + 1].pos_x]; // sum vert pix
s2 += imgY[left[10 + 1].pos_y][left[10 + 1].pos_x]; // sum vert pix
s2 += imgY[left[11 + 1].pos_y][left[11 + 1].pos_x]; // sum vert pix
s2 += imgY[left[12 + 1].pos_y][left[12 + 1].pos_x]; // sum vert pix
s2 += imgY[left[13 + 1].pos_y][left[13 + 1].pos_x]; // sum vert pix
s2 += imgY[left[14 + 1].pos_y][left[14 + 1].pos_x]; // sum vert pix
s2 += imgY[left[15 + 1].pos_y][left[15 + 1].pos_x]; // sum vert pix
}
if (up_avail && left_avail)
s0 = (s1 + s2 + 16)>>5; // no edge
else if (!up_avail && left_avail)
s0 = (s2 + 8)>>4; // upper edge
else if (up_avail && !left_avail)
s0 = (s1 + 8)>>4; // left edge
else
s0 = p_Vid->dc_pred_value_comp[pl]; // top left corner, nothing to predict from
memset_16x16(currSlice->mb_pred[pl], s0);
return DECODING_OK;
}
/*!
***********************************************************************
* \brief
* makes and returns 16x16 vertical prediction mode
*
* \return
* DECODING_OK decoding of intraprediction mode was sucessfull \n
*
***********************************************************************
*/
static inline int intra16x16_vert_pred(Macroblock *currMB,
ColorPlane pl)
{
Slice *currSlice = currMB->p_Slice;
VideoParameters *p_Vid = currMB->p_Vid;
int j;
imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
PixelPos up; //!< pixel position p(0,-1)
int up_avail;
p_Vid->getNeighbourUpLuma(currMB, &up);
if (!p_Vid->active_pps->constrained_intra_pred_flag)
{
up_avail = up.available;
}
else
{
up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0;
}
if (!up_avail)
error ("invalid 16x16 intra pred Mode VERT_PRED_16",500);
for(j=0;j<MB_BLOCK_SIZE;++j)
{
// TODO; take advantage of imgY's stride
memcpy(&currSlice->mb_pred[pl][j][0], &(imgY[up.pos_y][up.pos_x]), MB_BLOCK_SIZE * sizeof(imgpel));
}
return DECODING_OK;
}
/*!
***********************************************************************
* \brief
* makes and returns 16x16 horizontal prediction mode
*
* \return
* DECODING_OK decoding of intraprediction mode was sucessfull \n
*
***********************************************************************
*/
static inline int intra16x16_hor_pred(Macroblock *currMB,
ColorPlane pl)
{
Slice *currSlice = currMB->p_Slice;
VideoParameters *p_Vid = currMB->p_Vid;
int i,j;
imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
imgpel prediction;
PixelPos left[17]; //!< pixel positions p(-1, -1..15)
int left_avail, left_up_avail;
for (i=0;i<17;++i)
{
p_Vid->getNeighbourNXLuma(currMB, i-1, &left[i]);
}
if (!p_Vid->active_pps->constrained_intra_pred_flag)
{
left_avail = left[1].available;
left_up_avail = left[0].available;
}
else
{
for (i = 1, left_avail = 1; i < 17; ++i)
left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0;
left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0;
}
if (!left_avail)
error ("invalid 16x16 intra pred Mode HOR_PRED_16",500);
for(j = 0; j < MB_BLOCK_SIZE; ++j)
{
prediction = imgY[left[j+1].pos_y][left[j+1].pos_x];
for(i = 0; i < MB_BLOCK_SIZE; ++i)
currSlice->mb_pred[pl][j][i]= prediction; // store predicted 16x16 block
}
return DECODING_OK;
}
/*!
***********************************************************************
* \brief
* makes and returns 16x16 horizontal prediction mode
*
* \return
* DECODING_OK decoding of intraprediction mode was sucessfull \n
*
***********************************************************************
*/
static void planeset(h264_imgpel_macroblock_row_t *dest, int iaa, int ib, int ic)
{
int j;
__m128i i0_7 = _mm_setr_epi16(-7,-6,-5,-4,-3,-2,-1, 0);
__m128i i8_15 = _mm_setr_epi16(1,2,3,4,5,6,7,8);
__m128i xmm_ib = _mm_set1_epi16(ib);
int j7ic = iaa + -7 * ic + 16;
i0_7 = _mm_mullo_epi16(i0_7, xmm_ib);
i8_15 = _mm_mullo_epi16(i8_15, xmm_ib);
for (j = 0;j < MB_BLOCK_SIZE; ++j)
{
__m128i xmm_j7ic = _mm_set1_epi16(j7ic);
__m128i xmm_lo = _mm_add_epi16(i0_7, xmm_j7ic);
__m128i xmm_hi = _mm_add_epi16(i8_15, xmm_j7ic);
__m128i xmm_store;
xmm_lo = _mm_srai_epi16(xmm_lo, 5);
xmm_hi = _mm_srai_epi16(xmm_hi, 5);
xmm_store = _mm_packus_epi16(xmm_lo, xmm_hi);
_mm_store_si128((__m128i *)dest[j], xmm_store);
j7ic += ic;
}// store plane prediction
}
static inline int intra16x16_plane_pred(Macroblock *currMB,
ColorPlane pl)
{
Slice *currSlice = currMB->p_Slice;
VideoParameters *p_Vid = currMB->p_Vid;
int i,j;
int ih = 0, iv = 0;
int ib,ic,iaa;
imgpel **imgY = (pl) ? p_Vid->dec_picture->imgUV[pl - 1]->img : p_Vid->dec_picture->imgY->img;
imgpel *mpr_line;
int max_imgpel_value = p_Vid->max_pel_value_comp[pl];
PixelPos up; //!< pixel position p(0,-1)
PixelPos left[17]; //!< pixel positions p(-1, -1..15)
int up_avail, left_avail, left_up_avail;
p_Vid->getNeighbourNXLuma(currMB, -1, &left[0]);
p_Vid->getNeighbourLeftLuma(currMB, &left[1]);
for (i=2;i<17; ++i)
{
p_Vid->getNeighbourNPLumaNB(currMB, i-1, &left[i]);
}
p_Vid->getNeighbourUpLuma(currMB, &up);
if (!p_Vid->active_pps->constrained_intra_pred_flag)
{
up_avail = up.available;
left_avail = left[1].available;
left_up_avail = left[0].available;
}
else
{
up_avail = up.available ? p_Vid->intra_block[up.mb_addr] : 0;
for (i = 1, left_avail = 1; i < 17; ++i)
left_avail &= left[i].available ? p_Vid->intra_block[left[i].mb_addr]: 0;
left_up_avail = left[0].available ? p_Vid->intra_block[left[0].mb_addr]: 0;
}
if (!up_avail || !left_up_avail || !left_avail)
error ("invalid 16x16 intra pred Mode PLANE_16",500);
mpr_line = &imgY[up.pos_y][up.pos_x+7];
for (i = 1; i < 8; ++i)
{
ih += i*(mpr_line[i] - mpr_line[-i]);
iv += i*(imgY[left[8+i].pos_y][left[8+i].pos_x] - imgY[left[8-i].pos_y][left[8-i].pos_x]);
}
ih += 8*(mpr_line[8] - imgY[left[0].pos_y][left[0].pos_x]);
iv += 8*(imgY[left[16].pos_y][left[16].pos_x] - imgY[left[0].pos_y][left[0].pos_x]);
ib=(5 * ih + 32)>>6;
ic=(5 * iv + 32)>>6;
iaa=16 * (mpr_line[8] + imgY[left[16].pos_y][left[16].pos_x]);
if (sse2_flag)
{
planeset(currSlice->mb_pred[pl], iaa, ib, ic);
}
else
{
// TODO: MMX
for (j = 0;j < MB_BLOCK_SIZE; ++j)
{
int j7ic = iaa + (j - 7) * ic + 16;
for (i = 0;i < MB_BLOCK_SIZE; ++i)
{
currSlice->mb_pred[pl][j][i] = (imgpel) iClip1(max_imgpel_value, (((i - 7) * ib + j7ic) >> 5));
}
}// store plane prediction
}
return DECODING_OK;
}
/*!
***********************************************************************
* \brief
* makes and returns 16x16 intra prediction blocks
*
* \return
* DECODING_OK decoding of intraprediction mode was sucessfull \n
* SEARCH_SYNC search next sync element as errors while decoding occured
***********************************************************************
*/
// TODO: replace with ippiPredictIntra_16x16_H264_8u_C1IR ?
int intrapred16x16(Macroblock *currMB, //!< Current Macroblock
ColorPlane pl, //!< Current colorplane (for 4:4:4)
int predmode) //!< prediction mode
{
switch (predmode)
{
case VERT_PRED_16: // vertical prediction from block above
return (intra16x16_vert_pred(currMB, pl));
break;
case HOR_PRED_16: // horizontal prediction from left block
return (intra16x16_hor_pred(currMB, pl));
break;
case DC_PRED_16: // DC prediction
return (intra16x16_dc_pred(currMB, pl));
break;
case PLANE_16:// 16 bit integer plan pred
return (intra16x16_plane_pred(currMB, pl));
break;
default:
{ // indication of fault in bitstream,exit
printf("illegal 16x16 intra prediction mode input: %d\n",predmode);
return SEARCH_SYNC;
}
}
}