winamp/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h

1181 lines
29 KiB
C++

//$ nobt
//$ nocpp
/**
* @file CDSPFracInterpolator.h
*
* @brief Fractional delay interpolator and filter bank classes.
*
* This file includes fractional delay interpolator class.
*
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPFRACINTERPOLATOR_INCLUDED
#define R8B_CDSPFRACINTERPOLATOR_INCLUDED
#include "CDSPSincFilterGen.h"
#include "CDSPProcessor.h"
namespace r8b {
#if R8B_FLTTEST
extern int InterpFilterFracs; ///< Force this number of fractional filter
///< positions. -1 - use default.
///<
#endif // R8B_FLTTEST
/**
* @brief Sinc function-based fractional delay filter bank class.
*
* Class implements storage and initialization of a bank of sinc-based
* fractional delay filters, expressed as 0th, 1st, 2nd or 3rd order
* polynomial interpolation coefficients. The filters are windowed by the
* "Kaiser" power-raised window function.
*/
class CDSPFracDelayFilterBank : public R8B_BASECLASS
{
R8BNOCTOR( CDSPFracDelayFilterBank );
friend class CDSPFracDelayFilterBankCache;
public:
/**
* Constructor.
*
* @param aFilterFracs The number of fractional delay positions to sample,
* -1 - use default.
* @param aElementSize The size of each filter's tap, in "double" values.
* This parameter corresponds to the complexity of interpolation. 4 should
* be set for 3rd order, 3 for 2nd order, 2 for linear interpolation, 1
* for whole-numbered stepping.
* @param aInterpPoints The number of points the interpolation is based
* on. This value should not be confused with the ElementSize. Set to 2
* for linear or no interpolation.
* @param aReqAtten Required filter attentuation.
* @param aIsThird "True" if one-third filter is required.
*/
CDSPFracDelayFilterBank( const int aFilterFracs, const int aElementSize,
const int aInterpPoints, const double aReqAtten, const bool aIsThird )
: InitFilterFracs( aFilterFracs )
, ElementSize( aElementSize )
, InterpPoints( aInterpPoints )
, ReqAtten( aReqAtten )
, IsThird( aIsThird )
, Next( NULL )
, RefCount( 1 )
{
R8BASSERT( ElementSize >= 1 && ElementSize <= 4 );
// Kaiser window function Params, for half and third-band.
const double* const Params = getWinParams( ReqAtten, IsThird,
FilterLen );
FilterSize = FilterLen * ElementSize;
if( InitFilterFracs == -1 )
{
FilterFracs = (int) ceil( pow( 6.4, ReqAtten / 50.0 ));
#if R8B_FLTTEST
if( InterpFilterFracs != -1 )
{
FilterFracs = InterpFilterFracs;
}
#endif // R8B_FLTTEST
}
else
{
FilterFracs = InitFilterFracs;
}
Table.alloc( FilterSize * ( FilterFracs + InterpPoints ));
CDSPSincFilterGen sinc;
sinc.Len2 = FilterLen / 2;
double* p = Table;
const int pc2 = InterpPoints / 2;
int i;
for( i = -pc2 + 1; i <= FilterFracs + pc2; i++ )
{
sinc.FracDelay = (double) ( FilterFracs - i ) / FilterFracs;
sinc.initFrac( CDSPSincFilterGen :: wftKaiser, Params, true );
sinc.generateFrac( p, &CDSPSincFilterGen :: calcWindowKaiser,
ElementSize );
normalizeFIRFilter( p, FilterLen, 1.0, ElementSize );
p += FilterSize;
}
const int TablePos2 = FilterSize;
const int TablePos3 = FilterSize * 2;
const int TablePos4 = FilterSize * 3;
const int TablePos5 = FilterSize * 4;
const int TablePos6 = FilterSize * 5;
const int TablePos7 = FilterSize * 6;
const int TablePos8 = FilterSize * 7;
double* const TableEnd = Table + ( FilterFracs + 1 ) * FilterSize;
p = Table;
if( InterpPoints == 8 )
{
if( ElementSize == 3 )
{
// Calculate 2nd order spline (polynomial) interpolation
// coefficients using 8 points.
while( p < TableEnd )
{
calcSpline2p8Coeffs( p, p[ 0 ], p[ TablePos2 ],
p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ],
p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]);
p += ElementSize;
}
#if defined( R8B_SIMD_ISH )
shuffle2_3( Table, TableEnd );
#endif // SIMD
}
else
if( ElementSize == 4 )
{
// Calculate 3rd order spline (polynomial) interpolation
// coefficients using 8 points.
while( p < TableEnd )
{
calcSpline3p8Coeffs( p, p[ 0 ], p[ TablePos2 ],
p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ],
p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]);
p += ElementSize;
}
#if defined( R8B_SIMD_ISH )
shuffle2_4( Table, TableEnd );
#endif // SIMD
}
}
else
{
if( ElementSize == 2 )
{
// Calculate linear interpolation coefficients.
while( p < TableEnd )
{
p[ 1 ] = p[ TablePos2 ] - p[ 0 ];
p += ElementSize;
}
#if defined( R8B_SIMD_ISH )
shuffle2_2( Table, TableEnd );
#endif // SIMD
}
}
R8BCONSOLE( "CDSPFracDelayFilterBank: fracs=%i order=%i taps=%i "
"att=%.1f third=%i\n", FilterFracs, ElementSize - 1, FilterLen,
ReqAtten, (int) IsThird );
}
~CDSPFracDelayFilterBank()
{
delete Next;
}
/**
* Function "rounds" the specified attenuation to the nearest effective
* value.
*
* @param[in,out] att Required filter attentuation. Will be rounded to the
* nearest value.
* @param aIsThird "True" if one-third filter is required.
*/
static void roundReqAtten( double& att, const bool aIsThird )
{
int tmp;
getWinParams( att, aIsThird, tmp );
}
/**
* Function returns the length of the filter. Always an even number, not
* less than 6.
*/
int getFilterLen() const
{
return( FilterLen );
}
/**
* Function returns the number of fractional positions sampled by the
* bank.
*/
int getFilterFracs() const
{
return( FilterFracs );
}
/**
* @param i Filter index, in the range 0 to FilterFracs, inclusive.
* @return Reference to the filter.
*/
const double& operator []( const int i ) const
{
R8BASSERT( i >= 0 && i <= FilterFracs );
return( Table[ i * FilterSize ]);
}
/**
* This function should be called when the filter bank obtained via the
* filter bank cache is no longer needed.
*/
void unref();
private:
int FilterLen; ///< Filter length. Always an even number, not less than 6.
///<
int FilterFracs; ///< Fractional position count.
///<
int InitFilterFracs; ///< Fractional position count as supplied to the
///< constructor, may equal -1.
///<
int ElementSize; ///< Filter element size.
///<
int InterpPoints; ///< Interpolation points to use.
///<
double ReqAtten; ///< Filter's attentuation.
///<
bool IsThird; ///< "True" if one-third filter is in use.
///<
int FilterSize; ///< This constant specifies the "size" of a single filter
///< in "double" elements.
///<
CFixedBuffer< double > Table; ///< The table of fractional delay filters
///< for all discrete fractional x = 0..1 sample positions, and
///< interpolation coefficients.
///<
CDSPFracDelayFilterBank* Next; ///< Next filter bank in cache's list.
///<
int RefCount; ///< The number of references made to *this filter bank.
///< Not considered for "static" filter bank objects.
///<
/**
* Function returns windowing function parameters for the specified
* attenuation and filter type.
*
* @param[in,out] att Required filter attentuation. Will be rounded to the
* nearest value.
* @param aIsThird "True" if one-third filter is required.
* @param[out] fltlen Resulting filter length.
*/
static const double* getWinParams( double& att, const bool aIsThird,
int& fltlen )
{
static const int Coeffs2Base = 8;
static const int Coeffs2Count = 12;
static const double Coeffs2[ Coeffs2Count ][ 3 ] = {
{ 4.1308468534586913, 1.1752580009977263, 55.5446 }, // 0.0256
{ 4.4241520324148826, 1.8004881791443044, 81.4191 }, // 0.0886
{ 5.2615232289173663, 1.8133318236025469, 96.3392 }, // 0.0481
{ 5.9433751227216174, 1.8730186391986436, 111.1315 }, // 0.0264
{ 6.8308658290513815, 1.8549555110340281, 125.4653 }, // 0.0146
{ 7.6648458290312904, 1.8565766090828464, 139.7379 }, // 0.0081
{ 8.2038728664307605, 1.9269521820570166, 154.0532 }, // 0.0045
{ 8.7865150946655142, 1.9775307667441668, 168.2101 }, // 0.0025
{ 9.5945017884101773, 1.9718456992078597, 182.1076 }, // 0.0014
{ 10.5163141145985240, 1.9504067820201083, 195.5668 }, // 0.0008
{ 10.2382465206362470, 2.1608923446870087, 209.0610 }, // 0.0004
{ 10.9976060250714000, 2.1536533525688935, 222.5010 }, // 0.0003
};
static const int Coeffs3Base = 6;
static const int Coeffs3Count = 10;
static const double Coeffs3[ Coeffs3Count ][ 3 ] = {
{ 3.9888564562781847, 1.5869927184268915, 66.5701 }, // 0.0467
{ 4.6986694038145007, 1.8086068597928262, 86.4715 }, // 0.0136
{ 5.5995071329337822, 1.8930163360942349, 106.1195 }, // 0.0040
{ 6.3627287800257228, 1.9945748322093975, 125.2307 }, // 0.0012
{ 7.4299550711428308, 1.9893400572347544, 144.3469 }, // 0.0004
{ 8.0667715944075642, 2.0928201458699909, 163.4099 }, // 0.0001
{ 8.7469970226288822, 2.1640279784268355, 181.0694 }, // 0.0000
{ 10.0823430069835230, 2.0896678025321922, 199.2880 }, // 0.0000
{ 10.9222206090489510, 2.1221681162186004, 216.6865 }, // 0.0000
{ 21.2017743894772010, 1.1856768080118900, 233.9188 }, // 0.0000
};
const double* Params;
int i = 0;
if( aIsThird )
{
while( i != Coeffs3Count - 1 && Coeffs3[ i ][ 2 ] < att )
{
i++;
}
Params = &Coeffs3[ i ][ 0 ];
att = Coeffs3[ i ][ 2 ];
fltlen = Coeffs3Base + i * 2;
}
else
{
while( i != Coeffs2Count - 1 && Coeffs2[ i ][ 2 ] < att )
{
i++;
}
Params = &Coeffs2[ i ][ 0 ];
att = Coeffs2[ i ][ 2 ];
fltlen = Coeffs2Base + i * 2;
}
return( Params );
}
/**
* Function shuffles 2 order-2 filter points for SIMD operation.
*
* @param p Filter table start pointer.
* @param pe Filter table end pointer.
*/
static void shuffle2_2( double* p, double* const pe )
{
while( p != pe )
{
const double t = p[ 2 ];
p[ 2 ] = p[ 1 ];
p[ 1 ] = t;
p += 4;
}
}
/**
* Function shuffles 2 order-3 filter points for SIMD operation.
*
* @param p Filter table start pointer.
* @param pe Filter table end pointer.
*/
static void shuffle2_3( double* p, double* const pe )
{
while( p != pe )
{
const double t1 = p[ 1 ];
const double t2 = p[ 2 ];
const double t3 = p[ 3 ];
const double t4 = p[ 4 ];
p[ 1 ] = t3;
p[ 2 ] = t1;
p[ 3 ] = t4;
p[ 4 ] = t2;
p += 6;
}
}
/**
* Function shuffles 2 order-4 filter points for SIMD operation.
*
* @param p Filter table start pointer.
* @param pe Filter table end pointer.
*/
static void shuffle2_4( double* p, double* const pe )
{
while( p != pe )
{
const double t1 = p[ 1 ];
const double t2 = p[ 2 ];
const double t3 = p[ 3 ];
const double t4 = p[ 4 ];
const double t5 = p[ 5 ];
const double t6 = p[ 6 ];
p[ 1 ] = t4;
p[ 2 ] = t1;
p[ 3 ] = t5;
p[ 4 ] = t2;
p[ 5 ] = t6;
p[ 6 ] = t3;
p += 8;
}
}
};
/**
* @brief Fractional delay filter cache class.
*
* Class implements cache storage of fractional delay filter banks.
*/
class CDSPFracDelayFilterBankCache : public R8B_BASECLASS
{
R8BNOCTOR( CDSPFracDelayFilterBankCache );
friend class CDSPFracDelayFilterBank;
public:
/**
* @return The number of filters present in the cache now. This value can
* be monitored for debugging "forgotten" filters.
*/
static int getObjCount()
{
R8BSYNC( StateSync );
return( ObjCount );
}
/**
* Function calculates or returns reference to a previously calculated
* (cached) fractional delay filter bank.
*
* @param aFilterFracs The number of fractional delay positions to sample,
* -1 - use default.
* @param aElementSize The size of each filter's tap, in "double" values.
* @param aInterpPoints The number of points the interpolation is based
* on.
* @param ReqAtten Required filter attentuation.
* @param IsThird "True" if one-third filter is required.
* @param IsStatic "True" if a permanent static filter should be returned
* that is never removed from the cache until application terminates.
*/
static CDSPFracDelayFilterBank& getFilterBank( const int aFilterFracs,
const int aElementSize, const int aInterpPoints,
double ReqAtten, const bool IsThird, const bool IsStatic )
{
CDSPFracDelayFilterBank :: roundReqAtten( ReqAtten, IsThird );
R8BSYNC( StateSync );
if( IsStatic )
{
CDSPFracDelayFilterBank* CurObj = StaticObjects;
while( CurObj != NULL )
{
if( CurObj -> InitFilterFracs == aFilterFracs &&
CurObj -> ElementSize == aElementSize &&
CurObj -> InterpPoints == aInterpPoints &&
CurObj -> ReqAtten == ReqAtten &&
CurObj -> IsThird == IsThird )
{
return( *CurObj );
}
CurObj = CurObj -> Next;
}
// Create a new filter bank and build it.
CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize,
aInterpPoints, ReqAtten, IsThird );
// Insert the bank at the start of the list.
CurObj -> Next = StaticObjects.unkeep();
StaticObjects = CurObj;
return( *CurObj );
}
CDSPFracDelayFilterBank* PrevObj = NULL;
CDSPFracDelayFilterBank* CurObj = Objects;
while( CurObj != NULL )
{
if( CurObj -> InitFilterFracs == aFilterFracs &&
CurObj -> ElementSize == aElementSize &&
CurObj -> InterpPoints == aInterpPoints &&
CurObj -> ReqAtten == ReqAtten &&
CurObj -> IsThird == IsThird )
{
break;
}
if( CurObj -> Next == NULL && ObjCount >= R8B_FRACBANK_CACHE_MAX )
{
if( CurObj -> RefCount == 0 )
{
// Delete the last bank which is not used.
PrevObj -> Next = NULL;
delete CurObj;
ObjCount--;
}
else
{
// Move the last bank to the top of the list since it
// seems to be in use for a long time.
PrevObj -> Next = NULL;
CurObj -> Next = Objects.unkeep();
Objects = CurObj;
}
CurObj = NULL;
break;
}
PrevObj = CurObj;
CurObj = CurObj -> Next;
}
if( CurObj != NULL )
{
CurObj -> RefCount++;
if( PrevObj == NULL )
{
return( *CurObj );
}
// Remove the bank from the list temporarily.
PrevObj -> Next = CurObj -> Next;
}
else
{
// Create a new filter bank (with RefCount == 1) and build it.
CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize,
aInterpPoints, ReqAtten, IsThird );
ObjCount++;
}
// Insert the bank at the start of the list.
CurObj -> Next = Objects.unkeep();
Objects = CurObj;
return( *CurObj );
}
private:
static CSyncObject StateSync; ///< Cache state synchronizer.
///<
static CPtrKeeper< CDSPFracDelayFilterBank* > Objects; ///< The chain of
///< cached objects.
///<
static CPtrKeeper< CDSPFracDelayFilterBank* > StaticObjects; ///< The
///< chain of static objects.
///<
static int ObjCount; ///< The number of objects currently preset in the
///< Objects cache.
///<
};
// ---------------------------------------------------------------------------
// CDSPFracDelayFilterBank PUBLIC
// ---------------------------------------------------------------------------
inline void CDSPFracDelayFilterBank :: unref()
{
R8BSYNC( CDSPFracDelayFilterBankCache :: StateSync );
RefCount--;
}
/**
* @param l Number 1.
* @param s Number 2.
* @param[out] GCD Resulting GCD.
* @return "True" if the greatest common denominator of 2 numbers was
* found.
*/
inline bool findGCD( double l, double s, double& GCD )
{
int it = 0;
while( it < 50 )
{
if( s <= 0.0 )
{
GCD = l;
return( true );
}
const double r = l - s;
l = s;
s = ( r < 0.0 ? -r : r );
it++;
}
return( false );
}
/**
* Function evaluates source and destination sample rate ratio and returns
* the required input and output stepping. Function returns "false" if
* whole stepping cannot be used to perform interpolation using these sample
* rates.
*
* @param SSampleRate Source sample rate.
* @param DSampleRate Destination sample rate.
* @param[out] ResInStep Resulting input step.
* @param[out] ResOutStep Resulting output step.
* @return "True" if stepping was acquired.
*/
inline bool getWholeStepping( const double SSampleRate,
const double DSampleRate, int& ResInStep, int& ResOutStep )
{
double GCD;
if( !findGCD( SSampleRate, DSampleRate, GCD ) || GCD < 1.0 )
{
return( false );
}
const double InStep0 = SSampleRate / GCD;
ResInStep = (int) InStep0;
const double OutStep0 = DSampleRate / GCD;
ResOutStep = (int) OutStep0;
if( InStep0 != ResInStep || OutStep0 != ResOutStep )
{
return( false );
}
if( ResOutStep > 1500 )
{
// Do not allow large output stepping due to low cache
// performance of large filter banks.
return( false );
}
return( true );
}
/**
* @brief Fractional delay filter bank-based interpolator class.
*
* Class implements the fractional delay interpolator. This implementation at
* first puts the input signal into a ring buffer and then performs
* interpolation. The interpolation is performed using sinc-based fractional
* delay filters. These filters are contained in a bank, and for higher
* precision they are interpolated between adjacent filters.
*
* To increase sample timing precision, this class uses "resettable counter"
* approach. This gives zero overall sample timing error. With the
* R8B_FASTTIMING configuration option enabled, the sample timing experiences
* a very minor drift.
*/
class CDSPFracInterpolator : public CDSPProcessor
{
public:
/**
* Constructor initalizes the interpolator. It is important to call the
* getMaxOutLen() function afterwards to obtain the optimal output buffer
* length.
*
* @param aSrcSampleRate Source sample rate.
* @param aDstSampleRate Destination sample rate.
* @param ReqAtten Required filter attentuation.
* @param IsThird "True" if one-third filter is required.
* @param PrevLatency Latency, in samples (any value >=0), which was left
* in the output signal by a previous process. This latency will be
* consumed completely.
*/
CDSPFracInterpolator( const double aSrcSampleRate,
const double aDstSampleRate, const double ReqAtten,
const bool IsThird, const double PrevLatency )
: SrcSampleRate( aSrcSampleRate )
, DstSampleRate( aDstSampleRate )
#if R8B_FASTTIMING
, FracStep( aSrcSampleRate / aDstSampleRate )
#endif // R8B_FASTTIMING
{
R8BASSERT( SrcSampleRate > 0.0 );
R8BASSERT( DstSampleRate > 0.0 );
R8BASSERT( PrevLatency >= 0.0 );
R8BASSERT( BufLenBits >= 5 );
InitFracPos = PrevLatency;
Latency = (int) InitFracPos;
InitFracPos -= Latency;
R8BASSERT( Latency >= 0 );
#if R8B_FLTTEST
IsWhole = false;
LatencyFrac = 0.0;
FilterBank = new CDSPFracDelayFilterBank( -1, 3, 8, ReqAtten,
IsThird );
#else // R8B_FLTTEST
IsWhole = getWholeStepping( SrcSampleRate, DstSampleRate, InStep,
OutStep );
if( IsWhole )
{
InitFracPosW = (int) ( InitFracPos * OutStep );
LatencyFrac = InitFracPos - (double) InitFracPosW / OutStep;
FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank(
OutStep, 1, 2, ReqAtten, IsThird, false );
}
else
{
LatencyFrac = 0.0;
FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank(
-1, 3, 8, ReqAtten, IsThird, true );
}
#endif // R8B_FLTTEST
FilterLen = FilterBank -> getFilterLen();
fl2 = FilterLen >> 1;
fll = fl2 - 1;
flo = fll + fl2;
flb = BufLen - fll;
R8BASSERT(( 1 << BufLenBits ) >= FilterLen * 3 );
static const CConvolveFn FltConvFn0[ 13 ] = {
&CDSPFracInterpolator :: convolve0< 6 >,
&CDSPFracInterpolator :: convolve0< 8 >,
&CDSPFracInterpolator :: convolve0< 10 >,
&CDSPFracInterpolator :: convolve0< 12 >,
&CDSPFracInterpolator :: convolve0< 14 >,
&CDSPFracInterpolator :: convolve0< 16 >,
&CDSPFracInterpolator :: convolve0< 18 >,
&CDSPFracInterpolator :: convolve0< 20 >,
&CDSPFracInterpolator :: convolve0< 22 >,
&CDSPFracInterpolator :: convolve0< 24 >,
&CDSPFracInterpolator :: convolve0< 26 >,
&CDSPFracInterpolator :: convolve0< 28 >,
&CDSPFracInterpolator :: convolve0< 30 >
};
convfn = ( IsWhole ? FltConvFn0[ fl2 - 3 ] :
&CDSPFracInterpolator :: convolve2 );
R8BCONSOLE( "CDSPFracInterpolator: src=%.2f dst=%.2f taps=%i "
"fracs=%i whole=%i third=%i step=%.6f\n", SrcSampleRate,
DstSampleRate, FilterLen, ( IsWhole ? OutStep :
FilterBank -> getFilterFracs() ), (int) IsWhole, (int) IsThird,
aSrcSampleRate / aDstSampleRate );
clear();
}
virtual ~CDSPFracInterpolator()
{
#if R8B_FLTTEST
delete FilterBank;
#else // R8B_FLTTEST
FilterBank -> unref();
#endif // R8B_FLTTEST
}
virtual int getLatency() const
{
return( 0 );
}
virtual double getLatencyFrac() const
{
return( LatencyFrac );
}
virtual int getMaxOutLen( const int MaxInLen ) const
{
R8BASSERT( MaxInLen >= 0 );
return( (int) ceil( MaxInLen * DstSampleRate / SrcSampleRate ) + 1 );
}
virtual void clear()
{
LatencyLeft = Latency;
BufLeft = 0;
WritePos = 0;
ReadPos = flb; // Set "read" position to account for filter's
// latency at zero fractional delay.
memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
if( IsWhole )
{
InPosFracW = InitFracPosW;
}
else
{
InPosFrac = InitFracPos;
#if !R8B_FASTTIMING
InCounter = 0;
InPosInt = 0;
InPosShift = InitFracPos * DstSampleRate / SrcSampleRate;
#endif // !R8B_FASTTIMING
}
}
virtual int process( double* ip, int l, double*& op0 )
{
R8BASSERT( l >= 0 );
R8BASSERT( ip != op0 || l == 0 || SrcSampleRate > DstSampleRate );
if( LatencyLeft != 0 )
{
if( LatencyLeft >= l )
{
LatencyLeft -= l;
return( 0 );
}
l -= LatencyLeft;
ip += LatencyLeft;
LatencyLeft = 0;
}
double* op = op0;
while( l > 0 )
{
// Add new input samples to both halves of the ring buffer.
const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
double* const wp1 = Buf + WritePos;
memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
if( WritePos < flo )
{
const int c = min( b, flo - WritePos );
memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
}
ip += b;
WritePos = ( WritePos + b ) & BufLenMask;
l -= b;
BufLeft += b;
// Produce as many output samples as possible.
op = ( *this.*convfn )( op );
}
#if !R8B_FASTTIMING
if( !IsWhole && InCounter > 1000 )
{
// Reset the interpolation position counter to achieve a
// higher sample timing precision.
InCounter = 0;
InPosInt = 0;
InPosShift = InPosFrac * DstSampleRate / SrcSampleRate;
}
#endif // !R8B_FASTTIMING
return( (int) ( op - op0 ));
}
private:
static const int BufLenBits = 8; ///< The length of the ring buffer,
///< expressed as Nth power of 2. This value can be reduced if it is
///< known that only short input buffers will be passed to the
///< interpolator. The minimum value of this parameter is 5, and
///< 1 << BufLenBits should be at least 3 times larger than the
///< FilterLen. However, this condition can be easily met if the input
///< signal is suitably downsampled first before the interpolation is
///< performed.
///<
static const int BufLen = 1 << BufLenBits; ///< The length of the ring
///< buffer. The actual length is twice as long to allow "beyond max
///< position" positioning.
///<
static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
///< position wrapping.
///<
int FilterLen; ///< Filter length, in taps. Even value.
///<
int fl2; ///< Right-side (half) filter length.
///<
int fll; ///< Input latency.
///<
int flo; ///< Overrun length.
///<
int flb; ///< Initial read position and maximal buffer write length.
///<
double Buf[ BufLen + 29 ]; ///< The ring buffer, including overrun
///< protection for maximal filter length.
///<
double SrcSampleRate; ///< Source sample rate.
///<
double DstSampleRate; ///< Destination sample rate.
///<
bool IsWhole; ///< "True" if whole-number stepping is in use.
///<
int InStep; ///< Input whole-number stepping.
///<
int OutStep; ///< Output whole-number stepping (corresponds to filter bank
///< size).
///<
double InitFracPos; ///< Initial fractional position, in samples, in the
///< range [0; 1).
///<
int InitFracPosW; ///< Initial fractional position for whole-number
///< stepping.
///<
int Latency; ///< Initial latency that should be removed from the input.
///<
double LatencyFrac; ///< Left-over fractional latency.
///<
int BufLeft; ///< The number of samples left in the buffer to process.
///<
int WritePos; ///< The current buffer write position. Incremented together
///< with the BufLeft variable.
///<
int ReadPos; ///< The current buffer read position.
///<
int LatencyLeft; ///< Input latency left to remove.
///<
double InPosFrac; ///< Interpolation position (fractional part).
///<
int InPosFracW; ///< Interpolation position (fractional part) for
///< whole-number stepping. Corresponds to the index into the filter
///< bank.
///<
CDSPFracDelayFilterBank* FilterBank; ///< Filter bank in use, may be
///< whole-number stepping filter bank or static bank.
///<
#if R8B_FASTTIMING
double FracStep; ///< Fractional sample timing step.
#else // R8B_FASTTIMING
int InCounter; ///< Interpolation step counter.
///<
int InPosInt; ///< Interpolation position (integer part).
///<
double InPosShift; ///< Interpolation position fractional shift.
///<
#endif // R8B_FASTTIMING
typedef double*( CDSPFracInterpolator :: *CConvolveFn )( double* op ); ///<
///< Convolution function type.
///<
CConvolveFn convfn; ///< Convolution function in use.
///<
/**
* Convolution function for 0th order resampling.
*
* @param[out] op Output buffer.
* @return Advanced "op" value.
* @tparam fltlen Filter length, in taps.
*/
template< int fltlen >
double* convolve0( double* op )
{
while( BufLeft > fl2 )
{
const double* const ftp = &(*FilterBank)[ InPosFracW ];
const double* const rp = Buf + ReadPos;
int i;
#if defined( R8B_SSE2 ) && !defined( __INTEL_COMPILER )
__m128d s = _mm_setzero_pd();
for( i = 0; i < fltlen; i += 2 )
{
const __m128d m = _mm_mul_pd( _mm_load_pd( ftp + i ),
_mm_loadu_pd( rp + i ));
s = _mm_add_pd( s, m );
}
_mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 )));
#elif defined( R8B_NEON )
float64x2_t s = vdupq_n_f64( 0.0 );
for( i = 0; i < fltlen; i += 2 )
{
s = vmlaq_f64( s, vld1q_f64( ftp + i ), vld1q_f64( rp + i ));
}
*op = vaddvq_f64( s );
#else // SIMD
double s = 0.0;
for( i = 0; i < fltlen; i++ )
{
s += ftp[ i ] * rp[ i ];
}
*op = s;
#endif // SIMD
op++;
InPosFracW += InStep;
const int PosIncr = InPosFracW / OutStep;
InPosFracW -= PosIncr * OutStep;
ReadPos = ( ReadPos + PosIncr ) & BufLenMask;
BufLeft -= PosIncr;
}
return( op );
}
/**
* Convolution function for 2nd order resampling.
*
* @param[out] op Output buffer.
* @return Advanced "op" value.
*/
double* convolve2( double* op )
{
const CDSPFracDelayFilterBank& fb = *FilterBank;
const int fltlen = FilterLen;
while( BufLeft > fl2 )
{
double x = InPosFrac * fb.getFilterFracs();
const int fti = (int) x; // Function table index.
x -= fti; // Coefficient for interpolation between
// adjacent fractional delay filters.
const double x2d = x * x;
const double* ftp = &fb[ fti ];
const double* const rp = Buf + ReadPos;
int i;
#if defined( R8B_SSE2 ) && defined( R8B_SIMD_ISH )
const __m128d x1 = _mm_set1_pd( x );
const __m128d x2 = _mm_set1_pd( x2d );
__m128d s = _mm_setzero_pd();
for( i = 0; i < fltlen; i += 2 )
{
const __m128d ftp2 = _mm_load_pd( ftp + 2 );
const __m128d xx1 = _mm_mul_pd( ftp2, x1 );
const __m128d ftp4 = _mm_load_pd( ftp + 4 );
const __m128d xx2 = _mm_mul_pd( ftp4, x2 );
const __m128d ftp0 = _mm_load_pd( ftp );
ftp += 6;
const __m128d rpi = _mm_loadu_pd( rp + i );
const __m128d xxs = _mm_add_pd( ftp0, _mm_add_pd( xx1, xx2 ));
s = _mm_add_pd( s, _mm_mul_pd( rpi, xxs ));
}
_mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 )));
#elif defined( R8B_NEON ) && defined( R8B_SIMD_ISH )
const float64x2_t x1 = vdupq_n_f64( x );
const float64x2_t x2 = vdupq_n_f64( x2d );
float64x2_t s = vdupq_n_f64( 0.0 );
for( i = 0; i < fltlen; i += 2 )
{
const float64x2_t ftp2 = vld1q_f64( ftp + 2 );
const float64x2_t xx1 = vmulq_f64( ftp2, x1 );
const float64x2_t ftp4 = vld1q_f64( ftp + 4 );
const float64x2_t xx2 = vmulq_f64( ftp4, x2 );
const float64x2_t ftp0 = vld1q_f64( ftp );
ftp += 6;
const float64x2_t rpi = vld1q_f64( rp + i );
const float64x2_t xxs = vaddq_f64( ftp0,
vaddq_f64( xx1, xx2 ));
s = vmlaq_f64( s, rpi, xxs );
}
*op = vaddvq_f64( s );
#else // SIMD
double s = 0.0;
for( i = 0; i < fltlen; i++ )
{
s += ( ftp[ 0 ] + ftp[ 1 ] * x + ftp[ 2 ] * x2d ) * rp[ i ];
ftp += 3;
}
*op = s;
#endif // SIMD
op++;
#if R8B_FASTTIMING
InPosFrac += FracStep;
const int PosIncr = (int) InPosFrac;
InPosFrac -= PosIncr;
#else // R8B_FASTTIMING
InCounter++;
const double NextInPos = ( InCounter + InPosShift ) *
SrcSampleRate / DstSampleRate;
const int NextInPosInt = (int) NextInPos;
const int PosIncr = NextInPosInt - InPosInt;
InPosInt = NextInPosInt;
InPosFrac = NextInPos - NextInPosInt;
#endif // R8B_FASTTIMING
ReadPos = ( ReadPos + PosIncr ) & BufLenMask;
BufLeft -= PosIncr;
}
return( op );
}
};
// ---------------------------------------------------------------------------
} // namespace r8b
#endif // R8B_CDSPFRACINTERPOLATOR_INCLUDED