643 lines
15 KiB
C++
643 lines
15 KiB
C++
//$ nobt
|
|
//$ nocpp
|
|
|
|
/**
|
|
* @file CDSPBlockConvolver.h
|
|
*
|
|
* @brief Single-block overlap-save convolution processor class.
|
|
*
|
|
* This file includes single-block overlap-save convolution processor class.
|
|
*
|
|
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
|
|
* See the "LICENSE" file for license.
|
|
*/
|
|
|
|
#ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED
|
|
#define R8B_CDSPBLOCKCONVOLVER_INCLUDED
|
|
|
|
#include "CDSPFIRFilter.h"
|
|
#include "CDSPProcessor.h"
|
|
|
|
namespace r8b {
|
|
|
|
/**
|
|
* @brief Single-block overlap-save convolution processing class.
|
|
*
|
|
* Class that implements single-block overlap-save convolution processing. The
|
|
* length of a single FFT block used depends on the length of the filter
|
|
* kernel.
|
|
*
|
|
* The rationale behind "single-block" processing is that increasing the FFT
|
|
* block length by 2 is more efficient than performing convolution at the same
|
|
* FFT block length but using two blocks.
|
|
*
|
|
* This class also implements a built-in resampling by any whole-number
|
|
* factor, which simplifies the overall resampling objects topology.
|
|
*/
|
|
|
|
class CDSPBlockConvolver : public CDSPProcessor
|
|
{
|
|
public:
|
|
/**
|
|
* Constructor initializes internal variables and constants of *this
|
|
* object.
|
|
*
|
|
* @param aFilter Pre-calculated filter data. Reference to this object is
|
|
* inhertied by *this object, and the object will be released when *this
|
|
* object is destroyed. If upsampling is used, filter's gain should be
|
|
* equal to the upsampling factor.
|
|
* @param aUpFactor The upsampling factor, positive value. E.g. value of 2
|
|
* means 2x upsampling should be performed over the input data.
|
|
* @param aDownFactor The downsampling factor, positive value. E.g. value
|
|
* of 2 means 2x downsampling should be performed over the output data.
|
|
* @param PrevLatency Latency, in samples (any value >=0), which was left
|
|
* in the output signal by a previous process. This value is usually
|
|
* non-zero if the minimum-phase filters are in use. This value is always
|
|
* zero if the linear-phase filters are in use.
|
|
* @param aDoConsumeLatency "True" if the output latency should be
|
|
* consumed. Does not apply to the fractional part of the latency (if such
|
|
* part is available).
|
|
*/
|
|
|
|
CDSPBlockConvolver( CDSPFIRFilter& aFilter, const int aUpFactor,
|
|
const int aDownFactor, const double PrevLatency = 0.0,
|
|
const bool aDoConsumeLatency = true )
|
|
: Filter( &aFilter )
|
|
, UpFactor( aUpFactor )
|
|
, DownFactor( aDownFactor )
|
|
, DoConsumeLatency( aDoConsumeLatency )
|
|
, BlockLen2( 2 << Filter -> getBlockLenBits() )
|
|
{
|
|
R8BASSERT( UpFactor > 0 );
|
|
R8BASSERT( DownFactor > 0 );
|
|
R8BASSERT( PrevLatency >= 0.0 );
|
|
|
|
int fftinBits;
|
|
UpShift = getBitOccupancy( UpFactor ) - 1;
|
|
|
|
if(( 1 << UpShift ) == UpFactor )
|
|
{
|
|
fftinBits = Filter -> getBlockLenBits() + 1 - UpShift;
|
|
PrevInputLen = ( Filter -> getKernelLen() - 1 + UpFactor - 1 ) /
|
|
UpFactor;
|
|
|
|
InputLen = BlockLen2 - PrevInputLen * UpFactor;
|
|
}
|
|
else
|
|
{
|
|
UpShift = -1;
|
|
fftinBits = Filter -> getBlockLenBits() + 1;
|
|
PrevInputLen = Filter -> getKernelLen() - 1;
|
|
InputLen = BlockLen2 - PrevInputLen;
|
|
}
|
|
|
|
OutOffset = ( Filter -> isZeroPhase() ? Filter -> getLatency() : 0 );
|
|
LatencyFrac = Filter -> getLatencyFrac() + PrevLatency * UpFactor;
|
|
Latency = (int) LatencyFrac;
|
|
const int InLatency = Latency + Filter -> getLatency() - OutOffset;
|
|
LatencyFrac -= Latency;
|
|
LatencyFrac /= DownFactor;
|
|
|
|
Latency += InputLen + Filter -> getLatency();
|
|
|
|
int fftoutBits;
|
|
InputDelay = 0;
|
|
DownSkipInit = 0;
|
|
DownShift = getBitOccupancy( DownFactor ) - 1;
|
|
|
|
if(( 1 << DownShift ) == DownFactor )
|
|
{
|
|
fftoutBits = Filter -> getBlockLenBits() + 1 - DownShift;
|
|
|
|
if( DownFactor > 1 )
|
|
{
|
|
if( UpShift > 0 )
|
|
{
|
|
// This case never happens in practice due to mutual
|
|
// exclusion of "power of 2" DownFactor and UpFactor
|
|
// values.
|
|
|
|
R8BASSERT( UpShift == 0 );
|
|
}
|
|
else
|
|
{
|
|
// Make sure InputLen is divisible by DownFactor.
|
|
|
|
const int ilc = InputLen & ( DownFactor - 1 );
|
|
PrevInputLen += ilc;
|
|
InputLen -= ilc;
|
|
Latency -= ilc;
|
|
|
|
// Correct InputDelay for input and filter's latency.
|
|
|
|
const int lc = InLatency & ( DownFactor - 1 );
|
|
|
|
if( lc > 0 )
|
|
{
|
|
InputDelay = DownFactor - lc;
|
|
}
|
|
|
|
if( !DoConsumeLatency )
|
|
{
|
|
Latency /= DownFactor;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fftoutBits = Filter -> getBlockLenBits() + 1;
|
|
DownShift = -1;
|
|
|
|
if( !DoConsumeLatency && DownFactor > 1 )
|
|
{
|
|
DownSkipInit = Latency % DownFactor;
|
|
Latency /= DownFactor;
|
|
}
|
|
}
|
|
|
|
R8BASSERT( Latency >= 0 );
|
|
|
|
fftin = new CDSPRealFFTKeeper( fftinBits );
|
|
|
|
if( fftoutBits == fftinBits )
|
|
{
|
|
fftout = fftin;
|
|
}
|
|
else
|
|
{
|
|
ffto2 = new CDSPRealFFTKeeper( fftoutBits );
|
|
fftout = ffto2;
|
|
}
|
|
|
|
WorkBlocks.alloc( BlockLen2 * 2 + PrevInputLen );
|
|
CurInput = &WorkBlocks[ 0 ];
|
|
CurOutput = &WorkBlocks[ BlockLen2 ]; // CurInput and
|
|
// CurOutput are address-aligned.
|
|
PrevInput = &WorkBlocks[ BlockLen2 * 2 ];
|
|
|
|
clear();
|
|
|
|
R8BCONSOLE( "CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i "
|
|
"fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen,
|
|
UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(),
|
|
getLatency() );
|
|
}
|
|
|
|
virtual ~CDSPBlockConvolver()
|
|
{
|
|
Filter -> unref();
|
|
}
|
|
|
|
virtual int getLatency() const
|
|
{
|
|
return( DoConsumeLatency ? 0 : Latency );
|
|
}
|
|
|
|
virtual double getLatencyFrac() const
|
|
{
|
|
return( LatencyFrac );
|
|
}
|
|
|
|
virtual int getMaxOutLen( const int MaxInLen ) const
|
|
{
|
|
R8BASSERT( MaxInLen >= 0 );
|
|
|
|
return(( MaxInLen * UpFactor + DownFactor - 1 ) / DownFactor );
|
|
}
|
|
|
|
virtual void clear()
|
|
{
|
|
memset( &PrevInput[ 0 ], 0, PrevInputLen * sizeof( PrevInput[ 0 ]));
|
|
|
|
if( DoConsumeLatency )
|
|
{
|
|
LatencyLeft = Latency;
|
|
}
|
|
else
|
|
{
|
|
LatencyLeft = 0;
|
|
|
|
if( DownShift > 0 )
|
|
{
|
|
memset( &CurOutput[ 0 ], 0, ( BlockLen2 >> DownShift ) *
|
|
sizeof( CurOutput[ 0 ]));
|
|
}
|
|
else
|
|
{
|
|
memset( &CurOutput[ BlockLen2 - OutOffset ], 0, OutOffset *
|
|
sizeof( CurOutput[ 0 ]));
|
|
|
|
memset( &CurOutput[ 0 ], 0, ( InputLen - OutOffset ) *
|
|
sizeof( CurOutput[ 0 ]));
|
|
}
|
|
}
|
|
|
|
memset( CurInput, 0, InputDelay * sizeof( CurInput[ 0 ]));
|
|
|
|
InDataLeft = InputLen - InputDelay;
|
|
UpSkip = 0;
|
|
DownSkip = DownSkipInit;
|
|
}
|
|
|
|
virtual int process( double* ip, int l0, double*& op0 )
|
|
{
|
|
R8BASSERT( l0 >= 0 );
|
|
R8BASSERT( UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0 );
|
|
|
|
double* op = op0;
|
|
int l = l0 * UpFactor;
|
|
l0 = 0;
|
|
|
|
while( l > 0 )
|
|
{
|
|
const int Offs = InputLen - InDataLeft;
|
|
|
|
if( l < InDataLeft )
|
|
{
|
|
InDataLeft -= l;
|
|
|
|
if( UpShift >= 0 )
|
|
{
|
|
memcpy( &CurInput[ Offs >> UpShift ], ip,
|
|
( l >> UpShift ) * sizeof( CurInput[ 0 ]));
|
|
}
|
|
else
|
|
{
|
|
copyUpsample( ip, &CurInput[ Offs ], l );
|
|
}
|
|
|
|
copyToOutput( Offs - OutOffset, op, l, l0 );
|
|
break;
|
|
}
|
|
|
|
const int b = InDataLeft;
|
|
l -= b;
|
|
InDataLeft = InputLen;
|
|
int ilu;
|
|
|
|
if( UpShift >= 0 )
|
|
{
|
|
const int bu = b >> UpShift;
|
|
memcpy( &CurInput[ Offs >> UpShift ], ip,
|
|
bu * sizeof( CurInput[ 0 ]));
|
|
|
|
ip += bu;
|
|
ilu = InputLen >> UpShift;
|
|
}
|
|
else
|
|
{
|
|
copyUpsample( ip, &CurInput[ Offs ], b );
|
|
ilu = InputLen;
|
|
}
|
|
|
|
const size_t pil = PrevInputLen * sizeof( CurInput[ 0 ]);
|
|
memcpy( &CurInput[ ilu ], PrevInput, pil );
|
|
memcpy( PrevInput, &CurInput[ ilu - PrevInputLen ], pil );
|
|
|
|
(*fftin) -> forward( CurInput );
|
|
|
|
if( UpShift > 0 )
|
|
{
|
|
#if R8B_FLOATFFT
|
|
mirrorInputSpectrum( (float*) CurInput );
|
|
#else // R8B_FLOATFFT
|
|
mirrorInputSpectrum( CurInput );
|
|
#endif // R8B_FLOATFFT
|
|
}
|
|
|
|
if( Filter -> isZeroPhase() )
|
|
{
|
|
(*fftout) -> multiplyBlocksZP( Filter -> getKernelBlock(),
|
|
CurInput );
|
|
}
|
|
else
|
|
{
|
|
(*fftout) -> multiplyBlocks( Filter -> getKernelBlock(),
|
|
CurInput );
|
|
}
|
|
|
|
if( DownShift > 0 )
|
|
{
|
|
const int z = BlockLen2 >> DownShift;
|
|
|
|
#if R8B_FLOATFFT
|
|
float* const kb = (float*) Filter -> getKernelBlock();
|
|
float* const p = (float*) CurInput;
|
|
#else // R8B_FLOATFFT
|
|
const double* const kb = Filter -> getKernelBlock();
|
|
double* const p = CurInput;
|
|
#endif // R8B_FLOATFFT
|
|
|
|
p[ 1 ] = kb[ z ] * p[ z ] - kb[ z + 1 ] * p[ z + 1 ];
|
|
}
|
|
|
|
(*fftout) -> inverse( CurInput );
|
|
|
|
copyToOutput( Offs - OutOffset, op, b, l0 );
|
|
|
|
double* const tmp = CurInput;
|
|
CurInput = CurOutput;
|
|
CurOutput = tmp;
|
|
}
|
|
|
|
return( l0 );
|
|
}
|
|
|
|
private:
|
|
CDSPFIRFilter* Filter; ///< Filter in use.
|
|
///<
|
|
CPtrKeeper< CDSPRealFFTKeeper* > fftin; ///< FFT object 1, used to produce
|
|
///< the input spectrum (can embed the "power of 2" upsampling).
|
|
///<
|
|
CPtrKeeper< CDSPRealFFTKeeper* > ffto2; ///< FFT object 2 (can be NULL).
|
|
///<
|
|
CDSPRealFFTKeeper* fftout; ///< FFT object used to produce the output
|
|
///< signal (can embed the "power of 2" downsampling), may point to
|
|
///< either "fftin" or "ffto2".
|
|
///<
|
|
int UpFactor; ///< Upsampling factor.
|
|
///<
|
|
int DownFactor; ///< Downsampling factor.
|
|
///<
|
|
bool DoConsumeLatency; ///< "True" if the output latency should be
|
|
///< consumed. Does not apply to the fractional part of the latency
|
|
///< (if such part is available).
|
|
///<
|
|
int BlockLen2; ///< Equals block length * 2.
|
|
///<
|
|
int OutOffset; ///< Output offset, depends on filter's introduced latency.
|
|
///<
|
|
int PrevInputLen; ///< The length of previous input data saved, used for
|
|
///< overlap.
|
|
///<
|
|
int InputLen; ///< The number of input samples that should be accumulated
|
|
///< before the input block is processed.
|
|
///<
|
|
int Latency; ///< Processing latency, in samples.
|
|
///<
|
|
double LatencyFrac; ///< Fractional latency, in samples, that is left in
|
|
///< the output signal.
|
|
///<
|
|
int UpShift; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is
|
|
///< not a "power of 2" value. Equals 0 if UpFactor equals 1.
|
|
///<
|
|
int DownShift; ///< "Power of 2" downsampling shift. Equals -1 if
|
|
///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals
|
|
///< 1.
|
|
///<
|
|
int InputDelay; ///< Additional input delay, in samples. Used to make the
|
|
///< output delay divisible by DownShift. Used only if UpShift <= 0
|
|
///< and DownShift > 0.
|
|
///<
|
|
CFixedBuffer< double > WorkBlocks; ///< Previous input data, input and
|
|
///< output data blocks, overall capacity = BlockLen2 * 2 +
|
|
///< PrevInputLen. Used in the flip-flop manner.
|
|
///<
|
|
double* PrevInput; ///< Previous input data buffer, capacity = BlockLen.
|
|
///<
|
|
double* CurInput; ///< Input data buffer, capacity = BlockLen2.
|
|
///<
|
|
double* CurOutput; ///< Output data buffer, capacity = BlockLen2.
|
|
///<
|
|
int InDataLeft; ///< Samples left before processing input and output FFT
|
|
///< blocks. Initialized to InputLen on clear.
|
|
///<
|
|
int LatencyLeft; ///< Latency in samples left to skip.
|
|
///<
|
|
int UpSkip; ///< The current upsampling sample skip (value in the range
|
|
///< 0 to UpFactor - 1).
|
|
///<
|
|
int DownSkip; ///< The current downsampling sample skip (value in the
|
|
///< range 0 to DownFactor - 1). Not used if DownShift > 0.
|
|
///<
|
|
int DownSkipInit; ///< The initial DownSkip value after clear().
|
|
///<
|
|
|
|
/**
|
|
* Function copies samples from the input buffer to the output buffer
|
|
* while inserting zeros inbetween them to perform the whole-numbered
|
|
* upsampling.
|
|
*
|
|
* @param[in,out] ip0 Input buffer. Will be advanced on function's return.
|
|
* @param[out] op Output buffer.
|
|
* @param l0 The number of samples to fill in the output buffer, including
|
|
* both input samples and interpolation (zero) samples.
|
|
*/
|
|
|
|
void copyUpsample( double*& ip0, double* op, int l0 )
|
|
{
|
|
int b = min( UpSkip, l0 );
|
|
|
|
if( b != 0 )
|
|
{
|
|
UpSkip -= b;
|
|
l0 -= b;
|
|
|
|
*op = 0.0;
|
|
op++;
|
|
|
|
while( --b != 0 )
|
|
{
|
|
*op = 0.0;
|
|
op++;
|
|
}
|
|
}
|
|
|
|
double* ip = ip0;
|
|
const int upf = UpFactor;
|
|
int l = l0 / upf;
|
|
int lz = l0 - l * upf;
|
|
|
|
if( upf == 3 )
|
|
{
|
|
while( l != 0 )
|
|
{
|
|
op[ 0 ] = *ip;
|
|
op[ 1 ] = 0.0;
|
|
op[ 2 ] = 0.0;
|
|
ip++;
|
|
op += upf;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
if( upf == 5 )
|
|
{
|
|
while( l != 0 )
|
|
{
|
|
op[ 0 ] = *ip;
|
|
op[ 1 ] = 0.0;
|
|
op[ 2 ] = 0.0;
|
|
op[ 3 ] = 0.0;
|
|
op[ 4 ] = 0.0;
|
|
ip++;
|
|
op += upf;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const size_t zc = ( upf - 1 ) * sizeof( op[ 0 ]);
|
|
|
|
while( l != 0 )
|
|
{
|
|
*op = *ip;
|
|
ip++;
|
|
|
|
memset( op + 1, 0, zc );
|
|
op += upf;
|
|
l--;
|
|
}
|
|
}
|
|
|
|
if( lz != 0 )
|
|
{
|
|
*op = *ip;
|
|
ip++;
|
|
op++;
|
|
|
|
UpSkip = upf - lz;
|
|
|
|
while( --lz != 0 )
|
|
{
|
|
*op = 0.0;
|
|
op++;
|
|
}
|
|
}
|
|
|
|
ip0 = ip;
|
|
}
|
|
|
|
/**
|
|
* Function copies sample data from the CurOutput buffer to the specified
|
|
* output buffer and advances its position. If necessary, this function
|
|
* "consumes" latency and performs downsampling.
|
|
*
|
|
* @param Offs CurOutput buffer offset, can be negative.
|
|
* @param[out] op0 Output buffer pointer, will be advanced.
|
|
* @param b The number of output samples available, including those which
|
|
* are discarded during whole-number downsampling.
|
|
* @param l0 The overall output sample count, will be increased.
|
|
*/
|
|
|
|
void copyToOutput( int Offs, double*& op0, int b, int& l0 )
|
|
{
|
|
if( Offs < 0 )
|
|
{
|
|
if( Offs + b <= 0 )
|
|
{
|
|
Offs += BlockLen2;
|
|
}
|
|
else
|
|
{
|
|
copyToOutput( Offs + BlockLen2, op0, -Offs, l0 );
|
|
b += Offs;
|
|
Offs = 0;
|
|
}
|
|
}
|
|
|
|
if( LatencyLeft != 0 )
|
|
{
|
|
if( LatencyLeft >= b )
|
|
{
|
|
LatencyLeft -= b;
|
|
return;
|
|
}
|
|
|
|
Offs += LatencyLeft;
|
|
b -= LatencyLeft;
|
|
LatencyLeft = 0;
|
|
}
|
|
|
|
const int df = DownFactor;
|
|
|
|
if( DownShift > 0 )
|
|
{
|
|
int Skip = Offs & ( df - 1 );
|
|
|
|
if( Skip > 0 )
|
|
{
|
|
Skip = df - Skip;
|
|
b -= Skip;
|
|
Offs += Skip;
|
|
}
|
|
|
|
if( b > 0 )
|
|
{
|
|
b = ( b + df - 1 ) >> DownShift;
|
|
memcpy( op0, &CurOutput[ Offs >> DownShift ],
|
|
b * sizeof( op0[ 0 ]));
|
|
|
|
op0 += b;
|
|
l0 += b;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( df > 1 )
|
|
{
|
|
const double* ip = &CurOutput[ Offs + DownSkip ];
|
|
int l = ( b + df - 1 - DownSkip ) / df;
|
|
DownSkip += l * df - b;
|
|
|
|
double* op = op0;
|
|
l0 += l;
|
|
op0 += l;
|
|
|
|
while( l > 0 )
|
|
{
|
|
*op = *ip;
|
|
ip += df;
|
|
op++;
|
|
l--;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
memcpy( op0, &CurOutput[ Offs ], b * sizeof( op0[ 0 ]));
|
|
op0 += b;
|
|
l0 += b;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Function performs input spectrum mirroring which is used to perform a
|
|
* fast "power of 2" upsampling. Such mirroring is equivalent to insertion
|
|
* of zeros into the input signal.
|
|
*
|
|
* @param p Spectrum data block to mirror.
|
|
* @tparam T Buffer's element type.
|
|
*/
|
|
|
|
template< typename T >
|
|
void mirrorInputSpectrum( T* const p )
|
|
{
|
|
const int bl1 = BlockLen2 >> UpShift;
|
|
const int bl2 = bl1 + bl1;
|
|
int i;
|
|
|
|
for( i = bl1 + 2; i < bl2; i += 2 )
|
|
{
|
|
p[ i ] = p[ bl2 - i ];
|
|
p[ i + 1 ] = -p[ bl2 - i + 1 ];
|
|
}
|
|
|
|
p[ bl1 ] = p[ 1 ];
|
|
p[ bl1 + 1 ] = 0.0;
|
|
p[ 1 ] = p[ 0 ];
|
|
|
|
for( i = 1; i < UpShift; i++ )
|
|
{
|
|
const int z = bl1 << i;
|
|
memcpy( &p[ z ], p, z * sizeof( p[ 0 ]));
|
|
p[ z + 1 ] = 0.0;
|
|
}
|
|
}
|
|
};
|
|
|
|
} // namespace r8b
|
|
|
|
#endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED
|