winamp/Src/Winamp/classic_vis.cpp

343 lines
8.3 KiB
C++
Raw Permalink Normal View History

2024-09-24 12:54:57 +00:00
#include "main.h"
#include "SABuffer.h"
#include <math.h>
#include "WinampAttributes.h"
#include "fft.h"
extern int _srate;
// lrint: convert a float to an int.
// Two builds are provided and they do NOT round the same way:
//  - 32-bit x86 (_M_IX86): x87 fld/fistp, which rounds according to the
//    FPU's current rounding mode (presumably round-to-nearest here; nothing
//    in this file changes it).
//  - every other target: a plain cast, which truncates toward zero.
// Inputs that are already whole numbers (e.g. lrint(floor(v))) give the same
// answer on both builds; fractional inputs may differ by one.
#ifdef _M_IX86
__inline static int lrint(float flt)
{
int intgr;
_asm
{
fld flt
fistp intgr
}
return intgr;
}
#else
// Portable fallback: truncating cast.
__inline static int lrint(float flt)
{
return (int)flt;
}
#endif
// Branch-free minimum of x and b, evaluated as b - max(b - x, 0).
// The result goes through several float operations, so it is quantized to
// float mantissa precision (original note: "quantizes to 23 bits") - use
// appropriately.
inline static float fastmin(float x, const float b)
{
	const float gap = b - x;                              // positive while x is below b
	const float positiveGap = (gap + (float)fabs(gap)) * 0.5f; // max(gap, 0) without a branch
	return b - positiveGap;
}
// In-place macro form of fastmin(): afterwards x holds min(x, b).
// Both arguments are parenthesized so an expression such as `c ? hi : lo`
// can be passed for b without operator-precedence surprises.
// x must be a modifiable float lvalue; x and b are each evaluated more than
// once, so do not pass expressions with side effects.
// The expansion deliberately stays a plain { } block so that existing call
// sites written with or without a trailing semicolon keep compiling.
#define FASTMIN(x,b) { (x) = (b) - (x); (x) += (float)fabs(x); (x) *= 0.5f; (x) = (b) - (x); }
// Branch-free clamp of x to [a, b] (for a <= b), using the identity
//   clamp(x) = (|x - a| - |x - b| + a + b) / 2
inline static float fastclip(float x, const float a, const float b)
{
	const float distLow = (float)fabs(x - a);
	const float distHigh = (float)fabs(x - b);
	return ((distLow + (a + b)) - distHigh) * 0.5f;
}
// Builds the 75-point oscilloscope trace from one block of interleaved PCM.
//
//   tempdata     - output; receives 75 values, one per display column
//   data_buf     - interleaved PCM samples (channel 0, channel 1, ... per frame)
//   little_block - number of sample frames in data_buf that the 75 columns
//                  are spread across
//   channels     - number of interleaved channels
//   bits         - bits per sample; bits/8 is used as the byte size of a sample
//
// Only the most significant byte of each sample is read (the last byte of
// the sample, i.e. little-endian layout), because the display needs very
// little resolution. Each column takes the nearest frame - no interpolation -
// and averages that frame's channels.
//
// If the format is unusable (no channels, fewer than 8 bits per sample, or a
// null buffer) a flat zero trace is written instead of reading out of bounds
// or dividing by zero.
void makeOscData(char *tempdata, char *data_buf, int little_block, int channels, int bits)
{
	const int points = 75;
	const int stride = bits / 8; // number of bytes between samples

	if (!tempdata)
		return;

	if (!data_buf || channels <= 0 || stride <= 0)
	{
		// stride == 0 would index data_buf[-1]; channels == 0 would divide by zero
		for (int i = 0; i < points; i++)
			tempdata[i] = 0;
		return;
	}

	const float dd = little_block / 75.0f; // frames advanced per display column
	char *sbuf = data_buf;

	for (int x = 0; x < points; x++)
	{
		float val = 0;
		// nearest frame for this column; interpolation is too expensive for this use
		int index = (int)((float)x * dd);
		// first channel of that frame, offset to the sample's most significant byte
		char *ptr = &sbuf[index * stride * channels + stride - 1];
		for (int c = 0; c < channels; c++)
		{
			// scale the byte down by 8 (original note: "we want our final value to be -32 to 32")
			val += (float)*ptr / 8.0f;
			ptr += stride; // next channel's sample (channels are interleaved)
		}
		tempdata[x] = (char)lrint(val / (float)channels); // average the channels
	}
}
// Fast approximation of 2^val, built by writing the exponent field of a
// double directly.
// In both branches `ret` is first set to 1 + (fractional part of val), i.e. a
// value in [1, 2); its 11 exponent bits are then cleared and replaced so the
// result is ret * 2^(integer part). Between integer inputs the curve is
// therefore a straight line, not a true exponential.
// NOTE(review): the access through `1 + (int *) &ret` assumes 32-bit int and
// that word 1 of the double holds the sign/exponent bits (little-endian
// layout); it also type-puns through a pointer cast.
// NOTE(review): there is no range checking - presumably val must stay inside
// the normal exponent range of a double; confirm against callers.
inline double fast_exp2(const double val)
{
int e;
double ret;
if (val >= 0)
{
// e = integer part; ret = fractional part + 1
e = int (val);
ret = val - (e - 1);
// clear the exponent bits (2047 << 20) and store the biased exponent e + 1023
((*(1 + (int *) &ret)) &= ~(2047 << 20)) += (e + 1023) << 20;
}
else
{
// e is computed already biased by 1023 so the truncating cast works on a
// positive number; ret again lands in [1, 2]
e = int (val + 1023);
ret = val - (e - 1024);
((*(1 + (int *) &ret)) &= ~(2047 << 20)) += e << 20;
}
return (ret);
}
// ~6 clocks on Pentium M vs. ~24 for single precision sqrtf
#if !defined(_WIN64)
// Approximate square root built from the two SSE estimate instructions:
// rsqrtss produces an estimate of 1/sqrt(x), and rcpss takes the reciprocal
// estimate of that, giving roughly sqrt(x).
// Uses MSVC inline assembly and is only compiled when _WIN64 is not defined.
static inline float squareroot_sse_11bits(float x)
{
float z;
_asm
{
rsqrtss xmm0, x
rcpss xmm0, xmm0
movss z, xmm0 // z ~= sqrt(x) to 0.038%
}
return z;
}
// Returns floor(x) as an int using x87 inline assembly.
// The value 2*x - 0.5 is converted to an integer by fistp and then halved
// with an arithmetic right shift, which turns the FPU's rounding conversion
// into a round-toward-minus-infinity result.
// NOTE(review): presumably relies on the FPU being in its default
// round-to-nearest mode and on 2*x fitting in a 32-bit int - confirm.
static inline int floor_int(double x)
{
int i;
static const float round_toward_m_i = -0.5f;
__asm
{
fld x
fadd st, st(0)
fadd round_toward_m_i
fistp i
sar i, 1
}
return (i);
}
#endif
/*
static inline float hermite(float x, float y0, float y1, float y2, float y3)
{
// 4-point, 3rd-order Hermite (x-form)
float c0 = y1;
float c1 = 0.5f * (y2 - y0);
float c2 = y0 - 2.5f * y1 + 2.f * y2 - 0.5f * y3;
float c3 = 1.5f * (y1 - y2) + 0.5f * (y3 - y0);
return ((c3 * x + c2) * x + c1) * x + c0;
}
*/
/*
static const float c_half = 0.5f;
__declspec(naked) static float hermite(float frac_pos, const float* pntr)
{
__asm
{
push ecx;
mov ecx, dword ptr[esp + 12]; //////////////////////////////////////////////////////////////////////////////////////////////////
add ecx, 0x04; // ST(0) ST(1) ST(2) ST(3) ST(4) ST(5) ST(6) ST(7)
fld dword ptr [ecx+4]; // x1
fsub dword ptr [ecx-4]; // x1-xm1
fld dword ptr [ecx]; // x0 x1-xm1
fsub dword ptr [ecx+4]; // v x1-xm1
fld dword ptr [ecx+8]; // x2 v x1-xm1
fsub dword ptr [ecx]; // x2-x0 v x1-xm1
fxch st(2); // x1-m1 v x2-x0
fmul c_half; // c v x2-x0
fxch st(2); // x2-x0 v c
fmul c_half; // 0.5*(x2-x0) v c
fxch st(2); // c v 0.5*(x2-x0)
fst st(3); // c v 0.5*(x2-x0) c
fadd st(0), st(1); // w v 0.5*(x2-x0) c
fxch st(2); // 0.5*(x2-x0) v w c
faddp st(1), st(0); // v+.5(x2-x0) w c
fadd st(0), st(1); // a w c
fadd st(1), st(0); // a b_neg c
fmul dword ptr [esp+8]; // a*frac b_neg c
fsubrp st(1), st(0); // a*f-b c
fmul dword ptr [esp+8]; // (a*f-b)*f c
faddp st(1), st(0); // res-x0/f
fmul dword ptr [esp+8]; // res-x0
fadd dword ptr [ecx]; // res
pop ecx;
ret;
}
}
*/
// 4-point, 3rd-order Hermite interpolation (x-form).
// Returns y1 at x == 0 and y2 at x == 1; y0 and y3 are the outer neighbours
// that shape the curve between them.
inline float hermite(float x, float y0, float y1, float y2, float y3)
{
	const float linear = 0.5f * (y2 - y0);
	const float cubic = 1.5f * (y1 - y2) + 0.5f * (y3 - y0);
	const float quadratic = y0 - y1 + linear - cubic;

	// Horner evaluation of cubic*x^3 + quadratic*x^2 + linear*x + y1
	float acc = cubic * x + quadratic;
	acc = acc * x + linear;
	return acc * x + y1;
}
// Fast approximation of 2^y for y >= 0.
// The integer part of y is written straight into the exponent field of a
// float (bias 127, shifted past the 23 mantissa bits), which yields an exact
// power of two. That is then multiplied by a quadratic in the fractional
// part which equals 1 at frac == 0 and 2 at frac == 1.
static inline float fpow2(const float y)
{
	union
	{
		float f;
		int i;
	} bits;

	const int whole = lrint(floor(y));
	// No adjustment of `whole` for negative input (an "if (y < 0) whole -= 1"
	// step was cut from the original): callers guarantee y >= 0.
	const float frac = y - (float)whole;

	bits.i = (whole + 127) << 23;
	bits.f *= 0.33977f*frac*frac + (1.0f-0.33977f)*frac + 1.0f;
	return bits.f;
}
// SAPOW(x): 2^(x/12), evaluated with the fast fpow2() approximation.
// The commented-out line is the exact powf() form it replaces.
//#define SAPOW(x) (powf(2.f, (float)(x)/12.f))
#define SAPOW(x) (fpow2((float)(x)/12.f))
// WARP(x): (SAPOW(x) - 1) multiplied by `bla`.
// The macro reads a variable named `bla` from whatever scope it is expanded
// in, so it can only be used where such a variable is defined.
//#define WARP(x) ((powf(1.1f, (float)(x)/12.f) - 1.) * bla)
#define WARP(x) ((SAPOW(x) - 1.f) * bla)
// Converts one block of FFT input into the 75-column spectrum analyzer frame.
//
//   tempdata - output; receives 75 bar heights, each clamped to 0..255
//   wavetrum - in/out work buffer. It is handed to fft_9(); afterwards the
//              pairs wavetrum[2*i], wavetrum[2*i+1] (i = 0..255) are combined
//              into magnitudes that overwrite wavetrum[0..255].
//
// The 75 columns are spread over the magnitude bins on a warped scale
// (WARP(x) + 1, growing as 2^(x/12)). For each column the bins between its
// start position and the next column's start position are accumulated; the
// fractional first (and final) position is evaluated with Hermite
// interpolation and weighted by the fraction of the bin covered.
void makeSpecData(unsigned char *tempdata, float *wavetrum)
{
	// With this scale WARP(75) works out to 255 - bla, so warped positions
	// stay inside the 256 magnitude bins.
	float bla = (255.f/SAPOW(75.f));
	fft_9(wavetrum);

	float spec_scale=0.5;
	if (config_replaygain)
	{ // benski> i'm sure there's some math identity we can use to optimize this.
		// undo the non-replaygain gain (given in dB) so the display level is unaffected by it
		spec_scale/=pow(10.0f, config_replaygain_non_rg_gain.GetFloat() / 20.0f);
	}

	// Collapse each value pair into a scaled magnitude. Writing index i while
	// reading 2*i and 2*i+1 is safe because the write never overtakes the reads.
	for (int i=0;i<256;i++)
	{
		float sinT = wavetrum[2*i];
		float cosT = wavetrum[2*i+1];
		wavetrum[i] = sqrt(sinT*sinT+cosT*cosT)*spec_scale;
	}

	float next = WARP(0)+1;
	for (int x = 0; x < 75; x ++)
	{
		float binF = next;          // fractional bin where this column starts
		next = WARP(x+1) +1;        // fractional bin where the next column starts
		float thisValue = 0;
		int bin = lrint(floor(binF));
		int end = lrint(floor(next));
		end = min(end, 255);
		float mult = ((float)(bin+1))-binF; // part of the first bin covered by this column
		bool herm=true;             // interpolate on the first pass and again on the final bin
		do
		{
			if (bin == end)
			{
				// final bin: only the part up to `next` belongs to this column
				mult = (next-binF);
				herm=true;
			}
			if (herm)
			{
				// neighbours to the right; treated as 0 past the last magnitude bin
				float C=0, D=0;
				if (bin<255)
				{
					C=wavetrum[bin+1];
					if (bin<254)
						D=wavetrum[bin+2];
				}
				thisValue += hermite(binF-bin, wavetrum[bin-1], wavetrum[bin], C, D) * mult;
			}
			else
			{
				// bins fully inside the column are added whole
				thisValue += wavetrum[bin];
			}
			herm=false;
			bin++;
			binF=(float)bin;
		}
		while (bin <= end);

		// Clamp to the displayable range. The upper clamp is unchanged; the
		// lower clamp is needed because Hermite interpolation can undershoot
		// below zero, and a negative value stored in an unsigned char would
		// wrap around to a near-full-scale bar.
		float level = fastmin(thisValue, 255.f);
		if (level < 0.f)
			level = 0.f;
		tempdata[x]=lrint(level);
	}
}
////////////////////////////////
// File-level sample buffer. sa_addpcmdata() pushes PCM into it with
// AddToBuffer() and, whenever Full() reports true, pulls a block out with
// WindowToFFTBuffer() and then calls CopyHalf().
SABuffer saBuffer;
void sa_addpcmdata(void *_data_buf, int numChannels, int numBits, int ts)
{
char *data_buf = reinterpret_cast<char *>(_data_buf);
char tempdata[75*2] = {0};
__declspec(align(16)) float wavetrum[512];
//extern int sa_curmode;
int vis_Csa=sa_override ? 3 : sa_curmode;
switch (vis_Csa)
{
case 4:
tempdata[0] = 0;
tempdata[1] = 0;
sa_add(tempdata,ts,4);
return;
case 2:
makeOscData(tempdata,data_buf,576,numChannels, numBits);
sa_add(tempdata,ts,2);
return ;
case 3:
makeOscData(tempdata+75,data_buf,576,numChannels, numBits);
// fall through!
case 1:
calcVuData((unsigned char*)tempdata, data_buf, numChannels, numBits);
vu_add(tempdata, ts);
break;
}
bool done=false;
size_t samples=576;
while (samples)
{
unsigned int copied = saBuffer.AddToBuffer(data_buf, numChannels, numBits, ts, (unsigned int) samples);
samples-=copied;
data_buf+=(copied*(numBits/8)*numChannels);
if (saBuffer.Full())
{
saBuffer.WindowToFFTBuffer(wavetrum);
if (!done)
{
if (vis_Csa == 3)
{
makeSpecData((unsigned char*)tempdata, wavetrum);
sa_add(tempdata, ts, 0x80000003);
}
else if (vis_Csa == 1)
{
makeSpecData((unsigned char*)tempdata, wavetrum);
sa_add(tempdata, ts, 1);
}
}
//done=true;
saBuffer.CopyHalf();
ts+=MulDiv(SABUFFER_WINDOW_INCREMENT,1000,_srate);
}
}
}