winamp/Src/Winamp/classic_vis.cpp

#include "main.h"
#include "SABuffer.h"
#include <math.h>
#include <string.h>
#include "WinampAttributes.h"
#include "fft.h"
extern int _srate;
#ifdef _M_IX86
// Converts a float to an int with the x87 FISTP instruction. FISTP rounds
// according to the FPU's current rounding mode (round-to-nearest, ties to
// even, unless the control word has been changed) rather than truncating.
__inline static int lrint(float flt)
{
	int intgr;

	_asm
	{
		fld flt
		fistp intgr
	}

	return intgr;
}
#else
// Portable counterpart of the x87 version: rounds to the nearest integer,
// ties to even, so that both builds quantise visualisation data the same
// way. (A plain (int) cast would truncate toward zero instead.)
// The input must be within the range of int, as with the x87 version.
__inline static int lrint(float flt)
{
	int intgr = (int)flt;                 // truncated toward zero
	const float rem = flt - (float)intgr; // leftover fraction, in (-1, 1)

	if (rem > 0.5f || (rem == 0.5f && (intgr & 1)))
		++intgr;
	else if (rem < -0.5f || (rem == -0.5f && (intgr & 1)))
		--intgr;

	return intgr;
}
#endif


//  Branch-free min(x, b), computed as b - ((b-x) + |b-x|) / 2.
//  quantizes to 23 bits - use appropriately
inline static float fastmin(float x, const float b)
{
	// how far x lies below the ceiling b (negative once x exceeds b)
	float gap = b - x;

	// gap + |gap| is 2*gap when x is below b and 0 otherwise
	gap += (float)fabs(gap);
	gap *= 0.5f;

	return b - gap;
}
// In-place variant of fastmin(): replaces x with min(x, b).
// x must be a modifiable float lvalue; both arguments are evaluated more than
// once, so do not pass expressions with side effects. The arguments are
// parenthesised so that operator expressions (e.g. `c ? p : q`) expand safely.
#define FASTMIN(x,b) { (x) = (b) - (x);   (x) += (float)fabs(x);   (x) *= 0.5f;   (x) = (b) - (x); }
// Branch-free clamp built from two fabs() calls:
//   (|x-a| - |x-b| + a + b) / 2
// For a <= b this evaluates to x limited to the interval [a, b].
inline static float fastclip(float x, const float a, const float b)
{
	const float distFromLow  = (float)fabs(x-a);
	const float distFromHigh = (float)fabs(x-b);

	float acc = distFromLow + (a+b);
	acc -= distFromHigh;

	return acc * 0.5f;
}


// Builds the 75-point oscilloscope trace from interleaved little-endian PCM.
//
//   tempdata     - receives 75 signed bytes, one per trace point
//   data_buf     - interleaved PCM; sample frames up to index
//                  (int)(74 * little_block / 75) are read
//   little_block - number of sample frames the 75 points are spread across
//   channels     - interleaved channel count (averaged into one trace)
//   bits         - bits per sample; only the most significant byte is used
//
// If channels <= 0 or bits < 8 the sample indexing below is meaningless (it
// would read before data_buf or divide by zero), so a flat trace of zeros is
// written instead.
// NOTE(review): the top byte is interpreted as signed, which suits 16/24/32-bit
// PCM; 8-bit PCM is commonly unsigned - confirm what callers pass for bits==8.
void makeOscData(char *tempdata, char *data_buf, int little_block, int channels, int bits)
{
	const int stride = bits/8; // number of bytes between samples
	int x, c;

	if (channels <= 0 || stride <= 0)
	{
		for (x = 0; x < 75; x++)
			tempdata[x] = 0;
		return;
	}

	const float dd = little_block/75.0f; // sample frames per trace point

	// we're calculating using only the most significant byte,
	// because we only end up with 6 bit data anyway
	// if you want full resolution, check out CVS tag BETA_2005_1122_182830, file: vis.c
	// Read it as signed char explicitly: plain char may be unsigned on some
	// compilers/settings, which would turn negative samples into large positives.
	const signed char *sbuf = (const signed char *)data_buf;
	for (x = 0; x < 75; x++)
	{
		float val = 0;
		int index = (int)((float)x * dd);  // calculate the nearest sample for this point, interpolation is too expensive for this use
		const signed char *ptr = &sbuf[index*stride*channels+stride-1]; // find first sample, and offset for little endian
		for (c = 0; c < channels; c++)
		{
			val += (float)*ptr / 8.0f; // scales the signed byte (-128..127) down to -16..15.875
			ptr += stride; // jump to the next sample (channels are interleaved)
		}
		tempdata[x] = (char)lrint(val / (float)channels);  // average the channels
	}
}


// Fast approximation of 2^val: the integer part of val becomes the IEEE-754
// exponent and the fractional part is used directly as the mantissa, i.e. the
// result is (1 + frac) * 2^floor(val) (linear interpolation between powers of
// two; exact when val is an integer).
//
// Only meaningful while the resulting biased exponent stays within the 11-bit
// field, roughly -1023 < val < 1024; no range checking is done.
//
// The exponent is patched through memcpy and a 64-bit integer rather than by
// writing through an (int *) cast, which avoids the strict-aliasing violation
// and the assumption about which 32-bit half holds the exponent.
inline double fast_exp2(const double val)
{
	int    biased; // exponent field to store (true exponent + 1023)
	double ret;    // 1 + fractional part of val, always in [1, 2)

	if (val >= 0)
	{
		const int e = (int)val;
		ret = val - (e - 1);
		biased = e + 1023;
	}
	else
	{
		// adding the bias first keeps the operand positive, so the
		// truncating cast behaves like floor()
		biased = (int)(val + 1023);
		ret = val - (biased - 1024);
	}

	unsigned long long bits;
	memcpy(&bits, &ret, sizeof bits);
	bits &= ~(0x7FFull << 52);                              // clear the exponent field
	bits |= ((unsigned long long)(biased & 0x7FF)) << 52;   // install the new exponent
	memcpy(&ret, &bits, sizeof ret);

	return (ret);
}

// ~6 clocks on Pentium M vs. ~24 for single precision sqrtf
#if !defined(_WIN64)
// Approximates sqrt(x) with two SSE estimate instructions: the reciprocal
// square root of x is estimated first, then the reciprocal of that estimate
// is taken. Written as MSVC inline assembly and only compiled when _WIN64 is
// not defined. Accuracy is limited to that of the estimate instructions (see
// the note on the last instruction).
static inline float squareroot_sse_11bits(float x)
{
	float z;
	_asm
	{
		rsqrtss xmm0, x             // xmm0 ~= 1/sqrt(x)
		rcpss    xmm0, xmm0         // xmm0 ~= 1/xmm0
		movss    z, xmm0            // z ~= sqrt(x) to 0.038%
	}
	return z;
}

// Integer floor of x computed on the x87 FPU without changing its rounding
// mode: the value 2*x - 0.5 is converted to an integer by FISTP and the
// result is halved with an arithmetic right shift.
// NOTE(review): this yields floor(x) when the FPU is in its usual
// round-to-nearest mode and 2*x fits in an int - confirm both hold for callers.
// Written as MSVC inline assembly and only compiled when _WIN64 is not defined.
static inline int floor_int(double x)
{
	int      i;
	static const float round_toward_m_i = -0.5f;
	__asm
	{
		fld      x                      // st(0) = x
		fadd     st, st(0)              // st(0) = 2*x
		fadd     round_toward_m_i       // st(0) = 2*x - 0.5
		fistp    i                      // i = st(0) rounded to integer
		sar      i, 1                   // i = i >> 1 (arithmetic shift)
	}

	return (i);
}
#endif
/*
static inline float hermite(float x, float y0, float y1, float y2, float y3)
{
	// 4-point, 3rd-order Hermite (x-form)
	float c0 = y1;
	float c1 = 0.5f * (y2 - y0);
	float c2 = y0 - 2.5f * y1 + 2.f * y2 - 0.5f * y3;
	float c3 = 1.5f * (y1 - y2) + 0.5f * (y3 - y0);

	return ((c3 * x + c2) * x + c1) * x + c0;
}
*/

/*
static const float c_half = 0.5f;
__declspec(naked) static float hermite(float frac_pos, const float* pntr)
{
	__asm
	{
		push    ecx;
		mov     ecx, dword ptr[esp + 12]; //////////////////////////////////////////////////////////////////////////////////////////////////
		add     ecx, 0x04;            //    ST(0)        ST(1)        ST(2)        ST(3)        ST(4)        ST(5)        ST(6)        ST(7)
		fld     dword ptr [ecx+4];    //    x1
		fsub    dword ptr [ecx-4];    //    x1-xm1
		fld     dword ptr [ecx];      //    x0           x1-xm1
		fsub    dword ptr [ecx+4];    //    v            x1-xm1
		fld     dword ptr [ecx+8];    //    x2           v            x1-xm1
		fsub    dword ptr [ecx];      //    x2-x0        v            x1-xm1
		fxch    st(2);                //    x1-m1        v            x2-x0
		fmul    c_half;               //    c            v            x2-x0
		fxch    st(2);                //    x2-x0        v            c
		fmul    c_half;               //    0.5*(x2-x0)  v            c
		fxch    st(2);                //    c            v            0.5*(x2-x0)
		fst     st(3);                //    c            v            0.5*(x2-x0)    c
		fadd    st(0), st(1);         //    w            v            0.5*(x2-x0)    c
		fxch    st(2);                //    0.5*(x2-x0)  v            w              c
		faddp   st(1), st(0);         //    v+.5(x2-x0)  w            c
		fadd    st(0), st(1);         //    a            w            c
		fadd    st(1), st(0);         //    a            b_neg        c
		fmul    dword ptr [esp+8];    //    a*frac       b_neg        c
		fsubrp  st(1), st(0);         //    a*f-b        c
		fmul    dword ptr [esp+8];    //    (a*f-b)*f    c
		faddp   st(1), st(0);         //    res-x0/f
		fmul    dword ptr [esp+8];    //    res-x0
		fadd    dword ptr [ecx];      //    res
		pop     ecx;
		ret;
	}
}
*/
// 4-point, 3rd-order Hermite interpolation (x-form).
// y0..y3 are four consecutive samples and x is the position between y1 and
// y2: x == 0 returns y1 and x == 1 returns y2. The curve may overshoot or
// undershoot the sample values in between.
inline float hermite(float x, float y0, float y1, float y2, float y3)
{
    // coefficients of cubic*x^3 + quad*x^2 + lin*x + konst
    const float konst = y1;
    const float lin   = 0.5f * (y2 - y0);
    const float cubic = 1.5f * (y1 - y2) + 0.5f * (y3 - y0);
    const float quad  = y0 - y1 + lin - cubic;

    // Horner evaluation, highest power first
    float acc = cubic * x + quad;
    acc = acc * x + lin;
    acc = acc * x + konst;
    return acc;
}

// Fast approximation of 2^y. The integer part of y is written straight into
// the exponent field of a float, and the fractional part is handled by a
// quadratic that equals 1 at frac == 0 and 2 at frac == 1.
// Callers guarantee y >= 0, so no adjustment for negative inputs is made.
static inline float fpow2(const float y)
{
    const int whole = lrint(floor(y));
    const float frac = y - (float)whole;

    // coefficients of the quadratic fit for 2^frac on [0, 1)
    const float kQuad = 0.33977f;
    const float kLin  = 1.0f - 0.33977f;

    union
    {
        float f;
        int i;
    } bits;

    // biased exponent, zero mantissa: the float value 2^whole
    bits.i = (whole+127) << 23;
    bits.f *= kQuad*frac*frac + kLin*frac + 1.0f;

    return bits.f;
}

//#define SAPOW(x) (powf(2.f, (float)(x)/12.f))
// 2^(x/12), evaluated with the fpow2() approximation.
#define SAPOW(x) (fpow2((float)(x)/12.f))
//#define WARP(x) ((powf(1.1f, (float)(x)/12.f) - 1.) * bla)
// Offset (in spectrum bins) of the left edge of output column x.
// Expects a float named `bla` to be in scope at the point of use.
#define WARP(x) ((SAPOW(x) - 1.f) * bla)

// Converts one FFT input buffer into the 75-column spectrum analyser frame.
//
//   tempdata - receives 75 bytes, one level (0..255) per column
//   wavetrum - at least 512 floats; passed to fft_9() and then overwritten in
//              place: the first 256 entries become scaled magnitudes
//
// Column x covers the fractional bin range [WARP(x)+1, WARP(x+1)+1). The two
// partial bins at the ends of that range are Hermite-interpolated and weighted
// by the covered fraction; whole bins in between are added as-is.
void makeSpecData(unsigned char *tempdata, float *wavetrum)
{
	//WARP(75);
	// scale chosen so that the warp spans (just under) 255 bins over 75 columns
	float bla = (255.f/SAPOW(75.f));
	fft_9(wavetrum);

	float spec_scale=0.5;
	if (config_replaygain)
	{ // benski> i'm sure there's some math identity we can use to optimize this.
		spec_scale/=pow(10.0f, config_replaygain_non_rg_gain.GetFloat() / 20.0f);
	}

	// Collapse the 256 value pairs into magnitudes. Writing entry i only after
	// reading entries 2i and 2i+1 makes the in-place update safe.
	for (int i=0;i<256;i++)
	{
		float sinT = wavetrum[2*i];
		float cosT = wavetrum[2*i+1];
		wavetrum[i] = sqrt(sinT*sinT+cosT*cosT)*spec_scale;
	}

	float next = WARP(0)+1 ; // first column starts at bin 1, so bin-1 below is never negative
	for (int x = 0; x < 75; x ++)
	{
		//float prev = 1.+(pow(2.,(float)x/12.) -1.) * bla;
		float binF = next;
		next = WARP(x+1) +1;

		float thisValue = 0;
		int bin = lrint(floor(binF));
		int end = lrint(floor(next));
		end = min(end, 255);
		float mult = ((float)(bin+1))-binF; // fraction of the first bin that belongs to this column
		bool herm=true;                     // first and last bins are interpolated
		do
		{
			if (bin == end)
			{
				// last (possibly only) bin: weight by what is left of the column
				mult = (next-binF);
				herm=true;
			}

			if (herm)
			{
				// neighbours past the end of the 256 magnitudes count as zero
				float C=0, D=0;
				if (bin<255)
				{
					C=wavetrum[bin+1];
					if (bin<254)
						D=wavetrum[bin+2];
				}

				thisValue += hermite(binF-bin, wavetrum[bin-1], wavetrum[bin], C, D) * mult;
			}
			else
			{
				thisValue += wavetrum[bin];
			}

			herm=false;
			bin++;
			binF=(float)bin;
		}
		while (bin <= end);

		// Clamp to the byte range on both sides. The Hermite curve can dip
		// below zero between samples (e.g. two quiet bins flanked by loud
		// ones); without the lower clamp a small negative sum would wrap to a
		// near-255 spike when stored in an unsigned char.
		float level = fastmin(thisValue, 255.f);
		if (level < 0.f)
			level = 0.f;
		tempdata[x]=lrint(level);
	}

}

////////////////////////////////

// Accumulates the PCM handed to sa_addpcmdata(); each time it reports Full()
// a window is extracted from it for the FFT and CopyHalf() is called.
SABuffer saBuffer;

void sa_addpcmdata(void *_data_buf, int numChannels, int numBits, int ts)
{
	char *data_buf = reinterpret_cast<char *>(_data_buf);
	char tempdata[75*2] = {0};
	__declspec(align(16)) float wavetrum[512];
	//extern int sa_curmode;
	int vis_Csa=sa_override ? 3 : sa_curmode;

	switch (vis_Csa)
	{
	case 4:
		tempdata[0] = 0;
		tempdata[1] = 0;
		sa_add(tempdata,ts,4);
		return;
	case 2:
		makeOscData(tempdata,data_buf,576,numChannels, numBits);
		sa_add(tempdata,ts,2);
		return ;
	case 3:
		makeOscData(tempdata+75,data_buf,576,numChannels, numBits);
		// fall through!
	case 1:
		calcVuData((unsigned char*)tempdata, data_buf, numChannels, numBits);
		vu_add(tempdata, ts);
		break;
	}
	bool done=false;
	size_t samples=576;
	while (samples)
	{
		unsigned int copied = saBuffer.AddToBuffer(data_buf, numChannels, numBits, ts, (unsigned int) samples);
		samples-=copied;
		data_buf+=(copied*(numBits/8)*numChannels);
		if (saBuffer.Full())
		{
			saBuffer.WindowToFFTBuffer(wavetrum);
			if (!done)
			{
				if (vis_Csa == 3)
				{
					makeSpecData((unsigned char*)tempdata, wavetrum);
					sa_add(tempdata, ts, 0x80000003);
				}
				else if (vis_Csa == 1)
				{
					makeSpecData((unsigned char*)tempdata, wavetrum);
					sa_add(tempdata, ts, 1);
				}
			}
			//done=true;
			saBuffer.CopyHalf();
			ts+=MulDiv(SABUFFER_WINDOW_INCREMENT,1000,_srate);
		}
	}
}
Initial community commit 2024-09-24 12:54:57 +00:00			`#include "main.h"`
			`#include "SABuffer.h"`
			`#include <math.h>`
			`#include "WinampAttributes.h"`
			`#include "fft.h"`
			`extern int _srate;`
			`#ifdef _M_IX86`
			`__inline static int lrint(float flt)`
			`{`
			`int intgr;`

			`_asm`
			`{`
			`fld flt`
			`fistp intgr`
			`}`

			`return intgr;`
			`}`
			`#else`
			`__inline static int lrint(float flt)`
			`{`
			`return (int)flt;`
			`}`
			`#endif`


			`// quantizes to 23 bits - use appropriately`
			`inline static float fastmin(float x, const float b)`
			`{`
			`x = b - x;`
			`x += (float)fabs(x);`
			`x *= 0.5f;`
			`x = b - x;`
			`return x;`
			`}`
			`#define FASTMIN(x,b) { x = b - x; x += (float)fabs(x); x *= 0.5f; x = b - x; }`
			`inline static float fastclip(float x, const float a, const float b)`
			`{`
			`float x1 = (float)fabs(x-a);`
			`float x2 = (float)fabs(x-b);`
			`x = x1 + (a+b);`
			`x -= x2;`
			`x *= 0.5f;`
			`return (x);`
			`}`


			`void makeOscData(char *tempdata, char *data_buf, int little_block, int channels, int bits)`
			`{`
			`float dd = little_block/75.0f;`
			`int x,c;`
			`int stride=bits/8; // number of bytes between samples`

			`// we're calculating using only the most significant byte,`
			`// because we only end up with 6 bit data anyway`
			`// if you want full resolution, check out CVS tag BETA_2005_1122_182830, file: vis.c`
			`char *ptr, *sbuf = data_buf;`
			`for (x = 0; x < 75; x ++)`
			`{`
			`float val=0;`
			`int index =(int)((float)x * dd); // calculate the nearest sample for this point, interpolation is too expensive for this use`
			`ptr=&sbuf[index*stride*channels+stride-1]; // find first sample, and offset for little endian`
			`for (c=0;c<channels;c++)`
			`{`
			`val += (float)*ptr / 8.0f; // we want our final value to be -32 to 32`
			`ptr+=stride; // jump to the next sample (channels are interleaved)`
			`}`
			`tempdata[x] = (char)lrint(val / (float)channels); // average the channels`
			`}`
			`}`



			`inline double fast_exp2(const double val)`
			`{`
			`int e;`
			`double ret;`

			`if (val >= 0)`
			`{`
			`e = int (val);`
			`ret = val - (e - 1);`
			`((*(1 + (int *) &ret)) &= ~(2047 << 20)) += (e + 1023) << 20;`
			`}`
			`else`
			`{`
			`e = int (val + 1023);`
			`ret = val - (e - 1024);`
			`((*(1 + (int *) &ret)) &= ~(2047 << 20)) += e << 20;`
			`}`
			`return (ret);`
			`}`

			`// ~6 clocks on Pentium M vs. ~24 for single precision sqrtf`
			`#if !defined(_WIN64)`
			`static inline float squareroot_sse_11bits(float x)`
			`{`
			`float z;`
			`_asm`
			`{`
			`rsqrtss xmm0, x`
			`rcpss xmm0, xmm0`
			`movss z, xmm0 // z ~= sqrt(x) to 0.038%`
			`}`
			`return z;`
			`}`

			`static inline int floor_int(double x)`
			`{`
			`int i;`
			`static const float round_toward_m_i = -0.5f;`
			`__asm`
			`{`
			`fld x`
			`fadd st, st(0)`
			`fadd round_toward_m_i`
			`fistp i`
			`sar i, 1`
			`}`

			`return (i);`
			`}`
			`#endif`
			`/*`
			`static inline float hermite(float x, float y0, float y1, float y2, float y3)`
			`{`
			`// 4-point, 3rd-order Hermite (x-form)`
			`float c0 = y1;`
			`float c1 = 0.5f * (y2 - y0);`
			`float c2 = y0 - 2.5f * y1 + 2.f * y2 - 0.5f * y3;`
			`float c3 = 1.5f * (y1 - y2) + 0.5f * (y3 - y0);`

			`return ((c3 * x + c2) * x + c1) * x + c0;`
			`}`
			`*/`

			`/*`
			`static const float c_half = 0.5f;`
			`__declspec(naked) static float hermite(float frac_pos, const float* pntr)`
			`{`
			`__asm`
			`{`
			`push ecx;`
			`mov ecx, dword ptr[esp + 12]; //////////////////////////////////////////////////////////////////////////////////////////////////`
			`add ecx, 0x04; // ST(0) ST(1) ST(2) ST(3) ST(4) ST(5) ST(6) ST(7)`
			`fld dword ptr [ecx+4]; // x1`
			`fsub dword ptr [ecx-4]; // x1-xm1`
			`fld dword ptr [ecx]; // x0 x1-xm1`
			`fsub dword ptr [ecx+4]; // v x1-xm1`
			`fld dword ptr [ecx+8]; // x2 v x1-xm1`
			`fsub dword ptr [ecx]; // x2-x0 v x1-xm1`
			`fxch st(2); // x1-m1 v x2-x0`
			`fmul c_half; // c v x2-x0`
			`fxch st(2); // x2-x0 v c`
			`fmul c_half; // 0.5*(x2-x0) v c`
			`fxch st(2); // c v 0.5*(x2-x0)`
			`fst st(3); // c v 0.5*(x2-x0) c`
			`fadd st(0), st(1); // w v 0.5*(x2-x0) c`
			`fxch st(2); // 0.5*(x2-x0) v w c`
			`faddp st(1), st(0); // v+.5(x2-x0) w c`
			`fadd st(0), st(1); // a w c`
			`fadd st(1), st(0); // a b_neg c`
			`fmul dword ptr [esp+8]; // a*frac b_neg c`
			`fsubrp st(1), st(0); // a*f-b c`
			`fmul dword ptr [esp+8]; // (a*f-b)*f c`
			`faddp st(1), st(0); // res-x0/f`
			`fmul dword ptr [esp+8]; // res-x0`
			`fadd dword ptr [ecx]; // res`
			`pop ecx;`
			`ret;`
			`}`
			`}`
			`*/`
			`inline float hermite(float x, float y0, float y1, float y2, float y3)`
			`{`
			`// 4-point, 3rd-order Hermite (x-form)`
			`float c0 = y1;`
			`float c1 = 0.5f * (y2 - y0);`
			`float c3 = 1.5f * (y1 - y2) + 0.5f * (y3 - y0);`
			`float c2 = y0 - y1 + c1 - c3;`

			`return ((c3 * x + c2) * x + c1) * x + c0;`
			`}`

			`static inline float fpow2(const float y)`
			`{`
			`union`
			`{`
			`float f;`
			`int i;`
			`} c;`

			`int integer = lrint(floor(y));`
			`/* cut: because we guarantee y>=0`
			`if(y < 0)`
			`integer = integer-1;`
			`*/`

			`float frac = y - (float)integer;`

			`c.i = (integer+127) << 23;`
			`c.f *= 0.33977f*frac*frac + (1.0f-0.33977f)*frac + 1.0f;`

			`return c.f;`
			`}`

			`//#define SAPOW(x) (powf(2.f, (float)(x)/12.f))`
			`#define SAPOW(x) (fpow2((float)(x)/12.f))`
			`//#define WARP(x) ((powf(1.1f, (float)(x)/12.f) - 1.) * bla)`
			`#define WARP(x) ((SAPOW(x) - 1.f) * bla)`
			`void makeSpecData(unsigned char *tempdata, float *wavetrum)`
			`{`
			`//WARP(75);`
			`float bla = (255.f/SAPOW(75.f));`
			`fft_9(wavetrum);`

			`float spec_scale=0.5;`
			`if (config_replaygain)`
			`{ // benski> i'm sure there's some math identity we can use to optimize this.`
			`spec_scale/=pow(10.0f, config_replaygain_non_rg_gain.GetFloat() / 20.0f);`
			`}`

			`for (int i=0;i<256;i++)`
			`{`
			`//int lookup=2*i;`
			`float sinT = wavetrum[2*i];`
			`float cosT = wavetrum[2*i+1];`
			`wavetrum[i] = sqrt(sinT*sinT+cosT*cosT)*spec_scale;`
			`}`

			`float next = WARP(0)+1 ;`
			`for (int x = 0; x < 75; x ++)`
			`{`
			`//float prev = 1.+(pow(2.,(float)x/12.) -1.) * bla;`
			`float binF = next;`
			`next = WARP(x+1) +1;`

			`float thisValue = 0;`
			`int bin = lrint(floor(binF));`
			`int end = lrint(floor(next));`
			`end = min(end, 255);`
			`float mult = ((float)(bin+1))-binF;`
			`bool herm=true;`
			`do`
			`{`
			`if (bin == end)`
			`{`
			`mult = (next-binF);`
			`herm=true;`
			`}`

			`if (herm)`
			`{`
			`float C=0, D=0;`
			`if (bin<255)`
			`{`
			`C=wavetrum[bin+1];`
			`if (bin<254)`
			`D=wavetrum[bin+2];`
			`}`

			`//float samples[4] = { wavetrum[lookupA], wavetrum[lookupB], wavetrum[lookupC], wavetrum[lookupD] };`
			`//thisValue += hermite(binF-bin, samples) * mult;`
			`thisValue += hermite(binF-bin, wavetrum[bin-1], wavetrum[bin], C, D) * mult;`
			`}`
			`else`
			`{`
			`thisValue += wavetrum[bin];`
			`}`

			`herm=false;`
			`bin++;`
			`binF=(float)bin;`
			`}`
			`while (bin <= end);`

			`tempdata[x]=lrint(fastmin(thisValue, 255.f));`
			`}`

			`}`

			`////////////////////////////////`

			`SABuffer saBuffer;`

			`void sa_addpcmdata(void *_data_buf, int numChannels, int numBits, int ts)`
			`{`
			`char *data_buf = reinterpret_cast<char *>(_data_buf);`
			`char tempdata[75*2] = {0};`
			`__declspec(align(16)) float wavetrum[512];`
			`//extern int sa_curmode;`
			`int vis_Csa=sa_override ? 3 : sa_curmode;`

			`switch (vis_Csa)`
			`{`
			`case 4:`
			`tempdata[0] = 0;`
			`tempdata[1] = 0;`
			`sa_add(tempdata,ts,4);`
			`return;`
			`case 2:`
			`makeOscData(tempdata,data_buf,576,numChannels, numBits);`
			`sa_add(tempdata,ts,2);`
			`return ;`
			`case 3:`
			`makeOscData(tempdata+75,data_buf,576,numChannels, numBits);`
			`// fall through!`
			`case 1:`
			`calcVuData((unsigned char*)tempdata, data_buf, numChannels, numBits);`
			`vu_add(tempdata, ts);`
			`break;`
			`}`
			`bool done=false;`
			`size_t samples=576;`
			`while (samples)`
			`{`
			`unsigned int copied = saBuffer.AddToBuffer(data_buf, numChannels, numBits, ts, (unsigned int) samples);`
			`samples-=copied;`
			`data_buf+=(copied*(numBits/8)*numChannels);`
			`if (saBuffer.Full())`
			`{`
			`saBuffer.WindowToFFTBuffer(wavetrum);`
			`if (!done)`
			`{`
			`if (vis_Csa == 3)`
			`{`
			`makeSpecData((unsigned char*)tempdata, wavetrum);`
			`sa_add(tempdata, ts, 0x80000003);`
			`}`
			`else if (vis_Csa == 1)`
			`{`
			`makeSpecData((unsigned char*)tempdata, wavetrum);`
			`sa_add(tempdata, ts, 1);`
			`}`
			`}`
			`//done=true;`
			`saBuffer.CopyHalf();`
			`ts+=MulDiv(SABUFFER_WINDOW_INCREMENT,1000,_srate);`
			`}`
			`}`
			`}`