// NOTE(review): this part of the file appears to have lost text during extraction
// (anything that sat between a '<' and a following '>'). The affected spots are
// flagged individually below; the code tokens themselves are left untouched.
#include "main.h"
#include "SABuffer.h"
// NOTE(review): the header name is missing from the next directive. fabs/floor/sqrt/pow
// are used in this file, so it was presumably <math.h> - TODO confirm.
#include
#include "WinampAttributes.h"
#include "fft.h"

extern int _srate;

// Float -> int conversion helper.
// x86 build: loads the value on the x87 stack and stores it with fistp, i.e. it
// rounds according to the FPU's current rounding mode.
// Other builds: plain C cast, which truncates toward zero - results can therefore
// differ between the two variants for non-integral inputs.
#ifdef _M_IX86
__inline static int lrint(float flt)
{
	int intgr;
	_asm
	{
		fld flt
		fistp intgr
	}
	return intgr;
}
#else
__inline static int lrint(float flt)
{
	return (int)flt;
}
#endif

// Branch-free minimum of x and b, evaluated as b - 0.5*((b-x) + |b-x|).
// quantizes to 23 bits - use appropriately
inline static float fastmin(float x, const float b)
{
	x = b - x;
	x += (float)fabs(x);
	x *= 0.5f;
	x = b - x;
	return x;
}

// Statement form of fastmin(): overwrites x in place with the smaller of x and b.
#define FASTMIN(x,b) { x = b - x; x += (float)fabs(x); x *= 0.5f; x = b - x; }

// Branch-free clamp, evaluated as 0.5*(|x-a| + (a+b) - |x-b|).
// For a <= b this yields a when x < a, b when x > b, and x otherwise.
inline static float fastclip(float x, const float a, const float b)
{
	float x1 = (float)fabs(x-a);
	float x2 = (float)fabs(x-b);
	x = x1 + (a+b);
	x -= x2;
	x *= 0.5f;
	return (x);
}

// Reduces a block of interleaved PCM to 75 oscilloscope points.
//   tempdata     - output buffer (the visible code iterates over 75 points)
//   data_buf     - interleaved PCM input
//   little_block - number of sample frames the 75 points are spread across
//   channels     - interleaved channel count
//   bits         - bits per sample; bits/8 is used as the byte stride of one sample
// NOTE(review): the body below is truncated (see the note inside the loop), so what is
// finally written to tempdata cannot be read from this view.
void makeOscData(char *tempdata, char *data_buf, int little_block, int channels, int bits)
{
	float dd = little_block/75.0f;
	int x,c;
	int stride=bits/8; // number of bytes between samples
	// we're calculating using only the most significant byte,
	// because we only end up with 6 bit data anyway
	// if you want full resolution, check out CVS tag BETA_2005_1122_182830, file: vis.c
	char *ptr, *sbuf = data_buf;
	for (x = 0; x < 75; x ++)
	{
		float val=0;
		int index =(int)((float)x * dd); // calculate the nearest sample for this point, interpolation is too expensive for this use
		ptr=&sbuf[index*stride*channels+stride-1]; // find first sample, and offset for little endian
		// NOTE(review): source text is missing from here on. The per-channel loop header
		// is cut off after "c", and what follows is the tail of a different function:
		// it manipulates the exponent bits of a double named ret and returns it
		// (presumably a fast 2^val approximation - TODO confirm against the original file).
		// The remainder of makeOscData and the head of that function are not visible.
		for (c=0;c= 0)
		{
			e = int (val);
			ret = val - (e - 1);
			((*(1 + (int *) &ret)) &= ~(2047 << 20)) += (e + 1023) << 20;
		}
		else
		{
			e = int (val + 1023);
			ret = val - (e - 1024);
			((*(1 + (int *) &ret)) &= ~(2047 << 20)) += e << 20;
		}
		return (ret);
	}

// ~6 clocks on Pentium M vs. 
~24 for single precision sqrtf #if !defined(_WIN64) static inline float squareroot_sse_11bits(float x) { float z; _asm { rsqrtss xmm0, x rcpss xmm0, xmm0 movss z, xmm0 // z ~= sqrt(x) to 0.038% } return z; } static inline int floor_int(double x) { int i; static const float round_toward_m_i = -0.5f; __asm { fld x fadd st, st(0) fadd round_toward_m_i fistp i sar i, 1 } return (i); } #endif /* static inline float hermite(float x, float y0, float y1, float y2, float y3) { // 4-point, 3rd-order Hermite (x-form) float c0 = y1; float c1 = 0.5f * (y2 - y0); float c2 = y0 - 2.5f * y1 + 2.f * y2 - 0.5f * y3; float c3 = 1.5f * (y1 - y2) + 0.5f * (y3 - y0); return ((c3 * x + c2) * x + c1) * x + c0; } */ /* static const float c_half = 0.5f; __declspec(naked) static float hermite(float frac_pos, const float* pntr) { __asm { push ecx; mov ecx, dword ptr[esp + 12]; ////////////////////////////////////////////////////////////////////////////////////////////////// add ecx, 0x04; // ST(0) ST(1) ST(2) ST(3) ST(4) ST(5) ST(6) ST(7) fld dword ptr [ecx+4]; // x1 fsub dword ptr [ecx-4]; // x1-xm1 fld dword ptr [ecx]; // x0 x1-xm1 fsub dword ptr [ecx+4]; // v x1-xm1 fld dword ptr [ecx+8]; // x2 v x1-xm1 fsub dword ptr [ecx]; // x2-x0 v x1-xm1 fxch st(2); // x1-m1 v x2-x0 fmul c_half; // c v x2-x0 fxch st(2); // x2-x0 v c fmul c_half; // 0.5*(x2-x0) v c fxch st(2); // c v 0.5*(x2-x0) fst st(3); // c v 0.5*(x2-x0) c fadd st(0), st(1); // w v 0.5*(x2-x0) c fxch st(2); // 0.5*(x2-x0) v w c faddp st(1), st(0); // v+.5(x2-x0) w c fadd st(0), st(1); // a w c fadd st(1), st(0); // a b_neg c fmul dword ptr [esp+8]; // a*frac b_neg c fsubrp st(1), st(0); // a*f-b c fmul dword ptr [esp+8]; // (a*f-b)*f c faddp st(1), st(0); // res-x0/f fmul dword ptr [esp+8]; // res-x0 fadd dword ptr [ecx]; // res pop ecx; ret; } } */ inline float hermite(float x, float y0, float y1, float y2, float y3) { // 4-point, 3rd-order Hermite (x-form) float c0 = y1; float c1 = 0.5f * (y2 - y0); float c3 = 1.5f * (y1 - y2) + 
0.5f * (y3 - y0); float c2 = y0 - y1 + c1 - c3; return ((c3 * x + c2) * x + c1) * x + c0; } static inline float fpow2(const float y) { union { float f; int i; } c; int integer = lrint(floor(y)); /* cut: because we guarantee y>=0 if(y < 0) integer = integer-1; */ float frac = y - (float)integer; c.i = (integer+127) << 23; c.f *= 0.33977f*frac*frac + (1.0f-0.33977f)*frac + 1.0f; return c.f; } //#define SAPOW(x) (powf(2.f, (float)(x)/12.f)) #define SAPOW(x) (fpow2((float)(x)/12.f)) //#define WARP(x) ((powf(1.1f, (float)(x)/12.f) - 1.) * bla) #define WARP(x) ((SAPOW(x) - 1.f) * bla) void makeSpecData(unsigned char *tempdata, float *wavetrum) { //WARP(75); float bla = (255.f/SAPOW(75.f)); fft_9(wavetrum); float spec_scale=0.5; if (config_replaygain) { // benski> i'm sure there's some math identity we can use to optimize this. spec_scale/=pow(10.0f, config_replaygain_non_rg_gain.GetFloat() / 20.0f); } for (int i=0;i<256;i++) { //int lookup=2*i; float sinT = wavetrum[2*i]; float cosT = wavetrum[2*i+1]; wavetrum[i] = sqrt(sinT*sinT+cosT*cosT)*spec_scale; } float next = WARP(0)+1 ; for (int x = 0; x < 75; x ++) { //float prev = 1.+(pow(2.,(float)x/12.) -1.) 
* bla; float binF = next; next = WARP(x+1) +1; float thisValue = 0; int bin = lrint(floor(binF)); int end = lrint(floor(next)); end = min(end, 255); float mult = ((float)(bin+1))-binF; bool herm=true; do { if (bin == end) { mult = (next-binF); herm=true; } if (herm) { float C=0, D=0; if (bin<255) { C=wavetrum[bin+1]; if (bin<254) D=wavetrum[bin+2]; } //float samples[4] = { wavetrum[lookupA], wavetrum[lookupB], wavetrum[lookupC], wavetrum[lookupD] }; //thisValue += hermite(binF-bin, samples) * mult; thisValue += hermite(binF-bin, wavetrum[bin-1], wavetrum[bin], C, D) * mult; } else { thisValue += wavetrum[bin]; } herm=false; bin++; binF=(float)bin; } while (bin <= end); tempdata[x]=lrint(fastmin(thisValue, 255.f)); } } //////////////////////////////// SABuffer saBuffer; void sa_addpcmdata(void *_data_buf, int numChannels, int numBits, int ts) { char *data_buf = reinterpret_cast(_data_buf); char tempdata[75*2] = {0}; __declspec(align(16)) float wavetrum[512]; //extern int sa_curmode; int vis_Csa=sa_override ? 3 : sa_curmode; switch (vis_Csa) { case 4: tempdata[0] = 0; tempdata[1] = 0; sa_add(tempdata,ts,4); return; case 2: makeOscData(tempdata,data_buf,576,numChannels, numBits); sa_add(tempdata,ts,2); return ; case 3: makeOscData(tempdata+75,data_buf,576,numChannels, numBits); // fall through! 
case 1: calcVuData((unsigned char*)tempdata, data_buf, numChannels, numBits); vu_add(tempdata, ts); break; } bool done=false; size_t samples=576; while (samples) { unsigned int copied = saBuffer.AddToBuffer(data_buf, numChannels, numBits, ts, (unsigned int) samples); samples-=copied; data_buf+=(copied*(numBits/8)*numChannels); if (saBuffer.Full()) { saBuffer.WindowToFFTBuffer(wavetrum); if (!done) { if (vis_Csa == 3) { makeSpecData((unsigned char*)tempdata, wavetrum); sa_add(tempdata, ts, 0x80000003); } else if (vis_Csa == 1) { makeSpecData((unsigned char*)tempdata, wavetrum); sa_add(tempdata, ts, 1); } } //done=true; saBuffer.CopyHalf(); ts+=MulDiv(SABUFFER_WINDOW_INCREMENT,1000,_srate); } } }