//=============================================================================== // Copyright (c) 2007-2016 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2004-2006 ATI Technologies Inc. //=============================================================================== // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions : // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. // #include #include #include #include #include "common.h" #include "3dquant_constants.h" #include "3dquant_vpc.h" #include "bc7_definitions.h" #include "debug.h" #include #ifdef BC7_DEBUG_TO_RESULTS_TXT FILE *fp; #endif #define EPSILON 0.000001 #define MAX_TRY 20 #undef TRACE #define MAX_TRACE 250000 struct TRACE { int k; double d; }; static int trcnts[MAX_CLUSTERS][MAX_ENTRIES_QUANT_TRACE]; #define USE_TRACE_WITH_DYNAMIC_MEM #ifdef USE_TRACE_WITH_DYNAMIC_MEM int* amd_codes[MAX_CLUSTERS][MAX_ENTRIES_QUANT_TRACE] = {}; TRACE* amd_trs[MAX_CLUSTERS][MAX_ENTRIES_QUANT_TRACE] = {}; #else int amd_codes[MAX_CLUSTERS][MAX_ENTRIES_QUANT_TRACE][MAX_TRACE]; TRACE amd_trs[MAX_CLUSTERS][MAX_ENTRIES_QUANT_TRACE][MAX_TRACE]; #endif static int g_Quant_init = 0; void traceBuilder (int numEntries, int numClusters,struct TRACE tr [], int code[], int *trcnt ); std::mutex mtx; void Quant_Init(void) { if (g_Quant_init > 0) { g_Quant_init++; return; } if (amd_codes[0][0]) return; mtx.lock(); for ( int numClusters = 0; numClusters < MAX_CLUSTERS; numClusters++ ) { for ( int numEntries = 0; numEntries < MAX_ENTRIES_QUANT_TRACE; numEntries++ ) { #ifdef USE_TRACE_WITH_DYNAMIC_MEM amd_codes[ numClusters][ numEntries ] = new int[ MAX_TRACE ]; amd_trs[ numClusters ][ numEntries ] = new TRACE[ MAX_TRACE ]; assert(amd_codes[ numClusters][ numEntries ]); assert(amd_trs[ numClusters ][ numEntries ]); #endif traceBuilder ( numEntries+1, numClusters+1, amd_trs[numClusters][numEntries], amd_codes[numClusters][numEntries], trcnts[numClusters]+(numEntries)); } } init_ramps(); g_Quant_init++; mtx.unlock(); } void Quant_DeInit(void) { // gpuopen issue 242 quick fix. We are not freeing memory to improve BC7 compression performance return; // g_Quant_init--; // if (g_Quant_init > 1) { // return; // } else { // g_Quant_init = 0; // Reset in case user called Quant_DeInit too many times without matching Quant_Init // if (amd_codes[0][0] == nullptr) return; // #ifdef USE_TRACE_WITH_DYNAMIC_MEM // for (int i = 0; i < MAX_CLUSTERS; i++) { // for (int j = 0; j < MAX_ENTRIES_QUANT_TRACE; j++) { // if (amd_codes[i][j]) { // delete[] amd_codes[i][j]; // amd_codes[i][j] = nullptr; // } // if (amd_trs[i][j]) { // delete[] amd_trs[i][j]; // amd_trs[i][j] = nullptr; // } // } // } // #endif // } } //========================================================================================= void sugar(void) { #ifdef USE_DBGTRACE DbgTrace(("sugar!")) #endif }; // called many times Optimize this call! inline int a_compare( const void *arg1, const void *arg2 ) { // #ifdef USE_DBGTRACE // DbgTrace(()); // #endif if (((a* )arg1)->d-((a* )arg2)->d > 0 ) return 1; if (((a* )arg1)->d-((a* )arg2)->d < 0 ) return -1; return 0; }; // // We ignore the issue of ordering equal elements here, though it can affect results abit // void sortProjection(double projection[MAX_ENTRIES], int order[MAX_ENTRIES], int numEntries) { int i; a what[MAX_ENTRIES+MAX_PARTITIONS_TABLE]; for (i=0; i < numEntries; i++) what[what[i].i=i].d = projection[i]; qsort((void*)&what, numEntries, sizeof(a),a_compare); for (i=0; i < numEntries; i++) order[i]=what[i].i; }; void covariance(double data[][DIMENSION], int numEntries, double cov[DIMENSION][DIMENSION]) { #ifdef USE_DBGTRACE DbgTrace(()); #endif int i,j,k; for(i=0; i0); p = p >0 ? p : 1; q = (EV_ITERATION_NUMBER+p-1) / p; l=0; for(n=0; n maxDiag ? c[l][i][i] : maxDiag; if (maxDiag<=0) { sugar(); return; } assert(maxDiag > 0); for(i=0; i maxDiag ? i : k; maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag; } double t; t=0; for(i=0; i0); if (t<=0) { sugar(); return; } for(i=0; i0); p = p >0 ? p : 1; q = (EV_ITERATION_NUMBER+p-1) / p; l=0; for(n=0; n maxDiag ? c[l][i][i] : maxDiag; if (maxDiag<=0) { sugar(); return; } assert(maxDiag >0); for(i=0; i maxDiag ? i : k; maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag; } double t; t=0; for(i=0; i0); if (t<=0) { sugar(); return; } for(i=0; i get_partition_subset() is not implemented data is global")); #endif int i,j,k; double cov[2][DIMENSION][DIMENSION]; double center[2][DIMENSION]; double cnt[2] = {0,0}; double vector[2][DIMENSION]; double acc=0; for(k=0; k0); k = order[--cluster[level-1]]; s=(cluster[level-1]-cluster[level-2]) == 0 ? 0: 1/sqrt( (double) (cluster[level-1]-cluster[level-2])); // see cluster_ decl for // cluster[-1] value t=1/sqrt((double) (numEntries-cluster[level-1])); for(i=0; inError) { nError=t; for(i=0; i<=level; i++) bestCluster[i]=cluster[i]; } if (level < numClusters - 1 ) { // go up level++; for(i=0; i either // though normally it should not happen // However, the EPSILON should be scaled, otherwise is does not make sense t = sqrt(t)*EPSILON; project(centered, numEntries, direction, projected); for (j=1; j < numEntries; j++) if (projected[order[j]] < projected[order[j-1]]-t /*EPSILON*/) break; if (j >= numEntries) break; } sortProjection(projected, order, numEntries); for (k=0; k either // though normally it should not happen // However, the EPSILON should be scaled, otherwise is does not make sense t = sqrt(t)*EPSILON; project(centered, numEntries, direction, projected); for (j=1; j < numEntries; j++) if (projected[order[j]] < projected[order[j-1]]-t /*EPSILON*/) break; if (j >= numEntries) break; } sortProjection(projected, order, numEntries); quantLineConstr(centered, order, numEntries, numClusters, index); } double gcAcc[MAX_CLUSTERS][DIMENSION]; double gcSAcc[MAX_CLUSTERS][DIMENSION]; double gcS[MAX_CLUSTERS]; for(i=0; i M) {k=i;M=c;};}; for (i=0; i+15=MAX_TRACE)) { // NP return; } k = code[k]; i=0; for (j=0; j>=1; } index[j]=i; k>>=1; } } void quantTrace_d(double data[MAX_ENTRIES_QUANT_TRACE][MAX_DIMENSION_BIG],int numEntries, int numClusters, int index[MAX_ENTRIES_QUANT_TRACE],int dimension) { #ifdef USE_DBGTRACE DbgTrace(()); #endif // Data should be centered, otherwise will not work int i,j,k; double sdata[2*MAX_ENTRIES][MAX_DIMENSION_BIG]; double dpAcc [MAX_DIMENSION_BIG]; double M =0; struct TRACE *tr ; tr=amd_trs[numClusters-1][numEntries-1]; int trcnt =trcnts[numClusters-1][numEntries-1]; int *code; code=amd_codes[numClusters-1][numEntries-1]; for (i=0; i M) {k=i;M=c;};\ }; #define UROLL_STEP_2(i) \ dpAcc[0]+=sdata[tr[i].k][0];\ dpAcc[1]+=sdata[tr[i].k][1];\ { double c; \ c = (dpAcc[0]*dpAcc[0]+dpAcc[1]*dpAcc[1])*tr[i].d;\ if (c > M) {k=i;M=c;};}; #define UROLL_STEP_3(i) \ dpAcc[0]+=sdata[tr[i].k][0];\ dpAcc[1]+=sdata[tr[i].k][1];\ dpAcc[2]+=sdata[tr[i].k][2];\ { double c; \ c = (dpAcc[0]*dpAcc[0]+dpAcc[1]*dpAcc[1]+dpAcc[2]*dpAcc[2])*tr[i].d;\ if (c > M) {k=i;M=c;};}; #define UROLL_STEP_4(i) \ dpAcc[0]+=sdata[tr[i].k][0];\ dpAcc[1]+=sdata[tr[i].k][1];\ dpAcc[2]+=sdata[tr[i].k][2];\ dpAcc[3]+=sdata[tr[i].k][3];\ { double c; \ c = (dpAcc[0]*dpAcc[0]+dpAcc[1]*dpAcc[1]+dpAcc[2]*dpAcc[2]+dpAcc[3]*dpAcc[3])*tr[i].d;\ if (c > M) {k=i;M=c;};}; #undef UROLL_STEP #define UROLL_MACRO(UROLL_STEP){\ \ \ for (i=0;i+15>=1; } index[j]=i; k>>=1; } } void quant_AnD_Shell(double* v_, int k, int n, int *idx) { // input: // // v_ points, might be uncentered // k - number of points in the ramp // n - number of points in v_ // // output: // // index, uncentered, in the range 0..k-1 // #define MAX_BLOCK MAX_ENTRIES int i,j; double v[MAX_BLOCK]; double z[MAX_BLOCK]; a d[MAX_BLOCK]; double l; double mm; double r=0; int mi; assert((v_ != NULL) && (n>1) && (k>1)); double m, M, s, dm=0.; m=M=v_[0]; for (i=1; i < n; i++) { m = m < v_[i] ? m : v_[i]; M = M > v_[i] ? M : v_[i]; } if (M==m) { for (i=0; i < n; i++) idx[i]=0; return; } assert(M-m >0); s = (k-1)/(M-m); for (i=0; i < n; i++) { v[i] = v_[i]*s; idx[i]=(int)(z[i] = floor(v[i] +0.5 /* stabilizer*/ - m *s)); d[i].d = v[i]-z[i]- m *s; d[i].i = i; dm+= d[i].d; r += d[i].d*d[i].d; } if (n*r- dm*dm >= (double)(n-1)/4 /*slack*/ /2) { dm /= (double)n; for (i=0; i < n; i++) d[i].d -= dm; qsort((void*)&d, n, sizeof(a),a_compare); // got into fundamental simplex // move coordinate system origin to its center for (i=0; i < n; i++) d[i].d -= (2.*(double)i+1-(double)n)/2./(double)n; mm=l=0.; j=-1; for (i=0; i < n; i++) { l+=d[i].d; if (l < mm) { mm =l; j=i; } } // position which should be in 0 j = ++j % n; for (i=j; i < n; i++) idx[d[i].i]++; } // get rid of an offset in idx mi=idx[0]; for (i=1; i < n; i++) mi = mi < idx[i]? mi :idx[i]; for (i=0; i < n; i++) idx[i]-=mi; } double optQuantTrace( double data[MAX_ENTRIES][DIMENSION], int numEntries, int numClusters, int index_[MAX_ENTRIES], double out[MAX_ENTRIES][DIMENSION], double direction [DIMENSION],double *step ) { #ifdef USE_DBGTRACE DbgTrace(()); #endif int index[MAX_ENTRIES]; int maxTry=MAX_TRY; int i,j,k; double t,s; double centered[MAX_ENTRIES][DIMENSION]; double ordered[MAX_ENTRIES][DIMENSION]; double mean[DIMENSION]; double cov[DIMENSION][DIMENSION]; double projected[MAX_ENTRIES]; int order[MAX_ENTRIES]; for (i=0; i either // though normally it should not happen // However, the EPSILON should be scaled, otherwise is does not make sense t = sqrt(t)*EPSILON; project(centered, numEntries, direction, projected); for (j=1; j < numEntries; j++) if (projected[order[j]] < projected[order[j-1]]-t /*EPSILON*/) break; if (j >= numEntries) break; } sortProjection(projected, order, numEntries); for (k=0; k either // though normally it should not happen // However, the EPSILON should be scaled, otherwise is does not make sense t = sqrt(t)*EPSILON; project_d(centered, numEntries, direction, projected, dimension); for (j=1; j < numEntries; j++) if (projected[order[j]] < projected[order[j-1]]-t /*EPSILON*/) break; if (j >= numEntries) break; } sortProjection(projected, order, numEntries); for (k=0; k1 ) { #ifdef USE_DBGTRACE DbgTrace(("Driving trace generation error")); for (p=0; p= numEntries) { rescan =1; h[cn]++; h[cn-1]--; } else { q2+= -2*cn+1; q--; { int i1,cc=0; for(i1=0; i1=numEntries || cn <1 || cn >= numClusters || h[cn] < 0 || h[cn-1]>= numEntries) DbgTrace(("tre1 %d %d %d %d %d %d",ci,cn,numEntries,numClusters,h[cn],h[cn-1])); #endif cd |= (1<= numEntries) { rescan =1; h[cn]++; h[cn+1]--; } else { q2+= 2*cn+1; q++; { int i1,cc=0; for(i1=0; i1=numEntries || cn >= numClusters-1 || h[cn] < 0 || h[cn+1]>= numEntries) DbgTrace(("tre2 %d %d %d %d %d %d",ci,cn,numEntries,numClusters,h[cn],h[cn+1])); #endif cd |= (1< (k+0.5 -s)*t && k < numClusters-1) k++; index__[order_[j]]=k; } done =1; for (j=0; j < numEntries; j++) { done = (done && (index__[j]==index[j])); index[j]=index__[j]; } } while (! done && try_two--); if (i==1) for (j=0; j < numEntries; j++) index_[j]=index[j]; else { done =1; for (j=0; j < numEntries; j++) { done = (done && (index_[j]==index[j])); index_[j]=index_[j]; } if (done) break; } } quant_AnD_Shell_Calls++; quant_AnD_Shell(projected, numClusters,numEntries, index); } #ifdef USE_DBGTRACE DbgTrace(("->quant_AnD_Shell [%2d]",quant_AnD_Shell_Calls)); #endif s=t=0; double q=0; for (k=0; k (k+0.5 -s)*t && k < numClusters-1) k++; index__[order_[j]]=k; } done =1; for (j=0; j < numEntries; j++) { done = (done && (index__[j]==index[j])); index[j]=index__[j]; } } while (! done && try_two--); if (i==1) for (j=0; j < numEntries; j++) index_[j]=index[j]; else { done =1; for (j=0; j < numEntries; j++) { done = (done && (index_[j]==index[j])); index_[j]=index_[j]; } if (done) break; } } quant_AnD_Shell_Calls++; quant_AnD_Shell(projected, numClusters,numEntries, index); } #ifdef USE_DBGTRACE DbgTrace(("->quant_AnD_Shell [%2d]",quant_AnD_Shell_Calls)); #endif s=t=0; double q=0; for (k=0; k