//=============================================================================== // Copyright (c) 2007-2017 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2004-2006 ATI Technologies Inc. //=============================================================================== // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions : // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. // // // File Name: HDR_Encode.cpp // Description: Reserved utils function for HDR process // ////////////////////////////////////////////////////////////////////////////// #include "HDR_Encode.h" #include #include #include #include namespace HDR_Encode { #define USE_NEWRAMP //============================================================================================== // return # of bits needed to store n. handle signed or unsigned cases properly inline int NBits(int n, bool bIsSigned) { int nb; if (n == 0) { return 0; // no bits needed for 0, signed or not } else if (n > 0) { for (nb = 0; n; ++nb, n >>= 1); return nb + (bIsSigned ? 1 : 0); } else { assert(bIsSigned); for (nb = 0; n < -1; ++nb, n >>= 1); return nb + 1; } } float lerpf(float a, float b, int i, int denom) { assert(denom == 3 || denom == 7 || denom == 15); assert(i >= 0 && i <= denom); int *weights = NULL; switch (denom) { case 3: denom *= 5; i *= 5; // fall through to case 15 case 7: weights = g_aWeights3; break; case 15: weights = g_aWeights4; break; default: assert(0); } return (a*weights[denom - i] + b*weights[i]) / 64.0f; } int QuantizeToInt(short value, int prec, bool signedfloat16, float exposure) { (exposure); if (prec <= 1) return 0; bool negvalue = false; // move data to use extra bits for processing int ivalue = value; if (signedfloat16) { if (value < 0) { negvalue = true; value = -value; } prec--; } else { // clamp -ve if (value < 0) value = 0; } int iQuantized; int bias = (prec > 10 && prec != 16) ? ((1 << (prec - 11)) - 1) : 0; bias = (prec == 16) ? 15 : bias; iQuantized = ((ivalue << prec) + bias) / (F16HMAX + 1); return (negvalue ? -iQuantized : iQuantized); } int Unquantize(int comp, unsigned char uBitsPerComp, bool bSigned) { int unq = 0, s = 0; if (bSigned) { if (uBitsPerComp >= 16) { unq = comp; } else { if (comp < 0) { s = 1; comp = -comp; } if (comp == 0) unq = 0; else if (comp >= ((1 << (uBitsPerComp - 1)) - 1)) unq = 0x7FFF; else unq = ((comp << 15) + 0x4000) >> (uBitsPerComp - 1); if (s) unq = -unq; } } else { if (uBitsPerComp >= 15) unq = comp; else if (comp == 0) unq = 0; else if (comp == ((1 << uBitsPerComp) - 1)) unq = 0xFFFF; else unq = ((comp << 16) + 0x8000) >> uBitsPerComp; } return unq; } //============================================================================================== int PARTITIONS[MAX_SUBSETS][MAX_PARTITIONS][MAX_SUBSET_SIZE] = { // Single subset partitions for both BC6H abd BC7 { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, }, { { // 0 0,0,1,1, 0,0,1,1, 0,0,1,1, 0,0,1,1 }, { // 1 0,0,0,1, 0,0,0,1, 0,0,0,1, 0,0,0,1 }, { // 2 0,1,1,1, 0,1,1,1, 0,1,1,1, 0,1,1,1 }, { // 3 0,0,0,1, 0,0,1,1, 0,0,1,1, 0,1,1,1 }, { // 4 0,0,0,0, 0,0,0,1, 0,0,0,1, 0,0,1,1 }, { // 5 0,0,1,1, 0,1,1,1, 0,1,1,1, 1,1,1,1 }, { // 6 0,0,0,1, 0,0,1,1, 0,1,1,1, 1,1,1,1 }, { // 7 0,0,0,0, 0,0,0,1, 0,0,1,1, 0,1,1,1 }, { // 8 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,1,1 }, { // 9 0,0,1,1, 0,1,1,1, 1,1,1,1, 1,1,1,1 }, { // 10 0,0,0,0, 0,0,0,1, 0,1,1,1, 1,1,1,1 }, { // 11 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,1,1 }, { // 12 0,0,0,1, 0,1,1,1, 1,1,1,1, 1,1,1,1 }, { // 13 0,0,0,0, 0,0,0,0, 1,1,1,1, 1,1,1,1 }, { // 14 0,0,0,0, 1,1,1,1, 1,1,1,1, 1,1,1,1 }, { // 15 0,0,0,0, 0,0,0,0, 0,0,0,0, 1,1,1,1 }, { // 16 0,0,0,0, 1,0,0,0, 1,1,1,0, 1,1,1,1 }, { // 17 0,1,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0 }, { // 18 0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,1,0 }, { // 19 0,1,1,1, 0,0,1,1, 0,0,0,1, 0,0,0,0 }, { // 20 0,0,1,1, 0,0,0,1, 0,0,0,0, 0,0,0,0 }, { // 21 0,0,0,0, 1,0,0,0, 1,1,0,0, 1,1,1,0 }, { // 22 0,0,0,0, 0,0,0,0, 1,0,0,0, 1,1,0,0 }, { // 23 0,1,1,1, 0,0,1,1, 0,0,1,1, 0,0,0,1 }, { // 24 0,0,1,1, 0,0,0,1, 0,0,0,1, 0,0,0,0 }, { // 25 0,0,0,0, 1,0,0,0, 1,0,0,0, 1,1,0,0 }, { // 26 0,1,1,0, 0,1,1,0, 0,1,1,0, 0,1,1,0 }, { // 27 0,0,1,1, 0,1,1,0, 0,1,1,0, 1,1,0,0 }, { // 28 0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0 }, { // 29 0,0,0,0, 1,1,1,1, 1,1,1,1, 0,0,0,0 }, { // 30 0,1,1,1, 0,0,0,1, 1,0,0,0, 1,1,1,0 }, { // 31 0,0,1,1, 1,0,0,1, 1,0,0,1, 1,1,0,0 }, // ----------- BC7 only shapes from here on ------------- { // 32 0,1,0,1, 0,1,0,1, 0,1,0,1, 0,1,0,1 }, { // 33 0,0,0,0, 1,1,1,1, 0,0,0,0, 1,1,1,1 }, { // 34 0,1,0,1, 1,0,1,0, 0,1,0,1, 1,0,1,0 }, { // 35 0,0,1,1, 0,0,1,1, 1,1,0,0, 1,1,0,0 }, { // 36 0,0,1,1, 1,1,0,0, 0,0,1,1, 1,1,0,0 }, { // 37 0,1,0,1, 0,1,0,1, 1,0,1,0, 1,0,1,0 }, { // 38 0,1,1,0, 1,0,0,1, 0,1,1,0, 1,0,0,1 }, { // 39 0,1,0,1, 1,0,1,0, 1,0,1,0, 0,1,0,1 }, { // 40 0,1,1,1, 0,0,1,1, 1,1,0,0, 1,1,1,0 }, { // 41 0,0,0,1, 0,0,1,1, 1,1,0,0, 1,0,0,0 }, { // 42 0,0,1,1, 0,0,1,0, 0,1,0,0, 1,1,0,0 }, { // 43 0,0,1,1, 1,0,1,1, 1,1,0,1, 1,1,0,0 }, { // 44 0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0 }, { // 45 0,0,1,1, 1,1,0,0, 1,1,0,0, 0,0,1,1 }, { // 46 0,1,1,0, 0,1,1,0, 1,0,0,1, 1,0,0,1 }, { // 47 0,0,0,0, 0,1,1,0, 0,1,1,0, 0,0,0,0 }, { // 48 0,1,0,0, 1,1,1,0, 0,1,0,0, 0,0,0,0 }, { // 49 0,0,1,0, 0,1,1,1, 0,0,1,0, 0,0,0,0 }, { // 50 0,0,0,0, 0,0,1,0, 0,1,1,1, 0,0,1,0 }, { // 51 0,0,0,0, 0,1,0,0, 1,1,1,0, 0,1,0,0 }, { // 52 0,1,1,0, 1,1,0,0, 1,0,0,1, 0,0,1,1 }, { // 53 0,0,1,1, 0,1,1,0, 1,1,0,0, 1,0,0,1 }, { // 54 0,1,1,0, 0,0,1,1, 1,0,0,1, 1,1,0,0 }, { // 55 0,0,1,1, 1,0,0,1, 1,1,0,0, 0,1,1,0 }, { // 56 0,1,1,0, 1,1,0,0, 1,1,0,0, 1,0,0,1 }, { // 57 0,1,1,0, 0,0,1,1, 0,0,1,1, 1,0,0,1 }, { // 58 0,1,1,1, 1,1,1,0, 1,0,0,0, 0,0,0,1 }, { // 59 0,0,0,1, 1,0,0,0, 1,1,1,0, 0,1,1,1 }, { // 60 0,0,0,0, 1,1,1,1, 0,0,1,1, 0,0,1,1 }, { // 61 0,0,1,1, 0,0,1,1, 1,1,1,1, 0,0,0,0 }, { // 62 0,0,1,0, 0,0,1,0, 1,1,1,0, 1,1,1,0 }, { // 63 0,1,0,0, 0,1,0,0, 0,1,1,1, 0,1,1,1 }, }, // Table.P3 - only for BC7 { { 0,0,1,1, 0,0,1,1, 0,2,2,1, 2,2,2,2 }, { 0,0,0,1, 0,0,1,1, 2,2,1,1, 2,2,2,1 }, { 0,0,0,0, 2,0,0,1, 2,2,1,1, 2,2,1,1 }, { 0,2,2,2, 0,0,2,2, 0,0,1,1, 0,1,1,1 }, { 0,0,0,0, 0,0,0,0, 1,1,2,2, 1,1,2,2 }, { 0,0,1,1, 0,0,1,1, 0,0,2,2, 0,0,2,2 }, { 0,0,2,2, 0,0,2,2, 1,1,1,1, 1,1,1,1 }, { 0,0,1,1, 0,0,1,1, 2,2,1,1, 2,2,1,1 }, { 0,0,0,0, 0,0,0,0, 1,1,1,1, 2,2,2,2 }, { 0,0,0,0, 1,1,1,1, 1,1,1,1, 2,2,2,2 }, { 0,0,0,0, 1,1,1,1, 2,2,2,2, 2,2,2,2 }, { 0,0,1,2, 0,0,1,2, 0,0,1,2, 0,0,1,2 }, { 0,1,1,2, 0,1,1,2, 0,1,1,2, 0,1,1,2 }, { 0,1,2,2, 0,1,2,2, 0,1,2,2, 0,1,2,2 }, { 0,0,1,1, 0,1,1,2, 1,1,2,2, 1,2,2,2 }, { 0,0,1,1, 2,0,0,1, 2,2,0,0, 2,2,2,0 }, { 0,0,0,1, 0,0,1,1, 0,1,1,2, 1,1,2,2 }, { 0,1,1,1, 0,0,1,1, 2,0,0,1, 2,2,0,0 }, { 0,0,0,0, 1,1,2,2, 1,1,2,2, 1,1,2,2 }, { 0,0,2,2, 0,0,2,2, 0,0,2,2, 1,1,1,1 }, { 0,1,1,1, 0,1,1,1, 0,2,2,2, 0,2,2,2 }, { 0,0,0,1, 0,0,0,1, 2,2,2,1, 2,2,2,1 }, { 0,0,0,0, 0,0,1,1, 0,1,2,2, 0,1,2,2 }, { 0,0,0,0, 1,1,0,0, 2,2,1,0, 2,2,1,0 }, { 0,1,2,2, 0,1,2,2, 0,0,1,1, 0,0,0,0 }, { 0,0,1,2, 0,0,1,2, 1,1,2,2, 2,2,2,2 }, { 0,1,1,0, 1,2,2,1, 1,2,2,1, 0,1,1,0 }, { 0,0,0,0, 0,1,1,0, 1,2,2,1, 1,2,2,1 }, { 0,0,2,2, 1,1,0,2, 1,1,0,2, 0,0,2,2 }, { 0,1,1,0, 0,1,1,0, 2,0,0,2, 2,2,2,2 }, { 0,0,1,1, 0,1,2,2, 0,1,2,2, 0,0,1,1 }, { 0,0,0,0, 2,0,0,0, 2,2,1,1, 2,2,2,1 }, { 0,0,0,0, 0,0,0,2, 1,1,2,2, 1,2,2,2 }, { 0,2,2,2, 0,0,2,2, 0,0,1,2, 0,0,1,1 }, { 0,0,1,1, 0,0,1,2, 0,0,2,2, 0,2,2,2 }, { 0,1,2,0, 0,1,2,0, 0,1,2,0, 0,1,2,0 }, { 0,0,0,0, 1,1,1,1, 2,2,2,2, 0,0,0,0 }, { 0,1,2,0, 1,2,0,1, 2,0,1,2, 0,1,2,0 }, { 0,1,2,0, 2,0,1,2, 1,2,0,1, 0,1,2,0 }, { 0,0,1,1, 2,2,0,0, 1,1,2,2, 0,0,1,1 }, { 0,0,1,1, 1,1,2,2, 2,2,0,0, 0,0,1,1 }, { 0,1,0,1, 0,1,0,1, 2,2,2,2, 2,2,2,2 }, { 0,0,0,0, 0,0,0,0, 2,1,2,1, 2,1,2,1 }, { 0,0,2,2, 1,1,2,2, 0,0,2,2, 1,1,2,2 }, { 0,0,2,2, 0,0,1,1, 0,0,2,2, 0,0,1,1 }, { 0,2,2,0, 1,2,2,1, 0,2,2,0, 1,2,2,1 }, { 0,1,0,1, 2,2,2,2, 2,2,2,2, 0,1,0,1 }, { 0,0,0,0, 2,1,2,1, 2,1,2,1, 2,1,2,1 }, { 0,1,0,1, 0,1,0,1, 0,1,0,1, 2,2,2,2 }, { 0,2,2,2, 0,1,1,1, 0,2,2,2, 0,1,1,1 }, { 0,0,0,2, 1,1,1,2, 0,0,0,2, 1,1,1,2 }, { 0,0,0,0, 2,1,1,2, 2,1,1,2, 2,1,1,2 }, { 0,2,2,2, 0,1,1,1, 0,1,1,1, 0,2,2,2 }, { 0,0,0,2, 1,1,1,2, 1,1,1,2, 0,0,0,2 }, { 0,1,1,0, 0,1,1,0, 0,1,1,0, 2,2,2,2 }, { 0,0,0,0, 0,0,0,0, 2,1,1,2, 2,1,1,2 }, { 0,1,1,0, 0,1,1,0, 2,2,2,2, 2,2,2,2 }, { 0,0,2,2, 0,0,1,1, 0,0,1,1, 0,0,2,2 }, { 0,0,2,2, 1,1,2,2, 1,1,2,2, 0,0,2,2 }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 2,1,1,2 }, { 0,0,0,2, 0,0,0,1, 0,0,0,2, 0,0,0,1 }, { 0,2,2,2, 1,2,2,2, 0,2,2,2, 1,2,2,2 }, { 0,1,0,1, 2,2,2,2, 2,2,2,2, 2,2,2,2 }, { 0,1,1,1, 2,0,1,1, 2,2,0,1, 2,2,2,0 }, }, }; void Partition( int shape, float in[][MAX_DIMENSION_BIG], float subsets[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG], int count[MAX_SUBSETS], int ShapeTableToUse, int dimension) { int i, j; int *table = NULL; // Dont use memset: this is better for now for (i = 0; i MAX_SUBSETS) return; // Save Min and Max OutB points as EndPoints for (int subset = 0; subset max) { max = val; maxi = i; } } // Is round best for this ! for (int c = 0; c < MAX_DIMENSION_BIG; c++) { EndPoints[subset][0][c] = outB[subset][mini][c]; } for (int c = 0; c < MAX_DIMENSION_BIG; c++) { EndPoints[subset][1][c] = outB[subset][maxi][c]; } } } void covariance_d(float data[][MAX_DIMENSION_BIG], int numEntries, float cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG], int dimension) { int i, j, k; for (i = 0; i0); p = p >0 ? p : 1; q = (EV_ITERATION_NUMBER + p - 1) / p; l = 0; for (n = 0; n maxDiag ? c[l][i][i] : maxDiag; if (maxDiag <= 0) { return; } //assert(maxDiag >0); for (i = 0; i maxDiag ? i : k; maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag; } float t; t = 0; for (i = 0; i0); if (t <= 0) { return; } for (i = 0; id - ((a*)arg2)->d > 0) return 1; if (((a*)arg1)->d - ((a*)arg2)->d < 0) return -1; return 0; }; void sortProjection(float projection[MAX_ENTRIES], int order[MAX_ENTRIES], int numEntries) { int i; a what[MAX_ENTRIES + MAX_PARTITIONS_TABLE]; for (i = 0; i < numEntries; i++) what[what[i].i = i].d = projection[i]; qsort((void*)&what, numEntries, sizeof(a), a_compare); for (i = 0; i < numEntries; i++) order[i] = what[i].i; }; float totalError_d(float data[MAX_ENTRIES][MAX_DIMENSION_BIG], float data2[MAX_ENTRIES][MAX_DIMENSION_BIG], int numEntries, int dimension) { int i, j; float t = 0; for (i = 0; i1) && (k>1)); float m, M, s, dm = 0.; m = M = v_[0]; for (i = 1; i < n; i++) { m = m < v_[i] ? m : v_[i]; M = M > v_[i] ? M : v_[i]; } if (M == m) { for (i = 0; i < n; i++) idx[i] = 0; return; } //assert(M - m >0); s = (k - 1) / (M - m); for (i = 0; i < n; i++) { v[i] = v_[i] * s; idx[i] = (int)(z[i] = (v[i] + 0.5f /* stabilizer*/ - m *s)); //floorf(v[i] + 0.5f /* stabilizer*/ - m *s)); d[i].d = v[i] - z[i] - m *s; d[i].i = i; dm += d[i].d; r += d[i].d*d[i].d; } if (n*r - dm*dm >= (float)(n - 1) / 4 /*slack*/ / 2) { dm /= (float)n; for (i = 0; i < n; i++) d[i].d -= dm; qsort((void*)&d, n, sizeof(a), a_compare); // got into fundamental simplex // move coordinate system origin to its center for (i = 0; i < n; i++) d[i].d -= (2.0f*(float)i + 1.0f - (float)n) / 2.0f / (float)n; mm = l = 0.; j = -1; for (i = 0; i < n; i++) { l += d[i].d; if (l < mm) { mm = l; j = i; } } // position which should be in 0 j = ++j % n; for (i = j; i < n; i++) idx[d[i].i]++; } // get rid of an offset in idx mi = idx[0]; for (i = 1; i < n; i++) mi = mi < idx[i] ? mi : idx[i]; for (i = 0; i < n; i++) idx[i] -= mi; } float optQuantAnD_d( float data[MAX_ENTRIES][MAX_DIMENSION_BIG], int numEntries, int numClusters, int index[MAX_ENTRIES], float out[MAX_ENTRIES][MAX_DIMENSION_BIG], float direction[MAX_DIMENSION_BIG], float *step, int dimension, float quality ) { int index_[MAX_ENTRIES]; int maxTry = (int)(MAX_TRY * quality); int try_two = 50; int i, j, k; float t, s; float centered[MAX_ENTRIES][MAX_DIMENSION_BIG]; float mean[MAX_DIMENSION_BIG]; float cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG]; float projected[MAX_ENTRIES]; int order_[MAX_ENTRIES]; for (i = 0; i(k + 0.5 - s)*t && k < numClusters - 1) k++; index__[order_[j]] = k; } done = 1; for (j = 0; j < numEntries; j++) { done = (done && (index__[j] == index[j])); index[j] = index__[j]; } } while (!done && try_two--); if (i == 1) for (j = 0; j < numEntries; j++) index_[j] = index[j]; else { done = 1; for (j = 0; j < numEntries; j++) { done = (done && (index_[j] == index[j])); index_[j] = index_[j]; } if (done) break; } } quant_AnD_Shell(projected, numClusters, numEntries, index); } s = t = 0; float q = 0; for (k = 0; k a[i] ? m : a[i]; return (m); } int cluster_mean_d_d(float d[MAX_ENTRIES][MAX_DIMENSION_BIG], float mean[MAX_ENTRIES][MAX_DIMENSION_BIG], int index[], int i_comp[], int i_cnt[], int n, int dimension) { // unused index values are underfined int i, j, k; //assert(n!=0); for (i = 0; i< n; i++) for (j = 0; j< dimension; j++) { // assert(index[i] index[k] ? Mi : index[k]; } D = 1; for (d = 2; d <= Mi - mi; d++) { for (k = 0; k= numEntries) D = d; } for (k = 0; k a[i] ? m : a[i]; return (m); } int npv_nd[2][2 * MAX_DIMENSION_BIG] = { { 1,2,4,8,16,32,0,0 }, //dimension = 3 { 1,2,4,0,0,0,0,0 } //dimension = 4 }; short par_vectors_nd[2][8][128][2][MAX_DIMENSION_BIG] = { { // Dimension = 3 { { { 0,0,0,0 },{ 0,0,0,0 } }, { { 0,0,0,0 },{ 0,0,0,0 } } }, // 3*n+1 BCC 3*n+1 Cartesian 3*n //same parity { // SAME_PAR { { 0,0,0 },{ 0,0,0 } }, { { 1,1,1 },{ 1,1,1 } } }, // 3*n+2 BCC 3*n+1 BCC 3*n+1 { // BCC { { 0,0,0 },{ 0,0,0 } }, { { 0,0,0 },{ 1,1,1 } }, { { 1,1,1 },{ 0,0,0 } }, { { 1,1,1 },{ 1,1,1 } } }, // 3*n+3 FCC ??? // ?????? // BCC with FCC same or inverted, symmetric { // BCC_SAME_FCC { { 0,0,0 },{ 0,0,0 } }, { { 1,1,0 },{ 1,1,0 } }, { { 1,0,1 },{ 1,0,1 } }, { { 0,1,1 },{ 0,1,1 } }, { { 0,0,0 },{ 1,1,1 } }, { { 1,1,1 },{ 0,0,0 } }, { { 0,1,0 },{ 0,1,0 } }, // ?? { { 1,1,1 },{ 1,1,1 } }, }, // 3*n+4 FCC 3*n+2 FCC 3*n+2 { { { 0,0,0 },{ 0,0,0 } }, { { 1,1,0 },{ 0,0,0 } }, { { 1,0,1 },{ 0,0,0 } }, { { 0,1,1 },{ 0,0,0 } }, { { 0,0,0 },{ 1,1,0 } }, { { 1,1,0 },{ 1,1,0 } }, { { 1,0,1 },{ 1,1,0 } }, { { 0,1,1 },{ 1,1,0 } }, { { 0,0,0 },{ 1,0,1 } }, { { 1,1,0 },{ 1,0,1 } }, { { 1,0,1 },{ 1,0,1 } }, { { 0,1,1 },{ 1,0,1 } }, { { 0,0,0 },{ 0,1,1 } }, { { 1,1,0 },{ 0,1,1 } }, { { 1,0,1 },{ 0,1,1 } }, { { 0,1,1 },{ 0,1,1 } } }, // 3*n+5 Cartesian 3*n+3 FCC 3*n+2 //D^*[6] { { { 0,0,0 },{ 0,0,0 } }, { { 1,1,0 },{ 0,0,0 } }, { { 1,0,1 },{ 0,0,0 } }, { { 0,1,1 },{ 0,0,0 } }, { { 0,0,0 },{ 1,1,0 } }, { { 1,1,0 },{ 1,1,0 } }, { { 1,0,1 },{ 1,1,0 } }, { { 0,1,1 },{ 1,1,0 } }, { { 0,0,0 },{ 1,0,1 } }, { { 1,1,0 },{ 1,0,1 } }, { { 1,0,1 },{ 1,0,1 } }, { { 0,1,1 },{ 1,0,1 } }, { { 0,0,0 },{ 0,1,1 } }, { { 1,1,0 },{ 0,1,1 } }, { { 1,0,1 },{ 0,1,1 } }, { { 0,1,1 },{ 0,1,1 } }, { { 1,0,0 },{ 1,1,1 } }, { { 0,1,0 },{ 1,1,1 } }, { { 0,0,1 },{ 1,1,1 } }, { { 1,1,1 },{ 1,1,1 } }, { { 1,0,0 },{ 0,0,1 } }, { { 0,1,0 },{ 0,0,1 } }, { { 0,0,1 },{ 0,0,1 } }, { { 1,1,1 },{ 0,0,1 } }, { { 1,0,0 },{ 1,0,0 } }, { { 0,1,0 },{ 1,0,0 } }, { { 0,0,1 },{ 1,0,0 } }, { { 1,1,1 },{ 1,0,0 } }, { { 1,0,0 },{ 0,1,0 } }, { { 0,1,0 },{ 0,1,0 } }, { { 0,0,1 },{ 0,1,0 } }, { { 1,1,1 },{ 0,1,0 } } } },// Dimension = 3 { // Dimension = 4 { { { 0,0,0,0 },{ 0,0,0,0 } }, { { 0,0,0,0 },{ 0,0,0,0 } } }, // 3*n+1 BCC 3*n+1 Cartesian 3*n //same parity { // SAME_PAR { { 0,0,0,0 },{ 0,0,0,0 } }, { { 1,1,1,1 },{ 1,1,1,1 } } }, // 3*n+2 BCC 3*n+1 BCC 3*n+1 { // BCC { { 0,0,0,0 },{ 0,0,0,0 } }, { { 0,0,0,0 },{ 1,1,1,1 } }, { { 1,1,1,1 },{ 0,0,0,0 } }, { { 1,1,1,1 },{ 1,1,1,1 } } }, // 3 PBIT { { { 0,0,0,0 },{ 0,0,0,0 } }, { { 0,0,0,0 },{ 0,1,1,1 } }, { { 0,1,1,1 },{ 0,0,0,0 } }, { { 0,1,1,1 },{ 0,1,1,1 } }, { { 1,0,0,0 },{ 1,0,0,0 } }, { { 1,0,0,0 },{ 1,1,1,1 } }, { { 1,1,1,1 },{ 1,0,0,0 } }, { { 1,1,1,1 },{ 1,1,1,1 } } }, // 4 PBIT { { { 0,0,0,0 },{ 0,0,0,0 } }, { { 0,0,0,0 },{ 0,1,1,1 } }, { { 0,1,1,1 },{ 0,0,0,0 } }, { { 0,1,1,1 },{ 0,1,1,1 } }, { { 1,0,0,0 },{ 1,0,0,0 } }, { { 1,0,0,0 },{ 1,1,1,1 } }, { { 1,1,1,1 },{ 1,0,0,0 } }, { { 1,1,1,1 },{ 1,1,1,1 } }, { { 0,0,0,0 },{ 0,0,0,0 } }, { { 0,0,0,0 },{ 0,0,1,1 } }, { { 0,0,1,1 },{ 0,0,0,0 } }, { { 0,1,0,1 },{ 0,1,0,1 } }, { { 1,0,0,0 },{ 1,0,0,0 } }, { { 1,0,0,0 },{ 1,0,1,1 } }, { { 1,0,1,1 },{ 1,0,0,0 } }, { { 1,1,0,1 },{ 1,1,0,1 } }, }, } // Dimension = 4 }; int get_par_vector(int dim1, int dim2, int dim3, int dim4, int dim5) { return par_vectors_nd[dim1][dim2][dim3][dim4][dim5]; } float quant_single_point_d ( float data[MAX_ENTRIES][MAX_DIMENSION_BIG], int numEntries, int index[MAX_ENTRIES], float out[MAX_ENTRIES][MAX_DIMENSION_BIG], int epo_1[2][MAX_DIMENSION_BIG], int Mi_, // last cluster int bits[3], // including parity int type, int dimension // This should be either 3 or 4 ) { if (dimension < 3) return FLT_MAX; int i, j; float err_0 = FLT_MAX; float err_1 = FLT_MAX; int idx = 0; int idx_1 = 0; int epo_0[2][MAX_DIMENSION_BIG]; int use_par = (type != 0); int clog = 0; i = Mi_ + 1; while (i >>= 1) clog++; // assert((1< sperr(tc, CLT(clog), BTT(bits[j]), t1, t2, i)) dr[j] = tc; else if (sperr(tf, CLT(clog), BTT(bits[j]), t1, t2, i) < sperr(tc, CLT(clog), BTT(bits[j]), t1, t2, i)) dr[j] = tf; else #endif dr[j] = (int)floorf(data[0][j] + 0.5f); #ifdef USE_RAMPS tr = sperr(dr[j], CLT(clog), BTT(bits[j]), t1, t2, i) + 2.0f * sqrtf(sperr(dr[j], CLT(clog), BTT(bits[j]), t1, t2, i)) * fabsf((float)dr[j] - data[0][j]) + (dr[j] - data[0][j])* (dr[j] - data[0][j]); if (tr < t_) { t_ = tr; #else t_ = 0; #endif t1o[j] = t1; t2o[j] = t2; dr_0[j] = dr[j]; #ifdef USE_RAMPS if ((dr_0[j] < 0) || (dr_0[j] > 255)) { dr_0[j] = 0; // Error! } } #endif } // B } //C t += t_; } // D if (t < err_0) { idx = i; for (j = 0; j= SP_ERRIDX_MAX) { epo_0[1][j] = 0; // Error!! } #else epo_0[0][j] = 0; epo_0[1][j] = 0; #endif } err_0 = t; } if (err_0 == 0) break; } // E if (err_0 < err_1) { idx_1 = idx; for (j = 0; j> (2 * bits - 8)); } #ifndef USE_NEWRAMP float ep_d[4][SP_ERRIDX_MAX]; float ramp[3][4][SP_ERRIDX_MAX][SP_ERRIDX_MAX][16]; #else float ep_df(int bits, int p1) { return (float)expandbits_(bits + BIT_BASE, p1); } float rampf(int clog, int bits, int p1, int p2, int i) { // (clog+ LOG_CL_BASE) starts from 2 to 4 float ret = floorf((float)ep_df(bits, p1) + rampLerpWeights[clog + LOG_CL_BASE][i] * (float)((ep_df(bits, p2) - ep_df(bits, p1))) + 0.5F); if (ret > SP_ERRIDX_MAX) return SP_ERRIDX_MAX - 1; return ret; } #endif #ifdef USE_RAMPS int spidx(int in_data, int in_clog, int in_bits, int in_p2, int in_o1, int in_o2, int in_i) { return sp_data[in_data].sp_idx[in_clog][in_bits][in_p2][in_o1][in_o2][in_i]; } float sperr(int in_data, int clog, int bits, int p2, int o1, int o2) { return sp_data[in_data].sp_err[clog][bits][p2][o1][o2]; } #endif void init_ramps() { #ifdef USE_RAMPS int clog, bits; int in_data; // p1; int p2; int i; int o1, o2; // sp_datap = (SP_DATA **)malloc(SP_ERRIDX_MAX*sizeof(struct SP_DATA)); // assert(sp_datap); // for (int i = 0; i < SP_ERRIDX_MAX; i++) // { // sp_datap[i] = (SP_DATA *)malloc(sizeof(struct SP_DATA)); // } #ifndef USE_NEWRAMP for (bits = BIT_BASE; bits < BIT_RANGE; bits++) for (p1 = 0; p1 < (1 << bits); p1++) { ep_d[BTT(bits)][p1] = (float)expandbits_(bits, p1); } for (clog = LOG_CL_BASE; clog < LOG_CL_RANGE; clog++) for (bits = BIT_BASE; bits < BIT_RANGE; bits++) for (p1 = 0; p1 < (1 << bits); p1++) for (p2 = 0; p2 < (1 << bits); p2++) { for (o1 = 0; o1 < (1 << clog); o1++) { ramp[CLT(clog)][BTT(bits)][p1][p2][o1] = floorf((float)ep_d[BTT(bits)][p1] + rampLerpWeights[clog][o1] * (float)((ep_d[BTT(bits)][p2] - ep_d[BTT(bits)][p1])) + 0.5F); } } #endif //----------------------------------------------------------------------------- // Step 1 for (clog = LOG_CL_BASE; clog SP_ERRIDX_MAX) spd_i = SP_ERRIDX_MAX - 1; sp_data[spd_i].sp_idx[CLT(clog)][BTT(bits)][in_data & 0x1][p2 & 0x1][o1][0] = in_data; sp_data[spd_i].sp_idx[CLT(clog)][BTT(bits)][in_data & 0x1][p2 & 0x1][o1][1] = p2; sp_data[spd_i].sp_err[CLT(clog)][BTT(bits)][in_data & 0x1][p2 & 0x1][o1] = 0.; #endif } // Step 3 for (clog = LOG_CL_BASE; clog= 0 && sp_data[in_data - k].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] == 0) || (in_data + k < SP_ERRIDX_MAX && sp_data[in_data + k].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] == 0)) break; { if ((in_data - k >= 0 && sp_data[in_data - k].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] == 0)) { sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0] = sp_data[in_data - k].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0]; sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1] = sp_data[in_data - k].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1]; //printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][0] = (%d)\n", in_data, CLT(clog), BTT(bits), o1, o2, i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0]); //printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][1] = (%d)\n", in_data, CLT(clog), BTT(bits), o1, o2, i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1]); } else if ((in_data + k < SP_ERRIDX_MAX && sp_data[in_data + k].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] == 0)) { sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0] = sp_data[in_data + k].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0]; sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1] = sp_data[in_data + k].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1]; //printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][0] = %d\n", in_data, CLT(clog), BTT(bits), o1, o2, i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0]); //printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][1] = %d\n", in_data, CLT(clog), BTT(bits), o1, o2, i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1]); } sp_data[in_data].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] = (float)k*k; } } //for (clog = LOG_CL_BASE; clog1) { int j = (i1 + i2) / 2; #ifndef USE_NEWRAMP if (v >= p[(j << use_par) + odd]) #else if (v >= ep_df(BTT(bits), (j << use_par) + odd)) #endif i1 = j; else i2 = j; } return (i1 << use_par) + odd; } //based on code : ep_shaker_d in BC7 shaker float ep_shaker_HD( float data[MAX_ENTRIES][MAX_DIMENSION_BIG], int numEntries, int index_[MAX_ENTRIES], float out[MAX_ENTRIES][MAX_DIMENSION_BIG], int epo_code[2][MAX_DIMENSION_BIG], int Mi_, // last cluster int bits[3], // including parity int dimension ) { int i, j, k; int use_par = 0; int clog = 0; i = Mi_ + 1; while (i >>= 1) clog++; float mean[MAX_DIMENSION_BIG]; int index[MAX_ENTRIES]; int Mi; int maxTry = 1; for (k = 0; k < numEntries; k++) { index[k] = index_[k]; } int done; int change; int better; float err_o = FLT_MAX; float out_2[MAX_ENTRIES][MAX_DIMENSION_BIG]; int idx_2[MAX_ENTRIES]; int epo_2[2][MAX_DIMENSION_BIG]; int max_bits[MAX_DIMENSION_BIG]; int type = bits[0] % (2 * dimension); for (j = 0; j < dimension; j++) max_bits[j] = (bits[0] + 2 * dimension - 1) / (2 * dimension); // handled below automatically int alls = all_same_d(data, numEntries, dimension); mean_d_d(data, mean, numEntries, dimension); do { index_collapse_kernel(index, numEntries); Mi = max_index(index, numEntries); // index can be from requantizer int p, q; int p0 = -1, q0 = -1; float err_2 = FLT_MAX; if (Mi == 0) { float t; int epo_0[2][MAX_DIMENSION_BIG]; // either sinle point from the beginning or collapsed index if (alls) { t = quant_single_point_d(data, numEntries, index, out_2, epo_0, Mi_, bits, type, dimension); } else { quant_single_point_d(&mean, numEntries, index, out_2, epo_0, Mi_, bits, type, dimension); t = totalError_d(data, out_2, numEntries, dimension); } if (t < err_o) { for (k = 0; k> (2 * j)) & 0x3) != 0) { j0 = j; // new cords ei0 = (((s^g) >> (2 * j)) & 0x1); ei1 = (((s^g) >> (2 * j + 1)) & 0x1); } } s = s ^ g; err_0 = 0; for (i = 0; i> (2 * j)) & 0x1); ei1 = ((s1 >> (2 * j + 1)) & 0x1); epo_1[0][j] = (int)epd[0][j][ei0]; epo_1[1][j] = (int)epd[1][j][ei1]; } } if (err_1 < err_2) { // best in the curent ep cube run for (i = 0; i < numEntries; i++) { idx_2[i] = idx_1[i]; for (j = 0; j 0) maxTry--; else maxTry = 0; } while (!done && maxTry); return err_o; } float ep_shaker_2_d( float data[MAX_ENTRIES][MAX_DIMENSION_BIG], int numEntries, int index_[MAX_ENTRIES], float out[MAX_ENTRIES][MAX_DIMENSION_BIG], int epo_code[2][MAX_DIMENSION_BIG], int size, int Mi_, // last cluster int bits, // total for all channels // defined by total numbe of bits and dimensioin int dimension, float epo[2][MAX_DIMENSION_BIG] ) { if (dimension < 3) return FLT_MAX; int i, j, k; int max_bits[MAX_DIMENSION_BIG]; int type = bits % (2 * dimension); int use_par = (type != 0); for (j = 0; j < dimension; j++) max_bits[j] = (bits + 2 * dimension - 1) / (2 * dimension); int clog = 0; i = Mi_ + 1; while (i >>= 1) clog++; if (CLT(clog) > 3) return FLT_MAX; float mean[MAX_DIMENSION_BIG]; int index[MAX_ENTRIES]; int Mi; int maxTry = 8; for (k = 0; k < numEntries; k++) { index[k] = index_[k]; } int done; int change; int better; float err_o = FLT_MAX; int epo_0[2][MAX_DIMENSION_BIG]; float outg[MAX_ENTRIES][MAX_DIMENSION_BIG]; // handled below automatically int alls = all_same_d(data, numEntries, dimension); mean_d_d(data, mean, numEntries, dimension); do { index_collapse_kernel(index, numEntries); Mi = max_i(index, numEntries); // index can be from requantizer int p, q; int p0 = -1, q0 = -1; float err_0 = FLT_MAX; if (Mi == 0) { float t; // either single point from the beginning or collapsed index if (alls) { t = quant_single_point_d(data, numEntries, index, outg, epo_0, Mi_, max_bits, type, dimension); } else { quant_single_point_d(&mean, numEntries, index, outg, epo_0, Mi_, max_bits, type, dimension); t = totalError_d(data, outg, numEntries, dimension); } if (t < err_o) { for (k = 0; k> 1) - 1 ? epi[i][0] : (size >> 1) - 1)) & (~use_par); epi[i][1] += ((1 << max_bits[j]) - 1 - epi[i][1] < (size >> 1) ? (1 << max_bits[j]) - 1 - epi[i][1] : (size >> 1)) & (~use_par); } int p1, p2, step = (1 << use_par); ed[pp[0]][pp[1]][j] = FLT_MAX; for (p1 = epi[0][0]; p1 <= epi[0][1]; p1 += step) for (p2 = epi[1][0]; p2 <= epi[1][1]; p2 += step) { #ifndef USE_NEWRAMP float *rbp = rb[p1][p2]; #endif float t = 0; int *ci = cidx; int m = numEntries; int _mc = m; while (_mc > 0) { #ifndef USE_NEWRAMP t += (rbp[ci[_mc - 1]] - data[_mc - 1][j]) *(rbp[ci[_mc - 1]] - data[_mc - 1][j]); #else t += (rampf(CLT(clog), BTT(max_bits[j]), p1, p2, ci[_mc - 1]) - data[_mc - 1][j]) *(rampf(CLT(clog), BTT(max_bits[j]), p1, p2, ci[_mc - 1]) - data[_mc - 1][j]); #endif _mc--; } if (t (LOG_CL_RANGE - LOG_CL_BASE)) return FLT_MAX; for (int jj = 0; jj(BIT_RANGE - BIT_BASE)) return FLT_MAX; if ((epo_0[0][jj] > 255) || (epo_0[0][jj] < 0)) return FLT_MAX; if ((epo_0[1][jj] > 255) || (epo_0[0][jj] < 0)) return FLT_MAX; #ifndef USE_NEWRAMP r[jj] = ramp[CLT(clog)][BTT(max_bits[jj])][epo_0[0][jj]][epo_0[1][jj]]; #endif } for (i = 0; i 0) maxTry--; else maxTry = 0; } while (!done && maxTry); for (j = 0; j