TexConv/CMP_Framework/Common/HDR_Encode.cpp

2957 lines
77 KiB
C++

//===============================================================================
// Copyright (c) 2007-2017 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2004-2006 ATI Technologies Inc.
//===============================================================================
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//
// File Name: HDR_Encode.cpp
// Description: Reserved utils function for HDR process
//
//////////////////////////////////////////////////////////////////////////////
#include "hdr_encode.h"
#include <assert.h>
#include <math.h>
#include <float.h>
#include <cstdlib>
namespace HDR_Encode {
#define USE_NEWRAMP
#ifdef USE_RAMPS
#include <mutex>
static int g_init_ramps = 0;
std::mutex mtx;
#endif
//==============================================================================================
// return # of bits needed to store n. handle signed or unsigned cases properly
inline int NBits(int n, bool bIsSigned) {
int nb;
if (n == 0) {
return 0; // no bits needed for 0, signed or not
} else if (n > 0) {
for (nb = 0; n; ++nb, n >>= 1);
return nb + (bIsSigned ? 1 : 0);
} else {
assert(bIsSigned);
for (nb = 0; n < -1; ++nb, n >>= 1);
return nb + 1;
}
}
float lerpf(float a, float b, int i, int denom) {
assert(denom == 3 || denom == 7 || denom == 15);
assert(i >= 0 && i <= denom);
int *weights = NULL;
switch (denom) {
case 3:
denom *= 5;
i *= 5; // fall through to case 15
case 7:
weights = g_aWeights3;
break;
case 15:
weights = g_aWeights4;
break;
default:
assert(0);
}
return (a*weights[denom - i] + b*weights[i]) / 64.0f;
}
int QuantizeToInt(short value, int prec, bool signedfloat16, float exposure) {
(exposure);
if (prec <= 1) return 0;
bool negvalue = false;
// move data to use extra bits for processing
int ivalue = value;
if (signedfloat16) {
if (value < 0) {
negvalue = true;
value = -value;
}
prec--;
} else {
// clamp -ve
if (value < 0)
value = 0;
}
int iQuantized;
int bias = (prec > 10 && prec != 16) ? ((1 << (prec - 11)) - 1) : 0;
bias = (prec == 16) ? 15 : bias;
iQuantized = ((ivalue << prec) + bias) / (F16HMAX + 1);
return (negvalue ? -iQuantized : iQuantized);
}
int Unquantize(int comp, unsigned char uBitsPerComp, bool bSigned) {
int unq = 0, s = 0;
if (bSigned) {
if (uBitsPerComp >= 16) {
unq = comp;
} else {
if (comp < 0) {
s = 1;
comp = -comp;
}
if (comp == 0) unq = 0;
else if (comp >= ((1 << (uBitsPerComp - 1)) - 1)) unq = 0x7FFF;
else unq = ((comp << 15) + 0x4000) >> (uBitsPerComp - 1);
if (s) unq = -unq;
}
} else {
if (uBitsPerComp >= 15) unq = comp;
else if (comp == 0) unq = 0;
else if (comp == ((1 << uBitsPerComp) - 1)) unq = 0xFFFF;
else unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
}
return unq;
}
//==============================================================================================
int PARTITIONS[MAX_SUBSETS][MAX_PARTITIONS][MAX_SUBSET_SIZE] = {
// Single subset partitions for both BC6H abd BC7
{
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
},
},
{
{
// 0
0,0,1,1,
0,0,1,1,
0,0,1,1,
0,0,1,1
},
{
// 1
0,0,0,1,
0,0,0,1,
0,0,0,1,
0,0,0,1
},
{
// 2
0,1,1,1,
0,1,1,1,
0,1,1,1,
0,1,1,1
},
{
// 3
0,0,0,1,
0,0,1,1,
0,0,1,1,
0,1,1,1
},
{
// 4
0,0,0,0,
0,0,0,1,
0,0,0,1,
0,0,1,1
},
{
// 5
0,0,1,1,
0,1,1,1,
0,1,1,1,
1,1,1,1
},
{
// 6
0,0,0,1,
0,0,1,1,
0,1,1,1,
1,1,1,1
},
{
// 7
0,0,0,0,
0,0,0,1,
0,0,1,1,
0,1,1,1
},
{
// 8
0,0,0,0,
0,0,0,0,
0,0,0,1,
0,0,1,1
},
{
// 9
0,0,1,1,
0,1,1,1,
1,1,1,1,
1,1,1,1
},
{
// 10
0,0,0,0,
0,0,0,1,
0,1,1,1,
1,1,1,1
},
{
// 11
0,0,0,0,
0,0,0,0,
0,0,0,1,
0,1,1,1
},
{
// 12
0,0,0,1,
0,1,1,1,
1,1,1,1,
1,1,1,1
},
{
// 13
0,0,0,0,
0,0,0,0,
1,1,1,1,
1,1,1,1
},
{
// 14
0,0,0,0,
1,1,1,1,
1,1,1,1,
1,1,1,1
},
{
// 15
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1
},
{
// 16
0,0,0,0,
1,0,0,0,
1,1,1,0,
1,1,1,1
},
{
// 17
0,1,1,1,
0,0,0,1,
0,0,0,0,
0,0,0,0
},
{
// 18
0,0,0,0,
0,0,0,0,
1,0,0,0,
1,1,1,0
},
{
// 19
0,1,1,1,
0,0,1,1,
0,0,0,1,
0,0,0,0
},
{
// 20
0,0,1,1,
0,0,0,1,
0,0,0,0,
0,0,0,0
},
{
// 21
0,0,0,0,
1,0,0,0,
1,1,0,0,
1,1,1,0
},
{
// 22
0,0,0,0,
0,0,0,0,
1,0,0,0,
1,1,0,0
},
{
// 23
0,1,1,1,
0,0,1,1,
0,0,1,1,
0,0,0,1
},
{
// 24
0,0,1,1,
0,0,0,1,
0,0,0,1,
0,0,0,0
},
{
// 25
0,0,0,0,
1,0,0,0,
1,0,0,0,
1,1,0,0
},
{
// 26
0,1,1,0,
0,1,1,0,
0,1,1,0,
0,1,1,0
},
{
// 27
0,0,1,1,
0,1,1,0,
0,1,1,0,
1,1,0,0
},
{
// 28
0,0,0,1,
0,1,1,1,
1,1,1,0,
1,0,0,0
},
{
// 29
0,0,0,0,
1,1,1,1,
1,1,1,1,
0,0,0,0
},
{
// 30
0,1,1,1,
0,0,0,1,
1,0,0,0,
1,1,1,0
},
{
// 31
0,0,1,1,
1,0,0,1,
1,0,0,1,
1,1,0,0
},
// ----------- BC7 only shapes from here on -------------
{
// 32
0,1,0,1,
0,1,0,1,
0,1,0,1,
0,1,0,1
},
{
// 33
0,0,0,0,
1,1,1,1,
0,0,0,0,
1,1,1,1
},
{
// 34
0,1,0,1,
1,0,1,0,
0,1,0,1,
1,0,1,0
},
{
// 35
0,0,1,1,
0,0,1,1,
1,1,0,0,
1,1,0,0
},
{
// 36
0,0,1,1,
1,1,0,0,
0,0,1,1,
1,1,0,0
},
{
// 37
0,1,0,1,
0,1,0,1,
1,0,1,0,
1,0,1,0
},
{
// 38
0,1,1,0,
1,0,0,1,
0,1,1,0,
1,0,0,1
},
{
// 39
0,1,0,1,
1,0,1,0,
1,0,1,0,
0,1,0,1
},
{
// 40
0,1,1,1,
0,0,1,1,
1,1,0,0,
1,1,1,0
},
{
// 41
0,0,0,1,
0,0,1,1,
1,1,0,0,
1,0,0,0
},
{
// 42
0,0,1,1,
0,0,1,0,
0,1,0,0,
1,1,0,0
},
{
// 43
0,0,1,1,
1,0,1,1,
1,1,0,1,
1,1,0,0
},
{
// 44
0,1,1,0,
1,0,0,1,
1,0,0,1,
0,1,1,0
},
{
// 45
0,0,1,1,
1,1,0,0,
1,1,0,0,
0,0,1,1
},
{
// 46
0,1,1,0,
0,1,1,0,
1,0,0,1,
1,0,0,1
},
{
// 47
0,0,0,0,
0,1,1,0,
0,1,1,0,
0,0,0,0
},
{
// 48
0,1,0,0,
1,1,1,0,
0,1,0,0,
0,0,0,0
},
{
// 49
0,0,1,0,
0,1,1,1,
0,0,1,0,
0,0,0,0
},
{
// 50
0,0,0,0,
0,0,1,0,
0,1,1,1,
0,0,1,0
},
{
// 51
0,0,0,0,
0,1,0,0,
1,1,1,0,
0,1,0,0
},
{
// 52
0,1,1,0,
1,1,0,0,
1,0,0,1,
0,0,1,1
},
{
// 53
0,0,1,1,
0,1,1,0,
1,1,0,0,
1,0,0,1
},
{
// 54
0,1,1,0,
0,0,1,1,
1,0,0,1,
1,1,0,0
},
{
// 55
0,0,1,1,
1,0,0,1,
1,1,0,0,
0,1,1,0
},
{
// 56
0,1,1,0,
1,1,0,0,
1,1,0,0,
1,0,0,1
},
{
// 57
0,1,1,0,
0,0,1,1,
0,0,1,1,
1,0,0,1
},
{
// 58
0,1,1,1,
1,1,1,0,
1,0,0,0,
0,0,0,1
},
{
// 59
0,0,0,1,
1,0,0,0,
1,1,1,0,
0,1,1,1
},
{
// 60
0,0,0,0,
1,1,1,1,
0,0,1,1,
0,0,1,1
},
{
// 61
0,0,1,1,
0,0,1,1,
1,1,1,1,
0,0,0,0
},
{
// 62
0,0,1,0,
0,0,1,0,
1,1,1,0,
1,1,1,0
},
{
// 63
0,1,0,0,
0,1,0,0,
0,1,1,1,
0,1,1,1
},
},
// Table.P3 - only for BC7
{
{
0,0,1,1,
0,0,1,1,
0,2,2,1,
2,2,2,2
},
{
0,0,0,1,
0,0,1,1,
2,2,1,1,
2,2,2,1
},
{
0,0,0,0,
2,0,0,1,
2,2,1,1,
2,2,1,1
},
{
0,2,2,2,
0,0,2,2,
0,0,1,1,
0,1,1,1
},
{
0,0,0,0,
0,0,0,0,
1,1,2,2,
1,1,2,2
},
{
0,0,1,1,
0,0,1,1,
0,0,2,2,
0,0,2,2
},
{
0,0,2,2,
0,0,2,2,
1,1,1,1,
1,1,1,1
},
{
0,0,1,1,
0,0,1,1,
2,2,1,1,
2,2,1,1
},
{
0,0,0,0,
0,0,0,0,
1,1,1,1,
2,2,2,2
},
{
0,0,0,0,
1,1,1,1,
1,1,1,1,
2,2,2,2
},
{
0,0,0,0,
1,1,1,1,
2,2,2,2,
2,2,2,2
},
{
0,0,1,2,
0,0,1,2,
0,0,1,2,
0,0,1,2
},
{
0,1,1,2,
0,1,1,2,
0,1,1,2,
0,1,1,2
},
{
0,1,2,2,
0,1,2,2,
0,1,2,2,
0,1,2,2
},
{
0,0,1,1,
0,1,1,2,
1,1,2,2,
1,2,2,2
},
{
0,0,1,1,
2,0,0,1,
2,2,0,0,
2,2,2,0
},
{
0,0,0,1,
0,0,1,1,
0,1,1,2,
1,1,2,2
},
{
0,1,1,1,
0,0,1,1,
2,0,0,1,
2,2,0,0
},
{
0,0,0,0,
1,1,2,2,
1,1,2,2,
1,1,2,2
},
{
0,0,2,2,
0,0,2,2,
0,0,2,2,
1,1,1,1
},
{
0,1,1,1,
0,1,1,1,
0,2,2,2,
0,2,2,2
},
{
0,0,0,1,
0,0,0,1,
2,2,2,1,
2,2,2,1
},
{
0,0,0,0,
0,0,1,1,
0,1,2,2,
0,1,2,2
},
{
0,0,0,0,
1,1,0,0,
2,2,1,0,
2,2,1,0
},
{
0,1,2,2,
0,1,2,2,
0,0,1,1,
0,0,0,0
},
{
0,0,1,2,
0,0,1,2,
1,1,2,2,
2,2,2,2
},
{
0,1,1,0,
1,2,2,1,
1,2,2,1,
0,1,1,0
},
{
0,0,0,0,
0,1,1,0,
1,2,2,1,
1,2,2,1
},
{
0,0,2,2,
1,1,0,2,
1,1,0,2,
0,0,2,2
},
{
0,1,1,0,
0,1,1,0,
2,0,0,2,
2,2,2,2
},
{
0,0,1,1,
0,1,2,2,
0,1,2,2,
0,0,1,1
},
{
0,0,0,0,
2,0,0,0,
2,2,1,1,
2,2,2,1
},
{
0,0,0,0,
0,0,0,2,
1,1,2,2,
1,2,2,2
},
{
0,2,2,2,
0,0,2,2,
0,0,1,2,
0,0,1,1
},
{
0,0,1,1,
0,0,1,2,
0,0,2,2,
0,2,2,2
},
{
0,1,2,0,
0,1,2,0,
0,1,2,0,
0,1,2,0
},
{
0,0,0,0,
1,1,1,1,
2,2,2,2,
0,0,0,0
},
{
0,1,2,0,
1,2,0,1,
2,0,1,2,
0,1,2,0
},
{
0,1,2,0,
2,0,1,2,
1,2,0,1,
0,1,2,0
},
{
0,0,1,1,
2,2,0,0,
1,1,2,2,
0,0,1,1
},
{
0,0,1,1,
1,1,2,2,
2,2,0,0,
0,0,1,1
},
{
0,1,0,1,
0,1,0,1,
2,2,2,2,
2,2,2,2
},
{
0,0,0,0,
0,0,0,0,
2,1,2,1,
2,1,2,1
},
{
0,0,2,2,
1,1,2,2,
0,0,2,2,
1,1,2,2
},
{
0,0,2,2,
0,0,1,1,
0,0,2,2,
0,0,1,1
},
{
0,2,2,0,
1,2,2,1,
0,2,2,0,
1,2,2,1
},
{
0,1,0,1,
2,2,2,2,
2,2,2,2,
0,1,0,1
},
{
0,0,0,0,
2,1,2,1,
2,1,2,1,
2,1,2,1
},
{
0,1,0,1,
0,1,0,1,
0,1,0,1,
2,2,2,2
},
{
0,2,2,2,
0,1,1,1,
0,2,2,2,
0,1,1,1
},
{
0,0,0,2,
1,1,1,2,
0,0,0,2,
1,1,1,2
},
{
0,0,0,0,
2,1,1,2,
2,1,1,2,
2,1,1,2
},
{
0,2,2,2,
0,1,1,1,
0,1,1,1,
0,2,2,2
},
{
0,0,0,2,
1,1,1,2,
1,1,1,2,
0,0,0,2
},
{
0,1,1,0,
0,1,1,0,
0,1,1,0,
2,2,2,2
},
{
0,0,0,0,
0,0,0,0,
2,1,1,2,
2,1,1,2
},
{
0,1,1,0,
0,1,1,0,
2,2,2,2,
2,2,2,2
},
{
0,0,2,2,
0,0,1,1,
0,0,1,1,
0,0,2,2
},
{
0,0,2,2,
1,1,2,2,
1,1,2,2,
0,0,2,2
},
{
0,0,0,0,
0,0,0,0,
0,0,0,0,
2,1,1,2
},
{
0,0,0,2,
0,0,0,1,
0,0,0,2,
0,0,0,1
},
{
0,2,2,2,
1,2,2,2,
0,2,2,2,
1,2,2,2
},
{
0,1,0,1,
2,2,2,2,
2,2,2,2,
2,2,2,2
},
{
0,1,1,1,
2,0,1,1,
2,2,0,1,
2,2,2,0
},
},
};
void Partition(
int shape,
float in[][MAX_DIMENSION_BIG],
float subsets[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],
int count[MAX_SUBSETS],
int ShapeTableToUse,
int dimension) {
int i, j;
int *table = NULL;
// Dont use memset: this is better for now
for (i = 0; i<MAX_SUBSETS; i++) count[i] = 0;
switch (ShapeTableToUse) {
case 0:
case 1:
table = &(PARTITIONS[0][0][0]);
break;
case 2:
table = &(PARTITIONS[1][shape][0]);
break;
default:
break;
}
// Nothing to do!!: Must indicate an error to user
if (table == NULL) return;
for (i = 0; i<MAX_SUBSET_SIZE; i++) {
int subset = table[i];
for (j = 0; j<dimension; j++) {
subsets[subset][count[subset]][j] = in[i][j];
}
if (dimension < MAX_DIMENSION_BIG) {
subsets[subset][count[subset]][j] = 0.0;
}
count[subset]++;
}
}
//=================================================================================================
void GetEndPoints(float EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], float outB[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG], int max_subsets, int entryCount[MAX_SUBSETS]) {
// Should have some sort of error notification!
if (max_subsets > MAX_SUBSETS) return;
// Save Min and Max OutB points as EndPoints
for (int subset = 0; subset<max_subsets; subset++) {
// We now have points on direction vector(s)
// find the min and max points
float min = FLT_MAX;
float max = 0;
float val;
int mini = 0;
int maxi = 0;
for (int i = 0; i < entryCount[subset]; i++) {
val = outB[subset][i][0] + outB[subset][i][1] + outB[subset][i][2];
if (val < min) {
min = val;
mini = i;
}
if (val > max) {
max = val;
maxi = i;
}
}
// Is round best for this !
for (int c = 0; c < MAX_DIMENSION_BIG; c++) {
EndPoints[subset][0][c] = outB[subset][mini][c];
}
for (int c = 0; c < MAX_DIMENSION_BIG; c++) {
EndPoints[subset][1][c] = outB[subset][maxi][c];
}
}
}
void covariance_d(float data[][MAX_DIMENSION_BIG], int numEntries, float cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG], int dimension) {
int i, j, k;
for (i = 0; i<dimension; i++)
for (j = 0; j <= i; j++) {
cov[i][j] = 0;
for (k = 0; k<numEntries; k++)
cov[i][j] += data[k][i] * data[k][j];
}
for (i = 0; i<dimension; i++)
for (j = i + 1; j<dimension; j++)
cov[i][j] = cov[j][i];
}
void centerInPlace_d(float data[][MAX_DIMENSION_BIG], int numEntries, float mean[MAX_DIMENSION_BIG], int dimension) {
int i, k;
for (i = 0; i<dimension; i++) {
mean[i] = 0;
for (k = 0; k<numEntries; k++)
mean[i] += data[k][i];
}
if (!numEntries)
return;
for (i = 0; i<dimension; i++) {
mean[i] /= (float)numEntries;
for (k = 0; k<numEntries; k++)
data[k][i] -= mean[i];
}
}
void eigenVector_d(float cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG], float vector[MAX_DIMENSION_BIG], int dimension) {
// calculate an eigenvecto corresponding to a biggest eigenvalue
// will work for non-zero non-negative matricies only
#define EV_ITERATION_NUMBER 20
#define EV_SLACK 2 /* additive for exp base 2)*/
int i, j, k, l, m, n, p, q;
float c[2][MAX_DIMENSION_BIG][MAX_DIMENSION_BIG];
float maxDiag;
for (i = 0; i<dimension; i++)
for (j = 0; j<dimension; j++)
c[0][i][j] = cov[i][j];
p = (int)floorf(logf((HDR_FLT_MAX_EXP - EV_SLACK) / ceilf(logf((float)dimension) / logf(2.0f))) / logf(2.0f));
//assert(p>0);
p = p >0 ? p : 1;
q = (EV_ITERATION_NUMBER + p - 1) / p;
l = 0;
for (n = 0; n<q; n++) {
maxDiag = 0;
for (i = 0; i<dimension; i++)
maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag;
if (maxDiag <= 0) {
return;
}
//assert(maxDiag >0);
for (i = 0; i<dimension; i++)
for (j = 0; j<dimension; j++)
c[l][i][j] /= maxDiag;
for (m = 0; m<p; m++) {
for (i = 0; i<dimension; i++)
for (j = 0; j<dimension; j++) {
float temp = 0;
for (k = 0; k<dimension; k++) {
// Notes:
// This is the most consuming portion of the code and needs optimizing for perfromance
temp += c[l][i][k] * c[l][k][j];
}
c[1 - l][i][j] = temp;
}
l = 1 - l;
}
}
maxDiag = 0;
k = 0;
for (i = 0; i<dimension; i++) {
k = c[l][i][i] > maxDiag ? i : k;
maxDiag = c[l][i][i] > maxDiag ? c[l][i][i] : maxDiag;
}
float t;
t = 0;
for (i = 0; i<dimension; i++) {
t += c[l][k][i] * c[l][k][i];
vector[i] = c[l][k][i];
}
// normalization is really optional
t = sqrtf(t);
//assert(t>0);
if (t <= 0) {
return;
}
for (i = 0; i<dimension; i++)
vector[i] /= t;
}
void project_d(float data[][MAX_DIMENSION_BIG], int numEntries, float vector[MAX_DIMENSION_BIG], float projection[MAX_ENTRIES], int dimension) {
// assume that vector is normalized already
int i, k;
for (k = 0; k<numEntries; k++) {
projection[k] = 0;
for (i = 0; i<dimension; i++) {
projection[k] += data[k][i] * vector[i];
}
}
}
typedef struct {
float d;
int i;
} a;
inline int a_compare(const void *arg1, const void *arg2) {
if (((a*)arg1)->d - ((a*)arg2)->d > 0) return 1;
if (((a*)arg1)->d - ((a*)arg2)->d < 0) return -1;
return 0;
};
void sortProjection(float projection[MAX_ENTRIES], int order[MAX_ENTRIES], int numEntries) {
int i;
a what[MAX_ENTRIES + MAX_PARTITIONS_TABLE];
for (i = 0; i < numEntries; i++)
what[what[i].i = i].d = projection[i];
qsort((void*)&what, numEntries, sizeof(a), a_compare);
for (i = 0; i < numEntries; i++)
order[i] = what[i].i;
};
float totalError_d(float data[MAX_ENTRIES][MAX_DIMENSION_BIG], float data2[MAX_ENTRIES][MAX_DIMENSION_BIG], int numEntries, int dimension) {
int i, j;
float t = 0;
for (i = 0; i<numEntries; i++)
for (j = 0; j<dimension; j++)
t += (data[i][j] - data2[i][j])*(data[i][j] - data2[i][j]);
return t;
};
// input:
//
// v_ points, might be uncentered
// k - number of points in the ramp
// n - number of points in v_
//
// output:
// index, uncentered, in the range 0..k-1
//
void quant_AnD_Shell(float* v_, int k, int n, int *idx) {
#define MAX_BLOCK MAX_ENTRIES
int i, j;
float v[MAX_BLOCK];
float z[MAX_BLOCK];
a d[MAX_BLOCK];
float l;
float mm;
float r = 0;
int mi;
//assert((v_ != NULL) && (n>1) && (k>1));
float m, M, s, dm = 0.;
m = M = v_[0];
for (i = 1; i < n; i++) {
m = m < v_[i] ? m : v_[i];
M = M > v_[i] ? M : v_[i];
}
if (M == m) {
for (i = 0; i < n; i++)
idx[i] = 0;
return;
}
//assert(M - m >0);
s = (k - 1) / (M - m);
for (i = 0; i < n; i++) {
v[i] = v_[i] * s;
idx[i] = (int)(z[i] = (v[i] + 0.5f /* stabilizer*/ - m *s)); //floorf(v[i] + 0.5f /* stabilizer*/ - m *s));
d[i].d = v[i] - z[i] - m *s;
d[i].i = i;
dm += d[i].d;
r += d[i].d*d[i].d;
}
if (n*r - dm*dm >= (float)(n - 1) / 4 /*slack*/ / 2) {
dm /= (float)n;
for (i = 0; i < n; i++)
d[i].d -= dm;
qsort((void*)&d, n, sizeof(a), a_compare);
// got into fundamental simplex
// move coordinate system origin to its center
for (i = 0; i < n; i++)
d[i].d -= (2.0f*(float)i + 1.0f - (float)n) / 2.0f / (float)n;
mm = l = 0.;
j = -1;
for (i = 0; i < n; i++) {
l += d[i].d;
if (l < mm) {
mm = l;
j = i;
}
}
// position which should be in 0
j = ++j % n;
for (i = j; i < n; i++)
idx[d[i].i]++;
}
// get rid of an offset in idx
mi = idx[0];
for (i = 1; i < n; i++)
mi = mi < idx[i] ? mi : idx[i];
for (i = 0; i < n; i++)
idx[i] -= mi;
}
float optQuantAnD_d(
float data[MAX_ENTRIES][MAX_DIMENSION_BIG],
int numEntries,
int numClusters,
int index[MAX_ENTRIES],
float out[MAX_ENTRIES][MAX_DIMENSION_BIG],
float direction[MAX_DIMENSION_BIG], float *step,
int dimension,
float quality
) {
int index_[MAX_ENTRIES];
int maxTry = (int)(MAX_TRY * quality);
int try_two = 50;
int i, j, k;
float t, s;
float centered[MAX_ENTRIES][MAX_DIMENSION_BIG];
float mean[MAX_DIMENSION_BIG];
float cov[MAX_DIMENSION_BIG][MAX_DIMENSION_BIG];
float projected[MAX_ENTRIES];
int order_[MAX_ENTRIES];
for (i = 0; i<numEntries; i++)
for (j = 0; j<dimension; j++)
centered[i][j] = data[i][j];
centerInPlace_d(centered, numEntries, mean, dimension);
covariance_d(centered, numEntries, cov, dimension);
// check if they all are the same
t = 0;
for (j = 0; j<dimension; j++)
t += cov[j][j];
if (numEntries == 0) {
for (i = 0; i<numEntries; i++) {
index[i] = 0;
for (j = 0; j<dimension; j++)
out[i][j] = mean[j];
}
return 0.;
}
eigenVector_d(cov, direction, dimension);
project_d(centered, numEntries, direction, projected, dimension);
for (i = 0; i<maxTry; i++) {
int done = 0;
if (i) {
do {
float q;
q = s = t = 0;
for (k = 0; k<numEntries; k++) {
s += index[k];
t += index[k] * index[k];
}
for (j = 0; j<dimension; j++) {
direction[j] = 0;
for (k = 0; k<numEntries; k++)
direction[j] += centered[k][j] * index[k];
q += direction[j] * direction[j];
}
s /= (float)numEntries;
t = t - s * s * (float)numEntries;
//assert(t != 0);
t = (t == 0.0f ? 0.0f : 1.0f / t);
// We need to requantize
q = sqrtf(q);
t *= q;
if (q != 0)
for (j = 0; j<dimension; j++)
direction[j] /= q;
// direction normalized
project_d(centered, numEntries, direction, projected, dimension);
sortProjection(projected, order_, numEntries);
int index__[MAX_ENTRIES];
// it's projected and centered; cluster centers are (index[i]-s)*t (*dir)
k = 0;
for (j = 0; j < numEntries; j++) {
while (projected[order_[j]] >(k + 0.5 - s)*t && k < numClusters - 1)
k++;
index__[order_[j]] = k;
}
done = 1;
for (j = 0; j < numEntries; j++) {
done = (done && (index__[j] == index[j]));
index[j] = index__[j];
}
} while (!done && try_two--);
if (i == 1)
for (j = 0; j < numEntries; j++)
index_[j] = index[j];
else {
done = 1;
for (j = 0; j < numEntries; j++) {
done = (done && (index_[j] == index[j]));
index_[j] = index_[j];
}
if (done)
break;
}
}
quant_AnD_Shell(projected, numClusters, numEntries, index);
}
s = t = 0;
float q = 0;
for (k = 0; k<numEntries; k++) {
s += index[k];
t += index[k] * index[k];
}
for (j = 0; j<dimension; j++) {
direction[j] = 0;
for (k = 0; k<numEntries; k++)
direction[j] += centered[k][j] * index[k];
q += direction[j] * direction[j];
}
s /= (float)numEntries;
t = t - s * s * (float)numEntries;
//assert(t != 0);
t = (t == 0.0 ? 0.0f : 1.0f / t);
for (i = 0; i<numEntries; i++)
for (j = 0; j<dimension; j++)
out[i][j] = mean[j] + direction[j] * t*(index[i] - s);
// normalize direction for output
q = sqrtf(q);
*step = t*q;
for (j = 0; j<dimension; j++)
direction[j] /= q;
return totalError_d(data, out, numEntries, dimension);
}
//=====================================================================================================================
#define LOG_CL_BASE 2
#define BIT_BASE 5
#define LOG_CL_RANGE 5
#define BIT_RANGE 9
#define MAX_CLUSTERS_BIG 16
#define BTT(bits) (bits-BIT_BASE)
#define CLT(cl) (cl-LOG_CL_BASE)
const float rampLerpWeights[5][16] = {
{ 0.0 }, // 0 bit index
{ 0.0, 1.0 }, // 1 bit index
{ 0.0, 21.0 / 64.0, 43.0 / 64.0, 1.0 }, // 2 bit index
{ 0.0, 9.0 / 64.0, 18.0 / 64.0, 27.0 / 64.0, 37.0 / 64.0, 46.0 / 64.0, 55.0 / 64.0, 1.0 }, // 3 bit index
{
0.0, 4.0 / 64.0, 9.0 / 64.0, 13.0 / 64.0, 17.0 / 64.0, 21.0 / 64.0, 26.0 / 64.0, 30.0 / 64.0,
34.0 / 64.0, 38.0 / 64.0, 43.0 / 64.0, 47.0 / 64.0, 51.0 / 64.0, 55.0 / 64.0, 60.0 / 64.0, 1.0
} // 4 bit index
};
float rampf(int clog, int bits, float p1, float p2, int indexPos) {
(bits);
// (clog+ LOG_CL_BASE) starts from 2 to 4
return (float)p1 + rampLerpWeights[clog + LOG_CL_BASE][indexPos] * (p2 - p1);
}
int all_same_d(float d[][MAX_DIMENSION_BIG], int n, int dimension) {
int i, j;
int same = 1;
for (i = 1; i< n; i++)
for (j = 0; j< dimension; j++)
same = same && (d[0][j] == d[i][j]);
return(same);
}
// return the max index from a set of indexes
int max_index(int a[], int n) {
int i, m = a[0];
for (i = 0; i< n; i++)
m = m > a[i] ? m : a[i];
return (m);
}
int cluster_mean_d_d(float d[MAX_ENTRIES][MAX_DIMENSION_BIG], float mean[MAX_ENTRIES][MAX_DIMENSION_BIG], int index[], int i_comp[], int i_cnt[], int n, int dimension) {
// unused index values are underfined
int i, j, k;
//assert(n!=0);
for (i = 0; i< n; i++)
for (j = 0; j< dimension; j++) {
// assert(index[i]<MAX_CLUSTERS_BIG);
mean[index[i]][j] = 0;
i_cnt[index[i]] = 0;
}
k = 0;
for (i = 0; i< n; i++) {
for (j = 0; j< dimension; j++)
mean[index[i]][j] += d[i][j];
if (i_cnt[index[i]] == 0)
i_comp[k++] = index[i];
i_cnt[index[i]]++;
}
for (i = 0; i< k; i++)
for (j = 0; j< dimension; j++)
mean[i_comp[i]][j] /= (float)i_cnt[i_comp[i]];
return k;
}
void mean_d_d(float d[][MAX_DIMENSION_BIG], float mean[MAX_DIMENSION_BIG], int n, int dimension) {
int i, j;
for (j = 0; j< dimension; j++)
mean[j] = 0;
for (i = 0; i< n; i++)
for (j = 0; j< dimension; j++)
mean[j] += d[i][j];
for (j = 0; j< dimension; j++)
mean[j] /= (float)n;
}
void index_collapse_kernel(int index[], int numEntries) {
int k;
int d, D;
int mi;
int Mi;
if (numEntries == 0)
return;
mi = Mi = index[0];
for (k = 1; k<numEntries; k++) {
mi = mi < index[k] ? mi : index[k];
Mi = Mi > index[k] ? Mi : index[k];
}
D = 1;
for (d = 2; d <= Mi - mi; d++) {
for (k = 0; k<numEntries; k++)
if ((index[k] - mi) % d != 0)
break;
if (k >= numEntries)
D = d;
}
for (k = 0; k<numEntries; k++)
index[k] = (index[k] - mi) / D;
}
//========================================================================================================================
//-------------------------------------------------------------------------------------------------------------------------
int max_i(int a[], int n) {
int i, m = a[0];
for (i = 0; i< n; i++)
m = m > a[i] ? m : a[i];
return (m);
}
int npv_nd[2][2 * MAX_DIMENSION_BIG] = {
{ 1,2,4,8,16,32,0,0 }, //dimension = 3
{ 1,2,4,0,0,0,0,0 } //dimension = 4
};
short par_vectors_nd[2][8][128][2][MAX_DIMENSION_BIG] = {
{
// Dimension = 3
{
{ { 0,0,0,0 },{ 0,0,0,0 } },
{ { 0,0,0,0 },{ 0,0,0,0 } }
},
// 3*n+1 BCC 3*n+1 Cartesian 3*n //same parity
{
// SAME_PAR
{ { 0,0,0 },{ 0,0,0 } },
{ { 1,1,1 },{ 1,1,1 } }
},
// 3*n+2 BCC 3*n+1 BCC 3*n+1
{
// BCC
{ { 0,0,0 },{ 0,0,0 } },
{ { 0,0,0 },{ 1,1,1 } },
{ { 1,1,1 },{ 0,0,0 } },
{ { 1,1,1 },{ 1,1,1 } }
},
// 3*n+3 FCC ??? // ??????
// BCC with FCC same or inverted, symmetric
{
// BCC_SAME_FCC
{ { 0,0,0 },{ 0,0,0 } },
{ { 1,1,0 },{ 1,1,0 } },
{ { 1,0,1 },{ 1,0,1 } },
{ { 0,1,1 },{ 0,1,1 } },
{ { 0,0,0 },{ 1,1,1 } },
{ { 1,1,1 },{ 0,0,0 } },
{ { 0,1,0 },{ 0,1,0 } }, // ??
{ { 1,1,1 },{ 1,1,1 } },
},
// 3*n+4 FCC 3*n+2 FCC 3*n+2
{
{ { 0,0,0 },{ 0,0,0 } },
{ { 1,1,0 },{ 0,0,0 } },
{ { 1,0,1 },{ 0,0,0 } },
{ { 0,1,1 },{ 0,0,0 } },
{ { 0,0,0 },{ 1,1,0 } },
{ { 1,1,0 },{ 1,1,0 } },
{ { 1,0,1 },{ 1,1,0 } },
{ { 0,1,1 },{ 1,1,0 } },
{ { 0,0,0 },{ 1,0,1 } },
{ { 1,1,0 },{ 1,0,1 } },
{ { 1,0,1 },{ 1,0,1 } },
{ { 0,1,1 },{ 1,0,1 } },
{ { 0,0,0 },{ 0,1,1 } },
{ { 1,1,0 },{ 0,1,1 } },
{ { 1,0,1 },{ 0,1,1 } },
{ { 0,1,1 },{ 0,1,1 } }
},
// 3*n+5 Cartesian 3*n+3 FCC 3*n+2 //D^*[6]
{
{ { 0,0,0 },{ 0,0,0 } },
{ { 1,1,0 },{ 0,0,0 } },
{ { 1,0,1 },{ 0,0,0 } },
{ { 0,1,1 },{ 0,0,0 } },
{ { 0,0,0 },{ 1,1,0 } },
{ { 1,1,0 },{ 1,1,0 } },
{ { 1,0,1 },{ 1,1,0 } },
{ { 0,1,1 },{ 1,1,0 } },
{ { 0,0,0 },{ 1,0,1 } },
{ { 1,1,0 },{ 1,0,1 } },
{ { 1,0,1 },{ 1,0,1 } },
{ { 0,1,1 },{ 1,0,1 } },
{ { 0,0,0 },{ 0,1,1 } },
{ { 1,1,0 },{ 0,1,1 } },
{ { 1,0,1 },{ 0,1,1 } },
{ { 0,1,1 },{ 0,1,1 } },
{ { 1,0,0 },{ 1,1,1 } },
{ { 0,1,0 },{ 1,1,1 } },
{ { 0,0,1 },{ 1,1,1 } },
{ { 1,1,1 },{ 1,1,1 } },
{ { 1,0,0 },{ 0,0,1 } },
{ { 0,1,0 },{ 0,0,1 } },
{ { 0,0,1 },{ 0,0,1 } },
{ { 1,1,1 },{ 0,0,1 } },
{ { 1,0,0 },{ 1,0,0 } },
{ { 0,1,0 },{ 1,0,0 } },
{ { 0,0,1 },{ 1,0,0 } },
{ { 1,1,1 },{ 1,0,0 } },
{ { 1,0,0 },{ 0,1,0 } },
{ { 0,1,0 },{ 0,1,0 } },
{ { 0,0,1 },{ 0,1,0 } },
{ { 1,1,1 },{ 0,1,0 } }
}
},// Dimension = 3
{
// Dimension = 4
{
{ { 0,0,0,0 },{ 0,0,0,0 } },
{ { 0,0,0,0 },{ 0,0,0,0 } }
},
// 3*n+1 BCC 3*n+1 Cartesian 3*n //same parity
{
// SAME_PAR
{ { 0,0,0,0 },{ 0,0,0,0 } },
{ { 1,1,1,1 },{ 1,1,1,1 } }
},
// 3*n+2 BCC 3*n+1 BCC 3*n+1
{
// BCC
{ { 0,0,0,0 },{ 0,0,0,0 } },
{ { 0,0,0,0 },{ 1,1,1,1 } },
{ { 1,1,1,1 },{ 0,0,0,0 } },
{ { 1,1,1,1 },{ 1,1,1,1 } }
},
// 3 PBIT
{
{ { 0,0,0,0 },{ 0,0,0,0 } },
{ { 0,0,0,0 },{ 0,1,1,1 } },
{ { 0,1,1,1 },{ 0,0,0,0 } },
{ { 0,1,1,1 },{ 0,1,1,1 } },
{ { 1,0,0,0 },{ 1,0,0,0 } },
{ { 1,0,0,0 },{ 1,1,1,1 } },
{ { 1,1,1,1 },{ 1,0,0,0 } },
{ { 1,1,1,1 },{ 1,1,1,1 } }
},
// 4 PBIT
{
{ { 0,0,0,0 },{ 0,0,0,0 } },
{ { 0,0,0,0 },{ 0,1,1,1 } },
{ { 0,1,1,1 },{ 0,0,0,0 } },
{ { 0,1,1,1 },{ 0,1,1,1 } },
{ { 1,0,0,0 },{ 1,0,0,0 } },
{ { 1,0,0,0 },{ 1,1,1,1 } },
{ { 1,1,1,1 },{ 1,0,0,0 } },
{ { 1,1,1,1 },{ 1,1,1,1 } },
{ { 0,0,0,0 },{ 0,0,0,0 } },
{ { 0,0,0,0 },{ 0,0,1,1 } },
{ { 0,0,1,1 },{ 0,0,0,0 } },
{ { 0,1,0,1 },{ 0,1,0,1 } },
{ { 1,0,0,0 },{ 1,0,0,0 } },
{ { 1,0,0,0 },{ 1,0,1,1 } },
{ { 1,0,1,1 },{ 1,0,0,0 } },
{ { 1,1,0,1 },{ 1,1,0,1 } },
},
} // Dimension = 4
};
int get_par_vector(int dim1, int dim2, int dim3, int dim4, int dim5) {
return par_vectors_nd[dim1][dim2][dim3][dim4][dim5];
}
float quant_single_point_d
(
float data[MAX_ENTRIES][MAX_DIMENSION_BIG],
int numEntries, int index[MAX_ENTRIES],
float out[MAX_ENTRIES][MAX_DIMENSION_BIG],
int epo_1[2][MAX_DIMENSION_BIG],
int Mi_, // last cluster
int bits[3], // including parity
int type,
int dimension // This should be either 3 or 4
) {
if (dimension < 3) return FLT_MAX;
int i, j;
float err_0 = FLT_MAX;
float err_1 = FLT_MAX;
int idx = 0;
int idx_1 = 0;
int epo_0[2][MAX_DIMENSION_BIG];
int use_par = (type != 0);
int clog = 0;
i = Mi_ + 1;
while (i >>= 1)
clog++;
// assert((1<<clog)== Mi_+1);
int pn;
for (pn = 0; pn<npv_nd[dimension - 3][type]; pn++) {
//1
int dim1 = dimension - 3;
int dim2 = type;
int dim3 = pn;
int o1[2][MAX_DIMENSION_BIG]; // = { 0,2 };
int o2[2][MAX_DIMENSION_BIG]; // = { 0,2 };
for (j = 0; j<dimension; j++) {
//A
o2[0][j] = o1[0][j] = 0;
o2[1][j] = o1[1][j] = 2;
if (use_par) {
if (get_par_vector(dim1, dim2, dim3, 0, j))
o1[0][j] = 1;
else
o1[1][j] = 1;
if (get_par_vector(dim1, dim2, dim3, 1, j))
o2[0][j] = 1;
else
o2[1][j] = 1;
}
} //A
int t1, t2;
int dr[MAX_DIMENSION_BIG];
int dr_0[MAX_DIMENSION_BIG];
//float tr;
for (i = 0; i< (1 << clog); i++) {
//E
float t = 0;
int t1o[MAX_DIMENSION_BIG], t2o[MAX_DIMENSION_BIG];
for (j = 0; j<dimension; j++) {
// D
float t_ = FLT_MAX;
for (t1 = o1[0][j]; t1<o1[1][j]; t1++) {
// C
for (t2 = o2[0][j]; t2<o2[1][j]; t2++)
// This is needed for non-integer mean points of "collapsed" sets
{
// B
#ifdef USE_RAMPS
int tf = (int)floorf(data[0][j]);
int tc = (int)ceilf(data[0][j]);
// if they are not equal, the same representalbe point is used for
// both of them, as all representable points are integers in the rage
if (sperr(tf, CLT(clog), BTT(bits[j]), t1, t2, i) > sperr(tc, CLT(clog), BTT(bits[j]), t1, t2, i))
dr[j] = tc;
else if (sperr(tf, CLT(clog), BTT(bits[j]), t1, t2, i) < sperr(tc, CLT(clog), BTT(bits[j]), t1, t2, i))
dr[j] = tf;
else
#endif
dr[j] = (int)floorf(data[0][j] + 0.5f);
#ifdef USE_RAMPS
tr = sperr(dr[j], CLT(clog), BTT(bits[j]), t1, t2, i) + 2.0f * sqrtf(sperr(dr[j], CLT(clog), BTT(bits[j]), t1, t2, i)) * fabsf((float)dr[j] - data[0][j]) +
(dr[j] - data[0][j])* (dr[j] - data[0][j]);
if (tr < t_) {
t_ = tr;
#else
t_ = 0;
#endif
t1o[j] = t1;
t2o[j] = t2;
dr_0[j] = dr[j];
#ifdef USE_RAMPS
if ((dr_0[j] < 0) || (dr_0[j] > 255)) {
dr_0[j] = 0; // Error!
}
}
#endif
} // B
} //C
t += t_;
} // D
if (t < err_0) {
idx = i;
for (j = 0; j<dimension; j++) {
#ifdef USE_RAMPS
int p1 = CLT(clog); // < 3
int p2 = BTT(bits[j]); // < 4
int in_data = dr_0[j]; // < SP_ERRIDX_MAX
int p4 = t1o[j]; // < 2
int p5 = t2o[j]; // < 2
int p6 = i; // < 16
// New spidx
epo_0[0][j] = spidx(in_data, p1, p2, p4, p5, p6, 0);
epo_0[1][j] = spidx(in_data, p1, p2, p4, p5, p6, 1);
if (epo_0[1][j] >= SP_ERRIDX_MAX) {
epo_0[1][j] = 0; // Error!!
}
#else
epo_0[0][j] = 0;
epo_0[1][j] = 0;
#endif
}
err_0 = t;
}
if (err_0 == 0)
break;
} // E
if (err_0 < err_1) {
idx_1 = idx;
for (j = 0; j<dimension; j++) {
epo_1[0][j] = epo_0[0][j];
epo_1[1][j] = epo_0[1][j];
}
err_1 = err_0;
}
if (err_1 == 0)
break;
} //1
for (i = 0; i< numEntries; i++) {
index[i] = idx_1;
for (j = 0; j<dimension; j++) {
int p1 = CLT(clog); // < 3
int p2 = BTT(bits[j]); // < 4
int p3 = epo_1[0][j]; // < SP_ERRIDX_MAX
int p4 = epo_1[1][j]; // < SP_ERRIDX_MAX
int p5 = idx_1; // < 16
#ifndef USE_NEWRAMP
out[i][j] = ramp[p1][p2][p3][p4][p5];
#else
#pragma warning( push )
#pragma warning(disable:4244)
out[i][j] = (int)rampf(p1, p2, p3, p4, p5);
#pragma warning( pop )
#endif
}
}
return err_1 * numEntries;
}
#define SP_ERRIDX_MAX 256
int expandbits_(int bits, int v) {
return (v << (8 - bits) | v >> (2 * bits - 8));
}
#ifndef USE_NEWRAMP
float ep_d[4][SP_ERRIDX_MAX];
float ramp[3][4][SP_ERRIDX_MAX][SP_ERRIDX_MAX][16];
#else
float ep_df(int bits, int p1) {
return (float)expandbits_(bits + BIT_BASE, p1);
}
float rampf(int clog, int bits, int p1, int p2, int i) {
// (clog+ LOG_CL_BASE) starts from 2 to 4
float ret = floorf((float)ep_df(bits, p1) + rampLerpWeights[clog + LOG_CL_BASE][i] * (float)((ep_df(bits, p2) - ep_df(bits, p1))) + 0.5F);
if (ret > SP_ERRIDX_MAX) return SP_ERRIDX_MAX - 1;
return ret;
}
#endif
#ifdef USE_RAMPS
int spidx(int in_data, int in_clog, int in_bits, int in_p2, int in_o1, int in_o2, int in_i) {
return sp_data[in_data].sp_idx[in_clog][in_bits][in_p2][in_o1][in_o2][in_i];
}
float sperr(int in_data, int clog, int bits, int p2, int o1, int o2) {
return sp_data[in_data].sp_err[clog][bits][p2][o1][o2];
}
#endif
void init_ramps() {
#ifdef USE_RAMPS
int clog, bits;
int in_data; // p1;
int p2;
int i;
int o1, o2;
if (g_init_ramps > 0) {
g_init_ramps++;
return;
}
mtx.lock();
// sp_datap = (SP_DATA **)malloc(SP_ERRIDX_MAX*sizeof(struct SP_DATA));
// assert(sp_datap);
// for (int i = 0; i < SP_ERRIDX_MAX; i++)
// {
// sp_datap[i] = (SP_DATA *)malloc(sizeof(struct SP_DATA));
// }
#ifndef USE_NEWRAMP
for (bits = BIT_BASE; bits < BIT_RANGE; bits++)
for (p1 = 0; p1 < (1 << bits); p1++) {
ep_d[BTT(bits)][p1] = (float)expandbits_(bits, p1);
}
for (clog = LOG_CL_BASE; clog < LOG_CL_RANGE; clog++)
for (bits = BIT_BASE; bits < BIT_RANGE; bits++)
for (p1 = 0; p1 < (1 << bits); p1++)
for (p2 = 0; p2 < (1 << bits); p2++) {
for (o1 = 0; o1 < (1 << clog); o1++) {
ramp[CLT(clog)][BTT(bits)][p1][p2][o1] =
floorf((float)ep_d[BTT(bits)][p1] + rampLerpWeights[clog][o1] * (float)((ep_d[BTT(bits)][p2] - ep_d[BTT(bits)][p1])) + 0.5F);
}
}
#endif
//-----------------------------------------------------------------------------
// Step 1
for (clog = LOG_CL_BASE; clog<LOG_CL_RANGE; clog++)
for (bits = BIT_BASE; bits<BIT_RANGE; bits++)
for (in_data = 0; in_data<SP_ERRIDX_MAX; in_data++)
for (o1 = 0; o1<2; o1++)
for (o2 = 0; o2<2; o2++)
for (i = 0; i<16; i++) {
sp_data[in_data].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] = FLT_MAX;
sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0] = -1;
}
// Step 2
for (clog = LOG_CL_BASE; clog<LOG_CL_RANGE; clog++)
for (bits = BIT_BASE; bits<BIT_RANGE; bits++)
for (in_data = 0; in_data<(1 << bits); in_data++)
for (p2 = 0; p2<(1 << bits); p2++)
for (i = 0; i<(1 << clog); i++) {
#ifndef USE_NEWRAMP
sp_data[(int)ramp[clog].sp_idx[clog]bits][bits][p1][p2][o1]][p1 & 0x1][p2 & 0x1][o1][0] = p1;
sp_data[(int)ramp[clog].sp_idx[clog]bits][bits][p1][p2][o1]][p1 & 0x1][p2 & 0x1][o1][1] = p2;
sp_data[(int)ramp[clog].sp_err[clog]bits][bits][p1][p2][o1]][p1 & 0x1][p2 & 0x1][o1] = 0.;
#else
int spd_i = (int)rampf(CLT(clog), BTT(bits), in_data, p2, o1);
if (spd_i > SP_ERRIDX_MAX) spd_i = SP_ERRIDX_MAX - 1;
sp_data[spd_i].sp_idx[CLT(clog)][BTT(bits)][in_data & 0x1][p2 & 0x1][o1][0] = in_data;
sp_data[spd_i].sp_idx[CLT(clog)][BTT(bits)][in_data & 0x1][p2 & 0x1][o1][1] = p2;
sp_data[spd_i].sp_err[CLT(clog)][BTT(bits)][in_data & 0x1][p2 & 0x1][o1] = 0.;
#endif
}
// Step 3
for (clog = LOG_CL_BASE; clog<LOG_CL_RANGE; clog++)
for (bits = BIT_BASE; bits<BIT_RANGE; bits++)
for (in_data = 0; in_data<SP_ERRIDX_MAX; in_data++)
for (o1 = 0; o1<2; o1++)
for (o2 = 0; o2<2; o2++)
for (i = 0; i<(1 << clog); i++)
if (sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0]<0) {
int k;
for (k = 1; k<SP_ERRIDX_MAX; k++)
if ((in_data - k >= 0 && sp_data[in_data - k].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] == 0) ||
(in_data + k < SP_ERRIDX_MAX && sp_data[in_data + k].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] == 0))
break;
{
if ((in_data - k >= 0 && sp_data[in_data - k].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] == 0)) {
sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0] = sp_data[in_data - k].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0];
sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1] = sp_data[in_data - k].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1];
//printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][0] = (%d)\n", in_data, CLT(clog), BTT(bits), o1, o2, i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0]);
//printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][1] = (%d)\n", in_data, CLT(clog), BTT(bits), o1, o2, i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1]);
} else if ((in_data + k < SP_ERRIDX_MAX && sp_data[in_data + k].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] == 0)) {
sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0] = sp_data[in_data + k].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0];
sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1] = sp_data[in_data + k].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1];
//printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][0] = %d\n", in_data, CLT(clog), BTT(bits), o1, o2, i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0]);
//printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][1] = %d\n", in_data, CLT(clog), BTT(bits), o1, o2, i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1]);
}
sp_data[in_data].sp_err[CLT(clog)][BTT(bits)][o1][o2][i] = (float)k*k;
}
}
//for (clog = LOG_CL_BASE; clog<LOG_CL_RANGE; clog++)
// for (bits = BIT_BASE; bits<BIT_RANGE; bits++)
// for (in_data = 0; in_data<SP_ERRIDX_MAX; in_data++)
// for (o1 = 0; o1<2; o1++)
// for (o2 = 0; o2<2; o2++)
// for (i = 0; i<16; i++)
// {
// printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][0] = %d\n", in_data,CLT(clog), BTT(bits),o1,o2,i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][0]);
// printf("sp_data[%2d].sp_idx[%2d][%2d][%2d][%2d][%2d][1] = %d\n", in_data,CLT(clog), BTT(bits),o1,o2,i, sp_data[in_data].sp_idx[CLT(clog)][BTT(bits)][o1][o2][i][1]);
// }
g_init_ramps++;
mtx.unlock();
#endif
}
void deinit_ramps() {
#ifdef USE_RAMPS
if (g_init_ramps > 1)
g_init_ramps--;
#endif
}
int ep_find_floor(float v, int bits, int use_par, int odd) {
#ifndef USE_NEWRAMP
float *p = ep_d[BTT(bits)];
#endif
int i1 = 0;
int i2 = 1 << (bits - use_par);
odd = use_par ? odd : 0;
while (i2 - i1>1) {
int j = (i1 + i2) / 2;
#ifndef USE_NEWRAMP
if (v >= p[(j << use_par) + odd])
#else
if (v >= ep_df(BTT(bits), (j << use_par) + odd))
#endif
i1 = j;
else
i2 = j;
}
return (i1 << use_par) + odd;
}
//based on code : ep_shaker_d in BC7 shaker
float ep_shaker_HD(
float data[MAX_ENTRIES][MAX_DIMENSION_BIG],
int numEntries,
int index_[MAX_ENTRIES],
float out[MAX_ENTRIES][MAX_DIMENSION_BIG],
int epo_code[2][MAX_DIMENSION_BIG],
int Mi_, // last cluster
int bits[3], // including parity
int dimension
) {
int i, j, k;
int use_par = 0;
int clog = 0;
i = Mi_ + 1;
while (i >>= 1)
clog++;
float mean[MAX_DIMENSION_BIG];
int index[MAX_ENTRIES];
int Mi;
int maxTry = 1;
for (k = 0; k < numEntries; k++) {
index[k] = index_[k];
}
int done;
int change;
int better;
float err_o = FLT_MAX;
float out_2[MAX_ENTRIES][MAX_DIMENSION_BIG];
int idx_2[MAX_ENTRIES];
int epo_2[2][MAX_DIMENSION_BIG];
int max_bits[MAX_DIMENSION_BIG];
int type = bits[0] % (2 * dimension);
for (j = 0; j < dimension; j++)
max_bits[j] = (bits[0] + 2 * dimension - 1) / (2 * dimension);
// handled below automatically
int alls = all_same_d(data, numEntries, dimension);
mean_d_d(data, mean, numEntries, dimension);
do {
index_collapse_kernel(index, numEntries);
Mi = max_index(index, numEntries); // index can be from requantizer
int p, q;
int p0 = -1, q0 = -1;
float err_2 = FLT_MAX;
if (Mi == 0) {
float t;
int epo_0[2][MAX_DIMENSION_BIG];
// either sinle point from the beginning or collapsed index
if (alls) {
t = quant_single_point_d(data, numEntries, index, out_2, epo_0, Mi_, bits, type, dimension);
} else {
quant_single_point_d(&mean, numEntries, index, out_2, epo_0, Mi_, bits, type, dimension);
t = totalError_d(data, out_2, numEntries, dimension);
}
if (t < err_o) {
for (k = 0; k<numEntries; k++) {
index_[k] = index[k];
for (j = 0; j<dimension; j++) {
out[k][j] = out_2[k][j];
epo_code[0][j] = epo_0[0][j];
epo_code[1][j] = epo_0[1][j];
}
};
err_o = t;
}
return err_o;
}
for (q = 1; Mi != 0 && q*Mi <= Mi_; q++) { // does not work for single point collapsed index!!!
for (p = 0; p <= Mi_ - q*Mi; p++) {
int cidx[MAX_ENTRIES];
for (k = 0; k<numEntries; k++) {
cidx[k] = index[k] * q + p;
}
float epa[2][MAX_DIMENSION_BIG];
//
// solve RMS problem for center
//
float im[2][2] = { { 0,0 },{ 0,0 } }; // matrix /inverse matrix
float rp[2][MAX_DIMENSION_BIG]; // right part for RMS fit problem
// get ideal clustr centers
float cc[MAX_CLUSTERS_BIG][MAX_DIMENSION_BIG];
int index_cnt[MAX_CLUSTERS_BIG]; // count of index entries
int index_comp[MAX_CLUSTERS_BIG]; // compacted index
int index_ncl; // number of unique indexes
index_ncl = cluster_mean_d_d(data, cc, cidx, index_comp, index_cnt, numEntries, dimension); // unrounded
for (i = 0; i<index_ncl; i++)
for (j = 0; j<dimension; j++)
cc[index_comp[i]][j] = (float)floorf(cc[index_comp[i]][j] + 0.5f); // more or less ideal location
for (j = 0; j<dimension; j++) {
rp[0][j] = rp[1][j] = 0;
}
// weight with cnt if runnning on compacted index
for (k = 0; k<numEntries; k++) {
im[0][0] += (Mi_ - cidx[k])* (Mi_ - cidx[k]);
im[0][1] += cidx[k] * (Mi_ - cidx[k]); // im is symmetric
im[1][1] += cidx[k] * cidx[k];
for (j = 0; j<dimension; j++) {
rp[0][j] += (Mi_ - cidx[k]) * cc[cidx[k]][j];
rp[1][j] += cidx[k] * cc[cidx[k]][j];
}
}
float dd = im[0][0] * im[1][1] - im[0][1] * im[0][1];
//assert(dd !=0);
// dd=0 means that cidx[k] and (Mi_-cidx[k]) collinear which implies only one active index;
// taken care of separately
im[1][0] = im[0][0];
im[0][0] = im[1][1] / dd;
im[1][1] = im[1][0] / dd;
im[1][0] = im[0][1] = -im[0][1] / dd;
for (j = 0; j<dimension; j++) {
epa[0][j] = (im[0][0] * rp[0][j] + im[0][1] * rp[1][j])*Mi_;
epa[1][j] = (im[1][0] * rp[0][j] + im[1][1] * rp[1][j])*Mi_;
}
float err_1 = FLT_MAX;
float out_1[MAX_ENTRIES][MAX_DIMENSION_BIG];
int idx_1[MAX_ENTRIES];
int epo_1[2][MAX_DIMENSION_BIG];
int s1 = 0;
float epd[2][MAX_DIMENSION_BIG][2]; // first second, coord, begin range end range
for (j = 0; j<dimension; j++) {
for (i = 0; i<2; i++) {
// set range
epd[i][j][0] = epd[i][j][1] = epa[i][j];
epd[i][j][1] += ((1 << bits[j]) - 1 - (int)epd[i][j][1] < (1 << use_par) ?
(1 << bits[j]) - 1 - (int)epd[i][j][1] : (1 << use_par)) & (~use_par);
}
}
float ce[MAX_ENTRIES][MAX_CLUSTERS_BIG][MAX_DIMENSION_BIG];
float err_0 = 0;
float out_0[MAX_ENTRIES][MAX_DIMENSION_BIG];
int idx_0[MAX_ENTRIES];
for (i = 0; i<numEntries; i++) {
float d[4];
d[0] = data[i][0];
d[1] = data[i][1];
d[2] = data[i][2];
d[3] = data[i][3];
for (j = 0; j<(1 << clog); j++)
for (k = 0; k < dimension; k++) {
ce[i][j][k] = (rampf(CLT(clog), BTT(bits[k]), epd[0][k][0], epd[1][k][0], j) - d[k])*
(rampf(CLT(clog), BTT(bits[k]), epd[0][k][0], epd[1][k][0], j) - d[k]);
}
}
int s = 0, p1, g;
int ei0 = 0, ei1 = 0;
for (p1 = 0; p1<64; p1++) {
int j0 = 0;
// Gray code increment
g = p1 & (-p1);
err_0 = 0;
for (j = 0; j<dimension; j++) {
if (((g >> (2 * j)) & 0x3) != 0) {
j0 = j;
// new cords
ei0 = (((s^g) >> (2 * j)) & 0x1);
ei1 = (((s^g) >> (2 * j + 1)) & 0x1);
}
}
s = s ^ g;
err_0 = 0;
for (i = 0; i<numEntries; i++) {
float d[4];
d[0] = data[i][0];
d[1] = data[i][1];
d[2] = data[i][2];
d[3] = data[i][3];
int ci = 0;
float cmin = FLT_MAX;
for (j = 0; j<(1 << clog); j++) {
float t_ = 0.;
ce[i][j][j0] = (rampf(CLT(clog), BTT(bits[j0]), epd[0][j0][ei0], epd[1][j0][ei1], j) - d[j0])*
(rampf(CLT(clog), BTT(bits[j0]), epd[0][j0][ei0], epd[1][j0][ei1], j) - d[j0]);
for (k = 0; k<dimension; k++) {
t_ += ce[i][j][k];
}
if (t_< cmin) {
cmin = t_;
ci = j;
}
}
idx_0[i] = ci;
for (k = 0; k<dimension; k++) {
out_0[i][k] = rampf(CLT(clog), BTT(bits[k]), epd[0][k][ei0], epd[1][k][ei1], ci);
}
err_0 += cmin;
}
if (err_0 < err_1) {
// best in the curent ep cube run
for (i = 0; i < numEntries; i++) {
idx_1[i] = idx_0[i];
for (j = 0; j<dimension; j++)
out_1[i][j] = out_0[i][j];
}
err_1 = err_0;
s1 = s; // epo coding
}
}
// reconstruct epo
for (j = 0; j<dimension; j++) {
{
// new cords
ei0 = ((s1 >> (2 * j)) & 0x1);
ei1 = ((s1 >> (2 * j + 1)) & 0x1);
epo_1[0][j] = (int)epd[0][j][ei0];
epo_1[1][j] = (int)epd[1][j][ei1];
}
}
if (err_1 < err_2) {
// best in the curent ep cube run
for (i = 0; i < numEntries; i++) {
idx_2[i] = idx_1[i];
for (j = 0; j<dimension; j++)
out_2[i][j] = out_1[i][j];
}
err_2 = err_1;
for (j = 0; j<dimension; j++) {
epo_2[0][j] = epo_1[0][j];
epo_2[1][j] = epo_1[1][j];
}
p0 = p;
q0 = q;
}
}
}
// change/better
change = 0;
for (k = 0; k<numEntries; k++)
change = change || (index[k] * q0 + p0 != idx_2[k]);
better = err_2 < err_o;
if (better) {
for (k = 0; k<numEntries; k++) {
index_[k] = index[k] = idx_2[k];
for (j = 0; j<dimension; j++) {
out[k][j] = out_2[k][j];
epo_code[0][j] = epo_2[0][j];
epo_code[1][j] = epo_2[1][j];
}
}
err_o = err_2;
}
done = !(change && better);
if (maxTry > 0) maxTry--;
else maxTry = 0;
} while (!done && maxTry);
return err_o;
}
float ep_shaker_2_d(
float data[MAX_ENTRIES][MAX_DIMENSION_BIG],
int numEntries,
int index_[MAX_ENTRIES],
float out[MAX_ENTRIES][MAX_DIMENSION_BIG],
int epo_code[2][MAX_DIMENSION_BIG],
int size,
int Mi_, // last cluster
int bits, // total for all channels
// defined by total numbe of bits and dimensioin
int dimension,
float epo[2][MAX_DIMENSION_BIG]
) {
if (dimension < 3) return FLT_MAX;
int i, j, k;
int max_bits[MAX_DIMENSION_BIG];
int type = bits % (2 * dimension);
int use_par = (type != 0);
for (j = 0; j < dimension; j++)
max_bits[j] = (bits + 2 * dimension - 1) / (2 * dimension);
int clog = 0;
i = Mi_ + 1;
while (i >>= 1)
clog++;
if (CLT(clog) > 3)
return FLT_MAX;
float mean[MAX_DIMENSION_BIG];
int index[MAX_ENTRIES];
int Mi;
int maxTry = 8;
for (k = 0; k < numEntries; k++) {
index[k] = index_[k];
}
int done;
int change;
int better;
float err_o = FLT_MAX;
int epo_0[2][MAX_DIMENSION_BIG];
float outg[MAX_ENTRIES][MAX_DIMENSION_BIG];
// handled below automatically
int alls = all_same_d(data, numEntries, dimension);
mean_d_d(data, mean, numEntries, dimension);
do {
index_collapse_kernel(index, numEntries);
Mi = max_i(index, numEntries); // index can be from requantizer
int p, q;
int p0 = -1, q0 = -1;
float err_0 = FLT_MAX;
if (Mi == 0) {
float t;
// either single point from the beginning or collapsed index
if (alls) {
t = quant_single_point_d(data, numEntries, index, outg, epo_0, Mi_, max_bits, type, dimension);
} else {
quant_single_point_d(&mean, numEntries, index, outg, epo_0, Mi_, max_bits, type, dimension);
t = totalError_d(data, outg, numEntries, dimension);
}
if (t < err_o) {
for (k = 0; k<numEntries; k++) {
index_[k] = index[k];
for (j = 0; j<dimension; j++) {
out[k][j] = outg[k][j];
epo_code[0][j] = epo_0[0][j];
epo_code[1][j] = epo_0[1][j];
}
};
err_o = t;
}
for (j = 0; j<dimension; j++) {
#ifndef USE_NEWRAMP
epo[0][j] = ramp[CLT(clog)][BTT(max_bits[j])][epo_code[0][j]][epo_code[1][j]][0];
epo[1][j] = ramp[CLT(clog)][BTT(max_bits[j])][epo_code[0][j]][epo_code[1][j]][(1 << clog) - 1];
#else
epo[0][j] = rampf(CLT(clog), BTT(max_bits[j]), epo_code[0][j], epo_code[1][j], 0);
epo[1][j] = rampf(CLT(clog), BTT(max_bits[j]), epo_code[0][j], epo_code[1][j], (1 << clog) - 1);
#endif
}
return err_o;
}
for (q = 1; Mi != 0 && q*Mi <= Mi_; q++) // does not work for single point collapsed index!!!
for (p = 0; p <= Mi_ - q*Mi; p++) {
int cidx[MAX_ENTRIES];
for (k = 0; k<numEntries; k++)
cidx[k] = index[k] * q + p;
float epa[2][MAX_DIMENSION_BIG];
//
// solve RMS problem for center
//
float im[2][2] = { { 0,0 },{ 0,0 } }; // matrix /inverse matrix
float rp[2][MAX_DIMENSION_BIG]; // right part for RMS fit problem
// get ideal clustr centers
float cc[MAX_CLUSTERS_BIG][MAX_DIMENSION_BIG];
int i_cnt[MAX_CLUSTERS_BIG]; // count of index entries
int i_comp[MAX_CLUSTERS_BIG]; // compacted index
int ncl; // number of unique indexes
ncl = cluster_mean_d_d(data, cc, cidx, i_comp, i_cnt, numEntries, dimension); // unrounded
// round
for (i = 0; i<ncl; i++)
for (j = 0; j<dimension; j++)
cc[i_comp[i]][j] = (float)floorf(cc[i_comp[i]][j] + 0.5F); // more or less ideal location
for (j = 0; j<dimension; j++)
rp[0][j] = rp[1][j] = 0;
// weight with cnt if runnning on compacted index
for (k = 0; k<numEntries; k++) {
im[0][0] += (Mi_ - cidx[k])* (Mi_ - cidx[k]);
im[0][1] += cidx[k] * (Mi_ - cidx[k]); // im is symmetric
im[1][1] += cidx[k] * cidx[k];
for (j = 0; j<dimension; j++) {
rp[0][j] += (Mi_ - cidx[k]) * cc[cidx[k]][j];
rp[1][j] += cidx[k] * cc[cidx[k]][j];
}
}
float dd = im[0][0] * im[1][1] - im[0][1] * im[0][1];
// dd=0 means that cidx[k] and (Mi_-cidx[k]) collinear which implies only one active index;
// taken care of separately
im[1][0] = im[0][0];
im[0][0] = im[1][1] / dd;
im[1][1] = im[1][0] / dd;
im[1][0] = im[0][1] = -im[0][1] / dd;
for (j = 0; j<dimension; j++) {
epa[0][j] = (im[0][0] * rp[0][j] + im[0][1] * rp[1][j])*Mi_;
epa[1][j] = (im[1][0] * rp[0][j] + im[1][1] * rp[1][j])*Mi_;
}
// shake single or - cartesian
// shake odd/odd and even/even or - same parity
// shake odd/odd odd/even , even/odd and even/even - bcc
float err_1 = FLT_MAX;
int epo_1[2][MAX_DIMENSION_BIG];
float ed[2][2][MAX_DIMENSION_BIG];
int epo_2_[2][2][2][MAX_DIMENSION_BIG];
for (j = 0; j<dimension; j++) {
#ifndef USE_NEWRAMP
float(*rb)[SP_ERRIDX_MAX][16] = ramp[CLT(clog)][BTT(max_bits[j])];
#endif
int pp[2] = { 0,0 };
int rr = (use_par ? 2 : 1);
int epi[2][2]; // first/second, coord, begin rage end range
for (pp[0] = 0; pp[0]<rr; pp[0]++) {
for (pp[1] = 0; pp[1]<rr; pp[1]++) {
for (i = 0; i<2; i++) { // set range
epi[i][0] = epi[i][1] = ep_find_floor(epa[i][j], max_bits[j], use_par, pp[i]);
epi[i][0] -= ((epi[i][0] < (size >> 1) - 1 ? epi[i][0] : (size >> 1) - 1)) & (~use_par);
epi[i][1] += ((1 << max_bits[j]) - 1 - epi[i][1] < (size >> 1) ?
(1 << max_bits[j]) - 1 - epi[i][1] : (size >> 1)) & (~use_par);
}
int p1, p2, step = (1 << use_par);
ed[pp[0]][pp[1]][j] = FLT_MAX;
for (p1 = epi[0][0]; p1 <= epi[0][1]; p1 += step)
for (p2 = epi[1][0]; p2 <= epi[1][1]; p2 += step) {
#ifndef USE_NEWRAMP
float *rbp = rb[p1][p2];
#endif
float t = 0;
int *ci = cidx;
int m = numEntries;
int _mc = m;
while (_mc > 0) {
#ifndef USE_NEWRAMP
t += (rbp[ci[_mc - 1]] - data[_mc - 1][j])
*(rbp[ci[_mc - 1]] - data[_mc - 1][j]);
#else
t += (rampf(CLT(clog), BTT(max_bits[j]), p1, p2, ci[_mc - 1]) - data[_mc - 1][j])
*(rampf(CLT(clog), BTT(max_bits[j]), p1, p2, ci[_mc - 1]) - data[_mc - 1][j]);
#endif
_mc--;
}
if (t<ed[pp[0]][pp[1]][j]) {
ed[pp[0]][pp[1]][j] = t;
epo_2_[pp[0]][pp[1]][0][j] = p1;
epo_2_[pp[0]][pp[1]][1][j] = p2;
}
}
} // pp[1]
} // pp[0]
} // j
int pn;
for (pn = 0; pn<npv_nd[dimension - 3][type]; pn++) {
int dim1 = dimension - 3;
int dim2 = type;
int dim3 = pn;
int j1;
float err_2 = 0;
for (j1 = 0; j1<dimension; j1++)
err_2 += ed[
get_par_vector(dim1, dim2, dim3, 0, j1)][get_par_vector(dim1, dim2, dim3, 1, j1)][j1];
if (err_2 < err_1) {
err_1 = err_2;
for (j1 = 0; j1<dimension; j1++) {
epo_1[0][j1] = epo_2_[get_par_vector(dim1, dim2, dim3, 0, j1)][get_par_vector(dim1, dim2, dim3, 1, j1)][0][j1];
epo_1[1][j1] = epo_2_[get_par_vector(dim1, dim2, dim3, 0, j1)][get_par_vector(dim1, dim2, dim3, 1, j1)][1][j1];
}
}
}
if (err_1 <= err_0) { // we'd want to get expanded index;
err_0 = err_1;
p0 = p;
q0 = q;
for (j = 0; j<dimension; j++) {
epo_0[0][j] = epo_1[0][j];
epo_0[1][j] = epo_1[1][j];
}
}
}
// requantize
#ifndef USE_NEWRAMP
float *r[MAX_DIMENSION_BIG];
#endif
int idg[MAX_ENTRIES];
float err_r = 0;
if (CLT(clog) > (LOG_CL_RANGE - LOG_CL_BASE))
return FLT_MAX;
for (int jj = 0; jj<dimension; jj++) {
if (BTT(max_bits[jj]) >(BIT_RANGE - BIT_BASE))
return FLT_MAX;
if ((epo_0[0][jj] > 255) || (epo_0[0][jj] < 0))
return FLT_MAX;
if ((epo_0[1][jj] > 255) || (epo_0[0][jj] < 0))
return FLT_MAX;
#ifndef USE_NEWRAMP
r[jj] = ramp[CLT(clog)][BTT(max_bits[jj])][epo_0[0][jj]][epo_0[1][jj]];
#endif
}
for (i = 0; i<numEntries; i++) {
float cmin = FLT_MAX;
int ci = 0;
float d[4];
d[0] = data[i][0];
d[1] = data[i][1];
d[2] = data[i][2];
d[3] = data[i][3];
for (j = 0; j < (1 << clog); j++) {
float t_ = 0.;
for (k = 0; k<dimension; k++) {
#ifndef USE_NEWRAMP
t_ += (r[k][j] - d[k])*
(r[k][j] - d[k]);
#else
t_ += (rampf(CLT(clog), BTT(max_bits[k]), epo_0[0][k], epo_0[1][k], j) - d[k])*
(rampf(CLT(clog), BTT(max_bits[k]), epo_0[0][k], epo_0[1][k], j) - d[k]);
#endif
}
if (t_<cmin) {
cmin = t_;
ci = j;
}
}
idg[i] = ci;
for (k = 0; k<dimension; k++) {
#ifndef USE_NEWRAMP
outg[i][k] = r[k][ci];
#else
outg[i][k] = rampf(CLT(clog), BTT(max_bits[k]), epo_0[0][k], epo_0[1][k], ci);
#endif
}
err_r += cmin;
}
change = 0;
for (k = 0; k<numEntries; k++)
change = change || (index[k] * q0 + p0 != idg[k]);
better = err_r < err_o;
if (better) {
for (k = 0; k<numEntries; k++) {
index_[k] = index[k] = idg[k];
for (j = 0; j<dimension; j++) {
out[k][j] = outg[k][j];
epo_code[0][j] = epo_0[0][j];
epo_code[1][j] = epo_0[1][j];
}
}
err_o = err_r;
}
done = !(change && better);
if (maxTry > 0) maxTry--;
else maxTry = 0;
} while (!done && maxTry);
for (j = 0; j<dimension; j++) {
#ifndef USE_NEWRAMP
epo[0][j] = ramp[CLT(clog)][BTT(max_bits[j])][epo_code[0][j]][epo_code[1][j]][0];
epo[1][j] = ramp[CLT(clog)][BTT(max_bits[j])][epo_code[0][j]][epo_code[1][j]][(1 << clog) - 1];
#else
epo[0][j] = rampf(CLT(clog), BTT(max_bits[j]), epo_code[0][j], epo_code[1][j], 0);
epo[1][j] = rampf(CLT(clog), BTT(max_bits[j]), epo_code[0][j], epo_code[1][j], (1 << clog) - 1);
#endif
}
return err_o;
}
}