//===============================================================================
// Copyright (c) 2007-2016 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2004-2006 ATI Technologies Inc.
//===============================================================================
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//
// File Name:   Codec_DXTC.cpp
// Description: Implementation of the CCodec_DXTC class
//
//////////////////////////////////////////////////////////////////////////////
#include "common.h"
|
|
#include "codec_dxtc.h"
|
|
#include "compressonatorxcodec.h"
|
|
#include "dxtc_v11_compress.h"
|
|
|
|
#include "common_def.h"
|
|
|
|
#ifdef _WIN32
|
|
#pragma warning(disable : 4201)
|
|
#endif
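
// Compresses a 16-pixel alpha block (BC3/DXT5-style alpha) to 8 bytes.
// Both candidate encodings are tried: the 8-value interpolated ramp and the
// 6-value ramp with the fixed 0/255 codes; the encoding with the lower error wins.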
CodecError CCodec_DXTC::CompressAlphaBlock(CMP_BYTE alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
    BYTE nEndpoints[2][2];
    BYTE nIndices[2][BLOCK_SIZE_4X4];

    float fError8 = CompBlock1X(alphaBlock, BLOCK_SIZE_4X4, nEndpoints[0], nIndices[0], 8, false, m_bUseSSE2, 8, 0, true);
    float fError6 = (fError8 == 0.f) ? FLT_MAX : CompBlock1X(alphaBlock, BLOCK_SIZE_4X4, nEndpoints[1], nIndices[1], 6, true, m_bUseSSE2, 8, 0, true);

    if(fError8 <= fError6)
        EncodeAlphaBlock(compressedBlock, nEndpoints[0], nIndices[0]);
    else
        EncodeAlphaBlock(compressedBlock, nEndpoints[1], nIndices[1]);

    return CE_OK;
}

// //========================== CMP_CORE Code ===================================
// #define CMP_QUALITY2 0.601f
// #ifndef MAX_ERROR
// #define MAX_ERROR 128000.f
// #endif
// #ifndef GBL_SCH_STEP
// #define GBL_SCH_STEP_MXS 0.018f
// #define GBL_SCH_EXT_MXS 0.1f
// #define LCL_SCH_STEP_MXS 0.6f
// #define GBL_SCH_STEP_MXQ 0.0175f
// #define GBL_SCH_EXT_MXQ 0.154f
// #define LCL_SCH_STEP_MXQ 0.45f
//
// #define GBL_SCH_STEP GBL_SCH_STEP_MXS
// #define GBL_SCH_EXT GBL_SCH_EXT_MXS
// #define LCL_SCH_STEP LCL_SCH_STEP_MXS
// #endif
//
// #define SCH_STPS 3  // number of search steps to make at each end of interval
// static CMP_CONSTANT CGU_FLOAT sMvF[] = {0.f, -1.f, 1.f, -2.f, 2.f, -3.f, 3.f, -4.f, 4.f, -5.f, 5.f, -6.f, 6.f, -7.f, 7.f, -8.f, 8.f};
// #define MAX_POINTS 16
// #define NUM_ENDPOINTS 2
// #define CMP_ALPHA_RAMP 8
//
//
// static CGU_INT QSortFCmp(const void* Elem1, const void* Elem2)
// {
//     CGU_INT ret = 0;
//
//     if (*(CGU_FLOAT*)Elem1 < *(CGU_FLOAT*)Elem2)
//         ret = -1;
//     else if (*(CGU_FLOAT*)Elem1 > *(CGU_FLOAT*)Elem2)
//         ret = 1;
//     return ret;
// }
//
// static CGU_FLOAT cmp_getRampError(CGU_FLOAT _Blk[BLOCK_SIZE_4X4],
//                                   CGU_FLOAT _Rpt[BLOCK_SIZE_4X4],
//                                   CGU_FLOAT _maxerror,
//                                   CGU_FLOAT _min_ex,
//                                   CGU_FLOAT _max_ex,
//                                   CGU_INT   _NmbrClrs)
// {   // Max 16
//     CGU_INT   i;
//     CGU_FLOAT error = 0;
//     const CGU_FLOAT step   = (_max_ex - _min_ex) / 7;  // (CGU_FLOAT)(dwNumPoints - 1);
//     const CGU_FLOAT step_h = step * 0.5f;
//     const CGU_FLOAT rstep  = 1.0f / step;
//
//     for (i = 0; i < _NmbrClrs; i++)
//     {
//         CGU_FLOAT v;
//         // Work out which value in the block this selects
//         CGU_FLOAT del;
//
//         if ((del = _Blk[i] - _min_ex) <= 0)
//             v = _min_ex;
//         else if (_Blk[i] - _max_ex >= 0)
//             v = _max_ex;
//         else
//             v = (floor((del + step_h) * rstep) * step) + _min_ex;
//
//         // And accumulate the error
//         CGU_FLOAT del2 = (_Blk[i] - v);
//         error += del2 * del2 * _Rpt[i];
//
//         // if we've already lost to the previous step, bail out
//         if (_maxerror < error)
//         {
//             error = _maxerror;
//             break;
//         }
//     }
//     return error;
// }
//
//
// static CGU_FLOAT cmp_linearBlockRefine(CGU_FLOAT _Blk[BLOCK_SIZE_4X4],
//                                        CGU_FLOAT _Rpt[BLOCK_SIZE_4X4],
//                                        CGU_FLOAT _MaxError,
//                                        CMP_INOUT CGU_FLOAT CMP_PTRINOUT _min_ex,
//                                        CMP_INOUT CGU_FLOAT CMP_PTRINOUT _max_ex,
//                                        CGU_FLOAT _m_step,
//                                        CGU_FLOAT _min_bnd,
//                                        CGU_FLOAT _max_bnd,
//                                        CGU_INT   _NmbrClrs)
// {
//     // Start out assuming our endpoints are the min and max values we've
//     // determined
//
//     // Attempt a (simple) progressive refinement step to reduce noise in the
//     // output image by trying to find a better overall match for the endpoints.
//
//     CGU_FLOAT maxerror = _MaxError;
//     CGU_FLOAT min_ex   = CMP_PTRINOUT _min_ex;
//     CGU_FLOAT max_ex   = CMP_PTRINOUT _max_ex;
//
//     CGU_INT mode, bestmode;
//
//     do
//     {
//         CGU_FLOAT cr_min0 = min_ex;
//         CGU_FLOAT cr_max0 = max_ex;
//         for (bestmode = -1, mode = 0; mode < SCH_STPS * SCH_STPS; mode++)
//         {
//             // check each move (see sStep for direction)
//             CGU_FLOAT cr_min = min_ex + _m_step * sMvF[mode / SCH_STPS];
//             CGU_FLOAT cr_max = max_ex + _m_step * sMvF[mode % SCH_STPS];
//
//             cr_min = max(cr_min, _min_bnd);
//             cr_max = min(cr_max, _max_bnd);
//
//             CGU_FLOAT error;
//             error = cmp_getRampError(_Blk, _Rpt, maxerror, cr_min, cr_max, _NmbrClrs);
//
//             if (error < maxerror)
//             {
//                 maxerror = error;
//                 bestmode = mode;
//                 cr_min0  = cr_min;
//                 cr_max0  = cr_max;
//             }
//         }
//
//         if (bestmode != -1)
//         {
//             // make move (see sStep for direction)
//             min_ex = cr_min0;
//             max_ex = cr_max0;
//         }
//     } while (bestmode != -1);
//
//     CMP_PTRINOUT _min_ex = min_ex;
//     CMP_PTRINOUT _max_ex = max_ex;
//
//     return maxerror;
// }
//
// static CGU_Vec2i cmp_getLinearEndPoints(CGU_FLOAT _Blk[BLOCK_SIZE_4X4], CMP_IN CGU_FLOAT fquality, CMP_IN CGU_BOOL isSigned)
// {
//     CGU_UINT32 i;
//     CGU_Vec2i  cmpMinMax;
//
//     CGU_FLOAT scalePts    = 1.0f;  // isSigned ? 128.0f : 255.0f;
//     CGU_FLOAT scaleOffset = isSigned ? 0.25f : 0.5f;
//
//     //================================================================
//     // Bounding Box
//     // lowest quality calculation to get min and max value to use
//     //================================================================
//     if (fquality < CMP_QUALITY2)
//     {
//         cmpMinMax.x = _Blk[0];
//         cmpMinMax.y = _Blk[0];
//         for (i = 1; i < BLOCK_SIZE_4X4; ++i)
//         {
//             cmpMinMax.x = min(cmpMinMax.x, _Blk[i]);
//             cmpMinMax.y = max(cmpMinMax.y, _Blk[i]);
//         }
//         return cmpMinMax;
//     }
//
//     //================================================================
//     // Do more calculations to get the best min and max values to use
//     //================================================================
//     CGU_FLOAT Ramp[2];
//
//     // Result defaults for SNORM or UNORM
//     Ramp[0] = isSigned ? -1.0f : 0.0f;
//     Ramp[1] = 1.0f;
//
//     CGU_FLOAT afUniqueValues[BLOCK_SIZE_4X4];
//     CGU_FLOAT afValueRepeats[BLOCK_SIZE_4X4];
//     for (i = 0; i < BLOCK_SIZE_4X4; i++)
//         afUniqueValues[i] = afValueRepeats[i] = 0.f;
//
//     // For each unique value we compute the number of its appearances.
//     CGU_FLOAT fBlk[BLOCK_SIZE_4X4];
//
//     memcpy(fBlk, _Blk, BLOCK_SIZE_4X4 * sizeof(CGU_FLOAT));
//     qsort((void*)fBlk, (size_t)BLOCK_SIZE_4X4, sizeof(CGU_FLOAT), QSortFCmp);
//
//     CGU_FLOAT new_p = -2.0f;
//
//     CGU_UINT32 dwUniqueValues    = 0;
//     afUniqueValues[0]            = 0.0f;
//     CGU_BOOL requiresCalculation = true;
//
//     {
//         // Ramp not fixed
//         for (i = 0; i < BLOCK_SIZE_4X4; i++)
//         {
//             if (new_p != fBlk[i])
//             {
//                 afUniqueValues[dwUniqueValues] = new_p = fBlk[i];
//                 afValueRepeats[dwUniqueValues] = 1.f;
//                 dwUniqueValues++;
//             }
//             else if (dwUniqueValues)
//                 afValueRepeats[dwUniqueValues - 1] += 1.f;
//         }
//
//         // if the number of unique colors is 2 or fewer, we're done
//         if (dwUniqueValues <= 2)
//         {
//             Ramp[0] = floor(afUniqueValues[0] * scalePts + scaleOffset);
//             if (dwUniqueValues == 1)
//                 Ramp[1] = Ramp[0] + 1.f;
//             else
//                 Ramp[1] = floor(afUniqueValues[1] * scalePts + scaleOffset);
//             requiresCalculation = false;
//         }
//     }  // Ramp not fixed
//
//     if (requiresCalculation)
//     {
//         CGU_FLOAT min_ex  = afUniqueValues[0];
//         CGU_FLOAT max_ex  = afUniqueValues[dwUniqueValues - 1];
//         CGU_FLOAT min_bnd = 0, max_bnd = 1.;
//         CGU_FLOAT min_r = min_ex, max_r = max_ex;
//         CGU_FLOAT gbl_l = 0, gbl_r = 0;
//         CGU_FLOAT cntr = (min_r + max_r) / 2;
//
//         CGU_FLOAT gbl_err = MAX_ERROR;
//         // Trying to avoid unnecessary calculations. Heuristics: after some analysis
//         // it appears that in the integer case, if the input interval is no more than
//         // 48 we won't get much better.
//         bool wantsSearch = !((max_ex - min_ex) <= (48.f / scalePts));
//
//         if (wantsSearch)
//         {
//             // Search.
//             // 1. take the vicinities of both the low and high bounds of the input
//             //    interval.
//             // 2. set up some search step
//             // 3. find the new low and high bounds which provide a (sub)optimal
//             //    (infinite precision) clusterization.
//             CGU_FLOAT gbl_llb = (min_bnd > min_r - GBL_SCH_EXT) ? min_bnd : min_r - GBL_SCH_EXT;
//             CGU_FLOAT gbl_rrb = (max_bnd < max_r + GBL_SCH_EXT) ? max_bnd : max_r + GBL_SCH_EXT;
//             CGU_FLOAT gbl_lrb = (cntr < min_r + GBL_SCH_EXT) ? cntr : min_r + GBL_SCH_EXT;
//             CGU_FLOAT gbl_rlb = (cntr > max_r - GBL_SCH_EXT) ? cntr : max_r - GBL_SCH_EXT;
//
//             for (CGU_FLOAT step_l = gbl_llb; step_l < gbl_lrb; step_l += GBL_SCH_STEP)
//             {
//                 for (CGU_FLOAT step_r = gbl_rrb; gbl_rlb <= step_r; step_r -= GBL_SCH_STEP)
//                 {
//                     CGU_FLOAT sch_err;
//                     // an SSE version is available
//                     sch_err = cmp_getRampError(afUniqueValues, afValueRepeats, gbl_err, step_l, step_r, dwUniqueValues);
//                     if (sch_err < gbl_err)
//                     {
//                         gbl_err = sch_err;
//                         gbl_l   = step_l;
//                         gbl_r   = step_r;
//                     }
//                 }
//             }
//
//             min_r = gbl_l;
//             max_r = gbl_r;
//         }  // wants search
//
//         // This is a refinement call. The function tries to make several small
//         // stretches or squashes to minimize quantization error.
//         CGU_FLOAT m_step = LCL_SCH_STEP / scalePts;
//         cmp_linearBlockRefine(afUniqueValues, afValueRepeats, gbl_err, CMP_REFINOUT min_r, CMP_REFINOUT max_r, m_step, min_bnd, max_bnd, dwUniqueValues);
//
//         min_ex = min_r;
//         max_ex = max_r;
//         max_ex *= scalePts;
//         min_ex *= scalePts;
//
//         Ramp[0] = floor(min_ex + scaleOffset);
//         Ramp[1] = floor(max_ex + scaleOffset);
//     }
//
//     // Ensure that the two endpoints are not the same.
//     // This is legal but serves no need & can break some optimizations in the compressor.
//     if (Ramp[0] == Ramp[1])
//     {
//         if (Ramp[1] < scalePts)
//             Ramp[1] = Ramp[1] + .1f;
//         else if (Ramp[1] > 0.0f)
//             Ramp[1] = Ramp[1] - .1f;
//     }
//
//     cmpMinMax.x = Ramp[0];
//     cmpMinMax.y = Ramp[1];
//
//     return cmpMinMax;
// }
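
// Converts a SNORM float in [-1, 1] to a rounded signed 8-bit value in [-127, 127].
// NaN is treated as 0 and out-of-range inputs are clamped before scaling.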
static CGU_INT8 cmp_SNormFloatToSInt(CGU_FLOAT fsnorm)
{
    if (isnan(fsnorm))
        fsnorm = 0;
    else if (fsnorm > 1)
        fsnorm = 1;  // Clamp to 1
    else if (fsnorm < -1)
        fsnorm = -1;  // Clamp to -1

    fsnorm = fsnorm * 127U;

    // round half away from zero
    if (fsnorm >= 0)
        fsnorm += .5f;
    else
        fsnorm -= .5f;

    CGU_INT8 res = static_cast<CGU_INT8>(fsnorm);

    return (res);
}
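
// Least-squares refinement of the two ramp endpoints (fX, fY) for a block of
// scalar values: each iteration assigns every pixel to its nearest ramp step,
// then moves the endpoints along the gradient of the squared error (a Newton-style
// update), stopping when the interval or the gradient becomes small.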
static CGU_Vec2f cmp_OptimizeEndPoints(CGU_FLOAT* pPoints, CGU_INT8 cSteps, CGU_BOOL isSigned)
{
    CGU_Vec2f fendpoints;
    CGU_FLOAT MAX_VALUE = 1.0f;
    CGU_FLOAT MIN_VALUE = isSigned ? -1.0f : 0.0f;

    // Find Min and Max points, as starting point
    CGU_FLOAT fX = MAX_VALUE;
    CGU_FLOAT fY = MIN_VALUE;

    if (8 == cSteps)
    {
        for (CGU_INT8 iPoint = 0; iPoint < BLOCK_SIZE_4X4; iPoint++)
        {
            if (pPoints[iPoint] < fX)
                fX = pPoints[iPoint];

            if (pPoints[iPoint] > fY)
                fY = pPoints[iPoint];
        }
    }
    else
    {
        for (CGU_INT8 iPoint = 0; iPoint < BLOCK_SIZE_4X4; iPoint++)
        {
            if (pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE)
                fX = pPoints[iPoint];

            if (pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE)
                fY = pPoints[iPoint];
        }

        if (fX == fY)
        {
            fY = MAX_VALUE;
        }
    }

    //===================
    // Use Newton Method
    //===================
    CGU_FLOAT cStepsDiv = static_cast<CGU_FLOAT>(cSteps - 1);
    CGU_FLOAT pSteps[8];
    CGU_FLOAT fc;
    CGU_FLOAT fd;

    for (CGU_UINT8 iIteration = 0; iIteration < 8; iIteration++)
    {
        // break once the interval reaches the minimum threshold
        if ((fY - fX) < (1.0f / 256.0f))
            break;

        CGU_FLOAT fScale = cStepsDiv / (fY - fX);

        // Calculate new steps
        for (CGU_INT8 iStep = 0; iStep < cSteps; iStep++)
        {
            fc            = (cStepsDiv - (CGU_FLOAT)iStep) / cStepsDiv;
            fd            = (CGU_FLOAT)iStep / cStepsDiv;
            pSteps[iStep] = fc * fX + fd * fY;
        }

        if (6 == cSteps)
        {
            pSteps[6] = MIN_VALUE;
            pSteps[7] = MAX_VALUE;
        }

        // Evaluate function, and derivatives
        CGU_FLOAT dX  = 0.0f;
        CGU_FLOAT dY  = 0.0f;
        CGU_FLOAT d2X = 0.0f;
        CGU_FLOAT d2Y = 0.0f;

        for (CGU_UINT8 iPoint = 0; iPoint < BLOCK_SIZE_4X4; iPoint++)
        {
            float fDot = (pPoints[iPoint] - fX) * fScale;

            CGU_INT8 iStep;
            if (fDot <= 0.0f)
            {
                iStep = ((6 == cSteps) && (pPoints[iPoint] <= (fX + MIN_VALUE) * 0.5f)) ? 6u : 0u;
            }
            else if (fDot >= cStepsDiv)
            {
                iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + MAX_VALUE) * 0.5f)) ? 7u : (cSteps - 1);
            }
            else
            {
                iStep = CGU_INT8(fDot + 0.5f);
            }

            // steps to improve quality
            if (iStep < cSteps)
            {
                fc              = (cStepsDiv - (CGU_FLOAT)iStep) / cStepsDiv;
                fd              = (CGU_FLOAT)iStep / cStepsDiv;
                CGU_FLOAT fDiff = pSteps[iStep] - pPoints[iPoint];
                dX += fc * fDiff;
                d2X += fc * fc;
                dY += fd * fDiff;
                d2Y += fd * fd;
            }
        }

        // Move endpoints
        if (d2X > 0.0f)
            fX -= dX / d2X;

        if (d2Y > 0.0f)
            fY -= dY / d2Y;

        if (fX > fY)
        {
            float f = fX;
            fX      = fY;
            fY      = f;
        }

        if ((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f)))
            break;
    }

    fendpoints.x = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX;
    fendpoints.y = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY;

    return fendpoints;
}
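
// Finds the two SNORM endpoints for a BC4 alpha block: a quick min/max bounding
// box, then cmp_OptimizeEndPoints. If the block already touches -1.0 or 1.0, the
// 6-interpolant ramp (with fixed -1/1 codes) is used and the endpoints are kept
// in min/max order; otherwise the 8-interpolant ramp is selected by returning
// them swapped (max first, so red_0 > red_1).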
static CGU_Vec2i CMP_FindEndpointsAlphaBlockSnorm(CGU_FLOAT alphaBlockSnorm[])
{
    //================================================================
    // Bounding Box
    // lowest quality calculation to get min and max value to use
    //================================================================
    CGU_Vec2f cmpMinMax;
    cmpMinMax.x = alphaBlockSnorm[0];
    cmpMinMax.y = alphaBlockSnorm[0];

    for (CGU_UINT8 i = 0; i < BLOCK_SIZE_4X4; ++i)
    {
        if (alphaBlockSnorm[i] < cmpMinMax.x)
        {
            cmpMinMax.x = alphaBlockSnorm[i];
        }
        else if (alphaBlockSnorm[i] > cmpMinMax.y)
        {
            cmpMinMax.y = alphaBlockSnorm[i];
        }
    }

    CGU_Vec2i endpoints;
    CGU_Vec2f fendpoints;

    // Are we done for the lowest quality setting?
    // CGU_FLOAT fquality = 1.0f;
    //
    // if (fquality < CMP_QUALITY2) {
    //     endpoints.x = (CGU_INT8)(cmpMinMax.x);
    //     endpoints.y = (CGU_INT8)(cmpMinMax.y);
    //     return endpoints;
    // }

    //================================================================
    // Do more calculations to get the best min and max values to use
    //================================================================
    if ((-1.0f == cmpMinMax.x || 1.0f == cmpMinMax.y))
    {
        fendpoints  = cmp_OptimizeEndPoints(alphaBlockSnorm, 6, true);
        endpoints.x = cmp_SNormFloatToSInt(fendpoints.x);
        endpoints.y = cmp_SNormFloatToSInt(fendpoints.y);
    }
    else
    {
        fendpoints  = cmp_OptimizeEndPoints(alphaBlockSnorm, 8, true);
        endpoints.x = cmp_SNormFloatToSInt(fendpoints.y);
        endpoints.y = cmp_SNormFloatToSInt(fendpoints.x);
    }

    return endpoints;
}
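
// Builds the 8-entry SNORM alpha ramp from the two endpoints (7 interpolated
// steps when alpha_0 > alpha_1, otherwise 5 steps plus the fixed -1.0/1.0 codes),
// then packs the best-fit 3-bit index for each of the 16 pixels into bits 16..63
// of the 64-bit block.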
static uint64_t cmp_getBlockPackedIndicesSNorm(CGU_Vec2f alphaMinMax, const float alphaBlockSnorm[], uint64_t data)
{
    CGU_FLOAT alpha[8];
    alpha[0] = alphaMinMax.x;
    alpha[1] = alphaMinMax.y;

    if (alphaMinMax.x > alphaMinMax.y)
    {
        // 8-alpha block: derive the other six alphas.
        // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated.
        alpha[2] = (alpha[0] * 6.0f + alpha[1]) / 7.0f;
        alpha[3] = (alpha[0] * 5.0f + alpha[1] * 2.0f) / 7.0f;
        alpha[4] = (alpha[0] * 4.0f + alpha[1] * 3.0f) / 7.0f;
        alpha[5] = (alpha[0] * 3.0f + alpha[1] * 4.0f) / 7.0f;
        alpha[6] = (alpha[0] * 2.0f + alpha[1] * 5.0f) / 7.0f;
        alpha[7] = (alpha[0] + alpha[1] * 6.0f) / 7.0f;
    }
    else
    {
        // 6-alpha block.
        // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated.
        alpha[2] = (alpha[0] * 4.0f + alpha[1]) / 5.0f;
        alpha[3] = (alpha[0] * 3.0f + alpha[1] * 2.0f) / 5.0f;
        alpha[4] = (alpha[0] * 2.0f + alpha[1] * 3.0f) / 5.0f;
        alpha[5] = (alpha[0] + alpha[1] * 4.0f) / 5.0f;
        alpha[6] = -1.0f;
        alpha[7] = 1.0f;
    }

    // Index all colors using best alpha value
    for (CGU_UINT8 i = 0; i < BLOCK_SIZE_4X4; ++i)
    {
        CGU_UINT8 uBestIndex = 0;
        CGU_FLOAT fBestDelta = CMP_FLOAT_MAX;
        for (CGU_INT32 uIndex = 0; uIndex < 8; uIndex++)
        {
            CGU_FLOAT fCurrentDelta = fabsf(alpha[uIndex] - alphaBlockSnorm[i]);
            if (fCurrentDelta < fBestDelta)
            {
                uBestIndex = (CGU_UINT8)(uIndex);
                fBestDelta = fCurrentDelta;
            }
        }

        data &= ~(uint64_t(0x07) << (3 * i + 16));
        data |= (uint64_t(uBestIndex) << (3 * i + 16));
    }

    return data;
}

//=============================================================================
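
// BC4_SNORM (ATI1N signed) block layout: byte 0 and byte 1 hold the two signed
// endpoints (red_0, red_1); the remaining six bytes hold the sixteen 3-bit
// selector indices.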
CodecError CCodec_DXTC::CompressAlphaBlockSNorm(CMP_FLOAT alphaBlockSnorm[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2])
{
    union
    {
        CMP_DWORD compressedBlock[2];
        struct
        {
            int8_t  red_0;
            int8_t  red_1;
            uint8_t indices[6];
        };
        uint64_t data;
    } BC4_Snorm_block;

    BC4_Snorm_block.data = 0LL;

    CGU_Vec2i reds;
    reds = CMP_FindEndpointsAlphaBlockSnorm(alphaBlockSnorm);

    BC4_Snorm_block.red_0 = reds.x & 0xFF;
    BC4_Snorm_block.red_1 = reds.y & 0xFF;

    // check low end boundaries
    if (BC4_Snorm_block.red_0 == -128)
        BC4_Snorm_block.red_0 = -127;
    if (BC4_Snorm_block.red_1 == -128)
        BC4_Snorm_block.red_1 = -127;

    // Normalize signed int -128..127 to float -1..1
    CGU_Vec2f alphaMinMax;
    alphaMinMax.x = CGU_FLOAT(BC4_Snorm_block.red_0) / 127.0f;
    alphaMinMax.y = CGU_FLOAT(BC4_Snorm_block.red_1) / 127.0f;

    BC4_Snorm_block.data = cmp_getBlockPackedIndicesSNorm(alphaMinMax, alphaBlockSnorm, BC4_Snorm_block.data);

    compressedBlock[0] = BC4_Snorm_block.compressedBlock[0];
    compressedBlock[1] = BC4_Snorm_block.compressedBlock[1];

    return CE_OK;
}
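
// Fast path: 64-bit Windows builds reuse CompressAlphaBlock above; other builds
// call the older DXTCV11 alpha compressor.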
CodecError CCodec_DXTC::CompressAlphaBlock_Fast(CMP_BYTE alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
#ifdef _WIN64
    CompressAlphaBlock(alphaBlock, compressedBlock);
#else  // !_WIN64
    DXTCV11CompressAlphaBlock(alphaBlock, compressedBlock);
#endif
    return CE_OK;
}

CodecError CCodec_DXTC::CompressAlphaBlock(CODECFLOAT alphaBlock[BLOCK_SIZE_4X4X4], CMP_DWORD compressedBlock[2]) {
    BYTE nEndpoints[2][2];
    BYTE nIndices[2][BLOCK_SIZE_4X4];

    float fError8 = CompBlock1X(alphaBlock, BLOCK_SIZE_4X4, nEndpoints[0], nIndices[0], 8, false, m_bUseSSE2, 8, 0, true);
    float fError6 = (fError8 == 0.f) ? FLT_MAX : CompBlock1X(alphaBlock, BLOCK_SIZE_4X4, nEndpoints[1], nIndices[1], 6, true, m_bUseSSE2, 8, 0, true);

    if(fError8 <= fError6)
        EncodeAlphaBlock(compressedBlock, nEndpoints[0], nIndices[0]);
    else
        EncodeAlphaBlock(compressedBlock, nEndpoints[1], nIndices[1]);

    return CE_OK;
}
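
// Packs a DXT5/BC3-style alpha block: the two endpoint bytes go into bits 0..15
// of the first dword, followed by sixteen 3-bit indices. Pixel 5's index straddles
// the dword boundary (1 bit in compressedBlock[0], 2 bits in compressedBlock[1]).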
void CCodec_DXTC::EncodeAlphaBlock(CMP_DWORD compressedBlock[2], BYTE nEndpoints[2], BYTE nIndices[BLOCK_SIZE_4X4]) {
    compressedBlock[0] = ((int)nEndpoints[0]) | (((int)nEndpoints[1])<<8);
    compressedBlock[1] = 0;

    for(int i = 0; i < BLOCK_SIZE_4X4; i++) {
        if(i < 5)
            compressedBlock[0] |= (nIndices[i] & 0x7) << (16 + (i * 3));
        else if(i > 5)
            compressedBlock[1] |= (nIndices[i] & 0x7) << (2 + (i-6) * 3);
        else {
            compressedBlock[0] |= (nIndices[i] & 0x1) << 31;
            compressedBlock[1] |= (nIndices[i] & 0x6) >> 1;
        }
    }
}

//
// This function decompresses a block
//
void CCodec_DXTC::DecompressAlphaBlock(CMP_BYTE alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
    CMP_BYTE alpha[8];
    GetCompressedAlphaRamp(alpha, compressedBlock);

    for(int i = 0; i < BLOCK_SIZE_4X4; i++) {
        CMP_DWORD index;
        if(i < 5)
            index = (compressedBlock[0] & (0x7 << (16 + (i * 3)))) >> (16 + (i * 3));
        else if(i > 5)
            index = (compressedBlock[1] & (0x7 << (2 + (i-6)*3))) >> (2 + (i-6)*3);
        else {
            index = (compressedBlock[0] & 0x80000000) >> 31;
            index |= (compressedBlock[1] & 0x3) << 1;
        }

        alphaBlock[i] = alpha[index];
    }
}

//
// This function decompresses a signed block
//
void CCodec_DXTC::DecompressAlphaBlockInt8(CMP_SBYTE alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2])
{
    CMP_SBYTE alpha[8];
    GetCompressedAlphaRampS(alpha, compressedBlock);

    for (int i = 0; i < BLOCK_SIZE_4X4; i++)
    {
        CMP_DWORD index;
        if (i < 5)
            index = (compressedBlock[0] & (0x7 << (16 + (i * 3)))) >> (16 + (i * 3));
        else if (i > 5)
            index = (compressedBlock[1] & (0x7 << (2 + (i - 6) * 3))) >> (2 + (i - 6) * 3);
        else
        {
            index = (compressedBlock[0] & 0x80000000) >> 31;
            index |= (compressedBlock[1] & 0x3) << 1;
        }

        alphaBlock[i] = alpha[index];
    }
}

void CCodec_DXTC::DecompressAlphaBlock(CODECFLOAT alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
    CODECFLOAT alpha[8];
    GetCompressedAlphaRamp(alpha, compressedBlock);

    for(int i = 0; i < BLOCK_SIZE_4X4; i++) {
        CMP_DWORD index;
        if(i < 5)
            index = (compressedBlock[0] & (0x7 << (16 + (i * 3)))) >> (16 + (i * 3));
        else if(i > 5)
            index = (compressedBlock[1] & (0x7 << (2 + (i-6)*3))) >> (2 + (i-6)*3);
        else {
            index = (compressedBlock[0] & 0x80000000) >> 31;
            index |= (compressedBlock[1] & 0x3) << 1;
        }

        alphaBlock[i] = alpha[index];
    }
}

#define EXPLICIT_ALPHA_PIXEL_MASK 0xf
#define EXPLICIT_ALPHA_PIXEL_BPP  4

CodecError CCodec_DXTC::CompressExplicitAlphaBlock(CMP_BYTE alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
    DXTCV11CompressExplicitAlphaBlock(alphaBlock, compressedBlock);

    return CE_OK;
}

CodecError CCodec_DXTC::CompressExplicitAlphaBlock_Fast(CMP_BYTE alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
    // Should remove or update this: DXTCV11CompressExplicitAlphaBlockMMX(alphaBlock, compressedBlock);
    CompressExplicitAlphaBlock(alphaBlock, compressedBlock);
    return CE_OK;
}
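
// DXT3/BC2 explicit alpha: each float alpha is converted to 8 bits, reduced to a
// 4-bit code with a rounding correction, and eight 4-bit codes are packed per dword.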
CodecError CCodec_DXTC::CompressExplicitAlphaBlock(CODECFLOAT alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
    compressedBlock[0] = compressedBlock[1] = 0;
    for(int i=0; i<16; i++) {
        int nBlock = i < 8 ? 0 : 1;
        CMP_BYTE cAlpha = CONVERT_FLOAT_TO_BYTE(alphaBlock[i]);
        cAlpha = (CMP_BYTE) ((cAlpha + ((cAlpha >> EXPLICIT_ALPHA_PIXEL_BPP) < 0x8 ? 7 : 8) - (cAlpha >> EXPLICIT_ALPHA_PIXEL_BPP)) >> EXPLICIT_ALPHA_PIXEL_BPP);
        if(cAlpha > EXPLICIT_ALPHA_PIXEL_MASK)
            cAlpha = EXPLICIT_ALPHA_PIXEL_MASK;

        compressedBlock[nBlock] |= (cAlpha << ((i % 8) * EXPLICIT_ALPHA_PIXEL_BPP));
    }

    return CE_OK;
}

//
// This function decompresses an explicit alpha block (DXT3)
//
void CCodec_DXTC::DecompressExplicitAlphaBlock(CMP_BYTE alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
    for(int i=0; i<16; i++) {
        int nBlock = i < 8 ? 0 : 1;
        CMP_BYTE cAlpha = (CMP_BYTE) ((compressedBlock[nBlock] >> ((i % 8) * EXPLICIT_ALPHA_PIXEL_BPP)) & EXPLICIT_ALPHA_PIXEL_MASK);
        alphaBlock[i] = (CMP_BYTE) ((cAlpha << EXPLICIT_ALPHA_PIXEL_BPP) | cAlpha);
    }
}

void CCodec_DXTC::DecompressExplicitAlphaBlock(CODECFLOAT alphaBlock[BLOCK_SIZE_4X4], CMP_DWORD compressedBlock[2]) {
    for(int i=0; i<16; i++) {
        int nBlock = i < 8 ? 0 : 1;
        CMP_BYTE cAlpha = (CMP_BYTE) ((compressedBlock[nBlock] >> ((i % 8) * EXPLICIT_ALPHA_PIXEL_BPP)) & EXPLICIT_ALPHA_PIXEL_MASK);
        alphaBlock[i] = CONVERT_BYTE_TO_FLOAT((cAlpha << EXPLICIT_ALPHA_PIXEL_BPP) | cAlpha);
    }
}
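
// Expands the two stored endpoints into the full 8-entry alpha ramp, following
// the BC3 rule: alpha_0 > alpha_1 selects six interpolated values, otherwise
// four interpolated values plus the fixed 0 and 255 codes.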
void CCodec_DXTC::GetCompressedAlphaRamp(CMP_BYTE alpha[8], CMP_DWORD compressedBlock[2]) {
    alpha[0] = (CMP_BYTE)(compressedBlock[0] & 0xff);
    alpha[1] = (CMP_BYTE)((compressedBlock[0]>>8) & 0xff);

    if (alpha[0] > alpha[1]) {
        // 8-alpha block: derive the other six alphas.
        // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated.
        alpha[2] = static_cast<CMP_BYTE>((6 * alpha[0] + 1 * alpha[1] + 3) / 7);  // bit code 010
        alpha[3] = static_cast<CMP_BYTE>((5 * alpha[0] + 2 * alpha[1] + 3) / 7);  // bit code 011
        alpha[4] = static_cast<CMP_BYTE>((4 * alpha[0] + 3 * alpha[1] + 3) / 7);  // bit code 100
        alpha[5] = static_cast<CMP_BYTE>((3 * alpha[0] + 4 * alpha[1] + 3) / 7);  // bit code 101
        alpha[6] = static_cast<CMP_BYTE>((2 * alpha[0] + 5 * alpha[1] + 3) / 7);  // bit code 110
        alpha[7] = static_cast<CMP_BYTE>((1 * alpha[0] + 6 * alpha[1] + 3) / 7);  // bit code 111
    } else {
        // 6-alpha block.
        // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated.
        alpha[2] = static_cast<CMP_BYTE>((4 * alpha[0] + 1 * alpha[1] + 2) / 5);  // Bit code 010
        alpha[3] = static_cast<CMP_BYTE>((3 * alpha[0] + 2 * alpha[1] + 2) / 5);  // Bit code 011
        alpha[4] = static_cast<CMP_BYTE>((2 * alpha[0] + 3 * alpha[1] + 2) / 5);  // Bit code 100
        alpha[5] = static_cast<CMP_BYTE>((1 * alpha[0] + 4 * alpha[1] + 2) / 5);  // Bit code 101
        alpha[6] = 0;                                                             // Bit code 110
        alpha[7] = 255;                                                           // Bit code 111
    }
}

void CCodec_DXTC::GetCompressedAlphaRamp(CODECFLOAT alpha[8], CMP_DWORD compressedBlock[2]) {
    alpha[0] = CONVERT_BYTE_TO_FLOAT(compressedBlock[0] & 0xff);
    alpha[1] = CONVERT_BYTE_TO_FLOAT((compressedBlock[0]>>8) & 0xff);

    if (alpha[0] > alpha[1]) {
        // 8-alpha block: derive the other six alphas.
        // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated.
        alpha[2] = (6 * alpha[0] + 1 * alpha[1]) / 7;  // bit code 010
        alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7;  // bit code 011
        alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7;  // bit code 100
        alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7;  // bit code 101
        alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7;  // bit code 110
        alpha[7] = (1 * alpha[0] + 6 * alpha[1]) / 7;  // bit code 111
    } else {
        // 6-alpha block.
        // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated.
        alpha[2] = (4 * alpha[0] + 1 * alpha[1]) / 5;  // Bit code 010
        alpha[3] = (3 * alpha[0] + 2 * alpha[1]) / 5;  // Bit code 011
        alpha[4] = (2 * alpha[0] + 3 * alpha[1]) / 5;  // Bit code 100
        alpha[5] = (1 * alpha[0] + 4 * alpha[1]) / 5;  // Bit code 101
        alpha[6] = 0;                                  // Bit code 110
        alpha[7] = 1.0f;                               // Bit code 111
    }
}

void CCodec_DXTC::GetCompressedAlphaRampS(CMP_SBYTE alpha[8], CMP_DWORD compressedBlock[2])
{
    alpha[0] = (CMP_SBYTE)(compressedBlock[0] & 0xff);
    alpha[1] = (CMP_SBYTE)((compressedBlock[0] >> 8) & 0xff);

    if (alpha[0] > alpha[1])
    {
        // 8-alpha block: derive the other six alphas.
        // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated.
        alpha[2] = static_cast<CMP_SBYTE>((6 * alpha[0] + 1 * alpha[1] + 3) / 7);  // bit code 010
        alpha[3] = static_cast<CMP_SBYTE>((5 * alpha[0] + 2 * alpha[1] + 3) / 7);  // bit code 011
        alpha[4] = static_cast<CMP_SBYTE>((4 * alpha[0] + 3 * alpha[1] + 3) / 7);  // bit code 100
        alpha[5] = static_cast<CMP_SBYTE>((3 * alpha[0] + 4 * alpha[1] + 3) / 7);  // bit code 101
        alpha[6] = static_cast<CMP_SBYTE>((2 * alpha[0] + 5 * alpha[1] + 3) / 7);  // bit code 110
        alpha[7] = static_cast<CMP_SBYTE>((1 * alpha[0] + 6 * alpha[1] + 3) / 7);  // bit code 111
    }
    else
    {
        // 6-alpha block.
        // Bit code 000 = alpha_0, 001 = alpha_1, others are interpolated.
        alpha[2] = static_cast<CMP_SBYTE>((4 * alpha[0] + 1 * alpha[1] + 2) / 5);  // Bit code 010
        alpha[3] = static_cast<CMP_SBYTE>((3 * alpha[0] + 2 * alpha[1] + 2) / 5);  // Bit code 011
        alpha[4] = static_cast<CMP_SBYTE>((2 * alpha[0] + 3 * alpha[1] + 2) / 5);  // Bit code 100
        alpha[5] = static_cast<CMP_SBYTE>((1 * alpha[0] + 4 * alpha[1] + 2) / 5);  // Bit code 101
        alpha[6] = -127;                                                           // Bit code 110
        alpha[7] = 127;                                                            // Bit code 111
    }
}