//===============================================================================
|
|
// Copyright (c) 2007-2016 Advanced Micro Devices, Inc. All rights reserved.
|
|
// Copyright (c) 2004-2006 ATI Technologies Inc.
|
|
//===============================================================================
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files(the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions :
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
// THE SOFTWARE.
|
|
//
|
|
//
|
|
// BC7_Encode.cpp : A reference encoder for BC7
|
|
//
|
|
|
|
#include <assert.h>
|
|
#include <float.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include "common.h"
|
|
#include "bc7_definitions.h"
|
|
#include "bc7_partitions.h"
|
|
#include "bc7_encode.h"
|
|
#include "bc7_utils.h"
|
|
#include "3dquant_vpc.h"
|
|
#include "shake.h"
|
|
#include "debug.h"
|
|
|
|
//#ifdef USE_CMP_CORE_API
|
|
//#include "bcn_common_kernel.h"
|
|
//#include "bcn_common_api.h"
|
|
//#include "bc7_encode_kernel.h"
|
|
//#endif
|
|
|
|
#ifdef BC7_COMPDEBUGGER
|
|
#include "compclient.h"
|
|
#endif
|
|
|
|
#ifdef USE_FILEIO
|
|
#include <stdio.h>
|
|
extern FILE * bc7_File;
|
|
#endif
|
|
|
|
// Threshold quality below which we will always run fast quality and shaking
|
|
// Selfnote: User should be able to set this?
|
|
// Default FQuality is at 0.1 < g_qFAST_THRESHOLD which will cause the SingleIndex compression to start skipping shape blocks
|
|
// during compression
|
|
// if user sets a value above this then all shapes will be used for compression scan for quality
|
|
// Quality threshold (see comments above): compression quality below this
// value triggers the reduced/fast partition search and shape skipping.
double g_qFAST_THRESHOLD = 0.5;

// This limit is used for DualIndex Block and if fQuality is above this limit then Quantization shaking will always be performed
// on all indexs
// NOTE(review): name keeps its historical spelling ("HIGHQULITY") — external
// references may depend on it, so do not rename without a global sweep.
double g_HIGHQULITY_THRESHOLD = 0.7;
|
|
//
|
|
// For a given block mode this sets up the data needed by the compressor
|
|
//
|
|
// Note that BC7 only uses NO_PBIT, ONE_PBIT and TWO_PBIT encodings
|
|
// for endpoints
|
|
//
|
|
|
|
//
// Prepares the per-mode compression state: maps the mode's p-bit encoding to
// the quantiser parity type, distributes the mode's endpoint bits across the
// colour/alpha components, and derives the cluster counts from the index bits.
//
// Note that BC7 only uses NO_PBIT, ONE_PBIT and TWO_PBIT encodings
// for endpoints.
//
void BC7BlockEncoder::BlockSetup(CMP_DWORD blockMode) {
#ifdef USE_DBGTRACE
    DbgTrace(());
#endif
    const auto& modeInfo = bti_cpu[blockMode];

    // Translate the block's p-bit layout into the parity type the quantiser
    // understands.
    switch (modeInfo.pBitType) {
    case NO_PBIT:
        m_parityBits = CART;
        break;
    case ONE_PBIT:
        m_parityBits = SAME_PAR;
        break;
    case TWO_PBIT:
        m_parityBits = BCC;
        break;
    case THREE_PBIT:
        m_parityBits = SAME_FCC;
        break;
    case FOUR_PBIT:
        m_parityBits = FCC;
        break;
    case FIVE_PBIT:
        m_parityBits = FCC_SAME_BCC;
        break;
    }

    if (modeInfo.encodingType == NO_ALPHA) {
        // RGB only: the vector bits are split evenly over three channels.
        const auto channelBits = modeInfo.vectorBits / 3;
        m_componentBits[COMP_RED]   = channelBits;
        m_componentBits[COMP_GREEN] = channelBits;
        m_componentBits[COMP_BLUE]  = channelBits;
        m_componentBits[COMP_ALPHA] = 0;

        m_clusters[0] = 1 << modeInfo.indexBits[0];
        m_clusters[1] = 0;
    } else if (modeInfo.encodingType == COMBINED_ALPHA) {
        // RGBA in a single vector: split evenly over four channels.
        const auto channelBits = modeInfo.vectorBits / 4;
        m_componentBits[COMP_RED]   = channelBits;
        m_componentBits[COMP_GREEN] = channelBits;
        m_componentBits[COMP_BLUE]  = channelBits;
        m_componentBits[COMP_ALPHA] = channelBits;

        m_clusters[0] = 1 << modeInfo.indexBits[0];
        m_clusters[1] = 0;
    } else if (modeInfo.encodingType == SEPARATE_ALPHA) {
        // RGB vector plus a scalar alpha channel with its own index set.
        const auto channelBits = modeInfo.vectorBits / 3;
        m_componentBits[COMP_RED]   = channelBits;
        m_componentBits[COMP_GREEN] = channelBits;
        m_componentBits[COMP_BLUE]  = channelBits;
        m_componentBits[COMP_ALPHA] = modeInfo.scalarBits;

        m_clusters[0] = 1 << modeInfo.indexBits[0];
        m_clusters[1] = 1 << modeInfo.indexBits[1];
    }
}
|
|
|
|
//
|
|
// This function sorts out the bit encoding for the BC7 block and packs everything
|
|
// in the right order for the hardware decoder
|
|
//
|
|
//
|
|
//
|
|
|
|
//
// Packs one single-index BC7 block into the 128-bit output in hardware bit
// order: unary mode header, partition id, per-component endpoint bits,
// optional parity (p) bits, then the per-texel indices — with each fixup
// index losing its MSB, which is implicitly zero in the encoding.
//
// blockMode  - BC7 mode number (also the length of the unary header)
// partition  - partition/shape index for this mode
// colour     - packed endpoint pair per subset (may be swapped in place here)
// indices    - per-subset ramp indices (consumed in partition-table order)
// block      - receives the COMPRESSED_BLOCK_SIZE-byte encoded block
//
void BC7BlockEncoder::EncodeSingleIndexBlock(CMP_DWORD blockMode,
        CMP_DWORD partition,
        CMP_DWORD colour[MAX_SUBSETS][2],
        int indices[MAX_SUBSETS][MAX_SUBSET_SIZE],
        //CMP_DWORD entryCount[MAX_SUBSETS],
        CMP_BYTE block[COMPRESSED_BLOCK_SIZE]) {
#ifdef USE_DBGTRACE
    DbgTrace(("-> WriteBit()"));
#endif
    CMP_DWORD i,j,k;
    CMP_DWORD *partitionTable;
    int bitPosition = 0;    // Position the pointer at the LSB
    CMP_BYTE *basePtr = (CMP_BYTE*)block;
    CMP_DWORD blockIndices[MAX_SUBSET_SIZE];

    // Generate Unary header: blockMode zero bits followed by a single one bit
    for(i=0; i < (int)blockMode; i++) {
        WriteBit(basePtr, bitPosition++, 0);
    }
    WriteBit(basePtr, bitPosition++, 1);

    // Write partition bits (LSB first)
    for(i=0; i<bti_cpu[blockMode].partitionBits; i++) {
        WriteBit(basePtr, bitPosition++, (CMP_BYTE)(partition>>i) & 0x1);
    }

    // Extract the index bits from the partitions
    partitionTable = (CMP_DWORD*)BC7_PARTITIONS_CPU[bti_cpu[blockMode].subsetCount-1][partition];

    CMP_DWORD idxCount[3] = {0, 0, 0};
    bool flipColours[3] = {false, false, false};

    // Sort out the index set and tag whether we need to flip the
    // endpoints to get the correct state in the implicit index bits
    // The implicitly encoded MSB of the fixup index must be 0
    CMP_DWORD fixup[3] = {0, 0, 0};
    switch(bti_cpu[blockMode].subsetCount) {
    case 3:
        fixup[1] = BC7_FIXUPINDICES[2][partition][1];
        fixup[2] = BC7_FIXUPINDICES[2][partition][2];
        break;
    case 2:
        fixup[1] = BC7_FIXUPINDICES[1][partition][1];
        break;
    default:
        // Single subset: the fixup index is always texel 0 (fixup[] init)
        break;
    }

    // Extract indices and mark subsets that need to have their colours flipped to get the
    // right state for the implicit MSB of the fixup index
    for(i=0; i < MAX_SUBSET_SIZE; i++) {
        CMP_DWORD p = partitionTable[i];
        blockIndices[i] = indices[p][idxCount[p]++];

        for(j=0; j<(int)bti_cpu[blockMode].subsetCount; j++) {
            if(i==fixup[j]) {
                // MSB set on a fixup index means the whole subset must be inverted
                if(blockIndices[i] & (1<<(bti_cpu[blockMode].indexBits[0]-1))) {
                    flipColours[j] = true;
                }
            }
        }
    }

    // Now we must flip the endpoints where necessary so that the implicitly encoded
    // index bits have the correct state
    for(i=0; i<(int)bti_cpu[blockMode].subsetCount; i++) {
        if(flipColours[i]) {
            CMP_DWORD temp;
            temp = colour[i][0];
            colour[i][0] = colour[i][1];
            colour[i][1] = temp;
        }
    }

    // ...next flip the indices where necessary
    for(i=0; i<MAX_SUBSET_SIZE; i++) {
        CMP_DWORD p = partitionTable[i];
        if(flipColours[p]) {
            // Invert the index across the ramp: (2^indexBits - 1) - index
            blockIndices[i] = ((1 << bti_cpu[blockMode].indexBits[0]) - 1) - blockIndices[i];
        }
    }

    CMP_DWORD subset, ep, component;

    // Endpoints are stored in the following order RRRR GGGG BBBB (AAAA) (PPPP)
    // i.e. components are packed together
    CMP_DWORD unpackedColours[MAX_SUBSETS][2][MAX_DIMENSION_BIG];
    CMP_DWORD parityBits[MAX_SUBSETS][2];

    // Unpack the colour values for the subsets
    for(i=0; i<bti_cpu[blockMode].subsetCount; i++) {
        CMP_DWORD packedColours[2] = {colour[i][0],
                                      colour[i][1]
                                     };

        // Peel the parity bit(s) off the bottom of each packed endpoint first
        if(bti_cpu[blockMode].pBitType == TWO_PBIT) {
            parityBits[i][0] = packedColours[0] & 1;
            parityBits[i][1] = packedColours[1] & 1;
            packedColours[0] >>= 1;
            packedColours[1] >>= 1;
        } else if(bti_cpu[blockMode].pBitType == ONE_PBIT) {
            // Shared p-bit: both endpoints carry the same parity value,
            // taken from the second packed endpoint
            parityBits[i][0] = packedColours[1] & 1;
            parityBits[i][1] = packedColours[1] & 1;
            packedColours[0] >>= 1;
            packedColours[1] >>= 1;
        } else {
            parityBits[i][0] = 0;
            parityBits[i][1] = 0;
        }

        // Split the remaining packed value into per-component fields,
        // consuming m_componentBits bits per enabled channel
        CMP_DWORD component1;
        for(component1=0; component1<MAX_DIMENSION_BIG; component1++) {
            if(m_componentBits[component1]) {
                unpackedColours[i][0][component1] = packedColours[0] & ((1 << m_componentBits[component1]) - 1);
                unpackedColours[i][1][component1] = packedColours[1] & ((1 << m_componentBits[component1]) - 1);
                packedColours[0] >>= m_componentBits[component1];
                packedColours[1] >>= m_componentBits[component1];
            }
        }
    }

    // Loop over components
    for(component=0; component < MAX_DIMENSION_BIG; component++) {
        // loop over subsets
        for(subset=0; subset<(int)bti_cpu[blockMode].subsetCount; subset++) {
            // Loop over endpoints and write colour bits
            for(ep=0; ep<2; ep++) {
                // Write this component (LSB first)
                for(k = 0; k < m_componentBits[component]; k++) {
                    WriteBit(basePtr,
                             bitPosition++,
                             (CMP_BYTE)(unpackedColours[subset][ep][component] >> k) & 0x1);
                }
            }
        }
    }

    // Now write parity bits if present
    if(bti_cpu[blockMode].pBitType != NO_PBIT) {
        for(subset=0; subset<(int)bti_cpu[blockMode].subsetCount; subset++) {
            if(bti_cpu[blockMode].pBitType == ONE_PBIT) {
                WriteBit(basePtr,
                         bitPosition++,
                         parityBits[subset][0] & 1);
            } else if(bti_cpu[blockMode].pBitType == TWO_PBIT) {
                WriteBit(basePtr,
                         bitPosition++,
                         parityBits[subset][0] & 1);
                WriteBit(basePtr,
                         bitPosition++,
                         parityBits[subset][1] & 1);
            }
        }
    }

    // Now encode the index bits
    for(i=0; i<MAX_SUBSET_SIZE; i++) {
        CMP_DWORD p = partitionTable[i];
        // If this is a fixup index then drop the MSB which is implicitly 0
        if(i==fixup[p]) {
            for(j=0; j<(bti_cpu[blockMode].indexBits[0]-1); j++) {
                WriteBit(basePtr, bitPosition++,(CMP_BYTE)(blockIndices[i]>>j));
            }
        } else {
            for(j=0; j<bti_cpu[blockMode].indexBits[0]; j++) {
                WriteBit(basePtr, bitPosition++,(CMP_BYTE)(blockIndices[i]>>j));
            }
        }
    }

    // Check that we encoded exactly the right number of bits; a mismatch
    // means the mode table and the writes above disagree, so bail out
    if(bitPosition != (COMPRESSED_BLOCK_SIZE * 8)) {
#ifdef USE_DBGTRACE
        DbgTrace(("Error:Encoded incorrect number of bits"));
#endif
        return;
    }

#ifdef USE_DBGTRACE
    DbgTrace(("OUTPUT [%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x]",
              block[ 0],block[ 1],block[ 2],block[ 3],
              block[ 4],block[ 5],block[ 6],block[ 7],
              block[ 8],block[ 9],block[10],block[11],
              block[12],block[13],block[14],block[15]));
#endif

}
|
|
|
|
|
|
//
|
|
// This routine can be used to compress a block to any of the modes with a shared index set
|
|
//
|
|
// It will encode the best result for this mode into a BC7 block
|
|
//
|
|
//
|
|
//
|
|
// For debugging this is a no color 4x4 BC7 block
|
|
//BYTE BlankBC7Block[16] = { 0x40, 0xC0, 0x1F, 0xF0, 0x07, 0xFC, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
|
|
|
|
|
//
// Compresses one 4x4 block to the given single-index block mode.
// Pipeline: (1) quantize every candidate partition and record its error,
// (2) sort partitions by error, (3) refine the endpoints of the best
// candidates with the endpoint shakers, (4) pack the winner via
// EncodeSingleIndexBlock. Returns the smallest error found.
//
// in        - 16 input texels, one MAX_DIMENSION_BIG vector per texel
// out       - receives the encoded COMPRESSED_BLOCK_SIZE-byte block
// blockMode - BC7 mode to compress to
//
double BC7BlockEncoder::CompressSingleIndexBlock(
    double in[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],
    CMP_BYTE out[COMPRESSED_BLOCK_SIZE],
    CMP_DWORD blockMode) {
#ifdef USE_DBGTRACE
    DbgTrace(("<---------CompressSingleIndexBlock----------->"));
#endif
    CMP_DWORD i, k, n;
    CMP_DWORD dimension;

    // Figure out the effective dimension of this block mode
    if(bti_cpu[blockMode].encodingType == NO_ALPHA) {
        dimension = 3;
    } else {
        dimension = 4;
    }

    CMP_DWORD numPartitionModes = 1 << bti_cpu[blockMode].partitionBits;
    CMP_DWORD partitionsToTry = numPartitionModes;

    // Linearly reduce the number of partitions to try as the quality falls below a threshold
    if(m_quality < g_qFAST_THRESHOLD) {
        partitionsToTry = (CMP_DWORD)floor((double)(partitionsToTry * m_partitionSearchSize) + 0.5);
        partitionsToTry = cmp_minT(numPartitionModes, cmp_maxT(1, partitionsToTry));
    }

    CMP_DWORD blockPartition;
    double partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
    CMP_DWORD entryCount[MAX_SUBSETS];
    CMP_DWORD subset;

#ifdef BC7_DEBUG_TO_RESULTS_TXT
    // NOTE(review): "\C" below is not a valid escape sequence — the intended
    // string was probably "\nCompressSingleIndexBlock\n"; confirm before use.
    fprintf(fp,"\CompressSingleIndexBlock\n");
    fprintf(fp,"blockMode = %d\n",blockMode);
    fprintf(fp,"numPartitionModes = %d\n",numPartitionModes);
    fprintf(fp,"partitionsToTry = %d\n",partitionsToTry);
    fprintf(fp,"m_blockMaxRange = %4.0f\n",m_blockMaxRange);
    fprintf(fp,"m_quantizerRangeThreshold = %4.0f\n",m_quantizerRangeThreshold);
    fprintf(fp,"m_clusters[0] = %d\n",m_clusters[0]);
#endif

#ifdef USE_DBGTRACE
    DbgTrace(("blockMode [%d] numPartitionModes [%d] partitionsToTry [%2d]",
              blockMode,
              numPartitionModes,
              partitionsToTry));
    DbgTrace((" m_blockMaxRange [%2d] m_quantizerRangeThreshold [%4.0f] m_clusters[0] = %d",
              m_blockMaxRange,
              m_quantizerRangeThreshold,
              m_clusters[0]));
#endif

    // Loop over the available partitions for the block mode and quantize them
    // to figure out the best candidates for further refinement
    for(blockPartition = 0;
            blockPartition < partitionsToTry;
            blockPartition++) {
        // Split the input texels into per-subset lists for this shape
        Partition(blockPartition,
                  in,
                  partition,
                  entryCount,
                  blockMode,
                  dimension);

        double error = 0.;
        double outB[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
        double direction[MAX_DIMENSION_BIG];
        double step;

        for(subset=0; subset < bti_cpu[blockMode].subsetCount; subset++) {
            int indices[MAX_SUBSETS][MAX_SUBSET_SIZE];

            if(entryCount[subset]) {

                // Choose the quantizer: the annealing variant for many
                // clusters or low-range blocks, the trace variant otherwise
                if((m_clusters[0] > 8) ||
                        (m_blockMaxRange <= m_quantizerRangeThreshold)) {

#ifdef BC7_DEBUG_TO_RESULTS_TXT
                    fprintf(fp,"\noptQuantAnD_d\n");
#endif
                    error += optQuantAnD_d(partition[subset],
                                           entryCount[subset],
                                           m_clusters[0],
                                           indices[subset],
                                           outB,
                                           direction,
                                           &step,
                                           dimension);

#ifdef BC7_DEBUG_TO_RESULTS_TXT
                    // NOTE(review): several integer values below are printed
                    // with %4.2f (e.g. indices, dimension) — format/argument
                    // mismatch; confirm intent before enabling this path.
                    if (blockPartition == 11) {
                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++) {
                            fprintf(fp,"partition[%2d] = %4.2f, %4.2f, %4.2f\n",row,partition[subset][row][0],partition[subset][row][1],partition[subset][row][2]);
                        }

                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++) {
                            fprintf(fp,"indices[0][%2d] = %4.2f\n",row,indices[0][row]);
                        }

                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++) {
                            fprintf(fp,"outB[%2d] = %4.2f, %4.2f, %4.2f\n",row,outB[row][0],outB[row][1],outB[row][2]);
                        }

                        fprintf(fp,"\n");
                        fprintf(fp,"entryCount = %d\n",entryCount[subset]);
                        fprintf(fp,"m_clusters[0] = %d\n",m_clusters[0]);
                        fprintf(fp,"Direction = %4.2f, %4.2f, %4.2f\n",direction[0],direction[1],direction[2]);
                        fprintf(fp,"step = %4.2f\n",step);
                        fprintf(fp,"dimension = %4.2f\n",dimension);
                        fprintf(fp,"error = %4.2f\n",error);
                    }
#endif
                } else {
#ifdef BC7_DEBUG_TO_RESULTS_TXT
                    // NOTE(review): "\o" is an octal-escape prefix, not a
                    // newline — intended string was probably
                    // "\noptQuantTrace_d\n"; confirm before use.
                    fprintf(fp,"\optQuantTrace_d\n");
#endif
                    error += optQuantTrace_d(partition[subset],
                                             entryCount[subset],
                                             m_clusters[0],
                                             indices[subset],
                                             outB,
                                             direction,
                                             &step,
                                             dimension);

#ifdef BC7_DEBUG_TO_RESULTS_TXT
                    if (blockPartition == 11) {
                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++) {
                            fprintf(fp,"partition[%2d] = %4.2f, %4.2f, %4.2f\n",row,partition[subset][row][0],partition[subset][row][1],partition[subset][row][2]);
                        }

                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++) {
                            fprintf(fp,"indices[0][%2d] = %4.2f\n",row,indices[0][row]);
                        }

                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++) {
                            fprintf(fp,"outB[%2d] = %4.2f, %4.2f, %4.2f\n",row,outB[row][0],outB[row][1],outB[row][2]);
                        }

                        fprintf(fp,"\n");
                        fprintf(fp,"entryCount = %d\n",entryCount[subset]);
                        fprintf(fp,"m_clusters[0] = %d\n",m_clusters[0]);
                        fprintf(fp,"Direction = %4.2f, %4.2f, %4.2f\n",direction[0],direction[1],direction[2]);
                        fprintf(fp,"step = %4.2f\n",step);
                        fprintf(fp,"dimension = %4.2f\n",dimension);
                        fprintf(fp,"error = %4.2f\n",error);
                    }
#endif

                }

                // Store off the indices for later (the shaking pass reuses
                // them via m_storedIndices, keyed by partition and subset)
                for(CMP_DWORD idx=0; idx < entryCount[subset]; idx++) {
                    m_storedIndices[blockPartition][subset][idx] = indices[subset][idx];
                }
            }
        }

        m_storedError[blockPartition] = error;
    }

    // Sort the results (m_sortedModes receives partition ids ordered by error)
    sortProjection(m_storedError,
                   m_sortedModes,
                   partitionsToTry);

    // Run shaking (endpoint refinement) pass for partitions that gave the
    // best set of errors from quantization

    // ep_shaker will take its endpoint information from bits[0-2]
    // ep_shaker_2_d will take its information from bits[3]
    int bits[4] = {0,0,0,0};

    // ep_shaker_d needs bits specified individually per channel including parity
    bits[0] = m_componentBits[COMP_RED] + (m_parityBits ? 1:0);
    bits[1] = m_componentBits[COMP_GREEN] + (m_parityBits ? 1:0);
    bits[2] = m_componentBits[COMP_BLUE] + (m_parityBits ? 1:0);

    // ep_shaker_2_d needs bits specified as total bits for both endpoints including parity
    for(i=0; i < dimension; i++) {
        bits[3] += m_componentBits[i];
    }
    bits[3] *= 2;
    if(m_parityBits == BCC) {
        bits[3] += 2;
    } else if (m_parityBits == SAME_PAR) {
        bits[3] += 1;
    }

    int epo_code[MAX_SUBSETS][2][MAX_DIMENSION_BIG];
    double epo[2][MAX_DIMENSION_BIG];
    double outB[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];

    int bestEndpoints[MAX_SUBSETS][2][MAX_DIMENSION_BIG];
    int bestIndices[MAX_SUBSETS][MAX_SUBSET_SIZE];
    CMP_DWORD bestEntryCount[MAX_SUBSETS];
    CMP_DWORD bestPartition = 0;
    double bestError = DBL_MAX;

    // Extensive shaking is most important when the ramp is short, and
    // when we have less indices. On a long ramp the quality of the
    // initial quantizing is relatively more important
    // We modulate the shake size according to the number of ramp indices
    // - the more indices we have the less shaking should be required to find a near
    // optimal match

    // shakeSize gives the size of the shake cube (for ep_shaker_2_d)
    // ep_shaker always runs on a 1x1x1 cube on both endpoints
    CMP_DWORD shakeSize = 8 - (CMP_DWORD)floor(1.5 * bti_cpu[blockMode].indexBits[0]);
    shakeSize = cmp_maxT(2, cmp_minT((CMP_DWORD)floor(shakeSize * m_quality + 0.5), 6));

    // Shake attempts indicates how many partitions to try to shake
    CMP_DWORD numShakeAttempts = cmp_maxT(1, cmp_minT((CMP_DWORD)floor(8 * m_quality + 0.5), partitionsToTry));

    // Set up all the parameters for the shakers
    // Must increase shake size if these block endpoints use parity
    if((m_parityBits == SAME_PAR) ||
            (m_parityBits == BCC)) {
        shakeSize += 2;
    }

#ifdef USE_DBGTRACE
    DbgTrace(("%2d numPartitionModes %2d SearchSize %3.3f shakeSize %2d numShakeAttempts %2d\n",
              partitionsToTry,
              numPartitionModes,
              m_partitionSearchSize,
              shakeSize,
              numShakeAttempts));
#endif

    // Now do the endpoint shaking, working from the best-ranked partitions
    for(i=0; i < numShakeAttempts; i++) {
        double error = 0;

        blockPartition = m_sortedModes[i];

        Partition(blockPartition,
                  in,
                  partition,
                  entryCount,
                  blockMode,
                  dimension);

        for(subset=0; subset < bti_cpu[blockMode].subsetCount; subset++) {
            if(entryCount[subset]) {
                // If quality is set low or the dimension is not compatible with
                // shaker_d then just run shaker_2_d
                if((m_blockMaxRange > m_shakerRangeThreshold) ||
                        (dimension != 3)) {
                    error += ep_shaker_2_d(partition[subset],
                                           entryCount[subset],
                                           m_storedIndices[blockPartition][subset],
                                           outB,
                                           epo_code[subset],
                                           shakeSize,
                                           m_clusters[0]-1,
                                           bits[3],
                                           dimension,
                                           epo);
                } else {
                    double tempError[2];
                    int tempIndices[MAX_SUBSET_SIZE];
                    int temp_epo_code[2][MAX_DIMENSION_BIG];

                    // Step one - run ep_shaker and ep_shaker_2 in parallel, and get the error from each

                    for(k=0; k < entryCount[subset]; k++) {
                        tempIndices[k] = m_storedIndices[blockPartition][subset][k];
                    }
                    tempError[0] = ep_shaker_d(partition[subset],
                                               entryCount[subset],
                                               tempIndices,
                                               outB,
                                               temp_epo_code,
                                               m_clusters[0]-1,
                                               bits,
                                               (CMP_qt_cpu)m_parityBits,
                                               dimension);

                    tempError[1] = ep_shaker_2_d(partition[subset],
                                                 entryCount[subset],
                                                 m_storedIndices[blockPartition][subset],
                                                 outB,
                                                 epo_code[subset],
                                                 shakeSize,
                                                 m_clusters[0]-1,
                                                 bits[3],
                                                 dimension,
                                                 epo);

                    if(tempError[0] < tempError[1]) {
                        // If ep_shaker did better than ep_shaker_2 then we need to reshake
                        // the output from ep_shaker using ep_shaker_2 for further refinement

                        tempError[1] = ep_shaker_2_d(partition[subset],
                                                     entryCount[subset],
                                                     tempIndices,
                                                     outB,
                                                     temp_epo_code,
                                                     shakeSize,
                                                     m_clusters[0]-1,
                                                     bits[3],
                                                     dimension,
                                                     epo);

                        // Copy the results into the expected location
                        for(k=0; k<entryCount[subset]; k++) {
                            m_storedIndices[blockPartition][subset][k] = tempIndices[k];
                        }

                        for(k=0; k < MAX_DIMENSION_BIG; k++) {
                            epo_code[subset][0][k] = temp_epo_code[0][k];
                            epo_code[subset][1][k] = temp_epo_code[1][k];
                        }
                    }

                    error += tempError[1];
                }
            }
        }

        // Track the best partition/endpoints/indices seen so far
        if(error < bestError) {
            bestPartition = blockPartition;

            for(subset=0; subset < bti_cpu[blockMode].subsetCount; subset++) {
                bestEntryCount[subset] = entryCount[subset];

                if(entryCount[subset]) {
                    for(k=0; k < dimension; k++) {
                        bestEndpoints[subset][0][k] = epo_code[subset][0][k];
                        bestEndpoints[subset][1][k] = epo_code[subset][1][k];
                    }

                    for(n=0; n < entryCount[subset]; n++) {
                        bestIndices[subset][n] = m_storedIndices[blockPartition][subset][n];
                    }
                }
            }

            bestError = error;
        }

        // Early out if we found we can compress with error below the quality threshold
        if (m_errorThreshold > 0) {
            if(bestError <= m_errorThreshold) {
                break;
            }
        }
    }

    // Now we have all the data needed to encode the block
    // We need to pack the endpoints prior to encoding
    CMP_DWORD packedEndpoints[3][2];
    for(subset=0; subset<bti_cpu[blockMode].subsetCount; subset++) {
        if(bestEntryCount[subset]) {
            CMP_DWORD rightAlignment = 0;
            packedEndpoints[subset][0] = 0;
            packedEndpoints[subset][1] = 0;

            // Sort out parity bits: the parity value is packed into bit 0
            // and the component values shift down one bit to make room
            if(m_parityBits != CART) {
                packedEndpoints[subset][0] = bestEndpoints[subset][0][0] & 1;
                packedEndpoints[subset][1] = bestEndpoints[subset][1][0] & 1;
                for(k=0; k<MAX_DIMENSION_BIG; k++) {
                    bestEndpoints[subset][0][k] >>= 1;
                    bestEndpoints[subset][1][k] >>= 1;
                }
                rightAlignment++;
            }

            // Fixup endpoints: pack each enabled component field above the
            // previously packed ones
            for(k=0; k<dimension; k++) {
                if(m_componentBits[k]) {
                    packedEndpoints[subset][0] |= bestEndpoints[subset][0][k] << rightAlignment;
                    packedEndpoints[subset][1] |= bestEndpoints[subset][1][k] << rightAlignment;
                    rightAlignment += m_componentBits[k];
                }
            }
        }
    }

    // Save the data to output
    EncodeSingleIndexBlock(blockMode,
                           bestPartition,
                           packedEndpoints,
                           bestIndices,
                           out);
    return bestError;
}
|
|
|
|
// Channel permutations applied by the BC7 "rotation" field in the dual-index
// modes. Entry [r][0] names the source component routed to the scalar
// (alpha) channel and [r][1..3] name the components routed to R, G, B
// (see CompressDualIndexBlock, which builds aBlock from [r][0] and cBlock
// from [r][1..3]). Row 0 is the identity; rows 1-3 swap alpha with R/G/B.
static CMP_DWORD componentRotations[4][4] = {
    {COMP_ALPHA, COMP_RED, COMP_GREEN, COMP_BLUE},
    {COMP_RED, COMP_ALPHA, COMP_GREEN, COMP_BLUE},
    {COMP_GREEN, COMP_RED, COMP_ALPHA, COMP_BLUE},
    {COMP_BLUE, COMP_RED, COMP_GREEN, COMP_ALPHA}
};
|
|
|
|
//
// Packs one dual-index BC7 block into the 128-bit output in hardware bit
// order: unary mode header, rotation bits, index-selection bit(s), vector
// (RGB) endpoints, scalar (alpha) endpoints, then the two index sets — with
// texel 0 of each set losing its MSB, which is implicitly zero.
//
// blockMode         - BC7 mode number (also the length of the unary header)
// indexSelection    - nonzero swaps which index set drives vector vs scalar
// componentRotation - rotation field value (see componentRotations table)
// endpoint          - [0] = vector endpoint pair, [1] = scalar endpoint pair
//                     (may be swapped in place here)
// indices           - the two per-texel index sets (may be inverted in place)
// out               - receives the COMPRESSED_BLOCK_SIZE-byte encoded block
//
void BC7BlockEncoder::EncodeDualIndexBlock(CMP_DWORD blockMode,
        CMP_DWORD indexSelection,
        CMP_DWORD componentRotation,
        int endpoint[2][2][MAX_DIMENSION_BIG],
        int indices[2][MAX_SUBSET_SIZE],
        CMP_BYTE out[COMPRESSED_BLOCK_SIZE]) {

#ifdef USE_DBGTRACE
    DbgTrace(("-> WriteBit()"));
#endif
    CMP_DWORD i,j,k;
    int bitPosition = 0;    // Position the pointer at the LSB
    CMP_BYTE *basePtr = out;
    CMP_DWORD idxBits[2];
    CMP_BOOL swapIndices;

    // Generate Unary header for this mode
    for(i=0; i<blockMode; i++) {
        WriteBit(basePtr, bitPosition++, 0);
    }
    WriteBit(basePtr, bitPosition++, 1);

    // Write rotation bits (LSB first)
    // NOTE(review): the mask here is 0xff rather than 0x1 — harmless only if
    // WriteBit masks its value argument down to one bit internally; confirm.
    for(i=0; i<bti_cpu[blockMode].rotationBits; i++) {
        WriteBit(basePtr, bitPosition++, (CMP_BYTE)((componentRotation>>i) & 0xff));
    }

    // Write index selector bits
    for(i=0; i<bti_cpu[blockMode].indexModeBits; i++) {
        WriteBit(basePtr, bitPosition++, (CMP_BYTE)(indexSelection ? 1: 0));
    }

    // When index selection is on, the two sets swap their bit widths and the
    // sets themselves are written in swapped order below
    if(indexSelection) {
        swapIndices = TRUE;
        idxBits[0] = bti_cpu[blockMode].indexBits[1];
        idxBits[1] = bti_cpu[blockMode].indexBits[0];
    } else {
        swapIndices = FALSE;
        idxBits[0] = bti_cpu[blockMode].indexBits[0];
        idxBits[1] = bti_cpu[blockMode].indexBits[1];
    }

    bool flipColours[2] = {false, false};

    // Indicate if we need to fixup the indices: the implicitly encoded MSB
    // of texel 0's index must be 0 in each set
    if(indices[0][0] & (1<<(idxBits[0]-1))) {
        flipColours[0] = true;
    }
    if(indices[1][0] & (1<<(idxBits[1]-1))) {
        flipColours[1] = true;
    }

    // Fixup the indices: invert every index across the ramp
    for(i=0; i<2; i++) {
        if(flipColours[i]) {
            for(j=0; j<MAX_SUBSET_SIZE; j++) {
                indices[i][j] = ((1 << idxBits[i]) - 1) - indices[i][j];
            }
        }
    }

    // Now fixup the endpoints so that the implicitly encoded
    // index bits have the correct state
    for(i=0; i<2; i++) {
        if(flipColours[i]) {
            for(k=0; k<4; k++) {
                CMP_DWORD temp;
                temp = endpoint[i][0][k];
                endpoint[i][0][k] = endpoint[i][1][k];
                endpoint[i][1][k] = temp;
            }
        }
    }

    CMP_DWORD ep, component;
    // Encode the colour and alpha information
    CMP_DWORD vectorComponentBits = bti_cpu[blockMode].vectorBits / 3;

    // Loop over components: RGB channels come from the vector endpoint pair
    // (endpoint[0]), alpha from the scalar pair (endpoint[1])
    for(component=0; component < MAX_DIMENSION_BIG; component++) {
        if(component != COMP_ALPHA) {
            for(ep=0; ep<2; ep++) {
                for(k=0; k<vectorComponentBits; k++) {
                    WriteBit(basePtr,
                             bitPosition++,
                             (CMP_BYTE)((endpoint[0][ep][component] >> k) & 0x1));
                }
            }
        } else {
            for(ep=0; ep<2; ep++) {
                for(j=0; j<bti_cpu[blockMode].scalarBits; j++) {
                    WriteBit(basePtr,
                             bitPosition++,
                             (CMP_BYTE)((endpoint[1][ep][0] >> j) & 0x1));
                }
            }
        }
    }

    // Now encode the index bits
    for(i=0; i<2; i++) {
        CMP_DWORD idxSelect = i;

        if(swapIndices) {
            idxSelect = i ^ 1;
        }
        for(j=0; j<MAX_SUBSET_SIZE; j++) {
            if(j==0) {
                // Texel 0 is the fixup index: drop its implicitly-zero MSB
                for(k=0; k<(idxBits[idxSelect]-1); k++) {
                    WriteBit(basePtr, bitPosition++,(CMP_BYTE)(indices[idxSelect][j]>>k));
                }
            } else {
                for(k=0; k<idxBits[idxSelect]; k++) {
                    WriteBit(basePtr, bitPosition++,(CMP_BYTE)(indices[idxSelect][j]>>k));
                }
            }
        }
    }

    // Check that we encoded exactly the right number of bits; a mismatch
    // means the mode table and the writes above disagree, so bail out
    if(bitPosition != (COMPRESSED_BLOCK_SIZE * 8)) {
        return;
    }

#ifdef USE_DBGTRACE
    DbgTrace(("OUTPUT [%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x]",
              out[ 0],out[ 1],out[ 2],out[ 3],
              out[ 4],out[ 5],out[ 6],out[ 7],
              out[ 8],out[ 9],out[10],out[11],
              out[12],out[13],out[14],out[15]));
#endif

}
|
|
|
|
|
|
double BC7BlockEncoder::CompressDualIndexBlock(double in[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],
|
|
CMP_BYTE out[COMPRESSED_BLOCK_SIZE],
|
|
CMP_DWORD blockMode) {
|
|
#ifdef USE_DBGTRACE
|
|
DbgTrace(("<---------CompressDualIndexBlock----------->"));
|
|
#endif
|
|
CMP_DWORD i;
|
|
double cBlock[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
|
|
double aBlock[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
|
|
|
|
CMP_DWORD maxRotation = 1 << bti_cpu[blockMode].rotationBits;
|
|
CMP_DWORD rotation;
|
|
|
|
CMP_DWORD maxIndexSelection = 1 << bti_cpu[blockMode].indexModeBits;
|
|
CMP_DWORD indexSelection;
|
|
|
|
int indices[2][MAX_SUBSET_SIZE];
|
|
double outQ[2][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
|
|
double direction[MAX_DIMENSION_BIG];
|
|
double step;
|
|
|
|
double quantizerError;
|
|
double bestQuantizerError = DBL_MAX;
|
|
double overallError;
|
|
double bestOverallError = DBL_MAX;
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\nCompressDualIndexBlock\n");
|
|
fprintf(fp,"blockMode = %d\n",blockMode);
|
|
fprintf(fp,"maxIndexSelection = %d\n",maxIndexSelection);
|
|
fprintf(fp,"maxRotation = %d\n",maxRotation);
|
|
fprintf(fp,"m_blockMaxRange = %4.0f\n",m_blockMaxRange);
|
|
fprintf(fp,"m_quantizerRangeThreshold = %4.0f\n",m_quantizerRangeThreshold);
|
|
#endif
|
|
|
|
// Go through each possible rotation and selection of indices
|
|
for(rotation = 0; rotation < maxRotation; rotation++) {
|
|
// A
|
|
|
|
|
|
for(i=0; i<MAX_SUBSET_SIZE; i++) {
|
|
cBlock[i][COMP_RED] = in[i][componentRotations[rotation][1]];
|
|
cBlock[i][COMP_GREEN] = in[i][componentRotations[rotation][2]];
|
|
cBlock[i][COMP_BLUE] = in[i][componentRotations[rotation][3]];
|
|
|
|
aBlock[i][COMP_RED] = in[i][componentRotations[rotation][0]];
|
|
aBlock[i][COMP_GREEN] = in[i][componentRotations[rotation][0]];
|
|
aBlock[i][COMP_BLUE] = in[i][componentRotations[rotation][0]];
|
|
}
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\ncBlock[16][3]\n");
|
|
for(i=0; i<MAX_SUBSET_SIZE; i++) {
|
|
fprintf(fp,"%4.0f, %4.0f, %4.0f\n",cBlock[i][COMP_RED],cBlock[i][COMP_GREEN],cBlock[i][COMP_BLUE]);
|
|
}
|
|
|
|
|
|
fprintf(fp,"\naBlock[16][3]\n");
|
|
for(i=0; i<MAX_SUBSET_SIZE; i++) {
|
|
fprintf(fp,"%4.0f, %4.0f, %4.0f\n",aBlock[i][COMP_RED],aBlock[i][COMP_GREEN],aBlock[i][COMP_BLUE]);
|
|
}
|
|
#endif
|
|
|
|
for(indexSelection = 0; indexSelection < maxIndexSelection; indexSelection++) {
|
|
// B
|
|
quantizerError = 0.;
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\n-------------- Quantize the vector block ----------------\n");
|
|
#endif
|
|
// Quantize the vector block
|
|
if(m_blockMaxRange <= m_quantizerRangeThreshold) {
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\noptQuantAnD_d\n");
|
|
fprintf(fp,"IndexSelection = %d\n",indexSelection);
|
|
fprintf(fp,"NumClusters = %d\n",1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection]);
|
|
#endif
|
|
quantizerError = optQuantAnD_d(cBlock,
|
|
MAX_SUBSET_SIZE,
|
|
(1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection]),
|
|
indices[0],
|
|
outQ[0],
|
|
direction,
|
|
&step,
|
|
3);
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\n");
|
|
for (int row=0; row<16; row++) {
|
|
fprintf(fp,"indices[0][%2d] = %4.2f\n",row,indices[0][row]);
|
|
}
|
|
|
|
fprintf(fp,"\n");
|
|
for (int row=0; row<16; row++) {
|
|
fprintf(fp,"outQ[0][%2d] = %4.2f, %4.2f, %4.2f\n",row,outQ[0][row][0],outQ[0][row][1],outQ[0][row][2]);
|
|
}
|
|
|
|
fprintf(fp,"\n");
|
|
fprintf(fp,"Direction = %4.2f, %4.2f, %4.2f\n",direction[0],direction[1],direction[2]);
|
|
fprintf(fp,"step = %4.2f\n",step);
|
|
fprintf(fp,"quantizerError = %4.2f\n",quantizerError);
|
|
#endif
|
|
|
|
} else {
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\noptQuantTrace_d\n");
|
|
fprintf(fp,"IndexSelection = %d\n",indexSelection);
|
|
fprintf(fp,"NumClusters = %d\n",1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection]);
|
|
#endif
|
|
quantizerError = optQuantTrace_d(cBlock,
|
|
MAX_SUBSET_SIZE,
|
|
(1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection]),
|
|
indices[0],
|
|
outQ[0],
|
|
direction,
|
|
&step,
|
|
3);
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\n");
|
|
for (int row=0; row<16; row++) {
|
|
fprintf(fp,"indices[0][%2d] = %4.2f\n",row,indices[0][row]);
|
|
}
|
|
|
|
fprintf(fp,"\n");
|
|
for (int row=0; row<16; row++) {
|
|
fprintf(fp,"outQ[0][%2d] = %4.2f, %4.2f, %4.2f\n",row,outQ[0][row][0],outQ[0][row][1],outQ[0][row][2]);
|
|
}
|
|
|
|
fprintf(fp,"\n");
|
|
fprintf(fp,"Direction = %4.2f, %4.2f, %4.2f\n",direction[0],direction[1],direction[2]);
|
|
fprintf(fp,"step = %4.2f\n",step);
|
|
fprintf(fp,"quantizerError = %4.2f\n",quantizerError);
|
|
#endif
|
|
|
|
}
|
|
|
|
// Quantize the scalar block
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\nQuantize the scalar block\n");
|
|
#endif
|
|
if(m_blockMaxRange <= m_quantizerRangeThreshold) {
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\noptQuantAnD_d\n");
|
|
fprintf(fp,"IndexSelection = %d\n",indexSelection);
|
|
fprintf(fp,"NumClusters = %d\n",1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection]);
|
|
#endif
|
|
quantizerError += optQuantAnD_d(aBlock,
|
|
MAX_SUBSET_SIZE,
|
|
(1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection]),
|
|
indices[1],
|
|
outQ[1],
|
|
direction,
|
|
&step,
|
|
3) / 3.;
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\n");
|
|
for (int row=0; row<16; row++) {
|
|
fprintf(fp,"indices[1][%2d] = %4.2f\n",row,indices[1][row]);
|
|
}
|
|
|
|
fprintf(fp,"\n");
|
|
for (int row=0; row<16; row++) {
|
|
fprintf(fp,"outQ[1][%2d] = %4.2f, %4.2f, %4.2f\n",row,outQ[1][row][0],outQ[1][row][1],outQ[1][row][2]);
|
|
}
|
|
|
|
fprintf(fp,"\n");
|
|
fprintf(fp,"Direction = %4.2f, %4.2f, %4.2f\n",direction[0],direction[1],direction[2]);
|
|
fprintf(fp,"step = %4.2f\n",step);
|
|
fprintf(fp,"quantizerError = %4.2f\n",quantizerError);
|
|
#endif
|
|
} else {
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\noptQuantTrace_d\n");
|
|
fprintf(fp,"IndexSelection = %d\n",indexSelection);
|
|
fprintf(fp,"NumClusters = %d\n",1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection]);
|
|
#endif
|
|
quantizerError += optQuantTrace_d(aBlock,
|
|
MAX_SUBSET_SIZE,
|
|
(1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection]),
|
|
indices[1],
|
|
outQ[1],
|
|
direction,
|
|
&step,
|
|
3) / 3.;
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"\n");
|
|
for (int row=0; row<16; row++) {
|
|
fprintf(fp,"indices[1][%2d] = %4.2f\n",row,indices[1][row]);
|
|
}
|
|
|
|
fprintf(fp,"\n");
|
|
for (int row=0; row<16; row++) {
|
|
fprintf(fp,"outQ[1][%2d] = %4.2f, %4.2f, %4.2f\n",row,outQ[1][row][0],outQ[1][row][1],outQ[1][row][2]);
|
|
}
|
|
|
|
fprintf(fp,"\n");
|
|
fprintf(fp,"Direction = %4.2f, %4.2f, %4.2f\n",direction[0],direction[1],direction[2]);
|
|
fprintf(fp,"step = %4.2f\n",step);
|
|
fprintf(fp,"quantizerError = %4.2f\n",quantizerError);
|
|
#endif
|
|
|
|
}
|
|
|
|
// If quality is high then run the full shaking for this config and
|
|
// store the result if it beats the best overall error
|
|
// Otherwise only run the shaking if the error is better than the best
|
|
// quantizer error
|
|
if((m_quality > g_HIGHQULITY_THRESHOLD) || (quantizerError <= bestQuantizerError)) {
|
|
// Shake size gives the size of the shake cube
|
|
CMP_DWORD shakeSize;
|
|
|
|
shakeSize = cmp_maxT(2, cmp_minT((CMP_DWORD)(6 * m_quality), 6));
|
|
|
|
int bits[2][4];
|
|
|
|
// Specify number of bits for vector block
|
|
bits[0][COMP_RED] = m_componentBits[COMP_RED];
|
|
bits[0][COMP_GREEN] = m_componentBits[COMP_GREEN];
|
|
bits[0][COMP_BLUE] = m_componentBits[COMP_BLUE];
|
|
bits[0][3] = 2 * (m_componentBits[COMP_RED] + m_componentBits[COMP_GREEN] + m_componentBits[COMP_BLUE]);
|
|
|
|
// Specify number of bits for scalar block
|
|
bits[1][0] = m_componentBits[COMP_ALPHA];
|
|
bits[1][1] = m_componentBits[COMP_ALPHA];
|
|
bits[1][2] = m_componentBits[COMP_ALPHA];
|
|
bits[1][3] = 6 * m_componentBits[COMP_ALPHA];
|
|
|
|
overallError = 0;
|
|
int epo_code[2][2][MAX_DIMENSION_BIG];
|
|
double epo[2][MAX_DIMENSION_BIG];
|
|
|
|
if(m_blockMaxRange > m_shakerRangeThreshold) {
|
|
overallError += ep_shaker_2_d(cBlock,
|
|
MAX_SUBSET_SIZE,
|
|
indices[0],
|
|
outQ[0],
|
|
epo_code[0],
|
|
shakeSize,
|
|
(1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection])-1,
|
|
bits[0][3],
|
|
3,
|
|
epo);
|
|
} else {
|
|
ep_shaker_d(cBlock,
|
|
MAX_SUBSET_SIZE,
|
|
indices[0],
|
|
outQ[0],
|
|
epo_code[0],
|
|
(1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection])-1,
|
|
bits[0],
|
|
(CMP_qt_cpu)0,
|
|
3);
|
|
|
|
overallError += ep_shaker_2_d(cBlock,
|
|
MAX_SUBSET_SIZE,
|
|
indices[0],
|
|
outQ[0],
|
|
epo_code[0],
|
|
shakeSize,
|
|
(1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection])-1,
|
|
bits[0][3],
|
|
3,
|
|
epo);
|
|
}
|
|
|
|
if(m_blockMaxRange > m_shakerRangeThreshold) {
|
|
overallError += ep_shaker_2_d(aBlock,
|
|
MAX_SUBSET_SIZE,
|
|
indices[1],
|
|
outQ[1],
|
|
epo_code[1],
|
|
shakeSize,
|
|
(1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection])-1,
|
|
bits[1][3],
|
|
3,
|
|
epo) / 3.;
|
|
} else {
|
|
ep_shaker_d(aBlock,
|
|
MAX_SUBSET_SIZE,
|
|
indices[1],
|
|
outQ[1],
|
|
epo_code[1],
|
|
(1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection])-1,
|
|
bits[1],
|
|
(CMP_qt_cpu)0,
|
|
3);
|
|
|
|
overallError += ep_shaker_2_d(aBlock,
|
|
MAX_SUBSET_SIZE,
|
|
indices[1],
|
|
outQ[1],
|
|
epo_code[1],
|
|
shakeSize,
|
|
(1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection])-1,
|
|
bits[1][3],
|
|
3,
|
|
epo) / 3.;
|
|
}
|
|
|
|
// If we beat the previous best then encode the block
|
|
if(overallError < bestOverallError) {
|
|
EncodeDualIndexBlock(blockMode,
|
|
indexSelection,
|
|
rotation,
|
|
epo_code,
|
|
indices,
|
|
out);
|
|
|
|
bestOverallError = overallError;
|
|
}
|
|
|
|
if(quantizerError < bestQuantizerError) {
|
|
bestQuantizerError = quantizerError;
|
|
}
|
|
|
|
}
|
|
} // B
|
|
} // A
|
|
return bestOverallError;
|
|
}
|
|
|
|
|
|
|
|
//
// This routine compresses a block and returns the RMS error
//
#include <stdio.h>  // NOTE(review): redundant — <stdio.h> is already included at the top of this file
|
|
|
|
|
|
double BC7BlockEncoder::CompressBlock(double in[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],
|
|
CMP_BYTE out[COMPRESSED_BLOCK_SIZE]) {
|
|
|
|
#ifdef USE_DBGTRACE
|
|
DbgTrace(());
|
|
#endif
|
|
CMP_DWORD i, j;
|
|
CMP_BOOL blockNeedsAlpha = FALSE;
|
|
CMP_BOOL blockAlphaZeroOne = FALSE;
|
|
CMP_DWORD validModeMask = m_validModeMask;
|
|
CMP_BOOL encodedBlock = FALSE;
|
|
|
|
#ifdef USE_CMP_BC7_CORE
|
|
if (m_performance < 0.01) {
|
|
// prototype code for next revision, currently accessible only through SDK
|
|
unsigned char srcBlock[64];
|
|
|
|
int px=0;
|
|
for (i=0; i<16; i++)
|
|
{
|
|
srcBlock[px++] = (unsigned char)(in[i][0]);
|
|
srcBlock[px++] = (unsigned char)(in[i][1]);
|
|
srcBlock[px++] = (unsigned char)(in[i][2]);
|
|
srcBlock[px++] = (unsigned char)(in[i][3]);
|
|
}
|
|
|
|
CompressBlockBC7(srcBlock,16,out);
|
|
|
|
return 0.0f;
|
|
}
|
|
#endif
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fp = fopen("debugdata.txt","w");
|
|
if (fp) {
|
|
fprintf(fp,"Data INPUT\n");
|
|
double data[16][4];
|
|
memcpy(data,in,sizeof(data));
|
|
for (int row=0; row<16; row++)
|
|
fprintf(fp,"%4.0f, %4.0f, %4.0f\n", data[row][0],data[row][1],data[row][2]);
|
|
#endif
|
|
|
|
for(i=0; i<MAX_DIMENSION_BIG; i++) {
|
|
m_blockMin[i] = DBL_MAX;
|
|
m_blockMax[i] = 0.0;
|
|
m_blockRange[i] = 0.0;
|
|
}
|
|
|
|
// Check if the input block has any alpha values that are not 1
|
|
// We assume 8-bit input here, so 1 is mapped to 255.
|
|
// Also check if the block encodes an explicit zero or one in the
|
|
// alpha channel. If so then we might need also need special as the
|
|
// block may have a thresholded or punch-through alpha
|
|
for(i=0; i<MAX_SUBSET_SIZE; i++) {
|
|
if(in[i][COMP_ALPHA] != 255.0) {
|
|
blockNeedsAlpha = TRUE;
|
|
} else if((in[i][COMP_ALPHA] == 255.0) ||
|
|
(in[i][COMP_ALPHA] == 0.0)) {
|
|
blockAlphaZeroOne = TRUE;
|
|
}
|
|
}
|
|
|
|
for(i=0; i<MAX_SUBSET_SIZE; i++) {
|
|
for(j=0; j<MAX_DIMENSION_BIG; j++) {
|
|
m_blockMin[j] = (in[i][j] < m_blockMin[j]) ? in[i][j] : m_blockMin[j];
|
|
m_blockMax[j] = (in[i][j] > m_blockMax[j]) ? in[i][j] : m_blockMax[j];
|
|
}
|
|
}
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"m_blockMin[0] = %4.2f\n",m_blockMin[0]);
|
|
fprintf(fp,"m_blockMin[1] = %4.2f\n",m_blockMin[1]);
|
|
fprintf(fp,"m_blockMin[2] = %4.2f\n",m_blockMin[2]);
|
|
fprintf(fp,"m_blockMin[3] = %4.2f\n\n",m_blockMin[3]);
|
|
|
|
fprintf(fp,"m_blockMax[0] = %4.2f\n",m_blockMax[0]);
|
|
fprintf(fp,"m_blockMax[1] = %4.2f\n",m_blockMax[1]);
|
|
fprintf(fp,"m_blockMax[2] = %4.2f\n",m_blockMax[2]);
|
|
fprintf(fp,"m_blockMax[3] = %4.2f\n\n",m_blockMax[3]);
|
|
#endif
|
|
|
|
m_blockRange[0] = m_blockMax[0] - m_blockMin[0];
|
|
m_blockRange[1] = m_blockMax[1] - m_blockMin[1];
|
|
m_blockRange[2] = m_blockMax[2] - m_blockMin[2];
|
|
m_blockRange[3] = m_blockMax[3] - m_blockMin[3];
|
|
m_blockMaxRange = cmp_maxT(m_blockRange[0], m_blockRange[1]);
|
|
m_blockMaxRange = cmp_maxT(m_blockMaxRange, m_blockRange[2]);
|
|
m_blockMaxRange = cmp_maxT(m_blockMaxRange, m_blockRange[3]);
|
|
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"m_blockRange[0] = %4.2f\n",m_blockRange[0]);
|
|
fprintf(fp,"m_blockRange[1] = %4.2f\n",m_blockRange[1]);
|
|
fprintf(fp,"m_blockRange[2] = %4.2f\n",m_blockRange[2]);
|
|
fprintf(fp,"m_blockRange[3] = %4.2f\n",m_blockRange[3]);
|
|
fprintf(fp,"m_blockMaxRange = %4.2f\n\n",m_blockMaxRange);
|
|
|
|
fprintf(fp,"=========================================\n");
|
|
#endif
|
|
|
|
|
|
// Initial loop - go through the block modes and get the ones that are valid
|
|
for(CMP_DWORD blockMode=0; blockMode < NUM_BLOCK_TYPES; blockMode++) {
|
|
// Check if this mode is allowed based on the global settings
|
|
if(!(validModeMask & (1 << blockMode))) {
|
|
continue;
|
|
}
|
|
|
|
// If the block needs Alpha and this mode doesn't support alpha then
|
|
// indicate that this is not a valid mode and continue
|
|
if((blockNeedsAlpha == TRUE) &&
|
|
(bti_cpu[blockMode].encodingType == NO_ALPHA)) {
|
|
validModeMask &= ~(1<<blockMode);
|
|
}
|
|
|
|
// Optional restriction for colour-only blocks so that they
|
|
// don't use modes that have combined colour+alpha - this
|
|
// avoids the possibility that the encoder might choose an
|
|
// alpha other than 1.0 (due to parity) and cause something to
|
|
// become accidentally slightly transparent (it's possible that
|
|
// when encoding 3-component texture applications will assume that
|
|
// the 4th component can safely be assumed to be 1.0 all the time)
|
|
if((blockNeedsAlpha == FALSE) &&
|
|
(m_colourRestrict == TRUE) &&
|
|
(bti_cpu[blockMode].encodingType == COMBINED_ALPHA)) {
|
|
validModeMask &= ~(1<<blockMode);
|
|
}
|
|
|
|
// Optional restriction for blocks with alpha to avoid issues with
|
|
// punch-through or thresholded alpha encoding
|
|
if((blockNeedsAlpha == TRUE) &&
|
|
(m_alphaRestrict == TRUE) &&
|
|
(blockAlphaZeroOne == TRUE) &&
|
|
(bti_cpu[blockMode].encodingType == COMBINED_ALPHA)) {
|
|
validModeMask &= ~(1<<blockMode);
|
|
}
|
|
}
|
|
|
|
assert(validModeMask != 0);
|
|
|
|
#ifdef USE_DBGTRACE
|
|
DbgTrace(("validModeMask [%x]",validModeMask));
|
|
#endif
|
|
// Try all the legal block modes that we flagged
|
|
|
|
CMP_BYTE temporaryOutputBlock[COMPRESSED_BLOCK_SIZE];
|
|
double bestError = DBL_MAX;
|
|
double thisError;
|
|
CMP_DWORD bestblockMode=99;
|
|
|
|
// We change the order in which we visit the block modes to try to maximize the chance
|
|
// that we manage to early out as quickly as possible.
|
|
// This is a significant performance optimization for the lower quality modes where the
|
|
// exit threshold is higher, and also tends to improve quality (as the generally higher quality
|
|
// modes are now enumerated earlier, so the first encoding that passes the threshold will
|
|
// tend to pass by a greater margin than if we used a dumb ordering, and thus overall error will
|
|
// be improved)
|
|
CMP_DWORD blockModeOrder[NUM_BLOCK_TYPES] = {6, 4, 3, 1, 0, 2, 7, 5};
|
|
|
|
// used for debugging and mode tests
|
|
// 76543210
|
|
// validModeMask = 0b00100000;
|
|
|
|
for(CMP_DWORD j1=0; j1 < NUM_BLOCK_TYPES; j1++) {
|
|
CMP_DWORD blockMode = blockModeOrder[j1];
|
|
CMP_DWORD Mode = 0x0001 << blockMode;
|
|
|
|
if(!(validModeMask & Mode)) {
|
|
continue;
|
|
}
|
|
|
|
// CPU:HPC #1
|
|
// Setup mode parameters for this block
|
|
BlockSetup(blockMode);
|
|
|
|
if(bti_cpu[blockMode].encodingType != SEPARATE_ALPHA) {
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"=================== CompressSingleIndexBlock ======================\n");
|
|
#endif
|
|
thisError = CompressSingleIndexBlock(in, temporaryOutputBlock, blockMode);
|
|
|
|
} else {
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fprintf(fp,"================== CompressDualIndexBlock =======================\n");
|
|
#endif
|
|
|
|
thisError = CompressDualIndexBlock(in, temporaryOutputBlock, blockMode);
|
|
}
|
|
|
|
// If this compression did better than all previous attempts then copy the result
|
|
// to the output block
|
|
if(thisError < bestError) {
|
|
for(i=0; i < COMPRESSED_BLOCK_SIZE; i++) {
|
|
out[i] = temporaryOutputBlock[i];
|
|
}
|
|
bestError = thisError;
|
|
encodedBlock = TRUE;
|
|
bestblockMode = blockMode;
|
|
}
|
|
|
|
// If we have achieved an error lower than the requirement threshold then just exit now
|
|
// Early out if we found we can compress with error below the quality threshold
|
|
if (m_errorThreshold > 0) {
|
|
if(bestError <= m_errorThreshold) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(bestError < m_smallestError) {
|
|
m_smallestError = bestError;
|
|
}
|
|
if(bestError > m_largestError) {
|
|
m_largestError = bestError;
|
|
}
|
|
|
|
if (!encodedBlock) {
|
|
// return some sort of error and abort sequence!
|
|
encodedBlock = FALSE;
|
|
}
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
fclose(fp);
|
|
#endif
|
|
|
|
return bestError;
|
|
|
|
#ifdef BC7_DEBUG_TO_RESULTS_TXT
|
|
} else return (0);
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|