//=============================================================================== // Copyright (c) 2007-2016 Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2004-2006 ATI Technologies Inc. //=============================================================================== // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions : // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
//
//
//  BC7_Encode.cpp : A reference encoder for BC7
//
// NOTE(review): several spans in this file appear to be truncated (the text from a '<'
// up to a later '>' is missing, e.g. header names after #include and a number of loop
// headers). Each such spot is flagged with NOTE(review) below and the surviving text is
// kept verbatim; restore the missing text from a pristine copy before building.
//

// NOTE(review): the next four #include directives have no header name - presumably
// system headers written as <...>; restore them.
#include
#include
#include
#include
#include "common.h"
#include "bc7_definitions.h"
#include "bc7_partitions.h"
#include "bc7_encode.h"
#include "bc7_utils.h"
#include "3dquant_vpc.h"
#include "shake.h"
#include "debug.h"

//#ifdef USE_CMP_CORE_API
//#include "bcn_common_kernel.h"
//#include "bcn_common_api.h"
//#include "bc7_encode_kernel.h"
//#endif

#ifdef BC7_COMPDEBUGGER
#include "compclient.h"
#endif

#ifdef USE_FILEIO
// NOTE(review): header name missing here as well (FILE is used on the next line).
#include
extern FILE * bc7_File;
#endif

// Threshold quality below which we will always run fast quality and shaking
// Selfnote: User should be able to set this?
// Default FQuality is at 0.1 < g_qFAST_THRESHOLD which will cause the SingleIndex compression to start skipping shape blocks
// during compression
// if user sets a value above this then all shapes will be used for compression scan for quality
double g_qFAST_THRESHOLD = 0.5;

// This limit is used for DualIndex Block and if fQuality is above this limit then Quantization shaking will always be performed
// on all indices
double g_HIGHQULITY_THRESHOLD = 0.7;

//
// For a given block mode this sets up the data needed by the compressor
//
// Note that BC7 only uses NO_PBIT, ONE_PBIT and TWO_PBIT encodings
// for endpoints
//
// blockMode is used directly as an index into bti_cpu[]; no range check is done here.
// Writes m_parityBits, m_componentBits[] and m_clusters[].
//
void BC7BlockEncoder::BlockSetup(CMP_DWORD blockMode)
{
#ifdef USE_DBGTRACE
    DbgTrace(());
#endif
    // Map the mode's p-bit type onto the quantizer parity setting.
    // There is no default case: m_parityBits keeps its previous value if
    // pBitType matches none of the cases below.
    switch(bti_cpu[blockMode].pBitType)
    {
    case NO_PBIT:
        m_parityBits = CART;
        break;
    case ONE_PBIT:
        m_parityBits = SAME_PAR;
        break;
    case TWO_PBIT:
        m_parityBits = BCC;
        break;
    case THREE_PBIT:
        m_parityBits = SAME_FCC;
        break;
    case FOUR_PBIT:
        m_parityBits = FCC;
        break;
    case FIVE_PBIT:
        m_parityBits = FCC_SAME_BCC;
        break;
    }

    // Per-channel endpoint precision and index-ramp sizes depend on how the
    // mode encodes alpha. If encodingType matches none of the three values
    // nothing below is written.
    if(bti_cpu[blockMode].encodingType == NO_ALPHA)
    {
        // vectorBits is split evenly over R, G, B; a single index set
        m_componentBits[COMP_RED] = bti_cpu[blockMode].vectorBits/3;
        m_componentBits[COMP_GREEN] = bti_cpu[blockMode].vectorBits/3;
        m_componentBits[COMP_BLUE] = bti_cpu[blockMode].vectorBits/3;
        m_componentBits[COMP_ALPHA] = 0;

        m_clusters[0] = 1 << bti_cpu[blockMode].indexBits[0];
        m_clusters[1] = 0;
    }
    else if(bti_cpu[blockMode].encodingType == COMBINED_ALPHA)
    {
        // vectorBits is split evenly over R, G, B, A; a single index set
        m_componentBits[COMP_RED] = bti_cpu[blockMode].vectorBits/4;
        m_componentBits[COMP_GREEN] = bti_cpu[blockMode].vectorBits/4;
        m_componentBits[COMP_BLUE] = bti_cpu[blockMode].vectorBits/4;
        m_componentBits[COMP_ALPHA] = bti_cpu[blockMode].vectorBits/4;

        m_clusters[0] = 1 << bti_cpu[blockMode].indexBits[0];
        m_clusters[1] = 0;
    }
    else if(bti_cpu[blockMode].encodingType == SEPARATE_ALPHA)
    {
        // RGB share vectorBits, alpha uses scalarBits; two index sets
        m_componentBits[COMP_RED] = bti_cpu[blockMode].vectorBits/3;
        m_componentBits[COMP_GREEN] = bti_cpu[blockMode].vectorBits/3;
        m_componentBits[COMP_BLUE] = bti_cpu[blockMode].vectorBits/3;
        m_componentBits[COMP_ALPHA] = bti_cpu[blockMode].scalarBits;

        m_clusters[0] = 1 << bti_cpu[blockMode].indexBits[0];
        m_clusters[1] = 1 << bti_cpu[blockMode].indexBits[1];
    }
}

//
// This function sorts out the bit encoding for the BC7 block and packs everything
// in the right order for the hardware decoder
//
// blockMode  - block mode; indexes bti_cpu[] and sets the length of the unary header
// partition  - partition number; indexes BC7_PARTITIONS_CPU / BC7_FIXUPINDICES
// colour     - packed endpoint pair per subset; NOTE: swapped in place for any subset
//              whose fixup index has its top index bit set
// indices    - per-subset index lists, consumed in partition-table order
// block      - output block, written bit by bit through WriteBit()
//
void BC7BlockEncoder::EncodeSingleIndexBlock(CMP_DWORD blockMode,
                                             CMP_DWORD partition,
                                             CMP_DWORD colour[MAX_SUBSETS][2],
                                             int indices[MAX_SUBSETS][MAX_SUBSET_SIZE],
                                             //CMP_DWORD entryCount[MAX_SUBSETS],
                                             CMP_BYTE block[COMPRESSED_BLOCK_SIZE])
{
#ifdef USE_DBGTRACE
    DbgTrace(("-> WriteBit()"));
#endif
    CMP_DWORD i,j,k;
    CMP_DWORD *partitionTable;
    int bitPosition = 0;
    // Position the pointer at the LSB
    CMP_BYTE *basePtr = (CMP_BYTE*)block;
    CMP_DWORD blockIndices[MAX_SUBSET_SIZE];

    // Generate Unary header
    for(i=0; i < (int)blockMode; i++)
    {
        WriteBit(basePtr, bitPosition++, 0);
    }
    WriteBit(basePtr, bitPosition++, 1);

    // Write partition bits
    // NOTE(review): the loop condition and the start of the loop body are truncated
    // on the next line - restore from a pristine copy.
    for(i=0; i>i) & 0x1);
    }

    // Extract the index bits from the partitions
    partitionTable = (CMP_DWORD*)BC7_PARTITIONS_CPU[bti_cpu[blockMode].subsetCount-1][partition];

    CMP_DWORD idxCount[3] = {0, 0, 0};
    bool flipColours[3] = {false, false, false};

    // Sort out the index set and tag whether we need to flip the
    // endpoints to get the correct state in the implicit index bits
    // The implicitly encoded MSB of the fixup index must be 0
    CMP_DWORD fixup[3] = {0, 0, 0};
    switch(bti_cpu[blockMode].subsetCount)
    {
    case 3:
        fixup[1] = BC7_FIXUPINDICES[2][partition][1];
        fixup[2] = BC7_FIXUPINDICES[2][partition][2];
        break;
    case 2:
        fixup[1] = BC7_FIXUPINDICES[1][partition][1];
        break;
    default:
        break;
    }

    // Extract indices and mark subsets that need to have their colours flipped to get the
    // right state for the implicit MSB of the fixup index
    for(i=0; i < MAX_SUBSET_SIZE; i++)
    {
        CMP_DWORD p = partitionTable[i];
        blockIndices[i] = indices[p][idxCount[p]++];

        for(j=0; j<(int)bti_cpu[blockMode].subsetCount; j++)
        {
            if(i==fixup[j])
            {
                // top bit of an indexBits[0]-wide index
                if(blockIndices[i] & (1<<(bti_cpu[blockMode].indexBits[0]-1)))
                {
                    flipColours[j] = true;
                }
            }
        }
    }

    // Now we must flip the endpoints where necessary so that the implicitly encoded
    // index bits have the correct state
    for(i=0; i<(int)bti_cpu[blockMode].subsetCount; i++)
    {
        if(flipColours[i])
        {
            CMP_DWORD temp;
            temp = colour[i][0];
            colour[i][0] = colour[i][1];
            colour[i][1] = temp;
        }
    }

    // ...next flip the indices where necessary
    // NOTE(review): a large span is truncated on the next line. The index-flip loop and
    // the start of the endpoint unpacking are missing, including the declarations of
    // packedColours, parityBits, unpackedColours, component, subset and ep that the
    // code below uses. Restore from a pristine copy.
    for(i=0; i>= 1;
            packedColours[1] >>= 1;
        }
        else if(bti_cpu[blockMode].pBitType == ONE_PBIT)
        {
            // both entries take the low bit of packedColours[1]
            parityBits[i][0] = packedColours[1] & 1;
            parityBits[i][1] = packedColours[1] & 1;
            packedColours[0] >>= 1;
            packedColours[1] >>= 1;
        }
        else
        {
            parityBits[i][0] = 0;
            parityBits[i][1] = 0;
        }

        CMP_DWORD component1;
        // NOTE(review): loop condition and the start of the loop body are truncated
        // on the next line.
        for(component1=0; component1>= m_componentBits[component1];
            packedColours[1] >>= m_componentBits[component1];
        }
    }
    }

    // Loop over components
    for(component=0; component < MAX_DIMENSION_BIG; component++)
    {
        // loop over subsets
        for(subset=0; subset<(int)bti_cpu[blockMode].subsetCount; subset++)
        {
            // Loop over endpoints and write colour bits
            for(ep=0; ep<2; ep++)
            {
                // Write this component
                for(k = 0; k < m_componentBits[component]; k++)
                {
                    WriteBit(basePtr,
                             bitPosition++,
                             (CMP_BYTE)(unpackedColours[subset][ep][component] >> k) & 0x1);
                }
            }
        }
    }

    // Now write parity bits if present
    if(bti_cpu[blockMode].pBitType != NO_PBIT)
    {
        for(subset=0; subset<(int)bti_cpu[blockMode].subsetCount; subset++)
        {
            if(bti_cpu[blockMode].pBitType == ONE_PBIT)
            {
                // one shared parity bit per subset
                WriteBit(basePtr, bitPosition++, parityBits[subset][0] & 1);
            }
            else if(bti_cpu[blockMode].pBitType == TWO_PBIT)
            {
                // one parity bit per endpoint
                WriteBit(basePtr, bitPosition++, parityBits[subset][0] & 1);
                WriteBit(basePtr, bitPosition++, parityBits[subset][1] & 1);
            }
        }
    }

    // Now encode the index bits
    // NOTE(review): the loop header, the branch condition and both inner loop headers
    // are truncated below - restore from a pristine copy.
    for(i=0; i>j));
            }
        }
        else
        {
            for(j=0; j>j));
            }
        }
    }

    // Check that we encoded exactly the right number of bits
    // (this check runs after the bits have already been written into block)
    if(bitPosition != (COMPRESSED_BLOCK_SIZE * 8))
    {
#ifdef USE_DBGTRACE
        DbgTrace(("Error:Encoded incorrect number of bits"));
#endif
        return;
    }

#ifdef USE_DBGTRACE
    DbgTrace(("OUTPUT [%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x]",
              block[ 0],block[ 1],block[ 2],block[ 3],
              block[ 4],block[ 5],block[ 6],block[ 7],
              block[ 8],block[ 9],block[10],block[11],
              block[12],block[13],block[14],block[15]));
#endif
}

//
// This routine can be used to compress a block to any of the modes with a shared index set
//
// It will encode the best result for this mode into a BC7 block
//
// in         - source block, MAX_SUBSET_SIZE entries of MAX_DIMENSION_BIG components
// out        - destination for the encoded block
// blockMode  - block mode; indexes bti_cpu[]
//
// Visible stages: (1) quantize each candidate partition and record its error and
// indices, (2) sort the candidates by error, (3) run endpoint shaking on the best few.
// NOTE(review): the tail of this function (selection of the best result, endpoint
// packing, the encode call and the return statement) is truncated in this copy.
//
//
// For debugging this is a no color 4x4 BC7 block
//BYTE BlankBC7Block[16] = { 0x40, 0xC0, 0x1F, 0xF0, 0x07, 0xFC, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
double BC7BlockEncoder::CompressSingleIndexBlock(
    double in[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],
    CMP_BYTE out[COMPRESSED_BLOCK_SIZE],
    CMP_DWORD blockMode)
{
#ifdef USE_DBGTRACE
    DbgTrace(("<---------CompressSingleIndexBlock----------->"));
#endif
    CMP_DWORD i, k, n;
    CMP_DWORD dimension;

    // Figure out the effective dimension of this block mode
    if(bti_cpu[blockMode].encodingType == NO_ALPHA)
    {
        dimension = 3;
    }
    else
    {
        dimension = 4;
    }

    CMP_DWORD numPartitionModes = 1 << bti_cpu[blockMode].partitionBits;
    CMP_DWORD partitionsToTry = numPartitionModes;

    // Linearly reduce the number of partitions to try as the quality falls below a threshold
    if(m_quality < g_qFAST_THRESHOLD)
    {
        // round to nearest, then clamp to [1, numPartitionModes]
        partitionsToTry = (CMP_DWORD)floor((double)(partitionsToTry * m_partitionSearchSize) + 0.5);
        partitionsToTry = cmp_minT(numPartitionModes, cmp_maxT(1, partitionsToTry));
    }

    CMP_DWORD blockPartition;
    double partition[MAX_SUBSETS][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
    CMP_DWORD entryCount[MAX_SUBSETS];
    CMP_DWORD subset;

#ifdef BC7_DEBUG_TO_RESULTS_TXT
    // NOTE(review): "\C" below is not a standard escape sequence - probably meant "\n"
    fprintf(fp,"\CompressSingleIndexBlock\n");
    fprintf(fp,"blockMode = %d\n",blockMode);
    fprintf(fp,"numPartitionModes = %d\n",numPartitionModes);
    fprintf(fp,"partitionsToTry = %d\n",partitionsToTry);
    fprintf(fp,"m_blockMaxRange = %4.0f\n",m_blockMaxRange);
    fprintf(fp,"m_quantizerRangeThreshold = %4.0f\n",m_quantizerRangeThreshold);
    fprintf(fp,"m_clusters[0] = %d\n",m_clusters[0]);
#endif

#ifdef USE_DBGTRACE
    DbgTrace(("blockMode [%d] numPartitionModes [%d] partitionsToTry [%2d]",
              blockMode, numPartitionModes, partitionsToTry));
    // NOTE(review): m_blockMaxRange is printed with %2d here but with %4.0f above - confirm its type
    DbgTrace((" m_blockMaxRange [%2d] m_quantizerRangeThreshold [%4.0f] m_clusters[0] = %d",
              m_blockMaxRange, m_quantizerRangeThreshold, m_clusters[0]));
#endif

    // Loop over the available partitions for the block mode and quantize them
    // to figure out the best candidates for further refinement
    for(blockPartition = 0; blockPartition < partitionsToTry; blockPartition++)
    {
        Partition(blockPartition, in, partition, entryCount, blockMode, dimension);

        double error = 0.;
        double outB[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
        double direction[MAX_DIMENSION_BIG];
        double step;

        for(subset=0; subset < bti_cpu[blockMode].subsetCount; subset++)
        {
            int indices[MAX_SUBSETS][MAX_SUBSET_SIZE];

            if(entryCount[subset])
            {
                // Pick the quantizer: optQuantAnD_d when the ramp has more than 8 entries
                // or the block range is within the quantizer threshold, else optQuantTrace_d
                if((m_clusters[0] > 8) || (m_blockMaxRange <= m_quantizerRangeThreshold))
                {
#ifdef BC7_DEBUG_TO_RESULTS_TXT
                    fprintf(fp,"\noptQuantAnD_d\n");
#endif
                    error += optQuantAnD_d(partition[subset],
                                           entryCount[subset],
                                           m_clusters[0],
                                           indices[subset],
                                           outB,
                                           direction,
                                           &step,
                                           dimension);
#ifdef BC7_DEBUG_TO_RESULTS_TXT
                    // Debug dump for partition 11 only.
                    // NOTE(review): indices[][] is int and dimension is CMP_DWORD but both
                    // are printed with %4.2f below.
                    if (blockPartition == 11)
                    {
                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++)
                        {
                            fprintf(fp,"partition[%2d] = %4.2f, %4.2f, %4.2f\n",row,partition[subset][row][0],partition[subset][row][1],partition[subset][row][2]);
                        }
                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++)
                        {
                            fprintf(fp,"indices[0][%2d] = %4.2f\n",row,indices[0][row]);
                        }
                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++)
                        {
                            fprintf(fp,"outB[%2d] = %4.2f, %4.2f, %4.2f\n",row,outB[row][0],outB[row][1],outB[row][2]);
                        }
                        fprintf(fp,"\n");
                        fprintf(fp,"entryCount = %d\n",entryCount[subset]);
                        fprintf(fp,"m_clusters[0] = %d\n",m_clusters[0]);
                        fprintf(fp,"Direction = %4.2f, %4.2f, %4.2f\n",direction[0],direction[1],direction[2]);
                        fprintf(fp,"step = %4.2f\n",step);
                        fprintf(fp,"dimension = %4.2f\n",dimension);
                        fprintf(fp,"error = %4.2f\n",error);
                    }
#endif
                }
                else
                {
#ifdef BC7_DEBUG_TO_RESULTS_TXT
                    // NOTE(review): "\o" below is not a standard escape sequence - probably meant "\n"
                    fprintf(fp,"\optQuantTrace_d\n");
#endif
                    error += optQuantTrace_d(partition[subset],
                                             entryCount[subset],
                                             m_clusters[0],
                                             indices[subset],
                                             outB,
                                             direction,
                                             &step,
                                             dimension);
#ifdef BC7_DEBUG_TO_RESULTS_TXT
                    // Debug dump for partition 11 only (same format-specifier caveat as above)
                    if (blockPartition == 11)
                    {
                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++)
                        {
                            fprintf(fp,"partition[%2d] = %4.2f, %4.2f, %4.2f\n",row,partition[subset][row][0],partition[subset][row][1],partition[subset][row][2]);
                        }
                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++)
                        {
                            fprintf(fp,"indices[0][%2d] = %4.2f\n",row,indices[0][row]);
                        }
                        fprintf(fp,"\n");
                        for (int row=0; row<16; row++)
                        {
                            fprintf(fp,"outB[%2d] = %4.2f, %4.2f, %4.2f\n",row,outB[row][0],outB[row][1],outB[row][2]);
                        }
                        fprintf(fp,"\n");
                        fprintf(fp,"entryCount = %d\n",entryCount[subset]);
                        fprintf(fp,"m_clusters[0] = %d\n",m_clusters[0]);
                        fprintf(fp,"Direction = %4.2f, %4.2f, %4.2f\n",direction[0],direction[1],direction[2]);
                        fprintf(fp,"step = %4.2f\n",step);
                        fprintf(fp,"dimension = %4.2f\n",dimension);
                        fprintf(fp,"error = %4.2f\n",error);
                    }
#endif
                }

                // Store off the indices for later
                for(CMP_DWORD idx=0; idx < entryCount[subset]; idx++)
                {
                    m_storedIndices[blockPartition][subset][idx] = indices[subset][idx];
                }
            }
        }

        m_storedError[blockPartition] = error;
    }

    // Sort the results
    sortProjection(m_storedError, m_sortedModes, partitionsToTry);

    // Run shaking (endpoint refinement) pass for partitions that gave the
    // best set of errors from quantization

    // ep_shaker will take its endpoint information from bits[0-2]
    // ep_shaker_2_d will take its information from bits[3]
    int bits[4] = {0,0,0,0};

    // ep_shaker_d needs bits specified individually per channel including parity
    bits[0] = m_componentBits[COMP_RED] + (m_parityBits ? 1:0);
    bits[1] = m_componentBits[COMP_GREEN] + (m_parityBits ? 1:0);
    bits[2] = m_componentBits[COMP_BLUE] + (m_parityBits ? 1:0);

    // ep_shaker_2_d needs bits specified as total bits for both endpoints including parity
    for(i=0; i < dimension; i++)
    {
        bits[3] += m_componentBits[i];
    }
    bits[3] *= 2;
    if(m_parityBits == BCC)
    {
        bits[3] += 2;
    }
    else if (m_parityBits == SAME_PAR)
    {
        bits[3] += 1;
    }

    int epo_code[MAX_SUBSETS][2][MAX_DIMENSION_BIG];
    double epo[2][MAX_DIMENSION_BIG];
    double outB[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];

    int bestEndpoints[MAX_SUBSETS][2][MAX_DIMENSION_BIG];
    int bestIndices[MAX_SUBSETS][MAX_SUBSET_SIZE];
    CMP_DWORD bestEntryCount[MAX_SUBSETS];
    CMP_DWORD bestPartition = 0;
    double bestError = DBL_MAX;

    // Extensive shaking is most important when the ramp is short, and
    // when we have fewer indices. On a long ramp the quality of the
    // initial quantizing is relatively more important
    // We modulate the shake size according to the number of ramp indices
    // - the more indices we have the less shaking should be required to find a near
    // optimal match

    // shakeSize gives the size of the shake cube (for ep_shaker_2_d)
    // ep_shaker always runs on a 1x1x1 cube on both endpoints
    CMP_DWORD shakeSize = 8 - (CMP_DWORD)floor(1.5 * bti_cpu[blockMode].indexBits[0]);
    shakeSize = cmp_maxT(2, cmp_minT((CMP_DWORD)floor(shakeSize * m_quality + 0.5), 6));

    // Shake attempts indicates how many partitions to try to shake
    CMP_DWORD numShakeAttempts = cmp_maxT(1, cmp_minT((CMP_DWORD)floor(8 * m_quality + 0.5), partitionsToTry));

    // Set up all the parameters for the shakers
    // Must increase shake size if these block endpoints use parity
    if((m_parityBits == SAME_PAR) || (m_parityBits == BCC))
    {
        shakeSize += 2;
    }

#ifdef USE_DBGTRACE
    DbgTrace(("%2d numPartitionModes %2d SearchSize %3.3f shakeSize %2d numShakeAttempts %2d\n",
              partitionsToTry, numPartitionModes, m_partitionSearchSize, shakeSize, numShakeAttempts));
#endif

    // Now do the endpoint shaking
    for(i=0; i < numShakeAttempts; i++)
    {
        double error = 0;

        // take candidates in the order produced by sortProjection above
        blockPartition = m_sortedModes[i];

        Partition(blockPartition, in, partition, entryCount, blockMode, dimension);

        for(subset=0; subset < bti_cpu[blockMode].subsetCount; subset++)
        {
            if(entryCount[subset])
            {
                // If quality is set low or the dimension is not compatible with
                // shaker_d then just run shaker_2_d
                if((m_blockMaxRange > m_shakerRangeThreshold) || (dimension != 3))
                {
                    error += ep_shaker_2_d(partition[subset],
                                           entryCount[subset],
                                           m_storedIndices[blockPartition][subset],
                                           outB,
                                           epo_code[subset],
                                           shakeSize,
                                           m_clusters[0]-1,
                                           bits[3],
                                           dimension,
                                           epo);
                }
                else
                {
                    double tempError[2];
                    int tempIndices[MAX_SUBSET_SIZE];
                    int temp_epo_code[2][MAX_DIMENSION_BIG];

                    // Step one - run ep_shaker and ep_shaker_2 in parallel, and get the error from each
                    // (ep_shaker_d works on a private copy of the stored indices)
                    for(k=0; k < entryCount[subset]; k++)
                    {
                        tempIndices[k] = m_storedIndices[blockPartition][subset][k];
                    }

                    tempError[0] = ep_shaker_d(partition[subset],
                                               entryCount[subset],
                                               tempIndices,
                                               outB,
                                               temp_epo_code,
                                               m_clusters[0]-1,
                                               bits,
                                               (CMP_qt_cpu)m_parityBits,
                                               dimension);

                    tempError[1] = ep_shaker_2_d(partition[subset],
                                                 entryCount[subset],
                                                 m_storedIndices[blockPartition][subset],
                                                 outB,
                                                 epo_code[subset],
                                                 shakeSize,
                                                 m_clusters[0]-1,
                                                 bits[3],
                                                 dimension,
                                                 epo);

                    if(tempError[0] < tempError[1])
                    {
                        // If ep_shaker did better than ep_shaker_2 then we need to reshake
                        // the output from ep_shaker using ep_shaker_2 for further refinement
                        tempError[1] = ep_shaker_2_d(partition[subset],
                                                     entryCount[subset],
                                                     tempIndices,
                                                     outB,
                                                     temp_epo_code,
                                                     shakeSize,
                                                     m_clusters[0]-1,
                                                     bits[3],
                                                     dimension,
                                                     epo);

                        // Copy the results into the expected location
                        // NOTE(review): a large span is truncated on the next line: the copy loop,
                        // the rest of the subset loop, the best-candidate bookkeeping and the
                        // start of the early-out test are missing. Restore from a pristine copy.
                        for(k=0; k 0)
        {
            if(bestError <= m_errorThreshold)
            {
                break;
            }
        }
    }

    // Now we have all the data needed to encode the block
    // We need to pack the endpoints prior to encoding
    CMP_DWORD packedEndpoints[3][2];
    // NOTE(review): truncated on the next line (loop header and the start of the
    // endpoint right-alignment code are missing).
    for(subset=0; subset>= 1;
                bestEndpoints[subset][1][k] >>= 1;
            }
            rightAlignment++;
        }

        // Fixup endpoints
        // NOTE(review): everything from this loop header to the DbgTrace call at the top of
        // EncodeDualIndexBlock is truncated on the next line. That span covers the rest of
        // CompressSingleIndexBlock (including its return), the header comment and signature
        // of EncodeDualIndexBlock, and the #ifdef matching the #endif below.
        for(k=0; k WriteBit()"));
#endif
    // NOTE(review): from here on this is the body of BC7BlockEncoder::EncodeDualIndexBlock;
    // judging by the call in CompressDualIndexBlock its arguments are
    // (blockMode, indexSelection, rotation, epo_code, indices, out) - confirm against the header.
    CMP_DWORD i,j,k;
    int bitPosition = 0;
    // Position the pointer at the LSB
    CMP_BYTE *basePtr = out;
    CMP_DWORD idxBits[2];
    CMP_BOOL swapIndices;

    // Generate Unary header for this mode
    // NOTE(review): truncated on the next line (unary header loop, the terminating bit
    // and the start of the next write loop are missing).
    for(i=0; i>i) & 0xff));
    }

    // Write index selector bits
    // NOTE(review): truncated below (selector loop, the setup of idxBits / swapIndices
    // and the start of the endpoint writing are missing).
    for(i=0; i> k) & 0x1));
                }
            }
        }
        else
        {
            for(ep=0; ep<2; ep++)
            {
                // NOTE(review): truncated on the next line
                for(j=0; j> j) & 0x1));
                }
            }
        }
    }

    // Now encode the index bits
    for(i=0; i<2; i++)
    {
        // when swapIndices is set the two index sets are taken in the opposite order
        CMP_DWORD idxSelect = i;

        if(swapIndices)
        {
            idxSelect = i ^ 1;
        }
        // NOTE(review): truncated below (per-pixel loop header, the fixup-index test
        // and both inner loop headers are missing).
        for(j=0; j>k));
                }
            }
            else
            {
                for(k=0; k>k));
                }
            }
        }
    }

    // Check that we encoded exactly the right number of bits
    // (this check runs after the bits have already been written into out)
    if(bitPosition != (COMPRESSED_BLOCK_SIZE * 8))
    {
        return;
    }

#ifdef USE_DBGTRACE
    DbgTrace(("OUTPUT [%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x,%2x]",
              out[ 0],out[ 1],out[ 2],out[ 3],
              out[ 4],out[ 5],out[ 6],out[ 7],
              out[ 8],out[ 9],out[10],out[11],
              out[12],out[13],out[14],out[15]));
#endif
}

//
// Compresses a block using a mode that carries two index sets.
//
// in         - source block, MAX_SUBSET_SIZE entries of MAX_DIMENSION_BIG components
// out        - destination; rewritten via EncodeDualIndexBlock each time a candidate
//              beats the best overall error so far
// blockMode  - block mode; indexes bti_cpu[]
//
// Tries every rotation (1 << rotationBits) and, per the visible comments and closing
// braces, every index selection (1 << indexModeBits); returns the best overall error
// found (DBL_MAX if no candidate was ever shaken).
//
double BC7BlockEncoder::CompressDualIndexBlock(double in[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],
                                               CMP_BYTE out[COMPRESSED_BLOCK_SIZE],
                                               CMP_DWORD blockMode)
{
#ifdef USE_DBGTRACE
    DbgTrace(("<---------CompressDualIndexBlock----------->"));
#endif
    CMP_DWORD i;
    double cBlock[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
    double aBlock[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];

    CMP_DWORD maxRotation = 1 << bti_cpu[blockMode].rotationBits;
    CMP_DWORD rotation;

    CMP_DWORD maxIndexSelection = 1 << bti_cpu[blockMode].indexModeBits;
    CMP_DWORD indexSelection;

    int indices[2][MAX_SUBSET_SIZE];
    double outQ[2][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
    double direction[MAX_DIMENSION_BIG];
    double step;

    double quantizerError;
    double bestQuantizerError = DBL_MAX;
    double overallError;
    double bestOverallError = DBL_MAX;

#ifdef BC7_DEBUG_TO_RESULTS_TXT
    fprintf(fp,"\nCompressDualIndexBlock\n");
    fprintf(fp,"blockMode = %d\n",blockMode);
    fprintf(fp,"maxIndexSelection = %d\n",maxIndexSelection);
    fprintf(fp,"maxRotation = %d\n",maxRotation);
    fprintf(fp,"m_blockMaxRange = %4.0f\n",m_blockMaxRange);
    fprintf(fp,"m_quantizerRangeThreshold = %4.0f\n",m_quantizerRangeThreshold);
#endif

    // Go through each possible rotation and selection of indices
    for(rotation = 0; rotation < maxRotation; rotation++)
    { // A

        // NOTE(review): a large span is truncated on the next line: the code that fills
        // cBlock / aBlock for this rotation, the index-selection loop header ("B"), the
        // quantization that produces quantizerError, and the start of the condition
        // below are missing. Restore from a pristine copy.
        for(i=0; i g_HIGHQULITY_THRESHOLD) || (quantizerError <= bestQuantizerError))
            {
                // Shake size gives the size of the shake cube
                CMP_DWORD shakeSize;

                shakeSize = cmp_maxT(2, cmp_minT((CMP_DWORD)(6 * m_quality), 6));

                int bits[2][4];

                // Specify number of bits for vector block
                bits[0][COMP_RED] = m_componentBits[COMP_RED];
                bits[0][COMP_GREEN] = m_componentBits[COMP_GREEN];
                bits[0][COMP_BLUE] = m_componentBits[COMP_BLUE];
                bits[0][3] = 2 * (m_componentBits[COMP_RED] + m_componentBits[COMP_GREEN] + m_componentBits[COMP_BLUE]);

                // Specify number of bits for scalar block
                // (the scalar block is shaken as a 3-component block: the alpha width is
                // given for three channels, hence 6 * for both endpoints)
                bits[1][0] = m_componentBits[COMP_ALPHA];
                bits[1][1] = m_componentBits[COMP_ALPHA];
                bits[1][2] = m_componentBits[COMP_ALPHA];
                bits[1][3] = 6 * m_componentBits[COMP_ALPHA];

                overallError = 0;
                int epo_code[2][2][MAX_DIMENSION_BIG];
                double epo[2][MAX_DIMENSION_BIG];

                // Vector (cBlock) part: ep_shaker_2_d alone for wide-range blocks, otherwise
                // ep_shaker_d first (its return value is not used) and then ep_shaker_2_d
                if(m_blockMaxRange > m_shakerRangeThreshold)
                {
                    overallError += ep_shaker_2_d(cBlock,
                                                  MAX_SUBSET_SIZE,
                                                  indices[0],
                                                  outQ[0],
                                                  epo_code[0],
                                                  shakeSize,
                                                  (1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection])-1,
                                                  bits[0][3],
                                                  3,
                                                  epo);
                }
                else
                {
                    ep_shaker_d(cBlock,
                                MAX_SUBSET_SIZE,
                                indices[0],
                                outQ[0],
                                epo_code[0],
                                (1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection])-1,
                                bits[0],
                                (CMP_qt_cpu)0,
                                3);

                    overallError += ep_shaker_2_d(cBlock,
                                                  MAX_SUBSET_SIZE,
                                                  indices[0],
                                                  outQ[0],
                                                  epo_code[0],
                                                  shakeSize,
                                                  (1 << bti_cpu[blockMode].indexBits[0 ^ indexSelection])-1,
                                                  bits[0][3],
                                                  3,
                                                  epo);
                }

                // Scalar (aBlock) part: same scheme; the error is divided by 3 because the
                // block is shaken with dimension 3
                if(m_blockMaxRange > m_shakerRangeThreshold)
                {
                    overallError += ep_shaker_2_d(aBlock,
                                                  MAX_SUBSET_SIZE,
                                                  indices[1],
                                                  outQ[1],
                                                  epo_code[1],
                                                  shakeSize,
                                                  (1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection])-1,
                                                  bits[1][3],
                                                  3,
                                                  epo) / 3.;
                }
                else
                {
                    ep_shaker_d(aBlock,
                                MAX_SUBSET_SIZE,
                                indices[1],
                                outQ[1],
                                epo_code[1],
                                (1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection])-1,
                                bits[1],
                                (CMP_qt_cpu)0,
                                3);

                    overallError += ep_shaker_2_d(aBlock,
                                                  MAX_SUBSET_SIZE,
                                                  indices[1],
                                                  outQ[1],
                                                  epo_code[1],
                                                  shakeSize,
                                                  (1 << bti_cpu[blockMode].indexBits[1 ^ indexSelection])-1,
                                                  bits[1][3],
                                                  3,
                                                  epo) / 3.;
                }

                // If we beat the previous best then encode the block
                if(overallError < bestOverallError)
                {
                    EncodeDualIndexBlock(blockMode,
                                         indexSelection,
                                         rotation,
                                         epo_code,
                                         indices,
                                         out);

                    bestOverallError = overallError;
                }

                if(quantizerError < bestQuantizerError)
                {
                    bestQuantizerError = quantizerError;
                }
            }
        } // B
    } // A
    return bestOverallError;
}

//
// This routine compresses a block and returns the RMS error
//
// in   - source block, MAX_SUBSET_SIZE entries of MAX_DIMENSION_BIG components
// out  - destination for the encoded block
//
// Updates m_blockMin / m_blockMax / m_blockRange / m_blockMaxRange and, at the end,
// m_smallestError / m_largestError. Returns bestError.
// With USE_CMP_BC7_CORE defined and m_performance < 0.01 the block is handed to
// CompressBlockBC7 and 0.0f is returned instead.
// With BC7_DEBUG_TO_RESULTS_TXT defined, "debugdata.txt" is reopened in "w" mode on
// every call and 0 is returned if it cannot be opened.
//
//
// NOTE(review): the directive below has no header name, and it is not clear from this
// copy whether it was commented out - restore from a pristine copy.
#include
double BC7BlockEncoder::CompressBlock(double in[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],
                                      CMP_BYTE out[COMPRESSED_BLOCK_SIZE])
{
#ifdef USE_DBGTRACE
    DbgTrace(());
#endif
    CMP_DWORD i, j;
    CMP_BOOL blockNeedsAlpha = FALSE;
    CMP_BOOL blockAlphaZeroOne = FALSE;
    CMP_DWORD validModeMask = m_validModeMask;
    CMP_BOOL encodedBlock = FALSE;

#ifdef USE_CMP_BC7_CORE
    if (m_performance < 0.01)
    {
        // prototype code for next revision, currently accessible only through SDK
        // (converts the 16 RGBA doubles to bytes and bypasses the encoder below)
        unsigned char srcBlock[64];
        int px=0;
        for (i=0; i<16; i++)
        {
            srcBlock[px++] = (unsigned char)(in[i][0]);
            srcBlock[px++] = (unsigned char)(in[i][1]);
            srcBlock[px++] = (unsigned char)(in[i][2]);
            srcBlock[px++] = (unsigned char)(in[i][3]);
        }
        CompressBlockBC7(srcBlock,16,out);
        return 0.0f;
    }
#endif

#ifdef BC7_DEBUG_TO_RESULTS_TXT
    // NOTE: this "if (fp) {" is closed by the "} else return (0);" inside the matching
    // #ifdef at the very end of the function.
    fp = fopen("debugdata.txt","w");
    if (fp)
    {
        fprintf(fp,"Data INPUT\n");
        double data[16][4];
        memcpy(data,in,sizeof(data));
        for (int row=0; row<16; row++)
            fprintf(fp,"%4.0f, %4.0f, %4.0f\n", data[row][0],data[row][1],data[row][2]);
#endif

    // NOTE(review): truncated on the next line - the per-pixel / per-channel min-max scan
    // (and, presumably, the alpha detection that sets blockNeedsAlpha / blockAlphaZeroOne)
    // is missing up to the tail of the m_blockMax update. Restore from a pristine copy.
    for(i=0; i m_blockMax[j]) ? in[i][j] : m_blockMax[j];
        }
    }

#ifdef BC7_DEBUG_TO_RESULTS_TXT
    fprintf(fp,"m_blockMin[0] = %4.2f\n",m_blockMin[0]);
    fprintf(fp,"m_blockMin[1] = %4.2f\n",m_blockMin[1]);
    fprintf(fp,"m_blockMin[2] = %4.2f\n",m_blockMin[2]);
    fprintf(fp,"m_blockMin[3] = %4.2f\n\n",m_blockMin[3]);
    fprintf(fp,"m_blockMax[0] = %4.2f\n",m_blockMax[0]);
    fprintf(fp,"m_blockMax[1] = %4.2f\n",m_blockMax[1]);
    fprintf(fp,"m_blockMax[2] = %4.2f\n",m_blockMax[2]);
    fprintf(fp,"m_blockMax[3] = %4.2f\n\n",m_blockMax[3]);
#endif

    // Per-channel range, and the largest range over the four channels
    m_blockRange[0] = m_blockMax[0] - m_blockMin[0];
    m_blockRange[1] = m_blockMax[1] - m_blockMin[1];
    m_blockRange[2] = m_blockMax[2] - m_blockMin[2];
    m_blockRange[3] = m_blockMax[3] - m_blockMin[3];
    m_blockMaxRange = cmp_maxT(m_blockRange[0], m_blockRange[1]);
    m_blockMaxRange = cmp_maxT(m_blockMaxRange, m_blockRange[2]);
    m_blockMaxRange = cmp_maxT(m_blockMaxRange, m_blockRange[3]);

#ifdef BC7_DEBUG_TO_RESULTS_TXT
    fprintf(fp,"m_blockRange[0] = %4.2f\n",m_blockRange[0]);
    fprintf(fp,"m_blockRange[1] = %4.2f\n",m_blockRange[1]);
    fprintf(fp,"m_blockRange[2] = %4.2f\n",m_blockRange[2]);
    fprintf(fp,"m_blockRange[3] = %4.2f\n",m_blockRange[3]);
    fprintf(fp,"m_blockMaxRange = %4.2f\n\n",m_blockMaxRange);
    fprintf(fp,"=========================================\n");
#endif

    // Initial loop - go through the block modes and get the ones that are valid
    for(CMP_DWORD blockMode=0; blockMode < NUM_BLOCK_TYPES; blockMode++)
    {
        // Check if this mode is allowed based on the global settings
        if(!(validModeMask & (1 << blockMode)))
        {
            continue;
        }

        // If the block needs Alpha and this mode doesn't support alpha then
        // indicate that this is not a valid mode and continue
        if((blockNeedsAlpha == TRUE) &&
           (bti_cpu[blockMode].encodingType == NO_ALPHA))
        {
            // NOTE(review): a large span is truncated on the next line: the rest of the mode
            // filtering, the declaration of bestError, the per-mode compression loop and the
            // start of the early-out test are missing. Restore from a pristine copy.
            validModeMask &= ~(1< 0)
        {
            if(bestError <= m_errorThreshold)
            {
                break;
            }
        }
    }

    // Track the smallest and largest block error seen by this encoder
    if(bestError < m_smallestError)
    {
        m_smallestError = bestError;
    }
    if(bestError > m_largestError)
    {
        m_largestError = bestError;
    }

    if (!encodedBlock)
    {
        // return some sort of error and abort sequence!
        // NOTE(review): as written this is a no-op (assigns FALSE to a flag that is
        // already false); no error is reported to the caller.
        encodedBlock = FALSE;
    }

#ifdef BC7_DEBUG_TO_RESULTS_TXT
    fclose(fp);
#endif

    return bestError;

#ifdef BC7_DEBUG_TO_RESULTS_TXT
    // closes the "if (fp) {" opened near the top of the function
    }
    else
        return (0);
#endif
}