760 lines
38 KiB
C++

//===============================================================================
// Copyright (c) 2014-2016 Advanced Micro Devices, Inc. All rights reserved.
//===============================================================================
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//
// BC6H_Decode.cpp : Decoder for BC6H
//
// Revision
// 0.1 First implementation
//
#include <assert.h>
#include "debug.h"
#include "common.h"
#include "hdr_encode.h"
#include "bc6h_definitions.h"
#include "bc6h_decode.h"
#include "bc6h_utils.h"
#include <bitset>
#include <stddef.h>
#ifdef TEST_CMP_CORE_DECODER
#include "cmp_core.h"
#endif
#ifdef BC6H_DECODE_DEBUG
// Debug-only counter: BC6HBlockDecoder::DecompressBlock increments it once per call.
int g_dblock = 0;
#endif
/*using namespace std;*/
using namespace HDR_Encode;
// Zero-initialised global buffer with the same dimensions as the decoder's output block.
// NOTE(review): nothing in the decode routines of this file reads or writes it —
// presumably it is used by code outside this view; confirm before removing.
float dec_red_out[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG] = {0};
//---------------------------------------------------------------------------------------------------------------------------------------
// Needs improvement or an alternate implementation: these segments are based on NV code and need to be redone
//---------------------------------------------------------------------------------------------------------------------------------------
// SIGN_EXTEND(w,tbits): interprets w as a tbits-wide two's-complement field.
// If bit (tbits-1) of w is set, every bit from position tbits upward is OR-ed in,
// giving the negative int value; otherwise w is returned unchanged (as a signed int).
// Both arguments are expanded more than once, so do not pass expressions with side effects.
#define SIGN_EXTEND(w,tbits) ((((signed(w))&(1<<((tbits)-1)))?((~0)<<(tbits)):0)|(signed(w)))
// MASK(n): int value with the low n bits set.
#define MASK(n) ((1<<(n))-1)
// NV code : used with modifications
// Rebuilds the four endpoints of a two-region block (E[0].A, E[0].B, E[1].A, E[1].B)
// from the raw bit fields stored in EC, one colour channel at a time.
//
//  - E[0].A is the root endpoint, stored with wBits of precision.
//  - When istransformed is set, the other three fields are signed deltas of
//    tBits[channel] bits that are added to the raw root and wrapped to wBits.
//  - When issigned is set, every resulting endpoint is sign-extended.
//  - With neither flag set the raw fields are copied through unchanged.
void extract_compressed_endpoints2(AMD_BC6H_Format& bc6h_format) {
    const bool signedData  = bc6h_format.issigned ? true : false;
    const bool transformed = bc6h_format.istransformed ? true : false;

    for (int ch = 0; ch < NCHANNELS; ch++) {
        if (!transformed) {
            if (signedData) {
                // Plain signed endpoints: each field is sign-extended from its own width.
                bc6h_format.E[0].A[ch] = SIGN_EXTEND(bc6h_format.EC[0].A[ch], bc6h_format.wBits);
                bc6h_format.E[0].B[ch] = SIGN_EXTEND(bc6h_format.EC[0].B[ch], bc6h_format.tBits[ch]);
                bc6h_format.E[1].A[ch] = SIGN_EXTEND(bc6h_format.EC[1].A[ch], bc6h_format.tBits[ch]);
                bc6h_format.E[1].B[ch] = SIGN_EXTEND(bc6h_format.EC[1].B[ch], bc6h_format.tBits[ch]);
            } else {
                // Plain unsigned endpoints: stored values are the endpoints.
                bc6h_format.E[0].A[ch] = bc6h_format.EC[0].A[ch];
                bc6h_format.E[0].B[ch] = bc6h_format.EC[0].B[ch];
                bc6h_format.E[1].A[ch] = bc6h_format.EC[1].A[ch];
                bc6h_format.E[1].B[ch] = bc6h_format.EC[1].B[ch];
            }
            continue;
        }

        // Transformed: the three non-root fields are signed offsets from the raw root.
        const int deltaB0 = SIGN_EXTEND(bc6h_format.EC[0].B[ch], bc6h_format.tBits[ch]);
        const int deltaA1 = SIGN_EXTEND(bc6h_format.EC[1].A[ch], bc6h_format.tBits[ch]);
        const int deltaB1 = SIGN_EXTEND(bc6h_format.EC[1].B[ch], bc6h_format.tBits[ch]);

        // Add the root and wrap the sum back into wBits.
        const int wrappedB0 = (deltaB0 + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);
        const int wrappedA1 = (deltaA1 + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);
        const int wrappedB1 = (deltaB1 + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);

        if (signedData) {
            bc6h_format.E[0].A[ch] = SIGN_EXTEND(bc6h_format.EC[0].A[ch], bc6h_format.wBits);
            bc6h_format.E[0].B[ch] = SIGN_EXTEND(wrappedB0, bc6h_format.wBits);
            bc6h_format.E[1].A[ch] = SIGN_EXTEND(wrappedA1, bc6h_format.wBits);
            bc6h_format.E[1].B[ch] = SIGN_EXTEND(wrappedB1, bc6h_format.wBits);
        } else {
            bc6h_format.E[0].A[ch] = bc6h_format.EC[0].A[ch];
            bc6h_format.E[0].B[ch] = wrappedB0;
            bc6h_format.E[1].A[ch] = wrappedA1;
            bc6h_format.E[1].B[ch] = wrappedB1;
        }
    }
}
// Rebuilds the two endpoints of a one-region block (E[0].A and E[0].B) from the
// raw bit fields in EC, one colour channel at a time.
//
//  - E[0].A is the root endpoint, stored with wBits of precision.
//  - When istransformed is set, EC[0].B is a signed delta of tBits[channel] bits
//    that is added to the raw root and wrapped to wBits.
//  - When issigned is set, the resulting endpoints are sign-extended.
//  - With neither flag set the raw fields are copied through unchanged.
void extract_compressed_endpoints(AMD_BC6H_Format& bc6h_format) {
    const bool signedData  = bc6h_format.issigned ? true : false;
    const bool transformed = bc6h_format.istransformed ? true : false;

    for (int ch = 0; ch < NCHANNELS; ch++) {
        if (!transformed) {
            if (signedData) {
                bc6h_format.E[0].A[ch] = SIGN_EXTEND(bc6h_format.EC[0].A[ch], bc6h_format.wBits);
                bc6h_format.E[0].B[ch] = SIGN_EXTEND(bc6h_format.EC[0].B[ch], bc6h_format.tBits[ch]);
            } else {
                bc6h_format.E[0].A[ch] = bc6h_format.EC[0].A[ch];
                bc6h_format.E[0].B[ch] = bc6h_format.EC[0].B[ch];
            }
            continue;
        }

        // Transformed: second endpoint is a signed offset from the raw root, wrapped to wBits.
        const int delta   = SIGN_EXTEND(bc6h_format.EC[0].B[ch], bc6h_format.tBits[ch]);
        const int wrapped = (delta + bc6h_format.EC[0].A[ch]) & MASK(bc6h_format.wBits);

        if (signedData) {
            bc6h_format.E[0].A[ch] = SIGN_EXTEND(bc6h_format.EC[0].A[ch], bc6h_format.wBits);
            bc6h_format.E[0].B[ch] = SIGN_EXTEND(wrapped, bc6h_format.wBits);
        } else {
            bc6h_format.E[0].A[ch] = bc6h_format.EC[0].A[ch];
            bc6h_format.E[0].B[ch] = wrapped;
        }
    }
}
// NV code: used with modifications
// Expands a quantized endpoint value `q`, stored with `prec` bits, to the
// 16-bit working range used for interpolation.
//
//  - UNSIGNED_F16: result is in [0, U16MAX]; 0 and the all-ones code map to the
//    range ends exactly. Values stored with 15 or more bits pass through.
//  - SIGNED_F16: result is in [-S16MAX, S16MAX]; the magnitude is scaled and the
//    sign re-applied. Values stored with 16 or more bits pass through (the
//    scaling expression could overflow for prec = 16, so this test must stay).
//  - Any other format yields 0.
//
// The final 31/64 (unsigned) or 31/32 (signed) scale is applied after
// interpolation by finish_unquantize.
int unquantize(AMD_BC6H_Format& bc6h_format, int q, int prec) {
    if (bc6h_format.format == UNSIGNED_F16) {
        if (prec >= 15)
            return q;
        if (q == 0)
            return 0;
        if (q == ((1<<prec)-1))
            return U16MAX;
        return (q * (U16MAX+1) + (U16MAX+1)/2) >> prec;
    }

    if (bc6h_format.format == SIGNED_F16) {
        if (prec >= 16)
            return q;

        // Work on the magnitude and restore the sign at the end.
        const bool negative  = (q < 0);
        const int  magnitude = negative ? -q : q;

        if (magnitude == 0)
            return 0;
        if (magnitude >= ((1<<(prec-1))-1))
            return negative ? -S16MAX : S16MAX;

        const int scaled = (magnitude * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1);
        return negative ? -scaled : scaled;
    }

    return 0;
}
// Interpolates between endpoints `a` and `b` at step `i` out of `denom`.
// `denom` must be 3, 7 or 15 (asserted). Steps of a 3-step ramp are mapped onto
// the 15-step weight table by scaling both `i` and `denom` by 5.
// The weighted sum is divided by 64 (1 << 6) and the quotient is truncated
// toward zero when converted back to int; no rounding term is added.
int lerp(int a, int b, int i, int denom) {
    assert (denom == 3 || denom == 7 || denom == 15);
    assert (i >= 0 && i <= denom);

    const int shift = 6;
    int* table = NULL;

    if (denom == 3) {
        // Re-express the 3-step position on the 15-step table.
        denom *= 5;
        i *= 5;
        table = g_aWeights4;
    } else if (denom == 15) {
        table = g_aWeights4;
    } else if (denom == 7) {
        table = g_aWeights3;
    } else {
        assert(0);
    }
#pragma warning(disable:4244)
    const int weightedSum = (int)(a*table[denom-i] + b*table[i]);
    return weightedSum / float(1 << shift);
}
// Applies the final scale to an interpolated value `q`:
//  - UNSIGNED_F16: q * 31 / 64
//  - SIGNED_F16:   magnitude * 31 / 32, with the sign of q preserved
//  - any other format: q is returned unchanged
int finish_unquantize(AMD_BC6H_Format bc6h_format, int q) {
    switch (bc6h_format.format) {
    case UNSIGNED_F16:
        return (q * 31) >> 6;
    case SIGNED_F16:
        if (q < 0) {
            // Scale the magnitude, then negate, so the shift never sees a negative value.
            const int magnitude = ((-q) * 31) >> 5;
            return -magnitude;
        }
        return (q * 31) >> 5;
    default:
        return q;
    }
}
// Fills Palete[region][0 .. max-1] with the interpolated colours of one region.
// For each channel the two endpoints E[region].A / E[region].B are unquantized
// from wBits, interpolated at every step with lerp(…, max-1), and then scaled by
// finish_unquantize. `max` is the palette size; lerp asserts that max-1 is 3, 7 or 15.
void generate_palette_quantized(int max, AMD_BC6H_Format& bc6h_format, int region) {
    const int lastStep = max - 1;

    // Unquantized endpoints per channel (index 0 = x, 1 = y, 2 = z of the palette entry).
    const int xStart = unquantize(bc6h_format, bc6h_format.E[region].A[0], bc6h_format.wBits);
    const int xEnd   = unquantize(bc6h_format, bc6h_format.E[region].B[0], bc6h_format.wBits);
    const int yStart = unquantize(bc6h_format, bc6h_format.E[region].A[1], bc6h_format.wBits);
    const int yEnd   = unquantize(bc6h_format, bc6h_format.E[region].B[1], bc6h_format.wBits);
    const int zStart = unquantize(bc6h_format, bc6h_format.E[region].A[2], bc6h_format.wBits);
    const int zEnd   = unquantize(bc6h_format, bc6h_format.E[region].B[2], bc6h_format.wBits);

    for (int step = 0; step < max; ++step) {
        bc6h_format.Palete[region][step].x = finish_unquantize(bc6h_format, lerp(xStart, xEnd, step, lastStep));
        bc6h_format.Palete[region][step].y = finish_unquantize(bc6h_format, lerp(yStart, yEnd, step, lastStep));
        bc6h_format.Palete[region][step].z = finish_unquantize(bc6h_format, lerp(zStart, zEnd, step, lastStep));
    }
}
// Parses the header of one compressed BC6H block.
//
// Reads the mode bits from the first byte, then pulls every endpoint field
// (rw/gw/bw root endpoint, rx..bz delta or second/third/fourth endpoints) out
// of the bit positions defined for that mode, and finally unpacks the 16
// palette indices.
//
// Returns a zero-initialised AMD_BC6H_Format with these fields filled in:
//   m_mode (1..14, or 0 for a reserved/unknown mode), wBits, tBits[], the raw
//   fields rw..bz, region, d_shape_index, istransformed, EC[] and indices16[].
// For a reserved mode the function returns early with m_mode == 0 and all other
// fields left at zero; callers must check m_mode before using the result.
// `format` and `issigned` are NOT set here — the caller chooses them.
AMD_BC6H_Format extract_format(BYTE in[COMPRESSED_BLOCK_SIZE]) {
    AMD_BC6H_Format bc6h_format;
    unsigned short decvalue;
    memset(&bc6h_format,0,sizeof(AMD_BC6H_Format));

    // Mode field: if bit 1 of the first byte is clear the mode is 2 bits long
    // (values 0 or 1); otherwise it is 5 bits long.
    if ((in[0]&0x02) > 0) {
        decvalue = (in[0]&0x1F); // five-bit mode
    } else {
        decvalue = (in[0]&0x03); // two-bit mode (bit 1 is known to be 0 here)
    }

    BitHeader header(in,16);

    switch (decvalue) {
    case 0x00:
        bc6h_format.m_mode = 1; // 10:5:5:5
        bc6h_format.wBits = 10;
        bc6h_format.tBits[C_RED] = 5;
        bc6h_format.tBits[C_GREEN] = 5;
        bc6h_format.tBits[C_BLUE] = 5;
        bc6h_format.rw = header.getvalue(5,10);            // 10: rw[9:0]
        bc6h_format.rx = header.getvalue(35,5);            //  5: rx[4:0]
        bc6h_format.ry = header.getvalue(65,5);            //  5: ry[4:0]
        bc6h_format.rz = header.getvalue(71,5);            //  5: rz[4:0]
        bc6h_format.gw = header.getvalue(15,10);           // 10: gw[9:0]
        bc6h_format.gx = header.getvalue(45,5);            //  5: gx[4:0]
        bc6h_format.gy = header.getvalue(41,4) |           //  5: gy[3:0]
                         (header.getvalue(2,1) << 4);      //     gy[4]
        bc6h_format.gz = header.getvalue(51,4) |           //  5: gz[3:0]
                         (header.getvalue(40,1) << 4);     //     gz[4]
        bc6h_format.bw = header.getvalue(25,10);           // 10: bw[9:0]
        bc6h_format.bx = header.getvalue(55,5);            //  5: bx[4:0]
        bc6h_format.by = header.getvalue(61,4) |           //  5: by[3:0]
                         (header.getvalue(3,1) << 4);      //     by[4]
        bc6h_format.bz = header.getvalue(50,1) |           //  5: bz[0]
                         (header.getvalue(60,1) << 1) |    //     bz[1]
                         (header.getvalue(70,1) << 2) |    //     bz[2]
                         (header.getvalue(76,1) << 3) |    //     bz[3]
                         (header.getvalue(4,1) << 4);      //     bz[4]
        break;
    case 0x01:
        bc6h_format.m_mode = 2; // 7:6:6:6
        bc6h_format.wBits = 7;
        bc6h_format.tBits[C_RED] = 6;
        bc6h_format.tBits[C_GREEN] = 6;
        bc6h_format.tBits[C_BLUE] = 6;
        bc6h_format.rw = header.getvalue(5,7);             // 7: rw[6:0]
        bc6h_format.rx = header.getvalue(35,6);            // 6: rx[5:0]
        bc6h_format.ry = header.getvalue(65,6);            // 6: ry[5:0]
        bc6h_format.rz = header.getvalue(71,6);            // 6: rz[5:0]
        bc6h_format.gw = header.getvalue(15,7);            // 7: gw[6:0]
        bc6h_format.gx = header.getvalue(45,6);            // 6: gx[5:0]
        bc6h_format.gy = header.getvalue(41,4) |           // 6: gy[3:0]
                         (header.getvalue(24,1) << 4) |    //    gy[4]
                         (header.getvalue(2,1) << 5);      //    gy[5]
        bc6h_format.gz = header.getvalue(51,4) |           // 6: gz[3:0]
                         (header.getvalue(3,1) << 4) |     //    gz[4]
                         (header.getvalue(4,1) << 5);      //    gz[5]
        bc6h_format.bw = header.getvalue(25,7);            // 7: bw[6:0]
        bc6h_format.bx = header.getvalue(55,6);            // 6: bx[5:0]
        bc6h_format.by = header.getvalue(61,4) |           // 6: by[3:0]
                         (header.getvalue(14,1) << 4) |    //    by[4]
                         (header.getvalue(22,1) << 5);     //    by[5]
        bc6h_format.bz = header.getvalue(12,1) |           // 6: bz[0]
                         (header.getvalue(13,1) << 1) |    //    bz[1]
                         (header.getvalue(23,1) << 2) |    //    bz[2]
                         (header.getvalue(32,1) << 3) |    //    bz[3]
                         (header.getvalue(34,1) << 4) |    //    bz[4]
                         (header.getvalue(33,1) << 5);     //    bz[5]
        break;
    case 0x02:
        bc6h_format.m_mode = 3; // 11:5:4:4
        bc6h_format.wBits = 11;
        bc6h_format.tBits[C_RED] = 5;
        bc6h_format.tBits[C_GREEN] = 4;
        bc6h_format.tBits[C_BLUE] = 4;
        bc6h_format.rw = header.getvalue(5,10) |           // 11: rw[9:0]
                         (header.getvalue(40,1) << 10);    //     rw[10]
        bc6h_format.rx = header.getvalue(35,5);            //  5: rx[4:0]
        bc6h_format.ry = header.getvalue(65,5);            //  5: ry[4:0]
        bc6h_format.rz = header.getvalue(71,5);            //  5: rz[4:0]
        bc6h_format.gw = header.getvalue(15,10) |          // 11: gw[9:0]
                         (header.getvalue(49,1) << 10);    //     gw[10]
        bc6h_format.gx = header.getvalue(45,4);            //  4: gx[3:0]
        bc6h_format.gy = header.getvalue(41,4);            //  4: gy[3:0]
        bc6h_format.gz = header.getvalue(51,4);            //  4: gz[3:0]
        bc6h_format.bw = header.getvalue(25,10) |          // 11: bw[9:0]
                         (header.getvalue(59,1) << 10);    //     bw[10]
        bc6h_format.bx = header.getvalue(55,4);            //  4: bx[3:0]
        bc6h_format.by = header.getvalue(61,4);            //  4: by[3:0]
        bc6h_format.bz = header.getvalue(50,1) |           //  4: bz[0]
                         (header.getvalue(60,1) << 1) |    //     bz[1]
                         (header.getvalue(70,1) << 2) |    //     bz[2]
                         (header.getvalue(76,1) << 3);     //     bz[3]
        break;
    case 0x06:
        bc6h_format.m_mode = 4; // 11:4:5:4
        bc6h_format.wBits = 11;
        bc6h_format.tBits[C_RED] = 4;
        bc6h_format.tBits[C_GREEN] = 5;
        bc6h_format.tBits[C_BLUE] = 4;
        bc6h_format.rw = header.getvalue(5,10) |           // 11: rw[9:0]
                         (header.getvalue(39,1) << 10);    //     rw[10]
        bc6h_format.rx = header.getvalue(35,4);            //  4: rx[3:0]
        bc6h_format.ry = header.getvalue(65,4);            //  4: ry[3:0]
        bc6h_format.rz = header.getvalue(71,4);            //  4: rz[3:0]
        bc6h_format.gw = header.getvalue(15,10) |          // 11: gw[9:0]
                         (header.getvalue(50,1) << 10);    //     gw[10]
        bc6h_format.gx = header.getvalue(45,5);            //  5: gx[4:0]
        bc6h_format.gy = header.getvalue(41,4) |           //  5: gy[3:0]
                         (header.getvalue(75,1) << 4);     //     gy[4]
        bc6h_format.gz = header.getvalue(51,4) |           //  5: gz[3:0]
                         (header.getvalue(40,1) << 4);     //     gz[4]
        bc6h_format.bw = header.getvalue(25,10) |          // 11: bw[9:0]
                         (header.getvalue(59,1) << 10);    //     bw[10]
        bc6h_format.bx = header.getvalue(55,4);            //  4: bx[3:0]
        bc6h_format.by = header.getvalue(61,4);            //  4: by[3:0]
        bc6h_format.bz = header.getvalue(69,1) |           //  4: bz[0]
                         (header.getvalue(60,1) << 1) |    //     bz[1]
                         (header.getvalue(70,1) << 2) |    //     bz[2]
                         (header.getvalue(76,1) << 3);     //     bz[3]
        break;
    case 0x0A:
        bc6h_format.m_mode = 5; // 11:4:4:5
        bc6h_format.wBits = 11;
        bc6h_format.tBits[C_RED] = 4;
        bc6h_format.tBits[C_GREEN] = 4;
        bc6h_format.tBits[C_BLUE] = 5;
        bc6h_format.rw = header.getvalue(5,10) |           // 11: rw[9:0]
                         (header.getvalue(39,1) << 10);    //     rw[10]
        bc6h_format.rx = header.getvalue(35,4);            //  4: rx[3:0]
        bc6h_format.ry = header.getvalue(65,4);            //  4: ry[3:0]
        bc6h_format.rz = header.getvalue(71,4);            //  4: rz[3:0]
        bc6h_format.gw = header.getvalue(15,10) |          // 11: gw[9:0]
                         (header.getvalue(49,1) << 10);    //     gw[10]
        bc6h_format.gx = header.getvalue(45,4);            //  4: gx[3:0]
        bc6h_format.gy = header.getvalue(41,4);            //  4: gy[3:0]
        bc6h_format.gz = header.getvalue(51,4);            //  4: gz[3:0]
        bc6h_format.bw = header.getvalue(25,10) |          // 11: bw[9:0]
                         (header.getvalue(60,1) << 10);    //     bw[10]
        bc6h_format.bx = header.getvalue(55,5);            //  5: bx[4:0]
        // by is a 5-bit field in this mode: the top bit lives at bit 40 and must be
        // OR-ed in (it was previously a separate, discarded expression statement).
        bc6h_format.by = header.getvalue(61,4) |           //  5: by[3:0]
                         (header.getvalue(40,1) << 4);     //     by[4]
        bc6h_format.bz = header.getvalue(50,1) |           //  5: bz[0]
                         (header.getvalue(69,1) << 1) |    //     bz[1]
                         (header.getvalue(70,1) << 2) |    //     bz[2]
                         (header.getvalue(76,1) << 3) |    //     bz[3]
                         (header.getvalue(75,1) << 4);     //     bz[4]
        break;
    case 0x0E:
        bc6h_format.m_mode = 6; // 9:5:5:5
        bc6h_format.wBits = 9;
        bc6h_format.tBits[C_RED] = 5;
        bc6h_format.tBits[C_GREEN] = 5;
        bc6h_format.tBits[C_BLUE] = 5;
        bc6h_format.rw = header.getvalue(5,9);             // 9: rw[8:0]
        bc6h_format.gw = header.getvalue(15,9);            // 9: gw[8:0]
        bc6h_format.bw = header.getvalue(25,9);            // 9: bw[8:0]
        bc6h_format.rx = header.getvalue(35,5);            // 5: rx[4:0]
        bc6h_format.gx = header.getvalue(45,5);            // 5: gx[4:0]
        bc6h_format.bx = header.getvalue(55,5);            // 5: bx[4:0]
        bc6h_format.ry = header.getvalue(65,5);            // 5: ry[4:0]
        bc6h_format.gy = header.getvalue(41,4) |           // 5: gy[3:0]
                         (header.getvalue(24,1) << 4);     //    gy[4]
        bc6h_format.by = header.getvalue(61,4) |           // 5: by[3:0]
                         (header.getvalue(14,1) << 4);     //    by[4]
        bc6h_format.rz = header.getvalue(71,5);            // 5: rz[4:0]
        bc6h_format.gz = header.getvalue(51,4) |           // 5: gz[3:0]
                         (header.getvalue(40,1) << 4);     //    gz[4]
        bc6h_format.bz = header.getvalue(50,1) |           // 5: bz[0]
                         (header.getvalue(60,1) << 1) |    //    bz[1]
                         (header.getvalue(70,1) << 2) |    //    bz[2]
                         (header.getvalue(76,1) << 3) |    //    bz[3]
                         (header.getvalue(34,1) << 4);     //    bz[4]
        break;
    case 0x12:
        bc6h_format.m_mode = 7; // 8:6:5:5
        bc6h_format.wBits = 8;
        bc6h_format.tBits[C_RED] = 6;
        bc6h_format.tBits[C_GREEN] = 5;
        bc6h_format.tBits[C_BLUE] = 5;
        bc6h_format.rw = header.getvalue(5,8);             // 8: rw[7:0]
        bc6h_format.gw = header.getvalue(15,8);            // 8: gw[7:0]
        bc6h_format.bw = header.getvalue(25,8);            // 8: bw[7:0]
        bc6h_format.rx = header.getvalue(35,6);            // 6: rx[5:0]
        bc6h_format.gx = header.getvalue(45,5);            // 5: gx[4:0]
        bc6h_format.bx = header.getvalue(55,5);            // 5: bx[4:0]
        bc6h_format.ry = header.getvalue(65,6);            // 6: ry[5:0]
        bc6h_format.gy = header.getvalue(41,4) |           // 5: gy[3:0]
                         (header.getvalue(24,1) << 4);     //    gy[4]
        bc6h_format.by = header.getvalue(61,4) |           // 5: by[3:0]
                         (header.getvalue(14,1) << 4);     //    by[4]
        bc6h_format.rz = header.getvalue(71,6);            // 6: rz[5:0]
        bc6h_format.gz = header.getvalue(51,4) |           // 5: gz[3:0]
                         (header.getvalue(13,1) << 4);     //    gz[4]
        bc6h_format.bz = header.getvalue(50,1) |           // 5: bz[0]
                         (header.getvalue(60,1) << 1) |    //    bz[1]
                         (header.getvalue(23,1) << 2) |    //    bz[2]
                         (header.getvalue(33,1) << 3) |    //    bz[3]
                         (header.getvalue(34,1) << 4);     //    bz[4]
        break;
    case 0x16:
        bc6h_format.m_mode = 8; // 8:5:6:5
        bc6h_format.wBits = 8;
        bc6h_format.tBits[C_RED] = 5;
        bc6h_format.tBits[C_GREEN] = 6;
        bc6h_format.tBits[C_BLUE] = 5;
        bc6h_format.rw = header.getvalue(5,8);             // 8: rw[7:0]
        bc6h_format.gw = header.getvalue(15,8);            // 8: gw[7:0]
        bc6h_format.bw = header.getvalue(25,8);            // 8: bw[7:0]
        bc6h_format.rx = header.getvalue(35,5);            // 5: rx[4:0]
        bc6h_format.gx = header.getvalue(45,6);            // 6: gx[5:0]
        bc6h_format.bx = header.getvalue(55,5);            // 5: bx[4:0]
        bc6h_format.ry = header.getvalue(65,5);            // 5: ry[4:0]
        bc6h_format.gy = header.getvalue(41,4) |           // 6: gy[3:0]
                         (header.getvalue(24,1) << 4) |    //    gy[4]
                         (header.getvalue(23,1) << 5);     //    gy[5]
        bc6h_format.by = header.getvalue(61,4) |           // 5: by[3:0]
                         (header.getvalue(14,1) << 4);     //    by[4]
        bc6h_format.rz = header.getvalue(71,5);            // 5: rz[4:0]
        bc6h_format.gz = header.getvalue(51,4) |           // 6: gz[3:0]
                         (header.getvalue(40,1) << 4) |    //    gz[4]
                         (header.getvalue(33,1) << 5);     //    gz[5]
        bc6h_format.bz = header.getvalue(13,1) |           // 5: bz[0]
                         (header.getvalue(60,1) << 1) |    //    bz[1]
                         (header.getvalue(70,1) << 2) |    //    bz[2]
                         (header.getvalue(76,1) << 3) |    //    bz[3]
                         (header.getvalue(34,1) << 4);     //    bz[4]
        break;
    case 0x1A:
        bc6h_format.m_mode = 9; // 8:5:5:6
        bc6h_format.wBits = 8;
        bc6h_format.tBits[C_RED] = 5;
        bc6h_format.tBits[C_GREEN] = 5;
        bc6h_format.tBits[C_BLUE] = 6;
        bc6h_format.rw = header.getvalue(5,8);             // 8: rw[7:0]
        bc6h_format.gw = header.getvalue(15,8);            // 8: gw[7:0]
        bc6h_format.bw = header.getvalue(25,8);            // 8: bw[7:0]
        bc6h_format.rx = header.getvalue(35,5);            // 5: rx[4:0]
        bc6h_format.gx = header.getvalue(45,5);            // 5: gx[4:0]
        bc6h_format.bx = header.getvalue(55,6);            // 6: bx[5:0]
        bc6h_format.ry = header.getvalue(65,5);            // 5: ry[4:0]
        bc6h_format.gy = header.getvalue(41,4) |           // 5: gy[3:0]
                         (header.getvalue(24,1) << 4);     //    gy[4]
        bc6h_format.by = header.getvalue(61,4) |           // 6: by[3:0]
                         (header.getvalue(14,1) << 4) |    //    by[4]
                         (header.getvalue(23,1) << 5);     //    by[5]
        bc6h_format.rz = header.getvalue(71,5);            // 5: rz[4:0]
        bc6h_format.gz = header.getvalue(51,4) |           // 5: gz[3:0]
                         (header.getvalue(40,1) << 4);     //    gz[4]
        bc6h_format.bz = header.getvalue(50,1) |           // 6: bz[0]
                         (header.getvalue(13,1) << 1) |    //    bz[1]
                         (header.getvalue(70,1) << 2) |    //    bz[2]
                         (header.getvalue(76,1) << 3) |    //    bz[3]
                         (header.getvalue(34,1) << 4) |    //    bz[4]
                         (header.getvalue(33,1) << 5);     //    bz[5]
        break;
    case 0x1E:
        bc6h_format.m_mode = 10; // 6:6:6:6
        bc6h_format.istransformed = FALSE;
        bc6h_format.wBits = 6;
        bc6h_format.tBits[C_RED] = 6;
        bc6h_format.tBits[C_GREEN] = 6;
        bc6h_format.tBits[C_BLUE] = 6;
        bc6h_format.rw = header.getvalue(5,6);             // 6: rw[5:0]
        bc6h_format.gw = header.getvalue(15,6);            // 6: gw[5:0]
        bc6h_format.bw = header.getvalue(25,6);            // 6: bw[5:0]
        bc6h_format.rx = header.getvalue(35,6);            // 6: rx[5:0]
        bc6h_format.gx = header.getvalue(45,6);            // 6: gx[5:0]
        bc6h_format.bx = header.getvalue(55,6);            // 6: bx[5:0]
        bc6h_format.ry = header.getvalue(65,6);            // 6: ry[5:0]
        bc6h_format.gy = header.getvalue(41,4) |           // 6: gy[3:0]
                         (header.getvalue(24,1) << 4) |    //    gy[4]
                         (header.getvalue(21,1) << 5);     //    gy[5]
        bc6h_format.by = header.getvalue(61,4) |           // 6: by[3:0]
                         (header.getvalue(14,1) << 4) |    //    by[4]
                         (header.getvalue(22,1) << 5);     //    by[5]
        bc6h_format.rz = header.getvalue(71,6);            // 6: rz[5:0]
        bc6h_format.gz = header.getvalue(51,4) |           // 6: gz[3:0]
                         (header.getvalue(11,1) << 4) |    //    gz[4]
                         (header.getvalue(31,1) << 5);     //    gz[5]
        bc6h_format.bz = header.getvalue(12,1) |           // 6: bz[0]
                         (header.getvalue(13,1) << 1) |    //    bz[1]
                         (header.getvalue(23,1) << 2) |    //    bz[2]
                         (header.getvalue(32,1) << 3) |    //    bz[3]
                         (header.getvalue(34,1) << 4) |    //    bz[4]
                         (header.getvalue(33,1) << 5);     //    bz[5]
        break;
    // Single region modes
    case 0x03:
        bc6h_format.m_mode = 11; // 10:10
        bc6h_format.wBits = 10;
        bc6h_format.tBits[C_RED] = 10;
        bc6h_format.tBits[C_GREEN] = 10;
        bc6h_format.tBits[C_BLUE] = 10;
        bc6h_format.rw = header.getvalue(5,10);            // 10: rw[9:0]
        bc6h_format.gw = header.getvalue(15,10);           // 10: gw[9:0]
        bc6h_format.bw = header.getvalue(25,10);           // 10: bw[9:0]
        bc6h_format.rx = header.getvalue(35,10);           // 10: rx[9:0]
        bc6h_format.gx = header.getvalue(45,10);           // 10: gx[9:0]
        bc6h_format.bx = header.getvalue(55,10);           // 10: bx[9:0]
        break;
    case 0x07:
        bc6h_format.m_mode = 12; // 11:9
        bc6h_format.wBits = 11;
        bc6h_format.tBits[C_RED] = 9;
        bc6h_format.tBits[C_GREEN] = 9;
        bc6h_format.tBits[C_BLUE] = 9;
        bc6h_format.rw = header.getvalue(5,10) |           // 11: rw[9:0]
                         (header.getvalue(44,1) << 10);    //     rw[10]
        bc6h_format.gw = header.getvalue(15,10) |          // 11: gw[9:0]
                         (header.getvalue(54,1) << 10);    //     gw[10]
        bc6h_format.bw = header.getvalue(25,10) |          // 11: bw[9:0]
                         (header.getvalue(64,1) << 10);    //     bw[10]
        bc6h_format.rx = header.getvalue(35,9);            //  9: rx[8:0]
        bc6h_format.gx = header.getvalue(45,9);            //  9: gx[8:0]
        bc6h_format.bx = header.getvalue(55,9);            //  9: bx[8:0]
        break;
    case 0x0B:
        bc6h_format.m_mode = 13; // 12:8
        bc6h_format.wBits = 12;
        bc6h_format.tBits[C_RED] = 8;
        bc6h_format.tBits[C_GREEN] = 8;
        bc6h_format.tBits[C_BLUE] = 8;
        bc6h_format.rw = header.getvalue(5,10) |           // 12: rw[9:0]
                         (header.getvalue(43,1) << 11) |   //     rw[11]
                         (header.getvalue(44,1) << 10);    //     rw[10]
        bc6h_format.gw = header.getvalue(15,10) |          // 12: gw[9:0]
                         (header.getvalue(53,1) << 11) |   //     gw[11]
                         (header.getvalue(54,1) << 10);    //     gw[10]
        bc6h_format.bw = header.getvalue(25,10) |          // 12: bw[9:0]
                         (header.getvalue(63,1) << 11) |   //     bw[11]
                         (header.getvalue(64,1) << 10);    //     bw[10]
        bc6h_format.rx = header.getvalue(35,8);            //  8: rx[7:0]
        bc6h_format.gx = header.getvalue(45,8);            //  8: gx[7:0]
        bc6h_format.bx = header.getvalue(55,8);            //  8: bx[7:0]
        break;
    case 0x0F:
        bc6h_format.m_mode = 14; // 16:4
        bc6h_format.wBits = 16;
        bc6h_format.tBits[C_RED] = 4;
        bc6h_format.tBits[C_GREEN] = 4;
        bc6h_format.tBits[C_BLUE] = 4;
        bc6h_format.rw = header.getvalue(5,10) |           // 16: rw[9:0]
                         (header.getvalue(39,1) << 15) |   //     rw[15]
                         (header.getvalue(40,1) << 14) |   //     rw[14]
                         (header.getvalue(41,1) << 13) |   //     rw[13]
                         (header.getvalue(42,1) << 12) |   //     rw[12]
                         (header.getvalue(43,1) << 11) |   //     rw[11]
                         (header.getvalue(44,1) << 10);    //     rw[10]
        bc6h_format.gw = header.getvalue(15,10) |          // 16: gw[9:0]
                         (header.getvalue(49,1) << 15) |   //     gw[15]
                         (header.getvalue(50,1) << 14) |   //     gw[14]
                         (header.getvalue(51,1) << 13) |   //     gw[13]
                         (header.getvalue(52,1) << 12) |   //     gw[12]
                         (header.getvalue(53,1) << 11) |   //     gw[11]
                         (header.getvalue(54,1) << 10);    //     gw[10]
        bc6h_format.bw = header.getvalue(25,10) |          // 16: bw[9:0]
                         (header.getvalue(59,1) << 15) |   //     bw[15]
                         (header.getvalue(60,1) << 14) |   //     bw[14]
                         (header.getvalue(61,1) << 13) |   //     bw[13]
                         (header.getvalue(62,1) << 12) |   //     bw[12]
                         (header.getvalue(63,1) << 11) |   //     bw[11]
                         (header.getvalue(64,1) << 10);    //     bw[10]
        bc6h_format.rx = header.getvalue(35,4);            //  4: rx[3:0]
        bc6h_format.gx = header.getvalue(45,4);            //  4: gx[3:0]
        bc6h_format.bx = header.getvalue(55,4);            //  4: bx[3:0]
        break;
    default:
        // Reserved / unrecognised mode bits: report mode 0 and leave everything else zeroed.
        bc6h_format.m_mode = 0;
        return bc6h_format;
    }

    // Each format in the mode table can be uniquely identified by the mode bits.
    // The first ten modes are used for two-region tiles, and the mode bit field
    // can be either two or five bits long. These blocks also have fields for
    // the compressed color endpoints (72 or 75 bits), the partition (5 bits),
    // and the partition indices (46 bits).
    if (bc6h_format.m_mode <= 10) {
        bc6h_format.region = BC6_TWO;
        // Get the shape index bits 77 to 81
        bc6h_format.d_shape_index = (unsigned short) header.getvalue(77,5);
        // Every two-region mode except mode 10 stores delta-encoded endpoints.
        bc6h_format.istransformed = (bc6h_format.m_mode < 10) ? TRUE : FALSE;
    } else {
        bc6h_format.region = BC6_ONE;
        bc6h_format.d_shape_index = 0;
        // Every one-region mode except mode 11 stores a delta-encoded second endpoint.
        bc6h_format.istransformed = (bc6h_format.m_mode > 11) ? TRUE : FALSE;
    }

    // Save the points in a form easy to compute with
    bc6h_format.EC[0].A[0] = bc6h_format.rw;
    bc6h_format.EC[0].B[0] = bc6h_format.rx;
    bc6h_format.EC[1].A[0] = bc6h_format.ry;
    bc6h_format.EC[1].B[0] = bc6h_format.rz;
    bc6h_format.EC[0].A[1] = bc6h_format.gw;
    bc6h_format.EC[0].B[1] = bc6h_format.gx;
    bc6h_format.EC[1].A[1] = bc6h_format.gy;
    bc6h_format.EC[1].B[1] = bc6h_format.gz;
    bc6h_format.EC[0].A[2] = bc6h_format.bw;
    bc6h_format.EC[0].B[2] = bc6h_format.bx;
    bc6h_format.EC[1].A[2] = bc6h_format.by;
    bc6h_format.EC[1].B[2] = bc6h_format.bz;

    if (bc6h_format.region == BC6_ONE) {
        // One region: the first index is stored with 3 bits, the remaining 15 with 4 bits each.
        int startbits = ONE_REGION_INDEX_OFFSET;
        bc6h_format.indices16[0] = (std::uint8_t) header.getvalue(startbits,3);
        startbits+=3;
        for (int i=1; i<16; i++) {
            bc6h_format.indices16[i] = (std::uint8_t) header.getvalue(startbits,4);
            startbits+=4;
        }
    } else {
        // Two regions: index 0 and the shape's fix-up index use 2 bits, all others 3 bits.
        int startbit = TWO_REGION_INDEX_OFFSET,
            nbits = 2;
        bc6h_format.indices16[0 ] = (std::uint8_t) header.getvalue(startbit,2);
        for (int i= 1; i<16; i++) {
            startbit += nbits; // offset start bit for next index using prior nbits used
            nbits = g_indexfixups[bc6h_format.d_shape_index] == i?2:3; // get new number of bit to save index with
            bc6h_format.indices16[i] = (std::uint8_t) header.getvalue(startbit,nbits);
        }
    }
    return bc6h_format;
}
//---------------------------------------------------------------------------------------------------------------------------------------
// Converts a finished palette value to the 16-bit pattern of a half float.
// Unsigned data is already the bit pattern. Signed data is held as an ordinary
// (two's-complement) int, whereas a half float is sign-magnitude, so negative
// values are re-encoded as the sign bit plus the magnitude.
static unsigned short bc6h_int_to_half_bits(int value, bool isSigned) {
    if (isSigned && value < 0)
        return (unsigned short)(0x8000 | (-value));
    return (unsigned short) value;
}

// Decodes one compressed BC6H block into 16 RGBA float pixels.
//
//   out : receives the pixels in row-major order (top-left to bottom-right);
//         out[p][0..2] = R,G,B and out[p][3] is always 1.0f.
//   in  : the compressed block bytes.
//
// The member `bc6signed` selects signed or unsigned half-float decoding.
// Blocks whose mode bits are reserved (extract_format reports m_mode == 0)
// decode to RGB 0 with alpha 1.
void BC6HBlockDecoder::DecompressBlock( float out[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG],BYTE in[COMPRESSED_BLOCK_SIZE]) {
    AMD_BC6H_Format bc6h_format = extract_format(in);
    const bool signedFormat = bc6signed ? true : false;

    if (!signedFormat)
        bc6h_format.format = UNSIGNED_F16;
    else
        bc6h_format.format = SIGNED_F16;
    // The endpoint extraction routines branch on issigned, which extract_format
    // leaves zeroed; keep it in step with the chosen format so signed endpoints
    // are sign-extended.
    bc6h_format.issigned = signedFormat;

    if (bc6h_format.m_mode == 0) {
        // Reserved mode: no endpoint widths are defined, so skip endpoint and
        // palette generation (wBits is 0 here) and emit the defined result.
        for (int pixel = 0; pixel < 16; pixel++) {
            out[pixel][0] = 0.0f;
            out[pixel][1] = 0.0f;
            out[pixel][2] = 0.0f;
            out[pixel][3] = 1.0f;
        }
#ifdef BC6H_DECODE_DEBUG
        g_dblock++;
#endif
        return;
    }

    if(bc6h_format.region == BC6_ONE) {
        extract_compressed_endpoints(bc6h_format);
        generate_palette_quantized(16,bc6h_format,0);
    } else { // BC6_TWO
        extract_compressed_endpoints2(bc6h_format);
        for (int r=0; r<2; r++) {
            generate_palette_quantized(8,bc6h_format,r);
        }
    }

    BC6H_Vec3 data;
    int indexPos=0;
    CMP_HALF rgb[3];
    // Partition order goes from top-left to bottom-right, moving left to right and then top to bottom.
    // indexPos walks the 16 pixels in that same order.
    for (int block_row = 0; block_row < 4; block_row++)
        for (int block_col = 0; block_col < 4; block_col++) {
            // Region (0 or 1) of this pixel: always 0 for one-region blocks, otherwise
            // looked up in the two-subset partition table for the block's shape.
            int region = bc6h_format.region == BC6_ONE?0:PARTITIONS[1][bc6h_format.d_shape_index][indexPos];
            int paleteIndex = bc6h_format.indices[block_row][block_col];
            data = bc6h_format.Palete[region][paleteIndex];
            // Int to Half
            rgb[0].setBits(bc6h_int_to_half_bits((int) data.x, signedFormat));
            rgb[1].setBits(bc6h_int_to_half_bits((int) data.y, signedFormat));
            rgb[2].setBits(bc6h_int_to_half_bits((int) data.z, signedFormat));
            out[indexPos][0] = (float) rgb[0]; // r;
            out[indexPos][1] = (float) rgb[1]; // g;
            out[indexPos][2] = (float) rgb[2]; // b;
            out[indexPos][3] = 1.0f;
            indexPos++;
        }
#ifdef BC6H_DECODE_DEBUG
    g_dblock++;
#endif
}