1653 lines
73 KiB
C++
1653 lines
73 KiB
C++
//===============================================================================
|
|
// Copyright (c) 2014-2016 Advanced Micro Devices, Inc. All rights reserved.
|
|
//===============================================================================
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files(the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions :
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
// THE SOFTWARE.
|
|
//
|
|
// BC6H_Encode.cpp : Encoder for BC6H
|
|
//
|
|
// Revision
|
|
// 0.1 First implementation
|
|
// 0.2 Removed unused code and disabled optimization
|
|
//
|
|
#include <assert.h>
|
|
#include "debug.h"
|
|
#include "BC6H_Encode.h"
|
|
#include <float.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include "Common.h"
|
|
#include "HDR_Encode.h"
|
|
#include "BC6H_Definitions.h"
|
|
#include "BC6H_Encode.h"
|
|
#include "BC6H_utils.h"
|
|
|
|
using namespace HDR_Encode;
|
|
|
|
|
|
#define USE_SHAKERHD // reserved for future use!
|
|
|
|
// Constant 16-byte block.
// NOTE(review): going by the name this is presumably a pre-encoded solid red block
// used to flag errors visually -- confirm against the code that consumes it.
BYTE Cmp_Red_Block[16] = {
    0xc2, 0x7b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0xe0, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00
};

// Globals that are defined in another translation unit.
extern int   g_block;
extern FILE *g_fp;

// File-level counter, zero-initialised.
int gl_block = 0;
|
|
|
|
#ifdef DEBUG_PATTERNS
|
|
// random pixel noise range
|
|
float BC6HBlockEncoder::DoPixelNoise()
|
|
{
|
|
float ret = (rand() % RANDOM_NOISE_LEVEL)/ 100.0;
|
|
return (ret);
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
Reserved Feature MONOSHAPE_PATTERNS
|
|
int BC6HBlockEncoder::FindPattern();
|
|
*/
|
|
|
|
// Serialises an encoded block description into its packed 16-byte form.
//
//   bc6h_format : mode number (1..14), endpoint fields (rw..bz), shape index and
//                 the sixteen per-pixel indices to be written.
//   out         : destination buffer receiving BC6H_COMPRESSED_BLOCK_SIZE bytes.
//
// Every field is placed with BitHeader::setvalue(startBit, bitCount, value[, shift]).
// NOTE(review): BitHeader is declared elsewhere; the optional 4th argument presumably
// selects the first source bit of `value` -- the trailing comments below give the
// source bits for each call on that assumption.
//
// If m_mode is not in 1..14 the function returns early and `out` is left untouched.
void SaveDataBlock(AMD_BC6H_Format bc6h_format, BYTE out[BC6H_COMPRESSED_BLOCK_SIZE])
{
    BitHeader header(NULL, BC6H_COMPRESSED_BLOCK_SIZE);
    AMD_BC6H_Format &fmt = bc6h_format;   // short alias for the (by-value) input

    // ---- mode bits + endpoint fields --------------------------------------
    switch (fmt.m_mode)
    {
    // ---------------- two-region modes ----------------
    case 1:  // mode bits 0x00 (2-bit mode field)
        header.setvalue(0,  2,  0x00);
        header.setvalue(2,  1,  fmt.gy, 4);    // gy[4]
        header.setvalue(3,  1,  fmt.by, 4);    // by[4]
        header.setvalue(4,  1,  fmt.bz, 4);    // bz[4]
        header.setvalue(5,  10, fmt.rw);       // rw[9:0]
        header.setvalue(15, 10, fmt.gw);       // gw[9:0]
        header.setvalue(25, 10, fmt.bw);       // bw[9:0]
        header.setvalue(35, 5,  fmt.rx);       // rx[4:0]
        header.setvalue(40, 1,  fmt.gz, 4);    // gz[4]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 5,  fmt.gx);       // gx[4:0]
        header.setvalue(50, 1,  fmt.bz);       // bz[0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 5,  fmt.bx);       // bx[4:0]
        header.setvalue(60, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 5,  fmt.ry);       // ry[4:0]
        header.setvalue(70, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(71, 5,  fmt.rz);       // rz[4:0]
        header.setvalue(76, 1,  fmt.bz, 3);    // bz[3]
        break;

    case 2:  // mode bits 0x01 (2-bit mode field)
        header.setvalue(0,  2,  0x01);
        header.setvalue(2,  1,  fmt.gy, 5);    // gy[5]
        header.setvalue(3,  1,  fmt.gz, 4);    // gz[4]
        header.setvalue(4,  1,  fmt.gz, 5);    // gz[5]
        header.setvalue(5,  7,  fmt.rw);       // rw[6:0]
        header.setvalue(12, 1,  fmt.bz);       // bz[0]
        header.setvalue(13, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(14, 1,  fmt.by, 4);    // by[4]
        header.setvalue(15, 7,  fmt.gw);       // gw[6:0]
        header.setvalue(22, 1,  fmt.by, 5);    // by[5]
        header.setvalue(23, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(24, 1,  fmt.gy, 4);    // gy[4]
        header.setvalue(25, 7,  fmt.bw);       // bw[6:0]
        header.setvalue(32, 1,  fmt.bz, 3);    // bz[3]
        header.setvalue(33, 1,  fmt.bz, 5);    // bz[5]
        header.setvalue(34, 1,  fmt.bz, 4);    // bz[4]
        header.setvalue(35, 6,  fmt.rx);       // rx[5:0]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 6,  fmt.gx);       // gx[5:0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 6,  fmt.bx);       // bx[5:0]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 6,  fmt.ry);       // ry[5:0]
        header.setvalue(71, 6,  fmt.rz);       // rz[5:0]
        break;

    case 3:  // mode bits 0x02
        header.setvalue(0,  5,  0x02);
        header.setvalue(5,  10, fmt.rw);       // rw[9:0]
        header.setvalue(15, 10, fmt.gw);       // gw[9:0]
        header.setvalue(25, 10, fmt.bw);       // bw[9:0]
        header.setvalue(35, 5,  fmt.rx);       // rx[4:0]
        header.setvalue(40, 1,  fmt.rw, 10);   // rw[10]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 4,  fmt.gx);       // gx[3:0]
        header.setvalue(49, 1,  fmt.gw, 10);   // gw[10]
        header.setvalue(50, 1,  fmt.bz);       // bz[0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 4,  fmt.bx);       // bx[3:0]
        header.setvalue(59, 1,  fmt.bw, 10);   // bw[10]
        header.setvalue(60, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 5,  fmt.ry);       // ry[4:0]
        header.setvalue(70, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(71, 5,  fmt.rz);       // rz[4:0]
        header.setvalue(76, 1,  fmt.bz, 3);    // bz[3]
        break;

    case 4:  // mode bits 0x06
        header.setvalue(0,  5,  0x06);
        header.setvalue(5,  10, fmt.rw);       // rw[9:0]
        header.setvalue(15, 10, fmt.gw);       // gw[9:0]
        header.setvalue(25, 10, fmt.bw);       // bw[9:0]
        header.setvalue(35, 4,  fmt.rx);       // rx[3:0]
        header.setvalue(39, 1,  fmt.rw, 10);   // rw[10]
        header.setvalue(40, 1,  fmt.gz, 4);    // gz[4]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 5,  fmt.gx);       // gx[4:0]
        header.setvalue(50, 1,  fmt.gw, 10);   // gw[10]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 4,  fmt.bx);       // bx[3:0]
        header.setvalue(59, 1,  fmt.bw, 10);   // bw[10]
        header.setvalue(60, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 4,  fmt.ry);       // ry[3:0]
        header.setvalue(69, 1,  fmt.bz);       // bz[0]
        header.setvalue(70, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(71, 4,  fmt.rz);       // rz[3:0]
        header.setvalue(75, 1,  fmt.gy, 4);    // gy[4]
        header.setvalue(76, 1,  fmt.bz, 3);    // bz[3]
        break;

    case 5:  // mode bits 0x0A
        header.setvalue(0,  5,  0x0A);
        header.setvalue(5,  10, fmt.rw);       // rw[9:0]
        header.setvalue(15, 10, fmt.gw);       // gw[9:0]
        header.setvalue(25, 10, fmt.bw);       // bw[9:0]
        header.setvalue(35, 4,  fmt.rx);       // rx[3:0]
        header.setvalue(39, 1,  fmt.rw, 10);   // rw[10]
        header.setvalue(40, 1,  fmt.by, 4);    // by[4]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 4,  fmt.gx);       // gx[3:0]
        header.setvalue(49, 1,  fmt.gw, 10);   // gw[10]
        header.setvalue(50, 1,  fmt.bz);       // bz[0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 5,  fmt.bx);       // bx[4:0]
        header.setvalue(60, 1,  fmt.bw, 10);   // bw[10]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 4,  fmt.ry);       // ry[3:0]
        header.setvalue(69, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(70, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(71, 4,  fmt.rz);       // rz[3:0]
        header.setvalue(75, 1,  fmt.bz, 4);    // bz[4]
        header.setvalue(76, 1,  fmt.bz, 3);    // bz[3]
        break;

    case 6:  // mode bits 0x0E
        header.setvalue(0,  5,  0x0E);
        header.setvalue(5,  9,  fmt.rw);       // rw[8:0]
        header.setvalue(14, 1,  fmt.by, 4);    // by[4]
        header.setvalue(15, 9,  fmt.gw);       // gw[8:0]
        header.setvalue(24, 1,  fmt.gy, 4);    // gy[4]
        header.setvalue(25, 9,  fmt.bw);       // bw[8:0]
        header.setvalue(34, 1,  fmt.bz, 4);    // bz[4]
        header.setvalue(35, 5,  fmt.rx);       // rx[4:0]
        header.setvalue(40, 1,  fmt.gz, 4);    // gz[4]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 5,  fmt.gx);       // gx[4:0]
        header.setvalue(50, 1,  fmt.bz);       // bz[0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 5,  fmt.bx);       // bx[4:0]
        header.setvalue(60, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 5,  fmt.ry);       // ry[4:0]
        header.setvalue(70, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(71, 5,  fmt.rz);       // rz[4:0]
        header.setvalue(76, 1,  fmt.bz, 3);    // bz[3]
        break;

    case 7:  // mode bits 0x12
        header.setvalue(0,  5,  0x12);
        header.setvalue(5,  8,  fmt.rw);       // rw[7:0]
        header.setvalue(13, 1,  fmt.gz, 4);    // gz[4]
        header.setvalue(14, 1,  fmt.by, 4);    // by[4]
        header.setvalue(15, 8,  fmt.gw);       // gw[7:0]
        header.setvalue(23, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(24, 1,  fmt.gy, 4);    // gy[4]
        header.setvalue(25, 8,  fmt.bw);       // bw[7:0]
        header.setvalue(33, 1,  fmt.bz, 3);    // bz[3]
        header.setvalue(34, 1,  fmt.bz, 4);    // bz[4]
        header.setvalue(35, 6,  fmt.rx);       // rx[5:0]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 5,  fmt.gx);       // gx[4:0]
        header.setvalue(50, 1,  fmt.bz);       // bz[0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 5,  fmt.bx);       // bx[4:0]
        header.setvalue(60, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 6,  fmt.ry);       // ry[5:0]
        header.setvalue(71, 6,  fmt.rz);       // rz[5:0]
        break;

    case 8:  // mode bits 0x16
        header.setvalue(0,  5,  0x16);
        header.setvalue(5,  8,  fmt.rw);       // rw[7:0]
        header.setvalue(13, 1,  fmt.bz);       // bz[0]
        header.setvalue(14, 1,  fmt.by, 4);    // by[4]
        header.setvalue(15, 8,  fmt.gw);       // gw[7:0]
        header.setvalue(23, 1,  fmt.gy, 5);    // gy[5]
        header.setvalue(24, 1,  fmt.gy, 4);    // gy[4]
        header.setvalue(25, 8,  fmt.bw);       // bw[7:0]
        header.setvalue(33, 1,  fmt.gz, 5);    // gz[5]
        header.setvalue(34, 1,  fmt.bz, 4);    // bz[4]
        header.setvalue(35, 5,  fmt.rx);       // rx[4:0]
        header.setvalue(40, 1,  fmt.gz, 4);    // gz[4]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 6,  fmt.gx);       // gx[5:0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 5,  fmt.bx);       // bx[4:0]
        header.setvalue(60, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 5,  fmt.ry);       // ry[4:0]
        header.setvalue(70, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(71, 5,  fmt.rz);       // rz[4:0]
        header.setvalue(76, 1,  fmt.bz, 3);    // bz[3]
        break;

    case 9:  // mode bits 0x1A
        header.setvalue(0,  5,  0x1A);
        header.setvalue(5,  8,  fmt.rw);       // rw[7:0]
        header.setvalue(13, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(14, 1,  fmt.by, 4);    // by[4]
        header.setvalue(15, 8,  fmt.gw);       // gw[7:0]
        header.setvalue(23, 1,  fmt.by, 5);    // by[5]
        header.setvalue(24, 1,  fmt.gy, 4);    // gy[4]
        header.setvalue(25, 8,  fmt.bw);       // bw[7:0]
        header.setvalue(33, 1,  fmt.bz, 5);    // bz[5]
        header.setvalue(34, 1,  fmt.bz, 4);    // bz[4]
        header.setvalue(35, 5,  fmt.rx);       // rx[4:0]
        header.setvalue(40, 1,  fmt.gz, 4);    // gz[4]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 5,  fmt.gx);       // gx[4:0]
        header.setvalue(50, 1,  fmt.bz);       // bz[0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 6,  fmt.bx);       // bx[5:0]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 5,  fmt.ry);       // ry[4:0]
        header.setvalue(70, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(71, 5,  fmt.rz);       // rz[4:0]
        header.setvalue(76, 1,  fmt.bz, 3);    // bz[3]
        break;

    case 10:  // mode bits 0x1E
        header.setvalue(0,  5,  0x1E);
        header.setvalue(5,  6,  fmt.rw);       // rw[5:0]
        header.setvalue(11, 1,  fmt.gz, 4);    // gz[4]
        header.setvalue(12, 1,  fmt.bz);       // bz[0]
        header.setvalue(13, 1,  fmt.bz, 1);    // bz[1]
        header.setvalue(14, 1,  fmt.by, 4);    // by[4]
        header.setvalue(15, 6,  fmt.gw);       // gw[5:0]
        header.setvalue(21, 1,  fmt.gy, 5);    // gy[5]
        header.setvalue(22, 1,  fmt.by, 5);    // by[5]
        header.setvalue(23, 1,  fmt.bz, 2);    // bz[2]
        header.setvalue(24, 1,  fmt.gy, 4);    // gy[4]
        header.setvalue(25, 6,  fmt.bw);       // bw[5:0]
        header.setvalue(31, 1,  fmt.gz, 5);    // gz[5]
        header.setvalue(32, 1,  fmt.bz, 3);    // bz[3]
        header.setvalue(33, 1,  fmt.bz, 5);    // bz[5]
        header.setvalue(34, 1,  fmt.bz, 4);    // bz[4]
        header.setvalue(35, 6,  fmt.rx);       // rx[5:0]
        header.setvalue(41, 4,  fmt.gy);       // gy[3:0]
        header.setvalue(45, 6,  fmt.gx);       // gx[5:0]
        header.setvalue(51, 4,  fmt.gz);       // gz[3:0]
        header.setvalue(55, 6,  fmt.bx);       // bx[5:0]
        header.setvalue(61, 4,  fmt.by);       // by[3:0]
        header.setvalue(65, 6,  fmt.ry);       // ry[5:0]
        header.setvalue(71, 6,  fmt.rz);       // rz[5:0]
        break;

    // ---------------- single-region modes ----------------
    case 11:  // mode bits 0x03
        header.setvalue(0,  5,  0x03);
        header.setvalue(5,  10, fmt.rw);       // rw[9:0]
        header.setvalue(15, 10, fmt.gw);       // gw[9:0]
        header.setvalue(25, 10, fmt.bw);       // bw[9:0]
        header.setvalue(35, 10, fmt.rx);       // rx[9:0]
        header.setvalue(45, 10, fmt.gx);       // gx[9:0]
        header.setvalue(55, 10, fmt.bx);       // bx[9:0]
        break;

    case 12:  // mode bits 0x07
        header.setvalue(0,  5,  0x07);
        header.setvalue(5,  10, fmt.rw);       // rw[9:0]
        header.setvalue(15, 10, fmt.gw);       // gw[9:0]
        header.setvalue(25, 10, fmt.bw);       // bw[9:0]
        header.setvalue(35, 9,  fmt.rx);       // rx[8:0]
        header.setvalue(44, 1,  fmt.rw, 10);   // rw[10]
        header.setvalue(45, 9,  fmt.gx);       // gx[8:0]
        header.setvalue(54, 1,  fmt.gw, 10);   // gw[10]
        header.setvalue(55, 9,  fmt.bx);       // bx[8:0]
        header.setvalue(64, 1,  fmt.bw, 10);   // bw[10]
        break;

    case 13:  // mode bits 0x0B
        header.setvalue(0,  5,  0x0B);
        header.setvalue(5,  10, fmt.rw);       // rw[9:0]
        header.setvalue(15, 10, fmt.gw);       // gw[9:0]
        header.setvalue(25, 10, fmt.bw);       // bw[9:0]
        header.setvalue(35, 8,  fmt.rx);       // rx[7:0]
        header.setvalue(43, 1,  fmt.rw, 11);   // rw[11]
        header.setvalue(44, 1,  fmt.rw, 10);   // rw[10]
        header.setvalue(45, 8,  fmt.gx);       // gx[7:0]
        header.setvalue(53, 1,  fmt.gw, 11);   // gw[11]
        header.setvalue(54, 1,  fmt.gw, 10);   // gw[10]
        header.setvalue(55, 8,  fmt.bx);       // bx[7:0]
        header.setvalue(63, 1,  fmt.bw, 11);   // bw[11]
        header.setvalue(64, 1,  fmt.bw, 10);   // bw[10]
        break;

    case 14:  // mode bits 0x0F
        header.setvalue(0,  5,  0x0F);
        header.setvalue(5,  10, fmt.rw);       // rw[9:0]
        header.setvalue(15, 10, fmt.gw);       // gw[9:0]
        header.setvalue(25, 10, fmt.bw);       // bw[9:0]
        header.setvalue(35, 4,  fmt.rx);       // rx[3:0]
        header.setvalue(39, 6,  fmt.rw, 10);   // rw[15:10]
        header.setvalue(45, 4,  fmt.gx);       // gx[3:0]
        header.setvalue(49, 6,  fmt.gw, 10);   // gw[15:10]
        header.setvalue(55, 4,  fmt.bx);       // bx[3:0]
        header.setvalue(59, 6,  fmt.bw, 10);   // bw[15:10]
        break;

    default:
        // Unknown mode: nothing is written to `out` (no error is reported to the caller).
        return;
    }

    // ---- per-pixel indices --------------------------------------------------
    // Two-region blocks additionally carry a 5-bit shape (partition) index and use
    // narrower index fields than single-region blocks.
    if (fmt.m_mode >= MIN_MODE_FOR_ONE_REGION)
    {
        // Single region: the first index takes 3 bits, each of the other fifteen takes 4.
        int bitPos = ONE_REGION_INDEX_OFFSET;
        for (int pixel = 0; pixel < 16; pixel++)
        {
            const int width = (pixel == 0) ? 3 : 4;
            header.setvalue(bitPos, width, fmt.indices16[pixel]);
            bitPos += width;
        }
    }
    else
    {
        header.setvalue(77, 5, fmt.d_shape_index);   // shape index

        // Two regions: pixel 0 and the pixel listed in g_indexfixups for this shape
        // take 2 bits, all remaining pixels take 3.
        int bitPos = TWO_REGION_INDEX_OFFSET;
        for (int pixel = 0; pixel < 16; pixel++)
        {
            const int width = (pixel == 0 || g_indexfixups[fmt.d_shape_index] == pixel) ? 2 : 3;
            header.setvalue(bitPos, width, fmt.indices16[pixel]);
            bitPos += width;
        }
    }

    // Copy the assembled bits into the caller's buffer.
    // (Could be avoided if the header lived inside the format struct.)
    header.transferbits(out, 16);
}
|
|
|
|
|
|
// decompress endpoints
|
|
static void decompress_endpts(const int in[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int out[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], const int mode, bool issigned)
|
|
{
|
|
|
|
if (ModePartition[mode].transformed)
|
|
{
|
|
for (int i=0; i<3; ++i)
|
|
{
|
|
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),ModePartition[mode].nbits) : R_0(in);
|
|
int t;
|
|
t = SIGN_EXTEND(R_1(in), ModePartition[mode].prec[i]);
|
|
t = (t + R_0(in)) & MASK(ModePartition[mode].nbits);
|
|
R_1(out) = issigned ? SIGN_EXTEND(t,ModePartition[mode].nbits) : t;
|
|
|
|
t = SIGN_EXTEND(R_2(in), ModePartition[mode].prec[i]);
|
|
t = (t + R_0(in)) & MASK(ModePartition[mode].nbits);
|
|
R_2(out) = issigned ? SIGN_EXTEND(t,ModePartition[mode].nbits) : t;
|
|
|
|
t = SIGN_EXTEND(R_3(in), ModePartition[mode].prec[i]);
|
|
t = (t + R_0(in)) & MASK(ModePartition[mode].nbits);
|
|
R_3(out) = issigned ? SIGN_EXTEND(t,ModePartition[mode].nbits) : t;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int i=0; i<3; ++i)
|
|
{
|
|
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),ModePartition[mode].nbits) : R_0(in);
|
|
R_1(out) = issigned ? SIGN_EXTEND(R_1(in),ModePartition[mode].prec[i]) : R_1(in);
|
|
R_2(out) = issigned ? SIGN_EXTEND(R_2(in),ModePartition[mode].prec[i]) : R_2(in);
|
|
R_3(out) = issigned ? SIGN_EXTEND(R_3(in),ModePartition[mode].prec[i]) : R_3(in);
|
|
}
|
|
}
|
|
}
|
|
|
|
// endpoints fit only if the compression was lossless
|
|
static bool endpts_fit(const int orig[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], const int compressed[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], const int mode, int max_subsets, bool issigned)
|
|
{
|
|
int uncompressed[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
|
|
|
|
decompress_endpts(compressed, uncompressed, mode, issigned);
|
|
|
|
for (int j=0; j<max_subsets; ++j)
|
|
for (int i=0; i<3; ++i)
|
|
{
|
|
if (orig[j][0][i] != uncompressed[j][0][i]) return false;
|
|
if (orig[j][1][i] != uncompressed[j][1][i]) return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Dont know exact limits : for now just say is -2.0 to +2.0
|
|
void BC6HBlockEncoder::clampF16Max(float EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG])
|
|
{
|
|
for(int region=0; region<2; region++)
|
|
for(int ab = 0; ab<2; ab++)
|
|
for (int rgb=0; rgb<3; rgb++)
|
|
{
|
|
if (m_isSigned)
|
|
{
|
|
if (EndPoints[region][ab][rgb] < -F16MAX) EndPoints[region][ab][rgb] = -F16MAX;
|
|
else if (EndPoints[region][ab][rgb] > F16MAX) EndPoints[region][ab][rgb] = F16MAX;
|
|
}
|
|
else
|
|
{
|
|
if (EndPoints[region][ab][rgb] < 0.0) EndPoints[region][ab][rgb] = 0;
|
|
else if (EndPoints[region][ab][rgb] > F16MAX) EndPoints[region][ab][rgb] = F16MAX;
|
|
}
|
|
// Zero region
|
|
// if ((EndPoints[region][ab][rgb] > -0.01) && ((EndPoints[region][ab][rgb] < 0.01))) EndPoints[region][ab][rgb] = 0.0;
|
|
}
|
|
}
|
|
|
|
/*=================================================================
|
|
Quantize Endpoints
|
|
for a given mode
|
|
==================================================================*/
|
|
|
|
// Quantizes the float endpoints of the first `max_subsets` subsets to `prec` bits.
//
// Each R/G/B component of endpoints A ([subset][0]) and B ([subset][1]) is truncated
// to short and passed to QuantizeToInt together with the encoder's signedness and
// exposure settings; the result is stored at the same position in iEndPoints.
void BC6HBlockEncoder::QuantizeEndPointToF16Prec(float EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int max_subsets, int prec)
{
    for (int subset = 0; subset < max_subsets; ++subset)
    {
        for (int endpoint = 0; endpoint < 2; ++endpoint)        // 0 = A, 1 = B
        {
            for (int channel = 0; channel < 3; ++channel)       // 0 = red, 1 = green, 2 = blue
            {
                iEndPoints[subset][endpoint][channel] =
                    QuantizeToInt((short)EndPoints[subset][endpoint][channel], prec, m_isSigned, m_Exposure);
            }
        }
    }
}
|
|
|
|
/*=================================================================
|
|
Swap Indices
|
|
so that indices at fix up points have higher order bit set to 0
|
|
==================================================================*/
|
|
|
|
// Ensures the index at each subset's fix-up position has its top bit clear.
//
// For every subset the index at the fix-up position is inspected (position 0 for
// subset 0, g_Region2FixUp[shape_pattern] for the other subset). When its highest
// index bit is set, the subset's two endpoints are exchanged and each of its
// entryCount[subset] indices is mirrored (idx -> lastIndex - idx).
void BC6HBlockEncoder::SwapIndices(int iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int iIndices[3][BC6H_MAX_SUBSET_SIZE], int entryCount[BC6H_MAX_SUBSETS], int max_subsets, int mode, int shape_pattern)
{
    const unsigned int paletteSize = 1 << ModePartition[mode].IndexPrec;
    const unsigned int topIndexBit = paletteSize >> 1;

    for (int subset = 0; subset < max_subsets; ++subset)
    {
        // Subset 0 always anchors at position 0; the other subset's anchor depends on the shape.
        const size_t fixupPos = subset ? g_Region2FixUp[shape_pattern] : 0;

        if (!(iIndices[subset][fixupPos] & topIndexBit))
            continue;   // already in the required form

        // Exchange endpoint A and endpoint B, channel by channel.
        for (int channel = 0; channel < 3; ++channel)
            std::swap(iEndPoints[subset][0][channel], iEndPoints[subset][1][channel]);

        // Mirror the indices so they still select the same colors.
        for (size_t entry = 0; entry < (size_t)entryCount[subset]; ++entry)
            iIndices[subset][entry] = paletteSize - 1 - iIndices[subset][entry];
    }
}
|
|
|
|
|
|
/*=================================================================
|
|
Transforms according to shape precision
|
|
==================================================================*/
|
|
// Helper used by the endpoint transform: reports whether `endpoint` is too large
// to be stored in an `nbit`-wide two's-complement field.
//
//   endpoint  : value to test (typically an endpoint delta)
//   nbit      : width of the destination field in bits
//   bIsSigned : signed-format flag; it only influences the degenerate nbit <= 0
//               case, where a zero value is still reported as fitting
//
// Returns true on overflow, i.e. when endpoint lies outside
// [-2^(nbit-1), 2^(nbit-1) - 1]; false when the value fits.
//
// The bounds are computed with integer shifts: no floating-point pow(), no
// out-of-range double->int conversion for wide fields, and no right shift of
// negative values.
bool isOverflow(int endpoint, int nbit, bool bIsSigned)
{
    // In signed mode zero always fits, whatever the field width.
    if (bIsSigned && endpoint == 0)
        return false;

    // A field without bits cannot hold anything else.
    if (nbit <= 0)
        return true;

    // A field at least as wide as int can hold every int value.
    if (nbit >= static_cast<int>(sizeof(int) * 8))
        return false;

    const int maxRange = static_cast<int>((1u << (nbit - 1)) - 1u);
    const int minRange = -maxRange - 1;

    return (endpoint < minRange) || (endpoint > maxRange);
}
|
|
|
|
// Bug in this code : Need to add signed bit to values
|
|
bool BC6HBlockEncoder::TransformEndPoints(AMD_BC6H_Format &BC6H_data, int iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],int max_subsets, int mode)
|
|
{
|
|
int Mask;
|
|
if ( ModePartition[mode].transformed)
|
|
{
|
|
BC6H_data.istransformed = true;
|
|
for (int i=0; i<3; ++i)
|
|
{
|
|
Mask = MASK(ModePartition[mode].nbits);
|
|
oEndPoints[0][0][i] = iEndPoints[0][0][i] & Mask; // [0][A]
|
|
|
|
Mask = MASK(ModePartition[mode].prec[i]);
|
|
oEndPoints[0][1][i] = iEndPoints[0][1][i]- iEndPoints[0][0][i]; // [0][B] - [0][A]
|
|
|
|
if (isOverflow(oEndPoints[0][1][i], ModePartition[mode].prec[i], BC6H_data.issigned))
|
|
return false;
|
|
|
|
oEndPoints[0][1][i] = (oEndPoints[0][1][i] & Mask);
|
|
|
|
//redo the check for sign overflow for one region case
|
|
if (max_subsets <= 1)
|
|
{
|
|
if (isOverflow(oEndPoints[0][1][i], ModePartition[mode].prec[i], BC6H_data.issigned))
|
|
return false;
|
|
}
|
|
|
|
if (max_subsets > 1)
|
|
{
|
|
oEndPoints[1][0][i] = iEndPoints[1][0][i] - iEndPoints[0][0][i]; // [1][A] - [0][A]
|
|
if (isOverflow(oEndPoints[1][0][i], ModePartition[mode].prec[i], BC6H_data.issigned))
|
|
return false;
|
|
|
|
oEndPoints[1][0][i] = (oEndPoints[1][0][i] & Mask);
|
|
|
|
oEndPoints[1][1][i] = iEndPoints[1][1][i] - iEndPoints[0][0][i]; // [1][B] - [0][A]
|
|
if (isOverflow(oEndPoints[1][1][i], ModePartition[mode].prec[i], BC6H_data.issigned))
|
|
return false;
|
|
|
|
oEndPoints[1][1][i] = (oEndPoints[1][1][i] & Mask);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
BC6H_data.istransformed = false;
|
|
for (int i=0; i<3; ++i)
|
|
{
|
|
Mask = MASK(ModePartition[mode].nbits);
|
|
oEndPoints[0][0][i] = iEndPoints[0][0][i] & Mask;
|
|
|
|
Mask = MASK(ModePartition[mode].prec[i]);
|
|
oEndPoints[0][1][i] = iEndPoints[0][1][i] & Mask;
|
|
|
|
if (max_subsets > 1)
|
|
{
|
|
oEndPoints[1][0][i] = iEndPoints[1][0][i] & Mask;
|
|
oEndPoints[1][1][i] = iEndPoints[1][1][i] & Mask;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
// Copies the final encoding results into the block description.
//
//   BC6H_data   : receives the mode, the endpoint fields (rw..bz) and indices16[];
//                 its `index` counter is incremented; d_shape_index is read
//   oEndPoints  : endpoint fields, [subset][endpoint][channel]
//   iIndices    : per-subset index lists, in the order the subset's pixels appear
//   max_subsets : 1 for single-region modes, 2 for two-region modes
//   mode        : mode number stored into m_mode
void BC6HBlockEncoder::SaveCompressedBlockData( AMD_BC6H_Format &BC6H_data,
                                                int oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG],
                                                int iIndices[2][BC6H_MAX_SUBSET_SIZE],
                                                int max_subsets,
                                                int mode)
{
    const bool twoRegions = (max_subsets > 1);

    BC6H_data.m_mode = (unsigned short)mode;
    BC6H_data.index++;

    // First region: endpoints w (A) and x (B).
    const int *endW = oEndPoints[0][0];
    const int *endX = oEndPoints[0][1];
    BC6H_data.rw = endW[0];
    BC6H_data.gw = endW[1];
    BC6H_data.bw = endW[2];
    BC6H_data.rx = endX[0];
    BC6H_data.gx = endX[1];
    BC6H_data.bx = endX[2];

    // Second region (two-region modes only): endpoints y (A) and z (B).
    if (twoRegions)
    {
        const int *endY = oEndPoints[1][0];
        const int *endZ = oEndPoints[1][1];
        BC6H_data.ry = endY[0];
        BC6H_data.gy = endY[1];
        BC6H_data.by = endY[2];
        BC6H_data.rz = endZ[0];
        BC6H_data.gz = endZ[1];
        BC6H_data.bz = endZ[2];
    }

    // Scatter the per-subset index lists back to 4x4 pixel order: each pixel takes
    // the next unused index of the subset it belongs to.
    const int partitionTable = twoRegions ? 1 : 0;   // PARTITIONS[1] = two-region shapes, [0] = one-region
    int nextEntry[2] = {0, 0};

    for (int pixel = 0; pixel < BC6H_MAX_SUBSET_SIZE; pixel++)
    {
        const int owner = PARTITIONS[partitionTable][BC6H_data.d_shape_index][pixel];
        BC6H_data.indices16[pixel] = (std::uint8_t)iIndices[owner][nextEntry[owner]];
        nextEntry[owner]++;
    }
}
|
|
|
|
|
|
// Fills BC6H_data.Paletef with colors interpolated between each region's endpoints.
//
// When BC6H_data.region == 1 a single 16-entry palette is built from
// fEndPoints[0]; otherwise two 8-entry palettes are built from fEndPoints[0] and
// fEndPoints[1]. Entry i of a palette is lerpf(A, B, i, entries - 1) per channel
// (x = red, y = green, z = blue).
void palitizeEndPointsF(AMD_BC6H_Format &BC6H_data, float fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG])
{
    const bool singleRegion = (BC6H_data.region == 1);
    const int  regionCount  = singleRegion ? 1 : 2;
    const int  entryCount   = singleRegion ? 16 : 8;
    const int  lastStep     = entryCount - 1;     // 15 or 7: interpolation denominator

    for (int region = 0; region < regionCount; region++)
    {
        const float *endA = fEndPoints[region][0];
        const float *endB = fEndPoints[region][1];

        const float Ar = endA[0], Ag = endA[1], Ab = endA[2];
        const float Br = endB[0], Bg = endB[1], Bb = endB[2];

        for (int i = 0; i < entryCount; i++)
        {
            BC6H_data.Paletef[region][i].x = lerpf(Ar, Br, i, lastStep);   // red
            BC6H_data.Paletef[region][i].y = lerpf(Ag, Bg, i, lastStep);   // green
            BC6H_data.Paletef[region][i].z = lerpf(Ab, Bb, i, lastStep);   // blue
        }
    }
}
|
|
|
|
// Accumulates an absolute-difference error between the source pixels
// (BC6H_data.din) and a value derived from the first subset's endpoints.
//
// For every pixel, every endpoint and every channel the term
//     endpoint + abs(endpoint - endpoint) * (index / 15)
// is compared with the pixel's channel value and the absolute difference is summed.
//
// NOTE(review): the spread term subtracts a value from itself and the index is
// divided by 15 with integer division, so for finite inputs the estimate is just
// the endpoint value. Presumably an interpolation between the two endpoints was
// intended -- verify before relying on this metric. Arithmetic kept unchanged.
float CalcOneRegionEndPtsError(AMD_BC6H_Format &BC6H_data, float fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int shape_indices[MAX_SUBSETS][MAX_SUBSET_SIZE])
{
    float accumulated = 0;

    for (int pixel = 0; pixel < BC6H_MAX_SUBSET_SIZE; pixel++)
    {
        for (int endpoint = 0; endpoint < MAX_END_POINTS; endpoint++)
        {
            for (int channel = 0; channel < NCHANNELS; channel++)
            {
                const float base = fEndPoints[0][endpoint][channel];
                float estimate   = base + (abs(base - base) * (shape_indices[0][pixel] / 15));
                accumulated += abs(BC6H_data.din[pixel][channel] - estimate);
            }
        }
    }

    return accumulated;
}
|
|
|
|
// Sums, over all pixels of the block, the error of the closest palette entry found.
//
//   BC6H_data  : source pixels (din), region count, shape index and palette (Paletef)
//   fEndPoints : endpoints used to rebuild the palette unless SkipPallet is set
//   SkipPallet : when true the palette already stored in BC6H_data is used as-is
//
// The per-pixel error is the sum of absolute channel differences. The palette is
// walked from entry 0 and the walk stops at the first entry that is worse than the
// best one so far (or once the error is no longer positive), so the result is the
// first local minimum along the palette.
float CalcShapeError(AMD_BC6H_Format &BC6H_data, float fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], bool SkipPallet)
{
    const int region    = (BC6H_data.region - 1);
    const int maxPallet = (region == 0) ? 16 : 8;   // 16 entries for one region, 8 otherwise

    if (!SkipPallet)
        palitizeEndPointsF(BC6H_data, fEndPoints);

    float totalError = 0.0f;

    for (int i = 0; i < MAX_SUBSET_SIZE; i++)
    {
        // Which palette this pixel uses: always 0 for one region, else from the shape table.
        int subset = 0;
        if (region != 0)
            subset = PARTITIONS[region][BC6H_data.d_shape_index][i];

        float bestError = 0.0f;

        for (int j = 0; j < maxPallet; j++)
        {
            const float error = abs(BC6H_data.din[i][0] - BC6H_data.Paletef[subset][j].x) +
                                abs(BC6H_data.din[i][1] - BC6H_data.Paletef[subset][j].y) +
                                abs(BC6H_data.din[i][2] - BC6H_data.Paletef[subset][j].z);

            if (j == 0)
                bestError = error;          // entry 0 seeds the search
            else if (error <= bestError)
                bestError = error;          // still improving
            else
                break;                      // error started to grow again

            if (!(bestError > 0))
                break;                      // nothing left to improve
        }

        totalError += bestError;
    }

    return totalError;
}
|
|
|
|
// Re-assigns the palette index of every texel of the block.
//
// For each texel the entry of BC6H_data.Paletef (of the texel's subset) with
// the smallest sum of absolute RGB differences to BC6H_data.din is selected;
// ties keep the lowest index. The chosen indices are appended, in texel order,
// to shape_indices[subset].
//
//   BC6H_data     - block descriptor; region, d_shape_index, din and Paletef
//                   are read
//   shape_indices - receives the new indices, packed per subset
//
// A one-region block has a single subset, so the partition table is not
// consulted in that case. This mirrors CalcShapeError() and avoids indexing
// PARTITIONS with a shape index that is not valid for one-region blocks.
void ReIndexShapef(AMD_BC6H_Format &BC6H_data, int shape_indices[BC6H_MAX_SUBSETS][MAX_SUBSET_SIZE])
{
    const int region    = (BC6H_data.region - 1);
    const int MaxPallet = (region == 0) ? 16 : 8;   // 16 entries for one region, 8 for two

    // Next free slot in shape_indices[0] and shape_indices[1].
    int nextSlot[2] = { 0, 0 };

    for (int i = 0; i < BC6H_MAX_SUBSET_SIZE; i++)
    {
        // subset 0 or subset 1
        int subset = 0;
        if (region != 0 && PARTITIONS[region][BC6H_data.d_shape_index][i])
            subset = 1;

        float bestError = FLT_MAX;
        int   bestIndex = 0;

        for (int j = 0; j < MaxPallet; j++)
        {
            // Calculate error from original
            float error = abs(BC6H_data.din[i][0] - BC6H_data.Paletef[subset][j].x) +
                          abs(BC6H_data.din[i][1] - BC6H_data.Paletef[subset][j].y) +
                          abs(BC6H_data.din[i][2] - BC6H_data.Paletef[subset][j].z);
            if (error < bestError)
            {
                bestError = error;
                bestIndex = j;
            }
        }

        shape_indices[subset][nextSlot[subset]] = bestIndex;
        nextSlot[subset]++;
    }
}
|
|
|
|
// Evaluates one shape pattern for the block held in BC6H_data.
//
// The block is split with Partition(), a ramp is fitted to every subset with
// optQuantAnD_d() and - for two-region shapes at quality above 0.80 when
// USE_SHAKERHD is defined - a second candidate is produced by ep_shaker_HD().
// The candidate with the lower reported error supplies the end points and
// indices stored in BC6H_data.
//
//   TwoRegionShapes - true: two subsets, 8 ramp entries each;
//                     false: one subset, 16 ramp entries
//   shape_pattern   - pattern handed to Partition() and stored as d_shape_index
//
// Returns the value of CalcShapeError() for the clamped end points.
float BC6HBlockEncoder::FindBestPattern(AMD_BC6H_Format &BC6H_data,
                                        bool TwoRegionShapes,
                                        int shape_pattern)
{
    // Two-zone shapes use 3 index bits (values < 8), one-zone shapes use
    // 4 index bits (values < 16).
    int   rampEntries = TwoRegionShapes ? 8 : 16;
    int   subsetCount = TwoRegionShapes ? 2 : 1;
    float direction[NCHANNELS];
    float step;

    // Reset the per-pattern state of the block descriptor.
    BC6H_data.region        = static_cast<unsigned short>(subsetCount);
    BC6H_data.index         = 0;
    BC6H_data.d_shape_index = static_cast<unsigned short>(shape_pattern);
    memset(BC6H_data.partition, 0, sizeof(BC6H_data.partition));
    memset(BC6H_data.shape_indices, 0, sizeof(BC6H_data.shape_indices));

    // Split the input texels into the subsets of the requested pattern.
    Partition(shape_pattern,           // shape pattern to fetch
              BC6H_data.din,           // input data
              BC6H_data.partition,     // receives the patterned data
              BC6H_data.entryCount,    // receives the texel count of each subset
              subsetCount,             // 1 = one-region table, 2 = two-region table
              3);                      // RGB, no alpha

    // Slot 0 holds the optQuantAnD_d() candidate, slot 1 the ep_shaker_HD() one.
    float rampOut[2][2][MAX_SUBSET_SIZE][MAX_DIMENSION_BIG];
    int   rampIdx[2][MAX_SUBSETS][MAX_SUBSET_SIZE];
    int   chosen = 0;

    float quantError = 0.0;
    for (int s = 0; s < subsetCount; ++s)
    {
        quantError += optQuantAnD_d(BC6H_data.partition[s],     // input data
                                    BC6H_data.entryCount[s],    // number of input points
                                    rampEntries,                // clusters on the ramp, 8 or 16
                                    rampIdx[0][s],              // output indices
                                    rampOut[0][s],              // resulting quantization
                                    direction,                  // direction vector of the ramp
                                    &step,                      // step size
                                    3,                          // channels (RGB)
                                    m_quality);                 // controls the number of retries
    }

#ifdef USE_SHAKERHD
    // The shaker pass is only attempted for two-region shapes at high quality.
    if ((subsetCount > 1) && (m_quality > 0.80))
    {
        int   scratchIdx[MAX_SUBSET_SIZE];
        int   channelBits[3] = { 8, 8, 8 };   // channel index bit size
        int   shakenEnds[MAX_SUBSETS][2][MAX_DIMENSION_BIG];
        float shakeError = 0.0;

        for (int s = 0; s < subsetCount; ++s)
        {
            // Seed the shaker with the indices of the first candidate.
            for (int k = 0; k < BC6H_data.entryCount[s]; ++k)
                scratchIdx[k] = rampIdx[0][s][k];

            shakeError += ep_shaker_HD(BC6H_data.partition[s],
                                       BC6H_data.entryCount[s],
                                       scratchIdx,      // in/out indices
                                       rampOut[1][s],   // resulting quantization
                                       shakenEnds[s],
                                       BC6H_data.entryCount[s] - 1,
                                       channelBits,
                                       3);

            for (int k = 0; k < BC6H_data.entryCount[s]; ++k)
                rampIdx[1][s][k] = scratchIdx[k];
        }

        if (quantError > shakeError)
        {
            chosen = 1;
            for (int s = 0; s < subsetCount; ++s)
            {
                for (int k = 0; k < MAX_DIMENSION_BIG; ++k)
                {
                    BC6H_data.fEndPoints[s][0][k] = (float)shakenEnds[s][0][k];
                    BC6H_data.fEndPoints[s][1][k] = (float)shakenEnds[s][1][k];
                }
            }
        }
    }
#endif

    // The shaker candidate already wrote its end points; the quantizer
    // candidate still needs them extracted from its ramp.
    if (chosen == 0)
        GetEndPoints(BC6H_data.fEndPoints, rampOut[chosen], subsetCount, BC6H_data.entryCount);

    memcpy(BC6H_data.shape_indices, rampIdx[chosen], sizeof(BC6H_data.shape_indices));
    clampF16Max(BC6H_data.fEndPoints);

    return CalcShapeError(BC6H_data, BC6H_data.fEndPoints, false);
}
|
|
|
|
// Final scaling step applied to an unquantized end-point value.
//
//   q        - unquantized value
//   isSigned - true for the signed F16 variant, false for the unsigned one
//
// Signed:   the magnitude is scaled by 31/32 and the sign is re-applied.
// Unsigned: the value is scaled by 31/64.
// (The original note says an "undefined" format should return q unchanged;
//  no such case is handled here.)
int finish_unquantizeF16(int q, bool isSigned)
{
    if (!isSigned)
        return (q * 31) >> 6;

    const bool negative  = (q < 0);
    const int  magnitude = negative ? -q : q;
    const int  scaled    = (magnitude * 31) >> 5;

    return negative ? -scaled : scaled;
}
|
|
|
|
// Rebuilds the two end points of a one-subset block from their stored form.
//
//   bc6h_format - supplies the issigned / istransformed flags
//   oEndPoints  - stored end points; only subset 0 is read
//   outf        - receives the end points as floats; only subset 0 is written
//   mode        - index into ModePartition (nbits and per-channel prec)
//
// Per channel: the stored values are sign extended where required, the second
// end point is offset by the first one when the block is transformed, and both
// values then pass through Unquantize() and finish_unquantizeF16().
void decompress_endpoints1(AMD_BC6H_Format& bc6h_format, int oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], float outf[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int mode)
{
    const bool signedFmt  = bc6h_format.issigned ? true : false;
    const bool deltaCoded = bc6h_format.istransformed ? true : false;

    for (int ch = 0; ch < NCHANNELS; ++ch)
    {
        int first;
        int second;

        if (signedFmt)
        {
            first  = SIGN_EXTEND(oEndPoints[0][0][ch], ModePartition[mode].nbits);
            second = SIGN_EXTEND(oEndPoints[0][1][ch], ModePartition[mode].prec[ch]);
            if (deltaCoded)
            {
                // The delta is added to the raw first end point, wrapped to
                // nbits and then sign extended again.
                second = (second + oEndPoints[0][0][ch]) & MASK(ModePartition[mode].nbits);
                second = SIGN_EXTEND(second, ModePartition[mode].nbits);
            }
        }
        else
        {
            first  = oEndPoints[0][0][ch];
            second = oEndPoints[0][1][ch];
            if (deltaCoded)
            {
                second = SIGN_EXTEND(second, ModePartition[mode].prec[ch]);
                second = (second + oEndPoints[0][0][ch]) & MASK(ModePartition[mode].nbits);
            }
        }

        // Unquantize both points to nbits
        first  = Unquantize(first, (unsigned char)ModePartition[mode].nbits, signedFmt);
        second = Unquantize(second, (unsigned char)ModePartition[mode].nbits, signedFmt);

        // F16 format
        outf[0][0][ch] = (float)finish_unquantizeF16(first, signedFmt);
        outf[0][1][ch] = (float)finish_unquantizeF16(second, signedFmt);
    }
}
|
|
|
|
// Rebuilds the four end points of a two-subset block from their stored form.
//
//   bc6h_format - supplies the issigned / istransformed flags
//   oEndPoints  - stored end points of subsets 0 and 1
//   outf        - receives the end points of subsets 0 and 1 as floats
//   mode        - index into ModePartition (nbits and per-channel prec)
//
// Per channel: the first end point of subset 0 is the base value; the other
// three are sign extended where required and, when the block is transformed,
// offset by that base. All four values then pass through Unquantize() and
// finish_unquantizeF16().
void decompress_endpoints2(AMD_BC6H_Format& bc6h_format, int oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], float outf[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int mode)
{
    const bool signedFmt  = bc6h_format.issigned ? true : false;
    const bool deltaCoded = bc6h_format.istransformed ? true : false;

    for (int ch = 0; ch < NCHANNELS; ++ch)
    {
        int value[2][2];

        // Base end point: subset 0, end point 0.
        if (signedFmt)
            value[0][0] = SIGN_EXTEND(oEndPoints[0][0][ch], ModePartition[mode].nbits);
        else
            value[0][0] = oEndPoints[0][0][ch];

        // Remaining end points in the order (0,1), (1,0), (1,1).
        for (int slot = 1; slot < 4; ++slot)
        {
            const int sub = slot >> 1;
            const int ept = slot & 1;
            int       v   = oEndPoints[sub][ept][ch];

            if (signedFmt)
            {
                v = SIGN_EXTEND(v, ModePartition[mode].prec[ch]);
                if (deltaCoded)
                {
                    v = (v + oEndPoints[0][0][ch]) & MASK(ModePartition[mode].nbits);
                    v = SIGN_EXTEND(v, ModePartition[mode].nbits);
                }
            }
            else if (deltaCoded)
            {
                v = SIGN_EXTEND(v, ModePartition[mode].prec[ch]);
                v = (v + oEndPoints[0][0][ch]) & MASK(ModePartition[mode].nbits);
            }

            value[sub][ept] = v;
        }

        // Unquantize all points to nbits
        for (int sub = 0; sub < 2; ++sub)
            for (int ept = 0; ept < 2; ++ept)
                value[sub][ept] = Unquantize(value[sub][ept], (unsigned char)ModePartition[mode].nbits, signedFmt);

        // nbits to F16 format
        for (int sub = 0; sub < 2; ++sub)
            for (int ept = 0; ept < 2; ++ept)
                outf[sub][ept][ch] = (float)finish_unquantizeF16(value[sub][ept], signedFmt);
    }
}
|
|
|
|
// Copies the RGB end points of every subset from EndPoints to fEndPoints,
// optionally flattening near-grey end points to their channel average.
//
//   EndPoints   - source end points
//   fEndPoints  - destination end points (only channels 0..2 are written)
//   max_subsets - number of subsets to process
//   mode        - index into ModePartition
//
// Modes with more than 7 bits are copied unchanged. For the others
// m_DiffLevel is set to the mode's lowest precision, and an end point is
// replaced by its RGB average when the mean absolute deviation of its channels
// is below m_DiffLevel while the average itself is above it.
void BC6HBlockEncoder::AverageEndPoint(float EndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], float fEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int max_subsets, int mode)
{
    if (ModePartition[mode].nbits > 7)
    {
        for (int subset = 0; subset < max_subsets; ++subset)
            for (int ept = 0; ept < 2; ++ept)
                for (int ch = 0; ch < 3; ++ch)
                    fEndPoints[subset][ept][ch] = EndPoints[subset][ept][ch];
        return;
    }

    // determine difference level based on lowest precision of the mode
    m_DiffLevel = (float)ModePartition[mode].lowestPrec;

    for (int subset = 0; subset < max_subsets; ++subset)
    {
        for (int ept = 0; ept < 2; ++ept)
        {
            float avr = (EndPoints[subset][ept][0] +
                         EndPoints[subset][ept][1] +
                         EndPoints[subset][ept][2]) / 3.0f;

            // average deviation of the three channels from their mean
            float diff = (abs(EndPoints[subset][ept][0] - avr) +
                          abs(EndPoints[subset][ept][1] - avr) +
                          abs(EndPoints[subset][ept][2] - avr)) / 3;

            const bool flatten = (diff < m_DiffLevel) && (avr > m_DiffLevel);

            for (int ch = 0; ch < 3; ++ch)
            {
                if (flatten)
                    fEndPoints[subset][ept][ch] = avr;
                else
                    fEndPoints[subset][ept][ch] = EndPoints[subset][ept][ch];
            }
        }
    }
}
|
|
|
|
//================================================
|
|
// Mode Pathern order to try on endpoints
|
|
// The order can be rearranged to set which modes gets processed first
|
|
// for now it is set in order.
|
|
//================================================
|
|
static int ModeFitOrder[MAX_BC6H_MODES +1] =
|
|
{
|
|
0, //0: N/A
|
|
// ---- 2 region lower bits ---
|
|
1, // 10 5 5 5
|
|
2, // 7 6 6 6
|
|
3, // 11 5 4 5
|
|
4, // 11 4 5 4
|
|
5, // 11 4 4 5
|
|
6, // 9 5 5 5
|
|
7, // 8 6 5 5
|
|
8, // 8 5 6 5
|
|
9, // 8 5 5 6
|
|
10, // 6 6 6 6
|
|
//------ 1 region high bits ---
|
|
11, // 10 10 10 10
|
|
12, // 11 9 9 9
|
|
13, // 12 8 8 8
|
|
14 // 16 4 4 4
|
|
};
|
|
|
|
// Chooses the BC6H mode used to store the block and hands the winning end
// points and indices to SaveCompressedBlockData().
//
//   BC6H_data - block descriptor holding the end points, indices, partition
//               and region produced by the pattern search
//   error     - error of the current solution; returned unchanged when no mode
//               is selected
//
// Two-region blocks try modes 1..MAX_TWOREGION_MODES, one-region blocks try
// modes 11..MAX_BC6H_MODES (positions in ModeFitOrder). For every mode the end
// points are quantized and transformed; modes whose end points fit are
// decompressed again, re-indexed (unsigned blocks only) and scored with
// CalcShapeError().
//
// Returns the error of the selected mode, or the incoming error when no mode
// was selected. In that case nothing is saved: slot 0 of the per-mode scratch
// arrays is never filled in, so it must not be passed on.
float BC6HBlockEncoder::EncodePattern(AMD_BC6H_Format &BC6H_data, float error)
{
    int max_subsets = BC6H_data.region;

    // Per-mode scratch data, indexed by position in ModeFitOrder.
    int F16EndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];      // temp end points used during calculations
    int quantEndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];    // end points to save for a given mode

    // ModePartition[] runs from 1 to 14: two-region blocks check modes 1..10,
    // one-region blocks check modes 11..14.
    int min_mode = (BC6H_data.region == 2) ? 1 : 11;
    int max_mode = (BC6H_data.region == 2) ? MAX_TWOREGION_MODES : MAX_BC6H_MODES;

    int   bestFit          = 0;         // 0 means "no mode selected"
    int   bestEndpointMode = 0;
    float bestError        = FLT_MAX;
    float bestEndpointsErr = FLT_MAX;
    float endPointErr      = 0;

    float best_EndPoints[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
    int   best_Indices[MAX_BC6H_MODES + 1][MAX_SUBSETS][MAX_SUBSET_SIZE];
    float opt_toterr[MAX_BC6H_MODES + 1];

    // for debugging
    memset(opt_toterr, 0, sizeof(opt_toterr));

    //
    // Note: only the end points vary between modes, so the mode is picked by
    // comparing the error of its end points against the original data.
    //
    for (int modes = min_mode; modes <= max_mode; ++modes)
    {
        memcpy(best_EndPoints[modes], BC6H_data.fEndPoints, sizeof(BC6H_data.fEndPoints));
        memcpy(best_Indices[modes], BC6H_data.shape_indices, sizeof(BC6H_data.shape_indices));

        QuantizeEndPointToF16Prec(best_EndPoints[modes], F16EndPoints[modes], max_subsets, ModePartition[ModeFitOrder[modes]].nbits);

        // Indices data to save for given mode
        SwapIndices(F16EndPoints[modes], best_Indices[modes], BC6H_data.entryCount, max_subsets, ModeFitOrder[modes], BC6H_data.d_shape_index);
        bool transformfit = TransformEndPoints(BC6H_data, F16EndPoints[modes], quantEndPoints[modes], max_subsets, ModeFitOrder[modes]);
        bool fits         = endpts_fit(F16EndPoints[modes], quantEndPoints[modes], ModeFitOrder[modes], max_subsets, m_isSigned);

        if (!(fits && transformfit))
            continue;

        // The compressed end points fit the mode. They have moved away from
        // the ones found by the pattern search, so rebuild the palette and
        // recalculate the error for this mode.
        float uncompressed[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG];
        if (BC6H_data.region == 1)
            decompress_endpoints1(BC6H_data, quantEndPoints[modes], uncompressed, ModeFitOrder[modes]);
        else
            decompress_endpoints2(BC6H_data, quantEndPoints[modes], uncompressed, ModeFitOrder[modes]);

        // Create the palette of colors along the vector formed by the end points
        palitizeEndPointsF(BC6H_data, uncompressed);

        // Recalculate the indices from the palette and the original image
        // data stored in BC6H_data.din[]
        if (!m_isSigned)
            ReIndexShapef(BC6H_data, best_Indices[modes]);

        // Calculate the error of the new tile vs the old tile data
        opt_toterr[modes] = CalcShapeError(BC6H_data, uncompressed, true);

        if (BC6H_data.region == 1)
        {
            endPointErr = CalcOneRegionEndPtsError(BC6H_data, uncompressed, best_Indices[modes]);
            if (endPointErr < bestEndpointsErr)
            {
                bestEndpointsErr = endPointErr;
                bestEndpointMode = modes;
            }
        }

        // Hold this mode's data if it is better than the last one checked.
        if (opt_toterr[modes] < bestError)
        {
            bool transformFit = true;

            if (!m_isSigned)
            {
                QuantizeEndPointToF16Prec(uncompressed, F16EndPoints[modes], max_subsets, ModePartition[ModeFitOrder[modes]].nbits);
                SwapIndices(F16EndPoints[modes], best_Indices[modes], BC6H_data.entryCount, max_subsets, ModeFitOrder[modes], BC6H_data.d_shape_index);
                transformFit = TransformEndPoints(BC6H_data, F16EndPoints[modes], quantEndPoints[modes], max_subsets, ModeFitOrder[modes]);
            }

            if (transformFit)
            {
                if (BC6H_data.region == 1)
                {
                    // Lower of the current mode and the best end-point mode.
                    bestFit = (modes <= bestEndpointMode) ? modes : bestEndpointMode;
                }
                else
                {
                    bestFit = modes;
                }
                bestError = opt_toterr[bestFit];
                error     = bestError;
            }
        }
    }

    // Only save when a mode was actually selected.
    if (bestFit != 0)
        SaveCompressedBlockData(BC6H_data, quantEndPoints[bestFit], best_Indices[bestFit], max_subsets, ModeFitOrder[bestFit]);

    return error;
}
|
|
|
|
//==================================================================================
|
|
// CompressBlock
|
|
// in[] is half float32 data [0..1] for unsigned and [-1..+1] for signed
|
|
// it will be converted to 16 bit half CMP_HALFSHORT (short with signed component) for processing
|
|
//
|
|
// out is 128 bits BC6H Encoded data
|
|
//==================================================================================
|
|
|
|
//#define DEBUG_A_BLOCK
#ifdef DEBUG_A_BLOCK
// Fixed 16-texel input used in place of the real block data when
// DEBUG_A_BLOCK is defined. Columns are R, G, B and an unused fourth channel.
float Testdin[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG] =
{
    { 29440.0f, 29440.0f, 30255.0f, 0.0f },
    { 29440.0f, 29440.0f, 30123.0f, 0.0f },
    { 29440.0f, 29440.0f, 29440.0f, 0.0f },
    { 29440.0f, 29440.0f, 29440.0f, 0.0f },
    { 29440.0f, 29440.0f, 30251.0f, 0.0f },
    { 29440.0f, 29440.0f, 30105.0f, 0.0f },
    { 29440.0f, 29440.0f, 29440.0f, 0.0f },
    { 29440.0f, 29440.0f, 29440.0f, 0.0f },
    { 29440.0f, 29440.0f, 30246.0f, 0.0f },
    { 29440.0f, 29440.0f, 30086.0f, 0.0f },
    { 29440.0f, 29440.0f, 29440.0f, 0.0f },
    { 29440.0f, 29440.0f, 29440.0f, 0.0f },
    { 29440.0f, 29440.0f, 30240.0f, 0.0f },
    { 29440.0f, 29440.0f, 30047.0f, 0.0f },
    { 29440.0f, 29440.0f, 29440.0f, 0.0f },
    { 29440.0f, 29440.0f, 29440.0f, 0.0f },
};
#endif
|
|
|
|
// Compresses one block of float RGB texels into a BC6H block.
//
//   in  - source texels; channels 0..2 are read
//   out - receives the encoded block
//
// Steps: convert the input to half-float bit patterns stored as floats in
// BC6H_data.din, evaluate the no-partition case and every two-region shape
// with FindBestPattern(), pick a mode with EncodePattern() and write the
// result with SaveDataBlock(). When no mode was selected (m_mode == 0) the
// first 16 bytes of Cmp_Red_Block are written instead.
//
// Returns the error reported by EncodePattern().
//
// NOTE(review): when the no-partition candidate stays the best one
// (bestShape == -1) nothing is restored after a losing two-region shape, so
// BC6H_data appears to reach EncodePattern() holding the last shape tried
// rather than the one-region result. Behaviour is kept as-is - confirm whether
// that is intended.
float BC6HBlockEncoder::CompressBlock(float in[MAX_SUBSET_SIZE][MAX_DIMENSION_BIG], BYTE out[COMPRESSED_BLOCK_SIZE])
{
#ifdef DEBUG_PATTERNS
    srand(100);
    // Save delta image to file
    fi = fopen("deltaImages.txt", "w");
#endif

    float lowestError  = FLT_MAX;
    float shapeError   = FLT_MAX;
    int   winningShape = 0;

    AMD_BC6H_Format BC6H_data;
    memset(&BC6H_data, 0, sizeof(AMD_BC6H_Format));

    float normalization = 1.0;  // For future use

    // Half floats are restricted to 0..0x7BFF plus a sign component.
    for (int texel = 0; texel < BC6H_MAX_SUBSET_SIZE; ++texel)
    {
        for (int ch = 0; ch < 3; ++ch)
        {
            // "< 0.00001" rather than "< 0" so that -0.0 takes this path too.
            if (in[texel][ch] < 0.00001 || isnan(in[texel][ch]))
            {
                if (m_isSigned)
                {
                    BC6H_data.din[texel][ch] = (isnan(in[texel][ch])) ? F16NEGPREC_LIMIT_VAL : -CMP_HALF(abs(in[texel][ch] / normalization)).bits();
                    if (BC6H_data.din[texel][ch] < F16NEGPREC_LIMIT_VAL)
                        BC6H_data.din[texel][ch] = F16NEGPREC_LIMIT_VAL;
                }
                else
                {
                    BC6H_data.din[texel][ch] = 0.0;
                }
            }
            else
            {
                BC6H_data.din[texel][ch] = CMP_HALF(in[texel][ch] / normalization).bits();
            }
        }

        BC6H_data.din[texel][3] = 0.0;
    }

    BC6H_data.issigned = m_isSigned;

#ifdef DEBUG_A_BLOCK
    // Used for debugging blocks!
    for (int texel = 0; texel < BC6H_MAX_SUBSET_SIZE; ++texel)
        for (int ch = 0; ch < 4; ++ch)
            BC6H_data.din[texel][ch] = Testdin[texel][ch];
#endif

    if (m_useMonoShapePatterns)
    {
        // Reserved feature MONOSHAPE_PATTERNS
    }

    // run through no partition first
    shapeError = FindBestPattern(BC6H_data, false, 0);
    if (shapeError < lowestError)
    {
        lowestError  = shapeError;
        winningShape = -1;
        memcpy(BC6H_data.cur_best_shape_indices, BC6H_data.shape_indices, sizeof(BC6H_data.shape_indices));
        memcpy(BC6H_data.cur_best_partition, BC6H_data.partition, sizeof(BC6H_data.partition));
        memcpy(BC6H_data.cur_best_fEndPoints, BC6H_data.fEndPoints, sizeof(BC6H_data.fEndPoints));
        memcpy(BC6H_data.cur_best_entryCount, BC6H_data.entryCount, sizeof(BC6H_data.entryCount));
        BC6H_data.d_shape_index = winningShape;
    }

    // now run through all two-region shapes to find the best pattern
    for (int shape = 0; shape < MAX_BC6H_PARTITIONS; ++shape)
    {
        shapeError = FindBestPattern(BC6H_data, true, shape);

        if (shapeError < lowestError)
        {
            // New best: remember its state.
            lowestError  = shapeError;
            winningShape = shape;

            memcpy(BC6H_data.cur_best_shape_indices, BC6H_data.shape_indices, sizeof(BC6H_data.shape_indices));
            memcpy(BC6H_data.cur_best_partition, BC6H_data.partition, sizeof(BC6H_data.partition));
            memcpy(BC6H_data.cur_best_fEndPoints, BC6H_data.fEndPoints, sizeof(BC6H_data.fEndPoints));
            memcpy(BC6H_data.cur_best_entryCount, BC6H_data.entryCount, sizeof(BC6H_data.entryCount));
            BC6H_data.d_shape_index = winningShape;
        }
        else if (winningShape != -1)
        {
            // This shape lost: put the remembered two-region state back.
            BC6H_data.d_shape_index = winningShape;
            memcpy(BC6H_data.shape_indices, BC6H_data.cur_best_shape_indices, sizeof(BC6H_data.shape_indices));
            memcpy(BC6H_data.partition, BC6H_data.cur_best_partition, sizeof(BC6H_data.partition));
            memcpy(BC6H_data.fEndPoints, BC6H_data.cur_best_fEndPoints, sizeof(BC6H_data.fEndPoints));
            memcpy(BC6H_data.entryCount, BC6H_data.cur_best_entryCount, sizeof(BC6H_data.entryCount));
        }
    }

    // Optimize the result for encoding
    lowestError = EncodePattern(BC6H_data, lowestError);

    if (BC6H_data.m_mode != 0)
    {
        // do final encoding and save to output block
        SaveDataBlock(BC6H_data, out);
    }
    else
    {
        memcpy(out, Cmp_Red_Block, 16);
    }

#ifdef DEBUG_PATTERNS
    if (fi)
        fclose(fi);
#endif

    g_block++;

    return (float)lowestError;
}
|