791 lines
27 KiB
C++

//===============================================================================
// Copyright (c) 2007-2016 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2004-2006 ATI Technologies Inc.
//===============================================================================
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//
// File Name: Codec_ASTC.cpp
// Description: implementation of the CCodec_ASTC class
//
//////////////////////////////////////////////////////////////////////////////
#pragma warning(disable:4100) // Ignore warnings of unreferenced formal parameters
#pragma warning(disable:4101) // Ignore warnings of unreferenced local variable
#pragma warning(disable:4996) // This function or variable may be unsafe
#include "common.h"
#include "compressonator.h"
#include "astc/codec_astc.h"
#include "astc/astc_library.h"
#include "astc/arm/astc_codec_internals.h"
#include "debug.h"
#include <chrono>
#include <cstring>
#ifdef ASTC_COMPDEBUGGER
#include "compclient.h"
extern CompViewerClient g_CompClient;
#endif
//======================================================================================
#define USE_MULTITHREADING 1
// Gets the total numver of active processor cores on the running host system
extern CMP_INT CMP_GetNumberOfProcessors();
struct ASTCEncodeThreadParam {
ASTCBlockEncoder *encoder;
// Encoder params
astc_codec_image *input_image;
uint8_t *bp;
int xdim;
int ydim;
int zdim;
int x;
int y;
int z;
astc_decode_mode decode_mode;
const error_weighting_params * ewp;
volatile CMP_BOOL run;
volatile CMP_BOOL exit;
};
static ASTCEncodeThreadParam *g_EncodeParameterStorage = NULL;
//////////////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////////////
CCodec_ASTC::CCodec_ASTC() : CCodec_DXTC(CT_ASTC) {
m_LibraryInitialized = false;
m_AbortRequested = false;
m_NumThreads = 0;
m_NumEncodingThreads = 0; // new auto setting to use max processors * 2 threads
m_EncodingThreadHandle = NULL;
m_xdim = 4;
m_ydim = 4;
m_zdim = 1;
m_decoder = NULL;
m_Quality = 0.05;
}
CCodec_ASTC::~CCodec_ASTC() {
if (m_LibraryInitialized) {
if (m_Use_MultiThreading) {
// Tell all the live threads that they can exit when they have finished any current work
for (int i = 0; i < m_LiveThreads; i++) {
// If a thread is in the running state then we need to wait for it to finish
// any queued work from the producer before we can tell it to exit.
//
// If we don't wait then there is a race condition here where we have
// told the thread to run but it hasn't yet been scheduled - if we set
// the exit flag before it runs then its block will not be processed.
#pragma warning(push)
#pragma warning(disable:4127) //warning C4127: conditional expression is constant
while (1) {
if (g_EncodeParameterStorage[i].run != TRUE) {
break;
}
}
#pragma warning(pop)
// Signal to the thread that it can exit
g_EncodeParameterStorage[i].exit = TRUE;
}
// Now wait for all threads to have exited
if (m_LiveThreads > 0) {
for ( CMP_DWORD dwThread = 0; dwThread < m_LiveThreads; dwThread++ ) {
std::thread& curThread = m_EncodingThreadHandle[dwThread];
curThread.join();
}
}
for (unsigned int i = 0; i < m_LiveThreads; i++) {
std::thread& curThread = m_EncodingThreadHandle[i];
curThread = std::thread();
}
delete[] m_EncodingThreadHandle;
} // MultiThreading
m_EncodingThreadHandle = NULL;
if (g_EncodeParameterStorage) {
delete[] g_EncodeParameterStorage;
g_EncodeParameterStorage = NULL;
}
for (int i = 0; i < m_NumEncodingThreads; i++) {
if (m_encoder[i]) {
delete m_encoder[i];
m_encoder[i] = NULL;
}
}
if (m_decoder) {
delete m_decoder;
m_decoder = NULL;
}
m_LibraryInitialized = false;
}
}
void CCodec_ASTC::find_closest_blockdim_2d(float target_bitrate, int *x, int *y, int consider_illegal) {
int blockdims[6] = { 4, 5, 6, 8, 10, 12 };
float best_error = 1000;
float aspect_of_best = 1;
int i, j;
// Y dimension
for (i = 0; i < 6; i++) {
// X dimension
for (j = i; j < 6; j++) {
// NxN MxN 8x5 10x5 10x6
int is_legal = (j==i) || (j==i+1) || (j==3 && j==1) || (j==4 && j==1) || (j==4 && j==2);
if(consider_illegal || is_legal) {
float bitrate = 128.0f / (blockdims[i] * blockdims[j]);
float bitrate_error = fabs(bitrate - target_bitrate);
float aspect = (float)blockdims[j] / blockdims[i];
if (bitrate_error < best_error || (bitrate_error == best_error && aspect < aspect_of_best)) {
*x = blockdims[j];
*y = blockdims[i];
best_error = bitrate_error;
aspect_of_best = aspect;
}
}
}
}
}
void CCodec_ASTC::find_closest_blockxy_2d(int *x, int *y, int consider_illegal) {
int blockdims[6] = { 4, 5, 6, 8, 10, 12 };
bool exists_x = std::find(std::begin(blockdims), std::end(blockdims), (*x)) != std::end(blockdims);
bool exists_y = std::find(std::begin(blockdims), std::end(blockdims), (*y)) != std::end(blockdims);
if (exists_x && exists_y) {
if ((*x) < (*y)) {
int temp = *x;
*x = *y;
*y = temp;
}
float bitrateF = float(128.0f / ((*x)*(*y)));
find_closest_blockdim_2d(bitrateF, x, y, 0);
} else {
float bitrateF = float(128.0f / ((*x)*(*y)));
find_closest_blockdim_2d(bitrateF, x, y, 0);
}
}
void CCodec_ASTC::find_closest_blockdim_3d(float target_bitrate, int *x, int *y, int *z, int consider_illegal) {
int blockdims[4] = { 3, 4, 5, 6 };
float best_error = 1000;
float aspect_of_best = 1;
int i, j, k;
for (i = 0; i < 4; i++) // Z
for (j = i; j < 4; j++) // Y
for (k = j; k < 4; k++) { // X
// NxNxN MxNxN MxMxN
int is_legal = ((k==j)&&(j==i)) || ((k==j+1)&&(j==i)) || ((k==j)&&(j==i+1));
if(consider_illegal || is_legal) {
float bitrate = 128.0f / (blockdims[i] * blockdims[j] * blockdims[k]);
float bitrate_error = fabs(bitrate - target_bitrate);
float aspect = (float)blockdims[k] / blockdims[j] + (float)blockdims[j] / blockdims[i] + (float)blockdims[k] / blockdims[i];
if (bitrate_error < best_error || (bitrate_error == best_error && aspect < aspect_of_best)) {
*x = blockdims[k];
*y = blockdims[j];
*z = blockdims[i];
best_error = bitrate_error;
aspect_of_best = aspect;
}
}
}
}
bool CCodec_ASTC::SetParameter(const CMP_CHAR* pszParamName, CMP_CHAR* sValue) {
if (sValue == NULL) return false;
if(strcmp(pszParamName, CodecParameters::NumThreads) == 0) {
m_NumThreads = (CMP_BYTE) std::stoi(sValue) & 0xFF;
}
if(strcmp(pszParamName, "BlockRate") == 0) {
// BlockRate can be a bit value or dimension
if (strchr(sValue, '.') != NULL) {
m_target_bitrate = static_cast < float >(atof(sValue));
find_closest_blockdim_2d(m_target_bitrate, &m_xdim, &m_ydim, DEBUG_ALLOW_ILLEGAL_BLOCK_SIZES);
} else {
int dimensions = sscanf(sValue, "%dx%dx", &m_xdim, &m_ydim);
if (dimensions < 2) return false;
find_closest_blockxy_2d(&m_xdim, &m_ydim, DEBUG_ALLOW_ILLEGAL_BLOCK_SIZES);
// Valid block sizes are for 2D support only (3D is todo later)
// are in cominations of {4,5,6,8,10,12}
if ((m_xdim < 4) || (m_xdim > 12)) return false;
if ((m_ydim < 4) || (m_ydim > 12)) return false;
if ((m_xdim == 7) || (m_xdim == 9) || (m_xdim == 11)) return false;
if ((m_ydim == 7) || (m_ydim == 9) || (m_ydim == 11)) return false;
}
}
if (strcmp(pszParamName, "Quality") == 0) {
m_Quality = std::stof(sValue);
if ((m_Quality < 0) || (m_Quality > 1.0)) {
return false;
}
} else
return CCodec_DXTC::SetParameter(pszParamName, sValue);
return true;
}
bool CCodec_ASTC::SetParameter(const CMP_CHAR* pszParamName, CMP_DWORD dwValue) {
if(strcmp(pszParamName, CodecParameters::NumThreads) == 0) {
m_NumThreads = (CMP_BYTE) dwValue;
} else
return CCodec_DXTC::SetParameter(pszParamName, dwValue);
return true;
}
bool CCodec_ASTC::SetParameter(const CMP_CHAR* pszParamName, CODECFLOAT fValue) {
if (strcmp(pszParamName, "Quality") == 0)
m_Quality = fValue;
else
return CCodec_DXTC::SetParameter(pszParamName, fValue);
return true;
}
//
// Thread procedure for encoding a block
//
// The thread stays alive, and expects blocks to be pushed to it by a producer
// process that signals it when new work is available. When the producer is finished
// it should set the exit flag in the parameters to allow the tread to quit
//
#include "astc_host.h"
ASTC_Encoder::ASTC_Encode g_ASTCEncode;
unsigned int ASTCThreadProcEncode(void* param) {
ASTCEncodeThreadParam *tp = (ASTCEncodeThreadParam*)param;
while (tp->exit == FALSE) {
if (tp->run == TRUE) {
g_ASTCEncode.m_xdim = tp->xdim;
g_ASTCEncode.m_ydim = tp->ydim;
g_ASTCEncode.m_zdim = tp->zdim;
tp->encoder->CompressBlock_kernel(
(ASTC_Encoder::astc_codec_image *)tp->input_image,
tp->bp,
tp->x,
tp->y,
tp->z,
&g_ASTCEncode);
tp->run = FALSE;
}
std::this_thread::sleep_for(std::chrono::milliseconds(0));
}
return 0;
}
CodecError CCodec_ASTC::InitializeASTCLibrary() {
if (!m_LibraryInitialized) {
g_ASTCEncode.m_decode_mode = ASTC_Encoder::DECODE_HDR;
g_ASTCEncode.m_rgb_force_use_of_hdr = 0;
g_ASTCEncode.m_alpha_force_use_of_hdr = 0;
g_ASTCEncode.m_perform_srgb_transform = 0;
g_ASTCEncode.m_Quality = (float)m_Quality;
g_ASTCEncode.m_target_bitrate = m_target_bitrate;
g_ASTCEncode.m_xdim = m_xdim;
g_ASTCEncode.m_ydim = m_ydim;
g_ASTCEncode.m_zdim = m_zdim;
ASTC_Encoder::init_ASTC(&g_ASTCEncode);
//====================== Threads
for (CMP_DWORD i = 0; i < MAX_ASTC_THREADS; i++) {
m_encoder[i] = NULL;
}
// Create threaded encoder instances
m_LiveThreads = 0;
m_LastThread = 0;
m_NumEncodingThreads = MIN(m_NumThreads, (decltype(m_NumThreads))MAX_ASTC_THREADS);
if (m_NumEncodingThreads == 0) {
m_NumEncodingThreads = CMP_GetNumberOfProcessors();
if (m_NumEncodingThreads <= 2)
m_NumEncodingThreads = 8; // fallback to a default!
if (m_NumEncodingThreads > 128)
m_NumEncodingThreads = 128;
}
m_Use_MultiThreading = (m_NumEncodingThreads != 1);
g_EncodeParameterStorage = new ASTCEncodeThreadParam[m_NumEncodingThreads];
if (!g_EncodeParameterStorage) {
return CE_Unknown;
}
m_EncodingThreadHandle = new std::thread[m_NumEncodingThreads];
if (!m_EncodingThreadHandle) {
delete[] g_EncodeParameterStorage;
g_EncodeParameterStorage = NULL;
return CE_Unknown;
}
CMP_INT i;
for (i = 0; i < m_NumEncodingThreads; i++) {
// Create single encoder instance
m_encoder[i] = new ASTCBlockEncoder();
// Cleanup if problem!
if (!m_encoder[i]) {
delete[] g_EncodeParameterStorage;
g_EncodeParameterStorage = NULL;
delete[] m_EncodingThreadHandle;
m_EncodingThreadHandle = NULL;
for (CMP_INT j = 0; j<i; j++) {
delete m_encoder[j];
m_encoder[j] = NULL;
}
return CE_Unknown;
}
#ifdef USE_DBGTRACE
//DbgTrace(("Encoder[%d]:ModeMask %X, Quality %f", i, m_ModeMask, m_Quality));
#endif
}
// Create the encoding threads
for (i = 0; i<m_NumEncodingThreads; i++) {
// Initialize thread parameters.
g_EncodeParameterStorage[i].encoder = m_encoder[i];
// Inform the thread that at the moment it doesn't have any work to do
// but that it should wait for some and not exit
g_EncodeParameterStorage[i].run = FALSE;
g_EncodeParameterStorage[i].exit = FALSE;
m_EncodingThreadHandle[i] = std::thread(
ASTCThreadProcEncode,
(void*)&g_EncodeParameterStorage[i]
);
m_LiveThreads++;
}
// Create single decoder instance
m_decoder = new ASTCBlockDecoder();
if (!m_decoder) {
for (CMP_INT j = 0; j<m_NumEncodingThreads; j++) {
delete m_encoder[j];
m_encoder[j] = NULL;
}
return CE_Unknown;
}
m_LibraryInitialized = true;
}
return CE_OK;
}
CodecError CCodec_ASTC::EncodeASTCBlock(
astc_codec_image *input_image,
uint8_t *bp,
int xdim,
int ydim,
int zdim,
int x,
int y,
int z) {
if (m_Use_MultiThreading) {
CMP_WORD threadIndex;
// Loop and look for an available thread
CMP_BOOL found = FALSE;
threadIndex = m_LastThread;
while (found == FALSE) {
if (g_EncodeParameterStorage == NULL)
return CE_Unknown;
if (g_EncodeParameterStorage[threadIndex].run == FALSE) {
found = TRUE;
break;
}
// Increment and wrap the thread index
threadIndex++;
if (threadIndex == m_LiveThreads) {
threadIndex = 0;
}
}
m_LastThread = threadIndex;
g_EncodeParameterStorage[threadIndex].input_image = input_image;
g_EncodeParameterStorage[threadIndex].bp = bp;
g_EncodeParameterStorage[threadIndex].xdim = xdim;
g_EncodeParameterStorage[threadIndex].ydim = ydim;
g_EncodeParameterStorage[threadIndex].zdim = zdim;
g_EncodeParameterStorage[threadIndex].x = x;
g_EncodeParameterStorage[threadIndex].y = y;
g_EncodeParameterStorage[threadIndex].z = z;
// Tell the thread to start working
g_EncodeParameterStorage[threadIndex].run = TRUE;
} else {
g_ASTCEncode.m_xdim = xdim;
g_ASTCEncode.m_ydim = ydim;
g_ASTCEncode.m_zdim = zdim;
m_encoder[0]->CompressBlock_kernel(
(ASTC_Encoder::astc_codec_image *)input_image,
bp,
x,
y,
z,
&g_ASTCEncode);
}
return CE_OK;
}
CodecError CCodec_ASTC::FinishASTCEncoding(void) {
if (!m_LibraryInitialized) {
return CE_Unknown;
}
if (!g_EncodeParameterStorage) {
return CE_Unknown;
}
if (m_Use_MultiThreading) {
// Wait for all the live threads to finish any current work
for (CMP_DWORD i = 0; i < m_LiveThreads; i++) {
// If a thread is in the running state then we need to wait for it to finish
// its work from the producer
while (g_EncodeParameterStorage[i].run == TRUE) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
}
}
return CE_OK;
}
struct encode_astc_image_info {
int xdim;
int ydim;
int zdim;
const error_weighting_params *ewp;
uint8_t *buffer;
int thread_id;
int threadcount;
astc_decode_mode decode_mode;
swizzlepattern swz_encode;
volatile int *counters;
volatile int *threads_completed;
const astc_codec_image *input_image;
Codec_Feedback_Proc pFeedbackProc;
CMP_DWORD_PTR pUser1;
CMP_DWORD_PTR pUser2;
};
#define USE_ARM_CODE
CodecError CCodec_ASTC::Compress(CCodecBuffer& bufferIn, CCodecBuffer& bufferOut, Codec_Feedback_Proc pFeedbackProc, CMP_DWORD_PTR pUser1, CMP_DWORD_PTR pUser2) {
m_AbortRequested = false;
int xsize = bufferIn.GetWidth();
int ysize = bufferIn.GetHeight();
int zsize = 1; //todo: add depth to support 3d textures
m_xdim = bufferOut.GetBlockWidth();
m_ydim = bufferOut.GetBlockHeight();
m_zdim = 1;
CodecError err = InitializeASTCLibrary();
if (err != CE_OK) return err;
#ifdef ASTC_COMPDEBUGGER
CompViewerClient g_CompClient;
if (g_CompClient.connect()) {
#ifdef USE_DBGTRACE
DbgTrace(("-------> Remote Server Connected\n"));
#endif
}
#endif
#ifdef USE_DBGTRACE
DbgTrace(("IN : BufferType %d ChannelCount %d ChannelDepth %d", bufferIn.GetBufferType(), bufferIn.GetChannelCount(), bufferIn.GetChannelDepth()));
DbgTrace((" : Height %d Width %d Pitch %d isFloat %d", bufferIn.GetHeight(), bufferIn.GetWidth(), bufferIn.GetWidth(), bufferIn.IsFloat()));
DbgTrace(("OUT: BufferType %d ChannelCount %d ChannelDepth %d", bufferOut.GetBufferType(), bufferOut.GetChannelCount(), bufferOut.GetChannelDepth()));
DbgTrace((" : Height %d Width %d Pitch %d isFloat %d", bufferOut.GetHeight(), bufferOut.GetWidth(), bufferOut.GetWidth(), bufferOut.IsFloat()));
#endif
int bitness = 0; //todo: replace astc_codec_image with bufferIn and rewrite fetch_imageblock()
switch (bufferIn.GetBufferType()) {
case CBT_BGRA8888:
case CBT_ARGB8888:
case CBT_RGBA8888:
case CBT_RGB888:
case CBT_RG8:
case CBT_R8:
bitness = 8;
break;
case CBT_RGBA8888S:
case CBT_RGB888S:
case CBT_RG8S:
case CBT_R8S:
bitness = 8;
break;
case CBT_RGBA2101010:
break;
case CBT_RGBA16:
case CBT_RG16:
case CBT_R16:
break;
case CBT_RGBA32:
case CBT_RG32:
case CBT_R32:
break;
case CBT_RGBA16F:
case CBT_RG16F:
case CBT_R16F:
break;
case CBT_RGBA32F:
case CBT_RG32F:
case CBT_R32F:
break;
default:
break;
}
if (bitness != 8)
assert("Unsupported type of input buffer");
astc_codec_image_cpu *input_image = allocate_image_cpu(bitness, xsize, ysize, zsize, 0);
if (!input_image)
assert("Unable to allocate image buffer");
// Loop through the original input image and setup compression threads for each
// block to encode we will load the buffer to pass to ASTC code as 8 bit 4x4 blocks
// the fill in source image. ASTC code will then use the adaptive sizes for process on the input
BYTE *pData = bufferIn.GetData();
int ii = 0;
for (int y = 0; y < ysize; y++) {
for (int x = 0; x < xsize; x++) {
input_image->imagedata8[0][y][4*x ] = pData[ii]; // Red
ii++;
input_image->imagedata8[0][y][4 * x + 1] = pData[ii]; // Green
ii++;
input_image->imagedata8[0][y][4 * x + 2] = pData[ii]; // Blue
ii++;
input_image->imagedata8[0][y][4 * x + 3] = pData[ii]; // Alpha
ii++;
}
}
m_NumEncodingThreads = MIN(m_NumThreads, (decltype(m_NumThreads))MAX_ASTC_THREADS);
if (m_NumEncodingThreads == 0) {
m_NumEncodingThreads = CMP_GetNumberOfProcessors();
if (m_NumEncodingThreads <= 2)
m_NumEncodingThreads = 8; // fallback to a default!
if (m_NumEncodingThreads > 128)
m_NumEncodingThreads = 128;
}
// Common ARM and AMD Code
CodecError result = CE_OK;
int xdim = m_xdim;
int ydim = m_ydim;
int zdim = m_zdim;
uint8_t *bufferOutput = bufferOut.GetData();
// Common ARM and Compressonator Code
int x, y, z, i;
int xblocks = (xsize + xdim - 1) / xdim;
int yblocks = (ysize + ydim - 1) / ydim;
int zblocks = (zsize + zdim - 1) / zdim;
float TotalBlocks = (float) (yblocks * xblocks);
int processingBlock = 0;
for (z = 0; z < zblocks; z++) {
for (y = 0; y < yblocks; y++) {
for (x = 0; x < xblocks; x++) {
int offset = ((z * yblocks + y) * xblocks + x) * 16;
uint8_t *bp = bufferOutput + offset;
EncodeASTCBlock((astc_codec_image *)input_image, bp, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim);
processingBlock++;
}
if (pFeedbackProc) {
float fProgress = 100.f * ((float)(processingBlock) / TotalBlocks);
if (pFeedbackProc(fProgress, pUser1, pUser2)) {
result = CE_Aborted;
break;
}
}
}
}
CodecError EncodeResult = FinishASTCEncoding();
if (result != CE_Aborted)
result = EncodeResult;
destroy_image_cpu(input_image);
#ifdef ASTC_COMPDEBUGGER
g_CompClient.disconnect();
#endif
return result;
}
// notes:
// Slow CPU based decompression : Should look into also using HW based decompression with this interface
//
CodecError CCodec_ASTC::Decompress(CCodecBuffer& bufferIn, CCodecBuffer& bufferOut, Codec_Feedback_Proc pFeedbackProc, CMP_DWORD_PTR pUser1, CMP_DWORD_PTR pUser2) {
m_xdim = bufferIn.GetBlockWidth();
m_ydim = bufferIn.GetBlockHeight();
m_zdim = 1;
CodecError err = InitializeASTCLibrary();
if (err != CE_OK) return err;
// Our Compressed data Blocks are always 128 bit long (4x4 blocks)
const CMP_DWORD imageWidth = bufferIn.GetWidth();
const CMP_DWORD imageHeight = bufferIn.GetHeight();
const CMP_DWORD imageDepth = 1;
const BYTE bitness = 8;
const CMP_DWORD CompBlockX = bufferIn.GetBlockWidth();
const CMP_DWORD CompBlockY = bufferIn.GetBlockHeight();
CMP_BYTE Block_Width = bufferIn.GetBlockWidth();
CMP_BYTE Block_Height = bufferIn.GetBlockHeight();
const CMP_DWORD dwBlocksX = ((bufferIn.GetWidth() + (CompBlockX - 1)) / CompBlockX);
const CMP_DWORD dwBlocksY = ((bufferIn.GetHeight()+ (CompBlockY - 1)) / CompBlockY);
const CMP_DWORD dwBlocksZ = 1;
const CMP_DWORD dwBufferInDepth = 1;
// Override the current input buffer Pitch size (Since it will be set according to the Compressed Block Sizes
// and not to the Compressed Codec data which is for ASTC 16 Bytes per block x Number of blocks per row
bufferIn.SetPitch(16 * dwBlocksX);
// Output data size Pitch
CMP_DWORD dwPitch = bufferOut.GetPitch();
// Output Buffer
BYTE *pDataOut = bufferOut.GetData();
const CMP_DWORD dwBlocksXY = dwBlocksX*dwBlocksY;
for(CMP_DWORD cmpRowY = 0; cmpRowY < dwBlocksY; cmpRowY++) { // Compressed images row = height
for(CMP_DWORD cmpColX = 0; cmpColX < dwBlocksX; cmpColX++) { // Compressed images Col = width
union FBLOCKS {
float decodedBlock[144][4]; // max 12x12 block size
float destBlock[576]; // max 12x12x4
} DecData;
union BBLOCKS {
CMP_DWORD compressedBlock[4];
BYTE out[16];
BYTE in[16];
} CompData;
bufferIn.ReadBlock(cmpColX*4, cmpRowY*4, CompData.compressedBlock, 4);
// Encode to the appropriate location in the compressed image
m_decoder->DecompressBlock(Block_Width, Block_Height, bitness, DecData.decodedBlock,CompData.in);
// Now that we have a decoded block lets copy that data over to the target image buffer
CMP_DWORD outCol = cmpColX*Block_Width;
CMP_DWORD outRow = cmpRowY*Block_Height;
CMP_DWORD outImgRow = outRow;
CMP_DWORD outImgCol = outCol;
for (int row = 0; row < Block_Height; row++) {
CMP_DWORD nextRowCol = (outRow+row)*dwPitch + (outCol * 4);
CMP_BYTE* pData = (CMP_BYTE*)(pDataOut + nextRowCol);
if ((outImgRow + row) < imageHeight) {
outImgCol = outCol;
for (int col = 0; col < Block_Width; col++) {
CMP_DWORD w = outImgCol + col;
if (w < imageWidth) {
int index = row*Block_Width + col;
*pData++ = (CMP_BYTE)DecData.decodedBlock[index][BC_COMP_RED];
*pData++ = (CMP_BYTE)DecData.decodedBlock[index][BC_COMP_GREEN];
*pData++ = (CMP_BYTE)DecData.decodedBlock[index][BC_COMP_BLUE];
*pData++ = (CMP_BYTE)DecData.decodedBlock[index][BC_COMP_ALPHA];
} else break;
}
}
}
}
if (pFeedbackProc) {
float fProgress = 100.f * (cmpRowY * dwBlocksX) / dwBlocksXY;
if (pFeedbackProc(fProgress, pUser1, pUser2)) {
return CE_Aborted;
}
}
}
return CE_OK;
}