//=============================================================================== // Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved. //=============================================================================== // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions : // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
//
//
//  File Name:   Codec_APC.cpp
//  Description: implementation of the CCodec_APC class
//
//////////////////////////////////////////////////////////////////////////////

#pragma warning(disable:4100)    // Ignore warnings of unreferenced formal parameters

#ifdef _WIN32
#include "common.h"

#ifdef USE_APC
#include "codec_apc.h"
// NOTE(review): the name of the header included at this position was lost in the
// copy of the file this revision was made from. The standard headers below are the
// ones the code in this file uses directly (std::thread, std::chrono, std::stoi /
// std::stof, strcmp / memset / memcpy, malloc / free) - confirm against the original.
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <string>
#include <thread>
#include "debug.h"

#ifdef APC_COMPDEBUGGER
#include "compclient.h"
#endif

//======================================================================================
// #define USE_PRINTF
// #define USE_NOMULTITHREADING

#ifdef USE_FILEIO
// NOTE(review): header name lost as above; <stdio.h> is what the FILE type requires.
#include <stdio.h>
FILE * gt_File = NULL;
int gt_blockcount = 0;
int gt_total_MSE = 0;
#endif

//
// Thread procedure for encoding a block
//
// The thread stays alive, and expects blocks to be pushed to it by a producer
// that sets the `run` flag when new work is available. When the producer is finished
// it should set the `exit` flag in the parameters to allow the thread to quit.
//
// param : pointer to the APCEncodeThreadParam owned by the codec for this worker.
// Returns 0 once the exit flag has been observed.
//
// NOTE(review): `run` and `exit` are read and written from two threads without any
// synchronisation visible in this file; their declaration lives in codec_apc.h.
//
unsigned int    _stdcall APCThreadProcEncode(void* param)
{
    APCEncodeThreadParam *tp = (APCEncodeThreadParam*)param;

    while (tp->exit == FALSE)
    {
        if (tp->run == TRUE)
        {
            tp->encoder->CompressBlock(tp->in, tp->out);
            // Clearing the flag tells the producer that this worker is idle again
            tp->run = FALSE;
        }

        // Give up the remainder of the time slice between polls
        std::this_thread::sleep_for(std::chrono::milliseconds(0));
    }

    return 0;
}

static APCEncodeThreadParam *g_EncodeParameterStorage = NULL;

//////////////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////////////

// Sets the defaults (8 threads, 8x8x1 blocks, quality 0.05). Encoder, decoder and
// worker threads are not created here; that happens in InitializeAPCLibrary().
CCodec_APC::CCodec_APC() : CCodec_DXTC(CT_APC)
{
    m_LibraryInitialized     = false;

    m_Use_MultiThreading     = true;
    m_NumThreads             = 8;
    m_NumEncodingThreads     = m_NumThreads;
    m_EncodingThreadHandle   = NULL;
    m_EncodeParameterStorage = NULL;    // tested for NULL by FinishAPCEncoding() and the destructor
    m_decoder                = NULL;    // tested for NULL by the destructor
    m_LiveThreads            = 0;
    m_LastThread             = 0;

    m_xdim    = 8;
    m_ydim    = 8;
    m_zdim    = 1;
    m_quality = 0.05f;
}

// Sets a parameter from its string representation.
//
// Handles CodecParameters::NumThreads and "Quality" (accepted range 0..1); any other
// name is forwarded to CCodec_DXTC. Returns false for a NULL value or a Quality
// outside the accepted range, in which case the stored quality is left untouched.
// std::stoi / std::stof throw on text that cannot be converted.
bool CCodec_APC::SetParameter(const CMP_CHAR* pszParamName, CMP_CHAR* sValue)
{
    if (sValue == NULL)
        return false;

    if (strcmp(pszParamName, CodecParameters::NumThreads) == 0)
    {
        m_NumThreads = (CMP_BYTE) std::stoi(sValue) & 0xFF;
        m_Use_MultiThreading = m_NumThreads > 1;
    }
    else if (strcmp(pszParamName, "Quality") == 0)
    {
        // Validate before storing so that a rejected value cannot end up in m_quality.
        // The negated form also rejects NaN.
        const float quality = std::stof(sValue);
        if (!((quality >= 0) && (quality <= 1.0)))
        {
            return false;
        }
        m_quality = quality;
    }
    else
        return CCodec_DXTC::SetParameter(pszParamName, sValue);

    return true;
}

// Sets an integer parameter. Handles CodecParameters::NumThreads (the value is
// narrowed to CMP_BYTE); any other name is forwarded to CCodec_DXTC.
bool CCodec_APC::SetParameter(const CMP_CHAR* pszParamName, CMP_DWORD dwValue)
{
    if (strcmp(pszParamName, CodecParameters::NumThreads) == 0)
    {
        m_NumThreads = (CMP_BYTE) dwValue;
        m_Use_MultiThreading = m_NumThreads > 1;
    }
    else
        return CCodec_DXTC::SetParameter(pszParamName, dwValue);

    return true;
}

// Sets a floating point parameter. Handles "Quality" (stored without a range check);
// any other name is forwarded to CCodec_DXTC.
bool CCodec_APC::SetParameter(const CMP_CHAR* pszParamName, CODECFLOAT fValue)
{
    if (strcmp(pszParamName, "Quality") == 0)
        m_quality = fValue;
    else
        return CCodec_DXTC::SetParameter(pszParamName, fValue);

    return true;
}

// Stops the worker threads and releases everything InitializeAPCLibrary() created.
CCodec_APC::~CCodec_APC()
{
    if (!m_LibraryInitialized)
        return;

    // Shut down every live worker. This is driven by m_LiveThreads rather than by
    // m_Use_MultiThreading because SetParameter() can change that flag after the
    // threads have been started; a worker must never outlive the parameter block
    // it polls.
    if (m_EncodeParameterStorage && m_EncodingThreadHandle)
    {
        for (CMP_DWORD i = 0; i < m_LiveThreads; i++)
        {
            // If a thread is in the running state then we need to wait for it to finish
            // any queued work from the producer before we can tell it to exit.
            //
            // If we don't wait then there is a race condition here where we have
            // told the thread to run but it hasn't yet been scheduled - if we set
            // the exit flag before it runs then its block will not be processed.
            while (m_EncodeParameterStorage[i].run == TRUE)
            {
                std::this_thread::sleep_for(std::chrono::milliseconds(1));
            }

            // Signal to the thread that it can exit
            m_EncodeParameterStorage[i].exit = TRUE;
        }

        // Now wait for all threads to have exited
        for (CMP_DWORD dwThread = 0; dwThread < m_LiveThreads; dwThread++)
        {
            std::thread& curThread = m_EncodingThreadHandle[dwThread];
            if (curThread.joinable())
                curThread.join();
        }
    }
    m_LiveThreads = 0;

    // The handle array was allocated with new[]; every element has been joined above,
    // so destroying the std::thread objects is safe.
    delete[] m_EncodingThreadHandle;
    m_EncodingThreadHandle = NULL;

    delete[] m_EncodeParameterStorage;
    m_EncodeParameterStorage = NULL;

    for (int i = 0; i < m_NumEncodingThreads; i++)
    {
        if (m_encoder[i])
        {
            delete m_encoder[i];
            m_encoder[i] = NULL;
        }
    }

    if (m_decoder)
    {
        delete m_decoder;
        m_decoder = NULL;
    }

    m_LibraryInitialized = false;
}

// Creates the per-thread encoders, the worker threads and the decoder on first use.
// Returns CE_OK when the library is (or already was) initialised, CE_Unknown when an
// allocation reports failure.
CodecError CCodec_APC::InitializeAPCLibrary()
{
    if (!m_LibraryInitialized)
    {
        for (CMP_DWORD i = 0; i < MAX_GT_THREADS; i++)
        {
            m_encoder[i] = NULL;
        }

        // Create threaded encoder instances
        m_LiveThreads = 0;
        m_LastThread  = 0;
        m_NumEncodingThreads = min(m_NumThreads, MAX_GT_THREADS);
        if (m_NumEncodingThreads == 0) m_NumEncodingThreads = 1;
        m_Use_MultiThreading = m_NumEncodingThreads > 1;

        m_EncodeParameterStorage = new APCEncodeThreadParam[m_NumEncodingThreads];
        if (!m_EncodeParameterStorage)
        {
            return CE_Unknown;
        }

        m_EncodingThreadHandle = new std::thread[m_NumEncodingThreads];
        if (!m_EncodingThreadHandle)
        {
            delete[] m_EncodeParameterStorage;
            m_EncodeParameterStorage = NULL;

            return CE_Unknown;
        }

        CMP_DWORD   i;

        for (i = 0; i < m_NumEncodingThreads; i++)
        {
            // Create single encoder instance
            m_encoder[i] = new APCBlockEncoder(m_quality);

            // Cleanup if problem!
            if (!m_encoder[i])
            {
                delete[] m_EncodeParameterStorage;
                m_EncodeParameterStorage = NULL;

                delete[] m_EncodingThreadHandle;
                m_EncodingThreadHandle = NULL;

                // ---------------------------------------------------------------------
                // NOTE(review): RECONSTRUCTED SECTION - BEGIN
                // In the copy of the file this revision was made from, the text from the
                // loop condition below up to the single-threaded CompressBlock() call in
                // EncodeAPCBlock() was missing. What follows was rebuilt only from what
                // the surviving code requires: the destructor joins m_LiveThreads
                // threads and deletes m_decoder, the worker reads encoder/in/out/run/exit
                // and Compress() calls EncodeAPCBlock(CMP_BYTE*, uint8_t*). The decoder
                // class name, the EncodeAPCBlock signature and the way the input block
                // reaches the worker are assumptions. Verify every line against version
                // control before merging.
                // ---------------------------------------------------------------------
                for (CMP_DWORD j = 0; j < i; j++)
                {
                    delete m_encoder[j];
                    m_encoder[j] = NULL;
                }

                return CE_Unknown;
            }
        }

        // Create single decoder instance
        // NOTE(review): class name assumed by analogy with APCBlockEncoder - confirm.
        m_decoder = new APCBlockDecoder();
        if (!m_decoder)
        {
            for (CMP_DWORD j = 0; j < m_NumEncodingThreads; j++)
            {
                delete m_encoder[j];
                m_encoder[j] = NULL;
            }

            delete[] m_EncodeParameterStorage;
            m_EncodeParameterStorage = NULL;

            delete[] m_EncodingThreadHandle;
            m_EncodingThreadHandle = NULL;

            return CE_Unknown;
        }

        // Create the encoding threads
        for (i = 0; i < m_NumEncodingThreads; i++)
        {
            // Initialize thread parameters
            m_EncodeParameterStorage[i].encoder = m_encoder[i];
            // The thread has no work yet, but it should wait for some and not exit
            m_EncodeParameterStorage[i].run  = FALSE;
            m_EncodeParameterStorage[i].exit = FALSE;

            m_EncodingThreadHandle[i] = std::thread(APCThreadProcEncode,
                                                    (void*)&m_EncodeParameterStorage[i]);
            m_LiveThreads++;
        }

        m_LibraryInitialized = true;
    }
    return CE_OK;
}

// Queues one source block for compression (multi-threaded) or compresses it
// immediately (single-threaded).
//
// NOTE(review): signature and everything down to the "RECONSTRUCTED SECTION - END"
// marker are part of the reconstruction described in InitializeAPCLibrary().
CodecError CCodec_APC::EncodeAPCBlock(CMP_BYTE *in, CMP_BYTE *out)
{
    if (!m_EncodeParameterStorage)
        return CE_Unknown;

    // NOTE(review): assumes APCEncodeThreadParam::in is an array large enough for one
    // RGBA block of the current dimensions, so each worker owns a private copy - confirm.
    const size_t blockBytes = (size_t)m_xdim * (size_t)m_ydim * 4;

    if (m_Use_MultiThreading && (m_LiveThreads > 0))
    {
        // Loop and look for an available thread, starting with the one used last
        CMP_DWORD threadIndex = m_LastThread;
        while (m_EncodeParameterStorage[threadIndex].run != FALSE)
        {
            // Increment and wrap the thread index
            threadIndex++;
            if (threadIndex == m_LiveThreads)
            {
                threadIndex = 0;
            }
        }
        m_LastThread = threadIndex;

        // Copy the input data into the thread storage
        memcpy(m_EncodeParameterStorage[threadIndex].in, in, blockBytes);

        // Set the output pointer for the thread to the provided location
        m_EncodeParameterStorage[threadIndex].out = out;

        // Tell the thread to start working
        m_EncodeParameterStorage[threadIndex].run = TRUE;
    }
    else
    {
        // Copy the input data into the storage of the first encoder
        memcpy(m_EncodeParameterStorage[0].in, in, blockBytes);

        // Set the output pointer for the encoder to write to
        m_EncodeParameterStorage[0].out = out;

        // ---------------------------------------------------------------------
        // NOTE(review): RECONSTRUCTED SECTION - END (the call below is original)
        // ---------------------------------------------------------------------
        m_encoder[0]->CompressBlock(m_EncodeParameterStorage[0].in, m_EncodeParameterStorage[0].out);
    }
    return CE_OK;
}

// Blocks until no worker is marked as running any more.
// Returns CE_Unknown when the library is not initialised or has no thread storage.
CodecError CCodec_APC::FinishAPCEncoding(void)
{
    if (!m_LibraryInitialized)
    {
        return CE_Unknown;
    }

    if (!m_EncodeParameterStorage)
    {
        return CE_Unknown;
    }

    if (m_Use_MultiThreading)
    {
        // Wait for all the live threads to finish any current work
        for (CMP_DWORD i = 0; i < m_LiveThreads; i++)
        {
            // If a thread is in the running state then we need to wait for it to finish
            // its work from the producer
            while (m_EncodeParameterStorage[i].run == TRUE)
            {
                std::this_thread::sleep_for(std::chrono::milliseconds(1));
            }
        }
    }
    return CE_OK;
}

// Compresses bufferIn into bufferOut one block at a time.
//
// The block dimensions are taken from bufferOut; every block produces 16 bytes of
// output. pFeedbackProc, when given, is called after a row of blocks whenever the
// running block count is a multiple of 10; a non-zero return aborts the compression.
//
// Returns the InitializeAPCLibrary() error if initialisation fails, CE_Unknown if the
// block scratch buffer cannot be allocated, CE_Aborted if the callback asked to stop,
// otherwise the result of FinishAPCEncoding().
CodecError CCodec_APC::Compress(CCodecBuffer& bufferIn, CCodecBuffer& bufferOut, Codec_Feedback_Proc pFeedbackProc, CMP_DWORD_PTR pUser1, CMP_DWORD_PTR pUser2)
{
    CodecError err = InitializeAPCLibrary();
    if (err != CE_OK)
        return err;

    // Source image size
    const int xsize = bufferIn.GetWidth();
    const int ysize = bufferIn.GetHeight();
    const int zsize = 1;    // todo: add depth to support 3d textures

    // Block sizes to partition the source data into for compression
    m_xdim = bufferOut.GetBlockWidth();
    m_ydim = bufferOut.GetBlockHeight();
    m_zdim = 1;

    // A zero block dimension would divide by zero below; fall back to the same
    // default that Decompress() uses.
    if (m_xdim == 0) m_xdim = 4;
    if (m_ydim == 0) m_ydim = 4;

    CodecError result = CE_OK;

    const int xdim = m_xdim;
    const int ydim = m_ydim;

    uint8_t *bufferOutput = bufferOut.GetData();

    // Number of blocks in each direction, rounding partial blocks up
    const int xblocks = (xsize + m_xdim - 1) / m_xdim;
    const int yblocks = (ysize + m_ydim - 1) / m_ydim;
    const int zblocks = (zsize + m_zdim - 1) / m_zdim;

    int   processingBlock = 0;
    const float TotalBlocks = (float)(yblocks * xblocks);

    // One RGBA source block, reused for every block of the image
    const size_t srcBlockBytes = (size_t)xdim * (size_t)ydim * 4;
    CMP_BYTE *srcBlock = (CMP_BYTE *)malloc(srcBlockBytes);
    if (!srcBlock)
        return CE_Unknown;

    for (int z = 0; (z < zblocks) && (result != CE_Aborted); z++)
    {
        for (int y = 0; y < yblocks; y++)
        {
            for (int x = 0; x < xblocks; x++)
            {
                processingBlock++;

                // Output block size is fixed at 16 bytes
                const size_t offset = (((size_t)z * yblocks + y) * xblocks + x) * 16;
                uint8_t *bp = bufferOutput + offset;

                // Clear the whole block; sizeof(srcBlock) would only be the size of the pointer
                memset(srcBlock, 0, srcBlockBytes);
                bufferIn.ReadBlockRGBA(x * m_xdim, y * m_ydim, (CMP_BYTE)m_xdim, (CMP_BYTE)m_ydim, srcBlock);
                EncodeAPCBlock(srcBlock, bp);
            }

            if (pFeedbackProc && ((processingBlock % 10) == 0))
            {
                float fProgress = 100.f * ((float)(processingBlock) / TotalBlocks);
                if (pFeedbackProc(fProgress, pUser1, pUser2))
                {
                    // Leaves the row loop; the slice loop stops on the result check
                    result = CE_Aborted;
                    break;
                }
            }
        }
    }

    // Drain the workers before releasing the scratch block so that nothing queued
    // can still be looking at it.
    CodecError EncodeResult = FinishAPCEncoding();
    free(srcBlock);

    if (result != CE_Aborted)
        result = EncodeResult;

    return result;
}

#ifdef USE_FILEIO_DECODE
FILE * gt_File_Decode = NULL;
char   ModesUsed[CMP_MAXGTMODES+1];
#endif

// Decompresses bufferIn into bufferOut as 4 bytes per pixel, in the order
// BC_COMP_RED, BC_COMP_GREEN, BC_COMP_BLUE, BC_COMP_ALPHA.
//
// The block dimensions are taken from bufferIn (4 is used when a dimension is
// reported as 0). Pixels of edge blocks that fall outside the image are skipped.
// pFeedbackProc, when given, is called once per row of blocks; a non-zero return
// aborts with CE_Aborted.
//
// Returns CE_Unknown when a block would not fit the 144-texel scratch buffer.
CodecError CCodec_APC::Decompress(CCodecBuffer& bufferIn, CCodecBuffer& bufferOut, Codec_Feedback_Proc pFeedbackProc, CMP_DWORD_PTR pUser1, CMP_DWORD_PTR pUser2)
{
    CodecError err = InitializeAPCLibrary();
    if (err != CE_OK)
        return err;

    m_xdim = bufferIn.GetBlockWidth();
    m_ydim = bufferIn.GetBlockHeight();
    m_zdim = 1;

    if (m_xdim == 0) m_xdim = 4;
    if (m_ydim == 0) m_ydim = 4;

    // Our compressed data blocks are always 128 bit long
    const CMP_DWORD imageWidth   = bufferIn.GetWidth();
    const CMP_DWORD imageHeight  = bufferIn.GetHeight();
    const CMP_DWORD CompBlockX   = m_xdim;
    const CMP_DWORD CompBlockY   = m_ydim;
    const CMP_BYTE  Block_Width  = (CMP_BYTE)m_xdim;
    const CMP_BYTE  Block_Height = (CMP_BYTE)m_ydim;

    // The scratch block below holds at most 144 texels (12x12); a larger block would
    // be indexed past its end.
    if ((CMP_DWORD)Block_Width * (CMP_DWORD)Block_Height > 144)
        return CE_Unknown;

    const CMP_DWORD dwBlocksX  = ((imageWidth  + (CompBlockX - 1)) / CompBlockX);
    const CMP_DWORD dwBlocksY  = ((imageHeight + (CompBlockY - 1)) / CompBlockY);
    const CMP_DWORD dwBlocksXY = dwBlocksX * dwBlocksY;

    // Override the current input buffer pitch: it is set according to the compressed
    // block sizes and not to the compressed codec data, which for APC is
    // 16 bytes per block x number of blocks per row
    bufferIn.SetPitch(16 * dwBlocksX);

    // Output data size pitch
    const CMP_DWORD dwPitch = bufferOut.GetPitch();

    // Output buffer
    CMP_BYTE *pDataOut = bufferOut.GetData();

    for (CMP_DWORD cmpRowY = 0; cmpRowY < dwBlocksY; cmpRowY++)          // Compressed image rows  = height
    {
        for (CMP_DWORD cmpColX = 0; cmpColX < dwBlocksX; cmpColX++)      // Compressed image cols  = width
        {
            union FBLOCKS
            {
                CMP_BYTE  decodedBlock[144][4];             // max 12x12 block size
                CMP_BYTE  destBlock[576];                   // max 12x12x4
                CMP_DWORD destBlockDW[BLOCK_SIZE_4X4];
            } DecData;

            APC6_BLOCK compressedBlock;
            int bufferSize = sizeof(compressedBlock) / sizeof(unsigned long);

            // NOTE(review): the read position is scaled by 4, not by the block
            // dimensions - kept as in the original; confirm against CCodecBuffer::ReadBlock.
            bufferIn.ReadBlock(cmpColX * 4, cmpRowY * 4, compressedBlock.dwRawData, bufferSize);

            // Decode the block into the scratch buffer
            m_decoder->DecompressBlock(DecData.decodedBlock, (CMP_BYTE *)compressedBlock.dwRawData);

            // Now that we have a decoded block lets copy that data over to the target image buffer
            const CMP_DWORD outCol = cmpColX * Block_Width;
            const CMP_DWORD outRow = cmpRowY * Block_Height;

            for (int row = 0; row < Block_Height; row++)
            {
                // Rows of an edge block that lie below the image are not written
                if ((outRow + row) >= imageHeight)
                    continue;

                CMP_DWORD nextRowCol = (outRow + row) * dwPitch + (outCol * 4);
                CMP_BYTE *pData = (CMP_BYTE *)(pDataOut + nextRowCol);

                for (int col = 0; col < Block_Width; col++)
                {
                    // Columns of an edge block that lie right of the image are not written
                    if ((outCol + col) >= imageWidth)
                        break;

                    int index = row * Block_Width + col;
                    *pData++ = (CMP_BYTE)DecData.decodedBlock[index][BC_COMP_RED];
                    *pData++ = (CMP_BYTE)DecData.decodedBlock[index][BC_COMP_GREEN];
                    *pData++ = (CMP_BYTE)DecData.decodedBlock[index][BC_COMP_BLUE];
                    *pData++ = (CMP_BYTE)DecData.decodedBlock[index][BC_COMP_ALPHA];
                }
            }
        }

        if (pFeedbackProc)
        {
            float fProgress = 100.f * (cmpRowY * dwBlocksX) / dwBlocksXY;
            if (pFeedbackProc(fProgress, pUser1, pUser2))
            {
                return CE_Aborted;
            }
        }
    }

    return CE_OK;
}

// Not implemented: does no work and returns CE_OK
CodecError CCodec_APC::Compress_Fast(CCodecBuffer& bufferIn, CCodecBuffer& bufferOut, Codec_Feedback_Proc pFeedbackProc, CMP_DWORD_PTR pUser1, CMP_DWORD_PTR pUser2)
{
    return CE_OK;
}

// Not implemented: does no work and returns CE_OK
CodecError CCodec_APC::Compress_SuperFast(CCodecBuffer& bufferIn, CCodecBuffer& bufferOut, Codec_Feedback_Proc pFeedbackProc, CMP_DWORD_PTR pUser1, CMP_DWORD_PTR pUser2)
{
    return CE_OK;
}

#endif
#endif