317 lines
10 KiB
C++

//=====================================================================
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
/// \file BC1.cpp
//
//=====================================================================
#include "bcn.h"
#include "bc1.h"
#ifdef USE_TIMERS
#include "query_timer.h"
#endif
//#define BUILD_AS_PLUGIN_DLL
#ifdef USE_CONVECTION_KERNELS
#pragma comment(lib, "convectionkernels.lib")
#include "convectionkernels.h"
using namespace cvtt;
#endif
#ifdef BUILD_AS_PLUGIN_DLL
DECLARE_PLUGIN(Plugin_BC1)
SET_PLUGIN_TYPE("ENCODER")
SET_PLUGIN_NAME("BC1")
#else
void *make_Plugin_BC1() {
return new Plugin_BC1;
}
#endif
// File-level BC1 encoder options. Filled in by Plugin_BC1::TC_Init() and passed to
// CompressBlockBC1_Internal() by BC1_EncodeClass::CompressBlock(xBlock, yBlock, ...).
CMP_BC15Options g_BC1Encode;
// Paths returned by Plugin_BC1::TC_ComputeSourceFile() for the OCL and DXC compute types.
#define GPU_OCL_BC1_COMPUTEFILE "./plugins/Compute/BC1_Encode_kernel.cpp"
#define GPU_DXC_BC1_COMPUTEFILE "./plugins/Compute/BC1_Encode_kernel.hlsl"
// Forward declaration of the block encoder; the definition is not in this file section.
// Takes 16 source texels, a 2 x 32-bit destination for the compressed block,
// and the encoder options to apply.
void CompressBlockBC1_Internal(
const CMP_Vec4uc srcBlockTemp[16],
CMP_GLOBAL CGU_UINT32 compressedBlock[2],
CMP_GLOBAL CMP_BC15Options *BC15options);
// Starts with no kernel options attached; TC_Init() assigns them later.
Plugin_BC1::Plugin_BC1() {
    m_KernelOptions = nullptr;
}
// The destructor performs no work of its own.
Plugin_BC1::~Plugin_BC1() = default;
int Plugin_BC1::TC_PluginSetSharedIO(void* Shared) {
CMips = reinterpret_cast<CMIPS *> (Shared);
return 0;
}
// Fills *pPluginVersion with the plugin GUID and the API / plugin version constants.
// pPluginVersion is dereferenced without a null check. Always returns 0.
int Plugin_BC1::TC_PluginGetVersion(TC_PluginVersion* pPluginVersion) {
    TC_PluginVersion &version = *pPluginVersion;

    version.guid                 = g_GUID;
    version.dwAPIVersionMajor    = TC_API_VERSION_MAJOR;
    version.dwAPIVersionMinor    = TC_API_VERSION_MINOR;
    version.dwPluginVersionMajor = TC_PLUGIN_VERSION_MAJOR;
    version.dwPluginVersionMinor = TC_PLUGIN_VERSION_MINOR;

    return 0;
}
void* Plugin_BC1::TC_Create() {
return (void*) new BC1_EncodeClass();
}
// Deletes a codec as a BC1_EncodeClass. A null codec is accepted and ignored.
void Plugin_BC1::TC_Destroy(void* codec) {
    // delete on a null pointer is a no-op, so no explicit guard is required.
    // codec is passed by value, so the caller's pointer is never modified.
    delete static_cast<BC1_EncodeClass*>(codec);
}
// Returns the path of the compute-kernel source file for the given compute type.
//
// Compute_type is matched against CMP_GPU_OCL and CMP_GPU_DXC; any other value
// yields an empty string.
//
// The returned pointer always refers to a string literal. Callers must treat it as
// read-only even though the return type is a non-const char*.
char *Plugin_BC1::TC_ComputeSourceFile(CGU_UINT32 Compute_type) {
    const char *sourceFile = "";

    switch (Compute_type) {
    case CMP_Compute_type::CMP_GPU_OCL:
        sourceFile = GPU_OCL_BC1_COMPUTEFILE;
        break;
    case CMP_Compute_type::CMP_GPU_DXC:
        sourceFile = GPU_DXC_BC1_COMPUTEFILE;
        break;
    default:
        break;
    }

    // String literals are arrays of const char; since C++11 they no longer convert
    // implicitly to char*. The cast is explicit so the existing signature is kept.
    return const_cast<char *>(sourceFile);
}
// Does nothing.
void Plugin_BC1::TC_Start() {
}
// Does nothing.
void Plugin_BC1::TC_End() {
}
int Plugin_BC1::TC_Init(void *kernel_options) {
if (!kernel_options) return (-1);
m_KernelOptions = reinterpret_cast<KernelOptions *>(kernel_options);
memset(&g_BC1Encode, 0, sizeof(CMP_BC15Options));
SetDefaultBC15Options(&g_BC1Encode);
SetUserBC15EncoderOptions(&g_BC1Encode,m_KernelOptions);
g_BC1Encode.m_src_width = m_KernelOptions->width;
g_BC1Encode.m_src_height = m_KernelOptions->height;
g_BC1Encode.m_fquality = m_KernelOptions->fquality;
m_KernelOptions->data = &g_BC1Encode;
m_KernelOptions->size = sizeof(g_BC1Encode);
return(0);
}
// Only validates its arguments; no decompression is performed here.
// Returns -1 if either pointer is null, otherwise 0.
int BC1_EncodeClass::DecompressBlock(void *cmpin, void *srcout) {
    const bool haveBuffers = (srcout != NULL) && (cmpin != NULL);
    return haveBuffers ? 0 : -1;
}
// Only validates its arguments; no decompression is performed here.
// Returns -1 when xBlock and yBlock are both non-zero, or when either pointer is
// null; otherwise returns 0.
int BC1_EncodeClass::DecompressBlock(CGU_UINT32 xBlock, CGU_UINT32 yBlock, void *cmpin, void *srcout) {
    // NOTE(review): the rejection requires BOTH indices to be non-zero; confirm
    // that this (rather than "either") is the intended condition.
    const bool bothIndicesNonZero = (xBlock != 0) && (yBlock != 0);
    if (bothIndicesNonZero || (srcout == NULL) || (cmpin == NULL)) {
        return -1;
    }
    return 0;
}
// Only validates its arguments; no compression is performed here.
// Returns -1 if blockoptions, srcin or cmpout is null, otherwise 0.
int BC1_EncodeClass::CompressBlock(void *srcin, void *cmpout, void *blockoptions) {
    if ((blockoptions == NULL) || (srcin == NULL) || (cmpout == NULL)) {
        return -1;
    }
    return 0;
}
// Walks a source MipSet in whole blocks and gathers each block's texels.
//
// srcin is read as a MipSet; its dwWidth / dwHeight are divided by 4 (integer
// division), so any partial blocks on the right and bottom edges are not visited.
// Returns -1 if srcin or cmpout is null, otherwise 0.
//
// Without USE_CONVECTION_KERNELS the gathered R,G,B,A values are not stored
// anywhere and nothing is written to cmpout. With it, blocks are batched up to 8
// at a time and handed to Kernels::EncodeBC1.
//
// NOTE(review): numBlocks, numBlocksProcessed, maxBlocks and pdest are used in the
// USE_CONVECTION_KERNELS sections but are not declared anywhere in this function;
// confirm where they are meant to come from before enabling that path.
int BC1_EncodeClass::CompressTexture(void *srcin, void *cmpout,void *processOptions) {
// Bare expression statement; presumably here to silence an unused-parameter warning.
(processOptions);
if (srcin == NULL) return -1;
if (cmpout == NULL) return -1;
MipSet* pSourceTexture = reinterpret_cast<MipSet *>(srcin);
CMP_DWORD src_height = pSourceTexture->dwHeight;
CMP_DWORD src_width = pSourceTexture->dwWidth;
CMP_DWORD width_in_blocks = src_width / 4;
CMP_DWORD height_in_blocks = src_height / 4;
// Bytes per source row.
int stride = src_width * BYTEPP;
int blockOffset = 0;
int srcidx;
#ifdef USE_CONVECTION_KERNELS
PixelBlockU8 pBlocks[8] = {0};
CMP_BYTE results[64]; // 8 x 8
#endif
for (CMP_DWORD by = 0; by < height_in_blocks; by++) {
for (CMP_DWORD bx = 0; bx < width_in_blocks; bx++) {
// Byte offset of this block's first texel within pData.
// NOTE(review): the row term multiplies by BYTEPP where the block height would be
// expected; the two give the same offset only if BYTEPP equals the block height - confirm.
int srcOffset = (bx*BlockX*BYTEPP) + (by*stride*BYTEPP);
// Copy src block into Texel
blockOffset = 0;
// i selects the row inside the block (srcidx restarts at i*stride);
// j steps across the texels of that row, four bytes at a time.
for (CMP_DWORD i = 0; i < BlockX; i++) {
srcidx = i*stride;
for (CMP_DWORD j = 0; j < BlockY; j++) {
unsigned char R,G,B,A;
R = (pSourceTexture->pData[srcOffset + srcidx++]);
G = (pSourceTexture->pData[srcOffset + srcidx++]);
B = (pSourceTexture->pData[srcOffset + srcidx++]);
A = (pSourceTexture->pData[srcOffset + srcidx++]);
//printf("[%2d,%2d] [%2d][%2d] %2d,%2d,%2d,%2d\n",bx,by,numBlocks,blockOffset,R,G,B,A);
#ifdef USE_CONVECTION_KERNELS
pBlocks[numBlocks].m_pixels[blockOffset][0] = R;
pBlocks[numBlocks].m_pixels[blockOffset][1] = G;
pBlocks[numBlocks].m_pixels[blockOffset][2] = B;
pBlocks[numBlocks].m_pixels[blockOffset][3] = A;
#endif
blockOffset++;
}
}
#ifdef USE_CONVECTION_KERNELS
numBlocks++;
numBlocksProcessed++;
// we are at 8 blocks
// Flush when the batch is full or when the last block has been gathered.
if ((numBlocks == 8)||( numBlocksProcessed >= maxBlocks)) {
cvtt::Options options;
options.flags = Flags::Default;
Kernels::EncodeBC1(results,pBlocks,options);
// save results
// Each encoded block occupies 8 bytes of results.
for (int i=0; i< numBlocks; i++) {
for (int j=0; j<8; j++) {
pdest[j] = results[i*8+j];
}
pdest += 8; // next comp data block to store results
}
numBlocks = 0;
}
#endif
}
}
return 0;
}
// Texture-level decompression is not implemented yet (ToDo); this only validates
// its arguments. Returns -1 if any of the three pointers is null, otherwise 0.
int BC1_EncodeClass::DecompressTexture(void *cmpin, void *srcout, void *processOptions) {
    const bool missingArgument =
        (processOptions == NULL) || (srcout == NULL) || (cmpin == NULL);
    if (missingArgument) {
        return -1;
    }
    // ToDo: treat cmpin / srcout as MipSet objects and decode block by block.
    return 0;
}
// Compresses one block of the source image into cmpout using the file-level
// g_BC1Encode options.
//
// xBlock, yBlock  Block column / row index within the image.
// srcin           Source image, read as an array of CMP_Vec4uc texels.
// cmpout          Destination buffer, addressed in bytes; the block is written at
//                 xBlock * BC1CompBlockSize + yBlock * dstStride.
//
// Blocks that extend past the right or bottom edge are filled out with
// PadLine / PadBlock before encoding.
//
// Returns 0 on success. Returns -1 when srcin or cmpout is null, when the source
// width or height has not been set, or when the block lies entirely outside the image.
int BC1_EncodeClass::CompressBlock(CGU_UINT32 xBlock, CGU_UINT32 yBlock, void *srcin, void *cmpout) {
    // Reject null buffers, as the other entry points of this class do.
    if ((srcin == NULL) || (cmpout == NULL)) {
        return (-1);
    }

    // The source dimensions must have been set beforehand; without them the block
    // cannot be located.
    if ((m_srcHeight == 0) || (m_srcWidth == 0)) {
        return (-1);
    }

    // A block that starts beyond the image has no texels to read. Letting it
    // through would make "m_srcWidth - xBlock*BlockX" below wrap around and drive
    // the source reads past the end of the row.
    if ((xBlock * BlockX >= m_srcWidth) || (yBlock * BlockY >= m_srcHeight)) {
        return (-1);
    }

    CMP_Vec4uc *ImageSource      = static_cast<CMP_Vec4uc *>(srcin);
    CGU_UINT8  *compressedBlocks = static_cast<CGU_UINT8 *>(cmpout);

    int width_in_blocks = (m_srcWidth + 3) >> 2;

    // srcStride is applied to a CMP_Vec4uc index: by default it is four rows of
    // texels (m_srcWidth*4). dstStride is the byte size of one row of blocks.
    CGU_UINT32 srcStride = m_srcWidth * 4;
    CGU_UINT32 dstStride = width_in_blocks * BC1CompBlockSize;

    // user override!
    if (m_srcStride > 0) srcStride = m_srcStride;
    if (m_dstStride > 0) dstStride = m_dstStride;

    CGU_UINT32 destI    = (xBlock * BC1CompBlockSize) + (yBlock * dstStride);
    int        srcindex = (yBlock * srcStride) + (4 * xBlock);
    int        blkindex = 0;

    CMP_Vec4uc srcData[16];

    //Check if it is a complete 4X4 block
    if (((xBlock + 1) * BlockX <= m_srcWidth) && ((yBlock + 1) * BlockY <= m_srcHeight)) {
        for (int j = 0; j < 4; j++) {
            for (int i = 0; i < 4; i++) {
                memcpy(&srcData[blkindex++], &ImageSource[srcindex++], sizeof(CMP_Vec4uc));
            }
            // Skip the remainder of the row to reach this block's next line.
            srcindex += (m_srcWidth - 4);
        }
    } else {
        // Number of real texels available on each line of this edge block.
        CMP_DWORD dwWidth = CMP_MIN(static_cast<unsigned int>(BlockX), m_srcWidth - xBlock * BlockX);
        CMP_DWORD i, j, srcIndex;

        //Go through line by line
        for (j = 0; j < BlockY && (BlockY * yBlock + j) < m_srcHeight; j++) {
            //Copy the real data
            srcIndex = ((yBlock * BlockY + j) * m_srcWidth + (xBlock * BlockX));
            for (i = 0; i < dwWidth; i++) {
                memcpy(&srcData[j * BlockX + i], &ImageSource[srcIndex + i], sizeof(CMP_Vec4uc));
            }
            // Fill the missing texels on a short line.
            if (i < BlockX)
                PadLine(i, BlockX, 4, (CMP_BYTE*)&srcData[j * BlockX]);
        }
        // Fill the missing lines of a short block.
        if (j < BlockY)
            PadBlock(j, BlockX, BlockY, 4, (CMP_BYTE*)srcData);
    }

#ifdef USE_TIMERS
    static int init = false;
    if (!init) {
        query_timer::initialize();
        init = true;
    }
    {
        QUERY_PERFORMANCE("BC1");
#endif
        CompressBlockBC1_Internal(srcData, (CGU_UINT32 *)&compressedBlocks[destI], &g_BC1Encode);
#ifdef USE_TIMERS
    } // Query
    // Running average of the timings that fall below the 30 threshold.
    static int sum = 0;
    static int count = 0;
    if (query_timer::m_elapsed_count < 30) {
        sum += (int)query_timer::m_elapsed_count;
        count++;
        printf("BC1 %f\n",(CGU_FLOAT)sum/(CGU_FLOAT)count);
    }
#endif
    return (0);
}