3595 lines
128 KiB
C++
3595 lines
128 KiB
C++
/*----------------------------------------------------------------------------*/
|
|
/**
|
|
* This confidential and proprietary software may be used only as
|
|
* authorised by a licensing agreement from ARM Limited
|
|
* (C) COPYRIGHT 2011-2012 ARM Limited
|
|
* ALL RIGHTS RESERVED
|
|
*
|
|
* The entire notice above must be reproduced on all authorised
|
|
* copies and copies may only be made to the extent permitted
|
|
* by a licensing agreement from ARM Limited.
|
|
*
|
|
*/
|
|
/*----------------------------------------------------------------------------*/
|
|
//=====================================================================
|
|
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved
|
|
//=====================================================================
|
|
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <cstdlib>
|
|
|
|
#include "astc_host.h"
|
|
#include "astc_encode_kernel.h"
|
|
#include "compressonator.h"
|
|
|
|
|
|
//================================= ASTC CPU HOST CODE ===========================================
|
|
|
|
namespace ASTC_Encoder {
|
|
|
|
//# this is added just in case the definition is missed when using only REPLACE_CPU_CODE
|
|
#ifdef REPLACE_CPU_CODE
|
|
#ifndef USE_HOST_CALLS
|
|
#define USE_HOST_CALLS
|
|
#endif
|
|
#endif
|
|
|
|
|
|
int compute_ise_bitcount2(int items, quantization_method quant) {
|
|
switch (quant) {
|
|
case QUANT_2:
|
|
return items;
|
|
case QUANT_3:
|
|
return (8 * items + 4) / 5;
|
|
case QUANT_4:
|
|
return 2 * items;
|
|
case QUANT_5:
|
|
return (7 * items + 2) / 3;
|
|
case QUANT_6:
|
|
return (13 * items + 4) / 5;
|
|
case QUANT_8:
|
|
return 3 * items;
|
|
case QUANT_10:
|
|
return (10 * items + 2) / 3;
|
|
case QUANT_12:
|
|
return (18 * items + 4) / 5;
|
|
case QUANT_16:
|
|
return items * 4;
|
|
case QUANT_20:
|
|
return (13 * items + 2) / 3;
|
|
case QUANT_24:
|
|
return (23 * items + 4) / 5;
|
|
case QUANT_32:
|
|
return 5 * items;
|
|
case QUANT_40:
|
|
return (16 * items + 2) / 3;
|
|
case QUANT_48:
|
|
return (28 * items + 4) / 5;
|
|
case QUANT_64:
|
|
return 6 * items;
|
|
case QUANT_80:
|
|
return (19 * items + 2) / 3;
|
|
case QUANT_96:
|
|
return (33 * items + 4) / 5;
|
|
case QUANT_128:
|
|
return 7 * items;
|
|
case QUANT_160:
|
|
return (22 * items + 2) / 3;
|
|
case QUANT_192:
|
|
return (38 * items + 4) / 5;
|
|
case QUANT_256:
|
|
return 8 * items;
|
|
default:
|
|
return 100000;
|
|
}
|
|
}
|
|
|
|
/*
|
|
float dot(float2 p, float2 q)
|
|
{
|
|
return p.x * q.x + p.y * q.y;
|
|
}
|
|
|
|
float dot(float3 p, float3 q)
|
|
{
|
|
return p.x * q.x + p.y * q.y + p.z * q.z;
|
|
}
|
|
|
|
float dot(float4 p, float4 q)
|
|
{
|
|
return p.x * q.x + p.y * q.y + p.z * q.z + p.w * q.w;
|
|
}
|
|
|
|
float3 cross(float3 p, float3 q)
|
|
{
|
|
return p.yzx * q.zxy - p.zxy * q.yzx;
|
|
}
|
|
|
|
float length(float2 p)
|
|
{
|
|
return (float)(sqrt(dot(p, p)));
|
|
}
|
|
|
|
float length(float3 p)
|
|
{
|
|
return (float)(sqrt(dot(p, p)));
|
|
}
|
|
|
|
float length(float4 p)
|
|
{
|
|
return (float)(sqrt(dot(p, p)));
|
|
}
|
|
|
|
float2 normalize(float2 p)
|
|
{
|
|
return p / length(p);
|
|
}
|
|
|
|
float3 normalize(float3 p)
|
|
{
|
|
return p / length(p);
|
|
}
|
|
|
|
float4 normalize(float4 p)
|
|
{
|
|
return p / length(p);
|
|
}
|
|
*/
|
|
|
|
#ifdef ASTC_ENABLE_3D_SUPPORT
|
|
// These functions use new () and should either be in CPU or changed to share a pre allocated pointer
|
|
void initialize_decimation_table_3d(
|
|
// dimensions of the block
|
|
int xdim, int ydim, int zdim,
|
|
// number of grid points in 3d weight grid
|
|
int x_weights, int y_weights, int z_weights, decimation_table * dt) {
|
|
int i, j;
|
|
int x, y, z;
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
int weights_per_block = x_weights * y_weights * z_weights;
|
|
|
|
int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
|
|
int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
|
int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
|
|
|
int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
|
|
int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
|
int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
|
|
|
for (i = 0; i < weights_per_block; i++)
|
|
texelcount_of_weight[i] = 0;
|
|
for (i = 0; i < texels_per_block; i++)
|
|
weightcount_of_texel[i] = 0;
|
|
|
|
for (z = 0; z < zdim; z++)
|
|
for (y = 0; y < ydim; y++)
|
|
for (x = 0; x < xdim; x++) {
|
|
int texel = (z * ydim + y) * xdim + x;
|
|
|
|
int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
|
|
int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
|
|
int z_weight = (((1024 + zdim / 2) / (zdim - 1)) * z * (z_weights - 1) + 32) >> 6;
|
|
|
|
int x_weight_frac = x_weight & 0xF;
|
|
int y_weight_frac = y_weight & 0xF;
|
|
int z_weight_frac = z_weight & 0xF;
|
|
int x_weight_int = x_weight >> 4;
|
|
int y_weight_int = y_weight >> 4;
|
|
int z_weight_int = z_weight >> 4;
|
|
int qweight[4];
|
|
int weight[4];
|
|
qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
|
|
qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
|
|
|
|
// simplex interpolation
|
|
int fs = x_weight_frac;
|
|
int ft = y_weight_frac;
|
|
int fp = z_weight_frac;
|
|
|
|
int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
|
|
int N = x_weights;
|
|
int NM = x_weights * y_weights;
|
|
|
|
int s1, s2, w0, w1, w2, w3;
|
|
switch (cas) {
|
|
case 7:
|
|
s1 = 1;
|
|
s2 = N;
|
|
w0 = 16 - fs;
|
|
w1 = fs - ft;
|
|
w2 = ft - fp;
|
|
w3 = fp;
|
|
break;
|
|
case 3:
|
|
s1 = N;
|
|
s2 = 1;
|
|
w0 = 16 - ft;
|
|
w1 = ft - fs;
|
|
w2 = fs - fp;
|
|
w3 = fp;
|
|
break;
|
|
case 5:
|
|
s1 = 1;
|
|
s2 = NM;
|
|
w0 = 16 - fs;
|
|
w1 = fs - fp;
|
|
w2 = fp - ft;
|
|
w3 = ft;
|
|
break;
|
|
case 4:
|
|
s1 = NM;
|
|
s2 = 1;
|
|
w0 = 16 - fp;
|
|
w1 = fp - fs;
|
|
w2 = fs - ft;
|
|
w3 = ft;
|
|
break;
|
|
case 2:
|
|
s1 = N;
|
|
s2 = NM;
|
|
w0 = 16 - ft;
|
|
w1 = ft - fp;
|
|
w2 = fp - fs;
|
|
w3 = fs;
|
|
break;
|
|
case 0:
|
|
s1 = NM;
|
|
s2 = N;
|
|
w0 = 16 - fp;
|
|
w1 = fp - ft;
|
|
w2 = ft - fs;
|
|
w3 = fs;
|
|
break;
|
|
|
|
default:
|
|
s1 = NM;
|
|
s2 = N;
|
|
w0 = 16 - fp;
|
|
w1 = fp - ft;
|
|
w2 = ft - fs;
|
|
w3 = fs;
|
|
break;
|
|
}
|
|
|
|
qweight[1] = qweight[0] + s1;
|
|
qweight[2] = qweight[1] + s2;
|
|
weight[0] = w0;
|
|
weight[1] = w1;
|
|
weight[2] = w2;
|
|
weight[3] = w3;
|
|
|
|
/*
|
|
for(i=0;i<4;i++) weight[i] <<= 4; */
|
|
|
|
for (i = 0; i < 4; i++)
|
|
if (weight[i] != 0) {
|
|
grid_weights_of_texel[texel][weightcount_of_texel[texel]] = qweight[i];
|
|
weights_of_texel[texel][weightcount_of_texel[texel]] = weight[i];
|
|
weightcount_of_texel[texel]++;
|
|
texels_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = texel;
|
|
texelweights_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = weight[i];
|
|
texelcount_of_weight[qweight[i]]++;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++) {
|
|
dt->texel_num_weights[i] = (ASTC_Encoder::uint8_t)weightcount_of_texel[i];
|
|
|
|
// ensure that all 4 entries are actually initialized.
|
|
// This allows a branch-free implemntation of compute_value_of_texel_flt()
|
|
for (j = 0; j < 4; j++) {
|
|
dt->texel_weights_int[i][j] = 0;
|
|
dt->texel_weights_float[i][j] = 0.0f;
|
|
dt->texel_weights[i][j] = 0;
|
|
}
|
|
|
|
for (j = 0; j < weightcount_of_texel[i]; j++) {
|
|
dt->texel_weights_int[i][j] = (ASTC_Encoder::uint8_t)weights_of_texel[i][j];
|
|
dt->texel_weights_float[i][j] = weights_of_texel[i][j] * (1.0f / TEXEL_WEIGHT_SUM);
|
|
dt->texel_weights[i][j] = (ASTC_Encoder::uint8_t)grid_weights_of_texel[i][j];
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < weights_per_block; i++) {
|
|
dt->weight_num_texels[i] = (ASTC_Encoder::uint8_t)texelcount_of_weight[i];
|
|
for (j = 0; j < texelcount_of_weight[i]; j++) {
|
|
dt->weight_texel[i][j] = (ASTC_Encoder::uint8_t)texels_of_weight[i][j];
|
|
dt->weights_int[i][j] = (ASTC_Encoder::uint8_t)texelweights_of_weight[i][j];
|
|
dt->weights_flt[i][j] = (float)texelweights_of_weight[i][j];
|
|
}
|
|
}
|
|
|
|
dt->num_texels = texels_per_block;
|
|
dt->num_weights = weights_per_block;
|
|
}
|
|
|
|
int decode_block_mode_3d(int blockmode, int *Nval, int *Mval, int *Qval, int *dual_weight_plane, int *quant_mode) {
|
|
int base_quant_mode = (blockmode >> 4) & 1;
|
|
int H = (blockmode >> 9) & 1;
|
|
int D = (blockmode >> 10) & 1;
|
|
|
|
int A = (blockmode >> 5) & 0x3;
|
|
|
|
int N = 0, M = 0, Q = 0;
|
|
|
|
if ((blockmode & 3) != 0) {
|
|
base_quant_mode |= (blockmode & 3) << 1;
|
|
int B = (blockmode >> 7) & 3;
|
|
int C = (blockmode >> 2) & 0x3;
|
|
N = A + 2;
|
|
M = B + 2;
|
|
Q = C + 2;
|
|
} else {
|
|
base_quant_mode |= ((blockmode >> 2) & 3) << 1;
|
|
if (((blockmode >> 2) & 3) == 0)
|
|
return 0;
|
|
int B = (blockmode >> 9) & 3;
|
|
if (((blockmode >> 7) & 3) != 3) {
|
|
D = 0;
|
|
H = 0;
|
|
}
|
|
switch ((blockmode >> 7) & 3) {
|
|
case 0:
|
|
N = 6;
|
|
M = B + 2;
|
|
Q = A + 2;
|
|
break;
|
|
case 1:
|
|
N = A + 2;
|
|
M = 6;
|
|
Q = B + 2;
|
|
break;
|
|
case 2:
|
|
N = A + 2;
|
|
M = B + 2;
|
|
Q = 6;
|
|
break;
|
|
case 3:
|
|
N = 2;
|
|
M = 2;
|
|
Q = 2;
|
|
switch ((blockmode >> 5) & 3) {
|
|
case 0:
|
|
N = 6;
|
|
break;
|
|
case 1:
|
|
M = 6;
|
|
break;
|
|
case 2:
|
|
Q = 6;
|
|
break;
|
|
case 3:
|
|
return 0;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
int weight_count = N * M * Q * (D + 1);
|
|
int qmode = (base_quant_mode - 2) + 6 * H;
|
|
|
|
int weightbits = compute_ise_bitcount(weight_count, (quantization_method)qmode);
|
|
if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK)
|
|
return 0;
|
|
|
|
*Nval = N;
|
|
*Mval = M;
|
|
*Qval = Q;
|
|
*dual_weight_plane = D;
|
|
*quant_mode = qmode;
|
|
return 1;
|
|
}
|
|
|
|
// stubbed for the time being.
|
|
const float *get_3d_percentile_table_host(int blockdim_x, int blockdim_y, int blockdim_z) {
|
|
IGNOREPARAM(blockdim_x);
|
|
IGNOREPARAM(blockdim_y);
|
|
IGNOREPARAM(blockdim_z);
|
|
return dummy_percentile_table_3d;
|
|
}
|
|
|
|
void construct_block_size_descriptor_3d_host(int xdim, int ydim, int zdim, block_size_descriptor * bsd) {
|
|
int decimation_mode_index[512]; // for each of the 512 entries in the decim_table_array, its index
|
|
int decimation_mode_count = 0;
|
|
|
|
int i;
|
|
int x_weights;
|
|
int y_weights;
|
|
int z_weights;
|
|
|
|
for (i = 0; i < 512; i++) {
|
|
decimation_mode_index[i] = -1;
|
|
}
|
|
|
|
// gather all the infill-modes that can be used with the current block size
|
|
for (x_weights = 2; x_weights <= 6; x_weights++)
|
|
for (y_weights = 2; y_weights <= 6; y_weights++)
|
|
for (z_weights = 2; z_weights <= 6; z_weights++) {
|
|
if ((x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK)
|
|
continue;
|
|
decimation_table dt;
|
|
decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
|
|
initialize_decimation_table_3d(xdim, ydim, zdim, x_weights, y_weights, z_weights, &dt);
|
|
|
|
int weight_count = x_weights * y_weights * z_weights;
|
|
|
|
int maxprec_1plane = -1;
|
|
int maxprec_2planes = -1;
|
|
for (i = 0; i < 12; i++) {
|
|
int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method)i);
|
|
int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method)i);
|
|
if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK)
|
|
maxprec_1plane = i;
|
|
if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK)
|
|
maxprec_2planes = i;
|
|
}
|
|
bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim && z_weights <= zdim);
|
|
|
|
bsd->decimation_mode_samples[decimation_mode_count] = weight_count;
|
|
bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane;
|
|
bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes;
|
|
bsd->decimation_tables[decimation_mode_count] = dt;
|
|
|
|
decimation_mode_count++;
|
|
}
|
|
|
|
for (i = 0; i < MAX_DECIMATION_MODES; i++) {
|
|
bsd->decimation_mode_percentile[i] = 1.0f;
|
|
}
|
|
|
|
for (i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++) {
|
|
bsd->permit_encode[i] = 0;
|
|
bsd->decimation_mode_samples[i] = 0;
|
|
bsd->decimation_mode_maxprec_1plane[i] = -1;
|
|
bsd->decimation_mode_maxprec_2planes[i] = -1;
|
|
}
|
|
|
|
bsd->decimation_mode_count = decimation_mode_count;
|
|
|
|
const float *percentiles = get_3d_percentile_table_host(xdim, ydim, zdim);
|
|
|
|
// then construct the list of block formats
|
|
for (i = 0; i < 2048; i++) {
|
|
int is_dual_plane;
|
|
int quantization_mode;
|
|
int fail = 0;
|
|
int permit_encode = 1;
|
|
|
|
if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quantization_mode)) {
|
|
if (x_weights > xdim || y_weights > ydim || z_weights > zdim)
|
|
permit_encode = 0;
|
|
} else {
|
|
fail = 1;
|
|
permit_encode = 0;
|
|
}
|
|
if (fail) {
|
|
bsd->block_modes[i].decimation_mode = -1;
|
|
bsd->block_modes[i].quantization_mode = -1;
|
|
bsd->block_modes[i].is_dual_plane = -1;
|
|
bsd->block_modes[i].permit_encode = 0;
|
|
bsd->block_modes[i].permit_decode = 0;
|
|
bsd->block_modes[i].percentile = 1.0f;
|
|
} else {
|
|
int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
|
|
bsd->block_modes[i].decimation_mode = (ASTC_Encoder::uint8_t)decimation_mode;
|
|
bsd->block_modes[i].quantization_mode = (ASTC_Encoder::uint8_t)quantization_mode;
|
|
bsd->block_modes[i].is_dual_plane = (ASTC_Encoder::uint8_t)is_dual_plane;
|
|
bsd->block_modes[i].permit_encode = (ASTC_Encoder::uint8_t)permit_encode;
|
|
bsd->block_modes[i].permit_decode = (ASTC_Encoder::uint8_t)permit_encode;
|
|
bsd->block_modes[i].percentile = percentiles[i];
|
|
|
|
if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i])
|
|
bsd->decimation_mode_percentile[decimation_mode] = percentiles[i];
|
|
}
|
|
|
|
}
|
|
|
|
if (xdim * ydim * zdim <= 64) {
|
|
bsd->texelcount_for_bitmap_partitioning = xdim * ydim * zdim;
|
|
for (i = 0; i < xdim * ydim * zdim; i++)
|
|
bsd->texels_for_bitmap_partitioning[i] = i;
|
|
} else {
|
|
// pick 64 random texels for use with bitmap partitioning.
|
|
int arr[MAX_TEXELS_PER_BLOCK];
|
|
for (i = 0; i < xdim * ydim * zdim; i++)
|
|
arr[i] = 0;
|
|
int arr_elements_set = 0;
|
|
while (arr_elements_set < 64) {
|
|
int idx = rand() % (xdim * ydim * zdim);
|
|
if (arr[idx] == 0) {
|
|
arr_elements_set++;
|
|
arr[idx] = 1;
|
|
}
|
|
}
|
|
int texel_weights_written = 0;
|
|
int idx = 0;
|
|
while (texel_weights_written < 64) {
|
|
if (arr[idx])
|
|
bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx;
|
|
idx++;
|
|
}
|
|
bsd->texelcount_for_bitmap_partitioning = 64;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// return 0 on invalid mode, 1 on valid mode.
|
|
int decode_block_mode_2d(int blockmode, int *Nval, int *Mval, int *dual_weight_plane, int *quant_mode) {
|
|
int base_quant_mode = (blockmode >> 4) & 1;
|
|
int H = (blockmode >> 9) & 1;
|
|
int D = (blockmode >> 10) & 1;
|
|
|
|
int A = (blockmode >> 5) & 0x3;
|
|
|
|
int N = 0, M = 0;
|
|
|
|
if ((blockmode & 3) != 0) {
|
|
base_quant_mode |= (blockmode & 3) << 1;
|
|
int B = (blockmode >> 7) & 3;
|
|
switch ((blockmode >> 2) & 3) {
|
|
case 0:
|
|
N = B + 4;
|
|
M = A + 2;
|
|
break;
|
|
case 1:
|
|
N = B + 8;
|
|
M = A + 2;
|
|
break;
|
|
case 2:
|
|
N = A + 2;
|
|
M = B + 8;
|
|
break;
|
|
case 3:
|
|
B &= 1;
|
|
if (blockmode & 0x100) {
|
|
N = B + 2;
|
|
M = A + 2;
|
|
} else {
|
|
N = A + 2;
|
|
M = B + 6;
|
|
}
|
|
break;
|
|
}
|
|
} else {
|
|
base_quant_mode |= ((blockmode >> 2) & 3) << 1;
|
|
if (((blockmode >> 2) & 3) == 0)
|
|
return 0;
|
|
int B = (blockmode >> 9) & 3;
|
|
switch ((blockmode >> 7) & 3) {
|
|
case 0:
|
|
N = 12;
|
|
M = A + 2;
|
|
break;
|
|
case 1:
|
|
N = A + 2;
|
|
M = 12;
|
|
break;
|
|
case 2:
|
|
N = A + 6;
|
|
M = B + 6;
|
|
D = 0;
|
|
H = 0;
|
|
break;
|
|
case 3:
|
|
switch ((blockmode >> 5) & 3) {
|
|
case 0:
|
|
N = 6;
|
|
M = 10;
|
|
break;
|
|
case 1:
|
|
N = 10;
|
|
M = 6;
|
|
break;
|
|
case 2:
|
|
case 3:
|
|
return 0;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
int weight_count = N * M * (D + 1);
|
|
int qmode = (base_quant_mode - 2) + 6 * H;
|
|
|
|
int weightbits = compute_ise_bitcount2(weight_count, (quantization_method)qmode);
|
|
if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK)
|
|
return 0;
|
|
|
|
*Nval = N;
|
|
*Mval = M;
|
|
*dual_weight_plane = D;
|
|
*quant_mode = qmode;
|
|
return 1;
|
|
}
|
|
|
|
const float *get_2d_percentile_table_host(int blockdim_x, int blockdim_y) {
|
|
switch (blockdim_x) {
|
|
case 4:
|
|
switch (blockdim_y) {
|
|
case 4:
|
|
return percentile_table_4x4;
|
|
case 5:
|
|
return percentile_table_4x5;
|
|
case 6:
|
|
return percentile_table_4x6;
|
|
case 8:
|
|
return percentile_table_4x8;
|
|
case 10:
|
|
return percentile_table_4x10;
|
|
case 12:
|
|
return percentile_table_4x12;
|
|
}
|
|
break;
|
|
case 5:
|
|
switch (blockdim_y) {
|
|
case 4:
|
|
return percentile_table_5x4;
|
|
case 5:
|
|
return percentile_table_5x5;
|
|
case 6:
|
|
return percentile_table_5x6;
|
|
case 8:
|
|
return percentile_table_5x8;
|
|
case 10:
|
|
return percentile_table_5x10;
|
|
case 12:
|
|
return percentile_table_5x12;
|
|
}
|
|
break;
|
|
|
|
case 6:
|
|
switch (blockdim_y) {
|
|
case 4:
|
|
return percentile_table_6x4;
|
|
case 5:
|
|
return percentile_table_6x5;
|
|
case 6:
|
|
return percentile_table_6x6;
|
|
case 8:
|
|
return percentile_table_6x8;
|
|
case 10:
|
|
return percentile_table_6x10;
|
|
case 12:
|
|
return percentile_table_6x12;
|
|
}
|
|
break;
|
|
|
|
case 8:
|
|
switch (blockdim_y) {
|
|
case 4:
|
|
return percentile_table_8x4;
|
|
case 5:
|
|
return percentile_table_8x5;
|
|
case 6:
|
|
return percentile_table_8x6;
|
|
case 8:
|
|
return percentile_table_8x8;
|
|
case 10:
|
|
return percentile_table_8x10;
|
|
case 12:
|
|
return percentile_table_8x12;
|
|
}
|
|
break;
|
|
|
|
case 10:
|
|
switch (blockdim_y) {
|
|
case 4:
|
|
return percentile_table_10x4;
|
|
case 5:
|
|
return percentile_table_10x5;
|
|
case 6:
|
|
return percentile_table_10x6;
|
|
case 8:
|
|
return percentile_table_10x8;
|
|
case 10:
|
|
return percentile_table_10x10;
|
|
case 12:
|
|
return percentile_table_10x12;
|
|
}
|
|
break;
|
|
|
|
case 12:
|
|
switch (blockdim_y) {
|
|
case 4:
|
|
return percentile_table_12x4;
|
|
case 5:
|
|
return percentile_table_12x5;
|
|
case 6:
|
|
return percentile_table_12x6;
|
|
case 8:
|
|
return percentile_table_12x8;
|
|
case 10:
|
|
return percentile_table_12x10;
|
|
case 12:
|
|
return percentile_table_12x12;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return NULL; // should never happen.
|
|
}
|
|
|
|
void initialize_decimation_table_2d(
|
|
// dimensions of the block
|
|
int xdim, int ydim,
|
|
// number of grid points in 2d weight grid
|
|
int x_weights, int y_weights, decimation_table * dt) {
|
|
int i, j;
|
|
int x, y;
|
|
|
|
int texels_per_block = xdim * ydim;
|
|
int weights_per_block = x_weights * y_weights;
|
|
|
|
int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
|
|
int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
|
int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
|
|
|
int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
|
|
int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
|
int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
|
|
|
for (i = 0; i < weights_per_block; i++)
|
|
texelcount_of_weight[i] = 0;
|
|
for (i = 0; i < texels_per_block; i++)
|
|
weightcount_of_texel[i] = 0;
|
|
|
|
for (y = 0; y < ydim; y++)
|
|
for (x = 0; x < xdim; x++) {
|
|
int texel = y * xdim + x;
|
|
|
|
int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
|
|
int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
|
|
|
|
int x_weight_frac = x_weight & 0xF;
|
|
int y_weight_frac = y_weight & 0xF;
|
|
int x_weight_int = x_weight >> 4;
|
|
int y_weight_int = y_weight >> 4;
|
|
int qweight[4];
|
|
int weight[4];
|
|
qweight[0] = x_weight_int + y_weight_int * x_weights;
|
|
qweight[1] = qweight[0] + 1;
|
|
qweight[2] = qweight[0] + x_weights;
|
|
qweight[3] = qweight[2] + 1;
|
|
|
|
// truncated-precision bilinear interpolation.
|
|
int prod = x_weight_frac * y_weight_frac;
|
|
|
|
weight[3] = (prod + 8) >> 4;
|
|
weight[1] = x_weight_frac - weight[3];
|
|
weight[2] = y_weight_frac - weight[3];
|
|
weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
|
|
|
|
for (i = 0; i < 4; i++)
|
|
if (weight[i] != 0) {
|
|
grid_weights_of_texel[texel][weightcount_of_texel[texel]] = qweight[i];
|
|
weights_of_texel[texel][weightcount_of_texel[texel]] = weight[i];
|
|
weightcount_of_texel[texel]++;
|
|
texels_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = texel;
|
|
texelweights_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = weight[i];
|
|
texelcount_of_weight[qweight[i]]++;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++) {
|
|
dt->texel_num_weights[i] = (ASTC_Encoder::uint8_t)weightcount_of_texel[i];
|
|
|
|
// ensure that all 4 entries are actually initialized.
|
|
// This allows a branch-free implemntation of compute_value_of_texel_flt()
|
|
for (j = 0; j < 4; j++) {
|
|
dt->texel_weights_int[i][j] = 0;
|
|
dt->texel_weights_float[i][j] = 0.0f;
|
|
dt->texel_weights[i][j] = 0;
|
|
}
|
|
|
|
for (j = 0; j < weightcount_of_texel[i]; j++) {
|
|
dt->texel_weights_int[i][j] = (ASTC_Encoder::uint8_t)weights_of_texel[i][j];
|
|
dt->texel_weights_float[i][j] = (weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM);
|
|
dt->texel_weights[i][j] = (ASTC_Encoder::uint8_t)grid_weights_of_texel[i][j];
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < weights_per_block; i++) {
|
|
dt->weight_num_texels[i] = (ASTC_Encoder::uint8_t)texelcount_of_weight[i];
|
|
|
|
|
|
for (j = 0; j < texelcount_of_weight[i]; j++) {
|
|
dt->weight_texel[i][j] = (ASTC_Encoder::uint8_t)texels_of_weight[i][j];
|
|
dt->weights_int[i][j] = (ASTC_Encoder::uint8_t)texelweights_of_weight[i][j];
|
|
dt->weights_flt[i][j] = (float)texelweights_of_weight[i][j];
|
|
}
|
|
}
|
|
|
|
dt->num_texels = texels_per_block;
|
|
dt->num_weights = weights_per_block;
|
|
|
|
|
|
}
|
|
|
|
void construct_block_size_descriptor_2d_host(int xdim, int ydim, block_size_descriptor * bsd) {
|
|
int decimation_mode_index[256]; // for each of the 256 entries in the decim_table_array, its index
|
|
int decimation_mode_count = 0;
|
|
|
|
int i;
|
|
int x_weights;
|
|
int y_weights;
|
|
|
|
for (i = 0; i < 256; i++) {
|
|
decimation_mode_index[i] = -1;
|
|
}
|
|
|
|
// gather all the infill-modes that can be used with the current block size
|
|
for (x_weights = 2; x_weights <= 12; x_weights++)
|
|
for (y_weights = 2; y_weights <= 12; y_weights++) {
|
|
if (x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK)
|
|
continue;
|
|
decimation_table dt;
|
|
decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode_count;
|
|
initialize_decimation_table_2d(xdim, ydim, x_weights, y_weights, &dt);
|
|
|
|
int weight_count = x_weights * y_weights;
|
|
|
|
int maxprec_1plane = -1;
|
|
int maxprec_2planes = -1;
|
|
for (i = 0; i < 12; i++) {
|
|
int bits_1plane = compute_ise_bitcount2(weight_count, (quantization_method)i);
|
|
int bits_2planes = compute_ise_bitcount2(2 * weight_count, (quantization_method)i);
|
|
if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK)
|
|
maxprec_1plane = i;
|
|
if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK)
|
|
maxprec_2planes = i;
|
|
}
|
|
|
|
bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim);
|
|
|
|
bsd->decimation_mode_samples[decimation_mode_count] = weight_count;
|
|
bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane;
|
|
bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes;
|
|
bsd->decimation_tables[decimation_mode_count] = dt;
|
|
|
|
decimation_mode_count++;
|
|
}
|
|
|
|
for (i = 0; i < MAX_DECIMATION_MODES; i++) {
|
|
bsd->decimation_mode_percentile[i] = 1.0f;
|
|
}
|
|
|
|
for (i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++) {
|
|
bsd->permit_encode[i] = 0;
|
|
bsd->decimation_mode_samples[i] = 0;
|
|
bsd->decimation_mode_maxprec_1plane[i] = -1;
|
|
bsd->decimation_mode_maxprec_2planes[i] = -1;
|
|
}
|
|
|
|
bsd->decimation_mode_count = decimation_mode_count;
|
|
|
|
const float *percentiles = get_2d_percentile_table_host(xdim, ydim);
|
|
|
|
// then construct the list of block formats
|
|
for (i = 0; i < 2048; i++) {
|
|
int is_dual_plane;
|
|
int quantization_mode;
|
|
int fail = 0;
|
|
int permit_encode = 1;
|
|
|
|
if (decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quantization_mode)) {
|
|
if (x_weights > xdim || y_weights > ydim)
|
|
permit_encode = 0;
|
|
} else {
|
|
fail = 1;
|
|
permit_encode = 0;
|
|
}
|
|
|
|
if (fail) {
|
|
bsd->block_modes[i].decimation_mode = -1;
|
|
bsd->block_modes[i].quantization_mode = -1;
|
|
bsd->block_modes[i].is_dual_plane = -1;
|
|
bsd->block_modes[i].permit_encode = 0;
|
|
bsd->block_modes[i].permit_decode = 0;
|
|
bsd->block_modes[i].percentile = 1.0f;
|
|
} else {
|
|
int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
|
|
bsd->block_modes[i].decimation_mode = (ASTC_Encoder::uint8_t)decimation_mode;
|
|
bsd->block_modes[i].quantization_mode = (ASTC_Encoder::uint8_t)quantization_mode;
|
|
bsd->block_modes[i].is_dual_plane = (ASTC_Encoder::uint8_t)is_dual_plane;
|
|
bsd->block_modes[i].permit_encode = (ASTC_Encoder::uint8_t)permit_encode;
|
|
bsd->block_modes[i].permit_decode = (ASTC_Encoder::uint8_t)permit_encode; // disallow decode of grid size larger than block size.
|
|
bsd->block_modes[i].percentile = percentiles[i];
|
|
|
|
if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i])
|
|
bsd->decimation_mode_percentile[decimation_mode] = percentiles[i];
|
|
}
|
|
|
|
}
|
|
|
|
if (xdim * ydim <= 64) {
|
|
bsd->texelcount_for_bitmap_partitioning = xdim * ydim;
|
|
for (i = 0; i < xdim * ydim; i++)
|
|
bsd->texels_for_bitmap_partitioning[i] = i;
|
|
}
|
|
|
|
else {
|
|
// pick 64 random texels for use with bitmap partitioning.
|
|
int arr[MAX_TEXELS_PER_BLOCK];
|
|
for (i = 0; i < xdim * ydim; i++)
|
|
arr[i] = 0;
|
|
int arr_elements_set = 0;
|
|
while (arr_elements_set < 64) {
|
|
int idx = rand() % (xdim * ydim);
|
|
if (arr[idx] == 0) {
|
|
arr_elements_set++;
|
|
arr[idx] = 1;
|
|
}
|
|
}
|
|
int texel_weights_written = 0;
|
|
int idx = 0;
|
|
while (texel_weights_written < 64) {
|
|
if (arr[idx])
|
|
bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx;
|
|
idx++;
|
|
}
|
|
bsd->texelcount_for_bitmap_partitioning = 64;
|
|
|
|
}
|
|
}
|
|
|
|
// Compares two canonicalized partition tables of seven words each.
// Returns 1 when all seven words match, 0 at the first mismatch.
int compare_canonicalized_partition_tables(const uint64_cl part1[7], const uint64_cl part2[7]) {
    for (int word = 0; word < 7; ++word) {
        if (part1[word] != part2[word])
            return 0;
    }
    return 1;
}
|
|
|
|
// Writes a canonical form of `partition_table` into `canonicalized`.
//
// Partition labels are renumbered in order of first appearance (the first
// label seen becomes 0, the next new one 1, ...), so two tables that differ
// only in how their partitions are numbered produce the same output.
// Each texel occupies two bits; 32 texels are packed per output word.
void gen_canonicalized_partition_table(int texel_count, const ASTC_Encoder::uint8_t * partition_table, uint64_cl canonicalized[7]) {
    for (int word = 0; word < 7; ++word)
        canonicalized[word] = 0;

    // relabel[p] is the canonical label of original partition p, or -1
    // while p has not been encountered yet.
    int relabel[4] = { -1, -1, -1, -1 };
    int next_label = 0;

    for (int texel = 0; texel < texel_count; ++texel) {
        const int original = partition_table[texel];
        if (relabel[original] == -1) {
            relabel[original] = next_label;
            ++next_label;
        }

        const uint64_cl label = relabel[original];
        const int bit_offset = 2 * (texel & 0x1F);
        canonicalized[texel >> 5] |= label << bit_offset;
    }
}
|
|
|
|
void partition_table_zap_equal_elements(int xdim, int ydim, int zdim, partition_info * pi) {
|
|
int partition_tables_zapped = 0;
|
|
|
|
int texel_count = xdim * ydim * zdim;
|
|
|
|
int i, j;
|
|
uint64_cl *canonicalizeds = new uint64_cl[PARTITION_COUNT * 7];
|
|
|
|
|
|
for (i = 0; i < PARTITION_COUNT; i++) {
|
|
gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7);
|
|
}
|
|
|
|
for (i = 0; i < PARTITION_COUNT; i++) {
|
|
for (j = 0; j < i; j++) {
|
|
if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j)) {
|
|
pi[i].partition_count = 0;
|
|
partition_tables_zapped++;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
delete[]canonicalizeds;
|
|
}
|
|
|
|
// Integer hash: scrambles `inp` with a fixed sequence of xor-shift,
// multiply and add-shift steps and returns the result.
ASTC_Encoder::uint32_t hash52_host(ASTC_Encoder::uint32_t inp) {
    ASTC_Encoder::uint32_t h = inp;

    h = h ^ (h >> 15);
    h = h * 0xEEDE0891; // (2^4+1)*(2^7+1)*(2^17-1)
    h = h ^ (h >> 5);
    h = h + (h << 16);
    h = h ^ (h >> 7);
    h = h ^ (h >> 3);
    h = h ^ (h << 6);
    h = h ^ (h >> 17);

    return h;
}
|
|
|
|
int select_partition_host(int seed, int x, int y, int z, int partitioncount, int small_block) {
|
|
if (small_block) {
|
|
x <<= 1;
|
|
y <<= 1;
|
|
z <<= 1;
|
|
}
|
|
|
|
seed += (partitioncount - 1) * 1024;
|
|
|
|
ASTC_Encoder::uint32_t rnum = hash52_host(seed);
|
|
|
|
ASTC_Encoder::uint8_t seed1 = rnum & 0xF;
|
|
ASTC_Encoder::uint8_t seed2 = (rnum >> 4) & 0xF;
|
|
ASTC_Encoder::uint8_t seed3 = (rnum >> 8) & 0xF;
|
|
ASTC_Encoder::uint8_t seed4 = (rnum >> 12) & 0xF;
|
|
ASTC_Encoder::uint8_t seed5 = (rnum >> 16) & 0xF;
|
|
ASTC_Encoder::uint8_t seed6 = (rnum >> 20) & 0xF;
|
|
ASTC_Encoder::uint8_t seed7 = (rnum >> 24) & 0xF;
|
|
ASTC_Encoder::uint8_t seed8 = (rnum >> 28) & 0xF;
|
|
ASTC_Encoder::uint8_t seed9 = (rnum >> 18) & 0xF;
|
|
ASTC_Encoder::uint8_t seed10 = (rnum >> 22) & 0xF;
|
|
ASTC_Encoder::uint8_t seed11 = (rnum >> 26) & 0xF;
|
|
ASTC_Encoder::uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
|
|
|
|
// squaring all the seeds in order to bias their distribution
|
|
// towards lower values.
|
|
seed1 *= seed1;
|
|
seed2 *= seed2;
|
|
seed3 *= seed3;
|
|
seed4 *= seed4;
|
|
seed5 *= seed5;
|
|
seed6 *= seed6;
|
|
seed7 *= seed7;
|
|
seed8 *= seed8;
|
|
seed9 *= seed9;
|
|
seed10 *= seed10;
|
|
seed11 *= seed11;
|
|
seed12 *= seed12;
|
|
|
|
|
|
int sh1, sh2, sh3;
|
|
if (seed & 1) {
|
|
sh1 = (seed & 2 ? 4 : 5);
|
|
sh2 = (partitioncount == 3 ? 6 : 5);
|
|
} else {
|
|
sh1 = (partitioncount == 3 ? 6 : 5);
|
|
sh2 = (seed & 2 ? 4 : 5);
|
|
}
|
|
sh3 = (seed & 0x10) ? sh1 : sh2;
|
|
|
|
seed1 >>= sh1;
|
|
seed2 >>= sh2;
|
|
seed3 >>= sh1;
|
|
seed4 >>= sh2;
|
|
seed5 >>= sh1;
|
|
seed6 >>= sh2;
|
|
seed7 >>= sh1;
|
|
seed8 >>= sh2;
|
|
|
|
seed9 >>= sh3;
|
|
seed10 >>= sh3;
|
|
seed11 >>= sh3;
|
|
seed12 >>= sh3;
|
|
|
|
|
|
|
|
int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
|
|
int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
|
|
int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
|
|
int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
|
|
|
|
|
|
// apply the saw
|
|
a &= 0x3F;
|
|
b &= 0x3F;
|
|
c &= 0x3F;
|
|
d &= 0x3F;
|
|
|
|
// remove some of the components of we are to output < 4 partitions.
|
|
if (partitioncount <= 3)
|
|
d = 0;
|
|
if (partitioncount <= 2)
|
|
c = 0;
|
|
if (partitioncount <= 1)
|
|
b = 0;
|
|
|
|
int partition;
|
|
if (a >= b && a >= c && a >= d)
|
|
partition = 0;
|
|
else if (b >= c && b >= d)
|
|
partition = 1;
|
|
else if (c >= d)
|
|
partition = 2;
|
|
else
|
|
partition = 3;
|
|
return partition;
|
|
}
|
|
|
|
void generate_one_partition_table(int xdim, int ydim, int zdim, int partition_count, int partition_index, partition_info * pt, __global ASTC_Encode *ASTCEncode) {
|
|
int small_block = (xdim * ydim * zdim) < 32;
|
|
|
|
ASTC_Encoder::uint8_t *partition_of_texel = pt->partition_of_texel;
|
|
int x, y, z, i;
|
|
|
|
|
|
for (z = 0; z < zdim; z++)
|
|
for (y = 0; y < ydim; y++)
|
|
for (x = 0; x < xdim; x++) {
|
|
ASTC_Encoder::uint8_t part = (ASTC_Encoder::uint8_t)select_partition_host(partition_index, x, y, z, partition_count, small_block);
|
|
*partition_of_texel++ = part;
|
|
}
|
|
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
int counts[4];
|
|
for (i = 0; i < 4; i++)
|
|
counts[i] = 0;
|
|
|
|
for (i = 0; i < texels_per_block; i++) {
|
|
int partition = pt->partition_of_texel[i];
|
|
pt->texels_of_partition[partition][counts[partition]++] = (ASTC_Encoder::uint8_t)i;
|
|
}
|
|
|
|
for (i = 0; i < 4; i++)
|
|
pt->texels_per_partition[i] = (ASTC_Encoder::uint8_t)counts[i];
|
|
|
|
if (counts[0] == 0)
|
|
pt->partition_count = 0;
|
|
else if (counts[1] == 0)
|
|
pt->partition_count = 1;
|
|
else if (counts[2] == 0)
|
|
pt->partition_count = 2;
|
|
else if (counts[3] == 0)
|
|
pt->partition_count = 3;
|
|
else
|
|
pt->partition_count = 4;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++)
|
|
pt->coverage_bitmaps[i] = 0;
|
|
|
|
int texels_to_process = ASTCEncode->bsd.texelcount_for_bitmap_partitioning;
|
|
|
|
//# was 64 bits changed to 32 bit
|
|
//# this will effect results and need to be fixed for GPU use
|
|
if (texels_to_process > COVERAGE_BITMAPS_MAX)
|
|
texels_to_process = COVERAGE_BITMAPS_MAX;
|
|
|
|
uint64_cl shiftbit = 1;
|
|
|
|
for (i = 0; i < texels_to_process; i++) {
|
|
pt->coverage_bitmaps[pt->partition_of_texel[i]] |= shiftbit << i;
|
|
}
|
|
}
|
|
|
|
// Populate ASTCEncode->partition_tables for the given block size.
//
// The 1-partition table has a single entry (index 0). The 2-, 3- and
// 4-partition tables each get PARTITION_COUNT entries, which are then handed
// to partition_table_zap_equal_elements().
void generate_partition_tables(int xdim, int ydim, int zdim, __global ASTC_Encode *ASTCEncode) {
    generate_one_partition_table(xdim, ydim, zdim, 1, 0, &ASTCEncode->partition_tables[1][0], ASTCEncode);

    // Same call order as before: for every pattern index, build the 2-, 3-
    // and 4-partition entries in turn.
    for (int index = 0; index < PARTITION_COUNT; index++) {
        for (int parts = 2; parts <= 4; parts++) {
            generate_one_partition_table(xdim, ydim, zdim, parts, index, &ASTCEncode->partition_tables[parts][index], ASTCEncode);
        }
    }

    for (int parts = 2; parts <= 4; parts++) {
        partition_table_zap_equal_elements(xdim, ydim, zdim, &ASTCEncode->partition_tables[parts][0]);
    }
}
|
|
|
|
// Precompute the tables used by the angular weight search:
//   - stepsizes[] / stepsizes_sqr[]: reciprocal of each angular stepping and its square
//   - sin_table[][] / cos_table[][]: sine/cosine samples per (sample, stepping) pair
//   - max_angular_steps_needed_for_quant_level[]: per quantization level,
//     looked up through steps_of_level[]
void prepare_angular_tables(__global ASTC_Encode *ASTCEncode) {
    // Indexed by floor(stepping) + 1. Not pre-initialised: this assumes
    // angular_steppings[] produces every slot that steps_of_level[] reads
    // below -- TODO confirm against the table definitions.
    int steps_needed_for_quant_steps[40];

    for (int step = 0; step < ANGULAR_STEPS; step++) {
        ASTCEncode->stepsizes[step] = 1.0f / angular_steppings[step];
        ASTCEncode->stepsizes_sqr[step] = ASTCEncode->stepsizes[step] * ASTCEncode->stepsizes[step];

        for (int sample = 0; sample < SINCOS_STEPS; sample++) {
            ASTCEncode->sin_table[sample][step] =
                static_cast < float >(sin((2.0f * M_PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[step] * sample));
            ASTCEncode->cos_table[sample][step] =
                static_cast < float >(cos((2.0f * M_PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[step] * sample));
        }

        // Later steppings with the same integer part overwrite earlier ones,
        // so each slot ends up holding the value from the last such stepping.
        const int slot = static_cast < int >(floor(angular_steppings[step])) + 1;
        steps_needed_for_quant_steps[slot] = MIN(step + 1, ANGULAR_STEPS - 1);
    }

    for (int level = 0; level < 13; level++) {
        ASTCEncode->max_angular_steps_needed_for_quant_level[level] = steps_needed_for_quant_steps[steps_of_level[level]];
    }
}
|
|
|
|
// Build quantization_mode_table[pairs][bits]: the highest quantization method
// (0..20) whose ISE encoding of 2*pairs integers fits in `bits` bits, or -1
// when none fits. Rows cover pairs 0..16, columns cover 0..127 bits.
void build_quantization_mode_table(__global ASTC_Encode *ASTCEncode) {
    int pairs, bits, quant;

    // Start with "nothing fits" everywhere.
    for (pairs = 0; pairs <= 16; pairs++) {
        for (bits = 0; bits < 128; bits++) {
            ASTCEncode->quantization_mode_table[pairs][bits] = -1;
        }
    }

    // Mark the exact bit count each (method, pair count) combination needs.
    // Methods are visited in ascending order, so a later method overwrites an
    // earlier one that lands on the same cell.
    for (quant = 0; quant < 21; quant++) {
        for (pairs = 1; pairs <= 16; pairs++) {
            const int bits_needed = compute_ise_bitcount2(2 * pairs, (quantization_method)quant);
            if (bits_needed < 128) {
                ASTCEncode->quantization_mode_table[pairs][bits_needed] = quant;
            }
        }
    }

    // Turn each row into a running maximum so that every bit budget maps to
    // the best method seen at or below it.
    for (pairs = 0; pairs <= 16; pairs++) {
        int best_so_far = -1;
        for (bits = 0; bits < 128; bits++) {
            if (ASTCEncode->quantization_mode_table[pairs][bits] > best_so_far) {
                best_so_far = ASTCEncode->quantization_mode_table[pairs][bits];
            } else {
                ASTCEncode->quantization_mode_table[pairs][bits] = best_so_far;
            }
        }
    }
}
|
|
|
|
void expand_block_artifact_suppression_host(int xdim, int ydim, int zdim, error_weighting_params * ewp) {
|
|
int x, y, z;
|
|
float centerpos_x = (xdim - 1) * 0.5f;
|
|
float centerpos_y = (ydim - 1) * 0.5f;
|
|
float centerpos_z = (zdim - 1) * 0.5f;
|
|
int bef = 0;
|
|
|
|
for (z = 0; z < zdim; z++)
|
|
for (y = 0; y < ydim; y++)
|
|
for (x = 0; x < xdim; x++) {
|
|
float xdif = (x - centerpos_x) / xdim;
|
|
float ydif = (y - centerpos_y) / ydim;
|
|
float zdif = (z - centerpos_z) / zdim;
|
|
|
|
float wdif = 0.36f;
|
|
float dist = sqrt(xdif * xdif + ydif * ydif + zdif * zdif + wdif * wdif);
|
|
if (bef < MAX_TEXELS_PER_BLOCK) {
|
|
ewp->block_artifact_suppression_expanded[bef] = pow(dist, ewp->block_artifact_suppression);
|
|
bef++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Fill ASTCEncode->bsd for the given block dimensions.
//
// With ASTC_ENABLE_3D_SUPPORT, blocks with zdim > 1 use the 3D constructor;
// everything else (and every block when 3D support is compiled out) uses the
// 2D constructor, in which case zdim is ignored.
void set_block_size_descriptor(int xdim, int ydim, int zdim, __global ASTC_Encode *ASTCEncode) {
#ifdef ASTC_ENABLE_3D_SUPPORT
    if (zdim > 1) {
        construct_block_size_descriptor_3d_host(xdim, ydim, zdim, &ASTCEncode->bsd);
        return;
    }
#else
    IGNOREPARAM(zdim);
#endif
    construct_block_size_descriptor_2d_host(xdim, ydim, &ASTCEncode->bsd);
}
|
|
|
|
//-----------------------------------------------------
|
|
#ifdef ASTC_ENABLE_3D_SUPPORT
// Build the decimation table that maps a 3D block of texels onto a
// (possibly smaller) 3D grid of weights.
//
// Each texel is assigned up to four grid weights with integer factors that
// sum to 16, chosen by simplex interpolation of the texel's position in the
// weight grid. The function records both directions of the relation:
// texel -> (grid weight, factor) and grid weight -> (texel, factor).
static void initialize_decimation_table_3d(
    // dimensions of the block
    int xdim, int ydim, int zdim,
    // number of grid points in 3d weight grid
    int x_weights, int y_weights, int z_weights, decimation_table * dt) {
    const int texel_total = xdim * ydim * zdim;
    const int weight_total = x_weights * y_weights * z_weights;

    // texel -> weights scratch tables
    int weights_used_by_texel[MAX_TEXELS_PER_BLOCK];
    int grid_index_of_texel[MAX_TEXELS_PER_BLOCK][4];
    int factor_of_texel[MAX_TEXELS_PER_BLOCK][4];

    // weight -> texels scratch tables
    int texels_using_weight[MAX_WEIGHTS_PER_BLOCK];
    int texel_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
    int factor_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];

    int t, w, k;

    for (w = 0; w < weight_total; w++)
        texels_using_weight[w] = 0;
    for (t = 0; t < texel_total; t++)
        weights_used_by_texel[t] = 0;

    // Distance, in grid-weight indices, of one step along y and along z.
    const int row_stride = x_weights;
    const int slice_stride = x_weights * y_weights;

    for (int z = 0; z < zdim; z++) {
        for (int y = 0; y < ydim; y++) {
            for (int x = 0; x < xdim; x++) {
                const int texel = (z * ydim + y) * xdim + x;

                // Position of the texel in the weight grid, with 4 fractional bits.
                const int gx = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
                const int gy = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
                const int gz = (((1024 + zdim / 2) / (zdim - 1)) * z * (z_weights - 1) + 32) >> 6;

                const int fs = gx & 0xF;
                const int ft = gy & 0xF;
                const int fp = gz & 0xF;
                const int ix = gx >> 4;
                const int iy = gy >> 4;
                const int iz = gz >> 4;

                // simplex interpolation: the ordering of the three fractions
                // selects which two axes are stepped (and in which order) on
                // the way from the base corner to the opposite corner, and
                // which fraction is the largest / middle / smallest.
                int step1, step2, hi, mid, lo;
                switch (((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp))) {
                case 7:
                    step1 = 1;            step2 = row_stride;   hi = fs; mid = ft; lo = fp;
                    break;
                case 3:
                    step1 = row_stride;   step2 = 1;            hi = ft; mid = fs; lo = fp;
                    break;
                case 5:
                    step1 = 1;            step2 = slice_stride; hi = fs; mid = fp; lo = ft;
                    break;
                case 4:
                    step1 = slice_stride; step2 = 1;            hi = fp; mid = fs; lo = ft;
                    break;
                case 2:
                    step1 = row_stride;   step2 = slice_stride; hi = ft; mid = fp; lo = fs;
                    break;
                case 0:
                default:
                    // Any other selector value is handled like case 0.
                    step1 = slice_stride; step2 = row_stride;   hi = fp; mid = ft; lo = fs;
                    break;
                }

                int corner[4];
                corner[0] = (iz * y_weights + iy) * x_weights + ix;
                corner[1] = corner[0] + step1;
                corner[2] = corner[1] + step2;
                corner[3] = ((iz + 1) * y_weights + (iy + 1)) * x_weights + (ix + 1);

                const int factor[4] = { 16 - hi, hi - mid, mid - lo, lo };

                // Record every corner that actually contributes.
                for (k = 0; k < 4; k++) {
                    if (factor[k] == 0)
                        continue;

                    const int texel_slot = weights_used_by_texel[texel]++;
                    grid_index_of_texel[texel][texel_slot] = corner[k];
                    factor_of_texel[texel][texel_slot] = factor[k];

                    const int weight_slot = texels_using_weight[corner[k]]++;
                    texel_of_weight[corner[k]][weight_slot] = texel;
                    factor_of_weight[corner[k]][weight_slot] = factor[k];
                }
            }
        }
    }

    // Publish the texel -> weight relation. All 4 entries per texel are
    // written (unused ones as zero), which allows a branch-free
    // implementation of compute_value_of_texel_flt().
    for (t = 0; t < texel_total; t++) {
        const int used = weights_used_by_texel[t];
        dt->texel_num_weights[t] = (uint8_t)used;

        for (k = 0; k < 4; k++) {
            if (k < used) {
                dt->texel_weights_int[t][k] = (uint8_t)factor_of_texel[t][k];
                dt->texel_weights_float[t][k] = static_cast < float >(factor_of_texel[t][k]) * (1.0f / TEXEL_WEIGHT_SUM);
                dt->texel_weights[t][k] = (uint8_t)grid_index_of_texel[t][k];
            } else {
                dt->texel_weights_int[t][k] = 0;
                dt->texel_weights_float[t][k] = 0.0f;
                dt->texel_weights[t][k] = 0;
            }
        }
    }

    // Publish the weight -> texel relation.
    for (w = 0; w < weight_total; w++) {
        const int used = texels_using_weight[w];
        dt->weight_num_texels[w] = (uint8_t)used;

        for (k = 0; k < used; k++) {
            dt->weight_texel[w][k] = (uint8_t)texel_of_weight[w][k];
            dt->weights_int[w][k] = (uint8_t)factor_of_weight[w][k];
            dt->weights_flt[w][k] = static_cast < float >(factor_of_weight[w][k]);
        }
    }

    dt->num_texels = texel_total;
    dt->num_weights = weight_total;
}
#endif
|
|
|
|
// Write the low `bitcount` bits of `value` (up to 8 bits) into the byte
// stream `ptr`, starting at bit position `bitoffset` (bit 0 is the least
// significant bit of ptr[0]). Bits outside the field are preserved.
// The byte following the target byte is always read and written back, even
// when the field does not spill into it.
static inline void write_bits(int value, int bitcount, int bitoffset, uint8_t * ptr) {
    uint8_t *const dst = ptr + (bitoffset >> 3);
    const int shift = bitoffset & 7;

    const int field = (1 << bitcount) - 1;
    const int placed = (value & field) << shift;  // field contents, in position
    const int keep = ~(field << shift);           // everything except the field

    dst[0] = static_cast < uint8_t >((dst[0] & keep) | placed);
    dst[1] = static_cast < uint8_t >((dst[1] & (keep >> 8)) | (placed >> 8));
}
|
|
|
|
|
|
// Read a `bitcount`-bit field (up to 8 bits) from the byte stream `ptr`,
// starting at bit position `bitoffset` (bit 0 is the least significant bit
// of ptr[0]). The byte following the target byte is always read, even when
// the field does not extend into it.
static inline int read_bits(int bitcount, int bitoffset, const uint8_t * ptr) {
    const uint8_t *src = ptr + (bitoffset >> 3);

    // 16-bit little-endian window that is guaranteed to contain the field.
    const int window = src[0] | (src[1] << 8);

    return (window >> (bitoffset & 7)) & ((1 << bitcount) - 1);
}
|
|
|
|
// Unpacked trit quintuplets <low,_,_,_,high> for each packed trit-block
// value. Indexed by the 8-bit packed value; each entry holds the five trits
// (each 0..2) in low-to-high order. Four entries per row, so row r holds
// indices 4r .. 4r+3.
static const uint8_t trits_of_integer[256][5] = {
    // 0 .. 31
    {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
    {0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
    {0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
    {0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},
    {0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
    {0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
    {0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},
    {0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},
    // 32 .. 63
    {0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
    {0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
    {0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},
    {0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
    {0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},
    {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},
    {0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
    {0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},
    // 64 .. 95
    {0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},
    {0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
    {0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},
    {0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
    {0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
    {0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},
    {0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},
    {0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
    // 96 .. 127
    {0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
    {0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},
    {0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
    {0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},
    {0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},
    {0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
    {0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},
    {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},
    // 128 .. 159
    {0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
    {0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},
    {0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
    {0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
    {0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},
    {0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},
    {0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
    {0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
    // 160 .. 191
    {0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},
    {0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
    {0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},
    {0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},
    {0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
    {0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},
    {0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},
    {0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
    // 192 .. 223
    {0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},
    {0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
    {0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
    {0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},
    {0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},
    {0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
    {0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
    {0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},
    // 224 .. 255
    {0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
    {0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},
    {0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},
    {0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
    {0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},
    {0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},
    {0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
    {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2},
};
|
|
|
|
// Unpacked quint triplets <low,middle,high> for each packed quint-block
// value. Indexed by the 7-bit packed value; each entry holds the three
// quints (each 0..4) in low-to-high order. Eight entries per row, so row r
// holds indices 8r .. 8r+7.
static const uint8_t quints_of_integer[128][3] = {
    {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0}, {4, 4, 0}, {4, 4, 4},
    {0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0}, {4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4},
    {0, 2, 0}, {1, 2, 0}, {2, 2, 0}, {3, 2, 0}, {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
    {0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0}, {4, 4, 3}, {4, 4, 4},
    {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1}, {4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4},
    {0, 1, 1}, {1, 1, 1}, {2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
    {0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1}, {4, 2, 4}, {2, 4, 4},
    {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1}, {4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4},
    {0, 0, 2}, {1, 0, 2}, {2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
    {0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2}, {2, 1, 4}, {3, 1, 4},
    {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2}, {4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4},
    {0, 3, 2}, {1, 3, 2}, {2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
    {0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3}, {4, 0, 3}, {0, 4, 3}, {0, 0, 4}, {1, 0, 4},
    {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3}, {4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4},
    {0, 2, 3}, {1, 2, 3}, {2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
    {0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3}, {0, 3, 4}, {1, 3, 4},
};
|
|
|
|
// Reverse the order of the low 8 bits of p (bit 0 <-> bit 7, bit 1 <-> bit 6,
// and so on). Bits of p above bit 7 are ignored; the result is in 0..255.
int bitrev8(int p) {
    int reversed = 0;
    for (int bit = 0; bit < 8; bit++) {
        // Shift the bits collected so far up and append the next source bit.
        reversed = (reversed << 1) | ((p >> bit) & 1);
    }
    return reversed;
}
|
|
|
|
void decode_ise(int quantization_level, int elements, const uint8_t * input_data, uint8_t * output_data, int bit_offset) {
|
|
int i;
|
|
// note: due to how the the trit/quint-block unpacking is done in this function,
|
|
// we may write more temporary results than the number of outputs
|
|
// The maximum actual number of results is 64 bit, but we keep 4 additional elements
|
|
// of padding.
|
|
uint8_t results[68];
|
|
uint8_t tq_blocks[22]; // trit-blocks or quint-blocks
|
|
|
|
int bits, trits, quints;
|
|
find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints);
|
|
|
|
int lcounter = 0;
|
|
int hcounter = 0;
|
|
|
|
// trit-blocks or quint-blocks must be zeroed out before we collect them in the loop below.
|
|
for (i = 0; i < 22; i++)
|
|
tq_blocks[i] = 0;
|
|
|
|
// collect bits for each element, as well as bits for any trit-blocks and quint-blocks.
|
|
for (i = 0; i < elements; i++) {
|
|
results[i] = (uint8_t)read_bits(bits, bit_offset, input_data);
|
|
bit_offset += bits;
|
|
if (trits) {
|
|
static const int bits_to_read[5] = { 2, 2, 1, 2, 1 };
|
|
static const int block_shift[5] = { 0, 2, 4, 5, 7 };
|
|
static const int next_lcounter[5] = { 1, 2, 3, 4, 0 };
|
|
static const int hcounter_incr[5] = { 0, 0, 0, 0, 1 };
|
|
int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
|
|
bit_offset += bits_to_read[lcounter];
|
|
tq_blocks[hcounter] |= tdata << block_shift[lcounter];
|
|
hcounter += hcounter_incr[lcounter];
|
|
lcounter = next_lcounter[lcounter];
|
|
}
|
|
if (quints) {
|
|
static const int bits_to_read[3] = { 3, 2, 2 };
|
|
static const int block_shift[3] = { 0, 3, 5 };
|
|
static const int next_lcounter[3] = { 1, 2, 0 };
|
|
static const int hcounter_incr[3] = { 0, 0, 1 };
|
|
int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
|
|
bit_offset += bits_to_read[lcounter];
|
|
tq_blocks[hcounter] |= tdata << block_shift[lcounter];
|
|
hcounter += hcounter_incr[lcounter];
|
|
lcounter = next_lcounter[lcounter];
|
|
}
|
|
}
|
|
|
|
|
|
// unpack trit-blocks or quint-blocks as needed
|
|
if (trits) {
|
|
int trit_blocks = (elements + 4) / 5;
|
|
for (i = 0; i < trit_blocks; i++) {
|
|
const uint8_t *tritptr = trits_of_integer[tq_blocks[i]];
|
|
results[5 * i] |= tritptr[0] << bits;
|
|
results[5 * i + 1] |= tritptr[1] << bits;
|
|
results[5 * i + 2] |= tritptr[2] << bits;
|
|
results[5 * i + 3] |= tritptr[3] << bits;
|
|
results[5 * i + 4] |= tritptr[4] << bits;
|
|
}
|
|
}
|
|
|
|
if (quints) {
|
|
int quint_blocks = (elements + 2) / 3;
|
|
for (i = 0; i < quint_blocks; i++) {
|
|
const uint8_t *quintptr = quints_of_integer[tq_blocks[i]];
|
|
results[3 * i] |= quintptr[0] << bits;
|
|
results[3 * i + 1] |= quintptr[1] << bits;
|
|
results[3 * i + 2] |= quintptr[2] << bits;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < elements; i++)
|
|
output_data[i] = results[i];
|
|
}
|
|
|
|
// Derive the encoder's search-effort and error-weighting parameters from
// ASTCEncode->m_Quality and the block dimensions, and store them in
// ASTCEncode->m_ewp (plus m_target_bitrate, which is reset to 0).
//
// m_Quality selects one of five tiers; within a tier some limits are scaled
// by the quality value:
//
//   m_Quality        tier        reference preset it is modelled on
//   < 0.02           very fast   (none)
//   < 0.05           fast        plimit 4,   oplimit 1.0,  mincorrel 0.5,  bmc 50,  maxiters 1
//   <= 0.20          medium      plimit 25,  oplimit 1.2,  mincorrel 0.75, bmc 75,  maxiters 2
//   <= 0.60          thorough    plimit 100, oplimit 2.5,  mincorrel 0.95, bmc 95
//   otherwise        exhaustive  plimit PARTITION_COUNT, oplimit 1000, mincorrel 0.99,
//                                bmc 100, maxiters 4, dblimit 999
//
// The reference dB limits are MAX(A - 35*log10(texels), B - 19*log10(texels))
// with (A, B) = (85, 63) fast, (95, 70) medium, (105, 77) thorough. These
// reference values are the settings found in astc_main of the astcenc
// sample command line tool.
//
// Finally the per-texel block-artifact-suppression table is expanded for the
// current block size.
void InitializeASTCSettingsForSetBlockSize(__global ASTC_Encode *ASTCEncode) {
    ASTCEncode->m_target_bitrate = 0;

    int xdim_2d = ASTCEncode->m_xdim;
    int ydim_2d = ASTCEncode->m_ydim;
    float log10_texels_2d = log((float)(xdim_2d * ydim_2d)) / log(10.0f);

#ifdef ASTC_ENABLE_3D_SUPPORT
    int xdim_3d = ASTCEncode->m_xdim;
    int ydim_3d = ASTCEncode->m_ydim;
    int zdim_3d = ASTCEncode->m_zdim;
    float log10_texels_3d = log((float)(xdim_3d * ydim_3d * zdim_3d)) / log(10.0f);
    // NOTE(review): no tier below assigns a 3D dB limit, so this stays at 0
    // and the 3D error limit computed from it is never stored anywhere.
    float dblimit_autoset_3d = 0.0f;
#endif

    int plimit_autoset = -1;
    float dblimit_autoset_2d = 0.0f;
    float oplimit_autoset = 0.0f;
    float mincorrel_autoset = 0.0f;
    float bmc_autoset = 0.0f;
    int maxiters_autoset = 0;

    // Codec Speed Setting Defaults based on Quality Settings
    // QualityScale is the quality normalized to the upper bound of the
    // selected tier; it is not used by the "very fast" tier.
    float QualityScale = 0.0f;
    if (ASTCEncode->m_Quality < 0.02f) {
        // Very Fast
        oplimit_autoset = 1.0f;
        mincorrel_autoset = 0.5f;
        plimit_autoset = 1;
        bmc_autoset = 5.0f;
        maxiters_autoset = 1;
        dblimit_autoset_2d = MAX(70 - 35 * log10_texels_2d, 53 - 19 * log10_texels_2d);
    } else if (ASTCEncode->m_Quality < 0.05f) {
        // Fast
        QualityScale = ASTCEncode->m_Quality / 0.05f;
        oplimit_autoset = 1.0f;
        mincorrel_autoset = 0.5f;
        plimit_autoset = 4;
        bmc_autoset = 5.0f + (45.0f * QualityScale);  // approaches 50 at the top of the tier
        maxiters_autoset = 1;
        dblimit_autoset_2d = MAX(85 - 35 * log10_texels_2d, 63 - 19 * log10_texels_2d);
    } else if (ASTCEncode->m_Quality <= 0.20f) {
        // Medium: set to match near Compressonator BC7 Default Quality 0.05f setting
        QualityScale = ASTCEncode->m_Quality / 0.20f;
        oplimit_autoset = 1.2f;
        mincorrel_autoset = 0.75f;
        plimit_autoset = 15 + (int)round(10.0f * QualityScale);  // 25 at m_Quality == 0.20
        bmc_autoset = 57.0f + (18.0f * QualityScale);            // 75 at m_Quality == 0.20
        maxiters_autoset = 2;
        dblimit_autoset_2d = MAX(95 - 35 * log10_texels_2d, 70 - 19 * log10_texels_2d);
    } else if (ASTCEncode->m_Quality <= 0.60f) {
        // Thorough
        QualityScale = ASTCEncode->m_Quality / 0.60f;
        oplimit_autoset = 1.2f + (1.3f * QualityScale);          // 2.5 at m_Quality == 0.60
        mincorrel_autoset = 0.95f;
        plimit_autoset = 25 + (int)round(75.0f * QualityScale);  // 100 at m_Quality == 0.60
        // Reaches 100 at m_Quality == 0.60, i.e. above the reference
        // "thorough" value of 95 listed in the header comment.
        bmc_autoset = 75.0f + (25.0f * QualityScale);
        maxiters_autoset = 4;
        dblimit_autoset_2d = MAX(105 - 35 * log10_texels_2d, 77 - 19 * log10_texels_2d);
    } else {
        // Exhaustive (QualityScale is the raw quality value here)
        QualityScale = ASTCEncode->m_Quality;
        oplimit_autoset = 2.5f + (997.5f * QualityScale);          // 1000 at m_Quality == 1.0
        mincorrel_autoset = 0.99f;
        plimit_autoset = 100 + (int)round(923.0f * QualityScale);  // 1023 at m_Quality == 1.0; clamped below
        bmc_autoset = 95.0f + (5.0f * QualityScale);               // 100 at m_Quality == 1.0
        maxiters_autoset = 4;
        dblimit_autoset_2d = 999.0f;
    }

    // Error weighting: uniform weights, no mean/stdev or alpha-based modulation.
    ASTCEncode->m_ewp.rgb_power = 1.0f;
    ASTCEncode->m_ewp.alpha_power = 1.0f;
    ASTCEncode->m_ewp.rgb_base_weight = 1.0f;
    ASTCEncode->m_ewp.alpha_base_weight = 1.0f;
    ASTCEncode->m_ewp.rgb_mean_weight = 0.0f;
    ASTCEncode->m_ewp.rgb_stdev_weight = 0.0f;
    ASTCEncode->m_ewp.alpha_mean_weight = 0.0f;
    ASTCEncode->m_ewp.alpha_stdev_weight = 0.0f;

    ASTCEncode->m_ewp.rgb_mean_and_stdev_mixing = 0.0f;
    ASTCEncode->m_ewp.mean_stdev_radius = 0;
    ASTCEncode->m_ewp.enable_rgb_scale_with_alpha = 0;
    ASTCEncode->m_ewp.alpha_radius = 0;

    ASTCEncode->m_ewp.block_artifact_suppression = 0.0f;
    ASTCEncode->m_ewp.rgba_weights[0] = 1.0f;
    ASTCEncode->m_ewp.rgba_weights[1] = 1.0f;
    ASTCEncode->m_ewp.rgba_weights[2] = 1.0f;
    ASTCEncode->m_ewp.rgba_weights[3] = 1.0f;
    ASTCEncode->m_ewp.ra_normal_angular_scale = 0;
    ASTCEncode->m_ewp.max_refinement_iters = maxiters_autoset;

    // bmc_autoset is a percentage; the stored cutoff is a fraction.
    ASTCEncode->m_ewp.block_mode_cutoff = bmc_autoset / 100.0f;

    // Convert the dB limit into an average per-texel squared-error limit.
    // When HDR is forced the limit is 0.
    float texel_avg_error_limit_2d = 0.0f;
    if (ASTCEncode->m_rgb_force_use_of_hdr == 0) {
        texel_avg_error_limit_2d = pow(0.1f, dblimit_autoset_2d * 0.1f) * 65535.0f * 65535.0f;
    }

#ifdef ASTC_ENABLE_3D_SUPPORT
    // This path previously referenced dblimit_set_by_user and
    // dblimit_user_specified, which are not declared in this function; the
    // automatic value is used directly, mirroring the 2D path.
    float texel_avg_error_limit_3d = 0.0f;
    if (ASTCEncode->m_rgb_force_use_of_hdr == 0) {
        texel_avg_error_limit_3d = pow(0.1f, dblimit_autoset_3d * 0.1f) * 65535.0f * 65535.0f;
    }
    // NOTE(review): only the 2D limit is stored below; confirm whether 3D
    // blocks are meant to use texel_avg_error_limit_3d instead.
    (void)log10_texels_3d;
    (void)texel_avg_error_limit_3d;
#endif

    ASTCEncode->m_ewp.partition_1_to_2_limit = oplimit_autoset;
    ASTCEncode->m_ewp.lowest_correlation_cutoff = mincorrel_autoset;

    // Keep the partition search limit within [1, PARTITION_COUNT].
    int partitions_to_test = plimit_autoset;
    if (partitions_to_test < 1)
        partitions_to_test = 1;
    else if (partitions_to_test > PARTITION_COUNT)
        partitions_to_test = PARTITION_COUNT;
    ASTCEncode->m_ewp.partition_search_limit = partitions_to_test;

    // Specifying the error weight of a color component as 0 is not allowed.
    // If weights are 0, then they are instead set to a small positive value
    // (1/1000 of the largest component weight).
    float max_color_component_weight = MAX(MAX(ASTCEncode->m_ewp.rgba_weights[0], ASTCEncode->m_ewp.rgba_weights[1]),
                                           MAX(ASTCEncode->m_ewp.rgba_weights[2], ASTCEncode->m_ewp.rgba_weights[3]));
    for (int channel = 0; channel < 4; channel++) {
        ASTCEncode->m_ewp.rgba_weights[channel] =
            MAX(ASTCEncode->m_ewp.rgba_weights[channel], max_color_component_weight / 1000.0f);
    }

    ASTCEncode->m_ewp.texel_avg_error_limit = texel_avg_error_limit_2d;

    expand_block_artifact_suppression_host(ASTCEncode->m_xdim, ASTCEncode->m_ydim, ASTCEncode->m_zdim, &ASTCEncode->m_ewp);
}
|
|
|
|
// One-time setup of the encoder state for the block size stored in
// ASTCEncode (m_xdim, m_ydim, m_zdim): builds the angular and quantization
// tables, derives the quality-dependent settings, constructs the block size
// descriptor, records the texel count and partition-table index, and
// generates the partition tables. Always returns true.
bool init_ASTC(__global ASTC_Encode *ASTCEncode) {
    prepare_angular_tables(ASTCEncode);
    build_quantization_mode_table(ASTCEncode);
    InitializeASTCSettingsForSetBlockSize(ASTCEncode);

    const int block_x = ASTCEncode->m_xdim;
    const int block_y = ASTCEncode->m_ydim;
    const int block_z = ASTCEncode->m_zdim;

    set_block_size_descriptor(block_x, block_y, block_z, ASTCEncode);

#ifdef ASTC_ENABLE_3D_SUPPORT
    ASTCEncode->m_texels_per_block = block_x * block_y * block_z;
#else
    // Without 3D support the z dimension does not contribute to the count.
    ASTCEncode->m_texels_per_block = block_x * block_y;
#endif

    // Pack the three dimensions into a single index.
    ASTCEncode->m_ptindex = block_x + 16 * block_y + 256 * block_z;

    generate_partition_tables(block_x, block_y, block_z, ASTCEncode);
    return true;
}
|
|
|
|
}
|
|
|
|
//=====================================================================================================================================
|
|
// CPU Based Decoder code
|
|
|
|
extern ASTC_Encoder::ASTC_Encode g_ASTCEncode;
|
|
|
|
void initialize_decimation_table_2d_cpu(
|
|
// dimensions of the block
|
|
int xdim, int ydim,
|
|
// number of grid points in 2d weight grid
|
|
int x_weights, int y_weights, decimation_table_cpu * dt)
|
|
|
|
{
|
|
int i, j;
|
|
int x, y;
|
|
|
|
int texels_per_block = xdim * ydim;
|
|
int weights_per_block = x_weights * y_weights;
|
|
|
|
int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
|
|
int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
|
int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
|
|
|
|
int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
|
|
int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
|
int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
|
|
|
|
for (i = 0; i < weights_per_block; i++)
|
|
texelcount_of_weight[i] = 0;
|
|
for (i = 0; i < texels_per_block; i++)
|
|
weightcount_of_texel[i] = 0;
|
|
|
|
for (y = 0; y < ydim; y++)
|
|
for (x = 0; x < xdim; x++) {
|
|
int texel = y * xdim + x;
|
|
|
|
int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
|
|
int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
|
|
|
|
int x_weight_frac = x_weight & 0xF;
|
|
int y_weight_frac = y_weight & 0xF;
|
|
int x_weight_int = x_weight >> 4;
|
|
int y_weight_int = y_weight >> 4;
|
|
int qweight[4];
|
|
int weight[4];
|
|
qweight[0] = x_weight_int + y_weight_int * x_weights;
|
|
qweight[1] = qweight[0] + 1;
|
|
qweight[2] = qweight[0] + x_weights;
|
|
qweight[3] = qweight[2] + 1;
|
|
|
|
// truncated-precision bilinear interpolation.
|
|
int prod = x_weight_frac * y_weight_frac;
|
|
|
|
weight[3] = (prod + 8) >> 4;
|
|
weight[1] = x_weight_frac - weight[3];
|
|
weight[2] = y_weight_frac - weight[3];
|
|
weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
|
|
|
|
for (i = 0; i < 4; i++)
|
|
if (weight[i] != 0) {
|
|
grid_weights_of_texel[texel][weightcount_of_texel[texel]] = qweight[i];
|
|
weights_of_texel[texel][weightcount_of_texel[texel]] = weight[i];
|
|
weightcount_of_texel[texel]++;
|
|
texels_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = texel;
|
|
texelweights_of_weight[qweight[i]][texelcount_of_weight[qweight[i]]] = weight[i];
|
|
texelcount_of_weight[qweight[i]]++;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < texels_per_block; i++) {
|
|
dt->texel_num_weights[i] = (ASTC_Encoder::uint8_t)weightcount_of_texel[i];
|
|
|
|
// ensure that all 4 entries are actually initialized.
|
|
// This allows a branch-free implemntation of compute_value_of_texel_flt()
|
|
for (j = 0; j < 4; j++) {
|
|
dt->texel_weights_int[i][j] = 0;
|
|
dt->texel_weights_float[i][j] = 0.0f;
|
|
dt->texel_weights[i][j] = 0;
|
|
}
|
|
|
|
for (j = 0; j < weightcount_of_texel[i]; j++) {
|
|
dt->texel_weights_int[i][j] = (ASTC_Encoder::uint8_t)weights_of_texel[i][j];
|
|
dt->texel_weights_float[i][j] = (weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM);
|
|
dt->texel_weights[i][j] = (ASTC_Encoder::uint8_t)grid_weights_of_texel[i][j];
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < weights_per_block; i++) {
|
|
dt->weight_num_texels[i] = (ASTC_Encoder::uint8_t)texelcount_of_weight[i];
|
|
|
|
|
|
for (j = 0; j < texelcount_of_weight[i]; j++) {
|
|
dt->weight_texel[i][j] = (ASTC_Encoder::uint8_t)texels_of_weight[i][j];
|
|
dt->weights_int[i][j] = (ASTC_Encoder::uint8_t)texelweights_of_weight[i][j];
|
|
dt->weights_flt[i][j] = (float)texelweights_of_weight[i][j];
|
|
}
|
|
}
|
|
|
|
dt->num_texels = texels_per_block;
|
|
dt->num_weights = weights_per_block;
|
|
|
|
|
|
}
|
|
|
|
void construct_block_size_descriptor_2d_cpu(int xdim, int ydim, block_size_descriptor_cpu * bsd) {
|
|
int decimation_mode_index[256]; // for each of the 256 entries in the decim_table_array, its index
|
|
int decimation_mode_count = 0;
|
|
|
|
int i;
|
|
int x_weights;
|
|
int y_weights;
|
|
|
|
for (i = 0; i < 256; i++) {
|
|
decimation_mode_index[i] = -1;
|
|
}
|
|
|
|
// gather all the infill-modes that can be used with the current block size
|
|
for (x_weights = 2; x_weights <= 12; x_weights++)
|
|
for (y_weights = 2; y_weights <= 12; y_weights++) {
|
|
if (x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK)
|
|
continue;
|
|
decimation_table_cpu *dt = new decimation_table_cpu;
|
|
decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode_count;
|
|
initialize_decimation_table_2d_cpu(xdim, ydim, x_weights, y_weights, dt);
|
|
|
|
int weight_count = x_weights * y_weights;
|
|
|
|
int maxprec_1plane = -1;
|
|
int maxprec_2planes = -1;
|
|
for (i = 0; i < 12; i++) {
|
|
int bits_1plane = ASTC_Encoder::compute_ise_bitcount2(weight_count, (ASTC_Encoder::quantization_method)i);
|
|
int bits_2planes = ASTC_Encoder::compute_ise_bitcount2(2 * weight_count, (ASTC_Encoder::quantization_method)i);
|
|
if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK)
|
|
maxprec_1plane = i;
|
|
if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK)
|
|
maxprec_2planes = i;
|
|
}
|
|
|
|
bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim);
|
|
|
|
bsd->decimation_mode_samples[decimation_mode_count] = weight_count;
|
|
bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane;
|
|
bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes;
|
|
bsd->decimation_tables[decimation_mode_count] = dt;
|
|
|
|
decimation_mode_count++;
|
|
}
|
|
|
|
for (i = 0; i < MAX_DECIMATION_MODES; i++) {
|
|
bsd->decimation_mode_percentile[i] = 1.0f;
|
|
}
|
|
|
|
for (i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++) {
|
|
bsd->permit_encode[i] = 0;
|
|
bsd->decimation_mode_samples[i] = 0;
|
|
bsd->decimation_mode_maxprec_1plane[i] = -1;
|
|
bsd->decimation_mode_maxprec_2planes[i] = -1;
|
|
}
|
|
|
|
bsd->decimation_mode_count = decimation_mode_count;
|
|
|
|
const float *percentiles = ASTC_Encoder::get_2d_percentile_table_host(xdim, ydim);
|
|
|
|
// then construct the list of block formats
|
|
for (i = 0; i < 2048; i++) {
|
|
int is_dual_plane;
|
|
int quantization_mode;
|
|
int fail = 0;
|
|
int permit_encode = 1;
|
|
|
|
if (ASTC_Encoder::decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quantization_mode)) {
|
|
if (x_weights > xdim || y_weights > ydim)
|
|
permit_encode = 0;
|
|
} else {
|
|
fail = 1;
|
|
permit_encode = 0;
|
|
}
|
|
|
|
if (fail) {
|
|
bsd->block_modes[i].decimation_mode = -1;
|
|
bsd->block_modes[i].quantization_mode = -1;
|
|
bsd->block_modes[i].is_dual_plane = -1;
|
|
bsd->block_modes[i].permit_encode = 0;
|
|
bsd->block_modes[i].permit_decode = 0;
|
|
bsd->block_modes[i].percentile = 1.0f;
|
|
} else {
|
|
int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
|
|
bsd->block_modes[i].decimation_mode = (uint8_t)decimation_mode;
|
|
bsd->block_modes[i].quantization_mode = (uint8_t)quantization_mode;
|
|
bsd->block_modes[i].is_dual_plane = (uint8_t)is_dual_plane;
|
|
bsd->block_modes[i].permit_encode = (uint8_t)permit_encode;
|
|
bsd->block_modes[i].permit_decode = (uint8_t)permit_encode; // disallow decode of grid size larger than block size.
|
|
bsd->block_modes[i].percentile = percentiles[i];
|
|
|
|
if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i])
|
|
bsd->decimation_mode_percentile[decimation_mode] = percentiles[i];
|
|
}
|
|
|
|
}
|
|
|
|
if (xdim * ydim <= 64) {
|
|
bsd->texelcount_for_bitmap_partitioning = xdim * ydim;
|
|
for (i = 0; i < xdim * ydim; i++)
|
|
bsd->texels_for_bitmap_partitioning[i] = i;
|
|
}
|
|
|
|
else {
|
|
// pick 64 random texels for use with bitmap partitioning.
|
|
int arr[MAX_TEXELS_PER_BLOCK];
|
|
for (i = 0; i < xdim * ydim; i++)
|
|
arr[i] = 0;
|
|
int arr_elements_set = 0;
|
|
while (arr_elements_set < 64) {
|
|
int idx = rand() % (xdim * ydim);
|
|
if (arr[idx] == 0) {
|
|
arr_elements_set++;
|
|
arr[idx] = 1;
|
|
}
|
|
}
|
|
int texel_weights_written = 0;
|
|
int idx = 0;
|
|
while (texel_weights_written < 64) {
|
|
if (arr[idx])
|
|
bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx;
|
|
idx++;
|
|
}
|
|
bsd->texelcount_for_bitmap_partitioning = 64;
|
|
|
|
}
|
|
}
|
|
|
|
#ifdef ASTC_ENABLE_3D_SUPPORT
// Builds the block-size descriptor for a 3D block of xdim x ydim x zdim texels.
//
// Written into *bsd:
//   1. one decimation mode per weight grid (2..6 weights per axis) whose
//      weight count does not exceed MAX_WEIGHTS_PER_BLOCK;
//   2. the table of all 2048 block modes, each linked to its decimation mode;
//   3. the texels used for bitmap partitioning: every texel when the block
//      has at most 64 of them, otherwise 64 texels picked with rand().
void construct_block_size_descriptor_3d(int xdim, int ydim, int zdim, block_size_descriptor * bsd) {
    // Maps a packed grid size (z * 64 + y * 8 + x) to the slot of the matching
    // decimation mode, or -1 when no mode was created for that grid.
    int decimation_mode_index[512];
    for (int k = 0; k < 512; k++)
        decimation_mode_index[k] = -1;

    int decimation_mode_count = 0;
    int x_weights;
    int y_weights;
    int z_weights;

    // Gather all the infill modes that can be used with the current block size.
    for (x_weights = 2; x_weights <= 6; x_weights++) {
        for (y_weights = 2; y_weights <= 6; y_weights++) {
            for (z_weights = 2; z_weights <= 6; z_weights++) {
                const int weight_count = x_weights * y_weights * z_weights;
                if (weight_count > MAX_WEIGHTS_PER_BLOCK)
                    continue;

                decimation_table *dt = new decimation_table;
                decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
                initialize_decimation_table_3d(xdim, ydim, zdim, x_weights, y_weights, z_weights, dt);

                // Highest quantization level whose ISE-encoded weights fit the
                // permitted bit range, for single- and dual-plane encodings.
                int maxprec_1plane = -1;
                int maxprec_2planes = -1;
                for (int q = 0; q < 12; q++) {
                    const int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method)q);
                    const int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method)q);
                    if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK)
                        maxprec_1plane = q;
                    if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK)
                        maxprec_2planes = q;
                }

                bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim && z_weights <= zdim);
                bsd->decimation_mode_samples[decimation_mode_count] = weight_count;
                bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane;
                bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes;

                // The descriptor keeps its own copy of the table, so the heap
                // object must be released here; it used to be leaked.
                bsd->decimation_tables[decimation_mode_count] = *dt;
                delete dt;

                decimation_mode_count++;
            }
        }
    }

    // Every percentile starts at 1.0; used modes are lowered further below.
    for (int k = 0; k < MAX_DECIMATION_MODES; k++)
        bsd->decimation_mode_percentile[k] = 1.0f;

    // Mark the slots past the last created mode as unusable.
    for (int k = decimation_mode_count; k < MAX_DECIMATION_MODES; k++) {
        bsd->permit_encode[k] = 0;
        bsd->decimation_mode_samples[k] = 0;
        bsd->decimation_mode_maxprec_1plane[k] = -1;
        bsd->decimation_mode_maxprec_2planes[k] = -1;
    }

    bsd->decimation_mode_count = decimation_mode_count;

    const float *percentiles = get_3d_percentile_table(xdim, ydim, zdim);

    // Then construct the list of block formats.
    for (int mode = 0; mode < 2048; mode++) {
        int is_dual_plane;
        int quantization_mode;

        if (!decode_block_mode_3d(mode, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quantization_mode)) {
            // Not a decodable block mode.
            bsd->block_modes[mode].decimation_mode = -1;
            bsd->block_modes[mode].quantization_mode = -1;
            bsd->block_modes[mode].is_dual_plane = -1;
            bsd->block_modes[mode].permit_encode = 0;
            bsd->block_modes[mode].permit_decode = 0;
            bsd->block_modes[mode].percentile = 1.0f;
            continue;
        }

        // A weight grid larger than the block itself may neither be encoded
        // nor decoded.
        const int permit_encode = (x_weights > xdim || y_weights > ydim || z_weights > zdim) ? 0 : 1;
        const int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];

        bsd->block_modes[mode].decimation_mode = (uint8_t)decimation_mode;
        bsd->block_modes[mode].quantization_mode = (uint8_t)quantization_mode;
        bsd->block_modes[mode].is_dual_plane = (uint8_t)is_dual_plane;
        bsd->block_modes[mode].permit_encode = (uint8_t)permit_encode;
        bsd->block_modes[mode].permit_decode = (uint8_t)permit_encode;
        bsd->block_modes[mode].percentile = percentiles[mode];

        // A decimation mode keeps the lowest percentile of its block modes.
        if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[mode])
            bsd->decimation_mode_percentile[decimation_mode] = percentiles[mode];
    }

    const int texel_total = xdim * ydim * zdim;

    if (texel_total <= 64) {
        // Small block: bitmap partitioning looks at every texel.
        bsd->texelcount_for_bitmap_partitioning = texel_total;
        for (int k = 0; k < texel_total; k++)
            bsd->texels_for_bitmap_partitioning[k] = k;
    } else {
        // Pick 64 distinct random texels, then store them in ascending order.
        int arr[MAX_TEXELS_PER_BLOCK];
        for (int k = 0; k < texel_total; k++)
            arr[k] = 0;

        int arr_elements_set = 0;
        while (arr_elements_set < 64) {
            const int idx = rand() % texel_total;
            if (arr[idx] == 0) {
                arr_elements_set++;
                arr[idx] = 1;
            }
        }

        int texel_weights_written = 0;
        for (int idx = 0; texel_weights_written < 64; idx++) {
            if (arr[idx])
                bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx;
        }
        bsd->texelcount_for_bitmap_partitioning = 64;
    }
}
#endif
|
|
|
|
// Cache of block-size descriptors, indexed by xdim + (ydim << 4) + (zdim << 8).
static block_size_descriptor_cpu *bsd_pointers[4096];

// Returns the block-size descriptor for the given block dimensions, building
// and caching it the first time a size is requested. The cache is filled
// without any locking, so this should not be called from multithreaded code.
block_size_descriptor_cpu *get_block_size_descriptor_cpu(int xdim, int ydim, int zdim) {
    const int slot = xdim + (ydim << 4) + (zdim << 8);

    if (bsd_pointers[slot] != NULL)
        return bsd_pointers[slot];

    block_size_descriptor_cpu *fresh = new block_size_descriptor_cpu;
#ifdef ASTC_ENABLE_3D_SUPPORT
    if (zdim > 1)
        construct_block_size_descriptor_3d(xdim, ydim, zdim, fresh);
    else
#endif
        construct_block_size_descriptor_2d_cpu(xdim, ydim, fresh);

    bsd_pointers[slot] = fresh;
    return bsd_pointers[slot];
}
|
|
|
|
void physical_to_symbolic_cpu(int xdim, int ydim, int zdim, physical_compressed_block_cpu pb, symbolic_compressed_block_cpu * res) {
|
|
uint8_t bswapped[16];
|
|
int i, j;
|
|
|
|
res->error_block = 0;
|
|
|
|
// get hold of the block-size descriptor and the decimation tables.
|
|
const block_size_descriptor_cpu *bsd = get_block_size_descriptor_cpu(xdim, ydim, zdim);
|
|
const decimation_table_cpu *const *ixtab2 = bsd->decimation_tables;
|
|
|
|
// extract header fields
|
|
int block_mode = ASTC_Encoder::read_bits(11, 0, pb.data);
|
|
|
|
|
|
if ((block_mode & 0x1FF) == 0x1FC) {
|
|
// void-extent block!
|
|
|
|
// check what format the data has
|
|
if (block_mode & 0x200)
|
|
res->block_mode = -1; // floating-point
|
|
else
|
|
res->block_mode = -2; // unorm16.
|
|
|
|
res->partition_count = 0;
|
|
for (i = 0; i < 4; i++) {
|
|
res->constant_color[i] = pb.data[2 * i + 8] | (pb.data[2 * i + 9] << 8);
|
|
}
|
|
|
|
// additionally, check that the void-extent
|
|
if (zdim == 1) {
|
|
// 2D void-extent
|
|
int rsvbits = ASTC_Encoder::read_bits(2, 10, pb.data);
|
|
if (rsvbits != 3)
|
|
res->error_block = 1;
|
|
|
|
int vx_low_s = ASTC_Encoder::read_bits(8, 12, pb.data) | (ASTC_Encoder::read_bits(5, 12 + 8, pb.data) << 8);
|
|
int vx_high_s = ASTC_Encoder::read_bits(8, 25, pb.data) | (ASTC_Encoder::read_bits(5, 25 + 8, pb.data) << 8);
|
|
int vx_low_t = ASTC_Encoder::read_bits(8, 38, pb.data) | (ASTC_Encoder::read_bits(5, 38 + 8, pb.data) << 8);
|
|
int vx_high_t = ASTC_Encoder::read_bits(8, 51, pb.data) | (ASTC_Encoder::read_bits(5, 51 + 8, pb.data) << 8);
|
|
|
|
int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF && vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;
|
|
|
|
if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones)
|
|
res->error_block = 1;
|
|
} else {
|
|
// 3D void-extent
|
|
int vx_low_s = ASTC_Encoder::read_bits(9, 10, pb.data);
|
|
int vx_high_s = ASTC_Encoder::read_bits(9, 19, pb.data);
|
|
int vx_low_t = ASTC_Encoder::read_bits(9, 28, pb.data);
|
|
int vx_high_t = ASTC_Encoder::read_bits(9, 37, pb.data);
|
|
int vx_low_p = ASTC_Encoder::read_bits(9, 46, pb.data);
|
|
int vx_high_p = ASTC_Encoder::read_bits(9, 55, pb.data);
|
|
|
|
int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF && vx_low_t == 0x1FF && vx_high_t == 0x1FF && vx_low_p == 0x1FF && vx_high_p == 0x1FF;
|
|
|
|
if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_p >= vx_high_p) && !all_ones)
|
|
res->error_block = 1;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if (bsd->block_modes[block_mode].permit_decode == 0) {
|
|
res->error_block = 1;
|
|
return;
|
|
}
|
|
|
|
int weight_count = ixtab2[bsd->block_modes[block_mode].decimation_mode]->num_weights;
|
|
int weight_quantization_method = bsd->block_modes[block_mode].quantization_mode;
|
|
int is_dual_plane = bsd->block_modes[block_mode].is_dual_plane;
|
|
|
|
int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;
|
|
|
|
int partition_count = ASTC_Encoder::read_bits(2, 11, pb.data) + 1;
|
|
|
|
res->block_mode = block_mode;
|
|
res->partition_count = partition_count;
|
|
|
|
for (i = 0; i < 16; i++)
|
|
bswapped[i] = (uint8_t)ASTC_Encoder::bitrev8(pb.data[15 - i]);
|
|
|
|
int bits_for_weights = ASTC_Encoder::compute_ise_bitcount2(real_weight_count,(ASTC_Encoder::quantization_method)weight_quantization_method);
|
|
|
|
int below_weights_pos = 128 - bits_for_weights;
|
|
|
|
if (is_dual_plane) {
|
|
uint8_t indices[64];
|
|
ASTC_Encoder::decode_ise(weight_quantization_method, real_weight_count, bswapped, indices, 0);
|
|
for (i = 0; i < weight_count; i++) {
|
|
res->plane1_weights[i] = indices[2 * i];
|
|
res->plane2_weights[i] = indices[2 * i + 1];
|
|
}
|
|
} else {
|
|
ASTC_Encoder::decode_ise(weight_quantization_method, weight_count, bswapped, res->plane1_weights, 0);
|
|
}
|
|
|
|
if (is_dual_plane && partition_count == 4)
|
|
res->error_block = 1;
|
|
|
|
|
|
|
|
res->color_formats_matched = 0;
|
|
|
|
// then, determine the format of each endpoint pair
|
|
int color_formats[4];
|
|
int encoded_type_highpart_size = 0;
|
|
if (partition_count == 1) {
|
|
color_formats[0] = ASTC_Encoder::read_bits(4, 13, pb.data);
|
|
res->partition_index = 0;
|
|
} else {
|
|
encoded_type_highpart_size = (3 * partition_count) - 4;
|
|
below_weights_pos -= encoded_type_highpart_size;
|
|
int encoded_type = ASTC_Encoder::read_bits(6, 13 + PARTITION_BITS, pb.data) | (ASTC_Encoder::read_bits(encoded_type_highpart_size, below_weights_pos, pb.data) << 6);
|
|
int baseclass = encoded_type & 0x3;
|
|
if (baseclass == 0) {
|
|
for (i = 0; i < partition_count; i++) {
|
|
color_formats[i] = (encoded_type >> 2) & 0xF;
|
|
}
|
|
below_weights_pos += encoded_type_highpart_size;
|
|
res->color_formats_matched = 1;
|
|
encoded_type_highpart_size = 0;
|
|
} else {
|
|
int bitpos = 2;
|
|
baseclass--;
|
|
for (i = 0; i < partition_count; i++) {
|
|
color_formats[i] = (((encoded_type >> bitpos) & 1) + baseclass) << 2;
|
|
bitpos++;
|
|
}
|
|
for (i = 0; i < partition_count; i++) {
|
|
color_formats[i] |= (encoded_type >> bitpos) & 3;
|
|
bitpos += 2;
|
|
}
|
|
}
|
|
res->partition_index = ASTC_Encoder::read_bits(6, 13, pb.data) | (ASTC_Encoder::read_bits(PARTITION_BITS - 6, 19, pb.data) << 6);
|
|
|
|
}
|
|
for (i = 0; i < partition_count; i++)
|
|
res->color_formats[i] = color_formats[i];
|
|
|
|
|
|
// then, determine the number of integers we need to unpack for the endpoint pairs
|
|
int color_integer_count = 0;
|
|
for (i = 0; i < partition_count; i++) {
|
|
int endpoint_class = color_formats[i] >> 2;
|
|
color_integer_count += (endpoint_class + 1) * 2;
|
|
}
|
|
|
|
if (color_integer_count > 18)
|
|
res->error_block = 1;
|
|
|
|
// then, determine the color endpoint format to use for these integers
|
|
static const int color_bits_arr[5] = { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS };
|
|
int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size;
|
|
if (is_dual_plane)
|
|
color_bits -= 2;
|
|
if (color_bits < 0)
|
|
color_bits = 0;
|
|
|
|
int color_quantization_level = g_ASTCEncode.quantization_mode_table[color_integer_count >> 1][color_bits];
|
|
res->color_quantization_level = color_quantization_level;
|
|
if (color_quantization_level < 4)
|
|
res->error_block = 1;
|
|
|
|
|
|
// then unpack the integer-bits
|
|
uint8_t values_to_decode[32];
|
|
ASTC_Encoder::decode_ise(color_quantization_level, color_integer_count, pb.data, values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_BITS));
|
|
|
|
// and distribute them over the endpoint types
|
|
int valuecount_to_decode = 0;
|
|
|
|
for (i = 0; i < partition_count; i++) {
|
|
int vals = 2 * (color_formats[i] >> 2) + 2;
|
|
for (j = 0; j < vals; j++)
|
|
res->color_values[i][j] = values_to_decode[j + valuecount_to_decode];
|
|
valuecount_to_decode += vals;
|
|
}
|
|
|
|
// get hold of color component for second-plane in the case of dual plane of weightss.
|
|
if (is_dual_plane)
|
|
res->plane2_color_component = ASTC_Encoder::read_bits(2, below_weights_pos - 2, pb.data);
|
|
|
|
}
|
|
|
|
// Fills pb->deriv_data for the first pixelcount texels.
//
// For a channel flagged as LNS (rgb_lns / alpha_lns), the entry is a
// finite-difference slope of float_to_lns taken at the original value,
// clamped to [1/32, 2^25]. For any other channel it is the constant 65535.
// Only orig_data is read; work_data is not consulted.
void imageblock_initialize_deriv_from_work_and_orig_cpu(imageblock_cpu * pb, int pixelcount) {
    // Slope of float_to_lns over a 5% step, with the input floored at 6e-5.
    auto lns_slope = [](float value) -> float {
        float v = MAX(value, 6e-5f);
        float slope = (ASTC_Encoder::float_to_lns(v * 1.05f) - ASTC_Encoder::float_to_lns(v)) / (v * 0.05f);

        // The derivative may not be smaller than 1/32 or larger than 2^25.
        if (slope < (1.0f / 32.0f))
            slope = (1.0f / 32.0f);
        else if (slope > 33554432.0f)
            slope = 33554432.0f;
        return slope;
    };

    const float *src = pb->orig_data;
    float *dst = pb->deriv_data;

    for (int texel = 0; texel < pixelcount; texel++, src += 4, dst += 4) {
        // RGB first.
        if (pb->rgb_lns[texel]) {
            dst[0] = lns_slope(src[0]);
            dst[1] = lns_slope(src[1]);
            dst[2] = lns_slope(src[2]);
        } else {
            dst[0] = 65535.0f;
            dst[1] = 65535.0f;
            dst[2] = 65535.0f;
        }

        // Then alpha.
        if (pb->alpha_lns[texel])
            dst[3] = lns_slope(src[3]);
        else
            dst[3] = 65535.0f;
    }
}
|
|
|
|
// helper function to initialize the work-data from the orig-data
|
|
void imageblock_initialize_work_from_orig_cpu(imageblock_cpu * pb, int pixelcount) {
|
|
int i;
|
|
float *fptr = pb->orig_data;
|
|
float *wptr = pb->work_data;
|
|
|
|
for (i = 0; i < pixelcount; i++) {
|
|
if (pb->rgb_lns[i]) {
|
|
wptr[0] = ASTC_Encoder::float_to_lns(fptr[0]);
|
|
wptr[1] = ASTC_Encoder::float_to_lns(fptr[1]);
|
|
wptr[2] = ASTC_Encoder::float_to_lns(fptr[2]);
|
|
} else {
|
|
wptr[0] = fptr[0] * 65535.0f;
|
|
wptr[1] = fptr[1] * 65535.0f;
|
|
wptr[2] = fptr[2] * 65535.0f;
|
|
}
|
|
|
|
if (pb->alpha_lns[i]) {
|
|
wptr[3] = ASTC_Encoder::float_to_lns(fptr[3]);
|
|
} else {
|
|
wptr[3] = fptr[3] * 65535.0f;
|
|
}
|
|
fptr += 4;
|
|
wptr += 4;
|
|
}
|
|
|
|
imageblock_initialize_deriv_from_work_and_orig_cpu(pb, pixelcount);
|
|
}
|
|
|
|
// Scans the work data of an xdim x ydim x zdim block and stores in pb the
// per-channel minimum and maximum, plus a grayscale flag that is 1 only when
// red, green and blue are equal in every texel.
void update_imageblock_flags_cpu(imageblock_cpu * pb, int xdim, int ydim, int zdim) {
    // Channel order: red, green, blue, alpha.
    float lowest[4] = { 1e38f, 1e38f, 1e38f, 1e38f };
    float highest[4] = { -1e38f, -1e38f, -1e38f, -1e38f };
    int all_gray = 1;

    const int texel_total = xdim * ydim * zdim;

    for (int texel = 0; texel < texel_total; texel++) {
        const float *px = &pb->work_data[4 * texel];

        for (int c = 0; c < 4; c++) {
            if (px[c] < lowest[c])
                lowest[c] = px[c];
            if (px[c] > highest[c])
                highest[c] = px[c];
        }

        if (px[0] != px[1] || px[0] != px[2])
            all_gray = 0;
    }

    pb->red_min = lowest[0];
    pb->red_max = highest[0];
    pb->green_min = lowest[1];
    pb->green_max = highest[1];
    pb->blue_min = lowest[2];
    pb->blue_max = highest[2];
    pb->alpha_min = lowest[3];
    pb->alpha_max = highest[3];
    pb->grayscale = all_gray;
}
|
|
|
|
// fetch an imageblock from the input file.
|
|
void fetch_imageblock_cpu(
|
|
const astc_codec_image_cpu * img,
|
|
imageblock_cpu * pb, // picture-block to imitialize with image data
|
|
// block dimensions
|
|
int xdim, int ydim, int zdim,
|
|
// position in texture.
|
|
int xpos, int ypos, int zpos
|
|
) {
|
|
float *fptr = pb->orig_data;
|
|
int xsize = img->xsize + 2 * img->padding;
|
|
int ysize = img->ysize + 2 * img->padding;
|
|
int zsize = (img->zsize == 1) ? 1 : img->zsize + 2 * img->padding;
|
|
|
|
int x, y, z, i;
|
|
|
|
pb->xpos = xpos;
|
|
pb->ypos = ypos;
|
|
pb->zpos = zpos;
|
|
|
|
xpos += img->padding;
|
|
ypos += img->padding;
|
|
if (img->zsize > 1)
|
|
zpos += img->padding;
|
|
|
|
float data[6];
|
|
data[4] = 0;
|
|
data[5] = 1;
|
|
|
|
if (img->imagedata8) {
|
|
for (z = 0; z < zdim; z++)
|
|
for (y = 0; y < ydim; y++)
|
|
for (x = 0; x < xdim; x++) {
|
|
int xi = xpos + x;
|
|
int yi = ypos + y;
|
|
int zi = zpos + z;
|
|
// clamp XY coordinates to the picture.
|
|
if (xi < 0)
|
|
xi = 0;
|
|
if (yi < 0)
|
|
yi = 0;
|
|
if (zi < 0)
|
|
zi = 0;
|
|
if (xi >= xsize)
|
|
xi = xsize - 1;
|
|
if (yi >= ysize)
|
|
yi = ysize - 1;
|
|
if (zi >= zsize)
|
|
zi = zsize - 1;
|
|
|
|
int r = img->imagedata8[zi][yi][4 * xi];
|
|
int g = img->imagedata8[zi][yi][4 * xi + 1];
|
|
int b = img->imagedata8[zi][yi][4 * xi + 2];
|
|
int a = img->imagedata8[zi][yi][4 * xi + 3];
|
|
|
|
data[0] = r / 255.0f;
|
|
data[1] = g / 255.0f;
|
|
data[2] = b / 255.0f;
|
|
data[3] = a / 255.0f;
|
|
|
|
fptr[0] = data[0];
|
|
fptr[1] = data[1];
|
|
fptr[2] = data[2];
|
|
fptr[3] = data[3];
|
|
fptr += 4;
|
|
}
|
|
}
|
|
|
|
//------------------------------------------
|
|
// HDR currently not supported in code
|
|
/*
|
|
else if (img->imagedata16)
|
|
{
|
|
for (z = 0; z < zdim; z++)
|
|
for (y = 0; y < ydim; y++)
|
|
for (x = 0; x < xdim; x++)
|
|
{
|
|
int xi = xpos + x;
|
|
int yi = ypos + y;
|
|
int zi = zpos + z;
|
|
// clamp XY coordinates to the picture.
|
|
if (xi < 0)
|
|
xi = 0;
|
|
if (yi < 0)
|
|
yi = 0;
|
|
if (zi < 0)
|
|
zi = 0;
|
|
if (xi >= xsize)
|
|
xi = xsize - 1;
|
|
if (yi >= ysize)
|
|
yi = ysize - 1;
|
|
if (zi >= ysize)
|
|
zi = zsize - 1;
|
|
|
|
int r = img->imagedata16[zi][yi][4 * xi];
|
|
int g = img->imagedata16[zi][yi][4 * xi + 1];
|
|
int b = img->imagedata16[zi][yi][4 * xi + 2];
|
|
int a = img->imagedata16[zi][yi][4 * xi + 3];
|
|
|
|
float rf = sf16_to_float((sf16)r);
|
|
float gf = sf16_to_float((sf16)g);
|
|
float bf = sf16_to_float((sf16)b);
|
|
float af = sf16_to_float((sf16)a);
|
|
|
|
// equalize the color components somewhat, and get rid of negative values.
|
|
|
|
rf = MAX(rf, 1e-8f);
|
|
gf = MAX(gf, 1e-8f);
|
|
bf = MAX(bf, 1e-8f);
|
|
af = MAX(af, 1e-8f);
|
|
|
|
data[0] = rf;
|
|
data[1] = gf;
|
|
data[2] = bf;
|
|
data[3] = af;
|
|
|
|
fptr[0] = data[0];
|
|
fptr[1] = data[1];
|
|
fptr[2] = data[2];
|
|
fptr[3] = data[3];
|
|
fptr += 4;
|
|
}
|
|
}
|
|
*/
|
|
|
|
int pixelcount = xdim * ydim * zdim;
|
|
|
|
// impose the choice on every pixel when encoding.
|
|
for (i = 0; i < pixelcount; i++) {
|
|
pb->rgb_lns[i] = (uint8_t)g_ASTCEncode.m_rgb_force_use_of_hdr;
|
|
pb->alpha_lns[i] = (uint8_t)g_ASTCEncode.m_alpha_force_use_of_hdr;
|
|
pb->nan_texel[i] = 0;
|
|
}
|
|
|
|
imageblock_initialize_work_from_orig_cpu(pb, pixelcount);
|
|
update_imageblock_flags_cpu(pb, xdim, ydim, zdim);
|
|
}
|
|
|
|
// Releases an image and its pixel storage. A null img is accepted and ignored.
//
// Each pixel array is freed as three allocations: the texel data, the table of
// row pointers and the table of slice pointers.
void destroy_image_cpu(astc_codec_image_cpu * img) {
    if (!img)
        return;

    if (img->imagedata8) {
        delete[] img->imagedata8[0][0];   // texel data
        delete[] img->imagedata8[0];      // row pointers
        delete[] img->imagedata8;         // slice pointers
    }

    if (img->imagedata16) {
        delete[] img->imagedata16[0][0];  // texel data
        delete[] img->imagedata16[0];     // row pointers
        delete[] img->imagedata16;        // slice pointers
    }

    delete img;
}
|
|
|
|
// Allocates an image of xsize x ysize x zsize texels, four channels per texel,
// with `padding` extra texels on each side in x and y (and in z as well,
// unless zsize is 1).
//
// bitness selects the channel storage: 8 fills imagedata8, 16 fills
// imagedata16; the other pointer is set to NULL. Any other bitness yields
// nullptr. The texel contents are left uninitialized.
//
// Storage layout: one contiguous texel array, one table of row pointers and
// one table of slice pointers, so that data[z][y][4 * x + channel] works.
astc_codec_image_cpu *allocate_image_cpu(int bitness, int xsize, int ysize, int zsize, int padding) {
    // Reject unsupported bit depths before anything is allocated; the image
    // header used to be allocated first and leaked on this path.
    if (bitness != 8 && bitness != 16)
        return nullptr;

    int i, j;
    astc_codec_image_cpu *img = new astc_codec_image_cpu;
    img->xsize = xsize;
    img->ysize = ysize;
    img->zsize = zsize;
    img->padding = padding;

    // Extents including padding.
    int exsize = xsize + 2 * padding;
    int eysize = ysize + 2 * padding;
    int ezsize = (zsize == 1) ? 1 : zsize + 2 * padding;

    if (bitness == 8) {
        img->imagedata8 = new uint8_t **[ezsize];
        img->imagedata8[0] = new uint8_t *[ezsize * eysize];
        img->imagedata8[0][0] = new uint8_t[4 * ezsize * eysize * exsize];

        // Point every slice at its run of row pointers and at its first row.
        for (i = 1; i < ezsize; i++) {
            img->imagedata8[i] = img->imagedata8[0] + i * eysize;
            img->imagedata8[i][0] = img->imagedata8[0][0] + 4 * i * exsize * eysize;
        }
        // Point the remaining rows of every slice into the texel array.
        for (i = 0; i < ezsize; i++)
            for (j = 1; j < eysize; j++)
                img->imagedata8[i][j] = img->imagedata8[i][0] + 4 * j * exsize;

        img->imagedata16 = NULL;
    } else {
        img->imagedata16 = new uint16_t **[ezsize];
        img->imagedata16[0] = new uint16_t *[ezsize * eysize];
        img->imagedata16[0][0] = new uint16_t[4 * ezsize * eysize * exsize];

        // Point every slice at its run of row pointers and at its first row.
        for (i = 1; i < ezsize; i++) {
            img->imagedata16[i] = img->imagedata16[0] + i * eysize;
            img->imagedata16[i][0] = img->imagedata16[0][0] + 4 * i * exsize * eysize;
        }
        // Point the remaining rows of every slice into the texel array.
        for (i = 0; i < ezsize; i++)
            for (j = 1; j < eysize; j++)
                img->imagedata16[i][j] = img->imagedata16[i][0] + 4 * j * exsize;

        img->imagedata8 = NULL;
    }

    return img;
}
|
|
|
|
// Sets every texel of the padded image to zero in the first three channels.
// The fourth channel is set to 0xFF for 8-bit data and 0x3C00 for 16-bit data.
// 8-bit storage takes precedence when both pointers are set.
void initialize_image_cpu(astc_codec_image_cpu * img) {
    // Extents including padding; a single-slice image has no z padding.
    const int pad2 = 2 * img->padding;
    const int padded_w = img->xsize + pad2;
    const int padded_h = img->ysize + pad2;
    const int padded_d = (img->zsize == 1) ? 1 : img->zsize + pad2;

    if (img->imagedata8) {
        for (int z = 0; z < padded_d; z++) {
            for (int y = 0; y < padded_h; y++) {
                auto *row = img->imagedata8[z][y];
                for (int x = 0; x < padded_w; x++) {
                    row[4 * x] = 0;
                    row[4 * x + 1] = 0;
                    row[4 * x + 2] = 0;
                    row[4 * x + 3] = 0xFF;
                }
            }
        }
    } else if (img->imagedata16) {
        for (int z = 0; z < padded_d; z++) {
            for (int y = 0; y < padded_h; y++) {
                auto *row = img->imagedata16[z][y];
                for (int x = 0; x < padded_w; x++) {
                    row[4 * x] = 0;
                    row[4 * x + 1] = 0;
                    row[4 * x + 2] = 0;
                    row[4 * x + 3] = 0x3C00;
                }
            }
        }
    }
}
|
|
|
|
|
|
// Writes the texels of an imageblock into an image.
//
// img            destination image; 8-bit storage is used when present,
//                otherwise 16-bit storage.
// pb             source block; orig_data holds four floats per texel and
//                nan_texel one flag per texel.
// xdim/ydim/zdim block dimensions.
// xpos/ypos/zpos position of the block in the image; texels that fall outside
//                xsize/ysize/zsize are skipped.
// swz            for each output channel, the index of the source to use:
//                0..3 are the block's channels, 4 is constant 0, 5 is
//                constant 1 and 6 is a value computed from channels 0 and 3.
//
// For 8-bit output every value is clamped to [0, 1] before conversion. The
// lower bound used to be missing, so negative inputs wrapped around when
// narrowed to uint8_t.
void write_imageblock_cpu(astc_codec_image_cpu * img, const imageblock_cpu * pb,
                          int xdim, int ydim, int zdim,
                          int xpos, int ypos, int zpos, swizzlepattern_cpu swz) {
    const float *fptr = pb->orig_data;
    const uint8_t *nptr = pb->nan_texel;
    const int xsize = img->xsize;
    const int ysize = img->ysize;
    const int zsize = img->zsize;

    // Swizzle sources; entries 4 and 5 are the constants 0 and 1.
    float data[7];
    data[4] = 0.0f;
    data[5] = 1.0f;

    // Loads one texel into data[0..3] and derives data[6].
    auto load_channels = [&data](const float *src) {
#ifdef USE_PERFORMM_SRGB_TRANSFORM
        if (g_ASTCEncode.m_perform_srgb_transform) {
            // Linear -> sRGB on the color channels; values above 1 are left as they are.
            for (int c = 0; c < 3; c++) {
                float v = src[c];
                if (v <= 0.0031308f)
                    v = v * 12.92f;
                else if (v <= 1)
                    v = (float)(1.055f * pow(v, (1.0f / 2.4f)) - 0.055f);
                data[c] = v;
            }
        } else
#endif
        {
            data[0] = src[0];
            data[1] = src[1];
            data[2] = src[2];
        }
        data[3] = src[3];

        // data[6]: channels 0 and 3 are mapped to [-1, 1] and taken as x and y;
        // z = sqrt(max(0, 1 - x^2 - y^2)) is mapped back to [0.5, 1].
        const float xcoord = (data[0] * 2.0f) - 1.0f;
        const float ycoord = (data[3] * 2.0f) - 1.0f;
        float zcoord = 1.0f - xcoord * xcoord - ycoord * ycoord;
        if (zcoord < 0.0f)
            zcoord = 0.0f;
        data[6] = (float)((sqrt(zcoord) * 0.5f) + 0.5f);
    };

    if (img->imagedata8) {
        // Clamps to [0, 1] and converts to an 8-bit value with rounding.
        auto to_unorm8 = [](float v) -> uint8_t {
            if (v > 1.0f)
                v = 1.0f;
            if (v < 0.0f)
                v = 0.0f;
            return (uint8_t)static_cast<int>(floor(v * 255.0f + 0.5f));
        };

        for (int z = 0; z < zdim; z++) {
            for (int y = 0; y < ydim; y++) {
                // The source cursors advance for every texel, written or not.
                for (int x = 0; x < xdim; x++, fptr += 4, nptr++) {
                    const int xi = xpos + x;
                    const int yi = ypos + y;
                    const int zi = zpos + z;

                    if (xi < 0 || yi < 0 || zi < 0 || xi >= xsize || yi >= ysize || zi >= zsize)
                        continue;

                    auto *out = &img->imagedata8[zi][yi][4 * xi];

                    if (*nptr) {
                        // NaN texel, which cannot be displayed: write purple instead.
                        out[0] = 0xFF;
                        out[1] = 0x00;
                        out[2] = 0xFF;
                        out[3] = 0xFF;
                        continue;
                    }

                    load_channels(fptr);

                    // Apply the swizzle and pack.
                    out[0] = to_unorm8(data[swz.r]);
                    out[1] = to_unorm8(data[swz.g]);
                    out[2] = to_unorm8(data[swz.b]);
                    out[3] = to_unorm8(data[swz.a]);
                }
            }
        }
    } else if (img->imagedata16) {
        for (int z = 0; z < zdim; z++) {
            for (int y = 0; y < ydim; y++) {
                // The source cursors advance for every texel, written or not.
                for (int x = 0; x < xdim; x++, fptr += 4, nptr++) {
                    const int xi = xpos + x;
                    const int yi = ypos + y;
                    const int zi = zpos + z;

                    if (xi < 0 || yi < 0 || zi < 0 || xi >= xsize || yi >= ysize || zi >= zsize)
                        continue;

                    auto *out = &img->imagedata16[zi][yi][4 * xi];

                    if (*nptr) {
                        // NaN texel: every channel is written as 0xFFFF.
                        out[0] = 0xFFFF;
                        out[1] = 0xFFFF;
                        out[2] = 0xFFFF;
                        out[3] = 0xFFFF;
                        continue;
                    }

                    load_channels(fptr);

                    // Apply the swizzle and convert to 16-bit floats; no clamping on this path.
                    const int r = ASTC_Encoder::float_to_sf16(data[swz.r], ASTC_Encoder::SF_NEARESTEVEN);
                    const int g = ASTC_Encoder::float_to_sf16(data[swz.g], ASTC_Encoder::SF_NEARESTEVEN);
                    const int b = ASTC_Encoder::float_to_sf16(data[swz.b], ASTC_Encoder::SF_NEARESTEVEN);
                    const int a = ASTC_Encoder::float_to_sf16(data[swz.a], ASTC_Encoder::SF_NEARESTEVEN);
                    out[0] = (uint16_t)r;
                    out[1] = (uint16_t)g;
                    out[2] = (uint16_t)b;
                    out[3] = (uint16_t)a;
                }
            }
        }
    }
}
|
|
|
|
// 32-bit integer mixing function. select_partition_cpu() feeds it the
// partition seed and slices the result into per-texel selection bits.
// All arithmetic is unsigned and wraps modulo 2^32.
uint32_t hash52_cpu(uint32_t inp) {
    inp = inp ^ (inp >> 15);

    // (2^4+1)*(2^7+1)*(2^17-1), negated modulo 2^32
    inp = inp * 0xEEDE0891u;

    inp = inp ^ (inp >> 5);
    inp = inp + (inp << 16);
    inp = inp ^ (inp >> 7);
    inp = inp ^ (inp >> 3);
    inp = inp ^ (inp << 6);
    return inp ^ (inp >> 17);
}
|
|
|
|
int select_partition_cpu(int seed, int x, int y, int z, int partitioncount, int small_block) {
|
|
if (small_block) {
|
|
x <<= 1;
|
|
y <<= 1;
|
|
z <<= 1;
|
|
}
|
|
|
|
seed += (partitioncount - 1) * 1024;
|
|
|
|
uint32_t rnum = hash52_cpu(seed);
|
|
|
|
uint8_t seed1 = rnum & 0xF;
|
|
uint8_t seed2 = (rnum >> 4) & 0xF;
|
|
uint8_t seed3 = (rnum >> 8) & 0xF;
|
|
uint8_t seed4 = (rnum >> 12) & 0xF;
|
|
uint8_t seed5 = (rnum >> 16) & 0xF;
|
|
uint8_t seed6 = (rnum >> 20) & 0xF;
|
|
uint8_t seed7 = (rnum >> 24) & 0xF;
|
|
uint8_t seed8 = (rnum >> 28) & 0xF;
|
|
uint8_t seed9 = (rnum >> 18) & 0xF;
|
|
uint8_t seed10 = (rnum >> 22) & 0xF;
|
|
uint8_t seed11 = (rnum >> 26) & 0xF;
|
|
uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
|
|
|
|
// squaring all the seeds in order to bias their distribution
|
|
// towards lower values.
|
|
seed1 *= seed1;
|
|
seed2 *= seed2;
|
|
seed3 *= seed3;
|
|
seed4 *= seed4;
|
|
seed5 *= seed5;
|
|
seed6 *= seed6;
|
|
seed7 *= seed7;
|
|
seed8 *= seed8;
|
|
seed9 *= seed9;
|
|
seed10 *= seed10;
|
|
seed11 *= seed11;
|
|
seed12 *= seed12;
|
|
|
|
|
|
int sh1, sh2, sh3;
|
|
if (seed & 1) {
|
|
sh1 = (seed & 2 ? 4 : 5);
|
|
sh2 = (partitioncount == 3 ? 6 : 5);
|
|
} else {
|
|
sh1 = (partitioncount == 3 ? 6 : 5);
|
|
sh2 = (seed & 2 ? 4 : 5);
|
|
}
|
|
sh3 = (seed & 0x10) ? sh1 : sh2;
|
|
|
|
seed1 >>= sh1;
|
|
seed2 >>= sh2;
|
|
seed3 >>= sh1;
|
|
seed4 >>= sh2;
|
|
seed5 >>= sh1;
|
|
seed6 >>= sh2;
|
|
seed7 >>= sh1;
|
|
seed8 >>= sh2;
|
|
|
|
seed9 >>= sh3;
|
|
seed10 >>= sh3;
|
|
seed11 >>= sh3;
|
|
seed12 >>= sh3;
|
|
|
|
|
|
|
|
int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
|
|
int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
|
|
int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
|
|
int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
|
|
|
|
|
|
// apply the saw
|
|
a &= 0x3F;
|
|
b &= 0x3F;
|
|
c &= 0x3F;
|
|
d &= 0x3F;
|
|
|
|
// remove some of the components of we are to output < 4 partitions.
|
|
if (partitioncount <= 3)
|
|
d = 0;
|
|
if (partitioncount <= 2)
|
|
c = 0;
|
|
if (partitioncount <= 1)
|
|
b = 0;
|
|
|
|
int partition;
|
|
if (a >= b && a >= c && a >= d)
|
|
partition = 0;
|
|
else if (b >= c && b >= d)
|
|
partition = 1;
|
|
else if (c >= d)
|
|
partition = 2;
|
|
else
|
|
partition = 3;
|
|
return partition;
|
|
}
|
|
|
|
void generate_one_partition_table_cpu(int xdim, int ydim, int zdim, int partition_count, int partition_index, partition_info_cpu * pt) {
|
|
int small_block = (xdim * ydim * zdim) < 32;
|
|
|
|
uint8_t *partition_of_texel = pt->partition_of_texel;
|
|
int x, y, z, i;
|
|
|
|
|
|
for (z = 0; z < zdim; z++)
|
|
for (y = 0; y < ydim; y++)
|
|
for (x = 0; x < xdim; x++) {
|
|
uint8_t part = (uint8_t)select_partition_cpu(partition_index, x, y, z, partition_count, small_block);
|
|
*partition_of_texel++ = part;
|
|
}
|
|
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
|
|
int counts[4];
|
|
for (i = 0; i < 4; i++)
|
|
counts[i] = 0;
|
|
|
|
for (i = 0; i < texels_per_block; i++) {
|
|
int partition = pt->partition_of_texel[i];
|
|
pt->texels_of_partition[partition][counts[partition]++] = (uint8_t)i;
|
|
}
|
|
|
|
for (i = 0; i < 4; i++)
|
|
pt->texels_per_partition[i] = (uint8_t)counts[i];
|
|
|
|
if (counts[0] == 0)
|
|
pt->partition_count = 0;
|
|
else if (counts[1] == 0)
|
|
pt->partition_count = 1;
|
|
else if (counts[2] == 0)
|
|
pt->partition_count = 2;
|
|
else if (counts[3] == 0)
|
|
pt->partition_count = 3;
|
|
else
|
|
pt->partition_count = 4;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++)
|
|
pt->coverage_bitmaps[i] = 0ULL;
|
|
|
|
const block_size_descriptor_cpu *bsd = get_block_size_descriptor_cpu(xdim, ydim, zdim);
|
|
int texels_to_process = bsd->texelcount_for_bitmap_partitioning;
|
|
for (i = 0; i < texels_to_process; i++) {
|
|
pt->coverage_bitmaps[pt->partition_of_texel[i]] |= 1ULL << i;
|
|
}
|
|
|
|
}
|
|
|
|
// Rebuilds pb->orig_data (floats) from pb->work_data for the first
// `pixelcount` texels, four channels per texel. Each work value is truncated
// to uint16_t and converted to sf16 either through lns_to_sf16 (when the
// texel's rgb_lns / alpha_lns flag is set) or through unorm16_to_sf16, then
// widened to float. Finishes by refreshing the derived data.
void imageblock_initialize_orig_from_work_cpu(imageblock_cpu * pb, int pixelcount) {
    for (int texel = 0; texel < pixelcount; texel++) {
        float *orig = pb->orig_data + 4 * texel;
        float *work = pb->work_data + 4 * texel;

        for (int channel = 0; channel < 4; channel++) {
            // Channels 0..2 follow the RGB flag, channel 3 the alpha flag.
            const bool lns_encoded = (channel < 3) ? (pb->rgb_lns[texel] != 0) : (pb->alpha_lns[texel] != 0);

            if (lns_encoded) {
                orig[channel] = ASTC_Encoder::sf16_to_float(ASTC_Encoder::lns_to_sf16((uint16_t)work[channel]));
            } else {
                orig[channel] = ASTC_Encoder::sf16_to_float(ASTC_Encoder::unorm16_to_sf16((uint16_t)work[channel]));
            }
        }
    }

    imageblock_initialize_deriv_from_work_and_orig_cpu(pb, pixelcount);
}
|
|
|
|
void unpack_color_endpoints_cpu(ASTC_Encoder::astc_decode_mode decode_mode, int format, int quantization_level, int *input, int *rgb_hdr, int *alpha_hdr, int *nan_endpoint, ASTC_Encoder::ushort4 * output0, ASTC_Encoder::ushort4 * output1) {
|
|
*nan_endpoint = 0;
|
|
|
|
switch (format) {
|
|
case ASTC_Encoder::FMT_LUMINANCE:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::luminance_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_LUMINANCE_DELTA:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::luminance_delta_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_HDR_LUMINANCE_SMALL_RANGE:
|
|
*rgb_hdr = 1;
|
|
*alpha_hdr = -1;
|
|
ASTC_Encoder::hdr_luminance_small_range_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_HDR_LUMINANCE_LARGE_RANGE:
|
|
*rgb_hdr = 1;
|
|
*alpha_hdr = -1;
|
|
ASTC_Encoder::hdr_luminance_large_range_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_LUMINANCE_ALPHA:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::luminance_alpha_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_LUMINANCE_ALPHA_DELTA:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::luminance_alpha_delta_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_RGB_SCALE:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::rgb_scale_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_RGB_SCALE_ALPHA:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::rgb_scale_alpha_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_HDR_RGB_SCALE:
|
|
*rgb_hdr = 1;
|
|
*alpha_hdr = -1;
|
|
ASTC_Encoder::hdr_rgbo_unpack3(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_RGB:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::rgb_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_RGB_DELTA:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::rgb_delta_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_HDR_RGB:
|
|
*rgb_hdr = 1;
|
|
*alpha_hdr = -1;
|
|
ASTC_Encoder::hdr_rgb_unpack3(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_RGBA:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::rgba_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_RGBA_DELTA:
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::rgba_delta_unpack(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_HDR_RGB_LDR_ALPHA:
|
|
*rgb_hdr = 1;
|
|
*alpha_hdr = 0;
|
|
ASTC_Encoder::hdr_rgb_ldr_alpha_unpack3(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
case ASTC_Encoder::FMT_HDR_RGBA:
|
|
*rgb_hdr = 1;
|
|
*alpha_hdr = 1;
|
|
ASTC_Encoder::hdr_rgb_hdr_alpha_unpack3(input, quantization_level, output0, output1);
|
|
break;
|
|
|
|
default:
|
|
break;;
|
|
}
|
|
|
|
|
|
|
|
if (*alpha_hdr == -1) {
|
|
if (g_ASTCEncode.m_alpha_force_use_of_hdr) {
|
|
output0->w = 0x7800;
|
|
output1->w = 0x7800;
|
|
*alpha_hdr = 1;
|
|
} else {
|
|
output0->w = 0x00FF;
|
|
output1->w = 0x00FF;
|
|
*alpha_hdr = 0;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
switch (decode_mode) {
|
|
case ASTC_Encoder::DECODE_LDR_SRGB:
|
|
if (*rgb_hdr == 1) {
|
|
output0->x = 0xFF00;
|
|
output0->y = 0x0000;
|
|
output0->z = 0xFF00;
|
|
output0->w = 0xFF00;
|
|
output1->x = 0xFF00;
|
|
output1->y = 0x0000;
|
|
output1->z = 0xFF00;
|
|
output1->w = 0xFF00;
|
|
} else {
|
|
output0->x *= 257;
|
|
output0->y *= 257;
|
|
output0->z *= 257;
|
|
output0->w *= 257;
|
|
output1->x *= 257;
|
|
output1->y *= 257;
|
|
output1->z *= 257;
|
|
output1->w *= 257;
|
|
}
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
break;
|
|
|
|
case ASTC_Encoder::DECODE_LDR:
|
|
if (*rgb_hdr == 1) {
|
|
output0->x = 0xFFFF;
|
|
output0->y = 0xFFFF;
|
|
output0->z = 0xFFFF;
|
|
output0->w = 0xFFFF;
|
|
output1->x = 0xFFFF;
|
|
output1->y = 0xFFFF;
|
|
output1->z = 0xFFFF;
|
|
output1->w = 0xFFFF;
|
|
*nan_endpoint = 1;
|
|
} else {
|
|
output0->x *= 257;
|
|
output0->y *= 257;
|
|
output0->z *= 257;
|
|
output0->w *= 257;
|
|
output1->x *= 257;
|
|
output1->y *= 257;
|
|
output1->z *= 257;
|
|
output1->w *= 257;
|
|
}
|
|
*rgb_hdr = 0;
|
|
*alpha_hdr = 0;
|
|
break;
|
|
|
|
case ASTC_Encoder::DECODE_HDR:
|
|
|
|
if (*rgb_hdr == 0) {
|
|
output0->x *= 257;
|
|
output0->y *= 257;
|
|
output0->z *= 257;
|
|
output1->x *= 257;
|
|
output1->y *= 257;
|
|
output1->z *= 257;
|
|
}
|
|
if (*alpha_hdr == 0) {
|
|
output0->w *= 257;
|
|
output1->w *= 257;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
// Blends two endpoint colors with integer weights out of 64 and rounds the
// result (+32, >> 6).
//
//   weight                  weight of color1 for every channel
//   plane2_weight           weight of color1 for the channel named below
//   plane2_color_component  0..3 selects the channel that uses plane2_weight;
//                           any other value (-1 in 1-plane mode) selects none
//
// In DECODE_LDR_SRGB the endpoints are reduced to their top 8 bits before the
// blend, and the blended value is replicated into both bytes afterwards.
ASTC_Encoder::ushort4 lerp_color_int(ASTC_Encoder::astc_decode_mode decode_mode, ASTC_Encoder::ushort4 color0, ASTC_Encoder::ushort4 color1, int weight, int plane2_weight, int plane2_color_component // -1 in 1-plane mode
                                    ) {
    const bool srgb_decode = (decode_mode == ASTC_Encoder::DECODE_LDR_SRGB);

    ASTC_Encoder::int4 low = ASTC_Encoder::int4(color0.x, color0.y, color0.z, color0.w);
    ASTC_Encoder::int4 high = ASTC_Encoder::int4(color1.x, color1.y, color1.z, color1.w);

    // Per-channel weight of the second endpoint; one channel may be overridden.
    ASTC_Encoder::int4 high_weight = ASTC_Encoder::int4(weight, weight, weight, weight);
    if (plane2_color_component == 0) {
        high_weight.x = plane2_weight;
    } else if (plane2_color_component == 1) {
        high_weight.y = plane2_weight;
    } else if (plane2_color_component == 2) {
        high_weight.z = plane2_weight;
    } else if (plane2_color_component == 3) {
        high_weight.w = plane2_weight;
    }

    ASTC_Encoder::int4 low_weight = ASTC_Encoder::int4(64, 64, 64, 64) - high_weight;

    if (srgb_decode) {
        low = low >> 8;
        high = high >> 8;
    }

    ASTC_Encoder::int4 blended = (low * low_weight) + (high * high_weight) + ASTC_Encoder::int4(32, 32, 32, 32);
    blended = blended >> 6;

    if (srgb_decode)
        blended = blended | (blended << 8);

    return ASTC_Encoder::ushort4((ASTC_Encoder::ushort)blended.x, (ASTC_Encoder::ushort)blended.y, (ASTC_Encoder::ushort)blended.z, (ASTC_Encoder::ushort)blended.w);
}
|
|
|
|
int compute_value_of_texel_int_cpu(int texel_to_get, const decimation_table_cpu * it, const int *weights) {
|
|
int i;
|
|
int summed_value = 8;
|
|
int weights_to_evaluate = it->texel_num_weights[texel_to_get];
|
|
for (i = 0; i < weights_to_evaluate; i++) {
|
|
summed_value += weights[it->texel_weights[texel_to_get][i]] * it->texel_weights_int[texel_to_get][i];
|
|
}
|
|
return summed_value >> 4;
|
|
}
|
|
|
|
void decompress_symbolic_block_cpu(ASTC_Encoder::astc_decode_mode decode_mode,
|
|
int xdim, int ydim, int zdim, // dimensions of block
|
|
int xpos, int ypos, int zpos, // position of block
|
|
symbolic_compressed_block_cpu * scb,
|
|
imageblock_cpu * blk) {
|
|
blk->xpos = xpos;
|
|
blk->ypos = ypos;
|
|
blk->zpos = zpos;
|
|
|
|
int i;
|
|
|
|
// if we detected an error-block, blow up immediately.
|
|
if (scb->error_block) {
|
|
if (decode_mode == ASTC_Encoder::DECODE_LDR_SRGB) {
|
|
for (i = 0; i < xdim * ydim * zdim; i++) {
|
|
blk->orig_data[4 * i] = 1.0f;
|
|
blk->orig_data[4 * i + 1] = 0.0f;
|
|
blk->orig_data[4 * i + 2] = 1.0f;
|
|
blk->orig_data[4 * i + 3] = 1.0f;
|
|
blk->rgb_lns[i] = 0;
|
|
blk->alpha_lns[i] = 0;
|
|
blk->nan_texel[i] = 0;
|
|
}
|
|
} else {
|
|
for (i = 0; i < xdim * ydim * zdim; i++) {
|
|
blk->orig_data[4 * i] = 0.0f;
|
|
blk->orig_data[4 * i + 1] = 0.0f;
|
|
blk->orig_data[4 * i + 2] = 0.0f;
|
|
blk->orig_data[4 * i + 3] = 0.0f;
|
|
blk->rgb_lns[i] = 0;
|
|
blk->alpha_lns[i] = 0;
|
|
blk->nan_texel[i] = 1;
|
|
}
|
|
}
|
|
|
|
imageblock_initialize_work_from_orig_cpu(blk, xdim * ydim * zdim);
|
|
update_imageblock_flags_cpu(blk, xdim, ydim, zdim);
|
|
return;
|
|
}
|
|
|
|
if (scb->block_mode < 0) {
|
|
float red = 0, green = 0, blue = 0, alpha = 0;
|
|
int use_lns = 0;
|
|
int use_nan = 0;
|
|
|
|
if (scb->block_mode == -2) {
|
|
// For sRGB decoding, we should return only the top 8 bits.
|
|
int mask = (decode_mode == ASTC_Encoder::DECODE_LDR_SRGB) ? 0xFF00 : 0xFFFF;
|
|
|
|
red = ASTC_Encoder::sf16_to_float(ASTC_Encoder::unorm16_to_sf16((uint16_t)scb->constant_color[0] & mask));
|
|
green = ASTC_Encoder::sf16_to_float(ASTC_Encoder::unorm16_to_sf16((uint16_t)scb->constant_color[1] & mask));
|
|
blue = ASTC_Encoder::sf16_to_float(ASTC_Encoder::unorm16_to_sf16((uint16_t)scb->constant_color[2] & mask));
|
|
alpha = ASTC_Encoder::sf16_to_float(ASTC_Encoder::unorm16_to_sf16((uint16_t)scb->constant_color[3] & mask));
|
|
use_lns = 0;
|
|
use_nan = 0;
|
|
} else {
|
|
switch (decode_mode) {
|
|
case ASTC_Encoder::DECODE_LDR_SRGB:
|
|
red = 1.0f;
|
|
green = 0.0f;
|
|
blue = 1.0f;
|
|
alpha = 1.0f;
|
|
use_lns = 0;
|
|
use_nan = 0;
|
|
break;
|
|
case ASTC_Encoder::DECODE_LDR:
|
|
red = 0.0f;
|
|
green = 0.0f;
|
|
blue = 0.0f;
|
|
alpha = 0.0f;
|
|
use_lns = 0;
|
|
use_nan = 1;
|
|
break;
|
|
case ASTC_Encoder::DECODE_HDR:
|
|
// constant-color block; unpack from FP16 to FP32.
|
|
red = ASTC_Encoder::sf16_to_float((sf16)scb->constant_color[0]);
|
|
green = ASTC_Encoder::sf16_to_float((sf16)scb->constant_color[1]);
|
|
blue = ASTC_Encoder::sf16_to_float((sf16)scb->constant_color[2]);
|
|
alpha = ASTC_Encoder::sf16_to_float((sf16)scb->constant_color[3]);
|
|
use_lns = 1;
|
|
use_nan = 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < xdim * ydim * zdim; i++) {
|
|
blk->orig_data[4 * i] = red;
|
|
blk->orig_data[4 * i + 1] = green;
|
|
blk->orig_data[4 * i + 2] = blue;
|
|
blk->orig_data[4 * i + 3] = alpha;
|
|
blk->rgb_lns[i] = (uint8_t)use_lns;
|
|
blk->alpha_lns[i] = (uint8_t)use_lns;
|
|
blk->nan_texel[i] = (uint8_t)use_nan;
|
|
}
|
|
|
|
|
|
imageblock_initialize_work_from_orig_cpu(blk, xdim * ydim * zdim);
|
|
update_imageblock_flags_cpu(blk, xdim, ydim, zdim);
|
|
return;
|
|
}
|
|
|
|
// get the appropriate partition-table entry
|
|
int partition_count = scb->partition_count;
|
|
|
|
if ((partition_count > 5) || (scb->partition_index > 1024))
|
|
return;
|
|
|
|
|
|
// get the appropriate block descriptor
|
|
block_size_descriptor_cpu *bsd = get_block_size_descriptor_cpu(xdim, ydim, zdim);
|
|
decimation_table_cpu **ixtab2 = bsd->decimation_tables;
|
|
|
|
decimation_table_cpu *it = ixtab2[bsd->block_modes[scb->block_mode].decimation_mode];
|
|
|
|
int is_dual_plane = bsd->block_modes[scb->block_mode].is_dual_plane;
|
|
|
|
int weight_quantization_level = bsd->block_modes[scb->block_mode].quantization_mode;
|
|
|
|
// decode the color endpoints
|
|
ASTC_Encoder::ushort4 color_endpoint0[4];
|
|
ASTC_Encoder::ushort4 color_endpoint1[4];
|
|
int rgb_hdr_endpoint[4];
|
|
int alpha_hdr_endpoint[4];
|
|
int nan_endpoint[4];
|
|
|
|
for (i = 0; i < partition_count; i++)
|
|
unpack_color_endpoints_cpu(
|
|
decode_mode,
|
|
scb->color_formats[i],
|
|
scb->color_quantization_level,
|
|
scb->color_values[i],
|
|
&(rgb_hdr_endpoint[i]),
|
|
&(alpha_hdr_endpoint[i]),
|
|
&(nan_endpoint[i]),
|
|
&(color_endpoint0[i]),
|
|
&(color_endpoint1[i]));
|
|
|
|
// first unquantize the weights
|
|
int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK];
|
|
int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK];
|
|
int weight_count = it->num_weights;
|
|
|
|
const ASTC_Encoder::quantization_and_transfer_table *qat = &(ASTC_Encoder::quant_and_xfer_tables[weight_quantization_level]);
|
|
|
|
for (i = 0; i < weight_count; i++) {
|
|
uq_plane1_weights[i] = qat->unquantized_value[scb->plane1_weights[i]];
|
|
}
|
|
if (is_dual_plane) {
|
|
for (i = 0; i < weight_count; i++)
|
|
uq_plane2_weights[i] = qat->unquantized_value[scb->plane2_weights[i]];
|
|
}
|
|
|
|
|
|
// then un-decimate them.
|
|
int weights[MAX_TEXELS_PER_BLOCK];
|
|
int plane2_weights[MAX_TEXELS_PER_BLOCK];
|
|
|
|
|
|
int texels_per_block = xdim * ydim * zdim;
|
|
for (i = 0; i < texels_per_block; i++)
|
|
weights[i] = compute_value_of_texel_int_cpu(i, it, uq_plane1_weights);
|
|
|
|
if (is_dual_plane)
|
|
for (i = 0; i < texels_per_block; i++)
|
|
plane2_weights[i] = compute_value_of_texel_int_cpu(i, it, uq_plane2_weights);
|
|
|
|
|
|
int plane2_color_component = scb->plane2_color_component;
|
|
|
|
|
|
// now that we have endpoint colors and weights, we can unpack actual colors for
|
|
// each texel.
|
|
for (i = 0; i < texels_per_block; i++) {
|
|
ASTC_Encoder::uint8_t partition = g_ASTCEncode.partition_tables[partition_count][scb->partition_index].partition_of_texel[i];
|
|
|
|
ASTC_Encoder::ushort4 color = lerp_color_int(decode_mode,
|
|
color_endpoint0[partition],
|
|
color_endpoint1[partition],
|
|
weights[i],
|
|
plane2_weights[i],
|
|
is_dual_plane ? plane2_color_component : -1);
|
|
|
|
blk->rgb_lns[i] = (uint8_t)rgb_hdr_endpoint[partition];
|
|
blk->alpha_lns[i] = (uint8_t)alpha_hdr_endpoint[partition];
|
|
blk->nan_texel[i] = (uint8_t)nan_endpoint[partition];
|
|
|
|
blk->work_data[4 * i] = color.x;
|
|
blk->work_data[4 * i + 1] = color.y;
|
|
blk->work_data[4 * i + 2] = color.z;
|
|
blk->work_data[4 * i + 3] = color.w;
|
|
}
|
|
|
|
imageblock_initialize_orig_from_work_cpu(blk, xdim * ydim * zdim);
|
|
update_imageblock_flags_cpu(blk, xdim, ydim, zdim);
|
|
}
|
|
|
|
// End CPU Decoder Code
|
|
//-----------------------------------------------
|
|
|