Added stripped down NVTT library.

Branimir Karadžić 2015-12-20 20:40:35 -08:00
parent 35340d121f
commit 8ab70bd8cf
56 changed files with 20626 additions and 0 deletions


@@ -0,0 +1,24 @@
NVIDIA Texture Tools 2.0 is licensed under the MIT license.

Copyright (c) 2007 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

76
3rdparty/nvtt/bc6h/bits.h vendored Normal file

@@ -0,0 +1,76 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#pragma once
#ifndef _ZOH_BITS_H
#define _ZOH_BITS_H
// read/write a bitstream
#include "nvcore/Debug.h"
namespace ZOH {
class Bits
{
public:
Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
void write(int value, int nbits) {
nvAssert (nbits >= 0 && nbits < 32);
nvAssert (sizeof(int)>= 4);
for (int i=0; i<nbits; ++i)
writeone(value>>i);
}
int read(int nbits) {
nvAssert (nbits >= 0 && nbits < 32);
nvAssert (sizeof(int)>= 4);
int out = 0;
for (int i=0; i<nbits; ++i)
out |= readone() << i;
return out;
}
int getptr() { return bptr; }
void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
int getsize() { return bend; }
private:
int bptr; // next bit to read
int bend; // last written bit + 1
char *bits; // ptr to user bit stream
const char *cbits; // ptr to const user bit stream
int maxbits; // max size of user bit stream
char readonly; // 1 if this is a read-only stream
int readone() {
nvAssert (bptr < bend);
if (bptr >= bend) return 0;
int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
++bptr;
return bit != 0;
}
void writeone(int bit) {
nvAssert (!readonly); // "Writing a read-only bit stream"
nvAssert (bptr < maxbits);
if (bptr >= maxbits) return;
if (bit&1)
bits[bptr>>3] |= 1 << (bptr & 7);
else
bits[bptr>>3] &= ~(1 << (bptr & 7));
if (bptr++ >= bend) bend = bptr;
}
};
}
#endif
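
A minimal usage sketch of this bitstream class; the buffer and field values below are hypothetical, chosen only to show the LSB-first write/read order:

// Sketch: write two fields into a 128-bit block, then read them back
// through a read-only view. Fields are packed least-significant-bit first.
char buf[16] = {0};                       // one 16-byte BC6H block
ZOH::Bits writer(buf, 128);
writer.write(0x03, 5);                    // a 5-bit mode field
writer.write(1023, 10);                   // a 10-bit endpoint channel
ZOH::Bits reader((const char *)buf, 128);
int mode = reader.read(5);                // == 0x03
int rw   = reader.read(10);               // == 1023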

133
3rdparty/nvtt/bc6h/shapes_two.h vendored Normal file

@@ -0,0 +1,133 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#pragma once
#ifndef _ZOH_SHAPES_TWO_H
#define _ZOH_SHAPES_TWO_H
// shapes for two regions
#define NREGIONS 2
#define NSHAPES 64
#define SHAPEBITS 6
static const int shapes[NSHAPES*16] =
{
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1,
0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,
1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0,
1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1,
1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,
0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0,
1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
};
#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
static const int shapeindex_to_compressed_indices[NSHAPES*2] =
{
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0, 2, 0, 8, 0, 2,
0, 2, 0, 8, 0, 8, 0,15,
0, 2, 0, 8, 0, 2, 0, 2,
0, 8, 0, 8, 0, 2, 0, 2,
0,15, 0,15, 0, 6, 0, 8,
0, 2, 0, 8, 0,15, 0,15,
0, 2, 0, 8, 0, 2, 0, 2,
0, 2, 0,15, 0,15, 0, 6,
0, 6, 0, 2, 0, 6, 0, 8,
0,15, 0,15, 0, 2, 0, 2,
0,15, 0,15, 0,15, 0,15,
0,15, 0, 2, 0, 2, 0,15
};
#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*2+(region)]
#endif
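
To make the table layout concrete, here is a hypothetical helper (not part of the library) that prints one 4x4 partition and its per-region anchor positions using the two macros above:

#include <stdio.h>
// Prints '0'/'1' for the region of each pixel of shape si, then the two
// anchor positions whose compressed index drops its high-order bit.
static void dump_shape(int si)
{
    for (int y = 0; y < 4; ++y)
    {
        for (int x = 0; x < 4; ++x)
            printf("%d", REGION(x, y, si));
        printf("\n");
    }
    printf("anchors: %d %d\n",
           SHAPEINDEX_TO_COMPRESSED_INDICES(si, 0),
           SHAPEINDEX_TO_COMPRESSED_INDICES(si, 1));
}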

83
3rdparty/nvtt/bc6h/tile.h vendored Normal file

@@ -0,0 +1,83 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#pragma once
#ifndef _ZOH_TILE_H
#define _ZOH_TILE_H
#include "zoh_utils.h"
#include "nvmath/Vector.h"
#include <math.h>
namespace ZOH {
//#define USE_IMPORTANCE_MAP 1 // define this if you want to increase importance of some pixels in tile
class Tile
{
public:
// NOTE: this returns the appropriately-clamped BIT PATTERN of the half as an INTEGRAL float value
static float half2float(uint16 h)
{
return (float) Utils::ushort_to_format(h);
}
// NOTE: this is the inverse of the above operation
static uint16 float2half(float f)
{
return Utils::format_to_ushort((int)f);
}
// look for adjacent pixels that are identical. if there are enough of them, increase their importance
void generate_importance_map()
{
// initialize
for (int y=0; y<size_y; ++y)
for (int x=0; x<size_x; ++x)
{
// my importance is increased if I am identical to any of my 4-neighbors
importance_map[y][x] = match_4_neighbor(x,y) ? 5.0f : 1.0f;
}
}
bool is_equal(int x, int y, int xn, int yn)
{
if (xn < 0 || xn >= size_x || yn < 0 || yn >= size_y)
return false;
return( (data[y][x].x == data[yn][xn].x) &&
(data[y][x].y == data[yn][xn].y) &&
(data[y][x].z == data[yn][xn].z) );
}
#ifdef USE_IMPORTANCE_MAP
bool match_4_neighbor(int x, int y)
{
return is_equal(x,y,x-1,y) || is_equal(x,y,x+1,y) || is_equal(x,y,x,y-1) || is_equal(x,y,x,y+1);
}
#else
bool match_4_neighbor(int, int)
{
return false;
}
#endif
Tile() {}
~Tile() {}
Tile(int xs, int ys) {size_x = xs; size_y = ys;}
static const int TILE_H = 4;
static const int TILE_W = 4;
static const int TILE_TOTAL = TILE_H * TILE_W;
nv::Vector3 data[TILE_H][TILE_W];
float importance_map[TILE_H][TILE_W];
int size_x, size_y; // actual size of tile
};
}
#endif // _ZOH_TILE_H
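
The two static helpers above do not convert values numerically; they move the half's bit pattern into an integral float, as in this hypothetical round trip (assumes Utils::FORMAT == UNSIGNED_F16):

uint16 h  = 0x3c00;                      // 1.0 as an IEEE half
float  f  = ZOH::Tile::half2float(h);    // == 15360.0f, the bit pattern
uint16 h2 = ZOH::Tile::float2half(f);    // == 0x3c00 again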

197
3rdparty/nvtt/bc6h/zoh.cpp vendored Normal file

@@ -0,0 +1,197 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// the zoh compressor and decompressor
#include "tile.h"
#include "zoh.h"
#include <string.h> // memcpy
using namespace ZOH;
bool ZOH::isone(const char *block)
{
char code = block[0] & 0x1F;
return (code == 0x03 || code == 0x07 || code == 0x0b || code == 0x0f);
}
void ZOH::compress(const Tile &t, char *block)
{
char oneblock[ZOH::BLOCKSIZE], twoblock[ZOH::BLOCKSIZE];
float mseone = ZOH::compressone(t, oneblock);
float msetwo = ZOH::compresstwo(t, twoblock);
if (mseone <= msetwo)
memcpy(block, oneblock, ZOH::BLOCKSIZE);
else
memcpy(block, twoblock, ZOH::BLOCKSIZE);
}
void ZOH::decompress(const char *block, Tile &t)
{
if (ZOH::isone(block))
ZOH::decompressone(block, t);
else
ZOH::decompresstwo(block, t);
}
/*
void ZOH::compress(string inf, string zohf)
{
Array2D<Rgba> pixels;
int w, h;
char block[ZOH::BLOCKSIZE];
Exr::readRgba(inf, pixels, w, h);
FILE *zohfile = fopen(zohf.c_str(), "wb");
if (zohfile == NULL) throw "Unable to open .zoh file for write";
// stuff for progress bar O.o
int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
int tilecnt = 0;
int ndots = 25;
int dotcnt = 0;
printf("Progress [");
for (int i=0; i<ndots;++i) printf(" ");
printf("]\rProgress ["); fflush(stdout);
// convert to tiles and compress each tile
for (int y=0; y<h; y+=Tile::TILE_H)
{
int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W)
{
int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize);
t.insert(pixels, x, y);
ZOH::compress(t, block);
if (fwrite(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
throw "File error on write";
// progress bar
++tilecnt;
if (tilecnt > (ntiles * dotcnt)/ndots) { printf("."); fflush(stdout); ++dotcnt; }
}
}
printf("]\n"); // advance to next line finally
if (fclose(zohfile)) throw "Close failed on .zoh file";
}
static int str2int(std::string s)
{
int thing;
std::stringstream str (stringstream::in | stringstream::out);
str << s;
str >> thing;
return thing;
}
// zoh file name is ...-w-h.zoh, extract width and height
static void extract(string zohf, int &w, int &h)
{
size_t n = zohf.rfind('.', zohf.length()-1);
size_t n1 = zohf.rfind('-', n-1);
size_t n2 = zohf.rfind('-', n1-1);
string width = zohf.substr(n2+1, n1-n2-1);
w = str2int(width);
string height = zohf.substr(n1+1, n-n1-1);
h = str2int(height);
}
static int mode_to_prec[] = {
10,7,11,10,
10,7,11,11,
10,7,11,12,
10,7,9,16,
10,7,8,-1,
10,7,8,-1,
10,7,8,-1,
10,7,6,-1,
};
static int shapeindexhist[32], modehist[32], prechistone[16], prechisttwo[16], oneregion, tworegions;
static void stats(char block[ZOH::BLOCKSIZE])
{
char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++;
int prec = mode_to_prec[mode];
nvAssert (prec != -1);
if (!ZOH::isone(block))
{
tworegions++;
prechisttwo[prec]++;
int shapeindex = ((block[0] & 0xe0) >> 5) | ((block[1] & 0x3) << 3);
shapeindexhist[shapeindex]++;
}
else
{
oneregion++;
prechistone[prec]++;
}
}
static void printstats()
{
printf("\nPrecision histogram 10b to 16b one region: "); for (int i=10; i<=16; ++i) printf("%d,", prechistone[i]);
printf("\nPrecision histogram 6b to 11b two regions: "); for (int i=6; i<=11; ++i) printf("%d,", prechisttwo[i]);
printf("\nMode histogram: "); for (int i=0; i<32; ++i) printf("%d,", modehist[i]);
printf("\nShape index histogram: "); for (int i=0; i<32; ++i) printf("%d,", shapeindexhist[i]);
printf("\nOne region %5.2f%% Two regions %5.2f%%", 100.0*oneregion/float(oneregion+tworegions), 100.0*tworegions/float(oneregion+tworegions));
printf("\n");
}
void ZOH::decompress(string zohf, string outf)
{
Array2D<Rgba> pixels;
int w, h;
char block[ZOH::BLOCKSIZE];
extract(zohf, w, h);
FILE *zohfile = fopen(zohf.c_str(), "rb");
if (zohfile == NULL) throw "Unable to open .zoh file for read";
pixels.resizeErase(h, w);
// convert to tiles and decompress each tile
for (int y=0; y<h; y+=Tile::TILE_H)
{
int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W)
{
int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize);
if (fread(block, sizeof(char), ZOH::BLOCKSIZE, zohfile) != ZOH::BLOCKSIZE)
throw "File error on read";
stats(block); // collect statistics
ZOH::decompress(block, t);
t.extract(pixels, x, y);
}
}
if (fclose(zohfile)) throw "Close failed on .zoh file";
Exr::writeRgba(outf, pixels, w, h);
#ifndef EXTERNAL_RELEASE
printstats(); // print statistics
#endif
}
*/

65
3rdparty/nvtt/bc6h/zoh.h vendored Normal file

@@ -0,0 +1,65 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#pragma once
#ifndef _ZOH_H
#define _ZOH_H
#include "tile.h"
namespace ZOH {
// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f
static const int NREGIONS_TWO = 2;
static const int NREGIONS_ONE = 1;
static const int NCHANNELS = 3;
struct FltEndpts
{
nv::Vector3 A;
nv::Vector3 B;
};
struct IntEndpts
{
int A[NCHANNELS];
int B[NCHANNELS];
};
struct ComprEndpts
{
uint A[NCHANNELS];
uint B[NCHANNELS];
};
static const int BLOCKSIZE=16;
static const int BITSIZE=128;
void compress(const Tile &t, char *block);
void decompress(const char *block, Tile &t);
float compressone(const Tile &t, char *block);
float compresstwo(const Tile &t, char *block);
void decompressone(const char *block, Tile &t);
void decompresstwo(const char *block, Tile &t);
float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block);
float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]);
float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block);
float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]);
bool isone(const char *block);
}
#endif // _ZOH_H
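
A sketch of the public entry points, under the assumption that the caller sets Utils::FORMAT first and supplies a full 4x4 tile of half bit patterns (the constant color here is only illustrative):

ZOH::Utils::FORMAT = ZOH::UNSIGNED_F16;
ZOH::Tile t(4, 4);
for (int y = 0; y < 4; ++y)
    for (int x = 0; x < 4; ++x)
    {
        float v = ZOH::Tile::half2float(0x3c00);   // 1.0h in every channel
        t.data[y][x] = nv::Vector3(v, v, v);
        t.importance_map[y][x] = 1.0f;
    }
char block[ZOH::BLOCKSIZE];
ZOH::compress(t, block);     // keeps the better of the one/two-region encodings
ZOH::Tile out(4, 4);
ZOH::decompress(block, out); // out.data now holds decoded half bit patterns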

324
3rdparty/nvtt/bc6h/zoh_utils.cpp vendored Normal file

@@ -0,0 +1,324 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// Utility and common routines
#include "zoh_utils.h"
#include "nvmath/Vector.inl"
#include <math.h>
using namespace nv;
using namespace ZOH;
static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64
static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64
/*static*/ Format Utils::FORMAT;
int Utils::lerp(int a, int b, int i, int denom)
{
nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
nvDebugCheck (i >= 0 && i <= denom);
int round = 32, shift = 6;
const int *weights;
switch(denom)
{
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15: weights = denom15_weights_64; break;
case 7: weights = denom7_weights_64; break;
default: nvDebugCheck(0);
}
return (a*weights[denom-i] +b*weights[i] + round) >> shift;
}
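// Worked example: lerp(0, 64, 5, 15) = (0*43 + 64*21 + 32) >> 6 = 21,
// and a denom of 3 reuses the 15-entry table at i = 0, 5, 10, 15.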
Vector3 Utils::lerp(const Vector3& a, const Vector3 &b, int i, int denom)
{
nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
nvDebugCheck (i >= 0 && i <= denom);
int shift = 6;
const int *weights;
switch(denom)
{
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15: weights = denom15_weights_64; break;
case 7: weights = denom7_weights_64; break;
default: nvUnreachable();
}
// no need to round these as this is an exact division
return (a*float(weights[denom-i]) +b*float(weights[i])) / float(1 << shift);
}
/*
For unsigned f16, clamp the input to [0,F16MAX]. Thus u15.
For signed f16, clamp the input to [-F16MAX,F16MAX]. Thus s16.
The conversions proceed as follows:
unsigned f16: get bits. if high bit set, clamp to 0, else clamp to F16MAX.
signed f16: get bits. extract exp+mantissa and clamp to F16MAX. return -value if sign bit was set, else value
unsigned int: get bits. return as a positive value.
signed int. get bits. return as a value in -32768..32767.
The inverse conversions are just the inverse of the above.
*/
// clamp the 3 channels of the input vector to the allowable range based on FORMAT
// note that each channel is a float storing the allowable range as a bit pattern converted to float
// that is, for unsigned f16 say, we would clamp each channel to the range [0, F16MAX]
void Utils::clamp(Vector3 &v)
{
for (int i=0; i<3; ++i)
{
switch(Utils::FORMAT)
{
case UNSIGNED_F16:
if (v.component[i] < 0.0) v.component[i] = 0;
else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
break;
case SIGNED_F16:
if (v.component[i] < -F16MAX) v.component[i] = -F16MAX;
else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
break;
default:
nvUnreachable();
}
}
}
// convert a u16 value to s17 (represented as an int) based on the format expected
int Utils::ushort_to_format(unsigned short input)
{
int out, s;
// clamp to the valid range we are expecting
switch (Utils::FORMAT)
{
case UNSIGNED_F16:
if (input & F16S_MASK) out = 0;
else if (input > F16MAX) out = F16MAX;
else out = input;
break;
case SIGNED_F16:
s = input & F16S_MASK;
input &= F16EM_MASK;
if (input > F16MAX) out = F16MAX;
else out = input;
out = s ? -out : out;
break;
}
return out;
}
// convert a s17 value to u16 based on the format expected
unsigned short Utils::format_to_ushort(int input)
{
unsigned short out;
// clamp to the valid range we are expecting
switch (Utils::FORMAT)
{
case UNSIGNED_F16:
nvDebugCheck (input >= 0 && input <= F16MAX);
out = input;
break;
case SIGNED_F16:
nvDebugCheck (input >= -F16MAX && input <= F16MAX);
// convert to sign-magnitude
int s;
if (input < 0) { s = F16S_MASK; input = -input; }
else { s = 0; }
out = s | input;
break;
}
return out;
}
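// Worked example (SIGNED_F16): the half -2.0 has bit pattern 0xc000.
// ushort_to_format(0xc000) keeps the magnitude 0x4000 (<= F16MAX) and
// applies the sign, giving -16384; format_to_ushort(-16384) rebuilds the
// sign-magnitude pattern 0x8000 | 0x4000 == 0xc000.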
// quantize the input range into equal-sized bins
int Utils::quantize(float value, int prec)
{
int q, ivalue, s;
nvDebugCheck (prec > 1); // didn't bother to make it work for 1
value = (float)floor(value + 0.5);
int bias = (prec > 10) ? ((1<<(prec-1))-1) : 0; // bias precisions 11..16 to get a more accurate quantization
switch (Utils::FORMAT)
{
case UNSIGNED_F16:
nvDebugCheck (value >= 0 && value <= F16MAX);
ivalue = (int)value;
q = ((ivalue << prec) + bias) / (F16MAX+1);
nvDebugCheck (q >= 0 && q < (1 << prec));
break;
case SIGNED_F16:
nvDebugCheck (value >= -F16MAX && value <= F16MAX);
// convert to sign-magnitude
ivalue = (int)value;
if (ivalue < 0) { s = 1; ivalue = -ivalue; } else s = 0;
q = ((ivalue << (prec-1)) + bias) / (F16MAX+1);
if (s)
q = -q;
nvDebugCheck (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
break;
}
return q;
}
int Utils::finish_unquantize(int q, int prec)
{
if (Utils::FORMAT == UNSIGNED_F16)
return (q * 31) >> 6; // scale the magnitude by 31/64
else if (Utils::FORMAT == SIGNED_F16)
return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5; // scale the magnitude by 31/32
else
return q;
}
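// Sanity check: a full-scale unsigned value lands exactly on F16MAX:
// finish_unquantize(0xffff, prec) == (65535*31) >> 6 == 0x7bff.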
// unquantize each bin to midpoint of original bin range, except
// for the end bins which we push to an endpoint of the bin range.
// we do this to ensure we can represent all possible original values.
// the asymmetric end bins do not affect PSNR for the test images.
//
// code this function assuming an arbitrary bit pattern as the encoded block
int Utils::unquantize(int q, int prec)
{
int unq, s;
nvDebugCheck (prec > 1); // not implemented for prec 1
switch (Utils::FORMAT)
{
// modify this case to move the multiplication by 31 after interpolation.
// Need to use finish_unquantize.
// since we have 16 bits available, let's unquantize this to 16 bits unsigned
// thus the scale factor is [0-7c00)/[0-10000) = 31/64
case UNSIGNED_F16:
if (prec >= 15)
unq = q;
else if (q == 0)
unq = 0;
else if (q == ((1<<prec)-1))
unq = U16MAX;
else
unq = (q * (U16MAX+1) + (U16MAX+1)/2) >> prec;
break;
// here, let's stick with S16 (no apparent quality benefit from going to S17)
// range is (-7c00..7c00)/(-8000..8000) = 31/32
case SIGNED_F16:
// don't remove this test even though it appears equivalent to the code below
// as it isn't -- the code below can overflow for prec = 16
if (prec >= 16)
unq = q;
else
{
if (q < 0) { s = 1; q = -q; } else s = 0;
if (q == 0)
unq = 0;
else if (q >= ((1<<(prec-1))-1))
unq = s ? -S16MAX : S16MAX;
else
{
unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1);
if (s)
unq = -unq;
}
}
break;
}
return unq;
}
// pick a norm!
#define NORM_EUCLIDEAN 1
float Utils::norm(const Vector3 &a, const Vector3 &b)
{
#ifdef NORM_EUCLIDEAN
return lengthSquared(a - b);
#endif
#ifdef NORM_ABS
Vector3 err = a - b;
return fabs(err.x) + fabs(err.y) + fabs(err.z);
#endif
}
// parse <name>[<start>{:<end>}]{,}
// the pointer starts here ^
// name is 1 or 2 chars and matches field names. start and end are decimal numbers
void Utils::parse(const char *encoding, int &ptr, Field &field, int &endbit, int &len)
{
if (ptr <= 0) return;
--ptr;
if (encoding[ptr] == ',') --ptr;
nvDebugCheck (encoding[ptr] == ']');
--ptr;
endbit = 0;
int scale = 1;
while (encoding[ptr] != ':' && encoding[ptr] != '[')
{
nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
endbit += (encoding[ptr--] - '0') * scale;
scale *= 10;
}
int startbit = 0; scale = 1;
if (encoding[ptr] == '[')
startbit = endbit;
else
{
ptr--;
while (encoding[ptr] != '[')
{
nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
startbit += (encoding[ptr--] - '0') * scale;
scale *= 10;
}
}
len = startbit - endbit + 1; // startbit>=endbit note
--ptr;
if (encoding[ptr] == 'm') field = FIELD_M;
else if (encoding[ptr] == 'd') field = FIELD_D;
else {
// it's wxyz
nvDebugCheck (encoding[ptr] >= 'w' && encoding[ptr] <= 'z');
int foo = encoding[ptr--] - 'w';
// now it is r g or b
if (encoding[ptr] == 'r') foo += 10;
else if (encoding[ptr] == 'g') foo += 20;
else if (encoding[ptr] == 'b') foo += 30;
else nvDebugCheck(0);
field = (Field) foo;
}
}
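
To make the right-to-left decoding concrete: every encoding string in the pattern tables ends with the mode field, so a hypothetical first call looks like this:

int ptr = int(strlen(encoding));   // encoding ends in "...,m[4:0]"
Field field;
int endbit, len;
Utils::parse(encoding, ptr, field, endbit, len);
// yields field == FIELD_M, endbit == 0, len == 5; ptr is left just
// before the 'm', ready to parse the next field to its left.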

73
3rdparty/nvtt/bc6h/zoh_utils.h vendored Normal file

@@ -0,0 +1,73 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// utility class holding common routines
#pragma once
#ifndef _ZOH_UTILS_H
#define _ZOH_UTILS_H
#include "nvmath/Vector.h"
namespace ZOH {
inline int SIGN_EXTEND(int x, int nb) { return ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))); }
enum Field {
FIELD_M = 1, // mode
FIELD_D = 2, // distribution/shape
FIELD_RW = 10+0, FIELD_RX = 10+1, FIELD_RY = 10+2, FIELD_RZ = 10+3, // red channel endpoints or deltas
FIELD_GW = 20+0, FIELD_GX = 20+1, FIELD_GY = 20+2, FIELD_GZ = 20+3, // green channel endpoints or deltas
FIELD_BW = 30+0, FIELD_BX = 30+1, FIELD_BY = 30+2, FIELD_BZ = 30+3, // blue channel endpoints or deltas
};
// some constants
static const int F16S_MASK = 0x8000; // f16 sign mask
static const int F16EM_MASK = 0x7fff; // f16 exp & mantissa mask
static const int U16MAX = 0xffff;
static const int S16MIN = -0x8000;
static const int S16MAX = 0x7fff;
static const int INT16_MASK = 0xffff;
static const int F16MAX = 0x7bff; // MAXFLT bit pattern for halfs
enum Format { UNSIGNED_F16, SIGNED_F16 };
class Utils
{
public:
static Format FORMAT; // this is a global -- we're either handling unsigned or signed half values
// error metrics
static float norm(const nv::Vector3 &a, const nv::Vector3 &b);
static float mpsnr_norm(const nv::Vector3 &a, int exposure, const nv::Vector3 &b);
// conversion & clamp
static int ushort_to_format(unsigned short input);
static unsigned short format_to_ushort(int input);
// clamp to format
static void clamp(nv::Vector3 &v);
// quantization and unquantization
static int finish_unquantize(int q, int prec);
static int unquantize(int q, int prec);
static int quantize(float value, int prec);
static void parse(const char *encoding, int &ptr, Field & field, int &endbit, int &len);
// lerping
static int lerp(int a, int b, int i, int denom);
static nv::Vector3 lerp(const nv::Vector3 & a, const nv::Vector3 & b, int i, int denom);
};
}
#endif // _ZOH_UTILS_H
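
A quick sanity check of SIGN_EXTEND on 5-bit two's-complement deltas (values hypothetical):

int a = ZOH::SIGN_EXTEND(0x1f, 5);   // == -1 (high bit set)
int b = ZOH::SIGN_EXTEND(0x0f, 5);   // == 15 (high bit clear)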

799
3rdparty/nvtt/bc6h/zohone.cpp vendored Normal file

@@ -0,0 +1,799 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// one region zoh compress/decompress code
// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
#include "bits.h"
#include "tile.h"
#include "zoh.h"
#include "zoh_utils.h"
#include "nvmath/Vector.inl"
#include "nvmath/Fitting.h"
#include <string.h> // strlen
#include <float.h> // FLT_MAX
using namespace nv;
using namespace ZOH;
#define NINDICES 16
#define INDEXBITS 4
#define HIGH_INDEXBIT (1<<(INDEXBITS-1))
#define DENOM (NINDICES-1)
#define NSHAPES 1
static const int shapes[NSHAPES] =
{
0x0000
}; // only 1 shape
#define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
#define POS_TO_X(pos) ((pos)&3)
#define POS_TO_Y(pos) (((pos)>>2)&3)
#define NDELTA 2
struct Chanpat
{
int prec[NDELTA]; // precision pattern for one channel
};
struct Pattern
{
Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel
int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
int mode; // associated mode value
int modebits; // number of mode bits
const char *encoding; // verilog description of encoding for this mode
};
#define MAXMODEBITS 5
#define MAXMODES (1<<MAXMODEBITS)
#define NPATTERNS 4
static const Pattern patterns[NPATTERNS] =
{
16,4, 16,4, 16,4, 1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
12,8, 12,8, 12,8, 1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
11,9, 11,9, 11,9, 1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
10,10, 10,10, 10,10, 0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
};
// mapping of mode to the corresponding index in pattern
static const int mode_to_pat[MAXMODES] = {
-1,-1,-1,
3, // 0x03
-1,-1,-1,
2, // 0x07
-1,-1,-1,
1, // 0x0b
-1,-1,-1,
0, // 0x0f
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
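// Example: a block whose low five bits are 0x0b selects patterns[1]
// (12-bit base endpoints, 8-bit transformed deltas); these four valid
// modes 0x03/0x07/0x0b/0x0f are exactly the ones ZOH::isone() accepts.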
#define R_0(ep) (ep)[0].A[i]
#define R_1(ep) (ep)[0].B[i]
#define MASK(n) ((1<<(n))-1)
// compress endpoints
static void compress_endpts(const IntEndpts in[NREGIONS_ONE], ComprEndpts out[NREGIONS_ONE], const Pattern &p)
{
if (p.transformed)
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
}
}
else
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
}
}
}
// decompress endpoints
static void decompress_endpts(const ComprEndpts in[NREGIONS_ONE], IntEndpts out[NREGIONS_ONE], const Pattern &p)
{
bool issigned = Utils::FORMAT == SIGNED_F16;
if (p.transformed)
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
int t;
t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
}
}
else
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
}
}
}
static void quantize_endpts(const FltEndpts endpts[NREGIONS_ONE], int prec, IntEndpts q_endpts[NREGIONS_ONE])
{
for (int region = 0; region < NREGIONS_ONE; ++region)
{
q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
}
}
// swap endpoints as needed to ensure that the index at position index_one has a 0 high-order bit
// index_one is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
{
int index_positions[NREGIONS_ONE];
index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0
for (int region = 0; region < NREGIONS_ONE; ++region)
{
int x = index_positions[region] & 3;
int y = (index_positions[region] >> 2) & 3;
nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT)
{
// high bit is set, swap the endpts and indices for this region
int t;
for (int i=0; i<NCHANNELS; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
if (REGION(x,y,shapeindex) == region)
indices[y][x] = NINDICES - 1 - indices[y][x];
}
}
}
// endpoints fit only if the compression was lossless
static bool endpts_fit(const IntEndpts orig[NREGIONS_ONE], const ComprEndpts compressed[NREGIONS_ONE], const Pattern &p)
{
IntEndpts uncompressed[NREGIONS_ONE];
decompress_endpts(compressed, uncompressed, p);
for (int j=0; j<NREGIONS_ONE; ++j)
for (int i=0; i<NCHANNELS; ++i)
{
if (orig[j].A[i] != uncompressed[j].A[i]) return false;
if (orig[j].B[i] != uncompressed[j].B[i]) return false;
}
return true;
}
static void write_header(const ComprEndpts endpts[NREGIONS_ONE], const Pattern &p, Bits &out)
{
// interpret the verilog backwards and process it
int m = p.mode;
int rw = endpts[0].A[0], rx = endpts[0].B[0];
int gw = endpts[0].A[1], gx = endpts[0].B[1];
int bw = endpts[0].A[2], bx = endpts[0].B[2];
int ptr = int(strlen(p.encoding));
while (ptr)
{
Field field;
int endbit, len;
// !!!UNDONE: get rid of string parsing!!!
Utils::parse(p.encoding, ptr, field, endbit, len);
switch(field)
{
case FIELD_M: out.write( m >> endbit, len); break;
case FIELD_RW: out.write(rw >> endbit, len); break;
case FIELD_RX: out.write(rx >> endbit, len); break;
case FIELD_GW: out.write(gw >> endbit, len); break;
case FIELD_GX: out.write(gx >> endbit, len); break;
case FIELD_BW: out.write(bw >> endbit, len); break;
case FIELD_BX: out.write(bx >> endbit, len); break;
case FIELD_D:
case FIELD_RY:
case FIELD_RZ:
case FIELD_GY:
case FIELD_GZ:
case FIELD_BY:
case FIELD_BZ:
default: nvUnreachable();
}
}
}
static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p)
{
// reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
int mode = in.read(2);
if (mode != 0x00 && mode != 0x01)
mode = (in.read(3) << 2) | mode;
int pat_index = mode_to_pat[mode];
nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
p = patterns[pat_index];
int d;
int rw, rx;
int gw, gx;
int bw, bx;
d = 0;
rw = rx = 0;
gw = gx = 0;
bw = bx = 0;
int ptr = int(strlen(p.encoding));
while (ptr)
{
Field field;
int endbit, len;
// !!!UNDONE: get rid of string parsing!!!
Utils::parse(p.encoding, ptr, field, endbit, len);
switch(field)
{
case FIELD_M: break; // already processed so ignore
case FIELD_RW: rw |= in.read(len) << endbit; break;
case FIELD_RX: rx |= in.read(len) << endbit; break;
case FIELD_GW: gw |= in.read(len) << endbit; break;
case FIELD_GX: gx |= in.read(len) << endbit; break;
case FIELD_BW: bw |= in.read(len) << endbit; break;
case FIELD_BX: bx |= in.read(len) << endbit; break;
case FIELD_D:
case FIELD_RY:
case FIELD_RZ:
case FIELD_GY:
case FIELD_GZ:
case FIELD_BY:
case FIELD_BZ:
default: nvUnreachable();
}
}
nvDebugCheck (in.getptr() == 128 - 63);
endpts[0].A[0] = rw; endpts[0].B[0] = rx;
endpts[0].A[1] = gw; endpts[0].B[1] = gx;
endpts[0].A[2] = bw; endpts[0].B[2] = bx;
}
// compress index 0
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
{
for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
{
int x = POS_TO_X(pos);
int y = POS_TO_Y(pos);
out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0));
}
}
static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
{
Bits out(block, ZOH::BITSIZE);
write_header(endpts, p, out);
write_indices(indices, shapeindex, out);
nvDebugCheck(out.getptr() == ZOH::BITSIZE);
}
static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
{
// scale endpoints
int a, b; // really need an IntVector3...
a = Utils::unquantize(endpts.A[0], prec);
b = Utils::unquantize(endpts.B[0], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[1], prec);
b = Utils::unquantize(endpts.B[1], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[2], prec);
b = Utils::unquantize(endpts.B[2], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
}
// position 0 was compressed
static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
{
for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
{
int x = POS_TO_X(pos);
int y = POS_TO_Y(pos);
indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0));
}
}
void ZOH::decompressone(const char *block, Tile &t)
{
Bits in(block, ZOH::BITSIZE);
Pattern p;
IntEndpts endpts[NREGIONS_ONE];
ComprEndpts compr_endpts[NREGIONS_ONE];
read_header(in, compr_endpts, p);
int shapeindex = 0; // only one shape
decompress_endpts(compr_endpts, endpts, p);
Vector3 palette[NREGIONS_ONE][NINDICES];
for (int r = 0; r < NREGIONS_ONE; ++r)
generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
// read indices
int indices[Tile::TILE_H][Tile::TILE_W];
read_indices(in, shapeindex, indices);
nvDebugCheck(in.getptr() == ZOH::BITSIZE);
// lookup
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
{
Vector3 palette[NINDICES];
float toterr = 0;
Vector3 err;
generate_palette_quantized(endpts, prec, palette);
for (int i = 0; i < np; ++i)
{
float err, besterr;
besterr = Utils::norm(colors[i], palette[0]) * importance[i];
for (int j = 1; j < NINDICES && besterr > 0; ++j)
{
err = Utils::norm(colors[i], palette[j]) * importance[i];
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
besterr = err;
}
toterr += besterr;
}
return toterr;
}
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec,
int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE])
{
// build list of possibles
Vector3 palette[NREGIONS_ONE][NINDICES];
for (int region = 0; region < NREGIONS_ONE; ++region)
{
generate_palette_quantized(endpts[region], prec, &palette[region][0]);
toterr[region] = 0;
}
Vector3 err;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr;
besterr = Utils::norm(tile.data[y][x], palette[region][0]);
indices[y][x] = 0;
for (int i = 1; i < NINDICES && besterr > 0; ++i)
{
err = Utils::norm(tile.data[y][x], palette[region][i]);
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
{
besterr = err;
indices[y][x] = i;
}
}
toterr[region] += besterr;
}
}
static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
float old_err, int do_b)
{
// we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts
// we have the temporary endpoints: temp_endpts
IntEndpts temp_endpts;
float min_err = old_err; // start with the best current error
int beststep;
// copy real endpoints so we can perturb them
for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
// do a logarithmic search for the best error for this endpoint (which)
for (int step = 1 << (prec-1); step; step >>= 1)
{
bool improved = false;
for (int sign = -1; sign <= 1; sign += 2)
{
if (do_b == 0)
{
temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
continue;
}
else
{
temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
continue;
}
float err = map_colors(colors, importance, np, temp_endpts, prec);
if (err < min_err)
{
improved = true;
min_err = err;
beststep = sign * step;
}
}
// if this was an improvement, move the endpoint and continue search from there
if (improved)
{
if (do_b == 0)
new_endpts.A[ch] += beststep;
else
new_endpts.B[ch] += beststep;
}
}
return min_err;
}
static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
{
float opt_err = orig_err;
for (int ch = 0; ch < NCHANNELS; ++ch)
{
opt_endpts.A[ch] = orig_endpts.A[ch];
opt_endpts.B[ch] = orig_endpts.B[ch];
}
/*
err0 = perturb(rgb0, delta0)
err1 = perturb(rgb1, delta1)
if (err0 < err1)
if (err0 >= initial_error) break
rgb0 += delta0
next = 1
else
if (err1 >= initial_error) break
rgb1 += delta1
next = 0
initial_err = map()
for (;;)
err = perturb(next ? rgb1:rgb0, delta)
if (err >= initial_err) break
next? rgb1 : rgb0 += delta
initial_err = err
*/
IntEndpts new_a, new_b;
IntEndpts new_endpt;
int do_b;
// now optimize each channel separately
for (int ch = 0; ch < NCHANNELS; ++ch)
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minima
float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B
if (err0 < err1)
{
if (err0 >= opt_err)
continue;
opt_endpts.A[ch] = new_a.A[ch];
opt_err = err0;
do_b = 1; // do B next
}
else
{
if (err1 >= opt_err)
continue;
opt_endpts.B[ch] = new_b.B[ch];
opt_err = err1;
do_b = 0; // do A next
}
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
if (err >= opt_err)
break;
if (do_b == 0)
opt_endpts.A[ch] = new_endpt.A[ch];
else
opt_endpts.B[ch] = new_endpt.B[ch];
opt_err = err;
do_b = 1 - do_b; // now move the other endpoint
}
}
}
static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE],
const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE])
{
Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
float err = 0;
for (int region=0; region<NREGIONS_ONE; ++region)
{
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x, y, shapeindex) == region) {
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
++np;
}
}
}
optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
}
}
/* optimization algorithm
for each pattern
convert endpoints using pattern precision
assign indices and get initial error
compress indices (and possibly reorder endpoints)
transform endpoints
if transformed endpoints fit pattern
get original endpoints back
optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
compress new indices
transform new endpoints
if new endpoints fit pattern AND if error is improved
emit compressed block with new data
else
emit compressed block with original data // to try to preserve maximum endpoint precision
*/
float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block)
{
float orig_err[NREGIONS_ONE], opt_err[NREGIONS_ONE], orig_toterr, opt_toterr;
IntEndpts orig_endpts[NREGIONS_ONE], opt_endpts[NREGIONS_ONE];
ComprEndpts compr_orig[NREGIONS_ONE], compr_opt[NREGIONS_ONE];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
for (int sp = 0; sp < NPATTERNS; ++sp)
{
// precisions for all channels need to be the same
for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
swap_indices(orig_endpts, orig_indices, shapeindex_best);
compress_endpts(orig_endpts, compr_orig, patterns[sp]);
if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
{
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
swap_indices(opt_endpts, opt_indices, shapeindex_best);
compress_endpts(opt_endpts, compr_opt, patterns[sp]);
orig_toterr = opt_toterr = 0;
for (int i=0; i < NREGIONS_ONE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
{
emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
return opt_toterr;
}
else
{
// either it stopped fitting when we optimized it, or there was no improvement
// so go back to the unoptimized endpoints which we know will fit
emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
return orig_toterr;
}
}
}
nvAssert (false); // "No candidate found, should never happen (refineone.)";
return FLT_MAX;
}
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
{
for (int region = 0; region < NREGIONS_ONE; ++region)
for (int i = 0; i < NINDICES; ++i)
palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
}
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_ONE])
{
// build list of possibles
Vector3 palette[NREGIONS_ONE][NINDICES];
generate_palette_unquantized(endpts, palette);
float toterr = 0;
Vector3 err;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr;
besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
for (int i = 1; i < NINDICES && besterr > 0; ++i)
{
err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
besterr = err;
}
toterr += besterr;
}
return toterr;
}
float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE])
{
for (int region=0; region<NREGIONS_ONE; ++region)
{
int np = 0;
Vector3 colors[Tile::TILE_TOTAL];
Vector3 mean(0,0,0);
for (int y = 0; y < tile.size_y; y++) {
for (int x = 0; x < tile.size_x; x++) {
if (REGION(x,y,shapeindex) == region)
{
colors[np] = tile.data[y][x];
mean += tile.data[y][x];
++np;
}
}
}
// handle simple cases
if (np == 0)
{
Vector3 zero(0,0,0);
endpts[region].A = zero;
endpts[region].B = zero;
continue;
}
else if (np == 1)
{
endpts[region].A = colors[0];
endpts[region].B = colors[0];
continue;
}
else if (np == 2)
{
endpts[region].A = colors[0];
endpts[region].B = colors[1];
continue;
}
mean /= float(np);
Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
// project each pixel value along the principal direction
float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++)
{
float dp = dot(colors[i]-mean, direction);
if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp;
}
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction;
endpts[region].B = mean + maxp*direction;
// clamp endpoints
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
// shape based on endpoints being clamped
Utils::clamp(endpts[region].A);
Utils::clamp(endpts[region].B);
}
return map_colors(tile, shapeindex, endpts);
}
float ZOH::compressone(const Tile &t, char *block)
{
int shapeindex_best = 0;
FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE];
float msebest = FLT_MAX;
/*
collect the mse values that are within 5% of the best values
optimize each one and choose the best
*/
// hack for now -- just use the best value WORK
for (int i=0; i<NSHAPES && msebest>0.0; ++i)
{
float mse = roughone(t, i, tempendpts);
if (mse < msebest)
{
msebest = mse;
shapeindex_best = i;
memcpy(endptsbest, tempendpts, sizeof(endptsbest));
}
}
return refineone(t, shapeindex_best, endptsbest, block);
}

883
3rdparty/nvtt/bc6h/zohtwo.cpp vendored Normal file

@@ -0,0 +1,883 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// two regions zoh compress/decompress code
// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
/* optimization algorithm
get initial float endpoints
convert endpoints using 16 bit precision, transform, and get bit delta. choose likely endpoint compression candidates.
note that there will be 1 or 2 candidates; 2 will be chosen when the delta values are close to the max possible.
for each EC candidate in order from max precision to smaller precision
convert endpoints using the appropriate precision.
optimize the endpoints and minimize square error. save the error and index assignments. apply index compression as well.
(thus the endpoints and indices are in final form.)
transform and get bit delta.
if the bit delta fits, exit
if we ended up with no candidates somehow, choose the tail set of EC candidates and retry. this should happen hardly ever.
add a state variable and nvDebugCheck that we only do this once.
convert to bit stream.
return the error.
Global optimization
order all tiles based on their errors
do something special for high-error tiles
the goal here is to try to avoid tiling artifacts. but I think this is a research problem. let's just generate an error image...
display an image that shows partitioning and precision selected for each tile
*/
#include "bits.h"
#include "tile.h"
#include "zoh.h"
#include "zoh_utils.h"
#include "nvmath/Fitting.h"
#include "nvmath/Vector.inl"
#include <string.h> // strlen
#include <float.h> // FLT_MAX
using namespace nv;
using namespace ZOH;
#define NINDICES 8
#define INDEXBITS 3
#define HIGH_INDEXBIT (1<<(INDEXBITS-1))
#define DENOM (NINDICES-1)
// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
// i.e. can we search shapes in a particular order so we can see the global error minima easily and
// stop without having to touch all shapes?
#include "shapes_two.h"
// use only the first 32 available shapes
#undef NSHAPES
#undef SHAPEBITS
#define NSHAPES 32
#define SHAPEBITS 5
#define POS_TO_X(pos) ((pos)&3)
#define POS_TO_Y(pos) (((pos)>>2)&3)
#define NDELTA 4
struct Chanpat
{
int prec[NDELTA]; // precision pattern for one channel
};
struct Pattern
{
Chanpat chan[NCHANNELS]; // allow different bit patterns per channel -- but we still want constant precision per channel
int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
int mode; // associated mode value
int modebits; // number of mode bits
const char *encoding; // verilog description of encoding for this mode
};
#define MAXMODEBITS 5
#define MAXMODES (1<<MAXMODEBITS)
#define NPATTERNS 10
static const Pattern patterns[NPATTERNS] =
{
11,5,5,5, 11,4,4,4, 11,4,4,4, 1, 0x02, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],rw[10],rx[4:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
11,4,4,4, 11,5,5,5, 11,4,4,4, 1, 0x06, 5, "d[4:0],bz[3],gy[4],rz[3:0],bz[2],bz[0],ry[3:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],gw[10],gx[4:0],gy[3:0],gz[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
11,4,4,4, 11,4,4,4, 11,5,5,5, 1, 0x0a, 5, "d[4:0],bz[3],bz[4],rz[3:0],bz[2:1],ry[3:0],by[3:0],bw[10],bx[4:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],by[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
10,5,5,5, 10,5,5,5, 10,5,5,5, 1, 0x00, 2, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bw[9:0],gw[9:0],rw[9:0],bz[4],by[4],gy[4],m[1:0]",
9,5,5,5, 9,5,5,5, 9,5,5,5, 1, 0x0e, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bw[8:0],gy[4],gw[8:0],by[4],rw[8:0],m[4:0]",
8,6,6,6, 8,5,5,5, 8,5,5,5, 1, 0x12, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],rx[5:0],bz[4:3],bw[7:0],gy[4],bz[2],gw[7:0],by[4],gz[4],rw[7:0],m[4:0]",
8,5,5,5, 8,6,6,6, 8,5,5,5, 1, 0x16, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],gx[5:0],gy[3:0],gz[4],rx[4:0],bz[4],gz[5],bw[7:0],gy[4],gy[5],gw[7:0],by[4],bz[0],rw[7:0],m[4:0]",
8,5,5,5, 8,5,5,5, 8,6,6,6, 1, 0x1a, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bx[5:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bz[5],bw[7:0],gy[4],by[5],gw[7:0],by[4],bz[1],rw[7:0],m[4:0]",
7,6,6,6, 7,6,6,6, 7,6,6,6, 1, 0x01, 2, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],bw[6:0],gy[4],bz[2],by[5],gw[6:0],by[4],bz[1:0],rw[6:0],gz[5:4],gy[5],m[1:0]",
6,6,6,6, 6,6,6,6, 6,6,6,6, 0, 0x1e, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],gz[5],bw[5:0],gy[4],bz[2],by[5],gy[5],gw[5:0],by[4],bz[1:0],gz[4],rw[5:0],m[4:0]",
};
// mapping of mode to the corresponding index in pattern
// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f -- return -2 for these
static const int mode_to_pat[MAXMODES] = {
3, // 0x00
8, // 0x01
0, // 0x02
-1,-1,-1,
1, // 0x06
-1,-1,-1,
2, // 0x0a
-1,-1,-1,
4, // 0x0e
-1,-1,-1,
5, // 0x12
-2,-1,-1,
6, // 0x16
-2,-1,-1,
7, // 0x1a
-2,-1,-1,
9, // 0x1e
-2
};
#define R_0(ep) (ep)[0].A[i]
#define R_1(ep) (ep)[0].B[i]
#define R_2(ep) (ep)[1].A[i]
#define R_3(ep) (ep)[1].B[i]
#define MASK(n) ((1<<(n))-1)
// compress endpoints
static void compress_endpts(const IntEndpts in[NREGIONS_TWO], ComprEndpts out[NREGIONS_TWO], const Pattern &p)
{
if (p.transformed)
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
R_2(out) = (R_2(in) - R_0(in)) & MASK(p.chan[i].prec[2]);
R_3(out) = (R_3(in) - R_0(in)) & MASK(p.chan[i].prec[3]);
}
}
else
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
R_2(out) = R_2(in) & MASK(p.chan[i].prec[2]);
R_3(out) = R_3(in) & MASK(p.chan[i].prec[3]);
}
}
}
// decompress endpoints
static void decompress_endpts(const ComprEndpts in[NREGIONS_TWO], IntEndpts out[NREGIONS_TWO], const Pattern &p)
{
bool issigned = Utils::FORMAT == SIGNED_F16;
if (p.transformed)
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
int t;
t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
t = SIGN_EXTEND(R_2(in), p.chan[i].prec[2]);
t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
R_2(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
t = SIGN_EXTEND(R_3(in), p.chan[i].prec[3]);
t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
R_3(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
}
}
else
{
for (int i=0; i<NCHANNELS; ++i)
{
R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
R_2(out) = issigned ? SIGN_EXTEND(R_2(in),p.chan[i].prec[2]) : R_2(in);
R_3(out) = issigned ? SIGN_EXTEND(R_3(in),p.chan[i].prec[3]) : R_3(in);
}
}
}
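// Worked round trip for a transformed pattern (e.g. the 11,5,5,5 row, where
// the anchor keeps 11 bits and the deltas 5): rw = 700, rx = 697 compress to
// (697 - 700) & MASK(5) == 29 (0x1d); decompression sign-extends 29 in 5 bits
// back to -3 and recovers (-3 + 700) & MASK(11) == 697.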
static void quantize_endpts(const FltEndpts endpts[NREGIONS_TWO], int prec, IntEndpts q_endpts[NREGIONS_TWO])
{
for (int region = 0; region < NREGIONS_TWO; ++region)
{
q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, prec);
q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, prec);
q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, prec);
q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, prec);
q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, prec);
q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, prec);
}
}
// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
static void swap_indices(IntEndpts endpts[NREGIONS_TWO], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
{
for (int region = 0; region < NREGIONS_TWO; ++region)
{
int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
int x = POS_TO_X(position);
int y = POS_TO_Y(position);
nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT)
{
// high bit is set, swap the endpts and indices for this region
int t;
for (int i=0; i<NCHANNELS; ++i)
{
t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
}
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
if (REGION(x,y,shapeindex) == region)
indices[y][x] = NINDICES - 1 - indices[y][x];
}
}
}
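// Why complementing works: the interpolation weights are symmetric, so the
// palette built from (B,A) at index NINDICES-1-i equals the palette built
// from (A,B) at index i. E.g. with NINDICES == 8, an anchor index of 5 (101b)
// becomes 7-5 == 2 (010b), whose high bit is 0 and can be dropped from the
// stream.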
// endpoints fit only if the compression was lossless
static bool endpts_fit(const IntEndpts orig[NREGIONS_TWO], const ComprEndpts compressed[NREGIONS_TWO], const Pattern &p)
{
IntEndpts uncompressed[NREGIONS_TWO];
decompress_endpts(compressed, uncompressed, p);
for (int j=0; j<NREGIONS_TWO; ++j)
{
for (int i=0; i<NCHANNELS; ++i)
{
if (orig[j].A[i] != uncompressed[j].A[i]) return false;
if (orig[j].B[i] != uncompressed[j].B[i]) return false;
}
}
return true;
}
static void write_header(const ComprEndpts endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, Bits &out)
{
// process the verilog-style encoding string from back to front, emitting each field as it is parsed
int m = p.mode;
int d = shapeindex;
int rw = endpts[0].A[0], rx = endpts[0].B[0], ry = endpts[1].A[0], rz = endpts[1].B[0];
int gw = endpts[0].A[1], gx = endpts[0].B[1], gy = endpts[1].A[1], gz = endpts[1].B[1];
int bw = endpts[0].A[2], bx = endpts[0].B[2], by = endpts[1].A[2], bz = endpts[1].B[2];
int ptr = int(strlen(p.encoding));
while (ptr)
{
Field field;
int endbit, len;
// !!!UNDONE: get rid of string parsing!!!
Utils::parse(p.encoding, ptr, field, endbit, len);
switch(field)
{
case FIELD_M: out.write( m >> endbit, len); break;
case FIELD_D: out.write( d >> endbit, len); break;
case FIELD_RW: out.write(rw >> endbit, len); break;
case FIELD_RX: out.write(rx >> endbit, len); break;
case FIELD_RY: out.write(ry >> endbit, len); break;
case FIELD_RZ: out.write(rz >> endbit, len); break;
case FIELD_GW: out.write(gw >> endbit, len); break;
case FIELD_GX: out.write(gx >> endbit, len); break;
case FIELD_GY: out.write(gy >> endbit, len); break;
case FIELD_GZ: out.write(gz >> endbit, len); break;
case FIELD_BW: out.write(bw >> endbit, len); break;
case FIELD_BX: out.write(bx >> endbit, len); break;
case FIELD_BY: out.write(by >> endbit, len); break;
case FIELD_BZ: out.write(bz >> endbit, len); break;
default: nvUnreachable();
}
}
}
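// Illustrative field ordering: the encoding string is consumed from its end,
// so "m[4:0]" -- last in every pattern string -- is the first field written,
// putting the mode bits at the front of the block. A field such as "bz[3]"
// emits the single bit (bz >> 3) & 1.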
static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p)
{
// reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
int mode = in.read(2);
if (mode != 0x00 && mode != 0x01)
mode = (in.read(3) << 2) | mode;
int pat_index = mode_to_pat[mode];
if (pat_index == -2)
return false; // reserved mode found
nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
nvDebugCheck (in.getptr() == patterns[pat_index].modebits);
p = patterns[pat_index];
int d;
int rw, rx, ry, rz;
int gw, gx, gy, gz;
int bw, bx, by, bz;
d = 0;
rw = rx = ry = rz = 0;
gw = gx = gy = gz = 0;
bw = bx = by = bz = 0;
int ptr = int(strlen(p.encoding));
while (ptr)
{
Field field;
int endbit, len;
// !!!UNDONE: get rid of string parsing!!!
Utils::parse(p.encoding, ptr, field, endbit, len);
switch(field)
{
case FIELD_M: break; // already processed so ignore
case FIELD_D: d |= in.read(len) << endbit; break;
case FIELD_RW: rw |= in.read(len) << endbit; break;
case FIELD_RX: rx |= in.read(len) << endbit; break;
case FIELD_RY: ry |= in.read(len) << endbit; break;
case FIELD_RZ: rz |= in.read(len) << endbit; break;
case FIELD_GW: gw |= in.read(len) << endbit; break;
case FIELD_GX: gx |= in.read(len) << endbit; break;
case FIELD_GY: gy |= in.read(len) << endbit; break;
case FIELD_GZ: gz |= in.read(len) << endbit; break;
case FIELD_BW: bw |= in.read(len) << endbit; break;
case FIELD_BX: bx |= in.read(len) << endbit; break;
case FIELD_BY: by |= in.read(len) << endbit; break;
case FIELD_BZ: bz |= in.read(len) << endbit; break;
default: nvUnreachable();
}
}
nvDebugCheck (in.getptr() == 128 - 46);
shapeindex = d;
endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz;
endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz;
endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz;
return true;
}
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
{
int positions[NREGIONS_TWO];
for (int r = 0; r < NREGIONS_TWO; ++r)
positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
{
int x = POS_TO_X(pos);
int y = POS_TO_Y(pos);
bool match = false;
for (int r = 0; r < NREGIONS_TWO; ++r)
if (positions[r] == pos) { match = true; break; }
out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
}
}
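// Bit budget sketch (assuming INDEXBITS == 3 for the two-region case, as
// defined elsewhere in this library): 16 pixels * 3 bits == 48 bits, minus
// 1 bit for each of the two anchor positions == 46 index bits, which is why
// read_header() checks that the header occupies exactly 128 - 46 == 82 bits.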
static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
{
Bits out(block, ZOH::BITSIZE);
write_header(compr_endpts, shapeindex, p, out);
write_indices(indices, shapeindex, out);
nvDebugCheck(out.getptr() == ZOH::BITSIZE);
}
static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
{
// scale endpoints
int a, b; // really need an IntVector3...
a = Utils::unquantize(endpts.A[0], prec);
b = Utils::unquantize(endpts.B[0], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[1], prec);
b = Utils::unquantize(endpts.B[1], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[2], prec);
b = Utils::unquantize(endpts.B[2], prec);
// interpolate
for (int i = 0; i < NINDICES; ++i)
palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
}
static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
{
int positions[NREGIONS_TWO];
for (int r = 0; r < NREGIONS_TWO; ++r)
positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
{
int x = POS_TO_X(pos);
int y = POS_TO_Y(pos);
bool match = false;
for (int r = 0; r < NREGIONS_TWO; ++r)
if (positions[r] == pos) { match = true; break; }
indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
}
}
void ZOH::decompresstwo(const char *block, Tile &t)
{
Bits in(block, ZOH::BITSIZE);
Pattern p;
IntEndpts endpts[NREGIONS_TWO];
ComprEndpts compr_endpts[NREGIONS_TWO];
int shapeindex;
if (!read_header(in, compr_endpts, shapeindex, p))
{
// reserved mode, return all zeroes
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
t.data[y][x] = Vector3(0.0f);
return;
}
decompress_endpts(compr_endpts, endpts, p);
Vector3 palette[NREGIONS_TWO][NINDICES];
for (int r = 0; r < NREGIONS_TWO; ++r)
generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);
int indices[Tile::TILE_H][Tile::TILE_W];
read_indices(in, shapeindex, indices);
nvDebugCheck(in.getptr() == ZOH::BITSIZE);
// lookup
for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++)
t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
}
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
{
Vector3 palette[NINDICES];
float toterr = 0;
generate_palette_quantized(endpts, prec, palette);
for (int i = 0; i < np; ++i)
{
float err, besterr;
besterr = Utils::norm(colors[i], palette[0]) * importance[i];
for (int j = 1; j < NINDICES && besterr > 0; ++j)
{
err = Utils::norm(colors[i], palette[j]) * importance[i];
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
besterr = err;
}
toterr += besterr;
}
return toterr;
}
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec,
int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_TWO])
{
// build list of possibles
Vector3 palette[NREGIONS_TWO][NINDICES];
for (int region = 0; region < NREGIONS_TWO; ++region)
{
generate_palette_quantized(endpts[region], prec, &palette[region][0]);
toterr[region] = 0;
}
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr;
besterr = Utils::norm(tile.data[y][x], palette[region][0]);
indices[y][x] = 0;
for (int i = 1; i < NINDICES && besterr > 0; ++i)
{
err = Utils::norm(tile.data[y][x], palette[region][i]);
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
{
besterr = err;
indices[y][x] = i;
}
}
toterr[region] += besterr;
}
}
static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
float old_err, int do_b)
{
// we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts
// we have the temporary endpoints: temp_endpts
IntEndpts temp_endpts;
float min_err = old_err; // start with the best current error
int beststep = 0; // always set whenever 'improved' is, but initialize defensively
// copy real endpoints so we can perturb them
for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
// do a logarithmic search for the best error for this endpoint (which)
for (int step = 1 << (prec-1); step; step >>= 1)
{
bool improved = false;
for (int sign = -1; sign <= 1; sign += 2)
{
if (do_b == 0)
{
temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
continue;
}
else
{
temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
continue;
}
float err = map_colors(colors, importance, np, temp_endpts, prec);
if (err < min_err)
{
improved = true;
min_err = err;
beststep = sign * step;
}
}
// if this was an improvement, move the endpoint and continue search from there
if (improved)
{
if (do_b == 0)
new_endpts.A[ch] += beststep;
else
new_endpts.B[ch] += beststep;
}
}
return min_err;
}
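// Search-size sketch: for prec == 10 the step sequence is 512, 256, ..., 1,
// so each call probes at most 2 signs x 10 steps == 20 candidate values per
// endpoint channel instead of all 1024.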
static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
{
float opt_err = orig_err;
for (int ch = 0; ch < NCHANNELS; ++ch)
{
opt_endpts.A[ch] = orig_endpts.A[ch];
opt_endpts.B[ch] = orig_endpts.B[ch];
}
/*
err0 = perturb(rgb0, delta0)
err1 = perturb(rgb1, delta1)
if (err0 < err1)
if (err0 >= initial_error) break
rgb0 += delta0
next = 1
else
if (err1 >= initial_error) break
rgb1 += delta1
next = 0
initial_err = map()
for (;;)
err = perturb(next ? rgb1:rgb0, delta)
if (err >= initial_err) break
next? rgb1 : rgb0 += delta
initial_err = err
*/
IntEndpts new_a, new_b;
IntEndpts new_endpt;
int do_b;
// now optimize each channel separately
for (int ch = 0; ch < NCHANNELS; ++ch)
{
// figure out which endpoint when perturbed gives the most improvement and start there
// if we just alternate, we can easily end up in a local minima
float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A
float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B
if (err0 < err1)
{
if (err0 >= opt_err)
continue;
opt_endpts.A[ch] = new_a.A[ch];
opt_err = err0;
do_b = 1; // do B next
}
else
{
if (err1 >= opt_err)
continue;
opt_endpts.B[ch] = new_b.B[ch];
opt_err = err1;
do_b = 0; // do A next
}
// now alternate endpoints and keep trying until there is no improvement
for (;;)
{
float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
if (err >= opt_err)
break;
if (do_b == 0)
opt_endpts.A[ch] = new_endpt.A[ch];
else
opt_endpts.B[ch] = new_endpt.B[ch];
opt_err = err;
do_b = 1 - do_b; // now move the other endpoint
}
}
}
static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_TWO],
const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO])
{
Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
float err = 0;
for (int region=0; region<NREGIONS_TWO; ++region)
{
// collect the pixels in the region
int np = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
{
pixels[np] = tile.data[y][x];
importance[np] = tile.importance_map[y][x];
++np;
}
optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
}
}
/* optimization algorithm
for each pattern
convert endpoints using pattern precision
assign indices and get initial error
compress indices (and possibly reorder endpoints)
transform endpoints
if transformed endpoints fit pattern
get original endpoints back
optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
compress new indices
transform new endpoints
if new endpoints fit pattern AND if error is improved
emit compressed block with new data
else
emit compressed block with original data // to try to preserve maximum endpoint precision
*/
float ZOH::refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block)
{
float orig_err[NREGIONS_TWO], opt_err[NREGIONS_TWO], orig_toterr, opt_toterr;
IntEndpts orig_endpts[NREGIONS_TWO], opt_endpts[NREGIONS_TWO];
ComprEndpts compr_orig[NREGIONS_TWO], compr_opt[NREGIONS_TWO];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
for (int sp = 0; sp < NPATTERNS; ++sp)
{
// precisions for all channels need to be the same
for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);
quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
swap_indices(orig_endpts, orig_indices, shapeindex_best);
compress_endpts(orig_endpts, compr_orig, patterns[sp]);
if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
{
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
swap_indices(opt_endpts, opt_indices, shapeindex_best);
compress_endpts(opt_endpts, compr_opt, patterns[sp]);
orig_toterr = opt_toterr = 0;
for (int i=0; i < NREGIONS_TWO; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
{
emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
return opt_toterr;
}
else
{
// either it stopped fitting when we optimized it, or there was no improvement
// so go back to the unoptimized endpoints which we know will fit
emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
return orig_toterr;
}
}
}
nvAssert(false); //throw "No candidate found, should never happen (refinetwo.)";
return FLT_MAX;
}
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])
{
for (int region = 0; region < NREGIONS_TWO; ++region)
for (int i = 0; i < NINDICES; ++i)
palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
}
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_TWO])
{
// build list of possibles
Vector3 palette[NREGIONS_TWO][NINDICES];
generate_palette_unquantized(endpts, palette);
float toterr = 0;
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
{
int region = REGION(x,y,shapeindex);
float err, besterr;
besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
for (int i = 1; i < NINDICES && besterr > 0; ++i)
{
err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
if (err > besterr) // error increased, so we're done searching
break;
if (err < besterr)
besterr = err;
}
toterr += besterr;
}
return toterr;
}
float ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO])
{
for (int region=0; region<NREGIONS_TWO; ++region)
{
int np = 0;
Vector3 colors[Tile::TILE_TOTAL];
Vector3 mean(0,0,0);
for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region)
{
colors[np] = tile.data[y][x];
mean += tile.data[y][x];
++np;
}
// handle simple cases
if (np == 0)
{
Vector3 zero(0,0,0);
endpts[region].A = zero;
endpts[region].B = zero;
continue;
}
else if (np == 1)
{
endpts[region].A = colors[0];
endpts[region].B = colors[0];
continue;
}
else if (np == 2)
{
endpts[region].A = colors[0];
endpts[region].B = colors[1];
continue;
}
mean /= float(np);
Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
// project each pixel value along the principal direction
float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++)
{
float dp = dot(colors[i]-mean, direction);
if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp;
}
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction;
endpts[region].B = mean + maxp*direction;
// clamp endpoints
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
// shape based on endpoints being clamped
Utils::clamp(endpts[region].A);
Utils::clamp(endpts[region].B);
}
return map_colors(tile, shapeindex, endpts);
}
float ZOH::compresstwo(const Tile &t, char *block)
{
int shapeindex_best = 0;
FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO];
float msebest = FLT_MAX;
/*
collect the mse values that are within 5% of the best values
optimize each one and choose the best
*/
// hack for now -- just use the best value WORK
for (int i=0; i<NSHAPES && msebest>0.0; ++i)
{
float mse = roughtwo(t, i, tempendpts);
if (mse < msebest)
{
msebest = mse;
shapeindex_best = i;
memcpy(endptsbest, tempendpts, sizeof(endptsbest));
}
}
return refinetwo(t, shapeindex_best, endptsbest, block);
}

264
3rdparty/nvtt/bc7/avpcl.cpp vendored Normal file
View file

@@ -0,0 +1,264 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// the avpcl compressor and decompressor
#include "tile.h"
#include "avpcl.h"
#include "nvcore/Debug.h"
#include "nvmath/Vector.inl"
#include <cstring>
#include <float.h>
using namespace nv;
using namespace AVPCL;
// global flags
bool AVPCL::flag_premult = false;
bool AVPCL::flag_nonuniform = false;
bool AVPCL::flag_nonuniform_ati = false;
// global mode
bool AVPCL::mode_rgb = false; // true if image had constant alpha = 255
void AVPCL::compress(const Tile &t, char *block)
{
char tempblock[AVPCL::BLOCKSIZE];
float msebest = FLT_MAX;
float mse_mode0 = AVPCL::compress_mode0(t, tempblock); if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode1 = AVPCL::compress_mode1(t, tempblock); if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode2 = AVPCL::compress_mode2(t, tempblock); if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode3 = AVPCL::compress_mode3(t, tempblock); if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode4 = AVPCL::compress_mode4(t, tempblock); if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode5 = AVPCL::compress_mode5(t, tempblock); if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode6 = AVPCL::compress_mode6(t, tempblock); if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
float mse_mode7 = AVPCL::compress_mode7(t, tempblock); if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
/*if (errfile)
{
float errs[21];
int nerrs = 8;
errs[0] = mse_mode0;
errs[1] = mse_mode1;
errs[2] = mse_mode2;
errs[3] = mse_mode3;
errs[4] = mse_mode4;
errs[5] = mse_mode5;
errs[6] = mse_mode6;
errs[7] = mse_mode7;
if (fwrite(errs, sizeof(float), nerrs, errfile) != nerrs)
throw "Write error on error file";
}*/
}
/*
static int getbit(char *b, int start)
{
if (start < 0 || start >= 128) return 0; // out of range
int ix = start >> 3;
return (b[ix] & (1 << (start & 7))) != 0;
}
static int getbits(char *b, int start, int len)
{
int out = 0;
for (int i=0; i<len; ++i)
out |= getbit(b, start+i) << i;
return out;
}
static void setbit(char *b, int start, int bit)
{
if (start < 0 || start >= 128) return; // out of range
int ix = start >> 3;
if (bit & 1)
b[ix] |= (1 << (start & 7));
else
b[ix] &= ~(1 << (start & 7));
}
static void setbits(char *b, int start, int len, int bits)
{
for (int i=0; i<len; ++i)
setbit(b, start+i, bits >> i);
}
*/
void AVPCL::decompress(const char *cblock, Tile &t)
{
char block[AVPCL::BLOCKSIZE];
memcpy(block, cblock, AVPCL::BLOCKSIZE);
switch(getmode(block))
{
case 0: AVPCL::decompress_mode0(block, t); break;
case 1: AVPCL::decompress_mode1(block, t); break;
case 2: AVPCL::decompress_mode2(block, t); break;
case 3: AVPCL::decompress_mode3(block, t); break;
case 4: AVPCL::decompress_mode4(block, t); break;
case 5: AVPCL::decompress_mode5(block, t); break;
case 6: AVPCL::decompress_mode6(block, t); break;
case 7: AVPCL::decompress_mode7(block, t); break;
case 8: // return a black tile if you get a reserved mode
for (int y=0; y<Tile::TILE_H; ++y)
for (int x=0; x<Tile::TILE_W; ++x)
t.data[y][x].set(0, 0, 0, 0);
break;
default: nvUnreachable();
}
}
/*
void AVPCL::compress(string inf, string avpclf, string errf)
{
Array2D<RGBA> pixels;
int w, h;
char block[AVPCL::BLOCKSIZE];
Targa::read(inf, pixels, w, h);
FILE *avpclfile = fopen(avpclf.c_str(), "wb");
if (avpclfile == NULL) throw "Unable to open .avpcl file for write";
FILE *errfile = NULL;
if (errf != "")
{
errfile = fopen(errf.c_str(), "wb");
if (errfile == NULL) throw "Unable to open error file for write";
}
// Look at alpha channel and override the premult flag if alpha is constant (but only if premult is set)
if (AVPCL::flag_premult)
{
if (AVPCL::mode_rgb)
{
AVPCL::flag_premult = false;
cout << endl << "NOTE: Source image alpha is constant 255, turning off premultiplied-alpha error metric." << endl << endl;
}
}
// stuff for progress bar O.o
int ntiles = ((h+Tile::TILE_H-1)/Tile::TILE_H)*((w+Tile::TILE_W-1)/Tile::TILE_W);
int tilecnt = 0;
clock_t start, prev, cur;
start = prev = clock();
// convert to tiles and compress each tile
for (int y=0; y<h; y+=Tile::TILE_H)
{
int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W)
{
if ((tilecnt%100) == 0) { cur = clock(); printf("Progress %d of %d, %5.2f seconds per 100 tiles\r", tilecnt, ntiles, float(cur-prev)/CLOCKS_PER_SEC); fflush(stdout); prev = cur; }
int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize);
t.insert(pixels, x, y);
AVPCL::compress(t, block, errfile);
if (fwrite(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
throw "File error on write";
// progress bar
++tilecnt;
}
}
cur = clock();
printf("\nTotal time to compress: %.2f seconds\n\n", float(cur-start)/CLOCKS_PER_SEC); // advance to next line finally
if (fclose(avpclfile)) throw "Close failed on .avpcl file";
if (errfile && fclose(errfile)) throw "Close failed on error file";
}
static int str2int(std::string s)
{
int thing;
std::stringstream str (stringstream::in | stringstream::out);
str << s;
str >> thing;
return thing;
}
// avpcl file name is ...-w-h-RGB[A].avpcl, extract width and height
static void extract(string avpclf, int &w, int &h, bool &mode_rgb)
{
size_t n = avpclf.rfind('.', avpclf.length()-1);
size_t n1 = avpclf.rfind('-', n-1);
size_t n2 = avpclf.rfind('-', n1-1);
size_t n3 = avpclf.rfind('-', n2-1);
// ...-wwww-hhhh-RGB[A].avpcl
// ^ ^ ^ ^
// n3 n2 n1 n n3<n2<n1<n
string width = avpclf.substr(n3+1, n2-n3-1);
w = str2int(width);
string height = avpclf.substr(n2+1, n1-n2-1);
h = str2int(height);
string mode = avpclf.substr(n1+1, n-n1-1);
mode_rgb = mode == "RGB";
}
static int modehist[8];
static void stats(char block[AVPCL::BLOCKSIZE])
{
int m = AVPCL::getmode(block);
modehist[m]++;
}
static void printstats()
{
printf("\nMode histogram: "); for (int i=0; i<8; ++i) { printf("%d,", modehist[i]); }
printf("\n");
}
void AVPCL::decompress(string avpclf, string outf)
{
Array2D<RGBA> pixels;
int w, h;
char block[AVPCL::BLOCKSIZE];
extract(avpclf, w, h, AVPCL::mode_rgb);
FILE *avpclfile = fopen(avpclf.c_str(), "rb");
if (avpclfile == NULL) throw "Unable to open .avpcl file for read";
pixels.resizeErase(h, w);
// convert to tiles and decompress each tile
for (int y=0; y<h; y+=Tile::TILE_H)
{
int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W)
{
int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize);
if (fread(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
throw "File error on read";
stats(block); // collect statistics
AVPCL::decompress(block, t);
t.extract(pixels, x, y);
}
}
if (fclose(avpclfile)) throw "Close failed on .avpcl file";
Targa::write(outf, pixels, w, h);
printstats(); // print statistics
}
*/

99
3rdparty/nvtt/bc7/avpcl.h vendored Normal file
View file

@@ -0,0 +1,99 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_H
#define _AVPCL_H
#include "tile.h"
#include "bits.h"
#define DISABLE_EXHAUSTIVE 1 // define this if you don't want to spend a lot of time on exhaustive compression
#define USE_ZOH_INTERP 1 // use zoh interpolator, otherwise use exact avpcl interpolators
#define USE_ZOH_INTERP_ROUNDED 1 // use the rounded versions!
namespace AVPCL {
static const int NREGIONS_TWO = 2;
static const int NREGIONS_THREE = 3;
static const int BLOCKSIZE=16;
static const int BITSIZE=128;
// global flags
extern bool flag_premult;
extern bool flag_nonuniform;
extern bool flag_nonuniform_ati;
// global mode
extern bool mode_rgb; // true if image had constant alpha = 255
void compress(const Tile &t, char *block);
void decompress(const char *block, Tile &t);
float compress_mode0(const Tile &t, char *block);
void decompress_mode0(const char *block, Tile &t);
float compress_mode1(const Tile &t, char *block);
void decompress_mode1(const char *block, Tile &t);
float compress_mode2(const Tile &t, char *block);
void decompress_mode2(const char *block, Tile &t);
float compress_mode3(const Tile &t, char *block);
void decompress_mode3(const char *block, Tile &t);
float compress_mode4(const Tile &t, char *block);
void decompress_mode4(const char *block, Tile &t);
float compress_mode5(const Tile &t, char *block);
void decompress_mode5(const char *block, Tile &t);
float compress_mode6(const Tile &t, char *block);
void decompress_mode6(const char *block, Tile &t);
float compress_mode7(const Tile &t, char *block);
void decompress_mode7(const char *block, Tile &t);
inline int getmode(Bits &in)
{
int mode = 0;
if (in.read(1)) mode = 0;
else if (in.read(1)) mode = 1;
else if (in.read(1)) mode = 2;
else if (in.read(1)) mode = 3;
else if (in.read(1)) mode = 4;
else if (in.read(1)) mode = 5;
else if (in.read(1)) mode = 6;
else if (in.read(1)) mode = 7;
else mode = 8; // reserved
return mode;
}
inline int getmode(const char *block)
{
int bits = block[0], mode = 0;
if (bits & 1) mode = 0;
else if ((bits&3) == 2) mode = 1;
else if ((bits&7) == 4) mode = 2;
else if ((bits & 0xF) == 8) mode = 3;
else if ((bits & 0x1F) == 16) mode = 4;
else if ((bits & 0x3F) == 32) mode = 5;
else if ((bits & 0x7F) == 64) mode = 6;
else if ((bits & 0xFF) == 128) mode = 7;
else mode = 8; // reserved
return mode;
}
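// Example: the mode number is a unary prefix in the low bits of byte 0, so
// block[0] == 0x01 decodes as mode 0, block[0] == 0x04 (00000100b) as mode 2,
// and a zero byte falls through to the reserved mode 8.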
}
#endif

1066
3rdparty/nvtt/bc7/avpcl_mode0.cpp vendored Normal file

File diff suppressed because it is too large

1047
3rdparty/nvtt/bc7/avpcl_mode1.cpp vendored Normal file

File diff suppressed because it is too large

1004
3rdparty/nvtt/bc7/avpcl_mode2.cpp vendored Normal file

File diff suppressed because it is too large

1059
3rdparty/nvtt/bc7/avpcl_mode3.cpp vendored Normal file

File diff suppressed because it is too large

1214
3rdparty/nvtt/bc7/avpcl_mode4.cpp vendored Normal file

File diff suppressed because it is too large

1216
3rdparty/nvtt/bc7/avpcl_mode5.cpp vendored Normal file

File diff suppressed because it is too large

1055
3rdparty/nvtt/bc7/avpcl_mode6.cpp vendored Normal file

File diff suppressed because it is too large

1094
3rdparty/nvtt/bc7/avpcl_mode7.cpp vendored Normal file

File diff suppressed because it is too large

389
3rdparty/nvtt/bc7/avpcl_utils.cpp vendored Normal file
View file

@@ -0,0 +1,389 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// Utility and common routines
#include "avpcl_utils.h"
#include "avpcl.h"
#include "nvmath/Vector.inl"
#include <math.h>
using namespace nv;
using namespace AVPCL;
static const int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64
static const int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64
int Utils::lerp(int a, int b, int i, int bias, int denom)
{
#ifdef USE_ZOH_INTERP
nvAssert (denom == 3 || denom == 7 || denom == 15);
nvAssert (i >= 0 && i <= denom);
nvAssert (bias >= 0 && bias <= denom/2);
nvAssert (a >= 0 && b >= 0);
int round = 0;
#ifdef USE_ZOH_INTERP_ROUNDED
round = 32;
#endif
switch (denom)
{
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6;
case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6;
default: nvUnreachable(); return 0;
}
#else
return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation
#endif
}
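// Worked example with the rounded ZOH path: lerp(0, 255, 1, 0, 7) returns
// (0*denom7_weights[6] + 255*denom7_weights[1] + 32) >> 6
// == (255*9 + 32) >> 6 == 36.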
Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom)
{
#ifdef USE_ZOH_INTERP
nvAssert (denom == 3 || denom == 7 || denom == 15);
nvAssert (i >= 0 && i <= denom);
nvAssert (bias >= 0 && bias <= denom/2);
// nvAssert (a >= 0 && b >= 0);
// no need to bias these as this is an exact division
switch (denom)
{
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f;
case 7: return (a*float(denom7_weights[denom-i]) + b*float(denom7_weights[i])) / 64.0f;
default: nvUnreachable(); return Vector4(0);
}
#else
return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation
#endif
}
int Utils::unquantize(int q, int prec)
{
int unq;
nvAssert (prec > 3); // we only want to do one replicate
#ifdef USE_ZOH_QUANT
if (prec >= 8)
unq = q;
else if (q == 0)
unq = 0;
else if (q == ((1<<prec)-1))
unq = 255;
else
unq = (q * 256 + 128) >> prec;
#else
// avpcl unquantizer -- bit replicate
unq = (q << (8-prec)) | (q >> (2*prec-8));
#endif
return unq;
}
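// Worked example of the bit-replicating path: q == 22 (10110b) at prec == 5
// unquantizes to (22 << 3) | (22 >> 2) == 176 | 5 == 181 (10110101b), and
// quantize(181.0f, 5) below maps it straight back to 22.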
// quantize to the best value -- i.e., minimize unquantize error
int Utils::quantize(float value, int prec)
{
int q, unq;
nvAssert (prec > 3); // we only want to do one replicate
unq = (int)floor(value + 0.5f);
nvAssert (unq <= 255);
#ifdef USE_ZOH_QUANT
q = (prec >= 8) ? unq : (unq << prec) / 256;
#else
// avpcl quantizer -- scale properly for best possible bit-replicated result
q = (unq * ((1<<prec)-1) + 127)/255;
#endif
nvAssert (q >= 0 && q < (1 << prec));
return q;
}
float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
{
Vector4 err = a - b;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
// WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go.
float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode)
{
Vector3 err = a - b;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// adjust weights based on rotatemode
switch(rotatemode)
{
case ROTATEMODE_RGBA_RGBA: break;
case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break;
case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break;
case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break;
default: nvUnreachable();
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
float Utils::metric1(const float a, const float b, int rotatemode)
{
float err = a - b;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt, awt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// adjust weights based on rotatemode
switch(rotatemode)
{
case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
case ROTATEMODE_RGBA_RABG: awt = gwt; break;
case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
default: nvUnreachable();
}
// weigh the components
err *= awt;
}
return err * err;
}
float Utils::premult(float r, float a)
{
// note that the args are really integers stored in floats
int R = int(r), A = int(a);
nvAssert ((R==r) && (A==a));
return float((R*A + 127)/255);
}
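// Worked example: premult(200.0f, 128.0f) computes (200*128 + 127)/255 ==
// 25727/255 == 100, i.e. integer multiplication with round-to-nearest.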
static void premult4(Vector4& rgba)
{
rgba.x = Utils::premult(rgba.x, rgba.w);
rgba.y = Utils::premult(rgba.y, rgba.w);
rgba.z = Utils::premult(rgba.z, rgba.w);
}
static void premult3(Vector3& rgb, float a)
{
rgb.x = Utils::premult(rgb.x, a);
rgb.y = Utils::premult(rgb.y, a);
rgb.z = Utils::premult(rgb.z, a);
}
float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
{
Vector4 pma = a, pmb = b;
premult4(pma);
premult4(pmb);
Vector4 err = pma - pmb;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1)
{
Vector3 pma = rgb0, pmb = rgb1;
premult3(pma, a0);
premult3(pmb, a1);
Vector3 err = pma - pmb;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode)
{
Vector3 pma = rgb0, pmb = rgb1;
switch(rotatemode)
{
case ROTATEMODE_RGBA_RGBA:
// this function isn't supposed to be called for this rotatemode
nvUnreachable();
break;
case ROTATEMODE_RGBA_AGBR:
pma.y = premult(pma.y, pma.x);
pma.z = premult(pma.z, pma.x);
pmb.y = premult(pmb.y, pmb.x);
pmb.z = premult(pmb.z, pmb.x);
break;
case ROTATEMODE_RGBA_RABG:
pma.x = premult(pma.x, pma.y);
pma.z = premult(pma.z, pma.y);
pmb.x = premult(pmb.x, pmb.y);
pmb.z = premult(pmb.z, pmb.y);
break;
case ROTATEMODE_RGBA_RGAB:
pma.x = premult(pma.x, pma.z);
pma.y = premult(pma.y, pma.z);
pmb.x = premult(pmb.x, pmb.z);
pmb.y = premult(pmb.y, pmb.z);
break;
default: nvUnreachable();
}
Vector3 err = pma - pmb;
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else /*if (AVPCL::flag_nonuniform_ati)*/
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// weigh the components
err.x *= rwt;
err.y *= gwt;
err.z *= bwt;
}
return lengthSquared(err);
}
float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode)
{
float err = premult(rgb0, a0) - premult(rgb1, a1);
// if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{
float rwt, gwt, bwt, awt;
if (AVPCL::flag_nonuniform)
{
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
}
else if (AVPCL::flag_nonuniform_ati)
{
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
}
// adjust weights based on rotatemode
switch(rotatemode)
{
case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
case ROTATEMODE_RGBA_RABG: awt = gwt; break;
case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
default: nvUnreachable();
}
// weigh the components
err *= awt;
}
return err * err;
}

61
3rdparty/nvtt/bc7/avpcl_utils.h vendored Normal file
View file

@@ -0,0 +1,61 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// utility class holding common routines
#ifndef _AVPCL_UTILS_H
#define _AVPCL_UTILS_H
#include "nvmath/Vector.h"
namespace AVPCL {
inline int SIGN_EXTEND(int x, int nb) { return ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)); }
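// e.g. SIGN_EXTEND(29, 5) == -3 (bit 4 of 11101b is set, so the high bits are
// filled with ones), while SIGN_EXTEND(13, 5) == 13 (bit 4 clear)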
static const int INDEXMODE_BITS = 1; // 2 different index modes
static const int NINDEXMODES = (1<<(INDEXMODE_BITS));
static const int INDEXMODE_ALPHA_IS_3BITS = 0;
static const int INDEXMODE_ALPHA_IS_2BITS = 1;
static const int ROTATEMODE_BITS = 2; // 4 different rotate modes
static const int NROTATEMODES = (1<<(ROTATEMODE_BITS));
static const int ROTATEMODE_RGBA_RGBA = 0;
static const int ROTATEMODE_RGBA_AGBR = 1;
static const int ROTATEMODE_RGBA_RABG = 2;
static const int ROTATEMODE_RGBA_RGAB = 3;
class Utils
{
public:
// error metrics
static float metric4(nv::Vector4::Arg a, nv::Vector4::Arg b);
static float metric3(nv::Vector3::Arg a, nv::Vector3::Arg b, int rotatemode);
static float metric1(float a, float b, int rotatemode);
static float metric4premult(nv::Vector4::Arg rgba0, nv::Vector4::Arg rgba1);
static float metric3premult_alphaout(nv::Vector3::Arg rgb0, float a0, nv::Vector3::Arg rgb1, float a1);
static float metric3premult_alphain(nv::Vector3::Arg rgb0, nv::Vector3::Arg rgb1, int rotatemode);
static float metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode);
static float premult(float r, float a);
// quantization and unquantization
static int unquantize(int q, int prec);
static int quantize(float value, int prec);
// lerping
static int lerp(int a, int b, int i, int bias, int denom);
static nv::Vector4 lerp(nv::Vector4::Arg a, nv::Vector4::Arg b, int i, int bias, int denom);
};
}
#endif

76
3rdparty/nvtt/bc7/bits.h vendored Normal file
View file

@@ -0,0 +1,76 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_BITS_H
#define _AVPCL_BITS_H
// read/write a bitstream
#include "nvcore/Debug.h"
namespace AVPCL {
class Bits
{
public:
Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
void write(int value, int nbits) {
nvAssert (nbits >= 0 && nbits < 32);
nvAssert (sizeof(int)>= 4);
for (int i=0; i<nbits; ++i)
writeone(value>>i);
}
int read(int nbits) {
nvAssert (nbits >= 0 && nbits < 32);
nvAssert (sizeof(int)>= 4);
int out = 0;
for (int i=0; i<nbits; ++i)
out |= readone() << i;
return out;
}
int getptr() { return bptr; }
void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
int getsize() { return bend; }
private:
int bptr; // next bit to read
int bend; // last written bit + 1
char *bits; // ptr to user bit stream
const char *cbits; // ptr to const user bit stream
int maxbits; // max size of user bit stream
char readonly; // 1 if this is a read-only stream
int readone() {
nvAssert (bptr < bend);
if (bptr >= bend) return 0;
int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
++bptr;
return bit != 0;
}
void writeone(int bit) {
nvAssert (!readonly); // "Writing a read-only bit stream"
nvAssert (bptr < maxbits);
if (bptr >= maxbits) return;
if (bit&1)
bits[bptr>>3] |= 1 << (bptr & 7);
else
bits[bptr>>3] &= ~(1 << (bptr & 7));
if (bptr++ >= bend) bend = bptr;
}
};
}
#endif

81
3rdparty/nvtt/bc7/endpts.h vendored Normal file
View file

@@ -0,0 +1,81 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_ENDPTS_H
#define _AVPCL_ENDPTS_H
// endpoint definitions and routines to search through endpoint space
#include "nvmath/Vector.h"
namespace AVPCL {
static const int NCHANNELS_RGB = 3;
static const int NCHANNELS_RGBA = 4;
static const int CHANNEL_R = 0;
static const int CHANNEL_G = 1;
static const int CHANNEL_B = 2;
static const int CHANNEL_A = 3;
struct FltEndpts
{
nv::Vector4 A;
nv::Vector4 B;
};
struct IntEndptsRGB
{
int A[NCHANNELS_RGB];
int B[NCHANNELS_RGB];
};
struct IntEndptsRGB_1
{
int A[NCHANNELS_RGB];
int B[NCHANNELS_RGB];
int lsb; // shared lsb for A and B
};
struct IntEndptsRGB_2
{
int A[NCHANNELS_RGB];
int B[NCHANNELS_RGB];
int a_lsb; // lsb for A
int b_lsb; // lsb for B
};
struct IntEndptsRGBA
{
int A[NCHANNELS_RGBA];
int B[NCHANNELS_RGBA];
};
struct IntEndptsRGBA_2
{
int A[NCHANNELS_RGBA];
int B[NCHANNELS_RGBA];
int a_lsb; // lsb for A
int b_lsb; // lsb for B
};
struct IntEndptsRGBA_2a
{
int A[NCHANNELS_RGBA];
int B[NCHANNELS_RGBA];
int a_lsb; // lsb for RGB channels of A
int b_lsb; // lsb for RGB channels of B
};
}
#endif

132
3rdparty/nvtt/bc7/shapes_three.h vendored Normal file
View file

@@ -0,0 +1,132 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_SHAPES_THREE_H
#define _AVPCL_SHAPES_THREE_H
// shapes for 3 regions
#define NREGIONS 3
#define NSHAPES 64
#define SHAPEBITS 6
static int shapes[NSHAPES*16] =
{
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 0, 0, 2, 2,
0, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 0, 0, 1, 1,
2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1,
1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1,
1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 2,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 1, 2,
0, 1, 1, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1,
0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 1, 2, 2, 0, 0, 1,
0, 1, 1, 2, 0, 1, 2, 2, 1, 1, 2, 2, 2, 2, 0, 0,
0, 1, 1, 2, 0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0,
0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2,
0, 1, 1, 2, 2, 0, 0, 1, 1, 1, 2, 2, 0, 0, 2, 2,
1, 1, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 1, 1, 1, 1,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0,
0, 2, 2, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0,
0, 1, 2, 2, 0, 0, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0,
0, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 1, 0, 1, 1, 0,
0, 0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1,
0, 0, 0, 0, 2, 2, 2, 2, 0, 1, 1, 0, 1, 2, 2, 1,
0, 0, 2, 2, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
1, 1, 0, 2, 0, 1, 1, 0, 0, 1, 2, 2, 2, 0, 0, 0,
1, 1, 0, 2, 2, 0, 0, 2, 0, 1, 2, 2, 2, 2, 1, 1,
0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 2, 1,
0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 1, 1, 0, 1, 2, 0,
0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 1, 2, 0,
1, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 2, 0, 1, 2, 0,
1, 2, 2, 2, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 2, 0,
0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 2, 0, 1, 2, 0, 1, 2, 2, 2, 0, 0,
2, 2, 2, 2, 2, 0, 1, 2, 1, 2, 0, 1, 1, 1, 2, 2,
0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 0, 1, 1,
0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2,
1, 1, 2, 2, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 2, 2,
2, 2, 0, 0, 2, 2, 2, 2, 2, 1, 2, 1, 0, 0, 2, 2,
0, 0, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 2,
0, 0, 2, 2, 0, 2, 2, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1,
0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 1, 2, 1,
0, 0, 1, 1, 1, 2, 2, 1, 0, 1, 0, 1, 2, 1, 2, 1,
0, 1, 0, 1, 0, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0,
0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2,
0, 1, 0, 1, 0, 2, 2, 2, 0, 0, 0, 2, 2, 1, 1, 2,
2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2,
0, 2, 2, 2, 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 0, 2, 1, 1, 2,
0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2,
0, 1, 1, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0,
2, 2, 2, 2, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0,
2, 2, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 2,
0, 0, 0, 2, 0, 2, 2, 2, 0, 1, 0, 1, 0, 1, 1, 1,
0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1,
0, 0, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1,
0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0,
};
#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
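// Index arithmetic example: the 64 shapes are packed 4 per 16-wide strip, so
// shape 5 occupies columns 4..7 of strip 1, and REGION(2, 1, 5) reads
// shapes[1*4 + 1*64 + 2 + 1*16] == shapes[86] == 1.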
static int shapeindex_to_compressed_indices[NSHAPES*3] =
{
0, 3,15, 0, 3, 8, 0,15, 8, 0,15, 3,
0, 8,15, 0, 3,15, 0,15, 3, 0,15, 8,
0, 8,15, 0, 8,15, 0, 6,15, 0, 6,15,
0, 6,15, 0, 5,15, 0, 3,15, 0, 3, 8,
0, 3,15, 0, 3, 8, 0, 8,15, 0,15, 3,
0, 3,15, 0, 3, 8, 0, 6,15, 0,10, 8,
0, 5, 3, 0, 8,15, 0, 8, 6, 0, 6,10,
0, 8,15, 0, 5,15, 0,15,10, 0,15, 8,
0, 8,15, 0,15, 3, 0, 3,15, 0, 5,10,
0, 6,10, 0,10, 8, 0, 8, 9, 0,15,10,
0,15, 6, 0, 3,15, 0,15, 8, 0, 5,15,
0,15, 3, 0,15, 6, 0,15, 6, 0,15, 8,
0, 3,15, 0,15, 3, 0, 5,15, 0, 5,15,
0, 5,15, 0, 8,15, 0, 5,15, 0,10,15,
0, 5,15, 0,10,15, 0, 8,15, 0,13,15,
0,15, 3, 0,12,15, 0, 3,15, 0, 3, 8
};
#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*3+(region)]
#endif

133
3rdparty/nvtt/bc7/shapes_two.h vendored Normal file
View file

@@ -0,0 +1,133 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_SHAPES_TWO_H
#define _AVPCL_SHAPES_TWO_H
// shapes for two regions
#define NREGIONS 2
#define NSHAPES 64
#define SHAPEBITS 6
static int shapes[NSHAPES*16] =
{
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1,
0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1,
1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0,
1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,
0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1,
1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,
0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0,
1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
};
#define REGION(x,y,si) shapes[((si)&3)*4+((si)>>2)*64+(x)+(y)*16]
static int shapeindex_to_compressed_indices[NSHAPES*2] =
{
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0,15, 0,15, 0,15,
0,15, 0, 2, 0, 8, 0, 2,
0, 2, 0, 8, 0, 8, 0,15,
0, 2, 0, 8, 0, 2, 0, 2,
0, 8, 0, 8, 0, 2, 0, 2,
0,15, 0,15, 0, 6, 0, 8,
0, 2, 0, 8, 0,15, 0,15,
0, 2, 0, 8, 0, 2, 0, 2,
0, 2, 0,15, 0,15, 0, 6,
0, 6, 0, 2, 0, 6, 0, 8,
0,15, 0,15, 0, 2, 0, 2,
0,15, 0,15, 0,15, 0,15,
0,15, 0, 2, 0, 2, 0,15
};
#define SHAPEINDEX_TO_COMPRESSED_INDICES(si,region) shapeindex_to_compressed_indices[(si)*2+(region)]
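// Anchor example (assuming the usual POS_TO_X(p) == p&3, POS_TO_Y(p) == p>>2
// mapping used by the mode compressors): for shape 0 the region-1 anchor is
// SHAPEINDEX_TO_COMPRESSED_INDICES(0, 1) == 15, i.e. pixel (3,3); the
// region-0 anchor is always position 0, the upper-left pixel.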
#endif

41
3rdparty/nvtt/bc7/tile.h vendored Normal file
View file

@@ -0,0 +1,41 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _AVPCL_TILE_H
#define _AVPCL_TILE_H
#include "nvmath/Vector.h"
#include <math.h>
#include "avpcl_utils.h"
namespace AVPCL {
// extract a tile of pixels from an array
class Tile
{
public:
static const int TILE_H = 4;
static const int TILE_W = 4;
static const int TILE_TOTAL = TILE_H * TILE_W;
nv::Vector4 data[TILE_H][TILE_W];
float importance_map[TILE_H][TILE_W];
int size_x, size_y; // actual size of tile
Tile() {}
~Tile() {}
Tile(int xs, int ys) { size_x = xs; size_y = ys; }
};
}
#endif
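As a usage sketch: an encoder would fill a Tile from the source image before compressing the block. The helper below is illustrative only (extractTile and the uniform importance weighting are assumptions, not library code); it clips the 4x4 block at the image edge:
static AVPCL::Tile extractTile(const nv::Vector4 * image, int w, int h, int bx, int by)
{
    int sx = (w - bx < AVPCL::Tile::TILE_W) ? w - bx : AVPCL::Tile::TILE_W;
    int sy = (h - by < AVPCL::Tile::TILE_H) ? h - by : AVPCL::Tile::TILE_H;
    AVPCL::Tile t(sx, sy);
    for (int y = 0; y < sy; ++y)
        for (int x = 0; x < sx; ++x) {
            t.data[y][x] = image[(by + y) * w + (bx + x)];
            t.importance_map[y][x] = 1.0f; // weight all pixels equally
        }
    return t;
}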

437
3rdparty/nvtt/nvcore/Array.inl vendored Normal file
View file

@@ -0,0 +1,437 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_ARRAY_INL
#define NV_CORE_ARRAY_INL
#include "array.h"
#include "stream.h"
#include "utils.h" // swap
#include <string.h> // memmove
#include <new> // for placement new
namespace nv
{
template <typename T>
NV_FORCEINLINE T & Array<T>::append()
{
uint old_size = m_size;
uint new_size = m_size + 1;
setArraySize(new_size);
construct_range(m_buffer, new_size, old_size);
return m_buffer[old_size]; // Return reference to last element.
}
// Push an element at the end of the vector.
template <typename T>
NV_FORCEINLINE void Array<T>::push_back( const T & val )
{
#if 1
nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size);
uint old_size = m_size;
uint new_size = m_size + 1;
setArraySize(new_size);
construct_range(m_buffer, new_size, old_size, val);
#else
uint new_size = m_size + 1;
if (new_size > m_capacity)
{
// @@ Is there any way to avoid this copy?
// @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy?
// @@ Assert instead of copy?
const T copy(val); // create a copy in case value is inside of this array.
setArraySize(new_size);
new (m_buffer+new_size-1) T(copy);
}
else
{
m_size = new_size;
new(m_buffer+new_size-1) T(val);
}
#endif // 0/1
}
template <typename T>
NV_FORCEINLINE void Array<T>::pushBack( const T & val )
{
push_back(val);
}
template <typename T>
NV_FORCEINLINE Array<T> & Array<T>::append( const T & val )
{
push_back(val);
return *this;
}
// Qt like push operator.
template <typename T>
NV_FORCEINLINE Array<T> & Array<T>::operator<< ( T & t )
{
push_back(t);
return *this;
}
// Pop the element at the end of the vector.
template <typename T>
NV_FORCEINLINE void Array<T>::pop_back()
{
nvDebugCheck( m_size > 0 );
resize( m_size - 1 );
}
template <typename T>
NV_FORCEINLINE void Array<T>::popBack(uint count)
{
nvDebugCheck(m_size >= count);
resize(m_size - count);
}
template <typename T>
NV_FORCEINLINE void Array<T>::popFront(uint count)
{
nvDebugCheck(m_size >= count);
//resize(m_size - count);
if (m_size == count) {
clear();
}
else {
destroy_range(m_buffer, 0, count);
memmove(m_buffer, m_buffer + count, sizeof(T) * (m_size - count));
m_size -= count;
}
}
// Get back element.
template <typename T>
NV_FORCEINLINE const T & Array<T>::back() const
{
nvDebugCheck( m_size > 0 );
return m_buffer[m_size-1];
}
// Get back element.
template <typename T>
NV_FORCEINLINE T & Array<T>::back()
{
nvDebugCheck( m_size > 0 );
return m_buffer[m_size-1];
}
// Get front element.
template <typename T>
NV_FORCEINLINE const T & Array<T>::front() const
{
nvDebugCheck( m_size > 0 );
return m_buffer[0];
}
// Get front element.
template <typename T>
NV_FORCEINLINE T & Array<T>::front()
{
nvDebugCheck( m_size > 0 );
return m_buffer[0];
}
// Check if the given element is contained in the array.
template <typename T>
NV_FORCEINLINE bool Array<T>::contains(const T & e) const
{
return find(e, NULL);
}
// Return true if element found.
template <typename T>
NV_FORCEINLINE bool Array<T>::find(const T & element, uint * indexPtr) const
{
return find(element, 0, m_size, indexPtr);
}
// Return true if element found within the given range.
template <typename T>
NV_FORCEINLINE bool Array<T>::find(const T & element, uint begin, uint end, uint * indexPtr) const
{
return ::nv::find(element, m_buffer, begin, end, indexPtr);
}
// Remove the element at the given index. This is an expensive operation!
template <typename T>
void Array<T>::removeAt(uint index)
{
nvDebugCheck(index < m_size); // index is unsigned, so only the upper bound needs checking
if (m_size == 1) {
clear();
}
else {
m_buffer[index].~T();
memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index));
m_size--;
}
}
// Remove the first instance of the given element.
template <typename T>
bool Array<T>::remove(const T & element)
{
uint index;
if (find(element, &index)) {
removeAt(index);
return true;
}
return false;
}
// Insert the given element at the given index shifting all the elements up.
template <typename T>
void Array<T>::insertAt(uint index, const T & val/*=T()*/)
{
nvDebugCheck( index <= m_size ); // index is unsigned, so only the upper bound needs checking
setArraySize(m_size + 1);
if (index < m_size - 1) {
memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index));
}
// Copy-construct into the newly opened slot.
new(m_buffer+index) T(val);
}
// Append the given data to our vector.
template <typename T>
NV_FORCEINLINE void Array<T>::append(const Array<T> & other)
{
append(other.m_buffer, other.m_size);
}
// Append the given data to our vector.
template <typename T>
void Array<T>::append(const T other[], uint count)
{
if (count > 0) {
const uint old_size = m_size;
setArraySize(m_size + count);
for (uint i = 0; i < count; i++ ) {
new(m_buffer + old_size + i) T(other[i]);
}
}
}
// Remove the given element by replacing it with the last one.
template <typename T>
void Array<T>::replaceWithLast(uint index)
{
nvDebugCheck( index < m_size );
nv::swap(m_buffer[index], back()); // @@ Is this OK when index == size-1?
(m_buffer+m_size-1)->~T();
m_size--;
}
// Resize the vector preserving existing elements.
template <typename T>
void Array<T>::resize(uint new_size)
{
uint old_size = m_size;
// Destruct old elements (if we're shrinking).
destroy_range(m_buffer, new_size, old_size);
setArraySize(new_size);
// Call default constructors
construct_range(m_buffer, new_size, old_size);
}
// Resize the vector preserving existing elements and initializing the
// new ones with the given value.
template <typename T>
void Array<T>::resize(uint new_size, const T & elem)
{
nvDebugCheck(&elem < m_buffer || &elem > m_buffer+m_size);
uint old_size = m_size;
// Destruct old elements (if we're shrinking).
destroy_range(m_buffer, new_size, old_size);
setArraySize(new_size);
// Call copy constructors
construct_range(m_buffer, new_size, old_size, elem);
}
// Fill array with the given value.
template <typename T>
void Array<T>::fill(const T & elem)
{
fill(m_buffer, m_size, elem);
}
// Clear the buffer.
template <typename T>
NV_FORCEINLINE void Array<T>::clear()
{
nvDebugCheck(isValidPtr(m_buffer));
// Destruct old elements
destroy_range(m_buffer, 0, m_size);
m_size = 0;
}
// Shrink the allocated vector.
template <typename T>
NV_FORCEINLINE void Array<T>::shrink()
{
if (m_size < m_capacity) {
setArrayCapacity(m_size);
}
}
// Preallocate space.
template <typename T>
NV_FORCEINLINE void Array<T>::reserve(uint desired_size)
{
if (desired_size > m_capacity) {
setArrayCapacity(desired_size);
}
}
// Copy elements to this array. Resizes it if needed.
template <typename T>
NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
{
#if 1 // Simpler, but maybe not as efficient?
destroy_range(m_buffer, 0, m_size);
setArraySize(count);
construct_range(m_buffer, count, 0, data);
#else
const uint old_size = m_size;
destroy_range(m_buffer, count, old_size);
setArraySize(count);
copy_range(m_buffer, data, old_size);
construct_range(m_buffer, count, old_size, data);
#endif
}
// Assignment operator.
template <typename T>
NV_FORCEINLINE Array<T> & Array<T>::operator=( const Array<T> & a )
{
copy(a.m_buffer, a.m_size);
return *this;
}
// Release ownership of the allocated memory and return a pointer to it.
template <typename T>
T * Array<T>::release() {
T * tmp = m_buffer;
m_buffer = NULL;
m_capacity = 0;
m_size = 0;
return tmp;
}
// Change array size.
template <typename T>
inline void Array<T>::setArraySize(uint new_size) {
m_size = new_size;
if (new_size > m_capacity) {
uint new_buffer_size;
if (m_capacity == 0) {
// first allocation is exact
new_buffer_size = new_size;
}
else {
// following allocations grow array by 25%
new_buffer_size = new_size + (new_size >> 2);
}
setArrayCapacity( new_buffer_size );
}
}
// Change array capacity.
template <typename T>
inline void Array<T>::setArrayCapacity(uint new_capacity) {
nvDebugCheck(new_capacity >= m_size);
if (new_capacity == 0) {
// free the buffer.
if (m_buffer != NULL) {
free<T>(m_buffer);
m_buffer = NULL;
}
}
else {
// realloc the buffer
m_buffer = realloc<T>(m_buffer, new_capacity);
}
m_capacity = new_capacity;
}
// Array serialization.
template <typename Typ>
inline Stream & operator<< ( Stream & s, Array<Typ> & p )
{
if (s.isLoading()) {
uint size;
s << size;
p.resize( size );
}
else {
s << p.m_size;
}
for (uint i = 0; i < p.m_size; i++) {
s << p.m_buffer[i];
}
return s;
}
// Swap the members of the two given vectors.
template <typename Typ>
inline void swap(Array<Typ> & a, Array<Typ> & b)
{
nv::swap(a.m_buffer, b.m_buffer);
nv::swap(a.m_capacity, b.m_capacity);
nv::swap(a.m_size, b.m_size);
}
} // nv namespace
#endif // NV_CORE_ARRAY_INL
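A minimal usage sketch of the Array API defined above (illustrative; assumes the nvcore headers are on the include path). Note the growth policy implemented in setArraySize: the first allocation is exact and later ones grow by 25%, so reserving up front avoids reallocations:
#include "nvcore/array.h"
void arrayExample()
{
    nv::Array<int> a;
    a.reserve(16);               // one allocation up front
    for (int i = 0; i < 10; i++)
        a.push_back(i * i);
    uint idx;
    if (a.find(25, &idx))        // linear search; index returned through the pointer
        a.replaceWithLast(idx);  // O(1) removal that does not preserve order
    a.shrink();                  // trim capacity back down to size
}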

216
3rdparty/nvtt/nvcore/Debug.h vendored Normal file
View file

@@ -0,0 +1,216 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_DEBUG_H
#define NV_CORE_DEBUG_H
#include "nvcore.h"
#include <stdarg.h> // va_list
// Make sure we are using our assert.
#undef assert
#define NV_ABORT_DEBUG 1
#define NV_ABORT_IGNORE 2
#define NV_ABORT_EXIT 3
#define nvNoAssert(exp) \
NV_MULTI_LINE_MACRO_BEGIN \
(void)sizeof(exp); \
NV_MULTI_LINE_MACRO_END
#if NV_NO_ASSERT
# define nvAssert(exp) nvNoAssert(exp)
# define nvCheck(exp) nvNoAssert(exp)
# define nvDebugAssert(exp) nvNoAssert(exp)
# define nvDebugCheck(exp) nvNoAssert(exp)
# define nvDebugBreak() nvNoAssert(0)
#else // NV_NO_ASSERT
# if NV_CC_MSVC
// @@ Does this work in msvc-6 and earlier?
# define nvDebugBreak() __debugbreak()
//# define nvDebugBreak() __asm { int 3 }
# elif NV_OS_ORBIS
# define nvDebugBreak() __debugbreak()
# elif NV_CC_GNUC
# define nvDebugBreak() __builtin_trap()
# else
# error "No nvDebugBreak()!"
# endif
/*
# elif NV_CC_GNUC || NV_CPU_PPC && NV_OS_DARWIN
// @@ Use __builtin_trap() on GCC
# define nvDebugBreak() __asm__ volatile ("trap")
# elif (NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64) && NV_OS_DARWIN
# define nvDebugBreak() __asm__ volatile ("int3")
# elif NV_CC_GNUC || NV_CPU_X86 || NV_CPU_X86_64
# define nvDebugBreak() __asm__ ( "int %0" : :"I"(3) )
# else
# include <signal.h>
# define nvDebugBreak() raise(SIGTRAP)
# endif
*/
#define nvDebugBreakOnce() \
NV_MULTI_LINE_MACRO_BEGIN \
static bool firstTime = true; \
if (firstTime) { firstTime = false; nvDebugBreak(); } \
NV_MULTI_LINE_MACRO_END
#define nvAssertMacro(exp) \
NV_MULTI_LINE_MACRO_BEGIN \
if (!(exp)) { \
if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \
nvDebugBreak(); \
} \
} \
NV_MULTI_LINE_MACRO_END
// GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care
#define nvAssertMacroWithIgnoreAll(exp,...) \
NV_MULTI_LINE_MACRO_BEGIN \
static bool ignoreAll = false; \
if (!ignoreAll && !(exp)) { \
int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \
if (result == NV_ABORT_DEBUG) { \
nvDebugBreak(); \
} else if (result == NV_ABORT_IGNORE) { \
ignoreAll = true; \
} \
} \
NV_MULTI_LINE_MACRO_END
// Interesting assert macro from Insomniac:
// http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to
// Used as follows:
// if (nvCheck(i < count)) {
// normal path
// } else {
// fixup code.
// }
// This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely.
#define nvCheckMacro(exp) \
(\
(exp) ? true : ( \
(nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \
) \
)
#define nvAssert(exp) nvAssertMacro(exp)
#define nvCheck(exp) nvAssertMacro(exp)
#if defined(_DEBUG)
# define nvDebugAssert(exp) nvAssertMacro(exp)
# define nvDebugCheck(exp) nvAssertMacro(exp)
#else // _DEBUG
# define nvDebugAssert(exp) nvNoAssert(exp)
# define nvDebugCheck(exp) nvNoAssert(exp)
#endif // _DEBUG
#endif // NV_NO_ASSERT
// Use nvAssume for very simple expressions only: nvAssume(0), nvAssume(value == true), etc.
/*#if !defined(_DEBUG)
# if NV_CC_MSVC
# define nvAssume(exp) __assume(exp)
# else
# define nvAssume(exp) nvCheck(exp)
# endif
#else
# define nvAssume(exp) nvCheck(exp)
#endif*/
#if defined(_DEBUG)
# if NV_CC_MSVC
# define nvUnreachable() nvAssert(0 && "unreachable"); __assume(0)
# else
# define nvUnreachable() nvAssert(0 && "unreachable"); __builtin_unreachable()
# endif
#else
# if NV_CC_MSVC
# define nvUnreachable() __assume(0)
# else
# define nvUnreachable() __builtin_unreachable()
# endif
#endif
#define nvError(x) nvAbort(x, __FILE__, __LINE__, __FUNC__)
#define nvWarning(x) nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
#ifndef NV_DEBUG_PRINT
#define NV_DEBUG_PRINT 1 //defined(_DEBUG)
#endif
#if NV_DEBUG_PRINT
#define nvDebug(...) nvDebugPrint(__VA_ARGS__)
#else
#if NV_CC_MSVC
#define nvDebug(...) __noop(__VA_ARGS__)
#else
#define nvDebug(...) ((void)0) // Non-msvc platforms do not evaluate arguments?
#endif
#endif
NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6)));
NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
namespace nv
{
inline bool isValidPtr(const void * ptr) {
#if NV_CPU_X86_64
if (ptr == NULL) return true;
if (reinterpret_cast<uint64>(ptr) < 0x10000ULL) return false;
if (reinterpret_cast<uint64>(ptr) >= 0x000007FFFFFEFFFFULL) return false;
#else
if (reinterpret_cast<uint32>(ptr) == 0xcccccccc) return false;
if (reinterpret_cast<uint32>(ptr) == 0xcdcdcdcd) return false;
if (reinterpret_cast<uint32>(ptr) == 0xdddddddd) return false;
if (reinterpret_cast<uint32>(ptr) == 0xffffffff) return false;
#endif
return true;
}
// Message handler interface.
struct MessageHandler {
virtual void log(const char * str, va_list arg) = 0;
virtual ~MessageHandler() {}
};
// Assert handler interface.
struct AssertHandler {
virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0;
virtual ~AssertHandler() {}
};
namespace debug
{
NVCORE_API void dumpInfo();
NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
NVCORE_API void resetMessageHandler();
NVCORE_API void setAssertHandler( AssertHandler * assertHandler );
NVCORE_API void resetAssertHandler();
NVCORE_API void enableSigHandler(bool interactive);
NVCORE_API void disableSigHandler();
NVCORE_API bool isDebuggerPresent();
NVCORE_API bool attachToDebugger();
NVCORE_API void terminate(int code);
}
} // nv namespace
#endif // NV_CORE_DEBUG_H
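A sketch of hooking the message handler interface above (illustrative; StderrHandler and installLogHandler are hypothetical names):
#include "nvcore/Debug.h"
#include <stdio.h>
struct StderrHandler : public nv::MessageHandler {
    virtual void log(const char * str, va_list arg) {
        vfprintf(stderr, str, arg); // route nvDebug/nvDebugPrint output to stderr
    }
};
void installLogHandler()
{
    static StderrHandler handler;           // must outlive its registration
    nv::debug::setMessageHandler(&handler);
}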

181
3rdparty/nvtt/nvcore/array.h vendored Normal file
View file

@@ -0,0 +1,181 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_ARRAY_H
#define NV_CORE_ARRAY_H
/*
This array class requires the elements to be relocatable; it uses memmove and realloc. Ideally I should be
using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers
are not supported.
Note also that push_back and resize do not support inserting elements that live in the same
container. This is forbidden to prevent an extra copy.
*/
#include "memory.h"
#include "debug.h"
#include "foreach.h" // pseudoindex
namespace nv
{
class Stream;
/**
* Replacement for std::vector that is easier to debug and provides
* some nice foreach enumerators.
*/
template<typename T>
class NVCORE_CLASS Array {
public:
typedef uint size_type;
// Default constructor.
NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {}
// Copy constructor.
NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) {
copy(a.m_buffer, a.m_size);
}
// Constructor that initializes the vector with the given elements.
NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) {
copy(ptr, num);
}
// Allocate array.
NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) {
setArrayCapacity(capacity);
}
// Destructor.
NV_FORCEINLINE ~Array() {
clear();
free<T>(m_buffer);
}
/// Const element access.
NV_FORCEINLINE const T & operator[]( uint index ) const
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
NV_FORCEINLINE const T & at( uint index ) const
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
/// Element access.
NV_FORCEINLINE T & operator[] ( uint index )
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
NV_FORCEINLINE T & at( uint index )
{
nvDebugCheck(index < m_size);
return m_buffer[index];
}
/// Get vector size.
NV_FORCEINLINE uint size() const { return m_size; }
/// Get vector size.
NV_FORCEINLINE uint count() const { return m_size; }
/// Get vector capacity.
NV_FORCEINLINE uint capacity() const { return m_capacity; }
/// Get const vector pointer.
NV_FORCEINLINE const T * buffer() const { return m_buffer; }
/// Get vector pointer.
NV_FORCEINLINE T * buffer() { return m_buffer; }
/// Provide begin/end pointers for C++11 range-based for loops.
NV_FORCEINLINE T * begin() { return m_buffer; }
NV_FORCEINLINE T * end() { return m_buffer + m_size; }
NV_FORCEINLINE const T * begin() const { return m_buffer; }
NV_FORCEINLINE const T * end() const { return m_buffer + m_size; }
/// Is vector empty.
NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
/// Is a null vector.
NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
T & append();
void push_back( const T & val );
void pushBack( const T & val );
Array<T> & append( const T & val );
Array<T> & operator<< ( T & t );
void pop_back();
void popBack(uint count = 1);
void popFront(uint count = 1);
const T & back() const;
T & back();
const T & front() const;
T & front();
bool contains(const T & e) const;
bool find(const T & element, uint * indexPtr) const;
bool find(const T & element, uint begin, uint end, uint * indexPtr) const;
void removeAt(uint index);
bool remove(const T & element);
void insertAt(uint index, const T & val = T());
void append(const Array<T> & other);
void append(const T other[], uint count);
void replaceWithLast(uint index);
void resize(uint new_size);
void resize(uint new_size, const T & elem);
void fill(const T & elem);
void clear();
void shrink();
void reserve(uint desired_size);
void copy(const T * data, uint count);
Array<T> & operator=( const Array<T> & a );
T * release();
// Array enumerator.
typedef uint PseudoIndex;
NV_FORCEINLINE PseudoIndex start() const { return 0; }
NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
#if NV_CC_MSVC
NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
return m_buffer[i(this)];
}
NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const {
return m_buffer[i(this)];
}
#endif
// Friends.
template <typename Typ>
friend Stream & operator<< ( Stream & s, Array<Typ> & p );
template <typename Typ>
friend void swap(Array<Typ> & a, Array<Typ> & b);
protected:
void setArraySize(uint new_size);
void setArrayCapacity(uint new_capacity);
T * m_buffer;
uint m_capacity;
uint m_size;
};
} // nv namespace
#endif // NV_CORE_ARRAY_H
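The start/isDone/advance members implement the enumerator protocol that foreach.h builds on; traversed by hand it looks like this (illustrative sketch):
float sum(const nv::Array<float> & values)
{
    float total = 0.0f;
    for (nv::Array<float>::PseudoIndex i = values.start();
         !values.isDone(i); values.advance(i)) {
        total += values[i];
    }
    return total;
}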

53
3rdparty/nvtt/nvcore/defsgnucdarwin.h vendored Normal file
View file

@@ -0,0 +1,53 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif
#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
#include <stddef.h> // operator new, size_t, NULL
// Function linkage
#define DLL_IMPORT
#if __GNUC__ >= 4
# define DLL_EXPORT __attribute__((visibility("default")))
# define DLL_EXPORT_CLASS DLL_EXPORT
#else
# define DLL_EXPORT
# define DLL_EXPORT_CLASS
#endif
// Function calling modes
#if NV_CPU_X86
# define NV_CDECL __attribute__((cdecl))
# define NV_STDCALL __attribute__((stdcall))
#else
# define NV_CDECL
# define NV_STDCALL
#endif
#define NV_FASTCALL __attribute__((fastcall))
#define NV_FORCEINLINE __attribute__((always_inline)) inline
#define NV_DEPRECATED __attribute__((deprecated))
#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX
#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif
#define NV_NOINLINE __attribute__((noinline))
// Define __FUNC__ properly.
#if __STDC_VERSION__ < 199901L
# if __GNUC__ >= 2
# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
# else
# define __FUNC__ "<unknown>"
# endif
#else
# define __FUNC__ __PRETTY_FUNCTION__
#endif
#define restrict __restrict__

59
3rdparty/nvtt/nvcore/defsgnuclinux.h vendored Normal file
View file

@@ -0,0 +1,59 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif
#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
#include <stddef.h> // operator new, size_t, NULL
// Function linkage
#define DLL_IMPORT
#if __GNUC__ >= 4
# define DLL_EXPORT __attribute__((visibility("default")))
# define DLL_EXPORT_CLASS DLL_EXPORT
#else
# define DLL_EXPORT
# define DLL_EXPORT_CLASS
#endif
// Function calling modes
#if NV_CPU_X86
# define NV_CDECL __attribute__((cdecl))
# define NV_STDCALL __attribute__((stdcall))
#else
# define NV_CDECL
# define NV_STDCALL
#endif
#define NV_FASTCALL __attribute__((fastcall))
//#if __GNUC__ > 3
// It seems that GCC does not assume always_inline implies inline. I think this depends on the GCC version :(
#define NV_FORCEINLINE inline __attribute__((always_inline))
//#else
// Some compilers complain that inline and always_inline are redundant.
//#define NV_FORCEINLINE __attribute__((always_inline))
//#endif
#define NV_DEPRECATED __attribute__((deprecated))
#define NV_THREAD_LOCAL __thread
#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif
#define NV_NOINLINE __attribute__((noinline))
// Define __FUNC__ properly.
#if __STDC_VERSION__ < 199901L
# if __GNUC__ >= 2
# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
# else
# define __FUNC__ "<unknown>"
# endif
#else
# define __FUNC__ __PRETTY_FUNCTION__
#endif
#define restrict __restrict__

65
3rdparty/nvtt/nvcore/defsgnucwin32.h vendored Normal file
View file

@@ -0,0 +1,65 @@
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif
//#include <cstddef> // size_t, NULL
// Function linkage
#define DLL_IMPORT __declspec(dllimport)
#define DLL_EXPORT __declspec(dllexport)
#define DLL_EXPORT_CLASS DLL_EXPORT
// Function calling modes
#if NV_CPU_X86
# define NV_CDECL __attribute__((cdecl))
# define NV_STDCALL __attribute__((stdcall))
#else
# define NV_CDECL
# define NV_STDCALL
#endif
#define NV_FASTCALL __attribute__((fastcall))
#define NV_FORCEINLINE inline __attribute__((always_inline)) // GCC wants the inline keyword alongside always_inline
#define NV_DEPRECATED __attribute__((deprecated))
#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))
#define NV_CONST __attribute__((const))
#else
#define NV_PURE
#define NV_CONST
#endif
#define NV_NOINLINE __attribute__((noinline))
// Define __FUNC__ properly.
#if __STDC_VERSION__ < 199901L
# if __GNUC__ >= 2
# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
# else
# define __FUNC__ "<unknown>"
# endif
#else
# define __FUNC__ __PRETTY_FUNCTION__
#endif
#define restrict __restrict__
/*
// Type definitions
typedef unsigned char uint8;
typedef signed char int8;
typedef unsigned short uint16;
typedef signed short int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned long long uint64;
typedef signed long long int64;
// Aliases
typedef uint32 uint;
*/

94
3rdparty/nvtt/nvcore/defsvcwin32.h vendored Normal file
View file

@@ -0,0 +1,94 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_H
#error "Do not include this file directly."
#endif
// Function linkage
#define DLL_IMPORT __declspec(dllimport)
#define DLL_EXPORT __declspec(dllexport)
#define DLL_EXPORT_CLASS DLL_EXPORT
// Function calling modes
#define NV_CDECL __cdecl
#define NV_STDCALL __stdcall
#define NV_FASTCALL __fastcall
#define NV_DEPRECATED
#define NV_PURE
#define NV_CONST
// Set standard function names.
#if _MSC_VER < 1900
# define snprintf _snprintf
#endif
#if _MSC_VER < 1500
# define vsnprintf _vsnprintf
#endif
#if _MSC_VER < 1700
# define strtoll _strtoi64
# define strtoull _strtoui64
#endif
#define chdir _chdir
#define getcwd _getcwd
#if _MSC_VER < 1800 // Not sure what version introduced this.
#define va_copy(a, b) (a) = (b)
#endif
#if !defined restrict
#define restrict
#endif
// Ignore gcc attributes.
#define __attribute__(X)
#if !defined __FUNC__
#define __FUNC__ __FUNCTION__
#endif
#define NV_NOINLINE __declspec(noinline)
#define NV_FORCEINLINE __forceinline
#define NV_THREAD_LOCAL __declspec(thread)
/*
// Type definitions
typedef unsigned char uint8;
typedef signed char int8;
typedef unsigned short uint16;
typedef signed short int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned __int64 uint64;
typedef signed __int64 int64;
// Aliases
typedef uint32 uint;
*/
// Unwanted VC++ warnings to disable.
/*
#pragma warning(disable : 4244) // conversion to float, possible loss of data
#pragma warning(disable : 4245) // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch
#pragma warning(disable : 4100) // unreferenced formal parameter
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4710) // inline function not expanded
#pragma warning(disable : 4127) // Conditional expression is constant
#pragma warning(disable : 4305) // truncation from 'const double' to 'float'
#pragma warning(disable : 4505) // unreferenced local function has been removed
#pragma warning(disable : 4702) // unreachable code in inline expanded function
#pragma warning(disable : 4711) // function selected for automatic inlining
#pragma warning(disable : 4725) // Pentium fdiv bug
#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged.
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup
*/
#pragma warning(1 : 4705) // Report unused local variables.
#pragma warning(1 : 4555) // Expression has no effect.

68
3rdparty/nvtt/nvcore/foreach.h vendored Normal file
View file

@@ -0,0 +1,68 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#pragma once
#ifndef NV_CORE_FOREACH_H
#define NV_CORE_FOREACH_H
/*
These foreach macros are very non-standard and somewhat confusing, but I like them.
*/
#include "nvcore.h"
#if NV_CC_GNUC // If typeof or decltype is available:
#if !NV_CC_CPP11
# define NV_DECLTYPE typeof // typeof is a non-standard GCC extension that behaves like C++11 decltype
#else
# define NV_DECLTYPE decltype
#endif
/*
Ideally we would like to write this:
#define NV_FOREACH(i, container) \
for(NV_DECLTYPE(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
But gcc versions prior to 4.7 required an intermediate type. See:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
*/
#define NV_FOREACH(i, container) \
typedef NV_DECLTYPE(container) NV_STRING_JOIN2(cont,__LINE__); \
for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
#else // If typeof not available:
#include <new> // placement new
struct PseudoIndexWrapper {
template <typename T>
PseudoIndexWrapper(const T & container) {
nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory));
new (memory) typename T::PseudoIndex(container.start());
}
// PseudoIndex cannot have a dtor!
template <typename T> typename T::PseudoIndex & operator()(const T * /*container*/) {
return *reinterpret_cast<typename T::PseudoIndex *>(memory);
}
template <typename T> const typename T::PseudoIndex & operator()(const T * /*container*/) const {
return *reinterpret_cast<const typename T::PseudoIndex *>(memory);
}
uint8 memory[4]; // Increase the size if we have bigger enumerators.
};
#define NV_FOREACH(i, container) \
for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container))))
#endif
// Declare foreach keyword.
#if !defined NV_NO_USE_KEYWORDS
# define foreach NV_FOREACH
# define foreach_index NV_FOREACH
#endif
#endif // NV_CORE_FOREACH_H
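With the foreach keyword enabled, iteration over an Array reads like this (sketch; note that i is a pseudo-index, not an element):
#include "nvcore/array.h"
#include "nvcore/foreach.h"
void scale(nv::Array<float> & values, float s)
{
    foreach(i, values) {    // i is a pseudo-index, not an element
        values[i] *= s;
    }
}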

83
3rdparty/nvtt/nvcore/hash.h vendored Normal file
View file

@@ -0,0 +1,83 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#pragma once
#ifndef NV_CORE_HASH_H
#define NV_CORE_HASH_H
#include "nvcore.h"
namespace nv
{
inline uint sdbmHash(const void * data_in, uint size, uint h = 5381)
{
const uint8 * data = (const uint8 *) data_in;
uint i = 0;
while (i < size) {
h = (h << 16) + (h << 6) - h + (uint) data[i++];
}
return h;
}
// Note that this hash does not handle NaN properly.
inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381)
{
for (uint i = 0; i < count; i++) {
//nvDebugCheck(nv::isFinite(*f));
union { float f; uint32 i; } x = { f[i] };
if (x.i == 0x80000000) x.i = 0;
h = sdbmHash(&x, 4, h);
}
return h;
}
template <typename T>
inline uint hash(const T & t, uint h = 5381)
{
return sdbmHash(&t, sizeof(T), h);
}
template <>
inline uint hash(const float & f, uint h)
{
return sdbmFloatHash(&f, 1, h);
}
// Functors for hash table:
template <typename Key> struct Hash
{
uint operator()(const Key & k) const {
return hash(k);
}
};
template <typename Key> struct Equal
{
bool operator()(const Key & k0, const Key & k1) const {
return k0 == k1;
}
};
// @@ Move to Utils.h?
template <typename T1, typename T2>
struct Pair {
T1 first;
T2 second;
};
template <typename T1, typename T2>
bool operator==(const Pair<T1,T2> & p0, const Pair<T1,T2> & p1) {
return p0.first == p1.first && p0.second == p1.second;
}
template <typename T1, typename T2>
uint hash(const Pair<T1,T2> & p, uint h = 5381) {
return hash(p.second, hash(p.first, h)); // chain the seed h through both members
}
} // nv namespace
#endif // NV_CORE_HASH_H
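A sketch of extending the hash to a user type (Point is hypothetical). Combining members explicitly avoids hashing padding bytes, which the generic byte-wise hash<T> would include:
namespace nv {
    struct Point { int x, y; };
    inline uint hash(const Point & p, uint h = 5381)
    {
        return hash(p.y, hash(p.x, h)); // chain the seed through both members
    }
}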

29
3rdparty/nvtt/nvcore/memory.h vendored Normal file
View file

@@ -0,0 +1,29 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_MEMORY_H
#define NV_CORE_MEMORY_H
#include "nvcore.h"
namespace nv {
// C++ helpers.
template <typename T> NV_FORCEINLINE T * malloc(size_t count) {
return (T *)::malloc(sizeof(T) * count);
}
template <typename T> NV_FORCEINLINE T * realloc(T * ptr, size_t count) {
return (T *)::realloc(ptr, sizeof(T) * count);
}
template <typename T> NV_FORCEINLINE void free(const T * ptr) {
::free((void *)ptr);
}
template <typename T> NV_FORCEINLINE void zero(T & data) {
memset(&data, 0, sizeof(T));
}
} // nv namespace
#endif // NV_CORE_MEMORY_H
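The typed wrappers keep the sizeof(T) arithmetic out of call sites; a minimal sketch (memoryExample is a hypothetical name):
#include "nvcore/memory.h"
void memoryExample()
{
    float * samples = nv::malloc<float>(256);    // 256 floats, not bytes
    samples = nv::realloc<float>(samples, 512);  // grow, preserving contents
    nv::free(samples);                           // accepts const pointers too
}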

299
3rdparty/nvtt/nvcore/nvcore.h vendored Normal file
View file

@@ -0,0 +1,299 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_H
#define NV_CORE_H
// Function linkage
#if NVCORE_SHARED
#ifdef NVCORE_EXPORTS
#define NVCORE_API DLL_EXPORT
#define NVCORE_CLASS DLL_EXPORT_CLASS
#else
#define NVCORE_API DLL_IMPORT
#define NVCORE_CLASS DLL_IMPORT
#endif
#else // NVCORE_SHARED
#define NVCORE_API
#define NVCORE_CLASS
#endif // NVCORE_SHARED
// Platform definitions
#include "posh.h"
// OS:
// NV_OS_WIN32
// NV_OS_WIN64
// NV_OS_MINGW
// NV_OS_CYGWIN
// NV_OS_LINUX
// NV_OS_UNIX
// NV_OS_DARWIN
// NV_OS_XBOX
// NV_OS_ORBIS
// NV_OS_IOS
#define NV_OS_STRING POSH_OS_STRING
#if defined POSH_OS_LINUX
# define NV_OS_LINUX 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_ORBIS
# define NV_OS_ORBIS 1
#elif defined POSH_OS_FREEBSD
# define NV_OS_FREEBSD 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_OPENBSD
# define NV_OS_OPENBSD 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_CYGWIN32
# define NV_OS_CYGWIN 1
#elif defined POSH_OS_MINGW
# define NV_OS_MINGW 1
# define NV_OS_WIN32 1
#elif defined POSH_OS_OSX
# define NV_OS_DARWIN 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_IOS
# define NV_OS_DARWIN 1 //ACS should we keep this on IOS?
# define NV_OS_UNIX 1
# define NV_OS_IOS 1
#elif defined POSH_OS_UNIX
# define NV_OS_UNIX 1
#elif defined POSH_OS_WIN64
# define NV_OS_WIN32 1
# define NV_OS_WIN64 1
#elif defined POSH_OS_WIN32
# define NV_OS_WIN32 1
#elif defined POSH_OS_XBOX
# define NV_OS_XBOX 1
#else
# error "Unsupported OS"
#endif
// Threading:
// some platforms don't implement __thread or similar for thread-local-storage
#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
# define NV_OS_USE_PTHREAD 1
# if NV_OS_DARWIN || NV_OS_IOS
# define NV_OS_HAS_TLS_QUALIFIER 0
# else
# define NV_OS_HAS_TLS_QUALIFIER 1
# endif
#else
# define NV_OS_USE_PTHREAD 0
# define NV_OS_HAS_TLS_QUALIFIER 1
#endif
// CPUs:
// NV_CPU_X86
// NV_CPU_X86_64
// NV_CPU_PPC
// NV_CPU_ARM
// NV_CPU_AARCH64
#define NV_CPU_STRING POSH_CPU_STRING
#if defined POSH_CPU_X86_64
//# define NV_CPU_X86 1
# define NV_CPU_X86_64 1
#elif defined POSH_CPU_X86
# define NV_CPU_X86 1
#elif defined POSH_CPU_PPC
# define NV_CPU_PPC 1
#elif defined POSH_CPU_STRONGARM
# define NV_CPU_ARM 1
#elif defined POSH_CPU_AARCH64
# define NV_CPU_AARCH64 1
#else
# error "Unsupported CPU"
#endif
// Compiler:
// NV_CC_GNUC
// NV_CC_MSVC
// NV_CC_CLANG
#if defined POSH_COMPILER_CLANG
# define NV_CC_CLANG 1
# define NV_CC_GNUC 1 // Clang is compatible with GCC.
# define NV_CC_STRING "clang"
#elif defined POSH_COMPILER_GCC
# define NV_CC_GNUC 1
# define NV_CC_STRING "gcc"
#elif defined POSH_COMPILER_MSVC
# define NV_CC_MSVC 1
# define NV_CC_STRING "msvc"
#else
# error "Unsupported compiler"
#endif
#if NV_CC_MSVC
#define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
#else
// @@ IC: This works in CLANG; what about GCC?
// @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4.
#ifdef __clang__
#define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert))
#elif defined __GNUC__
#define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
#endif
#endif
// Endianness:
#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
#define NV_BIG_ENDIAN POSH_BIG_ENDIAN
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING
// Define the right printf prefix for size_t arguments:
#if POSH_64BIT_POINTER
# define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX
#else
# define NV_SIZET_PRINTF_PREFIX
#endif
// Type definitions:
typedef posh_u8_t uint8;
typedef posh_i8_t int8;
typedef posh_u16_t uint16;
typedef posh_i16_t int16;
typedef posh_u32_t uint32;
typedef posh_i32_t int32;
typedef posh_u64_t uint64;
typedef posh_i64_t int64;
// Aliases
typedef uint32 uint;
// Version string:
#define NV_VERSION_STRING \
NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \
NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
// Disable copy constructor and assignment operator.
#if NV_CC_CPP11
#define NV_FORBID_COPY(C) \
C( const C & ) = delete; \
C &operator=( const C & ) = delete
#else
#define NV_FORBID_COPY(C) \
private: \
C( const C & ); \
C &operator=( const C & )
#endif
// Disable dynamic allocation on the heap.
// See Prohibiting Heap-Based Objects in More Effective C++.
#define NV_FORBID_HEAPALLOC() \
private: \
void *operator new(size_t size); \
void *operator new[](size_t size)
//static void *operator new(size_t size); \
//static void *operator new[](size_t size);
// String concatenation macros.
#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
#define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
#define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
#define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
#define NV_STRING2(x) #x
#define NV_STRING(x) NV_STRING2(x)
#if NV_CC_MSVC
#define NV_MULTI_LINE_MACRO_BEGIN do {
#define NV_MULTI_LINE_MACRO_END \
__pragma(warning(push)) \
__pragma(warning(disable:4127)) \
} while(false) \
__pragma(warning(pop))
#else
#define NV_MULTI_LINE_MACRO_BEGIN do {
#define NV_MULTI_LINE_MACRO_END } while(false)
#endif
#if NV_CC_CPP11
#define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
#else
#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
#endif
#define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best.
// Make sure type definitions are fine.
NV_COMPILER_CHECK(sizeof(int8) == 1);
NV_COMPILER_CHECK(sizeof(uint8) == 1);
NV_COMPILER_CHECK(sizeof(int16) == 2);
NV_COMPILER_CHECK(sizeof(uint16) == 2);
NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_COMPILER_CHECK(sizeof(int64) == 8);
NV_COMPILER_CHECK(sizeof(uint64) == 8);
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#if 0 // Disabled in The Witness.
#if NV_CC_MSVC
#define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x)
#else
#define NV_MESSAGE(x) message(x)
#endif
#else
#define NV_MESSAGE(x)
#endif
// Startup initialization macro.
#define NV_AT_STARTUP(some_code) \
namespace { \
static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
} \
NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
}
// Indicate to the compiler that the parameter is not used, to suppress compiler warnings.
#define NV_UNUSED(a) ((a)=(a))
// Null index. @@ Move this somewhere else... it's only used by nvmesh.
//const unsigned int NIL = unsigned int(~0);
//#define NIL uint(~0)
// Null pointer.
#ifndef NULL
#define NULL 0
#endif
// Platform includes
#if NV_CC_MSVC
# if NV_OS_WIN32
# include "DefsVcWin32.h"
# elif NV_OS_XBOX
# include "DefsVcXBox.h"
# else
# error "MSVC: Platform not supported"
# endif
#elif NV_CC_GNUC
# if NV_OS_LINUX
# include "DefsGnucLinux.h"
# elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD
# include "DefsGnucDarwin.h"
# elif NV_OS_MINGW
# include "DefsGnucWin32.h"
# elif NV_OS_CYGWIN
# error "GCC: Cygwin not supported"
# else
# error "GCC: Platform not supported"
# endif
#endif
#endif // NV_CORE_H
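Two of the macros above in action (illustrative sketch; s_initialized is a hypothetical name):
NV_COMPILER_CHECK(NV_ARRAY_SIZE("abc") == 4);   // counts the terminator
static int s_initialized = 0;
NV_AT_STARTUP(s_initialized = 1);               // runs before main() via a static object's constructor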

1030
3rdparty/nvtt/nvcore/posh.h vendored Normal file

File diff suppressed because it is too large

459
3rdparty/nvtt/nvcore/stdstream.h vendored Normal file
View file

@@ -0,0 +1,459 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#include "nvcore.h"
#include "stream.h"
#include "array.h"
#include <stdio.h> // fopen
#include <string.h> // memcpy
namespace nv
{
// Portable version of fopen.
inline FILE * fileOpen(const char * fileName, const char * mode)
{
nvCheck(fileName != NULL);
#if NV_CC_MSVC && _MSC_VER >= 1400
FILE * fp;
if (fopen_s(&fp, fileName, mode) == 0) {
return fp;
}
return NULL;
#else
return fopen(fileName, mode);
#endif
}
/// Base stdio stream.
class NVCORE_CLASS StdStream : public Stream
{
NV_FORBID_COPY(StdStream);
public:
/// Ctor.
StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { }
/// Dtor.
virtual ~StdStream()
{
if( m_fp != NULL && m_autoclose ) {
#if NV_OS_WIN32
_fclose_nolock( m_fp );
#else
fclose( m_fp );
#endif
}
}
/** @name Stream implementation. */
//@{
virtual void seek( uint pos )
{
nvDebugCheck(m_fp != NULL);
nvDebugCheck(pos <= size());
#if NV_OS_WIN32
_fseek_nolock(m_fp, pos, SEEK_SET);
#else
fseek(m_fp, pos, SEEK_SET);
#endif
}
virtual uint tell() const
{
nvDebugCheck(m_fp != NULL);
#if NV_OS_WIN32
return _ftell_nolock(m_fp);
#else
return (uint)ftell(m_fp);
#endif
}
virtual uint size() const
{
nvDebugCheck(m_fp != NULL);
#if NV_OS_WIN32
uint pos = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, 0, SEEK_END);
uint end = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, pos, SEEK_SET);
#else
uint pos = (uint)ftell(m_fp);
fseek(m_fp, 0, SEEK_END);
uint end = (uint)ftell(m_fp);
fseek(m_fp, pos, SEEK_SET);
#endif
return end;
}
virtual bool isError() const
{
return m_fp == NULL || ferror( m_fp ) != 0;
}
virtual void clearError()
{
nvDebugCheck(m_fp != NULL);
clearerr(m_fp);
}
// @@ The original implementation uses feof, which only returns true when we attempt to read *past* the end of the stream.
// That is, if we read the last byte of a file, isAtEnd would still return false, even though the stream pointer is at the file end. This is not the intent and was inconsistent with the implementation of MemoryStream.
// A better implementation uses ftell and fseek to determine our location within the file.
virtual bool isAtEnd() const
{
if (m_fp == NULL) return true;
//nvDebugCheck(m_fp != NULL);
//return feof( m_fp ) != 0;
#if NV_OS_WIN32
uint pos = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, 0, SEEK_END);
uint end = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, pos, SEEK_SET);
#else
uint pos = (uint)ftell(m_fp);
fseek(m_fp, 0, SEEK_END);
uint end = (uint)ftell(m_fp);
fseek(m_fp, pos, SEEK_SET);
#endif
return pos == end;
}
/// Always true.
virtual bool isSeekable() const { return true; }
//@}
protected:
FILE * m_fp;
bool m_autoclose;
};
/// Standard output stream.
class NVCORE_CLASS StdOutputStream : public StdStream
{
NV_FORBID_COPY(StdOutputStream);
public:
/// Construct stream by file name.
StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { }
/// Construct stream by file handle.
StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose)
{
}
/** @name Stream implementation. */
//@{
/// Write data.
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(m_fp != NULL);
#if NV_OS_WIN32
return (uint)_fwrite_nolock(data, 1, len, m_fp);
#elif NV_OS_LINUX
return (uint)fwrite_unlocked(data, 1, len, m_fp);
#elif NV_OS_DARWIN
// @@ No error checking, always returns len.
for (uint i = 0; i < len; i++) {
putc_unlocked(((char *)data)[i], m_fp);
}
return len;
#else
return (uint)fwrite(data, 1, len, m_fp);
#endif
}
virtual bool isLoading() const
{
return false;
}
virtual bool isSaving() const
{
return true;
}
//@}
};
/// Standard input stream.
class NVCORE_CLASS StdInputStream : public StdStream
{
NV_FORBID_COPY(StdInputStream);
public:
/// Construct stream by file name.
StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { }
/// Construct stream by file handle.
StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
{
}
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(m_fp != NULL);
#if NV_OS_WIN32
return (uint)_fread_nolock(data, 1, len, m_fp);
#elif NV_OS_LINUX
return (uint)fread_unlocked(data, 1, len, m_fp);
#elif NV_OS_DARWIN
// @@ No error checking, always returns len.
for (uint i = 0; i < len; i++) {
((char *)data)[i] = getc_unlocked(m_fp);
}
return len;
#else
return (uint)fread(data, 1, len, m_fp);
#endif
}
virtual bool isLoading() const
{
return true;
}
virtual bool isSaving() const
{
return false;
}
//@}
};
/// Memory input stream.
class NVCORE_CLASS MemoryInputStream : public Stream
{
NV_FORBID_COPY(MemoryInputStream);
public:
/// Ctor.
MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { }
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
nvDebugCheck(!isError());
uint left = m_size - tell();
if (len > left) len = left;
memcpy( data, m_ptr, len );
m_ptr += len;
return len;
}
virtual void seek( uint pos )
{
nvDebugCheck(!isError());
m_ptr = m_mem + pos;
nvDebugCheck(!isError());
}
virtual uint tell() const
{
nvDebugCheck(m_ptr >= m_mem);
return uint(m_ptr - m_mem);
}
virtual uint size() const
{
return m_size;
}
virtual bool isError() const
{
return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem;
}
virtual void clearError()
{
// Nothing to do.
}
virtual bool isAtEnd() const
{
return m_ptr == m_mem + m_size;
}
/// Always true.
virtual bool isSeekable() const
{
return true;
}
virtual bool isLoading() const
{
return true;
}
virtual bool isSaving() const
{
return false;
}
//@}
const uint8 * ptr() const { return m_ptr; }
private:
const uint8 * m_mem;
const uint8 * m_ptr;
uint m_size;
};
/// Buffer output stream.
class NVCORE_CLASS BufferOutputStream : public Stream
{
NV_FORBID_COPY(BufferOutputStream);
public:
BufferOutputStream(Array<uint8> & buffer) : m_buffer(buffer) { }
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
m_buffer.append((uint8 *)data, len);
return len;
}
virtual void seek( uint /*pos*/ ) { /*Not implemented*/ }
virtual uint tell() const { return m_buffer.size(); }
virtual uint size() const { return m_buffer.size(); }
virtual bool isError() const { return false; }
virtual void clearError() {}
virtual bool isAtEnd() const { return true; }
virtual bool isSeekable() const { return false; }
virtual bool isLoading() const { return false; }
virtual bool isSaving() const { return true; }
private:
Array<uint8> & m_buffer;
};
/// Protected input stream.
class NVCORE_CLASS ProtectedStream : public Stream
{
NV_FORBID_COPY(ProtectedStream);
public:
/// Ctor.
ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false)
{
}
/// Ctor.
ProtectedStream( Stream * s, bool autodelete = true ) :
m_s(s), m_autodelete(autodelete)
{
nvDebugCheck(m_s != NULL);
}
/// Dtor.
virtual ~ProtectedStream()
{
if( m_autodelete ) {
delete m_s;
}
}
/** @name Stream implementation. */
//@{
/// Read data.
virtual uint serialize( void * data, uint len )
{
nvDebugCheck(data != NULL);
len = m_s->serialize( data, len );
if( m_s->isError() ) {
throw;
}
return len;
}
virtual void seek( uint pos )
{
m_s->seek( pos );
if( m_s->isError() ) {
throw;
}
}
virtual uint tell() const
{
return m_s->tell();
}
virtual uint size() const
{
return m_s->size();
}
virtual bool isError() const
{
return m_s->isError();
}
virtual void clearError()
{
m_s->clearError();
}
virtual bool isAtEnd() const
{
return m_s->isAtEnd();
}
virtual bool isSeekable() const
{
return m_s->isSeekable();
}
virtual bool isLoading() const
{
return m_s->isLoading();
}
virtual bool isSaving() const
{
return m_s->isSaving();
}
//@}
private:
Stream * const m_s;
bool const m_autodelete;
};
} // nv namespace
//#endif // NV_CORE_STDSTREAM_H
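An illustrative helper built on the classes above (readFile is an assumption, not library code): slurp a whole file into a growable buffer:
bool readFile(const char * name, nv::Array<uint8> & bytes)
{
    nv::StdInputStream stream(name);
    if (stream.isError()) return false;
    bytes.resize(stream.size());
    if (bytes.size() == 0) return true;  // empty file: nothing to read
    return stream.serialize(bytes.buffer(), bytes.size()) == bytes.size();
}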

163
3rdparty/nvtt/nvcore/stream.h vendored Normal file
View file

@@ -0,0 +1,163 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_STREAM_H
#define NV_CORE_STREAM_H
#include "nvcore.h"
#include "debug.h"
namespace nv
{
/// Base stream class.
class NVCORE_CLASS Stream {
public:
enum ByteOrder {
LittleEndian = false,
BigEndian = true,
};
/// Get the byte order of the system.
static ByteOrder getSystemByteOrder() {
#if NV_LITTLE_ENDIAN
return LittleEndian;
#else
return BigEndian;
#endif
}
/// Ctor.
Stream() : m_byteOrder(LittleEndian) { }
/// Virtual destructor.
virtual ~Stream() {}
/// Set byte order.
void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
/// Get byte order.
ByteOrder byteOrder() const { return m_byteOrder; }
/// Serialize the given data.
virtual uint serialize( void * data, uint len ) = 0;
/// Move to the given position in the archive.
virtual void seek( uint pos ) = 0;
/// Return the current position in the archive.
virtual uint tell() const = 0;
/// Return the current size of the archive.
virtual uint size() const = 0;
/// Determine if there has been any error.
virtual bool isError() const = 0;
/// Clear errors.
virtual void clearError() = 0;
/// Return true if the stream is at the end.
virtual bool isAtEnd() const = 0;
/// Return true if the stream is seekable.
virtual bool isSeekable() const = 0;
/// Return true if this is an input stream.
virtual bool isLoading() const = 0;
/// Return true if this is an output stream.
virtual bool isSaving() const = 0;
void advance(uint offset) { seek(tell() + offset); }
// friends
friend Stream & operator<<( Stream & s, bool & c ) {
#if NV_OS_DARWIN && !NV_CC_CPP11
nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0;
s.serialize( &b, 1 );
c = (b == 1);
#else
nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 );
#endif
return s;
}
friend Stream & operator<<( Stream & s, char & c ) {
nvStaticCheck(sizeof(char) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint8 & c ) {
nvStaticCheck(sizeof(uint8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, int8 & c ) {
nvStaticCheck(sizeof(int8) == 1);
s.serialize( &c, 1 );
return s;
}
friend Stream & operator<<( Stream & s, uint16 & c ) {
nvStaticCheck(sizeof(uint16) == 2);
return s.byteOrderSerialize( &c, 2 );
}
friend Stream & operator<<( Stream & s, int16 & c ) {
nvStaticCheck(sizeof(int16) == 2);
return s.byteOrderSerialize( &c, 2 );
}
friend Stream & operator<<( Stream & s, uint32 & c ) {
nvStaticCheck(sizeof(uint32) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, int32 & c ) {
nvStaticCheck(sizeof(int32) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, uint64 & c ) {
nvStaticCheck(sizeof(uint64) == 8);
return s.byteOrderSerialize( &c, 8 );
}
friend Stream & operator<<( Stream & s, int64 & c ) {
nvStaticCheck(sizeof(int64) == 8);
return s.byteOrderSerialize( &c, 8 );
}
friend Stream & operator<<( Stream & s, float & c ) {
nvStaticCheck(sizeof(float) == 4);
return s.byteOrderSerialize( &c, 4 );
}
friend Stream & operator<<( Stream & s, double & c ) {
nvStaticCheck(sizeof(double) == 8);
return s.byteOrderSerialize( &c, 8 );
}
protected:
/// Serialize in the stream byte order.
Stream & byteOrderSerialize( void * v, uint len ) {
if( m_byteOrder == getSystemByteOrder() ) {
serialize( v, len );
}
else {
for( uint i = len; i > 0; i-- ) {
serialize( (uint8 *)v + i - 1, 1 );
}
}
return *this;
}
private:
ByteOrder m_byteOrder;
};
} // nv namespace
#endif // NV_CORE_STREAM_H
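Because operator<< both reads and writes depending on the stream direction, one function can serve load and save; a sketch with a hypothetical BlockHeader type:
struct BlockHeader {
    uint32 magic;
    uint16 version;
};
nv::Stream & operator<<(nv::Stream & s, BlockHeader & h)
{
    s.setByteOrder(nv::Stream::LittleEndian); // fixed on-disk byte order
    return s << h.magic << h.version;         // swaps bytes only when needed
}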

429
3rdparty/nvtt/nvcore/strlib.h vendored Normal file
View file

@@ -0,0 +1,429 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_STRING_H
#define NV_CORE_STRING_H
#include "debug.h"
#include "hash.h" // hash
//#include <string.h> // strlen, etc.
#if NV_OS_WIN32
#define NV_PATH_SEPARATOR '\\'
#else
#define NV_PATH_SEPARATOR '/'
#endif
namespace nv
{
NVCORE_API uint strHash(const char * str, uint h) NV_PURE;
/// String hash based on Bernstein's hash.
inline uint strHash(const char * data, uint h = 5381)
{
uint i = 0;
while(data[i] != 0) {
h = (33 * h) ^ uint(data[i]);
i++;
}
return h;
}
template <> struct Hash<const char *> {
uint operator()(const char * str) const { return strHash(str); }
};
NVCORE_API uint strLen(const char * str) NV_PURE; // Asserts on NULL strings.
NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE; // Asserts on NULL strings.
NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE; // Asserts on NULL strings.
NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
template <> struct Equal<const char *> {
bool operator()(const char * a, const char * b) const { return strEqual(a, b); }
};
NVCORE_API bool strBeginsWith(const char * dst, const char * prefix) NV_PURE;
NVCORE_API bool strEndsWith(const char * dst, const char * suffix) NV_PURE;
NVCORE_API void strCpy(char * dst, uint size, const char * src);
NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len);
NVCORE_API void strCat(char * dst, uint size, const char * src);
NVCORE_API const char * strSkipWhiteSpace(const char * str);
NVCORE_API char * strSkipWhiteSpace(char * str);
NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
NVCORE_API bool isNumber(const char * str) NV_PURE;
/* @@ Implement these two functions and modify StringBuilder to use them?
NVCORE_API void strFormat(const char * dst, const char * fmt, ...);
NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg);
template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3)));
template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) {
va_list args;
va_start(args, fmt);
strFormatList(buffer, count, fmt, args);
va_end(args);
}
template <size_t count> void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) {
va_list tmp;
va_copy(tmp, arg);
strFormatList(buffer, count, fmt, tmp);
va_end(tmp);
}*/
template <int count> void strCpySafe(char (&buffer)[count], const char *src) {
strCpy(buffer, count, src);
}
template <int count> void strCatSafe(char (&buffer)[count], const char * src) {
strCat(buffer, count, src);
}
/// String builder.
class NVCORE_CLASS StringBuilder
{
public:
StringBuilder();
explicit StringBuilder( uint size_hint );
StringBuilder(const char * str);
StringBuilder(const char * str, uint len);
StringBuilder(const StringBuilder & other);
~StringBuilder();
StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
StringBuilder & formatList( const char * format, va_list arg );
StringBuilder & append(const char * str);
StringBuilder & append(const char * str, uint len);
StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
StringBuilder & appendFormatList(const char * format, va_list arg);
StringBuilder & appendSpace(uint n);
StringBuilder & number( int i, int base = 10 );
StringBuilder & number( uint i, int base = 10 );
StringBuilder & reserve(uint size_hint);
StringBuilder & copy(const char * str);
StringBuilder & copy(const char * str, uint len);
StringBuilder & copy(const StringBuilder & str);
StringBuilder & toLower();
StringBuilder & toUpper();
bool endsWith(const char * str) const;
bool beginsWith(const char * str) const;
char * reverseFind(char c);
void reset();
bool isNull() const { return m_size == 0; }
// const char * accessors
//operator const char * () const { return m_str; }
//operator char * () { return m_str; }
const char * str() const { return m_str; }
char * str() { return m_str; }
char * release();
/// Implement value semantics.
StringBuilder & operator=( const StringBuilder & s ) {
return copy(s);
}
/// Implement value semantics.
StringBuilder & operator=( const char * s ) {
return copy(s);
}
/// Equal operator.
bool operator==( const StringBuilder & s ) const {
return strMatch(s.m_str, m_str);
}
/// Return the exact length.
uint length() const { return isNull() ? 0 : strLen(m_str); }
/// Return the size of the string container.
uint capacity() const { return m_size; }
/// Return the hash of the string.
uint hash() const { return isNull() ? 0 : strHash(m_str); }
// Swap strings.
friend void swap(StringBuilder & a, StringBuilder & b);
protected:
/// Size of the string container.
uint m_size;
/// String.
char * m_str;
};
/// Path string. @@ This should be called PathBuilder.
class NVCORE_CLASS Path : public StringBuilder
{
public:
Path() : StringBuilder() {}
explicit Path(int size_hint) : StringBuilder(size_hint) {}
Path(const char * str) : StringBuilder(str) {}
Path(const Path & path) : StringBuilder(path) {}
const char * fileName() const;
const char * extension() const;
void translatePath(char pathSeparator = NV_PATH_SEPARATOR);
void appendSeparator(char pathSeparator = NV_PATH_SEPARATOR);
void stripFileName();
void stripExtension();
// statics
NVCORE_API static char separator();
NVCORE_API static const char * fileName(const char *);
NVCORE_API static const char * extension(const char *);
NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR);
};
/// String class.
class NVCORE_CLASS String
{
public:
/// Constructs a null string. @sa isNull()
String()
{
data = NULL;
}
/// Constructs a shared copy of str.
String(const String & str)
{
data = str.data;
if (data != NULL) addRef();
}
/// Constructs a shared string from a standard string.
String(const char * str)
{
setString(str);
}
/// Constructs a shared string from a standard string.
String(const char * str, int length)
{
setString(str, length);
}
/// Constructs a shared string from a StringBuilder.
String(const StringBuilder & str)
{
setString(str);
}
/// Dtor.
~String()
{
release();
}
String clone() const;
/// Release the current string and allocate a new one.
const String & operator=( const char * str )
{
release();
setString( str );
return *this;
}
/// Release the current string and allocate a new one.
const String & operator=( const StringBuilder & str )
{
release();
setString( str );
return *this;
}
/// Implement value semantics.
String & operator=( const String & str )
{
if (str.data != data)
{
release();
data = str.data;
addRef();
}
return *this;
}
/// Equal operator.
bool operator==( const String & str ) const
{
return strMatch(str.data, data);
}
/// Equal operator.
bool operator==( const char * str ) const
{
return strMatch(str, data);
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
return !strMatch(str.data, data);
}
/// Not equal operator.
bool operator!=( const char * str ) const
{
return !strMatch(str, data);
}
/// Returns true if this string is the null string.
bool isNull() const { return data == NULL; }
/// Return the exact length.
uint length() const { nvDebugCheck(data != NULL); return strLen(data); }
/// Return the hash of the string.
uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
/// const char * cast operator.
operator const char * () const { return data; }
/// Get string pointer.
const char * str() const { return data; }
private:
// Add reference count.
void addRef();
// Decrease reference count.
void release();
uint16 getRefCount() const
{
nvDebugCheck(data != NULL);
return *reinterpret_cast<const uint16 *>(data - 2);
}
void setRefCount(uint16 count) {
nvDebugCheck(data != NULL);
nvCheck(count < 0xFFFF);
*reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
}
void setData(const char * str) {
data = str + 2;
}
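// Layout of a shared string, as implied by the accessors above (the allocation
// itself happens in allocString below):
//
//   [uint16 refcount][ c ][ h ][ a ][ r ][ s ][ \0 ]
//                     ^-- data
//
// 'data' points at the first character, so the count is stored at data - 2.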
void allocString(const char * str)
{
allocString(str, strLen(str));
}
void allocString(const char * str, uint length);
void setString(const char * str);
void setString(const char * str, uint length);
void setString(const StringBuilder & str);
// Swap strings.
friend void swap(String & a, String & b);
private:
const char * data;
};
template <> struct Hash<String> {
uint operator()(const String & str) const { return str.hash(); }
};
// Like AutoPtr, but for const char strings.
class AutoString
{
NV_FORBID_COPY(AutoString);
NV_FORBID_HEAPALLOC();
public:
// Ctor.
AutoString(const char * p = NULL) : m_ptr(p) { }
#if NV_CC_CPP11
// Move ctor.
AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; }
#endif
// Dtor. Deletes owned pointer.
~AutoString() {
delete [] m_ptr;
m_ptr = NULL;
}
// Delete owned pointer and assign new one.
void operator=(const char * p) {
if (p != m_ptr)
{
delete [] m_ptr;
m_ptr = p;
}
}
// Get pointer.
const char * ptr() const { return m_ptr; }
operator const char *() const { return m_ptr; }
// Relinquish ownership of the underlying pointer and return it.
const char * release() {
const char * tmp = m_ptr;
m_ptr = NULL;
return tmp;
}
// comparison operators.
friend bool operator == (const AutoString & ap, const char * const p) {
return (ap.ptr() == p);
}
friend bool operator != (const AutoString & ap, const char * const p) {
return (ap.ptr() != p);
}
friend bool operator == (const char * const p, const AutoString & ap) {
return (ap.ptr() == p);
}
friend bool operator != (const char * const p, const AutoString & ap) {
return (ap.ptr() != p);
}
private:
const char * m_ptr;
};
} // nv namespace
#endif // NV_CORE_STRING_H

281
3rdparty/nvtt/nvcore/utils.h vendored Normal file
View file

@ -0,0 +1,281 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_CORE_UTILS_H
#define NV_CORE_UTILS_H
#include "debug.h" // nvdebugcheck
#include <new> // for placement new
// Just in case. Grrr.
#undef min
#undef max
#define NV_INT8_MIN (-128)
#define NV_INT8_MAX 127
#define NV_UINT8_MAX 255
#define NV_INT16_MIN (-32767-1)
#define NV_INT16_MAX 32767
#define NV_UINT16_MAX 0xffff
#define NV_INT32_MIN (-2147483647-1)
#define NV_INT32_MAX 2147483647
#define NV_UINT32_MAX 0xffffffff
#define NV_INT64_MAX POSH_I64(9223372036854775807)
#define NV_INT64_MIN (-POSH_I64(9223372036854775807)-1)
#define NV_UINT64_MAX POSH_U64(0xffffffffffffffff)
#define NV_HALF_MAX 65504.0F
#define NV_FLOAT_MAX 3.402823466e+38F
#define NV_INTEGER_TO_FLOAT_MAX 16777216 // Largest integer (2^24) such that it and all smaller integers can be stored exactly in a 32-bit float.
namespace nv
{
// Less error prone than casting. From CB:
// http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
// These intentionally look like casts.
// uint32 casts:
template <typename T> inline uint32 U32(T x) { return x; }
template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
template <> inline uint32 U32<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
//template <> inline uint32 U32<uint32>(uint32 x) { return x; }
template <> inline uint32 U32<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint32)x; }
//template <> inline uint32 U32<uint16>(uint16 x) { return x; }
template <> inline uint32 U32<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint32)x; }
//template <> inline uint32 U32<uint8>(uint8 x) { return x; }
template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
// int32 casts:
template <typename T> inline int32 I32(T x) { return x; }
template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
template <> inline int32 I32<int64>(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_INT32_MAX); return (int32)x; }
template <> inline int32 I32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
//template <> inline int32 I32<int32>(int32 x) { return x; }
//template <> inline int32 I32<uint16>(uint16 x) { return x; }
//template <> inline int32 I32<int16>(int16 x) { return x; }
//template <> inline int32 I32<uint8>(uint8 x) { return x; }
//template <> inline int32 I32<int8>(int8 x) { return x; }
// uint16 casts:
template <typename T> inline uint16 U16(T x) { return x; }
template <> inline uint16 U16<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
template <> inline uint16 U16<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
template <> inline uint16 U16<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
template <> inline uint16 U16<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
//template <> inline uint16 U16<uint16>(uint16 x) { return x; }
template <> inline uint16 U16<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint16)x; }
//template <> inline uint16 U16<uint8>(uint8 x) { return x; }
template <> inline uint16 U16<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint16)x; }
// int16 casts:
template <typename T> inline int16 I16(T x) { return x; }
template <> inline int16 I16<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
template <> inline int16 I16<int64>(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return (int16)x; }
template <> inline int16 I16<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
template <> inline int16 I16<int32>(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return (int16)x; }
template <> inline int16 I16<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
//template <> inline int16 I16<int16>(int16 x) { return x; }
//template <> inline int16 I16<uint8>(uint8 x) { return x; }
//template <> inline int16 I16<int8>(int8 x) { return x; }
// uint8 casts:
template <typename T> inline uint8 U8(T x) { return x; }
template <> inline uint8 U8<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
template <> inline uint8 U8<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
template <> inline uint8 U8<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
template <> inline uint8 U8<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
template <> inline uint8 U8<uint16>(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
template <> inline uint8 U8<int16>(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
//template <> inline uint8 U8<uint8>(uint8 x) { return x; }
template <> inline uint8 U8<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint8)x; }
//template <> inline uint8 U8<float>(int8 x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; }
// int8 casts:
template <typename T> inline int8 I8(T x) { return x; }
template <> inline int8 I8<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
template <> inline int8 I8<int64>(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
template <> inline int8 I8<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
template <> inline int8 I8<int32>(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
template <> inline int8 I8<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
template <> inline int8 I8<int16>(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
template <> inline int8 I8<uint8>(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
//template <> inline int8 I8<int8>(int8 x) { return x; }
// float casts:
template <typename T> inline float F32(T x) { return x; }
template <> inline float F32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
template <> inline float F32<int64>(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
template <> inline float F32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
template <> inline float F32<int32>(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
// The compiler should not complain about these conversions:
//template <> inline float F32<uint16>(uint16 x) { return (float)x; }
//template <> inline float F32<int16>(int16 x) { return (float)x; }
//template <> inline float F32<uint8>(uint8 x) { return (float)x; }
//template <> inline float F32<int8>(int8 x) { return (float)x; }
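// Illustrative usage (not part of the API): these read like casts, but in debug
// builds nvDebugCheck verifies that the value actually fits in the target type.
//
//   uint64 big = uint64(NV_UINT32_MAX) + 1;
//   uint32 a = U32(big);   // asserts: big does not fit in 32 bits
//   int8 b = I8(200);      // asserts: 200 > NV_INT8_MAX
//   uint16 c = U16(1234);  // ok, value preserved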
/// Swap two values.
template <typename T>
inline void swap(T & a, T & b)
{
T temp(a);
a = b;
b = temp;
}
/// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN.
template <typename T>
//inline const T & max(const T & a, const T & b)
inline T max(const T & a, const T & b)
{
return (b < a) ? a : b;
}
/// Return the maximum of the four arguments.
template <typename T>
//inline const T & max4(const T & a, const T & b, const T & c)
inline T max4(const T & a, const T & b, const T & c, const T & d)
{
return max(max(a, b), max(c, d));
}
/// Return the maximum of the three arguments.
template <typename T>
//inline const T & max3(const T & a, const T & b, const T & c)
inline T max3(const T & a, const T & b, const T & c)
{
return max(a, max(b, c));
}
/// Return the minimum of two values.
template <typename T>
//inline const T & min(const T & a, const T & b)
inline T min(const T & a, const T & b)
{
return (a < b) ? a : b;
}
/// Return the minimum of the three arguments.
template <typename T>
//inline const T & min3(const T & a, const T & b, const T & c)
inline T min3(const T & a, const T & b, const T & c)
{
return min(a, min(b, c));
}
/// Clamp between two values.
template <typename T>
//inline const T & clamp(const T & x, const T & a, const T & b)
inline T clamp(const T & x, const T & a, const T & b)
{
return min(max(x, a), b);
}
/** Return the next power of two.
* @see http://graphics.stanford.edu/~seander/bithacks.html
* @warning Behaviour for 0 is undefined.
* @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
* @note nextPowerOfTwo(x) = 2 << log2(x-1)
*/
inline uint nextPowerOfTwo( uint x )
{
nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return x+1;
#else
uint p = 1;
while( x > p ) {
p += p;
}
return p;
#endif
}
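// Illustrative values: nextPowerOfTwo(17) == 32, nextPowerOfTwo(32) == 32,
// nextPowerOfTwo(1) == 1. Passing 0 is undefined: the initial decrement wraps.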
/// Return true if @a n is a power of two.
inline bool isPowerOfTwo( uint n )
{
return (n & (n-1)) == 0;
}
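// Note that by this definition isPowerOfTwo(0) returns true.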
// @@ Move this to utils?
/// Delete all the elements of a container.
template <typename T>
void deleteAll(T & container)
{
for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
{
delete container[i];
}
}
// @@ Specialize these methods for numeric, pointer, and pod types.
template <typename T>
void construct_range(T * restrict ptr, uint new_size, uint old_size) {
for (uint i = old_size; i < new_size; i++) {
new(ptr+i) T; // placement new
}
}
template <typename T>
void construct_range(T * restrict ptr, uint new_size, uint old_size, const T & elem) {
for (uint i = old_size; i < new_size; i++) {
new(ptr+i) T(elem); // placement new
}
}
template <typename T>
void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) {
for (uint i = old_size; i < new_size; i++) {
new(ptr+i) T(src[i]); // placement new
}
}
template <typename T>
void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
for (uint i = new_size; i < old_size; i++) {
(ptr+i)->~T(); // Explicit call to the destructor
}
}
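// Illustrative use of the helpers above, mirroring a growable array's resize
// (buffer management here is a sketch, not a prescription):
//
//   T * p = (T *)malloc(sizeof(T) * count);
//   construct_range(p, count, 0);   // default-construct elements [0, count)
//   ...
//   destroy_range(p, 0, count);     // destroy elements [0, count)
//   free(p);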
template <typename T>
void fill(T * restrict dst, uint count, const T & value) {
for (uint i = 0; i < count; i++) {
dst[i] = value;
}
}
template <typename T>
void copy_range(T * restrict dst, const T * restrict src, uint count) {
for (uint i = 0; i < count; i++) {
dst[i] = src[i];
}
}
template <typename T>
bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) {
for (uint i = begin; i < end; i++) {
if (ptr[i] == element) {
if (index != NULL) *index = i;
return true;
}
}
return false;
}
} // nv namespace
#endif // NV_CORE_UTILS_H

921
3rdparty/nvtt/nvmath/Vector.inl vendored Normal file
View file

@ -0,0 +1,921 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_VECTOR_INL
#define NV_MATH_VECTOR_INL
#include "vector.h"
#include "nvcore/utils.h" // min, max
#include "nvcore/hash.h" // hash
namespace nv
{
// Helpers to convert vector types. Assume T has x,y members and 2 argument constructor.
//template <typename T> T to(Vector2::Arg v) { return T(v.x, v.y); }
// Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor.
//template <typename T> T to(Vector3::Arg v) { return T(v.x, v.y, v.z); }
// Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor.
//template <typename T> T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); }
// Vector2
inline Vector2::Vector2() {}
inline Vector2::Vector2(float f) : x(f), y(f) {}
inline Vector2::Vector2(float x, float y) : x(x), y(y) {}
inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {}
inline const Vector2 & Vector2::operator=(Vector2::Arg v)
{
x = v.x;
y = v.y;
return *this;
}
inline const float * Vector2::ptr() const
{
return &x;
}
inline void Vector2::set(float x, float y)
{
this->x = x;
this->y = y;
}
inline Vector2 Vector2::operator-() const
{
return Vector2(-x, -y);
}
inline void Vector2::operator+=(Vector2::Arg v)
{
x += v.x;
y += v.y;
}
inline void Vector2::operator-=(Vector2::Arg v)
{
x -= v.x;
y -= v.y;
}
inline void Vector2::operator*=(float s)
{
x *= s;
y *= s;
}
inline void Vector2::operator*=(Vector2::Arg v)
{
x *= v.x;
y *= v.y;
}
inline bool operator==(Vector2::Arg a, Vector2::Arg b)
{
return a.x == b.x && a.y == b.y;
}
inline bool operator!=(Vector2::Arg a, Vector2::Arg b)
{
return a.x != b.x || a.y != b.y;
}
// Vector3
inline Vector3::Vector3() {}
inline Vector3::Vector3(float f) : x(f), y(f), z(f) {}
inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {}
inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {}
inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {}
inline const Vector3 & Vector3::operator=(Vector3::Arg v)
{
x = v.x;
y = v.y;
z = v.z;
return *this;
}
inline Vector2 Vector3::xy() const
{
return Vector2(x, y);
}
inline const float * Vector3::ptr() const
{
return &x;
}
inline void Vector3::set(float x, float y, float z)
{
this->x = x;
this->y = y;
this->z = z;
}
inline Vector3 Vector3::operator-() const
{
return Vector3(-x, -y, -z);
}
inline void Vector3::operator+=(Vector3::Arg v)
{
x += v.x;
y += v.y;
z += v.z;
}
inline void Vector3::operator-=(Vector3::Arg v)
{
x -= v.x;
y -= v.y;
z -= v.z;
}
inline void Vector3::operator*=(float s)
{
x *= s;
y *= s;
z *= s;
}
inline void Vector3::operator/=(float s)
{
float is = 1.0f / s;
x *= is;
y *= is;
z *= is;
}
inline void Vector3::operator*=(Vector3::Arg v)
{
x *= v.x;
y *= v.y;
z *= v.z;
}
inline void Vector3::operator/=(Vector3::Arg v)
{
x /= v.x;
y /= v.y;
z /= v.z;
}
inline bool operator==(Vector3::Arg a, Vector3::Arg b)
{
return a.x == b.x && a.y == b.y && a.z == b.z;
}
inline bool operator!=(Vector3::Arg a, Vector3::Arg b)
{
return a.x != b.x || a.y != b.y || a.z != b.z;
}
// Vector4
inline Vector4::Vector4() {}
inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {}
inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {}
inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {}
inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {}
inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
inline const Vector4 & Vector4::operator=(const Vector4 & v)
{
x = v.x;
y = v.y;
z = v.z;
w = v.w;
return *this;
}
inline Vector2 Vector4::xy() const
{
return Vector2(x, y);
}
inline Vector2 Vector4::zw() const
{
return Vector2(z, w);
}
inline Vector3 Vector4::xyz() const
{
return Vector3(x, y, z);
}
inline const float * Vector4::ptr() const
{
return &x;
}
inline void Vector4::set(float x, float y, float z, float w)
{
this->x = x;
this->y = y;
this->z = z;
this->w = w;
}
inline Vector4 Vector4::operator-() const
{
return Vector4(-x, -y, -z, -w);
}
inline void Vector4::operator+=(Vector4::Arg v)
{
x += v.x;
y += v.y;
z += v.z;
w += v.w;
}
inline void Vector4::operator-=(Vector4::Arg v)
{
x -= v.x;
y -= v.y;
z -= v.z;
w -= v.w;
}
inline void Vector4::operator*=(float s)
{
x *= s;
y *= s;
z *= s;
w *= s;
}
inline void Vector4::operator/=(float s)
{
x /= s;
y /= s;
z /= s;
w /= s;
}
inline void Vector4::operator*=(Vector4::Arg v)
{
x *= v.x;
y *= v.y;
z *= v.z;
w *= v.w;
}
inline void Vector4::operator/=(Vector4::Arg v)
{
x /= v.x;
y /= v.y;
z /= v.z;
w /= v.w;
}
inline bool operator==(Vector4::Arg a, Vector4::Arg b)
{
return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
}
inline bool operator!=(Vector4::Arg a, Vector4::Arg b)
{
return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w;
}
// Functions
// Vector2
inline Vector2 add(Vector2::Arg a, Vector2::Arg b)
{
return Vector2(a.x + b.x, a.y + b.y);
}
inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b)
{
return add(a, b);
}
inline Vector2 sub(Vector2::Arg a, Vector2::Arg b)
{
return Vector2(a.x - b.x, a.y - b.y);
}
inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b)
{
return sub(a, b);
}
inline Vector2 scale(Vector2::Arg v, float s)
{
return Vector2(v.x * s, v.y * s);
}
inline Vector2 scale(Vector2::Arg v, Vector2::Arg s)
{
return Vector2(v.x * s.x, v.y * s.y);
}
inline Vector2 operator*(Vector2::Arg v, float s)
{
return scale(v, s);
}
inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2)
{
return Vector2(v1.x*v2.x, v1.y*v2.y);
}
inline Vector2 operator*(float s, Vector2::Arg v)
{
return scale(v, s);
}
inline Vector2 operator/(Vector2::Arg v, float s)
{
return scale(v, 1.0f/s);
}
inline Vector2 lerp(Vector2::Arg v1, Vector2::Arg v2, float t)
{
const float s = 1.0f - t;
return Vector2(v1.x * s + t * v2.x, v1.y * s + t * v2.y);
}
inline float dot(Vector2::Arg a, Vector2::Arg b)
{
return a.x * b.x + a.y * b.y;
}
inline float lengthSquared(Vector2::Arg v)
{
return v.x * v.x + v.y * v.y;
}
inline float length(Vector2::Arg v)
{
return sqrtf(lengthSquared(v));
}
inline float distance(Vector2::Arg a, Vector2::Arg b)
{
return length(a - b);
}
inline float inverseLength(Vector2::Arg v)
{
return 1.0f / sqrtf(lengthSquared(v));
}
inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
}
inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
{
float l = length(v);
NV_UNUSED(epsilon);
nvDebugCheck(!isZero(l, epsilon));
Vector2 n = scale(v, 1.0f / l);
nvDebugCheck(isNormalized(n));
return n;
}
inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
{
float l = length(v);
if (isZero(l, epsilon)) {
return fallback;
}
return scale(v, 1.0f / l);
}
// Safe, branchless normalization from Andy Firth. All error checking omitted.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
inline Vector2 normalizeFast(Vector2::Arg v)
{
const float very_small_float = 1.0e-037f;
float l = very_small_float + length(v);
return scale(v, 1.0f / l);
}
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon);
}
inline Vector2 min(Vector2::Arg a, Vector2::Arg b)
{
return Vector2(min(a.x, b.x), min(a.y, b.y));
}
inline Vector2 max(Vector2::Arg a, Vector2::Arg b)
{
return Vector2(max(a.x, b.x), max(a.y, b.y));
}
inline Vector2 clamp(Vector2::Arg v, float min, float max)
{
return Vector2(clamp(v.x, min, max), clamp(v.y, min, max));
}
inline Vector2 saturate(Vector2::Arg v)
{
return Vector2(saturate(v.x), saturate(v.y));
}
inline bool isFinite(Vector2::Arg v)
{
return isFinite(v.x) && isFinite(v.y);
}
inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f))
{
if (!isFinite(v)) return fallback;
Vector2 vf = v;
nv::floatCleanup(vf.component, 2);
return vf;
}
// Note, this is the area scaled by 2!
inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1)
{
return (v0.x * v1.y - v0.y * v1.x); // * 0.5f;
}
inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
{
// IC: While it may be appealing to use the following expression:
//return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f;
// That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point
// numbers and the result becomes very unstable and dependent on the order of the factors.
// Instead, it's preferable to subtract the vertices first, and multiply the resulting small values together. The result
// in this case is always much more accurate (as long as the triangle is small) and less dependent on the location of
// the triangle.
//return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f;
return triangleArea(a-c, b-c);
}
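// Illustrative values: for a = (0,0), b = (1,0), c = (0,1), triangleArea(a, b, c)
// returns 1.0f, i.e. twice the geometric area of 0.5; the positive sign indicates
// counter-clockwise winding.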
template <>
inline uint hash(const Vector2 & v, uint h)
{
return sdbmFloatHash(v.component, 2, h);
}
// Vector3
inline Vector3 add(Vector3::Arg a, Vector3::Arg b)
{
return Vector3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline Vector3 add(Vector3::Arg a, float b)
{
return Vector3(a.x + b, a.y + b, a.z + b);
}
inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b)
{
return add(a, b);
}
inline Vector3 operator+(Vector3::Arg a, float b)
{
return add(a, b);
}
inline Vector3 sub(Vector3::Arg a, Vector3::Arg b)
{
return Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline Vector3 sub(Vector3::Arg a, float b)
{
return Vector3(a.x - b, a.y - b, a.z - b);
}
inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b)
{
return sub(a, b);
}
inline Vector3 operator-(Vector3::Arg a, float b)
{
return sub(a, b);
}
inline Vector3 cross(Vector3::Arg a, Vector3::Arg b)
{
return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
}
inline Vector3 scale(Vector3::Arg v, float s)
{
return Vector3(v.x * s, v.y * s, v.z * s);
}
inline Vector3 scale(Vector3::Arg v, Vector3::Arg s)
{
return Vector3(v.x * s.x, v.y * s.y, v.z * s.z);
}
inline Vector3 operator*(Vector3::Arg v, float s)
{
return scale(v, s);
}
inline Vector3 operator*(float s, Vector3::Arg v)
{
return scale(v, s);
}
inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s)
{
return scale(v, s);
}
inline Vector3 operator/(Vector3::Arg v, float s)
{
return scale(v, 1.0f/s);
}
/*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s)
{
return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s);
}*/
inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t)
{
const float s = 1.0f - t;
return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z);
}
inline float dot(Vector3::Arg a, Vector3::Arg b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
inline float lengthSquared(Vector3::Arg v)
{
return v.x * v.x + v.y * v.y + v.z * v.z;
}
inline float length(Vector3::Arg v)
{
return sqrtf(lengthSquared(v));
}
inline float distance(Vector3::Arg a, Vector3::Arg b)
{
return length(a - b);
}
inline float distanceSquared(Vector3::Arg a, Vector3::Arg b)
{
return lengthSquared(a - b);
}
inline float inverseLength(Vector3::Arg v)
{
return 1.0f / sqrtf(lengthSquared(v));
}
inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
}
inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON)
{
float l = length(v);
NV_UNUSED(epsilon);
nvDebugCheck(!isZero(l, epsilon));
Vector3 n = scale(v, 1.0f / l);
nvDebugCheck(isNormalized(n));
return n;
}
inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON)
{
float l = length(v);
if (isZero(l, epsilon)) {
return fallback;
}
return scale(v, 1.0f / l);
}
// Safe, branchless normalization from Andy Firth. All error checking omitted.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
inline Vector3 normalizeFast(Vector3::Arg v)
{
const float very_small_float = 1.0e-037f;
float l = very_small_float + length(v);
return scale(v, 1.0f / l);
}
inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon);
}
inline Vector3 min(Vector3::Arg a, Vector3::Arg b)
{
return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
}
inline Vector3 max(Vector3::Arg a, Vector3::Arg b)
{
return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
}
inline Vector3 clamp(Vector3::Arg v, float min, float max)
{
return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max));
}
inline Vector3 saturate(Vector3::Arg v)
{
return Vector3(saturate(v.x), saturate(v.y), saturate(v.z));
}
inline Vector3 floor(Vector3::Arg v)
{
return Vector3(floorf(v.x), floorf(v.y), floorf(v.z));
}
inline Vector3 ceil(Vector3::Arg v)
{
return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z));
}
inline bool isFinite(Vector3::Arg v)
{
return isFinite(v.x) && isFinite(v.y) && isFinite(v.z);
}
inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f))
{
if (!isFinite(v)) return fallback;
Vector3 vf = v;
nv::floatCleanup(vf.component, 3);
return vf;
}
inline Vector3 reflect(Vector3::Arg v, Vector3::Arg n)
{
return v - (2 * dot(v, n)) * n;
}
template <>
inline uint hash(const Vector3 & v, uint h)
{
return sdbmFloatHash(v.component, 3, h);
}
// Vector4
inline Vector4 add(Vector4::Arg a, Vector4::Arg b)
{
return Vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b)
{
return add(a, b);
}
inline Vector4 sub(Vector4::Arg a, Vector4::Arg b)
{
return Vector4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}
inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b)
{
return sub(a, b);
}
inline Vector4 scale(Vector4::Arg v, float s)
{
return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
}
inline Vector4 scale(Vector4::Arg v, Vector4::Arg s)
{
return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w);
}
inline Vector4 operator*(Vector4::Arg v, float s)
{
return scale(v, s);
}
inline Vector4 operator*(float s, Vector4::Arg v)
{
return scale(v, s);
}
inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s)
{
return scale(v, s);
}
inline Vector4 operator/(Vector4::Arg v, float s)
{
return scale(v, 1.0f/s);
}
/*inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s)
{
return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s);
}*/
inline Vector4 lerp(Vector4::Arg v1, Vector4::Arg v2, float t)
{
const float s = 1.0f - t;
return Vector4(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z, v1.w * s + t * v2.w);
}
inline float dot(Vector4::Arg a, Vector4::Arg b)
{
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
inline float lengthSquared(Vector4::Arg v)
{
return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
}
inline float length(Vector4::Arg v)
{
return sqrtf(lengthSquared(v));
}
inline float inverseLength(Vector4::Arg v)
{
return 1.0f / sqrtf(lengthSquared(v));
}
inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
}
inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON)
{
float l = length(v);
NV_UNUSED(epsilon);
nvDebugCheck(!isZero(l, epsilon));
Vector4 n = scale(v, 1.0f / l);
nvDebugCheck(isNormalized(n));
return n;
}
inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON)
{
float l = length(v);
if (isZero(l, epsilon)) {
return fallback;
}
return scale(v, 1.0f / l);
}
// Safe, branchless normalization from Andy Firth. All error checking omitted.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
inline Vector4 normalizeFast(Vector4::Arg v)
{
const float very_small_float = 1.0e-037f;
float l = very_small_float + length(v);
return scale(v, 1.0f / l);
}
inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon);
}
inline Vector4 min(Vector4::Arg a, Vector4::Arg b)
{
return Vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
}
inline Vector4 max(Vector4::Arg a, Vector4::Arg b)
{
return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
}
inline Vector4 clamp(Vector4::Arg v, float min, float max)
{
return Vector4(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max), clamp(v.w, min, max));
}
inline Vector4 saturate(Vector4::Arg v)
{
return Vector4(saturate(v.x), saturate(v.y), saturate(v.z), saturate(v.w));
}
inline bool isFinite(Vector4::Arg v)
{
return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w);
}
inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f))
{
if (!isFinite(v)) return fallback;
Vector4 vf = v;
nv::floatCleanup(vf.component, 4);
return vf;
}
template <>
inline uint hash(const Vector4 & v, uint h)
{
return sdbmFloatHash(v.component, 4, h);
}
#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float
//int:
inline Vector2 scale(Vector2::Arg v, int s)
{
return Vector2(v.x * s, v.y * s);
}
inline Vector2 operator*(Vector2::Arg v, int s)
{
return scale(v, s);
}
inline Vector2 operator*(int s, Vector2::Arg v)
{
return scale(v, s);
}
inline Vector2 operator/(Vector2::Arg v, int s)
{
return scale(v, 1.0f/s);
}
inline Vector3 scale(Vector3::Arg v, int s)
{
return Vector3(v.x * s, v.y * s, v.z * s);
}
inline Vector3 operator*(Vector3::Arg v, int s)
{
return scale(v, s);
}
inline Vector3 operator*(int s, Vector3::Arg v)
{
return scale(v, s);
}
inline Vector3 operator/(Vector3::Arg v, int s)
{
return scale(v, 1.0f/s);
}
inline Vector4 scale(Vector4::Arg v, int s)
{
return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
}
inline Vector4 operator*(Vector4::Arg v, int s)
{
return scale(v, s);
}
inline Vector4 operator*(int s, Vector4::Arg v)
{
return scale(v, s);
}
inline Vector4 operator/(Vector4::Arg v, int s)
{
return scale(v, 1.0f/s);
}
//double:
inline Vector3 operator*(Vector3::Arg v, double s)
{
return scale(v, (float)s);
}
inline Vector3 operator*(double s, Vector3::Arg v)
{
return scale(v, (float)s);
}
inline Vector3 operator/(Vector3::Arg v, double s)
{
return scale(v, 1.f/((float)s));
}
#endif //NV_OS_IOS
} // nv namespace
#endif // NV_MATH_VECTOR_INL

1200
3rdparty/nvtt/nvmath/fitting.cpp vendored Normal file

File diff suppressed because it is too large

49
3rdparty/nvtt/nvmath/fitting.h vendored Normal file
View file

@ -0,0 +1,49 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_MATH_FITTING_H
#define NV_MATH_FITTING_H
#include "vector.h"
#include "plane.h"
namespace nv
{
namespace Fit
{
Vector3 computeCentroid(int n, const Vector3 * points);
Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector4 computeCentroid(int n, const Vector4 * points);
Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
Vector3 computeCovariance(int n, const Vector3 * points, float * covariance);
Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance);
Vector4 computeCovariance(int n, const Vector4 * points, float * covariance);
Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance);
Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points);
Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points);
Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points);
Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points);
Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points);
Plane bestPlane(int n, const Vector3 * points);
bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON);
bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]);
bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]);
// Returns number of clusters [1-4].
int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster);
}
} // nv namespace
#endif // NV_MATH_FITTING_H

112
3rdparty/nvtt/nvmath/matrix.h vendored Normal file
View file

@ -0,0 +1,112 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_MATRIX_H
#define NV_MATH_MATRIX_H
#include "vector.h"
// - Matrices are stored in memory in *column major* order.
// - Points are to be thought of as column vectors.
// - Transformation of a point p by a matrix M is: p' = M * p
namespace nv
{
enum identity_t { identity };
// 3x3 matrix.
class NVMATH_CLASS Matrix3
{
public:
Matrix3();
explicit Matrix3(float f);
explicit Matrix3(identity_t);
Matrix3(const Matrix3 & m);
Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2);
float data(uint idx) const;
float & data(uint idx);
float get(uint row, uint col) const;
float operator()(uint row, uint col) const;
float & operator()(uint row, uint col);
Vector3 row(uint i) const;
Vector3 column(uint i) const;
void operator*=(float s);
void operator/=(float s);
void operator+=(const Matrix3 & m);
void operator-=(const Matrix3 & m);
void scale(float s);
void scale(Vector3::Arg s);
float determinant() const;
private:
float m_data[9];
};
// Solve equation system using LU decomposition and back-substitution.
extern bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x);
// Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
// 4x4 matrix.
class NVMATH_CLASS Matrix
{
public:
typedef Matrix const & Arg;
Matrix();
explicit Matrix(float f);
explicit Matrix(identity_t);
Matrix(const Matrix3 & m);
Matrix(const Matrix & m);
Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
//explicit Matrix(const float m[]); // m is assumed to contain 16 elements
float data(uint idx) const;
float & data(uint idx);
float get(uint row, uint col) const;
float operator()(uint row, uint col) const;
float & operator()(uint row, uint col);
const float * ptr() const;
Vector4 row(uint i) const;
Vector4 column(uint i) const;
void zero();
void identity();
void scale(float s);
void scale(Vector3::Arg s);
void translate(Vector3::Arg t);
void rotate(float theta, float v0, float v1, float v2);
float determinant() const;
void operator+=(const Matrix & m);
void operator-=(const Matrix & m);
void apply(Matrix::Arg m);
private:
float m_data[16];
};
// Solve equation system using LU decomposition and back-substitution.
extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x);
// Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x);
// Compute inverse using LU decomposition.
extern Matrix inverseLU(const Matrix & m);
// Compute inverse using Gaussian elimination and partial pivoting.
extern Matrix inverse(const Matrix & m);
extern Matrix3 inverse(const Matrix3 & m);
} // nv namespace
#endif // NV_MATH_MATRIX_H

1274
3rdparty/nvtt/nvmath/matrix.inl vendored Normal file

File diff suppressed because it is too large

56
3rdparty/nvtt/nvmath/nvmath.h vendored Normal file
View file

@ -0,0 +1,56 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_H
#define NV_MATH_H
#include <math.h>
#include <float.h> // finite, isnan
#include "nvcore/utils.h" // max, clamp
#define NVMATH_API
#define NVMATH_CLASS
#define PI float(3.1415926535897932384626433833)
#define NV_EPSILON (0.0001f)
#define NV_NORMAL_EPSILON (0.001f)
namespace nv
{
inline float toRadian(float degree) { return degree * (PI / 180.0f); }
inline float toDegree(float radian) { return radian * (180.0f / PI); }
// Robust floating point comparisons:
// http://realtimecollisiondetection.net/blog/?p=89
inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
{
//return fabs(f0-f1) <= epsilon;
return fabs(f0-f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1));
}
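// Illustrative values: equal(1000000.0f, 1000000.1f) is true even though the
// difference exceeds NV_EPSILON, because the tolerance scales with the larger
// magnitude; equal(0.0f, 0.001f) is false.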
inline bool isZero(const float f, const float epsilon = NV_EPSILON)
{
return fabsf(f) <= epsilon;
}
inline bool isFinite(const float f)
{
#if defined(_MSC_VER)
return _finite(f) != 0;
#else
return isfinite(f) != 0;
#endif
}
// Eliminates negative zeros from a float array.
inline void floatCleanup(float * fp, int n)
{
for (int i = 0; i < n; i++) {
//nvDebugCheck(isFinite(fp[i]));
union { float f; uint32 i; } x = { fp[i] };
if (x.i == 0x80000000) fp[i] = 0.0f;
}
}
inline float saturate(float f) {
return clamp(f, 0.0f, 1.0f);
}
}
#endif // NV_MATH_H

40
3rdparty/nvtt/nvmath/plane.h vendored Normal file
View file

@ -0,0 +1,40 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#ifndef NV_MATH_PLANE_H
#define NV_MATH_PLANE_H
#include "nvmath.h"
#include "vector.h"
namespace nv
{
class Matrix;
class NVMATH_CLASS Plane
{
public:
Plane();
Plane(float x, float y, float z, float w);
Plane(const Vector4 & v);
Plane(const Vector3 & v, float d);
Plane(const Vector3 & normal, const Vector3 & point);
Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2);
const Plane & operator=(const Plane & v);
Vector3 vector() const;
float offset() const;
void operator*=(float s);
Vector4 v;
};
Plane transformPlane(const Matrix &, const Plane &);
Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c);
} // nv namespace
#endif // NV_MATH_PLANE_H

49
3rdparty/nvtt/nvmath/plane.inl vendored Normal file
View file

@ -0,0 +1,49 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#pragma once
#ifndef NV_MATH_PLANE_INL
#define NV_MATH_PLANE_INL
#include "Plane.h"
#include "Vector.inl"
namespace nv
{
inline Plane::Plane() {}
inline Plane::Plane(float x, float y, float z, float w) : v(x, y, z, w) {}
inline Plane::Plane(const Vector4 & v) : v(v) {}
inline Plane::Plane(const Vector3 & v, float d) : v(v, d) {}
inline Plane::Plane(const Vector3 & normal, const Vector3 & point) : v(normal, -dot(normal, point)) {}
inline Plane::Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2) {
Vector3 n = cross(v1-v0, v2-v0);
float d = -dot(n, v0);
v = Vector4(n, d);
}
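// Note: the three-point constructor above does not normalize the plane; apply
// normalize() below before interpreting distance() results as metric distances.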
inline const Plane & Plane::operator=(const Plane & p) { v = p.v; return *this; }
inline Vector3 Plane::vector() const { return v.xyz(); }
inline float Plane::offset() const { return v.w; }
// Normalize plane.
inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)
{
const float len = length(plane.vector());
const float inv = isZero(len, epsilon) ? 0 : 1.0f / len;
return Plane(plane.v * inv);
}
// Get the signed distance from the given point to this plane.
inline float distance(const Plane & plane, const Vector3 & point)
{
return dot(plane.vector(), point) + plane.offset();
}
inline void Plane::operator*=(float s)
{
v *= s;
}
} // nv namespace
#endif // NV_MATH_PLANE_INL

148
3rdparty/nvtt/nvmath/vector.h vendored Normal file
View file

@ -0,0 +1,148 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_VECTOR_H
#define NV_MATH_VECTOR_H
#include "nvmath.h"
namespace nv
{
class NVMATH_CLASS Vector2
{
public:
typedef Vector2 const & Arg;
Vector2();
explicit Vector2(float f);
Vector2(float x, float y);
Vector2(Vector2::Arg v);
//template <typename T> explicit Vector2(const T & v) : x(v.x), y(v.y) {}
//template <typename T> operator T() const { return T(x, y); }
const Vector2 & operator=(Vector2::Arg v);
const float * ptr() const;
void set(float x, float y);
Vector2 operator-() const;
void operator+=(Vector2::Arg v);
void operator-=(Vector2::Arg v);
void operator*=(float s);
void operator*=(Vector2::Arg v);
friend bool operator==(Vector2::Arg a, Vector2::Arg b);
friend bool operator!=(Vector2::Arg a, Vector2::Arg b);
union {
struct {
float x, y;
};
float component[2];
};
};
class NVMATH_CLASS Vector3
{
public:
typedef Vector3 const & Arg;
Vector3();
explicit Vector3(float x);
//explicit Vector3(int x) : x(float(x)), y(float(x)), z(float(x)) {}
Vector3(float x, float y, float z);
Vector3(Vector2::Arg v, float z);
Vector3(Vector3::Arg v);
//template <typename T> explicit Vector3(const T & v) : x(v.x), y(v.y), z(v.z) {}
//template <typename T> operator T() const { return T(x, y, z); }
const Vector3 & operator=(Vector3::Arg v);
Vector2 xy() const;
const float * ptr() const;
void set(float x, float y, float z);
Vector3 operator-() const;
void operator+=(Vector3::Arg v);
void operator-=(Vector3::Arg v);
void operator*=(float s);
void operator/=(float s);
void operator*=(Vector3::Arg v);
void operator/=(Vector3::Arg v);
friend bool operator==(Vector3::Arg a, Vector3::Arg b);
friend bool operator!=(Vector3::Arg a, Vector3::Arg b);
union {
struct {
float x, y, z;
};
float component[3];
};
};
class NVMATH_CLASS Vector4
{
public:
typedef Vector4 const & Arg;
Vector4();
explicit Vector4(float x);
Vector4(float x, float y, float z, float w);
Vector4(Vector2::Arg v, float z, float w);
Vector4(Vector2::Arg v, Vector2::Arg u);
Vector4(Vector3::Arg v, float w);
Vector4(Vector4::Arg v);
// Vector4(const Quaternion & v);
//template <typename T> explicit Vector4(const T & v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
//template <typename T> operator T() const { return T(x, y, z, w); }
const Vector4 & operator=(Vector4::Arg v);
Vector2 xy() const;
Vector2 zw() const;
Vector3 xyz() const;
const float * ptr() const;
void set(float x, float y, float z, float w);
Vector4 operator-() const;
void operator+=(Vector4::Arg v);
void operator-=(Vector4::Arg v);
void operator*=(float s);
void operator/=(float s);
void operator*=(Vector4::Arg v);
void operator/=(Vector4::Arg v);
friend bool operator==(Vector4::Arg a, Vector4::Arg b);
friend bool operator!=(Vector4::Arg a, Vector4::Arg b);
union {
struct {
float x, y, z, w;
};
float component[4];
};
};
} // nv namespace
// If we had these functions, they would be ambiguous, the compiler would not know which one to pick:
//template <typename T> Vector2 to(const T & v) { return Vector2(v.x, v.y); }
//template <typename T> Vector3 to(const T & v) { return Vector3(v.x, v.y, v.z); }
//template <typename T> Vector4 to(const T & v) { return Vector4(v.x, v.y, v.z, v.z); }
// We could use a cast operator so that we could infer the expected type, but that doesn't work the same way in all compilers and produces horrible error messages.
// Instead we simply have explicit casts:
template <typename T> T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); }
template <typename T> T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); }
template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); }
#endif // NV_MATH_VECTOR_H

95
3rdparty/nvtt/nvtt.cpp vendored Normal file
View file

@ -0,0 +1,95 @@
/*
* Copyright 2011-2015 Branimir Karadzic. All rights reserved.
* License: http://www.opensource.org/licenses/BSD-2-Clause
*/
#include "nvtt.h"
#include <string.h>
#include <stdlib.h> // abort
#include <bx/uint32_t.h>
#include "bc6h/zoh.h"
#include "bc7/avpcl.h"
#include "nvmath/vector.inl"
// Note: the printf-format attribute belongs on the declaration, not here; GCC
// rejects attributes on a function definition.
NVCORE_API int nvAbort(const char *, const char *, int, const char *, const char *, ...)
{
abort();
return 0;
}
namespace nvtt
{
using namespace nv;
void compressBC6H(const void* _input, uint32_t _width, uint32_t _height, uint32_t _stride, void* _output)
{
const uint8_t* src = (const uint8_t*)_input;
char* dst = (char*)_output;
for (uint32_t yy = 0; yy < _height; yy += 4)
{
for (uint32_t xx = 0; xx < _width; xx += 4)
{
const Vector4* rgba = (const Vector4*)&src[yy*_stride + xx*sizeof(float)*4];
ZOH::Utils::FORMAT = ZOH::UNSIGNED_F16;
ZOH::Tile zohTile(4, 4);
memset(zohTile.data, 0, sizeof(zohTile.data) );
memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map) );
for (uint32_t blockY = 0; blockY < 4; ++blockY)
{
for (uint32_t blockX = 0; blockX < 4; ++blockX)
{
// _stride is assumed to be the input row pitch in bytes, so consecutive
// tile rows are _stride/sizeof(Vector4) Vector4s apart.
Vector4 color = rgba[blockY*(_stride/sizeof(Vector4)) + blockX];
uint16 rHalf = bx::halfFromFloat(color.x);
uint16 gHalf = bx::halfFromFloat(color.y);
uint16 bHalf = bx::halfFromFloat(color.z);
zohTile.data[blockY][blockX].x = ZOH::Tile::half2float(rHalf);
zohTile.data[blockY][blockX].y = ZOH::Tile::half2float(gHalf);
zohTile.data[blockY][blockX].z = ZOH::Tile::half2float(bHalf);
zohTile.importance_map[blockY][blockX] = 1.0f;
}
}
// One 16-byte block per 4x4 tile; block index is (yy/4)*(_width/4) + xx/4.
ZOH::compress(zohTile, &dst[((yy/4)*(_width/4) + (xx/4))*16]);
}
}
}
void compressBC7(const void* _input, uint32_t _width, uint32_t _height, uint32_t _stride, void* _output)
{
const uint8_t* src = (const uint8_t*)_input;
char* dst = (char*)_output;
for (uint32_t yy = 0; yy < _height; yy += 4)
{
for (uint32_t xx = 0; xx < _width; xx += 4)
{
const Vector4* rgba = (const Vector4*)&src[yy*_stride + xx*sizeof(float)*4];
AVPCL::mode_rgb = false;
AVPCL::flag_premult = false;
AVPCL::flag_nonuniform = false;
AVPCL::flag_nonuniform_ati = false;
AVPCL::Tile avpclTile(4, 4);
memset(avpclTile.data, 0, sizeof(avpclTile.data) );
for (uint32_t blockY = 0; blockY < 4; ++blockY)
{
for (uint32_t blockX = 0; blockX < 4; ++blockX)
{
// As in compressBC6H, _stride is assumed to be the input row pitch in bytes.
Vector4 color = rgba[blockY*(_stride/sizeof(Vector4)) + blockX];
avpclTile.data[blockY][blockX] = color * 255.0f;
avpclTile.importance_map[blockY][blockX] = 1.0f;
}
}
// Same block-offset arithmetic as in compressBC6H.
AVPCL::compress(avpclTile, &dst[((yy/4)*(_width/4) + (xx/4))*16]);
}
}
}
} //namespace nvtt

13
3rdparty/nvtt/nvtt.h vendored Normal file
View file

@ -0,0 +1,13 @@
#ifndef NVTT_H
#define NVTT_H
#include <stdint.h>
namespace nvtt
{
void compressBC6H(const void* _input, uint32_t _width, uint32_t _height, uint32_t _stride, void* _output);
void compressBC7(const void* _input, uint32_t _width, uint32_t _height, uint32_t _stride, void* _output);
} // namespace nvtt
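// Illustrative usage (buffer names and setup are assumptions, not part of this
// header): both encoders walk the input in 4x4 pixel tiles of RGBA float data
// and emit one 16-byte block per tile, so output size is width*height bytes.
//
//   float rgba[4*4*4];                 // one 4x4 tile, 4 floats per pixel
//   uint8_t block[16];                 // one compressed block out
//   nvtt::compressBC6H(rgba, 4, 4, 4*4*sizeof(float), block);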
#endif // NVTT_H

View file

@ -12,6 +12,7 @@ project "texturec"
path.join(BGFX_DIR, "include"),
path.join(BGFX_DIR, "src"),
path.join(BGFX_DIR, "3rdparty"),
path.join(BGFX_DIR, "3rdparty/nvtt"),
}
files {
@ -20,6 +21,8 @@ project "texturec"
path.join(BGFX_DIR, "3rdparty/libsquish/**.h"),
path.join(BGFX_DIR, "3rdparty/etc1/**.cpp"),
path.join(BGFX_DIR, "3rdparty/etc1/**.h"),
path.join(BGFX_DIR, "3rdparty/nvtt/**.cpp"),
path.join(BGFX_DIR, "3rdparty/nvtt/**.h"),
path.join(BGFX_DIR, "tools/texturec/**.cpp"),
path.join(BGFX_DIR, "tools/texturec/**.h"),
}

View file

@ -13,6 +13,7 @@
#include "image.h"
#include <libsquish/squish.h>
#include <etc1/etc1.h>
#include <nvtt/nvtt.h>
#if 0
# define BX_TRACE(_format, ...) fprintf(stderr, "" _format "\n", ##__VA_ARGS__)
@ -113,6 +114,14 @@ int main(int _argc, const char* _argv[])
{
format = TextureFormat::ETC1;
}
else if (0 == bx::stricmp(type, "bc6h") )
{
format = TextureFormat::BC6H;
}
else if (0 == bx::stricmp(type, "bc7") )
{
format = TextureFormat::BC7;
}
}
uint32_t size = (uint32_t)bx::getSize(&reader);
@ -154,10 +163,33 @@ int main(int _argc, const char* _argv[])
);
break;
case TextureFormat::BC4:
case TextureFormat::BC5:
break;
case TextureFormat::BC6H:
nvtt::compressBC6H(rgba, mip.m_width, mip.m_height, 4, output);
break;
case TextureFormat::BC7:
nvtt::compressBC7(rgba, mip.m_width, mip.m_height, 4, output);
break;
case TextureFormat::ETC1:
etc1_encode_image(rgba, mip.m_width, mip.m_height, 4, mip.m_width*4, output);
break;
case TextureFormat::ETC2:
case TextureFormat::ETC2A:
case TextureFormat::ETC2A1:
case TextureFormat::PTC12:
case TextureFormat::PTC14:
case TextureFormat::PTC12A:
case TextureFormat::PTC14A:
case TextureFormat::PTC22:
case TextureFormat::PTC24:
break;
default:
break;
}