/* Copyright 2007 nVidia, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis // x10 (666x2).1 (666x2).1 64p 3bi #include "bits.h" #include "tile.h" #include "avpcl.h" #include "nvcore/debug.h" #include "nvmath/vector.inl" #include "nvmath/matrix.inl" #include "nvmath/fitting.h" #include "avpcl_utils.h" #include "endpts.h" #include #include #include "shapes_two.h" using namespace nv; using namespace AVPCL; #define NLSBMODES 2 // number of different lsb modes per region. since we have one .1 per region, that can have 2 values #define NINDICES 8 #define INDEXBITS 3 #define HIGH_INDEXBIT (1<<(INDEXBITS-1)) #define DENOM (NINDICES-1) #define BIAS (DENOM/2) // WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like? // i.e. can we search shapes in a particular order so we can see the global error minima easily and // stop without having to touch all shapes? #define POS_TO_X(pos) ((pos)&3) #define POS_TO_Y(pos) (((pos)>>2)&3) #define NBITSIZES (NREGIONS*2) #define ABITINDEX(region) (2*(region)+0) #define BBITINDEX(region) (2*(region)+1) struct ChanBits { int nbitsizes[NBITSIZES]; // bitsizes for one channel }; struct Pattern { ChanBits chan[NCHANNELS_RGB];// bit patterns used per channel int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed int mode; // associated mode value int modebits; // number of mode bits const char *encoding; // verilog description of encoding for this mode }; #define NPATTERNS 1 static Pattern patterns[NPATTERNS] = { // red green blue xfm mode mb 6,6,6,6, 6,6,6,6, 6,6,6,6, 0, 0x2, 2, "", }; struct RegionPrec { int endpt_a_prec[NCHANNELS_RGB]; int endpt_b_prec[NCHANNELS_RGB]; }; struct PatternPrec { RegionPrec region_precs[NREGIONS]; }; // this is the precision for each channel and region // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this! static PatternPrec pattern_precs[NPATTERNS] = { 6,6,6, 6,6,6, 6,6,6, 6,6,6, }; // return # of bits needed to store n. handle signed or unsigned cases properly static int nbits(int n, bool issigned) { int nb; if (n==0) return 0; // no bits needed for 0, signed or not else if (n > 0) { for (nb=0; n; ++nb, n>>=1) ; return nb + (issigned?1:0); } else { nvAssert (issigned); for (nb=0; n<-1; ++nb, n>>=1) ; return nb + 1; } } static void transform_forward(IntEndptsRGB_1 ep[NREGIONS]) { nvUnreachable(); } static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS]) { nvUnreachable(); } // endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_1& compr_endpts) { int onescnt; onescnt = 0; for (int j=0; j> 1; onescnt += endpts.B[j] & 1; compr_endpts.B[j] = endpts.B[j] >> 1; nvAssert (compr_endpts.A[j] < 64); nvAssert (compr_endpts.B[j] < 64); } compr_endpts.lsb = onescnt >= 3; } static void uncompress_one(const IntEndptsRGB_1& compr_endpts, IntEndptsRGB& endpts) { for (int j=0; j= 0 && pat_index < NPATTERNS); nvAssert (in.getptr() == patterns[pat_index].modebits); shapeindex = in.read(SHAPEBITS); p = patterns[pat_index]; for (int j=0; j 0; ++j) { float err = Utils::metric4(colors[i], palette[j]) * importance[i]; if (err > besterr) // error increased, so we're done searching break; if (err < besterr) { besterr = err; indices[i] = j; } } toterr += besterr; // check for early exit if (toterr > current_err) { // fill out bogus index values so it's initialized at least for (int k = i; k < np; ++k) indices[k] = -1; return FLT_MAX; } } return toterr; } // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec, int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS]) { // build list of possibles Vector4 palette[NREGIONS][NINDICES]; for (int region = 0; region < NREGIONS; ++region) { generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]); toterr[region] = 0; } Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); float err, besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { err = Utils::metric4(tile.data[y][x], palette[region][i]); if (err > besterr) // error increased, so we're done searching break; if (err < besterr) { besterr = err; indices[y][x] = i; } } toterr[region] += besterr; } } // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // this function returns either old_err or a value smaller (if it was successful in improving the error) static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, float old_err, int do_b, int indices[Tile::TILE_TOTAL]) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts // we have the temporary endpoints: temp_endpts IntEndptsRGB_1 temp_endpts; float min_err = old_err; // start with the best current error int beststep; int temp_indices[Tile::TILE_TOTAL]; for (int i=0; i>= 1) { bool improved = false; for (int sign = -1; sign <= 1; sign += 2) { if (do_b == 0) { temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) continue; } else { temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) continue; } float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices); if (err < min_err) { improved = true; min_err = err; beststep = sign * step; for (int i=0; i 5000 perturb endpoints 50% of precision // if err > 1000 25% // if err > 200 12.5% // if err > 40 6.25% // for np = 16 -- adjust error thresholds as a function of np // always ensure endpoint ordering is preserved (no need to overlap the scan) // if orig_err returned from this is less than its input value, then indices[] will contain valid indices static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL]) { IntEndptsRGB_1 temp_endpts; float best_err = orig_err; int aprec = region_prec.endpt_a_prec[ch]; int bprec = region_prec.endpt_b_prec[ch]; int good_indices[Tile::TILE_TOTAL]; int temp_indices[Tile::TILE_TOTAL]; for (int i=0; i 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } adelta = max(adelta, 3); bdelta = max(bdelta, 3); #ifdef DISABLE_EXHAUSTIVE adelta = bdelta = 3; #endif temp_endpts = opt_endpts; // ok figure out the range of A and B int alow = max(0, opt_endpts.A[ch] - adelta); int ahigh = min((1<= initial_error) break rgb0 += delta0 next = 1 else if (err1 >= initial_error) break rgb1 += delta1 next = 0 initial_err = map() for (;;) err = perturb(next ? rgb1:rgb0, delta) if (err >= initial_err) break next? rgb1 : rgb0 += delta initial_err = err */ IntEndptsRGB_1 new_a, new_b; IntEndptsRGB_1 new_endpt; int do_b; int orig_indices[Tile::TILE_TOTAL]; int new_indices[Tile::TILE_TOTAL]; int temp_indices0[Tile::TILE_TOTAL]; int temp_indices1[Tile::TILE_TOTAL]; // now optimize each channel separately // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices // if they differ, we restart the loop (which then falls back to looking for a first improvement.) for (int ch = 0; ch < NCHANNELS_RGB; ++ch) { // figure out which endpoint when perturbed gives the most improvement and start there // if we just alternate, we can easily end up in a local minima float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B if (err0 < err1) { if (err0 >= opt_err) continue; for (int i=0; i= opt_err) continue; for (int i=0; i= opt_err) break; for (int i=0; i 255.0f) v.x = 255.0f; if (v.y < 0.0f) v.y = 0.0f; if (v.y > 255.0f) v.y = 255.0f; if (v.z < 0.0f) v.z = 0.0f; if (v.z > 255.0f) v.z = 255.0f; v.w = 255.0f; } static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES]) { for (int region = 0; region < NREGIONS; ++region) for (int i = 0; i < NINDICES; ++i) palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM); } // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) { // build list of possibles Vector4 palette[NREGIONS][NINDICES]; generate_palette_unquantized(endpts, palette); float toterr = 0; Vector4 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); float besterr = FLT_MAX; for (int i = 0; i < NINDICES && besterr > 0; ++i) { float err = Utils::metric4(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; if (err > besterr) // error increased, so we're done searching. this works for most norms. break; if (err < besterr) besterr = err; } toterr += besterr; } return toterr; } static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) { for (int region=0; region maxp) maxp = dp; } // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values endpts[region].A = mean + minp*Vector4(direction, 0); endpts[region].B = mean + maxp*Vector4(direction, 0); // clamp endpoints // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best // shape based on endpoints being clamped clamp(endpts[region].A); clamp(endpts[region].B); } return map_colors(tile, shapeindex, endpts); } static void swap(float *list1, int *list2, int i, int j) { float t = list1[i]; list1[i] = list1[j]; list1[j] = t; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; } float AVPCL::compress_mode1(const Tile &t, char *block) { // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out const int NITEMS=NSHAPES/4; // pick the best NITEMS shapes and refine these. struct { FltEndpts endpts[NREGIONS]; } all[NSHAPES]; float roughmse[NSHAPES]; int index[NSHAPES]; char tempblock[AVPCL::BLOCKSIZE]; float msebest = FLT_MAX; for (int i=0; i roughmse[j]) swap(roughmse, index, i, j); for (int i=0; i0; ++i) { int shape = index[i]; float mse = refine(t, shape, &all[shape].endpts[0], tempblock); if (mse < msebest) { memcpy(block, tempblock, sizeof(tempblock)); msebest = mse; } } return msebest; }