Updated ib-compress.

This commit is contained in:
Branimir Karadžić 2014-09-30 21:13:35 -07:00
parent bb9a35e809
commit 3a67a73b3c
8 changed files with 1016 additions and 74 deletions

View file

@ -1,4 +1,4 @@
# Vertex Cache Optimised Index Buffer Compression # Vertex Cache Optimised Index Buffer Compression
This is a small proof of concept for compressing and decompressing index buffer triangle lists. It's designed to maintain the order of the triangle list and perform best with a triangle list that has been vertex cache post-transform optimised (a pre-transform cache optimisation is done as part of the compression). This is a small proof of concept for compressing and decompressing index buffer triangle lists. It's designed to maintain the order of the triangle list and perform best with a triangle list that has been vertex cache post-transform optimised (a pre-transform cache optimisation is done as part of the compression).
@ -8,7 +8,7 @@ There are some cases where the vertices within a triangle are re-ordered, but th
## How does it work? ## How does it work?
The inspiration was a mix of Fabian Giesen's Simple loss-less index buffer compression http://fgiesen.wordpress.com/2013/12/14/simple-lossless-index-buffer-compression/ and The inspiration was a mix of Fabian Giesen's [Simple loss-less index buffer compression](http://fgiesen.wordpress.com/2013/12/14/simple-lossless-index-buffer-compression/) and
the higher compression algorithms that make use of shared edges and re-order triangles. The idea was that there is probably a middle ground between them. the higher compression algorithms that make use of shared edges and re-order triangles. The idea was that there is probably a middle ground between them.
The basic goals were: The basic goals were:
@ -46,4 +46,9 @@ That's a better question! While my thoughts were that in theory it would average
Performance wise, with the code posted here, the Armadillo compresses in 18.5 milliseconds and decompresses in 6.6 milliseconds on average on my system. The Stanford bunny is more like 1.4 milliseconds to decompress, relatively. Performance wise, with the code posted here, the Armadillo compresses in 18.5 milliseconds and decompresses in 6.6 milliseconds on average on my system. The Stanford bunny is more like 1.4 milliseconds to decompress, relatively.
https://conorstokes.github.io/graphics/2014/09/28/vertex-cache-optimised-index-buffer-compression/ ## Update!
I've added a second, more efficient (in terms of both speed and size) compression algorithm (CompressIndexBuffer2 and DecompressIndexBuffer2), as well as some changes from upstream by Branimir Karadžić, who made some compiler compatibility fixes and added 16-bit index support. The second algorithm uses a single code per triangle instead of multiple codes for different cases.
For details of the original algorithm, please see this [blog post](http://conorstokes.github.io/graphics/2014/09/28/vertex-cache-optimised-index-buffer-compression/). For details of the second algorithm, please see this [blog post](http://conorstokes.github.io/graphics/2014/09/28/vertex-cache-optimised-index-buffer-compression/).

View file

@ -33,8 +33,517 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define IBC_INLINE __attribute__((always_inline)) #define IBC_INLINE __attribute__((always_inline))
#endif #endif
// How a single triangle vertex relates to the encoder state (see ClassifyVertex):
//   NEW_VERTEX    - the vertex has no entry in the vertex remap yet.
//   CACHED_VERTEX - the vertex is already remapped and is still inside the vertex FIFO window.
//   FREE_VERTEX   - the vertex is already remapped but was not found in the vertex FIFO window.
// The enumerators are used directly as indices into the CompressionCase table, so they must stay 0, 1, 2.
enum VertexClassification
{
    NEW_VERTEX,     // == 0
    CACHED_VERTEX,  // == 1
    FREE_VERTEX     // == 2
};
// One entry of the CompressionCase lookup table: how to encode a triangle whose three vertices have a
// particular combination of classifications.
struct VertexCompressionCase
{
// The triangle code written to the bitstream for this combination.
IndexBufferTriangleCodes code;
// Order in which the triangle's vertices are emitted: output position i takes input vertex vertexOrder[ i ].
uint32_t vertexOrder[ 3 ];
};
// Lookup table giving the triangle code and vertex rotation for every combination of vertex classifications.
// Indexed as CompressionCase[ class of vertex 0 ][ class of vertex 1 ][ class of vertex 2 ], using the
// VertexClassification values (new = 0, cached = 1, free = 2).
//
// Every vertexOrder is one of the three cyclic rotations { 0, 1, 2 }, { 1, 2, 0 } or { 2, 0, 1 }, so the
// winding of the triangle is kept; the rotation puts the vertices into the class order named by the code.
const VertexCompressionCase CompressionCase[3][3][3] =
{
    // vertex 0 is new
    {
        // vertex 1 is new
        {
            { IB_NEW_NEW_NEW,          { 0, 1, 2 } },   // new new new
            { IB_NEW_NEW_CACHED,       { 0, 1, 2 } },   // new new cached
            { IB_NEW_NEW_FREE,         { 0, 1, 2 } }    // new new free
        },
        // vertex 1 is cached
        {
            { IB_NEW_NEW_CACHED,       { 2, 0, 1 } },   // new cached new
            { IB_NEW_CACHED_CACHED,    { 0, 1, 2 } },   // new cached cached
            { IB_NEW_CACHED_FREE,      { 0, 1, 2 } }    // new cached free
        },
        // vertex 1 is free
        {
            { IB_NEW_NEW_FREE,         { 2, 0, 1 } },   // new free new
            { IB_NEW_FREE_CACHED,      { 0, 1, 2 } },   // new free cached
            { IB_NEW_FREE_FREE,        { 0, 1, 2 } }    // new free free
        }
    },
    // vertex 0 is cached
    {
        // vertex 1 is new
        {
            { IB_NEW_NEW_CACHED,       { 1, 2, 0 } },   // cached new new
            { IB_NEW_CACHED_CACHED,    { 1, 2, 0 } },   // cached new cached
            { IB_NEW_FREE_CACHED,      { 1, 2, 0 } }    // cached new free
        },
        // vertex 1 is cached
        {
            { IB_NEW_CACHED_CACHED,    { 2, 0, 1 } },   // cached cached new
            { IB_CACHED_CACHED_CACHED, { 0, 1, 2 } },   // cached cached cached
            { IB_CACHED_CACHED_FREE,   { 0, 1, 2 } }    // cached cached free
        },
        // vertex 1 is free
        {
            { IB_NEW_CACHED_FREE,      { 2, 0, 1 } },   // cached free new
            { IB_CACHED_CACHED_FREE,   { 2, 0, 1 } },   // cached free cached
            { IB_CACHED_FREE_FREE,     { 0, 1, 2 } }    // cached free free
        }
    },
    // vertex 0 is free
    {
        // vertex 1 is new
        {
            { IB_NEW_NEW_FREE,         { 1, 2, 0 } },   // free new new
            { IB_NEW_CACHED_FREE,      { 1, 2, 0 } },   // free new cached
            { IB_NEW_FREE_FREE,        { 1, 2, 0 } }    // free new free
        },
        // vertex 1 is cached
        {
            { IB_NEW_FREE_CACHED,      { 2, 0, 1 } },   // free cached new
            { IB_CACHED_CACHED_FREE,   { 1, 2, 0 } },   // free cached cached
            { IB_CACHED_FREE_FREE,     { 1, 2, 0 } }    // free cached free
        },
        // vertex 1 is free
        {
            { IB_NEW_FREE_FREE,        { 2, 0, 1 } },   // free free new
            { IB_CACHED_FREE_FREE,     { 2, 0, 1 } },   // free free cached
            { IB_FREE_FREE_FREE,       { 0, 1, 2 } }    // free free free
        }
    }
};
const uint32_t VERTEX_NOT_MAPPED = 0xFFFFFFFF; const uint32_t VERTEX_NOT_MAPPED = 0xFFFFFFFF;
// Decide how a vertex index has to be encoded, given the current encoder state.
//
// Returns NEW_VERTEX when vertexRemap holds no mapping for the vertex, CACHED_VERTEX when the vertex is found
// among the most recently written entries of the vertex FIFO (at most VERTEX_FIFO_SIZE of them), and
// FREE_VERTEX when it is mapped but not found there.
//
// cachedVertexIndex is written only for CACHED_VERTEX: it receives how many entries back from the newest
// FIFO entry the vertex was found (0 means the newest entry). It is left untouched otherwise.
static IBC_INLINE VertexClassification ClassifyVertex( uint32_t vertex, const uint32_t* vertexRemap, const uint32_t* vertexFifo, uint32_t verticesRead, uint32_t& cachedVertexIndex )
{
    // A vertex without a remap entry has not been emitted before.
    if ( vertexRemap[ vertex ] == VERTEX_NOT_MAPPED )
    {
        return NEW_VERTEX;
    }

    // Only the last VERTEX_FIFO_SIZE writes are still present in the ring buffer.
    const int32_t oldestLiveEntry = verticesRead >= VERTEX_FIFO_SIZE ? verticesRead - VERTEX_FIFO_SIZE : 0;
    int32_t       probe           = verticesRead - 1;

    // Walk from the newest entry towards the oldest one that is still live.
    while ( probe >= oldestLiveEntry )
    {
        if ( vertexFifo[ probe & VERTEX_FIFO_MASK ] == vertex )
        {
            cachedVertexIndex = ( verticesRead - 1 ) - probe;

            return CACHED_VERTEX;
        }

        --probe;
    }

    return FREE_VERTEX;
}
// Compress a triangle list into the bitstream using one code per triangle.
//
// Each triangle is first matched against a FIFO of recently emitted edges. If one of its edges is found there
// (in reversed direction), only the edge position and the remaining vertex are encoded. Otherwise all three
// vertices are classified as new/cached/free, the CompressionCase table selects a code and a rotation, and the
// per-vertex symbols are written.
//
// Parameters:
// [in]  triangles     - Triangle list index buffer, 3 indices per triangle, index type Ty.
// [in]  triangleCount - Number of triangles to process.
// [out] vertexRemap   - Indexed by original vertex index; first reset to VERTEX_NOT_MAPPED, then filled with the
//                       new index of each vertex in order of first use. Needs at least vertexCount entries.
// [in]  vertexCount   - Number of entries of vertexRemap to reset; asserted to be less than 0xFFFFFFFF.
// [in]  output        - Bitstream the codes and symbols are written to; only Write/WriteVInt are called here.
//
// Triangles with repeated indices are rejected by an assert.
template <typename Ty>
void CompressIndexBuffer2( const Ty* triangles,
uint32_t triangleCount,
uint32_t* vertexRemap,
uint32_t vertexCount,
WriteBitstream& output )
{
// Ring buffers of recent edges/vertices, addressed with (counter & MASK).
Edge edgeFifo[ EDGE_FIFO_SIZE ];
uint32_t vertexFifo[ VERTEX_FIFO_SIZE ];
// Running totals of entries ever pushed into each FIFO (not clamped to the FIFO size).
uint32_t edgesRead = 0;
uint32_t verticesRead = 0;
// Number of distinct vertices seen so far; also the next new index handed out through vertexRemap.
uint32_t newVertices = 0;
const Ty* triangleEnd = triangles + ( triangleCount * 3 );
assert( vertexCount < 0xFFFFFFFF );
uint32_t* vertexRemapEnd = vertexRemap + vertexCount;
// clear the vertex remapping to "not found" value of 0xFFFFFFFF - dirty, but low overhead.
for ( uint32_t* remappedVertex = vertexRemap; remappedVertex < vertexRemapEnd; ++remappedVertex )
{
*remappedVertex = VERTEX_NOT_MAPPED;
}
// iterate through the triangles
for ( const Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 )
{
// Oldest edge counter value whose slot has not been overwritten yet.
int32_t lowestEdgeCursor = edgesRead >= EDGE_FIFO_SIZE ? edgesRead - EDGE_FIFO_SIZE : 0;
int32_t edgeCursor = edgesRead - 1;
bool foundEdge = false;
// Position (0-2) within the triangle of the vertex that is not part of the matched edge.
int32_t spareVertex = 0;
// check to make sure that there are no degenerate triangles.
assert( triangle[ 0 ] != triangle[ 1 ] && triangle[ 1 ] != triangle[ 2 ] && triangle[ 2 ] != triangle[ 0 ] );
// Probe back through the edge fifo to see if one of the triangle edges is in the FIFO
for ( ; edgeCursor >= lowestEdgeCursor; --edgeCursor )
{
const Edge& edge = edgeFifo[ edgeCursor & EDGE_FIFO_MASK ];
// check all the edges in order and save the free vertex.
// A match means the stored edge is a triangle edge with first/second swapped, i.e. in reversed direction.
if ( edge.second == triangle[ 0 ] && edge.first == triangle[ 1 ] )
{
foundEdge = true;
spareVertex = 2;
break;
}
else if ( edge.second == triangle[ 1 ] && edge.first == triangle[ 2 ] )
{
foundEdge = true;
spareVertex = 0;
break;
}
else if ( edge.second == triangle[ 2 ] && edge.first == triangle[ 0 ] )
{
foundEdge = true;
spareVertex = 1;
break;
}
}
// An edge was found: classify the one remaining vertex and write the matching edge code.
if ( foundEdge )
{
uint32_t cachedVertex;
uint32_t spareVertexIndice = triangle[ spareVertex ];
VertexClassification freeVertexClass = ClassifyVertex( spareVertexIndice, vertexRemap, vertexFifo, verticesRead, cachedVertex );
// Distance of the matched edge back from the most recently pushed edge (0 = newest).
uint32_t relativeEdge = ( edgesRead - 1 ) - edgeCursor;
switch ( freeVertexClass )
{
case NEW_VERTEX:
// The two newest edges have dedicated codes, so no edge index is written for them.
switch ( relativeEdge )
{
case 0:
output.Write( IB_EDGE_0_NEW, IB_TRIANGLE_CODE_BITS );
break;
case 1:
output.Write( IB_EDGE_1_NEW, IB_TRIANGLE_CODE_BITS );
break;
default:
output.Write( IB_EDGE_NEW, IB_TRIANGLE_CODE_BITS );
output.Write( relativeEdge, CACHED_EDGE_BITS );
break;
}
// A new vertex gets the next remapped index and enters the vertex FIFO; no index symbol is written for it.
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = spareVertexIndice;
vertexRemap[ spareVertexIndice ] = newVertices;
++verticesRead;
++newVertices;
break;
case CACHED_VERTEX:
// Cached vertices are written as a FIFO-relative position and are not pushed into the FIFO again.
output.Write( IB_EDGE_CACHED, IB_TRIANGLE_CODE_BITS );
output.Write( relativeEdge, CACHED_EDGE_BITS );
output.Write( cachedVertex, CACHED_VERTEX_BITS );
break;
case FREE_VERTEX:
output.Write( IB_EDGE_FREE, IB_TRIANGLE_CODE_BITS );
output.Write( relativeEdge, CACHED_EDGE_BITS );
// Free vertices go back into the vertex FIFO.
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = spareVertexIndice;
++verticesRead;
// Written as the distance of its remapped index back from the most recently assigned new index.
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ spareVertexIndice ] );
break;
}
// Populate the edge fifo with the remaining edges
// Note - the winding order is important as we'll need to re-produce this on decompression.
// The edges are put in as if the found edge is the first edge in the triangle (which it will be when we
// reconstruct).
switch ( spareVertex )
{
case 0:
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] );
++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
++edgesRead;
break;
case 1:
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] );
++edgesRead;
break;
case 2:
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] );
++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] );
++edgesRead;
break;
}
}
else
{
// No edge matched: encode the triangle from its three vertices.
VertexClassification classifications[ 3 ];
// Only meaningful for positions classified as CACHED_VERTEX; indexed by ORIGINAL triangle position.
uint32_t cachedVertexIndices[ 3 ];
// classify each vertex as new, cached or free, potentially extracting a cached indice.
classifications[ 0 ] = ClassifyVertex( triangle[ 0 ], vertexRemap, vertexFifo, verticesRead, cachedVertexIndices[ 0 ] );
classifications[ 1 ] = ClassifyVertex( triangle[ 1 ], vertexRemap, vertexFifo, verticesRead, cachedVertexIndices[ 1 ] );
classifications[ 2 ] = ClassifyVertex( triangle[ 2 ], vertexRemap, vertexFifo, verticesRead, cachedVertexIndices[ 2 ] );
// use the classifications to lookup the matching compression code and potentially rotate the order of the vertices.
const VertexCompressionCase& compressionCase = CompressionCase[ classifications[ 0 ] ][ classifications[ 1 ] ][ classifications[ 2 ] ];
// rotate the order of the vertices based on the compression classification.
uint32_t reorderedTriangle[ 3 ];
reorderedTriangle[ 0 ] = triangle[ compressionCase.vertexOrder[ 0 ] ];
reorderedTriangle[ 1 ] = triangle[ compressionCase.vertexOrder[ 1 ] ];
reorderedTriangle[ 2 ] = triangle[ compressionCase.vertexOrder[ 2 ] ];
output.Write( compressionCase.code, IB_TRIANGLE_CODE_BITS );
// In every case below: new and free vertices are pushed into the vertex FIFO in reordered position order,
// cached vertices are not; new vertices receive consecutive remap indices; cached positions are looked up
// through vertexOrder because cachedVertexIndices is indexed by original position; free vertices are written
// relative to ( newVertices - 1 ) as it was BEFORE this triangle's new vertices are counted.
switch ( compressionCase.code )
{
case IB_NEW_NEW_NEW:
{
// The table uses the identity order for this code, so triangle[] is used directly.
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = triangle[ 0 ];
vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = triangle[ 1 ];
vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] = triangle[ 2 ];
vertexRemap[ triangle[ 0 ] ] = newVertices;
vertexRemap[ triangle[ 1 ] ] = newVertices + 1;
vertexRemap[ triangle[ 2 ] ] = newVertices + 2;
verticesRead += 3;
newVertices += 3;
break;
}
case IB_NEW_NEW_CACHED:
{
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ];
vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ];
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 2 ] ], CACHED_VERTEX_BITS );
vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices;
vertexRemap[ reorderedTriangle[ 1 ] ] = newVertices + 1;
verticesRead += 2;
newVertices += 2;
break;
}
case IB_NEW_NEW_FREE:
{
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ];
vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ];
vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ];
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] );
vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices;
vertexRemap[ reorderedTriangle[ 1 ] ] = newVertices + 1;
verticesRead += 3;
newVertices += 2;
break;
}
case IB_NEW_CACHED_CACHED:
{
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ];
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 1 ] ], CACHED_VERTEX_BITS );
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 2 ] ], CACHED_VERTEX_BITS );
vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices;
verticesRead += 1;
newVertices += 1;
break;
}
case IB_NEW_CACHED_FREE:
{
// Position 1 is cached and therefore skipped when pushing into the FIFO.
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ];
vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ];
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 1 ] ], CACHED_VERTEX_BITS );
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] );
vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices;
verticesRead += 2;
newVertices += 1;
break;
}
case IB_NEW_FREE_CACHED:
{
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ];
vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ];
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 1 ] ] );
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 2 ] ], CACHED_VERTEX_BITS );
vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices;
verticesRead += 2;
newVertices += 1;
break;
}
case IB_NEW_FREE_FREE:
{
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ];
vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ];
vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ];
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 1 ] ] );
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] );
vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices;
verticesRead += 3;
newVertices += 1;
break;
}
case IB_CACHED_CACHED_CACHED:
{
// Nothing enters the FIFO and no new indices are assigned; only three FIFO positions are written.
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 0 ] ], CACHED_VERTEX_BITS );
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 1 ] ], CACHED_VERTEX_BITS );
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 2 ] ], CACHED_VERTEX_BITS );
break;
}
case IB_CACHED_CACHED_FREE:
{
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ];
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 0 ] ], CACHED_VERTEX_BITS );
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 1 ] ], CACHED_VERTEX_BITS );
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] );
verticesRead += 1;
break;
}
case IB_CACHED_FREE_FREE:
{
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ];
vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ];
output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 0 ] ], CACHED_VERTEX_BITS );
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 1 ] ] );
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] );
verticesRead += 2;
break;
}
case IB_FREE_FREE_FREE:
{
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ];
vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ];
vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ];
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 0 ] ] );
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 1 ] ] );
output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] );
verticesRead += 3;
break;
}
}
// populate the edge fifo with the 3 most recent edges
// (in the rotated order, since that is the order the vertices were encoded in)
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( reorderedTriangle[ 0 ], reorderedTriangle[ 1 ] );
++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( reorderedTriangle[ 1 ], reorderedTriangle[ 2 ] );
++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( reorderedTriangle[ 2 ], reorderedTriangle[ 0 ] );
++edgesRead;
}
}
}
// Output the compression information for a single vertex, remapping any new vertices and updating the vertex fifo where needed. // Output the compression information for a single vertex, remapping any new vertices and updating the vertex fifo where needed.
static IBC_INLINE void OutputVertex( uint32_t vertex, static IBC_INLINE void OutputVertex( uint32_t vertex,
uint32_t* vertexRemap, uint32_t* vertexRemap,
@ -82,16 +591,7 @@ static IBC_INLINE void OutputVertex( uint32_t vertex,
uint32_t vertexOutput = ( newVertexCount - 1 ) - vertexRemap[ vertex ]; uint32_t vertexOutput = ( newVertexCount - 1 ) - vertexRemap[ vertex ];
// v-int encode the free vertex index. // v-int encode the free vertex index.
do output.WriteVInt( vertexOutput );
{
uint32_t lower7 = vertexOutput & 0x7F;
vertexOutput >>= 7;
output.Write( lower7 | ( vertexOutput > 0 ? 0x80 : 0 ), 8 );
} while ( vertexOutput > 0 );
// free vertices go back into the vertex cache. // free vertices go back into the vertex cache.
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = vertex; vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = vertex;
@ -121,13 +621,13 @@ void CompressIndexBuffer( const Ty* triangles,
uint32_t* vertexRemapEnd = vertexRemap + vertexCount; uint32_t* vertexRemapEnd = vertexRemap + vertexCount;
// clear the vertex remapping to "not found" value of 0xFFFFFFFF - dirty, but low overhead. // clear the vertex remapping to "not found" value of 0xFFFFFFFF - dirty, but low overhead.
for (uint32_t* remappedVertex = vertexRemap; remappedVertex < vertexRemapEnd; ++remappedVertex ) for ( uint32_t* remappedVertex = vertexRemap; remappedVertex < vertexRemapEnd; ++remappedVertex )
{ {
*remappedVertex = VERTEX_NOT_MAPPED; *remappedVertex = VERTEX_NOT_MAPPED;
} }
// iterate through the triangles // iterate through the triangles
for (const Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 ) for ( const Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 )
{ {
int32_t lowestEdgeCursor = edgesRead >= EDGE_FIFO_SIZE ? edgesRead - EDGE_FIFO_SIZE : 0; int32_t lowestEdgeCursor = edgesRead >= EDGE_FIFO_SIZE ? edgesRead - EDGE_FIFO_SIZE : 0;
int32_t edgeCursor = edgesRead - 1; int32_t edgeCursor = edgesRead - 1;
@ -189,33 +689,33 @@ void CompressIndexBuffer( const Ty* triangles,
{ {
case 0: case 0:
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 2 ], triangle[ 0 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] );
++edgesRead; ++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 0 ], triangle[ 1 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
++edgesRead; ++edgesRead;
break; break;
case 1: case 1:
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 0 ], triangle[ 1 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
++edgesRead; ++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 1 ], triangle[ 2 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] );
++edgesRead; ++edgesRead;
break; break;
case 2: case 2:
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 1 ], triangle[ 2 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] );
++edgesRead; ++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 2 ], triangle[ 0 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] );
++edgesRead; ++edgesRead;
break; break;
@ -229,35 +729,53 @@ void CompressIndexBuffer( const Ty* triangles,
OutputVertex( triangle[ 2 ], vertexRemap, newVertices, vertexFifo, verticesRead, output ); OutputVertex( triangle[ 2 ], vertexRemap, newVertices, vertexFifo, verticesRead, output );
// populate the edge fifo with the 3 most recent edges // populate the edge fifo with the 3 most recent edges
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 0 ], triangle[ 1 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
++edgesRead; ++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 1 ], triangle[ 2 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] );
++edgesRead; ++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 2 ], triangle[ 0 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] );
++edgesRead; ++edgesRead;
} }
} }
} }
void CompressIndexBuffer ( const uint16_t* triangles, void CompressIndexBuffer( const uint16_t* triangles,
uint32_t triangleCount, uint32_t triangleCount,
uint32_t* vertexRemap, uint32_t* vertexRemap,
uint32_t vertexCount, uint32_t vertexCount,
WriteBitstream& output )
{
CompressIndexBuffer<uint16_t>(triangles, triangleCount, vertexRemap, vertexCount, output);
}
void CompressIndexBuffer ( const uint32_t* triangles,
uint32_t triangleCount,
uint32_t* vertexRemap,
uint32_t vertexCount,
WriteBitstream& output ) WriteBitstream& output )
{ {
CompressIndexBuffer<uint32_t>(triangles, triangleCount, vertexRemap, vertexCount, output); CompressIndexBuffer<uint16_t>( triangles, triangleCount, vertexRemap, vertexCount, output );
} }
// Non-template entry point for 32-bit index buffers.
// Forwards all arguments unchanged to the CompressIndexBuffer template instantiated for uint32_t; the template
// argument is spelled out so the call resolves to the template rather than back to this overload.
void CompressIndexBuffer( const uint32_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output )
{
    CompressIndexBuffer< uint32_t >( triangles,
                                     triangleCount,
                                     vertexRemap,
                                     vertexCount,
                                     output );
}
// Non-template entry point of the per-triangle-code compressor for 16-bit index buffers.
// Forwards all arguments unchanged to the CompressIndexBuffer2 template instantiated for uint16_t; the template
// argument is spelled out so the call resolves to the template rather than back to this overload.
void CompressIndexBuffer2( const uint16_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output )
{
    CompressIndexBuffer2< uint16_t >( triangles,
                                      triangleCount,
                                      vertexRemap,
                                      vertexCount,
                                      output );
}
// Non-template entry point of the per-triangle-code compressor for 32-bit index buffers.
// Forwards all arguments unchanged to the CompressIndexBuffer2 template instantiated for uint32_t; the template
// argument is spelled out so the call resolves to the template rather than back to this overload.
void CompressIndexBuffer2( const uint32_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output )
{
    CompressIndexBuffer2< uint32_t >( triangles,
                                      triangleCount,
                                      vertexRemap,
                                      vertexCount,
                                      output );
}

View file

@ -27,10 +27,40 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once #pragma once
#include <stdint.h> #include <stdint.h>
#include "writebitstream.h" #include "WriteBitstream.h"
// Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be in pre-transform cache optimised // Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be in pre-transform cache optimised
// order. // order).
//
// This version has slightly worse compression and the matching decompression has worse performance than CompressIndexBuffer2, but it supports degenerate triangles
// (that have duplicate vertex indices). Output should be decompressed with DecompressIndexBuffer. It also changes the order of the vertices in each triangle less.
//
// It works by outputting a code (along with any required index symbols) per vertex.
//
// Parameters:
// [in] triangles - A typical triangle list index buffer (3 indices to vertices per triangle). 16 bit indices.
// [in] triangle count - The number of triangles to process.
// [out] vertexRemap - This will be populated with re-mappings that map old vertices to new vertices,
// where indexing with the old vertex index will get you the new one.
// It should be allocated as an array with at least vertexCount entries.
// [in] vertexCount - The number of vertices in the mesh. This should be less than 0xFFFFFFFF/2^32 - 1.
// [in] output - The stream that the compressed data will be written to. Note that we will not flush/finish the stream
// in case something else is going to be written after, so WriteBitstream::Finish will need to be called after this.
void CompressIndexBuffer( const uint16_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output );
// Same as above but 32bit indices.
void CompressIndexBuffer( const uint32_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output );
// Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be in pre-transform cache optimised
// order).
//
// This version has slightly better compression and the matching decompression has better performance than CompressIndexBuffer, but it does not support degenerate triangles
// (that have duplicate vertex indices). Output should be decompressed with DecompressIndexBuffer2. It changes the order of the vertices in each triangle more.
//
// This version also has compression optimisations that allow it to handle strip/fan cases a lot better compression wise.
//
// This works by outputting a code per triangle (along with the required index symbols).
// //
// Parameters: // Parameters:
// [in] triangles - A typical triangle list index buffer (3 indices to vertices per triangle). // [in] triangles - A typical triangle list index buffer (3 indices to vertices per triangle).
@ -41,7 +71,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// [in] vertexCount - The number of vertices in the mesh. This should be less than 0xFFFFFFFF/2^32 - 1. // [in] vertexCount - The number of vertices in the mesh. This should be less than 0xFFFFFFFF/2^32 - 1.
// [in] output - The stream that the compressed data will be written to. Note that we will not flush/finish the stream // [in] output - The stream that the compressed data will be written to. Note that we will not flush/finish the stream
// in case something else is going to be written after, so WriteBitstream::Finish will need to be called after this. // in case something else is going to be written after, so WriteBitstream::Finish will need to be called after this.
template <typename Ty> void CompressIndexBuffer2( const uint16_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output );
void CompressIndexBuffer( const Ty* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output );
// Same as above but 32bit indices
void CompressIndexBuffer2( const uint32_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output );
#endif // -- INDEX_BUFFER_COMPRESSION_H__ #endif // -- INDEX_BUFFER_COMPRESSION_H__

View file

@ -27,6 +27,319 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "IndexCompressionConstants.h" #include "IndexCompressionConstants.h"
#include <assert.h> #include <assert.h>
// Decompress a triangle list that was encoded with one IndexBufferTriangleCodes value per triangle.
// Parameters:
//     [out] triangles     - Receives 3 indices per triangle (triangleCount * 3 entries are written).
//     [in]  triangleCount - The number of triangles to decode.
//     [in]  input         - The bit stream the codes and their operands are read from.
//
// Decoder state:
//     edgeFifo    - ring buffer of recently emitted edges, addressed backwards from the newest entry.
//     vertexFifo  - ring buffer of recently emitted vertices, addressed backwards from the newest entry.
//     newVertices - how many never-before-seen vertices have been emitted; a "new" vertex takes this
//                   value as its index, a "free" vertex is ( newVertices - 1 ) - <v-int read from stream>.
//
// Triangles built from a cached edge emit that edge reversed (second, then first) and do not push a
// first edge; every other code pushes ( triangle[ 0 ], triangle[ 1 ] ). All codes then push the
// remaining two edges after the switch.
template <typename Ty>
void DecompressIndexBuffer2( Ty* triangles, uint32_t triangleCount, ReadBitstream& input )
{
    Edge      edgeFifo[ EDGE_FIFO_SIZE ];
    uint32_t  vertexFifo[ VERTEX_FIFO_SIZE ];

    uint32_t  edgesRead    = 0;
    uint32_t  verticesRead = 0;
    uint32_t  newVertices  = 0;
    const Ty* triangleEnd  = triangles + ( triangleCount * 3 );

    // iterate through the triangles
    for ( Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 )
    {
        IndexBufferTriangleCodes code = static_cast< IndexBufferTriangleCodes >( input.Read( IB_TRIANGLE_CODE_BITS ) );

        switch ( code )
        {
        case IB_EDGE_NEW:
        {
            uint32_t    edgeFifoIndex = input.Read( CACHED_EDGE_BITS );
            const Edge& edge          = edgeFifo[ ( ( edgesRead - 1 ) - edgeFifoIndex ) & EDGE_FIFO_MASK ];

            triangle[ 0 ] = static_cast< Ty >( edge.second );
            triangle[ 1 ] = static_cast< Ty >( edge.first );

            // vertexFifo is always wrapped with the vertex mask (not the edge mask), so the two
            // FIFO sizes are free to differ.
            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( newVertices );

            ++newVertices;
            ++verticesRead;
            break;
        }

        case IB_EDGE_CACHED:
        {
            uint32_t    edgeFifoIndex   = input.Read( CACHED_EDGE_BITS );
            uint32_t    vertexFifoIndex = input.Read( CACHED_VERTEX_BITS );
            const Edge& edge            = edgeFifo[ ( ( edgesRead - 1 ) - edgeFifoIndex ) & EDGE_FIFO_MASK ];

            triangle[ 0 ] = static_cast< Ty >( edge.second );
            triangle[ 1 ] = static_cast< Ty >( edge.first );
            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertexFifoIndex ) & VERTEX_FIFO_MASK ] );
            break;
        }

        case IB_EDGE_FREE:
        {
            uint32_t    edgeFifoIndex  = input.Read( CACHED_EDGE_BITS );
            uint32_t    relativeVertex = input.ReadVInt();
            const Edge& edge           = edgeFifo[ ( ( edgesRead - 1 ) - edgeFifoIndex ) & EDGE_FIFO_MASK ];

            triangle[ 0 ] = static_cast< Ty >( edge.second );
            triangle[ 1 ] = static_cast< Ty >( edge.first );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex );

            ++verticesRead;
            break;
        }

        case IB_NEW_NEW_NEW:
        {
            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 0 ] = static_cast< Ty >( newVertices );
            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
                triangle[ 1 ] = static_cast< Ty >( newVertices + 1 );
            vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( newVertices + 2 );

            newVertices  += 3;
            verticesRead += 3;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_NEW_NEW_CACHED:
        {
            uint32_t vertexFifoIndex = input.Read( CACHED_VERTEX_BITS );

            // The cached lookup happens before this triangle's new vertices enter the FIFO.
            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertexFifoIndex ) & VERTEX_FIFO_MASK ] );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 0 ] = static_cast< Ty >( newVertices );
            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
                triangle[ 1 ] = static_cast< Ty >( newVertices + 1 );

            verticesRead += 2;
            newVertices  += 2;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_NEW_NEW_FREE:
        {
            uint32_t relativeVertex = input.ReadVInt();

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 0 ] = static_cast< Ty >( newVertices );
            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
                triangle[ 1 ] = static_cast< Ty >( newVertices + 1 );

            // Free vertices are relative to newVertices as it was before this triangle.
            vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex );

            newVertices  += 2;
            verticesRead += 3;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_NEW_CACHED_CACHED:
        {
            uint32_t vertex1FifoIndex = input.Read( CACHED_VERTEX_BITS );
            uint32_t vertex2FifoIndex = input.Read( CACHED_VERTEX_BITS );

            triangle[ 1 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex1FifoIndex ) & VERTEX_FIFO_MASK ] );
            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex2FifoIndex ) & VERTEX_FIFO_MASK ] );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 0 ] = static_cast< Ty >( newVertices );

            ++verticesRead;
            ++newVertices;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_NEW_CACHED_FREE:
        {
            uint32_t vertexFifoIndex = input.Read( CACHED_VERTEX_BITS );
            uint32_t relativeVertex  = input.ReadVInt();

            triangle[ 1 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertexFifoIndex ) & VERTEX_FIFO_MASK ] );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 0 ] = static_cast< Ty >( newVertices );
            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex );

            verticesRead += 2;
            ++newVertices;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_NEW_FREE_CACHED:
        {
            // Stream order for this code is the v-int first, then the vertex FIFO index.
            uint32_t relativeVertex  = input.ReadVInt();
            uint32_t vertexFifoIndex = input.Read( CACHED_VERTEX_BITS );

            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertexFifoIndex ) & VERTEX_FIFO_MASK ] );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 0 ] = static_cast< Ty >( newVertices );
            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
                triangle[ 1 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex );

            verticesRead += 2;
            ++newVertices;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_NEW_FREE_FREE:
        {
            uint32_t relativeVertex1 = input.ReadVInt();
            uint32_t relativeVertex2 = input.ReadVInt();

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 0 ] = static_cast< Ty >( newVertices );
            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
                triangle[ 1 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex1 );
            vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex2 );

            verticesRead += 3;
            ++newVertices;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_CACHED_CACHED_CACHED:
        {
            uint32_t vertex0FifoIndex = input.Read( CACHED_VERTEX_BITS );
            uint32_t vertex1FifoIndex = input.Read( CACHED_VERTEX_BITS );
            uint32_t vertex2FifoIndex = input.Read( CACHED_VERTEX_BITS );

            triangle[ 0 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex0FifoIndex ) & VERTEX_FIFO_MASK ] );
            triangle[ 1 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex1FifoIndex ) & VERTEX_FIFO_MASK ] );
            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex2FifoIndex ) & VERTEX_FIFO_MASK ] );

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_CACHED_CACHED_FREE:
        {
            uint32_t vertex0FifoIndex = input.Read( CACHED_VERTEX_BITS );
            uint32_t vertex1FifoIndex = input.Read( CACHED_VERTEX_BITS );
            uint32_t relativeVertex2  = input.ReadVInt();

            triangle[ 0 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex0FifoIndex ) & VERTEX_FIFO_MASK ] );
            triangle[ 1 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex1FifoIndex ) & VERTEX_FIFO_MASK ] );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex2 );

            ++verticesRead;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_CACHED_FREE_FREE:
        {
            uint32_t vertex0FifoIndex = input.Read( CACHED_VERTEX_BITS );
            uint32_t relativeVertex1  = input.ReadVInt();
            uint32_t relativeVertex2  = input.ReadVInt();

            triangle[ 0 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex0FifoIndex ) & VERTEX_FIFO_MASK ] );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 1 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex1 );
            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex2 );

            verticesRead += 2;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_FREE_FREE_FREE:
        {
            uint32_t relativeVertex0 = input.ReadVInt();
            uint32_t relativeVertex1 = input.ReadVInt();
            uint32_t relativeVertex2 = input.ReadVInt();

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 0 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex0 );
            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
                triangle[ 1 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex1 );
            vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex2 );

            verticesRead += 3;

            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
            ++edgesRead;
            break;
        }

        case IB_EDGE_0_NEW:
        {
            // Implicit edge FIFO index 0: the most recently pushed edge.
            const Edge& edge = edgeFifo[ ( edgesRead - 1 ) & EDGE_FIFO_MASK ];

            triangle[ 0 ] = static_cast< Ty >( edge.second );
            triangle[ 1 ] = static_cast< Ty >( edge.first );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( newVertices );

            ++newVertices;
            ++verticesRead;
            break;
        }

        case IB_EDGE_1_NEW:
        {
            // Implicit edge FIFO index 1: the edge pushed just before the most recent one.
            const Edge& edge = edgeFifo[ ( ( edgesRead - 1 ) - 1 ) & EDGE_FIFO_MASK ];

            triangle[ 0 ] = static_cast< Ty >( edge.second );
            triangle[ 1 ] = static_cast< Ty >( edge.first );

            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
                triangle[ 2 ] = static_cast< Ty >( newVertices );

            ++newVertices;
            ++verticesRead;
            break;
        }
        }

        // Every triangle contributes its second and third edges to the edge FIFO.
        edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] );
        ++edgesRead;

        edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] );
        ++edgesRead;
    }
}
template <typename Ty> template <typename Ty>
void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream& input ) void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream& input )
{ {
@ -36,7 +349,7 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream
uint32_t edgesRead = 0; uint32_t edgesRead = 0;
uint32_t verticesRead = 0; uint32_t verticesRead = 0;
uint32_t newVertices = 0; uint32_t newVertices = 0;
const Ty* triangleEnd = triangles + ( triangleCount * 3 ); const Ty* triangleEnd = triangles + ( triangleCount * 3 );
// iterate through the triangles // iterate through the triangles
for ( Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 ) for ( Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 )
@ -52,8 +365,8 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream
{ {
case IB_NEW_VERTEX: case IB_NEW_VERTEX:
triangle[ readVertex ] = vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = newVertices; triangle[ readVertex ] = static_cast< Ty >( newVertices );
++readVertex; ++readVertex;
++verticesRead; ++verticesRead;
@ -69,8 +382,8 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream
uint32_t fifoIndex = input.Read( CACHED_EDGE_BITS ); uint32_t fifoIndex = input.Read( CACHED_EDGE_BITS );
const Edge& edge = edgeFifo[ ( ( edgesRead - 1 ) - fifoIndex ) & EDGE_FIFO_MASK ]; const Edge& edge = edgeFifo[ ( ( edgesRead - 1 ) - fifoIndex ) & EDGE_FIFO_MASK ];
triangle[ 0 ] = edge.second; triangle[ 0 ] = static_cast< Ty >( edge.second );
triangle[ 1 ] = edge.first; triangle[ 1 ] = static_cast< Ty >( edge.first );
readVertex += 2; readVertex += 2;
skipFirstEdge = true; skipFirstEdge = true;
@ -81,9 +394,9 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream
case IB_CACHED_VERTEX: case IB_CACHED_VERTEX:
{ {
uint32_t fifoIndex = input.Read( CACHED_VERTEX_BITS ); uint32_t fifoIndex = input.Read( CACHED_VERTEX_BITS );
triangle[ readVertex ] = vertexFifo[ ( ( verticesRead - 1 ) - fifoIndex ) & VERTEX_FIFO_MASK ]; triangle[ readVertex ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - fifoIndex ) & VERTEX_FIFO_MASK ] );
++readVertex; ++readVertex;
@ -93,24 +406,12 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream
case IB_FREE_VERTEX: case IB_FREE_VERTEX:
{ {
uint32_t readByte = 0; uint32_t relativeVertex = input.ReadVInt();
uint32_t bitsToShift = 0;
uint32_t relativeVertex = 0;
// V-int decoding, done inline.
do
{
readByte = input.Read( 8 );
relativeVertex |= ( readByte & 0x7F ) << bitsToShift;
bitsToShift += 7;
} while ( readByte & 0x80 );
uint32_t vertex = ( newVertices - 1 ) - relativeVertex; uint32_t vertex = ( newVertices - 1 ) - relativeVertex;
triangle[ readVertex ] = vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = vertex; triangle[ readVertex ] = static_cast< Ty >( vertex );
++verticesRead; ++verticesRead;
++readVertex; ++readVertex;
@ -121,7 +422,7 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream
if ( !skipFirstEdge ) if ( !skipFirstEdge )
{ {
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 0 ], triangle[ 1 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
++edgesRead; ++edgesRead;
} }
@ -136,11 +437,11 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream
++verticesRead; ++verticesRead;
} }
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 1 ], triangle[ 2 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] );
++edgesRead; ++edgesRead;
edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 2 ], triangle[ 0 ]); edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] );
++edgesRead; ++edgesRead;
} }
@ -155,3 +456,13 @@ void DecompressIndexBuffer( uint32_t* triangles, uint32_t triangleCount, ReadBit
{ {
DecompressIndexBuffer<uint32_t>( triangles, triangleCount, input ); DecompressIndexBuffer<uint32_t>( triangles, triangleCount, input );
} }
// 16 bit index entry point: forwards to the templated triangle-code decoder instantiated for uint16_t.
void DecompressIndexBuffer2( uint16_t* triangles, uint32_t triangleCount, ReadBitstream& input )
{
    typedef uint16_t IndexType;

    // The template argument must stay explicit, otherwise overload resolution picks this function again.
    DecompressIndexBuffer2< IndexType >( triangles, triangleCount, input );
}
// 32 bit index entry point: forwards to the templated triangle-code decoder instantiated for uint32_t.
void DecompressIndexBuffer2( uint32_t* triangles, uint32_t triangleCount, ReadBitstream& input )
{
    typedef uint32_t IndexType;

    // The template argument must stay explicit, otherwise overload resolution picks this function again.
    DecompressIndexBuffer2< IndexType >( triangles, triangleCount, input );
}

View file

@ -27,15 +27,29 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once #pragma once
#include <stdint.h> #include <stdint.h>
#include "readbitstream.h" #include "ReadBitstream.h"
// Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be in pre-transform cache optimised // Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be in pre-transform cache optimised
// order. // order.
// Parameters: // Parameters:
// [out] triangles - Triangle list index buffer (3 indices to vertices per triangle), output from the decompression. // [out] triangles - Triangle list index buffer (3 indices to vertices per triangle), output from the decompression - 16bit indices
// [in] triangle count - The number of triangles to decompress. // [in] triangle count - The number of triangles to decompress.
// [in] input - The bit stream that the compressed data will be read from. // [in] input - The bit stream that the compressed data will be read from.
template <typename Ty> void DecompressIndexBuffer( uint16_t* triangles, uint32_t triangleCount, ReadBitstream& input );
void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream& input );
// Same as above but 32 bit indices.
void DecompressIndexBuffer( uint32_t* triangles, uint32_t triangleCount, ReadBitstream& input );
// Decompress an index buffer that was encoded with per-triangle codes (IndexBufferTriangleCodes, the CompressIndexBuffer2 format),
// reading the compressed data from a bitstream.
// Parameters:
// [out] triangles - Triangle list index buffer (3 indices to vertices per triangle), output from the decompression - 16bit indices
// [in] triangle count - The number of triangles to decompress.
// [in] input - The bit stream that the compressed data will be read from.
void DecompressIndexBuffer2( uint16_t* triangles, uint32_t triangleCount, ReadBitstream& input );
// Same as above but 32bit indices
void DecompressIndexBuffer2( uint32_t* triangles, uint32_t triangleCount, ReadBitstream& input );
#endif // -- INDEX_BUFFER_DECOMPRESSION_H__ #endif // -- INDEX_BUFFER_DECOMPRESSION_H__

View file

@ -26,6 +26,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define INDEX_COMPRESSION_CONSTANTS_H__ #define INDEX_COMPRESSION_CONSTANTS_H__
#pragma once #pragma once
#include <stdint.h>
// Constant fifo and code sizes. // Constant fifo and code sizes.
const int VERTEX_FIFO_SIZE = 32; const int VERTEX_FIFO_SIZE = 32;
const int VERTEX_FIFO_MASK = VERTEX_FIFO_SIZE - 1; const int VERTEX_FIFO_MASK = VERTEX_FIFO_SIZE - 1;
@ -35,10 +37,12 @@ const int CACHED_EDGE_BITS = 5;
const int CACHED_VERTEX_BITS = 5; const int CACHED_VERTEX_BITS = 5;
const int IB_CODE_BITS = 2; const int IB_CODE_BITS = 2;
const int IB_TRIANGLE_CODE_BITS = 4;
// Edge in the edge fifo. // Edge in the edge fifo.
struct Edge struct Edge
{ {
void set(uint32_t f, uint32_t s) void set( uint32_t f, uint32_t s )
{ {
first = f; first = f;
second = s; second = s;
@ -48,7 +52,7 @@ struct Edge
uint32_t second; uint32_t second;
}; };
// Codes // These are the vertex/edge codes for CompressIndexBuffer
enum IndexBufferCodes enum IndexBufferCodes
{ {
// Represents a yet un-seen vertex. // Represents a yet un-seen vertex.
@ -65,4 +69,25 @@ enum IndexBufferCodes
IB_FREE_VERTEX = 3 IB_FREE_VERTEX = 3
}; };
// These are the triangle codes for CompressIndexBuffer2
// One code is read per triangle using IB_TRIANGLE_CODE_BITS bits; the 16 values below fill that range.
// Naming, as interpreted by DecompressIndexBuffer2:
//   EDGE   - vertices 0 and 1 come from an edge FIFO entry (emitted as second, first).
//   NEW    - the vertex is the next not-yet-seen vertex index.
//   CACHED - the vertex is looked up through a vertex FIFO index read from the stream.
//   FREE   - the vertex is encoded as a v-int offset back from the most recent new vertex.
// For the non-EDGE codes the three name parts describe triangle vertices 0, 1 and 2 in that order.
enum IndexBufferTriangleCodes
{
IB_EDGE_NEW = 0,             // cached edge (FIFO index in stream) + new vertex
IB_EDGE_CACHED = 1,          // cached edge (FIFO index in stream) + cached vertex
IB_EDGE_FREE = 2,            // cached edge (FIFO index in stream) + free vertex
IB_NEW_NEW_NEW = 3,          // three new vertices, no operands
IB_NEW_NEW_CACHED = 4,
IB_NEW_NEW_FREE = 5,
IB_NEW_CACHED_CACHED = 6,
IB_NEW_CACHED_FREE= 7,
IB_NEW_FREE_CACHED = 8,      // operands are stored v-int first, then the vertex FIFO index
IB_NEW_FREE_FREE = 9,
IB_CACHED_CACHED_CACHED = 10,
IB_CACHED_CACHED_FREE = 11,
IB_CACHED_FREE_FREE = 12,
IB_FREE_FREE_FREE = 13,
IB_EDGE_0_NEW = 14,          // most recent edge in the FIFO (no index stored) + new vertex
IB_EDGE_1_NEW = 15           // second most recent edge in the FIFO (no index stored) + new vertex
};
#endif #endif

View file

@ -54,6 +54,8 @@ public:
// Get the buffer size of this in bytes // Get the buffer size of this in bytes
size_t Size() const { return m_bufferSize; } size_t Size() const { return m_bufferSize; }
uint32_t ReadVInt();
private: private:
uint64_t m_bitBuffer; uint64_t m_bitBuffer;
@ -122,4 +124,22 @@ RBS_INLINE uint32_t ReadBitstream::Read( uint32_t bitCount )
return result; return result;
} }
// Read a variable length integer from the stream.
// The value is stored one byte at a time, least significant group first: the low 7 bits of each
// byte are payload and the top bit (0x80) signals that another byte follows.
// Returns the decoded 32 bit value.
RBS_INLINE uint32_t ReadBitstream::ReadVInt()
{
    uint32_t bitsToShift = 0;
    uint32_t result      = 0;
    uint32_t readByte;

    do
    {
        readByte = Read( 8 );

        // A well formed 32 bit v-int is at most 5 bytes long (shifts 0, 7, 14, 21, 28). Shifting a
        // uint32_t by 32 or more is undefined behaviour, so payload bits of any further
        // continuation bytes in a malformed stream are dropped; the bytes are still consumed.
        if ( bitsToShift < 32 )
        {
            result      |= ( readByte & 0x7F ) << bitsToShift;
            bitsToShift += 7;
        }
    }
    while ( readByte & 0x80 );

    return result;
}
#endif // -- READ_BIT_STREAM_H__ #endif // -- READ_BIT_STREAM_H__

View file

@ -62,6 +62,9 @@ public:
// Write a number of bits to the stream. // Write a number of bits to the stream.
void Write( uint32_t value, uint32_t bitCount ); void Write( uint32_t value, uint32_t bitCount );
// Write a V int to the stream.
void WriteVInt( uint32_t value );
// Get the size in bytes // Get the size in bytes
size_t ByteSize() const { return ( m_size + 7 ) >> 3; } size_t ByteSize() const { return ( m_size + 7 ) >> 3; }
@ -123,6 +126,19 @@ WBS_INLINE void WriteBitstream::Write( uint32_t value, uint32_t bitCount )
m_size += bitCount; m_size += bitCount;
} }
// Write a variable length integer to the stream, 7 payload bits per byte, least significant group
// first. Every byte except the last has its top bit (0x80) set to flag that more bytes follow.
// A value of zero is written as a single zero byte.
WBS_INLINE void WriteBitstream::WriteVInt( uint32_t value )
{
    for ( ;; )
    {
        const uint32_t payload = value & 0x7F;

        value >>= 7;

        if ( value == 0 )
        {
            // Final group: no continuation flag.
            Write( payload, 8 );
            break;
        }

        Write( payload | 0x80, 8 );
    }
}
inline void WriteBitstream::Finish() inline void WriteBitstream::Finish()
{ {
if ( m_bufferCursor > m_bufferEnd - 8 ) if ( m_bufferCursor > m_bufferEnd - 8 )