diff --git a/3rdparty/ib-compress/README.md b/3rdparty/ib-compress/README.md index a1bac526..ddef5e9c 100644 --- a/3rdparty/ib-compress/README.md +++ b/3rdparty/ib-compress/README.md @@ -1,4 +1,4 @@ -# Vertex Cache Optimised Index Buffer Compression +# Vertex Cache Optimised Index Buffer Compression This is a small proof of concept for compressing and decompressing index buffer triangle lists. It's designed to maintain the order of the triangle list and perform best with a triangle list that has been vertex cache post-transform optimised (a pre-transform cache optimisation is done as part of the compression). @@ -8,7 +8,7 @@ There are some cases where the vertices within a triangle are re-ordered, but th ## How does it work? -The inspiration was a mix of Fabian Giesen's Simple loss-less index buffer compression http://fgiesen.wordpress.com/2013/12/14/simple-lossless-index-buffer-compression/ and +The inspiration was a mix of Fabian Giesen's [Simple loss-less index buffer compression](http://fgiesen.wordpress.com/2013/12/14/simple-lossless-index-buffer-compression/) and the higher compression algorithms that make use of shared edges and re-order triangles. The idea was that there is probably a middle ground between them. The basic goals were: @@ -46,4 +46,9 @@ That's a better question! While my thoughts were that in theory it would average Performance wise, with the code posted here, the Armadillo compresses in 18.5 milliseconds and decompresses in 6.6 milliseconds on average on my system. The Stanford bunny is more like 1.4 milliseconds to decompress, relatively. -https://conorstokes.github.io/graphics/2014/09/28/vertex-cache-optimised-index-buffer-compression/ +## Update! + +I've added a second more efficient (in terms of both speed and size) compression algorithm (CompressIndexBuffer2 and DecompressIndexBuffer2), as well as some changes upstream from Branimir Karadžić, who made some compiler compatibility fixes and added 16bit indice support. 
This uses a code per triangle instead of multiple codes for different cases. + +For details of the original algorithm, please see this [blog post](http://conorstokes.github.io/graphics/2014/09/28/vertex-cache-optimised-index-buffer-compression/). For details of the second algorithm, please see this [blog post](http://conorstokes.github.io/graphics/2014/09/28/vertex-cache-optimised-index-buffer-compression/). + diff --git a/3rdparty/ib-compress/indexbuffercompression.cpp b/3rdparty/ib-compress/indexbuffercompression.cpp index 5ef44c88..e4fffa7a 100644 --- a/3rdparty/ib-compress/indexbuffercompression.cpp +++ b/3rdparty/ib-compress/indexbuffercompression.cpp @@ -33,8 +33,517 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define IBC_INLINE __attribute__((always_inline)) #endif +// Individual vertex type classifications. +enum VertexClassification +{ + NEW_VERTEX = 0, + CACHED_VERTEX = 1, + FREE_VERTEX = 2 +}; + +// Individual case for handling a combination of vertice classifications. +struct VertexCompressionCase +{ + IndexBufferTriangleCodes code; + uint32_t vertexOrder[ 3 ]; +}; + +// This is a table for looking up the appropriate code and rotation for a set of vertex classifications. 
+const VertexCompressionCase CompressionCase[3][3][3] = +{ + { // new + { // new new + { // new new new + IB_NEW_NEW_NEW, { 0, 1, 2 } + }, + { // new new cached + IB_NEW_NEW_CACHED, { 0, 1, 2 } + }, + { // new new free + IB_NEW_NEW_FREE, { 0, 1, 2 } + } + }, + { // new cached + { // new cached new + IB_NEW_NEW_CACHED, { 2, 0, 1 } + }, + { // new cached cached + IB_NEW_CACHED_CACHED, { 0, 1, 2 } + }, + { // new cached free + IB_NEW_CACHED_FREE, { 0, 1, 2 } + } + }, + { // new free + { // new free new + IB_NEW_NEW_FREE, { 2, 0, 1 } + }, + { // new free cached + IB_NEW_FREE_CACHED, { 0, 1, 2 } + }, + { // new free free + IB_NEW_FREE_FREE, { 0, 1, 2 } + } + } + }, + { // cached + { // cached new + { // cached new new + IB_NEW_NEW_CACHED, { 1, 2, 0 } + }, + { // cached new cached + IB_NEW_CACHED_CACHED, { 1, 2, 0 } + }, + { // cached new free + IB_NEW_FREE_CACHED, { 1, 2, 0 } + } + }, + { // cached cached + { // cached cached new + IB_NEW_CACHED_CACHED, { 2, 0, 1 } + }, + { // cached cached cached + IB_CACHED_CACHED_CACHED, { 0, 1, 2 } + }, + { // cached cached free + IB_CACHED_CACHED_FREE, { 0, 1, 2 } + } + }, + { // cached free + { // cached free new + IB_NEW_CACHED_FREE, { 2, 0, 1 } + }, + { // cached free cached + IB_CACHED_CACHED_FREE, { 2, 0, 1 } + }, + { // cached free free + IB_CACHED_FREE_FREE, { 0, 1, 2 } + } + } + }, + { // free + { // free new + { // free new new + IB_NEW_NEW_FREE, { 1, 2, 0 } + }, + { // free new cached + IB_NEW_CACHED_FREE, { 1, 2, 0 } + }, + { // free new free + IB_NEW_FREE_FREE, { 1, 2, 0 } + } + }, + { // free cached + { // free cached new + IB_NEW_FREE_CACHED, { 2, 0, 1 } + }, + { // free cached cached + IB_CACHED_CACHED_FREE, { 1, 2, 0 } + }, + { // free cached free + IB_CACHED_FREE_FREE, { 1, 2, 0 } + } + }, + { // free free + { // free free new + IB_NEW_FREE_FREE, { 2, 0, 1 } + }, + { // free free cached + IB_CACHED_FREE_FREE, { 2, 0, 1 } + }, + { // free free free + IB_FREE_FREE_FREE, { 0, 1, 2 } + } + } + } +}; + const uint32_t 
VERTEX_NOT_MAPPED = 0xFFFFFFFF; +// Classify a vertex as new, cached or free, outputting the relative position in the vertex indice cache FIFO. +static IBC_INLINE VertexClassification ClassifyVertex( uint32_t vertex, const uint32_t* vertexRemap, const uint32_t* vertexFifo, uint32_t verticesRead, uint32_t& cachedVertexIndex ) +{ + if ( vertexRemap[ vertex ] == VERTEX_NOT_MAPPED ) + { + return NEW_VERTEX; + } + else + { + int32_t lowestVertexCursor = verticesRead >= VERTEX_FIFO_SIZE ? verticesRead - VERTEX_FIFO_SIZE : 0; + + // Probe backwards in the vertex FIFO for a cached vertex + for ( int32_t vertexCursor = verticesRead - 1; vertexCursor >= lowestVertexCursor; --vertexCursor ) + { + if ( vertexFifo[ vertexCursor & VERTEX_FIFO_MASK ] == vertex ) + { + cachedVertexIndex = ( verticesRead - 1 ) - vertexCursor; + + return CACHED_VERTEX; + } + } + + return FREE_VERTEX; + } +} + +template +void CompressIndexBuffer2( const Ty* triangles, + uint32_t triangleCount, + uint32_t* vertexRemap, + uint32_t vertexCount, + WriteBitstream& output ) +{ + Edge edgeFifo[ EDGE_FIFO_SIZE ]; + uint32_t vertexFifo[ VERTEX_FIFO_SIZE ]; + + uint32_t edgesRead = 0; + uint32_t verticesRead = 0; + uint32_t newVertices = 0; + const Ty* triangleEnd = triangles + ( triangleCount * 3 ); + + assert( vertexCount < 0xFFFFFFFF ); + + uint32_t* vertexRemapEnd = vertexRemap + vertexCount; + + // clear the vertex remapping to "not found" value of 0xFFFFFFFF - dirty, but low overhead. + for ( uint32_t* remappedVertex = vertexRemap; remappedVertex < vertexRemapEnd; ++remappedVertex ) + { + *remappedVertex = VERTEX_NOT_MAPPED; + } + + // iterate through the triangles + for ( const Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 ) + { + int32_t lowestEdgeCursor = edgesRead >= EDGE_FIFO_SIZE ? edgesRead - EDGE_FIFO_SIZE : 0; + int32_t edgeCursor = edgesRead - 1; + bool foundEdge = false; + + int32_t spareVertex = 0; + + // check to make sure that there are no degenerate triangles. 
+ assert( triangle[ 0 ] != triangle[ 1 ] && triangle[ 1 ] != triangle[ 2 ] && triangle[ 2 ] != triangle[ 0 ] ); + + // Probe back through the edge fifo to see if one of the triangle edges is in the FIFO + for ( ; edgeCursor >= lowestEdgeCursor; --edgeCursor ) + { + const Edge& edge = edgeFifo[ edgeCursor & EDGE_FIFO_MASK ]; + + // check all the edges in order and save the free vertex. + if ( edge.second == triangle[ 0 ] && edge.first == triangle[ 1 ] ) + { + foundEdge = true; + spareVertex = 2; + break; + } + else if ( edge.second == triangle[ 1 ] && edge.first == triangle[ 2 ] ) + { + foundEdge = true; + spareVertex = 0; + break; + } + else if ( edge.second == triangle[ 2 ] && edge.first == triangle[ 0 ] ) + { + foundEdge = true; + spareVertex = 1; + break; + } + } + + // we found an edge so write it out, so classify a vertex and then write out the correct code. + if ( foundEdge ) + { + uint32_t cachedVertex; + + uint32_t spareVertexIndice = triangle[ spareVertex ]; + VertexClassification freeVertexClass = ClassifyVertex( spareVertexIndice, vertexRemap, vertexFifo, verticesRead, cachedVertex ); + uint32_t relativeEdge = ( edgesRead - 1 ) - edgeCursor; + + switch ( freeVertexClass ) + { + case NEW_VERTEX: + + switch ( relativeEdge ) + { + case 0: + + output.Write( IB_EDGE_0_NEW, IB_TRIANGLE_CODE_BITS ); + break; + + case 1: + + output.Write( IB_EDGE_1_NEW, IB_TRIANGLE_CODE_BITS ); + break; + + default: + + output.Write( IB_EDGE_NEW, IB_TRIANGLE_CODE_BITS ); + output.Write( relativeEdge, CACHED_EDGE_BITS ); + break; + + } + + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = spareVertexIndice; + vertexRemap[ spareVertexIndice ] = newVertices; + + ++verticesRead; + ++newVertices; + break; + + case CACHED_VERTEX: + + output.Write( IB_EDGE_CACHED, IB_TRIANGLE_CODE_BITS ); + output.Write( relativeEdge, CACHED_EDGE_BITS ); + output.Write( cachedVertex, CACHED_VERTEX_BITS ); + break; + + case FREE_VERTEX: + + output.Write( IB_EDGE_FREE, IB_TRIANGLE_CODE_BITS ); + 
output.Write( relativeEdge, CACHED_EDGE_BITS ); + + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = spareVertexIndice; + + ++verticesRead; + + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ spareVertexIndice ] ); + break; + + } + + // Populate the edge fifo with the the remaining edges + // Note - the winding order is important as we'll need to re-produce this on decompression. + // The edges are put in as if the found edge is the first edge in the triangle (which it will be when we + // reconstruct). + switch ( spareVertex ) + { + case 0: + + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] ); + + ++edgesRead; + + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] ); + + ++edgesRead; + break; + + case 1: + + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] ); + + ++edgesRead; + + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] ); + + ++edgesRead; + break; + + case 2: + + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] ); + + ++edgesRead; + + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] ); + + ++edgesRead; + break; + } + } + else + { + VertexClassification classifications[ 3 ]; + uint32_t cachedVertexIndices[ 3 ]; + + // classify each vertex as new, cached or free, potentially extracting a cached indice. + classifications[ 0 ] = ClassifyVertex( triangle[ 0 ], vertexRemap, vertexFifo, verticesRead, cachedVertexIndices[ 0 ] ); + classifications[ 1 ] = ClassifyVertex( triangle[ 1 ], vertexRemap, vertexFifo, verticesRead, cachedVertexIndices[ 1 ] ); + classifications[ 2 ] = ClassifyVertex( triangle[ 2 ], vertexRemap, vertexFifo, verticesRead, cachedVertexIndices[ 2 ] ); + + // use the classifications to lookup the matching compression code and potentially rotate the order of the vertices. 
+ const VertexCompressionCase& compressionCase = CompressionCase[ classifications[ 0 ] ][ classifications[ 1 ] ][ classifications[ 2 ] ]; + + // rotate the order of the vertices based on the compression classification. + uint32_t reorderedTriangle[ 3 ]; + + reorderedTriangle[ 0 ] = triangle[ compressionCase.vertexOrder[ 0 ] ]; + reorderedTriangle[ 1 ] = triangle[ compressionCase.vertexOrder[ 1 ] ]; + reorderedTriangle[ 2 ] = triangle[ compressionCase.vertexOrder[ 2 ] ]; + + output.Write( compressionCase.code, IB_TRIANGLE_CODE_BITS ); + + switch ( compressionCase.code ) + { + case IB_NEW_NEW_NEW: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = triangle[ 0 ]; + vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = triangle[ 1 ]; + vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] = triangle[ 2 ]; + + vertexRemap[ triangle[ 0 ] ] = newVertices; + vertexRemap[ triangle[ 1 ] ] = newVertices + 1; + vertexRemap[ triangle[ 2 ] ] = newVertices + 2; + + verticesRead += 3; + newVertices += 3; + + break; + } + case IB_NEW_NEW_CACHED: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ]; + vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ]; + + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 2 ] ], CACHED_VERTEX_BITS ); + + vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices; + vertexRemap[ reorderedTriangle[ 1 ] ] = newVertices + 1; + + verticesRead += 2; + newVertices += 2; + + break; + } + case IB_NEW_NEW_FREE: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ]; + vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ]; + vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ]; + + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] ); + + vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices; + vertexRemap[ reorderedTriangle[ 1 ] ] = newVertices + 1; + + verticesRead += 3; + newVertices += 2; + + 
break; + } + case IB_NEW_CACHED_CACHED: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ]; + + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 1 ] ], CACHED_VERTEX_BITS ); + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 2 ] ], CACHED_VERTEX_BITS ); + + vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices; + verticesRead += 1; + newVertices += 1; + + break; + } + case IB_NEW_CACHED_FREE: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ]; + vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ]; + + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 1 ] ], CACHED_VERTEX_BITS ); + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] ); + + vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices; + + verticesRead += 2; + newVertices += 1; + + break; + } + case IB_NEW_FREE_CACHED: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ]; + vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ]; + + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 1 ] ] ); + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 2 ] ], CACHED_VERTEX_BITS ); + + vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices; + + verticesRead += 2; + newVertices += 1; + + break; + } + case IB_NEW_FREE_FREE: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ]; + vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ]; + vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ]; + + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 1 ] ] ); + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] ); + + vertexRemap[ reorderedTriangle[ 0 ] ] = newVertices; + + verticesRead += 3; + newVertices += 1; + + break; + } + case IB_CACHED_CACHED_CACHED: + { + output.Write( 
cachedVertexIndices[ compressionCase.vertexOrder[ 0 ] ], CACHED_VERTEX_BITS ); + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 1 ] ], CACHED_VERTEX_BITS ); + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 2 ] ], CACHED_VERTEX_BITS ); + break; + } + case IB_CACHED_CACHED_FREE: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ]; + + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 0 ] ], CACHED_VERTEX_BITS ); + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 1 ] ], CACHED_VERTEX_BITS ); + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] ); + + verticesRead += 1; + + break; + } + case IB_CACHED_FREE_FREE: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ]; + vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ]; + + output.Write( cachedVertexIndices[ compressionCase.vertexOrder[ 0 ] ], CACHED_VERTEX_BITS ); + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 1 ] ] ); + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] ); + + verticesRead += 2; + + break; + } + case IB_FREE_FREE_FREE: + { + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = reorderedTriangle[ 0 ]; + vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 1 ]; + vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] = reorderedTriangle[ 2 ]; + + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 0 ] ] ); + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 1 ] ] ); + output.WriteVInt( ( newVertices - 1 ) - vertexRemap[ reorderedTriangle[ 2 ] ] ); + + verticesRead += 3; + break; + } + } + + // populate the edge fifo with the 3 most recent edges + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( reorderedTriangle[ 0 ], reorderedTriangle[ 1 ] ); + + ++edgesRead; + + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( reorderedTriangle[ 1 ], 
reorderedTriangle[ 2 ] ); + + ++edgesRead; + + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( reorderedTriangle[ 2 ], reorderedTriangle[ 0 ] ); + + ++edgesRead; + } + } +} + + + // Output the compression information for a single vertex, remapping any new vertices and updating the vertex fifo where needed. static IBC_INLINE void OutputVertex( uint32_t vertex, uint32_t* vertexRemap, @@ -82,16 +591,7 @@ static IBC_INLINE void OutputVertex( uint32_t vertex, uint32_t vertexOutput = ( newVertexCount - 1 ) - vertexRemap[ vertex ]; // v-int encode the free vertex index. - do - { - uint32_t lower7 = vertexOutput & 0x7F; - - vertexOutput >>= 7; - - output.Write( lower7 | ( vertexOutput > 0 ? 0x80 : 0 ), 8 ); - - } while ( vertexOutput > 0 ); - + output.WriteVInt( vertexOutput ); // free vertices go back into the vertex cache. vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = vertex; @@ -121,13 +621,13 @@ void CompressIndexBuffer( const Ty* triangles, uint32_t* vertexRemapEnd = vertexRemap + vertexCount; // clear the vertex remapping to "not found" value of 0xFFFFFFFF - dirty, but low overhead. - for (uint32_t* remappedVertex = vertexRemap; remappedVertex < vertexRemapEnd; ++remappedVertex ) + for ( uint32_t* remappedVertex = vertexRemap; remappedVertex < vertexRemapEnd; ++remappedVertex ) { *remappedVertex = VERTEX_NOT_MAPPED; } // iterate through the triangles - for (const Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 ) + for ( const Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 ) { int32_t lowestEdgeCursor = edgesRead >= EDGE_FIFO_SIZE ? 
edgesRead - EDGE_FIFO_SIZE : 0; int32_t edgeCursor = edgesRead - 1; @@ -189,33 +689,33 @@ void CompressIndexBuffer( const Ty* triangles, { case 0: - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 2 ], triangle[ 0 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] ); ++edgesRead; - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 0 ], triangle[ 1 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] ); ++edgesRead; break; case 1: - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 0 ], triangle[ 1 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] ); ++edgesRead; - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 1 ], triangle[ 2 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] ); ++edgesRead; break; case 2: - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 1 ], triangle[ 2 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] ); ++edgesRead; - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 2 ], triangle[ 0 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] ); ++edgesRead; break; @@ -229,35 +729,53 @@ void CompressIndexBuffer( const Ty* triangles, OutputVertex( triangle[ 2 ], vertexRemap, newVertices, vertexFifo, verticesRead, output ); // populate the edge fifo with the 3 most recent edges - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 0 ], triangle[ 1 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] ); ++edgesRead; - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 1 ], triangle[ 2 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] ); ++edgesRead; - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 2 ], triangle[ 0 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] ); ++edgesRead; } } } -void CompressIndexBuffer ( const uint16_t* triangles, - uint32_t triangleCount, - 
uint32_t* vertexRemap, - uint32_t vertexCount, - WriteBitstream& output ) -{ - CompressIndexBuffer(triangles, triangleCount, vertexRemap, vertexCount, output); -} - -void CompressIndexBuffer ( const uint32_t* triangles, - uint32_t triangleCount, - uint32_t* vertexRemap, - uint32_t vertexCount, +void CompressIndexBuffer( const uint16_t* triangles, + uint32_t triangleCount, + uint32_t* vertexRemap, + uint32_t vertexCount, WriteBitstream& output ) { - CompressIndexBuffer(triangles, triangleCount, vertexRemap, vertexCount, output); -} \ No newline at end of file + CompressIndexBuffer( triangles, triangleCount, vertexRemap, vertexCount, output ); +} + +void CompressIndexBuffer( const uint32_t* triangles, + uint32_t triangleCount, + uint32_t* vertexRemap, + uint32_t vertexCount, + WriteBitstream& output ) +{ + CompressIndexBuffer( triangles, triangleCount, vertexRemap, vertexCount, output ); +} + +void CompressIndexBuffer2( const uint16_t* triangles, + uint32_t triangleCount, + uint32_t* vertexRemap, + uint32_t vertexCount, + WriteBitstream& output ) +{ + CompressIndexBuffer2( triangles, triangleCount, vertexRemap, vertexCount, output ); +} + +void CompressIndexBuffer2( const uint32_t* triangles, + uint32_t triangleCount, + uint32_t* vertexRemap, + uint32_t vertexCount, + WriteBitstream& output ) +{ + CompressIndexBuffer2( triangles, triangleCount, vertexRemap, vertexCount, output ); +} diff --git a/3rdparty/ib-compress/indexbuffercompression.h b/3rdparty/ib-compress/indexbuffercompression.h index 291e46f6..03a7f9e6 100644 --- a/3rdparty/ib-compress/indexbuffercompression.h +++ b/3rdparty/ib-compress/indexbuffercompression.h @@ -27,10 +27,40 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #pragma once #include -#include "writebitstream.h" +#include "WriteBitstream.h" // Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be in pre-transform cache optimised -// order. +// order). 
+// +// This version has slightly worse compression and the matching decompression has worse performance than CompressIndexBuffer2, but it supports degenerate triangles +// (that have duplicate vertex indices). Output should be decompressed with DecompressIndexBuffer. It also changes the order of the vertices in each triangle less. +// +// It works by outputting a code (along with any required index symbols) per vertex. +// +// Parameters: +// [in] triangles - A typical triangle list index buffer (3 indices to vertices per triangle). 16 bit indices. +// [in] triangle count - The number of triangles to process. +// [out] vertexRemap - This will be populated with re-mappings that map old vertices to new vertices, +// where indexing with the old vertex index will get you the new one. +// It should be allocated with at least vertexCount entries. +// [in] vertexCount - The number of vertices in the mesh. This should be less than 0xFFFFFFFF/2^32 - 1. +// [in] output - The stream that the compressed data will be written to. Note that we will not flush/finish the stream +// in case something else is going to be written after, so WriteBitstream::Finish will need to be called after this. +void CompressIndexBuffer( const uint16_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output ); + +// Same as above but 32bit indices. +void CompressIndexBuffer( const uint32_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output ); + + +// Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be in pre-transform cache optimised +// order). +// +// This version has slightly better compression and the matching decompression has better performance than CompressIndexBuffer, but it does not support degenerate triangles +// (that have duplicate vertex indices). Output should be decompressed with DecompressIndexBuffer2. 
It changes the order of the vertices in each triangle more. +// +// This version also has compression optimisations that allow it to handle strip/fan cases a lot better compression wise. +// +// This works by outputting a code per triangle (along with the required index symbols). // // Parameters: // [in] triangles - A typical triangle list index buffer (3 indices to vertices per triangle). @@ -41,7 +71,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // [in] vertexCount - The number of vertices in the mesh. This should be less than 0xFFFFFFFF/2^32 - 1. // [in] output - The stream that the compressed data will be written to. Note that we will not flush/finish the stream // in case something else is going to be written after, so WriteBitstream::Finish will need to be called after this. -template -void CompressIndexBuffer( const Ty* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output ); +void CompressIndexBuffer2( const uint16_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output ); + +// Same as above but 32bit indices +void CompressIndexBuffer2( const uint32_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output ); + #endif // -- INDEX_BUFFER_COMPRESSION_H__ \ No newline at end of file diff --git a/3rdparty/ib-compress/indexbufferdecompression.cpp b/3rdparty/ib-compress/indexbufferdecompression.cpp index 4bb07b4c..529b5eaa 100644 --- a/3rdparty/ib-compress/indexbufferdecompression.cpp +++ b/3rdparty/ib-compress/indexbufferdecompression.cpp @@ -27,6 +27,319 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "IndexCompressionConstants.h" #include
+template <typename Ty>
+void DecompressIndexBuffer2( Ty* triangles, uint32_t triangleCount, ReadBitstream& input ) // Decodes triangleCount triangles; each starts with one IB_TRIANGLE_CODE_BITS wide triangle code.
+{
+    Edge edgeFifo[ EDGE_FIFO_SIZE ]; // recently decoded edges, addressed backwards from edgesRead
+    uint32_t vertexFifo[ VERTEX_FIFO_SIZE ]; // recently decoded vertices, addressed backwards from verticesRead
+
+    uint32_t edgesRead = 0;
+    uint32_t verticesRead = 0;
+    uint32_t newVertices = 0; // next index handed out to a never-seen vertex
+    const Ty* triangleEnd = triangles + ( triangleCount * 3 );
+
+    // iterate through the triangles
+    for ( Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 )
+    {
+        IndexBufferTriangleCodes code = static_cast< IndexBufferTriangleCodes >( input.Read( IB_TRIANGLE_CODE_BITS ) );
+
+        switch ( code )
+        {
+        case IB_EDGE_NEW:
+        {
+            uint32_t edgeFifoIndex = input.Read( CACHED_EDGE_BITS );
+
+            const Edge& edge = edgeFifo[ ( ( edgesRead - 1 ) - edgeFifoIndex ) & EDGE_FIFO_MASK ];
+
+            triangle[ 0 ] = static_cast< Ty >( edge.second ); // the cached edge is reused with its end points swapped
+            triangle[ 1 ] = static_cast< Ty >( edge.first );
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = // vertex FIFO slot, so mask with VERTEX_FIFO_MASK
+                triangle[ 2 ] = static_cast< Ty >( newVertices );
+
+            ++newVertices;
+            ++verticesRead;
+
+            break;
+        }
+
+        case IB_EDGE_CACHED:
+        {
+            uint32_t edgeFifoIndex = input.Read( CACHED_EDGE_BITS );
+            uint32_t vertexFifoIndex = input.Read( CACHED_VERTEX_BITS );
+
+            const Edge& edge = edgeFifo[ ( ( edgesRead - 1 ) - edgeFifoIndex ) & EDGE_FIFO_MASK ];
+
+            triangle[ 0 ] = static_cast< Ty >( edge.second );
+            triangle[ 1 ] = static_cast< Ty >( edge.first );
+            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertexFifoIndex ) & VERTEX_FIFO_MASK ] );
+
+            break;
+        }
+        case IB_EDGE_FREE:
+        {
+            uint32_t edgeFifoIndex = input.Read( CACHED_EDGE_BITS );
+            uint32_t relativeVertex = input.ReadVInt();
+
+            const Edge& edge = edgeFifo[ ( ( edgesRead - 1 ) - edgeFifoIndex ) & EDGE_FIFO_MASK ];
+
+            triangle[ 0 ] = static_cast< Ty >( edge.second );
+            triangle[ 1 ] = static_cast< Ty >( edge.first );
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex ); // free vertices are stored relative to the newest vertex index
+            ++verticesRead;
+
+            break;
+        }
+        case IB_NEW_NEW_NEW:
+        {
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 0 ] = static_cast< Ty >( newVertices );
+            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 1 ] = static_cast< Ty >( newVertices + 1 );
+            vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 2 ] = static_cast< Ty >( newVertices + 2 );
+
+            newVertices += 3;
+            verticesRead += 3;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+            break;
+        }
+        case IB_NEW_NEW_CACHED:
+        {
+            uint32_t vertexFifoIndex = input.Read( CACHED_VERTEX_BITS );
+
+            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertexFifoIndex ) & VERTEX_FIFO_MASK ] );
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 0 ] = static_cast< Ty >( newVertices );
+            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 1 ] = static_cast< Ty >( newVertices + 1 );
+
+            verticesRead += 2;
+            newVertices += 2;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+            break;
+        }
+        case IB_NEW_NEW_FREE:
+        {
+            uint32_t relativeVertex = input.ReadVInt();
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 0 ] = static_cast< Ty >( newVertices );
+            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 1 ] = static_cast< Ty >( newVertices + 1 );
+            vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex );
+
+            newVertices += 2;
+            verticesRead += 3;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+            break;
+        }
+        case IB_NEW_CACHED_CACHED:
+        {
+            uint32_t vertex1FifoIndex = input.Read( CACHED_VERTEX_BITS );
+            uint32_t vertex2FifoIndex = input.Read( CACHED_VERTEX_BITS );
+
+            triangle[ 1 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex1FifoIndex ) & VERTEX_FIFO_MASK ] );
+            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex2FifoIndex ) & VERTEX_FIFO_MASK ] );
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 0 ] = static_cast< Ty >( newVertices );
+
+            ++verticesRead;
+            ++newVertices;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+            break;
+        }
+        case IB_NEW_CACHED_FREE:
+        {
+            uint32_t vertexFifoIndex = input.Read( CACHED_VERTEX_BITS );
+            uint32_t relativeVertex = input.ReadVInt();
+
+            triangle[ 1 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertexFifoIndex ) & VERTEX_FIFO_MASK ] );
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 0 ] = static_cast< Ty >( newVertices );
+            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex );
+
+            verticesRead += 2;
+            ++newVertices;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+            break;
+        }
+        case IB_NEW_FREE_CACHED:
+        {
+            uint32_t relativeVertex = input.ReadVInt();
+            uint32_t vertexFifoIndex = input.Read( CACHED_VERTEX_BITS );
+
+            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertexFifoIndex ) & VERTEX_FIFO_MASK ] );
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 0 ] = static_cast< Ty >( newVertices );
+            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 1 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex );
+
+            verticesRead += 2;
+            ++newVertices;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+            break;
+        }
+        case IB_NEW_FREE_FREE:
+        {
+            uint32_t relativeVertex1 = input.ReadVInt();
+            uint32_t relativeVertex2 = input.ReadVInt();
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 0 ] = static_cast< Ty >( newVertices );
+            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 1 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex1 );
+            vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex2 );
+
+            verticesRead += 3;
+            ++newVertices;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+
+            break;
+        }
+        case IB_CACHED_CACHED_CACHED:
+        {
+            uint32_t vertex0FifoIndex = input.Read( CACHED_VERTEX_BITS );
+            uint32_t vertex1FifoIndex = input.Read( CACHED_VERTEX_BITS );
+            uint32_t vertex2FifoIndex = input.Read( CACHED_VERTEX_BITS );
+
+            triangle[ 0 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex0FifoIndex ) & VERTEX_FIFO_MASK ] );
+            triangle[ 1 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex1FifoIndex ) & VERTEX_FIFO_MASK ] );
+            triangle[ 2 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex2FifoIndex ) & VERTEX_FIFO_MASK ] );
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+            break;
+        }
+        case IB_CACHED_CACHED_FREE:
+        {
+            uint32_t vertex0FifoIndex = input.Read( CACHED_VERTEX_BITS );
+            uint32_t vertex1FifoIndex = input.Read( CACHED_VERTEX_BITS );
+            uint32_t relativeVertex2 = input.ReadVInt();
+
+            triangle[ 0 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex0FifoIndex ) & VERTEX_FIFO_MASK ] );
+            triangle[ 1 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex1FifoIndex ) & VERTEX_FIFO_MASK ] );
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex2 );
+
+            ++verticesRead;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+
+            break;
+        }
+        case IB_CACHED_FREE_FREE:
+        {
+            uint32_t vertex0FifoIndex = input.Read( CACHED_VERTEX_BITS );
+            uint32_t relativeVertex1 = input.ReadVInt();
+            uint32_t relativeVertex2 = input.ReadVInt();
+
+            triangle[ 0 ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - vertex0FifoIndex ) & VERTEX_FIFO_MASK ] );
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 1 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex1 );
+            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex2 );
+
+            verticesRead += 2;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+
+            break;
+        }
+        case IB_FREE_FREE_FREE:
+        {
+            uint32_t relativeVertex0 = input.ReadVInt();
+            uint32_t relativeVertex1 = input.ReadVInt();
+            uint32_t relativeVertex2 = input.ReadVInt();
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] =
+                triangle[ 0 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex0 );
+            vertexFifo[ ( verticesRead + 1 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 1 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex1 );
+            vertexFifo[ ( verticesRead + 2 ) & VERTEX_FIFO_MASK ] =
+                triangle[ 2 ] = static_cast< Ty >( ( newVertices - 1 ) - relativeVertex2 );
+
+            verticesRead += 3;
+
+            edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] );
+
+            ++edgesRead;
+
+            break;
+        }
+        case IB_EDGE_0_NEW:
+        {
+            const Edge& edge = edgeFifo[ ( edgesRead - 1 ) & EDGE_FIFO_MASK ]; // implicit edge FIFO index 0: the most recently pushed edge
+
+            triangle[ 0 ] = static_cast< Ty >( edge.second );
+            triangle[ 1 ] = static_cast< Ty >( edge.first );
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = // vertex FIFO slot, so mask with VERTEX_FIFO_MASK
+                triangle[ 2 ] = static_cast< Ty >( newVertices );
+
+            ++newVertices;
+            ++verticesRead;
+            break;
+        }
+        case IB_EDGE_1_NEW:
+        {
+            const Edge& edge = edgeFifo[ ( ( edgesRead - 1 ) - 1 ) & EDGE_FIFO_MASK ]; // implicit edge FIFO index 1: the edge pushed before the most recent one
+
+            triangle[ 0 ] = static_cast< Ty >( edge.second );
+            triangle[ 1 ] = static_cast< Ty >( edge.first );
+
+            vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = // vertex FIFO slot, so mask with VERTEX_FIFO_MASK
+                triangle[ 2 ] = static_cast< Ty >( newVertices );
+
+            ++newVertices;
+            ++verticesRead;
+            break;
+        }
+        }
+
+        edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] ); // edges (1,2) and (2,0) are pushed for every triangle; (0,1) only by the non-edge cases above
+
+        ++edgesRead;
+
+        edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] );
+
+        ++edgesRead;
+    }
+}
+
template void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream& input ) { @@ -36,7 +349,7 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream uint32_t edgesRead = 0; uint32_t verticesRead = 0; uint32_t newVertices = 0; - const Ty* triangleEnd = triangles + ( triangleCount * 3 ); + const Ty* triangleEnd = triangles + ( triangleCount * 3 ); // iterate through the triangles for ( Ty* triangle = triangles; triangle < triangleEnd; triangle += 3 ) @@ -52,8 +365,8 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream { case IB_NEW_VERTEX: - triangle[ readVertex ] = - vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = newVertices; + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = + triangle[ readVertex ] = static_cast< Ty >( newVertices ); ++readVertex; ++verticesRead; @@ -69,8 +382,8 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream uint32_t fifoIndex = input.Read( CACHED_EDGE_BITS ); const Edge& edge = edgeFifo[ ( ( edgesRead - 1 ) - fifoIndex ) & EDGE_FIFO_MASK ]; - triangle[ 0 ] = edge.second; - triangle[ 1 ] = edge.first; + triangle[ 0 ] = static_cast< Ty >( edge.second ); + triangle[ 1 ] = static_cast< Ty >( edge.first ); readVertex += 2; skipFirstEdge = true; @@ -81,9 +394,9 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream case IB_CACHED_VERTEX: { - uint32_t fifoIndex = input.Read( CACHED_VERTEX_BITS ); + uint32_t fifoIndex = input.Read( CACHED_VERTEX_BITS ); - triangle[ readVertex ] = vertexFifo[ ( ( verticesRead - 1 ) - fifoIndex ) & VERTEX_FIFO_MASK ]; + triangle[ readVertex ] = static_cast< Ty >( vertexFifo[ ( ( verticesRead - 1 ) - fifoIndex ) & VERTEX_FIFO_MASK ] ); ++readVertex; @@ -93,24 +406,12 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream case IB_FREE_VERTEX: { - uint32_t readByte = 0; - uint32_t bitsToShift = 0; - uint32_t relativeVertex = 
0; - - // V-int decoding, done inline. - do - { - readByte = input.Read( 8 ); - - relativeVertex |= ( readByte & 0x7F ) << bitsToShift; - bitsToShift += 7; - - } while ( readByte & 0x80 ); + uint32_t relativeVertex = input.ReadVInt(); uint32_t vertex = ( newVertices - 1 ) - relativeVertex; - triangle[ readVertex ] = - vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = vertex; + vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = + triangle[ readVertex ] = static_cast< Ty >( vertex ); ++verticesRead; ++readVertex; @@ -121,7 +422,7 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream if ( !skipFirstEdge ) { - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 0 ], triangle[ 1 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 0 ], triangle[ 1 ] ); ++edgesRead; } @@ -136,11 +437,11 @@ void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream ++verticesRead; } - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 1 ], triangle[ 2 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 1 ], triangle[ 2 ] ); ++edgesRead; - edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set(triangle[ 2 ], triangle[ 0 ]); + edgeFifo[ edgesRead & EDGE_FIFO_MASK ].set( triangle[ 2 ], triangle[ 0 ] ); ++edgesRead; }
@@ -155,3 +456,13 @@ void DecompressIndexBuffer( uint32_t* triangles, uint32_t triangleCount, ReadBit
 {
     DecompressIndexBuffer( triangles, triangleCount, input );
 }
+
+void DecompressIndexBuffer2( uint16_t* triangles, uint32_t triangleCount, ReadBitstream& input ) // 16 bit index entry point for the DecompressIndexBuffer2 template
+{
+    DecompressIndexBuffer2< uint16_t >( triangles, triangleCount, input ); // explicit template argument: an unqualified call would pick this overload again and recurse
+}
+
+void DecompressIndexBuffer2( uint32_t* triangles, uint32_t triangleCount, ReadBitstream& input ) // 32 bit index entry point for the DecompressIndexBuffer2 template
+{
+    DecompressIndexBuffer2< uint32_t >( triangles, triangleCount, input ); // explicit template argument: an unqualified call would pick this overload again and recurse
+}
\ No newline at end of file
diff --git a/3rdparty/ib-compress/indexbufferdecompression.h b/3rdparty/ib-compress/indexbufferdecompression.h
index 2e20dba5..96149d3b 100644
--- a/3rdparty/ib-compress/indexbufferdecompression.h
+++ b/3rdparty/ib-compress/indexbufferdecompression.h
@@ -27,15 +27,29 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #pragma once
 
 #include
 #include "readbitstream.h"
 
 // Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be in pre-transform cache optimised
 // order.
 // Parameters:
-// [out] triangles - Triangle list index buffer (3 indices to vertices per triangle), output from the decompression.
+// [out] triangles - Triangle list index buffer (3 indices to vertices per triangle), output from the decompression - 16bit indices
 // [in] triangle count - The number of triangles to decompress.
 // [in] input - The bit stream that the compressed data will be read from.
-template
-void DecompressIndexBuffer( Ty* triangles, uint32_t triangleCount, ReadBitstream& input );
+void DecompressIndexBuffer( uint16_t* triangles, uint32_t triangleCount, ReadBitstream& input );
+
+// Same as above but 32 bit indices.
+void DecompressIndexBuffer( uint32_t* triangles, uint32_t triangleCount, ReadBitstream& input );
+
+
+// Decompress an index buffer that was compressed with CompressIndexBuffer2, reading the compressed data from a bitstream and
+// writing the decoded triangle list to the triangles array.
+// Parameters:
+// [out] triangles - Triangle list index buffer (3 indices to vertices per triangle), output from the decompression - 16bit indices
+// [in] triangle count - The number of triangles to decompress.
+// [in] input - The bit stream that the compressed data will be read from.
+void DecompressIndexBuffer2( uint16_t* triangles, uint32_t triangleCount, ReadBitstream& input );
+
+// Same as above but 32bit indices
+void DecompressIndexBuffer2( uint32_t* triangles, uint32_t triangleCount, ReadBitstream& input );
 
 #endif // -- INDEX_BUFFER_DECOMPRESSION_H__
\ No newline at end of file
diff --git a/3rdparty/ib-compress/indexcompressionconstants.h b/3rdparty/ib-compress/indexcompressionconstants.h
index af9e8795..4a2afef6 100644
--- a/3rdparty/ib-compress/indexcompressionconstants.h
+++ b/3rdparty/ib-compress/indexcompressionconstants.h
@@ -26,6 +26,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define INDEX_COMPRESSION_CONSTANTS_H__
 #pragma once
 
+#include
+
 // Constant fifo and code sizes.
 const int VERTEX_FIFO_SIZE = 32;
 const int VERTEX_FIFO_MASK = VERTEX_FIFO_SIZE - 1;
@@ -35,10 +37,12 @@ const int CACHED_EDGE_BITS = 5;
 const int CACHED_VERTEX_BITS = 5;
 const int IB_CODE_BITS = 2;
 
+const int IB_TRIANGLE_CODE_BITS = 4; // width of the per-triangle code read by DecompressIndexBuffer2 (IndexBufferTriangleCodes values 0-15)
+
 // Edge in the edge fifo.
 struct Edge
 {
-    void set(uint32_t f, uint32_t s)
+    void set( uint32_t f, uint32_t s )
     {
         first = f;
         second = s;
@@ -48,7 +52,7 @@ struct Edge
     uint32_t second;
 };
 
-// Codes
+// These are the vertex/edge codes for CompressIndexBuffer
 enum IndexBufferCodes
 {
     // Represents a yet un-seen vertex.
@@ -65,4 +69,25 @@ enum IndexBufferCodes
     IB_FREE_VERTEX = 3
 };
 
+// These are the triangle codes for CompressIndexBuffer2
+enum IndexBufferTriangleCodes
+{
+    IB_EDGE_NEW = 0, // edge from the edge fifo (explicit index) plus a new vertex
+    IB_EDGE_CACHED = 1, // edge from the edge fifo plus a vertex from the vertex fifo
+    IB_EDGE_FREE = 2, // edge from the edge fifo plus a vertex stored as a relative v-int
+    IB_NEW_NEW_NEW = 3,
+    IB_NEW_NEW_CACHED = 4,
+    IB_NEW_NEW_FREE = 5,
+    IB_NEW_CACHED_CACHED = 6,
+    IB_NEW_CACHED_FREE = 7,
+    IB_NEW_FREE_CACHED = 8,
+    IB_NEW_FREE_FREE = 9,
+    IB_CACHED_CACHED_CACHED = 10,
+    IB_CACHED_CACHED_FREE = 11,
+    IB_CACHED_FREE_FREE = 12,
+    IB_FREE_FREE_FREE = 13,
+    IB_EDGE_0_NEW = 14, // most recent edge in the edge fifo (no index stored) plus a new vertex
+    IB_EDGE_1_NEW = 15 // second most recent edge in the edge fifo (no index stored) plus a new vertex
+};
+
 #endif
\ No newline at end of file
diff --git a/3rdparty/ib-compress/readbitstream.h b/3rdparty/ib-compress/readbitstream.h
index 251f9d67..06cf88d3 100644
--- a/3rdparty/ib-compress/readbitstream.h
+++ b/3rdparty/ib-compress/readbitstream.h
@@ -54,6 +54,8 @@ public:
     // Get the buffer size of this in bytes
     size_t Size() const { return m_bufferSize; }
 
+    uint32_t ReadVInt(); // Read a variable length unsigned integer (7 payload bits per byte, high bit set on all but the last byte).
+
 private:
 
     uint64_t m_bitBuffer;
@@ -122,4 +124,22 @@ RBS_INLINE uint32_t ReadBitstream::Read( uint32_t bitCount )
     return result;
 }
 
+RBS_INLINE uint32_t ReadBitstream::ReadVInt() // Decodes a v-int, least significant 7 bit group first.
+{
+    uint32_t bitsToShift = 0;
+    uint32_t result = 0;
+    uint32_t readByte;
+
+    do
+    {
+        readByte = Read( 8 );
+
+        result |= ( readByte & 0x7F ) << bitsToShift; // low 7 bits are payload
+        bitsToShift += 7;
+
+    } while ( ( readByte & 0x80 ) && bitsToShift < 32 ); // high bit means another byte follows; stop after 5 bytes so a corrupt stream cannot shift by >= 32 bits
+
+    return result;
+}
+
 #endif // -- READ_BIT_STREAM_H__
diff --git a/3rdparty/ib-compress/writebitstream.h b/3rdparty/ib-compress/writebitstream.h
index 648001f4..0c2ed22f 100644
--- a/3rdparty/ib-compress/writebitstream.h
+++ b/3rdparty/ib-compress/writebitstream.h
@@ -62,6 +62,9 @@ public:
     // Write a number of bits to the stream.
     void Write( uint32_t value, uint32_t bitCount );
 
+    // Write a V int to the stream.
+    void WriteVInt( uint32_t value );
+
     // Get the size in bytes
     size_t ByteSize() const { return ( m_size + 7 ) >> 3; }
 
@@ -123,6 +126,19 @@ WBS_INLINE void WriteBitstream::Write( uint32_t value, uint32_t bitCount )
     m_size += bitCount;
 }
 
+WBS_INLINE void WriteBitstream::WriteVInt( uint32_t value ) // Writes value as a v-int: 7 bits per byte, least significant group first.
+{
+    do
+    {
+        uint32_t lower7 = value & 0x7F; // next 7 payload bits
+
+        value >>= 7;
+
+        Write( lower7 | ( value > 0 ? 0x80 : 0 ), 8 ); // set the high bit when more groups follow
+
+    } while ( value > 0 ); // do/while so that a value of 0 still emits one byte
+}
+
 inline void WriteBitstream::Finish()
 {
     if ( m_bufferCursor > m_bufferEnd - 8 )