Added Index Buffer Compression.

2014-09-29 21:57:14 -07:00 · 2014-09-29 21:57:14 -07:00 · b7265b7221
commit b7265b7221
parent 9ba0956812
8 changed files with 874 additions and 0 deletions
--- a/3rdparty/ib-compress/IndexBufferCompession.cpp
+++ b/3rdparty/ib-compress/IndexBufferCompession.cpp
@ -0,0 +1,245 @@
+/*
+Copyright (c) 2014, Conor Stokes
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include "IndexBufferCompression.h"
+#include "WriteBitstream.h"
+#include "IndexCompressionConstants.h"
+#include <assert.h>
+
+// Force-inline helper for the hot per-vertex routine in this file.
+#ifdef _MSC_VER
+#define IBC_INLINE __forceinline
+#else
+// On GCC/Clang, always_inline is only an attribute: without the `inline` keyword the compiler
+// warns that the function "might not be inlinable", so the two are paired here.
+#define IBC_INLINE inline __attribute__((always_inline))
+#endif 
+
+// Sentinel stored in the vertex remap table for a vertex that has not been given a new index yet.
+const uint32_t VERTEX_NOT_MAPPED = 0xFFFFFFFF;
+
+// Emit the code for a single triangle vertex.
+//
+// One of three codes is written to the stream:
+//   - IB_NEW_VERTEX    : first time this vertex is seen; it is remapped to the next new index.
+//   - IB_CACHED_VERTEX : the vertex is still in the vertex fifo; followed by its distance from the newest entry.
+//   - IB_FREE_VERTEX   : seen before but no longer in the fifo; followed by a v-int of its distance from
+//                        the most recently created new vertex.
+//
+// New and free vertices are pushed onto the vertex fifo, cached vertices are not.
+//
+// Parameters:
+//     [in]     vertex         - Original index of the vertex to output (used to index vertexRemap).
+//     [in,out] vertexRemap    - Old index -> new index table, VERTEX_NOT_MAPPED where no mapping exists yet.
+//     [in,out] newVertexCount - Number of new vertices emitted so far.
+//     [in,out] vertexFifo     - Ring buffer of recently emitted vertices.
+//     [in,out] verticesRead   - Total number of pushes made onto the vertex fifo.
+//     [in,out] output         - Stream receiving the codes.
+static IBC_INLINE void OutputVertex( uint32_t vertex,
+							         uint32_t* vertexRemap,
+							         uint32_t& newVertexCount,
+							         uint32_t* vertexFifo,
+							         uint32_t& verticesRead,
+							         WriteBitstream& output )
+{
+	if ( vertexRemap[ vertex ] != VERTEX_NOT_MAPPED )
+	{
+		// Already remapped, so first look for it among the live fifo entries, newest first.
+		int32_t oldestSlot = 0;
+
+		if ( verticesRead >= VERTEX_FIFO_SIZE )
+		{
+			oldestSlot = verticesRead - VERTEX_FIFO_SIZE;
+		}
+
+		int32_t slot = verticesRead - 1;
+
+		while ( slot >= oldestSlot )
+		{
+			if ( vertexFifo[ slot & VERTEX_FIFO_MASK ] == vertex )
+			{
+				// Hit: the payload is how far back from the newest entry the vertex sits.
+				output.Write( IB_CACHED_VERTEX, IB_CODE_BITS );
+				output.Write( ( verticesRead - 1 ) - slot, CACHED_VERTEX_BITS );
+
+				return;
+			}
+
+			--slot;
+		}
+
+		// Miss: fall back to a free vertex, addressed relative to the latest new vertex.
+		output.Write( IB_FREE_VERTEX, IB_CODE_BITS );
+
+		uint32_t remaining = ( newVertexCount - 1 ) - vertexRemap[ vertex ];
+
+		// v-int: 7 payload bits per byte, top bit set on every byte except the last.
+		for ( ;; )
+		{
+			const uint32_t payload = remaining & 0x7F;
+
+			remaining >>= 7;
+
+			if ( remaining == 0 )
+			{
+				output.Write( payload, 8 );
+				break;
+			}
+
+			output.Write( payload | 0x80, 8 );
+		}
+	}
+	else
+	{
+		// Never seen: assign the current high watermark as its new index.
+		vertexRemap[ vertex ] = newVertexCount;
+
+		output.Write( IB_NEW_VERTEX, IB_CODE_BITS );
+
+		++newVertexCount;
+	}
+
+	// New and free vertices both become the newest entry in the vertex fifo.
+	vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = vertex;
+
+	++verticesRead;
+}
+
+
+// Compress a triangle list index buffer into a bitstream.
+//
+// Each triangle is encoded either as a cached edge (an edge of a recent triangle, traversed in the
+// opposite direction) plus one vertex code, or as three vertex codes. When a cached edge is used the
+// triangle is emitted starting from that edge, so its vertices may be rotated (winding is preserved).
+//
+// Parameters:
+//     [in]     triangles     - Triangle list, 3 vertex indices per triangle.
+//     [in]     triangleCount - Number of triangles in `triangles`.
+//     [out]    vertexRemap   - Array of at least vertexCount entries. Every entry is first reset to
+//                              VERTEX_NOT_MAPPED (0xFFFFFFFF); entries for vertices that are referenced
+//                              receive their new index.
+//     [in]     vertexCount   - Number of entries in vertexRemap, must be less than 0xFFFFFFFF.
+//     [in,out] output        - Stream the codes are written to. It is not finished/flushed here.
+//
+// Note: indices in `triangles` are used to index vertexRemap without a range check.
+void CompressIndexBuffer( const uint32_t* triangles,
+						  uint32_t triangleCount,
+						  uint32_t* vertexRemap,
+						  uint32_t vertexCount,
+						  WriteBitstream& output )
+{
+	Edge            edgeFifo[ EDGE_FIFO_SIZE ];
+	uint32_t        vertexFifo[ VERTEX_FIFO_SIZE ];
+
+	uint32_t        edgesRead      = 0;
+	uint32_t        verticesRead   = 0;
+	uint32_t        newVertices    = 0;
+
+	// Widen before multiplying so very large triangle counts cannot wrap in 32 bits.
+	const uint32_t* triangleEnd    = triangles + ( static_cast< size_t >( triangleCount ) * 3 );
+
+	assert( vertexCount < 0xFFFFFFFF );
+
+	uint32_t*       vertexRemapEnd = vertexRemap + vertexCount;
+
+	// clear the vertex remapping to "not found" value of 0xFFFFFFFF - dirty, but low overhead.
+	for ( auto remappedVertex = vertexRemap; remappedVertex < vertexRemapEnd; ++remappedVertex )
+	{
+		*remappedVertex = VERTEX_NOT_MAPPED;
+	}
+
+	// iterate through the triangles
+	for ( auto triangle = triangles; triangle < triangleEnd; triangle += 3 )
+	{
+		int32_t lowestEdgeCursor = edgesRead >= EDGE_FIFO_SIZE ? edgesRead - EDGE_FIFO_SIZE : 0;
+		int32_t edgeCursor       = edgesRead - 1;
+		bool    foundEdge        = false;
+
+		// Index (0-2) of the triangle vertex that is NOT on the matched edge; only meaningful if foundEdge.
+		int32_t freeVertex       = 0;
+
+		// Probe back through the edge fifo to see if one of the triangle edges is in the FIFO
+		for ( ; edgeCursor >= lowestEdgeCursor; --edgeCursor )
+		{
+			// The edge fifo must be indexed with the edge mask (the vertex mask only worked because
+			// both fifos currently have the same size).
+			const Edge& edge = edgeFifo[ edgeCursor & EDGE_FIFO_MASK ];
+
+			// check all the edges in order and save the free vertex.
+			if ( edge.second == triangle[ 0 ] && edge.first == triangle[ 1 ] )
+			{
+				foundEdge  = true;
+				freeVertex = 2;
+				break;
+			}
+			else if ( edge.second == triangle[ 1 ] && edge.first == triangle[ 2 ] )
+			{
+				foundEdge  = true;
+				freeVertex = 0;
+				break;
+			}
+			else if ( edge.second == triangle[ 2 ] && edge.first == triangle[ 0 ] )
+			{
+				foundEdge  = true;
+				freeVertex = 1;
+				break;
+			}
+		}
+
+		// we found an edge so write it out, then output the vertex
+		if ( foundEdge )
+		{
+			output.Write( IB_CACHED_EDGE, IB_CODE_BITS );
+			output.Write( ( edgesRead - 1 ) - edgeCursor, CACHED_EDGE_BITS );
+
+			// View the triangle as rotated so the matched edge comes first:
+			// ( following, preceding, apex ), where following == edge.second and preceding == edge.first.
+			const uint32_t apex      = triangle[ freeVertex ];
+			const uint32_t following = triangle[ ( freeVertex + 1 ) % 3 ];
+			const uint32_t preceding = triangle[ ( freeVertex + 2 ) % 3 ];
+
+			OutputVertex( apex, vertexRemap, newVertices, vertexFifo, verticesRead, output );
+
+			// The edge is in reverse order to the last triangle it occurred on (it only matches if this
+			// is the case), so its vertices go into the vertex fifo as second, then first.
+			vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = following;
+
+			++verticesRead;
+
+			vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = preceding;
+
+			++verticesRead;
+
+			// Populate the edge fifo with the two remaining edges.
+			// Note - the winding order is important as we'll need to re-produce this on decompression.
+			// The edges are put in as if the found edge is the first edge in the triangle (which it
+			// will be when we reconstruct).
+			edgeFifo[ edgesRead & EDGE_FIFO_MASK ] = { preceding, apex };
+
+			++edgesRead;
+
+			edgeFifo[ edgesRead & EDGE_FIFO_MASK ] = { apex, following };
+
+			++edgesRead;
+		}
+		else
+		{
+			// no edge, so we need to output all the vertices.
+			OutputVertex( triangle[ 0 ], vertexRemap, newVertices, vertexFifo, verticesRead, output );
+			OutputVertex( triangle[ 1 ], vertexRemap, newVertices, vertexFifo, verticesRead, output );
+			OutputVertex( triangle[ 2 ], vertexRemap, newVertices, vertexFifo, verticesRead, output );
+
+			// populate the edge fifo with the 3 most recent edges
+			edgeFifo[ edgesRead & EDGE_FIFO_MASK ] = { triangle[ 0 ], triangle[ 1 ] };
+
+			++edgesRead;
+
+			edgeFifo[ edgesRead & EDGE_FIFO_MASK ] = { triangle[ 1 ], triangle[ 2 ] };
+
+			++edgesRead;
+
+			edgeFifo[ edgesRead & EDGE_FIFO_MASK ] = { triangle[ 2 ], triangle[ 0 ] };
+
+			++edgesRead;
+		}
+	}
+}
--- a/3rdparty/ib-compress/IndexBufferCompression.h
+++ b/3rdparty/ib-compress/IndexBufferCompression.h
@ -0,0 +1,47 @@
+/*
+Copyright (c) 2014, Conor Stokes
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef INDEX_BUFFER_COMPRESSION_H__
+#define INDEX_BUFFER_COMPRESSION_H__
+#pragma once
+
+class WriteBitstream;
+
+#include <cstdint>
+
+// Compress an index buffer, writing the results out to a bitstream and providing a vertex remapping (which will be
+// in pre-transform cache optimised order).
+//
+// Parameters: 
+//     [in]  triangles      - A typical triangle list index buffer (3 indices to vertices per triangle).
+//     [in]  triangleCount  - The number of triangles to process.
+//     [out] vertexRemap    - This will be populated with re-mappings that map old vertices to new vertices,
+//                            where indexing with the old vertex index will get you the new one. 
+//                            It should be allocated as an array with at least vertexCount entries.
+//     [in]  vertexCount    - The number of vertices in the mesh. This must be less than 0xFFFFFFFF (2^32 - 1).
+//     [in,out] output      - The stream that the compressed data will be written to. Note that we will not flush/finish the stream
+//                            in case something else is going to be written after, so WriteBitstream::Finish will need to be called after this.
+void CompressIndexBuffer( const uint32_t* triangles, uint32_t triangleCount, uint32_t* vertexRemap, uint32_t vertexCount, WriteBitstream& output );
+
+#endif // -- INDEX_BUFFER_COMPRESSION_H__
--- a/3rdparty/ib-compress/IndexBufferDecompression.cpp
+++ b/3rdparty/ib-compress/IndexBufferDecompression.cpp
@ -0,0 +1,146 @@
+/*
+Copyright (c) 2014, Conor Stokes
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#include "IndexBufferDecompression.h"
+#include "ReadBitstream.h"
+#include "IndexCompressionConstants.h"
+#include <assert.h>
+
+// Decompress a triangle list index buffer from a bitstream.
+//
+// The decoder keeps its own edge and vertex fifos and updates them once per decoded triangle, so that
+// relative fifo indices read from the stream can be resolved. Indices written to `triangles` are the
+// remapped ones (new vertices are numbered 0, 1, 2, ... in order of first appearance).
+//
+// Parameters:
+//     [out]    triangles     - Receives 3 indices per triangle; must hold triangleCount * 3 entries.
+//     [in]     triangleCount - Number of triangles to decode.
+//     [in,out] input         - Stream to read the codes from.
+//
+// Note: the stream contents are not validated beyond the debug-only assert on cached edge placement.
+void DecompressIndexBuffer( uint32_t* triangles, uint32_t triangleCount, ReadBitstream& input )
+{
+	Edge            edgeFifo[ EDGE_FIFO_SIZE ];
+	uint32_t        vertexFifo[ VERTEX_FIFO_SIZE ];
+
+	uint32_t        edgesRead    = 0;
+	uint32_t        verticesRead = 0;
+	uint32_t        newVertices  = 0;
+
+	// Widen before multiplying so very large triangle counts cannot wrap in 32 bits.
+	const uint32_t* triangleEnd  = triangles + ( static_cast< size_t >( triangleCount ) * 3 );
+
+	// iterate through the triangles
+	for ( uint32_t* triangle = triangles; triangle < triangleEnd; triangle += 3 )
+	{
+		int  readVertex = 0;
+		bool skipFirstEdge = false;
+
+		while ( readVertex < 3 )
+		{
+			IndexBufferCodes code = static_cast< IndexBufferCodes >( input.Read( IB_CODE_BITS ) );
+
+			switch ( code )
+			{
+			case IB_NEW_VERTEX:
+
+				// New vertices are numbered sequentially and enter the vertex fifo.
+				triangle[ readVertex ] =
+					vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = newVertices;
+
+				++readVertex;
+				++verticesRead;
+				++newVertices;
+
+				break;
+
+			case IB_CACHED_EDGE:
+
+			{
+				// A cached edge is only ever the first code of a triangle.
+				assert( readVertex == 0 );
+
+				uint32_t    fifoIndex = input.Read( CACHED_EDGE_BITS );
+				const Edge& edge      = edgeFifo[ ( ( edgesRead - 1 ) - fifoIndex ) & EDGE_FIFO_MASK ];
+
+				// The shared edge is traversed in the opposite direction by this triangle.
+				triangle[ 0 ] = edge.second;
+				triangle[ 1 ] = edge.first;
+
+				readVertex    += 2;
+				skipFirstEdge  = true;
+
+				break;
+			}
+
+			case IB_CACHED_VERTEX:
+
+			{
+				uint32_t fifoIndex  = input.Read( CACHED_VERTEX_BITS );
+				
+				// Cached vertices are not re-inserted into the fifo.
+				triangle[ readVertex ] = vertexFifo[ ( ( verticesRead - 1 ) - fifoIndex ) & VERTEX_FIFO_MASK ];
+
+				++readVertex;
+
+				break;
+			}
+
+			case IB_FREE_VERTEX:
+
+			{
+				uint32_t readByte       = 0;
+				uint32_t bitsToShift    = 0;
+				uint32_t relativeVertex = 0;
+
+				// V-int decoding, done inline.
+				do
+				{
+					readByte = input.Read( 8 );
+
+					// A well-formed v-int never needs a shift of 32 or more; skipping such payload
+					// bits keeps a malformed stream from triggering an undefined shift.
+					if ( bitsToShift < 32 )
+					{
+						relativeVertex |= ( readByte & 0x7F ) << bitsToShift;
+					}
+
+					bitsToShift += 7;
+
+				} while ( readByte & 0x80 );
+
+				// Free vertices are stored relative to the most recent new vertex.
+				uint32_t vertex = ( newVertices - 1 ) - relativeVertex;
+
+				triangle[ readVertex ] =
+					vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = vertex;
+
+				++verticesRead;
+				++readVertex;
+				break;
+			}
+			}
+		}
+
+		if ( !skipFirstEdge )
+		{
+			edgeFifo[ edgesRead & EDGE_FIFO_MASK ] = { triangle[ 0 ], triangle[ 1 ] };
+
+			++edgesRead;
+		}
+		else // first 2 verts were an edge case, so insert them into the vertex fifo. 
+		{
+			// The vertex fifo must be indexed with the vertex mask (the edge mask only worked because
+			// both fifos currently have the same size).
+			vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = triangle[ 0 ];
+
+			++verticesRead;
+
+			vertexFifo[ verticesRead & VERTEX_FIFO_MASK ] = triangle[ 1 ];
+
+			++verticesRead;
+		}
+
+		edgeFifo[ edgesRead & EDGE_FIFO_MASK ] = { triangle[ 1 ], triangle[ 2 ] };
+
+		++edgesRead;
+
+		edgeFifo[ edgesRead & EDGE_FIFO_MASK ] = { triangle[ 2 ], triangle[ 0 ] };
+
+		++edgesRead;
+	}
+}
--- a/3rdparty/ib-compress/IndexBufferDecompression.h
+++ b/3rdparty/ib-compress/IndexBufferDecompression.h
@ -0,0 +1,41 @@
+/*
+Copyright (c) 2014, Conor Stokes
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef INDEX_BUFFER_DECOMPRESSION_H__
+#define INDEX_BUFFER_DECOMPRESSION_H__
+#pragma once
+
+#include <cstdint>
+
+class ReadBitstream;
+
+// Decompress an index buffer, reading the compressed codes from a bitstream and writing out a triangle list.
+//
+// Parameters: 
+//     [out] triangles      - Triangle list index buffer (3 indices to vertices per triangle), output from the decompression.
+//     [in]  triangleCount  - The number of triangles to decompress.
+//     [in]  input          - The bit stream that the compressed data will be read from.
+void DecompressIndexBuffer( uint32_t* triangles, uint32_t triangleCount, ReadBitstream& input );
+
+#endif // -- INDEX_BUFFER_DECOMPRESSION_H__
--- a/3rdparty/ib-compress/IndexCompressionConstants.h
+++ b/3rdparty/ib-compress/IndexCompressionConstants.h
@ -0,0 +1,62 @@
+/*
+Copyright (c) 2014, Conor Stokes
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef INDEX_COMPRESSION_CONSTANTS_H__
+#define INDEX_COMPRESSION_CONSTANTS_H__
+#pragma once
+
+// uint32_t is used below, so this header has to pull in <cstdint> itself to be self-contained.
+#include <cstdint>
+
+// Constant fifo and code sizes.
+// The fifos are ring buffers indexed with "counter & MASK", so the sizes must be powers of two, and
+// the cached index bit widths must be able to address every fifo entry.
+const int VERTEX_FIFO_SIZE   = 32;
+const int VERTEX_FIFO_MASK   = VERTEX_FIFO_SIZE - 1;
+const int EDGE_FIFO_SIZE     = 32;
+const int EDGE_FIFO_MASK     = EDGE_FIFO_SIZE - 1;
+const int CACHED_EDGE_BITS   = 5;
+const int CACHED_VERTEX_BITS = 5;
+const int IB_CODE_BITS       = 2;
+
+static_assert( ( VERTEX_FIFO_SIZE & VERTEX_FIFO_MASK ) == 0, "VERTEX_FIFO_SIZE must be a power of two" );
+static_assert( ( EDGE_FIFO_SIZE & EDGE_FIFO_MASK ) == 0, "EDGE_FIFO_SIZE must be a power of two" );
+static_assert( ( 1 << CACHED_VERTEX_BITS ) == VERTEX_FIFO_SIZE, "CACHED_VERTEX_BITS must address the whole vertex fifo" );
+static_assert( ( 1 << CACHED_EDGE_BITS ) == EDGE_FIFO_SIZE, "CACHED_EDGE_BITS must address the whole edge fifo" );
+
+// Edge in the edge fifo: a directed pair of vertex indices (first -> second).
+struct Edge
+{
+	uint32_t first;
+	uint32_t second;
+};
+
+// Codes written at the start of every vertex/edge entry in the compressed stream (IB_CODE_BITS wide).
+enum IndexBufferCodes
+{
+	// Represents a yet un-seen vertex.
+	IB_NEW_VERTEX    = 0,
+
+	// Represents 2 vertices on an edge in the edge fifo, which will be used as the first 2 vertices of the
+	// triangle.
+	IB_CACHED_EDGE   = 1,
+	
+	// Represents a vertex that has been seen recently and is still in the vertex fifo.
+	IB_CACHED_VERTEX = 2,
+
+	// Represents a vertex that has been seen before, but is no longer in the vertex fifo.
+	IB_FREE_VERTEX   = 3
+};
+
+#endif 
--- a/3rdparty/ib-compress/ReadBitstream.h
+++ b/3rdparty/ib-compress/ReadBitstream.h
@ -0,0 +1,125 @@
+/*
+Copyright (c) 2014, Conor Stokes
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef READ_BIT_STREAM_H__
+#define READ_BIT_STREAM_H__
+#pragma once
+
+#include <cstdint>
+
+#ifdef _MSC_VER
+
+#define RBS_INLINE __forceinline
+
+#else
+
+// always_inline on its own does not make a function `inline`. Functions defined in this header at
+// namespace scope need the keyword, otherwise every translation unit that includes the header emits
+// its own external definition.
+#define RBS_INLINE inline __attribute__((always_inline))
+
+#endif 
+
+// Very simple reader bitstream.
+// Bits are consumed least significant bit first from little-endian 64 bit words.
+// Reading past the end of the buffer is not an error: the missing bytes read as zero.
+class ReadBitstream
+{
+public:
+
+	// Construct the bitstream over a fixed byte buffer, which must outlive the stream.
+	// The buffer does not need to be padded; only bytes inside [buffer, buffer + bufferSize) are accessed.
+	ReadBitstream( const uint8_t* buffer, size_t bufferSize );
+
+	~ReadBitstream() {}
+
+	// Read a number of bits (0 to 32) and return them in the low bits of the result.
+	uint32_t Read( uint32_t bitcount );
+
+	// Get the buffer size of this in bytes
+	size_t Size() const { return m_bufferSize; }
+
+private:
+
+	// Fetch the next (up to) 8 bytes at the cursor as a little-endian word and advance the cursor.
+	uint64_t LoadWord();
+
+	uint64_t m_bitBuffer;
+
+	const uint8_t* m_buffer;
+	const uint8_t* m_cursor;
+
+	size_t m_bufferSize;
+	uint32_t m_bitsLeft;
+
+};
+
+RBS_INLINE uint64_t ReadBitstream::LoadWord()
+{
+	const size_t available = m_bufferSize - static_cast< size_t >( m_cursor - m_buffer );
+	uint64_t     word      = 0;
+
+	if ( available >= 8 )
+	{
+		// Common case: a whole word is available.
+		word  = m_cursor[ 0 ];
+		word |= static_cast< uint64_t >( m_cursor[ 1 ] ) << 8;
+		word |= static_cast< uint64_t >( m_cursor[ 2 ] ) << 16;
+		word |= static_cast< uint64_t >( m_cursor[ 3 ] ) << 24;
+		word |= static_cast< uint64_t >( m_cursor[ 4 ] ) << 32;
+		word |= static_cast< uint64_t >( m_cursor[ 5 ] ) << 40;
+		word |= static_cast< uint64_t >( m_cursor[ 6 ] ) << 48;
+		word |= static_cast< uint64_t >( m_cursor[ 7 ] ) << 56;
+
+		m_cursor += 8;
+	}
+	else
+	{
+		// Tail of the buffer: take what is there, the rest of the word stays zero.
+		for ( size_t byteIndex = 0; byteIndex < available; ++byteIndex )
+		{
+			word |= static_cast< uint64_t >( m_cursor[ byteIndex ] ) << ( 8 * byteIndex );
+		}
+
+		m_cursor += available;
+	}
+
+	return word;
+}
+
+inline ReadBitstream::ReadBitstream( const uint8_t* buffer, size_t bufferSize )
+	: m_bitBuffer( 0 )
+	, m_buffer( buffer )
+	, m_cursor( buffer )
+	, m_bufferSize( bufferSize )
+	, m_bitsLeft( 64 )
+{
+	// Prime the bit buffer. Buffers shorter than 8 bytes are handled too (previously they left the
+	// bit buffer uninitialised).
+	m_bitBuffer = LoadWord();
+}
+
+
+RBS_INLINE uint32_t ReadBitstream::Read( uint32_t bitCount )
+{
+	// Build the mask in 64 bits: "1 << bitCount" as an int is undefined for bitCount of 31 or 32.
+	const uint64_t mask   = ( static_cast< uint64_t >( 1 ) << bitCount ) - 1;
+
+	// When the current word is fully consumed the shift below would be by 64 bits, which is
+	// undefined, so nothing is taken from it in that case.
+	uint32_t       result = 0;
+
+	if ( m_bitsLeft > 0 )
+	{
+		result = static_cast< uint32_t >( ( m_bitBuffer >> ( 64 - m_bitsLeft ) ) & mask );
+	}
+
+	if ( m_bitsLeft < bitCount )
+	{
+		// The value straddles two words: the low m_bitsLeft bits came from the old word, the
+		// remainder comes from the bottom of the next one.
+		m_bitBuffer = LoadWord();
+
+		result |= static_cast< uint32_t >( ( m_bitBuffer << m_bitsLeft ) & mask );
+		m_bitsLeft = 64 - ( bitCount - m_bitsLeft );
+	}
+	else
+	{
+		m_bitsLeft -= bitCount;
+	}
+
+	return result;
+}
+
+#endif // -- READ_BIT_STREAM_H__
--- a/3rdparty/ib-compress/WriteBitstream.h
+++ b/3rdparty/ib-compress/WriteBitstream.h
@ -0,0 +1,161 @@
+/*
+Copyright (c) 2014, Conor Stokes
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef WRITE_BIT_STREAM_H__
+#define WRITE_BIT_STREAM_H__
+#pragma once
+
+#include <cstdint>
+#include <memory.h>
+
+#ifdef _MSC_VER
+#define WBS_INLINE __forceinline
+#else
+// always_inline on its own does not make a function `inline`. Functions defined in this header at
+// namespace scope need the keyword, otherwise every translation unit that includes the header emits
+// its own external definition.
+#define WBS_INLINE inline __attribute__((always_inline))
+#endif 
+
+// Very simple bitstream for writing that will grow to accommodate written bits.
+// Bits are packed least significant bit first into little-endian 64 bit words.
+class WriteBitstream
+{
+public:
+
+	// Construct the bit stream with an initial buffer capacity in bytes. A multiple of 8 is the natural
+	// choice, but any value (including 0) works as the buffer grows on demand.
+	WriteBitstream( size_t initialBufferCapacity = 16 )
+	{
+		m_bufferCursor =
+			m_buffer = new uint8_t[ initialBufferCapacity ];
+		m_bufferEnd = m_buffer + initialBufferCapacity;
+		m_size = 0;
+		m_bitsLeft = 64;
+		m_bitBuffer = 0;
+	}
+
+	~WriteBitstream()
+	{
+		delete[] m_buffer;
+	}
+
+	// Size in bits.
+	size_t Size() const { return m_size; }
+
+	// Write the low bitCount bits (0 to 32) of value to the stream; higher bits of value are ignored.
+	void Write( uint32_t value, uint32_t bitCount );
+
+	// Get the size in bytes 
+	size_t ByteSize() const { return ( m_size + 7 ) >> 3; }
+
+	// Finish writing by flushing the pending word to the buffer (always a full 8 bytes).
+	// Call once, after the last Write; the internal bit state is not reset.
+	void Finish();
+
+	// Get the raw data for this buffer. The pointer is invalidated when the buffer grows.
+	const uint8_t* RawData() const { return m_buffer; }
+
+private:
+
+	// If we need to grow the buffer.
+	void GrowBuffer();
+
+	// Store the pending 64 bit word at the cursor (growing the buffer if needed) and advance by 8 bytes.
+	void FlushWord();
+
+	// Not copyable 
+	WriteBitstream( const WriteBitstream& ) = delete;
+
+	// Not assignable
+	WriteBitstream& operator=( const WriteBitstream& ) = delete;
+
+	uint64_t  m_bitBuffer;
+	size_t    m_size;
+	uint8_t*  m_buffer;
+	uint8_t*  m_bufferCursor;
+	uint8_t*  m_bufferEnd;
+	uint32_t  m_bitsLeft;
+};
+
+// Growth is the cold path, so it is left to the compiler whether to inline it.
+inline void WriteBitstream::GrowBuffer()
+{
+	const size_t bufferSize     = m_bufferEnd - m_buffer;
+	const size_t bufferPosition = m_bufferCursor - m_buffer;
+
+	// Double the capacity, but always gain at least one whole word so that tiny or zero initial
+	// capacities still make room for the next flush.
+	const size_t doubledSize    = bufferSize * 2;
+	const size_t minimumSize    = bufferSize + 8;
+	const size_t newBufferSize  = doubledSize > minimumSize ? doubledSize : minimumSize;
+	uint8_t*     newBuffer      = new uint8_t[ newBufferSize ];
+
+	::memcpy( newBuffer, m_buffer, bufferSize );
+
+	delete[] m_buffer;
+
+	m_buffer = newBuffer;
+	m_bufferCursor = m_buffer + bufferPosition;
+	m_bufferEnd = m_buffer + newBufferSize;
+}
+
+WBS_INLINE void WriteBitstream::FlushWord()
+{
+	// Compare remaining space rather than forming "m_bufferEnd - 8", which is not a valid pointer
+	// when the capacity is below 8 bytes.
+	if ( m_bufferEnd - m_bufferCursor < 8 )
+	{
+		GrowBuffer();
+	}
+
+	m_bufferCursor[ 0 ] = m_bitBuffer & 0xFF;
+	m_bufferCursor[ 1 ] = ( m_bitBuffer >> 8 ) & 0xFF;
+	m_bufferCursor[ 2 ] = ( m_bitBuffer >> 16 ) & 0xFF;
+	m_bufferCursor[ 3 ] = ( m_bitBuffer >> 24 ) & 0xFF;
+	m_bufferCursor[ 4 ] = ( m_bitBuffer >> 32 ) & 0xFF;
+	m_bufferCursor[ 5 ] = ( m_bitBuffer >> 40 ) & 0xFF;
+	m_bufferCursor[ 6 ] = ( m_bitBuffer >> 48 ) & 0xFF;
+	m_bufferCursor[ 7 ] = ( m_bitBuffer >> 56 ) & 0xFF;
+
+	m_bufferCursor += 8;
+}
+
+WBS_INLINE void WriteBitstream::Write( uint32_t value, uint32_t bitCount )
+{
+	// Keep only the bits being written so stray high bits cannot bleed into later fields.
+	const uint64_t bits = static_cast< uint64_t >( value ) & ( ( static_cast< uint64_t >( 1 ) << bitCount ) - 1 );
+
+	// After a write that exactly filled the word m_bitsLeft is 0; shifting by 64 would be undefined
+	// (and in practice corrupts the word that is about to be flushed), so nothing is merged then.
+	if ( m_bitsLeft > 0 )
+	{
+		m_bitBuffer |= bits << ( 64 - m_bitsLeft );
+	}
+
+	if ( bitCount > m_bitsLeft )
+	{
+		// The current word is full: store it and start the next one with the bits that did not fit.
+		FlushWord();
+
+		m_bitBuffer = bits >> m_bitsLeft;
+		m_bitsLeft = 64 - ( bitCount - m_bitsLeft );
+	}
+	else
+	{
+		m_bitsLeft -= bitCount;
+	}
+
+	m_size += bitCount;
+}
+
+inline void WriteBitstream::Finish()
+{
+	FlushWord();
+}
+
+#endif // -- WRITE_BIT_STREAM_H__
--- a/3rdparty/ib-compress/readme.md
+++ b/3rdparty/ib-compress/readme.md
@ -0,0 +1,47 @@
+# Vertex Cache Optimised Index Buffer Compression
+
+This is a small proof of concept for compressing and decompressing index buffer triangle lists. It's designed to maintain the order of the triangle list and perform best with a triangle list that has been vertex cache post-transform optimised (a pre-transform cache optimisation is done as part of the compression).
+
+It's also designed to be relatively lightweight, with a decompression throughput in the tens of millions of triangles per second per core. It does not achieve state of the art levels of compression (which can be less than a bit per triangle, as well as providing good chances for vertex prediction), but it does maintain ordering of triangles and support arbitrary topologies. 
+
+There are some cases where the vertices within a triangle are re-ordered, but the general winding direction is maintained.
+
+## How does it work?
+
+The inspiration was a mix of Fabian Giesen's Simple loss-less index buffer compression http://fgiesen.wordpress.com/2013/12/14/simple-lossless-index-buffer-compression/ and
+the higher compression algorithms that make use of shared edges and re-order triangles. The idea was that there is probably a middle ground between them.
+
+The basic goals were:
+
+* Maintain the ordering of triangles, exploiting vertex cache optimal ordering.
+
+* Exploit recent triangle connectivity.
+
+* Make it fast, especially for decompression, without the need to maintain large extra data structures, like winged edge.
+
+* Make it simple enough to be easily understandable. 
+
+The vertex cache optimisation means that there will be quite a few vertices and edges shared between the next triangle in the list and the previous. We exploit this by maintaining two relatively small fixed size FIFOs, an edge FIFO and a vertex FIFO (not unlike the vertex cache itself, except we store recent indices).
+
+The compression relies on 4 codes: 
+
+1. A _new vertex_ code, for vertices that have not yet been seen. 
+
+2. A _cached edge_ code, for edges that have been seen recently. This code is followed by a relative index back into the edge FIFO.
+
+3. A _cached vertex_ code, for vertices that have been seen recently. This code is followed by a relative index back into the vertex FIFO.
+
+4. A _free vertex_ code, for vertices that have been seen, but not recently. This code is followed by a variable length integer encoding of the index relative to the most recent new vertex.
+
+Triangles can either consist of two codes, a cached edge followed by one of the vertex codes, or of 3 of the vertex codes. The most common codes in an optimised mesh are generally the cached edge and new vertex codes.
+
+Cached edges are always the first code in any triangle they appear in and may correspond to any edge in the original triangle (we check all the edges against the FIFO). This means that an individual triangle may have its vertices specified in a different order (but in the same winding direction) than the original uncompressed one.
+
+New vertex codes work because vertices are re-ordered to the order in which they appear in the mesh, meaning whenever we encounter a new vertex, we can just read an internal counter to get
+the current index, incrementing it afterwards. This has the benefit of also meaning vertices are in pre-transform cache optimised order.
+
+## Does it actually work?
+
+That's a better question! While my thoughts were that in theory it would average around 11-12 bits a triangle, the Stanford Armadillo mesh (optimised with Tom Forsyth's vertex cache optimisation algorithm), with 345944 triangles, compresses the index buffer down to 563122 bytes, which is more like 13 bits a triangle, and the Stanford Bunny is 12.85 bits or so. This is not anywhere near the state of the art in terms of compression (which gets down to less than a bit per triangle), but that isn't the goal.
+
+Performance wise, with the code posted here, the Armadillo compresses in 18.5 milliseconds and decompresses in 6.6 milliseconds on average on my system. The Stanford bunny is more like 1.4 milliseconds to decompress, relatively.