diff --git a/README.md b/README.md
index 7b7bec46..0387b56d 100644
--- a/README.md
+++ b/README.md
@@ -131,6 +131,10 @@ Loading textures.
 
 ![example-06-bump](https://github.com/bkaradzic/bgfx/raw/master/examples/06-bump/screenshot.png)
 
+### 07-callback
+Implementing application-specific callbacks for taking screenshots, caching
+OpenGL binary shaders, and capturing video.
+
 Internals
 ---------
 
diff --git a/examples/00-helloworld/helloworld.cpp b/examples/00-helloworld/helloworld.cpp
index 6ad1b03b..7311a178 100644
--- a/examples/00-helloworld/helloworld.cpp
+++ b/examples/00-helloworld/helloworld.cpp
@@ -7,14 +7,9 @@
 #include <bx/bx.h>
 #include "../common/dbg.h"
 
-void fatalCb(bgfx::Fatal::Enum _code, const char* _str)
-{
-	DBG("%x: %s", _code, _str);
-}
-
 int _main_(int _argc, char** _argv)
 {
-	bgfx::init(fatalCb);
+	bgfx::init();
 	bgfx::reset(1280, 720);
 
 	// Enable debug text.
diff --git a/examples/01-cubes/cubes.cpp b/examples/01-cubes/cubes.cpp
index 0459c068..be105d9e 100644
--- a/examples/01-cubes/cubes.cpp
+++ b/examples/01-cubes/cubes.cpp
@@ -12,11 +12,6 @@
 #include <stdio.h>
 #include <string.h>
 
-void fatalCb(bgfx::Fatal::Enum _code, const char* _str)
-{
-	DBG("%x: %s", _code, _str);
-}
-
 struct PosColorVertex
 {
 	float m_x;
@@ -99,7 +94,7 @@ static const bgfx::Memory* loadShader(const char* _name)
 
 int _main_(int _argc, char** _argv)
 {
-	bgfx::init(fatalCb);
+	bgfx::init();
 	bgfx::reset(1280, 720);
 
 	// Enable debug text.
diff --git a/examples/02-metaballs/metaballs.cpp b/examples/02-metaballs/metaballs.cpp
index 9e2dbfbb..addc50aa 100644
--- a/examples/02-metaballs/metaballs.cpp
+++ b/examples/02-metaballs/metaballs.cpp
@@ -14,11 +14,6 @@
 
 bgfx::VertexDecl s_PosNormalColorDecl;
 
-void fatalCb(bgfx::Fatal::Enum _code, const char* _str)
-{
-	DBG("%x: %s", _code, _str);
-}
-
 struct PosNormalColorVertex
 {
 	float m_pos[3];
@@ -499,7 +494,7 @@ uint32_t triangulate(uint8_t* _result, uint32_t _stride, const float* __restrict
 
 int _main_(int _argc, char** _argv)
 {
-	bgfx::init(fatalCb);
+	bgfx::init();
 	bgfx::reset(1280, 720);
 
 	// Enable debug text.
diff --git a/examples/03-raymarch/raymarch.cpp b/examples/03-raymarch/raymarch.cpp
index 97fc31b3..4bb9de81 100644
--- a/examples/03-raymarch/raymarch.cpp
+++ b/examples/03-raymarch/raymarch.cpp
@@ -12,11 +12,6 @@
 #include <stdio.h>
 #include <string.h>
 
-void fatalCb(bgfx::Fatal::Enum _code, const char* _str)
-{
-	DBG("%x: %s", _code, _str);
-}
-
 struct PosColorTexCoord0Vertex
 {
 	float m_x;
@@ -178,7 +173,7 @@ void renderScreenSpaceQuad(uint32_t _view, bgfx::ProgramHandle _program, float _
 
 int _main_(int _argc, char** _argv)
 {
-	bgfx::init(fatalCb);
+	bgfx::init();
 	bgfx::reset(1280, 720);
 
 	// Enable debug text.
diff --git a/examples/04-mesh/mesh.cpp b/examples/04-mesh/mesh.cpp
index f35d0955..debc3ec1 100644
--- a/examples/04-mesh/mesh.cpp
+++ b/examples/04-mesh/mesh.cpp
@@ -16,11 +16,6 @@
 #include <string>
 #include <vector>
 
-void fatalCb(bgfx::Fatal::Enum _code, const char* _str)
-{
-	DBG("%x: %s", _code, _str);
-}
-
 static const char* s_shaderPath = NULL;
 static bool s_flipV = false;
 
@@ -279,7 +274,7 @@ struct Mesh
 
 int _main_(int _argc, char** _argv)
 {
-	bgfx::init(fatalCb);
+	bgfx::init();
 	bgfx::reset(1280, 720);
 
 	// Enable debug text.
diff --git a/examples/05-instancing/instancing.cpp b/examples/05-instancing/instancing.cpp
index cd9d82c8..ae01513c 100644
--- a/examples/05-instancing/instancing.cpp
+++ b/examples/05-instancing/instancing.cpp
@@ -12,11 +12,6 @@
 #include <stdio.h>
 #include <string.h>
 
-void fatalCb(bgfx::Fatal::Enum _code, const char* _str)
-{
-	DBG("%x: %s", _code, _str);
-}
-
 struct PosColorVertex
 {
 	float m_x;
@@ -99,7 +94,7 @@ static const bgfx::Memory* loadShader(const char* _name)
 
 int _main_(int _argc, char** _argv)
 {
-	bgfx::init(fatalCb);
+	bgfx::init();
 	bgfx::reset(1280, 720);
 
 	// Enable debug text.
diff --git a/examples/06-bump/bump.cpp b/examples/06-bump/bump.cpp
index 4464f1e2..a2282713 100644
--- a/examples/06-bump/bump.cpp
+++ b/examples/06-bump/bump.cpp
@@ -13,11 +13,6 @@
 #include <stdio.h>
 #include <string.h>
 
-void fatalCb(bgfx::Fatal::Enum _code, const char* _str)
-{
-	DBG("%x: %s", _code, _str);
-}
-
 struct PosNormalTangentTexcoordVertex
 {
 	float m_x;
@@ -255,7 +250,7 @@ void calcTangents(const uint16_t* _indices, uint32_t _numIndices, Ty* _vertices,
 
 int _main_(int _argc, char** _argv)
 {
-	bgfx::init(fatalCb);
+	bgfx::init();
 	bgfx::reset(1280, 720);
 
 	// Enable debug text.
diff --git a/examples/07-callback/callback.cpp b/examples/07-callback/callback.cpp
new file mode 100644
index 00000000..54d29c4b
--- /dev/null
+++ b/examples/07-callback/callback.cpp
@@ -0,0 +1,579 @@
+/*
+ * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#include <bgfx.h>
+#include <bx/bx.h>
+#include <bx/timer.h>
+#include <bx/readerwriter.h>
+#include <bx/string.h>
+#include "../common/dbg.h"
+#include "../common/math.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+struct PosColorVertex // Vertex layout of the cube mesh; must match s_PosColorDecl set up in _main_().
+{
+	float m_x; // m_x, m_y, m_z: declared as bgfx::Attrib::Position (3 x Float).
+	float m_y;
+	float m_z;
+	uint32_t m_abgr; // Declared as bgfx::Attrib::Color0 (4 x Uint8).
+};
+
+static bgfx::VertexDecl s_PosColorDecl; // Filled in by _main_() before the vertex buffer is created.
+
+static PosColorVertex s_cubeVertices[8] = // The 8 corners of a cube spanning [-1, 1] on every axis, one color value per corner.
+{
+	{-1.0f,  1.0f,  1.0f, 0xff000000 },
+	{ 1.0f,  1.0f,  1.0f, 0xff0000ff },
+	{-1.0f, -1.0f,  1.0f, 0xff00ff00 },
+	{ 1.0f, -1.0f,  1.0f, 0xff00ffff },
+	{-1.0f,  1.0f, -1.0f, 0xffff0000 },
+	{ 1.0f,  1.0f, -1.0f, 0xffff00ff },
+	{-1.0f, -1.0f, -1.0f, 0xffffff00 },
+	{ 1.0f, -1.0f, -1.0f, 0xffffffff },
+};
+
+static const uint16_t s_cubeIndices[36] = // 12 triangles, 3 indices into s_cubeVertices each.
+{
+	0, 2, 1, // 0 (the trailing numbers count triangles, two per group)
+	1, 2, 3,
+	4, 5, 6, // 2
+	5, 7, 6,
+	0, 4, 2, // 4
+	4, 6, 2,
+	1, 3, 5, // 6
+	5, 3, 7,
+	0, 1, 4, // 8
+	4, 1, 5,
+	2, 6, 3, // 10
+	6, 7, 3,
+};
+
+static const char* s_shaderPath = NULL; // Renderer specific shader directory; assigned in _main_() before any shader is loaded.
+
+static void shaderFilePath(char* _out, const char* _name)
+{
+	// Builds "<s_shaderPath><_name>.bin" in _out. No bounds checking is done,
+	// so _out must be large enough for the whole path plus its terminator.
+	sprintf(_out, "%s%s.bin", s_shaderPath, _name);
+}
+
+long int fsize(FILE* _file)
+{
+	const long int restore = ftell(_file); // Position to come back to afterwards.
+	fseek(_file, 0L, SEEK_END);
+	const long int length = ftell(_file);  // Offset of the end of the file, i.e. its size.
+	fseek(_file, restore, SEEK_SET);
+	return length;
+}
+
+static const bgfx::Memory* load(const char* _filePath)
+{
+	FILE* file = fopen(_filePath, "rb");
+	if (NULL == file)
+	{
+		return NULL; // The file could not be opened.
+	}
+
+	const uint32_t size = (uint32_t)fsize(file);
+	const bgfx::Memory* mem = bgfx::alloc(size+1); // One extra byte for the '\0' stored below.
+	size_t ignore = fread(mem->data, 1, size, file); // A short read is deliberately not treated as an error.
+	BX_UNUSED(ignore);
+	fclose(file);
+	mem->data[mem->size-1] = '\0';
+	return mem;
+}
+
+static const bgfx::Memory* loadShader(const char* _name)
+{
+	char path[512]; // Receives "<s_shaderPath><_name>.bin".
+	shaderFilePath(path, _name);
+	return load(path); // NULL when the file can't be opened.
+}
+
+void saveTga(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, bool _grayscale, bool _yflip)
+{
+	// Writes an uncompressed TGA image to _filePath.
+	//   _width/_height  image size in pixels; only the low 16 bits are stored.
+	//   _srcPitch       distance in bytes between the starts of consecutive source rows.
+	//   _grayscale      8 bits per pixel (image type 3) instead of 32 (image type 2).
+	//   _yflip          write the source rows last-to-first instead of first-to-last.
+	// Errors are not reported: if the file can't be opened nothing happens.
+
+	FILE* file = fopen(_filePath, "wb");
+	if (NULL == file)
+	{
+		return;
+	}
+
+	const uint8_t type = _grayscale ? 3 : 2;
+	const uint8_t bpp = _grayscale ? 8 : 32;
+
+	// 18 byte header: image type at offset 2, width and height as
+	// little-endian 16-bit values at offsets 12 and 14, followed by the
+	// bits per pixel and an image descriptor byte of 32.
+	const uint8_t header[18] =
+	{
+		0, 0, type,
+		0, 0, 0, 0, 0,
+		0, 0, 0, 0,
+		uint8_t(_width&0xff), uint8_t( (_width>>8)&0xff),
+		uint8_t(_height&0xff), uint8_t( (_height>>8)&0xff),
+		bpp, 32,
+	};
+	fwrite(header, sizeof(header), 1, file);
+
+	// Rows are written tightly packed (dstPitch bytes each) while the source
+	// is walked using its own pitch. With _yflip the walk starts at the last
+	// source row and moves towards the first.
+	const uint32_t dstPitch = _width*bpp/8;
+	const uint8_t* row = (const uint8_t*)_src;
+	if (_yflip)
+	{
+		row = row + _srcPitch*_height - _srcPitch;
+	}
+
+	for (uint32_t yy = 0; yy < _height; ++yy)
+	{
+		fwrite(row, dstPitch, 1, file);
+		row = _yflip ? row - _srcPitch : row + _srcPitch;
+	}
+
+	fclose(file);
+}
+
+// Simple AVI writer producing uncompressed 24-bit DIB video. VideoLAN and
+// VirtualDub can decode it. Sizes and frame counts in the headers are left
+// at 0, which other players may not accept. Good enough for an example.
+struct AviWriter
+{
+	AviWriter()
+		: m_frame(NULL)
+		, m_frameSize(0)
+		, m_width(0)
+		, m_height(0)
+		, m_yflip(false)
+	{
+	}
+
+	bool open(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _fps, bool _yflip) // Opens the file and writes all headers; false if m_writer.open() returns non-zero.
+	{
+		if (0 != m_writer.open(_filePath) )
+		{
+			return false;
+		}
+
+		m_frameSize = _width * _height * 3;      // 3 bytes (BGR) per pixel.
+		m_frame = new uint8_t[m_frameSize + 8];  // +8 for the chunk header kept in front of the pixels.
+		m_width = _width;
+		m_height = _height;
+		
+		// Bgfx returns _yflip true for OpenGL since bottom left corner is 0, 0. In D3D top left corner
+		// is 0, 0. DIB expect OpenGL style coordinates, so this is inverted logic for AVI writer.
+		m_yflip = !_yflip;
+
+		bx::StaticMemoryBlockWriter mem(m_frame, 8); // The first 8 bytes of m_frame are filled once here; frame() only rewrites the pixels.
+		// Stream Data (LIST 'movi' Chunk) http://msdn.microsoft.com/en-us/library/ms899496.aspx
+		bx::write(&mem, BX_MAKEFOURCC('0', '0', 'd', 'b') );
+		bx::write(&mem, m_frameSize);
+
+		bx::write(&m_writer, BX_MAKEFOURCC('R', 'I', 'F', 'F') );
+		bx::write(&m_writer, UINT32_C(0) );      // RIFF size; written as 0 and not updated by close().
+
+		bx::write(&m_writer, BX_MAKEFOURCC('A', 'V', 'I', ' ') );
+
+		// AVI RIFF Form http://msdn.microsoft.com/en-us/library/ms899422.aspx
+		bx::write(&m_writer, BX_MAKEFOURCC('L', 'I', 'S', 'T') );
+		bx::write(&m_writer, UINT32_C(196) );    // 'hdrl' list size: 4 + (8+56) + (8+120).
+		bx::write(&m_writer, BX_MAKEFOURCC('h', 'd', 'r', 'l') );
+
+		// AVI Main Header http://msdn.microsoft.com/en-us/library/ms779632.aspx
+		bx::write(&m_writer, BX_MAKEFOURCC('a', 'v', 'i', 'h') );
+		bx::write(&m_writer, UINT32_C(56) );     // Size of the 14 32-bit fields that follow.
+		bx::write(&m_writer, UINT32_C(0) );      // dwMicroSecPerFrame
+		bx::write(&m_writer, UINT32_C(0) );      // dwMaxBytesPerSec
+		bx::write(&m_writer, UINT32_C(0) );      // dwPaddingGranularity
+		bx::write(&m_writer, UINT32_C(0) );      // dwFlags
+		bx::write(&m_writer, UINT32_C(0) );      // dwTotalFrames
+		bx::write(&m_writer, UINT32_C(0) );      // dwInitialFrames
+		bx::write(&m_writer, UINT32_C(1) );      // dwStreams
+		bx::write(&m_writer, UINT32_C(0) );      // dwSuggestedBufferSize
+		bx::write(&m_writer, _width);            // dwWidth
+		bx::write(&m_writer, _height);           // dwHeight
+		bx::write(&m_writer, UINT32_C(0) );      // dwReserved0
+		bx::write(&m_writer, UINT32_C(0) );      // dwReserved1
+		bx::write(&m_writer, UINT32_C(0) );      // dwReserved2
+		bx::write(&m_writer, UINT32_C(0) );      // dwReserved3
+
+		bx::write(&m_writer, BX_MAKEFOURCC('L', 'I', 'S', 'T') );
+		bx::write(&m_writer, UINT32_C(120) );    // 'strl' list size: 4 + (8+56) + (8+44).
+		bx::write(&m_writer, BX_MAKEFOURCC('s', 't', 'r', 'l') );
+
+		// AVISTREAMHEADER Structure http://msdn.microsoft.com/en-us/library/ms779638.aspx
+		bx::write(&m_writer, BX_MAKEFOURCC('s', 't', 'r', 'h') );
+		bx::write(&m_writer, UINT32_C(56) );     // Size of the stream header fields that follow.
+		// AVI Stream Headers http://msdn.microsoft.com/en-us/library/ms899423.aspx
+		bx::write(&m_writer, BX_MAKEFOURCC('v', 'i', 'd', 's') ); // fccType
+		bx::write(&m_writer, BX_MAKEFOURCC('D', 'I', 'B', ' ') ); // fccHandler
+		bx::write(&m_writer, UINT32_C(0) );      // dwFlags
+		bx::write(&m_writer, UINT16_C(0) );      // wPriority
+		bx::write(&m_writer, UINT16_C(0) );      // wLanguage
+		bx::write(&m_writer, UINT32_C(0) );      // dwInitialFrames
+		bx::write(&m_writer, UINT32_C(1000) );   // dwScale
+		bx::write(&m_writer, 1000*_fps);         // dwRate
+		bx::write(&m_writer, UINT32_C(0) );      // dwStart
+		bx::write(&m_writer, UINT32_C(0) );      // dwLength
+		bx::write(&m_writer, UINT32_C(0) );      // dwSuggestedBufferSize
+		bx::write(&m_writer, UINT32_C(0) );      // dwQuality
+		bx::write(&m_writer, UINT32_C(0) );      // dwSampleSize
+		bx::write(&m_writer, INT16_C(0) );       // rcFrame.left
+		bx::write(&m_writer, INT16_C(0) );       // rcFrame.top
+		bx::write(&m_writer, INT16_C(0) );       // rcFrame.right
+		bx::write(&m_writer, INT16_C(0) );       // rcFrame.bottom
+
+		bx::write(&m_writer, BX_MAKEFOURCC('s', 't', 'r', 'f') );
+		bx::write(&m_writer, UINT32_C(44) );     // 40 byte BITMAPINFOHEADER plus the 4 trailing bytes below.
+
+		// BITMAPINFOHEADER structure http://msdn.microsoft.com/en-us/library/windows/desktop/dd318229%28v=vs.85%29.aspx
+		bx::write(&m_writer, UINT32_C(40) );     // biSize
+		bx::write(&m_writer, _width);            // biWidth
+		bx::write(&m_writer, _height);           // biHeight
+		bx::write(&m_writer, UINT16_C(1) );      // biPlanes
+		bx::write(&m_writer, UINT16_C(24) );     // biBitCount
+		bx::write(&m_writer, UINT32_C(0) );      // biCompression
+		bx::write(&m_writer, UINT32_C(0) );      // biSizeImage
+		bx::write(&m_writer, UINT32_C(0) );      // biXPelsPerMeter
+		bx::write(&m_writer, UINT32_C(0) );      // biYPelsPerMeter
+		bx::write(&m_writer, UINT32_C(0) );      // biClrUsed
+		bx::write(&m_writer, UINT32_C(0) );      // biClrImportant
+		bx::write(&m_writer, UINT32_C(0) );      // 4 extra bytes counted in the 'strf' size above; presumably padding - TODO confirm.
+
+		bx::write(&m_writer, BX_MAKEFOURCC('L', 'I', 'S', 'T') );
+		bx::write(&m_writer, UINT32_C(0) );      // 'movi' list size; written as 0 and not updated by close().
+		bx::write(&m_writer, BX_MAKEFOURCC('m', 'o', 'v', 'i') );
+
+		return true;
+	}
+
+	void close() // Closes the file and frees the frame buffer; does nothing if no frame buffer is allocated.
+	{
+		if (NULL != m_frame)
+		{
+			m_writer.close();
+
+			delete [] m_frame;
+			m_frame = NULL;
+			m_frameSize = 0;
+		}
+	}
+
+	void frame(const void* _data) // Appends one frame as a '00db' chunk; does nothing if no frame buffer is allocated.
+	{
+		if (NULL != m_frame)
+		{
+			uint32_t width = m_width;
+			uint32_t height = m_height;
+
+			uint8_t* bgr = &m_frame[8]; // Pixel data starts right after the 8 byte chunk header.
+
+			if (m_yflip)
+			{
+				for (uint32_t yy = 0; yy < height; ++yy)
+				{
+					const uint8_t* bgra = (const uint8_t*)_data + (height-1-yy)*width*4; // Source rows are read last-to-first, assuming width*4 bytes per row.
+
+					for (uint32_t ii = 0; ii < width; ++ii)
+					{
+						bgr[0] = bgra[0];
+						bgr[1] = bgra[1];
+						bgr[2] = bgra[2];
+						bgr += 3;
+						bgra += 4; // The 4th source byte of every pixel is dropped.
+					}
+				}
+			}
+			else
+			{
+				const uint8_t* bgra = (const uint8_t*)_data;
+				for (uint32_t ii = 0, num = m_frameSize/3; ii < num; ++ii) // num is the pixel count (width*height).
+				{
+					bgr[0] = bgra[0];
+					bgr[1] = bgra[1];
+					bgr[2] = bgra[2];
+					bgr += 3;
+					bgra += 4;
+				}
+			}
+
+			bx::write(&m_writer, m_frame, m_frameSize+8); // Chunk header and pixels go out in a single write.
+		}
+	}
+
+	bx::CrtFileWriter m_writer; // Output file.
+	uint8_t* m_frame;           // 8 byte '00db' chunk header followed by one converted frame; NULL when not allocated.
+	uint32_t m_frameSize;       // Size of the converted frame in bytes (width*height*3).
+	uint32_t m_width;
+	uint32_t m_height;
+	bool m_yflip;               // True when frame() must reverse the row order (inverse of the _yflip given to open()).
+};
+
+struct BgfxCallback : public bgfx::CallbackI // Example callbacks: file based cache, TGA screenshots and AVI capture.
+{
+	BgfxCallback() : m_writer(NULL) {} // No capture is active, so captureEnd()/captureFrame() are safe before captureBegin().
+	virtual ~BgfxCallback() {}
+
+	virtual void fatal(bgfx::Fatal::Enum _code, const char* _str) BX_OVERRIDE // Prints the error, then terminates.
+	{
+		dbgPrintf("Fatal error: 0x%08x: %s", _code, _str);
+		abort();
+	}
+
+	virtual uint32_t cacheReadSize(uint64_t _id) BX_OVERRIDE // Size of the cached item in bytes, 0 if there is none.
+	{
+		char filePath[256];
+		bx::snprintf(filePath, sizeof(filePath), "%016" PRIx64, _id); // Cache files are named after the id in hex.
+		FILE* file = fopen(filePath, "rb");
+		if (NULL != file)
+		{
+			const long int size = fsize(file);
+			fclose(file);
+			return size > 0 ? uint32_t(size) : 0; // A failed size query (negative) is reported as "no cached item".
+		}
+
+		return 0;
+	}
+
+	virtual bool cacheRead(uint64_t _id, void* _data, uint32_t _size) BX_OVERRIDE // True only if all _size bytes were read.
+	{
+		char filePath[256];
+		bx::snprintf(filePath, sizeof(filePath), "%016" PRIx64, _id);
+		FILE* file = fopen(filePath, "rb");
+		if (NULL != file)
+		{
+			size_t result = fread(_data, 1, _size, file);
+			fclose(file);
+			return result == _size;
+		}
+
+		return false;
+	}
+
+	virtual void cacheWrite(uint64_t _id, const void* _data, uint32_t _size) BX_OVERRIDE // Best effort; failures are ignored.
+	{
+		char filePath[256];
+		bx::snprintf(filePath, sizeof(filePath), "%016" PRIx64, _id);
+		FILE* file = fopen(filePath, "wb");
+		if (NULL != file)
+		{
+			fwrite(_data, 1, _size, file);
+			fclose(file);
+		}
+	}
+
+	virtual void screenShot(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _data, uint32_t /*_size*/, bool _yflip) BX_OVERRIDE
+	{
+		saveTga(_filePath, _width, _height, _pitch, _data, false, _yflip); // Always saved as a 32-bit (non-grayscale) TGA.
+	}
+
+	virtual void captureBegin(uint32_t _width, uint32_t _height, uint32_t _pitch, bgfx::TextureFormat::Enum /*_format*/, bool _yflip) BX_OVERRIDE
+	{
+		m_writer = new AviWriter;
+		if (!m_writer->open("capture.avi", _width, _height, 60, _yflip) ) // 60 is the frame rate written to the AVI header.
+		{
+			delete m_writer;
+			m_writer = NULL; // Capture stays disabled; captureFrame() becomes a no-op.
+		}
+	}
+
+	virtual void captureEnd() BX_OVERRIDE // Closes the file and releases the writer; safe to call repeatedly.
+	{
+		if (NULL != m_writer)
+		{
+			m_writer->close();
+			delete m_writer; // The writer was allocated in captureBegin(); without this it leaks.
+			m_writer = NULL;
+		}
+	}
+
+	virtual void captureFrame(const void* _data, uint32_t /*_size*/) BX_OVERRIDE
+	{
+		if (NULL != m_writer)
+		{
+			m_writer->frame(_data);
+		}
+	}
+
+	AviWriter* m_writer; // Owned; non-NULL only between a successful captureBegin() and captureEnd().
+};
+
+int _main_(int _argc, char** _argv)
+{
+	BgfxCallback callback; // Passed to bgfx by pointer, so it has to stay alive until after bgfx::shutdown().
+
+	bgfx::init(&callback);
+	bgfx::reset(1280, 720, BGFX_RESET_CAPTURE); // Presumably this flag is what makes bgfx call the capture* callbacks - TODO confirm.
+
+	// Enable debug text.
+	bgfx::setDebug(BGFX_DEBUG_TEXT);
+
+	// Set view 0 default viewport.
+	bgfx::setViewRect(0, 0, 0, 1280, 720);
+
+	// Set view 0 clear state.
+	bgfx::setViewClear(0
+		, BGFX_CLEAR_COLOR_BIT|BGFX_CLEAR_DEPTH_BIT
+		, 0x303030ff
+		, 1.0f
+		, 0
+		);
+
+	// Setup root path for binary shaders. Shader binaries are different
+	// for each renderer.
+	switch (bgfx::getRendererType() )
+	{
+	default: // Any renderer not listed below uses the Direct3D 9 shaders.
+	case bgfx::RendererType::Direct3D9:
+		s_shaderPath = "shaders/dx9/";
+		break;
+
+	case bgfx::RendererType::Direct3D11:
+		s_shaderPath = "shaders/dx11/";
+		break;
+
+	case bgfx::RendererType::OpenGL:
+		s_shaderPath = "shaders/glsl/";
+		break;
+
+	case bgfx::RendererType::OpenGLES2:
+	case bgfx::RendererType::OpenGLES3:
+		s_shaderPath = "shaders/gles/";
+		break;
+	}
+
+	// Create vertex stream declaration.
+	s_PosColorDecl.begin();
+	s_PosColorDecl.add(bgfx::Attrib::Position, 3, bgfx::AttribType::Float);
+	s_PosColorDecl.add(bgfx::Attrib::Color0, 4, bgfx::AttribType::Uint8, true);
+	s_PosColorDecl.end();
+
+	const bgfx::Memory* mem;
+
+	// Create static vertex buffer.
+	mem = bgfx::makeRef(s_cubeVertices, sizeof(s_cubeVertices) );
+	bgfx::VertexBufferHandle vbh = bgfx::createVertexBuffer(mem, s_PosColorDecl);
+
+	// Create static index buffer.
+	mem = bgfx::makeRef(s_cubeIndices, sizeof(s_cubeIndices) );
+	bgfx::IndexBufferHandle ibh = bgfx::createIndexBuffer(mem);
+
+	// Load vertex shader.
+	mem = loadShader("vs_callback"); // NOTE(review): loadShader() returns NULL if the file can't be opened; not checked here.
+	bgfx::VertexShaderHandle vsh = bgfx::createVertexShader(mem);
+
+	// Load fragment shader.
+	mem = loadShader("fs_callback"); // NOTE(review): same unchecked NULL case as above.
+	bgfx::FragmentShaderHandle fsh = bgfx::createFragmentShader(mem);
+
+	// Create program from shaders.
+	bgfx::ProgramHandle program = bgfx::createProgram(vsh, fsh);
+
+	// We can destroy vertex and fragment shader here since
+	// their reference is kept inside bgfx after calling createProgram.
+	// Vertex and fragment shader will be destroyed once program is
+	// destroyed.
+	bgfx::destroyVertexShader(vsh);
+	bgfx::destroyFragmentShader(fsh);
+
+	float time = 0.0f;
+
+	// Render exactly 300 frames, i.e. a 5 second video at the 60 fps that
+	// BgfxCallback::captureBegin() passes to the AVI writer.
+	for (uint32_t frame = 0; frame < 300; ++frame)
+	{
+		// This dummy draw call is here to make sure that view 0 is cleared
+		// if no other draw calls are submitted to view 0.
+		bgfx::submit(0);
+
+		int64_t now = bx::getHPCounter();
+		static int64_t last = now; // Initialized on the first pass only, so the first reported frame time is 0.
+		const int64_t frameTime = now - last;
+		last = now;
+		const double freq = double(bx::getHPFrequency() );
+		const double toMs = 1000.0/freq;
+
+		// Use debug font to print information about this example.
+		bgfx::dbgTextClear();
+		bgfx::dbgTextPrintf( 0, 1, 0x4f, "bgfx/examples/07-callback");
+		bgfx::dbgTextPrintf( 0, 2, 0x6f, "Description: Implementing application specific callbacks for taking screen shots,");
+		bgfx::dbgTextPrintf(13, 3, 0x6f, "caching OpenGL binary shaders, and video capture.");
+		bgfx::dbgTextPrintf( 0, 4, 0x0f, "Frame: % 7.3f[ms]", double(frameTime)*toMs);
+
+		float at[3] = { 0.0f, 0.0f, 0.0f };
+		float eye[3] = { 0.0f, 0.0f, -35.0f };
+		
+		float view[16];
+		float proj[16];
+		mtxLookAt(view, eye, at);
+		mtxProj(proj, 60.0f, 16.0f/9.0f, 0.1f, 100.0f);
+
+		// Set view and projection matrix for view 0.
+		bgfx::setViewTransform(0, view, proj);
+
+		time += 1.0f/60.0f; // Fixed step per frame, independent of the measured frame time.
+
+		// Submit a triangle of cubes: row yy holds 11-yy cubes (66 in total).
+		for (uint32_t yy = 0; yy < 11; ++yy)
+		{
+			for (uint32_t xx = 0; xx < 11-yy; ++xx)
+			{
+				float mtx[16];
+				mtxRotateXY(mtx, time + xx*0.21f, time + yy*0.37f);
+				mtx[12] = -15.0f + float(xx)*3.0f; // Elements 12-14 are overwritten with the cube's position on a grid with spacing 3.
+				mtx[13] = -15.0f + float(yy)*3.0f;
+				mtx[14] = 0.0f;
+
+				// Set model matrix for rendering.
+				bgfx::setTransform(mtx);
+
+				// Set vertex and fragment shaders.
+				bgfx::setProgram(program);
+
+				// Set vertex and index buffer.
+				bgfx::setVertexBuffer(vbh);
+				bgfx::setIndexBuffer(ibh);
+
+				// Set render states.
+				bgfx::setState(BGFX_STATE_RGB_WRITE
+					|BGFX_STATE_DEPTH_WRITE
+					|BGFX_STATE_DEPTH_TEST_LESS
+					);
+
+				// Submit primitive for rendering to view 0.
+				bgfx::submit(0);
+			}
+		}
+
+		// Take screenshot at frame 150.
+		if (150 == frame)
+		{
+			bgfx::saveScreenShot("frame150.tga");
+		}
+
+		// Advance to next frame. Rendering thread will be kicked to
+		// process submitted rendering primitives.
+		bgfx::frame();
+	}
+
+	// Cleanup.
+	bgfx::destroyIndexBuffer(ibh);
+	bgfx::destroyVertexBuffer(vbh);
+	bgfx::destroyProgram(program);
+
+	// Shutdown bgfx.
+	bgfx::shutdown();
+
+	return 0;
+}
diff --git a/examples/07-callback/fs_callback.sc b/examples/07-callback/fs_callback.sc
new file mode 100644
index 00000000..7e6e0aeb
--- /dev/null
+++ b/examples/07-callback/fs_callback.sc
@@ -0,0 +1,17 @@
+$input v_world, v_color0
+
+/*
+ * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#include "../common/common.sh"
+
+void main()
+{
+	vec3 normal = normalize(cross(dFdx(v_world), dFdy(v_world) ) ); // Per-face normal from the screen-space derivatives of v_world.
+	vec3 lightDir = vec3(0.0, 0.0, 1.0); // Fixed light direction.
+	float ndotl = max(dot(normal, lightDir), 0.0); // Diffuse term, clamped at 0.
+	float spec = pow(ndotl, 30.0); // Highlight derived from the same term with exponent 30.
+	gl_FragColor = pow(pow(v_color0, vec4_splat(2.2) ) * ndotl + spec, vec4_splat(1.0/2.2) ); // Color is raised to 2.2, lit, then raised to 1/2.2.
+}
diff --git a/examples/07-callback/makefile b/examples/07-callback/makefile
new file mode 100644
index 00000000..3bb2f87f
--- /dev/null
+++ b/examples/07-callback/makefile
@@ -0,0 +1,17 @@
+#
+# Copyright 2011-2012 Branimir Karadzic. All rights reserved.
+# License: http://www.opensource.org/licenses/BSD-2-Clause
+#
+
+BGFX_DIR=../..
+RUNTIME_DIR=$(BGFX_DIR)/examples/runtime
+BUILD_DIR=../../.build
+
+include $(BGFX_DIR)/premake/shader.mk
+
+rebuild: # Runs "clean all" once for every TARGET value from 0 to 4.
+	@make -s --no-print-directory TARGET=0 clean all
+	@make -s --no-print-directory TARGET=1 clean all
+	@make -s --no-print-directory TARGET=2 clean all
+	@make -s --no-print-directory TARGET=3 clean all
+	@make -s --no-print-directory TARGET=4 clean all
diff --git a/examples/07-callback/varying.def.sc b/examples/07-callback/varying.def.sc
new file mode 100644
index 00000000..3f1a2779
--- /dev/null
+++ b/examples/07-callback/varying.def.sc
@@ -0,0 +1,5 @@
+vec3 v_world     : TEXCOORD0 = vec3(0.0, 0.0, 0.0);
+vec4 v_color0    : COLOR0    = vec4(1.0, 0.0, 0.0, 1.0);
+
+vec3 a_position  : POSITION;
+vec4 a_color0    : COLOR0;
diff --git a/examples/07-callback/vs_callback.sc b/examples/07-callback/vs_callback.sc
new file mode 100644
index 00000000..273f408f
--- /dev/null
+++ b/examples/07-callback/vs_callback.sc
@@ -0,0 +1,16 @@
+$input a_position, a_color0
+$output v_world, v_color0
+
+/*
+ * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#include "../common/common.sh"
+
+void main()
+{
+	gl_Position = mul(u_modelViewProj, vec4(a_position, 1.0) ); // Position transformed by u_modelViewProj.
+	v_world = mul(u_model, vec4(a_position, 1.0) ).xyz; // Position transformed by u_model only; the fragment shader derives its normal from it.
+	v_color0 = a_color0; // Vertex color is passed through unchanged.
+}
diff --git a/examples/runtime/shaders/dx11/fs_callback.bin b/examples/runtime/shaders/dx11/fs_callback.bin
new file mode 100644
index 00000000..44d8ee50
Binary files /dev/null and b/examples/runtime/shaders/dx11/fs_callback.bin differ
diff --git a/examples/runtime/shaders/dx11/vs_callback.bin b/examples/runtime/shaders/dx11/vs_callback.bin
new file mode 100644
index 00000000..8052ef33
Binary files /dev/null and b/examples/runtime/shaders/dx11/vs_callback.bin differ
diff --git a/examples/runtime/shaders/dx9/fs_callback.bin b/examples/runtime/shaders/dx9/fs_callback.bin
new file mode 100644
index 00000000..3fbd98eb
Binary files /dev/null and b/examples/runtime/shaders/dx9/fs_callback.bin differ
diff --git a/examples/runtime/shaders/dx9/vs_callback.bin b/examples/runtime/shaders/dx9/vs_callback.bin
new file mode 100644
index 00000000..35c5a9ce
Binary files /dev/null and b/examples/runtime/shaders/dx9/vs_callback.bin differ
diff --git a/examples/runtime/shaders/gles/fs_callback.bin b/examples/runtime/shaders/gles/fs_callback.bin
new file mode 100644
index 00000000..396f76fc
Binary files /dev/null and b/examples/runtime/shaders/gles/fs_callback.bin differ
diff --git a/examples/runtime/shaders/gles/vs_callback.bin b/examples/runtime/shaders/gles/vs_callback.bin
new file mode 100644
index 00000000..4dd35692
Binary files /dev/null and b/examples/runtime/shaders/gles/vs_callback.bin differ
diff --git a/examples/runtime/shaders/glsl/fs_callback.bin b/examples/runtime/shaders/glsl/fs_callback.bin
new file mode 100644
index 00000000..bdd1bd4e
Binary files /dev/null and b/examples/runtime/shaders/glsl/fs_callback.bin differ
diff --git a/examples/runtime/shaders/glsl/vs_callback.bin b/examples/runtime/shaders/glsl/vs_callback.bin
new file mode 100644
index 00000000..6bbb4115
Binary files /dev/null and b/examples/runtime/shaders/glsl/vs_callback.bin differ
diff --git a/include/bgfx.h b/include/bgfx.h
index 2067d9f4..f91776bc 100644
--- a/include/bgfx.h
+++ b/include/bgfx.h
@@ -220,6 +220,7 @@
 #define BGFX_RESET_MSAA_SHIFT           4
 #define BGFX_RESET_MSAA_MASK            UINT32_C(0x00000070)
 #define BGFX_RESET_VSYNC                UINT32_C(0x00000080)
+#define BGFX_RESET_CAPTURE              UINT32_C(0x00000100)
 
 ///
 #define BGFX_HANDLE(_name) struct _name { uint16_t idx; }
@@ -301,9 +302,9 @@ namespace bgfx
 			Dxt5,
 			Unknown,
 			L8,
-			XRGB8,
-			ARGB8,
-			ABGR16,
+			BGRX8,
+			BGRA8,
+			RGBA16,
 			R5G6B5,
 			RGBA4,
 			RGB5A1,
@@ -347,10 +348,47 @@ namespace bgfx
 	BGFX_HANDLE(VertexDeclHandle);
 	BGFX_HANDLE(VertexShaderHandle);
 
-	typedef void (*FatalFn)(Fatal::Enum _code, const char* _str);
 	typedef void* (*ReallocFn)(void* _ptr, size_t _size);
 	typedef void (*FreeFn)(void* _ptr);
-	typedef void (*CacheFn)(uint64_t _id, bool _store, void* _data, uint32_t& _length);
+
+	/// Callback interface to implement application-specific behavior.
+	/// Cached items are currently used only for OpenGL binary shaders.
+	///
+	/// NOTE: Callback functions can be called from any thread.
+	struct CallbackI
+	{
+		virtual ~CallbackI() = 0; // Pure virtual, but an inline definition follows the struct.
+
+		/// Called on unrecoverable error. It's not safe to continue, inform
+		/// user and terminate application from this call.
+		virtual void fatal(Fatal::Enum _code, const char* _str) = 0;
+
+		/// Return size in bytes of cached item `_id`. Return 0 if no cached
+		/// item was found.
+		virtual uint32_t cacheReadSize(uint64_t _id) = 0;
+
+		/// Read `_size` bytes of cached item `_id` into `_data`. Return true on success.
+		virtual bool cacheRead(uint64_t _id, void* _data, uint32_t _size) = 0;
+
+		/// Write `_size` bytes from `_data` as cached item `_id`.
+		virtual void cacheWrite(uint64_t _id, const void* _data, uint32_t _size) = 0;
+
+		/// Screenshot captured. Screenshot format is always 4-byte BGRA, `_pitch` bytes per row.
+		virtual void screenShot(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _data, uint32_t _size, bool _yflip) = 0;
+
+		/// Called when capture begins, with the size and format of the frames to come.
+		virtual void captureBegin(uint32_t _width, uint32_t _height, uint32_t _pitch, bgfx::TextureFormat::Enum _format, bool _yflip) = 0;
+
+		/// Called when capture ends.
+		virtual void captureEnd() = 0;
+
+		/// Captured frame: `_size` bytes of pixel data at `_data`.
+		virtual void captureFrame(const void* _data, uint32_t _size) = 0;
+	};
+
+	inline CallbackI::~CallbackI()
+	{
+	}
 
 	struct Memory
 	{
@@ -394,6 +432,7 @@ namespace bgfx
 		uint16_t depth;
 	};
 
+	/// Vertex declaration.
 	struct VertexDecl
 	{
 		/// Start VertexDecl.
@@ -430,7 +469,7 @@ namespace bgfx
 	RendererType::Enum getRendererType();
 
 	/// Initialize bgfx library.
-	void init(FatalFn _fatal = NULL, ReallocFn _realloc = NULL, FreeFn _free = NULL, CacheFn _cache = NULL);
+	void init(CallbackI* _callback = NULL, ReallocFn _realloc = NULL, FreeFn _free = NULL);
 
 	/// Shutdown bgfx library.
 	void shutdown();
diff --git a/premake/example-07-callback.lua b/premake/example-07-callback.lua
new file mode 100644
index 00000000..fde65d48
--- /dev/null
+++ b/premake/example-07-callback.lua
@@ -0,0 +1,36 @@
+project "example-07-callback"
+	uuid "acc53bbc-52f0-11e2-9781-ad8edd4b7d02"
+	kind "WindowedApp"
+
+	debugdir (BGFX_DIR .. "examples/runtime/") -- The example opens "shaders/..." relative to this directory.
+
+	includedirs {
+		BX_DIR .. "include",
+		BGFX_DIR .. "include",
+	}
+
+	files {
+		BGFX_DIR .. "examples/common/**.cpp", -- Code shared by the examples.
+		BGFX_DIR .. "examples/common/**.h",
+		BGFX_DIR .. "examples/07-callback/**.cpp", -- Sources of this example.
+		BGFX_DIR .. "examples/07-callback/**.h",
+	}
+
+	links {
+		"bgfx",
+	}
+
+	configuration { "nacl" }
+		targetextension ".nexe"
+
+	configuration { "nacl", "Release" }
+		postbuildcommands {
+			"@echo Stripping symbols.",
+			"@$(NACL)/bin/x86_64-nacl-strip -s \"$(TARGET)\""
+		}
+
+	configuration { "linux" } -- Extra system libraries linked on Linux only.
+		links {
+			"GL",
+			"pthread",
+		}
diff --git a/premake/premake4.lua b/premake/premake4.lua
index ba4ef7b4..5f58faec 100644
--- a/premake/premake4.lua
+++ b/premake/premake4.lua
@@ -40,3 +40,4 @@ dofile "example-03-raymarch.lua"
 dofile "example-04-mesh.lua"
 dofile "example-05-instancing.lua"
 dofile "example-06-bump.lua"
+dofile "example-07-callback.lua"
diff --git a/src/bgfx.cpp b/src/bgfx.cpp
index 03d5f895..c05c8909 100644
--- a/src/bgfx.cpp
+++ b/src/bgfx.cpp
@@ -37,14 +37,56 @@ namespace bgfx
 #	define BGFX_CHECK_RENDER_THREAD()
 #endif // BGFX_CONFIG_MULTITHREADED
 
-	void fatalStub(Fatal::Enum _code, const char* _str)
+	struct CallbackStub : public CallbackI
 	{
-		BX_TRACE("0x%08x: %s", _code, _str);
-		BX_UNUSED(_code);
-		BX_UNUSED(_str);
-	}
+		virtual ~CallbackStub()
+		{
+		}
 
-	void* reallocStub(void* _ptr, size_t _size)
+		virtual void fatal(Fatal::Enum _code, const char* _str) BX_OVERRIDE // Default: trace the message, then abort.
+		{
+			BX_TRACE("0x%08x: %s", _code, _str);
+			BX_UNUSED(_code);
+			BX_UNUSED(_str);
+			abort();
+		}
+
+		virtual uint32_t cacheReadSize(uint64_t /*_id*/) BX_OVERRIDE
+		{
+			return 0; // The stub never has a cached item.
+		}
+
+		virtual bool cacheRead(uint64_t /*_id*/, void* /*_data*/, uint32_t /*_size*/) BX_OVERRIDE
+		{
+			return false; // Nothing to read; see cacheReadSize().
+		}
+
+		virtual void cacheWrite(uint64_t /*_id*/, const void* /*_data*/, uint32_t /*_size*/) BX_OVERRIDE
+		{
+		}
+
+		virtual void screenShot(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _data, uint32_t /*_size*/, bool _yflip) BX_OVERRIDE
+		{
+			saveTga(_filePath, _width, _height, _pitch, _data, false, _yflip); // Default: write the screenshot as a non-grayscale TGA.
+		}
+
+		virtual void captureBegin(uint32_t /*_width*/, uint32_t /*_height*/, uint32_t /*_pitch*/, bgfx::TextureFormat::Enum /*_format*/, bool /*_yflip*/) BX_OVERRIDE
+		{
+			BX_TRACE("Warning: using capture without callback (a.k.a. pointless)."); // Captured frames are discarded by the stub.
+		}
+
+		virtual void captureEnd() BX_OVERRIDE
+		{
+		}
+
+		virtual void captureFrame(const void* /*_data*/, uint32_t /*_size*/) BX_OVERRIDE
+		{
+		}
+	};
+	
+	static CallbackStub s_callbackStub;
+
+	static void* reallocStub(void* _ptr, size_t _size)
 	{
 		void* ptr = ::realloc(_ptr, _size);
 		BX_CHECK(NULL != ptr, "Out of memory!");
@@ -52,21 +94,15 @@ namespace bgfx
 		return ptr;
 	}
 
-	void freeStub(void* _ptr)
+	static void freeStub(void* _ptr)
 	{
 		//	BX_TRACE("free %p", _ptr);
 		::free(_ptr);
 	}
 
-	void cacheStub(uint64_t /*_id*/, bool /*_store*/, void* /*_data*/, uint32_t& _length)
-	{
-		_length = 0;
-	}
-
-	FatalFn g_fatal = fatalStub;
+	CallbackI* g_callback = &s_callbackStub;
 	ReallocFn g_realloc = reallocStub;
 	FreeFn g_free = freeStub;
-	CacheFn g_cache = cacheStub;
 
 	static BX_THREAD uint32_t s_threadIndex = 0;
 	static Context s_ctx;
@@ -82,7 +118,7 @@ namespace bgfx
 
 		temp[sizeof(temp)-1] = '\0';
 
-		g_fatal(_code, temp);
+		g_callback->fatal(_code, temp);
 	}
 
 	inline void vec4MulMtx(float* __restrict _result, const float* __restrict _vec, const float* __restrict _mat)
@@ -150,7 +186,7 @@ namespace bgfx
 			uint32_t dstPitch = _width*bpp/8;
 			if (_yflip)
 			{
-				uint8_t* data = (uint8_t*)_src + dstPitch*_height - _srcPitch;
+				uint8_t* data = (uint8_t*)_src + _srcPitch*_height - _srcPitch;
 				for (uint32_t yy = 0; yy < _height; ++yy)
 				{
 					fwrite(data, dstPitch, 1, file);
@@ -520,11 +556,11 @@ namespace bgfx
 #endif // BGFX_CONFIG_RENDERER_
 	}
 
-	void init(FatalFn _fatal, ReallocFn _realloc, FreeFn _free, CacheFn _cache)
+	void init(CallbackI* _callback, ReallocFn _realloc, FreeFn _free)
 	{
-		if (NULL != _fatal)
+		if (NULL != _callback)
 		{
-			g_fatal = _fatal;
+			g_callback = _callback;
 		}
 
 		if (NULL != _realloc
@@ -534,11 +570,6 @@ namespace bgfx
 			g_free = _free;
 		}
 
-		if (NULL != _cache)
-		{
-			g_cache = _cache;
-		}
-
 		s_threadIndex = BGFX_MAIN_THREAD_MAGIC;
 
 		// On NaCl renderer is on the main thread.
@@ -551,10 +582,9 @@ namespace bgfx
 		s_ctx.shutdown();
 
 		s_threadIndex = 0;
-		g_fatal = fatalStub;
+		g_callback = &s_callbackStub;
 		g_realloc = reallocStub;
 		g_free = freeStub;
-		g_cache = cacheStub;
 	}
 
 	void reset(uint32_t _width, uint32_t _height, uint32_t _flags)
diff --git a/src/bgfx_p.h b/src/bgfx_p.h
index f45ba42b..ef20a2d4 100644
--- a/src/bgfx_p.h
+++ b/src/bgfx_p.h
@@ -177,10 +177,9 @@ namespace bgfx
 	};
 
 	extern const uint32_t g_uniformTypeSize[UniformType::Count];
-	extern FatalFn g_fatal;
+	extern CallbackI* g_callback;
 	extern ReallocFn g_realloc;
 	extern FreeFn g_free;
-	extern CacheFn g_cache;
 
 	void fatal(Fatal::Enum _code, const char* _format, ...);
 	void release(const Memory* _mem);
diff --git a/src/dds.cpp b/src/dds.cpp
index 316c621b..bf5fe395 100644
--- a/src/dds.cpp
+++ b/src/dds.cpp
@@ -437,7 +437,7 @@ bool parseDds(Dds& _dds, const Memory* _mem)
 			break;
 
 		case D3DFMT_A16B16G16R16F:
-			type = TextureFormat::ABGR16;
+			type = TextureFormat::RGBA16;
 			blockSize = 8;
 			bpp = 64;
 			break;
@@ -448,13 +448,13 @@ bool parseDds(Dds& _dds, const Memory* _mem)
 		switch (pixelFlags)
 		{
 		case DDPF_RGB:
-			type = TextureFormat::XRGB8;
+			type = TextureFormat::BGRX8;
 			blockSize = 3;
 			bpp = 24;
 			break;
 
 		case DDPF_RGB|DDPF_ALPHAPIXELS:
-			type = TextureFormat::ARGB8;
+			type = TextureFormat::BGRA8;
 			blockSize = 4;
 			bpp = 32;
 			break;
diff --git a/src/glimports.h b/src/glimports.h
index 35e1ba44..fa9956d2 100644
--- a/src/glimports.h
+++ b/src/glimports.h
@@ -18,6 +18,7 @@ GL_IMPORT(false, PFNGLTEXIMAGE2DPROC,                     glTexImage2D);
 GL_IMPORT(false, PFNGLTEXSUBIMAGE2DPROC,                  glTexSubImage2D);
 GL_IMPORT(false, PFNGLPIXELSTOREI,                        glPixelStorei);
 GL_IMPORT(false, PFNGLTEXPARAMETERIPROC,                  glTexParameteri);
+GL_IMPORT(false, PFNGLTEXPARAMETERIVPROC,                 glTexParameteriv);
 GL_IMPORT(false, PFNGLTEXPARAMETERFPROC,                  glTexParameterf);
 GL_IMPORT(false, PFNGLBINDTEXTUREPROC,                    glBindTexture);
 GL_IMPORT(false, PFNGLGENTEXTURESPROC,                    glGenTextures);
diff --git a/src/renderer_d3d11.cpp b/src/renderer_d3d11.cpp
index f8a38f23..6d6b54bd 100644
--- a/src/renderer_d3d11.cpp
+++ b/src/renderer_d3d11.cpp
@@ -405,6 +405,8 @@ namespace bgfx
 			DX_RELEASE(m_backBufferColor, 0);
 
 //			invalidateCache();
+
+			capturePreReset();
 		}
 
 		void postReset()
@@ -436,6 +438,8 @@ namespace bgfx
 
 			m_currentColor = m_backBufferColor;
 			m_currentDepthStencil = m_backBufferDepthStencil;
+
+			capturePostReset();
 		}
 
 		void flip()
@@ -761,6 +765,18 @@ namespace bgfx
 			commitTextureStage();
 		}
 
+		void capturePreReset()
+		{ // No-op in the D3D11 renderer: no capture resources are released here.
+		}
+
+		void capturePostReset()
+		{ // No-op in the D3D11 renderer: no capture resources are created here.
+		}
+
+		void capture()
+		{ // No-op in the D3D11 renderer: no frame is read back or passed to g_callback.
+		}
+
 		void saveScreenShot(Memory* _mem)
 		{
 			ID3D11Texture2D* backBuffer;
@@ -801,7 +817,14 @@ namespace bgfx
 
 				D3D11_MAPPED_SUBRESOURCE mapped;
 				DX_CHECK(m_deviceCtx->Map(texture, 0, D3D11_MAP_READ, 0, &mapped) );
-				saveTga( (const char*)_mem->data, backBufferDesc.Width, backBufferDesc.Height, mapped.RowPitch, mapped.pData);
+				g_callback->screenShot( (const char*)_mem->data
+					, backBufferDesc.Width
+					, backBufferDesc.Height
+					, mapped.RowPitch
+					, mapped.pData
+					, backBufferDesc.Height*mapped.RowPitch
+					, false
+					);
 				m_deviceCtx->Unmap(texture, 0);
 
 				DX_RELEASE(texture, 0);
@@ -1354,7 +1377,7 @@ namespace bgfx
 			D3D11_SUBRESOURCE_DATA* srd = (D3D11_SUBRESOURCE_DATA*)alloca(numSrd*sizeof(D3D11_SUBRESOURCE_DATA) );
 
 			uint32_t kk = 0;
-			bool convert = TextureFormat::XRGB8 == dds.m_type;
+			bool convert = false;
 
 			m_numMips = dds.m_numMips;
 
@@ -1362,6 +1385,7 @@ namespace bgfx
 			||  TextureFormat::Unknown < dds.m_type)
 			{
 				uint32_t bpp = s_textureFormat[dds.m_type].m_bpp;
+				convert = TextureFormat::BGRX8 == dds.m_type;
 
 				for (uint8_t side = 0, numSides = dds.m_cubeMap ? 6 : 1; side < numSides; ++side)
 				{
@@ -1380,7 +1404,7 @@ namespace bgfx
 						{
 							if (convert)
 							{
-								uint8_t* temp = (uint8_t*)g_realloc(NULL, mip.m_width*bpp*mip.m_height/8);
+								uint8_t* temp = (uint8_t*)g_realloc(NULL, mip.m_width*mip.m_height*bpp/8);
 								mip.decode(temp);
 
 								srd[kk].pSysMem = temp;
diff --git a/src/renderer_d3d9.cpp b/src/renderer_d3d9.cpp
index 0182e570..25fe1eff 100644
--- a/src/renderer_d3d9.cpp
+++ b/src/renderer_d3d9.cpp
@@ -656,6 +656,8 @@ namespace bgfx
 			DX_RELEASE(m_backBufferColor, 0);
 			DX_RELEASE(m_backBufferDepthStencil, 0);
 
+			capturePreReset();
+
 			for (uint32_t ii = 0; ii < countof(m_indexBuffers); ++ii)
 			{
 				m_indexBuffers[ii].preReset();
@@ -677,6 +679,8 @@ namespace bgfx
 			DX_CHECK(m_device->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &m_backBufferColor) );
 			DX_CHECK(m_device->GetDepthStencilSurface(&m_backBufferDepthStencil) );
 
+			capturePostReset();
+
 			for (uint32_t ii = 0; ii < countof(m_indexBuffers); ++ii)
 			{
 				m_indexBuffers[ii].postReset();
@@ -693,6 +697,89 @@ namespace bgfx
 			}
 		}
 
+		void capturePreReset() // Ends a running capture and drops the capture resources.
+		{
+			if (NULL != m_captureSurface) // the surface is created by capturePostReset when BGFX_RESET_CAPTURE is set
+			{
+				g_callback->captureEnd();
+			}
+			DX_RELEASE(m_captureSurface, 1); // NOTE(review): second argument is presumably the expected remaining ref count - confirm against DX_RELEASE
+			DX_RELEASE(m_captureTexture, 0);
+			DX_RELEASE(m_captureResolve, 0);
+		}
+
+		// Creates the resources used for frame capture and tells the callback that
+		// a capture begins. Does nothing unless BGFX_RESET_CAPTURE is set in m_flags.
+		void capturePostReset()
+		{
+			if (0 == (m_flags&BGFX_RESET_CAPTURE) )
+			{
+				return;
+			}
+
+			const uint32_t width = m_params.BackBufferWidth;
+			const uint32_t height = m_params.BackBufferHeight;
+			const D3DFORMAT fmt = m_params.BackBufferFormat;
+
+			// System-memory texture sized like the back buffer; capture() copies
+			// each frame into its level 0 surface.
+			DX_CHECK(m_device->CreateTexture(width, height, 1, 0, fmt
+				, D3DPOOL_SYSTEMMEM
+				, &m_captureTexture
+				, NULL
+				) );
+			DX_CHECK(m_captureTexture->GetSurfaceLevel(0, &m_captureSurface) );
+
+			// With a multisampled back buffer, capture() first copies into this
+			// non-multisampled render target and reads back from it.
+			if (D3DMULTISAMPLE_NONE != m_params.MultiSampleType)
+			{
+				DX_CHECK(m_device->CreateRenderTarget(width, height, fmt
+					, D3DMULTISAMPLE_NONE
+					, 0
+					, false
+					, &m_captureResolve
+					, NULL
+					) );
+			}
+
+			// Frames are announced as BGRA8 with a pitch of width*4 and no y-flip.
+			g_callback->captureBegin(width, height, width*4, TextureFormat::BGRA8, false);
+		}
+
+		// Reads back the current back buffer and passes it to g_callback->captureFrame.
+		// Does nothing while no capture surface exists.
+		void capture()
+		{
+			if (NULL == m_captureSurface)
+			{
+				return;
+			}
+
+			IDirect3DSurface9* source = m_backBufferColor;
+			if (NULL != m_captureResolve)
+			{
+				// The resolve target is created by capturePostReset for multisampled
+				// back buffers; copy into it and read back from there instead.
+				DX_CHECK(m_device->StretchRect(m_backBufferColor, 0, m_captureResolve, NULL, D3DTEXF_NONE) );
+				source = m_captureResolve;
+			}
+
+			// A failed read-back silently skips this frame.
+			const HRESULT hr = m_device->GetRenderTargetData(source, m_captureSurface);
+			if (!SUCCEEDED(hr) )
+			{
+				return;
+			}
+
+			D3DLOCKED_RECT rect;
+			DX_CHECK(m_captureSurface->LockRect(&rect, NULL, D3DLOCK_NO_DIRTY_UPDATE|D3DLOCK_NOSYSLOCK|D3DLOCK_READONLY) );
+
+			// The frame size is derived from the pitch of the locked surface.
+			g_callback->captureFrame(rect.pBits, m_params.BackBufferHeight*rect.Pitch);
+			DX_CHECK(m_captureSurface->UnlockRect() );
+		}
+
 		void saveScreenShot(Memory* _mem)
 		{
 #if BX_PLATFORM_WINDOWS
@@ -727,7 +814,15 @@ namespace bgfx
 			ClientToScreen(g_bgfxHwnd, &point);
 			uint8_t* data = (uint8_t*)rect.pBits;
 			uint32_t bytesPerPixel = rect.Pitch/dm.Width;
-			saveTga( (const char*)_mem->data, m_params.BackBufferWidth, m_params.BackBufferHeight, rect.Pitch, &data[point.y*rect.Pitch+point.x*bytesPerPixel]);
+
+			g_callback->screenShot( (const char*)_mem->data
+				, m_params.BackBufferWidth
+				, m_params.BackBufferHeight
+				, rect.Pitch
+				, &data[point.y*rect.Pitch+point.x*bytesPerPixel]
+				, m_params.BackBufferHeight*rect.Pitch
+				, false
+				);
 
 			DX_CHECK(surface->UnlockRect() );
 			DX_RELEASE(surface, 0);
@@ -752,6 +847,11 @@ namespace bgfx
 
 		IDirect3DSurface9* m_backBufferColor;
 		IDirect3DSurface9* m_backBufferDepthStencil;
+
+		IDirect3DTexture9* m_captureTexture;
+		IDirect3DSurface9* m_captureSurface;
+		IDirect3DSurface9* m_captureResolve;
+
 		IDirect3DVertexDeclaration9* m_instanceDataDecls[BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT];
 
 		HMODULE m_d3d9dll;
@@ -2490,6 +2590,10 @@ namespace bgfx
 		int64_t now = bx::getHPCounter();
 		elapsed += now;
 
+		int64_t captureElapsed = -bx::getHPCounter();
+		s_renderCtx.capture();
+		captureElapsed += bx::getHPCounter();
+
 		static int64_t last = now;
 		int64_t frameTime = now - last;
 		last = now;
@@ -2513,7 +2617,6 @@ namespace bgfx
 
 				double freq = double(bx::getHPFrequency() );
 				double toMs = 1000.0/freq;
-				double elapsedCpuMs = double(elapsed)*toMs;
 
 				tvm.clear();
 				uint16_t pos = 10;
@@ -2524,6 +2627,8 @@ namespace bgfx
 					, double(max)*toMs
 					, freq/frameTime
 					);
+
+				double elapsedCpuMs = double(elapsed)*toMs;
 				tvm.printf(10, pos++, 0x8e, " Draw calls: %4d / CPU %3.4f [ms]"
 					, m_render->m_num
 					, elapsedCpuMs
@@ -2533,6 +2638,9 @@ namespace bgfx
 					, statsNumInstances
 					, statsNumPrimsSubmitted
 					);
+
+				double captureMs = double(captureElapsed)*toMs;
+				tvm.printf(10, pos++, 0x8e, "    Capture: %3.4f [ms]", captureMs);
 				tvm.printf(10, pos++, 0x8e, "    Indices: %7d", statsNumIndices);
 				tvm.printf(10, pos++, 0x8e, "   DVB size: %7d", m_render->m_vboffset);
 				tvm.printf(10, pos++, 0x8e, "   DIB size: %7d", m_render->m_iboffset);
diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp
index 87a169f7..4f71f8bb 100644
--- a/src/renderer_gl.cpp
+++ b/src/renderer_gl.cpp
@@ -26,6 +26,97 @@
 
 namespace bgfx
 {
+	struct Extension // Describes one OpenGL extension tracked by the renderer.
+	{
+		enum Enum // Index into s_extension; the order must match that table.
+		{
+			EXT_texture_filter_anisotropic,
+			EXT_texture_format_BGRA8888,
+			EXT_texture_compression_s3tc,
+			EXT_texture_compression_dxt1,
+			CHROMIUM_texture_compression_dxt3,
+			CHROMIUM_texture_compression_dxt5,
+			ARB_texture_float,
+			OES_texture_float,
+			OES_texture_float_linear,
+			OES_texture_half_float,
+			OES_texture_half_float_linear,
+			EXT_texture_type_2_10_10_10_REV,
+			EXT_texture_sRGB,
+			ARB_texture_swizzle,
+			EXT_texture_swizzle,
+			OES_standard_derivatives,
+			ARB_get_program_binary,
+			OES_get_program_binary,
+			EXT_framebuffer_blit,
+			ARB_timer_query,
+			EXT_timer_query,
+			ARB_framebuffer_sRGB,
+			EXT_framebuffer_sRGB,
+			ARB_multisample,
+			CHROMIUM_framebuffer_multisample,
+			ANGLE_translated_shader_source,
+			ARB_instanced_arrays,
+			ANGLE_instanced_arrays,
+			ARB_half_float_vertex,
+			OES_vertex_half_float,
+			ARB_vertex_type_2_10_10_10_rev,
+			OES_vertex_type_10_10_10_2,
+			EXT_occlusion_query_boolean,
+			ARB_vertex_array_object,
+			ATI_meminfo,
+			NVX_gpu_memory_info,
+
+			Count
+		};
+
+		const char* m_name; // Extension name string, e.g. "GL_EXT_texture_sRGB".
+		bool m_supported; // Read before an extension is used; false for every entry of the static table.
+		bool m_initialize; // NOTE(review): presumably whether the extension should be looked up at init - confirm.
+	};
+
+	static Extension s_extension[Extension::Count] = // One entry per Extension::Enum value, in enum order: { m_name, m_supported, m_initialize }.
+	{
+		{ "GL_EXT_texture_filter_anisotropic",    false, true },
+		// Nvidia BGRA on Linux bug:
+		// https://groups.google.com/a/chromium.org/forum/?fromgroups#!topic/chromium-reviews/yFfbUdyeUCQ
+		{ "GL_EXT_texture_format_BGRA8888",       false, !BX_PLATFORM_LINUX },
+		{ "GL_EXT_texture_compression_s3tc",      false, true },
+		{ "GL_EXT_texture_compression_dxt1",      false, true },
+		{ "GL_CHROMIUM_texture_compression_dxt3", false, true },
+		{ "GL_CHROMIUM_texture_compression_dxt5", false, true },
+		{ "GL_ARB_texture_float",                 false, true },
+		{ "GL_OES_texture_float",                 false, true },
+		{ "GL_OES_texture_float_linear",          false, true },
+		{ "GL_OES_texture_half_float",            false, true },
+		{ "GL_OES_texture_half_float_linear",     false, true },
+		{ "GL_EXT_texture_type_2_10_10_10_REV",   false, true },
+		{ "GL_EXT_texture_sRGB",                  false, true },
+		{ "GL_ARB_texture_swizzle",               false, true },
+		{ "GL_EXT_texture_swizzle",               false, true },
+		{ "GL_OES_standard_derivatives",          false, true },
+		{ "GL_ARB_get_program_binary",            false, true },
+		{ "GL_OES_get_program_binary",            false, false },
+		{ "GL_EXT_framebuffer_blit",              false, true },
+		{ "GL_ARB_timer_query",                   false, true },
+		{ "GL_EXT_timer_query",                   false, true },
+		{ "GL_ARB_framebuffer_sRGB",              false, true },
+		{ "GL_EXT_framebuffer_sRGB",              false, true },
+		{ "GL_ARB_multisample",                   false, true },
+		{ "GL_CHROMIUM_framebuffer_multisample",  false, true },
+		{ "GL_ANGLE_translated_shader_source",    false, true },
+		{ "GL_ARB_instanced_arrays",              false, true },
+		{ "GL_ANGLE_instanced_arrays",            false, true },
+		{ "GL_ARB_half_float_vertex",             false, true },
+		{ "GL_OES_vertex_half_float",             false, true },
+		{ "GL_ARB_vertex_type_2_10_10_10_rev",    false, true },
+		{ "GL_OES_vertex_type_10_10_10_2",        false, true },
+		{ "GL_EXT_occlusion_query_boolean",       false, true },
+		{ "GL_ARB_vertex_array_object",           false, true },
+		{ "GL_ATI_meminfo",                       false, true },
+		{ "GL_NVX_gpu_memory_info",               false, true },
+	};
+
 #if BGFX_USE_WGL
 	PFNWGLGETPROCADDRESSPROC wglGetProcAddress;
 	PFNWGLMAKECURRENTPROC wglMakeCurrent;
@@ -74,10 +165,29 @@ namespace bgfx
 	};
 #endif // BX_PLATFORM_NACL
 
+	// Swaps the first and third byte of every 4-byte pixel in place (RGBA <-> BGRA).
+	static void rgbaToBgra(uint8_t* _data, uint32_t _width, uint32_t _height)
+	{
+		const uint32_t pitch = _width*4;
+		for (uint32_t row = 0; row < _height; ++row)
+		{
+			uint8_t* pixel = _data + row*pitch;
+			uint8_t* const rowEnd = pixel + pitch;
+			for (; pixel != rowEnd; pixel += 4)
+			{
+				const uint8_t first = pixel[0];
+				pixel[0] = pixel[2];
+				pixel[2] = first;
+			}
+		}
+	}
+
 	struct RendererContext
 	{
 		RendererContext()
-			: m_maxAnisotropy(0.0f)
+			: m_capture(NULL)
+			, m_captureSize(0)
+			, m_maxAnisotropy(0.0f)
 			, m_dxtSupport(false)
 			, m_programBinarySupport(false)
 			, m_flip(false)
@@ -116,6 +226,7 @@ namespace bgfx
 
 				m_resolution = _resolution;
 				setRenderContextSize(_resolution.m_width, _resolution.m_height);
+				updateCapture();
 			}
 		}
 
@@ -198,6 +309,19 @@ namespace bgfx
 
 					DescribePixelFormat(m_hdc, pixelFormat, sizeof(PIXELFORMATDESCRIPTOR), &pfd);
 
+					BX_TRACE("Pixel format:\n"
+						"\tiPixelType %d\n"
+						"\tcColorBits %d\n"
+						"\tcAlphaBits %d\n"
+						"\tcDepthBits %d\n"
+						"\tcStencilBits %d\n"
+						, pfd.iPixelType
+						, pfd.cColorBits
+						, pfd.cAlphaBits
+						, pfd.cDepthBits
+						, pfd.cStencilBits
+						);
+
 					int result;
 					result = SetPixelFormat(m_hdc, pixelFormat, &pfd);
 					BGFX_FATAL(0 != result, Fatal::UnableToInitialize, "SetPixelFormat failed!");
@@ -464,21 +588,75 @@ namespace bgfx
 			}
 		}
 
+		// Starts, restarts or stops frame capture to match m_resolution.m_flags.
+		void updateCapture()
+		{
+			// End a running capture first so captureBegin/captureEnd stay paired across resets.
+			if (NULL != m_capture)
+			{
+				g_callback->captureEnd();
+				g_free(m_capture);
+				m_capture = NULL;
+				m_captureSize = 0;
+			}
+
+			if (m_resolution.m_flags&BGFX_RESET_CAPTURE)
+			{
+				m_captureSize = m_resolution.m_width*m_resolution.m_height*4;
+				m_capture = g_realloc(NULL, m_captureSize);
+				g_callback->captureBegin(m_resolution.m_width, m_resolution.m_height, m_resolution.m_width*4, TextureFormat::BGRA8, true);
+			}
+		}
+
+		// Reads back the frame buffer and passes it to g_callback->captureFrame as BGRA8.
+		void capture()
+		{
+			if (NULL != m_capture)
+			{
+				GLint fmt = s_extension[Extension::EXT_texture_format_BGRA8888].m_supported ? GL_BGRA_EXT : GL_RGBA;
+				GL_CHECK(glReadPixels(0, 0, m_resolution.m_width, m_resolution.m_height, fmt, GL_UNSIGNED_BYTE, m_capture) );
+
+				// captureBegin announced BGRA8; without BGRA read-back support swizzle on the CPU.
+				if (GL_RGBA == fmt)
+				{
+					rgbaToBgra( (uint8_t*)m_capture, m_resolution.m_width, m_resolution.m_height);
+				}
+
+				g_callback->captureFrame(m_capture, m_captureSize);
+			}
+		}
+
 		void saveScreenShot(Memory* _mem)
 		{
-			void* data = g_realloc(NULL, m_resolution.m_width*m_resolution.m_height*4);
-			glReadPixels(0, 0, m_resolution.m_width, m_resolution.m_height, GL_RGBA, GL_UNSIGNED_BYTE, data);
+			uint32_t length = m_resolution.m_width*m_resolution.m_height*4;
+			uint8_t* data = (uint8_t*)g_realloc(NULL, length);
+			GLint fmt = s_extension[Extension::EXT_texture_format_BGRA8888].m_supported ? GL_BGRA_EXT : GL_RGBA;
 
-			uint8_t* rgba = (uint8_t*)data;
-			for (uint32_t ii = 0, num = m_resolution.m_width*m_resolution.m_height; ii < num; ++ii)
+			uint32_t width = m_resolution.m_width;
+			uint32_t height = m_resolution.m_height;
+
+			GL_CHECK(glReadPixels(0
+				, 0
+				, width
+				, height
+				, fmt
+				, GL_UNSIGNED_BYTE
+				, data
+				) );
+
+			if (GL_RGBA == fmt)
 			{
-				uint8_t temp = rgba[0];
-				rgba[0] = rgba[2];
-				rgba[2] = temp;
-				rgba += 4;
+				rgbaToBgra(data, width, height);
 			}
 
-			saveTga( (const char*)_mem->data, m_resolution.m_width, m_resolution.m_height, m_resolution.m_width*4, data, false, true);
+			g_callback->screenShot( (const char*)_mem->data
+				, width
+				, height
+				, width*4
+				, data
+				, length
+				, true
+				);
 			g_free(data);
 		}
 
@@ -531,6 +709,8 @@ namespace bgfx
 		TextVideoMem m_textVideoMem;
 
 		Resolution m_resolution;
+		void* m_capture;
+		uint32_t m_captureSize;
 		float m_maxAnisotropy;
 		bool m_dxtSupport;
 		bool m_programBinarySupport;
@@ -613,93 +793,6 @@ namespace bgfx
 	}
 #endif // BX_PLATFORM_
 
-	struct Extension
-	{
-		enum Enum
-		{
-			EXT_texture_filter_anisotropic,
-			EXT_texture_format_BGRA8888,
-			EXT_texture_compression_s3tc,
-			EXT_texture_compression_dxt1,
-			CHROMIUM_texture_compression_dxt3,
-			CHROMIUM_texture_compression_dxt5,
-			ARB_texture_float,
-			OES_texture_float,
-			OES_texture_float_linear,
-			OES_texture_half_float,
-			OES_texture_half_float_linear,
-			EXT_texture_type_2_10_10_10_REV,
-			EXT_texture_sRGB,
-			OES_standard_derivatives,
-			ARB_get_program_binary,
-			OES_get_program_binary,
-			EXT_framebuffer_blit,
-			ARB_timer_query,
-			EXT_timer_query,
-			ARB_framebuffer_sRGB,
-			EXT_framebuffer_sRGB,
-			ARB_multisample,
-			CHROMIUM_framebuffer_multisample,
-			ANGLE_translated_shader_source,
-			ARB_instanced_arrays,
-			ANGLE_instanced_arrays,
-			ARB_half_float_vertex,
-			OES_vertex_half_float,
-			ARB_vertex_type_2_10_10_10_rev,
-			OES_vertex_type_10_10_10_2,
-			EXT_occlusion_query_boolean,
-			ARB_vertex_array_object,
-			ATI_meminfo,
-			NVX_gpu_memory_info,
-
-			Count
-		};
-
-		const char* m_name;
-		bool m_supported;
-		bool m_initialize;
-	};
-
-	static Extension s_extension[Extension::Count] =
-	{
-		{ "GL_EXT_texture_filter_anisotropic",    false, true },
-		// Nvidia BGRA on Linux bug:
-		// https://groups.google.com/a/chromium.org/forum/?fromgroups#!topic/chromium-reviews/yFfbUdyeUCQ
-		{ "GL_EXT_texture_format_BGRA8888",       false, !BX_PLATFORM_LINUX },
-		{ "GL_EXT_texture_compression_s3tc",      false, true },
-		{ "GL_EXT_texture_compression_dxt1",      false, true },
-		{ "GL_CHROMIUM_texture_compression_dxt3", false, true },
-		{ "GL_CHROMIUM_texture_compression_dxt5", false, true },
-		{ "GL_ARB_texture_float",                 false, true },
-		{ "GL_OES_texture_float",                 false, true },
-		{ "GL_OES_texture_float_linear",          false, true },
-		{ "GL_OES_texture_half_float",            false, true },
-		{ "GL_OES_texture_half_float_linear",     false, true },
-		{ "GL_EXT_texture_type_2_10_10_10_REV",   false, true },
-		{ "GL_EXT_texture_sRGB",                  false, true },
-		{ "GL_OES_standard_derivatives",          false, true },
-		{ "GL_ARB_get_program_binary",            false, true },
-		{ "GL_OES_get_program_binary",            false, false },
-		{ "GL_EXT_framebuffer_blit",              false, true },
-		{ "GL_ARB_timer_query",                   false, true },
-		{ "GL_EXT_timer_query",                   false, true },
-		{ "GL_ARB_framebuffer_sRGB",              false, true },
-		{ "GL_EXT_framebuffer_sRGB",              false, true },
-		{ "GL_ARB_multisample",                   false, true },
-		{ "GL_CHROMIUM_framebuffer_multisample",  false, true },
-		{ "GL_ANGLE_translated_shader_source",    false, true },
-		{ "GL_ARB_instanced_arrays",              false, true },
-		{ "GL_ANGLE_instanced_arrays",            false, true },
-		{ "GL_ARB_half_float_vertex",             false, true },
-		{ "GL_OES_vertex_half_float",             false, true },
-		{ "GL_ARB_vertex_type_2_10_10_10_rev",    false, true },
-		{ "GL_OES_vertex_type_10_10_10_2",        false, true },
-		{ "GL_EXT_occlusion_query_boolean",       false, true },
-		{ "GL_ARB_vertex_array_object",           false, true },
-		{ "GL_ATI_meminfo",                       false, true },
-		{ "GL_NVX_gpu_memory_info",               false, true },
-	};
-
 	static const GLenum s_primType[] =
 	{
 		GL_TRIANGLES,
@@ -966,21 +1059,21 @@ namespace bgfx
 
 		if (s_renderCtx.m_programBinarySupport)
 		{
-			uint32_t length;
-			g_cache(id, false, NULL, length);
+			uint32_t length = g_callback->cacheReadSize(id);
 			cached = length > 0;
 
 			if (cached)
 			{
 				void* data = g_realloc(NULL, length);
-				g_cache(id, false, data, length);
+				if (g_callback->cacheRead(id, data, length) )
+				{
+					bx::MemoryReader reader(data, length);
 
-				bx::MemoryReader reader(data, length);
+					GLenum format;
+					bx::read(&reader, format);
 
-				GLenum format;
-				bx::read(&reader, format);
-
-				GL_CHECK(glProgramBinary(m_id, format, reader.getDataPtr(), (GLsizei)reader.remaining() ) );
+					GL_CHECK(glProgramBinary(m_id, format, reader.getDataPtr(), (GLsizei)reader.remaining() ) );
+				}
 
 				g_free(data);
 			}
@@ -1022,7 +1115,7 @@ namespace bgfx
 					GL_CHECK(glGetProgramBinary(m_id, programLength, NULL, &format, &data[4]) );
 					*(uint32_t*)data = format;
 
-					g_cache(id, true, data, length);
+					g_callback->cacheWrite(id, data, length);
 
 					g_free(data);
 				}
@@ -1344,16 +1437,30 @@ namespace bgfx
 				if (GL_RGBA == internalFmt
 				||  decompress)
 				{
+					internalFmt = GL_RGBA;
 					m_fmt = s_extension[Extension::EXT_texture_format_BGRA8888].m_supported ? GL_BGRA_EXT : GL_RGBA;
+
 				}
 
+				bool swizzle = GL_RGBA == m_fmt;
+
+#if BGFX_CONFIG_RENDERER_OPENGL
+				if (swizzle
+				&& (s_extension[Extension::ARB_texture_swizzle].m_supported || s_extension[Extension::EXT_texture_swizzle].m_supported) )
+				{
+					swizzle = false;
+					GLint swizzleMask[] = { GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA };
+					GL_CHECK(glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, swizzleMask) );
+				}
+#endif // BGFX_CONFIG_RENDERER_OPENGL
+
 				m_type = tfi.m_type;
 				if (decompress)
 				{
 					m_type = GL_UNSIGNED_BYTE;
 				}
 
-				uint8_t* bits = (uint8_t*)g_realloc(NULL, dds.m_width*dds.m_height*tfi.m_bpp/8);
+				uint8_t* bits = (uint8_t*)g_realloc(NULL, dds.m_width*dds.m_height*4);
 
 				for (uint8_t side = 0, numSides = dds.m_cubeMap ? 6 : 1; side < numSides; ++side)
 				{
@@ -1372,21 +1479,9 @@ namespace bgfx
 						{
 							mip.decode(bits);
 
-							if (GL_RGBA == internalFmt)
+							if (swizzle)
 							{
-								uint32_t dstpitch = width*4;
-								for (uint32_t yy = 0; yy < height; ++yy)
-								{
-									uint8_t* dst = &bits[yy*dstpitch];
-
-									for (uint32_t xx = 0; xx < width; ++xx)
-									{
-										uint8_t tmp = dst[0];
-										dst[0] = dst[2];
-										dst[2] = tmp;
-										dst += 4;
-									}
-								}
+								rgbaToBgra(bits, width, height);
 							}
 
 							texImage(target+side
@@ -2881,6 +2976,10 @@ namespace bgfx
 		int64_t now = bx::getHPCounter();
 		elapsed += now;
 
+		int64_t captureElapsed = -bx::getHPCounter();
+		s_renderCtx.capture();
+		captureElapsed += bx::getHPCounter();
+
 		static int64_t last = now;
 		int64_t frameTime = now - last;
 		last = now;
@@ -2908,7 +3007,6 @@ namespace bgfx
 				next = now + bx::getHPFrequency();
 				double freq = double(bx::getHPFrequency() );
 				double toMs = 1000.0/freq;
-				double elapsedCpuMs = double(elapsed)*toMs;
 
 				tvm.clear();
 				uint16_t pos = 10;
@@ -2919,6 +3017,8 @@ namespace bgfx
 					, double(max)*toMs
 					, freq/frameTime
 					);
+
+				double elapsedCpuMs = double(elapsed)*toMs;
 				tvm.printf(10, pos++, 0x8e, " Draw calls: %4d / CPU %3.4f [ms] %c GPU %3.4f [ms]"
 					, m_render->m_num
 					, elapsedCpuMs
@@ -2930,6 +3030,10 @@ namespace bgfx
 					, statsNumInstances
 					, statsNumPrimsSubmitted
 					);
+
+				double captureMs = double(captureElapsed)*toMs;
+				tvm.printf(10, pos++, 0x8e, "    Capture: %3.4f [ms]", captureMs);
+
 				tvm.printf(10, pos++, 0x8e, "    Indices: %7d", statsNumIndices);
 				tvm.printf(10, pos++, 0x8e, "   DVB size: %7d", m_render->m_vboffset);
 				tvm.printf(10, pos++, 0x8e, "   DIB size: %7d", m_render->m_iboffset);
diff --git a/src/renderer_gl.h b/src/renderer_gl.h
index fca4fcf9..c12661f1 100644
--- a/src/renderer_gl.h
+++ b/src/renderer_gl.h
@@ -158,6 +158,7 @@ typedef void (APIENTRYP PFNGLREADPIXELSPROC) (GLint x, GLint y, GLsizei width, G
 typedef void (APIENTRYP PFNGLTEXIMAGE2DPROC) (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels);
 typedef void (APIENTRYP PFNGLTEXSUBIMAGE2DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels);
 typedef void (APIENTRYP PFNGLTEXPARAMETERIPROC) (GLenum target, GLenum pname, GLint param);
+typedef void (APIENTRYP PFNGLTEXPARAMETERIVPROC) (GLenum target, GLenum pname, const GLint *params);
 typedef void (APIENTRYP PFNGLTEXPARAMETERFPROC) (GLenum target, GLenum pname, GLfloat param);
 typedef void (APIENTRYP PFNGLPIXELSTOREI) (GLenum pname, GLint param);
 typedef void (APIENTRYP PFNGLBINDTEXTUREPROC) (GLenum target, GLuint texture);