diff --git a/examples/01-cubes/cubes.cpp b/examples/01-cubes/cubes.cpp
index 13a13109..1dd90a2b 100644
--- a/examples/01-cubes/cubes.cpp
+++ b/examples/01-cubes/cubes.cpp
@@ -104,8 +104,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	{
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
@@ -138,7 +138,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			for (uint32_t xx = 0; xx < 11; ++xx)
 			{
 				float mtx[16];
-				mtxRotateXY(mtx, time + xx*0.21f, time + yy*0.37f);
+				bx::mtxRotateXY(mtx, time + xx*0.21f, time + yy*0.37f);
 				mtx[12] = -15.0f + float(xx)*3.0f;
 				mtx[13] = -15.0f + float(yy)*3.0f;
 				mtx[14] = 0.0f;
diff --git a/examples/02-metaballs/metaballs.cpp b/examples/02-metaballs/metaballs.cpp
index 6d6ab3be..72dc7598 100644
--- a/examples/02-metaballs/metaballs.cpp
+++ b/examples/02-metaballs/metaballs.cpp
@@ -547,8 +547,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
@@ -629,7 +629,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 						grid[xoffset-zpitch].m_val - grid[xoffset+zpitch].m_val,
 					};
 
-					vec3Norm(grid[xoffset].m_normal, normal);
+					bx::vec3Norm(grid[xoffset].m_normal, normal);
 				}
 			}
 		}
@@ -688,7 +688,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		profTriangulate = bx::getHPCounter() - profTriangulate;
 
 		float mtx[16];
-		mtxRotateXY(mtx, time*0.67f, time);
+		bx::mtxRotateXY(mtx, time*0.67f, time);
 
 		// Set model matrix for rendering.
 		bgfx::setTransform(mtx);
diff --git a/examples/03-raymarch/raymarch.cpp b/examples/03-raymarch/raymarch.cpp
index bf50c78e..a60ffb41 100644
--- a/examples/03-raymarch/raymarch.cpp
+++ b/examples/03-raymarch/raymarch.cpp
@@ -174,14 +174,14 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 1.
 		bgfx::setViewTransform(0, view, proj);
 
 		float ortho[16];
-		mtxOrtho(ortho, 0.0f, 1280.0f, 720.0f, 0.0f, 0.0f, 100.0f);
+		bx::mtxOrtho(ortho, 0.0f, 1280.0f, 720.0f, 0.0f, 0.0f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(1, NULL, ortho);
@@ -189,28 +189,28 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float time = (float)( (bx::getHPCounter()-timeOffset)/double(bx::getHPFrequency() ) );
 
 		float vp[16];
-		mtxMul(vp, view, proj);
+		bx::mtxMul(vp, view, proj);
 
 		float mtx[16];
-		mtxRotateXY(mtx
+		bx::mtxRotateXY(mtx
 			, time
 			, time*0.37f
 			); 
 
 		float mtxInv[16];
-		mtxInverse(mtxInv, mtx);
+		bx::mtxInverse(mtxInv, mtx);
 		float lightDirModel[4] = { -0.4f, -0.5f, -1.0f, 0.0f };
 		float lightDirModelN[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
-		vec3Norm(lightDirModelN, lightDirModel);
+		bx::vec3Norm(lightDirModelN, lightDirModel);
 		float lightDir[4];
-		vec4MulMtx(lightDir, lightDirModelN, mtxInv);
+		bx::vec4MulMtx(lightDir, lightDirModelN, mtxInv);
 		bgfx::setUniform(u_lightDir, lightDir);
 
 		float mvp[16];
-		mtxMul(mvp, mtx, vp);
+		bx::mtxMul(mvp, mtx, vp);
 
 		float invMvp[16];
-		mtxInverse(invMvp, mvp);
+		bx::mtxInverse(invMvp, mvp);
 		bgfx::setUniform(u_mtx, invMvp);
 
 		bgfx::setUniform(u_time, &time);
diff --git a/examples/04-mesh/mesh.cpp b/examples/04-mesh/mesh.cpp
index 54d46d6c..ca2bbf0c 100644
--- a/examples/04-mesh/mesh.cpp
+++ b/examples/04-mesh/mesh.cpp
@@ -65,14 +65,14 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
 
 		float mtx[16];
-		mtxRotateXY(mtx
+		bx::mtxRotateXY(mtx
 			, 0.0f
 			, time*0.37f
 			); 
diff --git a/examples/05-instancing/instancing.cpp b/examples/05-instancing/instancing.cpp
index 45672190..71ec7b1e 100644
--- a/examples/05-instancing/instancing.cpp
+++ b/examples/05-instancing/instancing.cpp
@@ -135,8 +135,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			
 			float view[16];
 			float proj[16];
-			mtxLookAt(view, eye, at);
-			mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+			bx::mtxLookAt(view, eye, at);
+			bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 			// Set view and projection matrix for view 0.
 			bgfx::setViewTransform(0, view, proj);
@@ -153,7 +153,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 					for (uint32_t xx = 0; xx < 11; ++xx)
 					{
 						float* mtx = (float*)data;
-						mtxRotateXY(mtx, time + xx*0.21f, time + yy*0.37f);
+						bx::mtxRotateXY(mtx, time + xx*0.21f, time + yy*0.37f);
 						mtx[12] = -15.0f + float(xx)*3.0f;
 						mtx[13] = -15.0f + float(yy)*3.0f;
 						mtx[14] = 0.0f;
diff --git a/examples/06-bump/bump.cpp b/examples/06-bump/bump.cpp
index 31306f3a..000af98c 100644
--- a/examples/06-bump/bump.cpp
+++ b/examples/06-bump/bump.cpp
@@ -195,8 +195,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		float lightPosRadius[4][4];
 		for (uint32_t ii = 0; ii < numLights; ++ii)
@@ -238,7 +238,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 					for (uint32_t xx = 0; xx < 3; ++xx)
 					{
 						float* mtx = (float*)data;
-						mtxRotateXY(mtx, time*0.023f + xx*0.21f, time*0.03f + yy*0.37f);
+						bx::mtxRotateXY(mtx, time*0.023f + xx*0.21f, time*0.03f + yy*0.37f);
 						mtx[12] = -3.0f + float(xx)*3.0f;
 						mtx[13] = -3.0f + float(yy)*3.0f;
 						mtx[14] = 0.0f;
@@ -281,7 +281,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				for (uint32_t xx = 0; xx < 3; ++xx)
 				{
 					float mtx[16];
-					mtxRotateXY(mtx, time*0.023f + xx*0.21f, time*0.03f + yy*0.37f);
+					bx::mtxRotateXY(mtx, time*0.023f + xx*0.21f, time*0.03f + yy*0.37f);
 					mtx[12] = -3.0f + float(xx)*3.0f;
 					mtx[13] = -3.0f + float(yy)*3.0f;
 					mtx[14] = 0.0f;
diff --git a/examples/07-callback/callback.cpp b/examples/07-callback/callback.cpp
index 38612a6e..13bd61da 100644
--- a/examples/07-callback/callback.cpp
+++ b/examples/07-callback/callback.cpp
@@ -426,8 +426,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
@@ -440,7 +440,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			for (uint32_t xx = 0; xx < 11-yy; ++xx)
 			{
 				float mtx[16];
-				mtxRotateXY(mtx, time + xx*0.21f, time + yy*0.37f);
+				bx::mtxRotateXY(mtx, time + xx*0.21f, time + yy*0.37f);
 				mtx[12] = -15.0f + float(xx)*3.0f;
 				mtx[13] = -15.0f + float(yy)*3.0f;
 				mtx[14] = 0.0f;
diff --git a/examples/08-update/update.cpp b/examples/08-update/update.cpp
index 9631bd7c..d5b1d38d 100644
--- a/examples/08-update/update.cpp
+++ b/examples/08-update/update.cpp
@@ -287,14 +287,14 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
 
 		float mtx[16];
-		mtxRotateXY(mtx, time, time*0.37f);
+		bx::mtxRotateXY(mtx, time, time*0.37f);
 
 		// Set model matrix for rendering.
 		bgfx::setTransform(mtx);
@@ -319,11 +319,11 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		// Set view and projection matrix for view 1.
 		const float aspectRatio = float(height)/float(width);
 		const float size = 10.0f;
-		mtxOrtho(proj, -size, size, size*aspectRatio, -size*aspectRatio, 0.0f, 1000.0f);
+		bx::mtxOrtho(proj, -size, size, size*aspectRatio, -size*aspectRatio, 0.0f, 1000.0f);
 		bgfx::setViewTransform(1, NULL, proj);
 
 
-		mtxTranslate(mtx, -8.0f - BX_COUNTOF(textures)*0.1f*0.5f, 1.9f, 0.0f);
+		bx::mtxTranslate(mtx, -8.0f - BX_COUNTOF(textures)*0.1f*0.5f, 1.9f, 0.0f);
 
 		// Set model matrix for rendering.
 		bgfx::setTransform(mtx);
@@ -347,7 +347,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		for (uint32_t ii = 0; ii < BX_COUNTOF(textures); ++ii)
 		{
-			mtxTranslate(mtx, -8.0f - BX_COUNTOF(textures)*0.1f*0.5f + ii*2.1f, 4.0f, 0.0f);
+			bx::mtxTranslate(mtx, -8.0f - BX_COUNTOF(textures)*0.1f*0.5f + ii*2.1f, 4.0f, 0.0f);
 
 			// Set model matrix for rendering.
 			bgfx::setTransform(mtx);
@@ -371,7 +371,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		for (uint32_t ii = 0; ii < 3; ++ii)
 		{
-			mtxTranslate(mtx, -8.0f - BX_COUNTOF(textures)*0.1f*0.5f + 8*2.1f, -4.0f + ii*2.1f, 0.0f);
+			bx::mtxTranslate(mtx, -8.0f - BX_COUNTOF(textures)*0.1f*0.5f + 8*2.1f, -4.0f + ii*2.1f, 0.0f);
 
 			// Set model matrix for rendering.
 			bgfx::setTransform(mtx);
diff --git a/examples/09-hdr/hdr.cpp b/examples/09-hdr/hdr.cpp
index 3ab15879..80f0b2c4 100644
--- a/examples/09-hdr/hdr.cpp
+++ b/examples/09-hdr/hdr.cpp
@@ -323,8 +323,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float view[16];
 		float proj[16];
 
-		mtxIdentity(view);
-		mtxOrtho(proj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
+		bx::mtxIdentity(view);
+		bx::mtxOrtho(proj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransformMask(0
@@ -345,16 +345,16 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float eye[3] = { 0.0f, 1.0f, -2.5f };
 
 		float mtx[16];
-		mtxRotateXY(mtx
+		bx::mtxRotateXY(mtx
 			, 0.0f
 			, time
 			); 
 
 		float temp[4];
-		vec3MulMtx(temp, eye, mtx);
+		bx::vec3MulMtx(temp, eye, mtx);
 
-		mtxLookAt(view, temp, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, temp, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 1.
 		bgfx::setViewTransformMask(1<<1, view, proj);
diff --git a/examples/10-font/font.cpp b/examples/10-font/font.cpp
index 95cde030..cdfae7ee 100644
--- a/examples/10-font/font.cpp
+++ b/examples/10-font/font.cpp
@@ -8,7 +8,7 @@
 #include <bgfx.h>
 #include <bx/timer.h>
 #include <bx/string.h>
-#include "fpumath.h"
+#include <bx/fpumath.h>
 
 #include "font/font_manager.h"
 #include "font/text_buffer_manager.h"
@@ -200,11 +200,11 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
+		bx::mtxLookAt(view, eye, at);
 
 		// Setup a top-left ortho matrix for screen space drawing.
 		float centering = 0.5f;
-		mtxOrtho(proj, centering, width + centering, height + centering, centering, -1.0f, 1.0f);
+		bx::mtxOrtho(proj, centering, width + centering, height + centering, centering, -1.0f, 1.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
diff --git a/examples/11-fontsdf/fontsdf.cpp b/examples/11-fontsdf/fontsdf.cpp
index 86a320c3..78335fe3 100644
--- a/examples/11-fontsdf/fontsdf.cpp
+++ b/examples/11-fontsdf/fontsdf.cpp
@@ -7,7 +7,7 @@
 
 #include <bgfx.h>
 #include <bx/timer.h>
-#include "fpumath.h"
+#include <bx/fpumath.h>
 
 #include "font/font_manager.h"
 #include "font/text_metrics.h"
@@ -197,11 +197,11 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
+		bx::mtxLookAt(view, eye, at);
 		float centering = 0.5f;
 
 		// Setup a top-left ortho matrix for screen space drawing.
-		mtxOrtho(proj, centering, width + centering, height + centering, centering, -1.0f, 1.0f);
+		bx::mtxOrtho(proj, centering, width + centering, height + centering, centering, -1.0f, 1.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
@@ -214,20 +214,20 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float textScaleMat[16];
 		float screenCenterMat[16];
 
-		mtxRotateZ(textRotMat, textRotation);
-		mtxTranslate(textCenterMat, -(textAreaWidth * 0.5f), (-visibleLineCount)*metrics.getLineHeight()*0.5f, 0);
-		mtxScale(textScaleMat, textScale, textScale, 1.0f);
-		mtxTranslate(screenCenterMat, ( (width) * 0.5f), ( (height) * 0.5f), 0);
+		bx::mtxRotateZ(textRotMat, textRotation);
+		bx::mtxTranslate(textCenterMat, -(textAreaWidth * 0.5f), (-visibleLineCount)*metrics.getLineHeight()*0.5f, 0);
+		bx::mtxScale(textScaleMat, textScale, textScale, 1.0f);
+		bx::mtxTranslate(screenCenterMat, ( (width) * 0.5f), ( (height) * 0.5f), 0);
 
 		//first translate to text center, then scale, then rotate
 		float tmpMat[16];
-		mtxMul(tmpMat, textCenterMat, textRotMat);
+		bx::mtxMul(tmpMat, textCenterMat, textRotMat);
 
 		float tmpMat2[16];
-		mtxMul(tmpMat2, tmpMat, textScaleMat);
+		bx::mtxMul(tmpMat2, tmpMat, textScaleMat);
 
 		float tmpMat3[16];
-		mtxMul(tmpMat3, tmpMat2, screenCenterMat);
+		bx::mtxMul(tmpMat3, tmpMat2, screenCenterMat);
 
 		// Set model matrix for rendering.
 		bgfx::setTransform(tmpMat3);
diff --git a/examples/12-lod/lod.cpp b/examples/12-lod/lod.cpp
index 085baed4..77ba5d08 100644
--- a/examples/12-lod/lod.cpp
+++ b/examples/12-lod/lod.cpp
@@ -164,14 +164,14 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
 
 		float mtx[16];
-		mtxIdentity(mtx); 
+		bx::mtxIdentity(mtx); 
 
 		float stipple[3];
 		float stippleInv[3];
diff --git a/examples/13-stencil/stencil.cpp b/examples/13-stencil/stencil.cpp
index 7f948305..74472ecd 100644
--- a/examples/13-stencil/stencil.cpp
+++ b/examples/13-stencil/stencil.cpp
@@ -11,9 +11,9 @@
 #include <bgfx.h>
 #include <bx/timer.h>
 #include <bx/readerwriter.h>
+#include <bx/fpumath.h>
 #include "entry/entry.h"
 #include "camera.h"
-#include "fpumath.h"
 #include "imgui/imgui.h"
 
 #define RENDER_VIEWID_RANGE1_PASS_0   1 
@@ -211,7 +211,7 @@ void mtxReflected(float*__restrict _result
 				  , const float* __restrict _n  /* normal */
 				  )
 {
-	float dot = vec3Dot(_p, _n);
+	float dot = bx::vec3Dot(_p, _n);
 
 	_result[ 0] =  1.0f -  2.0f * _n[0] * _n[0]; //1-2Nx^2
 	_result[ 1] = -2.0f * _n[0] * _n[1];         //-2*Nx*Ny
@@ -977,7 +977,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 	// Set view and projection matrices.
 	const float aspect = float(viewState.m_width)/float(viewState.m_height);
-	mtxProj(viewState.m_proj, 60.0f, aspect, 0.1f, 100.0f);
+	bx::mtxProj(viewState.m_proj, 60.0f, aspect, 0.1f, 100.0f);
 
 	float initialPos[3] = { 0.0f, 18.0f, -40.0f };
 	cameraCreate();
@@ -1107,7 +1107,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		// Floor position.
 		float floorMtx[16];
-		mtxSRT(floorMtx
+		bx::mtxSRT(floorMtx
 			, 20.0f  //scaleX
 			, 20.0f  //scaleY
 			, 20.0f  //scaleZ
@@ -1121,7 +1121,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		// Bunny position.
 		float bunnyMtx[16];
-		mtxSRT(bunnyMtx
+		bx::mtxSRT(bunnyMtx
 			, 5.0f
 			, 5.0f
 			, 5.0f
@@ -1146,7 +1146,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float columnMtx[4][16];
 		for (uint8_t ii = 0; ii < 4; ++ii)
 		{
-			mtxSRT(columnMtx[ii]
+			bx::mtxSRT(columnMtx[ii]
 				, 1.0f
 				, 1.0f
 				, 1.0f
@@ -1163,7 +1163,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float cubeMtx[numCubes][16];
 		for (uint16_t ii = 0; ii < numCubes; ++ii)
 		{
-			mtxSRT(cubeMtx[ii]
+			bx::mtxSRT(cubeMtx[ii]
 				, 1.0f
 				, 1.0f
 				, 1.0f
@@ -1217,14 +1217,14 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				float reflectedLights[MAX_NUM_LIGHTS][4];
 				for (uint8_t ii = 0; ii < numLights; ++ii)
 				{
-					vec3MulMtx(reflectedLights[ii], lightPosRadius[ii], reflectMtx);
+					bx::vec3MulMtx(reflectedLights[ii], lightPosRadius[ii], reflectMtx);
 					reflectedLights[ii][3] = lightPosRadius[ii][3];
 				}
 				memcpy(s_uniforms.m_lightPosRadius, reflectedLights, numLights * 4*sizeof(float));
 
 				// Reflect and submit bunny.
 				float mtxReflectedBunny[16];
-				mtxMul(mtxReflectedBunny, bunnyMtx, reflectMtx);
+				bx::mtxMul(mtxReflectedBunny, bunnyMtx, reflectMtx);
 				bunnyMesh.submit(RENDER_VIEWID_RANGE1_PASS_1
 					, mtxReflectedBunny
 					, programColorLightning
@@ -1235,7 +1235,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				float mtxReflectedColumn[16];
 				for (uint8_t ii = 0; ii < 4; ++ii)
 				{
-					mtxMul(mtxReflectedColumn, columnMtx[ii], reflectMtx);
+					bx::mtxMul(mtxReflectedColumn, columnMtx[ii], reflectMtx);
 					columnMesh.submit(RENDER_VIEWID_RANGE1_PASS_1
 						, mtxReflectedColumn
 						, programColorLightning
@@ -1314,7 +1314,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				float plane_pos[3] = { 0.0f, 0.0f, 0.0f };
 				float normal[3] = { 0.0f, 1.0f, 0.0f };
 				memcpy(ground, normal, sizeof(float) * 3);
-				ground[3] = -vec3Dot(plane_pos, normal) - 0.01f; // - 0.01 against z-fighting
+				ground[3] = -bx::vec3Dot(plane_pos, normal) - 0.01f; // - 0.01 against z-fighting
 
 				for (uint8_t ii = 0, viewId = RENDER_VIEWID_RANGE5_PASS_6; ii < numLights; ++ii, ++viewId)
 				{
@@ -1335,7 +1335,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 					// Submit bunny's shadow.
 					float mtxShadowedBunny[16];
-					mtxMul(mtxShadowedBunny, bunnyMtx, shadowMtx);
+					bx::mtxMul(mtxShadowedBunny, bunnyMtx, shadowMtx);
 					bunnyMesh.submit(viewId
 						, mtxShadowedBunny
 						, programColorBlack
@@ -1346,7 +1346,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 					float mtxShadowedCube[16];
 					for (uint8_t jj = 0; jj < numCubes; ++jj)
 					{
-						mtxMul(mtxShadowedCube, cubeMtx[jj], shadowMtx);
+						bx::mtxMul(mtxShadowedCube, cubeMtx[jj], shadowMtx);
 						cubeMesh.submit(viewId
 							, mtxShadowedCube
 							, programColorBlack
@@ -1414,7 +1414,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		// Draw floor bottom.
 		float floorBottomMtx[16];
-		mtxSRT(floorBottomMtx
+		bx::mtxSRT(floorBottomMtx
 			, 20.0f  //scaleX
 			, 20.0f  //scaleY
 			, 20.0f  //scaleZ
diff --git a/examples/14-shadowvolumes/shadowvolumes.cpp b/examples/14-shadowvolumes/shadowvolumes.cpp
index 1eee83c3..6c49dfd9 100644
--- a/examples/14-shadowvolumes/shadowvolumes.cpp
+++ b/examples/14-shadowvolumes/shadowvolumes.cpp
@@ -22,9 +22,9 @@ using namespace std::tr1;
 #include <bx/allocator.h>
 #include <bx/hash.h>
 #include <bx/float4_t.h>
+#include <bx/fpumath.h>
 #include "entry/entry.h"
 #include "camera.h"
-#include "fpumath.h"
 #include "imgui/imgui.h"
 
 #define SV_USE_SIMD 1
@@ -217,10 +217,10 @@ void planeNormal(float* __restrict _result
 	vec1[1] = _v2[1] - _v1[1];
 	vec1[2] = _v2[2] - _v1[2];
 
-	vec3Cross(cross, vec0, vec1);
-	vec3Norm(_result, cross);
+	bx::vec3Cross(cross, vec0, vec1);
+	bx::vec3Norm(_result, cross);
 
-	_result[3] = -vec3Dot(_result, _v0);
+	_result[3] = -bx::vec3Dot(_result, _v0);
 }
 
 struct Uniforms
@@ -1248,7 +1248,7 @@ struct Instance
 		memcpy(s_uniforms.m_color, m_color, 3*sizeof(float) );
 
 		float mtx[16];
-		mtxSRT(mtx
+		bx::mtxSRT(mtx
 			, m_scale[0]
 			, m_scale[1]
 			, m_scale[2]
@@ -1360,34 +1360,34 @@ void shadowVolumeLightTransform(float* __restrict _outLightPos
 	 */
 
 	float pivot[16];
-	mtxTranslate(pivot
+	bx::mtxTranslate(pivot
 		, _lightPos[0] - _translate[0]
 		, _lightPos[1] - _translate[1]
 		, _lightPos[2] - _translate[2]
 		);
 
 	float mzyx[16];
-	mtxRotateZYX(mzyx
+	bx::mtxRotateZYX(mzyx
 		, -_rotate[0]
 		, -_rotate[1]
 		, -_rotate[2]
 		);
 
 	float invScale[16];
-	mtxScale(invScale
+	bx::mtxScale(invScale
 		, 1.0f / _scale[0]
 		, 1.0f / _scale[1]
 		, 1.0f / _scale[2]
 		);
 
 	float tmp0[16];
-	mtxMul(tmp0, pivot, mzyx);
+	bx::mtxMul(tmp0, pivot, mzyx);
 
 	float mtx[16];
-	mtxMul(mtx, tmp0, invScale);
+	bx::mtxMul(mtx, tmp0, invScale);
 
 	float origin[3] = { 0.0f, 0.0f, 0.0f };
-	vec3MulMtx(_outLightPos, origin, mtx);
+	bx::vec3MulMtx(_outLightPos, origin, mtx);
 }
 
 void shadowVolumeCreate(ShadowVolume& _shadowVolume
@@ -1452,7 +1452,7 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 			const Face& face = *iter;
 
 			bool frontFacing = false;
-			float f = vec3Dot(face.m_plane, _light) + face.m_plane[3];
+			float f = bx::vec3Dot(face.m_plane, _light) + face.m_plane[3];
 			if (f > 0.0f)
 			{
 				frontFacing = true;
@@ -1759,16 +1759,16 @@ void createNearClipVolume(float* __restrict _outPlanes24f
 
 	float mtxViewInv[16];
 	float mtxViewTrans[16];
-	mtxInverse(mtxViewInv, _view);
-	mtxTranspose(mtxViewTrans, _view);
+	bx::mtxInverse(mtxViewInv, _view);
+	bx::mtxTranspose(mtxViewTrans, _view);
 
 	float lightPosV[4];
-	vec4MulMtx(lightPosV, _lightPos, _view);
+	bx::vec4MulMtx(lightPosV, _lightPos, _view);
 
 	const float delta = 0.1f;
 
 	float nearNormal[4] = { 0.0f, 0.0f, 1.0f, _near };
-	float d = vec3Dot(lightPosV, nearNormal) + lightPosV[3] * nearNormal[3];
+	float d = bx::vec3Dot(lightPosV, nearNormal) + lightPosV[3] * nearNormal[3];
 
 	// Light is:
 	//  1.0f - in front of near plane
@@ -1790,10 +1790,10 @@ void createNearClipVolume(float* __restrict _outPlanes24f
 	};
 
 	float corners[4][3];
-	vec3MulMtx(corners[0], cornersV[0], mtxViewInv);
-	vec3MulMtx(corners[1], cornersV[1], mtxViewInv);
-	vec3MulMtx(corners[2], cornersV[2], mtxViewInv);
-	vec3MulMtx(corners[3], cornersV[3], mtxViewInv);
+	bx::vec3MulMtx(corners[0], cornersV[0], mtxViewInv);
+	bx::vec3MulMtx(corners[1], cornersV[1], mtxViewInv);
+	bx::vec3MulMtx(corners[2], cornersV[2], mtxViewInv);
+	bx::vec3MulMtx(corners[3], cornersV[3], mtxViewInv);
 
 	float planeNormals[4][3];
 	for (uint8_t ii = 0; ii < 4; ++ii)
@@ -1802,25 +1802,25 @@ void createNearClipVolume(float* __restrict _outPlanes24f
 		float* plane = volumePlanes[ii];
 
 		float planeVec[3];
-		vec3Sub(planeVec, corners[ii], corners[(ii-1)%4]);
+		bx::vec3Sub(planeVec, corners[ii], corners[(ii+3)%4]); // previous corner, wrapping 0 -> 3; (ii-1)%4 yields -1 for ii == 0 (int promotion) and indexes out of bounds
 
 		float light[3];
 		float tmp[3];
-		vec3Mul(tmp, corners[ii], _lightPos[3]);
-		vec3Sub(light, _lightPos, tmp);
+		bx::vec3Mul(tmp, corners[ii], _lightPos[3]);
+		bx::vec3Sub(light, _lightPos, tmp);
 
-		vec3Cross(normal, planeVec, light);
+		bx::vec3Cross(normal, planeVec, light);
 
 		normal[0] *= lightSide;
 		normal[1] *= lightSide;
 		normal[2] *= lightSide;
 
-		float lenInv = 1.0f / sqrtf(vec3Dot(normal, normal) );
+		float lenInv = 1.0f / sqrtf(bx::vec3Dot(normal, normal) );
 
 		plane[0] = normal[0] * lenInv;
 		plane[1] = normal[1] * lenInv;
 		plane[2] = normal[2] * lenInv;
-		plane[3] = -vec3Dot(normal, corners[ii]) * lenInv;
+		plane[3] = -bx::vec3Dot(normal, corners[ii]) * lenInv;
 	}
 
 	float nearPlaneV[4] =
@@ -1830,26 +1830,26 @@ void createNearClipVolume(float* __restrict _outPlanes24f
 		1.0f * lightSide,
 		_near * lightSide,
 	};
-	vec4MulMtx(volumePlanes[4], nearPlaneV, mtxViewTrans);
+	bx::vec4MulMtx(volumePlanes[4], nearPlaneV, mtxViewTrans);
 
 	float* lightPlane = volumePlanes[5];
 	float lightPlaneNormal[3] = { 0.0f, 0.0f, -_near * lightSide };
 	float tmp[3];
-	vec3MulMtx(tmp, lightPlaneNormal, mtxViewInv);
-	vec3Sub(lightPlaneNormal, tmp, _lightPos);
+	bx::vec3MulMtx(tmp, lightPlaneNormal, mtxViewInv);
+	bx::vec3Sub(lightPlaneNormal, tmp, _lightPos);
 
-	float lenInv = 1.0f / sqrtf(vec3Dot(lightPlaneNormal, lightPlaneNormal) );
+	float lenInv = 1.0f / sqrtf(bx::vec3Dot(lightPlaneNormal, lightPlaneNormal) );
 
 	lightPlane[0] = lightPlaneNormal[0] * lenInv;
 	lightPlane[1] = lightPlaneNormal[1] * lenInv;
 	lightPlane[2] = lightPlaneNormal[2] * lenInv;
-	lightPlane[3] = -vec3Dot(lightPlaneNormal, _lightPos) * lenInv;
+	lightPlane[3] = -bx::vec3Dot(lightPlaneNormal, _lightPos) * lenInv;
 }
 
 bool clipTest(const float* _planes, uint8_t _planeNum, const Mesh& _mesh, const float* _scale, const float* _translate)
 {
 	float (*volumePlanes)[4] = (float(*)[4])_planes;
-	float scale = fmaxf(fmaxf(_scale[0], _scale[1]), _scale[2]);
+	float scale = bx::fmax(bx::fmax(_scale[0], _scale[1]), _scale[2]);
 
 	const GroupArray& groups = _mesh.m_groups;
 	for (GroupArray::const_iterator it = groups.begin(), itEnd = groups.end(); it != itEnd; ++it)
@@ -1867,7 +1867,7 @@ bool clipTest(const float* _planes, uint8_t _planeNum, const Mesh& _mesh, const
 		{
 			const float* plane = volumePlanes[ii];
 
-			float positiveSide = vec3Dot(plane, sphere.m_center) + plane[3] + sphere.m_radius;
+			float positiveSide = bx::vec3Dot(plane, sphere.m_center) + plane[3] + sphere.m_radius;
 
 			if (positiveSide < 0.0f)
 			{
@@ -2134,7 +2134,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	const float aspect = float(viewState.m_width)/float(viewState.m_height);
 	const float nearPlane = 1.0f;
 	const float farPlane = 1000.0f;
-	mtxProj(viewState.m_proj, fov, aspect, nearPlane, farPlane);
+	bx::mtxProj(viewState.m_proj, fov, aspect, nearPlane, farPlane);
 
 	float initialPos[3] = { 3.0f, 20.0f, -58.0f };
 	cameraCreate();
@@ -2705,7 +2705,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 				// Compute transform for shadow volume.
 				float shadowVolumeMtx[16];
-				mtxSRT(shadowVolumeMtx
+				bx::mtxSRT(shadowVolumeMtx
 						, instance.m_scale[0]
 						, instance.m_scale[1]
 						, instance.m_scale[2]
diff --git a/examples/15-shadowmaps-simple/shadowmaps_simple.cpp b/examples/15-shadowmaps-simple/shadowmaps_simple.cpp
index 15429c39..b63e281a 100644
--- a/examples/15-shadowmaps-simple/shadowmaps_simple.cpp
+++ b/examples/15-shadowmaps-simple/shadowmaps_simple.cpp
@@ -12,8 +12,8 @@
 #include <bgfx.h>
 #include <bx/timer.h>
 #include <bx/readerwriter.h>
+#include <bx/fpumath.h>
 #include "entry/entry.h"
-#include "fpumath.h"
 
 #define RENDER_SHADOW_PASS_ID 0
 #define RENDER_SHADOW_PASS_BIT (1<<RENDER_SHADOW_PASS_ID)
@@ -484,10 +484,10 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 	const float eye[3] = { 0.0f, 30.0f, -60.0f };
 	const float at[3]  = { 0.0f, 5.0f, 0.0f };
-	mtxLookAt(view, eye, at);
+	bx::mtxLookAt(view, eye, at);
 
 	const float aspect = float(int32_t(width) ) / float(int32_t(height) );
-	mtxProj(proj, 60.0f, aspect, 0.1f, 1000.0f);
+	bx::mtxProj(proj, 60.0f, aspect, 0.1f, 1000.0f);
 
 	// Time acumulators.
 	float timeAccumulatorLight = 0.0f;
@@ -526,28 +526,28 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		// Setup instance matrices.
 		float mtxFloor[16];
-		mtxSRT(mtxFloor
+		bx::mtxSRT(mtxFloor
 			, 30.0f, 30.0f, 30.0f
 			, 0.0f, 0.0f, 0.0f
 			, 0.0f, 0.0f, 0.0f
 			);
 
 		float mtxBunny[16];
-		mtxSRT(mtxBunny
+		bx::mtxSRT(mtxBunny
 			, 5.0f, 5.0f, 5.0f
 			, 0.0f, float(M_PI) - timeAccumulatorScene, 0.0f
 			, 15.0f, 5.0f, 0.0f
 			);
 
 		float mtxHollowcube[16];
-		mtxSRT(mtxHollowcube
+		bx::mtxSRT(mtxHollowcube
 			, 2.5f, 2.5f, 2.5f
 			, 0.0f, 1.56f - timeAccumulatorScene, 0.0f
 			, 0.0f, 10.0f, 0.0f
 			);
 
 		float mtxCube[16];
-		mtxSRT(mtxCube
+		bx::mtxSRT(mtxCube
 			, 2.5f, 2.5f, 2.5f
 			, 0.0f, 1.56f - timeAccumulatorScene, 0.0f
 			, -15.0f, 5.0f, 0.0f
@@ -564,10 +564,10 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			-lightPos[2],
 		};
 		const float at[3] = { 0.0f, 0.0f, 0.0f };
-		mtxLookAt(lightView, eye, at);
+		bx::mtxLookAt(lightView, eye, at);
 
 		const float area = 30.0f;
-		mtxOrtho(lightProj, -area, area, -area, area, -100.0f, 100.0f);
+		bx::mtxOrtho(lightProj, -area, area, -area, area, -100.0f, 100.0f);
 
 		bgfx::setViewRect(RENDER_SHADOW_PASS_ID, 0, 0, shadowMapSize, shadowMapSize);
 		bgfx::setViewFrameBuffer(RENDER_SHADOW_PASS_ID, s_shadowMapFB);
@@ -596,29 +596,29 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		};
 
 		float mtxTmp[16];
-		mtxMul(mtxTmp, lightProj, mtxCrop);
-		mtxMul(mtxShadow, lightView, mtxTmp);
+		bx::mtxMul(mtxTmp, lightProj, mtxCrop);
+		bx::mtxMul(mtxShadow, lightView, mtxTmp);
 
 		// Floor.
-		mtxMul(lightMtx, mtxFloor, mtxShadow);
+		bx::mtxMul(lightMtx, mtxFloor, mtxShadow);
 		bgfx::setUniform(u_lightMtx, lightMtx);
 		hplaneMesh.submit(RENDER_SCENE_PASS_ID, mtxFloor, progMesh);
 		hplaneMesh.submitShadow(RENDER_SHADOW_PASS_ID, mtxFloor, progShadow);
 
 		// Bunny.
-		mtxMul(lightMtx, mtxBunny, mtxShadow);
+		bx::mtxMul(lightMtx, mtxBunny, mtxShadow);
 		bgfx::setUniform(u_lightMtx, lightMtx);
 		bunnyMesh.submit(RENDER_SCENE_PASS_ID, mtxBunny, progMesh);
 		bunnyMesh.submitShadow(RENDER_SHADOW_PASS_ID, mtxBunny, progShadow);
 
 		// Hollow cube.
-		mtxMul(lightMtx, mtxHollowcube, mtxShadow);
+		bx::mtxMul(lightMtx, mtxHollowcube, mtxShadow);
 		bgfx::setUniform(u_lightMtx, lightMtx);
 		hollowcubeMesh.submit(RENDER_SCENE_PASS_ID, mtxHollowcube, progMesh);
 		hollowcubeMesh.submitShadow(RENDER_SHADOW_PASS_ID, mtxHollowcube, progShadow);
 
 		// Cube.
-		mtxMul(lightMtx, mtxCube, mtxShadow);
+		bx::mtxMul(lightMtx, mtxCube, mtxShadow);
 		bgfx::setUniform(u_lightMtx, lightMtx);
 		cubeMesh.submit(RENDER_SCENE_PASS_ID, mtxCube, progMesh);
 		cubeMesh.submitShadow(RENDER_SHADOW_PASS_ID, mtxCube, progShadow);
@@ -626,7 +626,6 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		// Advance to next frame. Rendering thread will be kicked to
 		// process submitted rendering primitives.
 		bgfx::frame();
-
 	}
 
 	bunnyMesh.unload();
diff --git a/examples/16-shadowmaps/shadowmaps.cpp b/examples/16-shadowmaps/shadowmaps.cpp
index ae373853..be94a9da 100644
--- a/examples/16-shadowmaps/shadowmaps.cpp
+++ b/examples/16-shadowmaps/shadowmaps.cpp
@@ -12,9 +12,9 @@
 #include <bgfx.h>
 #include <bx/timer.h>
 #include <bx/readerwriter.h>
+#include <bx/fpumath.h>
 #include "entry/entry.h"
 #include "camera.h"
-#include "fpumath.h"
 #include "imgui/imgui.h"
 
 #define RENDERVIEW_SHADOWMAP_0_ID 1
@@ -479,7 +479,7 @@ struct Light
 
 	void computeViewSpaceComponents(float* _viewMtx)
 	{
-		vec4MulMtx(m_position_viewSpace, m_position.m_v, _viewMtx);
+		bx::vec4MulMtx(m_position_viewSpace, m_position.m_v, _viewMtx);
 
 		float tmp[] =
 		{
@@ -488,7 +488,7 @@ struct Light
 			, m_spotDirectionInner.m_z
 			, 0.0f
 		};
-		vec4MulMtx(m_spotDirectionInner_viewSpace, tmp, _viewMtx);
+		bx::vec4MulMtx(m_spotDirectionInner_viewSpace, tmp, _viewMtx);
 		m_spotDirectionInner_viewSpace[3] = m_spotDirectionInner.m_v[3];
 	}
 
@@ -1235,7 +1235,7 @@ void worldSpaceFrustumCorners(float* _corners24f
 	float (*out)[3] = (float(*)[3])_corners24f;
 	for (uint8_t ii = 0; ii < numCorners; ++ii)
 	{
-		vec3MulMtx( (float*)&out[ii], (float*)&corners[ii], _invViewMtx);
+		bx::vec3MulMtx( (float*)&out[ii], (float*)&corners[ii], _invViewMtx);
 	}
 }
 
@@ -2074,7 +2074,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 	const float camFar     = 2000.0f;
 	const float projHeight = 1.0f/tanf(camFovy*( (float)M_PI/180.0f)*0.5f);
 	const float projWidth  = projHeight * 1.0f/camAspect;
-	mtxProj(viewState.m_proj, camFovy, camAspect, camNear, camFar);
+	bx::mtxProj(viewState.m_proj, camFovy, camAspect, camNear, camFar);
 	cameraGetViewMtx(viewState.m_view);
 
 	float timeAccumulatorLight = 0.0f;
@@ -2293,7 +2293,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		// Setup instance matrices.
 		float mtxFloor[16];
 		const float floorScale = 550.0f;
-		mtxSRT(mtxFloor
+		bx::mtxSRT(mtxFloor
 			, floorScale //scaleX
 			, floorScale //scaleY
 			, floorScale //scaleZ
@@ -2306,7 +2306,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			);
 
 		float mtxBunny[16];
-		mtxSRT(mtxBunny
+		bx::mtxSRT(mtxBunny
 			, 5.0f
 			, 5.0f
 			, 5.0f
@@ -2319,7 +2319,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			);
 
 		float mtxHollowcube[16];
-		mtxSRT(mtxHollowcube
+		bx::mtxSRT(mtxHollowcube
 			, 2.5f
 			, 2.5f
 			, 2.5f
@@ -2332,7 +2332,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			);
 
 		float mtxCube[16];
-		mtxSRT(mtxCube
+		bx::mtxSRT(mtxCube
 			, 2.5f
 			, 2.5f
 			, 2.5f
@@ -2348,17 +2348,17 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float mtxTrees[numTrees][16];
 		for (uint8_t ii = 0; ii < numTrees; ++ii)
 		{
-			mtxSRT(mtxTrees[ii]
-					, 2.0f
-					, 2.0f
-					, 2.0f
-					, 0.0f
-					, float(ii)
-					, 0.0f
-					, sin(float(ii)*2.0f*float(M_PI)/float(numTrees) ) * 60.0f
-					, 0.0f
-					, cos(float(ii)*2.0f*float(M_PI)/float(numTrees) ) * 60.0f
-					);
+			bx::mtxSRT(mtxTrees[ii]
+				, 2.0f
+				, 2.0f
+				, 2.0f
+				, 0.0f
+				, float(ii)
+				, 0.0f
+				, sinf(float(ii)*2.0f*float(M_PI)/float(numTrees) ) * 60.0f
+				, 0.0f
+				, cosf(float(ii)*2.0f*float(M_PI)/float(numTrees) ) * 60.0f
+				);
 		}
 
 		// Compute transform matrices.
@@ -2369,14 +2369,14 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		float screenProj[16];
 		float screenView[16];
-		mtxIdentity(screenView);
-		mtxOrtho(screenProj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
+		bx::mtxIdentity(screenView);
+		bx::mtxOrtho(screenProj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
 
 	    if (LightType::SpotLight == settings.m_lightType)
 		{
 			const float fovy = settings.m_coverageSpotL;
 			const float aspect = 1.0f;
-			mtxProj(lightProj[ProjType::Horizontal], fovy, aspect, currentSmSettings->m_near, currentSmSettings->m_far);
+			bx::mtxProj(lightProj[ProjType::Horizontal], fovy, aspect, currentSmSettings->m_near, currentSmSettings->m_far);
 
 			//For linear depth, prevent depth division by variable w-component in shaders and divide here by far plane
 			if (DepthImpl::Linear == settings.m_depthImpl)
@@ -2386,17 +2386,17 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			}
 
 			float at[3];
-			vec3Add(at, pointLight.m_position.m_v, pointLight.m_spotDirectionInner.m_v);
-			mtxLookAt(lightView[TetrahedronFaces::Green], pointLight.m_position.m_v, at);
+			bx::vec3Add(at, pointLight.m_position.m_v, pointLight.m_spotDirectionInner.m_v);
+			bx::mtxLookAt(lightView[TetrahedronFaces::Green], pointLight.m_position.m_v, at);
 		}
 		else if (LightType::PointLight == settings.m_lightType)
 		{
 			float ypr[TetrahedronFaces::Count][3] =
 			{
-				 { toRad(  0.0f), toRad( 27.36780516f), toRad(0.0f) }
-				,{ toRad(180.0f), toRad( 27.36780516f), toRad(0.0f) }
-				,{ toRad(-90.0f), toRad(-27.36780516f), toRad(0.0f) }
-				,{ toRad( 90.0f), toRad(-27.36780516f), toRad(0.0f) }
+				{ bx::toRad(  0.0f), bx::toRad( 27.36780516f), bx::toRad(0.0f) },
+				{ bx::toRad(180.0f), bx::toRad( 27.36780516f), bx::toRad(0.0f) },
+				{ bx::toRad(-90.0f), bx::toRad(-27.36780516f), bx::toRad(0.0f) },
+				{ bx::toRad( 90.0f), bx::toRad(-27.36780516f), bx::toRad(0.0f) },
 			};
 
 
@@ -2404,9 +2404,9 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			{
 				const float fovx = 143.98570868f + 3.51f + settings.m_fovXAdjust;
 				const float fovy = 125.26438968f + 9.85f + settings.m_fovYAdjust;
-				const float aspect = tanf(toRad(fovx*0.5f) )/tanf(toRad(fovy*0.5f) );
+				const float aspect = tanf(bx::toRad(fovx*0.5f) )/tanf(bx::toRad(fovy*0.5f) );
 
-				mtxProj(lightProj[ProjType::Vertical]
+				bx::mtxProj(lightProj[ProjType::Vertical]
 						, fovx
 						, aspect
 						, currentSmSettings->m_near
@@ -2420,17 +2420,17 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 					lightProj[ProjType::Vertical][14] /= currentSmSettings->m_far;
 				}
 
-				ypr[TetrahedronFaces::Green][2]  = toRad(180.0f);
-				ypr[TetrahedronFaces::Yellow][2] = toRad(  0.0f);
-				ypr[TetrahedronFaces::Blue][2]   = toRad( 90.0f);
-				ypr[TetrahedronFaces::Red][2]    = toRad(-90.0f);
+				ypr[TetrahedronFaces::Green ][2] = bx::toRad(180.0f);
+				ypr[TetrahedronFaces::Yellow][2] = bx::toRad(  0.0f);
+				ypr[TetrahedronFaces::Blue  ][2] = bx::toRad( 90.0f);
+				ypr[TetrahedronFaces::Red   ][2] = bx::toRad(-90.0f);
 			}
 
 			const float fovx = 143.98570868f + 7.8f + settings.m_fovXAdjust;
 			const float fovy = 125.26438968f + 3.0f + settings.m_fovYAdjust;
-			const float aspect = tanf(toRad(fovx*0.5f) )/tanf(toRad(fovy*0.5f) );
+			const float aspect = tanf(bx::toRad(fovx*0.5f) )/tanf(bx::toRad(fovy*0.5f) );
 
-			mtxProj(lightProj[ProjType::Horizontal], fovy, aspect, currentSmSettings->m_near, currentSmSettings->m_far);
+			bx::mtxProj(lightProj[ProjType::Horizontal], fovy, aspect, currentSmSettings->m_near, currentSmSettings->m_far);
 
 			//For linear depth, prevent depth division by variable w component in shaders and divide here by far plane
 			if (DepthImpl::Linear == settings.m_depthImpl)
@@ -2447,12 +2447,12 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 				float tmp[3] =
 				{
-					  -vec3Dot(pointLight.m_position.m_v, &mtxTmp[0])
-					, -vec3Dot(pointLight.m_position.m_v, &mtxTmp[4])
-					, -vec3Dot(pointLight.m_position.m_v, &mtxTmp[8])
+					-bx::vec3Dot(pointLight.m_position.m_v, &mtxTmp[0]),
+					-bx::vec3Dot(pointLight.m_position.m_v, &mtxTmp[4]),
+					-bx::vec3Dot(pointLight.m_position.m_v, &mtxTmp[8]),
 				};
 
-				mtxTranspose(mtxYpr[ii], mtxTmp);
+				bx::mtxTranspose(mtxYpr[ii], mtxTmp);
 
 				memcpy(lightView[ii], mtxYpr[ii], 12*sizeof(float) );
 				lightView[ii][12] = tmp[0];
@@ -2471,11 +2471,11 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				, -directionalLight.m_position.m_z
 			};
 			float at[3] = { 0.0f, 0.0f, 0.0f };
-			mtxLookAt(lightView[0], eye, at);
+			bx::mtxLookAt(lightView[0], eye, at);
 
 			// Compute camera inverse view mtx.
 			float mtxViewInv[16];
-			mtxInverse(mtxViewInv, viewState.m_view);
+			bx::mtxInverse(mtxViewInv, viewState.m_view);
 
 			// Compute split distances.
 			const uint8_t maxNumSplits = 4;
@@ -2492,7 +2492,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			}
 
 			float mtxProj[16];
-			mtxOrtho(mtxProj, 1.0f, -1.0f, 1.0f, -1.0f, -currentSmSettings->m_far, currentSmSettings->m_far);
+			bx::mtxOrtho(mtxProj, 1.0f, -1.0f, 1.0f, -1.0f, -currentSmSettings->m_far, currentSmSettings->m_far);
 
 			const uint8_t numCorners = 8;
 			float frustumCorners[maxNumSplits][numCorners][3];
@@ -2508,21 +2508,21 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				{
 					// Transform to light space.
 					float lightSpaceFrustumCorner[3];
-					vec3MulMtx(lightSpaceFrustumCorner, frustumCorners[ii][jj], lightView[0]);
+					bx::vec3MulMtx(lightSpaceFrustumCorner, frustumCorners[ii][jj], lightView[0]);
 
 					// Update bounding box.
-					min[0] = fminf(min[0], lightSpaceFrustumCorner[0]);
-					max[0] = fmaxf(max[0], lightSpaceFrustumCorner[0]);
-					min[1] = fminf(min[1], lightSpaceFrustumCorner[1]);
-					max[1] = fmaxf(max[1], lightSpaceFrustumCorner[1]);
-					min[2] = fminf(min[2], lightSpaceFrustumCorner[2]);
-					max[2] = fmaxf(max[2], lightSpaceFrustumCorner[2]);
+					min[0] = bx::fmin(min[0], lightSpaceFrustumCorner[0]);
+					max[0] = bx::fmax(max[0], lightSpaceFrustumCorner[0]);
+					min[1] = bx::fmin(min[1], lightSpaceFrustumCorner[1]);
+					max[1] = bx::fmax(max[1], lightSpaceFrustumCorner[1]);
+					min[2] = bx::fmin(min[2], lightSpaceFrustumCorner[2]);
+					max[2] = bx::fmax(max[2], lightSpaceFrustumCorner[2]);
 				}
 
 				float minproj[3];
 				float maxproj[3];
-				vec3MulMtxH(minproj, min, mtxProj);
-				vec3MulMtxH(maxproj, max, mtxProj);
+				bx::vec3MulMtxH(minproj, min, mtxProj);
+				bx::vec3MulMtxH(maxproj, max, mtxProj);
 
 				float offsetx, offsety;
 				float scalex, scaley;
@@ -2548,13 +2548,13 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				}
 
 				float mtxCrop[16];
-				mtxIdentity(mtxCrop);
+				bx::mtxIdentity(mtxCrop);
 				mtxCrop[ 0] = scalex;
 				mtxCrop[ 5] = scaley;
 				mtxCrop[12] = offsetx;
 				mtxCrop[13] = offsety;
 
-				mtxMul(lightProj[ii], mtxCrop, mtxProj);
+				bx::mtxMul(lightProj[ii], mtxCrop, mtxProj);
 			}
 		}
 
@@ -2976,8 +2976,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			if (LightType::SpotLight == settings.m_lightType)
 			{
 				float mtxTmp[16];
-				mtxMul(mtxTmp, lightProj[ProjType::Horizontal], mtxBias);
-				mtxMul(mtxShadow, lightView[0], mtxTmp); //lightViewProjBias
+				bx::mtxMul(mtxTmp, lightProj[ProjType::Horizontal], mtxBias);
+				bx::mtxMul(mtxShadow, lightView[0], mtxTmp); //lightViewProjBias
 			}
 			else if (LightType::PointLight == settings.m_lightType)
 			{
@@ -3061,11 +3061,11 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 					uint8_t biasIndex = cropBiasIndices[settings.m_stencilPack][uint8_t(s_flipV)][ii];
 
 					float mtxTmp[16];
-					mtxMul(mtxTmp, mtxYpr[ii], lightProj[projType]);
-					mtxMul(shadowMapMtx[ii], mtxTmp, mtxCropBias[settings.m_stencilPack][biasIndex]); //mtxYprProjBias
+					bx::mtxMul(mtxTmp, mtxYpr[ii], lightProj[projType]);
+					bx::mtxMul(shadowMapMtx[ii], mtxTmp, mtxCropBias[settings.m_stencilPack][biasIndex]); //mtxYprProjBias
 				}
 
-				mtxTranslate(mtxShadow //lightInvTranslate
+				bx::mtxTranslate(mtxShadow //lightInvTranslate
 						, -pointLight.m_position.m_v[0]
 						, -pointLight.m_position.m_v[1]
 						, -pointLight.m_position.m_v[2]
@@ -3077,15 +3077,15 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				{
 					float mtxTmp[16];
 
-					mtxMul(mtxTmp, lightProj[ii], mtxBias);
-					mtxMul(shadowMapMtx[ii], lightView[0], mtxTmp); //lViewProjCropBias
+					bx::mtxMul(mtxTmp, lightProj[ii], mtxBias);
+					bx::mtxMul(shadowMapMtx[ii], lightView[0], mtxTmp); //lViewProjCropBias
 				}
 			}
 
 			// Floor.
 			if (LightType::DirectionalLight != settings.m_lightType)
 			{
-				mtxMul(lightMtx, mtxFloor, mtxShadow); //not needed for directional light
+				bx::mtxMul(lightMtx, mtxFloor, mtxShadow); //not needed for directional light
 			}
 			hplaneMesh.submit(RENDERVIEW_DRAWSCENE_0_ID
 					, mtxFloor
@@ -3096,7 +3096,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			// Bunny.
 			if (LightType::DirectionalLight != settings.m_lightType)
 			{
-				mtxMul(lightMtx, mtxBunny, mtxShadow);
+				bx::mtxMul(lightMtx, mtxBunny, mtxShadow);
 			}
 			bunnyMesh.submit(RENDERVIEW_DRAWSCENE_0_ID
 					, mtxBunny
@@ -3107,7 +3107,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			// Hollow cube.
 			if (LightType::DirectionalLight != settings.m_lightType)
 			{
-				mtxMul(lightMtx, mtxHollowcube, mtxShadow);
+				bx::mtxMul(lightMtx, mtxHollowcube, mtxShadow);
 			}
 			hollowcubeMesh.submit(RENDERVIEW_DRAWSCENE_0_ID
 					, mtxHollowcube
@@ -3118,7 +3118,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			// Cube.
 			if (LightType::DirectionalLight != settings.m_lightType)
 			{
-				mtxMul(lightMtx, mtxCube, mtxShadow);
+				bx::mtxMul(lightMtx, mtxCube, mtxShadow);
 			}
 			cubeMesh.submit(RENDERVIEW_DRAWSCENE_0_ID
 					, mtxCube
@@ -3131,7 +3131,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 			{
 				if (LightType::DirectionalLight != settings.m_lightType)
 				{
-					mtxMul(lightMtx, mtxTrees[ii], mtxShadow);
+					bx::mtxMul(lightMtx, mtxTrees[ii], mtxShadow);
 				}
 				treeMesh.submit(RENDERVIEW_DRAWSCENE_0_ID
 						, mtxTrees[ii]
@@ -3156,7 +3156,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 			// Draw floor bottom.
 			float floorBottomMtx[16];
-			mtxSRT(floorBottomMtx
+			bx::mtxSRT(floorBottomMtx
 					, floorScale //scaleX
 					, floorScale //scaleY
 					, floorScale //scaleZ
diff --git a/examples/17-drawstress/drawstress.cpp b/examples/17-drawstress/drawstress.cpp
index 7032cd07..cf4a9052 100644
--- a/examples/17-drawstress/drawstress.cpp
+++ b/examples/17-drawstress/drawstress.cpp
@@ -174,8 +174,8 @@ bool mainloop()
 
 		float view[16];
 		float proj[16];
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		// Set view and projection matrix for view 0.
 		bgfx::setViewTransform(0, view, proj);
@@ -195,7 +195,7 @@ bool mainloop()
 
 		float mtxS[16];
 		const float scale = 0 == transform ? 0.25f : 0.0f;
-		mtxScale(mtxS, scale, scale, scale);
+		bx::mtxScale(mtxS, scale, scale, scale);
 
 		const float step = 0.6f;
 		float pos[3];
@@ -210,10 +210,10 @@ bool mainloop()
 				for (uint32_t xx = 0; xx < uint32_t(dim); ++xx)
 				{
 					float mtxR[16];
-					mtxRotateXYZ(mtxR, time + xx*0.21f, time + yy*0.37f, time + yy*0.13f);
+					bx::mtxRotateXYZ(mtxR, time + xx*0.21f, time + yy*0.37f, time + yy*0.13f);
 
 					float mtx[16];
-					mtxMul(mtx, mtxS, mtxR);
+					bx::mtxMul(mtx, mtxS, mtxR);
 
 					mtx[12] = pos[0] + float(xx)*step;
 					mtx[13] = pos[1] + float(yy)*step;
diff --git a/examples/18-ibl/ibl.cpp b/examples/18-ibl/ibl.cpp
index 92edc944..144f07e1 100644
--- a/examples/18-ibl/ibl.cpp
+++ b/examples/18-ibl/ibl.cpp
@@ -20,7 +20,7 @@ struct Uniforms
 	void init()
 	{
 		m_time = 0.0f;
-		mtxIdentity(m_mtx);
+		bx::mtxIdentity(m_mtx);
 
 		u_time    = bgfx::createUniform("u_time",     bgfx::UniformType::Uniform1f);
 		u_mtx     = bgfx::createUniform("u_mtx",      bgfx::UniformType::Uniform4x4fv);
@@ -720,7 +720,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float at[3] = { 0.0f, 0.0f, 0.0f };
 		float eye[3] = { 0.0f, 0.0f, -3.0f };
 
-		mtxRotateXY(s_uniforms.m_mtx
+		bx::mtxRotateXY(s_uniforms.m_mtx
 			, 0.0f
 			, time
 			);
@@ -728,13 +728,13 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float view[16];
 		float proj[16];
 
-		mtxIdentity(view);
-		mtxOrtho(proj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
+		bx::mtxIdentity(view);
+		bx::mtxOrtho(proj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
 		bgfx::setViewTransform(0, view, proj);
 
-		mtxLookAt(view, eye, at);
+		bx::mtxLookAt(view, eye, at);
 		memcpy(s_uniforms.m_camPos, eye, 3*sizeof(float));
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 		bgfx::setViewTransform(1, view, proj);
 
 		bgfx::setViewRect(0, 0, 0, width, height);
@@ -749,7 +749,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 		// View 1.
 		float mtx[16];
-		mtxSRT(mtx
+		bx::mtxSRT(mtx
 				, 1.0f
 				, 1.0f
 				, 1.0f
diff --git a/examples/19-oit/oit.cpp b/examples/19-oit/oit.cpp
index 63b5f628..ed1a03fd 100644
--- a/examples/19-oit/oit.cpp
+++ b/examples/19-oit/oit.cpp
@@ -301,8 +301,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		float proj[16];
 
 		// Set view and projection matrix for view 0.
-		mtxLookAt(view, eye, at);
-		mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+		bx::mtxLookAt(view, eye, at);
+		bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 		bgfx::setViewTransform(0, view, proj);
 
@@ -317,8 +317,8 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		bgfx::setViewFrameBuffer(0, 0 == mode ? invalid : fbh);
 
 		// Set view and projection matrix for view 1.
-		mtxIdentity(view);
-		mtxOrtho(proj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
+		bx::mtxIdentity(view);
+		bx::mtxOrtho(proj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
 		bgfx::setViewTransform(1, view, proj);
 
 		for (uint32_t depth = 0; depth < 3; ++depth)
@@ -341,7 +341,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 					BX_UNUSED(time);
 					float mtx[16];
-					mtxRotateXY(mtx, time*0.023f + xx*0.21f, time*0.03f + yy*0.37f);
+					bx::mtxRotateXY(mtx, time*0.023f + xx*0.21f, time*0.03f + yy*0.37f);
 					//mtxIdentity(mtx);
 					mtx[12] = -2.5f + float(xx)*2.5f;
 					mtx[13] = -2.5f + float(yy)*2.5f;
diff --git a/examples/21-deferred/deferred.cpp b/examples/21-deferred/deferred.cpp
index 7fefe489..62cdd6c4 100644
--- a/examples/21-deferred/deferred.cpp
+++ b/examples/21-deferred/deferred.cpp
@@ -453,15 +453,15 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				bgfx::setViewFrameBuffer(RENDER_PASS_LIGHT_ID, lightBuffer);
 
 				float proj[16];
-				mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
+				bx::mtxProj(proj, 60.0f, float(width)/float(height), 0.1f, 100.0f);
 
 				bgfx::setViewFrameBuffer(RENDER_PASS_GEOMETRY_ID, gbuffer);
 				bgfx::setViewTransform(RENDER_PASS_GEOMETRY_ID, view, proj);
 
-				mtxMul(vp, view, proj);
-				mtxInverse(invMvp, vp);
+				bx::mtxMul(vp, view, proj);
+				bx::mtxInverse(invMvp, vp);
 
-				mtxOrtho(proj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
+				bx::mtxOrtho(proj, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 100.0f);
 				bgfx::setViewTransformMask(0
 					| RENDER_PASS_LIGHT_BIT
 					| RENDER_PASS_COMBINE_BIT
@@ -470,10 +470,10 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 				const float aspectRatio = float(height)/float(width);
 				const float size = 10.0f;
-				mtxOrtho(proj, -size, size, size*aspectRatio, -size*aspectRatio, 0.0f, 1000.0f);
+				bx::mtxOrtho(proj, -size, size, size*aspectRatio, -size*aspectRatio, 0.0f, 1000.0f);
 				bgfx::setViewTransform(RENDER_PASS_DEBUG_GBUFFER_ID, NULL, proj); 
 
-				mtxOrtho(proj, 0.0f, (float)width, 0.0f, (float)height, 0.0f, 1000.0f);
+				bx::mtxOrtho(proj, 0.0f, (float)width, 0.0f, (float)height, 0.0f, 1000.0f);
 				bgfx::setViewTransform(RENDER_PASS_DEBUG_LIGHTS_ID, NULL, proj); 
 			}
 
@@ -488,11 +488,11 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 					float mtx[16];
 					if (animateMesh)
 					{
-						mtxRotateXY(mtx, time*1.023f + xx*0.21f, time*0.03f + yy*0.37f);
+						bx::mtxRotateXY(mtx, time*1.023f + xx*0.21f, time*0.03f + yy*0.37f);
 					}
 					else
 					{
-						mtxIdentity(mtx);
+						bx::mtxIdentity(mtx);
 					}
 					mtx[12] = -offset + float(xx)*3.0f;
 					mtx[13] = -offset + float(yy)*3.0f;
@@ -553,7 +553,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				};
 
 				float xyz[3];
-				vec3MulMtxH(xyz, box[0], vp);
+				bx::vec3MulMtxH(xyz, box[0], vp);
 				float minx = xyz[0];
 				float miny = xyz[1];
 				float maxx = xyz[0];
@@ -562,21 +562,21 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 
 				for (uint32_t ii = 1; ii < 8; ++ii)
 				{
-					vec3MulMtxH(xyz, box[ii], vp);
-					minx = fminf(minx, xyz[0]);
-					miny = fminf(miny, xyz[1]);
-					maxx = fmaxf(maxx, xyz[0]);
-					maxy = fmaxf(maxy, xyz[1]);
-					maxz = fmaxf(maxz, xyz[2]);
+					bx::vec3MulMtxH(xyz, box[ii], vp);
+					minx = bx::fmin(minx, xyz[0]);
+					miny = bx::fmin(miny, xyz[1]);
+					maxx = bx::fmax(maxx, xyz[0]);
+					maxy = bx::fmax(maxy, xyz[1]);
+					maxz = bx::fmax(maxz, xyz[2]);
 				}
 
 				// Cull light if it's fully behind camera.
 				if (maxz >= 0.0f)
 				{
-					float x0 = fclamp( (minx * 0.5f + 0.5f) * width,  0.0f, (float)width);
-					float y0 = fclamp( (miny * 0.5f + 0.5f) * height, 0.0f, (float)height);
-					float x1 = fclamp( (maxx * 0.5f + 0.5f) * width,  0.0f, (float)width);
-					float y1 = fclamp( (maxy * 0.5f + 0.5f) * height, 0.0f, (float)height);
+					float x0 = bx::fclamp( (minx * 0.5f + 0.5f) * width,  0.0f, (float)width);
+					float y0 = bx::fclamp( (miny * 0.5f + 0.5f) * height, 0.0f, (float)height);
+					float x1 = bx::fclamp( (maxx * 0.5f + 0.5f) * width,  0.0f, (float)width);
+					float y1 = bx::fclamp( (maxy * 0.5f + 0.5f) * height, 0.0f, (float)height);
 
 					if (showScissorRects)
 					{
@@ -679,7 +679,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 				for (uint32_t ii = 0; ii < BX_COUNTOF(gbufferTex); ++ii)
 				{
 					float mtx[16];
-					mtxSRT(mtx
+					bx::mtxSRT(mtx
 						, aspectRatio, 1.0f, 1.0f
 						, 0.0f, 0.0f, 0.0f
 						, -7.9f - BX_COUNTOF(gbufferTex)*0.1f*0.5f + ii*2.1f*aspectRatio, 4.0f, 0.0f
diff --git a/examples/common/bgfx_utils.cpp b/examples/common/bgfx_utils.cpp
index f470c78b..99f993f0 100644
--- a/examples/common/bgfx_utils.cpp
+++ b/examples/common/bgfx_utils.cpp
@@ -8,7 +8,7 @@
 
 #include <bgfx.h>
 #include <bx/readerwriter.h>
-#include "fpumath.h"
+#include <bx/fpumath.h>
 #include "entry/entry.h"
 
 void* load(bx::FileReaderI* _reader, const char* _filePath)
@@ -191,10 +191,10 @@ void calcTangents(void* _vertices, uint16_t _numVertices, bgfx::VertexDecl _decl
 
 		float normal[4];
 		bgfx::vertexUnpack(normal, bgfx::Attrib::Normal, _decl, _vertices, ii);
-		float ndt = vec3Dot(normal, tanu);
+		float ndt = bx::vec3Dot(normal, tanu);
 
 		float nxt[3];
-		vec3Cross(nxt, normal, tanu);
+		bx::vec3Cross(nxt, normal, tanu);
 
 		float tmp[3];
 		tmp[0] = tanu[0] - normal[0] * ndt;
@@ -202,9 +202,9 @@ void calcTangents(void* _vertices, uint16_t _numVertices, bgfx::VertexDecl _decl
 		tmp[2] = tanu[2] - normal[2] * ndt;
 
 		float tangent[4];
-		vec3Norm(tangent, tmp);
+		bx::vec3Norm(tangent, tmp);
 
-		tangent[3] = vec3Dot(nxt, tanv) < 0.0f ? -1.0f : 1.0f;
+		tangent[3] = bx::vec3Dot(nxt, tanv) < 0.0f ? -1.0f : 1.0f;
 		bgfx::vertexPack(tangent, true, bgfx::Attrib::Tangent, _decl, _vertices, ii);
 	}
 
diff --git a/examples/common/bounds.cpp b/examples/common/bounds.cpp
index 947320f1..10e2b180 100644
--- a/examples/common/bounds.cpp
+++ b/examples/common/bounds.cpp
@@ -4,8 +4,8 @@
  */
 
 #include <bx/rng.h>
+#include <bx/fpumath.h>
 #include "bounds.h"
-#include "fpumath.h"
 
 void aabbToObb(Obb& _obb, const Aabb& _aabb)
 {
@@ -37,7 +37,7 @@ void aabbTransformToObb(Obb& _obb, const Aabb& _aabb, const float* _mtx)
 {
 	aabbToObb(_obb, _aabb);
 	float result[16];
-	mtxMul(result, _obb.m_mtx, _mtx);
+	bx::mtxMul(result, _obb.m_mtx, _mtx);
 	memcpy(_obb.m_mtx, result, sizeof(result) );
 }
 
@@ -67,12 +67,12 @@ void calcAabb(Aabb& _aabb, const void* _vertices, uint32_t _numVertices, uint32_
 		float xx = position[0];
 		float yy = position[1];
 		float zz = position[2];
-		min[0] = fminf(xx, min[0]);
-		min[1] = fminf(yy, min[1]);
-		min[2] = fminf(zz, min[2]);
-		max[0] = fmaxf(xx, max[0]);
-		max[1] = fmaxf(yy, max[1]);
-		max[2] = fmaxf(zz, max[2]);
+		min[0] = bx::fmin(xx, min[0]);
+		min[1] = bx::fmin(yy, min[1]);
+		min[2] = bx::fmin(zz, min[2]);
+		max[0] = bx::fmax(xx, max[0]);
+		max[1] = bx::fmax(yy, max[1]);
+		max[2] = bx::fmax(zz, max[2]);
 	}
 
 	_aabb.m_min[0] = min[0];
@@ -89,7 +89,7 @@ void calcAabb(Aabb& _aabb, const float* _mtx, const void* _vertices, uint32_t _n
 	uint8_t* vertex = (uint8_t*)_vertices;
 
 	float position[3];
-	vec3MulMtx(position, (float*)vertex, _mtx);
+	bx::vec3MulMtx(position, (float*)vertex, _mtx);
 	min[0] = max[0] = position[0];
 	min[1] = max[1] = position[1];
 	min[2] = max[2] = position[2];
@@ -97,18 +97,18 @@ void calcAabb(Aabb& _aabb, const float* _mtx, const void* _vertices, uint32_t _n
 
 	for (uint32_t ii = 1; ii < _numVertices; ++ii)
 	{
-		vec3MulMtx(position, (float*)vertex, _mtx);
+		bx::vec3MulMtx(position, (float*)vertex, _mtx);
 		vertex += _stride;
 
 		float xx = position[0];
 		float yy = position[1];
 		float zz = position[2];
-		min[0] = fminf(xx, min[0]);
-		min[1] = fminf(yy, min[1]);
-		min[2] = fminf(zz, min[2]);
-		max[0] = fmaxf(xx, max[0]);
-		max[1] = fmaxf(yy, max[1]);
-		max[2] = fmaxf(zz, max[2]);
+		min[0] = bx::fmin(xx, min[0]);
+		min[1] = bx::fmin(yy, min[1]);
+		min[2] = bx::fmin(zz, min[2]);
+		max[0] = bx::fmax(xx, max[0]);
+		max[1] = bx::fmax(yy, max[1]);
+		max[2] = bx::fmax(zz, max[2]);
 	}
 
 	_aabb.m_min[0] = min[0];
@@ -171,10 +171,10 @@ void calcObb(Obb& _obb, const void* _vertices, uint32_t _numVertices, uint32_t _
 
 			for (uint32_t kk = 0; kk < _steps; ++kk)
 			{
-				mtxRotateXYZ(mtx, ax, ay, az);
+				bx::mtxRotateXYZ(mtx, ax, ay, az);
 
 				float mtxT[16];
-				mtxTranspose(mtxT, mtx);
+				bx::mtxTranspose(mtxT, mtx);
 				calcAabb(aabb, mtxT, _vertices, _numVertices, _stride);
 
 				float area = calcAreaAabb(aabb);
@@ -219,7 +219,7 @@ void calcMaxBoundingSphere(Sphere& _sphere, const void* _vertices, uint32_t _num
 		float zz = position[2] - center[2];
 
 		float distSq = xx*xx + yy*yy + zz*zz;
-		maxDistSq = fmaxf(distSq, maxDistSq);
+		maxDistSq = bx::fmax(distSq, maxDistSq);
 	}
 
 	_sphere.m_center[0] = center[0];
@@ -276,7 +276,7 @@ void calcMinBoundingSphere(Sphere& _sphere, const void* _vertices, uint32_t _num
 				center[0] += xx * radiusStep;
 				center[1] += yy * radiusStep;
 				center[2] += zz * radiusStep;
-				maxDistSq = flerp(maxDistSq, distSq, _step);
+				maxDistSq = bx::flerp(maxDistSq, distSq, _step);
 
 				break;
 			}
diff --git a/examples/common/camera.cpp b/examples/common/camera.cpp
index 0371c552..7c3a968a 100644
--- a/examples/common/camera.cpp
+++ b/examples/common/camera.cpp
@@ -4,7 +4,7 @@
  */
 
 #include <bx/timer.h>
-#include "fpumath.h"
+#include <bx/fpumath.h>
 #include "camera.h"
 #include "entry/cmd.h"
 #include "entry/input.h"
@@ -152,8 +152,8 @@ struct Camera
 			float tmpRhs[3];
 			float tmpPos[3];
 			memcpy(tmpPos, m_eye, sizeof(float)*3);
-			vec3Mul(tmpRhs, direction, _deltaTime * m_moveSpeed);
-			vec3Add(m_eye, tmpPos, tmpRhs);
+			bx::vec3Mul(tmpRhs, direction, _deltaTime * m_moveSpeed);
+			bx::vec3Add(m_eye, tmpPos, tmpRhs);
 			setKeyState(CAMERA_KEY_UP, false);
 		}
 
@@ -163,8 +163,8 @@ struct Camera
 			float tmpRhs[3];
 			float tmpPos[3];
 			memcpy(tmpPos, m_eye, sizeof(float)*3);
-			vec3Mul(tmpRhs, direction, _deltaTime * m_moveSpeed);
-			vec3Sub(m_eye, tmpPos, tmpRhs);
+			bx::vec3Mul(tmpRhs, direction, _deltaTime * m_moveSpeed);
+			bx::vec3Sub(m_eye, tmpPos, tmpRhs);
 			setKeyState(CAMERA_KEY_DOWN, false);
 		}
 
@@ -174,8 +174,8 @@ struct Camera
 			float tmpRhs[3];
 			float tmpPos[3];
 			memcpy(tmpPos, m_eye, sizeof(float)*3);
-			vec3Mul(tmpRhs, right, _deltaTime * m_moveSpeed);
-			vec3Add(m_eye, tmpPos, tmpRhs);
+			bx::vec3Mul(tmpRhs, right, _deltaTime * m_moveSpeed);
+			bx::vec3Add(m_eye, tmpPos, tmpRhs);
 			setKeyState(CAMERA_KEY_LEFT, false);
 		}
 
@@ -185,18 +185,18 @@ struct Camera
 			float tmpRhs[3];
 			float tmpPos[3];
 			memcpy(tmpPos, m_eye, sizeof(float)*3);
-			vec3Mul(tmpRhs, right, _deltaTime * m_moveSpeed);
-			vec3Sub(m_eye, tmpPos, tmpRhs);
+			bx::vec3Mul(tmpRhs, right, _deltaTime * m_moveSpeed);
+			bx::vec3Sub(m_eye, tmpPos, tmpRhs);
 			setKeyState(CAMERA_KEY_RIGHT, false);
 		}
 
-		vec3Add(m_at, m_eye, direction);
-		vec3Cross(m_up, right, direction);
+		bx::vec3Add(m_at, m_eye, direction);
+		bx::vec3Cross(m_up, right, direction);
 	}
 
 	void getViewMtx(float* _viewMtx)
 	{
-		mtxLookAt(_viewMtx, m_eye, m_at, m_up);
+		bx::mtxLookAt(_viewMtx, m_eye, m_at, m_up);
 	}
 
 	void setPosition(float* _pos)
diff --git a/examples/common/common.h b/examples/common/common.h
index f70ae0a6..08cdc61c 100644
--- a/examples/common/common.h
+++ b/examples/common/common.h
@@ -4,6 +4,6 @@
  */
 
 #include <bx/timer.h>
+#include <bx/fpumath.h>
 
 #include "entry/entry.h"
-#include "fpumath.h"
diff --git a/examples/common/fpumath.h b/examples/common/fpumath.h
deleted file mode 100644
index 5f50ab91..00000000
--- a/examples/common/fpumath.h
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- * Copyright 2011-2014 Branimir Karadzic. All rights reserved.
- * License: http://www.opensource.org/licenses/BSD-2-Clause
- */
-
-// FPU math lib
-
-#ifndef FPU_MATH_H_HEADER_GUARD
-#define FPU_MATH_H_HEADER_GUARD
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-#include <string.h>
-
-#if BX_COMPILER_MSVC
-inline float fminf(float _a, float _b)
-{
-	return _a < _b ? _a : _b;
-}
-
-inline float fmaxf(float _a, float _b)
-{
-	return _a > _b ? _a : _b;
-}
-#endif // BX_COMPILER_MSVC
-
-inline float toRad(float _deg)
-{
-	return _deg * float(M_PI / 180.0);
-}
-
-inline float toDeg(float _rad)
-{
-	return _rad * float(180.0 / M_PI);
-}
-
-inline float fclamp(float _a, float _min, float _max)
-{
-	return fminf(fmaxf(_a, _min), _max);
-}
-
-inline float fsaturate(float _a)
-{
-	return fclamp(_a, 0.0f, 1.0f);
-}
-
-inline float flerp(float _a, float _b, float _t)
-{
-	return _a + (_b - _a) * _t;
-}
-
-inline float fsign(float _a)
-{
-	return _a < 0.0f ? -1.0f : 1.0f;
-}
-
-inline void vec3Move(float* __restrict _result, const float* __restrict _a)
-{
-	_result[0] = _a[0];
-	_result[1] = _a[1];
-	_result[2] = _a[2];
-}
-
-inline void vec3Abs(float* __restrict _result, const float* __restrict _a)
-{
-	_result[0] = fabsf(_a[0]);
-	_result[1] = fabsf(_a[1]);
-	_result[2] = fabsf(_a[2]);
-}
-
-inline void vec3Neg(float* __restrict _result, const float* __restrict _a)
-{
-	_result[0] = -_a[0];
-	_result[1] = -_a[1];
-	_result[2] = -_a[2];
-}
-
-inline void vec3Add(float* __restrict _result, const float* __restrict _a, const float* __restrict _b)
-{
-	_result[0] = _a[0] + _b[0];
-	_result[1] = _a[1] + _b[1];
-	_result[2] = _a[2] + _b[2];
-}
-
-inline void vec3Sub(float* __restrict _result, const float* __restrict _a, const float* __restrict _b)
-{
-	_result[0] = _a[0] - _b[0];
-	_result[1] = _a[1] - _b[1];
-	_result[2] = _a[2] - _b[2];
-}
-
-inline void vec3Mul(float* __restrict _result, const float* __restrict _a, const float* __restrict _b)
-{
-	_result[0] = _a[0] * _b[0];
-	_result[1] = _a[1] * _b[1];
-	_result[2] = _a[2] * _b[2];
-}
-
-inline void vec3Mul(float* __restrict _result, const float* __restrict _a, float _b)
-{
-	_result[0] = _a[0] * _b;
-	_result[1] = _a[1] * _b;
-	_result[2] = _a[2] * _b;
-}
-
-inline float vec3Dot(const float* __restrict _a, const float* __restrict _b)
-{
-	return _a[0]*_b[0] + _a[1]*_b[1] + _a[2]*_b[2];
-}
-
-inline void vec3Cross(float* __restrict _result, const float* __restrict _a, const float* __restrict _b)
-{
-	_result[0] = _a[1]*_b[2] - _a[2]*_b[1];
-	_result[1] = _a[2]*_b[0] - _a[0]*_b[2];
-	_result[2] = _a[0]*_b[1] - _a[1]*_b[0];
-}
-
-inline float vec3Length(const float* _a)
-{
-	return sqrtf(vec3Dot(_a, _a) );
-}
-
-inline float vec3Norm(float* __restrict _result, const float* __restrict _a)
-{
-	const float len = vec3Length(_a);
-	const float invLen = 1.0f/len;
-	_result[0] = _a[0] * invLen;
-	_result[1] = _a[1] * invLen;
-	_result[2] = _a[2] * invLen;
-	return len;
-}
-
-inline void mtxIdentity(float* _result)
-{
-	memset(_result, 0, sizeof(float)*16);
-	_result[0] = _result[5] = _result[10] = _result[15] = 1.0f;
-}
-
-inline void mtxTranslate(float* _result, float _tx, float _ty, float _tz)
-{
-	mtxIdentity(_result);
-	_result[12] = _tx;
-	_result[13] = _ty;
-	_result[14] = _tz;
-}
-
-inline void mtxScale(float* _result, float _sx, float _sy, float _sz)
-{
-	memset(_result, 0, sizeof(float) * 16);
-	_result[0]  = _sx;
-	_result[5]  = _sy;
-	_result[10] = _sz;
-	_result[15] = 1.0f;
-}
-
-inline void mtxLookAt(float* __restrict _result, const float* __restrict _eye, const float* __restrict _at, const float* __restrict _up = NULL)
-{
-	float tmp[4];
-	vec3Sub(tmp, _at, _eye);
-
-	float view[4];
-	vec3Norm(view, tmp);
-
-	float up[3] = { 0.0f, 1.0f, 0.0f };
-	if (NULL != _up)
-	{
-		up[0] = _up[0];
-		up[1] = _up[1];
-		up[2] = _up[2];
-	}
-	vec3Cross(tmp, up, view);
-
-	float right[4];
-	vec3Norm(right, tmp);
-
-	vec3Cross(up, view, right);
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[ 0] = right[0];
-	_result[ 1] = up[0];
-	_result[ 2] = view[0];
-
-	_result[ 4] = right[1];
-	_result[ 5] = up[1];
-	_result[ 6] = view[1];
-
-	_result[ 8] = right[2];
-	_result[ 9] = up[2];
-	_result[10] = view[2];
-
-	_result[12] = -vec3Dot(right, _eye);
-	_result[13] = -vec3Dot(up, _eye);
-	_result[14] = -vec3Dot(view, _eye);
-	_result[15] = 1.0f;
-}
-
-inline void mtxProj(float* _result, float _fovy, float _aspect, float _near, float _far)
-{
-	const float height = 1.0f/tanf(_fovy*( (float)M_PI/180.0f)*0.5f);
-	const float width = height * 1.0f/_aspect;
-	const float aa = _far/(_far-_near);
-	const float bb = -_near * aa;
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[0] = width;
-	_result[5] = height;
-	_result[10] = aa;
-	_result[11] = 1.0f;
-	_result[14] = bb;
-}
-
-inline void mtxOrtho(float* _result, float _left, float _right, float _bottom, float _top, float _near, float _far)
-{
-	const float aa = 2.0f/(_right - _left);
-	const float bb = 2.0f/(_top - _bottom);
-	const float cc = 1.0f/(_far - _near);
-	const float dd = (_left + _right)/(_left - _right);
-	const float ee = (_top + _bottom)/(_bottom - _top);
-	const float ff = _near / (_near - _far);
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[0] = aa;
-	_result[5] = bb;
-	_result[10] = cc;
-	_result[12] = dd;
-	_result[13] = ee;
-	_result[14] = ff;
-	_result[15] = 1.0f;
-}
-
-inline void mtxRotateX(float* _result, float _ax)
-{
-	const float sx = sinf(_ax);
-	const float cx = cosf(_ax);
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[ 0] = 1.0f;
-	_result[ 5] = cx;
-	_result[ 6] = -sx;
-	_result[ 9] = sx;
-	_result[10] = cx;
-	_result[15] = 1.0f;
-}
-
-inline void mtxRotateY(float* _result, float _ay)
-{
-	const float sy = sinf(_ay);
-	const float cy = cosf(_ay);
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[ 0] = cy;
-	_result[ 2] = sy;
-	_result[ 5] = 1.0f;
-	_result[ 8] = -sy;
-	_result[10] = cy;
-	_result[15] = 1.0f;
-}
-
-inline void mtxRotateZ(float* _result, float _az)
-{
-	const float sz = sinf(_az);
-	const float cz = cosf(_az);
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[ 0] = cz;
-	_result[ 1] = -sz;
-	_result[ 4] = sz;
-	_result[ 5] = cz;
-	_result[10] = 1.0f;
-	_result[15] = 1.0f;
-}
-
-inline void mtxRotateXY(float* _result, float _ax, float _ay)
-{
-	const float sx = sinf(_ax);
-	const float cx = cosf(_ax);
-	const float sy = sinf(_ay);
-	const float cy = cosf(_ay);
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[ 0] = cy;
-	_result[ 2] = sy;
-	_result[ 4] = sx*sy;
-	_result[ 5] = cx;
-	_result[ 6] = -sx*cy;
-	_result[ 8] = -cx*sy;
-	_result[ 9] = sx;
-	_result[10] = cx*cy;
-	_result[15] = 1.0f;
-}
-
-inline void mtxRotateXYZ(float* _result, float _ax, float _ay, float _az)
-{
-	const float sx = sinf(_ax);
-	const float cx = cosf(_ax);
-	const float sy = sinf(_ay);
-	const float cy = cosf(_ay);
-	const float sz = sinf(_az);
-	const float cz = cosf(_az);
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[ 0] = cy*cz;
-	_result[ 1] = -cy*sz;
-	_result[ 2] = sy;
-	_result[ 4] = cz*sx*sy + cx*sz;
-	_result[ 5] = cx*cz - sx*sy*sz;
-	_result[ 6] = -cy*sx;
-	_result[ 8] = -cx*cz*sy + sx*sz;
-	_result[ 9] = cz*sx + cx*sy*sz;
-	_result[10] = cx*cy;
-	_result[15] = 1.0f;
-}
-
-inline void mtxRotateZYX(float* _result, float _ax, float _ay, float _az)
-{
-	const float sx = sinf(_ax);
-	const float cx = cosf(_ax);
-	const float sy = sinf(_ay);
-	const float cy = cosf(_ay);
-	const float sz = sinf(_az);
-	const float cz = cosf(_az);
-
-	memset(_result, 0, sizeof(float)*16);
-	_result[ 0] = cy*cz;
-	_result[ 1] = cz*sx*sy-cx*sz;
-	_result[ 2] = cx*cz*sy+sx*sz;
-	_result[ 4] = cy*sz;
-	_result[ 5] = cx*cz + sx*sy*sz;
-	_result[ 6] = -cz*sx + cx*sy*sz;
-	_result[ 8] = -sy;
-	_result[ 9] = cy*sx;
-	_result[10] = cx*cy;
-	_result[15] = 1.0f;
-};
-
-inline void mtxSRT(float* _result, float _sx, float _sy, float _sz, float _ax, float _ay, float _az, float _tx, float _ty, float _tz)
-{
-	const float sx = sinf(_ax);
-	const float cx = cosf(_ax);
-	const float sy = sinf(_ay);
-	const float cy = cosf(_ay);
-	const float sz = sinf(_az);
-	const float cz = cosf(_az);
-
-	const float sxsz = sx*sz;
-	const float cycz = cy*cz;
-
-	_result[ 0] = _sx * (cycz - sxsz*sy);
-	_result[ 1] = _sx * -cx*sz;
-	_result[ 2] = _sx * (cz*sy + cy*sxsz);
-	_result[ 3] = 0.0f;
-
-	_result[ 4] = _sy * (cz*sx*sy + cy*sz);
-	_result[ 5] = _sy * cx*cz;
-	_result[ 6] = _sy * (sy*sz -cycz*sx);
-	_result[ 7] = 0.0f;
-
-	_result[ 8] = _sz * -cx*sy;
-	_result[ 9] = _sz * sx;
-	_result[10] = _sz * cx*cy;
-	_result[11] = 0.0f;
-
-	_result[12] = _tx;
-	_result[13] = _ty;
-	_result[14] = _tz;
-	_result[15] = 1.0f;
-}
-
-inline void vec3MulMtx(float* __restrict _result, const float* __restrict _vec, const float* __restrict _mat)
-{
-	_result[0] = _vec[0] * _mat[ 0] + _vec[1] * _mat[4] + _vec[2] * _mat[ 8] + _mat[12];
-	_result[1] = _vec[0] * _mat[ 1] + _vec[1] * _mat[5] + _vec[2] * _mat[ 9] + _mat[13];
-	_result[2] = _vec[0] * _mat[ 2] + _vec[1] * _mat[6] + _vec[2] * _mat[10] + _mat[14];
-}
-
-inline void vec3MulMtxH(float* __restrict _result, const float* __restrict _vec, const float* __restrict _mat)
-{
-	float xx = _vec[0] * _mat[ 0] + _vec[1] * _mat[4] + _vec[2] * _mat[ 8] + _mat[12];
-	float yy = _vec[0] * _mat[ 1] + _vec[1] * _mat[5] + _vec[2] * _mat[ 9] + _mat[13];
-	float zz = _vec[0] * _mat[ 2] + _vec[1] * _mat[6] + _vec[2] * _mat[10] + _mat[14];
-	float ww = _vec[0] * _mat[ 3] + _vec[1] * _mat[7] + _vec[2] * _mat[11] + _mat[15];
-	float invW = fsign(ww)/ww;
-	_result[0] = xx*invW;
-	_result[1] = yy*invW;
-	_result[2] = zz*invW;
-}
-
-inline void vec4MulMtx(float* __restrict _result, const float* __restrict _vec, const float* __restrict _mat)
-{
-	_result[0] = _vec[0] * _mat[ 0] + _vec[1] * _mat[4] + _vec[2] * _mat[ 8] + _vec[3] * _mat[12];
-	_result[1] = _vec[0] * _mat[ 1] + _vec[1] * _mat[5] + _vec[2] * _mat[ 9] + _vec[3] * _mat[13];
-	_result[2] = _vec[0] * _mat[ 2] + _vec[1] * _mat[6] + _vec[2] * _mat[10] + _vec[3] * _mat[14];
-	_result[3] = _vec[0] * _mat[ 3] + _vec[1] * _mat[7] + _vec[2] * _mat[11] + _vec[3] * _mat[15];
-}
-
-inline void mtxMul(float* __restrict _result, const float* __restrict _a, const float* __restrict _b)
-{
-	vec4MulMtx(&_result[ 0], &_a[ 0], _b);
-	vec4MulMtx(&_result[ 4], &_a[ 4], _b);
-	vec4MulMtx(&_result[ 8], &_a[ 8], _b);
-	vec4MulMtx(&_result[12], &_a[12], _b);
-}
-
-inline void mtxTranspose(float* __restrict _result, const float* __restrict _a)
-{
-	_result[ 0] = _a[ 0];
-	_result[ 4] = _a[ 1];
-	_result[ 8] = _a[ 2];
-	_result[12] = _a[ 3];
-	_result[ 1] = _a[ 4];
-	_result[ 5] = _a[ 5];
-	_result[ 9] = _a[ 6];
-	_result[13] = _a[ 7];
-	_result[ 2] = _a[ 8];
-	_result[ 6] = _a[ 9];
-	_result[10] = _a[10];
-	_result[14] = _a[11];
-	_result[ 3] = _a[12];
-	_result[ 7] = _a[13];
-	_result[11] = _a[14];
-	_result[15] = _a[15];
-}
-
-inline void mtx3Inverse(float* __restrict _result, const float* __restrict _a)
-{
-	float xx = _a[0];
-	float xy = _a[1];
-	float xz = _a[2];
-	float yx = _a[3];
-	float yy = _a[4];
-	float yz = _a[5];
-	float zx = _a[6];
-	float zy = _a[7];
-	float zz = _a[8];
-
-	float det = 0.0f;
-	det += xx * (yy*zz - yz*zy);
-	det -= xy * (yx*zz - yz*zx);
-	det += xz * (yx*zy - yy*zx);
-
-	float invDet = 1.0f/det;
-
-	_result[0] = +(yy*zz - yz*zy) * invDet;
-	_result[1] = -(xy*zz - xz*zy) * invDet;
-	_result[2] = +(xy*yz - xz*yy) * invDet;
-
-	_result[3] = -(yx*zz - yz*zx) * invDet;
-	_result[4] = +(xx*zz - xz*zx) * invDet;
-	_result[5] = -(xx*yz - xz*yx) * invDet;
-
-	_result[6] = +(yx*zy - yy*zx) * invDet;
-	_result[7] = -(xx*zy - xy*zx) * invDet;
-	_result[8] = +(xx*yy - xy*yx) * invDet;
-}
-
-inline void mtxInverse(float* __restrict _result, const float* __restrict _a)
-{
-	float xx = _a[ 0];
-	float xy = _a[ 1];
-	float xz = _a[ 2];
-	float xw = _a[ 3];
-	float yx = _a[ 4];
-	float yy = _a[ 5];
-	float yz = _a[ 6];
-	float yw = _a[ 7];
-	float zx = _a[ 8];
-	float zy = _a[ 9];
-	float zz = _a[10];
-	float zw = _a[11];
-	float wx = _a[12];
-	float wy = _a[13];
-	float wz = _a[14];
-	float ww = _a[15];
-
-	float det = 0.0f;
-	det += xx * (yy*(zz*ww - zw*wz) - yz*(zy*ww - zw*wy) + yw*(zy*wz - zz*wy) );
-	det -= xy * (yx*(zz*ww - zw*wz) - yz*(zx*ww - zw*wx) + yw*(zx*wz - zz*wx) );
-	det += xz * (yx*(zy*ww - zw*wy) - yy*(zx*ww - zw*wx) + yw*(zx*wy - zy*wx) );
-	det -= xw * (yx*(zy*wz - zz*wy) - yy*(zx*wz - zz*wx) + yz*(zx*wy - zy*wx) );
-
-	float invDet = 1.0f/det;
-
-	_result[ 0] = +(yy*(zz*ww - wz*zw) - yz*(zy*ww - wy*zw) + yw*(zy*wz - wy*zz) ) * invDet;
-	_result[ 1] = -(xy*(zz*ww - wz*zw) - xz*(zy*ww - wy*zw) + xw*(zy*wz - wy*zz) ) * invDet;
-	_result[ 2] = +(xy*(yz*ww - wz*yw) - xz*(yy*ww - wy*yw) + xw*(yy*wz - wy*yz) ) * invDet;
-	_result[ 3] = -(xy*(yz*zw - zz*yw) - xz*(yy*zw - zy*yw) + xw*(yy*zz - zy*yz) ) * invDet;
-				  
-	_result[ 4] = -(yx*(zz*ww - wz*zw) - yz*(zx*ww - wx*zw) + yw*(zx*wz - wx*zz) ) * invDet;
-	_result[ 5] = +(xx*(zz*ww - wz*zw) - xz*(zx*ww - wx*zw) + xw*(zx*wz - wx*zz) ) * invDet;
-	_result[ 6] = -(xx*(yz*ww - wz*yw) - xz*(yx*ww - wx*yw) + xw*(yx*wz - wx*yz) ) * invDet;
-	_result[ 7] = +(xx*(yz*zw - zz*yw) - xz*(yx*zw - zx*yw) + xw*(yx*zz - zx*yz) ) * invDet;
-				  
-	_result[ 8] = +(yx*(zy*ww - wy*zw) - yy*(zx*ww - wx*zw) + yw*(zx*wy - wx*zy) ) * invDet;
-	_result[ 9] = -(xx*(zy*ww - wy*zw) - xy*(zx*ww - wx*zw) + xw*(zx*wy - wx*zy) ) * invDet;
-	_result[10] = +(xx*(yy*ww - wy*yw) - xy*(yx*ww - wx*yw) + xw*(yx*wy - wx*yy) ) * invDet;
-	_result[11] = -(xx*(yy*zw - zy*yw) - xy*(yx*zw - zx*yw) + xw*(yx*zy - zx*yy) ) * invDet;
-				  
-	_result[12] = -(yx*(zy*wz - wy*zz) - yy*(zx*wz - wx*zz) + yz*(zx*wy - wx*zy) ) * invDet;
-	_result[13] = +(xx*(zy*wz - wy*zz) - xy*(zx*wz - wx*zz) + xz*(zx*wy - wx*zy) ) * invDet;
-	_result[14] = -(xx*(yy*wz - wy*yz) - xy*(yx*wz - wx*yz) + xz*(yx*wy - wx*yy) ) * invDet;
-	_result[15] = +(xx*(yy*zz - zy*yz) - xy*(yx*zz - zx*yz) + xz*(yx*zy - zx*yy) ) * invDet;
-}
-
-/// Convert LH to RH projection matrix and vice versa.
-inline void mtxProjFlipHandedness(float* __restrict _dst, const float* __restrict _src)
-{
-	_dst[ 0] = -_src[ 0];
-	_dst[ 1] = -_src[ 1];
-	_dst[ 2] = -_src[ 2];
-	_dst[ 3] = -_src[ 3];
-	_dst[ 4] =  _src[ 4];
-	_dst[ 5] =  _src[ 5];
-	_dst[ 6] =  _src[ 6];
-	_dst[ 7] =  _src[ 7];
-	_dst[ 8] = -_src[ 8];
-	_dst[ 9] = -_src[ 9];
-	_dst[10] = -_src[10];
-	_dst[11] = -_src[11];
-	_dst[12] =  _src[12];
-	_dst[13] =  _src[13];
-	_dst[14] =  _src[14];
-	_dst[15] =  _src[15];
-}
-
-/// Convert LH to RH view matrix and vice versa.
-inline void mtxViewFlipHandedness(float* __restrict _dst, const float* __restrict _src)
-{
-	_dst[ 0] = -_src[ 0];
-	_dst[ 1] =  _src[ 1];
-	_dst[ 2] = -_src[ 2];
-	_dst[ 3] =  _src[ 3];
-	_dst[ 4] = -_src[ 4];
-	_dst[ 5] =  _src[ 5];
-	_dst[ 6] = -_src[ 6];
-	_dst[ 7] =  _src[ 7];
-	_dst[ 8] = -_src[ 8];
-	_dst[ 9] =  _src[ 9];
-	_dst[10] = -_src[10];
-	_dst[11] =  _src[11];
-	_dst[12] = -_src[12];
-	_dst[13] =  _src[13];
-	_dst[14] = -_src[14];
-	_dst[15] =  _src[15];
-}
-
-inline void calcNormal(float _result[3], float _va[3], float _vb[3], float _vc[3])
-{
-	float ba[3];
-	vec3Sub(ba, _vb, _va);
-
-	float ca[3];
-	vec3Sub(ca, _vc, _va);
-
-	float baxca[3];
-	vec3Cross(baxca, ba, ca);
-
-	vec3Norm(_result, baxca);
-}
-
-inline void calcPlane(float _result[4], float _va[3], float _vb[3], float _vc[3])
-{
-	float normal[3];
-	calcNormal(normal, _va, _vb, _vc);
-
-	_result[0] = normal[0];
-	_result[1] = normal[1];
-	_result[2] = normal[2];
-	_result[3] = -vec3Dot(normal, _va);
-}
-
-#endif // FPU_MATH_H_HEADER_GUARD
diff --git a/examples/common/imgui/imgui.cpp b/examples/common/imgui/imgui.cpp
index 9767119a..d88309fb 100644
--- a/examples/common/imgui/imgui.cpp
+++ b/examples/common/imgui/imgui.cpp
@@ -26,11 +26,11 @@
 #include <stdio.h>
 #include <bx/string.h>
 #include <bx/uint32_t.h>
+#include <bx/fpumath.h>
 #include <bgfx.h>
 
 #include "../entry/dbg.h"
 #include "imgui.h"
-#include "../fpumath.h"
 
 #include "vs_imgui_color.bin.h"
 #include "fs_imgui_color.bin.h"
@@ -350,7 +350,7 @@ struct Imgui
 		bgfx::setViewRect(_view, 0, 0, _width, _height);
 
 		float proj[16];
-		mtxOrtho(proj, 0.0f, (float)_width, (float)_height, 0.0f, 0.0f, 1000.0f);
+		bx::mtxOrtho(proj, 0.0f, (float)_width, (float)_height, 0.0f, 0.0f, 1000.0f);
 		bgfx::setViewTransform(_view, NULL, proj);
 
 		updateInput(_mx, _my, _button, _scroll);
diff --git a/tools/geometryc/geometryc.cpp b/tools/geometryc/geometryc.cpp
index 2c348ed3..fff385b7 100644
--- a/tools/geometryc/geometryc.cpp
+++ b/tools/geometryc/geometryc.cpp
@@ -57,10 +57,10 @@
 #include <bx/readerwriter.h>
 #include <bx/hash.h>
 #include <bx/uint32_t.h>
+#include <bx/fpumath.h>
 
 #include "tokenizecmd.h"
 #include "bounds.h"
-#include "fpumath.h"
 
 struct Vector3
 {
@@ -214,10 +214,10 @@ void calcTangents(void* _vertices, uint16_t _numVertices, bgfx::VertexDecl _decl
 
 		float normal[4];
 		bgfx::vertexUnpack(normal, bgfx::Attrib::Normal, _decl, _vertices, ii);
-		float ndt = vec3Dot(normal, tanu);
+		float ndt = bx::vec3Dot(normal, tanu);
 
 		float nxt[3];
-		vec3Cross(nxt, normal, tanu);
+		bx::vec3Cross(nxt, normal, tanu);
 
 		float tmp[3];
 		tmp[0] = tanu[0] - normal[0] * ndt;
@@ -225,9 +225,9 @@ void calcTangents(void* _vertices, uint16_t _numVertices, bgfx::VertexDecl _decl
 		tmp[2] = tanu[2] - normal[2] * ndt;
 
 		float tangent[4];
-		vec3Norm(tangent, tmp);
+		bx::vec3Norm(tangent, tmp);
 
-		tangent[3] = vec3Dot(nxt, tanv) < 0.0f ? -1.0f : 1.0f;
+		tangent[3] = bx::vec3Dot(nxt, tanv) < 0.0f ? -1.0f : 1.0f;
 		bgfx::vertexPack(tangent, true, bgfx::Attrib::Tangent, _decl, _vertices, ii);
 	}
 
@@ -835,7 +835,7 @@ int main(int _argc, const char* _argv[])
 					if (hasNormal)
 					{
 						float normal[4];
-						vec3Norm(normal, (float*)&normals[index.m_normal]);
+						bx::vec3Norm(normal, (float*)&normals[index.m_normal]);
 						bgfx::vertexPack(normal, true, bgfx::Attrib::Normal, decl, vertices);
 					}