From 96d9d7437bdfb54c654988150c82bdee5f74aba1 Mon Sep 17 00:00:00 2001 From: bkaradzic Date: Sat, 18 Jan 2014 23:33:00 -0800 Subject: [PATCH] Switched from FPU to SIMD math. --- src/bgfx.cpp | 16 -------------- src/bgfx_p.h | 16 +++++++++----- src/image.cpp | 1 - src/renderer_d3d11.cpp | 50 ++++++++++++++++++------------------------ src/renderer_d3d9.cpp | 50 ++++++++++++++++++------------------------ src/renderer_gl.cpp | 50 ++++++++++++++++++------------------------ 6 files changed, 73 insertions(+), 110 deletions(-) diff --git a/src/bgfx.cpp b/src/bgfx.cpp index 38d72c51..92137e7a 100644 --- a/src/bgfx.cpp +++ b/src/bgfx.cpp @@ -235,22 +235,6 @@ namespace bgfx g_callback->fatal(_code, temp); } - inline void vec4MulMtx(float* __restrict _result, const float* __restrict _vec, const float* __restrict _mat) - { - _result[0] = _vec[0] * _mat[ 0] + _vec[1] * _mat[4] + _vec[2] * _mat[ 8] + _vec[3] * _mat[12]; - _result[1] = _vec[0] * _mat[ 1] + _vec[1] * _mat[5] + _vec[2] * _mat[ 9] + _vec[3] * _mat[13]; - _result[2] = _vec[0] * _mat[ 2] + _vec[1] * _mat[6] + _vec[2] * _mat[10] + _vec[3] * _mat[14]; - _result[3] = _vec[0] * _mat[ 3] + _vec[1] * _mat[7] + _vec[2] * _mat[11] + _vec[3] * _mat[15]; - } - - void mtxMul(float* __restrict _result, const float* __restrict _a, const float* __restrict _b) - { - vec4MulMtx(&_result[ 0], &_a[ 0], _b); - vec4MulMtx(&_result[ 4], &_a[ 4], _b); - vec4MulMtx(&_result[ 8], &_a[ 8], _b); - vec4MulMtx(&_result[12], &_a[12], _b); - } - void mtxOrtho(float* _result, float _left, float _right, float _bottom, float _top, float _near, float _far) { const float aa = 2.0f/(_right - _left); diff --git a/src/bgfx_p.h b/src/bgfx_p.h index c418fb9b..0adb70c4 100644 --- a/src/bgfx_p.h +++ b/src/bgfx_p.h @@ -61,6 +61,7 @@ namespace bgfx #include #include +#include #include #include #include @@ -639,16 +640,19 @@ namespace bgfx BX_ALIGN_STRUCT_16(struct) Matrix4 { - float val[16]; + union + { + bx::float4x4_t f4x4; + float val[16]; + } un; void 
setIdentity() { - memset(val, 0, sizeof(val) ); - val[0] = val[5] = val[10] = val[15] = 1.0f; + memset(un.val, 0, sizeof(un.val) ); + un.val[0] = un.val[5] = un.val[10] = un.val[15] = 1.0f; } }; - void mtxMul(float* __restrict _result, const float* __restrict _a, const float* __restrict _b); void mtxOrtho(float* _result, float _left, float _right, float _bottom, float _top, float _near, float _far); struct MatrixCache @@ -2416,7 +2420,7 @@ namespace bgfx if (NULL != _view) { - memcpy(m_view[_id].val, _view, sizeof(Matrix4) ); + memcpy(m_view[_id].un.val, _view, sizeof(Matrix4) ); } else { @@ -2425,7 +2429,7 @@ namespace bgfx if (NULL != _proj) { - memcpy(m_proj[_id].val, _proj, sizeof(Matrix4) ); + memcpy(m_proj[_id].un.val, _proj, sizeof(Matrix4) ); } else { diff --git a/src/image.cpp b/src/image.cpp index 305a9ddc..478e7f46 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -4,7 +4,6 @@ */ #include "bgfx_p.h" -#include #include // powf, sqrtf #include "image.h" diff --git a/src/renderer_d3d11.cpp b/src/renderer_d3d11.cpp index 7eab917c..147e8bf2 100644 --- a/src/renderer_d3d11.cpp +++ b/src/renderer_d3d11.cpp @@ -176,6 +176,14 @@ namespace bgfx }, }; + static const Matrix4 s_bias = + { + 0.5f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.5f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.5f, 0.0f, + 0.5f, 0.5f, 0.5f, 1.0f, + }; + struct TextureFormatInfo { DXGI_FORMAT m_fmt; @@ -2206,7 +2214,7 @@ namespace bgfx Matrix4 viewProj[BGFX_CONFIG_MAX_VIEWS]; for (uint32_t ii = 0; ii < BGFX_CONFIG_MAX_VIEWS; ++ii) { - mtxMul(viewProj[ii].val, m_render->m_view[ii].val, m_render->m_proj[ii].val); + bx::float4x4_mul(&viewProj[ii].un.f4x4, &m_render->m_view[ii].un.f4x4, &m_render->m_proj[ii].un.f4x4); } bool wireframe = !!(m_render->m_debug&BGFX_DEBUG_WIREFRAME); @@ -2445,20 +2453,20 @@ namespace bgfx case PredefinedUniform::View: { - s_renderCtx->setShaderConstant(flags, predefined.m_loc, m_render->m_view[view].val, bx::uint32_min(4, predefined.m_count) ); + s_renderCtx->setShaderConstant(flags, 
predefined.m_loc, m_render->m_view[view].un.val, bx::uint32_min(4, predefined.m_count) ); } break; case PredefinedUniform::ViewProj: { - s_renderCtx->setShaderConstant(flags, predefined.m_loc, viewProj[view].val, bx::uint32_min(4, predefined.m_count) ); + s_renderCtx->setShaderConstant(flags, predefined.m_loc, viewProj[view].un.val, bx::uint32_min(4, predefined.m_count) ); } break; case PredefinedUniform::Model: { const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - s_renderCtx->setShaderConstant(flags, predefined.m_loc, model.val, bx::uint32_min(state.m_num*4, predefined.m_count) ); + s_renderCtx->setShaderConstant(flags, predefined.m_loc, model.un.val, bx::uint32_min(state.m_num*4, predefined.m_count) ); } break; @@ -2466,8 +2474,8 @@ namespace bgfx { Matrix4 modelView; const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - mtxMul(modelView.val, model.val, m_render->m_view[view].val); - s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelView.val, bx::uint32_min(4, predefined.m_count) ); + bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &m_render->m_view[view].un.f4x4); + s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) ); } break; @@ -2475,8 +2483,8 @@ namespace bgfx { Matrix4 modelViewProj; const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - mtxMul(modelViewProj.val, model.val, viewProj[view].val); - s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelViewProj.val, bx::uint32_min(4, predefined.m_count) ); + bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4); + s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) ); } break; @@ -2484,40 +2492,24 @@ namespace bgfx { const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - static const BX_ALIGN_STRUCT_16(float) s_bias[16] = - { - 0.5f, 0.0f, 0.0f, 0.0f, - 
0.0f, 0.5f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.5f, 0.0f, - 0.5f, 0.5f, 0.5f, 1.0f, - }; - uint8_t other = m_render->m_other[view]; Matrix4 viewProjBias; - mtxMul(viewProjBias.val, viewProj[other].val, s_bias); + bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4); Matrix4 modelViewProj; - mtxMul(modelViewProj.val, model.val, viewProjBias.val); + bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProjBias.un.f4x4); - s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelViewProj.val, bx::uint32_min(4, predefined.m_count) ); + s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) ); } break; case PredefinedUniform::ViewProjX: { - static const BX_ALIGN_STRUCT_16(float) s_bias[16] = - { - 0.5f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.5f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.5f, 0.0f, - 0.5f, 0.5f, 0.5f, 1.0f, - }; - uint8_t other = m_render->m_other[view]; Matrix4 viewProjBias; - mtxMul(viewProjBias.val, viewProj[other].val, s_bias); + bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4); - s_renderCtx->setShaderConstant(flags, predefined.m_loc, viewProjBias.val, bx::uint32_min(4, predefined.m_count) ); + s_renderCtx->setShaderConstant(flags, predefined.m_loc, viewProjBias.un.val, bx::uint32_min(4, predefined.m_count) ); } break; diff --git a/src/renderer_d3d9.cpp b/src/renderer_d3d9.cpp index acdfe4b1..22a02758 100644 --- a/src/renderer_d3d9.cpp +++ b/src/renderer_d3d9.cpp @@ -250,6 +250,14 @@ namespace bgfx { D3DFMT_RAWZ, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false }, }; + static const Matrix4 s_bias = + { + 0.5f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.5f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.5f, 0.0f, + 0.5f, 0.5f, 0.5f, 1.0f, + }; + static const GUID IID_IDirect3D9 = { 0x81bdcbca, 0x64d4, 0x426d, { 0xae, 0x8d, 0xad, 0x1, 0x47, 0xf4, 0x27, 0x5c } }; static const GUID IID_IDirect3DDevice9Ex = { 0xb18b10ce, 0x2649, 0x405a, { 0x87, 0xf, 0x95, 0xf7, 0x77, 0xd4, 0x31, 0x3a } 
}; @@ -2245,7 +2253,7 @@ namespace bgfx Matrix4 viewProj[BGFX_CONFIG_MAX_VIEWS]; for (uint32_t ii = 0; ii < BGFX_CONFIG_MAX_VIEWS; ++ii) { - mtxMul(viewProj[ii].val, m_render->m_view[ii].val, m_render->m_proj[ii].val); + bx::float4x4_mul(&viewProj[ii].un.f4x4, &m_render->m_view[ii].un.f4x4, &m_render->m_proj[ii].un.f4x4); } DX_CHECK(device->SetRenderState(D3DRS_FILLMODE, m_render->m_debug&BGFX_DEBUG_WIREFRAME ? D3DFILL_WIREFRAME : D3DFILL_SOLID) ); @@ -2609,20 +2617,20 @@ namespace bgfx case PredefinedUniform::View: { - s_renderCtx->setShaderConstantF(flags, predefined.m_loc, m_render->m_view[view].val, bx::uint32_min(4, predefined.m_count) ); + s_renderCtx->setShaderConstantF(flags, predefined.m_loc, m_render->m_view[view].un.val, bx::uint32_min(4, predefined.m_count) ); } break; case PredefinedUniform::ViewProj: { - s_renderCtx->setShaderConstantF(flags, predefined.m_loc, viewProj[view].val, bx::uint32_min(4, predefined.m_count) ); + s_renderCtx->setShaderConstantF(flags, predefined.m_loc, viewProj[view].un.val, bx::uint32_min(4, predefined.m_count) ); } break; case PredefinedUniform::Model: { const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - s_renderCtx->setShaderConstantF(flags, predefined.m_loc, model.val, bx::uint32_min(state.m_num*4, predefined.m_count) ); + s_renderCtx->setShaderConstantF(flags, predefined.m_loc, model.un.val, bx::uint32_min(state.m_num*4, predefined.m_count) ); } break; @@ -2630,8 +2638,8 @@ namespace bgfx { Matrix4 modelView; const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - mtxMul(modelView.val, model.val, m_render->m_view[view].val); - s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelView.val, bx::uint32_min(4, predefined.m_count) ); + bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &m_render->m_view[view].un.f4x4); + s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) ); } break; @@ -2639,8 +2647,8 @@ namespace 
bgfx { Matrix4 modelViewProj; const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - mtxMul(modelViewProj.val, model.val, viewProj[view].val); - s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelViewProj.val, bx::uint32_min(4, predefined.m_count) ); + bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4); + s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) ); } break; @@ -2648,40 +2656,24 @@ namespace bgfx { const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - static const BX_ALIGN_STRUCT_16(float) s_bias[16] = - { - 0.5f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.5f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.5f, 0.0f, - 0.5f, 0.5f, 0.5f, 1.0f, - }; - uint8_t other = m_render->m_other[view]; Matrix4 viewProjBias; - mtxMul(viewProjBias.val, viewProj[other].val, s_bias); + bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4); Matrix4 modelViewProj; - mtxMul(modelViewProj.val, model.val, viewProjBias.val); + bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProjBias.un.f4x4); - s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelViewProj.val, bx::uint32_min(4, predefined.m_count) ); + s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) ); } break; case PredefinedUniform::ViewProjX: { - static const BX_ALIGN_STRUCT_16(float) s_bias[16] = - { - 0.5f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.5f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.5f, 0.0f, - 0.5f, 0.5f, 0.5f, 1.0f, - }; - uint8_t other = m_render->m_other[view]; Matrix4 viewProjBias; - mtxMul(viewProjBias.val, viewProj[other].val, s_bias); + bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4); - s_renderCtx->setShaderConstantF(flags, predefined.m_loc, viewProjBias.val, bx::uint32_min(4, predefined.m_count) ); + s_renderCtx->setShaderConstantF(flags, predefined.m_loc, 
viewProjBias.un.val, bx::uint32_min(4, predefined.m_count) ); } break; diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp index 53159679..526f89a0 100644 --- a/src/renderer_gl.cpp +++ b/src/renderer_gl.cpp @@ -242,6 +242,14 @@ namespace bgfx { GL_STENCIL_INDEX8, GL_DEPTH_STENCIL, GL_UNSIGNED_BYTE, false }, // D0S8 }; + static const Matrix4 s_bias = + { + 0.5f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.5f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.5f, 0.0f, + 0.5f, 0.5f, 0.5f, 1.0f, + }; + struct Extension { enum Enum @@ -3087,7 +3095,7 @@ namespace bgfx Matrix4 viewProj[BGFX_CONFIG_MAX_VIEWS]; for (uint32_t ii = 0; ii < BGFX_CONFIG_MAX_VIEWS; ++ii) { - mtxMul(viewProj[ii].val, m_render->m_view[ii].val, m_render->m_proj[ii].val); + float4x4_mul(&viewProj[ii].un.f4x4, &m_render->m_view[ii].un.f4x4, &m_render->m_proj[ii].un.f4x4); } uint16_t programIdx = invalidHandle; @@ -3422,7 +3430,7 @@ namespace bgfx GL_CHECK(glUniformMatrix4fv(predefined.m_loc , 1 , GL_FALSE - , m_render->m_view[view].val + , m_render->m_view[view].un.val ) ); } break; @@ -3432,7 +3440,7 @@ namespace bgfx GL_CHECK(glUniformMatrix4fv(predefined.m_loc , 1 , GL_FALSE - , viewProj[view].val + , viewProj[view].un.val ) ); } break; @@ -3443,7 +3451,7 @@ namespace bgfx GL_CHECK(glUniformMatrix4fv(predefined.m_loc , bx::uint32_min(predefined.m_count, state.m_num) , GL_FALSE - , model.val + , model.un.val ) ); } break; @@ -3452,12 +3460,12 @@ namespace bgfx { Matrix4 modelView; const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - mtxMul(modelView.val, model.val, m_render->m_view[view].val); + bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &m_render->m_view[view].un.f4x4); GL_CHECK(glUniformMatrix4fv(predefined.m_loc , 1 , GL_FALSE - , modelView.val + , modelView.un.val ) ); } break; @@ -3466,12 +3474,12 @@ namespace bgfx { Matrix4 modelViewProj; const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - mtxMul(modelViewProj.val, model.val, viewProj[view].val); + 
bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4); GL_CHECK(glUniformMatrix4fv(predefined.m_loc , 1 , GL_FALSE - , modelViewProj.val + , modelViewProj.un.val ) ); } break; @@ -3480,47 +3488,31 @@ namespace bgfx { const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix]; - static const BX_ALIGN_STRUCT_16(float) s_bias[16] = - { - 0.5f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.5f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.5f, 0.0f, - 0.5f, 0.5f, 0.5f, 1.0f, - }; - uint8_t other = m_render->m_other[view]; Matrix4 viewProjBias; - mtxMul(viewProjBias.val, viewProj[other].val, s_bias); + bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4); Matrix4 modelViewProj; - mtxMul(modelViewProj.val, model.val, viewProjBias.val); + bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProjBias.un.f4x4); GL_CHECK(glUniformMatrix4fv(predefined.m_loc , 1 , GL_FALSE - , modelViewProj.val + , modelViewProj.un.val ) ); } break; case PredefinedUniform::ViewProjX: { - static const BX_ALIGN_STRUCT_16(float) s_bias[16] = - { - 0.5f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.5f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.5f, 0.0f, - 0.5f, 0.5f, 0.5f, 1.0f, - }; - uint8_t other = m_render->m_other[view]; Matrix4 viewProjBias; - mtxMul(viewProjBias.val, viewProj[other].val, s_bias); + bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4); GL_CHECK(glUniformMatrix4fv(predefined.m_loc , 1 , GL_FALSE - , viewProjBias.val + , viewProjBias.un.val ) ); } break;