Switched from FPU to SIMD math.

This commit is contained in:
bkaradzic 2014-01-18 23:33:00 -08:00
parent 59a76342f2
commit 96d9d7437b
6 changed files with 73 additions and 110 deletions

View file

@ -235,22 +235,6 @@ namespace bgfx
g_callback->fatal(_code, temp);
}
// Multiplies the 4-component row vector _vec by the 4x4 matrix _mat (16
// contiguous floats) and writes the 4-component product to _result.
//
// Output component `col` is the sum over row = 0..3 of
// _vec[row] * _mat[row*4 + col], accumulated in that row order.
//
// All three pointers are declared __restrict, so _result must not overlap
// either input.
inline void vec4MulMtx(float* __restrict _result, const float* __restrict _vec, const float* __restrict _mat)
{
	const float x = _vec[0];
	const float y = _vec[1];
	const float z = _vec[2];
	const float w = _vec[3];

	for (int col = 0; col < 4; ++col)
	{
		// Same left-to-right summation as the unrolled form.
		_result[col] = x * _mat[col] + y * _mat[4 + col] + z * _mat[8 + col] + w * _mat[12 + col];
	}
}
// Multiplies two 4x4 matrices stored as 16 contiguous floats each:
// every group of four floats in _a is multiplied by _b through vec4MulMtx
// and stored in the matching group of four floats in _result.
//
// The pointers are declared __restrict, so _result must not overlap _a or _b.
void mtxMul(float* __restrict _result, const float* __restrict _a, const float* __restrict _b)
{
	for (int offset = 0; offset < 16; offset += 4)
	{
		vec4MulMtx(&_result[offset], &_a[offset], _b);
	}
}
void mtxOrtho(float* _result, float _left, float _right, float _bottom, float _top, float _near, float _far)
{
const float aa = 2.0f/(_right - _left);

View file

@ -61,6 +61,7 @@ namespace bgfx
#include <bx/bx.h>
#include <bx/debug.h>
#include <bx/float4x4_t.h>
#include <bx/blockalloc.h>
#include <bx/endian.h>
#include <bx/handlealloc.h>
@ -639,16 +640,19 @@ namespace bgfx
// 4x4 matrix storage. The same memory is exposed through the `un` union in
// two ways: as a bx::float4x4_t (`f4x4`, the operand type handed to
// bx::float4x4_mul elsewhere in this listing) and as 16 plain floats (`val`,
// used for memcpy and for uploading shader constants / GL uniforms).
// NOTE(review): BX_ALIGN_STRUCT_16 presumably forces 16-byte alignment of the
// struct - confirm against the bx macro definition.
BX_ALIGN_STRUCT_16(struct) Matrix4
{
union
{
// SIMD-typed view of the matrix.
bx::float4x4_t f4x4;
// Scalar view of the same 16 floats.
float val[16];
} un;
// Resets the matrix to identity: zero all 16 floats, then write 1.0f to
// elements 0, 5, 10 and 15.
void setIdentity()
{
// NOTE(review): the next two statements access `val` directly, but in this
// listing `val` is only reachable as `un.val`. They look like the pre-change
// lines of the diff shown next to their replacements - confirm which pair
// is live.
memset(val, 0, sizeof(val) );
val[0] = val[5] = val[10] = val[15] = 1.0f;
memset(un.val, 0, sizeof(un.val) );
un.val[0] = un.val[5] = un.val[10] = un.val[15] = 1.0f;
}
};
void mtxMul(float* __restrict _result, const float* __restrict _a, const float* __restrict _b);
void mtxOrtho(float* _result, float _left, float _right, float _bottom, float _top, float _near, float _far);
struct MatrixCache
@ -2416,7 +2420,7 @@ namespace bgfx
if (NULL != _view)
{
memcpy(m_view[_id].val, _view, sizeof(Matrix4) );
memcpy(m_view[_id].un.val, _view, sizeof(Matrix4) );
}
else
{
@ -2425,7 +2429,7 @@ namespace bgfx
if (NULL != _proj)
{
memcpy(m_proj[_id].val, _proj, sizeof(Matrix4) );
memcpy(m_proj[_id].un.val, _proj, sizeof(Matrix4) );
}
else
{

View file

@ -4,7 +4,6 @@
*/
#include "bgfx_p.h"
#include <bx/float4_t.h>
#include <math.h> // powf, sqrtf
#include "image.h"

View file

@ -176,6 +176,14 @@ namespace bgfx
},
};
// Constant matrix with 0.5 on the first three diagonal entries, 0.5 in the
// first three entries of the last row, and 1.0 in the final entry. It is the
// right-hand operand of bx::float4x4_mul when viewProjBias is built from
// viewProj[other].
// NOTE(review): presumably a scale-and-bias that remaps x, y, z from [-1, 1]
// to [0, 1] - confirm against the shader-side usage.
// NOTE(review): brace-initializing a Matrix4 initializes the first member of
// its `un` union (f4x4 in this listing) - confirm that accepts 16 floats on
// every target compiler.
static const Matrix4 s_bias =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
struct TextureFormatInfo
{
DXGI_FORMAT m_fmt;
@ -2206,7 +2214,7 @@ namespace bgfx
Matrix4 viewProj[BGFX_CONFIG_MAX_VIEWS];
for (uint32_t ii = 0; ii < BGFX_CONFIG_MAX_VIEWS; ++ii)
{
mtxMul(viewProj[ii].val, m_render->m_view[ii].val, m_render->m_proj[ii].val);
bx::float4x4_mul(&viewProj[ii].un.f4x4, &m_render->m_view[ii].un.f4x4, &m_render->m_proj[ii].un.f4x4);
}
bool wireframe = !!(m_render->m_debug&BGFX_DEBUG_WIREFRAME);
@ -2445,20 +2453,20 @@ namespace bgfx
case PredefinedUniform::View:
{
s_renderCtx->setShaderConstant(flags, predefined.m_loc, m_render->m_view[view].val, bx::uint32_min(4, predefined.m_count) );
s_renderCtx->setShaderConstant(flags, predefined.m_loc, m_render->m_view[view].un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
case PredefinedUniform::ViewProj:
{
s_renderCtx->setShaderConstant(flags, predefined.m_loc, viewProj[view].val, bx::uint32_min(4, predefined.m_count) );
s_renderCtx->setShaderConstant(flags, predefined.m_loc, viewProj[view].un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
case PredefinedUniform::Model:
{
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
s_renderCtx->setShaderConstant(flags, predefined.m_loc, model.val, bx::uint32_min(state.m_num*4, predefined.m_count) );
s_renderCtx->setShaderConstant(flags, predefined.m_loc, model.un.val, bx::uint32_min(state.m_num*4, predefined.m_count) );
}
break;
@ -2466,8 +2474,8 @@ namespace bgfx
{
Matrix4 modelView;
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
mtxMul(modelView.val, model.val, m_render->m_view[view].val);
s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelView.val, bx::uint32_min(4, predefined.m_count) );
bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &m_render->m_view[view].un.f4x4);
s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
@ -2475,8 +2483,8 @@ namespace bgfx
{
Matrix4 modelViewProj;
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
mtxMul(modelViewProj.val, model.val, viewProj[view].val);
s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelViewProj.val, bx::uint32_min(4, predefined.m_count) );
bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
@ -2484,40 +2492,24 @@ namespace bgfx
{
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
static const BX_ALIGN_STRUCT_16(float) s_bias[16] =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
uint8_t other = m_render->m_other[view];
Matrix4 viewProjBias;
mtxMul(viewProjBias.val, viewProj[other].val, s_bias);
bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4);
Matrix4 modelViewProj;
mtxMul(modelViewProj.val, model.val, viewProjBias.val);
bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProjBias.un.f4x4);
s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelViewProj.val, bx::uint32_min(4, predefined.m_count) );
s_renderCtx->setShaderConstant(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
case PredefinedUniform::ViewProjX:
{
static const BX_ALIGN_STRUCT_16(float) s_bias[16] =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
uint8_t other = m_render->m_other[view];
Matrix4 viewProjBias;
mtxMul(viewProjBias.val, viewProj[other].val, s_bias);
bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4);
s_renderCtx->setShaderConstant(flags, predefined.m_loc, viewProjBias.val, bx::uint32_min(4, predefined.m_count) );
s_renderCtx->setShaderConstant(flags, predefined.m_loc, viewProjBias.un.val, bx::uint32_min(4, predefined.m_count) );
}
break;

View file

@ -250,6 +250,14 @@ namespace bgfx
{ D3DFMT_RAWZ, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, false },
};
// Constant matrix with 0.5 on the first three diagonal entries, 0.5 in the
// first three entries of the last row, and 1.0 in the final entry. It is the
// right-hand operand of bx::float4x4_mul when viewProjBias is built from
// viewProj[other].
// NOTE(review): presumably a scale-and-bias that remaps x, y, z from [-1, 1]
// to [0, 1] - confirm against the shader-side usage.
// NOTE(review): brace-initializing a Matrix4 initializes the first member of
// its `un` union (f4x4 in this listing) - confirm that accepts 16 floats on
// every target compiler.
static const Matrix4 s_bias =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
static const GUID IID_IDirect3D9 = { 0x81bdcbca, 0x64d4, 0x426d, { 0xae, 0x8d, 0xad, 0x1, 0x47, 0xf4, 0x27, 0x5c } };
static const GUID IID_IDirect3DDevice9Ex = { 0xb18b10ce, 0x2649, 0x405a, { 0x87, 0xf, 0x95, 0xf7, 0x77, 0xd4, 0x31, 0x3a } };
@ -2245,7 +2253,7 @@ namespace bgfx
Matrix4 viewProj[BGFX_CONFIG_MAX_VIEWS];
for (uint32_t ii = 0; ii < BGFX_CONFIG_MAX_VIEWS; ++ii)
{
mtxMul(viewProj[ii].val, m_render->m_view[ii].val, m_render->m_proj[ii].val);
bx::float4x4_mul(&viewProj[ii].un.f4x4, &m_render->m_view[ii].un.f4x4, &m_render->m_proj[ii].un.f4x4);
}
DX_CHECK(device->SetRenderState(D3DRS_FILLMODE, m_render->m_debug&BGFX_DEBUG_WIREFRAME ? D3DFILL_WIREFRAME : D3DFILL_SOLID) );
@ -2609,20 +2617,20 @@ namespace bgfx
case PredefinedUniform::View:
{
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, m_render->m_view[view].val, bx::uint32_min(4, predefined.m_count) );
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, m_render->m_view[view].un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
case PredefinedUniform::ViewProj:
{
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, viewProj[view].val, bx::uint32_min(4, predefined.m_count) );
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, viewProj[view].un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
case PredefinedUniform::Model:
{
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, model.val, bx::uint32_min(state.m_num*4, predefined.m_count) );
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, model.un.val, bx::uint32_min(state.m_num*4, predefined.m_count) );
}
break;
@ -2630,8 +2638,8 @@ namespace bgfx
{
Matrix4 modelView;
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
mtxMul(modelView.val, model.val, m_render->m_view[view].val);
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelView.val, bx::uint32_min(4, predefined.m_count) );
bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &m_render->m_view[view].un.f4x4);
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelView.un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
@ -2639,8 +2647,8 @@ namespace bgfx
{
Matrix4 modelViewProj;
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
mtxMul(modelViewProj.val, model.val, viewProj[view].val);
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelViewProj.val, bx::uint32_min(4, predefined.m_count) );
bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
@ -2648,40 +2656,24 @@ namespace bgfx
{
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
static const BX_ALIGN_STRUCT_16(float) s_bias[16] =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
uint8_t other = m_render->m_other[view];
Matrix4 viewProjBias;
mtxMul(viewProjBias.val, viewProj[other].val, s_bias);
bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4);
Matrix4 modelViewProj;
mtxMul(modelViewProj.val, model.val, viewProjBias.val);
bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProjBias.un.f4x4);
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelViewProj.val, bx::uint32_min(4, predefined.m_count) );
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, modelViewProj.un.val, bx::uint32_min(4, predefined.m_count) );
}
break;
case PredefinedUniform::ViewProjX:
{
static const BX_ALIGN_STRUCT_16(float) s_bias[16] =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
uint8_t other = m_render->m_other[view];
Matrix4 viewProjBias;
mtxMul(viewProjBias.val, viewProj[other].val, s_bias);
bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4);
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, viewProjBias.val, bx::uint32_min(4, predefined.m_count) );
s_renderCtx->setShaderConstantF(flags, predefined.m_loc, viewProjBias.un.val, bx::uint32_min(4, predefined.m_count) );
}
break;

View file

@ -242,6 +242,14 @@ namespace bgfx
{ GL_STENCIL_INDEX8, GL_DEPTH_STENCIL, GL_UNSIGNED_BYTE, false }, // D0S8
};
// Constant matrix with 0.5 on the first three diagonal entries, 0.5 in the
// first three entries of the last row, and 1.0 in the final entry. It is the
// right-hand operand of bx::float4x4_mul when viewProjBias is built from
// viewProj[other].
// NOTE(review): presumably a scale-and-bias that remaps x, y, z from [-1, 1]
// to [0, 1] - confirm against the shader-side usage.
// NOTE(review): brace-initializing a Matrix4 initializes the first member of
// its `un` union (f4x4 in this listing) - confirm that accepts 16 floats on
// every target compiler.
static const Matrix4 s_bias =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
struct Extension
{
enum Enum
@ -3087,7 +3095,7 @@ namespace bgfx
Matrix4 viewProj[BGFX_CONFIG_MAX_VIEWS];
for (uint32_t ii = 0; ii < BGFX_CONFIG_MAX_VIEWS; ++ii)
{
mtxMul(viewProj[ii].val, m_render->m_view[ii].val, m_render->m_proj[ii].val);
float4x4_mul(&viewProj[ii].un.f4x4, &m_render->m_view[ii].un.f4x4, &m_render->m_proj[ii].un.f4x4);
}
uint16_t programIdx = invalidHandle;
@ -3422,7 +3430,7 @@ namespace bgfx
GL_CHECK(glUniformMatrix4fv(predefined.m_loc
, 1
, GL_FALSE
, m_render->m_view[view].val
, m_render->m_view[view].un.val
) );
}
break;
@ -3432,7 +3440,7 @@ namespace bgfx
GL_CHECK(glUniformMatrix4fv(predefined.m_loc
, 1
, GL_FALSE
, viewProj[view].val
, viewProj[view].un.val
) );
}
break;
@ -3443,7 +3451,7 @@ namespace bgfx
GL_CHECK(glUniformMatrix4fv(predefined.m_loc
, bx::uint32_min(predefined.m_count, state.m_num)
, GL_FALSE
, model.val
, model.un.val
) );
}
break;
@ -3452,12 +3460,12 @@ namespace bgfx
{
Matrix4 modelView;
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
mtxMul(modelView.val, model.val, m_render->m_view[view].val);
bx::float4x4_mul(&modelView.un.f4x4, &model.un.f4x4, &m_render->m_view[view].un.f4x4);
GL_CHECK(glUniformMatrix4fv(predefined.m_loc
, 1
, GL_FALSE
, modelView.val
, modelView.un.val
) );
}
break;
@ -3466,12 +3474,12 @@ namespace bgfx
{
Matrix4 modelViewProj;
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
mtxMul(modelViewProj.val, model.val, viewProj[view].val);
bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProj[view].un.f4x4);
GL_CHECK(glUniformMatrix4fv(predefined.m_loc
, 1
, GL_FALSE
, modelViewProj.val
, modelViewProj.un.val
) );
}
break;
@ -3480,47 +3488,31 @@ namespace bgfx
{
const Matrix4& model = m_render->m_matrixCache.m_cache[state.m_matrix];
static const BX_ALIGN_STRUCT_16(float) s_bias[16] =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
uint8_t other = m_render->m_other[view];
Matrix4 viewProjBias;
mtxMul(viewProjBias.val, viewProj[other].val, s_bias);
bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4);
Matrix4 modelViewProj;
mtxMul(modelViewProj.val, model.val, viewProjBias.val);
bx::float4x4_mul(&modelViewProj.un.f4x4, &model.un.f4x4, &viewProjBias.un.f4x4);
GL_CHECK(glUniformMatrix4fv(predefined.m_loc
, 1
, GL_FALSE
, modelViewProj.val
, modelViewProj.un.val
) );
}
break;
case PredefinedUniform::ViewProjX:
{
static const BX_ALIGN_STRUCT_16(float) s_bias[16] =
{
0.5f, 0.0f, 0.0f, 0.0f,
0.0f, 0.5f, 0.0f, 0.0f,
0.0f, 0.0f, 0.5f, 0.0f,
0.5f, 0.5f, 0.5f, 1.0f,
};
uint8_t other = m_render->m_other[view];
Matrix4 viewProjBias;
mtxMul(viewProjBias.val, viewProj[other].val, s_bias);
bx::float4x4_mul(&viewProjBias.un.f4x4, &viewProj[other].un.f4x4, &s_bias.un.f4x4);
GL_CHECK(glUniformMatrix4fv(predefined.m_loc
, 1
, GL_FALSE
, viewProjBias.val
, viewProjBias.un.val
) );
}
break;