mirror of
https://github.com/scratchfoundation/bgfx.git
synced 2024-11-25 09:08:22 -05:00
Fixed bad logic for ref/simd path.
This commit is contained in:
parent
7bdae66027
commit
752fa73cec
1 changed file with 25 additions and 30 deletions
|
@ -3,61 +3,56 @@
|
||||||
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
* License: http://www.opensource.org/licenses/BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "bgfx_p.h"
|
||||||
#include <bx/float4_t.h>
|
#include <bx/float4_t.h>
|
||||||
|
|
||||||
namespace bgfx
|
namespace bgfx
|
||||||
{
|
{
|
||||||
static void imageSwizzleBGRA8Ref(uint8_t* _rgbaData, uint32_t _width, uint32_t _height)
|
static void imageSwizzleBGRA8Ref(uint8_t* _rgbaData, uint32_t _width, uint32_t _height)
|
||||||
{
|
{
|
||||||
uint32_t dstpitch = _width*4;
|
const uint32_t dstpitch = _width*4;
|
||||||
for (uint32_t yy = 0; yy < _height; ++yy)
|
uint8_t* ptr = _rgbaData;
|
||||||
{
|
|
||||||
uint8_t* dst = &_rgbaData[yy*dstpitch];
|
|
||||||
|
|
||||||
for (uint32_t xx = 0; xx < _width; ++xx)
|
for (uint32_t xx = 0, num = _width*_height; xx < num; ++xx)
|
||||||
{
|
{
|
||||||
uint8_t tmp = dst[0];
|
uint8_t tmp = ptr[0];
|
||||||
dst[0] = dst[2];
|
ptr[0] = ptr[2];
|
||||||
dst[2] = tmp;
|
ptr[2] = tmp;
|
||||||
dst += 4;
|
ptr += 4;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void imageSwizzleBGRA8(uint8_t* _rgbaData, uint32_t _width, uint32_t _height)
|
void imageSwizzleBGRA8(uint8_t* _rgbaData, uint32_t _width, uint32_t _height)
|
||||||
{
|
{
|
||||||
if (0 != (_width&0xf)
|
// Test whether we can process four 4-byte pixels at a time.
|
||||||
|| _width < 16)
|
if (0 != (_width&0x3)
|
||||||
|
|| _width < 4)
|
||||||
{
|
{
|
||||||
|
BX_WARN(_width < 4, "Image swizzle is taking slow path. Image width must be multiple of 4 (width %d).", _width);
|
||||||
imageSwizzleBGRA8Ref(_rgbaData, _width, _height);
|
imageSwizzleBGRA8Ref(_rgbaData, _width, _height);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t dstpitch = _width*4;
|
const uint32_t dstpitch = _width*4;
|
||||||
uint32_t num = dstpitch/16;
|
|
||||||
|
|
||||||
using namespace bx;
|
using namespace bx;
|
||||||
|
|
||||||
const float4_t mf0f0 = float4_isplat(0xff00ff00);
|
const float4_t mf0f0 = float4_isplat(0xff00ff00);
|
||||||
const float4_t m0f0f = float4_isplat(0x00ff00ff);
|
const float4_t m0f0f = float4_isplat(0x00ff00ff);
|
||||||
|
uint8_t* ptr = _rgbaData;
|
||||||
|
|
||||||
for (uint32_t yy = 0; yy < _height; ++yy)
|
for (uint32_t xx = 0, num = dstpitch/16*_height; xx < num; ++xx)
|
||||||
{
|
|
||||||
uint8_t* ptr = &_rgbaData[yy*dstpitch];
|
|
||||||
|
|
||||||
for (uint32_t xx = 0; xx < num; ++xx)
|
|
||||||
{
|
{
|
||||||
const float4_t tabgr = float4_ld(ptr);
|
const float4_t tabgr = float4_ld(ptr);
|
||||||
const float4_t t00ab = float4_srl(tabgr, 16);
|
const float4_t t00ab = float4_srl(tabgr, 16);
|
||||||
const float4_t tgr00 = float4_sll(tabgr, 16);
|
const float4_t tgr00 = float4_sll(tabgr, 16);
|
||||||
const float4_t tgrab = float4_or(t00ab, tgr00);
|
const float4_t tgrab = float4_or(t00ab, tgr00);
|
||||||
const float4_t ta0g0 = float4_and(tabgr, mf0f0);
|
const float4_t ta0g0 = float4_and(tabgr, mf0f0);
|
||||||
const float4_t t0g0b = float4_and(tgrab, m0f0f);
|
const float4_t t0r0b = float4_and(tgrab, m0f0f);
|
||||||
const float4_t targb = float4_or(ta0g0, t0g0b);
|
const float4_t targb = float4_or(ta0g0, t0r0b);
|
||||||
float4_st(ptr, targb);
|
float4_st(ptr, targb);
|
||||||
ptr += 16;
|
ptr += 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace bgfx
|
} // namespace bgfx
|
||||||
|
|
Loading…
Reference in a new issue