diff --git a/examples/07-callback/callback.cpp b/examples/07-callback/callback.cpp
index 349f9c10..483918ce 100644
--- a/examples/07-callback/callback.cpp
+++ b/examples/07-callback/callback.cpp
@@ -219,8 +219,85 @@ struct BgfxCallback : public bgfx::CallbackI
 
 	virtual void screenShot(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _data, uint32_t /*_size*/, bool _yflip) BX_OVERRIDE
 	{
+		char temp[1024];
+
 		// Save screen shot as TGA.
-		saveTga(_filePath, _width, _height, _pitch, _data, false, _yflip);
+		bx::snprintf(temp, BX_COUNTOF(temp), "%s.mip0.tga", _filePath);
+		saveTga(temp, _width, _height, _pitch, _data, false, _yflip);
+
+		uint32_t width = _width;
+		uint32_t height = _height;
+		uint32_t pitch = _pitch;
+
+		// NOTE: mips are generated in place, so the contents of _data are
+		// overwritten even though the pointer is declared const.
+		uint8_t* data = (uint8_t*)_data;
+
+		// Generate mip maps. Every pass leaves the smaller image tightly
+		// packed at the start of the buffer.
+		uint32_t mip = 1;
+		for (; 2 <= width && 2 <= height; ++mip)
+		{
+			bx::snprintf(temp, BX_COUNTOF(temp), "%s.mip%d.tga", _filePath, mip);
+			bgfx::imageRgba8Downsample2x2(width, height, pitch, data, data);
+
+			width >>= 1;
+			height >>= 1;
+			pitch = width*4;
+
+			saveTga(temp, width, height, pitch, data, false, _yflip);
+		}
+
+		// What is left is a single row or column. It is duplicated into
+		// two rows/columns so the 2x2 filter can be reused, which needs
+		// scratch space for twice the current image. Only a previous 2x2
+		// pass guarantees that space (the buffer held an image at least
+		// four times larger), so stop here if none has run.
+		if (1 == mip)
+		{
+			return;
+		}
+
+		if (width > height)
+		{
+			for (; 2 <= width; ++mip)
+			{
+				// Duplicate the row right below itself.
+				memcpy(&data[width*4], data, width*4);
+
+				bx::snprintf(temp, BX_COUNTOF(temp), "%s.mip%d.tga", _filePath, mip);
+				bgfx::imageRgba8Downsample2x2(width, 2, pitch, data, data);
+
+				width >>= 1;
+				pitch = width*4;
+
+				// The filter produced a single row.
+				saveTga(temp, width, 1, pitch, data, false, _yflip);
+			}
+		}
+		else
+		{
+			for (; 2 <= height; ++mip)
+			{
+				// Widen the packed column to two pixels per row. Walk
+				// backwards so no pixel is overwritten before it is read.
+				uint32_t* pixels = (uint32_t*)data;
+				for (uint32_t ii = height; 0 < ii--;)
+				{
+					const uint32_t pixel = pixels[ii];
+					pixels[ii*2+0] = pixel;
+					pixels[ii*2+1] = pixel;
+				}
+
+				bx::snprintf(temp, BX_COUNTOF(temp), "%s.mip%d.tga", _filePath, mip);
+				bgfx::imageRgba8Downsample2x2(2, height, 8, data, data);
+
+				height >>= 1;
+
+				// The filter produced a packed, one pixel wide column.
+				saveTga(temp, 1, height, 4, data, false, _yflip);
+			}
+		}
 	}
 
 	virtual void captureBegin(uint32_t _width, uint32_t _height, uint32_t /*_pitch*/, bgfx::TextureFormat::Enum /*_format*/, bool _yflip) 
BX_OVERRIDE
@@ -403,7 +460,7 @@ int _main_(int /*_argc*/, char** /*_argv*/)
 		// Take screen shot at frame 150.
 		if (150 == frame)
 		{
-			bgfx::saveScreenShot("temp/frame150.tga");
+			bgfx::saveScreenShot("temp/frame150");
 		}
 
 		// Advance to next frame. Rendering thread will be kicked to
diff --git a/include/bgfx.h b/include/bgfx.h
index fc89a995..c0a9a566 100644
--- a/include/bgfx.h
+++ b/include/bgfx.h
@@ -512,7 +512,26 @@ namespace bgfx
 	void vertexConvert(const VertexDecl& _destDecl, void* _destData, const VertexDecl& _srcDecl, const void* _srcData, uint32_t _num = 1);
 
 	/// Swizzle RGBA8 image to BGRA8.
-	void imageSwizzleBGRA8(uint8_t* _rgbaData, uint32_t _width, uint32_t _height);
+	///
+	/// @param _width Width of input image (pixels).
+	/// @param _height Height of input image (pixels).
+	/// @param _src Source image. Rows must be tightly packed (there is
+	///   no pitch parameter).
+	/// @param _dst Destination image. Must be the same size as input image.
+	///   _dst might be pointer to the same memory as _src.
+	///
+	void imageSwizzleBgra8(uint32_t _width, uint32_t _height, const void* _src, void* _dst);
+
+	/// Downsample RGBA8 image with 2x2 pixel average filter.
+	///
+	/// @param _width Width of input image (pixels).
+	/// @param _height Height of input image (pixels).
+	/// @param _pitch Pitch of input image (bytes).
+	/// @param _src Source image.
+	/// @param _dst Destination image. Must be at least quarter size of
+	///   input image. _dst might be pointer to the same memory as _src.
+	///
+	void imageRgba8Downsample2x2(uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, void* _dst);
 
 	/// Returns renderer backend API type.
RendererType::Enum getRendererType(); diff --git a/src/bgfx.cpp b/src/bgfx.cpp index 8185c61c..bf820b0c 100755 --- a/src/bgfx.cpp +++ b/src/bgfx.cpp @@ -98,7 +98,10 @@ namespace bgfx virtual void screenShot(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _data, uint32_t /*_size*/, bool _yflip) BX_OVERRIDE { - saveTga(_filePath, _width, _height, _pitch, _data, false, _yflip); + bx::CrtFileWriter writer; + writer.open(_filePath); + imageWriteTga(&writer, _width, _height, _pitch, _data, false, _yflip); + writer.close(); } virtual void captureBegin(uint32_t /*_width*/, uint32_t /*_height*/, uint32_t /*_pitch*/, TextureFormat::Enum /*_format*/, bool /*_yflip*/) BX_OVERRIDE @@ -187,57 +190,6 @@ namespace bgfx _result[15] = 1.0f; } - void saveTga(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, bool _grayscale, bool _yflip) - { - FILE* file = fopen(_filePath, "wb"); - if ( NULL != file ) - { - uint8_t type = _grayscale ? 3 : 2; - uint8_t bpp = _grayscale ? 
8 : 32;
-
-			putc(0, file);
-			putc(0, file);
-			putc(type, file);
-			putc(0, file);
-			putc(0, file);
-			putc(0, file);
-			putc(0, file);
-			putc(0, file);
-			putc(0, file);
-			putc(0, file);
-			putc(0, file);
-			putc(0, file);
-			putc(_width&0xff, file);
-			putc( (_width>>8)&0xff, file);
-			putc(_height&0xff, file);
-			putc( (_height>>8)&0xff, file);
-			putc(bpp, file);
-			putc(32, file);
-
-			uint32_t dstPitch = _width*bpp/8;
-			if (_yflip)
-			{
-				uint8_t* data = (uint8_t*)_src + _srcPitch*_height - _srcPitch;
-				for (uint32_t yy = 0; yy < _height; ++yy)
-				{
-					fwrite(data, dstPitch, 1, file);
-					data -= _srcPitch;
-				}
-			}
-			else
-			{
-				uint8_t* data = (uint8_t*)_src;
-				for (uint32_t yy = 0; yy < _height; ++yy)
-				{
-					fwrite(data, dstPitch, 1, file);
-					data += _srcPitch;
-				}
-			}
-
-			fclose(file);
-		}
-	}
-
 #include "charset.h"
 
 	void charsetFillTexture(const uint8_t* _charset, uint8_t* _rgba, uint32_t _height, uint32_t _pitch, uint32_t _bpp)
diff --git a/src/bgfx_p.h b/src/bgfx_p.h
index 04dc7844..08879d66 100755
--- a/src/bgfx_p.h
+++ b/src/bgfx_p.h
@@ -229,7 +229,7 @@ namespace bgfx
 	extern FreeFn g_free;
 
 	void release(const Memory* _mem);
-	void saveTga(const char* _filePath, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, bool _grayscale = false, bool _yflip = false);
+	void imageWriteTga(bx::WriterI* _writer, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, bool _grayscale = false, bool _yflip = false);
 	const char* getAttribName(Attrib::Enum _attr);
 	bool renderFrame();
 
diff --git a/src/image.cpp b/src/image.cpp
index 6cb22c97..a2d1bd56 100644
--- a/src/image.cpp
+++ b/src/image.cpp
@@ -8,27 +8,198 @@
 
 namespace bgfx
 {
-	static void imageSwizzleBGRA8Ref(uint8_t* _rgbaData, uint32_t _width, uint32_t _height)
+	/// Fill _width*_height 32-bit pixels at _dst with the value _solid.
+	void imageSolid(uint32_t _width, uint32_t _height, uint32_t _solid, void* _dst)
 	{
-		uint8_t* ptr = _rgbaData;
-
-		for (uint32_t xx = 0, num = _width*_height; xx < num; ++xx)
+		uint32_t* dst = (uint32_t*)_dst;
+		for (uint32_t ii = 0, num = _width*_height; ii < num; ++ii)
 		{
-			uint8_t tmp = ptr[0];
-			ptr[0] = ptr[2];
-			ptr[2] = tmp;
-			ptr += 4;
+			*dst++ = _solid;
 		}
 	}
 
-	void imageSwizzleBGRA8(uint8_t* _rgbaData, uint32_t _width, uint32_t _height)
+	/// Fill _dst with a _width x _height checker pattern of 32-bit pixels.
+	/// Cells are _step pixels on a side and alternate between _0 and _1.
+	void imageChessboard(uint32_t _width, uint32_t _height, uint32_t _step, uint32_t _0, uint32_t _1, void* _dst)
+	{
+		uint32_t* dst = (uint32_t*)_dst;
+		for (uint32_t yy = 0; yy < _height; ++yy)
+		{
+			for (uint32_t xx = 0; xx < _width; ++xx)
+			{
+				uint32_t abgr = ( (xx/_step)&1) ^ ( (yy/_step)&1) ? _1 : _0;
+				*dst++ = abgr;
+			}
+		}
+	}
+
+	/// Scalar reference implementation of the 2x2 RGBA8 downsample.
+	/// RGB is raised to 2.2 before averaging and back to 1/2.2 after;
+	/// alpha is averaged as is. Output rows are written tightly packed.
+	/// Does nothing if the output would have zero width or height.
+	void imageRgba8Downsample2x2Ref(uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, void* _dst)
+	{
+		const uint32_t dstwidth = _width/2;
+		const uint32_t dstheight = _height/2;
+
+		if (0 == dstwidth
+		|| 0 == dstheight)
+		{
+			return;
+		}
+
+		uint8_t* dst = (uint8_t*)_dst;
+		const uint8_t* src = (const uint8_t*)_src;
+
+		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
+		{
+			const uint8_t* rgba = src;
+			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 8, dst += 4)
+			{
+				float rr = powf(rgba[ 0], 2.2f);
+				float gg = powf(rgba[ 1], 2.2f);
+				float bb = powf(rgba[ 2], 2.2f);
+				float aa = rgba[ 3];
+				rr += powf(rgba[ 4], 2.2f);
+				gg += powf(rgba[ 5], 2.2f);
+				bb += powf(rgba[ 6], 2.2f);
+				aa += rgba[ 7];
+				rr += powf(rgba[_pitch+0], 2.2f);
+				gg += powf(rgba[_pitch+1], 2.2f);
+				bb += powf(rgba[_pitch+2], 2.2f);
+				aa += rgba[_pitch+3];
+				rr += powf(rgba[_pitch+4], 2.2f);
+				gg += powf(rgba[_pitch+5], 2.2f);
+				bb += powf(rgba[_pitch+6], 2.2f);
+				aa += rgba[_pitch+7];
+
+				rr *= 0.25f;
+				gg *= 0.25f;
+				bb *= 0.25f;
+				aa *= 0.25f;
+				rr = powf(rr, 1.0f/2.2f);
+				gg = powf(gg, 1.0f/2.2f);
+				bb = powf(bb, 1.0f/2.2f);
+				aa = aa;
+				dst[0] = (uint8_t)rr;
+				dst[1] = (uint8_t)gg;
+				dst[2] = (uint8_t)bb;
+				dst[3] = (uint8_t)aa;
+			}
+		}
+	}
+
+	/// float4_t implementation of the 2x2 RGBA8 downsample. Mirrors the
+	/// loop structure and early-out of imageRgba8Downsample2x2Ref.
+	void imageRgba8Downsample2x2(uint32_t _width, uint32_t _height, uint32_t _pitch, const void* _src, void* _dst)
+	{
+		const uint32_t dstwidth = _width/2;
+		const uint32_t dstheight = _height/2;
+
+		if (0 == dstwidth
+		|| 0 == dstheight)
+		{
+			return;
+		}
+
+		uint8_t* dst = (uint8_t*)_dst;
+		const uint8_t* src = (const uint8_t*)_src;
+
+		using namespace bx;
+		const float4_t unpack = float4_ld(1.0f, 1.0f/256.0f, 1.0f/65536.0f, 1.0f/16777216.0f);
+		const float4_t pack = float4_ld(1.0f, 256.0f*0.5f, 65536.0f, 16777216.0f*0.5f);
+		const float4_t umask = float4_ild(0xff, 0xff00, 0xff0000, 0xff000000);
+		const float4_t pmask = float4_ild(0xff, 0x7f80, 0xff0000, 0x7f800000);
+		const float4_t wflip = float4_ild(0, 0, 0, 0x80000000);
+		const float4_t wadd = float4_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f);
+		const float4_t gamma = float4_ld(1.0f/2.2f, 1.0f/2.2f, 1.0f/2.2f, 1.0f);
+		const float4_t linear = float4_ld(2.2f, 2.2f, 2.2f, 1.0f);
+		const float4_t quater = float4_splat(0.25f);
+
+		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
+		{
+			const uint8_t* rgba = src;
+			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 8, dst += 4)
+			{
+				const float4_t abgr0 = float4_splat(rgba);
+				const float4_t abgr1 = float4_splat(rgba+4);
+				const float4_t abgr2 = float4_splat(rgba+_pitch);
+				const float4_t abgr3 = float4_splat(rgba+_pitch+4);
+
+				const float4_t abgr0m = float4_and(abgr0, umask);
+				const float4_t abgr1m = float4_and(abgr1, umask);
+				const float4_t abgr2m = float4_and(abgr2, umask);
+				const float4_t abgr3m = float4_and(abgr3, umask);
+				const float4_t abgr0x = float4_xor(abgr0m, wflip);
+				const float4_t abgr1x = float4_xor(abgr1m, wflip);
+				const float4_t abgr2x = float4_xor(abgr2m, wflip);
+				const float4_t abgr3x = float4_xor(abgr3m, wflip);
+				const float4_t abgr0f = float4_itof(abgr0x);
+				const float4_t abgr1f = float4_itof(abgr1x);
+				const float4_t abgr2f = float4_itof(abgr2x);
+				const float4_t abgr3f = float4_itof(abgr3x);
+				const float4_t abgr0c = float4_add(abgr0f, wadd);
+				const float4_t abgr1c = float4_add(abgr1f, wadd);
+				const float4_t abgr2c = float4_add(abgr2f, wadd);
+				const float4_t abgr3c = float4_add(abgr3f, wadd);
+				const float4_t abgr0n = float4_mul(abgr0c, unpack);
+				const float4_t abgr1n = float4_mul(abgr1c, unpack);
+				const float4_t abgr2n = float4_mul(abgr2c, unpack);
+				const float4_t abgr3n = float4_mul(abgr3c, unpack);
+
+				const float4_t abgr0l = float4_pow(abgr0n, linear);
+				const float4_t abgr1l = float4_pow(abgr1n, linear);
+				const float4_t abgr2l = float4_pow(abgr2n, linear);
+				const float4_t abgr3l = float4_pow(abgr3n, linear);
+
+				const float4_t sum0 = float4_add(abgr0l, abgr1l);
+				const float4_t sum1 = float4_add(abgr2l, abgr3l);
+				const float4_t sum2 = float4_add(sum0, sum1);
+				const float4_t avg0 = float4_mul(sum2, quater);
+				const float4_t avg1 = float4_pow(avg0, gamma);
+
+				const float4_t avg2 = float4_mul(avg1, pack);
+				const float4_t ftoi0 = float4_ftoi(avg2);
+				const float4_t ftoi1 = float4_and(ftoi0, pmask);
+				const float4_t zwxy = float4_swiz_zwxy(ftoi1);
+				const float4_t tmp0 = float4_or(ftoi1, zwxy);
+				const float4_t yyyy = float4_swiz_yyyy(tmp0);
+				const float4_t tmp1 = float4_iadd(yyyy, yyyy);
+				const float4_t result = float4_or(tmp0, tmp1);
+
+				float4_stx(dst, result);
+			}
+		}
+	}
+
+	/// Scalar RGBA8 <-> BGRA8 swizzle: swaps bytes 0 and 2 of every pixel.
+	/// Treats the image as _width*_height contiguous pixels.
+	static void imageSwizzleBgra8Ref(uint32_t _width, uint32_t _height, const void* _src, void* _dst)
+	{
+		const uint8_t* src = (uint8_t*) _src;
+		uint8_t* dst = (uint8_t*)_dst;
+
+		for (uint32_t xx = 0, num = _width*_height; xx < num; ++xx, src += 4, dst += 4)
+		{
+			uint8_t rr = src[0];
+			uint8_t gg = src[1];
+			uint8_t bb = src[2];
+			uint8_t aa = src[3];
+			dst[0] = bb;
+			dst[1] = gg;
+			dst[2] = rr;
+			dst[3] = aa;
+		}
+	}
+
+	void imageSwizzleBgra8(uint32_t _width, uint32_t _height, const void* _src, void* _dst)
 	{
 		// Test can we do four 4-byte pixels at the time.
 		if (0 != (_width&0x3)
 		|| _width < 4)
 		{
 			BX_WARN(_width < 4, "Image swizzle is taking slow path. Image width must be multiple of 4 (width %d).", _width);
-			imageSwizzleBGRA8Ref(_rgbaData, _width, _height);
+			imageSwizzleBgra8Ref(_width, _height, _src, _dst);
 			return;
 		}
 
@@ -38,19 +209,63 @@ namespace bgfx
 		const float4_t mf0f0 = float4_isplat(0xff00ff00);
 		const float4_t m0f0f = float4_isplat(0x00ff00ff);
 
-		uint8_t* ptr = _rgbaData;
+		const uint8_t* src = (uint8_t*) _src;
+		uint8_t* dst = (uint8_t*)_dst;
 
-		for (uint32_t xx = 0, num = dstpitch/16*_height; xx < num; ++xx)
+		for (uint32_t xx = 0, num = dstpitch/16*_height; xx < num; ++xx, src += 16, dst += 16)
 		{
-			const float4_t tabgr = float4_ld(ptr);
+			const float4_t tabgr = float4_ld(src);
 			const float4_t t00ab = float4_srl(tabgr, 16);
 			const float4_t tgr00 = float4_sll(tabgr, 16);
 			const float4_t tgrab = float4_or(t00ab, tgr00);
 			const float4_t ta0g0 = float4_and(tabgr, mf0f0);
 			const float4_t t0r0b = float4_and(tgrab, m0f0f);
 			const float4_t targb = float4_or(ta0g0, t0r0b);
-			float4_st(ptr, targb);
-			ptr += 16;
+			float4_st(dst, targb);
+		}
+	}
+
+	/// Write image as TGA to _writer: an 18-byte header followed by
+	/// _height rows of _width pixels (8 bits per pixel when _grayscale,
+	/// 32 otherwise). When _yflip is set, rows are written last to first.
+	void imageWriteTga(bx::WriterI* _writer, uint32_t _width, uint32_t _height, uint32_t _srcPitch, const void* _src, bool _grayscale, bool _yflip)
+	{
+		uint8_t type = _grayscale ? 3 : 2;
+		uint8_t bpp = _grayscale ? 8 : 32;
+
+		uint8_t header[18] = {};
+		header[2] = type;
+		header[12] = _width&0xff;
+		header[13] = (_width>>8)&0xff;
+		header[14] = _height&0xff;
+		header[15] = (_height>>8)&0xff;
+		header[16] = bpp;
+		header[17] = 32;
+
+		bx::write(_writer, header, sizeof(header) );
+
+		uint32_t dstPitch = _width*bpp/8;
+		if (_yflip)
+		{
+			uint8_t* data = (uint8_t*)_src + _srcPitch*_height - _srcPitch;
+			for (uint32_t yy = 0; yy < _height; ++yy)
+			{
+				bx::write(_writer, data, dstPitch);
+				data -= _srcPitch;
+			}
+		}
+		else if (_srcPitch == dstPitch)
+		{
+			bx::write(_writer, _src, _height*_srcPitch);
+		}
+		else
+		{
+			uint8_t* data = (uint8_t*)_src;
+			for (uint32_t yy = 0; yy < _height; ++yy)
+			{
+				bx::write(_writer, data, dstPitch);
+				data += _srcPitch;
+			}
 		}
 	}
 
diff --git a/src/renderer_gl.cpp b/src/renderer_gl.cpp
index 5efdb43d..52eaf991 100644
--- a/src/renderer_gl.cpp
+++ b/src/renderer_gl.cpp
@@ -758,7 +758,7 @@ namespace bgfx
 
 			if (GL_RGBA == fmt)
 			{
-				imageSwizzleBGRA8(data, width, height);
+				imageSwizzleBgra8(width, height, data, data);
 			}
 
 			g_callback->screenShot(_filePath
@@ -1455,7 +1455,7 @@ namespace bgfx
 
 						if (swizzle)
 						{
-							imageSwizzleBGRA8(bits, width, height);
+							imageSwizzleBgra8(width, height, bits, bits);
 						}
 
 						texImage(target+side
@@ -1604,7 +1604,7 @@ namespace bgfx
 
 						if (NULL != data && swizzle)
 						{
-							imageSwizzleBGRA8(data, width, height);
+							imageSwizzleBgra8(width, height, data, data);
 						}
 
 						texImage(target+side