diff --git a/src/amalgamated.cpp b/src/amalgamated.cpp index 2a88eeab..042475d8 100644 --- a/src/amalgamated.cpp +++ b/src/amalgamated.cpp @@ -17,4 +17,7 @@ #include "renderer_null.cpp" #include "renderer_gl.cpp" #include "renderer_vk.cpp" +#include "shader_dxbc.cpp" +#include "shader_dx9bc.cpp" +#include "shader_spirv.cpp" #include "vertexdecl.cpp" diff --git a/src/bgfx_p.h b/src/bgfx_p.h index 76237360..0407cf77 100644 --- a/src/bgfx_p.h +++ b/src/bgfx_p.h @@ -140,11 +140,13 @@ namespace bgfx # include # include # include +# include namespace stl = tinystl; #else # include # include # include +# include namespace stl { using namespace std; diff --git a/src/renderer_d3d12.cpp b/src/renderer_d3d12.cpp index 03736ea4..6d91cfc7 100644 --- a/src/renderer_d3d12.cpp +++ b/src/renderer_d3d12.cpp @@ -6,10 +6,3998 @@ #include "bgfx_p.h" #if BGFX_CONFIG_RENDERER_DIRECT3D12 -# include "../../bgfx-ext/src/renderer_d3d12.cpp" -#else +# include "renderer_d3d12.h" + +# if !USE_D3D12_DYNAMIC_LIB +# pragma comment(lib, "D3D12.lib") +# endif // !USE_D3D12_DYNAMIC_LIB namespace bgfx { namespace d3d12 +{ + static wchar_t s_viewNameW[BGFX_CONFIG_MAX_VIEWS][256]; + + struct PrimInfo + { + D3D_PRIMITIVE_TOPOLOGY m_toplogy; + D3D12_PRIMITIVE_TOPOLOGY_TYPE m_topologyType; + uint32_t m_min; + uint32_t m_div; + uint32_t m_sub; + }; + + static const PrimInfo s_primInfo[] = + { + { D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, 3, 3, 0 }, + { D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, 3, 1, 2 }, + { D3D_PRIMITIVE_TOPOLOGY_LINELIST, D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, 2, 2, 0 }, + { D3D_PRIMITIVE_TOPOLOGY_POINTLIST, D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, 1, 1, 0 }, + { D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED, 0, 0, 0 }, + }; + + static const char* s_primName[] = + { + "TriList", + "TriStrip", + "Line", + "Point", + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_primInfo) == BX_COUNTOF(s_primName)+1); + + 
static const uint32_t s_checkMsaa[] = + { + 0, + 2, + 4, + 8, + 16, + }; + + static DXGI_SAMPLE_DESC s_msaa[] = + { + { 1, 0 }, + { 2, 0 }, + { 4, 0 }, + { 8, 0 }, + { 16, 0 }, + }; + + static const D3D12_BLEND s_blendFactor[][2] = + { + { (D3D12_BLEND)0, (D3D12_BLEND)0 }, // ignored + { D3D12_BLEND_ZERO, D3D12_BLEND_ZERO }, // ZERO + { D3D12_BLEND_ONE, D3D12_BLEND_ONE }, // ONE + { D3D12_BLEND_SRC_COLOR, D3D12_BLEND_SRC_ALPHA }, // SRC_COLOR + { D3D12_BLEND_INV_SRC_COLOR, D3D12_BLEND_INV_SRC_ALPHA }, // INV_SRC_COLOR + { D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_SRC_ALPHA }, // SRC_ALPHA + { D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA }, // INV_SRC_ALPHA + { D3D12_BLEND_DEST_ALPHA, D3D12_BLEND_DEST_ALPHA }, // DST_ALPHA + { D3D12_BLEND_INV_DEST_ALPHA, D3D12_BLEND_INV_DEST_ALPHA }, // INV_DST_ALPHA + { D3D12_BLEND_DEST_COLOR, D3D12_BLEND_DEST_ALPHA }, // DST_COLOR + { D3D12_BLEND_INV_DEST_COLOR, D3D12_BLEND_INV_DEST_ALPHA }, // INV_DST_COLOR + { D3D12_BLEND_SRC_ALPHA_SAT, D3D12_BLEND_ONE }, // SRC_ALPHA_SAT + { D3D12_BLEND_BLEND_FACTOR, D3D12_BLEND_BLEND_FACTOR }, // FACTOR + { D3D12_BLEND_INV_BLEND_FACTOR, D3D12_BLEND_INV_BLEND_FACTOR }, // INV_FACTOR + }; + + static const D3D12_BLEND_OP s_blendEquation[] = + { + D3D12_BLEND_OP_ADD, + D3D12_BLEND_OP_SUBTRACT, + D3D12_BLEND_OP_REV_SUBTRACT, + D3D12_BLEND_OP_MIN, + D3D12_BLEND_OP_MAX, + }; + + static const D3D12_COMPARISON_FUNC s_cmpFunc[] = + { + D3D12_COMPARISON_FUNC(0), // ignored + D3D12_COMPARISON_FUNC_LESS, + D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_COMPARISON_FUNC_EQUAL, + D3D12_COMPARISON_FUNC_GREATER_EQUAL, + D3D12_COMPARISON_FUNC_GREATER, + D3D12_COMPARISON_FUNC_NOT_EQUAL, + D3D12_COMPARISON_FUNC_NEVER, + D3D12_COMPARISON_FUNC_ALWAYS, + }; + + static const D3D12_STENCIL_OP s_stencilOp[] = + { + D3D12_STENCIL_OP_ZERO, + D3D12_STENCIL_OP_KEEP, + D3D12_STENCIL_OP_REPLACE, + D3D12_STENCIL_OP_INCR, + D3D12_STENCIL_OP_INCR_SAT, + D3D12_STENCIL_OP_DECR, + D3D12_STENCIL_OP_DECR_SAT, + D3D12_STENCIL_OP_INVERT, + 
}; + + static const D3D12_CULL_MODE s_cullMode[] = + { + D3D12_CULL_MODE_NONE, + D3D12_CULL_MODE_FRONT, + D3D12_CULL_MODE_BACK, + }; + + static const D3D12_TEXTURE_ADDRESS_MODE s_textureAddress[] = + { + D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + }; + + /* + * D3D11_FILTER_MIN_MAG_MIP_POINT = 0x00, + * D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR = 0x01, + * D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT = 0x04, + * D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR = 0x05, + * D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT = 0x10, + * D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR = 0x11, + * D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT = 0x14, + * D3D11_FILTER_MIN_MAG_MIP_LINEAR = 0x15, + * D3D11_FILTER_ANISOTROPIC = 0x55, + * + * D3D11_COMPARISON_FILTERING_BIT = 0x80, + * D3D11_ANISOTROPIC_FILTERING_BIT = 0x40, + * + * According to D3D11_FILTER enum bits for mip, mag and mip are: + * 0x10 // MIN_LINEAR + * 0x04 // MAG_LINEAR + * 0x01 // MIP_LINEAR + */ + + static const uint8_t s_textureFilter[3][3] = + { + { + 0x10, // min linear + 0x00, // min point + 0x55, // anisotropic + }, + { + 0x04, // mag linear + 0x00, // mag point + 0x55, // anisotropic + }, + { + 0x01, // mip linear + 0x00, // mip point + 0x55, // anisotropic + }, + }; + + struct TextureFormatInfo + { + DXGI_FORMAT m_fmt; + DXGI_FORMAT m_fmtSrv; + DXGI_FORMAT m_fmtDsv; + }; + + static const TextureFormatInfo s_textureFormat[] = + { + { DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_UNKNOWN }, // BC1 + { DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_UNKNOWN }, // BC2 + { DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_UNKNOWN }, // BC3 + { DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_UNKNOWN }, // BC4 + { DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_UNKNOWN }, // BC5 + { DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_UNKNOWN }, // BC6H + { DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_UNKNOWN }, // 
BC7 + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // ETC1 + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // ETC2 + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // ETC2A + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // ETC2A1 + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // PTC12 + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // PTC14 + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // PTC12A + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // PTC14A + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // PTC22 + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // PTC24 + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // Unknown + { DXGI_FORMAT_R1_UNORM, DXGI_FORMAT_R1_UNORM, DXGI_FORMAT_UNKNOWN }, // R1 + { DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_UNKNOWN }, // R8 + { DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_UNKNOWN }, // R16 + { DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_UNKNOWN }, // R16F + { DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_UNKNOWN }, // R32 + { DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN }, // R32F + { DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_UNKNOWN }, // RG8 + { DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_UNKNOWN }, // RG16 + { DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RG16F + { DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_UNKNOWN }, // RG32 + { DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RG32F + { DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_UNKNOWN }, // BGRA8 + { DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN }, // RGBA8 + { DXGI_FORMAT_R16G16B16A16_UNORM, 
DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_UNKNOWN }, // RGBA16 + { DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA16F + { DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_UNKNOWN }, // RGBA32 + { DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA32F + { DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_UNKNOWN }, // R5G6B5 + { DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_UNKNOWN }, // RGBA4 + { DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB5A1 + { DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB10A2 + { DXGI_FORMAT_R11G11B10_FLOAT, DXGI_FORMAT_R11G11B10_FLOAT, DXGI_FORMAT_UNKNOWN }, // R11G11B10F + { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // UnknownDepth + { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_D16_UNORM }, // D16 + { DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D24 + { DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D24S8 + { DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D32 + { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT }, // D16F + { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT }, // D24F + { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT }, // D32F + { DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT }, // D0S8 + }; + BX_STATIC_ASSERT(TextureFormat::Count == BX_COUNTOF(s_textureFormat) ); + + static const D3D12_INPUT_ELEMENT_DESC s_attrib[] = + { + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, 
DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TANGENT", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "BITANGENT", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "COLOR", 1, DXGI_FORMAT_R8G8B8A8_UINT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "BLENDINDICES", 0, DXGI_FORMAT_R8G8B8A8_UINT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "BLENDWEIGHT", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 1, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 2, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 3, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 4, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 5, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 6, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 7, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + }; + BX_STATIC_ASSERT(Attrib::Count == BX_COUNTOF(s_attrib) ); + + static const DXGI_FORMAT 
s_attribType[][4][2] = + { + { + { DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_UNORM }, + { DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_UNORM }, + { DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_UNORM }, + { DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_UNORM }, + }, + { + { DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_SNORM }, + { DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_SNORM }, + { DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_SNORM }, + { DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_SNORM }, + }, + { + { DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT }, + { DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT }, + { DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT }, + { DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT }, + }, + { + { DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT }, + { DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT }, + { DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_FLOAT }, + { DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT }, + }, + }; + BX_STATIC_ASSERT(AttribType::Count == BX_COUNTOF(s_attribType) ); + + static D3D12_INPUT_ELEMENT_DESC* fillVertexDecl(D3D12_INPUT_ELEMENT_DESC* _out, const VertexDecl& _decl) + { + D3D12_INPUT_ELEMENT_DESC* elem = _out; + + for (uint32_t attr = 0; attr < Attrib::Count; ++attr) + { + if (0xff != _decl.m_attributes[attr]) + { + memcpy(elem, &s_attrib[attr], sizeof(D3D12_INPUT_ELEMENT_DESC) ); + + if (0 == _decl.m_attributes[attr]) + { + elem->AlignedByteOffset = 0; + } + else + { + uint8_t num; + AttribType::Enum type; + bool normalized; + bool asInt; + _decl.decode(Attrib::Enum(attr), num, type, normalized, asInt); + elem->Format = s_attribType[type][num-1][normalized]; + elem->AlignedByteOffset = _decl.m_offset[attr]; + } + + ++elem; + } + } + + return elem; + } + + void setResourceBarrier(ID3D12GraphicsCommandList* _commandList, ID3D12Resource* _resource, D3D12_RESOURCE_STATES _stateBefore, D3D12_RESOURCE_STATES _stateAfter) + { + 
D3D12_RESOURCE_BARRIER barrier; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = _resource; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = _stateBefore; + barrier.Transition.StateAfter = _stateAfter; + _commandList->ResourceBarrier(1, &barrier); + } + + struct HeapProperty + { + enum Enum + { + Default, + Upload, + + Count + }; + + D3D12_HEAP_PROPERTIES m_properties; + D3D12_RESOURCE_STATES m_state; + }; + + static const HeapProperty s_heapProperties[] = + { + { { D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 1, 1 }, D3D12_RESOURCE_STATE_COMMON }, + { { D3D12_HEAP_TYPE_UPLOAD, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 1, 1 }, D3D12_RESOURCE_STATE_GENERIC_READ }, + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_heapProperties) == HeapProperty::Count); + + ID3D12Resource* createCommittedResource(ID3D12Device* _device, HeapProperty::Enum _heapProperty, D3D12_RESOURCE_DESC* _resourceDesc, D3D12_CLEAR_VALUE* _clearValue) + { + const HeapProperty& heapProperty = s_heapProperties[_heapProperty]; + ID3D12Resource* resource; + DX_CHECK(_device->CreateCommittedResource(&heapProperty.m_properties + , D3D12_HEAP_FLAG_NONE + , _resourceDesc + , heapProperty.m_state + , _clearValue + , __uuidof(ID3D12Resource) + , (void**)&resource + ) ); + + return resource; + } + + ID3D12Resource* createCommittedResource(ID3D12Device* _device, HeapProperty::Enum _heapProperty, uint64_t _size, D3D12_RESOURCE_FLAGS _flags = D3D12_RESOURCE_FLAG_NONE) + { + D3D12_RESOURCE_DESC resourceDesc; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resourceDesc.Alignment = 0; + resourceDesc.Width = _size; + resourceDesc.Height = 1; + resourceDesc.DepthOrArraySize = 1; + resourceDesc.MipLevels = 1; + resourceDesc.Format = DXGI_FORMAT_UNKNOWN; + resourceDesc.SampleDesc.Count = 1; + 
resourceDesc.SampleDesc.Quality = 0; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resourceDesc.Flags = _flags; + + return createCommittedResource(_device, _heapProperty, &resourceDesc, NULL); + } + + BX_NO_INLINE void setDebugObjectName(ID3D12Object* _object, const char* _format, ...) + { + if (BX_ENABLED(BGFX_CONFIG_DEBUG_OBJECT_NAME) ) + { + char temp[2048]; + va_list argList; + va_start(argList, _format); + int size = bx::uint32_min(sizeof(temp)-1, vsnprintf(temp, sizeof(temp), _format, argList) ); + va_end(argList); + temp[size] = '\0'; + + wchar_t* wtemp = (wchar_t*)alloca( (size+1)*2); + mbstowcs(wtemp, temp, size+1); + _object->SetName(wtemp); + } + } + +#if USE_D3D12_DYNAMIC_LIB + static PFN_D3D12_CREATE_DEVICE D3D12CreateDevice; + static PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface; + static PFN_D3D12_SERIALIZE_ROOT_SIGNATURE D3D12SerializeRootSignature; + static PFN_CREATE_DXGI_FACTORY CreateDXGIFactory1; +#endif // USE_D3D12_DYNAMIC_LIB + + struct RendererContextD3D12 : public RendererContextI + { + RendererContextD3D12() + : m_wireframe(false) + , m_flags(BGFX_RESET_NONE) + , m_fsChanges(0) + , m_vsChanges(0) + , m_frame(0) + , m_backBufferColorIdx(0) + , m_rtMsaa(false) + { + } + + void init() + { + m_fbh.idx = invalidHandle; + memset(m_uniforms, 0, sizeof(m_uniforms) ); + memset(&m_resolution, 0, sizeof(m_resolution) ); + +#if USE_D3D12_DYNAMIC_LIB + m_d3d12dll = bx::dlopen("d3d12.dll"); + BGFX_FATAL(NULL != m_d3d12dll, Fatal::UnableToInitialize, "Failed to load d3d12.dll."); + + D3D12CreateDevice = (PFN_D3D12_CREATE_DEVICE)bx::dlsym(m_d3d12dll, "D3D12CreateDevice"); + BGFX_FATAL(NULL != D3D12CreateDevice, Fatal::UnableToInitialize, "Function D3D12CreateDevice not found."); + + D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)bx::dlsym(m_d3d12dll, "D3D12GetDebugInterface"); + BGFX_FATAL(NULL != D3D12GetDebugInterface, Fatal::UnableToInitialize, "Function D3D12GetDebugInterface not found."); + + D3D12SerializeRootSignature = 
(PFN_D3D12_SERIALIZE_ROOT_SIGNATURE)bx::dlsym(m_d3d12dll, "D3D12SerializeRootSignature"); + BGFX_FATAL(NULL != D3D12SerializeRootSignature, Fatal::UnableToInitialize, "Function D3D12SerializeRootSignature not found."); + + m_dxgidll = bx::dlopen("dxgi.dll"); + BGFX_FATAL(NULL != m_dxgidll, Fatal::UnableToInitialize, "Failed to load dxgi.dll."); + + CreateDXGIFactory1 = (PFN_CREATE_DXGI_FACTORY)bx::dlsym(m_dxgidll, "CreateDXGIFactory1"); + BGFX_FATAL(NULL != CreateDXGIFactory1, Fatal::UnableToInitialize, "Function CreateDXGIFactory1 not found."); +#endif // USE_D3D12_DYNAMIC_LIB + + HRESULT hr; + + hr = CreateDXGIFactory1(__uuidof(IDXGIFactory), (void**)&m_factory); + BGFX_FATAL(SUCCEEDED(hr), Fatal::UnableToInitialize, "Unable to create DXGI factory."); + + m_adapter = NULL; + m_driverType = D3D_DRIVER_TYPE_HARDWARE; + + IDXGIAdapter* adapter; + for (uint32_t ii = 0; DXGI_ERROR_NOT_FOUND != m_factory->EnumAdapters(ii, &adapter); ++ii) + { + DXGI_ADAPTER_DESC desc; + hr = adapter->GetDesc(&desc); + if (SUCCEEDED(hr) ) + { + BX_TRACE("Adapter #%d", ii); + + char description[BX_COUNTOF(desc.Description)]; + wcstombs(description, desc.Description, BX_COUNTOF(desc.Description) ); + BX_TRACE("\tDescription: %s", description); + BX_TRACE("\tVendorId: 0x%08x, DeviceId: 0x%08x, SubSysId: 0x%08x, Revision: 0x%08x" + , desc.VendorId + , desc.DeviceId + , desc.SubSysId + , desc.Revision + ); + BX_TRACE("\tMemory: %" PRIi64 " (video), %" PRIi64 " (system), %" PRIi64 " (shared)" + , desc.DedicatedVideoMemory + , desc.DedicatedSystemMemory + , desc.SharedSystemMemory + ); + + g_caps.gpu[ii].vendorId = (uint16_t)desc.VendorId; + g_caps.gpu[ii].deviceId = (uint16_t)desc.DeviceId; + ++g_caps.numGPUs; + + if ( (BGFX_PCI_ID_NONE != g_caps.vendorId || 0 != g_caps.deviceId) + && (BGFX_PCI_ID_NONE == g_caps.vendorId || desc.VendorId == g_caps.vendorId) + && (0 == g_caps.deviceId || desc.DeviceId == g_caps.deviceId) ) + { + m_adapter = adapter; + m_adapter->AddRef(); + m_driverType = 
D3D_DRIVER_TYPE_UNKNOWN; + } + + if (BX_ENABLED(BGFX_CONFIG_DEBUG_PERFHUD) + && 0 != strstr(description, "PerfHUD") ) + { + m_adapter = adapter; + m_driverType = D3D_DRIVER_TYPE_REFERENCE; + } + } + + DX_RELEASE(adapter, adapter == m_adapter ? 1 : 0); + } + + if (BX_ENABLED(BGFX_CONFIG_DEBUG) ) + { + ID3D12Debug* debug; + hr = D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void**)&debug); + + if (SUCCEEDED(hr) ) + { + debug->EnableDebugLayer(); + } + } + + hr = D3D12CreateDevice(m_adapter + , D3D_FEATURE_LEVEL_11_0 + , __uuidof(ID3D12Device) + , (void**)&m_device + ); + BGFX_FATAL(SUCCEEDED(hr), Fatal::UnableToInitialize, "Unable to create Direct3D12 device."); + + if (NULL != m_adapter) + { + DX_RELEASE(m_adapter, 2); + } + + memset(&m_adapterDesc, 0, sizeof(m_adapterDesc) ); + LUID luid = m_device->GetAdapterLuid(); + for (uint32_t ii = 0; DXGI_ERROR_NOT_FOUND != m_factory->EnumAdapters(ii, &adapter); ++ii) + { + adapter->GetDesc(&m_adapterDesc); + if (m_adapterDesc.AdapterLuid.LowPart == luid.LowPart + && m_adapterDesc.AdapterLuid.HighPart == luid.HighPart) + { + break; + } + } + + g_caps.vendorId = (uint16_t)m_adapterDesc.VendorId; + g_caps.deviceId = (uint16_t)m_adapterDesc.DeviceId; + + m_architecture.NodeIndex = 0; + DX_CHECK(m_device->CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE, &m_architecture, sizeof(m_architecture) ) ); + BX_TRACE("GPU Architecture, TileBasedRenderer %d, UMA %d, CacheCoherentUMA %d" + , m_architecture.TileBasedRenderer + , m_architecture.UMA + , m_architecture.CacheCoherentUMA + ); + + DX_CHECK(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &m_options, sizeof(m_options) ) ); + + m_cmd.init(m_device); + + m_scd.BufferDesc.Width = BGFX_DEFAULT_WIDTH; + m_scd.BufferDesc.Height = BGFX_DEFAULT_HEIGHT; + m_scd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_scd.BufferDesc.Scaling = DXGI_MODE_SCALING_STRETCHED; + m_scd.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED; + m_scd.BufferDesc.RefreshRate.Numerator 
= 60; + m_scd.BufferDesc.RefreshRate.Denominator = 1; + m_scd.SampleDesc.Count = 1; + m_scd.SampleDesc.Quality = 0; + m_scd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + m_scd.BufferCount = bx::uint32_min(BX_COUNTOF(m_backBufferColor), 4); + m_scd.OutputWindow = (HWND)g_platformData.nwh; + m_scd.Windowed = true; + m_scd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + m_scd.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + + BX_CHECK(m_scd.BufferCount <= BX_COUNTOF(m_backBufferColor), "Swap chain buffer count %d (max %d)." + , m_scd.BufferCount + , BX_COUNTOF(m_backBufferColor) + ); + hr = m_factory->CreateSwapChain(m_cmd.m_commandQueue + , &m_scd + , &m_swapChain + ); + BGFX_FATAL(SUCCEEDED(hr), Fatal::UnableToInitialize, "Failed to create swap chain."); + m_resolution.m_width = BGFX_DEFAULT_WIDTH; + m_resolution.m_height = BGFX_DEFAULT_HEIGHT; + + DX_CHECK(m_factory->MakeWindowAssociation( (HWND)g_platformData.nwh + , 0 + | DXGI_MWA_NO_WINDOW_CHANGES + | DXGI_MWA_NO_ALT_ENTER + ) ); + + m_numWindows = 1; + + if (BX_ENABLED(BGFX_CONFIG_DEBUG) ) + { + hr = m_device->QueryInterface(__uuidof(ID3D12InfoQueue), (void**)&m_infoQueue); + + if (SUCCEEDED(hr) ) + { + m_infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); + m_infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, false); + m_infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, false); + + D3D12_INFO_QUEUE_FILTER filter; + memset(&filter, 0, sizeof(filter) ); + + D3D12_MESSAGE_CATEGORY catlist[] = + { + D3D12_MESSAGE_CATEGORY_STATE_SETTING, + D3D12_MESSAGE_CATEGORY_EXECUTION, + }; + filter.DenyList.NumCategories = BX_COUNTOF(catlist); + filter.DenyList.pCategoryList = catlist; + m_infoQueue->PushStorageFilter(&filter); + + DX_RELEASE(m_infoQueue, 19); + } + } + + D3D12_DESCRIPTOR_HEAP_DESC rtvDescHeap; + rtvDescHeap.NumDescriptors = 0 + + BX_COUNTOF(m_backBufferColor) + + BGFX_CONFIG_MAX_FRAME_BUFFERS*BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS + ; + rtvDescHeap.Type = 
D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtvDescHeap.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + rtvDescHeap.NodeMask = 0; + DX_CHECK(m_device->CreateDescriptorHeap(&rtvDescHeap + , __uuidof(ID3D12DescriptorHeap) + , (void**)&m_rtvDescriptorHeap + ) ); + + D3D12_DESCRIPTOR_HEAP_DESC dsvDescHeap; + dsvDescHeap.NumDescriptors = 0 + + 1 // reserved for depth backbuffer. + + BGFX_CONFIG_MAX_FRAME_BUFFERS + ; + dsvDescHeap.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + dsvDescHeap.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + dsvDescHeap.NodeMask = 0; + DX_CHECK(m_device->CreateDescriptorHeap(&dsvDescHeap + , __uuidof(ID3D12DescriptorHeap) + , (void**)&m_dsvDescriptorHeap + ) ); + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_scratchBuffer); ++ii) + { + m_scratchBuffer[ii].create(BGFX_CONFIG_MAX_DRAW_CALLS*1024 + , BGFX_CONFIG_MAX_TEXTURES + BGFX_CONFIG_MAX_SHADERS + BGFX_CONFIG_MAX_DRAW_CALLS + ); + } + m_samplerAllocator.create(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + , 1024 + , BGFX_CONFIG_MAX_TEXTURE_SAMPLERS + ); + + D3D12_DESCRIPTOR_RANGE descRange[] = + { + { D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }, + { D3D12_DESCRIPTOR_RANGE_TYPE_SRV, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }, + { D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }, + { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, BGFX_CONFIG_MAX_TEXTURE_SAMPLERS, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }, + }; + BX_STATIC_ASSERT(BX_COUNTOF(descRange) == Rdt::Count); + + D3D12_ROOT_PARAMETER rootParameter[] = + { + { D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, { 1, &descRange[Rdt::Sampler] }, D3D12_SHADER_VISIBILITY_ALL }, + { D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, { 1, &descRange[Rdt::SRV] }, D3D12_SHADER_VISIBILITY_ALL }, + { D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, { 1, &descRange[Rdt::CBV] }, D3D12_SHADER_VISIBILITY_ALL }, +// { D3D12_ROOT_PARAMETER_TYPE_CBV, { 0, 0 }, 
D3D12_SHADER_VISIBILITY_ALL }, + { D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, { 1, &descRange[Rdt::UAV] }, D3D12_SHADER_VISIBILITY_ALL }, + }; +// rootParameter[Rdt::CBV].Constants.ShaderRegister = 0; +// rootParameter[Rdt::CBV].Constants.RegisterSpace = 100; +// rootParameter[Rdt::CBV].Constants.Num32BitValues = 0; + + D3D12_ROOT_SIGNATURE_DESC descRootSignature; + descRootSignature.NumParameters = BX_COUNTOF(rootParameter); + descRootSignature.pParameters = rootParameter; + descRootSignature.NumStaticSamplers = 0; + descRootSignature.pStaticSamplers = NULL; + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + ID3DBlob* outBlob; + ID3DBlob* errorBlob; + DX_CHECK(D3D12SerializeRootSignature(&descRootSignature + , D3D_ROOT_SIGNATURE_VERSION_1 + , &outBlob + , &errorBlob + ) ); + + DX_CHECK(m_device->CreateRootSignature(0 + , outBlob->GetBufferPointer() + , outBlob->GetBufferSize() + , __uuidof(ID3D12RootSignature) + , (void**)&m_rootSignature + ) ); + + UniformHandle handle = BGFX_INVALID_HANDLE; + for (uint32_t ii = 0; ii < PredefinedUniform::Count; ++ii) + { + m_uniformReg.add(handle, getPredefinedUniformName(PredefinedUniform::Enum(ii) ), &m_predefinedUniforms[ii]); + } + + g_caps.supported |= ( 0 + | BGFX_CAPS_TEXTURE_3D + | BGFX_CAPS_TEXTURE_COMPARE_ALL + | BGFX_CAPS_INSTANCING + | BGFX_CAPS_VERTEX_ATTRIB_HALF + | BGFX_CAPS_FRAGMENT_DEPTH + | BGFX_CAPS_BLEND_INDEPENDENT + | BGFX_CAPS_COMPUTE + | BGFX_CAPS_FRAGMENT_ORDERING +// | BGFX_CAPS_SWAP_CHAIN + ); + g_caps.maxTextureSize = 16384; + g_caps.maxFBAttachments = bx::uint32_min(16, BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS); + + for (uint32_t ii = 0; ii < TextureFormat::Count; ++ii) + { + uint8_t support = BGFX_CAPS_FORMAT_TEXTURE_NONE; + + const DXGI_FORMAT fmt = isDepth(TextureFormat::Enum(ii) ) + ? 
s_textureFormat[ii].m_fmtDsv + : s_textureFormat[ii].m_fmt + ; + + if (DXGI_FORMAT_UNKNOWN != fmt) + { + D3D12_FEATURE_DATA_FORMAT_SUPPORT data; + data.Format = fmt; + hr = m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &data, sizeof(data) ); + if (SUCCEEDED(hr) ) + { + support |= 0 != (data.Support1 & (0 + | D3D12_FORMAT_SUPPORT1_TEXTURE2D + | D3D12_FORMAT_SUPPORT1_TEXTURE3D + | D3D12_FORMAT_SUPPORT1_TEXTURECUBE + ) ) + ? BGFX_CAPS_FORMAT_TEXTURE_COLOR + : BGFX_CAPS_FORMAT_TEXTURE_NONE + ; + + support |= 0 != (data.Support1 & (0 + | D3D12_FORMAT_SUPPORT1_BUFFER + | D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER + | D3D12_FORMAT_SUPPORT1_IA_INDEX_BUFFER + ) ) + ? BGFX_CAPS_FORMAT_TEXTURE_VERTEX + : BGFX_CAPS_FORMAT_TEXTURE_NONE + ; + + support |= 0 != (data.Support1 & (0 + | D3D12_FORMAT_SUPPORT1_SHADER_LOAD + ) ) + ? BGFX_CAPS_FORMAT_TEXTURE_IMAGE + : BGFX_CAPS_FORMAT_TEXTURE_NONE + ; + + support |= 0 != (data.Support1 & (0 + | D3D12_FORMAT_SUPPORT1_RENDER_TARGET + | D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL + ) ) + ? 
BGFX_CAPS_FORMAT_TEXTURE_FRAMEBUFFER + : BGFX_CAPS_FORMAT_TEXTURE_NONE + ; + } + else + { + BX_TRACE("CheckFeatureSupport failed with %x for format %s.", hr, getName(TextureFormat::Enum(ii) ) ); + } + } + + g_caps.formats[ii] = support; + } + + postReset(); + } + + ~RendererContextD3D12() + { + preReset(); + + m_samplerAllocator.destroy(); + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_scratchBuffer); ++ii) + { + m_scratchBuffer[ii].destroy(); + } + + m_pipelineStateCache.invalidate(); + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_indexBuffers); ++ii) + { + m_indexBuffers[ii].destroy(); + } + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_vertexBuffers); ++ii) + { + m_vertexBuffers[ii].destroy(); + } + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_shaders); ++ii) + { + m_shaders[ii].destroy(); + } + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_textures); ++ii) + { + m_textures[ii].destroy(); + } + + DX_RELEASE(m_rtvDescriptorHeap, 0); + DX_RELEASE(m_dsvDescriptorHeap, 0); + + DX_RELEASE(m_rootSignature, 0); + + DX_RELEASE(m_swapChain, 0); + + m_cmd.shutdown(); + + DX_RELEASE(m_device, 0); + +#if USE_D3D12_DYNAMIC_LIB + bx::dlclose(m_d3d12dll); + bx::dlclose(m_dxgidll); +#endif // USE_D3D12_DYNAMIC_LIB + } + + RendererType::Enum getRendererType() const BX_OVERRIDE + { + return RendererType::Direct3D12; + } + + const char* getRendererName() const BX_OVERRIDE + { + return BGFX_RENDERER_DIRECT3D12_NAME; + } + + static bool isLost(HRESULT _hr) + { + return DXGI_ERROR_DEVICE_REMOVED == _hr + || DXGI_ERROR_DEVICE_HUNG == _hr + || DXGI_ERROR_DEVICE_RESET == _hr + || DXGI_ERROR_DRIVER_INTERNAL_ERROR == _hr + || DXGI_ERROR_NOT_CURRENTLY_AVAILABLE == _hr + ; + } + + void flip(HMD& /*_hmd*/) BX_OVERRIDE + { + if (NULL != m_swapChain) + { + HRESULT hr = 0; + uint32_t syncInterval = !!(m_flags & BGFX_RESET_VSYNC); + for (uint32_t ii = 1, num = m_numWindows; ii < num && SUCCEEDED(hr); ++ii) + { + hr = m_frameBuffers[m_windows[ii].idx].m_swapChain->Present(syncInterval, 0); + } + + if 
(SUCCEEDED(hr) ) + { + m_cmd.finish(m_backBufferColorFence[(m_backBufferColorIdx-1) % m_scd.BufferCount]); + hr = m_swapChain->Present(syncInterval, 0); + } + + if (FAILED(hr) + && isLost(hr) ) + { + ++m_lost; + BGFX_FATAL(10 > m_lost, bgfx::Fatal::DeviceLost, "Device is lost. FAILED 0x%08x", hr); + } + else + { + m_lost = 0; + } + } + } + + void createIndexBuffer(IndexBufferHandle _handle, Memory* _mem, uint16_t _flags) BX_OVERRIDE + { + m_indexBuffers[_handle.idx].create(_mem->size, _mem->data, _flags, false); + } + + void destroyIndexBuffer(IndexBufferHandle _handle) BX_OVERRIDE + { + m_indexBuffers[_handle.idx].destroy(); + } + + void createVertexDecl(VertexDeclHandle _handle, const VertexDecl& _decl) BX_OVERRIDE + { + VertexDecl& decl = m_vertexDecls[_handle.idx]; + memcpy(&decl, &_decl, sizeof(VertexDecl) ); + dump(decl); + } + + void destroyVertexDecl(VertexDeclHandle /*_handle*/) BX_OVERRIDE + { + } + + void createVertexBuffer(VertexBufferHandle _handle, Memory* _mem, VertexDeclHandle _declHandle, uint16_t _flags) BX_OVERRIDE + { + m_vertexBuffers[_handle.idx].create(_mem->size, _mem->data, _declHandle, _flags); + } + + void destroyVertexBuffer(VertexBufferHandle _handle) BX_OVERRIDE + { + m_vertexBuffers[_handle.idx].destroy(); + } + + void createDynamicIndexBuffer(IndexBufferHandle _handle, uint32_t _size, uint16_t _flags) BX_OVERRIDE + { + m_indexBuffers[_handle.idx].create(_size, NULL, _flags, false); + } + + void updateDynamicIndexBuffer(IndexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) BX_OVERRIDE + { + m_indexBuffers[_handle.idx].update(m_commandList, _offset, bx::uint32_min(_size, _mem->size), _mem->data); + } + + void destroyDynamicIndexBuffer(IndexBufferHandle _handle) BX_OVERRIDE + { + m_indexBuffers[_handle.idx].destroy(); + } + + void createDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _size, uint16_t _flags) BX_OVERRIDE + { + VertexDeclHandle decl = BGFX_INVALID_HANDLE; + 
m_vertexBuffers[_handle.idx].create(_size, NULL, decl, _flags); + } + + void updateDynamicVertexBuffer(VertexBufferHandle _handle, uint32_t _offset, uint32_t _size, Memory* _mem) BX_OVERRIDE + { + m_vertexBuffers[_handle.idx].update(m_commandList, _offset, bx::uint32_min(_size, _mem->size), _mem->data); + } + + void destroyDynamicVertexBuffer(VertexBufferHandle _handle) BX_OVERRIDE + { + m_vertexBuffers[_handle.idx].destroy(); + } + + void createShader(ShaderHandle _handle, Memory* _mem) BX_OVERRIDE + { + m_shaders[_handle.idx].create(_mem); + } + + void destroyShader(ShaderHandle _handle) BX_OVERRIDE + { + m_shaders[_handle.idx].destroy(); + } + + void createProgram(ProgramHandle _handle, ShaderHandle _vsh, ShaderHandle _fsh) BX_OVERRIDE + { + m_program[_handle.idx].create(&m_shaders[_vsh.idx], isValid(_fsh) ? &m_shaders[_fsh.idx] : NULL); + } + + void destroyProgram(ProgramHandle _handle) BX_OVERRIDE + { + m_program[_handle.idx].destroy(); + } + + void createTexture(TextureHandle _handle, Memory* _mem, uint32_t _flags, uint8_t _skip) BX_OVERRIDE + { + m_textures[_handle.idx].create(_mem, _flags, _skip); + } + + void updateTextureBegin(TextureHandle /*_handle*/, uint8_t /*_side*/, uint8_t /*_mip*/) BX_OVERRIDE + { + } + + void updateTexture(TextureHandle _handle, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem) BX_OVERRIDE + { + m_textures[_handle.idx].update(m_commandList, _side, _mip, _rect, _z, _depth, _pitch, _mem); + } + + void updateTextureEnd() BX_OVERRIDE + { + } + + void resizeTexture(TextureHandle _handle, uint16_t _width, uint16_t _height) BX_OVERRIDE + { + TextureD3D12& texture = m_textures[_handle.idx]; + + uint32_t size = sizeof(uint32_t) + sizeof(TextureCreate); + const Memory* mem = alloc(size); + + bx::StaticMemoryBlockWriter writer(mem->data, mem->size); + uint32_t magic = BGFX_CHUNK_MAGIC_TEX; + bx::write(&writer, magic); + + TextureCreate tc; + tc.m_flags = texture.m_flags; + 
tc.m_width = _width; + tc.m_height = _height; + tc.m_sides = 0; + tc.m_depth = 0; + tc.m_numMips = 1; + tc.m_format = texture.m_requestedFormat; + tc.m_cubeMap = false; + tc.m_mem = NULL; + bx::write(&writer, tc); + + /* Recreate the texture in place from the TextureCreate chunk written above, keeping the previous flags and requested format. */ + texture.destroy(); + texture.create(mem, tc.m_flags, 0); + + release(mem); + } + + void destroyTexture(TextureHandle _handle) BX_OVERRIDE + { + m_textures[_handle.idx].destroy(); + } + + /* Frame buffer made of existing textures. */ + void createFrameBuffer(FrameBufferHandle _handle, uint8_t _num, const TextureHandle* _textureHandles) BX_OVERRIDE + { + m_frameBuffers[_handle.idx].create(_num, _textureHandles); + } + + /* Window frame buffer: appends _handle to the dense m_windows list and hands its dense index to the frame buffer. */ + void createFrameBuffer(FrameBufferHandle _handle, void* _nwh, uint32_t _width, uint32_t _height, TextureFormat::Enum _depthFormat) BX_OVERRIDE + { + uint16_t denseIdx = m_numWindows++; + m_windows[denseIdx] = _handle; + m_frameBuffers[_handle.idx].create(denseIdx, _nwh, _width, _height, _depthFormat); + } + + /* Destroys the frame buffer. When destroy() reports a dense window index (anything other than UINT16_MAX) the window list shrinks by one and, if more than one window remains, the last entry of m_windows is moved into the vacated slot and its frame buffer's m_denseIdx is updated. */ + void destroyFrameBuffer(FrameBufferHandle _handle) BX_OVERRIDE + { + uint16_t denseIdx = m_frameBuffers[_handle.idx].destroy(); + if (UINT16_MAX != denseIdx) + { + --m_numWindows; + if (m_numWindows > 1) + { + FrameBufferHandle handle = m_windows[m_numWindows]; + m_windows[denseIdx] = handle; + m_frameBuffers[handle.idx].m_denseIdx = denseIdx; + } + } + } + + /* Allocates zero-filled storage for the uniform (g_uniformTypeSize[_type] * _num rounded up with BX_ALIGN_16) and registers it by name in m_uniformReg. Any previous allocation stored for the same handle is freed first. */ + void createUniform(UniformHandle _handle, UniformType::Enum _type, uint16_t _num, const char* _name) BX_OVERRIDE + { + if (NULL != m_uniforms[_handle.idx]) + { + BX_FREE(g_allocator, m_uniforms[_handle.idx]); + } + + uint32_t size = BX_ALIGN_16(g_uniformTypeSize[_type] * _num); + void* data = BX_ALLOC(g_allocator, size); + memset(data, 0, size); + m_uniforms[_handle.idx] = data; + m_uniformReg.add(_handle, _name, data); + } + + /* Frees the uniform storage and clears the slot so createUniform() does not free it again. */ + void destroyUniform(UniformHandle _handle) BX_OVERRIDE + { + BX_FREE(g_allocator, m_uniforms[_handle.idx]); + m_uniforms[_handle.idx] = NULL; + } + + void saveScreenShot(const char* /*_filePath*/) BX_OVERRIDE + { + } + + void updateViewName(uint8_t /*_id*/, const char* /*_name*/) BX_OVERRIDE + { + } +
+ void updateUniform(uint16_t _loc, const void* _data, uint32_t _size) BX_OVERRIDE + { + memcpy(m_uniforms[_loc], _data, _size); + } + + void setMarker(const char* /*_marker*/, uint32_t /*_size*/) BX_OVERRIDE + { + } + + void submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter) BX_OVERRIDE; + + /* Prepares the command list for text blitting: binds the back buffer, a viewport covering the swap chain, the blit pipeline state, an orthographic projection constant, the blitter's texture and sampler, and its vertex and index buffers. */ + void blitSetup(TextVideoMemBlitter& _blitter) BX_OVERRIDE + { + const uint32_t width = m_scd.BufferDesc.Width; + const uint32_t height = m_scd.BufferDesc.Height; + + FrameBufferHandle fbh = BGFX_INVALID_HANDLE; + setFrameBuffer(fbh, false); + + D3D12_VIEWPORT vp; + vp.TopLeftX = 0; + vp.TopLeftY = 0; + vp.Width = (float)width; + vp.Height = (float)height; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + m_commandList->RSSetViewports(1, &vp); + + const uint64_t state = 0 + | BGFX_STATE_RGB_WRITE + | BGFX_STATE_ALPHA_WRITE + | BGFX_STATE_DEPTH_TEST_ALWAYS + ; + + /* Make the blit program current: getPipelineState() and commitShaderConstants() read shader sizes through m_currentProgram, so it has to be the same program whose pipeline state and predefined uniform are used below. */ + m_currentProgram = &m_program[_blitter.m_program.idx]; + ID3D12PipelineState* pso = getPipelineState(state + , packStencil(BGFX_STENCIL_DEFAULT, BGFX_STENCIL_DEFAULT) + , _blitter.m_vb->decl.idx + , _blitter.m_program.idx + , 0 + ); + m_commandList->SetPipelineState(pso); + m_commandList->SetGraphicsRootSignature(m_rootSignature); + + float proj[16]; + bx::mtxOrtho(proj, 0.0f, (float)width, (float)height, 0.0f, 0.0f, 1000.0f); + + PredefinedUniform& predefined = m_program[_blitter.m_program.idx].m_predefined[0]; + uint8_t flags = predefined.m_type; + setShaderUniform(flags, predefined.m_loc, proj, 4); + + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle; + commitShaderConstants(gpuHandle); + + ID3D12DescriptorHeap* heaps[] = + { + m_samplerAllocator.getHeap(), + m_scratchBuffer[m_backBufferColorIdx].getHeap(), + }; + m_commandList->SetDescriptorHeaps(BX_COUNTOF(heaps), heaps); + m_commandList->SetGraphicsRootDescriptorTable(Rdt::CBV, gpuHandle); + + TextureD3D12& texture = m_textures[_blitter.m_texture.idx]; + uint32_t samplerFlags[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS] = { texture.m_flags & BGFX_TEXTURE_SAMPLER_BITS_MASK
}; + uint16_t samplerStateIdx = getSamplerState(samplerFlags); + m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) ); + D3D12_GPU_DESCRIPTOR_HANDLE srvHandle; + m_scratchBuffer[m_backBufferColorIdx].alloc(srvHandle, texture); + m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, srvHandle); + + VertexBufferD3D12& vb = m_vertexBuffers[_blitter.m_vb->handle.idx]; + const VertexDecl& vertexDecl = m_vertexDecls[_blitter.m_vb->decl.idx]; + D3D12_VERTEX_BUFFER_VIEW viewDesc; + viewDesc.BufferLocation = vb.m_ptr->GetGPUVirtualAddress(); + viewDesc.StrideInBytes = vertexDecl.m_stride; + viewDesc.SizeInBytes = vb.m_size; + m_commandList->IASetVertexBuffers(0, 1, &viewDesc); + + const BufferD3D12& ib = m_indexBuffers[_blitter.m_ib->handle.idx]; + D3D12_INDEX_BUFFER_VIEW ibv; + ibv.Format = DXGI_FORMAT_R16_UINT; + ibv.BufferLocation = ib.m_ptr->GetGPUVirtualAddress(); + ibv.SizeInBytes = ib.m_size; + m_commandList->IASetIndexBuffer(&ibv); + + m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + } + + void blitRender(TextVideoMemBlitter& _blitter, uint32_t _numIndices) BX_OVERRIDE + { + const uint32_t numVertices = _numIndices*4/6; + if (0 < numVertices) + { + m_indexBuffers [_blitter.m_ib->handle.idx].update(m_commandList, 0, _numIndices*2, _blitter.m_ib->data); + m_vertexBuffers[_blitter.m_vb->handle.idx].update(m_commandList, 0, numVertices*_blitter.m_decl.m_stride, _blitter.m_vb->data, true); + + m_commandList->DrawIndexedInstanced(_numIndices + , 1 + , 0 + , 0 + , 0 + ); + } + } + + void preReset() + { + finish(); + + for (uint32_t ii = 0, num = m_scd.BufferCount; ii < num; ++ii) + { + DX_RELEASE(m_backBufferColor[ii], num-1-ii); + } + DX_RELEASE(m_backBufferDepthStencil, 0); + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_frameBuffers); ++ii) + { + m_frameBuffers[ii].preReset(); + } + + invalidateCache(); + +// capturePreReset(); + } + + void postReset() + { + uint32_t rtvDescriptorSize = 
m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + + /* Fetch every swap chain buffer and create its render target view at consecutive slots of the RTV heap. */ + for (uint32_t ii = 0, num = m_scd.BufferCount; ii < num; ++ii) + { + D3D12_CPU_DESCRIPTOR_HANDLE handle = m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + handle.ptr += ii * rtvDescriptorSize; + DX_CHECK(m_swapChain->GetBuffer(ii + , __uuidof(ID3D12Resource) + , (void**)&m_backBufferColor[ii] + ) ); + m_device->CreateRenderTargetView(m_backBufferColor[ii], NULL, handle); + } + + /* Back-buffer depth-stencil resource. Only mip 0 is viewed by the DSV created below, so exactly one mip level is requested; a MipLevels value of 0 would ask D3D12 for a full mip chain. */ + D3D12_RESOURCE_DESC resourceDesc; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resourceDesc.Alignment = 0; + resourceDesc.Width = bx::uint32_max(m_resolution.m_width, 1); + resourceDesc.Height = bx::uint32_max(m_resolution.m_height, 1); + resourceDesc.DepthOrArraySize = 1; + resourceDesc.MipLevels = 1; + resourceDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + resourceDesc.SampleDesc.Count = 1; + resourceDesc.SampleDesc.Quality = 0; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + + D3D12_CLEAR_VALUE clearValue; + clearValue.Format = resourceDesc.Format; + clearValue.DepthStencil.Depth = 1.0f; + clearValue.DepthStencil.Stencil = 0; + + m_backBufferDepthStencil = createCommittedResource(m_device, HeapProperty::Default, &resourceDesc, &clearValue); + + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc; + ZeroMemory(&dsvDesc, sizeof(dsvDesc) ); + dsvDesc.Format = resourceDesc.Format; + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + dsvDesc.Flags = D3D12_DSV_FLAGS(0) +// | D3D12_DSV_FLAG_READ_ONLY_DEPTH +// | D3D12_DSV_FLAG_READ_ONLY_DEPTH + ; + + m_device->CreateDepthStencilView(m_backBufferDepthStencil + , &dsvDesc + , m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart() + ); + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_frameBuffers); ++ii) + { + m_frameBuffers[ii].postReset(); + } + + m_commandList = m_cmd.alloc(); +// capturePostReset(); + } + + void invalidateCache() + { + m_pipelineStateCache.invalidate(); +
m_samplerStateCache.invalidate(); + } + + /* Rebuilds s_msaa: each sample count in s_checkMsaa is queried with CheckFeatureSupport; a count that is not reported as supported inherits the last supported entry. */ + void updateMsaa() + { + for (uint32_t ii = 1, last = 0; ii < BX_COUNTOF(s_msaa); ++ii) + { + uint32_t msaa = s_checkMsaa[ii]; + + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS data; + /* Zero the entire query struct, not just sizeof(uint32_t) bytes of it. */ + memset(&data, 0, sizeof(data) ); + data.Format = m_scd.BufferDesc.Format; + data.SampleCount = msaa; + data.Flags = D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE; + HRESULT hr = m_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &data, sizeof(data) ); + + /* NOTE(review): the query result is overwritten with 0 here, so the branch below never records a supported level and every entry falls back to s_msaa[0]. Presumably MSAA is intentionally disabled for now - confirm before removing. */ +data.NumQualityLevels = 0; + + if (SUCCEEDED(hr) + && 0 < data.NumQualityLevels) + { + s_msaa[ii].Count = data.SampleCount; + s_msaa[ii].Quality = data.NumQualityLevels - 1; + last = ii; + } + else + { + s_msaa[ii] = s_msaa[last]; + } + } + } + + /* Applies a new resolution or new reset flags. When the MSAA bits are unchanged the existing swap chain is resized; otherwise the MSAA table is refreshed and the swap chain is recreated. preReset() and postReset() bracket either path. */ + void updateResolution(const Resolution& _resolution) + { + if ( (uint32_t)m_scd.BufferDesc.Width != _resolution.m_width + || (uint32_t)m_scd.BufferDesc.Height != _resolution.m_height + || m_flags != _resolution.m_flags) + { + bool resize = (m_flags&BGFX_RESET_MSAA_MASK) == (_resolution.m_flags&BGFX_RESET_MSAA_MASK); + m_flags = _resolution.m_flags; + + m_textVideoMem.resize(false, _resolution.m_width, _resolution.m_height); + m_textVideoMem.clear(); + + m_resolution = _resolution; + + m_scd.BufferDesc.Width = _resolution.m_width; + m_scd.BufferDesc.Height = _resolution.m_height; + + preReset(); + + if (resize) + { + DX_CHECK(m_swapChain->ResizeBuffers(m_scd.BufferCount + , m_scd.BufferDesc.Width + , m_scd.BufferDesc.Height + , m_scd.BufferDesc.Format + , m_scd.Flags + ) ); + } + else + { + updateMsaa(); + m_scd.SampleDesc = s_msaa[(m_flags&BGFX_RESET_MSAA_MASK)>>BGFX_RESET_MSAA_SHIFT]; + + DX_RELEASE(m_swapChain, 0); + + HRESULT hr; + hr = m_factory->CreateSwapChain(m_cmd.m_commandQueue + , &m_scd + , &m_swapChain + ); + BGFX_FATAL(SUCCEEDED(hr), bgfx::Fatal::UnableToInitialize, "Failed to create swap chain."); + } + + postReset(); + } + } + + void setShaderUniform(uint8_t _flags, uint16_t _regIndex, const void* _val, uint16_t
_numRegs) + { + /* Copies _numRegs 16-byte registers from _val into the fragment or vertex scratch array, selected by BGFX_UNIFORM_FRAGMENTBIT in _flags. _regIndex is used directly as the index into the uint8_t scratch array. */ + if (_flags&BGFX_UNIFORM_FRAGMENTBIT) + { + memcpy(&m_fsScratch[_regIndex], _val, _numRegs*16); + m_fsChanges += _numRegs; + } + else + { + memcpy(&m_vsScratch[_regIndex], _val, _numRegs*16); + m_vsChanges += _numRegs; + } + } + + /* The two typed variants below simply forward to setShaderUniform(). */ + void setShaderUniform4f(uint8_t _flags, uint16_t _regIndex, const void* _val, uint16_t _numRegs) + { + setShaderUniform(_flags, _regIndex, _val, _numRegs); + } + + void setShaderUniform4x4f(uint8_t _flags, uint16_t _regIndex, const void* _val, uint16_t _numRegs) + { + setShaderUniform(_flags, _regIndex, _val, _numRegs); + } + + /* Allocates one block from m_scratchBuffer[m_backBufferColorIdx], sized to the current vertex shader's m_size plus the fragment shader's m_size (when there is one), rounded up to D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT. The vertex scratch bytes are copied first, immediately followed by the fragment scratch bytes, and both change counters are reset. gpuHandle is passed to the scratch buffer's alloc(), presumably to receive the descriptor for the block. */ + void commitShaderConstants(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle) + { + uint32_t total = bx::strideAlign(0 + + m_currentProgram->m_vsh->m_size + + (NULL != m_currentProgram->m_fsh ? m_currentProgram->m_fsh->m_size : 0) + , D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT + ); + uint8_t* data = (uint8_t*)m_scratchBuffer[m_backBufferColorIdx].alloc(gpuHandle, total); + + { + uint32_t size = m_currentProgram->m_vsh->m_size; + memcpy(data, m_vsScratch, size); + data += size; + + m_vsChanges = 0; + } + + if (NULL != m_currentProgram->m_fsh) + { + memcpy(data, m_fsScratch, m_currentProgram->m_fsh->m_size); + + m_fsChanges = 0; + } + } + + /* Binds _fbh as the current render target; an invalid handle selects the back buffer. Before switching away from a valid frame buffer, its color textures, and its depth texture unless BGFX_TEXTURE_RT_BUFFER_ONLY is set, are transitioned to D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE. */ + void setFrameBuffer(FrameBufferHandle _fbh, bool _msaa = true) + { + if (isValid(m_fbh) + && m_fbh.idx != _fbh.idx) + { + const FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx]; + + for (uint8_t ii = 0, num = frameBuffer.m_num; ii < num; ++ii) + { + TextureD3D12& texture = m_textures[frameBuffer.m_texture[ii].idx]; + texture.setState(m_commandList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + + if (isValid(frameBuffer.m_depth) ) + { + TextureD3D12& texture = m_textures[frameBuffer.m_depth.idx]; + const bool bufferOnly = 0 != (texture.m_flags&BGFX_TEXTURE_RT_BUFFER_ONLY); + if (!bufferOnly) + { + texture.setState(m_commandList, D3D12_RESOURCE_STATES(0) + | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE +// | D3D12_RESOURCE_STATE_DEPTH_READ + ); + } + } + } + + if
(!isValid(_fbh) ) + { + m_rtvHandle = m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + uint32_t rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + m_rtvHandle.ptr += m_backBufferColorIdx * rtvDescriptorSize; + m_dsvHandle = m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + + m_currentColor = &m_rtvHandle; + m_currentDepthStencil = &m_dsvHandle; + m_commandList->OMSetRenderTargets(1, m_currentColor, false, m_currentDepthStencil); + } + else + { + const FrameBufferD3D12& frameBuffer = m_frameBuffers[_fbh.idx]; + + if (0 < frameBuffer.m_num) + { + D3D12_CPU_DESCRIPTOR_HANDLE rtvDescriptor = m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + uint32_t rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + m_rtvHandle.ptr = rtvDescriptor.ptr + (BX_COUNTOF(m_backBufferColor) + _fbh.idx * BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS) * rtvDescriptorSize; + m_currentColor = &m_rtvHandle; + } + else + { + m_currentColor = NULL; + } + + if (isValid(frameBuffer.m_depth) ) + { + D3D12_CPU_DESCRIPTOR_HANDLE dsvDescriptor = m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + uint32_t dsvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + m_dsvHandle.ptr = dsvDescriptor.ptr + (1 + _fbh.idx) * dsvDescriptorSize; + m_currentDepthStencil = &m_dsvHandle; + } + else + { + m_currentDepthStencil = NULL; + } + + for (uint8_t ii = 0, num = frameBuffer.m_num; ii < num; ++ii) + { + TextureD3D12& texture = m_textures[frameBuffer.m_texture[ii].idx]; + texture.setState(m_commandList, D3D12_RESOURCE_STATE_RENDER_TARGET); + } + + if (isValid(frameBuffer.m_depth) ) + { + TextureD3D12& texture = m_textures[frameBuffer.m_depth.idx]; + texture.setState(m_commandList, D3D12_RESOURCE_STATE_DEPTH_WRITE); + } + + m_commandList->OMSetRenderTargets(frameBuffer.m_num + , m_currentColor + , true //NULL == m_currentDepthStencil + , 
m_currentDepthStencil + ); + } + + m_fbh = _fbh; + m_rtMsaa = _msaa; + } + + void setBlendState(D3D12_BLEND_DESC& desc, uint64_t _state, uint32_t _rgba = 0) + { + memset(&desc, 0, sizeof(desc) ); + desc.IndependentBlendEnable = !!(BGFX_STATE_BLEND_INDEPENDENT & _state); + + D3D12_RENDER_TARGET_BLEND_DESC* drt = &desc.RenderTarget[0]; + drt->BlendEnable = !!(BGFX_STATE_BLEND_MASK & _state); + + { + const uint32_t blend = uint32_t( (_state & BGFX_STATE_BLEND_MASK ) >> BGFX_STATE_BLEND_SHIFT); + const uint32_t equation = uint32_t( (_state & BGFX_STATE_BLEND_EQUATION_MASK) >> BGFX_STATE_BLEND_EQUATION_SHIFT); + + const uint32_t srcRGB = (blend ) & 0xf; + const uint32_t dstRGB = (blend >> 4) & 0xf; + const uint32_t srcA = (blend >> 8) & 0xf; + const uint32_t dstA = (blend >> 12) & 0xf; + + const uint32_t equRGB = (equation ) & 0x7; + const uint32_t equA = (equation >> 3) & 0x7; + + drt->SrcBlend = s_blendFactor[srcRGB][0]; + drt->DestBlend = s_blendFactor[dstRGB][0]; + drt->BlendOp = s_blendEquation[equRGB]; + + drt->SrcBlendAlpha = s_blendFactor[srcA][1]; + drt->DestBlendAlpha = s_blendFactor[dstA][1]; + drt->BlendOpAlpha = s_blendEquation[equA]; + } + + uint32_t writeMask = (_state & BGFX_STATE_ALPHA_WRITE) + ? D3D12_COLOR_WRITE_ENABLE_ALPHA + : 0 + ; + writeMask |= (_state & BGFX_STATE_RGB_WRITE) + ? 
D3D12_COLOR_WRITE_ENABLE_RED + | D3D12_COLOR_WRITE_ENABLE_GREEN + | D3D12_COLOR_WRITE_ENABLE_BLUE + : 0 + ; + + drt->RenderTargetWriteMask = writeMask; + + if (desc.IndependentBlendEnable) + { + for (uint32_t ii = 1, rgba = _rgba; ii < BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS; ++ii, rgba >>= 11) + { + drt = &desc.RenderTarget[ii]; + drt->BlendEnable = 0 != (rgba & 0x7ff); + + const uint32_t src = (rgba ) & 0xf; + const uint32_t dst = (rgba >> 4) & 0xf; + const uint32_t equation = (rgba >> 8) & 0x7; + + drt->SrcBlend = s_blendFactor[src][0]; + drt->DestBlend = s_blendFactor[dst][0]; + drt->BlendOp = s_blendEquation[equation]; + + drt->SrcBlendAlpha = s_blendFactor[src][1]; + drt->DestBlendAlpha = s_blendFactor[dst][1]; + drt->BlendOpAlpha = s_blendEquation[equation]; + + drt->RenderTargetWriteMask = writeMask; + } + } + else + { + for (uint32_t ii = 1; ii < BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS; ++ii) + { + memcpy(&desc.RenderTarget[ii], drt, sizeof(D3D12_RENDER_TARGET_BLEND_DESC) ); + } + } + } + + void setRasterizerState(D3D12_RASTERIZER_DESC& desc, uint64_t _state, bool _wireframe = false) + { + const uint32_t cull = (_state&BGFX_STATE_CULL_MASK) >> BGFX_STATE_CULL_SHIFT; + + desc.FillMode = _wireframe + ? 
D3D12_FILL_MODE_WIREFRAME + : D3D12_FILL_MODE_SOLID + ; + desc.CullMode = s_cullMode[cull]; + desc.FrontCounterClockwise = false; + desc.DepthBias = 0; + desc.DepthBiasClamp = 0.0f; + desc.SlopeScaledDepthBias = 0.0f; + desc.DepthClipEnable = false; + desc.MultisampleEnable = !!(_state&BGFX_STATE_MSAA); + desc.AntialiasedLineEnable = false; + desc.ForcedSampleCount = 0; + desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + } + + void setDepthStencilState(D3D12_DEPTH_STENCIL_DESC& desc, uint64_t _state, uint64_t _stencil = 0) + { + const uint32_t fstencil = unpackStencil(0, _stencil); + + memset(&desc, 0, sizeof(desc) ); + uint32_t func = (_state&BGFX_STATE_DEPTH_TEST_MASK)>>BGFX_STATE_DEPTH_TEST_SHIFT; + desc.DepthEnable = 0 != func; + desc.DepthWriteMask = !!(BGFX_STATE_DEPTH_WRITE & _state) + ? D3D12_DEPTH_WRITE_MASK_ALL + : D3D12_DEPTH_WRITE_MASK_ZERO + ; + desc.DepthFunc = s_cmpFunc[func]; + + uint32_t bstencil = unpackStencil(1, _stencil); + uint32_t frontAndBack = bstencil != BGFX_STENCIL_NONE && bstencil != fstencil; + bstencil = frontAndBack ? 
bstencil : fstencil; + + desc.StencilEnable = 0 != _stencil; + desc.StencilReadMask = (fstencil & BGFX_STENCIL_FUNC_RMASK_MASK) >> BGFX_STENCIL_FUNC_RMASK_SHIFT; + desc.StencilWriteMask = 0xff; + + desc.FrontFace.StencilFailOp = s_stencilOp[(fstencil & BGFX_STENCIL_OP_FAIL_S_MASK) >> BGFX_STENCIL_OP_FAIL_S_SHIFT]; + desc.FrontFace.StencilDepthFailOp = s_stencilOp[(fstencil & BGFX_STENCIL_OP_FAIL_Z_MASK) >> BGFX_STENCIL_OP_FAIL_Z_SHIFT]; + desc.FrontFace.StencilPassOp = s_stencilOp[(fstencil & BGFX_STENCIL_OP_PASS_Z_MASK) >> BGFX_STENCIL_OP_PASS_Z_SHIFT]; + desc.FrontFace.StencilFunc = s_cmpFunc[(fstencil & BGFX_STENCIL_TEST_MASK) >> BGFX_STENCIL_TEST_SHIFT]; + + desc.BackFace.StencilFailOp = s_stencilOp[(bstencil & BGFX_STENCIL_OP_FAIL_S_MASK) >> BGFX_STENCIL_OP_FAIL_S_SHIFT]; + desc.BackFace.StencilDepthFailOp = s_stencilOp[(bstencil & BGFX_STENCIL_OP_FAIL_Z_MASK) >> BGFX_STENCIL_OP_FAIL_Z_SHIFT]; + desc.BackFace.StencilPassOp = s_stencilOp[(bstencil & BGFX_STENCIL_OP_PASS_Z_MASK) >> BGFX_STENCIL_OP_PASS_Z_SHIFT]; + desc.BackFace.StencilFunc = s_cmpFunc[(bstencil&BGFX_STENCIL_TEST_MASK) >> BGFX_STENCIL_TEST_SHIFT]; + } + + uint32_t setInputLayout(D3D12_INPUT_ELEMENT_DESC* _vertexElements, const VertexDecl& _vertexDecl, const ProgramD3D12& _program, uint8_t _numInstanceData) + { + VertexDecl decl; + memcpy(&decl, &_vertexDecl, sizeof(VertexDecl) ); + const uint8_t* attrMask = _program.m_vsh->m_attrMask; + + for (uint32_t ii = 0; ii < Attrib::Count; ++ii) + { + uint8_t mask = attrMask[ii]; + uint8_t attr = (decl.m_attributes[ii] & mask); + decl.m_attributes[ii] = attr == 0 ? 0xff : attr == 0xff ? 
0 : attr; + } + + D3D12_INPUT_ELEMENT_DESC* elem = fillVertexDecl(_vertexElements, decl); + uint32_t num = uint32_t(elem-_vertexElements); + + const D3D12_INPUT_ELEMENT_DESC inst = { "TEXCOORD", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1 }; + + for (uint32_t ii = 0; ii < _numInstanceData; ++ii) + { + uint32_t index = 7 - ii; // TEXCOORD7 = i_data0, TEXCOORD6 = i_data1, etc. + + uint32_t jj; + D3D12_INPUT_ELEMENT_DESC* curr = _vertexElements; + for (jj = 0; jj < num; ++jj) + { + curr = &_vertexElements[jj]; + if (0 == strcmp(curr->SemanticName, "TEXCOORD") + && curr->SemanticIndex == index) + { + break; + } + } + + if (jj == num) + { + curr = elem; + ++elem; + } + + memcpy(curr, &inst, sizeof(D3D12_INPUT_ELEMENT_DESC) ); + curr->InputSlot = 1; + curr->SemanticIndex = index; + curr->AlignedByteOffset = ii*16; + } + + return uint32_t(elem-_vertexElements); + } + + static void patchCb0(DxbcInstruction& _instruction, void* _userData) + { + union { void* ptr; uint32_t offset; } cast ={ _userData }; + + for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii) + { + DxbcOperand& operand = _instruction.operand[ii]; + if (DxbcOperandType::ConstantBuffer == operand.type) + { + if (DxbcOperandAddrMode::Imm32 == operand.addrMode[0] + && 0 == operand.regIndex[0] + && DxbcOperandAddrMode::Imm32 == operand.addrMode[1]) + { + operand.regIndex[1] += cast.offset; + } + } + } + } + + ID3D12PipelineState* getPipelineState(uint16_t _programIdx) + { + ProgramD3D12& program = m_program[_programIdx]; + + bx::HashMurmur2A murmur; + murmur.begin(); + murmur.add(program.m_vsh->m_hash); + const uint32_t hash = murmur.end(); + + ID3D12PipelineState* pso = m_pipelineStateCache.find(hash); + + if(NULL != pso) + { + return pso; + } + + D3D12_COMPUTE_PIPELINE_STATE_DESC desc; + memset(&desc, 0, sizeof(desc) ); + + desc.pRootSignature = m_rootSignature; + + desc.CS.pShaderBytecode = program.m_vsh->m_code->data; + 
desc.CS.BytecodeLength = program.m_vsh->m_code->size; + + DX_CHECK(m_device->CreateComputePipelineState(&desc + ,__uuidof(ID3D12PipelineState) + ,(void**)&pso + )); + m_pipelineStateCache.add(hash, pso); + + return pso; + } + + ID3D12PipelineState* getPipelineState(uint64_t _state, uint64_t _stencil, uint16_t _declIdx, uint16_t _programIdx, uint8_t _numInstanceData) + { + ProgramD3D12& program = m_program[_programIdx]; + + _state &= 0 + | BGFX_STATE_RGB_WRITE + | BGFX_STATE_ALPHA_WRITE + | BGFX_STATE_DEPTH_WRITE + | BGFX_STATE_DEPTH_TEST_MASK + | BGFX_STATE_BLEND_MASK + | BGFX_STATE_BLEND_EQUATION_MASK + | BGFX_STATE_BLEND_INDEPENDENT + | BGFX_STATE_CULL_MASK + | BGFX_STATE_MSAA + | BGFX_STATE_PT_MASK + ; + + _stencil &= packStencil(~BGFX_STENCIL_FUNC_REF_MASK, BGFX_STENCIL_MASK); + + VertexDecl decl; + memcpy(&decl, &m_vertexDecls[_declIdx], sizeof(VertexDecl) ); + const uint8_t* attrMask = program.m_vsh->m_attrMask; + + for (uint32_t ii = 0; ii < Attrib::Count; ++ii) + { + uint8_t mask = attrMask[ii]; + uint8_t attr = (decl.m_attributes[ii] & mask); + decl.m_attributes[ii] = attr == 0 ? 0xff : attr == 0xff ? 
0 : attr; + } + + bx::HashMurmur2A murmur; + murmur.begin(); + murmur.add(_state); + murmur.add(_stencil); + murmur.add(program.m_vsh->m_hash); + murmur.add(program.m_vsh->m_attrMask, sizeof(program.m_vsh->m_attrMask) ); + murmur.add(program.m_fsh->m_hash); + murmur.add(m_vertexDecls[_declIdx].m_hash); + murmur.add(decl.m_attributes, sizeof(decl.m_attributes) ); + murmur.add(m_fbh.idx); + murmur.add(_numInstanceData); + const uint32_t hash = murmur.end(); + + ID3D12PipelineState* pso = m_pipelineStateCache.find(hash); + + if (NULL != pso) + { + return pso; + } + + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc; + memset(&desc, 0, sizeof(desc) ); + + desc.pRootSignature = m_rootSignature; + + desc.VS.pShaderBytecode = program.m_vsh->m_code->data; + desc.VS.BytecodeLength = program.m_vsh->m_code->size; + + const Memory* temp = alloc(program.m_fsh->m_code->size); + memset(temp->data, 0, temp->size); + bx::MemoryReader rd(program.m_fsh->m_code->data, program.m_fsh->m_code->size); + bx::StaticMemoryBlockWriter wr(temp->data, temp->size); + + DxbcContext dxbc; + read(&rd, dxbc); + + bool patchShader = true; + if (BX_ENABLED(BGFX_CONFIG_DEBUG) ) + { + union { uint32_t offset; void* ptr; } cast = { 0 }; + filter(dxbc.shader, dxbc.shader, patchCb0, cast.ptr); + + write(&wr, dxbc); + + dxbcHash(temp->data + 20, temp->size - 20, temp->data + 4); + + patchShader = 0 == memcmp(program.m_fsh->m_code->data, temp->data, 16); + BX_CHECK(patchShader, "DXBC fragment shader patching error (ShaderHandle: %d).", program.m_fsh - m_shaders); + + if (!patchShader) + { + for (uint32_t ii = 20; ii < temp->size; ii += 16) + { + if (0 != memcmp(&program.m_fsh->m_code->data[ii], &temp->data[ii], 16) ) + { +// dbgPrintfData(&program.m_fsh->m_code->data[ii], temp->size-ii, ""); +// dbgPrintfData(&temp->data[ii], temp->size-ii, ""); + break; + } + } + + desc.PS.pShaderBytecode = program.m_fsh->m_code->data; + desc.PS.BytecodeLength = program.m_fsh->m_code->size; + } + } + + if (patchShader) + { + 
memcpy(temp->data, program.m_fsh->m_code->data, program.m_fsh->m_code->size); + + bx::seek(&wr, 0, bx::Whence::Begin); + union { uint32_t offset; void* ptr; } cast = + { + m_currentProgram->m_vsh->m_size/16 + }; + filter(dxbc.shader, dxbc.shader, patchCb0, cast.ptr); + write(&wr, dxbc); + dxbcHash(temp->data + 20, temp->size - 20, temp->data + 4); + + desc.PS.pShaderBytecode = temp->data; + desc.PS.BytecodeLength = temp->size; + } + + desc.DS.pShaderBytecode = NULL; + desc.DS.BytecodeLength = 0; + + desc.HS.pShaderBytecode = NULL; + desc.HS.BytecodeLength = 0; + + desc.GS.pShaderBytecode = NULL; + desc.GS.BytecodeLength = 0; + + desc.StreamOutput.pSODeclaration = NULL; + desc.StreamOutput.NumEntries = 0; + desc.StreamOutput.pBufferStrides = NULL; + desc.StreamOutput.NumStrides = 0; + desc.StreamOutput.RasterizedStream = 0; + + setBlendState(desc.BlendState, _state); + desc.SampleMask = 1; + setRasterizerState(desc.RasterizerState, _state); + setDepthStencilState(desc.DepthStencilState, _state, _stencil); + + D3D12_INPUT_ELEMENT_DESC vertexElements[Attrib::Count + 1 + BGFX_CONFIG_MAX_INSTANCE_DATA_COUNT]; + desc.InputLayout.NumElements = setInputLayout(vertexElements, m_vertexDecls[_declIdx], program, _numInstanceData); + desc.InputLayout.pInputElementDescs = vertexElements; + + uint8_t primIndex = uint8_t( (_state&BGFX_STATE_PT_MASK) >> BGFX_STATE_PT_SHIFT); + desc.PrimitiveTopologyType = s_primInfo[primIndex].m_topologyType; + + if (isValid(m_fbh) ) + { + const FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx]; + desc.NumRenderTargets = frameBuffer.m_num; + + for (uint8_t ii = 0, num = frameBuffer.m_num; ii < num; ++ii) + { + desc.RTVFormats[ii] = m_textures[frameBuffer.m_texture[ii].idx].m_srvd.Format; + } + + if (isValid(frameBuffer.m_depth) ) + { + desc.DSVFormat = s_textureFormat[m_textures[frameBuffer.m_depth.idx].m_textureFormat].m_fmtDsv; + } + else + { + desc.DSVFormat = DXGI_FORMAT_UNKNOWN; + } + } + else + { + desc.NumRenderTargets = 1; + 
desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + desc.DSVFormat = DXGI_FORMAT_D24_UNORM_S8_UINT; + } + + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + + uint32_t length = g_callback->cacheReadSize(hash); + bool cached = length > 0; + + void* cachedData = NULL; + + if (cached) + { + cachedData = BX_ALLOC(g_allocator, length); + if (g_callback->cacheRead(hash, cachedData, length) ) + { + BX_TRACE("Loading chached PSO (size %d).", length); + bx::MemoryReader reader(cachedData, length); + +// uint32_t format; +// bx::read(&reader, format); + + desc.CachedPSO.pCachedBlob = reader.getDataPtr(); + desc.CachedPSO.CachedBlobSizeInBytes = (size_t)reader.remaining(); + + HRESULT hr = m_device->CreateGraphicsPipelineState(&desc + , __uuidof(ID3D12PipelineState) + , (void**)&pso + ); + if (FAILED(hr) ) + { + BX_TRACE("Failed to load cached PSO (HRESULT 0x%08x).", hr); + memset(&desc.CachedPSO, 0, sizeof(desc.CachedPSO) ); + } + } + } + + if (NULL == pso) + { + DX_CHECK(m_device->CreateGraphicsPipelineState(&desc + , __uuidof(ID3D12PipelineState) + , (void**)&pso + ) ); + } + m_pipelineStateCache.add(hash, pso); + + release(temp); + + ID3DBlob* blob; + HRESULT hr = pso->GetCachedBlob(&blob); + if (SUCCEEDED(hr) ) + { + void* data = blob->GetBufferPointer(); + length = (uint32_t)blob->GetBufferSize(); + + g_callback->cacheWrite(hash, data, length); + + DX_RELEASE(blob, 0); + } + + if (NULL != cachedData) + { + BX_FREE(g_allocator, cachedData); + } + + return pso; + } + + uint16_t getSamplerState(const uint32_t* _flags, uint32_t _num = BGFX_CONFIG_MAX_TEXTURE_SAMPLERS) + { + bx::HashMurmur2A murmur; + murmur.begin(); + murmur.add(_flags, _num * sizeof(uint32_t) ); + uint32_t hash = murmur.end(); + + uint16_t sampler = m_samplerStateCache.find(hash); + if (UINT16_MAX == sampler) + { + sampler = m_samplerAllocator.alloc(_flags, _num); + m_samplerStateCache.add(hash, sampler); + } + + return sampler; + } + + void commit(ConstantBuffer& _constantBuffer) + { + 
_constantBuffer.reset(); + + /* Decode opcodes from the start of the buffer until UniformType::End and copy each uniform's value into the shader scratch memory through setShaderUniform(). */ + for (;;) + { + uint32_t opcode = _constantBuffer.read(); + + if (UniformType::End == opcode) + { + break; + } + + UniformType::Enum type; + uint16_t loc; + uint16_t num; + uint16_t copy; + ConstantBuffer::decodeOpcode(opcode, type, loc, num, copy); + + /* When 'copy' is set the value bytes follow the opcode inside the buffer; otherwise a UniformHandle follows and the value is taken from m_uniforms. */ + const char* data; + if (copy) + { + data = _constantBuffer.read(g_uniformTypeSize[type]*num); + } + else + { + UniformHandle handle; + memcpy(&handle, _constantBuffer.read(sizeof(UniformHandle) ), sizeof(UniformHandle) ); + data = (const char*)m_uniforms[handle.idx]; + } + +#define CASE_IMPLEMENT_UNIFORM(_uniform, _dxsuffix, _type) \ + case UniformType::_uniform: \ + case UniformType::_uniform|BGFX_UNIFORM_FRAGMENTBIT: \ + { \ + setShaderUniform(type, loc, data, num); \ + } \ + break; + + switch ( (int32_t)type) + { + case UniformType::Mat3: + case UniformType::Mat3|BGFX_UNIFORM_FRAGMENTBIT: + { + /* Each group of 9 source floats is expanded to three 4-float rows padded with 0.0f and uploaded as 3 registers; loc advances by 3*16 per matrix. */ + float* value = (float*)data; + for (uint32_t ii = 0, count = num/3; ii < count; ++ii, loc += 3*16, value += 9) + { + Matrix4 mtx; + mtx.un.val[ 0] = value[0]; + mtx.un.val[ 1] = value[1]; + mtx.un.val[ 2] = value[2]; + mtx.un.val[ 3] = 0.0f; + mtx.un.val[ 4] = value[3]; + mtx.un.val[ 5] = value[4]; + mtx.un.val[ 6] = value[5]; + mtx.un.val[ 7] = 0.0f; + mtx.un.val[ 8] = value[6]; + mtx.un.val[ 9] = value[7]; + mtx.un.val[10] = value[8]; + mtx.un.val[11] = 0.0f; + setShaderUniform(type, loc, &mtx.un.val[0], 3); + } + } + break; + + CASE_IMPLEMENT_UNIFORM(Int1, I, int); + CASE_IMPLEMENT_UNIFORM(Vec4, F, float); + CASE_IMPLEMENT_UNIFORM(Mat4, F, float); + + case UniformType::End: + break; + + default: + BX_TRACE("%4d: INVALID 0x%08x, t %d, l %d, n %d, c %d", _constantBuffer.getPos(), opcode, type, loc, num, copy); + break; + } +#undef CASE_IMPLEMENT_UNIFORM + } + } + + /* Clears the current render target. A valid m_fbh delegates to that frame buffer's clear(); otherwise the back-buffer color and depth-stencil views are cleared according to _clear.m_flags, with _num and _rect passed through to the D3D12 clear calls. */ + void clear(const Clear& _clear, const float _palette[][4], const D3D12_RECT* _rect = NULL, uint32_t _num = 0) + { + if (isValid(m_fbh) ) + { + FrameBufferD3D12& frameBuffer = m_frameBuffers[m_fbh.idx]; +
frameBuffer.clear(m_commandList, _clear, _palette); + } + else + { + if (NULL != m_currentColor + && BGFX_CLEAR_COLOR & _clear.m_flags) + { + if (BGFX_CLEAR_COLOR_USE_PALETTE & _clear.m_flags) + { + uint8_t index = _clear.m_index[0]; + if (UINT8_MAX != index) + { + m_commandList->ClearRenderTargetView(*m_currentColor + , _palette[index] + , _num + , _rect + ); + } + } + else + { + float frgba[4] = + { + _clear.m_index[0] * 1.0f / 255.0f, + _clear.m_index[1] * 1.0f / 255.0f, + _clear.m_index[2] * 1.0f / 255.0f, + _clear.m_index[3] * 1.0f / 255.0f, + }; + m_commandList->ClearRenderTargetView(*m_currentColor + , frgba + , _num + , _rect + ); + } + } + + if (NULL != m_currentDepthStencil + && (BGFX_CLEAR_DEPTH | BGFX_CLEAR_STENCIL) & _clear.m_flags) + { + uint32_t flags = 0; + flags |= (_clear.m_flags & BGFX_CLEAR_DEPTH ) ? D3D12_CLEAR_FLAG_DEPTH : 0; + flags |= (_clear.m_flags & BGFX_CLEAR_STENCIL) ? D3D12_CLEAR_FLAG_STENCIL : 0; + + m_commandList->ClearDepthStencilView(*m_currentDepthStencil + , D3D12_CLEAR_FLAGS(flags) + , _clear.m_depth + , _clear.m_stencil + , _num + , _rect + ); + } + } + } + + void clearQuad(ClearQuad& _clearQuad, const Rect& _rect, const Clear& _clear, const float _palette[][4]) + { + BX_UNUSED(_clearQuad); + + uint32_t width = m_scd.BufferDesc.Width; + uint32_t height = m_scd.BufferDesc.Height; + + if (0 == _rect.m_x + && 0 == _rect.m_y + && width == _rect.m_width + && height == _rect.m_height) + { + clear(_clear, _palette); + } + else + { + D3D12_RECT rect; + rect.left = _rect.m_x; + rect.top = _rect.m_y; + rect.right = _rect.m_x + _rect.m_width; + rect.bottom = _rect.m_y + _rect.m_height; + clear(_clear, _palette, &rect); + } + } + + uint64_t kick() + { + uint64_t fence = m_cmd.kick(); + m_commandList = m_cmd.alloc(); + return fence; + } + + void finish() + { + m_cmd.kick(); + m_cmd.finish(); + m_commandList = NULL; + } + + void* m_d3d12dll; + void* m_dxgidll; + + D3D_DRIVER_TYPE m_driverType; + IDXGIAdapter* m_adapter; + DXGI_ADAPTER_DESC 
m_adapterDesc; + D3D12_FEATURE_DATA_ARCHITECTURE m_architecture; + D3D12_FEATURE_DATA_D3D12_OPTIONS m_options; + + IDXGIFactory1* m_factory; + + IDXGISwapChain* m_swapChain; + uint16_t m_lost; + uint16_t m_numWindows; + FrameBufferHandle m_windows[BGFX_CONFIG_MAX_FRAME_BUFFERS]; + + ID3D12Device* m_device; + ID3D12InfoQueue* m_infoQueue; + + ID3D12DescriptorHeap* m_rtvDescriptorHeap; + ID3D12DescriptorHeap* m_dsvDescriptorHeap; + D3D12_CPU_DESCRIPTOR_HANDLE m_rtvHandle; + D3D12_CPU_DESCRIPTOR_HANDLE m_dsvHandle; + D3D12_CPU_DESCRIPTOR_HANDLE* m_currentColor; + D3D12_CPU_DESCRIPTOR_HANDLE* m_currentDepthStencil; + ID3D12Resource* m_backBufferColor[4]; + uint64_t m_backBufferColorFence[4]; + ID3D12Resource* m_backBufferDepthStencil; + + ScratchBufferD3D12 m_scratchBuffer[4]; + DescriptorAllocator m_samplerAllocator; + + ID3D12RootSignature* m_rootSignature; + + CommandQueue m_cmd; + ID3D12GraphicsCommandList* m_commandList; + + Resolution m_resolution; + bool m_wireframe; + + DXGI_SWAP_CHAIN_DESC m_scd; + uint32_t m_flags; + + BufferD3D12 m_indexBuffers[BGFX_CONFIG_MAX_INDEX_BUFFERS]; + VertexBufferD3D12 m_vertexBuffers[BGFX_CONFIG_MAX_VERTEX_BUFFERS]; + ShaderD3D12 m_shaders[BGFX_CONFIG_MAX_SHADERS]; + ProgramD3D12 m_program[BGFX_CONFIG_MAX_PROGRAMS]; + TextureD3D12 m_textures[BGFX_CONFIG_MAX_TEXTURES]; + VertexDecl m_vertexDecls[BGFX_CONFIG_MAX_VERTEX_DECLS]; + FrameBufferD3D12 m_frameBuffers[BGFX_CONFIG_MAX_FRAME_BUFFERS]; + void* m_uniforms[BGFX_CONFIG_MAX_UNIFORMS]; + Matrix4 m_predefinedUniforms[PredefinedUniform::Count]; + UniformRegistry m_uniformReg; + + StateCacheT m_pipelineStateCache; + StateCache m_samplerStateCache; + + TextVideoMem m_textVideoMem; + + ProgramD3D12* m_currentProgram; + uint8_t m_fsScratch[64<<10]; + uint8_t m_vsScratch[64<<10]; + uint32_t m_fsChanges; + uint32_t m_vsChanges; + + FrameBufferHandle m_fbh; + uint32_t m_frame; + uint32_t m_backBufferColorIdx; + bool m_rtMsaa; + }; + + static RendererContextD3D12* s_renderD3D12; + + 
RendererContextI* rendererCreate() + { + s_renderD3D12 = BX_NEW(g_allocator, RendererContextD3D12); + s_renderD3D12->init(); + return s_renderD3D12; + } + + void rendererDestroy() + { + BX_DELETE(g_allocator, s_renderD3D12); + s_renderD3D12 = NULL; + } + + void ScratchBufferD3D12::create(uint32_t _size, uint32_t _maxDescriptors) + { + m_size = _size; + + ID3D12Device* device = s_renderD3D12->m_device; + + m_incrementSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + D3D12_DESCRIPTOR_HEAP_DESC desc; + desc.NumDescriptors = _maxDescriptors; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + desc.NodeMask = 0; + DX_CHECK(device->CreateDescriptorHeap(&desc + , __uuidof(ID3D12DescriptorHeap) + , (void**)&m_heap + ) ); + + m_upload = createCommittedResource(device, HeapProperty::Upload, desc.NumDescriptors * 1024); + m_upload->Map(0, NULL, (void**)&m_data); + + reset(m_gpuHandle); + } + + void ScratchBufferD3D12::destroy() + { + m_upload->Unmap(0, NULL); + + DX_RELEASE(m_upload, 0); + DX_RELEASE(m_heap, 0); + } + + void ScratchBufferD3D12::reset(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle) + { + m_pos = 0; + m_cpuHandle = m_heap->GetCPUDescriptorHandleForHeapStart(); + m_gpuHandle = m_heap->GetGPUDescriptorHandleForHeapStart(); + gpuHandle = m_gpuHandle; + } + + void* ScratchBufferD3D12::alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, uint32_t _size) + { + D3D12_CONSTANT_BUFFER_VIEW_DESC desc; + desc.BufferLocation = m_upload->GetGPUVirtualAddress() + m_pos; + desc.SizeInBytes = _size; + + void* data = &m_data[m_pos]; + + m_pos += BX_ALIGN_256(_size); + + ID3D12Device* device = s_renderD3D12->m_device; + device->CreateConstantBufferView(&desc + , m_cpuHandle + ); + m_cpuHandle.ptr += m_incrementSize; + + gpuHandle = m_gpuHandle; + m_gpuHandle.ptr += m_incrementSize; + + return data; + } + + void ScratchBufferD3D12::alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, TextureD3D12& 
_texture) + { + ID3D12Device* device = s_renderD3D12->m_device; + device->CreateShaderResourceView(_texture.m_ptr + , &_texture.m_srvd + , m_cpuHandle + ); + m_cpuHandle.ptr += m_incrementSize; + + gpuHandle = m_gpuHandle; + m_gpuHandle.ptr += m_incrementSize; + } + + void ScratchBufferD3D12::allocUav(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, TextureD3D12& _texture) + { + ID3D12Device* device = s_renderD3D12->m_device; + device->CreateUnorderedAccessView(_texture.m_ptr + , NULL + , &_texture.m_uavd + , m_cpuHandle + ); + m_cpuHandle.ptr += m_incrementSize; + + gpuHandle = m_gpuHandle; + m_gpuHandle.ptr += m_incrementSize; + } + + void ScratchBufferD3D12::alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, BufferD3D12& _buffer) + { + ID3D12Device* device = s_renderD3D12->m_device; + device->CreateShaderResourceView(_buffer.m_ptr + , &_buffer.m_srvd + , m_cpuHandle + ); + m_cpuHandle.ptr += m_incrementSize; + + gpuHandle = m_gpuHandle; + m_gpuHandle.ptr += m_incrementSize; + } + + void ScratchBufferD3D12::allocUav(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, BufferD3D12& _buffer) + { + ID3D12Device* device = s_renderD3D12->m_device; + device->CreateUnorderedAccessView(_buffer.m_ptr + , NULL + , &_buffer.m_uavd + , m_cpuHandle + ); + m_cpuHandle.ptr += m_incrementSize; + + gpuHandle = m_gpuHandle; + m_gpuHandle.ptr += m_incrementSize; + } + + void DescriptorAllocator::create(D3D12_DESCRIPTOR_HEAP_TYPE _type, uint32_t _maxDescriptors, uint16_t _numDescriptorsPerBlock) + { + m_handleAlloc = bx::createHandleAlloc(g_allocator, _maxDescriptors); + m_numDescriptorsPerBlock = _numDescriptorsPerBlock; + + ID3D12Device* device = s_renderD3D12->m_device; + + m_incrementSize = device->GetDescriptorHandleIncrementSize(_type); + + D3D12_DESCRIPTOR_HEAP_DESC desc; + desc.NumDescriptors = _maxDescriptors; + desc.Type = _type; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + desc.NodeMask = 0; + DX_CHECK(device->CreateDescriptorHeap(&desc + , __uuidof(ID3D12DescriptorHeap) + , 
(void**)&m_heap + ) ); + + m_cpuHandle = m_heap->GetCPUDescriptorHandleForHeapStart(); + m_gpuHandle = m_heap->GetGPUDescriptorHandleForHeapStart(); + } + + void DescriptorAllocator::destroy() + { + bx::destroyHandleAlloc(g_allocator, m_handleAlloc); + + DX_RELEASE(m_heap, 0); + } + + uint16_t DescriptorAllocator::alloc(ID3D12Resource* _ptr, const D3D12_SHADER_RESOURCE_VIEW_DESC* _desc) + { + uint16_t idx = m_handleAlloc->alloc(); + + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = { m_cpuHandle.ptr + idx * m_incrementSize }; + + ID3D12Device* device = s_renderD3D12->m_device; + device->CreateShaderResourceView(_ptr + , _desc + , cpuHandle + ); + + return idx; + } + + uint16_t DescriptorAllocator::alloc(const uint32_t* _flags, uint32_t _num) + { + uint16_t idx = m_handleAlloc->alloc(); + + ID3D12Device* device = s_renderD3D12->m_device; + + for (uint32_t ii = 0; ii < _num; ++ii) + { + uint32_t flags = _flags[ii]; + + const uint32_t cmpFunc = (flags&BGFX_TEXTURE_COMPARE_MASK)>>BGFX_TEXTURE_COMPARE_SHIFT; + const uint8_t minFilter = s_textureFilter[0][(flags&BGFX_TEXTURE_MIN_MASK)>>BGFX_TEXTURE_MIN_SHIFT]; + const uint8_t magFilter = s_textureFilter[1][(flags&BGFX_TEXTURE_MAG_MASK)>>BGFX_TEXTURE_MAG_SHIFT]; + const uint8_t mipFilter = s_textureFilter[2][(flags&BGFX_TEXTURE_MIP_MASK)>>BGFX_TEXTURE_MIP_SHIFT]; + const uint8_t filter = 0 == cmpFunc ? 0 : D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT; + + D3D12_SAMPLER_DESC sd; + sd.Filter = (D3D12_FILTER)(filter|minFilter|magFilter|mipFilter); + sd.AddressU = s_textureAddress[(flags&BGFX_TEXTURE_U_MASK)>>BGFX_TEXTURE_U_SHIFT]; + sd.AddressV = s_textureAddress[(flags&BGFX_TEXTURE_V_MASK)>>BGFX_TEXTURE_V_SHIFT]; + sd.AddressW = s_textureAddress[(flags&BGFX_TEXTURE_W_MASK)>>BGFX_TEXTURE_W_SHIFT]; + sd.MinLOD = 0; + sd.MaxLOD = D3D12_FLOAT32_MAX; + sd.MipLODBias = 0.0f; + sd.MaxAnisotropy = 1; //m_maxAnisotropy; + sd.ComparisonFunc = 0 == cmpFunc ? 
D3D12_COMPARISON_FUNC_NEVER : s_cmpFunc[cmpFunc]; + + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = + { + m_cpuHandle.ptr + (idx * m_numDescriptorsPerBlock + ii) * m_incrementSize + }; + + device->CreateSampler(&sd, cpuHandle); + } + + return idx; + } + + void DescriptorAllocator::free(uint16_t _idx) + { + m_handleAlloc->free(_idx); + } + + D3D12_GPU_DESCRIPTOR_HANDLE DescriptorAllocator::get(uint16_t _idx) + { + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = { m_gpuHandle.ptr + _idx * m_numDescriptorsPerBlock * m_incrementSize }; + return gpuHandle; + } + + struct UavFormat + { + DXGI_FORMAT format[3]; + uint32_t stride; + }; + + static const UavFormat s_uavFormat[] = + { // BGFX_BUFFER_COMPUTE_TYPE_UINT, BGFX_BUFFER_COMPUTE_TYPE_INT, BGFX_BUFFER_COMPUTE_TYPE_FLOAT + { { DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, 0 }, // ignored + { { DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_UINT, DXGI_FORMAT_UNKNOWN }, 1 }, // BGFX_BUFFER_COMPUTE_FORMAT_8x1 + { { DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_UNKNOWN }, 2 }, // BGFX_BUFFER_COMPUTE_FORMAT_8x2 + { { DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_UNKNOWN }, 4 }, // BGFX_BUFFER_COMPUTE_FORMAT_8x4 + { { DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_FLOAT }, 2 }, // BGFX_BUFFER_COMPUTE_FORMAT_16x1 + { { DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_FLOAT }, 4 }, // BGFX_BUFFER_COMPUTE_FORMAT_16x2 + { { DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_FLOAT }, 8 }, // BGFX_BUFFER_COMPUTE_FORMAT_16x4 + { { DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_FLOAT }, 4 }, // BGFX_BUFFER_COMPUTE_FORMAT_32x1 + { { DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_FLOAT }, 8 }, // BGFX_BUFFER_COMPUTE_FORMAT_32x2 + { { DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_FLOAT }, 16 }, // BGFX_BUFFER_COMPUTE_FORMAT_32x4 + }; + + void 
BufferD3D12::create(uint32_t _size, void* _data, uint16_t _flags, bool _vertex) + { + m_size = _size; + m_flags = _flags; + + const bool needUav = 0 != (_flags & (BGFX_BUFFER_COMPUTE_WRITE|BGFX_BUFFER_DRAW_INDIRECT)); +// const bool needSrv = 0 != (_flags & BGFX_BUFFER_COMPUTE_READ); + const bool drawIndirect = 0 != (_flags & BGFX_BUFFER_DRAW_INDIRECT); + m_dynamic = NULL == _data || needUav; + + DXGI_FORMAT format; + uint32_t stride; + + D3D12_RESOURCE_FLAGS flags = needUav + ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS + : D3D12_RESOURCE_FLAG_NONE + ; + + if(drawIndirect) + { + format = DXGI_FORMAT_R32G32B32A32_UINT; + stride = 16; + } + else + { + uint32_t uavFormat = (_flags & BGFX_BUFFER_COMPUTE_FORMAT_MASK) >> BGFX_BUFFER_COMPUTE_FORMAT_SHIFT; + if (0 == uavFormat) + { + if (_vertex) + { + format = DXGI_FORMAT_R32G32B32A32_FLOAT; + stride = 16; + } + else + { + if (0 == (_flags & BGFX_BUFFER_INDEX32) ) + { + format = DXGI_FORMAT_R16_UINT; + stride = 2; + } + else + { + format = DXGI_FORMAT_R32_UINT; + stride = 4; + } + } + } + else + { + const uint32_t uavType = bx::uint32_satsub( (_flags & BGFX_BUFFER_COMPUTE_TYPE_MASK) >> BGFX_BUFFER_COMPUTE_TYPE_SHIFT, 1); + format = s_uavFormat[uavFormat].format[uavType]; + stride = s_uavFormat[uavFormat].stride; + } + } + + m_srvd.Format = format; + m_srvd.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + m_srvd.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + m_srvd.Buffer.FirstElement = 0; + m_srvd.Buffer.NumElements = m_size / stride; + m_srvd.Buffer.StructureByteStride = 0; + m_srvd.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + m_uavd.Format = format; + m_uavd.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + m_uavd.Buffer.FirstElement = 0; + m_uavd.Buffer.NumElements = m_size / stride; + m_uavd.Buffer.StructureByteStride = 0; + m_uavd.Buffer.CounterOffsetInBytes = 0; + m_uavd.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + ID3D12Device* device = s_renderD3D12->m_device; + ID3D12GraphicsCommandList* 
commandList = s_renderD3D12->m_commandList; + + m_ptr = createCommittedResource(device, HeapProperty::Default, _size, flags); + + if (!needUav) + { + m_staging = createCommittedResource(device, HeapProperty::Upload, _size); + } + + if (m_dynamic) + { + setState(commandList, D3D12_RESOURCE_STATE_GENERIC_READ); + } + else + { + setState(commandList, D3D12_RESOURCE_STATE_COPY_DEST); + + D3D12_SUBRESOURCE_DATA subresource; + subresource.pData = _data; + subresource.RowPitch = _size; + subresource.SlicePitch = subresource.RowPitch; + + UpdateSubresources<1>(commandList + , m_ptr + , m_staging + , 0 + , 0 + , 1 + , &subresource + ); + + setState(commandList, D3D12_RESOURCE_STATE_GENERIC_READ); + } + } + + void BufferD3D12::update(ID3D12GraphicsCommandList* _commandList, uint32_t /*_offset*/, uint32_t _size, void* _data, bool /*_discard*/) + { + setState(_commandList, D3D12_RESOURCE_STATE_COPY_DEST); + + D3D12_SUBRESOURCE_DATA subresource; + subresource.pData = _data; + subresource.RowPitch = _size; + subresource.SlicePitch = subresource.RowPitch; + + UpdateSubresources<1>(_commandList + , m_ptr + , m_staging + , 0 + , 0 + , 1 + , &subresource + ); + + setState(_commandList, D3D12_RESOURCE_STATE_GENERIC_READ); + } + + void BufferD3D12::setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state) + { + if (m_state != _state) + { + setResourceBarrier(_commandList + , m_ptr + , m_state + , _state + ); + + m_state = _state; + } + } + + void VertexBufferD3D12::create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint16_t _flags) + { + BufferD3D12::create(_size, _data, _flags, true); + m_decl = _declHandle; + } + + void ShaderD3D12::create(const Memory* _mem) + { + bx::MemoryReader reader(_mem->data, _mem->size); + + uint32_t magic; + bx::read(&reader, magic); + + switch (magic) + { + case BGFX_CHUNK_MAGIC_CSH: + case BGFX_CHUNK_MAGIC_FSH: + case BGFX_CHUNK_MAGIC_VSH: + break; + + default: + BGFX_FATAL(false, Fatal::InvalidShader, "Unknown shader 
format %x.", magic); + break; + } + + bool fragment = BGFX_CHUNK_MAGIC_FSH == magic; + + uint32_t iohash; + bx::read(&reader, iohash); + + uint16_t count; + bx::read(&reader, count); + + m_numPredefined = 0; + m_numUniforms = count; + + BX_TRACE("%s Shader consts %d" + , BGFX_CHUNK_MAGIC_FSH == magic ? "Fragment" : BGFX_CHUNK_MAGIC_VSH == magic ? "Vertex" : "Compute" + , count + ); + + uint8_t fragmentBit = fragment ? BGFX_UNIFORM_FRAGMENTBIT : 0; + + if (0 < count) + { + for (uint32_t ii = 0; ii < count; ++ii) + { + uint8_t nameSize; + bx::read(&reader, nameSize); + + char name[256]; + bx::read(&reader, &name, nameSize); + name[nameSize] = '\0'; + + uint8_t type; + bx::read(&reader, type); + + uint8_t num; + bx::read(&reader, num); + + uint16_t regIndex; + bx::read(&reader, regIndex); + + uint16_t regCount; + bx::read(&reader, regCount); + + const char* kind = "invalid"; + + PredefinedUniform::Enum predefined = nameToPredefinedUniformEnum(name); + if (PredefinedUniform::Count != predefined) + { + kind = "predefined"; + m_predefined[m_numPredefined].m_loc = regIndex; + m_predefined[m_numPredefined].m_count = regCount; + m_predefined[m_numPredefined].m_type = predefined|fragmentBit; + m_numPredefined++; + } + else + { + const UniformInfo* info = s_renderD3D12->m_uniformReg.find(name); + + if (NULL != info) + { + if (NULL == m_constantBuffer) + { + m_constantBuffer = ConstantBuffer::create(1024); + } + + kind = "user"; + m_constantBuffer->writeUniformHandle( (UniformType::Enum)(type|fragmentBit), regIndex, info->m_handle, regCount); + } + } + + BX_TRACE("\t%s: %s (%s), num %2d, r.index %3d, r.count %2d" + , kind + , name + , getUniformTypeName(UniformType::Enum(type&~BGFX_UNIFORM_FRAGMENTBIT) ) + , num + , regIndex + , regCount + ); + BX_UNUSED(kind); + } + + if (NULL != m_constantBuffer) + { + m_constantBuffer->finish(); + } + } + + uint16_t shaderSize; + bx::read(&reader, shaderSize); + + const DWORD* code = (const DWORD*)reader.getDataPtr(); + bx::skip(&reader, 
shaderSize+1); + + m_code = copy(code, shaderSize); + + uint8_t numAttrs; + bx::read(&reader, numAttrs); + + memset(m_attrMask, 0, sizeof(m_attrMask) ); + + for (uint32_t ii = 0; ii < numAttrs; ++ii) + { + uint16_t id; + bx::read(&reader, id); + + Attrib::Enum attr = idToAttrib(id); + + if (Attrib::Count != attr) + { + m_attrMask[attr] = 0xff; + } + } + + bx::HashMurmur2A murmur; + murmur.begin(); + murmur.add(iohash); + murmur.add(code, shaderSize); + murmur.add(numAttrs); + murmur.add(m_attrMask, numAttrs); + m_hash = murmur.end(); + + bx::read(&reader, m_size); + } + + void TextureD3D12::create(const Memory* _mem, uint32_t _flags, uint8_t _skip) + { + ImageContainer imageContainer; + + if (imageParse(imageContainer, _mem->data, _mem->size) ) + { + uint8_t numMips = imageContainer.m_numMips; + const uint32_t startLod = bx::uint32_min(_skip, numMips-1); + numMips -= startLod; + const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(imageContainer.m_format) ); + const uint32_t textureWidth = bx::uint32_max(blockInfo.blockWidth, imageContainer.m_width >>startLod); + const uint32_t textureHeight = bx::uint32_max(blockInfo.blockHeight, imageContainer.m_height>>startLod); + + m_flags = _flags; + m_requestedFormat = (uint8_t)imageContainer.m_format; + m_textureFormat = (uint8_t)imageContainer.m_format; + + const TextureFormatInfo& tfi = s_textureFormat[m_requestedFormat]; + const bool convert = DXGI_FORMAT_UNKNOWN == tfi.m_fmt; + + uint8_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) ); + if (convert) + { + m_textureFormat = (uint8_t)TextureFormat::BGRA8; + bpp = 32; + } + + if (imageContainer.m_cubeMap) + { + m_type = TextureCube; + } + else if (imageContainer.m_depth > 1) + { + m_type = Texture3D; + } + else + { + m_type = Texture2D; + } + + m_numMips = numMips; + const uint32_t numSides = imageContainer.m_cubeMap ? 
6 : 1; + + uint32_t numSrd = numMips*numSides; + D3D12_SUBRESOURCE_DATA* srd = (D3D12_SUBRESOURCE_DATA*)alloca(numSrd*sizeof(D3D12_SUBRESOURCE_DATA) ); + + uint32_t kk = 0; + + const bool compressed = isCompressed(TextureFormat::Enum(m_textureFormat) ); + const bool swizzle = TextureFormat::BGRA8 == m_textureFormat && 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE); + uint32_t blockWidth = 1; + uint32_t blockHeight = 1; + + if (convert && compressed) + { + blockWidth = blockInfo.blockWidth; + blockHeight = blockInfo.blockHeight; + } + + const bool bufferOnly = 0 != (m_flags&BGFX_TEXTURE_RT_BUFFER_ONLY); + const bool computeWrite = 0 != (m_flags&BGFX_TEXTURE_COMPUTE_WRITE); + const bool renderTarget = 0 != (m_flags&BGFX_TEXTURE_RT_MASK); + + BX_TRACE("Texture %3d: %s (requested: %s), %dx%d%s RT[%c], BO[%c], CW[%c]%s." + , this - s_renderD3D12->m_textures + , getName( (TextureFormat::Enum)m_textureFormat) + , getName( (TextureFormat::Enum)m_requestedFormat) + , textureWidth + , textureHeight + , imageContainer.m_cubeMap ? "x6" : "" + , renderTarget ? 'x' : ' ' + , bufferOnly ? 'x' : ' ' + , computeWrite ? 'x' : ' ' + , swizzle ? 
" (swizzle BGRA8 -> RGBA8)" : "" + ); + + uint32_t totalSize = 0; + + for (uint8_t side = 0; side < numSides; ++side) + { + uint32_t width = textureWidth; + uint32_t height = textureHeight; + uint32_t depth = imageContainer.m_depth; + + for (uint32_t lod = 0; lod < numMips; ++lod) + { + width = bx::uint32_max(blockWidth, width); + height = bx::uint32_max(blockHeight, height); + depth = bx::uint32_max(1, depth); + + ImageMip mip; + if (imageGetRawData(imageContainer, side, lod+startLod, _mem->data, _mem->size, mip) ) + { + if (convert) + { + const uint32_t pitch = bx::strideAlign(width*bpp / 8, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const uint32_t slice = bx::strideAlign(pitch * height, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + + uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice); + imageDecodeToBgra8(temp + , mip.m_data + , mip.m_width + , mip.m_height + , pitch, mip.m_format + ); + + srd[kk].pData = temp; + srd[kk].RowPitch = pitch; + srd[kk].SlicePitch = slice; + totalSize += slice; + } + else if (compressed) + { + uint32_t pitch = bx::strideAlign( (mip.m_width /blockInfo.blockWidth )*mip.m_blockSize, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + uint32_t slice = bx::strideAlign( (mip.m_height/blockInfo.blockHeight)*pitch, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + + uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice); + imageCopy(mip.m_height/blockInfo.blockHeight + , (mip.m_width /blockInfo.blockWidth )*mip.m_blockSize + , mip.m_data + , pitch + , temp + ); + + srd[kk].pData = temp; + srd[kk].RowPitch = pitch; + srd[kk].SlicePitch = slice; + totalSize += slice; + } + else + { + const uint32_t pitch = bx::strideAlign(mip.m_width*mip.m_bpp / 8, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const uint32_t slice = bx::strideAlign(pitch * mip.m_height, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + + uint8_t* temp = (uint8_t*)BX_ALLOC(g_allocator, slice); + imageCopy(mip.m_height + , mip.m_width*mip.m_bpp / 8 + , mip.m_data + , pitch + , temp + ); + + srd[kk].pData = temp; + 
srd[kk].RowPitch = pitch; + srd[kk].SlicePitch = slice; + totalSize += slice; + } + + if (swizzle) + { +// imageSwizzleBgra8(width, height, mip.m_width*4, data, temp); + } + + srd[kk].SlicePitch = mip.m_height*srd[kk].RowPitch; + ++kk; + } + else + { + const uint32_t pitch = bx::strideAlign(width*bpp / 8, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const uint32_t slice = bx::strideAlign(pitch * height, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + totalSize += slice; + } + + width >>= 1; + height >>= 1; + depth >>= 1; + } + } + + BX_TRACE("texture total size: %d", totalSize); + + const uint32_t msaaQuality = bx::uint32_satsub( (m_flags&BGFX_TEXTURE_RT_MSAA_MASK)>>BGFX_TEXTURE_RT_MSAA_SHIFT, 1); + const DXGI_SAMPLE_DESC& msaa = s_msaa[msaaQuality]; + + memset(&m_srvd, 0, sizeof(m_srvd) ); + m_srvd.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + m_srvd.Format = s_textureFormat[m_textureFormat].m_fmtSrv; + DXGI_FORMAT format = s_textureFormat[m_textureFormat].m_fmt; + if (swizzle) + { + format = DXGI_FORMAT_R8G8B8A8_UNORM; + m_srvd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + } + + m_uavd.Format = m_srvd.Format; + + ID3D12Device* device = s_renderD3D12->m_device; + ID3D12GraphicsCommandList* commandList = s_renderD3D12->m_commandList; + + D3D12_RESOURCE_DESC resourceDesc; + resourceDesc.Alignment = 0; + resourceDesc.Width = textureWidth; + resourceDesc.Height = textureHeight; + resourceDesc.MipLevels = numMips; + resourceDesc.Format = format; + resourceDesc.SampleDesc = msaa; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + resourceDesc.DepthOrArraySize = numSides; + + D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + + D3D12_CLEAR_VALUE* clearValue = NULL; + if (isDepth(TextureFormat::Enum(m_textureFormat) ) ) + { + resourceDesc.Format = s_textureFormat[m_textureFormat].m_fmt; + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + state |= 
D3D12_RESOURCE_STATE_DEPTH_WRITE; + state &= ~D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + + clearValue = (D3D12_CLEAR_VALUE*)alloca(sizeof(D3D12_CLEAR_VALUE) ); + clearValue->Format = s_textureFormat[m_textureFormat].m_fmtDsv; + clearValue->DepthStencil.Depth = 1.0f; + clearValue->DepthStencil.Stencil = 0; + } + else if (renderTarget) + { + clearValue = (D3D12_CLEAR_VALUE*)alloca(sizeof(D3D12_CLEAR_VALUE) ); + clearValue->Format = resourceDesc.Format; + clearValue->Color[0] = 0.0f; + clearValue->Color[1] = 0.0f; + clearValue->Color[2] = 0.0f; + clearValue->Color[3] = 0.0f; + + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + + if (bufferOnly) + { + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + state &= ~D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + } + + if (computeWrite) + { + resourceDesc.Flags &= ~D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + + switch (m_type) + { + case Texture2D: + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + m_srvd.ViewDimension = 1 < msaa.Count ? 
D3D12_SRV_DIMENSION_TEXTURE2DMS : D3D12_SRV_DIMENSION_TEXTURE2D; + m_srvd.Texture2D.MostDetailedMip = 0; + m_srvd.Texture2D.MipLevels = numMips; + m_srvd.Texture2D.ResourceMinLODClamp = 0.0f; + + m_uavd.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + m_uavd.Texture2D.MipSlice = 0; + m_uavd.Texture2D.PlaneSlice = 0; + break; + + case Texture3D: + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; + m_srvd.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + m_srvd.Texture3D.MostDetailedMip = 0; + m_srvd.Texture3D.MipLevels = numMips; + m_srvd.Texture3D.ResourceMinLODClamp = 0.0f; + + m_uavd.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + m_uavd.Texture3D.MipSlice = 0; + m_uavd.Texture3D.FirstWSlice = 0; + m_uavd.Texture3D.WSize = 0; + break; + + case TextureCube: + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + m_srvd.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + m_srvd.TextureCube.MostDetailedMip = 0; + m_srvd.TextureCube.MipLevels = numMips; + m_srvd.TextureCube.ResourceMinLODClamp = 0.0f; + + m_uavd.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + m_uavd.Texture2D.MipSlice = 0; + m_uavd.Texture2D.PlaneSlice = 0; + break; + } + + m_ptr = createCommittedResource(device, HeapProperty::Default, &resourceDesc, clearValue); + + { + uint64_t uploadBufferSize; + uint32_t* numRows = (uint32_t*)alloca(sizeof(uint32_t)*numSrd); + uint64_t* rowSizeInBytes = (uint64_t*)alloca(sizeof(uint64_t)*numSrd); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT* layouts = (D3D12_PLACED_SUBRESOURCE_FOOTPRINT*)alloca(sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT)*numSrd); + + device->GetCopyableFootprints(&resourceDesc + , 0 + , numSrd + , 0 + , layouts + , numRows + , rowSizeInBytes + , &uploadBufferSize + ); + BX_WARN(uploadBufferSize == totalSize, "uploadBufferSize %d (totalSize %d), numRows %d, rowSizeInBytes %d" + , uploadBufferSize + , totalSize + , numRows[0] + , rowSizeInBytes[0] + ); + } + + if (kk != 0) + { + m_staging = createCommittedResource(device, 
HeapProperty::Upload, totalSize); + + setState(commandList,D3D12_RESOURCE_STATE_COPY_DEST); + + uint64_t result = UpdateSubresources(commandList + , m_ptr + , m_staging + , 0 + , 0 + , numSrd + , srd + ); + BX_CHECK(0 != result, "Invalid size"); + BX_TRACE("Update subresource %" PRId64, result); + + setState(commandList, state); + } + else + { + m_staging = NULL; + + setState(commandList, state); + } + + if (0 != kk) + { + kk = 0; + for (uint8_t side = 0; side < numSides; ++side) + { + for (uint32_t lod = 0, num = numMips; lod < num; ++lod) + { + BX_FREE(g_allocator, const_cast(srd[kk].pData) ); + ++kk; + } + } + } + } + } + + void TextureD3D12::destroy() + { + if (NULL != m_ptr) + { + DX_RELEASE(m_ptr, 0); + m_ptr = NULL; + + DX_RELEASE(m_staging, 0); + m_staging = NULL; + } + } + + void TextureD3D12::update(ID3D12GraphicsCommandList* _commandList, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem) + { + setState(_commandList, D3D12_RESOURCE_STATE_COPY_DEST); + + const uint32_t subres = _mip + (_side * m_numMips); + const uint32_t bpp = getBitsPerPixel(TextureFormat::Enum(m_textureFormat) ); + const uint32_t rectpitch = _rect.m_width*bpp/8; + const uint32_t srcpitch = UINT16_MAX == _pitch ? 
rectpitch : _pitch; + + s_renderD3D12->m_cmd.finish(s_renderD3D12->m_cmd.kick() ); + s_renderD3D12->m_commandList = s_renderD3D12->m_cmd.alloc(); + _commandList = s_renderD3D12->m_commandList; + + DX_RELEASE(m_staging, 0); + + D3D12_RESOURCE_DESC desc = m_ptr->GetDesc(); + + desc.Height = _rect.m_height; + + uint32_t numRows; + uint64_t rowPitch; + uint64_t totalBytes; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; + s_renderD3D12->m_device->GetCopyableFootprints(&desc + , subres + , 1 + , 0 + , &layout + , &numRows + , &rowPitch + , &totalBytes + ); + + m_staging = createCommittedResource(s_renderD3D12->m_device, HeapProperty::Upload, totalBytes); + DX_NAME(m_staging, "texture %4d: staging, update", this - s_renderD3D12->m_textures); + + uint8_t* data; + + DX_CHECK(m_staging->Map(0, NULL, (void**)&data) ); + for (uint32_t ii = 0, height = _rect.m_height; ii < height; ++ii) + { + memcpy(&data[ii*rowPitch], &_mem->data[ii*srcpitch], srcpitch); + } + m_staging->Unmap(0, NULL); + + D3D12_BOX box; + box.left = 0; + box.top = 0; + box.right = box.left + _rect.m_width; + box.bottom = box.top + _rect.m_height; + box.front = _z; + box.back = _z+_depth; + + D3D12_TEXTURE_COPY_LOCATION dst = { m_ptr, D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, { subres } }; + D3D12_TEXTURE_COPY_LOCATION src = { m_staging, D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, layout }; + _commandList->CopyTextureRegion(&dst, _rect.m_x, _rect.m_y, 0, &src, &box); + + setState(_commandList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + + void TextureD3D12::commit(uint8_t _stage, uint32_t _flags) + { + BX_UNUSED(_stage, _flags); + } + + void TextureD3D12::resolve() + { + } + + void TextureD3D12::setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state) + { + if (m_state != _state) + { + setResourceBarrier(_commandList + , m_ptr + , m_state + , _state + ); + + m_state = _state; + } + } + + void FrameBufferD3D12::create(uint8_t _num, const TextureHandle* _handles) + { + m_numTh = _num; 
+ memcpy(m_th, _handles, _num*sizeof(TextureHandle) ); + + postReset(); + } + + void FrameBufferD3D12::create(uint16_t /*_denseIdx*/, void* /*_nwh*/, uint32_t /*_width*/, uint32_t /*_height*/, TextureFormat::Enum /*_depthFormat*/) + { + } + + void FrameBufferD3D12::preReset() + { + } + + void FrameBufferD3D12::postReset() + { + if (m_numTh != 0) + { + ID3D12Device* device = s_renderD3D12->m_device; + + D3D12_CPU_DESCRIPTOR_HANDLE rtvDescriptor = s_renderD3D12->m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + uint32_t rtvDescriptorSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + uint32_t fbhIdx = (uint32_t)(this - s_renderD3D12->m_frameBuffers); + rtvDescriptor.ptr += (BX_COUNTOF(s_renderD3D12->m_backBufferColor) + fbhIdx * BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS) * rtvDescriptorSize; + + m_depth.idx = bgfx::invalidHandle; + m_num = 0; + for (uint32_t ii = 0; ii < m_numTh; ++ii) + { + TextureHandle handle = m_th[ii]; + if (isValid(handle) ) + { + const TextureD3D12& texture = s_renderD3D12->m_textures[handle.idx]; + if (isDepth( (TextureFormat::Enum)texture.m_textureFormat) ) + { + BX_CHECK(!isValid(m_depth), ""); + m_depth = handle; + D3D12_CPU_DESCRIPTOR_HANDLE dsvDescriptor = s_renderD3D12->m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + uint32_t dsvDescriptorSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + dsvDescriptor.ptr += (1 + fbhIdx) * dsvDescriptorSize; + + const ImageBlockInfo& blockInfo = getBlockInfo(TextureFormat::Enum(texture.m_textureFormat) ); + BX_UNUSED(blockInfo); + + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc; + ZeroMemory(&dsvDesc, sizeof(dsvDesc) ); + dsvDesc.Format = s_textureFormat[texture.m_textureFormat].m_fmtDsv; + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + dsvDesc.Flags = D3D12_DSV_FLAG_NONE +// | (blockInfo.depthBits > 0 ? D3D12_DSV_FLAG_READ_ONLY_DEPTH : D3D12_DSV_FLAG_NONE) +// | (blockInfo.stencilBits > 0 ? 
D3D12_DSV_FLAG_READ_ONLY_STENCIL : D3D12_DSV_FLAG_NONE) + ; + + device->CreateDepthStencilView(texture.m_ptr + , &dsvDesc + , dsvDescriptor + ); + } + else + { + m_texture[m_num] = handle; + D3D12_CPU_DESCRIPTOR_HANDLE rtv = { rtvDescriptor.ptr + m_num * rtvDescriptorSize }; + device->CreateRenderTargetView(texture.m_ptr + , NULL + , rtv + ); + m_num++; + } + } + } + } + } + + uint16_t FrameBufferD3D12::destroy() + { + m_numTh = 0; + + m_depth.idx = bgfx::invalidHandle; + + uint16_t denseIdx = m_denseIdx; + m_denseIdx = UINT16_MAX; + + return denseIdx; + } + + void FrameBufferD3D12::resolve() + { + } + + void FrameBufferD3D12::clear(ID3D12GraphicsCommandList* _commandList, const Clear& _clear, const float _palette[][4], const D3D12_RECT* _rect, uint32_t _num) + { + ID3D12Device* device = s_renderD3D12->m_device; + const uint32_t fbhIdx = (uint32_t)(this - s_renderD3D12->m_frameBuffers); + + if (BGFX_CLEAR_COLOR & _clear.m_flags + && 0 != m_num) + { + D3D12_CPU_DESCRIPTOR_HANDLE rtvDescriptor = s_renderD3D12->m_rtvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + uint32_t rtvDescriptorSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + rtvDescriptor.ptr += (BX_COUNTOF(s_renderD3D12->m_backBufferColor) + fbhIdx * BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS) * rtvDescriptorSize; + + if (BGFX_CLEAR_COLOR_USE_PALETTE & _clear.m_flags) + { + for (uint32_t ii = 0, num = m_num; ii < num; ++ii) + { + uint8_t index = _clear.m_index[ii]; + if (UINT8_MAX != index) + { + D3D12_CPU_DESCRIPTOR_HANDLE rtv = { rtvDescriptor.ptr + ii * rtvDescriptorSize }; + _commandList->ClearRenderTargetView(rtv + , _palette[index] + , _num + , _rect + ); + } + } + } + else + { + float frgba[4] = + { + _clear.m_index[0]*1.0f/255.0f, + _clear.m_index[1]*1.0f/255.0f, + _clear.m_index[2]*1.0f/255.0f, + _clear.m_index[3]*1.0f/255.0f, + }; + for (uint32_t ii = 0, num = m_num; ii < num; ++ii) + { + D3D12_CPU_DESCRIPTOR_HANDLE rtv = { rtvDescriptor.ptr + ii * 
rtvDescriptorSize }; + _commandList->ClearRenderTargetView(rtv + , frgba + , _num + , _rect + ); + } + } + } + + if (isValid(m_depth) + && (BGFX_CLEAR_DEPTH|BGFX_CLEAR_STENCIL) & _clear.m_flags) + { + D3D12_CPU_DESCRIPTOR_HANDLE dsvDescriptor = s_renderD3D12->m_dsvDescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + uint32_t dsvDescriptorSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + dsvDescriptor.ptr += (1 + fbhIdx) * dsvDescriptorSize; + + DWORD flags = 0; + flags |= (_clear.m_flags & BGFX_CLEAR_DEPTH) ? D3D12_CLEAR_FLAG_DEPTH : 0; + flags |= (_clear.m_flags & BGFX_CLEAR_STENCIL) ? D3D12_CLEAR_FLAG_STENCIL : 0; + + _commandList->ClearDepthStencilView(dsvDescriptor + , D3D12_CLEAR_FLAGS(flags) + , _clear.m_depth + , _clear.m_stencil + , _num + , _rect + ); + } + } + + void RendererContextD3D12::submit(Frame* _render, ClearQuad& _clearQuad, TextVideoMemBlitter& _textVideoMemBlitter) + { +// PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), L"rendererSubmit"); + + updateResolution(_render->m_resolution); + + int64_t elapsed = -bx::getHPCounter(); + int64_t captureElapsed = 0; + + if (0 < _render->m_iboffset) + { + TransientIndexBuffer* ib = _render->m_transientIb; + m_indexBuffers[ib->handle.idx].update(m_commandList, 0, _render->m_iboffset, ib->data); + } + + if (0 < _render->m_vboffset) + { + TransientVertexBuffer* vb = _render->m_transientVb; + m_vertexBuffers[vb->handle.idx].update(m_commandList, 0, _render->m_vboffset, vb->data); + } + + _render->sort(); + + RenderDraw currentState; + currentState.clear(); + currentState.m_flags = BGFX_STATE_NONE; + currentState.m_stencil = packStencil(BGFX_STENCIL_NONE, BGFX_STENCIL_NONE); + + _render->m_hmdInitialized = false; + + const bool hmdEnabled = false; + ViewState viewState(_render, hmdEnabled); + viewState.reset(_render, hmdEnabled); + +// bool wireframe = !!(_render->m_debug&BGFX_DEBUG_WIREFRAME); +// bool scissorEnabled = false; +// setDebugWireframe(wireframe); + + 
uint16_t programIdx = invalidHandle; + ID3D12PipelineState* currentPso = NULL; + SortKey key; + uint8_t view = 0xff; + FrameBufferHandle fbh = BGFX_INVALID_HANDLE; + float alphaRef = 0.0f; + +// const uint64_t pt = _render->m_debug&BGFX_DEBUG_WIREFRAME ? BGFX_STATE_PT_LINES : 0; +// uint8_t primIndex = uint8_t(pt >> BGFX_STATE_PT_SHIFT); +// PrimInfo prim = s_primInfo[primIndex]; +// deviceCtx->IASetPrimitiveTopology(prim.m_type); + + bool wasCompute = false; + bool viewHasScissor = false; + Rect viewScissorRect; + viewScissorRect.clear(); + + BX_UNUSED(alphaRef); + + uint32_t statsNumPrimsSubmitted[BX_COUNTOF(s_primInfo)] = {}; + uint32_t statsNumPrimsRendered[BX_COUNTOF(s_primInfo)] = {}; + uint32_t statsNumInstances[BX_COUNTOF(s_primInfo)] = {}; + uint32_t statsNumIndices = 0; + uint32_t statsKeyType[2] = {}; + + m_backBufferColorIdx = m_frame % m_scd.BufferCount; + m_frame++; + + const uint64_t f0 = BGFX_STATE_BLEND_FUNC(BGFX_STATE_BLEND_FACTOR, BGFX_STATE_BLEND_FACTOR); + const uint64_t f1 = BGFX_STATE_BLEND_FUNC(BGFX_STATE_BLEND_INV_FACTOR, BGFX_STATE_BLEND_INV_FACTOR); + + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle; + m_scratchBuffer[m_backBufferColorIdx].reset(gpuHandle); + + setResourceBarrier(m_commandList + , m_backBufferColor[m_backBufferColorIdx] + , D3D12_RESOURCE_STATE_PRESENT + , D3D12_RESOURCE_STATE_RENDER_TARGET + ); + + if (0 == (_render->m_debug&BGFX_DEBUG_IFH) ) + { +// uint8_t eye = 0; +// uint8_t restartState = 0; + viewState.m_rect = _render->m_rect[0]; + + int32_t numItems = _render->m_num; + for (int32_t item = 0, restartItem = numItems; item < numItems || restartItem < numItems;) + { + const bool isCompute = key.decode(_render->m_sortKeys[item], _render->m_viewRemap); + statsKeyType[isCompute]++; + + const bool viewChanged = 0 + || key.m_view != view + || item == numItems + ; + + const RenderItem& renderItem = _render->m_renderItem[_render->m_sortValues[item] ]; + ++item; + + if (viewChanged) + { + kick(); + + if (isCompute) + { + 
m_commandList->SetComputeRootSignature(m_rootSignature); + } + else + { + m_commandList->SetGraphicsRootSignature(m_rootSignature); + } + + ID3D12DescriptorHeap* heaps[] = { + m_samplerAllocator.getHeap(), + m_scratchBuffer[m_backBufferColorIdx].getHeap(), + }; + m_commandList->SetDescriptorHeaps(BX_COUNTOF(heaps), heaps); + + view = key.m_view; + programIdx = invalidHandle; + currentPso = NULL; + + fbh = _render->m_fb[view]; + setFrameBuffer(fbh); + + viewState.m_rect = _render->m_rect[view]; + const Rect& rect = _render->m_rect[view]; + const Rect& scissorRect = _render->m_scissor[view]; + viewHasScissor = !scissorRect.isZero(); + viewScissorRect = viewHasScissor ? scissorRect : rect; + + D3D12_VIEWPORT vp; + vp.TopLeftX = rect.m_x; + vp.TopLeftY = rect.m_y; + vp.Width = rect.m_width; + vp.Height = rect.m_height; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + m_commandList->RSSetViewports(1, &vp); + + D3D12_RECT rc; + rc.left = viewScissorRect.m_x; + rc.top = viewScissorRect.m_y; + rc.right = viewScissorRect.m_x + viewScissorRect.m_width; + rc.bottom = viewScissorRect.m_y + viewScissorRect.m_height; + m_commandList->RSSetScissorRects(1, &rc); + + Clear& clear = _render->m_clear[view]; + if (BGFX_CLEAR_NONE != clear.m_flags) + { + Rect clearRect = rect; + clearRect.intersect(rect, viewScissorRect); + clearQuad(_clearQuad, clearRect, clear, _render->m_clearColor); + } + } + + if (isCompute) + { + if (!wasCompute) + { + wasCompute = true; + } + const RenderCompute& compute = renderItem.compute; + + bool programChanged = false; + bool constantsChanged = compute.m_constBegin < compute.m_constEnd; + rendererUpdateUniforms(this, _render->m_constantBuffer, compute.m_constBegin, compute.m_constEnd); + + if (key.m_program != programIdx) + { + programIdx = key.m_program; + + ProgramD3D12& program = m_program[key.m_program]; + m_currentProgram = &program; + + programChanged = + constantsChanged = true; + } + + if (invalidHandle != programIdx) + { + ProgramD3D12& program = 
m_program[programIdx]; + + if (constantsChanged) + { + ConstantBuffer* vcb = program.m_vsh->m_constantBuffer; + if (NULL != vcb) + { + commit(*vcb); + } + } + + viewState.setPredefined<4>(this, view, 0, program, _render, compute); + + if (constantsChanged + || program.m_numPredefined > 0) + { + commitShaderConstants(gpuHandle); + } + } + + if (programChanged) + { + ID3D12PipelineState* pso = getPipelineState(programIdx); + m_commandList->SetPipelineState(pso); + } + + D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_MAX_COMPUTE_BINDINGS] = {}; + uint32_t samplerFlags[BGFX_MAX_COMPUTE_BINDINGS] = {}; + + for (uint32_t ii = 0; ii < BGFX_MAX_COMPUTE_BINDINGS; ++ii) + { + const Binding& bind = compute.m_bind[ii]; + if (invalidHandle != bind.m_idx) + { + switch (bind.m_type) + { + case Binding::Image: + { + TextureD3D12& texture = m_textures[bind.m_idx]; + + if (Access::Read != bind.m_un.m_compute.m_access) + { + m_scratchBuffer[m_backBufferColorIdx].allocUav(srvHandle[ii], texture); + } + else + { + m_scratchBuffer[m_backBufferColorIdx].alloc(srvHandle[ii], texture); + } + } + break; + + case Binding::IndexBuffer: + case Binding::VertexBuffer: + { + BufferD3D12& buffer = Binding::IndexBuffer == bind.m_type + ? 
m_indexBuffers[bind.m_idx] + : m_vertexBuffers[bind.m_idx] + ; + + if (Access::Read != bind.m_un.m_compute.m_access) + { + m_scratchBuffer[m_backBufferColorIdx].allocUav(srvHandle[ii], buffer); + } + else + { + m_scratchBuffer[m_backBufferColorIdx].alloc(srvHandle[ii], buffer); + } + } + break; + } + } + } + + uint16_t samplerStateIdx = getSamplerState(samplerFlags, BGFX_MAX_COMPUTE_BINDINGS); + m_commandList->SetComputeRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx)); + m_commandList->SetComputeRootDescriptorTable(Rdt::SRV, srvHandle[0]); + m_commandList->SetComputeRootDescriptorTable(Rdt::CBV, gpuHandle); + m_commandList->SetComputeRootDescriptorTable(Rdt::UAV, srvHandle[0]); + + if (isValid(compute.m_indirectBuffer) ) + { + const VertexBufferD3D12& vb = m_vertexBuffers[compute.m_indirectBuffer.idx]; + + uint32_t numDrawIndirect = UINT16_MAX == compute.m_numIndirect + ? vb.m_size/BGFX_CONFIG_DRAW_INDIRECT_STRIDE + : compute.m_numIndirect + ; + + uint32_t args = compute.m_startIndirect * BGFX_CONFIG_DRAW_INDIRECT_STRIDE; + for (uint32_t ii = 0; ii < numDrawIndirect; ++ii) + { +// deviceCtx->DispatchIndirect(ptr, args); + args += BGFX_CONFIG_DRAW_INDIRECT_STRIDE; + } + } + else + { + m_commandList->Dispatch(compute.m_numX, compute.m_numY, compute.m_numZ); + } + + continue; + } + +// bool resetState = viewChanged || wasCompute; + + if (wasCompute) + { + if (BX_ENABLED(BGFX_CONFIG_DEBUG_PIX) ) + { +// wchar_t* viewNameW = s_viewNameW[view]; +// viewNameW[3] = L' '; +// PIX_ENDEVENT(); +// PIX_BEGINEVENT(D3DCOLOR_RGBA(0xff, 0x00, 0x00, 0xff), viewNameW); + } + + wasCompute = false; + + programIdx = invalidHandle; + m_currentProgram = NULL; + + m_commandList->SetGraphicsRootSignature(m_rootSignature); + ID3D12DescriptorHeap* heaps[] = { + m_samplerAllocator.getHeap(), + m_scratchBuffer[m_backBufferColorIdx].getHeap(), + }; + m_commandList->SetDescriptorHeaps(BX_COUNTOF(heaps), heaps); + +// invalidateCompute(); + } + + const RenderDraw& draw = 
renderItem.draw; + + bool constantsChanged = draw.m_constBegin < draw.m_constEnd; + rendererUpdateUniforms(this, _render->m_constantBuffer, draw.m_constBegin, draw.m_constEnd); + + if (isValid(draw.m_vertexBuffer) ) + { + bool programChanged = false; + + if (key.m_program != programIdx) + { + programIdx = key.m_program; + + if (invalidHandle == programIdx) + { + m_currentProgram = NULL; + } + else + { + ProgramD3D12& program = m_program[programIdx]; + m_currentProgram = &program; + } + + programChanged = + constantsChanged = true; + } + + if (invalidHandle != programIdx) + { + ProgramD3D12& program = m_program[programIdx]; + + if (constantsChanged) + { + ConstantBuffer* vcb = program.m_vsh->m_constantBuffer; + if (NULL != vcb) + { + commit(*vcb); + } + + ConstantBuffer* fcb = program.m_fsh->m_constantBuffer; + if (NULL != fcb) + { + commit(*fcb); + } + } + + viewState.setPredefined<4>(this, view, 0, program, _render, draw); + + if (constantsChanged + || program.m_numPredefined > 0) + { + commitShaderConstants(gpuHandle); + } + } + + const VertexBufferD3D12& vb = m_vertexBuffers[draw.m_vertexBuffer.idx]; + uint16_t declIdx = !isValid(vb.m_decl) ? 
draw.m_vertexDecl.idx : vb.m_decl.idx; + const VertexDecl& vertexDecl = m_vertexDecls[declIdx]; + + const uint64_t state = draw.m_flags; + ID3D12PipelineState* pso = + getPipelineState(state + , draw.m_stencil + , declIdx + , programIdx + , draw.m_instanceDataStride/16 + ); + if (pso != currentPso) + { + currentPso = pso; + m_commandList->SetPipelineState(pso); + } + + const uint32_t fstencil = unpackStencil(0, draw.m_stencil); + const uint32_t ref = (fstencil&BGFX_STENCIL_FUNC_REF_MASK)>>BGFX_STENCIL_FUNC_REF_SHIFT; + m_commandList->OMSetStencilRef(ref); + + bool hasFactor = 0 + || f0 == (state & f0) + || f1 == (state & f1) + ; + + if (hasFactor) + { + float blendFactor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; + blendFactor[0] = ( (draw.m_rgba>>24) )/255.0f; + blendFactor[1] = ( (draw.m_rgba>>16)&0xff)/255.0f; + blendFactor[2] = ( (draw.m_rgba>> 8)&0xff)/255.0f; + blendFactor[3] = ( (draw.m_rgba )&0xff)/255.0f; + m_commandList->OMSetBlendFactor(blendFactor); + } + + D3D12_GPU_DESCRIPTOR_HANDLE srvHandle[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; + uint32_t samplerFlags[BGFX_CONFIG_MAX_TEXTURE_SAMPLERS]; + { + srvHandle[0].ptr = 0; + for (uint32_t stage = 0; stage < BGFX_CONFIG_MAX_TEXTURE_SAMPLERS; ++stage) + { + const Binding& sampler = draw.m_bind[stage]; + if (invalidHandle != sampler.m_idx) + { + TextureD3D12& texture = m_textures[sampler.m_idx]; + m_scratchBuffer[m_backBufferColorIdx].alloc(srvHandle[stage], texture); + samplerFlags[stage] = (0 == (BGFX_SAMPLER_DEFAULT_FLAGS & sampler.m_un.m_draw.m_flags) + ? 
sampler.m_un.m_draw.m_flags + : texture.m_flags + ) & BGFX_TEXTURE_SAMPLER_BITS_MASK + ; + } + else + { + memcpy(&srvHandle[stage], &srvHandle[0], sizeof(D3D12_GPU_DESCRIPTOR_HANDLE) ); + samplerFlags[stage] = 0; + } + } + } + + uint16_t samplerStateIdx = getSamplerState(samplerFlags); + m_commandList->SetGraphicsRootDescriptorTable(Rdt::Sampler, m_samplerAllocator.get(samplerStateIdx) ); + if (srvHandle[0].ptr != 0) + { + m_commandList->SetGraphicsRootDescriptorTable(Rdt::SRV, srvHandle[0]); + } + m_commandList->SetGraphicsRootDescriptorTable(Rdt::CBV, gpuHandle); + + uint32_t numVertices = draw.m_numVertices; + if (UINT32_MAX == numVertices) + { + numVertices = vb.m_size / vertexDecl.m_stride; + } + + D3D12_VERTEX_BUFFER_VIEW vbView[2]; + uint32_t numVertexBuffers = 1; + vbView[0].BufferLocation = vb.m_ptr->GetGPUVirtualAddress(); + vbView[0].StrideInBytes = vertexDecl.m_stride; + vbView[0].SizeInBytes = vb.m_size; + + if (isValid(draw.m_instanceDataBuffer) ) + { + const VertexBufferD3D12& inst = m_vertexBuffers[draw.m_instanceDataBuffer.idx]; + vbView[1].BufferLocation = inst.m_ptr->GetGPUVirtualAddress() + draw.m_instanceDataOffset; + vbView[1].StrideInBytes = draw.m_instanceDataStride; + vbView[1].SizeInBytes = draw.m_numInstances * draw.m_instanceDataStride; + ++numVertexBuffers; + } + + m_commandList->IASetVertexBuffers(0, numVertexBuffers, vbView); + + uint32_t numIndices = 0; + uint32_t numPrimsSubmitted = 0; + uint32_t numInstances = 0; + uint32_t numPrimsRendered = 0; + + const uint64_t pt = draw.m_flags&BGFX_STATE_PT_MASK; + uint8_t primIdx = uint8_t(pt >> BGFX_STATE_PT_SHIFT); + PrimInfo prim = s_primInfo[primIdx]; + m_commandList->IASetPrimitiveTopology(prim.m_toplogy); + + if (isValid(draw.m_indexBuffer) ) + { + const BufferD3D12& ib = m_indexBuffers[draw.m_indexBuffer.idx]; + const bool hasIndex16 = 0 == (ib.m_flags & BGFX_BUFFER_INDEX32); + + D3D12_INDEX_BUFFER_VIEW ibv; + ibv.Format = hasIndex16 + ? 
DXGI_FORMAT_R16_UINT + : DXGI_FORMAT_R32_UINT + ; + ibv.BufferLocation = ib.m_ptr->GetGPUVirtualAddress(); + ibv.SizeInBytes = ib.m_size; + m_commandList->IASetIndexBuffer(&ibv); + + if (UINT32_MAX == draw.m_numIndices) + { + const uint32_t indexSize = hasIndex16 ? 2 : 4; + numIndices = ib.m_size / indexSize; + numPrimsSubmitted = numIndices / prim.m_div - prim.m_sub; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; + + m_commandList->DrawIndexedInstanced(numIndices + , draw.m_numInstances + , draw.m_startIndex + , draw.m_startVertex + , 0 + ); + } + else if (prim.m_min <= draw.m_numIndices) + { + numIndices = draw.m_numIndices; + numPrimsSubmitted = numIndices / prim.m_div - prim.m_sub; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; + + m_commandList->DrawIndexedInstanced(numIndices + , draw.m_numInstances + , draw.m_startIndex + , draw.m_startVertex + , 0 + ); + } + } + else + { + numPrimsSubmitted = numVertices / prim.m_div - prim.m_sub; + numInstances = draw.m_numInstances; + numPrimsRendered = numPrimsSubmitted*draw.m_numInstances; + + m_commandList->DrawInstanced(numVertices + , draw.m_numInstances + , draw.m_startVertex + , 0 + ); + } + + statsNumPrimsSubmitted[primIdx] += numPrimsSubmitted; + statsNumPrimsRendered[primIdx] += numPrimsRendered; + statsNumInstances[primIdx] += numInstances; + statsNumIndices += numIndices; + } + } + } + + int64_t now = bx::getHPCounter(); + elapsed += now; + + static int64_t last = now; + int64_t frameTime = now - last; + last = now; + + static int64_t min = frameTime; + static int64_t max = frameTime; + min = min > frameTime ? frameTime : min; + max = max < frameTime ? 
frameTime : max; + + if (_render->m_debug & (BGFX_DEBUG_IFH | BGFX_DEBUG_STATS) ) + { +// PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugstats"); + + TextVideoMem& tvm = m_textVideoMem; + + static int64_t next = now; + + if (now >= next) + { + next = now + bx::getHPFrequency(); + double freq = double(bx::getHPFrequency() ); + double toMs = 1000.0 / freq; + + tvm.clear(); + uint16_t pos = 0; + tvm.printf(0, pos++, BGFX_CONFIG_DEBUG ? 0x89 : 0x8f + , " %s / " BX_COMPILER_NAME " / " BX_CPU_NAME " / " BX_ARCH_NAME " / " BX_PLATFORM_NAME " " + , getRendererName() + ); + + const DXGI_ADAPTER_DESC& desc = m_adapterDesc; + char description[BX_COUNTOF(desc.Description)]; + wcstombs(description, desc.Description, BX_COUNTOF(desc.Description) ); + tvm.printf(0, pos++, 0x0f, " Device: %s", description); + + char dedicatedVideo[16]; + bx::prettify(dedicatedVideo, BX_COUNTOF(dedicatedVideo), desc.DedicatedVideoMemory); + + char dedicatedSystem[16]; + bx::prettify(dedicatedSystem, BX_COUNTOF(dedicatedSystem), desc.DedicatedSystemMemory); + + char sharedSystem[16]; + bx::prettify(sharedSystem, BX_COUNTOF(sharedSystem), desc.SharedSystemMemory); + + tvm.printf(0, pos++, 0x0f, " Memory: %s (video), %s (system), %s (shared)" + , dedicatedVideo + , dedicatedSystem + , sharedSystem + ); + + pos = 10; + tvm.printf(10, pos++, 0x8e, " Frame: %7.3f, % 7.3f \x1f, % 7.3f \x1e [ms] / % 6.2f FPS " + , double(frameTime)*toMs + , double(min)*toMs + , double(max)*toMs + , freq/frameTime + ); + + char hmd[16]; + bx::snprintf(hmd, BX_COUNTOF(hmd), ", [%c] HMD ", hmdEnabled ? '\xfe' : ' '); + + const uint32_t msaa = (m_resolution.m_flags&BGFX_RESET_MSAA_MASK)>>BGFX_RESET_MSAA_SHIFT; + tvm.printf(10, pos++, 0x8e, " Reset flags: [%c] vsync, [%c] MSAAx%d%s, [%c] MaxAnisotropy " + , !!(m_resolution.m_flags&BGFX_RESET_VSYNC) ? '\xfe' : ' ' + , 0 != msaa ? 
'\xfe' : ' ' + , 1<m_num + , statsKeyType[0] + , statsKeyType[1] + , elapsedCpuMs + ); + + for (uint32_t ii = 0; ii < BX_COUNTOF(s_primName); ++ii) + { + tvm.printf(10, pos++, 0x8e, " %9s: %7d (#inst: %5d), submitted: %7d" + , s_primName[ii] + , statsNumPrimsRendered[ii] + , statsNumInstances[ii] + , statsNumPrimsSubmitted[ii] + ); + } + +// if (NULL != m_renderdocdll) +// { +// tvm.printf(tvm.m_width-27, 0, 0x1f, " [F11 - RenderDoc capture] "); +// } + + tvm.printf(10, pos++, 0x8e, " Indices: %7d", statsNumIndices); + tvm.printf(10, pos++, 0x8e, " DVB size: %7d", _render->m_vboffset); + tvm.printf(10, pos++, 0x8e, " DIB size: %7d", _render->m_iboffset); + + pos++; + tvm.printf(10, pos++, 0x8e, " State cache: "); + tvm.printf(10, pos++, 0x8e, " PSO | Sampler | Queued "); + tvm.printf(10, pos++, 0x8e, " %6d | %6d | %6d" + , m_pipelineStateCache.getCount() + , m_samplerStateCache.getCount() + , m_cmd.m_control.available() + ); + pos++; + + double captureMs = double(captureElapsed)*toMs; + tvm.printf(10, pos++, 0x8e, " Capture: %3.4f [ms]", captureMs); + + uint8_t attr[2] = { 0x89, 0x8a }; + uint8_t attrIndex = _render->m_waitSubmit < _render->m_waitRender; + + tvm.printf(10, pos++, attr[attrIndex&1], " Submit wait: %3.4f [ms]", _render->m_waitSubmit*toMs); + tvm.printf(10, pos++, attr[(attrIndex+1)&1], " Render wait: %3.4f [ms]", _render->m_waitRender*toMs); + + min = frameTime; + max = frameTime; + } + + blit(this, _textVideoMemBlitter, tvm); + +// PIX_ENDEVENT(); + } + else if (_render->m_debug & BGFX_DEBUG_TEXT) + { +// PIX_BEGINEVENT(D3DCOLOR_RGBA(0x40, 0x40, 0x40, 0xff), L"debugtext"); + + blit(this, _textVideoMemBlitter, _render->m_textVideoMem); + +// PIX_ENDEVENT(); + } + + setResourceBarrier(m_commandList + , m_backBufferColor[m_backBufferColorIdx] + , D3D12_RESOURCE_STATE_RENDER_TARGET + , D3D12_RESOURCE_STATE_PRESENT + ); + m_backBufferColorFence[m_backBufferColorIdx] = kick(); + } +} /* namespace d3d12 */ } // namespace bgfx + +#else + +namespace bgfx { 
RendererContextI* rendererCreate() { diff --git a/src/renderer_d3d12.h b/src/renderer_d3d12.h new file mode 100644 index 00000000..3003db14 --- /dev/null +++ b/src/renderer_d3d12.h @@ -0,0 +1,430 @@ +/* + * Copyright 2011-2015 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef BGFX_RENDERER_D3D12_H_HEADER_GUARD +#define BGFX_RENDERER_D3D12_H_HEADER_GUARD + +#define USE_D3D12_DYNAMIC_LIB 1 + +#include +#include +#include + +#include "renderer.h" +#include "renderer_d3d.h" +#include "shader_dxbc.h" + +namespace bgfx { namespace d3d12 +{ + struct Rdt + { + enum Enum + { + Sampler, + SRV, + CBV, + UAV, + + Count + }; + }; + + class ScratchBufferD3D12 + { + public: + ScratchBufferD3D12() + { + } + + ~ScratchBufferD3D12() + { + } + + void create(uint32_t _size, uint32_t _maxDescriptors); + void destroy(); + void reset(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle); + void* alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, uint32_t _size); + void alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, struct TextureD3D12& _texture); + void allocUav(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, struct TextureD3D12& _texture); + + void alloc(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, struct BufferD3D12& _buffer); + void allocUav(D3D12_GPU_DESCRIPTOR_HANDLE& gpuHandle, struct BufferD3D12& _buffer); + + ID3D12DescriptorHeap* getHeap() + { + return m_heap; + } + + private: + ID3D12DescriptorHeap* m_heap; + ID3D12Resource* m_upload; + D3D12_CPU_DESCRIPTOR_HANDLE m_cpuHandle; + D3D12_GPU_DESCRIPTOR_HANDLE m_gpuHandle; + uint32_t m_incrementSize; + uint8_t* m_data; + uint32_t m_size; + uint32_t m_pos; + }; + + class DescriptorAllocator + { + public: + DescriptorAllocator() + : m_numDescriptorsPerBlock(1) + { + } + + ~DescriptorAllocator() + { + } + + void create(D3D12_DESCRIPTOR_HEAP_TYPE _type, uint32_t _maxDescriptors, uint16_t _numDescriptorsPerBlock = 1); + void destroy(); + + uint16_t alloc(ID3D12Resource* _ptr, const 
D3D12_SHADER_RESOURCE_VIEW_DESC* _desc); + uint16_t alloc(const uint32_t* _flags, uint32_t _num = BGFX_CONFIG_MAX_TEXTURE_SAMPLERS); + void free(uint16_t _handle); + + D3D12_GPU_DESCRIPTOR_HANDLE get(uint16_t _handle); + + ID3D12DescriptorHeap* getHeap() + { + return m_heap; + } + + private: + ID3D12DescriptorHeap* m_heap; + bx::HandleAlloc* m_handleAlloc; + D3D12_CPU_DESCRIPTOR_HANDLE m_cpuHandle; + D3D12_GPU_DESCRIPTOR_HANDLE m_gpuHandle; + uint32_t m_incrementSize; + uint16_t m_numDescriptorsPerBlock; + }; + + struct BufferD3D12 + { + BufferD3D12() + : m_ptr(NULL) + , m_staging(NULL) + , m_state(D3D12_RESOURCE_STATE_COMMON) + , m_size(0) + , m_flags(BGFX_BUFFER_NONE) + , m_dynamic(false) + { + } + + void create(uint32_t _size, void* _data, uint16_t _flags, bool _vertex); + void update(ID3D12GraphicsCommandList* _commandList, uint32_t _offset, uint32_t _size, void* _data, bool _discard = false); + + void destroy() + { + if (NULL != m_ptr) + { + DX_RELEASE(m_ptr, 0); + DX_RELEASE(m_staging, 0); + m_dynamic = false; + } + } + + void setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state); + + D3D12_SHADER_RESOURCE_VIEW_DESC m_srvd; + D3D12_UNORDERED_ACCESS_VIEW_DESC m_uavd; + ID3D12Resource* m_ptr; + ID3D12Resource* m_staging; + D3D12_RESOURCE_STATES m_state; + uint32_t m_size; + uint16_t m_flags; + bool m_dynamic; + }; + + struct VertexBufferD3D12 : public BufferD3D12 + { + void create(uint32_t _size, void* _data, VertexDeclHandle _declHandle, uint16_t _flags); + + VertexDeclHandle m_decl; + }; + + struct ShaderD3D12 + { + ShaderD3D12() + : m_code(NULL) + , m_constantBuffer(NULL) + , m_hash(0) + , m_numUniforms(0) + , m_numPredefined(0) + { + } + + void create(const Memory* _mem); + DWORD* getShaderCode(uint8_t _fragmentBit, const Memory* _mem); + + void destroy() + { + if (NULL != m_constantBuffer) + { + ConstantBuffer::destroy(m_constantBuffer); + m_constantBuffer = NULL; + } + + m_numPredefined = 0; + + if (NULL != m_code) + { + 
release(m_code);
+				m_code = NULL;
+				m_hash = 0;
+			}
+		}
+
+		const Memory* m_code;
+		ConstantBuffer* m_constantBuffer;
+
+		PredefinedUniform m_predefined[PredefinedUniform::Count];
+		uint8_t m_attrMask[Attrib::Count];
+
+		uint32_t m_hash;
+		uint16_t m_numUniforms;
+		uint16_t m_size;
+		uint8_t m_numPredefined;
+	};
+
+	// Pairs a vertex shader with an optional fragment shader and keeps one
+	// combined table of the predefined uniforms used by both stages.
+	// The shaders are referenced, not owned: destroy() only drops the pointers.
+	struct ProgramD3D12
+	{
+		ProgramD3D12()
+			: m_vsh(NULL)
+			, m_fsh(NULL)
+			, m_numPredefined(0) // was left indeterminate until create()/destroy() ran
+		{
+		}
+
+		// Binds the program to its shaders and rebuilds the combined table.
+		// _vsh must be a created shader; _fsh may be NULL, in which case only
+		// the vertex shader's predefined uniforms are copied and m_fsh is left
+		// untouched.
+		void create(const ShaderD3D12* _vsh, const ShaderD3D12* _fsh)
+		{
+			BX_CHECK(NULL != _vsh->m_code, "Vertex shader doesn't exist.");
+			m_vsh = _vsh;
+			memcpy(&m_predefined[0], _vsh->m_predefined, _vsh->m_numPredefined*sizeof(PredefinedUniform));
+			m_numPredefined = _vsh->m_numPredefined;
+
+			if (NULL != _fsh)
+			{
+				BX_CHECK(NULL != _fsh->m_code, "Fragment shader doesn't exist.");
+				m_fsh = _fsh;
+				// Fragment entries are appended right after the vertex entries.
+				memcpy(&m_predefined[m_numPredefined], _fsh->m_predefined, _fsh->m_numPredefined*sizeof(PredefinedUniform));
+				m_numPredefined += _fsh->m_numPredefined;
+			}
+		}
+
+		// Returns the program to its freshly constructed state.
+		void destroy()
+		{
+			m_numPredefined = 0;
+			m_vsh = NULL;
+			m_fsh = NULL;
+		}
+
+		const ShaderD3D12* m_vsh;
+		const ShaderD3D12* m_fsh;
+
+		// Twice PredefinedUniform::Count: one shader-sized table per stage.
+		PredefinedUniform m_predefined[PredefinedUniform::Count * 2];
+		uint8_t m_numPredefined;
+	};
+
+	struct TextureD3D12
+	{
+		enum Enum
+		{
+			Texture2D,
+			Texture3D,
+			TextureCube,
+		};
+
+		TextureD3D12()
+			: m_ptr(NULL)
+			, m_staging(NULL)
+			, m_state(D3D12_RESOURCE_STATE_COMMON)
+			, m_numMips(0)
+		{
+		}
+
+		void create(const Memory* _mem, uint32_t _flags, uint8_t _skip);
+		void destroy();
+		void update(ID3D12GraphicsCommandList* _commandList, uint8_t _side, uint8_t _mip, const Rect& _rect, uint16_t _z, uint16_t _depth, uint16_t _pitch, const Memory* _mem);
+		void commit(uint8_t _stage, uint32_t _flags = BGFX_SAMPLER_DEFAULT_FLAGS);
+		void resolve();
+		void setState(ID3D12GraphicsCommandList* _commandList, D3D12_RESOURCE_STATES _state);
+
+		D3D12_SHADER_RESOURCE_VIEW_DESC m_srvd;
+		D3D12_UNORDERED_ACCESS_VIEW_DESC m_uavd;
+		ID3D12Resource* m_ptr;
+		ID3D12Resource* m_staging;
+		
D3D12_RESOURCE_STATES m_state; + uint32_t m_flags; + uint16_t m_samplerIdx; + uint8_t m_type; + uint8_t m_requestedFormat; + uint8_t m_textureFormat; + uint8_t m_numMips; + }; + + struct FrameBufferD3D12 + { + FrameBufferD3D12() + : m_swapChain(NULL) + , m_denseIdx(UINT16_MAX) + , m_num(0) + , m_numTh(0) + { + m_depth.idx = bgfx::invalidHandle; + } + + void create(uint8_t _num, const TextureHandle* _handles); + void create(uint16_t _denseIdx, void* _nwh, uint32_t _width, uint32_t _height, TextureFormat::Enum _depthFormat); + uint16_t destroy(); + void preReset(); + void postReset(); + void resolve(); + void clear(ID3D12GraphicsCommandList* _commandList, const Clear& _clear, const float _palette[][4], const D3D12_RECT* _rect = NULL, uint32_t _num = 0); + + TextureHandle m_texture[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS]; + TextureHandle m_depth; + IDXGISwapChain* m_swapChain; + uint16_t m_denseIdx; + uint8_t m_num; + uint8_t m_numTh; + TextureHandle m_th[BGFX_CONFIG_MAX_FRAME_BUFFER_ATTACHMENTS]; + }; + + struct CommandQueue + { + CommandQueue() + : m_control(BX_COUNTOF(m_commandList) ) + { + } + + void init(ID3D12Device* _device) + { + D3D12_COMMAND_QUEUE_DESC queueDesc; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + queueDesc.Priority = 0; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + queueDesc.NodeMask = 1; + DX_CHECK(_device->CreateCommandQueue(&queueDesc + , __uuidof(ID3D12CommandQueue) + , (void**)&m_commandQueue + ) ); + + m_currentFence = 0; + DX_CHECK(_device->CreateFence(0 + , D3D12_FENCE_FLAG_NONE + , __uuidof(ID3D12Fence) + , (void**)&m_fence + ) ); + + for (uint32_t ii = 0; ii < BX_COUNTOF(m_commandList); ++ii) + { + DX_CHECK(_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT + , __uuidof(ID3D12CommandAllocator) + , (void**)&m_commandList[ii].m_commandAllocator + ) ); + + DX_CHECK(_device->CreateCommandList(0 + , D3D12_COMMAND_LIST_TYPE_DIRECT + , m_commandList[ii].m_commandAllocator + , NULL + , 
__uuidof(ID3D12GraphicsCommandList)
+					, (void**)&m_commandList[ii].m_commandList
+					) );
+
+				DX_CHECK(m_commandList[ii].m_commandList->Close() );
+			}
+		}
+
+		// Drains every in-flight command list through finish(), then releases
+		// the fence, each slot's allocator and command list, and the queue.
+		void shutdown()
+		{
+			finish();
+
+			DX_RELEASE(m_fence, 0);
+
+			for (uint32_t ii = 0; ii < BX_COUNTOF(m_commandList); ++ii)
+			{
+				DX_RELEASE(m_commandList[ii].m_commandAllocator, 0);
+				DX_RELEASE(m_commandList[ii].m_commandList, 0);
+			}
+
+			DX_RELEASE(m_commandQueue, 0);
+		}
+
+		// Reserves the next slot and returns its command list, reset and ready
+		// for recording. While no slot can be reserved, blocks on the event of
+		// the slot at the read position, closes that event and consumes the slot.
+		ID3D12GraphicsCommandList* alloc()
+		{
+			while (0 == m_control.reserve(1) )
+			{
+				CommandList& commandList = m_commandList[m_control.m_read];
+				WaitForSingleObject(commandList.m_event, INFINITE);
+				CloseHandle(commandList.m_event);
+				// Same as finish(): never leave a closed handle behind.
+				commandList.m_event = NULL;
+
+				m_control.consume(1);
+			}
+
+			CommandList& commandList = m_commandList[m_control.m_current];
+			DX_CHECK(commandList.m_commandList->Reset(commandList.m_commandAllocator, NULL) );
+			return commandList.m_commandList;
+		}
+
+		// Closes and submits the current command list, queues a fence signal
+		// behind it and arms the slot's event on that fence value.
+		// Returns the fence value that marks completion of this submission.
+		uint64_t kick()
+		{
+			CommandList& commandList = m_commandList[m_control.m_current];
+			DX_CHECK(commandList.m_commandList->Close() );
+
+			ID3D12CommandList* commandLists[] = { commandList.m_commandList };
+			m_commandQueue->ExecuteCommandLists(BX_COUNTOF(commandLists), commandLists);
+
+			commandList.m_event = CreateEventExA(NULL, NULL, 0, EVENT_ALL_ACCESS);
+			// init() creates the fence with value 0 and sets m_currentFence to 0.
+			// Signalled values therefore have to start at 1: waiting on 0 is
+			// satisfied by the fence's initial value, so the event of the first
+			// submission could fire before the GPU has executed it.
+			const uint64_t fence = ++m_currentFence;
+			m_commandQueue->Signal(m_fence, fence);
+			m_fence->SetEventOnCompletion(fence, commandList.m_event);
+
+			m_control.commit(1);
+
+			return fence;
+		}
+
+		// Waits for in-flight command lists in submission order. Returns as soon
+		// as the fence's completed value reaches _waitFence; with the default
+		// UINT64_MAX it keeps going until nothing is in flight.
+		void finish(uint64_t _waitFence = UINT64_MAX)
+		{
+			while (0 < m_control.available() )
+			{
+				CommandList& commandList = m_commandList[m_control.m_read];
+				WaitForSingleObject(commandList.m_event, INFINITE);
+				CloseHandle(commandList.m_event);
+				commandList.m_event = NULL;
+				m_completedFence = m_fence->GetCompletedValue();
+				m_commandQueue->Wait(m_fence, m_completedFence);
+
+				m_control.consume(1);
+
+				if (_waitFence <= m_completedFence)
+				{
+					return;
+				}
+			}
+
+			BX_CHECK(0 == m_control.available(), "");
+		}
+
+		struct CommandList
+		{
+			
ID3D12GraphicsCommandList* m_commandList; + ID3D12CommandAllocator* m_commandAllocator; + HANDLE m_event; + }; + + ID3D12CommandQueue* m_commandQueue; + uint64_t m_currentFence; + uint64_t m_completedFence; + ID3D12Fence* m_fence; + CommandList m_commandList[4]; + bx::RingBufferControl m_control; + }; + +} /* namespace d3d12 */ } // namespace bgfx + +#endif // BGFX_RENDERER_D3D12_H_HEADER_GUARD diff --git a/src/shader_dx9bc.cpp b/src/shader_dx9bc.cpp new file mode 100644 index 00000000..70de55a0 --- /dev/null +++ b/src/shader_dx9bc.cpp @@ -0,0 +1,748 @@ +/* + * Copyright 2011-2015 Branimir Karadzic. All rights reserved. + * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#include "bgfx_p.h" +#include "shader_dx9bc.h" + +BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG_GCC("-Wunused-parameter"); +BX_PRAGMA_DIAGNOSTIC_IGNORED_CLANG("-Wunneeded-internal-declaration"); + +namespace bgfx +{ + struct Dx9bcOpcodeInfo + { + uint8_t numOperands; + uint8_t numValues; + }; + + static const Dx9bcOpcodeInfo s_dx9bcOpcodeInfo[] = + { + { 0, 0 }, // NOP + { 2, 0 }, // MOV + { 3, 0 }, // ADD + { 1, 0 }, // SUB + { 4, 0 }, // MAD + { 3, 0 }, // MUL + { 2, 0 }, // RCP + { 2, 0 }, // RSQ + { 3, 0 }, // DP3 + { 3, 0 }, // DP4 + { 3, 0 }, // MIN + { 3, 0 }, // MAX + { 3, 0 }, // SLT + { 3, 0 }, // SGE + { 2, 0 }, // EXP + { 2, 0 }, // LOG + { 1, 0 }, // LIT + { 1, 0 }, // DST + { 4, 0 }, // LRP + { 2, 0 }, // FRC + { 1, 0 }, // M4X4 + { 1, 0 }, // M4X3 + { 1, 0 }, // M3X4 + { 1, 0 }, // M3X3 + { 1, 0 }, // M3X2 + { 0, 0 }, // CALL + { 0, 0 }, // CALLNZ + { 0, 0 }, // LOOP + { 0, 0 }, // RET + { 0, 0 }, // ENDLOOP + { 0, 0 }, // LABEL + { 1, 1 }, // DCL + { 3, 0 }, // POW + { 1, 0 }, // CRS + { 1, 0 }, // SGN + { 1, 0 }, // ABS + { 2, 0 }, // NRM + { 4, 0 }, // SINCOS + { 1, 0 }, // REP + { 0, 0 }, // ENDREP + { 1, 0 }, // IF + { 2, 0 }, // IFC + { 0, 0 }, // ELSE + { 0, 0 }, // ENDIF + { 0, 0 }, // BREAK + { 2, 0 }, // BREAKC + { 2, 0 }, // MOVA + { 1, 4 }, // DEFB + { 1, 4 }, // 
DEFI + { 0, 0 }, // 0 + { 0, 0 }, // 1 + { 0, 0 }, // 2 + { 0, 0 }, // 3 + { 0, 0 }, // 4 + { 0, 0 }, // 5 + { 0, 0 }, // 6 + { 0, 0 }, // 7 + { 0, 0 }, // 8 + { 0, 0 }, // 9 + { 0, 0 }, // 10 + { 0, 0 }, // 11 + { 0, 0 }, // 12 + { 0, 0 }, // 13 + { 0, 0 }, // 14 + { 1, 0 }, // TEXCOORD + { 1, 0 }, // TEXKILL + { 3, 0 }, // TEX + { 1, 0 }, // TEXBEM + { 1, 0 }, // TEXBEM1 + { 1, 0 }, // TEXREG2AR + { 1, 0 }, // TEXREG2GB + { 1, 0 }, // TEXM3X2PAD + { 1, 0 }, // TEXM3X2TEX + { 1, 0 }, // TEXM3X3PAD + { 1, 0 }, // TEXM3X3TEX + { 1, 0 }, // TEXM3X3DIFF + { 1, 0 }, // TEXM3X3SPEC + { 1, 0 }, // TEXM3X3VSPEC + { 2, 0 }, // EXPP + { 2, 0 }, // LOGP + { 4, 0 }, // CND + { 1, 4 }, // DEF + { 1, 0 }, // TEXREG2RGB + { 1, 0 }, // TEXDP3TEX + { 1, 0 }, // TEXM3X2DEPTH + { 1, 0 }, // TEXDP3 + { 1, 0 }, // TEXM3X3 + { 1, 0 }, // TEXDEPTH + { 4, 0 }, // CMP + { 1, 0 }, // BEM + { 4, 0 }, // DP2ADD + { 2, 0 }, // DSX + { 2, 0 }, // DSY + { 5, 0 }, // TEXLDD + { 1, 0 }, // SETP + { 3, 0 }, // TEXLDL + { 0, 0 }, // BREAKP + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dx9bcOpcodeInfo) == Dx9bcOpcode::Count); + + static const char* s_dx9bcOpcode[] = + { + "nop", + "mov", + "add", + "sub", + "mad", + "mul", + "rcp", + "rsq", + "dp3", + "dp4", + "min", + "max", + "slt", + "sge", + "exp", + "log", + "lit", + "dst", + "lrp", + "frc", + "m4x4", + "m4x3", + "m3x4", + "m3x3", + "m3x2", + "call", + "callnz", + "loop", + "ret", + "endloop", + "label", + "dcl", + "pow", + "crs", + "sgn", + "abs", + "nrm", + "sincos", + "rep", + "endrep", + "if", + "ifc", + "else", + "endif", + "break", + "breakc", + "mova", + "defb", + "defi", + + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + + "texcoord", + "texkill", + "tex", + "texbem", + "texbem1", + "texreg2ar", + "texreg2gb", + "texm3x2pad", + "texm3x2tex", + "texm3x3pad", + "texm3x3tex", + "texm3x3diff", + "texm3x3spec", + "texm3x3vspec", + "expp", + "logp", + "cnd", + "def", + 
"texreg2rgb", + "texdp3tex", + "texm3x2depth", + "texdp3", + "texm3x3", + "texdepth", + "cmp", + "bem", + "dp2add", + "dsx", + "dsy", + "texldd", + "setp", + "texldl", + "breakp", + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dx9bcOpcode) == Dx9bcOpcode::Count); + + const char* getName(Dx9bcOpcode::Enum _opcode) + { + BX_CHECK(_opcode < Dx9bcOpcode::Count, "Unknown opcode id %d (%x).", _opcode, _opcode); + return s_dx9bcOpcode[_opcode]; + } + + static const char* s_dx9bcOperandType[] = + { + "r", // Temporary Register File + "v", // Input Register File + "c", // Constant Register File + "t", // Texture Register File (PS) + "oPos", // Rasterizer Register File + "oD", // Attribute Output Register File + "oT", // Texture Coordinate Output Register File + "output", // Output register file for VS3.0+ + "i", // Constant Integer Vector Register File + "oColor", // Color Output Register File + "oDepth", // Depth Output Register File + "s", // Sampler State Register File + "c", // Constant Register File 2048 - 4095 + "c", // Constant Register File 4096 - 6143 + "c", // Constant Register File 6144 - 8191 + "b", // Constant Boolean register file + "aL", // Loop counter register file + "tempfloat16", // 16-bit float temp register file + "misctype", // Miscellaneous (single) registers. 
+ "label", // Label + "p", // Predicate register + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dx9bcOperandType) == Dx9bcOperandType::Count); + + static const char* s_dx9bcDeclUsage[] = + { + "position", + "blendweight", + "blendindices", + "normal", + "psize", + "texcoord", + "tangent", + "binormal", + "tessfactor", + "positiont", + "color", + "fog", + "depth", + "sample", + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dx9bcDeclUsage) == Dx9bcDeclUsage::Count); + + int32_t read(bx::ReaderI* _reader, Dx9bcSubOperand& _subOperand) + { + int32_t size = 0; + + uint32_t token; + size += bx::read(_reader, token); + + _subOperand.type = Dx9bcOperandType::Enum( ( (token & UINT32_C(0x70000000) ) >> 28) + | ( (token & UINT32_C(0x00001800) ) >> 8) ); + _subOperand.regIndex = (token & UINT32_C(0x000007ff) ); + _subOperand.swizzleBits = ( (token & UINT32_C(0x00ff0000) ) >> 16); + + return size; + } + + int32_t write(bx::WriterI* _writer, const Dx9bcSubOperand& _subOperand) + { + int32_t size = 0; + + uint32_t token = 0; + token |= (_subOperand.type << 28) & UINT32_C(0x70000000); + token |= (_subOperand.type << 8) & UINT32_C(0x00001800); + token |= _subOperand.regIndex & UINT32_C(0x000007ff); + token |= (_subOperand.swizzleBits << 16) & UINT32_C(0x00ff0000); + size += bx::write(_writer, token); + + return size; + } + + int32_t read(bx::ReaderI* _reader, Dx9bcOperand& _operand) + { + int32_t size = 0; + + uint32_t token; + size += bx::read(_reader, token); + + _operand.type = Dx9bcOperandType::Enum( ( (token & UINT32_C(0x70000000) ) >> 28) + | ( (token & UINT32_C(0x00001800) ) >> 8) ); + _operand.regIndex = (token & UINT32_C(0x000007ff) ); + _operand.addrMode = Dx9bcOperandAddrMode::Enum( (token & UINT32_C(0x00002000) ) >> 13); + + if (_operand.destination) + { + // Destination Parameter Token + // https://msdn.microsoft.com/en-us/library/ff552738.aspx + + _operand.writeMask = ( (token & UINT32_C(0x000f0000) ) >> 16); + _operand.saturate = 0 != (token & UINT32_C(0x00100000) ); + 
_operand.partialPrecision = 0 != (token & UINT32_C(0x00200000) ); + _operand.centroid = 0 != (token & UINT32_C(0x00400000) ); + } + else + { + // Source Parameter Token + // https://msdn.microsoft.com/en-us/library/ff569716%28v=vs.85%29.aspx + + _operand.writeMask = 0; + _operand.saturate = false; + _operand.partialPrecision = false; + _operand.centroid = false; + _operand.swizzleBits = ( (token & UINT32_C(0x00ff0000) ) >> 16); + } + + if (Dx9bcOperandAddrMode::Relative == _operand.addrMode) + { + size += read(_reader, _operand.subOperand); + } + + return size; + } + + int32_t write(bx::WriterI* _writer, const Dx9bcOperand& _operand) + { + int32_t size = 0; + + uint32_t token = 0; + token |= (_operand.type << 28) & UINT32_C(0x70000000); + token |= (_operand.type << 8) & UINT32_C(0x00001800); + token |= _operand.regIndex & UINT32_C(0x000007ff); + token |= (_operand.addrMode << 13) & UINT32_C(0x00002000); + size += bx::write(_writer, token); + + if (Dx9bcOperandAddrMode::Relative == _operand.addrMode) + { + size += write(_writer, _operand.subOperand); + } + + return size; + } + + int32_t read(bx::ReaderI* _reader, Dx9bcInstruction& _instruction) + { + int32_t size = 0; + + uint32_t token; + size += bx::read(_reader, token); + + _instruction.opcode = Dx9bcOpcode::Enum( (token & UINT32_C(0x0000ffff) ) ); + + if (Dx9bcOpcode::Comment == _instruction.opcode) + { + _instruction.specific = 0; + _instruction.length = uint16_t( (token & UINT32_C(0x7fff0000) ) >> 16) + 1; + _instruction.predicated = false; + _instruction.coissue = false; + } + else + { + _instruction.specific = uint8_t( (token & UINT32_C(0x00ff0000) ) >> 16); + _instruction.length = uint8_t( (token & UINT32_C(0x0f000000) ) >> 24) + 1; + _instruction.predicated = 0 != (token & UINT32_C(0x10000000) ); + _instruction.coissue = 0 != (token & UINT32_C(0x40000000) ); + } + + if (Dx9bcOpcode::Count <= _instruction.opcode) + { + if (Dx9bcOpcode::Comment == _instruction.opcode) + { + for (int32_t ii = 0, num = 
_instruction.length-1; ii < num; ++ii) + { + uint32_t tmp; + size += bx::read(_reader, tmp); + } + } + + return size; + } + + uint32_t currOp = 0; + + const Dx9bcOpcodeInfo& info = s_dx9bcOpcodeInfo[bx::uint32_min(_instruction.opcode, Dx9bcOpcode::Count)]; + _instruction.numOperands = info.numOperands; + _instruction.numValues = info.numValues; + + switch (_instruction.opcode) + { + case Dx9bcOpcode::SINCOS: + if (5 > _instruction.length) + { + _instruction.numOperands = 2; + } + break; + + default: + break; + }; + +//BX_TRACE("%d (%d), %d, %d, 0x%08x" +// , _instruction.opcode +// , bx::uint32_min(_instruction.opcode, Dx9bcOpcode::Count) +// , _instruction.length +// , _instruction.numOperands +// , token +// ); + + const bool valuesBeforeOpcode = false + || Dx9bcOpcode::DCL == _instruction.opcode + ; + + if (valuesBeforeOpcode + && 0 < info.numValues) + { + size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t) ); + } + + _instruction.operand[0].destination = true; + + switch (_instruction.numOperands) + { + case 6: size += read(_reader, _instruction.operand[currOp++]); + case 5: size += read(_reader, _instruction.operand[currOp++]); + case 4: size += read(_reader, _instruction.operand[currOp++]); + case 3: size += read(_reader, _instruction.operand[currOp++]); + case 2: size += read(_reader, _instruction.operand[currOp++]); + case 1: size += read(_reader, _instruction.operand[currOp++]); + case 0: + if (!valuesBeforeOpcode + && 0 < info.numValues) + { + size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t) ); + } + break; + + default: + BX_CHECK(false, "Instruction %s with invalid number of operands %d (numValues %d)." 
+ , getName(_instruction.opcode) + , _instruction.numOperands + , info.numValues + ); + break; + } + + return size; + } + + /* Encodes the instruction token (opcode, opcode-specific bits, length-1) followed by the first numOperands operands, and returns the number of bytes written. Immediate values and the predicated/coissue bits are not emitted by this function. */ + int32_t write(bx::WriterI* _writer, const Dx9bcInstruction& _instruction) + { + int32_t size = 0; + + uint32_t token = 0; + token |= _instruction.opcode & UINT32_C(0x0000ffff); + token |= (_instruction.specific << 16) & UINT32_C(0x00ff0000); + token |= ( (_instruction.length - 1) << 24) & UINT32_C(0x0f000000); + size += bx::write(_writer, token); + + uint32_t currOp = 0; + switch (_instruction.numOperands) + { + case 6: size += write(_writer, _instruction.operand[currOp++]); + case 5: size += write(_writer, _instruction.operand[currOp++]); + case 4: size += write(_writer, _instruction.operand[currOp++]); + case 3: size += write(_writer, _instruction.operand[currOp++]); + case 2: size += write(_writer, _instruction.operand[currOp++]); + case 1: size += write(_writer, _instruction.operand[currOp++]); + case 0: + break; + } + + /* Return the accumulated byte count, matching the other read()/write() overloads in this file. */ + return size; + } + + int32_t toString(char* _out, int32_t _size, const Dx9bcInstruction& _instruction) + { + int32_t size = 0; + + if (Dx9bcOpcode::Comment == _instruction.opcode + || Dx9bcOpcode::Phase == _instruction.opcode) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "// %x" + , _instruction.opcode + ); + return size; + } + + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%2d %s" + , _instruction.opcode + , getName(_instruction.opcode) + ); + + switch (_instruction.opcode) + { + case Dx9bcOpcode::DCL: + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "_%s%d (%d, %d, %d, %d)" + , s_dx9bcDeclUsage[_instruction.value[0] & UINT32_C(0x0000000f)] + , (_instruction.value[0] & UINT32_C(0x000f0000) )>>16 + , (_instruction.value[0] & UINT32_C(0x08000000) )>>27 // ? 
+ , (_instruction.value[0] & UINT32_C(0x10000000) )>>28 // texture2d + , (_instruction.value[0] & UINT32_C(0x20000000) )>>29 // textureCube + , (_instruction.value[0] & UINT32_C(0x40000000) )>>30 // texture3d + ); + break; + + default: + break; + } + + for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii) + { + const Dx9bcOperand& operand = _instruction.operand[ii]; + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s%s%d" + , 0 == ii ? " " : ", " + , s_dx9bcOperandType[operand.type] + , operand.regIndex + ); + + if (operand.destination) + { + if (0xf > operand.writeMask + && 0 < operand.writeMask) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , ".%s%s%s%s" + , 0 == (operand.writeMask & 1) ? "" : "x" + , 0 == (operand.writeMask & 2) ? "" : "y" + , 0 == (operand.writeMask & 4) ? "" : "z" + , 0 == (operand.writeMask & 8) ? "" : "w" + ); + } + } + else + { + if (Dx9bcOperandAddrMode::Relative == operand.addrMode) + { + const bool array = true; + + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "[" + ); + + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s%d" + , s_dx9bcOperandType[operand.subOperand.type] + , operand.subOperand.regIndex + ); + + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s" + , array ? "]" : "" + ); + } + + if (0xe4 != operand.swizzleBits) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , ".%c%c%c%c" + , "xyzw"[(operand.swizzleBits )&0x3] + , "xyzw"[(operand.swizzleBits>>2)&0x3] + , "xyzw"[(operand.swizzleBits>>4)&0x3] + , "xyzw"[(operand.swizzleBits>>6)&0x3] + ); + } + } + } + + switch (_instruction.opcode) + { + case Dx9bcOpcode::DEF: + for (uint32_t jj = 0; jj < _instruction.numValues; ++jj) + { + union { int32_t i; float f; } cast = { _instruction.value[jj] }; + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s%f%s" + , 0 == jj ? 
" (" : ", " + , cast.f + , uint32_t(_instruction.numValues-1) == jj ? ")" : "" + ); + } + break; + + case Dx9bcOpcode::DEFI: + for (uint32_t jj = 0; jj < _instruction.numValues; ++jj) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s%d%s" + , 0 == jj ? " (" : ", " + , _instruction.value[jj] + , uint32_t(_instruction.numValues-1) == jj ? ")" : "" + ); + } + break; + + default: + break; + } + + return size; + } + + int32_t read(bx::ReaderSeekerI* _reader, Dx9bcShader& _shader) + { + int32_t size = 0; + int64_t offset = bx::seek(_reader); + + for (;;) + { + Dx9bcInstruction instruction; + int32_t length = read(_reader, instruction); + size += length; + + if (Dx9bcOpcode::Count > instruction.opcode) + { + char temp[512]; + toString(temp, 512, instruction); + + BX_CHECK(length/4 == instruction.length + , "%s\nread %d, expected %d" + , temp + , length/4 + , instruction.length + ); + } + else + { + if (Dx9bcOpcode::End == instruction.opcode) + { + size -= length; + break; + } + } + } + + bx::seek(_reader, offset, bx::Whence::Begin); + + _shader.byteCode.resize(size); + bx::read(_reader, _shader.byteCode.data(), size); + + return size; + } + + int32_t write(bx::WriterI* _writer, const Dx9bcShader& _shader) + { + BX_UNUSED(_writer, _shader); + return 0; + } + + int32_t read(bx::ReaderSeekerI* _reader, Dx9bc& _bc) + { + int32_t size = 0; + + size += bx::read(_reader, _bc.version); + + bool pixelShader = (0xffff0000 == (_bc.version & 0xffff0000) ); + uint32_t versionMajor = (_bc.version>>8)&0xff; + uint32_t versionMinor = _bc.version&0xff; + BX_TRACE("%s shader %d.%d" + , pixelShader ? 
"pixel" : "vertex" + , versionMajor + , versionMinor + ); + + size += read(_reader, _bc.shader); + + return size; + } + + int32_t write(bx::WriterSeekerI* _writer, const Dx9bc& _dxbc) + { + BX_UNUSED(_writer, _dxbc); + return 0; + } + + void parse(const Dx9bcShader& _src, Dx9bcParseFn _fn, void* _userData) + { + bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) ); + +//BX_TRACE("parse %d", _src.byteCode.size()); + + for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;) + { + Dx9bcInstruction instruction; + uint32_t size = read(&reader, instruction); + + BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); + + _fn(token * sizeof(uint32_t), instruction, _userData); + + token += instruction.length; + } + } + + /* Decodes every instruction in _src, hands it to _fn for in-place modification, re-encodes it, and stores the re-encoded stream in _dst.byteCode. */ + void filter(Dx9bcShader& _dst, const Dx9bcShader& _src, Dx9bcFilterFn _fn, void* _userData) + { + bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) ); + + bx::CrtAllocator r; + bx::MemoryBlock mb(&r); + bx::MemoryWriter writer(&mb); + + for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;) + { + Dx9bcInstruction instruction; + uint32_t size = read(&reader, instruction); + BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); + + _fn(instruction, _userData); + + write(&writer, instruction); + + token += instruction.length; + } + + uint8_t* data = (uint8_t*)mb.more(); + uint32_t size = uint32_t(bx::getSize(&writer) ); + /* resize(), not reserve(): reserve() leaves size() unchanged, so the copy below would write outside the vector's valid range and byteCode.size() would not reflect the new contents. */ + _dst.byteCode.resize(size); + memcpy(_dst.byteCode.data(), data, size); + } + +} // namespace bgfx diff --git a/src/shader_dx9bc.h b/src/shader_dx9bc.h new file mode 100644 index 00000000..b161dcaa --- /dev/null +++ b/src/shader_dx9bc.h @@ -0,0 +1,258 @@ +/* + * Copyright 2011-2015 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef BGFX_SHADER_DX9BC_H +#define BGFX_SHADER_DX9BC_H + +#include + +namespace bgfx +{ + struct Dx9bcOpcode + { + enum Enum + { + NOP, + MOV, + ADD, + SUB, + MAD, + MUL, + RCP, + RSQ, + DP3, + DP4, + MIN, + MAX, + SLT, + SGE, + EXP, + LOG, + LIT, + DST, + LRP, + FRC, + M4X4, + M4X3, + M3X4, + M3X3, + M3X2, + CALL, + CALLNZ, + LOOP, + RET, + ENDLOOP, + LABEL, + DCL, + POW, + CRS, + SGN, + ABS, + NRM, + SINCOS, + REP, + ENDREP, + IF, + IFC, + ELSE, + ENDIF, + BREAK, + BREAKC, + MOVA, + DEFB, + DEFI, + + Unknown = 63, + TEXCOORD, + TEXKILL, + TEX, + TEXBEM, + TEXBEM1, + TEXREG2AR, + TEXREG2GB, + TEXM3X2PAD, + TEXM3X2TEX, + TEXM3X3PAD, + TEXM3X3TEX, + TEXM3X3DIFF, + TEXM3X3SPEC, + TEXM3X3VSPEC, + EXPP, + LOGP, + CND, + DEF, + TEXREG2RGB, + TEXDP3TEX, + TEXM3X2DEPTH, + TEXDP3, + TEXM3X3, + TEXDEPTH, + CMP, + BEM, + DP2ADD, + DSX, + DSY, + TEXLDD, + SETP, + TEXLDL, + BREAKP, + + Count, + + Phase = 0xfffd, + Comment = 0xfffe, + End = 0xffff + }; + }; + + const char* getName(Dx9bcOpcode::Enum _opcode); + + struct Dx9bcResourceDim + { + enum Enum + { + Unknown, + Texture1D, + Texture2D, + TextureCube, + Texture3D, + }; + }; + + struct Dx9bcOperandType + { + enum Enum + { + Temp, + Input, + Const, + Texture, + RastOut, + AttrOut, + TexCrdOut, + Output, + ConstInt, + ColorOut, + DepthOut, + Sampler, + Const2, + Const3, + Const4, + ConstBool, + Loop, + TempFloat16, + MiscType, + Label, + Predicate, + + Count + }; + }; + + struct Dx9bcDeclUsage + { + enum Enum + { + Position, + BlendWeight, + BlendIndices, + Normal, + Psize, + Texcoord, + Tangent, + Binormal, + TessFactor, + PositionT, + Color, + Fog, + Depth, + Sample, + + Count + }; + }; + + struct Dx9bcOperandAddrMode + { + enum Enum + { + Absolute, + Relative, + + Count + }; + }; + + struct Dx9bcSubOperand + { + Dx9bcOperandType::Enum type; + uint32_t regIndex; + uint8_t swizzleBits; + }; + + struct Dx9bcOperand + { + Dx9bcOperandType::Enum type; + 
uint32_t regIndex; + + bool destination; + + // Destination + uint8_t writeMask; + bool saturate; + bool partialPrecision; + bool centroid; + + // Source + uint8_t swizzleBits; + + Dx9bcOperandAddrMode::Enum addrMode; + Dx9bcSubOperand subOperand; + }; + + struct Dx9bcInstruction + { + Dx9bcOpcode::Enum opcode; + uint16_t length; + uint8_t numOperands; + uint8_t numValues; + uint8_t specific; + bool predicated; + bool coissue; + + Dx9bcOperand operand[6]; + int32_t value[4]; + }; + + int32_t read(bx::ReaderI* _reader, Dx9bcInstruction& _instruction); + int32_t write(bx::WriterI* _writer, const Dx9bcInstruction& _instruction); + int32_t toString(char* _out, int32_t _size, const Dx9bcInstruction& _instruction); + + struct Dx9bcShader + { + stl::vector byteCode; + }; + + int32_t read(bx::ReaderSeekerI* _reader, Dx9bcShader& _shader); + int32_t write(bx::WriterI* _writer, const Dx9bcShader& _shader); + + struct Dx9bc + { + uint32_t version; + Dx9bcShader shader; + }; + + int32_t read(bx::ReaderSeekerI* _reader, Dx9bc& _dx9bc); + int32_t write(bx::WriterSeekerI* _writer, const Dx9bc& _dx9bc); + + typedef void (*Dx9bcParseFn)(uint32_t _offset, const Dx9bcInstruction& _instruction, void* _userData); + void parse(const Dx9bcShader& _src, Dx9bcParseFn _fn, void* _userData); + + typedef void (*Dx9bcFilterFn)(Dx9bcInstruction& _instruction, void* _userData); + void filter(Dx9bcShader& _dst, const Dx9bcShader& _src, Dx9bcFilterFn _fn, void* _userData); + +} // namespace bgfx + +#endif // BGFX_SHADER_DX9BC_H diff --git a/src/shader_dxbc.cpp b/src/shader_dxbc.cpp new file mode 100644 index 00000000..61184746 --- /dev/null +++ b/src/shader_dxbc.cpp @@ -0,0 +1,1851 @@ +/* + * Copyright 2011-2015 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#include "bgfx_p.h" +#include "shader_dxbc.h" + +namespace bgfx +{ + struct DxbcOpcodeInfo + { + uint8_t numOperands; + uint8_t numValues; + }; + + static const DxbcOpcodeInfo s_dxbcOpcodeInfo[] = + { + { 3, 0 }, // ADD + { 3, 0 }, // AND + { 0, 0 }, // BREAK + { 1, 0 }, // BREAKC + { 0, 0 }, // CALL + { 0, 0 }, // CALLC + { 1, 0 }, // CASE + { 0, 0 }, // CONTINUE + { 1, 0 }, // CONTINUEC + { 0, 0 }, // CUT + { 0, 0 }, // DEFAULT + { 2, 0 }, // DERIV_RTX + { 2, 0 }, // DERIV_RTY + { 1, 0 }, // DISCARD + { 3, 0 }, // DIV + { 3, 0 }, // DP2 + { 3, 0 }, // DP3 + { 3, 0 }, // DP4 + { 0, 0 }, // ELSE + { 0, 0 }, // EMIT + { 0, 0 }, // EMITTHENCUT + { 0, 0 }, // ENDIF + { 0, 0 }, // ENDLOOP + { 0, 0 }, // ENDSWITCH + { 3, 0 }, // EQ + { 2, 0 }, // EXP + { 2, 0 }, // FRC + { 2, 0 }, // FTOI + { 2, 0 }, // FTOU + { 3, 0 }, // GE + { 3, 0 }, // IADD + { 1, 0 }, // IF + { 3, 0 }, // IEQ + { 3, 0 }, // IGE + { 3, 0 }, // ILT + { 4, 0 }, // IMAD + { 3, 0 }, // IMAX + { 3, 0 }, // IMIN + { 4, 0 }, // IMUL + { 3, 0 }, // INE + { 2, 0 }, // INEG + { 3, 0 }, // ISHL + { 3, 0 }, // ISHR + { 2, 0 }, // ITOF + { 0, 0 }, // LABEL + { 3, 0 }, // LD + { 4, 0 }, // LD_MS + { 2, 0 }, // LOG + { 0, 0 }, // LOOP + { 3, 0 }, // LT + { 4, 0 }, // MAD + { 3, 0 }, // MIN + { 3, 0 }, // MAX + { 0, 1 }, // CUSTOMDATA + { 2, 0 }, // MOV + { 4, 0 }, // MOVC + { 3, 0 }, // MUL + { 3, 0 }, // NE + { 0, 0 }, // NOP + { 2, 0 }, // NOT + { 3, 0 }, // OR + { 3, 0 }, // RESINFO + { 0, 0 }, // RET + { 1, 0 }, // RETC + { 2, 0 }, // ROUND_NE + { 2, 0 }, // ROUND_NI + { 2, 0 }, // ROUND_PI + { 2, 0 }, // ROUND_Z + { 2, 0 }, // RSQ + { 4, 0 }, // SAMPLE + { 5, 0 }, // SAMPLE_C + { 5, 0 }, // SAMPLE_C_LZ + { 5, 0 }, // SAMPLE_L + { 6, 0 }, // SAMPLE_D + { 5, 0 }, // SAMPLE_B + { 2, 0 }, // SQRT + { 1, 0 }, // SWITCH + { 3, 0 }, // SINCOS + { 3, 0 }, // UDIV + { 3, 0 }, // ULT + { 3, 0 }, // UGE + { 4, 0 }, // UMUL + { 4, 0 }, // UMAD + { 3, 0 
}, // UMAX + { 3, 0 }, // UMIN + { 3, 0 }, // USHR + { 2, 0 }, // UTOF + { 3, 0 }, // XOR + { 1, 1 }, // DCL_RESOURCE + { 1, 0 }, // DCL_CONSTANT_BUFFER + { 1, 0 }, // DCL_SAMPLER + { 1, 1 }, // DCL_INDEX_RANGE + { 1, 0 }, // DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY + { 1, 0 }, // DCL_GS_INPUT_PRIMITIVE + { 0, 1 }, // DCL_MAX_OUTPUT_VERTEX_COUNT + { 1, 0 }, // DCL_INPUT + { 1, 1 }, // DCL_INPUT_SGV + { 1, 0 }, // DCL_INPUT_SIV + { 1, 0 }, // DCL_INPUT_PS + { 1, 1 }, // DCL_INPUT_PS_SGV + { 1, 1 }, // DCL_INPUT_PS_SIV + { 1, 0 }, // DCL_OUTPUT + { 1, 0 }, // DCL_OUTPUT_SGV + { 1, 1 }, // DCL_OUTPUT_SIV + { 0, 1 }, // DCL_TEMPS + { 0, 3 }, // DCL_INDEXABLE_TEMP + { 0, 0 }, // DCL_GLOBAL_FLAGS + + { 0, 0 }, // InstrD3D10 + { 4, 0 }, // LOD + { 4, 0 }, // GATHER4 + { 0, 0 }, // SAMPLE_POS + { 0, 0 }, // SAMPLE_INFO + + { 0, 0 }, // InstrD3D10_1 + { 0, 0 }, // HS_DECLS + { 0, 0 }, // HS_CONTROL_POINT_PHASE + { 0, 0 }, // HS_FORK_PHASE + { 0, 0 }, // HS_JOIN_PHASE + { 0, 0 }, // EMIT_STREAM + { 0, 0 }, // CUT_STREAM + { 1, 0 }, // EMITTHENCUT_STREAM + { 1, 0 }, // INTERFACE_CALL + { 0, 0 }, // BUFINFO + { 2, 0 }, // DERIV_RTX_COARSE + { 2, 0 }, // DERIV_RTX_FINE + { 2, 0 }, // DERIV_RTY_COARSE + { 2, 0 }, // DERIV_RTY_FINE + { 5, 0 }, // GATHER4_C + { 5, 0 }, // GATHER4_PO + { 0, 0 }, // GATHER4_PO_C + { 0, 0 }, // RCP + { 0, 0 }, // F32TOF16 + { 0, 0 }, // F16TOF32 + { 0, 0 }, // UADDC + { 0, 0 }, // USUBB + { 0, 0 }, // COUNTBITS + { 0, 0 }, // FIRSTBIT_HI + { 0, 0 }, // FIRSTBIT_LO + { 0, 0 }, // FIRSTBIT_SHI + { 0, 0 }, // UBFE + { 0, 0 }, // IBFE + { 5, 0 }, // BFI + { 0, 0 }, // BFREV + { 5, 0 }, // SWAPC + { 0, 0 }, // DCL_STREAM + { 1, 0 }, // DCL_FUNCTION_BODY + { 0, 0 }, // DCL_FUNCTION_TABLE + { 0, 0 }, // DCL_INTERFACE + { 0, 0 }, // DCL_INPUT_CONTROL_POINT_COUNT + { 0, 0 }, // DCL_OUTPUT_CONTROL_POINT_COUNT + { 0, 0 }, // DCL_TESS_DOMAIN + { 0, 0 }, // DCL_TESS_PARTITIONING + { 0, 0 }, // DCL_TESS_OUTPUT_PRIMITIVE + { 0, 0 }, // DCL_HS_MAX_TESSFACTOR + { 0, 0 }, // 
DCL_HS_FORK_PHASE_INSTANCE_COUNT + { 0, 0 }, // DCL_HS_JOIN_PHASE_INSTANCE_COUNT + { 0, 3 }, // DCL_THREAD_GROUP + { 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_TYPED + { 1, 0 }, // DCL_UNORDERED_ACCESS_VIEW_RAW + { 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_STRUCTURED + { 1, 1 }, // DCL_THREAD_GROUP_SHARED_MEMORY_RAW + { 1, 2 }, // DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED + { 1, 0 }, // DCL_RESOURCE_RAW + { 1, 1 }, // DCL_RESOURCE_STRUCTURED + { 3, 0 }, // LD_UAV_TYPED + { 3, 0 }, // STORE_UAV_TYPED + { 3, 0 }, // LD_RAW + { 3, 0 }, // STORE_RAW + { 4, 0 }, // LD_STRUCTURED + { 4, 0 }, // STORE_STRUCTURED + { 3, 0 }, // ATOMIC_AND + { 3, 0 }, // ATOMIC_OR + { 3, 0 }, // ATOMIC_XOR + { 3, 0 }, // ATOMIC_CMP_STORE + { 3, 0 }, // ATOMIC_IADD + { 3, 0 }, // ATOMIC_IMAX + { 3, 0 }, // ATOMIC_IMIN + { 3, 0 }, // ATOMIC_UMAX + { 3, 0 }, // ATOMIC_UMIN + { 2, 0 }, // IMM_ATOMIC_ALLOC + { 2, 0 }, // IMM_ATOMIC_CONSUME + { 0, 0 }, // IMM_ATOMIC_IADD + { 0, 0 }, // IMM_ATOMIC_AND + { 0, 0 }, // IMM_ATOMIC_OR + { 0, 0 }, // IMM_ATOMIC_XOR + { 0, 0 }, // IMM_ATOMIC_EXCH + { 0, 0 }, // IMM_ATOMIC_CMP_EXCH + { 0, 0 }, // IMM_ATOMIC_IMAX + { 0, 0 }, // IMM_ATOMIC_IMIN + { 0, 0 }, // IMM_ATOMIC_UMAX + { 0, 0 }, // IMM_ATOMIC_UMIN + { 0, 0 }, // SYNC + { 3, 0 }, // DADD + { 3, 0 }, // DMAX + { 3, 0 }, // DMIN + { 3, 0 }, // DMUL + { 3, 0 }, // DEQ + { 3, 0 }, // DGE + { 3, 0 }, // DLT + { 3, 0 }, // DNE + { 2, 0 }, // DMOV + { 4, 0 }, // DMOVC + { 0, 0 }, // DTOF + { 0, 0 }, // FTOD + { 3, 0 }, // EVAL_SNAPPED + { 3, 0 }, // EVAL_SAMPLE_INDEX + { 2, 0 }, // EVAL_CENTROID + { 0, 1 }, // DCL_GS_INSTANCE_COUNT + { 0, 0 }, // ABORT + { 0, 0 }, // DEBUG_BREAK + + { 0, 0 }, // InstrD3D11 + { 0, 0 }, // DDIV + { 0, 0 }, // DFMA + { 0, 0 }, // DRCP + { 0, 0 }, // MSAD + { 0, 0 }, // DTOI + { 0, 0 }, // DTOU + { 0, 0 }, // ITOD + { 0, 0 }, // UTOD + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcodeInfo) == DxbcOpcode::Count); + + static const char* s_dxbcOpcode[] = + { + "add", + "and", + "break", + 
"breakc", + "call", + "callc", + "case", + "continue", + "continuec", + "cut", + "default", + "deriv_rtx", + "deriv_rty", + "discard", + "div", + "dp2", + "dp3", + "dp4", + "else", + "emit", + "emitthencut", + "endif", + "endloop", + "endswitch", + "eq", + "exp", + "frc", + "ftoi", + "ftou", + "ge", + "iadd", + "if", + "ieq", + "ige", + "ilt", + "imad", + "imax", + "imin", + "imul", + "ine", + "ineg", + "ishl", + "ishr", + "itof", + "label", + "ld", + "ld_ms", + "log", + "loop", + "lt", + "mad", + "min", + "max", + "customdata", + "mov", + "movc", + "mul", + "ne", + "nop", + "not", + "or", + "resinfo", + "ret", + "retc", + "round_ne", + "round_ni", + "round_pi", + "round_z", + "rsq", + "sample", + "sample_c", + "sample_c_lz", + "sample_l", + "sample_d", + "sample_b", + "sqrt", + "switch", + "sincos", + "udiv", + "ult", + "uge", + "umul", + "umad", + "umax", + "umin", + "ushr", + "utof", + "xor", + "dcl_resource", + "dcl_constantbuffer", + "dcl_sampler", + "dcl_index_range", + "dcl_gs_output_primitive_topology", + "dcl_gs_input_primitive", + "dcl_max_output_vertex_count", + "dcl_input", + "dcl_input_sgv", + "dcl_input_siv", + "dcl_input_ps", + "dcl_input_ps_sgv", + "dcl_input_ps_siv", + "dcl_output", + "dcl_output_sgv", + "dcl_output_siv", + "dcl_temps", + "dcl_indexable_temp", + "dcl_global_flags", + + NULL, + "lod", + "gather4", + "sample_pos", + "sample_info", + + NULL, + "hs_decls", + "hs_control_point_phase", + "hs_fork_phase", + "hs_join_phase", + "emit_stream", + "cut_stream", + "emitthencut_stream", + "interface_call", + "bufinfo", + "deriv_rtx_coarse", + "deriv_rtx_fine", + "deriv_rty_coarse", + "deriv_rty_fine", + "gather4_c", + "gather4_po", + "gather4_po_c", + "rcp", + "f32tof16", + "f16tof32", + "uaddc", + "usubb", + "countbits", + "firstbit_hi", + "firstbit_lo", + "firstbit_shi", + "ubfe", + "ibfe", + "bfi", + "bfrev", + "swapc", + "dcl_stream", + "dcl_function_body", + "dcl_function_table", + "dcl_interface", + "dcl_input_control_point_count", + 
"dcl_output_control_point_count", + "dcl_tess_domain", + "dcl_tess_partitioning", + "dcl_tess_output_primitive", + "dcl_hs_max_tessfactor", + "dcl_hs_fork_phase_instance_count", + "dcl_hs_join_phase_instance_count", + "dcl_thread_group", + "dcl_unordered_access_view_typed", + "dcl_unordered_access_view_raw", + "dcl_unordered_access_view_structured", + "dcl_thread_group_shared_memory_raw", + "dcl_thread_group_shared_memory_structured", + "dcl_resource_raw", + "dcl_resource_structured", + "ld_uav_typed", + "store_uav_typed", + "ld_raw", + "store_raw", + "ld_structured", + "store_structured", + "atomic_and", + "atomic_or", + "atomic_xor", + "atomic_cmp_store", + "atomic_iadd", + "atomic_imax", + "atomic_imin", + "atomic_umax", + "atomic_umin", + "imm_atomic_alloc", + "imm_atomic_consume", + "imm_atomic_iadd", + "imm_atomic_and", + "imm_atomic_or", + "imm_atomic_xor", + "imm_atomic_exch", + "imm_atomic_cmp_exch", + "imm_atomic_imax", + "imm_atomic_imin", + "imm_atomic_umax", + "imm_atomic_umin", + "sync", + "dadd", + "dmax", + "dmin", + "dmul", + "deq", + "dge", + "dlt", + "dne", + "dmov", + "dmovc", + "dtof", + "ftod", + "eval_snapped", + "eval_sample_index", + "eval_centroid", + "dcl_gs_instance_count", + "abort", + "debug_break", + + NULL, + "ddiv", + "dfma", + "drcp", + "msad", + "dtoi", + "dtou", + "itod", + "utod", + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcode) == DxbcOpcode::Count); + + const char* getName(DxbcOpcode::Enum _opcode) + { + BX_CHECK(_opcode < DxbcOpcode::Count, "Unknown opcode id %d.", _opcode); + return s_dxbcOpcode[_opcode]; + } + + static const char* s_dxbcSrvType[] = + { + "", // Unknown + "Buffer", // Buffer + "Texture1D", // Texture1D + "Texture2D", // Texture2D + "Texture2DMS", // Texture2DMS + "Texture3D", // Texture3D + "TextureCube", // TextureCube + "Texture1DArray", // Texture1DArray + "Texture2DArray", // Texture2DArray + "Texture2DMSArray", // Texture2DMSArray + "TextureCubearray", // TextureCubearray + "RawBuffer", // RawBuffer + 
"StructuredBuffer", // StructuredBuffer + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcSrvType) == DxbcResourceDim::Count); + + static const char* s_dxbcInterpolationName[] = + { + "", + "constant", + "linear", + "linear centroid", + "linear noperspective", + "linear noperspective centroid", + "linear sample", + "linear noperspective sample", + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcInterpolationName) == DxbcInterpolation::Count); + + // mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/shortfiles.txt + static const char* s_dxbcOperandType[] = + { + "r", // Temp + "v", // Input + "o", // Output + "x", // TempArray + "l", // Imm32 + "d", // Imm64 + "s", // Sampler + "t", // Resource + "cb", // ConstantBuffer + "icb", // ImmConstantBuffer + "label", // Label + "vPrim", // PrimitiveID + "oDepth", // OutputDepth + "null", // Null + "rasterizer", // Rasterizer + "oMask", // CoverageMask + "stream", // Stream + "function_body", // FunctionBody + "function_table", // FunctionTable + "interface", // Interface + "function_input", // FunctionInput + "function_output", // FunctionOutput + "vOutputControlPointID", // OutputControlPointId + "vForkInstanceID", // InputForkInstanceId + "vJoinInstanceID", // InputJoinInstanceId + "vicp", // InputControlPoint + "vocp", // OutputControlPoint + "vpc", // InputPatchConstant + "vDomain", // InputDomainPoint + "this", // ThisPointer + "u", // UnorderedAccessView + "g", // ThreadGroupSharedMemory + "vThreadID", // InputThreadId + "vThreadGrouID", // InputThreadGroupId + "vThreadIDInGroup", // InputThreadIdInGroup + "vCoverage", // InputCoverageMask + "vThreadIDInGroupFlattened", // InputThreadIdInGroupFlattened + "vGSInstanceID", // InputGsInstanceId + "oDepthGE", // OutputDepthGreaterEqual + "oDepthLE", // OutputDepthLessEqual + "vCycleCounter", // CycleCounter + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOperandType) == DxbcOperandType::Count); + +#define DXBC_MAX_NAME_STRING 512 + + int32_t readString(bx::ReaderSeekerI* _reader, int64_t 
_offset, char* _out, uint32_t _max = DXBC_MAX_NAME_STRING)
+	{
+		int64_t oldOffset = bx::seek(_reader);
+		bx::seek(_reader, _offset, bx::Whence::Begin);
+
+		int32_t size = 0;
+
+		for (uint32_t ii = 0; ii < _max-1; ++ii)
+		{
+			char ch;
+			size += bx::read(_reader, ch);
+			*_out++ = ch;
+
+			if ('\0' == ch)
+			{
+				break;
+			}
+		}
+		*_out = '\0';
+
+		bx::seek(_reader, oldOffset, bx::Whence::Begin);
+
+		return size;
+	}
+	/* Mixing function for steps 1-16 of dxbcHashBlock.
+	 * Computes _d ^ (_b & (_c ^ _d) ): a per-bit select that takes the bit of
+	 * _c where _b is set and the bit of _d where _b is clear (assuming
+	 * bx::uint32_xor / bx::uint32_and are plain bitwise XOR / AND).
+	 */
+	inline uint32_t dxbcMixF(uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		const uint32_t tmp0 = bx::uint32_xor(_c, _d);
+		const uint32_t tmp1 = bx::uint32_and(_b, tmp0);
+		const uint32_t result = bx::uint32_xor(_d, tmp1);
+
+		return result;
+	}
+	/* Mixing function for steps 17-32 of dxbcHashBlock.
+	 * This is dxbcMixF with its arguments rotated, so _d becomes the selector:
+	 * the result takes the bit of _b where _d is set and the bit of _c where
+	 * _d is clear.
+	 */
+	inline uint32_t dxbcMixG(uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		return dxbcMixF(_d, _b, _c);
+	}
+	/* Mixing function for steps 33-48 of dxbcHashBlock.
+	 * Combines all three inputs with two bx::uint32_xor calls
+	 * (_d ^ (_b ^ _c) ).
+	 */
+	inline uint32_t dxbcMixH(uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		const uint32_t tmp0 = bx::uint32_xor(_b, _c);
+		const uint32_t result = bx::uint32_xor(_d, tmp0);
+
+		return result;
+	}
+	/* Mixing function for steps 49-64 of dxbcHashBlock.
+	 * Computes _c ^ bx::uint32_orc(_b, _d).
+	 * NOTE(review): bx::uint32_orc is presumably (_b | ~_d); confirm against
+	 * the bx library before relying on that.
+	 */
+	inline uint32_t dxbcMixI(uint32_t _b, uint32_t _c, uint32_t _d)
+	{
+		const uint32_t tmp0 = bx::uint32_orc(_b, _d);
+		const uint32_t result = bx::uint32_xor(_c, tmp0);
+
+		return result;
+	}
+	/* Folds one 16-word block into the running 4-word hash state.
+	 *
+	 * data - 16 consecutive uint32_t words (data[0] .. data[15]) to consume.
+	 * hash - 4-word state, read at entry and updated in place on exit.
+	 *
+	 * The body is 64 steps in four groups of 16; each group uses one of the
+	 * dxbcMix* functions, its own order of data words, and its own additive
+	 * constants. Every step rewrites one of aa/bb/cc/dd from the other three,
+	 * so the statement order must not be changed. Some steps call
+	 * bx::uint32_ror instead of bx::uint32_rol; for a 32-bit rotate, rotating
+	 * right by r equals rotating left by 32-r (assuming both helpers are
+	 * 32-bit rotates).
+	 */
+	void dxbcHashBlock(const uint32_t* data, uint32_t* hash)
+	{
+		const uint32_t d0 = data[ 0];
+		const uint32_t d1 = data[ 1];
+		const uint32_t d2 = data[ 2];
+		const uint32_t d3 = data[ 3];
+		const uint32_t d4 = data[ 4];
+		const uint32_t d5 = data[ 5];
+		const uint32_t d6 = data[ 6];
+		const uint32_t d7 = data[ 7];
+		const uint32_t d8 = data[ 8];
+		const uint32_t d9 = data[ 9];
+		const uint32_t d10 = data[10];
+		const uint32_t d11 = data[11];
+		const uint32_t d12 = data[12];
+		const uint32_t d13 = data[13];
+		const uint32_t d14 = data[14];
+		const uint32_t d15 = data[15];
+		// Working copies of the running state; hash[] is only written at the end.
+		uint32_t aa = hash[0];
+		uint32_t bb = hash[1];
+		uint32_t cc = hash[2];
+		uint32_t dd = hash[3];
+		// Steps 1-16: dxbcMixF, data words taken in order d0 .. d15.
+		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d0 + 0xd76aa478, 7);
+		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d1 + 0xe8c7b756, 12);
+		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d2 + 0x242070db, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d3 + 0xc1bdceee, 10);
+		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d4 + 0xf57c0faf, 7);
+		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d5 + 0x4787c62a, 12);
+		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d6 + 0xa8304613, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d7 + 0xfd469501, 10);
+		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d8 + 0x698098d8, 7);
+		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d9 + 0x8b44f7af, 12);
+		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d10 + 0xffff5bb1, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d11 + 0x895cd7be, 10);
+		aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d12 + 0x6b901122, 7);
+		dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d13 + 0xfd987193, 12);
+		cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d14 + 0xa679438e, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d15 + 0x49b40821, 10);
+		// Steps 17-32: dxbcMixG, data word index starts at 1 and advances by 5 modulo 16.
+		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d1 + 0xf61e2562, 5);
+		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d6 + 0xc040b340, 9);
+		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d11 + 0x265e5a51, 14);
+		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d0 + 0xe9b6c7aa, 12);
+		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d5 + 0xd62f105d, 5);
+		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d10 + 0x02441453, 9);
+		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d15 + 0xd8a1e681, 14);
+		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d4 + 0xe7d3fbc8, 12);
+		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d9 + 0x21e1cde6, 5);
+		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d14 + 0xc33707d6, 9);
+		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d3 + 0xf4d50d87, 14);
+		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d8 + 0x455a14ed, 12);
+		aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d13 + 0xa9e3e905, 5);
+		dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d2 + 0xfcefa3f8, 9);
+		cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d7 + 0x676f02d9, 14);
+		bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d12 + 0x8d2a4c8a, 12);
+		// Steps 33-48: dxbcMixH, data word index starts at 5 and advances by 3 modulo 16.
+		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d5 + 0xfffa3942, 4);
+		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d8 + 0x8771f681, 11);
+		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d11 + 0x6d9d6122, 16);
+		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d14 + 0xfde5380c, 9);
+		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d1 + 0xa4beea44, 4);
+		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d4 + 0x4bdecfa9, 11);
+		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d7 + 0xf6bb4b60, 16);
+		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d10 + 0xbebfbc70, 9);
+		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d13 + 0x289b7ec6, 4);
+		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d0 + 0xeaa127fa, 11);
+		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d3 + 0xd4ef3085, 16);
+		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d6 + 0x04881d05, 9);
+		aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d9 + 0xd9d4d039, 4);
+		dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d12 + 0xe6db99e5, 11);
+		cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d15 + 0x1fa27cf8, 16);
+		bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d2 + 0xc4ac5665, 9);
+		// Steps 49-64: dxbcMixI, data word index starts at 0 and advances by 7 modulo 16.
+		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d0 + 0xf4292244, 6);
+		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d7 + 0x432aff97, 10);
+		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d14 + 0xab9423a7, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d5 + 0xfc93a039, 11);
+		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d12 + 0x655b59c3, 6);
+		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d3 + 0x8f0ccc92, 10);
+		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d10 + 0xffeff47d, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d1 + 0x85845dd1, 11);
+		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d8 + 0x6fa87e4f, 6);
+		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d15 + 0xfe2ce6e0, 10);
+		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d6 + 0xa3014314, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d13 + 0x4e0811a1, 11);
+		aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d4 + 0xf7537e82, 6);
+		dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d11 + 0xbd3af235, 10);
+		cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d2 + 0x2ad7d2bb, 15);
+		bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d9 + 0xeb86d391, 11);
+		// Add this block's result onto the state that was read at entry.
+		hash[0] += aa;
+		hash[1] += bb;
+		hash[2] += cc;
+		hash[3] += dd;
+	}
+
+	/* dxbc hash function is a slightly modified version of MD5 hash.
+	 * https://tools.ietf.org/html/rfc1321
+	 * http://www.efgh.com/software/md5.txt
+	 *
+	 * Assumption is that data pointer, size are both 4-byte aligned,
+	 * and little endian.
+	 */
+ // + void dxbcHash(const void* _data, uint32_t _size, void* _digest) + { + uint32_t hash[4] = + { + 0x67452301, + 0xefcdab89, + 0x98badcfe, + 0x10325476, + }; + + const uint32_t* data = (const uint32_t*)_data; + for (uint32_t ii = 0, num = _size/64; ii < num; ++ii) + { + dxbcHashBlock(data, hash); + data += 16; + } + + uint32_t last[16]; + memset(last, 0, sizeof(last) ); + + const uint32_t remaining = _size & 0x3f; + + if (remaining >= 56) + { + memcpy(&last[0], data, remaining); + last[remaining/4] = 0x80; + dxbcHashBlock(last, hash); + + memset(&last[1], 0, 56); + } + else + { + memcpy(&last[1], data, remaining); + last[1 + remaining/4] = 0x80; + } + + last[ 0] = _size * 8; + last[15] = _size * 2 + 1; + dxbcHashBlock(last, hash); + + memcpy(_digest, hash, 16); + } + + int32_t read(bx::ReaderI* _reader, DxbcSubOperand& _subOperand) + { + uint32_t token; + int32_t size = 0; + + // 0 1 2 3 + // 76543210765432107654321076543210 + // e222111000nnttttttttssssssssmmoo + // ^^ ^ ^ ^ ^ ^ ^ ^-- number of operands + // || | | | | | +---- operand mode + // || | | | | +------------ operand mode bits + // || | | | +-------------------- type + // || | | +---------------------- number of addressing modes + // || | +------------------------- addressing mode 0 + // || +---------------------------- addressing mode 1 + // |+------------------------------- addressing mode 2 + // +-------------------------------- extended + + size += bx::read(_reader, token); + _subOperand.type = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12); + _subOperand.numAddrModes = uint8_t( (token & UINT32_C(0x00300000) ) >> 20); + _subOperand.addrMode = uint8_t( (token & UINT32_C(0x01c00000) ) >> 22); + _subOperand.mode = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >> 2); + _subOperand.modeBits = uint8_t( (token & UINT32_C(0x00000ff0) ) >> 4) & "\x0f\xff\x03\x00"[_subOperand.mode]; + _subOperand.num = uint8_t( (token & UINT32_C(0x00000003) ) ); + + switch (_subOperand.addrMode) + { 
+ case DxbcOperandAddrMode::Imm32: + size += bx::read(_reader, _subOperand.regIndex); + break; + + case DxbcOperandAddrMode::Reg: + { + DxbcSubOperand subOperand; + size += read(_reader, subOperand); + } + break; + + case DxbcOperandAddrMode::RegImm32: + { + size += bx::read(_reader, _subOperand.regIndex); + + DxbcSubOperand subOperand; + size += read(_reader, subOperand); + } + break; + + case DxbcOperandAddrMode::RegImm64: + { + size += bx::read(_reader, _subOperand.regIndex); + size += bx::read(_reader, _subOperand.regIndex); + + DxbcSubOperand subOperand; + size += read(_reader, subOperand); + } + break; + + default: + BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode); + break; + } + + return size; + } + + int32_t write(bx::WriterI* _writer, const DxbcSubOperand& _subOperand) + { + int32_t size = 0; + + uint32_t token = 0; + token |= (_subOperand.type << 12) & UINT32_C(0x000ff000); + token |= (_subOperand.numAddrModes << 20) & UINT32_C(0x00300000); + token |= (_subOperand.addrMode << 22) & UINT32_C(0x01c00000); + token |= (_subOperand.mode << 2) & UINT32_C(0x0000000c); + token |= (_subOperand.modeBits << 4) & UINT32_C(0x00000ff0); + token |= _subOperand.num & UINT32_C(0x00000003); + size += bx::write(_writer, token); + + switch (_subOperand.addrMode) + { + case DxbcOperandAddrMode::Imm32: + size += bx::write(_writer, _subOperand.regIndex); + break; + + case DxbcOperandAddrMode::Reg: + { + DxbcSubOperand subOperand; + size += write(_writer, subOperand); + } + break; + + case DxbcOperandAddrMode::RegImm32: + { + size += bx::write(_writer, _subOperand.regIndex); + + DxbcSubOperand subOperand; + size += write(_writer, subOperand); + } + break; + + case DxbcOperandAddrMode::RegImm64: + { + size += bx::write(_writer, _subOperand.regIndex); + size += bx::write(_writer, _subOperand.regIndex); + + DxbcSubOperand subOperand; + size += write(_writer, subOperand); + } + break; + + default: + BX_CHECK(false, "sub operand addressing mode %d", 
_subOperand.addrMode); + break; + } + + return size; + } + + int32_t read(bx::ReaderI* _reader, DxbcOperand& _operand) + { + int32_t size = 0; + + uint32_t token; + size += bx::read(_reader, token); + + // 0 1 2 3 + // 76543210765432107654321076543210 + // e222111000nnttttttttssssssssmmoo + // ^^ ^ ^ ^ ^ ^ ^ ^-- number of operands + // || | | | | | +---- operand mode + // || | | | | +------------ operand mode bits + // || | | | +-------------------- type + // || | | +---------------------- number of addressing modes + // || | +------------------------- addressing mode 0 + // || +---------------------------- addressing mode 1 + // |+------------------------------- addressing mode 2 + // +-------------------------------- extended + + _operand.extended = 0 != (token & UINT32_C(0x80000000) ); + _operand.numAddrModes = uint8_t( (token & UINT32_C(0x00300000) ) >> 20); + _operand.addrMode[0] = uint8_t( (token & UINT32_C(0x01c00000) ) >> 22); + _operand.addrMode[1] = uint8_t( (token & UINT32_C(0x0e000000) ) >> 25); + _operand.addrMode[2] = uint8_t( (token & UINT32_C(0x70000000) ) >> 28); + _operand.type = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12); + _operand.mode = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >> 2); + _operand.modeBits = uint8_t( (token & UINT32_C(0x00000ff0) ) >> 4) & "\x0f\xff\x03\x00"[_operand.mode]; + _operand.num = uint8_t( (token & UINT32_C(0x00000003) ) ); + + if (_operand.extended) + { + size += bx::read(_reader, _operand.extBits); + } + + switch (_operand.type) + { + case DxbcOperandType::Imm32: + _operand.num = 2 == _operand.num ? 4 : _operand.num; + for (uint32_t ii = 0; ii < _operand.num; ++ii) + { + size += bx::read(_reader, _operand.un.imm32[ii]); + } + break; + + case DxbcOperandType::Imm64: + _operand.num = 2 == _operand.num ? 
4 : _operand.num; + for (uint32_t ii = 0; ii < _operand.num; ++ii) + { + size += bx::read(_reader, _operand.un.imm64[ii]); + } + break; + + default: + break; + } + + for (uint32_t ii = 0; ii < _operand.numAddrModes; ++ii) + { + switch (_operand.addrMode[ii]) + { + case DxbcOperandAddrMode::Imm32: + size += bx::read(_reader, _operand.regIndex[ii]); + break; + + case DxbcOperandAddrMode::Reg: + size += read(_reader, _operand.subOperand[ii]); + break; + + case DxbcOperandAddrMode::RegImm32: + size += bx::read(_reader, _operand.regIndex[ii]); + size += read(_reader, _operand.subOperand[ii]); + break; + + default: + BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]); + break; + } + } + + return size; + } + + int32_t write(bx::WriterI* _writer, const DxbcOperand& _operand) + { + int32_t size = 0; + + uint32_t token = 0; + token |= _operand.extended ? UINT32_C(0x80000000) : 0; + token |= (_operand.numAddrModes << 20) & UINT32_C(0x00300000); + token |= (_operand.addrMode[0] << 22) & UINT32_C(0x01c00000); + token |= (_operand.addrMode[1] << 25) & UINT32_C(0x0e000000); + token |= (_operand.addrMode[2] << 28) & UINT32_C(0x70000000); + token |= (_operand.type << 12) & UINT32_C(0x000ff000); + token |= (_operand.mode << 2) & UINT32_C(0x0000000c); + + token |= (4 == _operand.num ? 
2 : _operand.num) & UINT32_C(0x00000003); + token |= ( (_operand.modeBits & "\x0f\xff\x03\x00"[_operand.mode]) << 4) & UINT32_C(0x00000ff0); + + size += bx::write(_writer, token); + + if (_operand.extended) + { + size += bx::write(_writer, _operand.extBits); + } + + switch (_operand.type) + { + case DxbcOperandType::Imm32: + for (uint32_t ii = 0; ii < _operand.num; ++ii) + { + size += bx::write(_writer, _operand.un.imm32[ii]); + } + break; + + case DxbcOperandType::Imm64: + for (uint32_t ii = 0; ii < _operand.num; ++ii) + { + size += bx::write(_writer, _operand.un.imm64[ii]); + } + break; + + default: + break; + } + + for (uint32_t ii = 0; ii < _operand.numAddrModes; ++ii) + { + switch (_operand.addrMode[ii]) + { + case DxbcOperandAddrMode::Imm32: + size += bx::write(_writer, _operand.regIndex[ii]); + break; + + case DxbcOperandAddrMode::Reg: + size += write(_writer, _operand.subOperand[ii]); + break; + + case DxbcOperandAddrMode::RegImm32: + size += bx::write(_writer, _operand.regIndex[ii]); + size += write(_writer, _operand.subOperand[ii]); + break; + + default: + BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]); + break; + } + } + + return size; + } + + int32_t read(bx::ReaderI* _reader, DxbcInstruction& _instruction) + { + uint32_t size = 0; + + uint32_t token; + size += bx::read(_reader, token); + + // 0 1 2 3 + // 76543210765432107654321076543210 + // elllllll.............ooooooooooo + // ^^ ^----------- opcode + // |+------------------------------- length + // +-------------------------------- extended + + _instruction.opcode = DxbcOpcode::Enum( (token & UINT32_C(0x000007ff) ) ); + _instruction.length = uint8_t( (token & UINT32_C(0x7f000000) ) >> 24); + bool extended = 0 != (token & UINT32_C(0x80000000) ); + + _instruction.srv = DxbcResourceDim::Unknown; + _instruction.samples = 0; + + _instruction.shadow = false; + _instruction.mono = false; + + _instruction.allowRefactoring = false; + _instruction.fp64 = false; + 
_instruction.earlyDepth = false; + _instruction.enableBuffers = false; + _instruction.skipOptimization = false; + _instruction.enableMinPrecision = false; + _instruction.enableDoubleExtensions = false; + _instruction.enableShaderExtensions = false; + + _instruction.threadsInGroup = false; + _instruction.sharedMemory = false; + _instruction.uavGroup = false; + _instruction.uavGlobal = false; + + _instruction.saturate = false; + _instruction.testNZ = false; + _instruction.retType = DxbcResourceReturnType::Unused; + + switch (_instruction.opcode) + { + case DxbcOpcode::CUSTOMDATA: + { +// uint32_t dataClass; + size += bx::read(_reader, _instruction.length); + for (uint32_t ii = 0, num = (_instruction.length-2)/4; ii < num; ++ii) + { + char temp[16]; + size += bx::read(_reader, temp, 16); + } + + } + return size; + + case DxbcOpcode::DCL_CONSTANT_BUFFER: + // 0 1 2 3 + // 76543210765432107654321076543210 + // ........ a........... + // ^------------ Allow refactoring + + _instruction.allowRefactoring = 0 != (token & UINT32_C(0x00000800) ); + break; + + case DxbcOpcode::DCL_GLOBAL_FLAGS: + // 0 1 2 3 + // 76543210765432107654321076543210 + // ........ sxmoudfa........... 
+ // ^^^^^^^^------------ Allow refactoring + // ||||||+------------- FP64 + // |||||+-------------- Force early depth/stencil + // ||||+--------------- Enable raw and structured buffers + // |||+---------------- Skip optimizations + // ||+----------------- Enable minimum precision + // |+------------------ Enable double extension + // +------------------- Enable shader extension + + _instruction.allowRefactoring = 0 != (token & UINT32_C(0x00000800) ); + _instruction.fp64 = 0 != (token & UINT32_C(0x00001000) ); + _instruction.earlyDepth = 0 != (token & UINT32_C(0x00002000) ); + _instruction.enableBuffers = 0 != (token & UINT32_C(0x00004000) ); + _instruction.skipOptimization = 0 != (token & UINT32_C(0x00008000) ); + _instruction.enableMinPrecision = 0 != (token & UINT32_C(0x00010000) ); + _instruction.enableDoubleExtensions = 0 != (token & UINT32_C(0x00020000) ); + _instruction.enableShaderExtensions = 0 != (token & UINT32_C(0x00040000) ); + break; + + case DxbcOpcode::DCL_INPUT_PS: + // 0 1 2 3 + // 76543210765432107654321076543210 + // ........ iiiii........... + // ^---------------- Interploation + + _instruction.interpolation = DxbcInterpolation::Enum( (token & UINT32_C(0x0000f800) ) >> 11); + break; + + case DxbcOpcode::DCL_RESOURCE: + // 0 1 2 3 + // 76543210765432107654321076543210 + // ........ sssssssrrrrr........... + // ^ ^---------------- SRV + // +----------------------- MSAA samples + + _instruction.srv = DxbcResourceDim::Enum( (token & UINT32_C(0x0000f800) ) >> 11); + _instruction.samples = uint8_t( (token & UINT32_C(0x007f0000) ) >> 16); + break; + + case DxbcOpcode::DCL_SAMPLER: + // 0 1 2 3 + // 76543210765432107654321076543210 + // ........ ms........... + // ^^------------ Shadow sampler + // +------------- Mono + + _instruction.shadow = 0 != (token & UINT32_C(0x00000800) ); + _instruction.mono = 0 != (token & UINT32_C(0x00001000) ); + break; + + case DxbcOpcode::SYNC: + // 0 1 2 3 + // 76543210765432107654321076543210 + // ........ 
gust........... + // ^^^^------------ Threads in group + // ||+------------- Shared memory + // |+-------------- UAV group + // +--------------- UAV global + + _instruction.threadsInGroup = 0 != (token & UINT32_C(0x00000800) ); + _instruction.sharedMemory = 0 != (token & UINT32_C(0x00001000) ); + _instruction.uavGroup = 0 != (token & UINT32_C(0x00002000) ); + _instruction.uavGlobal = 0 != (token & UINT32_C(0x00004000) ); + break; + + default: + // 0 1 2 3 + // 76543210765432107654321076543210 + // ........ ppppn stt........... + // ^ ^ ^^------------- Resource info return type + // | | +-------------- Saturate + // | +------------------- Test not zero + // +----------------------- Precise mask + + _instruction.retType = DxbcResourceReturnType::Enum( (token & UINT32_C(0x00001800) ) >> 11); + _instruction.saturate = 0 != (token & UINT32_C(0x00002000) ); + _instruction.testNZ = 0 != (token & UINT32_C(0x00040000) ); +// _instruction.precise = uint8_t( (token & UINT32_C(0x00780000) ) >> 19); + break; + } + + _instruction.extended[0] = DxbcInstruction::ExtendedType::Count; + for (uint32_t ii = 0; extended; ++ii) + { + // 0 1 2 3 + // 76543210765432107654321076543210 + // e..........................ttttt + // ^ ^ + // | +----- type + // +-------------------------------- extended + + uint32_t extBits; + size += bx::read(_reader, extBits); + extended = 0 != (extBits & UINT32_C(0x80000000) ); + _instruction.extended[ii] = DxbcInstruction::ExtendedType::Enum(extBits & UINT32_C(0x0000001f) ); + + switch (_instruction.extended[ii]) + { + case DxbcInstruction::ExtendedType::SampleControls: + // 0 1 2 3 + // 76543210765432107654321076543210 + // . zzzzyyyyxxxx ..... 
+ // ^ ^ ^ + // | | +------------- x + // | +----------------- y + // +--------------------- z + + _instruction.sampleOffsets[0] = (extBits & UINT32_C(0x00001e00) ) >> 9; + _instruction.sampleOffsets[1] = (extBits & UINT32_C(0x0001e000) ) >> 13; + _instruction.sampleOffsets[2] = (extBits & UINT32_C(0x001e0000) ) >> 17; + break; + + case DxbcInstruction::ExtendedType::ResourceDim: + // 0 1 2 3 + // 76543210765432107654321076543210 + // . ..... + // + + _instruction.resourceTarget = DxbcResourceDim::Enum( (extBits & UINT32_C(0x000003e0) ) >> 6); + _instruction.resourceStride = (extBits & UINT32_C(0x0000f800) ) >> 11; + break; + + case DxbcInstruction::ExtendedType::ResourceReturnType: + // 0 1 2 3 + // 76543210765432107654321076543210 + // . 3333222211110000..... + // ^ ^ ^ + // | | +------------- x + // | +----------------- y + // +--------------------- z + + _instruction.resourceReturnTypes[0] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x000001e0) ) >> 6); + _instruction.resourceReturnTypes[1] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x00001e00) ) >> 9); + _instruction.resourceReturnTypes[2] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x0001e000) ) >> 13); + _instruction.resourceReturnTypes[3] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x001e0000) ) >> 17); + break; + + default: + break; + } + } + + switch (_instruction.opcode) + { + case DxbcOpcode::DCL_FUNCTION_TABLE: + { + uint32_t tableId; + size += read(_reader, tableId); + + uint32_t num; + size += read(_reader, num); + + for (uint32_t ii = 0; ii < num; ++ii) + { + uint32_t bodyId; + size += read(_reader, bodyId); + } + } + break; + + case DxbcOpcode::DCL_INTERFACE: + { + uint32_t interfaceId; + size += read(_reader, interfaceId); + + uint32_t num; + size += read(_reader, num); + + BX_CHECK(false, "not implemented."); + } + break; + + default: + break; + }; + + uint32_t currOp = 0; + + const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode]; + 
_instruction.numOperands = info.numOperands; + switch (info.numOperands) + { + case 6: size += read(_reader, _instruction.operand[currOp++]); + case 5: size += read(_reader, _instruction.operand[currOp++]); + case 4: size += read(_reader, _instruction.operand[currOp++]); + case 3: size += read(_reader, _instruction.operand[currOp++]); + case 2: size += read(_reader, _instruction.operand[currOp++]); + case 1: size += read(_reader, _instruction.operand[currOp++]); + case 0: + if (0 < info.numValues) + { + size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t) ); + } + break; + + default: + BX_CHECK(false, "Instruction %s with invalid number of operands %d (numValues %d)." + , getName(_instruction.opcode) + , info.numOperands + , info.numValues + ); + break; + } + + return size; + } + + int32_t write(bx::WriterI* _writer, const DxbcInstruction& _instruction) + { + uint32_t token = 0; + token |= (_instruction.opcode ) & UINT32_C(0x000007ff); + token |= (_instruction.length << 24) & UINT32_C(0x7f000000); + + token |= DxbcInstruction::ExtendedType::Count != _instruction.extended[0] + ? UINT32_C(0x80000000) + : 0 + ; + + switch (_instruction.opcode) + { +// case DxbcOpcode::CUSTOMDATA: +// return size; + + case DxbcOpcode::DCL_CONSTANT_BUFFER: + token |= _instruction.allowRefactoring ? UINT32_C(0x00000800) : 0; + break; + + case DxbcOpcode::DCL_GLOBAL_FLAGS: + token |= _instruction.allowRefactoring ? UINT32_C(0x00000800) : 0; + token |= _instruction.fp64 ? UINT32_C(0x00001000) : 0; + token |= _instruction.earlyDepth ? UINT32_C(0x00002000) : 0; + token |= _instruction.enableBuffers ? UINT32_C(0x00004000) : 0; + token |= _instruction.skipOptimization ? UINT32_C(0x00008000) : 0; + token |= _instruction.enableMinPrecision ? UINT32_C(0x00010000) : 0; + token |= _instruction.enableDoubleExtensions ? UINT32_C(0x00020000) : 0; + token |= _instruction.enableShaderExtensions ? 
UINT32_C(0x00040000) : 0; + break; + + case DxbcOpcode::DCL_INPUT_PS: + token |= (_instruction.interpolation << 11) & UINT32_C(0x0000f800); + break; + + case DxbcOpcode::DCL_RESOURCE: + token |= (_instruction.srv << 11) & UINT32_C(0x0000f800); + token |= (_instruction.samples << 16) & UINT32_C(0x007f0000); + break; + + case DxbcOpcode::DCL_SAMPLER: + token |= _instruction.shadow ? (0x00000800) : 0; + token |= _instruction.mono ? (0x00001000) : 0; + break; + + case DxbcOpcode::SYNC: + token |= _instruction.threadsInGroup ? UINT32_C(0x00000800) : 0; + token |= _instruction.sharedMemory ? UINT32_C(0x00001000) : 0; + token |= _instruction.uavGroup ? UINT32_C(0x00002000) : 0; + token |= _instruction.uavGlobal ? UINT32_C(0x00004000) : 0; + break; + + default: + token |= (_instruction.retType << 11) & UINT32_C(0x00001800); + token |= _instruction.saturate ? UINT32_C(0x00002000) : 0; + token |= _instruction.testNZ ? UINT32_C(0x00040000) : 0; +// _instruction.precise = uint8_t( (token & UINT32_C(0x00780000) ) >> 19); + break; + } + + uint32_t size =0; + size += bx::write(_writer, token); + + for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii) + { + size += write(_writer, _instruction.operand[ii]); + } + + const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode]; + if (0 < info.numValues) + { + size += bx::write(_writer, _instruction.value, info.numValues*sizeof(uint32_t) ); + } + + return size; + } + + int32_t toString(char* _out, int32_t _size, const DxbcInstruction& _instruction) + { + int32_t size = 0; + + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s%s%s" + , getName(_instruction.opcode) + , _instruction.saturate ? "_sat" : "" + , _instruction.testNZ ? 
"_nz" : "" + ); + + if (DxbcResourceDim::Unknown != _instruction.srv) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , " %s<%x>" + , s_dxbcSrvType[_instruction.srv] + , _instruction.value[0] + ); + } + else if (0 < s_dxbcOpcodeInfo[_instruction.opcode].numValues) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , " %d" + , _instruction.value[0] + ); + } + + for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii) + { + const DxbcOperand& operand = _instruction.operand[ii]; + + const bool array = false + || 1 < operand.numAddrModes + || DxbcOperandAddrMode::Imm32 != operand.addrMode[0] + ; + + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s%s%s" + , 0 == ii ? " " : ", " + , operand.extended ? "*" : "" + , s_dxbcOperandType[operand.type] + ); + + switch (operand.type) + { + case DxbcOperandType::Imm32: + case DxbcOperandType::Imm64: + for (uint32_t jj = 0; jj < operand.num; ++jj) + { + union { uint32_t i; float f; } cast = { operand.un.imm32[jj] }; + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s%f" + , 0 == jj ? "(" : ", " + , cast.f + ); + } + + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , ")" + ); + break; + + default: + break; + } + + const uint32_t first = DxbcOperandAddrMode::RegImm32 == operand.addrMode[0] ? 0 : 1; + if (0 == first) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "[" + ); + } + else + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%d%s" + , operand.regIndex[0] + , array ? 
"[" : "" + ); + } + + for (uint32_t jj = first; jj < operand.numAddrModes; ++jj) + { + switch (operand.addrMode[jj]) + { + case DxbcOperandAddrMode::Imm32: + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%d" + , operand.regIndex[jj] + ); + break; + + case DxbcOperandAddrMode::Reg: + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s%d" + , s_dxbcOperandType[operand.subOperand[jj].type] + , operand.regIndex[jj] + ); + break; + + case DxbcOperandAddrMode::RegImm32: + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%d + %s%d" + , operand.regIndex[jj] + , s_dxbcOperandType[operand.subOperand[jj].type] + , operand.regIndex[jj] + ); + break; + + default: + break; + } + } + + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s" + , array ? "]" : "" + ); + + switch (operand.mode) + { + case DxbcOperandMode::Mask: + if (0xf > operand.modeBits + && 0 < operand.modeBits) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , ".%s%s%s%s" + , 0 == (operand.modeBits & 1) ? "" : "x" + , 0 == (operand.modeBits & 2) ? "" : "y" + , 0 == (operand.modeBits & 4) ? "" : "z" + , 0 == (operand.modeBits & 8) ? 
"" : "w" + ); + } + break; + + case DxbcOperandMode::Swizzle: + if (0xe4 != operand.modeBits) + { + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , ".%c%c%c%c" + , "xyzw"[(operand.modeBits )&0x3] + , "xyzw"[(operand.modeBits>>2)&0x3] + , "xyzw"[(operand.modeBits>>4)&0x3] + , "xyzw"[(operand.modeBits>>6)&0x3] + ); + } + break; + + case DxbcOperandMode::Scalar: + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , ".%c" + , "xyzw"[operand.modeBits] + ); + break; + + default: + break; + } + + } + + return size; + } + + int32_t read(bx::ReaderSeekerI* _reader, DxbcSignature& _signature) + { + int32_t size = 0; + + int64_t offset = bx::seek(_reader); + + uint32_t num; + size += bx::read(_reader, num); + size += bx::read(_reader, _signature.key); + + for (uint32_t ii = 0; ii < num; ++ii) + { + DxbcSignature::Element element; + + uint32_t nameOffset; + size += bx::read(_reader, nameOffset); + + char name[DXBC_MAX_NAME_STRING]; + readString(_reader, offset + nameOffset, name); + element.name = name; + + size += bx::read(_reader, element.semanticIndex); + size += bx::read(_reader, element.valueType); + size += bx::read(_reader, element.componentType); + size += bx::read(_reader, element.registerIndex); + size += bx::read(_reader, element.mask); + size += bx::read(_reader, element.readWriteMask); + size += bx::read(_reader, element.stream); + + // padding + uint8_t padding; + size += bx::read(_reader, padding); + + _signature.elements.push_back(element); + } + + return size; + } + + int32_t write(bx::WriterI* _writer, const DxbcSignature& _signature) + { + int32_t size = 0; + + const uint32_t num = uint32_t(_signature.elements.size() ); + size += bx::write(_writer, num); + size += bx::write(_writer, _signature.key); + + typedef stl::unordered_map NameOffsetMap; + NameOffsetMap nom; + + const uint8_t pad = 0; + uint32_t nameOffset = num * 24 + 8; + for (uint32_t ii = 0; ii < num; ++ii) + { + const DxbcSignature::Element& element = 
_signature.elements[ii]; + + NameOffsetMap::iterator it = nom.find(element.name); + if (it == nom.end() ) + { + nom.insert(stl::make_pair(element.name, nameOffset) ); + size += bx::write(_writer, nameOffset); + nameOffset += uint32_t(element.name.size() + 1); + } + else + { + size += bx::write(_writer, it->second); + } + + size += bx::write(_writer, element.semanticIndex); + size += bx::write(_writer, element.valueType); + size += bx::write(_writer, element.componentType); + size += bx::write(_writer, element.registerIndex); + size += bx::write(_writer, element.mask); + size += bx::write(_writer, element.readWriteMask); + size += bx::write(_writer, element.stream); + size += bx::write(_writer, pad); + + } + + uint32_t len = 0; + for (uint32_t ii = 0; ii < num; ++ii) + { + const DxbcSignature::Element& element = _signature.elements[ii]; + NameOffsetMap::iterator it = nom.find(element.name); + if (it != nom.end() ) + { + nom.erase(it); + size += bx::write(_writer, element.name.c_str(), uint32_t(element.name.size() + 1) ); + len += uint32_t(element.name.size() + 1); + } + } + + // align 4 bytes + size += bx::writeRep(_writer, 0xab, (len+3)/4*4 - len); + + return size; + } + + int32_t read(bx::ReaderSeekerI* _reader, DxbcShader& _shader) + { + int32_t size = 0; + + size += bx::read(_reader, _shader.version); + + uint32_t bcLength; + size += bx::read(_reader, bcLength); + + uint32_t len = (bcLength-2)*sizeof(uint32_t); + _shader.byteCode.resize(len); + size += bx::read(_reader, _shader.byteCode.data(), len); + + return size; + } + + int32_t write(bx::WriterI* _writer, const DxbcShader& _shader) + { + const uint32_t len = uint32_t(_shader.byteCode.size() ); + const uint32_t bcLength = len / sizeof(uint32_t) + 2; + + int32_t size = 0; + size += bx::write(_writer, _shader.version); + size += bx::write(_writer, bcLength); + size += bx::write(_writer, _shader.byteCode.data(), len); + + return size; + } + +#define DXBC_CHUNK_HEADER BX_MAKEFOURCC('D', 'X', 'B', 'C') +#define 
DXBC_CHUNK_SHADER BX_MAKEFOURCC('S', 'H', 'D', 'R') +#define DXBC_CHUNK_INPUT_SIGNATURE BX_MAKEFOURCC('I', 'S', 'G', 'N') +#define DXBC_CHUNK_OUTPUT_SIGNATURE BX_MAKEFOURCC('O', 'S', 'G', 'N') + + int32_t read(bx::ReaderSeekerI* _reader, DxbcContext& _dxbc) + { + int32_t size = 0; + size += bx::read(_reader, _dxbc.header); + + for (uint32_t ii = 0; ii < _dxbc.header.numChunks; ++ii) + { + bx::seek(_reader, sizeof(DxbcContext::Header) + ii*sizeof(uint32_t), bx::Whence::Begin); + + uint32_t chunkOffset; + size += bx::read(_reader, chunkOffset); + + bx::seek(_reader, chunkOffset, bx::Whence::Begin); + + uint32_t fourcc; + size += bx::read(_reader, fourcc); + + uint32_t chunkSize; + size += bx::read(_reader, chunkSize); + + switch (fourcc) + { + case DXBC_CHUNK_SHADER: + case BX_MAKEFOURCC('S', 'H', 'E', 'X'): + size += read(_reader, _dxbc.shader); + break; + + case BX_MAKEFOURCC('I', 'S', 'G', '1'): + case DXBC_CHUNK_INPUT_SIGNATURE: + size += read(_reader, _dxbc.inputSignature); + break; + + case BX_MAKEFOURCC('O', 'S', 'G', '1'): + case BX_MAKEFOURCC('O', 'S', 'G', '5'): + case DXBC_CHUNK_OUTPUT_SIGNATURE: + size += read(_reader, _dxbc.outputSignature); + break; + + case BX_MAKEFOURCC('R', 'D', 'E', 'F'): + case BX_MAKEFOURCC('I', 'F', 'C', 'E'): + case BX_MAKEFOURCC('P', 'C', 'S', 'G'): + case BX_MAKEFOURCC('S', 'T', 'A', 'T'): + case BX_MAKEFOURCC('S', 'F', 'I', '0'): + case BX_MAKEFOURCC('P', 'S', 'O', '1'): + case BX_MAKEFOURCC('P', 'S', 'O', '2'): + size += chunkSize; + break; + + default: + size += chunkSize; + BX_CHECK(false, "UNKNOWN FOURCC %c%c%c%c %d" + , ( (char*)&fourcc)[0] + , ( (char*)&fourcc)[1] + , ( (char*)&fourcc)[2] + , ( (char*)&fourcc)[3] + , size + ); + break; + } + } + + return size; + } + + int32_t write(bx::WriterSeekerI* _writer, const DxbcContext& _dxbc) + { + int32_t size = 0; + + int64_t dxbcOffset = bx::seek(_writer); + size += bx::write(_writer, DXBC_CHUNK_HEADER); + + size += bx::writeRep(_writer, 0, 16); + + size += 
bx::write(_writer, UINT32_C(1) ); + + int64_t sizeOffset = bx::seek(_writer); + size += bx::writeRep(_writer, 0, 4); + + uint32_t numChunks = 3; + size += bx::write(_writer, numChunks); + + int64_t chunksOffsets = bx::seek(_writer); + size += bx::writeRep(_writer, 0, numChunks*sizeof(uint32_t) ); + + uint32_t chunkOffset[3]; + uint32_t chunkSize[3]; + + chunkOffset[0] = uint32_t(bx::seek(_writer) - dxbcOffset); + size += write(_writer, DXBC_CHUNK_INPUT_SIGNATURE); + size += write(_writer, UINT32_C(0) ); + chunkSize[0] = write(_writer, _dxbc.inputSignature); + + chunkOffset[1] = uint32_t(bx::seek(_writer) - dxbcOffset); + size += write(_writer, DXBC_CHUNK_OUTPUT_SIGNATURE); + size += write(_writer, UINT32_C(0) ); + chunkSize[1] = write(_writer, _dxbc.outputSignature); + + chunkOffset[2] = uint32_t(bx::seek(_writer) - dxbcOffset); + size += write(_writer, DXBC_CHUNK_SHADER); + size += write(_writer, UINT32_C(0) ); + chunkSize[2] = write(_writer, _dxbc.shader); + + size += 0 + + chunkSize[0] + + chunkSize[1] + + chunkSize[2] + ; + + int64_t eof = bx::seek(_writer); + + bx::seek(_writer, sizeOffset, bx::Whence::Begin); + bx::write(_writer, size); + + bx::seek(_writer, chunksOffsets, bx::Whence::Begin); + bx::write(_writer, chunkOffset, sizeof(chunkOffset) ); + + for (uint32_t ii = 0; ii < BX_COUNTOF(chunkOffset); ++ii) + { + bx::seek(_writer, chunkOffset[ii]+4, bx::Whence::Begin); + bx::write(_writer, chunkSize[ii]); + } + + bx::seek(_writer, eof, bx::Whence::Begin); + + return size; + } + + void parse(const DxbcShader& _src, DxbcParseFn _fn, void* _userData) + { + bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) ); + + for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;) + { + DxbcInstruction instruction; + uint32_t size = read(&reader, instruction); + + BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); + + _fn(token * sizeof(uint32_t), 
instruction, _userData); + + token += instruction.length; + } + } + + void filter(DxbcShader& _dst, const DxbcShader& _src, DxbcFilterFn _fn, void* _userData) + { + bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) ); + + bx::CrtAllocator r; + bx::MemoryBlock mb(&r); + bx::MemoryWriter writer(&mb); + + for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;) + { + DxbcInstruction instruction; + uint32_t size = read(&reader, instruction); + BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); + + _fn(instruction, _userData); + + write(&writer, instruction); + + token += instruction.length; + } + + uint8_t* data = (uint8_t*)mb.more(); + uint32_t size = uint32_t(bx::getSize(&writer) ); + _dst.byteCode.reserve(size); + memcpy(_dst.byteCode.data(), data, size); + } + +} // namespace bgfx diff --git a/src/shader_dxbc.h b/src/shader_dxbc.h new file mode 100644 index 00000000..96625c99 --- /dev/null +++ b/src/shader_dxbc.h @@ -0,0 +1,608 @@ +/* + * Copyright 2011-2015 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#ifndef BGFX_SHADER_DXBC_H +#define BGFX_SHADER_DXBC_H + +#include + +namespace bgfx +{ + struct DxbcOpcode + { + enum Enum + { + ADD, + AND, + BREAK, + BREAKC, + CALL, + CALLC, + CASE, + CONTINUE, + CONTINUEC, + CUT, + DEFAULT, + DERIV_RTX, + DERIV_RTY, + DISCARD, + DIV, + DP2, + DP3, + DP4, + ELSE, + EMIT, + EMITTHENCUT, + ENDIF, + ENDLOOP, + ENDSWITCH, + EQ, + EXP, + FRC, + FTOI, + FTOU, + GE, + IADD, + IF, + IEQ, + IGE, + ILT, + IMAD, + IMAX, + IMIN, + IMUL, + INE, + INEG, + ISHL, + ISHR, + ITOF, + LABEL, + LD, + LD_MS, + LOG, + LOOP, + LT, + MAD, + MIN, + MAX, + CUSTOMDATA, + MOV, + MOVC, + MUL, + NE, + NOP, + NOT, + OR, + RESINFO, + RET, + RETC, + ROUND_NE, + ROUND_NI, + ROUND_PI, + ROUND_Z, + RSQ, + SAMPLE, + SAMPLE_C, + SAMPLE_C_LZ, + SAMPLE_L, + SAMPLE_D, + SAMPLE_B, + SQRT, + SWITCH, + SINCOS, + UDIV, + ULT, + UGE, + UMUL, + UMAD, + UMAX, + UMIN, + USHR, + UTOF, + XOR, + DCL_RESOURCE, + DCL_CONSTANT_BUFFER, + DCL_SAMPLER, + DCL_INDEX_RANGE, + DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, + DCL_GS_INPUT_PRIMITIVE, + DCL_MAX_OUTPUT_VERTEX_COUNT, + DCL_INPUT, + DCL_INPUT_SGV, + DCL_INPUT_SIV, + DCL_INPUT_PS, + DCL_INPUT_PS_SGV, + DCL_INPUT_PS_SIV, + DCL_OUTPUT, + DCL_OUTPUT_SGV, + DCL_OUTPUT_SIV, + DCL_TEMPS, + DCL_INDEXABLE_TEMP, + DCL_GLOBAL_FLAGS, + + UnknownD3D10, + LOD, + GATHER4, + SAMPLE_POS, + SAMPLE_INFO, + + UnknownD3D10_1, + HS_DECLS, + HS_CONTROL_POINT_PHASE, + HS_FORK_PHASE, + HS_JOIN_PHASE, + EMIT_STREAM, + CUT_STREAM, + EMITTHENCUT_STREAM, + INTERFACE_CALL, + BUFINFO, + DERIV_RTX_COARSE, + DERIV_RTX_FINE, + DERIV_RTY_COARSE, + DERIV_RTY_FINE, + GATHER4_C, + GATHER4_PO, + GATHER4_PO_C, + RCP, + F32TOF16, + F16TOF32, + UADDC, + USUBB, + COUNTBITS, + FIRSTBIT_HI, + FIRSTBIT_LO, + FIRSTBIT_SHI, + UBFE, + IBFE, + BFI, + BFREV, + SWAPC, + DCL_STREAM, + DCL_FUNCTION_BODY, + DCL_FUNCTION_TABLE, + DCL_INTERFACE, + DCL_INPUT_CONTROL_POINT_COUNT, + DCL_OUTPUT_CONTROL_POINT_COUNT, + DCL_TESS_DOMAIN, 
+ DCL_TESS_PARTITIONING, + DCL_TESS_OUTPUT_PRIMITIVE, + DCL_HS_MAX_TESSFACTOR, + DCL_HS_FORK_PHASE_INSTANCE_COUNT, + DCL_HS_JOIN_PHASE_INSTANCE_COUNT, + DCL_THREAD_GROUP, + DCL_UNORDERED_ACCESS_VIEW_TYPED, + DCL_UNORDERED_ACCESS_VIEW_RAW, + DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, + DCL_THREAD_GROUP_SHARED_MEMORY_RAW, + DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, + DCL_RESOURCE_RAW, + DCL_RESOURCE_STRUCTURED, + LD_UAV_TYPED, + STORE_UAV_TYPED, + LD_RAW, + STORE_RAW, + LD_STRUCTURED, + STORE_STRUCTURED, + ATOMIC_AND, + ATOMIC_OR, + ATOMIC_XOR, + ATOMIC_CMP_STORE, + ATOMIC_IADD, + ATOMIC_IMAX, + ATOMIC_IMIN, + ATOMIC_UMAX, + ATOMIC_UMIN, + IMM_ATOMIC_ALLOC, + IMM_ATOMIC_CONSUME, + IMM_ATOMIC_IADD, + IMM_ATOMIC_AND, + IMM_ATOMIC_OR, + IMM_ATOMIC_XOR, + IMM_ATOMIC_EXCH, + IMM_ATOMIC_CMP_EXCH, + IMM_ATOMIC_IMAX, + IMM_ATOMIC_IMIN, + IMM_ATOMIC_UMAX, + IMM_ATOMIC_UMIN, + SYNC, + DADD, + DMAX, + DMIN, + DMUL, + DEQ, + DGE, + DLT, + DNE, + DMOV, + DMOVC, + DTOF, + FTOD, + EVAL_SNAPPED, + EVAL_SAMPLE_INDEX, + EVAL_CENTROID, + DCL_GS_INSTANCE_COUNT, + ABORT, + DEBUG_BREAK, + + UnknownD3D11, + DDIV, + DFMA, + DRCP, + MSAD, + DTOI, + DTOU, + ITOD, + UTOD, + + Count + }; + }; + + const char* getName(DxbcOpcode::Enum _opcode); + + struct DxbcBuiltin + { + // D3D_NAME + // https://msdn.microsoft.com/en-us/library/windows/desktop/ff728724%28v=vs.85%29.aspx + // mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/svs.txt + enum Enum + { + Undefined, + Position, + ClipDistance, + CullDistance, + RenderTargetArrayIndex, + ViewportArrayIndex, + VertexId, + PrimitiveId, + InstanceId, + IsFrontFace, + SampleIndex, + FinalQuadUEq0EdgeTessFactor, + FinalQuadVEq0EdgeTessFactor, + FinalQuadUEq1EdgeTessFactor, + FinalQuadVEq1EdgeTessFactor, + FinalQuadUInsideTessFactor, + FinalQuadVInsideTessFactor, + FinalTriUEq0EdgeTessFactor, + FinalTriVEq0EdgeTessFactor, + FinalTriWEq0EdgeTessFactor, + FinalTriInsideTessFactor, + FinalLineDetailTessFactor, + FinalLineDensityTessFactor, + Target = 64, + 
Depth, + Coverage, + DepthGreaterEqual, + DepthLessEqual, + StencilRef, + InnerCoverage, + }; + }; + + struct DxbcResourceDim + { + // D3D_SRV_DIMENSION + // https://msdn.microsoft.com/en-us/library/windows/desktop/ff728736%28v=vs.85%29.aspx + // mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/targets.txt + enum Enum + { + Unknown, + Buffer, + Texture1D, + Texture2D, + Texture2DMS, + Texture3D, + TextureCube, + Texture1DArray, + Texture2DArray, + Texture2DMSArray, + TextureCubearray, + RawBuffer, + StructuredBuffer, + + Count + }; + }; + + struct DxbcInterpolation + { + enum Enum + { + Unknown, + Constant, + Linear, + LinearCentroid, + LinearNoPerspective, + LinearNoPerspectiveCentroid, + LinearSample, + LinearNoPerspectiveSample, + + Count + }; + }; + + struct DxbcResourceReturnType + { + enum Enum + { + Unorm, + Snorm, + Sint, + Uint, + Float, + Mixed, + Double, + Continued, + Unused, + + Count + }; + }; + + struct DxbcComponentType + { + enum Enum + { + Unknown, + Uint32, + Int32, + Float, + + Count + }; + }; + + struct DxbcPrecision + { + enum Enum + { + Default, + Half, + Float2_8, + Reserved, + Int16, + Uint16, + Any16 = 0xf0, + Any10 = 0xf1, + }; + }; + + struct DxbcOperandType + { + enum Enum + { + Temp, + Input, + Output, + TempArray, + Imm32, + Imm64, + Sampler, + Resource, + ConstantBuffer, + ImmConstantBuffer, + Label, + PrimitiveID, + OutputDepth, + Null, + Rasterizer, + CoverageMask, + Stream, + FunctionBody, + FunctionTable, + Interface, + FunctionInput, + FunctionOutput, + OutputControlPointId, + InputForkInstanceId, + InputJoinInstanceId, + InputControlPoint, + OutputControlPoint, + InputPatchConstant, + InputDomainPoint, + ThisPointer, + UnorderedAccessView, + ThreadGroupSharedMemory, + InputThreadId, + InputThreadGroupId, + InputThreadIdInGroup, + InputCoverageMask, + InputThreadIdInGroupFlattened, + InputGsInstanceId, + OutputDepthGreaterEqual, + OutputDepthLessEqual, + CycleCounter, + + Count + }; + }; + + struct DxbcOperandAddrMode + { 
+ enum Enum + { + Imm32, + Imm64, + Reg, + RegImm32, + RegImm64, + + Count + }; + }; + + struct DxbcOperandMode + { + enum Enum + { + Mask, + Swizzle, + Scalar, + + Count + }; + }; + + struct DxbcSubOperand + { + DxbcOperandType::Enum type; + uint8_t mode; + uint8_t modeBits; + uint8_t num; + uint8_t numAddrModes; + uint8_t addrMode; + uint32_t regIndex; + }; + + struct DxbcOperand + { + DxbcOperandType::Enum type; + DxbcOperandMode::Enum mode; + uint8_t modeBits; + uint8_t num; + bool extended; + uint32_t extBits; + + uint8_t numAddrModes; + uint8_t addrMode[3]; + uint32_t regIndex[3]; + DxbcSubOperand subOperand[3]; + + union + { + uint32_t imm32[4]; + uint64_t imm64[4]; + } un; + }; + + struct DxbcInstruction + { + struct ExtendedType + { + enum Enum + { + Empty, + SampleControls, + ResourceDim, + ResourceReturnType, + + Count + }; + }; + + DxbcOpcode::Enum opcode; + uint32_t value[3]; + uint32_t length; + uint8_t numOperands; + ExtendedType::Enum extended[3]; + + // + DxbcResourceDim::Enum srv; + uint8_t samples; + + // + DxbcInterpolation::Enum interpolation; + + // + bool shadow; + bool mono; + + // + bool allowRefactoring; + bool fp64; + bool earlyDepth; + bool enableBuffers; + bool skipOptimization; + bool enableMinPrecision; + bool enableDoubleExtensions; + bool enableShaderExtensions; + + // + bool threadsInGroup; + bool sharedMemory; + bool uavGroup; + bool uavGlobal; + + // + DxbcResourceReturnType::Enum retType; + bool saturate; + uint8_t testNZ; + + // + uint8_t sampleOffsets[3]; + uint8_t resourceTarget; + uint8_t resourceStride; + DxbcResourceReturnType::Enum resourceReturnTypes[4]; + + DxbcOperand operand[6]; + }; + + int32_t read(bx::ReaderI* _reader, DxbcInstruction& _instruction); + int32_t write(bx::WriterI* _writer, const DxbcInstruction& _instruction); + int32_t toString(char* _out, int32_t _size, const DxbcInstruction& _instruction); + + struct DxbcSignature + { + struct Element + { + stl::string name; + uint32_t semanticIndex; + 
DxbcBuiltin::Enum valueType; + DxbcComponentType::Enum componentType; + uint32_t registerIndex; + uint8_t mask; + uint8_t readWriteMask; + uint8_t stream; + }; + + uint32_t key; + stl::vector elements; + }; + + int32_t read(bx::ReaderSeekerI* _reader, DxbcSignature& _signature); + int32_t write(bx::WriterI* _writer, const DxbcSignature& _signature); + + struct DxbcShader + { + uint32_t version; + stl::vector byteCode; + }; + + int32_t read(bx::ReaderSeekerI* _reader, DxbcShader& _shader); + int32_t write(bx::WriterI* _writer, const DxbcShader& _shader); + + typedef void (*DxbcParseFn)(uint32_t _offset, const DxbcInstruction& _instruction, void* _userData); + void parse(const DxbcShader& _src, DxbcParseFn _fn, void* _userData); + + typedef void (*DxbcFilterFn)(DxbcInstruction& _instruction, void* _userData); + void filter(DxbcShader& _dst, const DxbcShader& _src, DxbcFilterFn _fn, void* _userData); + + struct DxbcContext + { + struct Header + { + uint32_t magic; + uint8_t hash[16]; + uint32_t version; + uint32_t size; + uint32_t numChunks; + }; + + Header header; + DxbcSignature inputSignature; + DxbcSignature outputSignature; + DxbcShader shader; + }; + + int32_t read(bx::ReaderSeekerI* _reader, DxbcContext& _dxbc); + int32_t write(bx::WriterSeekerI* _writer, const DxbcContext& _dxbc); + + /// Calculate DXBC hash from data. + void dxbcHash(const void* _data, uint32_t _size, void* _digest); + +} // namespace bgfx + +#endif // BGFX_SHADER_DXBC_H diff --git a/src/shader_spirv.cpp b/src/shader_spirv.cpp new file mode 100644 index 00000000..a7b15957 --- /dev/null +++ b/src/shader_spirv.cpp @@ -0,0 +1,732 @@ +/* + * Copyright 2011-2015 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause + */ + +#include "bgfx_p.h" +#include "shader_spirv.h" + +namespace bgfx +{ + struct SpirvOpcodeInfo + { + uint8_t numOperands; + uint8_t numValues; + bool hasVariable; + }; + + static const SpirvOpcodeInfo s_sprivOpcodeInfo[] = + { + { 0, 0, false }, // Nop, + { 0, 0, true }, // Source + { 0, 0, true }, // SourceExtension + { 0, 0, false }, // Extension + { 0, 1, true }, // ExtInstImport + { 0, 2, false }, // MemoryModel + { 0, 2, false }, // EntryPoint + { 0, 0, false }, // ExecutionMode + { 0, 1, false }, // TypeVoid + { 0, 1, false }, // TypeBool + { 0, 3, false }, // TypeInt + { 0, 2, false }, // TypeFloat + { 0, 3, false }, // TypeVector + { 0, 3, false }, // TypeMatrix + { 1, 7, false }, // TypeSampler + { 0, 0, false }, // TypeFilter + { 0, 0, false }, // TypeArray + { 0, 0, false }, // TypeRuntimeArray + { 0, 0, false }, // TypeStruct + { 0, 0, false }, // TypeOpaque + { 0, 3, false }, // TypePointer + { 0, 2, true }, // TypeFunction + { 0, 0, false }, // TypeEvent + { 0, 0, false }, // TypeDeviceEvent + { 0, 0, false }, // TypeReserveId + { 0, 0, false }, // TypeQueue + { 0, 0, false }, // TypePipe + { 0, 0, false }, // ConstantTrue + { 0, 0, false }, // ConstantFalse + { 0, 2, true }, // Constant + { 0, 2, true }, // ConstantComposite + { 0, 0, false }, // ConstantSampler + { 0, 0, false }, // ConstantNullPointer + { 0, 0, false }, // ConstantNullObject + { 0, 0, false }, // SpecConstantTrue + { 0, 0, false }, // SpecConstantFalse + { 0, 0, false }, // SpecConstant + { 0, 0, false }, // SpecConstantComposite + { 0, 3, true }, // Variable + { 0, 0, false }, // VariableArray + { 0, 4, false }, // Function + { 0, 0, false }, // FunctionParameter + { 0, 0, false }, // FunctionEnd + { 0, 0, false }, // FunctionCall + { 0, 0, false }, // ExtInst + { 0, 0, false }, // Undef + { 0, 0, false }, // Load + { 0, 2, true }, // Store + { 0, 0, false }, // Phi + { 0, 0, false }, // DecorationGroup + { 0, 2, 
true }, // Decorate + { 0, 0, false }, // MemberDecorate + { 0, 0, false }, // GroupDecorate + { 0, 0, false }, // GroupMemberDecorate + { 0, 1, true }, // Name + { 0, 1, true }, // MemberName + { 0, 0, false }, // String + { 0, 0, false }, // Line + { 0, 0, false }, // VectorExtractDynamic + { 0, 0, false }, // VectorInsertDynamic + { 0, 0, false }, // VectorShuffle + { 0, 0, false }, // CompositeConstruct + { 0, 0, false }, // CompositeExtract + { 0, 0, false }, // CompositeInsert + { 0, 0, false }, // CopyObject + { 0, 0, false }, // CopyMemory + { 0, 0, false }, // CopyMemorySized + { 0, 0, false }, // Sampler + { 0, 0, false }, // TextureSample + { 0, 0, false }, // TextureSampleDref + { 0, 0, false }, // TextureSampleLod + { 0, 0, false }, // TextureSampleProj + { 0, 0, false }, // TextureSampleGrad + { 0, 0, false }, // TextureSampleOffset + { 0, 0, false }, // TextureSampleProjLod + { 0, 0, false }, // TextureSampleProjGrad + { 0, 0, false }, // TextureSampleLodOffset + { 0, 0, false }, // TextureSampleProjOffset + { 0, 0, false }, // TextureSampleGradOffset + { 0, 0, false }, // TextureSampleProjLodOffset + { 0, 0, false }, // TextureSampleProjGradOffset + { 0, 0, false }, // TextureFetchTexelLod + { 0, 0, false }, // TextureFetchTexelOffset + { 0, 0, false }, // TextureFetchSample + { 0, 0, false }, // TextureFetchTexel + { 0, 0, false }, // TextureGather + { 0, 0, false }, // TextureGatherOffset + { 0, 0, false }, // TextureGatherOffsets + { 0, 0, false }, // TextureQuerySizeLod + { 0, 0, false }, // TextureQuerySize + { 0, 0, false }, // TextureQueryLod + { 0, 0, false }, // TextureQueryLevels + { 0, 0, false }, // TextureQuerySamples + { 0, 0, false }, // AccessChain + { 0, 0, false }, // InBoundsAccessChain + { 0, 0, false }, // SNegate + { 0, 0, false }, // FNegate + { 0, 0, false }, // Not + { 0, 0, false }, // Any + { 0, 0, false }, // All + { 0, 0, false }, // ConvertFToU + { 0, 0, false }, // ConvertFToS + { 0, 0, false }, // ConvertSToF + { 0, 
0, false }, // ConvertUToF + { 0, 0, false }, // UConvert + { 0, 0, false }, // SConvert + { 0, 0, false }, // FConvert + { 0, 0, false }, // ConvertPtrToU + { 0, 0, false }, // ConvertUToPtr + { 0, 0, false }, // PtrCastToGeneric + { 0, 0, false }, // GenericCastToPtr + { 0, 0, false }, // Bitcast + { 0, 0, false }, // Transpose + { 0, 0, false }, // IsNan + { 0, 0, false }, // IsInf + { 0, 0, false }, // IsFinite + { 0, 0, false }, // IsNormal + { 0, 0, false }, // SignBitSet + { 0, 0, false }, // LessOrGreater + { 0, 0, false }, // Ordered + { 0, 0, false }, // Unordered + { 0, 0, false }, // ArrayLength + { 0, 0, false }, // IAdd + { 0, 0, false }, // FAdd + { 0, 0, false }, // ISub + { 0, 0, false }, // FSub + { 0, 0, false }, // IMul + { 0, 0, false }, // FMul + { 0, 0, false }, // UDiv + { 0, 0, false }, // SDiv + { 0, 0, false }, // FDiv + { 0, 0, false }, // UMod + { 0, 0, false }, // SRem + { 0, 0, false }, // SMod + { 0, 0, false }, // FRem + { 0, 0, false }, // FMod + { 0, 0, false }, // VectorTimesScalar + { 0, 0, false }, // MatrixTimesScalar + { 0, 0, false }, // VectorTimesMatrix + { 0, 0, false }, // MatrixTimesVector + { 0, 0, false }, // MatrixTimesMatrix + { 0, 0, false }, // OuterProduct + { 0, 0, false }, // Dot + { 0, 0, false }, // ShiftRightLogical + { 0, 0, false }, // ShiftRightArithmetic + { 0, 0, false }, // ShiftLeftLogical + { 0, 0, false }, // LogicalOr + { 0, 0, false }, // LogicalXor + { 0, 0, false }, // LogicalAnd + { 0, 0, false }, // BitwiseOr + { 0, 0, false }, // BitwiseXor + { 0, 0, false }, // BitwiseAnd + { 0, 0, false }, // Select + { 0, 0, false }, // IEqual + { 0, 0, false }, // FOrdEqual + { 0, 0, false }, // FUnordEqual + { 0, 0, false }, // INotEqual + { 0, 0, false }, // FOrdNotEqual + { 0, 0, false }, // FUnordNotEqual + { 0, 0, false }, // ULessThan + { 0, 0, false }, // SLessThan + { 0, 0, false }, // FOrdLessThan + { 0, 0, false }, // FUnordLessThan + { 0, 0, false }, // UGreaterThan + { 0, 0, false }, // 
SGreaterThan + { 0, 0, false }, // FOrdGreaterThan + { 0, 0, false }, // FUnordGreaterThan + { 0, 0, false }, // ULessThanEqual + { 0, 0, false }, // SLessThanEqual + { 0, 0, false }, // FOrdLessThanEqual + { 0, 0, false }, // FUnordLessThanEqual + { 0, 0, false }, // UGreaterThanEqual + { 0, 0, false }, // SGreaterThanEqual + { 0, 0, false }, // FOrdGreaterThanEqual + { 0, 0, false }, // FUnordGreaterThanEqual + { 0, 0, false }, // DPdx + { 0, 0, false }, // DPdy + { 0, 0, false }, // Fwidth + { 0, 0, false }, // DPdxFine + { 0, 0, false }, // DPdyFine + { 0, 0, false }, // FwidthFine + { 0, 0, false }, // DPdxCoarse + { 0, 0, false }, // DPdyCoarse + { 0, 0, false }, // FwidthCoarse + { 0, 0, false }, // EmitVertex + { 0, 0, false }, // EndPrimitive + { 0, 0, false }, // EmitStreamVertex + { 0, 0, false }, // EndStreamPrimitive + { 0, 0, false }, // ControlBarrier + { 0, 0, false }, // MemoryBarrier + { 0, 0, false }, // ImagePointer + { 0, 0, false }, // AtomicInit + { 0, 0, false }, // AtomicLoad + { 0, 0, false }, // AtomicStore + { 0, 0, false }, // AtomicExchange + { 0, 0, false }, // AtomicCompareExchange + { 0, 0, false }, // AtomicCompareExchangeWeak + { 0, 0, false }, // AtomicIIncrement + { 0, 0, false }, // AtomicIDecrement + { 0, 0, false }, // AtomicIAdd + { 0, 0, false }, // AtomicISub + { 0, 0, false }, // AtomicUMin + { 0, 0, false }, // AtomicUMax + { 0, 0, false }, // AtomicAnd + { 0, 0, false }, // AtomicOr + { 0, 0, false }, // AtomicXor + { 0, 0, false }, // LoopMerge + { 0, 0, false }, // SelectionMerge + { 0, 1, false }, // Label + { 0, 1, false }, // Branch + { 0, 0, false }, // BranchConditional + { 0, 0, false }, // Switch + { 0, 0, false }, // Kill + { 0, 0, false }, // Return + { 0, 0, false }, // ReturnValue + { 0, 0, false }, // Unreachable + { 0, 0, false }, // LifetimeStart + { 0, 0, false }, // LifetimeStop + { 0, 0, false }, // CompileFlag + { 0, 0, false }, // AsyncGroupCopy + { 0, 0, false }, // WaitGroupEvents + { 0, 0, false 
}, // GroupAll + { 0, 0, false }, // GroupAny + { 0, 0, false }, // GroupBroadcast + { 0, 0, false }, // GroupIAdd + { 0, 0, false }, // GroupFAdd + { 0, 0, false }, // GroupFMin + { 0, 0, false }, // GroupUMin + { 0, 0, false }, // GroupSMin + { 0, 0, false }, // GroupFMax + { 0, 0, false }, // GroupUMax + { 0, 0, false }, // GroupSMax + { 0, 0, false }, // GenericCastToPtrExplicit + { 0, 0, false }, // GenericPtrMemSemantics + { 0, 0, false }, // ReadPipe + { 0, 0, false }, // WritePipe + { 0, 0, false }, // ReservedReadPipe + { 0, 0, false }, // ReservedWritePipe + { 0, 0, false }, // ReserveReadPipePackets + { 0, 0, false }, // ReserveWritePipePackets + { 0, 0, false }, // CommitReadPipe + { 0, 0, false }, // CommitWritePipe + { 0, 0, false }, // IsValidReserveId + { 0, 0, false }, // GetNumPipePackets + { 0, 0, false }, // GetMaxPipePackets + { 0, 0, false }, // GroupReserveReadPipePackets + { 0, 0, false }, // GroupReserveWritePipePackets + { 0, 0, false }, // GroupCommitReadPipe + { 0, 0, false }, // GroupCommitWritePipe + { 0, 0, false }, // EnqueueMarker + { 0, 0, false }, // EnqueueKernel + { 0, 0, false }, // GetKernelNDrangeSubGroupCount + { 0, 0, false }, // GetKernelNDrangeMaxSubGroupSize + { 0, 0, false }, // GetKernelWorkGroupSize + { 0, 0, false }, // GetKernelPreferredWorkGroupSizeMultiple + { 0, 0, false }, // RetainEvent + { 0, 0, false }, // ReleaseEvent + { 0, 0, false }, // CreateUserEvent + { 0, 0, false }, // IsValidEvent + { 0, 0, false }, // SetUserEventStatus + { 0, 0, false }, // CaptureEventProfilingInfo + { 0, 0, false }, // GetDefaultQueue + { 0, 0, false }, // BuildNDRange + { 0, 0, false }, // SatConvertSToU + { 0, 0, false }, // SatConvertUToS + { 0, 0, false }, // AtomicIMin + { 0, 0, false }, // AtomicIMax + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_sprivOpcodeInfo) == SpirvOpcode::Count); + + const char* s_spirvOpcode[] = + { + "Nop", + "Source", + "SourceExtension", + "Extension", + "ExtInstImport", + "MemoryModel", + "EntryPoint", + 
"ExecutionMode", + "TypeVoid", + "TypeBool", + "TypeInt", + "TypeFloat", + "TypeVector", + "TypeMatrix", + "TypeSampler", + "TypeFilter", + "TypeArray", + "TypeRuntimeArray", + "TypeStruct", + "TypeOpaque", + "TypePointer", + "TypeFunction", + "TypeEvent", + "TypeDeviceEvent", + "TypeReserveId", + "TypeQueue", + "TypePipe", + "ConstantTrue", + "ConstantFalse", + "Constant", + "ConstantComposite", + "ConstantSampler", + "ConstantNullPointer", + "ConstantNullObject", + "SpecConstantTrue", + "SpecConstantFalse", + "SpecConstant", + "SpecConstantComposite", + "Variable", + "VariableArray", + "Function", + "FunctionParameter", + "FunctionEnd", + "FunctionCall", + "ExtInst", + "Undef", + "Load", + "Store", + "Phi", + "DecorationGroup", + "Decorate", + "MemberDecorate", + "GroupDecorate", + "GroupMemberDecorate", + "Name", + "MemberName", + "String", + "Line", + "VectorExtractDynamic", + "VectorInsertDynamic", + "VectorShuffle", + "CompositeConstruct", + "CompositeExtract", + "CompositeInsert", + "CopyObject", + "CopyMemory", + "CopyMemorySized", + "Sampler", + "TextureSample", + "TextureSampleDref", + "TextureSampleLod", + "TextureSampleProj", + "TextureSampleGrad", + "TextureSampleOffset", + "TextureSampleProjLod", + "TextureSampleProjGrad", + "TextureSampleLodOffset", + "TextureSampleProjOffset", + "TextureSampleGradOffset", + "TextureSampleProjLodOffset", + "TextureSampleProjGradOffset", + "TextureFetchTexelLod", + "TextureFetchTexelOffset", + "TextureFetchSample", + "TextureFetchTexel", + "TextureGather", + "TextureGatherOffset", + "TextureGatherOffsets", + "TextureQuerySizeLod", + "TextureQuerySize", + "TextureQueryLod", + "TextureQueryLevels", + "TextureQuerySamples", + "AccessChain", + "InBoundsAccessChain", + "SNegate", + "FNegate", + "Not", + "Any", + "All", + "ConvertFToU", + "ConvertFToS", + "ConvertSToF", + "ConvertUToF", + "UConvert", + "SConvert", + "FConvert", + "ConvertPtrToU", + "ConvertUToPtr", + "PtrCastToGeneric", + "GenericCastToPtr", + "Bitcast", + 
"Transpose", + "IsNan", + "IsInf", + "IsFinite", + "IsNormal", + "SignBitSet", + "LessOrGreater", + "Ordered", + "Unordered", + "ArrayLength", + "IAdd", + "FAdd", + "ISub", + "FSub", + "IMul", + "FMul", + "UDiv", + "SDiv", + "FDiv", + "UMod", + "SRem", + "SMod", + "FRem", + "FMod", + "VectorTimesScalar", + "MatrixTimesScalar", + "VectorTimesMatrix", + "MatrixTimesVector", + "MatrixTimesMatrix", + "OuterProduct", + "Dot", + "ShiftRightLogical", + "ShiftRightArithmetic", + "ShiftLeftLogical", + "LogicalOr", + "LogicalXor", + "LogicalAnd", + "BitwiseOr", + "BitwiseXor", + "BitwiseAnd", + "Select", + "IEqual", + "FOrdEqual", + "FUnordEqual", + "INotEqual", + "FOrdNotEqual", + "FUnordNotEqual", + "ULessThan", + "SLessThan", + "FOrdLessThan", + "FUnordLessThan", + "UGreaterThan", + "SGreaterThan", + "FOrdGreaterThan", + "FUnordGreaterThan", + "ULessThanEqual", + "SLessThanEqual", + "FOrdLessThanEqual", + "FUnordLessThanEqual", + "UGreaterThanEqual", + "SGreaterThanEqual", + "FOrdGreaterThanEqual", + "FUnordGreaterThanEqual", + "DPdx", + "DPdy", + "Fwidth", + "DPdxFine", + "DPdyFine", + "FwidthFine", + "DPdxCoarse", + "DPdyCoarse", + "FwidthCoarse", + "EmitVertex", + "EndPrimitive", + "EmitStreamVertex", + "EndStreamPrimitive", + "ControlBarrier", + "MemoryBarrier", + "ImagePointer", + "AtomicInit", + "AtomicLoad", + "AtomicStore", + "AtomicExchange", + "AtomicCompareExchange", + "AtomicCompareExchangeWeak", + "AtomicIIncrement", + "AtomicIDecrement", + "AtomicIAdd", + "AtomicISub", + "AtomicUMin", + "AtomicUMax", + "AtomicAnd", + "AtomicOr", + "AtomicXor", + "LoopMerge", + "SelectionMerge", + "Label", + "Branch", + "BranchConditional", + "Switch", + "Kill", + "Return", + "ReturnValue", + "Unreachable", + "LifetimeStart", + "LifetimeStop", + "CompileFlag", + "AsyncGroupCopy", + "WaitGroupEvents", + "GroupAll", + "GroupAny", + "GroupBroadcast", + "GroupIAdd", + "GroupFAdd", + "GroupFMin", + "GroupUMin", + "GroupSMin", + "GroupFMax", + "GroupUMax", + "GroupSMax", + 
"GenericCastToPtrExplicit", + "GenericPtrMemSemantics", + "ReadPipe", + "WritePipe", + "ReservedReadPipe", + "ReservedWritePipe", + "ReserveReadPipePackets", + "ReserveWritePipePackets", + "CommitReadPipe", + "CommitWritePipe", + "IsValidReserveId", + "GetNumPipePackets", + "GetMaxPipePackets", + "GroupReserveReadPipePackets", + "GroupReserveWritePipePackets", + "GroupCommitReadPipe", + "GroupCommitWritePipe", + "EnqueueMarker", + "EnqueueKernel", + "GetKernelNDrangeSubGroupCount", + "GetKernelNDrangeMaxSubGroupSize", + "GetKernelWorkGroupSize", + "GetKernelPreferredWorkGroupSizeMultiple", + "RetainEvent", + "ReleaseEvent", + "CreateUserEvent", + "IsValidEvent", + "SetUserEventStatus", + "CaptureEventProfilingInfo", + "GetDefaultQueue", + "BuildNDRange", + "SatConvertSToU", + "SatConvertUToS", + "AtomicIMin", + "AtomicIMax", + }; + BX_STATIC_ASSERT(BX_COUNTOF(s_spirvOpcode) == SpirvOpcode::Count); + + const char* getName(SpirvOpcode::Enum _opcode) + { + BX_CHECK(_opcode < SpirvOpcode::Count, "Unknown opcode id %d.", _opcode); + return s_spirvOpcode[_opcode]; + } + + int32_t read(bx::ReaderI* _reader, SpirvOperand& _operand) + { + int32_t size = 0; + + BX_UNUSED(_operand); + uint32_t token; + size += bx::read(_reader, token); + + return size; + } + + int32_t read(bx::ReaderI* _reader, SpirvInstruction& _instruction) + { + int32_t size = 0; + + uint32_t token; + size += bx::read(_reader, token); + + _instruction.opcode = SpirvOpcode::Enum( (token & UINT32_C(0x0000ffff) ) ); + _instruction.length = uint16_t( (token & UINT32_C(0xffff0000) ) >> 16); + + uint32_t currOp = 0; + + const SpirvOpcodeInfo& info = s_sprivOpcodeInfo[_instruction.opcode]; + + if (0 < info.numValues) + { + size += read(_reader, _instruction.un.value, info.numValues*sizeof(uint32_t) ); + } + + if (info.hasVariable) + { + while (size/4 != _instruction.length) + { + uint32_t tmp; + size += bx::read(_reader, tmp); + } + } + else + { + _instruction.numOperands = info.numOperands; + switch 
(info.numOperands) + { + case 6: size += read(_reader, _instruction.operand[currOp++]); + case 5: size += read(_reader, _instruction.operand[currOp++]); + case 4: size += read(_reader, _instruction.operand[currOp++]); + case 3: size += read(_reader, _instruction.operand[currOp++]); + case 2: size += read(_reader, _instruction.operand[currOp++]); + case 1: size += read(_reader, _instruction.operand[currOp++]); + case 0: + break; + + default: + BX_WARN(false, "Instruction %s with invalid number of operands %d (numValues %d)." + , getName(_instruction.opcode) + , info.numOperands + , info.numValues + ); + break; + } + + BX_WARN(size/4 == _instruction.length, "read %d, expected %d, %s" + , size/4 + , _instruction.length + , getName(_instruction.opcode) + ); + while (size/4 != _instruction.length) + { + uint32_t tmp; + size += bx::read(_reader, tmp); + } + } + + return size; + } + + int32_t write(bx::WriterI* _writer, const SpirvInstruction& _instruction) + { + int32_t size = 0; + BX_UNUSED(_writer, _instruction); + return size; + } + + int32_t toString(char* _out, int32_t _size, const SpirvInstruction& _instruction) + { + int32_t size = 0; + size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size) + , "%s %d (%d, %d)" + , getName(_instruction.opcode) + , _instruction.numOperands + , _instruction.un.value[0] + , _instruction.un.value[1] + ); + + return size; + } + + int32_t read(bx::ReaderSeekerI* _reader, SpirvShader& _shader) + { + int32_t size = 0; + + uint32_t len = uint32_t(bx::getSize(_reader) - bx::seek(_reader) ); + _shader.byteCode.resize(len); + size += bx::read(_reader, _shader.byteCode.data(), len); + + return size; + } + + int32_t write(bx::WriterI* _writer, const SpirvShader& _shader) + { + int32_t size = 0; + BX_UNUSED(_writer, _shader); + return size; + } + +#define SPIRV_MAGIC 0x07230203 + + int32_t read(bx::ReaderSeekerI* _reader, Spirv& _spirv) + { + int32_t size = 0; + + size += bx::read(_reader, _spirv.header); + + if (size != 
sizeof(Spirv::Header) + || _spirv.header.magic != SPIRV_MAGIC + ) + { + // error + return -size; + } + + size += read(_reader, _spirv.shader); + + return size; + } + + int32_t write(bx::WriterSeekerI* _writer, const Spirv& _spirv) + { + int32_t size = 0; + BX_UNUSED(_writer, _spirv); + return size; + } + + void parse(const SpirvShader& _src, SpirvParseFn _fn, void* _userData) + { + bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) ); + + for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;) + { + SpirvInstruction instruction; + uint32_t size = read(&reader, instruction); + + BX_CHECK(size/4 == instruction.length, "read %d, expected %d, %s" + , size/4 + , instruction.length + , getName(instruction.opcode) + ); + + _fn(token * sizeof(uint32_t), instruction, _userData); + + token += instruction.length; + } + } + +} // namespace bgfx diff --git a/src/shader_spirv.h b/src/shader_spirv.h new file mode 100644 index 00000000..5b9cf33c --- /dev/null +++ b/src/shader_spirv.h @@ -0,0 +1,520 @@ +/* + * Copyright 2011-2015 Branimir Karadzic. All rights reserved. 
+ * License: http://www.opensource.org/licenses/BSD-2-Clause
+ */
+
+#ifndef BGFX_SHADER_SPIRV_H
+#define BGFX_SHADER_SPIRV_H
+
+#include <bx/readerwriter.h> // bx::ReaderI / WriterI / ReaderSeekerI / WriterSeekerI appear in the declarations below
+
+namespace bgfx
+{
+	// Reference: https://www.khronos.org/registry/spir-v/specs/1.0/SPIRV.html
+
+	struct SpirvOpcode // Scoping struct for the opcode enum; use as SpirvOpcode::Enum.
+	{
+		enum Enum // NOTE(review): numeric values come purely from declaration order (Nop = 0) - confirm the order matches the SPIR-V revision being parsed.
+		{
+			Nop,
+			Source,
+			SourceExtension,
+			Extension,
+			ExtInstImport,
+			MemoryModel,
+			EntryPoint,
+			ExecutionMode,
+			TypeVoid,
+			TypeBool,
+			TypeInt,
+			TypeFloat,
+			TypeVector,
+			TypeMatrix,
+			TypeSampler,
+			TypeFilter,
+			TypeArray,
+			TypeRuntimeArray,
+			TypeStruct,
+			TypeOpaque,
+			TypePointer,
+			TypeFunction,
+			TypeEvent,
+			TypeDeviceEvent,
+			TypeReserveId,
+			TypeQueue,
+			TypePipe,
+			ConstantTrue,
+			ConstantFalse,
+			Constant,
+			ConstantComposite,
+			ConstantSampler,
+			ConstantNullPointer,
+			ConstantNullObject,
+			SpecConstantTrue,
+			SpecConstantFalse,
+			SpecConstant,
+			SpecConstantComposite,
+			Variable,
+			VariableArray,
+			Function,
+			FunctionParameter,
+			FunctionEnd,
+			FunctionCall,
+			ExtInst,
+			Undef,
+			Load,
+			Store,
+			Phi,
+			DecorationGroup,
+			Decorate,
+			MemberDecorate,
+			GroupDecorate,
+			GroupMemberDecorate,
+			Name,
+			MemberName,
+			String,
+			Line,
+			VectorExtractDynamic,
+			VectorInsertDynamic,
+			VectorShuffle,
+			CompositeConstruct,
+			CompositeExtract,
+			CompositeInsert,
+			CopyObject,
+			CopyMemory,
+			CopyMemorySized,
+			Sampler,
+			TextureSample,
+			TextureSampleDref,
+			TextureSampleLod,
+			TextureSampleProj,
+			TextureSampleGrad,
+			TextureSampleOffset,
+			TextureSampleProjLod,
+			TextureSampleProjGrad,
+			TextureSampleLodOffset,
+			TextureSampleProjOffset,
+			TextureSampleGradOffset,
+			TextureSampleProjLodOffset,
+			TextureSampleProjGradOffset,
+			TextureFetchTexelLod,
+			TextureFetchTexelOffset,
+			TextureFetchSample,
+			TextureFetchTexel,
+			TextureGather,
+			TextureGatherOffset,
+			TextureGatherOffsets,
+			TextureQuerySizeLod,
+			TextureQuerySize,
+			TextureQueryLod,
+			TextureQueryLevels,
+			TextureQuerySamples,
+			AccessChain,
+			InBoundsAccessChain,
+			SNegate,
+			FNegate,
+			Not,
+			Any,
+			All,
+			ConvertFToU,
+			ConvertFToS,
+			ConvertSToF,
+			ConvertUToF,
+			UConvert,
+			SConvert,
+			FConvert,
+			ConvertPtrToU,
+			ConvertUToPtr,
+			PtrCastToGeneric,
+			GenericCastToPtr,
+			Bitcast,
+			Transpose,
+			IsNan,
+			IsInf,
+			IsFinite,
+			IsNormal,
+			SignBitSet,
+			LessOrGreater,
+			Ordered,
+			Unordered,
+			ArrayLength,
+			IAdd,
+			FAdd,
+			ISub,
+			FSub,
+			IMul,
+			FMul,
+			UDiv,
+			SDiv,
+			FDiv,
+			UMod,
+			SRem,
+			SMod,
+			FRem,
+			FMod,
+			VectorTimesScalar,
+			MatrixTimesScalar,
+			VectorTimesMatrix,
+			MatrixTimesVector,
+			MatrixTimesMatrix,
+			OuterProduct,
+			Dot,
+			ShiftRightLogical,
+			ShiftRightArithmetic,
+			ShiftLeftLogical,
+			LogicalOr,
+			LogicalXor,
+			LogicalAnd,
+			BitwiseOr,
+			BitwiseXor,
+			BitwiseAnd,
+			Select,
+			IEqual,
+			FOrdEqual,
+			FUnordEqual,
+			INotEqual,
+			FOrdNotEqual,
+			FUnordNotEqual,
+			ULessThan,
+			SLessThan,
+			FOrdLessThan,
+			FUnordLessThan,
+			UGreaterThan,
+			SGreaterThan,
+			FOrdGreaterThan,
+			FUnordGreaterThan,
+			ULessThanEqual,
+			SLessThanEqual,
+			FOrdLessThanEqual,
+			FUnordLessThanEqual,
+			UGreaterThanEqual,
+			SGreaterThanEqual,
+			FOrdGreaterThanEqual,
+			FUnordGreaterThanEqual,
+			DPdx,
+			DPdy,
+			Fwidth,
+			DPdxFine,
+			DPdyFine,
+			FwidthFine,
+			DPdxCoarse,
+			DPdyCoarse,
+			FwidthCoarse,
+			EmitVertex,
+			EndPrimitive,
+			EmitStreamVertex,
+			EndStreamPrimitive,
+			ControlBarrier,
+			MemoryBarrier,
+			ImagePointer,
+			AtomicInit,
+			AtomicLoad,
+			AtomicStore,
+			AtomicExchange,
+			AtomicCompareExchange,
+			AtomicCompareExchangeWeak,
+			AtomicIIncrement,
+			AtomicIDecrement,
+			AtomicIAdd,
+			AtomicISub,
+			AtomicUMin,
+			AtomicUMax,
+			AtomicAnd,
+			AtomicOr,
+			AtomicXor,
+			LoopMerge,
+			SelectionMerge,
+			Label,
+			Branch,
+			BranchConditional,
+			Switch,
+			Kill,
+			Return,
+			ReturnValue,
+			Unreachable,
+			LifetimeStart,
+			LifetimeStop,
+			CompileFlag,
+			AsyncGroupCopy,
+			WaitGroupEvents,
+			GroupAll,
+			GroupAny,
+			GroupBroadcast,
+			GroupIAdd,
+			GroupFAdd,
+			GroupFMin,
+			GroupUMin,
+			GroupSMin,
+			GroupFMax,
+			GroupUMax,
+			GroupSMax,
+			GenericCastToPtrExplicit,
+			GenericPtrMemSemantics,
+			ReadPipe,
+			WritePipe,
+			ReservedReadPipe,
+			ReservedWritePipe,
+			ReserveReadPipePackets,
+			ReserveWritePipePackets,
+			CommitReadPipe,
+			CommitWritePipe,
+			IsValidReserveId,
+			GetNumPipePackets,
+			GetMaxPipePackets,
+			GroupReserveReadPipePackets,
+			GroupReserveWritePipePackets,
+			GroupCommitReadPipe,
+			GroupCommitWritePipe,
+			EnqueueMarker,
+			EnqueueKernel,
+			GetKernelNDrangeSubGroupCount,
+			GetKernelNDrangeMaxSubGroupSize,
+			GetKernelWorkGroupSize,
+			GetKernelPreferredWorkGroupSizeMultiple,
+			RetainEvent,
+			ReleaseEvent,
+			CreateUserEvent,
+			IsValidEvent,
+			SetUserEventStatus,
+			CaptureEventProfilingInfo,
+			GetDefaultQueue,
+			BuildNDRange,
+			SatConvertSToU,
+			SatConvertUToS,
+			AtomicIMin,
+			AtomicIMax,
+
+			Count // equals the number of enumerators declared above it
+		};
+	};
+
+	struct SpirvBuiltin // Scoping struct for built-in identifiers; values are implicit from Position = 0 and there is no Count entry.
+	{
+		enum Enum
+		{
+			Position,
+			PointSize,
+			ClipVertex,
+			ClipDistance,
+			CullDistance,
+			VertexId,
+			InstanceId,
+			BuiltInPrimitiveId,
+			InvocationId,
+			Layer,
+			ViewportIndex,
+			TessLevelOuter,
+			TessLevelInner,
+			TessCoord,
+			PatchVertices,
+			FragCoord,
+			PointCoord,
+			FrontFacing,
+			SampleId,
+			SamplePosition,
+			SampleMask,
+			FragColor,
+			FragDepth,
+			HelperInvocation,
+			NumWorkgroups,
+			WorkgroupSize,
+			WorkgroupId,
+			LocalInvocationId,
+			GlobalInvocationId,
+			LocalInvocationIndex,
+			WorkDim,
+			GlobalSize,
+			EnqueuedWorkgroupSize,
+			GlobalOffset,
+			GlobalLinearId,
+			WorkgroupLinearId,
+			SubgroupSize,
+			SubgroupMaxSize,
+			NumSubgroups,
+			NumEnqueuedSubgroups,
+			SubgroupId,
+			SubgroupLocalInvocationId,
+		};
+	};
+
+	struct SpirvExecutionModel // Scoping struct for execution-model identifiers; values are implicit from Vertex = 0.
+	{
+		enum Enum
+		{
+			Vertex,
+			TessellationControl,
+			TessellationEvaluation,
+			Geometry,
+			Fragment,
+			GLCompute,
+			Kernel,
+
+			Count // equals the number of enumerators declared above it
+		};
+	};
+
+	struct SpirvMemoryModel // Scoping struct for memory-model identifiers; values are implicit from Simple = 0.
+	{
+		enum Enum
+		{
+			Simple,
+			GLSL450,
+			OpenCL12,
+			OpenCL20,
+			OpenCL21,
+
+			Count // equals the number of enumerators declared above it
+		};
+	};
+
+	struct SpirvStorageClass // Scoping struct for storage-class identifiers; values are implicit from UniformConstant = 0 and there is no Count entry.
+	{
+		enum Enum
+		{
+			UniformConstant,
+			Input,
+			Uniform,
+			Output,
+			WorkgroupLocal,
+			WorkgroupGlobal,
+			PrivateGlobal,
+			Function,
+			Generic,
+			Private,
+			AtomicCounter,
+		};
+	};
+
+	struct SpirvResourceDim // Scoping struct for resource-dimension identifiers; values are implicit from Texture1D = 0 and there is no Count entry.
+	{
+		enum Enum
+		{
+			Texture1D,
+			Texture2D,
+			Texture3D,
+			TextureCube,
+			TextureRect,
+			Buffer,
+		};
+	};
+
+	struct SpirvDecoration // Scoping struct for decoration identifiers; values are implicit from PrecisionLow = 0.
+	{
+		enum Enum
+		{
+			PrecisionLow,
+			PrecisionMedium,
+			PrecisionHigh,
+			Block,
+			BufferBlock,
+			RowMajor,
+			ColMajor,
+			GLSLShared,
+			GLSLStd140,
+			GLSLStd430,
+			GLSLPacked,
+			Smooth,
+			Noperspective,
+			Flat,
+			Patch,
+			Centroid,
+			Sample,
+			Invariant,
+			Restrict,
+			Aliased,
+			Volatile,
+			Constant,
+			Coherent,
+			Nonwritable,
+			Nonreadable,
+			Uniform,
+			NoStaticUse,
+			CPacked,
+			SaturatedConversion,
+			Stream,
+			Location,
+			Component,
+			Index,
+			Binding,
+			DescriptorSet,
+			Offset,
+			Alignment,
+			XfbBuffer,
+			Stride,
+			BuiltIn,
+			FuncParamAttr,
+			FPRoundingMode,
+			FPFastMathMode,
+			LinkageAttributes,
+			SpecId,
+
+			Count // equals the number of enumerators declared above it
+		};
+	};
+
+	struct SpirvOperand // Placeholder type: no members are declared yet.
+	{
+	};
+
+	struct SpirvInstruction // One instruction record: opcode, length, operand slots and a small payload union.
+	{
+		SpirvOpcode::Enum opcode;
+		uint16_t length; // presumably the instruction length as stored in the stream - TODO confirm the unit against read()
+
+		uint8_t numOperands; // presumably how many entries of operand[] are in use - TODO confirm
+		SpirvOperand operand[6]; // fixed capacity of six operand slots
+
+		union // members below share the same storage; accessed through the named member `un`
+		{
+			struct ResultTypeId // pair of 32-bit values: result type, then id
+			{
+				uint32_t resultType;
+				uint32_t id;
+			};
+
+			ResultTypeId constant;
+			ResultTypeId constantComposite;
+
+			uint32_t value[8]; // raw 32-bit view over the same storage (eight words)
+		} un;
+	};
+
+	int32_t read(bx::ReaderI* _reader, SpirvInstruction& _instruction); // declaration only; presumably decodes one instruction from _reader into _instruction
+	int32_t write(bx::WriterI* _writer, const SpirvInstruction& _instruction); // declaration only; presumably serializes _instruction to _writer
+	int32_t toString(char* _out, int32_t _size, const SpirvInstruction& _instruction); // declaration only; presumably formats _instruction into _out, bounded by _size
+
+	struct SpirvShader // Container for a shader's byte code.
+	{
+		stl::vector<uint8_t> byteCode; // element type restored as uint8_t to match the "byteCode" name - TODO confirm against the definitions of read()/write()
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, SpirvShader& _shader); // declaration only; overload taking a seekable reader
+	int32_t write(bx::WriterI* _writer, const SpirvShader& _shader); // declaration only
+
+	typedef void (*SpirvParseFn)(uint32_t _offset, const SpirvInstruction& _instruction, void* _userData); // callback: (offset, read-only instruction, caller-supplied pointer)
+	void parse(const SpirvShader& _src, SpirvParseFn _fn, void* _userData); // declaration only; presumably calls _fn for instructions found in _src, passing _userData through
+
+	typedef void (*SpirvFilterFn)(SpirvInstruction& _instruction, void* _userData); // callback: (mutable instruction, caller-supplied pointer)
+	void filter(SpirvShader& _dst, const SpirvShader& _src, SpirvFilterFn _fn, void* _userData); // declaration only; presumably produces _dst from _src with _fn applied to instructions
+
+	struct Spirv // A header plus the shader it belongs to.
+	{
+		struct Header // five consecutive 32-bit fields
+		{
+			uint32_t magic;
+			uint32_t version;
+			uint32_t generator;
+			uint32_t bound;
+			uint32_t schema;
+		};
+
+		Header header;
+		SpirvShader shader;
+	};
+
+	int32_t read(bx::ReaderSeekerI* _reader, Spirv& _spirv); // declaration only; overload taking a seekable reader
+	int32_t write(bx::WriterSeekerI* _writer, const Spirv& _spirv); // declaration only; overload taking a seekable writer
+
+} // namespace bgfx
+
+#endif // BGFX_SHADER_SPIRV_H