Merge pull request #13922 from samcake/black-bis

Optimize the vertex formats of meshes for less input buffer bindings
This commit is contained in:
Sam Gateau 2018-09-10 16:58:40 -07:00 committed by GitHub
commit 47cea49f78
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 269 additions and 105 deletions

View file

@ -585,13 +585,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
FBXMesh& fbxMesh = extractedMesh; FBXMesh& fbxMesh = extractedMesh;
graphics::MeshPointer mesh(new graphics::Mesh()); graphics::MeshPointer mesh(new graphics::Mesh());
bool hasBlendShapes = !fbxMesh.blendshapes.empty();
// Grab the vertices in a buffer int numVerts = extractedMesh.vertices.size();
auto vb = std::make_shared<gpu::Buffer>();
vb->setData(extractedMesh.vertices.size() * sizeof(glm::vec3),
(const gpu::Byte*) extractedMesh.vertices.data());
gpu::BufferView vbv(vb, gpu::Element(gpu::VEC3, gpu::FLOAT, gpu::XYZ));
mesh->setVertexBuffer(vbv);
if (!fbxMesh.normals.empty() && fbxMesh.tangents.empty()) { if (!fbxMesh.normals.empty() && fbxMesh.tangents.empty()) {
// Fill with a dummy value to force tangents to be present if there are normals // Fill with a dummy value to force tangents to be present if there are normals
@ -607,43 +602,61 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
} }
} }
// evaluate all attribute channels sizes // evaluate all attribute elements and data sizes
const int normalsSize = fbxMesh.normals.size() * sizeof(NormalType);
const int tangentsSize = fbxMesh.tangents.size() * sizeof(NormalType); // Position is a vec3
const auto positionElement = gpu::Element(gpu::VEC3, gpu::FLOAT, gpu::XYZ);
const int positionsSize = numVerts * positionElement.getSize();
// Normal and tangent are always there together packed in normalized xyz32bits word (times 2)
const auto normalElement = FBX_NORMAL_ELEMENT;
const int normalsSize = fbxMesh.normals.size() * normalElement.getSize();
const int tangentsSize = fbxMesh.tangents.size() * normalElement.getSize();
// If there are normals then there should be tangents // If there are normals then there should be tangents
assert(normalsSize <= tangentsSize); assert(normalsSize <= tangentsSize);
if (tangentsSize > normalsSize) { if (tangentsSize > normalsSize) {
qWarning() << "Unexpected tangents in " << url; qWarning() << "Unexpected tangents in " << url;
} }
const auto normalsAndTangentsSize = normalsSize + tangentsSize; const auto normalsAndTangentsSize = normalsSize + tangentsSize;
const int normalsAndTangentsStride = 2 * sizeof(NormalType); const int normalsAndTangentsStride = 2 * normalElement.getSize();
const int colorsSize = fbxMesh.colors.size() * sizeof(ColorType);
// Color attrib
const auto colorElement = FBX_COLOR_ELEMENT;
const int colorsSize = fbxMesh.colors.size() * colorElement.getSize();
// Texture coordinates are stored in 2 half floats // Texture coordinates are stored in 2 half floats
const int texCoordsSize = fbxMesh.texCoords.size() * sizeof(vec2h); const auto texCoordsElement = gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV);
const int texCoords1Size = fbxMesh.texCoords1.size() * sizeof(vec2h); const int texCoordsSize = fbxMesh.texCoords.size() * texCoordsElement.getSize();
const int texCoords1Size = fbxMesh.texCoords1.size() * texCoordsElement.getSize();
int clusterIndicesSize = fbxMesh.clusterIndices.size() * sizeof(uint8_t); // Support for 4 skinning clusters:
if (fbxMesh.clusters.size() > UINT8_MAX) { // 4 Indices are uint8 ideally, uint16 if more than 256.
// we need 16 bits instead of just 8 for clusterIndices const auto clusterIndiceElement = (fbxMesh.clusters.size() < UINT8_MAX ? gpu::Element(gpu::VEC4, gpu::UINT8, gpu::XYZW) : gpu::Element(gpu::VEC4, gpu::UINT16, gpu::XYZW));
clusterIndicesSize *= 2; // 4 Weights are normalized 16bits
} const auto clusterWeightElement = gpu::Element(gpu::VEC4, gpu::NUINT16, gpu::XYZW);
const int clusterWeightsSize = fbxMesh.clusterWeights.size() * sizeof(uint16_t); // Cluster indices and weights must be the same sizes
const int NUM_CLUSTERS_PER_VERT = 4;
const int numVertClusters = (fbxMesh.clusterIndices.size() == fbxMesh.clusterWeights.size() ? fbxMesh.clusterIndices.size() / NUM_CLUSTERS_PER_VERT : 0);
const int clusterIndicesSize = numVertClusters * clusterIndiceElement.getSize();
const int clusterWeightsSize = numVertClusters * clusterWeightElement.getSize();
// Normals and tangents are interleaved // Decide on where to put what sequentially in a big buffer:
const int normalsOffset = 0; const int positionsOffset = 0;
const int tangentsOffset = normalsOffset + sizeof(NormalType); const int normalsAndTangentsOffset = positionsOffset + positionsSize;
const int colorsOffset = normalsOffset + normalsSize + tangentsSize; const int colorsOffset = normalsAndTangentsOffset + normalsAndTangentsSize;
const int texCoordsOffset = colorsOffset + colorsSize; const int texCoordsOffset = colorsOffset + colorsSize;
const int texCoords1Offset = texCoordsOffset + texCoordsSize; const int texCoords1Offset = texCoordsOffset + texCoordsSize;
const int clusterIndicesOffset = texCoords1Offset + texCoords1Size; const int clusterIndicesOffset = texCoords1Offset + texCoords1Size;
const int clusterWeightsOffset = clusterIndicesOffset + clusterIndicesSize; const int clusterWeightsOffset = clusterIndicesOffset + clusterIndicesSize;
const int totalAttributeSize = clusterWeightsOffset + clusterWeightsSize; const int totalVertsSize = clusterWeightsOffset + clusterWeightsSize;
// Copy all attribute data in a single attribute buffer // Copy all vertex data in a single buffer
auto attribBuffer = std::make_shared<gpu::Buffer>(); auto vertBuffer = std::make_shared<gpu::Buffer>();
attribBuffer->resize(totalAttributeSize); vertBuffer->resize(totalVertsSize);
// First positions
vertBuffer->setSubData(positionsOffset, positionsSize, (const gpu::Byte*) extractedMesh.vertices.data());
// Interleave normals and tangents // Interleave normals and tangents
if (normalsSize > 0) { if (normalsSize > 0) {
@ -651,8 +664,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
normalsAndTangents.reserve(fbxMesh.normals.size() + fbxMesh.tangents.size()); normalsAndTangents.reserve(fbxMesh.normals.size() + fbxMesh.tangents.size());
for (auto normalIt = fbxMesh.normals.constBegin(), tangentIt = fbxMesh.tangents.constBegin(); for (auto normalIt = fbxMesh.normals.constBegin(), tangentIt = fbxMesh.tangents.constBegin();
normalIt != fbxMesh.normals.constEnd(); normalIt != fbxMesh.normals.constEnd();
++normalIt, ++tangentIt) { ++normalIt, ++tangentIt) {
#if FBX_PACK_NORMALS #if FBX_PACK_NORMALS
const auto normal = normalizeDirForPacking(*normalIt); const auto normal = normalizeDirForPacking(*normalIt);
const auto tangent = normalizeDirForPacking(*tangentIt); const auto tangent = normalizeDirForPacking(*tangentIt);
@ -665,9 +678,10 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
normalsAndTangents.push_back(packedNormal); normalsAndTangents.push_back(packedNormal);
normalsAndTangents.push_back(packedTangent); normalsAndTangents.push_back(packedTangent);
} }
attribBuffer->setSubData(normalsOffset, normalsAndTangentsSize, (const gpu::Byte*) normalsAndTangents.data()); vertBuffer->setSubData(normalsAndTangentsOffset, normalsAndTangentsSize, (const gpu::Byte*) normalsAndTangents.data());
} }
// Pack colors
if (colorsSize > 0) { if (colorsSize > 0) {
#if FBX_PACK_COLORS #if FBX_PACK_COLORS
std::vector<ColorType> colors; std::vector<ColorType> colors;
@ -676,12 +690,13 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
for (const auto& color : fbxMesh.colors) { for (const auto& color : fbxMesh.colors) {
colors.push_back(glm::packUnorm4x8(glm::vec4(color, 1.0f))); colors.push_back(glm::packUnorm4x8(glm::vec4(color, 1.0f)));
} }
attribBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) colors.data()); vertBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) colors.data());
#else #else
attribBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) fbxMesh.colors.constData()); vertBuffer->setSubData(colorsOffset, colorsSize, (const gpu::Byte*) fbxMesh.colors.constData());
#endif #endif
} }
// Pack Texcoords 0 and 1 (if exists)
if (texCoordsSize > 0) { if (texCoordsSize > 0) {
QVector<vec2h> texCoordData; QVector<vec2h> texCoordData;
texCoordData.reserve(fbxMesh.texCoords.size()); texCoordData.reserve(fbxMesh.texCoords.size());
@ -692,9 +707,8 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
texCoordVec2h.y = glm::detail::toFloat16(texCoordVec2f.y); texCoordVec2h.y = glm::detail::toFloat16(texCoordVec2f.y);
texCoordData.push_back(texCoordVec2h); texCoordData.push_back(texCoordVec2h);
} }
attribBuffer->setSubData(texCoordsOffset, texCoordsSize, (const gpu::Byte*) texCoordData.constData()); vertBuffer->setSubData(texCoordsOffset, texCoordsSize, (const gpu::Byte*) texCoordData.constData());
} }
if (texCoords1Size > 0) { if (texCoords1Size > 0) {
QVector<vec2h> texCoordData; QVector<vec2h> texCoordData;
texCoordData.reserve(fbxMesh.texCoords1.size()); texCoordData.reserve(fbxMesh.texCoords1.size());
@ -705,69 +719,170 @@ void FBXReader::buildModelMesh(FBXMesh& extractedMesh, const QString& url) {
texCoordVec2h.y = glm::detail::toFloat16(texCoordVec2f.y); texCoordVec2h.y = glm::detail::toFloat16(texCoordVec2f.y);
texCoordData.push_back(texCoordVec2h); texCoordData.push_back(texCoordVec2h);
} }
attribBuffer->setSubData(texCoords1Offset, texCoords1Size, (const gpu::Byte*) texCoordData.constData()); vertBuffer->setSubData(texCoords1Offset, texCoords1Size, (const gpu::Byte*) texCoordData.constData());
} }
if (fbxMesh.clusters.size() < UINT8_MAX) { // Clusters data
// yay! we can fit the clusterIndices within 8-bits if (clusterIndicesSize > 0) {
int32_t numIndices = fbxMesh.clusterIndices.size(); if (fbxMesh.clusters.size() < UINT8_MAX) {
QVector<uint8_t> clusterIndices; // yay! we can fit the clusterIndices within 8-bits
clusterIndices.resize(numIndices); int32_t numIndices = fbxMesh.clusterIndices.size();
for (int32_t i = 0; i < numIndices; ++i) { QVector<uint8_t> clusterIndices;
assert(fbxMesh.clusterIndices[i] <= UINT8_MAX); clusterIndices.resize(numIndices);
clusterIndices[i] = (uint8_t)(fbxMesh.clusterIndices[i]); for (int32_t i = 0; i < numIndices; ++i) {
assert(fbxMesh.clusterIndices[i] <= UINT8_MAX);
clusterIndices[i] = (uint8_t)(fbxMesh.clusterIndices[i]);
}
vertBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) clusterIndices.constData());
} else {
vertBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) fbxMesh.clusterIndices.constData());
} }
attribBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) clusterIndices.constData());
} else {
attribBuffer->setSubData(clusterIndicesOffset, clusterIndicesSize, (const gpu::Byte*) fbxMesh.clusterIndices.constData());
} }
attribBuffer->setSubData(clusterWeightsOffset, clusterWeightsSize, (const gpu::Byte*) fbxMesh.clusterWeights.constData()); if (clusterWeightsSize > 0) {
vertBuffer->setSubData(clusterWeightsOffset, clusterWeightsSize, (const gpu::Byte*) fbxMesh.clusterWeights.constData());
}
if (normalsSize) {
mesh->addAttribute(gpu::Stream::NORMAL, // Now we decide on how to interleave the attributes and provide the vertices among bufers:
graphics::BufferView(attribBuffer, normalsOffset, normalsAndTangentsSize, // Aka the Vertex format and the vertexBufferStream
normalsAndTangentsStride, FBX_NORMAL_ELEMENT)); auto vertexFormat = std::make_shared<gpu::Stream::Format>();
mesh->addAttribute(gpu::Stream::TANGENT, auto vertexBufferStream = std::make_shared<gpu::BufferStream>();
graphics::BufferView(attribBuffer, tangentsOffset, normalsAndTangentsSize,
normalsAndTangentsStride, FBX_NORMAL_ELEMENT)); // Decision time:
// if blendshapes then keep position and normals/tangents as separated channel buffers from interleaved attributes
// else everything is interleaved in one buffer
// Default case is no blend shapes
gpu::BufferPointer attribBuffer;
int totalAttribBufferSize = totalVertsSize;
gpu::uint8 posChannel = 0;
gpu::uint8 tangentChannel = posChannel;
gpu::uint8 attribChannel = posChannel;
bool interleavePositions = true;
bool interleaveNormalsTangents = true;
// TODO: We are using the same vertex format layout for all meshes because this is more efficient
// This work is going into rc73 release which is meant to be used for the SPot500 event and we are picking the format
// that works best for blendshaped and skinned meshes aka the avatars.
// We will improve this technique in a hot fix to 73.
hasBlendShapes = true;
// If has blend shapes allocate and assign buffers for pos and tangents now
if (hasBlendShapes) {
auto posBuffer = std::make_shared<gpu::Buffer>();
posBuffer->setData(positionsSize, (const gpu::Byte*) vertBuffer->getData() + positionsOffset);
vertexBufferStream->addBuffer(posBuffer, 0, positionElement.getSize());
auto normalsAndTangentsBuffer = std::make_shared<gpu::Buffer>();
normalsAndTangentsBuffer->setData(normalsAndTangentsSize, (const gpu::Byte*) vertBuffer->getData() + normalsAndTangentsOffset);
vertexBufferStream->addBuffer(normalsAndTangentsBuffer, 0, normalsAndTangentsStride);
// update channels and attribBuffer size accordingly
interleavePositions = false;
interleaveNormalsTangents = false;
tangentChannel = 1;
attribChannel = 2;
totalAttribBufferSize = totalVertsSize - positionsSize - normalsAndTangentsSize;
} }
// Define the vertex format, compute the offset for each attributes as we append them to the vertex format
gpu::Offset bufOffset = 0;
if (positionsSize) {
vertexFormat->setAttribute(gpu::Stream::POSITION, posChannel, positionElement, bufOffset);
bufOffset += positionElement.getSize();
if (!interleavePositions) {
bufOffset = 0;
}
}
if (normalsSize) {
vertexFormat->setAttribute(gpu::Stream::NORMAL, tangentChannel, normalElement, bufOffset);
bufOffset += normalElement.getSize();
vertexFormat->setAttribute(gpu::Stream::TANGENT, tangentChannel, normalElement, bufOffset);
bufOffset += normalElement.getSize();
if (!interleaveNormalsTangents) {
bufOffset = 0;
}
}
// Pack normal and Tangent with the rest of attributes if no blend shapes
if (colorsSize) { if (colorsSize) {
mesh->addAttribute(gpu::Stream::COLOR, vertexFormat->setAttribute(gpu::Stream::COLOR, attribChannel, colorElement, bufOffset);
graphics::BufferView(attribBuffer, colorsOffset, colorsSize, FBX_COLOR_ELEMENT)); bufOffset += colorElement.getSize();
} }
if (texCoordsSize) { if (texCoordsSize) {
mesh->addAttribute(gpu::Stream::TEXCOORD, vertexFormat->setAttribute(gpu::Stream::TEXCOORD, attribChannel, texCoordsElement, bufOffset);
graphics::BufferView( attribBuffer, texCoordsOffset, texCoordsSize, bufOffset += texCoordsElement.getSize();
gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
} }
if (texCoords1Size) { if (texCoords1Size) {
mesh->addAttribute( gpu::Stream::TEXCOORD1, vertexFormat->setAttribute(gpu::Stream::TEXCOORD1, attribChannel, texCoordsElement, bufOffset);
graphics::BufferView(attribBuffer, texCoords1Offset, texCoords1Size, bufOffset += texCoordsElement.getSize();
gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
} else if (texCoordsSize) { } else if (texCoordsSize) {
mesh->addAttribute(gpu::Stream::TEXCOORD1, vertexFormat->setAttribute(gpu::Stream::TEXCOORD1, attribChannel, texCoordsElement, bufOffset - texCoordsElement.getSize());
graphics::BufferView(attribBuffer, texCoordsOffset, texCoordsSize,
gpu::Element(gpu::VEC2, gpu::HALF, gpu::UV)));
} }
if (clusterIndicesSize) { if (clusterIndicesSize) {
if (fbxMesh.clusters.size() < UINT8_MAX) { vertexFormat->setAttribute(gpu::Stream::SKIN_CLUSTER_INDEX, attribChannel, clusterIndiceElement, bufOffset);
mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_INDEX, bufOffset += clusterIndiceElement.getSize();
graphics::BufferView(attribBuffer, clusterIndicesOffset, clusterIndicesSize,
gpu::Element(gpu::VEC4, gpu::UINT8, gpu::XYZW)));
} else {
mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_INDEX,
graphics::BufferView(attribBuffer, clusterIndicesOffset, clusterIndicesSize,
gpu::Element(gpu::VEC4, gpu::UINT16, gpu::XYZW)));
}
} }
if (clusterWeightsSize) { if (clusterWeightsSize) {
mesh->addAttribute(gpu::Stream::SKIN_CLUSTER_WEIGHT, vertexFormat->setAttribute(gpu::Stream::SKIN_CLUSTER_WEIGHT, attribChannel, clusterWeightElement, bufOffset);
graphics::BufferView(attribBuffer, clusterWeightsOffset, clusterWeightsSize, bufOffset += clusterWeightElement.getSize();
gpu::Element(gpu::VEC4, gpu::NUINT16, gpu::XYZW)));
} }
// Finally, allocate and fill the attribBuffer interleaving the attributes as needed:
{
auto vPositionOffset = 0;
auto vPositionSize = (interleavePositions ? positionsSize / numVerts : 0);
auto vNormalsAndTangentsOffset = vPositionOffset + vPositionSize;
auto vNormalsAndTangentsSize = (interleaveNormalsTangents ? normalsAndTangentsSize / numVerts : 0);
auto vColorOffset = vNormalsAndTangentsOffset + vNormalsAndTangentsSize;
auto vColorSize = colorsSize / numVerts;
auto vTexcoord0Offset = vColorOffset + vColorSize;
auto vTexcoord0Size = texCoordsSize / numVerts;
auto vTexcoord1Offset = vTexcoord0Offset + vTexcoord0Size;
auto vTexcoord1Size = texCoords1Size / numVerts;
auto vClusterIndiceOffset = vTexcoord1Offset + vTexcoord1Size;
auto vClusterIndiceSize = clusterIndicesSize / numVerts;
auto vClusterWeightOffset = vClusterIndiceOffset + vClusterIndiceSize;
auto vClusterWeightSize = clusterWeightsSize / numVerts;
auto vStride = vClusterWeightOffset + vClusterWeightSize;
std::vector<gpu::Byte> dest;
dest.resize(totalAttribBufferSize);
auto vDest = dest.data();
auto source = vertBuffer->getData();
for (int i = 0; i < numVerts; i++) {
if (vPositionSize) memcpy(vDest + vPositionOffset, source + positionsOffset + i * vPositionSize, vPositionSize);
if (vNormalsAndTangentsSize) memcpy(vDest + vNormalsAndTangentsOffset, source + normalsAndTangentsOffset + i * vNormalsAndTangentsSize, vNormalsAndTangentsSize);
if (vColorSize) memcpy(vDest + vColorOffset, source + colorsOffset + i * vColorSize, vColorSize);
if (vTexcoord0Size) memcpy(vDest + vTexcoord0Offset, source + texCoordsOffset + i * vTexcoord0Size, vTexcoord0Size);
if (vTexcoord1Size) memcpy(vDest + vTexcoord1Offset, source + texCoords1Offset + i * vTexcoord1Size, vTexcoord1Size);
if (vClusterIndiceSize) memcpy(vDest + vClusterIndiceOffset, source + clusterIndicesOffset + i * vClusterIndiceSize, vClusterIndiceSize);
if (vClusterWeightSize) memcpy(vDest + vClusterWeightOffset, source + clusterWeightsOffset + i * vClusterWeightSize, vClusterWeightSize);
vDest += vStride;
}
auto attribBuffer = std::make_shared<gpu::Buffer>();
attribBuffer->setData(totalAttribBufferSize, dest.data());
vertexBufferStream->addBuffer(attribBuffer, 0, vStride);
}
// Mesh vertex format and vertex stream is ready
mesh->setVertexFormatAndStream(vertexFormat, vertexBufferStream);
// Index and Part Buffers
unsigned int totalIndices = 0; unsigned int totalIndices = 0;
foreach(const FBXMeshPart& part, extractedMesh.parts) { foreach(const FBXMeshPart& part, extractedMesh.parts) {
totalIndices += (part.quadTrianglesIndices.size() + part.triangleIndices.size()); totalIndices += (part.quadTrianglesIndices.size() + part.triangleIndices.size());

View file

@ -239,6 +239,7 @@ public:
virtual GLuint getFramebufferID(const FramebufferPointer& framebuffer) = 0; virtual GLuint getFramebufferID(const FramebufferPointer& framebuffer) = 0;
virtual GLuint getTextureID(const TexturePointer& texture) final; virtual GLuint getTextureID(const TexturePointer& texture) final;
virtual GLuint getBufferID(const Buffer& buffer) = 0; virtual GLuint getBufferID(const Buffer& buffer) = 0;
virtual GLuint getBufferIDUnsynced(const Buffer& buffer) = 0;
virtual GLuint getQueryID(const QueryPointer& query) = 0; virtual GLuint getQueryID(const QueryPointer& query) = 0;
virtual GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) = 0; virtual GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) = 0;

View file

@ -11,6 +11,7 @@
#include "GLBackend.h" #include "GLBackend.h"
#include "GLShared.h" #include "GLShared.h"
#include "GLInputFormat.h" #include "GLInputFormat.h"
#include "GLBuffer.h"
using namespace gpu; using namespace gpu;
using namespace gpu::gl; using namespace gpu::gl;
@ -43,13 +44,7 @@ void GLBackend::do_setInputBuffer(const Batch& batch, size_t paramOffset) {
bool isModified = false; bool isModified = false;
if (_input._buffers[channel] != buffer) { if (_input._buffers[channel] != buffer) {
_input._buffers[channel] = buffer; _input._buffers[channel] = buffer;
_input._bufferVBOs[channel] = getBufferIDUnsynced((*buffer));
GLuint vbo = 0;
if (buffer) {
vbo = getBufferID((*buffer));
}
_input._bufferVBOs[channel] = vbo;
isModified = true; isModified = true;
} }
@ -128,7 +123,7 @@ void GLBackend::do_setIndexBuffer(const Batch& batch, size_t paramOffset) {
if (indexBuffer != _input._indexBuffer) { if (indexBuffer != _input._indexBuffer) {
_input._indexBuffer = indexBuffer; _input._indexBuffer = indexBuffer;
if (indexBuffer) { if (indexBuffer) {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferID(*indexBuffer)); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, getBufferIDUnsynced(*indexBuffer));
} else { } else {
// FIXME do we really need this? Is there ever a draw call where we care that the element buffer is null? // FIXME do we really need this? Is there ever a draw call where we care that the element buffer is null?
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
@ -145,7 +140,7 @@ void GLBackend::do_setIndirectBuffer(const Batch& batch, size_t paramOffset) {
if (buffer != _input._indirectBuffer) { if (buffer != _input._indirectBuffer) {
_input._indirectBuffer = buffer; _input._indirectBuffer = buffer;
if (buffer) { if (buffer) {
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferID(*buffer)); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, getBufferIDUnsynced(*buffer));
} else { } else {
// FIXME do we really need this? Is there ever a draw call where we care that the element buffer is null? // FIXME do we really need this? Is there ever a draw call where we care that the element buffer is null?
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
@ -261,9 +256,17 @@ void GLBackend::updateInput() {
auto offset = _input._bufferOffsets.data(); auto offset = _input._bufferOffsets.data();
auto stride = _input._bufferStrides.data(); auto stride = _input._bufferStrides.data();
// Profile the count of buffers to update and use it to short cut the for loop
int numInvalids = (int) _input._invalidBuffers.count();
_stats._ISNumInputBufferChanges += numInvalids;
for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) { for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) {
if (_input._invalidBuffers.test(buffer)) { if (_input._invalidBuffers.test(buffer)) {
glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride)); glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride));
numInvalids--;
if (numInvalids <= 0) {
break;
}
} }
} }

View file

@ -85,6 +85,8 @@ void GLBackend::do_setPipeline(const Batch& batch, size_t paramOffset) {
auto& cameraCorrectionBuffer = _transform._viewCorrectionEnabled ? auto& cameraCorrectionBuffer = _transform._viewCorrectionEnabled ?
_pipeline._cameraCorrectionBuffer._buffer : _pipeline._cameraCorrectionBuffer._buffer :
_pipeline._cameraCorrectionBufferIdentity._buffer; _pipeline._cameraCorrectionBufferIdentity._buffer;
// Because we don't sync Buffers in the bindUniformBuffer, let's force this buffer to be synced
getBufferID(*cameraCorrectionBuffer);
bindUniformBuffer(gpu::slot::buffer::CameraCorrection, cameraCorrectionBuffer, 0, sizeof(CameraCorrection)); bindUniformBuffer(gpu::slot::buffer::CameraCorrection, cameraCorrectionBuffer, 0, sizeof(CameraCorrection));
} }
(void)CHECK_GL_ERROR(); (void)CHECK_GL_ERROR();
@ -170,11 +172,10 @@ void GLBackend::bindUniformBuffer(uint32_t slot, const BufferPointer& buffer, GL
return; return;
} }
// Sync BufferObject // Grab the true gl Buffer object
auto* object = syncGPUObject(*bufferState.buffer); auto glBO = getBufferIDUnsynced(*buffer);
if (object) { if (glBO) {
glBindBufferRange(GL_UNIFORM_BUFFER, slot, object->_buffer, bufferState.offset, bufferState.size); glBindBufferRange(GL_UNIFORM_BUFFER, slot, glBO, bufferState.offset, bufferState.size);
_uniform._buffers[slot] = bufferState; _uniform._buffers[slot] = bufferState;
(void)CHECK_GL_ERROR(); (void)CHECK_GL_ERROR();
} else { } else {

View file

@ -49,6 +49,16 @@ public:
} }
} }
// Look up the GL buffer name attached to `buffer` without invoking any sync step.
// The lookup goes through Backend::getGPUObject; when it yields no GLBufferType
// object the result is 0. `backend` is accepted for signature symmetry but is not
// read here.
template <typename GLBufferType>
static GLuint getIdUnsynced(GLBackend& backend, const Buffer& buffer) {
    GLBufferType* glObject = Backend::getGPUObject<GLBufferType>(buffer);
    return glObject ? glObject->_buffer : 0;
}
const GLuint& _buffer { _id }; const GLuint& _buffer { _id };
const GLuint _size; const GLuint _size;
const Stamp _stamp; const Stamp _stamp;

View file

@ -134,6 +134,7 @@ protected:
GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override; GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;
GLuint getBufferID(const Buffer& buffer) override; GLuint getBufferID(const Buffer& buffer) override;
GLuint getBufferIDUnsynced(const Buffer& buffer) override;
GLuint getResourceBufferID(const Buffer& buffer); GLuint getResourceBufferID(const Buffer& buffer);
GLBuffer* syncGPUObject(const Buffer& buffer) override; GLBuffer* syncGPUObject(const Buffer& buffer) override;

View file

@ -83,6 +83,10 @@ GLuint GL41Backend::getBufferID(const Buffer& buffer) {
return GL41Buffer::getId<GL41Buffer>(*this, buffer); return GL41Buffer::getId<GL41Buffer>(*this, buffer);
} }
// GL 4.1 backend entry point for the unsynced buffer id lookup:
// forwards to GL41Buffer::getIdUnsynced with this backend and the given buffer.
GLuint GL41Backend::getBufferIDUnsynced(const Buffer& buffer) {
    const GLuint bufferId = GL41Buffer::getIdUnsynced<GL41Buffer>(*this, buffer);
    return bufferId;
}
GLuint GL41Backend::getResourceBufferID(const Buffer& buffer) { GLuint GL41Backend::getResourceBufferID(const Buffer& buffer) {
auto* object = GL41Buffer::sync<GL41Buffer>(*this, buffer); auto* object = GL41Buffer::sync<GL41Buffer>(*this, buffer);
if (object) { if (object) {

View file

@ -78,8 +78,9 @@ void GL41Backend::updateInput() {
const Stream::Format::AttributeMap& attributes = _input._format->getAttributes(); const Stream::Format::AttributeMap& attributes = _input._format->getAttributes();
auto& inputChannels = _input._format->getChannels(); auto& inputChannels = _input._format->getChannels();
_stats._ISNumInputBufferChanges++; int numInvalids = (int)_input._invalidBuffers.count();
_stats._ISNumInputBufferChanges += numInvalids;
GLuint boundVBO = 0; GLuint boundVBO = 0;
for (auto& channelIt : inputChannels) { for (auto& channelIt : inputChannels) {
const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second; const Stream::Format::ChannelMap::value_type::second_type& channel = (channelIt).second;

View file

@ -235,6 +235,7 @@ protected:
GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override; GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;
GLuint getBufferID(const Buffer& buffer) override; GLuint getBufferID(const Buffer& buffer) override;
GLuint getBufferIDUnsynced(const Buffer& buffer) override;
GLBuffer* syncGPUObject(const Buffer& buffer) override; GLBuffer* syncGPUObject(const Buffer& buffer) override;
GLTexture* syncGPUObject(const TexturePointer& texture) override; GLTexture* syncGPUObject(const TexturePointer& texture) override;

View file

@ -51,6 +51,10 @@ GLuint GL45Backend::getBufferID(const Buffer& buffer) {
return GL45Buffer::getId<GL45Buffer>(*this, buffer); return GL45Buffer::getId<GL45Buffer>(*this, buffer);
} }
// GL 4.5 backend entry point for the unsynced buffer id lookup:
// forwards to GL45Buffer::getIdUnsynced with this backend and the given buffer.
GLuint GL45Backend::getBufferIDUnsynced(const Buffer& buffer) {
    const GLuint bufferId = GL45Buffer::getIdUnsynced<GL45Buffer>(*this, buffer);
    return bufferId;
}
GLBuffer* GL45Backend::syncGPUObject(const Buffer& buffer) { GLBuffer* GL45Backend::syncGPUObject(const Buffer& buffer) {
return GL45Buffer::sync<GL45Buffer>(*this, buffer); return GL45Buffer::sync<GL45Buffer>(*this, buffer);
} }

View file

@ -132,9 +132,18 @@ void GL45Backend::updateInput() {
auto offset = _input._bufferOffsets.data(); auto offset = _input._bufferOffsets.data();
auto stride = _input._bufferStrides.data(); auto stride = _input._bufferStrides.data();
for (GLuint buffer = 0; buffer < _input._buffers.size(); buffer++, vbo++, offset++, stride++) { // Profile the count of buffers to update and use it to short cut the for loop
int numInvalids = (int) _input._invalidBuffers.count();
_stats._ISNumInputBufferChanges += numInvalids;
auto numBuffers = _input._buffers.size();
for (GLuint buffer = 0; buffer < numBuffers; buffer++, vbo++, offset++, stride++) {
if (_input._invalidBuffers.test(buffer)) { if (_input._invalidBuffers.test(buffer)) {
glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride)); glBindVertexBuffer(buffer, (*vbo), (*offset), (GLsizei)(*stride));
numInvalids--;
if (numInvalids <= 0) {
break;
}
} }
} }

View file

@ -130,6 +130,7 @@ protected:
GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override; GLFramebuffer* syncGPUObject(const Framebuffer& framebuffer) override;
GLuint getBufferID(const Buffer& buffer) override; GLuint getBufferID(const Buffer& buffer) override;
GLuint getBufferIDUnsynced(const Buffer& buffer) override;
GLuint getResourceBufferID(const Buffer& buffer); GLuint getResourceBufferID(const Buffer& buffer);
GLBuffer* syncGPUObject(const Buffer& buffer) override; GLBuffer* syncGPUObject(const Buffer& buffer) override;

View file

@ -64,6 +64,10 @@ GLuint GLESBackend::getBufferID(const Buffer& buffer) {
return GLESBuffer::getId<GLESBuffer>(*this, buffer); return GLESBuffer::getId<GLESBuffer>(*this, buffer);
} }
// GLES backend entry point for the unsynced buffer id lookup:
// forwards to GLESBuffer::getIdUnsynced with this backend and the given buffer.
GLuint GLESBackend::getBufferIDUnsynced(const Buffer& buffer) {
    const GLuint bufferId = GLESBuffer::getIdUnsynced<GLESBuffer>(*this, buffer);
    return bufferId;
}
GLBuffer* GLESBackend::syncGPUObject(const Buffer& buffer) { GLBuffer* GLESBackend::syncGPUObject(const Buffer& buffer) {
return GLESBuffer::sync<GLESBuffer>(*this, buffer); return GLESBuffer::sync<GLESBuffer>(*this, buffer);
} }

View file

@ -417,10 +417,7 @@ public:
} }
const Data& get(uint32 offset) const { const Data& get(uint32 offset) const {
if (offset >= _items.size()) { assert((offset < _items.size()));
static const Data EMPTY;
return EMPTY;
}
return (_items.data() + offset)->_data; return (_items.data() + offset)->_data;
} }

View file

@ -152,6 +152,8 @@ public:
BufferStream makeRangedStream(uint32 offset, uint32 count = -1) const; BufferStream makeRangedStream(uint32 offset, uint32 count = -1) const;
BufferStream& operator = (const BufferStream& src) = default;
protected: protected:
Buffers _buffers; Buffers _buffers;
Offsets _offsets; Offsets _offsets;

View file

@ -32,6 +32,15 @@ Mesh::Mesh(const Mesh& mesh) :
Mesh::~Mesh() { Mesh::~Mesh() {
} }
// Install a caller-built vertex format and buffer stream on the mesh in one step,
// then rebuild _vertexBuffer as a view over the stream channel that the format
// assigns to the POSITION attribute.
//   vf  - vertex format to adopt; dereferenced here, so it must be non-null.
//   vbs - buffer stream to copy into _vertexStream; dereferenced here, so it must be
//         non-null and must hold a buffer for the POSITION channel.
void Mesh::setVertexFormatAndStream(const gpu::Stream::FormatPointer& vf, const gpu::BufferStreamPointer& vbs) {
    _vertexFormat = vf;
    _vertexStream = (*vbs);

    // Locate the channel carrying positions and gather that channel's binding info.
    const auto positionAttrib = _vertexFormat->getAttribute(gpu::Stream::POSITION);
    const auto channel = positionAttrib._channel;
    const auto channelBuffer = vbs->getBuffers()[channel];
    const auto channelOffset = vbs->getOffsets()[channel];
    const auto channelStride = (gpu::uint16) vbs->getStrides()[channel];

    // The view spans the whole channel buffer, stepping by the channel stride.
    _vertexBuffer = BufferView(channelBuffer, channelOffset, channelBuffer->getSize(),
                               channelStride, positionAttrib._element);
}
void Mesh::setVertexBuffer(const BufferView& buffer) { void Mesh::setVertexBuffer(const BufferView& buffer) {
_vertexBuffer = buffer; _vertexBuffer = buffer;
evalVertexFormat(); evalVertexFormat();
@ -107,11 +116,10 @@ Box Mesh::evalPartBound(int partNum) const {
index += part._startIndex; index += part._startIndex;
auto endIndex = index; auto endIndex = index;
endIndex += part._numIndices; endIndex += part._numIndices;
auto vertices = &_vertexBuffer.get<Vec3>(part._baseVertex);
for (;index != endIndex; index++) { for (;index != endIndex; index++) {
// skip primitive restart indices // skip primitive restart indices
if ((*index) != PRIMITIVE_RESTART_INDEX) { if ((*index) != PRIMITIVE_RESTART_INDEX) {
box += vertices[(*index)]; box += _vertexBuffer.get<Vec3>(part._baseVertex + (*index));
} }
} }
} }
@ -128,11 +136,10 @@ Box Mesh::evalPartsBound(int partStart, int partEnd) const {
Box partBound; Box partBound;
auto index = _indexBuffer.cbegin<uint>() + (*part)._startIndex; auto index = _indexBuffer.cbegin<uint>() + (*part)._startIndex;
auto endIndex = index + (*part)._numIndices; auto endIndex = index + (*part)._numIndices;
auto vertices = &_vertexBuffer.get<Vec3>((*part)._baseVertex);
for (;index != endIndex; index++) { for (;index != endIndex; index++) {
// skip primitive restart indices // skip primitive restart indices
if ((*index) != (uint) PRIMITIVE_RESTART_INDEX) { if ((*index) != (uint) PRIMITIVE_RESTART_INDEX) {
partBound += vertices[(*index)]; partBound += _vertexBuffer.get<Vec3>((*part)._baseVertex + (*index));
} }
} }

View file

@ -59,6 +59,9 @@ public:
void removeAttribute(Slot slot); void removeAttribute(Slot slot);
const BufferView getAttributeBuffer(int attrib) const; const BufferView getAttributeBuffer(int attrib) const;
// Force vertex stream and Vertex format
void setVertexFormatAndStream(const gpu::Stream::FormatPointer& vf, const gpu::BufferStreamPointer& vbs);
// Stream format // Stream format
const gpu::Stream::FormatPointer getVertexFormat() const { return _vertexFormat; } const gpu::Stream::FormatPointer getVertexFormat() const { return _vertexFormat; }