From 721cd79b5759f3230601e50e603f42e6b4f21988 Mon Sep 17 00:00:00 2001
From: Brad Davis <bdavis@saintandreas.org>
Date: Wed, 6 Jul 2016 17:55:06 -0700
Subject: [PATCH] Aggressive batch pre-allocation

---
 libraries/gpu/src/gpu/Batch.cpp | 50 ++++++++++++++++-------------
 libraries/gpu/src/gpu/Batch.h   | 56 ++++++++++++++-------------------
 libraries/gpu/src/gpu/Context.h |  4 +--
 3 files changed, 52 insertions(+), 58 deletions(-)

diff --git a/libraries/gpu/src/gpu/Batch.cpp b/libraries/gpu/src/gpu/Batch.cpp
index 6dc1d63ca8..8d16cd9262 100644
--- a/libraries/gpu/src/gpu/Batch.cpp
+++ b/libraries/gpu/src/gpu/Batch.cpp
@@ -30,24 +30,39 @@ ProfileRangeBatch::~ProfileRangeBatch() {
 
 using namespace gpu;
 
-Batch::Batch(const CacheState& cacheState) : Batch() {
-    _commands.reserve(cacheState.commandsSize);
-    _commandOffsets.reserve(cacheState.offsetsSize);
-    _params.reserve(cacheState.paramsSize);
-    _data.reserve(cacheState.dataSize);
-}
+size_t Batch::_commandsMax { 128 };  // Class-wide high-water marks (in elements): read by Batch::Batch() for reserve(), raised by ~Batch() and clear().
+size_t Batch::_commandOffsetsMax { 128 };
+size_t Batch::_paramsMax { 128 };
+size_t Batch::_dataMax { 128 };
+size_t Batch::_objectsMax { 128 };
+size_t Batch::_drawCallInfosMax { 128 };  // NOTE(review): these are plain, unsynchronized statics - confirm Batches are only built/cleared/destroyed on one thread.
 
-Batch::CacheState Batch::getCacheState() {
-    return CacheState(_commands.size(), _commandOffsets.size(), _params.size(), _data.size(),
-                _buffers.size(), _textures.size(), _streamFormats.size(), _transforms.size(), _pipelines.size(), 
-                _framebuffers.size(), _queries.size());
+Batch::Batch() {  // Pre-size each container to its class-wide high-water mark (128 until a larger batch has been recorded).
+    _commands.reserve(_commandsMax);
+    _commandOffsets.reserve(_commandOffsetsMax);
+    _params.reserve(_paramsMax);
+    _data.reserve(_dataMax);
+    _objects.reserve(_objectsMax);
+    _drawCallInfos.reserve(_drawCallInfosMax);
+}
 
 Batch::~Batch() {
-    //qDebug() << "Batch::~Batch()... " << getCacheState();
+    _commandsMax = std::max(_commands.size(), _commandsMax);  // Record final sizes so later Batches reserve at least this much; marks only ever grow.
+    _commandOffsetsMax = std::max(_commandOffsets.size(), _commandOffsetsMax);
+    _paramsMax = std::max(_params.size(), _paramsMax);
+    _dataMax = std::max(_data.size(), _dataMax);
+    _objectsMax = std::max(_objects.size(), _objectsMax);
+    _drawCallInfosMax = std::max(_drawCallInfos.size(), _drawCallInfosMax);  // NOTE(review): std::max needs <algorithm>; the include list is outside this hunk - confirm.
 }
 
 void Batch::clear() {
+    _commandsMax = std::max(_commands.size(), _commandsMax);  // Same high-water-mark updates as ~Batch(); must run before the clear() calls below reset the sizes to 0.
+    _commandOffsetsMax = std::max(_commandOffsets.size(), _commandOffsetsMax);
+    _paramsMax = std::max(_params.size(), _paramsMax);
+    _dataMax = std::max(_data.size(), _dataMax);
+    _objectsMax = std::max(_objects.size(), _objectsMax);
+    _drawCallInfosMax = std::max(_drawCallInfos.size(), _drawCallInfosMax);
+
     _commands.clear();
     _commandOffsets.clear();
     _params.clear();
@@ -58,6 +73,8 @@ void Batch::clear() {
     _transforms.clear();
     _pipelines.clear();
     _framebuffers.clear();
+    _objects.clear();  // Emptied here too, after their sizes were folded into _objectsMax / _drawCallInfosMax above.
+    _drawCallInfos.clear();
 }
 
 size_t Batch::cacheData(size_t size, const void* data) {
@@ -458,17 +475,6 @@ void Batch::preExecute() {
     }
 }
 
-QDebug& operator<<(QDebug& debug, const Batch::CacheState& cacheState) {
-    debug << "Batch::CacheState[ "
-        << "commandsSize:" << cacheState.commandsSize
-        << "offsetsSize:" << cacheState.offsetsSize
-        << "paramsSize:" << cacheState.paramsSize
-        << "dataSize:" << cacheState.dataSize
-        << "]";
-
-    return debug;
-}
-
 // Debugging
 void Batch::pushProfileRange(const char* name) {
 #if defined(NSIGHT_FOUND)
diff --git a/libraries/gpu/src/gpu/Batch.h b/libraries/gpu/src/gpu/Batch.h
index 4e51038368..b56b5ee84b 100644
--- a/libraries/gpu/src/gpu/Batch.h
+++ b/libraries/gpu/src/gpu/Batch.h
@@ -87,6 +87,7 @@ public:
     using NamedBatchDataMap = std::map<std::string, NamedBatchData>;
 
     DrawCallInfoBuffer _drawCallInfos;
+    static size_t _drawCallInfosMax;  // Largest _drawCallInfos.size() recorded by ~Batch()/clear(); used as the reserve() size in Batch().
 
     std::string _currentNamedCall;
 
@@ -96,34 +97,7 @@ public:
     void captureDrawCallInfo();
     void captureNamedDrawCallInfo(std::string name);
 
-    class CacheState {
-    public:
-        size_t commandsSize;
-        size_t offsetsSize;
-        size_t paramsSize;
-        size_t dataSize;
-
-        size_t buffersSize;
-        size_t texturesSize;
-        size_t streamFormatsSize;
-        size_t transformsSize;
-        size_t pipelinesSize;
-        size_t framebuffersSize;
-        size_t queriesSize;
-
-        CacheState() : commandsSize(0), offsetsSize(0), paramsSize(0), dataSize(0), buffersSize(0), texturesSize(0), 
-            streamFormatsSize(0), transformsSize(0), pipelinesSize(0), framebuffersSize(0), queriesSize(0) { }
-
-        CacheState(size_t commandsSize, size_t offsetsSize, size_t paramsSize, size_t dataSize, size_t buffersSize,
-            size_t texturesSize, size_t streamFormatsSize, size_t transformsSize, size_t pipelinesSize, 
-            size_t framebuffersSize, size_t queriesSize) : 
-            commandsSize(commandsSize), offsetsSize(offsetsSize), paramsSize(paramsSize), dataSize(dataSize), 
-            buffersSize(buffersSize), texturesSize(texturesSize), streamFormatsSize(streamFormatsSize), 
-            transformsSize(transformsSize), pipelinesSize(pipelinesSize), framebuffersSize(framebuffersSize), queriesSize(queriesSize) { }
-    };
-
-    Batch() {}
-    Batch(const CacheState& cacheState);
+    Batch();
     explicit Batch(const Batch& batch);
     ~Batch();
 
@@ -131,9 +105,6 @@ public:
     
     void preExecute();
 
-    CacheState getCacheState();
-
-
     // Batches may need to override the context level stereo settings
     // if they're performing framebuffer copy operations, like the 
     // deferred lighting resolution mechanism
@@ -401,11 +372,21 @@ public:
         typedef T Data;
         Data _data;
         Cache<T>(const Data& data) : _data(data) {}
+        static size_t _max;  // Per-Cache<T> high-water mark shared by every Vector of this T; defined after the Batch class with an initial value of 128.
 
         class Vector {
         public:
             std::vector< Cache<T> > _items;
 
+            Vector() {  // Reserve up front using the largest size any earlier Vector of this Cache<T> had when destroyed (128 at start).
+                _items.reserve(_max);
+            }
+
+            ~Vector() {  // NOTE(review): a user-declared destructor suppresses the implicit move constructor/assignment, so Vector will copy where it could move - confirm that is acceptable.
+                _max = std::max(_items.size(), _max);  // Raise the shared mark if this Vector grew past it; the mark is never lowered.
+            }
+
+
             size_t size() const { return _items.size(); }
             size_t cache(const Data& data) {
                 size_t offset = _items.size();
@@ -449,9 +430,16 @@ public:
     }
 
     Commands _commands;
+    static size_t _commandsMax;  // Each *Max static below is the reserve() size Batch() uses for the member declared just above it.
+
     CommandOffsets _commandOffsets;
+    static size_t _commandOffsetsMax;  // Raised to the container's size in ~Batch() and clear() when that size is larger.
+
     Params _params;
+    static size_t _paramsMax;
+
     Bytes _data;
+    static size_t _dataMax;
 
     // SSBO class... layout MUST match the layout in Transform.slh
     class TransformObject {
@@ -464,6 +452,7 @@ public:
     bool _invalidModel { true };
     Transform _currentModel;
     TransformObjects _objects;
+    static size_t _objectsMax;  // Largest _objects.size() recorded by ~Batch()/clear(); used as the reserve() size in Batch().
 
     BufferCaches _buffers;
     TextureCaches _textures;
@@ -491,6 +480,9 @@ protected:
     void captureDrawCallInfoImpl();
 };
 
+template <typename T>
+size_t Batch::Cache<T>::_max = 128;  // Out-of-class definition of the static member template: one independent counter per Cache<T> instantiation, starting at 128.
+
 }
 
 #if defined(NSIGHT_FOUND)
@@ -512,6 +504,4 @@ private:
 
 #endif
 
-QDebug& operator<<(QDebug& debug, const gpu::Batch::CacheState& cacheState);
-
 #endif
diff --git a/libraries/gpu/src/gpu/Context.h b/libraries/gpu/src/gpu/Context.h
index 652338f911..47233b2fe3 100644
--- a/libraries/gpu/src/gpu/Context.h
+++ b/libraries/gpu/src/gpu/Context.h
@@ -231,11 +231,9 @@ typedef std::shared_ptr<Context> ContextPointer;
 
 template<typename F>
 void doInBatch(std::shared_ptr<gpu::Context> context, F f) {
-    static gpu::Batch::CacheState cacheState;
-    gpu::Batch batch(cacheState);
+    gpu::Batch batch;  // The default constructor now pre-reserves from Batch's static high-water marks, replacing the function-local static CacheState.
     f(batch);
     context->render(batch);
-    cacheState = batch.getCacheState();
 }
 
 };