From 8a703d036331cf952c8d29d97b5ece409596a317 Mon Sep 17 00:00:00 2001
From: Brad Davis <bdavis@saintandreas.org>
Date: Wed, 16 Sep 2015 23:42:20 -0700
Subject: [PATCH] Instanced rendering, first pass

---
 examples/cubePerfTest.js                      |    6 +-
 .../src/RenderableBoxEntityItem.cpp           |    9 +-
 libraries/gpu/src/gpu/Batch.cpp               |   30 +-
 libraries/gpu/src/gpu/Batch.h                 |   36 +
 libraries/gpu/src/gpu/Format.h                |   17 +-
 libraries/gpu/src/gpu/GLBackend.cpp           |   17 +-
 libraries/gpu/src/gpu/GLBackendInput.cpp      |   18 +-
 libraries/gpu/src/gpu/GLBackendShader.cpp     |    5 +
 libraries/gpu/src/gpu/Inputs.slh              |    1 +
 libraries/gpu/src/gpu/Resource.h              |    5 +
 libraries/gpu/src/gpu/Stream.h                |    9 +-
 libraries/gpu/src/gpu/Transform.slh           |   43 +
 .../src/DeferredLightingEffect.cpp            | 1652 +++++++++--------
 .../render-utils/src/DeferredLightingEffect.h |    9 +-
 libraries/render-utils/src/GeometryCache.cpp  |  164 +-
 libraries/render-utils/src/GeometryCache.h    |    6 +
 libraries/render-utils/src/simple.slv         |   10 +-
 17 files changed, 1139 insertions(+), 898 deletions(-)
diff --git a/examples/cubePerfTest.js b/examples/cubePerfTest.js
index bdf123ae33..699472edd9 100644
--- a/examples/cubePerfTest.js
+++ b/examples/cubePerfTest.js
@@ -16,7 +16,7 @@ var PARTICLE_MAX_SIZE = 2.50;
 var LIFETIME = 600;
 var boxes = [];
 
-var ids = Entities.findEntities({ x: 512, y: 512, z: 512 }, 50);
+var ids = Entities.findEntities(MyAvatar.position, 50);
 for (var i = 0; i < ids.length; i++) {
     var id = ids[i];
     var properties = Entities.getEntityProperties(id);
@@ -33,10 +33,10 @@ for (var x = 0; x < SIDE_SIZE; x++) {
             var gray = Math.random() * 155;
             var cube = Math.random() > 0.5;
             var color = { red: 100 + gray, green: 100 + gray, blue: 100 + gray };
-            var position = { x: 512 + x * 0.2, y: 512 + y * 0.2, z: 512 + z * 0.2};
+            var position = Vec3.sum(MyAvatar.position, { x: x * 0.2, y: y * 0.2, z: z * 0.2});
             var radius = Math.random() * 0.1;
             boxes.push(Entities.addEntity({ 
-                type: cube ? "Box" : "Sphere",
+                type: cube ? "Box" : "Box",
                 name: "PerfTest",
                 position: position,  
                 dimensions: { x: radius, y: radius, z: radius }, 
diff --git a/libraries/entities-renderer/src/RenderableBoxEntityItem.cpp b/libraries/entities-renderer/src/RenderableBoxEntityItem.cpp
index 525226b0e8..b9ff69af52 100644
--- a/libraries/entities-renderer/src/RenderableBoxEntityItem.cpp
+++ b/libraries/entities-renderer/src/RenderableBoxEntityItem.cpp
@@ -39,9 +39,6 @@ void RenderableBoxEntityItem::render(RenderArgs* args) {
     PerformanceTimer perfTimer("RenderableBoxEntityItem::render");
     Q_ASSERT(getType() == EntityTypes::Box);
     Q_ASSERT(args->_batch);
-    gpu::Batch& batch = *args->_batch;
-    batch.setModelTransform(getTransformToCenter()); // we want to include the scale as well
-    glm::vec4 cubeColor(toGlm(getXColor()), getLocalRenderAlpha());
 
     if (!_procedural) {
         _procedural.reset(new Procedural(this->getUserData()));
@@ -54,11 +51,15 @@ void RenderableBoxEntityItem::render(RenderArgs* args) {
             gpu::State::FACTOR_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
     }
 
+    gpu::Batch& batch = *args->_batch;
+    glm::vec4 cubeColor(toGlm(getXColor()), getLocalRenderAlpha());
+
     if (_procedural->ready()) {
+        batch.setModelTransform(getTransformToCenter()); // we want to include the scale as well
         _procedural->prepare(batch, this->getDimensions());
         DependencyManager::get<GeometryCache>()->renderSolidCube(batch, 1.0f, _procedural->getColor(cubeColor));
     } else {
-        DependencyManager::get<DeferredLightingEffect>()->renderSolidCube(batch, 1.0f, cubeColor);
+        DependencyManager::get<DeferredLightingEffect>()->renderSolidCubeInstance(batch, getTransformToCenter(), cubeColor);
     }
 
     RenderableDebugableEntityItem::render(this, args);
diff --git a/libraries/gpu/src/gpu/Batch.cpp b/libraries/gpu/src/gpu/Batch.cpp
index fb6618e953..e5ec8525b6 100644
--- a/libraries/gpu/src/gpu/Batch.cpp
+++ b/libraries/gpu/src/gpu/Batch.cpp
@@ -8,10 +8,10 @@
 //  Distributed under the Apache License, Version 2.0.
 //  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
 //
-#include <string.h>
-
 #include "Batch.h"
 
+#include <string.h>
+
 #if defined(NSIGHT_FOUND)
 #include "nvToolsExt.h"
 
@@ -302,4 +302,28 @@ void Batch::enableSkybox(bool enable) {
 
 bool Batch::isSkyboxEnabled() const {
     return _enableSkybox;
-}
\ No newline at end of file
+}
+
+void Batch::setupNamedCalls(const std::string& instanceName, NamedBatchData::Function function) { // registers one more deferred call under instanceName
+    NamedBatchData& instance = _namedData[instanceName]; // operator[] creates the entry the first time a name is seen
+    ++instance._count; // incremented once per setupNamedCalls() invocation
+    instance._function = function; // the most recent registration replaces any earlier one for this name
+}
+
+BufferPointer Batch::getNamedBuffer(const std::string& instanceName, uint8_t index) { // returns the index-th buffer for instanceName, creating it on demand
+    NamedBatchData& instance = _namedData[instanceName]; // also creates the named entry if it does not exist yet
+    if (instance._buffers.size() <= index) {
+        instance._buffers.resize(index + 1); // grow so that `index` is a valid slot; added slots are value-initialized
+    }
+    if (!instance._buffers[index]) {
+        instance._buffers[index].reset(new Buffer()); // lazily allocate the buffer the first time this slot is requested
+    }
+    return instance._buffers[index];
+}
+
+void Batch::preExecute() { // runs every accumulated named call, then discards the accumulated data
+    for (auto& mapItem : _namedData) {
+        mapItem.second.process(*this); // hands this batch and the entry's own data to the entry's registered function
+    }
+    _namedData.clear(); // named data is single-use: no entry survives past this call
+}
diff --git a/libraries/gpu/src/gpu/Batch.h b/libraries/gpu/src/gpu/Batch.h
index 0ecfde44f1..c3bf6250c5 100644
--- a/libraries/gpu/src/gpu/Batch.h
+++ b/libraries/gpu/src/gpu/Batch.h
@@ -12,6 +12,8 @@
 #define hifi_gpu_Batch_h
 
 #include <vector>
+#include <mutex>
+#include <functional>
 
 #include "Framebuffer.h"
 #include "Pipeline.h"
@@ -38,16 +40,42 @@ enum ReservedSlot {
     TRANSFORM_CAMERA_SLOT = 7,
 };
 
+// The named batch data provides a mechanism for accumulating data into buffers over the course 
+// of many independent calls.  For instance, two objects in the scene might both want to render 
+// a simple box, but are otherwise unaware of each other.  The common code that they call to render
+// the box can create buffers to store the rendering parameters for each box and register a function 
+// that will be called with the accumulated buffer data when the batch commands are finally 
+// executed against the backend
+
+
 class Batch {
 public:
     typedef Stream::Slot Slot;
 
+    struct NamedBatchData { // per-name accumulator: buffers filled across independent calls plus the function that consumes them
+        using BufferPointers = std::vector<BufferPointer>;
+        using Function = std::function<void(gpu::Batch&, NamedBatchData&)>;
+
+        std::once_flag _once; // NOTE(review): not referenced by any code visible in this patch
+        BufferPointers _buffers; // addressed by the `index` argument of Batch::getNamedBuffer()
+        size_t _count{ 0 }; // incremented once per Batch::setupNamedCalls()
+        Function _function; // stays empty when only getNamedBuffer() was called for this name
+
+        void process(Batch& batch) { // invoked for every entry by Batch::preExecute()
+            if (_function) { _function(batch, *this); } // an entry without a registered function is skipped instead of throwing std::bad_function_call
+        }
+    };
+
+    using NamedBatchDataMap = std::map<std::string, NamedBatchData>;
+
     Batch();
     explicit Batch(const Batch& batch);
     ~Batch();
 
     void clear();
     
+    void preExecute();
+
     // Batches may need to override the context level stereo settings
     // if they're performing framebuffer copy operations, like the 
     // deferred lighting resolution mechanism
@@ -67,6 +95,12 @@ public:
     void drawInstanced(uint32 nbInstances, Primitive primitiveType, uint32 nbVertices, uint32 startVertex = 0, uint32 startInstance = 0);
     void drawIndexedInstanced(uint32 nbInstances, Primitive primitiveType, uint32 nbIndices, uint32 startIndex = 0, uint32 startInstance = 0);
 
+
+    void setupNamedCalls(const std::string& instanceName, NamedBatchData::Function function);
+    BufferPointer getNamedBuffer(const std::string& instanceName, uint8_t index = 0);
+    
+    
+
     // Input Stage
     // InputFormat
     // InputBuffers
@@ -291,6 +325,8 @@ public:
     FramebufferCaches _framebuffers;
     QueryCaches _queries;
 
+    NamedBatchDataMap _namedData;
+
     bool _enableStereo{ true };
     bool _enableSkybox{ false };
 
diff --git a/libraries/gpu/src/gpu/Format.h b/libraries/gpu/src/gpu/Format.h
index 8cd16e0be4..e16256574b 100644
--- a/libraries/gpu/src/gpu/Format.h
+++ b/libraries/gpu/src/gpu/Format.h
@@ -120,6 +120,18 @@ enum Dimension {
     MAT4,
     NUM_DIMENSIONS,
 };
+
+// Count of consecutive attribute locations (input slots) occupied by an Element of a given Dimension
+static const int LOCATION_COUNT[NUM_DIMENSIONS] = {
+    1,
+    1,
+    1,
+    1,
+    1,
+    3,
+    4, // last entry (MAT4): a mat4 attribute spans 4 locations
+};
+
 // Count (of scalars) in an Element for a given Dimension
 static const int DIMENSION_COUNT[NUM_DIMENSIONS] = {
     1,
@@ -127,8 +139,8 @@ static const int DIMENSION_COUNT[NUM_DIMENSIONS] = {
     3,
     4,
     4,
-    9,
-    16,
+    3,
+    4,
 };
 
 // Semantic of an Element
@@ -184,6 +196,7 @@ public:
 
     Dimension getDimension() const { return (Dimension)_dimension; }
     uint8 getDimensionCount() const { return  DIMENSION_COUNT[(Dimension)_dimension]; }
+    uint8 getLocationCount() const { return  LOCATION_COUNT[(Dimension)_dimension]; } // LOCATION_COUNT lookup for this element's dimension
 
     Type getType() const { return (Type)_type; }
     bool isNormalized() const { return (getType() >= NFLOAT); }
diff --git a/libraries/gpu/src/gpu/GLBackend.cpp b/libraries/gpu/src/gpu/GLBackend.cpp
index a8c21125b5..d2e8155ba1 100644
--- a/libraries/gpu/src/gpu/GLBackend.cpp
+++ b/libraries/gpu/src/gpu/GLBackend.cpp
@@ -191,6 +191,9 @@ void GLBackend::renderPassDraw(Batch& batch) {
 }
 
 void GLBackend::render(Batch& batch) {
+    // Finalize the batch by moving all the instanced rendering into the command buffer
+    batch.preExecute();
+
     _stereo._skybox = batch.isSkyboxEnabled();
     // Allow the batch to override the rendering stereo settings
     // for things like full framebuffer copy operations (deferred lighting passes)
@@ -316,7 +319,19 @@ void GLBackend::do_drawInstanced(Batch& batch, uint32 paramOffset) {
 }
 
 void GLBackend::do_drawIndexedInstanced(Batch& batch, uint32 paramOffset) {
-    (void) CHECK_GL_ERROR();
+    updateInput();
+    updateTransform();
+    updatePipeline();
+
+    GLint numInstances = batch._params[paramOffset + 4]._uint; // draw arguments are read back from the batch parameter list at fixed offsets
+    GLenum mode = _primitiveToGLmode[(Primitive)batch._params[paramOffset + 3]._uint];
+    uint32 numIndices = batch._params[paramOffset + 2]._uint;
+    uint32 startIndex = batch._params[paramOffset + 1]._uint; // NOTE(review): read but never applied; the draw below passes nullptr, i.e. byte offset 0 into the index buffer
+    uint32 startInstance = batch._params[paramOffset + 0]._uint; // NOTE(review): read but never applied; glDrawElementsInstanced takes no base-instance argument
+    GLenum glType = _elementTypeToGLType[_input._indexBufferType];
+
+    glDrawElementsInstanced(mode, numIndices, glType, nullptr, numInstances);
+    (void)CHECK_GL_ERROR();
 }
 
 void GLBackend::do_resetStages(Batch& batch, uint32 paramOffset) {
diff --git a/libraries/gpu/src/gpu/GLBackendInput.cpp b/libraries/gpu/src/gpu/GLBackendInput.cpp
index efbead5da2..7f021fd5c5 100755
--- a/libraries/gpu/src/gpu/GLBackendInput.cpp
+++ b/libraries/gpu/src/gpu/GLBackendInput.cpp
@@ -160,7 +160,10 @@ void GLBackend::updateInput() {
             if (_input._format) {
                 for (auto& it : _input._format->getAttributes()) {
                     const Stream::Attribute& attrib = (it).second;
-                    newActivation.set(attrib._slot);
+                    uint8_t locationCount = attrib._element.getLocationCount();
+                    for (int i = 0; i < locationCount; ++i) {
+                        newActivation.set(attrib._slot + i);
+                    }
                 }
             }
             
@@ -211,14 +214,19 @@ void GLBackend::updateInput() {
                             const Stream::Attribute& attrib = attributes.at(channel._slots[i]);
                             GLuint slot = attrib._slot;
                             GLuint count = attrib._element.getDimensionCount();
+                            uint8_t locationCount = attrib._element.getLocationCount();
                             GLenum type = _elementTypeToGLType[attrib._element.getType()];
-                            GLuint stride = strides[bufferNum];
+                            GLenum perLocationStride = strides[bufferNum];
+                            GLuint stride = perLocationStride * locationCount;
                             GLuint pointer = attrib._offset + offsets[bufferNum];
                             GLboolean isNormalized = attrib._element.isNormalized();
 
-                            glVertexAttribPointer(slot, count, type, isNormalized, stride,
-                                                      reinterpret_cast<GLvoid*>(pointer));
-
+                            for (int j = 0; j < locationCount; ++j) {
+                                glVertexAttribPointer(slot + j, count, type, isNormalized, stride,
+                                    reinterpret_cast<GLvoid*>(pointer + perLocationStride * j));
+                                glVertexAttribDivisor(slot + j, attrib._frequency);
+                            }
+                            
                             // TODO: Support properly the IAttrib version
 
                             (void) CHECK_GL_ERROR();
diff --git a/libraries/gpu/src/gpu/GLBackendShader.cpp b/libraries/gpu/src/gpu/GLBackendShader.cpp
index cd90da483b..5a0ab93ec5 100755
--- a/libraries/gpu/src/gpu/GLBackendShader.cpp
+++ b/libraries/gpu/src/gpu/GLBackendShader.cpp
@@ -75,6 +75,11 @@ void makeBindings(GLBackend::GLShader* shader) {
         glBindAttribLocation(glprogram, gpu::Stream::SKIN_CLUSTER_WEIGHT, "inSkinClusterWeight");
     }
 
+    loc = glGetAttribLocation(glprogram, "inInstanceTransform");
+    if (loc >= 0 && loc != gpu::Stream::INSTANCE_XFM) {
+        glBindAttribLocation(glprogram, gpu::Stream::INSTANCE_XFM, "inInstanceTransform");
+    }
+
     // Link again to take into account the assigned attrib location
     glLinkProgram(glprogram);
 
diff --git a/libraries/gpu/src/gpu/Inputs.slh b/libraries/gpu/src/gpu/Inputs.slh
index 8f90b6ebee..99e1e1d6d5 100644
--- a/libraries/gpu/src/gpu/Inputs.slh
+++ b/libraries/gpu/src/gpu/Inputs.slh
@@ -18,4 +18,5 @@ in vec4 inTangent;
 in vec4 inSkinClusterIndex;
 in vec4 inSkinClusterWeight;
 in vec4 inTexCoord1;
+in mat4 inInstanceTransform;
 <@endif@>
diff --git a/libraries/gpu/src/gpu/Resource.h b/libraries/gpu/src/gpu/Resource.h
index 177c798e2c..de5e4a7242 100644
--- a/libraries/gpu/src/gpu/Resource.h
+++ b/libraries/gpu/src/gpu/Resource.h
@@ -139,6 +139,11 @@ public:
     // \return the number of bytes copied
     Size append(Size size, const Byte* data);
 
+    template <typename T> // convenience overload: appends the object representation of a single value
+    Size append(const T& t) { // forwards to append(Size, const Byte*) and returns its result
+        return append(sizeof(t), reinterpret_cast<const Byte*>(&t)); // copies sizeof(t) raw bytes, so T should be plain data with no owned pointers
+    }
+
     // Access the sysmem object.
     const Sysmem& getSysmem() const { assert(_sysmem); return (*_sysmem); }
     Sysmem& editSysmem() { assert(_sysmem); return (*_sysmem); }
diff --git a/libraries/gpu/src/gpu/Stream.h b/libraries/gpu/src/gpu/Stream.h
index 46ea1574ed..c0ad1ebe46 100644
--- a/libraries/gpu/src/gpu/Stream.h
+++ b/libraries/gpu/src/gpu/Stream.h
@@ -35,11 +35,12 @@ public:
         SKIN_CLUSTER_INDEX = 5,
         SKIN_CLUSTER_WEIGHT = 6,
         TEXCOORD1 = 7,
-        INSTANCE_XFM = 8,
-        INSTANCE_SCALE = 9,
-        INSTANCE_TRANSLATE = 10,
+        INSTANCE_SCALE = 8,
+        INSTANCE_TRANSLATE = 9,
+        INSTANCE_XFM = 10,
 
-        NUM_INPUT_SLOTS,
+        // Instance XFM is a mat4, and as such takes up 4 slots
+        NUM_INPUT_SLOTS = INSTANCE_XFM + 4,
     };
 
     typedef uint8 Slot;
diff --git a/libraries/gpu/src/gpu/Transform.slh b/libraries/gpu/src/gpu/Transform.slh
index b492b4ef24..b766cc88d4 100644
--- a/libraries/gpu/src/gpu/Transform.slh
+++ b/libraries/gpu/src/gpu/Transform.slh
@@ -53,6 +53,15 @@ TransformCamera getTransformCamera() {
     }
 <@endfunc@>
 
+<@func transformInstancedModelToClipPos(cameraTransform, objectTransform, modelPos, clipPos)@>
+    <!// Equivalent to the following but hopefully a tad more accurate
+      //return camera._projection * camera._view * inInstanceTransform * pos; !>
+    { // transformInstancedModelToClipPos (objectTransform is unused; the model matrix is inInstanceTransform)
+        vec4 _eyepos = (inInstanceTransform * <$modelPos$>) + vec4(-<$modelPos$>.w * <$cameraTransform$>._viewInverse[3].xyz, 0.0);
+        <$clipPos$> = <$cameraTransform$>._projectionViewUntranslated * _eyepos;
+    }
+<@endfunc@>
+
 <@func $transformModelToEyeAndClipPos(cameraTransform, objectTransform, modelPos, eyePos, clipPos)@>
     <!// Equivalent to the following but hoppefully a tad more accurate
       //return camera._projection * camera._view * object._model * pos; !>
@@ -65,12 +74,31 @@ TransformCamera getTransformCamera() {
     }
 <@endfunc@>
 
+<@func $transformInstancedModelToEyeAndClipPos(cameraTransform, objectTransform, modelPos, eyePos, clipPos)@>
+    <!// Equivalent to the following but hopefully a tad more accurate
+      //return camera._projection * camera._view * inInstanceTransform * pos; !>
+    { // transformInstancedModelToEyeAndClipPos (objectTransform is unused; the model matrix is inInstanceTransform)
+        vec4 _worldpos = (inInstanceTransform * <$modelPos$>);
+        <$eyePos$> = (<$cameraTransform$>._view * _worldpos);
+        vec4 _eyepos = _worldpos + vec4(-<$modelPos$>.w * <$cameraTransform$>._viewInverse[3].xyz, 0.0);
+        <$clipPos$> = <$cameraTransform$>._projectionViewUntranslated * _eyepos;
+      //  <$eyePos$> = (<$cameraTransform$>._projectionInverse * <$clipPos$>);
+    }
+<@endfunc@>
+
+
 <@func transformModelToWorldPos(objectTransform, modelPos, worldPos)@>
     { // transformModelToWorldPos
         <$worldPos$> = (<$objectTransform$>._model * <$modelPos$>);
     }
 <@endfunc@>
 
+<@func transformInstancedModelToWorldPos(objectTransform, modelPos, worldPos)@>
+    { // transformInstancedModelToWorldPos (objectTransform is unused; the model matrix is inInstanceTransform)
+        <$worldPos$> = (inInstanceTransform * <$modelPos$>);
+    }
+<@endfunc@>
+
 <@func transformModelToEyeDir(cameraTransform, objectTransform, modelDir, eyeDir)@>
     { // transformModelToEyeDir
         vec3 mr0 = vec3(<$objectTransform$>._modelInverse[0].x, <$objectTransform$>._modelInverse[1].x, <$objectTransform$>._modelInverse[2].x);
@@ -85,6 +113,21 @@ TransformCamera getTransformCamera() {
     }
 <@endfunc@>
 
+<@func transformInstancedModelToEyeDir(cameraTransform, objectTransform, modelDir, eyeDir)@>
+    { // transformInstancedModelToEyeDir (objectTransform is unused; the model matrix is inInstanceTransform)
+        mat4 modelInverse = inverse(inInstanceTransform); // evaluated on every invocation; transformModelToEyeDir reads objectTransform._modelInverse instead
+        vec3 mr0 = vec3(modelInverse[0].x, modelInverse[1].x, modelInverse[2].x); // mr0..mr2: rows of the upper-left 3x3 of modelInverse
+        vec3 mr1 = vec3(modelInverse[0].y, modelInverse[1].y, modelInverse[2].y);
+        vec3 mr2 = vec3(modelInverse[0].z, modelInverse[1].z, modelInverse[2].z);
+
+        vec3 mvc0 = vec3(dot(<$cameraTransform$>._viewInverse[0].xyz, mr0), dot(<$cameraTransform$>._viewInverse[0].xyz, mr1), dot(<$cameraTransform$>._viewInverse[0].xyz, mr2));
+        vec3 mvc1 = vec3(dot(<$cameraTransform$>._viewInverse[1].xyz, mr0), dot(<$cameraTransform$>._viewInverse[1].xyz, mr1), dot(<$cameraTransform$>._viewInverse[1].xyz, mr2));
+        vec3 mvc2 = vec3(dot(<$cameraTransform$>._viewInverse[2].xyz, mr0), dot(<$cameraTransform$>._viewInverse[2].xyz, mr1), dot(<$cameraTransform$>._viewInverse[2].xyz, mr2));
+
+        <$eyeDir$> = vec3(dot(mvc0, <$modelDir$>), dot(mvc1, <$modelDir$>), dot(mvc2, <$modelDir$>)); // each component is modelDir dotted with one of mvc0..mvc2
+    }
+<@endfunc@>
+
 <@func transformEyeToWorldDir(cameraTransform, eyeDir, worldDir)@>
     { // transformEyeToWorldDir
         <$worldDir$> = vec3(<$cameraTransform$>._viewInverse * vec4(<$eyeDir$>.xyz, 0.0));
diff --git a/libraries/render-utils/src/DeferredLightingEffect.cpp b/libraries/render-utils/src/DeferredLightingEffect.cpp
index ce387e648b..6c7310509a 100644
--- a/libraries/render-utils/src/DeferredLightingEffect.cpp
+++ b/libraries/render-utils/src/DeferredLightingEffect.cpp
@@ -1,809 +1,845 @@
-//
-//  DeferredLightingEffect.cpp
-//  interface/src/renderer
-//
-//  Created by Andrzej Kapolka on 9/11/14.
-//  Copyright 2014 High Fidelity, Inc.
-//
-//  Distributed under the Apache License, Version 2.0.
-//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
-//
-
-#include "DeferredLightingEffect.h"
-
-#include <GLMHelpers.h>
-#include <PathUtils.h>
-#include <ViewFrustum.h>
-
-#include <gpu/Batch.h>
-#include <gpu/Context.h>
-#include <gpu/StandardShaderLib.h>
-
-#include "AbstractViewStateInterface.h"
-#include "GeometryCache.h"
-#include "TextureCache.h"
-#include "FramebufferCache.h"
-
-
-#include "simple_vert.h"
-#include "simple_textured_frag.h"
-#include "simple_textured_emisive_frag.h"
-
-#include "deferred_light_vert.h"
-#include "deferred_light_limited_vert.h"
-#include "deferred_light_spot_vert.h"
-
-#include "directional_light_frag.h"
-#include "directional_light_shadow_map_frag.h"
-#include "directional_light_cascaded_shadow_map_frag.h"
-
-#include "directional_ambient_light_frag.h"
-#include "directional_ambient_light_shadow_map_frag.h"
-#include "directional_ambient_light_cascaded_shadow_map_frag.h"
-
-#include "directional_skybox_light_frag.h"
-#include "directional_skybox_light_shadow_map_frag.h"
-#include "directional_skybox_light_cascaded_shadow_map_frag.h"
-
-#include "point_light_frag.h"
-#include "spot_light_frag.h"
-
-static const std::string glowIntensityShaderHandle = "glowIntensity";
-
-struct LightLocations {
-    int shadowDistances;
-    int shadowScale;
-    int radius;
-    int ambientSphere;
-    int lightBufferUnit;
-    int atmosphereBufferUnit;
-    int texcoordMat;
-    int coneParam;
-    int deferredTransformBuffer;
-};
-
-static void loadLightProgram(const char* vertSource, const char* fragSource, bool lightVolume, gpu::PipelinePointer& program, LightLocationsPtr& locations);
-
-
-gpu::PipelinePointer DeferredLightingEffect::getPipeline(SimpleProgramKey config) {
-    auto it = _simplePrograms.find(config);
-    if (it != _simplePrograms.end()) {
-        return it.value();
-    }
-    
-    auto state = std::make_shared<gpu::State>();
-    if (config.isCulled()) {
-        state->setCullMode(gpu::State::CULL_BACK);
-    } else {
-        state->setCullMode(gpu::State::CULL_NONE);
-    }
-    state->setDepthTest(true, true, gpu::LESS_EQUAL);
-    if (config.hasDepthBias()) {
-        state->setDepthBias(1.0f);
-        state->setDepthBiasSlopeScale(1.0f);
-    }
-    state->setBlendFunction(false,
-                            gpu::State::SRC_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::INV_SRC_ALPHA,
-                            gpu::State::FACTOR_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
-    
-    gpu::ShaderPointer program = (config.isEmissive()) ? _emissiveShader : _simpleShader;
-    gpu::PipelinePointer pipeline = gpu::PipelinePointer(gpu::Pipeline::create(program, state));
-    _simplePrograms.insert(config, pipeline);
-    return pipeline;
-}
-
-void DeferredLightingEffect::init(AbstractViewStateInterface* viewState) {
-    auto VS = gpu::ShaderPointer(gpu::Shader::createVertex(std::string(simple_vert)));
-    auto PS = gpu::ShaderPointer(gpu::Shader::createPixel(std::string(simple_textured_frag)));
-    auto PSEmissive = gpu::ShaderPointer(gpu::Shader::createPixel(std::string(simple_textured_emisive_frag)));
-    
-    _simpleShader = gpu::ShaderPointer(gpu::Shader::createProgram(VS, PS));
-    _emissiveShader = gpu::ShaderPointer(gpu::Shader::createProgram(VS, PSEmissive));
-    
-    gpu::Shader::BindingSet slotBindings;
-    slotBindings.insert(gpu::Shader::Binding(std::string("normalFittingMap"), DeferredLightingEffect::NORMAL_FITTING_MAP_SLOT));
-    gpu::Shader::makeProgram(*_simpleShader, slotBindings);
-    gpu::Shader::makeProgram(*_emissiveShader, slotBindings);
-
-    _viewState = viewState;
-    _directionalLightLocations = std::make_shared<LightLocations>();
-    _directionalLightShadowMapLocations = std::make_shared<LightLocations>();
-    _directionalLightCascadedShadowMapLocations = std::make_shared<LightLocations>();
-    _directionalAmbientSphereLightLocations = std::make_shared<LightLocations>();
-    _directionalAmbientSphereLightShadowMapLocations = std::make_shared<LightLocations>();
-    _directionalAmbientSphereLightCascadedShadowMapLocations = std::make_shared<LightLocations>();
-    _directionalSkyboxLightLocations = std::make_shared<LightLocations>();
-    _directionalSkyboxLightShadowMapLocations = std::make_shared<LightLocations>();
-    _directionalSkyboxLightCascadedShadowMapLocations = std::make_shared<LightLocations>();
-    _pointLightLocations = std::make_shared<LightLocations>();
-    _spotLightLocations = std::make_shared<LightLocations>();
-
-    loadLightProgram(deferred_light_vert, directional_light_frag, false, _directionalLight, _directionalLightLocations);
-    loadLightProgram(deferred_light_vert, directional_light_shadow_map_frag, false, _directionalLightShadowMap,
-        _directionalLightShadowMapLocations);
-    loadLightProgram(deferred_light_vert, directional_light_cascaded_shadow_map_frag, false, _directionalLightCascadedShadowMap,
-        _directionalLightCascadedShadowMapLocations);
-
-    loadLightProgram(deferred_light_vert, directional_ambient_light_frag, false, _directionalAmbientSphereLight, _directionalAmbientSphereLightLocations);
-    loadLightProgram(deferred_light_vert, directional_ambient_light_shadow_map_frag, false, _directionalAmbientSphereLightShadowMap,
-        _directionalAmbientSphereLightShadowMapLocations);
-    loadLightProgram(deferred_light_vert, directional_ambient_light_cascaded_shadow_map_frag, false, _directionalAmbientSphereLightCascadedShadowMap,
-        _directionalAmbientSphereLightCascadedShadowMapLocations);
-
-    loadLightProgram(deferred_light_vert, directional_skybox_light_frag, false, _directionalSkyboxLight, _directionalSkyboxLightLocations);
-    loadLightProgram(deferred_light_vert, directional_skybox_light_shadow_map_frag, false, _directionalSkyboxLightShadowMap,
-        _directionalSkyboxLightShadowMapLocations);
-    loadLightProgram(deferred_light_vert, directional_skybox_light_cascaded_shadow_map_frag, false, _directionalSkyboxLightCascadedShadowMap,
-        _directionalSkyboxLightCascadedShadowMapLocations);
-
-
-    loadLightProgram(deferred_light_limited_vert, point_light_frag, true, _pointLight, _pointLightLocations);
-    loadLightProgram(deferred_light_spot_vert, spot_light_frag, true, _spotLight, _spotLightLocations);
-
-    {
-        //auto VSFS = gpu::StandardShaderLib::getDrawViewportQuadTransformTexcoordVS();
-        //auto PSBlit = gpu::StandardShaderLib::getDrawTexturePS();
-        auto blitProgram = gpu::StandardShaderLib::getProgram(gpu::StandardShaderLib::getDrawViewportQuadTransformTexcoordVS, gpu::StandardShaderLib::getDrawTexturePS);
-        gpu::Shader::makeProgram(*blitProgram);
-        auto blitState = std::make_shared<gpu::State>();
-        blitState->setBlendFunction(true,
-                                gpu::State::SRC_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::INV_SRC_ALPHA,
-                                gpu::State::FACTOR_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
-        blitState->setColorWriteMask(true, true, true, false);
-        _blitLightBuffer = gpu::PipelinePointer(gpu::Pipeline::create(blitProgram, blitState));
-    }
-
-    // Allocate a global light representing the Global Directional light casting shadow (the sun) and the ambient light
-    _globalLights.push_back(0);
-    _allocatedLights.push_back(std::make_shared<model::Light>());
-
-    model::LightPointer lp = _allocatedLights[0];
-
-    lp->setDirection(-glm::vec3(1.0f, 1.0f, 1.0f));
-    lp->setColor(glm::vec3(1.0f));
-    lp->setIntensity(1.0f);
-    lp->setType(model::Light::SUN);
-    lp->setAmbientSpherePreset(gpu::SphericalHarmonics::Preset(_ambientLightMode % gpu::SphericalHarmonics::NUM_PRESET));
-}
-
-
-
-void DeferredLightingEffect::bindSimpleProgram(gpu::Batch& batch, bool textured, bool culled,
-                                               bool emmisive, bool depthBias) {
-    SimpleProgramKey config{textured, culled, emmisive, depthBias};
-    batch.setPipeline(getPipeline(config));
-
-    gpu::ShaderPointer program = (config.isEmissive()) ? _emissiveShader : _simpleShader;
-    int glowIntensity = program->getUniforms().findLocation("glowIntensity");
-    batch._glUniform1f(glowIntensity, 1.0f);
-    
-    if (!config.isTextured()) {
-        // If it is not textured, bind white texture and keep using textured pipeline
-        batch.setResourceTexture(0, DependencyManager::get<TextureCache>()->getWhiteTexture());
-    }
-
-    batch.setResourceTexture(NORMAL_FITTING_MAP_SLOT, DependencyManager::get<TextureCache>()->getNormalFittingTexture());
-}
-
-void DeferredLightingEffect::renderSolidSphere(gpu::Batch& batch, float radius, int slices, int stacks, const glm::vec4& color) {
-    bindSimpleProgram(batch);
-    DependencyManager::get<GeometryCache>()->renderSphere(batch, radius, slices, stacks, color);
-}
-
-void DeferredLightingEffect::renderWireSphere(gpu::Batch& batch, float radius, int slices, int stacks, const glm::vec4& color) {
-    bindSimpleProgram(batch);
-    DependencyManager::get<GeometryCache>()->renderSphere(batch, radius, slices, stacks, color, false);
-}
-
-void DeferredLightingEffect::renderSolidCube(gpu::Batch& batch, float size, const glm::vec4& color) {
-    bindSimpleProgram(batch);
-    DependencyManager::get<GeometryCache>()->renderSolidCube(batch, size, color);
-}
-
-void DeferredLightingEffect::renderWireCube(gpu::Batch& batch, float size, const glm::vec4& color) {
-    bindSimpleProgram(batch);
-    DependencyManager::get<GeometryCache>()->renderWireCube(batch, size, color);
-}
-
-void DeferredLightingEffect::renderQuad(gpu::Batch& batch, const glm::vec3& minCorner, const glm::vec3& maxCorner,
-                                        const glm::vec4& color) {
-    bindSimpleProgram(batch);
-    DependencyManager::get<GeometryCache>()->renderQuad(batch, minCorner, maxCorner, color);
-}
-
-void DeferredLightingEffect::renderLine(gpu::Batch& batch, const glm::vec3& p1, const glm::vec3& p2,
-                                        const glm::vec4& color1, const glm::vec4& color2) {
-    bindSimpleProgram(batch);
-    DependencyManager::get<GeometryCache>()->renderLine(batch, p1, p2, color1, color2);
-}
-
-void DeferredLightingEffect::addPointLight(const glm::vec3& position, float radius, const glm::vec3& color,
-        float intensity) {
-    addSpotLight(position, radius, color, intensity);    
-}
-
-void DeferredLightingEffect::addSpotLight(const glm::vec3& position, float radius, const glm::vec3& color,
-    float intensity, const glm::quat& orientation, float exponent, float cutoff) {
-    
-    unsigned int lightID = _pointLights.size() + _spotLights.size() + _globalLights.size();
-    if (lightID >= _allocatedLights.size()) {
-        _allocatedLights.push_back(std::make_shared<model::Light>());
-    }
-    model::LightPointer lp = _allocatedLights[lightID];
-
-    lp->setPosition(position);
-    lp->setMaximumRadius(radius);
-    lp->setColor(color);
-    lp->setIntensity(intensity);
-    //lp->setShowContour(quadraticAttenuation);
-
-    if (exponent == 0.0f && cutoff == PI) {
-        lp->setType(model::Light::POINT);
-        _pointLights.push_back(lightID);
-        
-    } else {
-        lp->setOrientation(orientation);
-        lp->setSpotAngle(cutoff);
-        lp->setSpotExponent(exponent);
-        lp->setType(model::Light::SPOT);
-        _spotLights.push_back(lightID);
-    }
-}
-
-void DeferredLightingEffect::prepare(RenderArgs* args) {
-    gpu::Batch batch;
-    batch.enableStereo(false);
-
-    batch.setStateScissorRect(args->_viewport);
-
-    auto primaryFbo = DependencyManager::get<FramebufferCache>()->getPrimaryFramebuffer();
-
-    batch.setFramebuffer(primaryFbo);
-    // clear the normal and specular buffers
-    batch.clearColorFramebuffer(gpu::Framebuffer::BUFFER_COLOR1, glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), true);
-    const float MAX_SPECULAR_EXPONENT = 128.0f;
-    batch.clearColorFramebuffer(gpu::Framebuffer::BUFFER_COLOR2, glm::vec4(0.0f, 0.0f, 0.0f, 1.0f / MAX_SPECULAR_EXPONENT), true);
-
-    args->_context->render(batch);
-}
-
-gpu::FramebufferPointer _copyFBO;
-
-void DeferredLightingEffect::render(RenderArgs* args) {
-    gpu::Batch batch;
-
-    // Allocate the parameters buffer used by all the deferred shaders
-    if (!_deferredTransformBuffer[0]._buffer) {
-        DeferredTransform parameters;
-        _deferredTransformBuffer[0] = gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(DeferredTransform), (const gpu::Byte*) &parameters));
-        _deferredTransformBuffer[1] = gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(DeferredTransform), (const gpu::Byte*) &parameters));
-    }
-
-    // Framebuffer copy operations cannot function as multipass stereo operations.  
-    batch.enableStereo(false);
-
-    // perform deferred lighting, rendering to free fbo
-    auto framebufferCache = DependencyManager::get<FramebufferCache>();
-    
-    QSize framebufferSize = framebufferCache->getFrameBufferSize();
-    
-    // binding the first framebuffer
-    _copyFBO = framebufferCache->getFramebuffer();
-    batch.setFramebuffer(_copyFBO);
-
-    // Clearing it
-    batch.setViewportTransform(args->_viewport);
-    batch.setStateScissorRect(args->_viewport);
-    batch.clearColorFramebuffer(_copyFBO->getBufferMask(), glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), true);
-
-    // BInd the G-Buffer surfaces
-    batch.setResourceTexture(0, framebufferCache->getPrimaryColorTexture());
-    batch.setResourceTexture(1, framebufferCache->getPrimaryNormalTexture());
-    batch.setResourceTexture(2, framebufferCache->getPrimarySpecularTexture());
-    batch.setResourceTexture(3, framebufferCache->getPrimaryDepthTexture());
-
-    // THe main viewport is assumed to be the mono viewport (or the 2 stereo faces side by side within that viewport)
-    auto monoViewport = args->_viewport;
-    float sMin = args->_viewport.x / (float)framebufferSize.width();
-    float sWidth = args->_viewport.z / (float)framebufferSize.width();
-    float tMin = args->_viewport.y / (float)framebufferSize.height();
-    float tHeight = args->_viewport.w / (float)framebufferSize.height();
-
-    // The view frustum is the mono frustum base
-    auto viewFrustum = args->_viewFrustum;
-
-    // Eval the mono projection
-    mat4 monoProjMat;
-    viewFrustum->evalProjectionMatrix(monoProjMat);
-
-    // The mono view transform
-    Transform monoViewTransform;
-    viewFrustum->evalViewTransform(monoViewTransform);
-
-    // THe mono view matrix coming from the mono view transform
-    glm::mat4 monoViewMat;
-    monoViewTransform.getMatrix(monoViewMat);
-
-    // Running in stero ?
-    bool isStereo = args->_context->isStereo();
-    int numPasses = 1;
-
-    mat4 projMats[2];
-    Transform viewTransforms[2];
-    ivec4 viewports[2];
-    vec4 clipQuad[2];
-    vec2 screenBottomLeftCorners[2];
-    vec2 screenTopRightCorners[2];
-    vec4 fetchTexcoordRects[2];
-
-    DeferredTransform deferredTransforms[2];
-    auto geometryCache = DependencyManager::get<GeometryCache>();
-
-    if (isStereo) {
-        numPasses = 2;
-
-        mat4 eyeViews[2];
-        args->_context->getStereoProjections(projMats);
-        args->_context->getStereoViews(eyeViews);
-
-        float halfWidth = 0.5 * sWidth;
-
-        for (int i = 0; i < numPasses; i++) {
-            // In stereo, the 2 sides are layout side by side in the mono viewport and their width is half
-            int sideWidth = monoViewport.z >> 1;
-            viewports[i] = ivec4(monoViewport.x + (i * sideWidth), monoViewport.y, sideWidth, monoViewport.w);
-
-            deferredTransforms[i].projection = projMats[i];
-
+//
+//  DeferredLightingEffect.cpp
+//  interface/src/renderer
+//
+//  Created by Andrzej Kapolka on 9/11/14.
+//  Copyright 2014 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+
+#include "DeferredLightingEffect.h"
+
+#include <GLMHelpers.h>
+#include <PathUtils.h>
+#include <ViewFrustum.h>
+
+#include <gpu/Batch.h>
+#include <gpu/Context.h>
+#include <gpu/StandardShaderLib.h>
+
+#include "AbstractViewStateInterface.h"
+#include "GeometryCache.h"
+#include "TextureCache.h"
+#include "FramebufferCache.h"
+
+
+#include "simple_vert.h"
+#include "simple_textured_frag.h"
+#include "simple_textured_emisive_frag.h"
+
+#include "deferred_light_vert.h"
+#include "deferred_light_limited_vert.h"
+#include "deferred_light_spot_vert.h"
+
+#include "directional_light_frag.h"
+#include "directional_light_shadow_map_frag.h"
+#include "directional_light_cascaded_shadow_map_frag.h"
+
+#include "directional_ambient_light_frag.h"
+#include "directional_ambient_light_shadow_map_frag.h"
+#include "directional_ambient_light_cascaded_shadow_map_frag.h"
+
+#include "directional_skybox_light_frag.h"
+#include "directional_skybox_light_shadow_map_frag.h"
+#include "directional_skybox_light_cascaded_shadow_map_frag.h"
+
+#include "point_light_frag.h"
+#include "spot_light_frag.h"
+
+static const std::string glowIntensityShaderHandle = "glowIntensity";
+
+struct LightLocations {  // integer locations/slots for one deferred light program (presumably filled in by loadLightProgram - confirm)
+    int shadowDistances;
+    int shadowScale;
+    int radius;
+    int ambientSphere;
+    int lightBufferUnit;
+    int atmosphereBufferUnit;
+    int texcoordMat;
+    int coneParam;
+    int deferredTransformBuffer;
+};
+
+static void loadLightProgram(const char* vertSource, const char* fragSource, bool lightVolume, gpu::PipelinePointer& program, LightLocationsPtr& locations);
+
+
+gpu::PipelinePointer DeferredLightingEffect::getPipeline(SimpleProgramKey config) {
+    // Return the pipeline already stored under this key, if there is one.
+    auto cached = _simplePrograms.find(config);
+    if (cached != _simplePrograms.end()) {
+        return cached.value();
+    }
+
+    // Nothing stored yet: assemble the render state the key asks for.
+    auto pipelineState = std::make_shared<gpu::State>();
+    pipelineState->setCullMode(config.isCulled() ? gpu::State::CULL_BACK : gpu::State::CULL_NONE);
+    pipelineState->setDepthTest(true, true, gpu::LESS_EQUAL);
+    if (config.hasDepthBias()) {
+        pipelineState->setDepthBias(1.0f);
+        pipelineState->setDepthBiasSlopeScale(1.0f);
+    }
+    // First argument is false (presumably blending stays off - confirm); the factors are supplied anyway.
+    pipelineState->setBlendFunction(false,
+                            gpu::State::SRC_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::INV_SRC_ALPHA,
+                            gpu::State::FACTOR_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
+
+    // Emissive keys use the emissive program, every other key the simple one; remember the result.
+    gpu::ShaderPointer shader = config.isEmissive() ? _emissiveShader : _simpleShader;
+    gpu::PipelinePointer built = gpu::PipelinePointer(gpu::Pipeline::create(shader, pipelineState));
+    _simplePrograms.insert(config, built);
+    return built;
+}
+
+void DeferredLightingEffect::init(AbstractViewStateInterface* viewState) {  // sets up the shaders, light programs, blit pipeline and first global light
+    auto VS = gpu::ShaderPointer(gpu::Shader::createVertex(std::string(simple_vert)));
+    auto PS = gpu::ShaderPointer(gpu::Shader::createPixel(std::string(simple_textured_frag)));
+    auto PSEmissive = gpu::ShaderPointer(gpu::Shader::createPixel(std::string(simple_textured_emisive_frag)));
+    // Two programs share the vertex shader: plain textured and textured-emissive.
+    _simpleShader = gpu::ShaderPointer(gpu::Shader::createProgram(VS, PS));
+    _emissiveShader = gpu::ShaderPointer(gpu::Shader::createProgram(VS, PSEmissive));
+    // Both programs get the "normalFittingMap" binding at NORMAL_FITTING_MAP_SLOT.
+    gpu::Shader::BindingSet slotBindings;
+    slotBindings.insert(gpu::Shader::Binding(std::string("normalFittingMap"), DeferredLightingEffect::NORMAL_FITTING_MAP_SLOT));
+    gpu::Shader::makeProgram(*_simpleShader, slotBindings);
+    gpu::Shader::makeProgram(*_emissiveShader, slotBindings);
+    // Each light program loaded below gets its own LightLocations record.
+    _viewState = viewState;
+    _directionalLightLocations = std::make_shared<LightLocations>();
+    _directionalLightShadowMapLocations = std::make_shared<LightLocations>();
+    _directionalLightCascadedShadowMapLocations = std::make_shared<LightLocations>();
+    _directionalAmbientSphereLightLocations = std::make_shared<LightLocations>();
+    _directionalAmbientSphereLightShadowMapLocations = std::make_shared<LightLocations>();
+    _directionalAmbientSphereLightCascadedShadowMapLocations = std::make_shared<LightLocations>();
+    _directionalSkyboxLightLocations = std::make_shared<LightLocations>();
+    _directionalSkyboxLightShadowMapLocations = std::make_shared<LightLocations>();
+    _directionalSkyboxLightCascadedShadowMapLocations = std::make_shared<LightLocations>();
+    _pointLightLocations = std::make_shared<LightLocations>();
+    _spotLightLocations = std::make_shared<LightLocations>();
+    // Directional light: plain, shadow-map and cascaded-shadow-map fragment shaders (lightVolume = false).
+    loadLightProgram(deferred_light_vert, directional_light_frag, false, _directionalLight, _directionalLightLocations);
+    loadLightProgram(deferred_light_vert, directional_light_shadow_map_frag, false, _directionalLightShadowMap,
+        _directionalLightShadowMapLocations);
+    loadLightProgram(deferred_light_vert, directional_light_cascaded_shadow_map_frag, false, _directionalLightCascadedShadowMap,
+        _directionalLightCascadedShadowMapLocations);
+    // Same three variants for the directional + ambient sphere shaders.
+    loadLightProgram(deferred_light_vert, directional_ambient_light_frag, false, _directionalAmbientSphereLight, _directionalAmbientSphereLightLocations);
+    loadLightProgram(deferred_light_vert, directional_ambient_light_shadow_map_frag, false, _directionalAmbientSphereLightShadowMap,
+        _directionalAmbientSphereLightShadowMapLocations);
+    loadLightProgram(deferred_light_vert, directional_ambient_light_cascaded_shadow_map_frag, false, _directionalAmbientSphereLightCascadedShadowMap,
+        _directionalAmbientSphereLightCascadedShadowMapLocations);
+    // Same three variants for the directional + skybox shaders.
+    loadLightProgram(deferred_light_vert, directional_skybox_light_frag, false, _directionalSkyboxLight, _directionalSkyboxLightLocations);
+    loadLightProgram(deferred_light_vert, directional_skybox_light_shadow_map_frag, false, _directionalSkyboxLightShadowMap,
+        _directionalSkyboxLightShadowMapLocations);
+    loadLightProgram(deferred_light_vert, directional_skybox_light_cascaded_shadow_map_frag, false, _directionalSkyboxLightCascadedShadowMap,
+        _directionalSkyboxLightCascadedShadowMapLocations);
+
+    // Point and spot lights use their own vertex shaders and pass lightVolume = true.
+    loadLightProgram(deferred_light_limited_vert, point_light_frag, true, _pointLight, _pointLightLocations);
+    loadLightProgram(deferred_light_spot_vert, spot_light_frag, true, _spotLight, _spotLightLocations);
+
+    {
+        // Pipeline stored in _blitLightBuffer: standard viewport-quad vertex shader + draw-texture pixel shader,
+        // blend function set with first argument true; the fourth color-write flag is false (presumably alpha writes off - confirm).
+        auto blitProgram = gpu::StandardShaderLib::getProgram(gpu::StandardShaderLib::getDrawViewportQuadTransformTexcoordVS, gpu::StandardShaderLib::getDrawTexturePS);
+        gpu::Shader::makeProgram(*blitProgram);
+        auto blitState = std::make_shared<gpu::State>();
+        blitState->setBlendFunction(true,
+                                gpu::State::SRC_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::INV_SRC_ALPHA,
+                                gpu::State::FACTOR_ALPHA, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
+        blitState->setColorWriteMask(true, true, true, false);
+        _blitLightBuffer = gpu::PipelinePointer(gpu::Pipeline::create(blitProgram, blitState));
+    }
+
+    // Allocate a global light representing the Global Directional light casting shadow (the sun) and the ambient light
+    _globalLights.push_back(0);
+    _allocatedLights.push_back(std::make_shared<model::Light>());
+
+    model::LightPointer lp = _allocatedLights[0];
+    // Initial settings of that light: direction -(1,1,1), white, intensity 1, SUN type.
+    lp->setDirection(-glm::vec3(1.0f, 1.0f, 1.0f));
+    lp->setColor(glm::vec3(1.0f));
+    lp->setIntensity(1.0f);
+    lp->setType(model::Light::SUN);
+    lp->setAmbientSpherePreset(gpu::SphericalHarmonics::Preset(_ambientLightMode % gpu::SphericalHarmonics::NUM_PRESET));  // NOTE(review): if _ambientLightMode can be negative here the preset index may be invalid - confirm
+}
+
+
+
+gpu::PipelinePointer DeferredLightingEffect::bindSimpleProgram(gpu::Batch& batch, bool textured, bool culled,
+                                               bool emmisive, bool depthBias) {
+    // Fetch the pipeline for this flag combination from getPipeline() and set it on the batch.
+    SimpleProgramKey key{textured, culled, emmisive, depthBias};
+    gpu::PipelinePointer pipeline = getPipeline(key);
+    batch.setPipeline(pipeline);
+
+    // Pick the program by the same emissive test getPipeline() uses and set its "glowIntensity" uniform to 1.
+    const gpu::ShaderPointer& shader = key.isEmissive() ? _emissiveShader : _simpleShader;
+    batch._glUniform1f(shader->getUniforms().findLocation("glowIntensity"), 1.0f);
+    // Untextured draws keep the textured pipeline, so slot 0 receives the white texture instead.
+    auto textureCache = DependencyManager::get<TextureCache>();
+    if (!key.isTextured()) {
+        batch.setResourceTexture(0, textureCache->getWhiteTexture());
+    }
+    batch.setResourceTexture(NORMAL_FITTING_MAP_SLOT, textureCache->getNormalFittingTexture());
+    return pipeline;
+}
+
+
+void DeferredLightingEffect::renderSolidSphere(gpu::Batch& batch, float radius, int slices, int stacks, const glm::vec4& color) {
+    bindSimpleProgram(batch);  // bind the simple pipeline, leaving bindSimpleProgram's other arguments at their defaults
+    DependencyManager::get<GeometryCache>()->renderSphere(batch, radius, slices, stacks, color);  // the sphere draw itself is delegated to GeometryCache
+}
+
+void DeferredLightingEffect::renderWireSphere(gpu::Batch& batch, float radius, int slices, int stacks, const glm::vec4& color) {
+    bindSimpleProgram(batch);  // bind the simple pipeline, leaving bindSimpleProgram's other arguments at their defaults
+    DependencyManager::get<GeometryCache>()->renderSphere(batch, radius, slices, stacks, color, false);  // trailing false: presumably selects the non-solid (wire) sphere - confirm in GeometryCache
+}
+
+uint32_t toCompactColor(const glm::vec4& color) {
+    // Packs RGBA into one 32-bit word (x in the low byte, w in the high byte); channels saturate to [0,1] instead of wrapping.
+    const glm::vec4 c = glm::clamp(color, 0.0f, 1.0f);
+    // Unsigned arithmetic throughout, so the alpha byte is never shifted into the sign bit of an int.
+    return uint32_t(c.x * 255.0f) |
+        (uint32_t(c.y * 255.0f) << 8) | (uint32_t(c.z * 255.0f) << 16) | (uint32_t(c.w * 255.0f) << 24);
+}
+
+void DeferredLightingEffect::renderSolidCubeInstance(gpu::Batch& batch, const Transform& xfm, const glm::vec4& color) {
+    // Queues one solid cube for instanced drawing; the draw is issued by the named call registered below.
+    // The function name keys the named batch data; the two indices select its per-instance buffers.
+    static const std::string INSTANCE_NAME = __FUNCTION__;
+    static const size_t TRANSFORM_BUFFER = 0;
+    static const size_t COLOR_BUFFER = 1;
+
+    // Append this cube's matrix, then its packed color, to the named buffers.
+    glm::mat4 modelMatrix;
+    batch.getNamedBuffer(INSTANCE_NAME, TRANSFORM_BUFFER)->append(xfm.getMatrix(modelMatrix));
+    batch.getNamedBuffer(INSTANCE_NAME, COLOR_BUFFER)->append(toCompactColor(color));
+
+    // The callback binds the simple pipeline, sets the "Instanced" uniform to 1 for the draw of
+    // data._count instances, then sets it back to 0.
+    batch.setupNamedCalls(INSTANCE_NAME, [=](gpu::Batch& batch, gpu::Batch::NamedBatchData& data) {
+        auto pipeline = bindSimpleProgram(batch);
+        auto instancedFlag = pipeline->getProgram()->getUniforms().findLocation("Instanced");
+
+        batch._glUniform1i(instancedFlag, 1);
+        DependencyManager::get<GeometryCache>()->renderSolidCubeInstances(batch, data._count,
+            data._buffers[TRANSFORM_BUFFER], data._buffers[COLOR_BUFFER]);
+        batch._glUniform1i(instancedFlag, 0);
+    });
+}
+
+void DeferredLightingEffect::renderSolidCube(gpu::Batch& batch, float size, const glm::vec4& color) {
+    bindSimpleProgram(batch);  // bind the simple pipeline, leaving bindSimpleProgram's other arguments at their defaults
+    DependencyManager::get<GeometryCache>()->renderSolidCube(batch, size, color);  // the cube draw itself is delegated to GeometryCache
+}
+
+void DeferredLightingEffect::renderWireCube(gpu::Batch& batch, float size, const glm::vec4& color) {
+    bindSimpleProgram(batch);  // bind the simple pipeline, leaving bindSimpleProgram's other arguments at their defaults
+    DependencyManager::get<GeometryCache>()->renderWireCube(batch, size, color);  // the wire-cube draw itself is delegated to GeometryCache
+}
+
+void DeferredLightingEffect::renderQuad(gpu::Batch& batch, const glm::vec3& minCorner, const glm::vec3& maxCorner,
+                                        const glm::vec4& color) {
+    bindSimpleProgram(batch);  // bind the simple pipeline, leaving bindSimpleProgram's other arguments at their defaults
+    DependencyManager::get<GeometryCache>()->renderQuad(batch, minCorner, maxCorner, color);  // the quad draw itself is delegated to GeometryCache
+}
+
+void DeferredLightingEffect::renderLine(gpu::Batch& batch, const glm::vec3& p1, const glm::vec3& p2,
+                                        const glm::vec4& color1, const glm::vec4& color2) {
+    bindSimpleProgram(batch);  // bind the simple pipeline, leaving bindSimpleProgram's other arguments at their defaults
+    DependencyManager::get<GeometryCache>()->renderLine(batch, p1, p2, color1, color2);  // the line draw (one color per endpoint argument) is delegated to GeometryCache
+}
+
+void DeferredLightingEffect::addPointLight(const glm::vec3& position, float radius, const glm::vec3& color,
+        float intensity) {
+    addSpotLight(position, radius, color, intensity);  // orientation/exponent/cutoff take addSpotLight's declared defaults (presumably exponent 0 and cutoff PI, its point-light case - confirm in the header)
+}
+
+void DeferredLightingEffect::addSpotLight(const glm::vec3& position, float radius, const glm::vec3& color,
+    float intensity, const glm::quat& orientation, float exponent, float cutoff) {
+    // The new light's index is the count of lights already listed (point + spot + global); an existing Light at that index is reused.
+    unsigned int lightID = _pointLights.size() + _spotLights.size() + _globalLights.size();
+    if (lightID >= _allocatedLights.size()) {
+        _allocatedLights.push_back(std::make_shared<model::Light>());
+    }
+    model::LightPointer light = _allocatedLights[lightID];
+
+    // Settings shared by point and spot lights.
+    light->setPosition(position);
+    light->setMaximumRadius(radius);
+    light->setColor(color);
+    light->setIntensity(intensity);
+
+    // A zero exponent together with a cutoff of exactly PI means a point light; anything else is a spot light.
+    if (exponent == 0.0f && cutoff == PI) {
+        light->setType(model::Light::POINT);
+        _pointLights.push_back(lightID);
+        return;
+    }
+    light->setOrientation(orientation);
+    light->setSpotAngle(cutoff);
+    light->setSpotExponent(exponent);
+    light->setType(model::Light::SPOT);
+    _spotLights.push_back(lightID);
+}
+
+void DeferredLightingEffect::prepare(RenderArgs* args) {
+    // Records and submits one batch that clears two color attachments of the primary framebuffer.
+    const float MAX_SPECULAR_EXPONENT = 128.0f;
+    const glm::vec4 normalClear(0.0f, 0.0f, 0.0f, 0.0f);
+    const glm::vec4 specularClear(0.0f, 0.0f, 0.0f, 1.0f / MAX_SPECULAR_EXPONENT);
+
+    gpu::Batch batch;
+    batch.enableStereo(false);
+    batch.setStateScissorRect(args->_viewport);
+
+    auto primaryFbo = DependencyManager::get<FramebufferCache>()->getPrimaryFramebuffer();
+    batch.setFramebuffer(primaryFbo);
+    batch.clearColorFramebuffer(gpu::Framebuffer::BUFFER_COLOR1, normalClear, true);  // normal buffer
+    batch.clearColorFramebuffer(gpu::Framebuffer::BUFFER_COLOR2, specularClear, true);  // specular buffer
+    args->_context->render(batch);
+}
+
+gpu::FramebufferPointer _copyFBO;  // file-scope framebuffer handle, reassigned in render() from FramebufferCache::getFramebuffer()
+
+void DeferredLightingEffect::render(RenderArgs* args) {
+    gpu::Batch batch;
+
+    // Allocate the parameters buffer used by all the deferred shaders
+    if (!_deferredTransformBuffer[0]._buffer) {
+        DeferredTransform parameters;
+        _deferredTransformBuffer[0] = gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(DeferredTransform), (const gpu::Byte*) &parameters));
+        _deferredTransformBuffer[1] = gpu::BufferView(std::make_shared<gpu::Buffer>(sizeof(DeferredTransform), (const gpu::Byte*) &parameters));
+    }
+
+    // Framebuffer copy operations cannot function as multipass stereo operations.  
+    batch.enableStereo(false);
+
+    // perform deferred lighting, rendering to free fbo
+    auto framebufferCache = DependencyManager::get<FramebufferCache>();
+    
+    QSize framebufferSize = framebufferCache->getFrameBufferSize();
+    
+    // binding the first framebuffer
+    _copyFBO = framebufferCache->getFramebuffer();
+    batch.setFramebuffer(_copyFBO);
+
+    // Clearing it
+    batch.setViewportTransform(args->_viewport);
+    batch.setStateScissorRect(args->_viewport);
+    batch.clearColorFramebuffer(_copyFBO->getBufferMask(), glm::vec4(0.0f, 0.0f, 0.0f, 0.0f), true);
+
+    // BInd the G-Buffer surfaces
+    batch.setResourceTexture(0, framebufferCache->getPrimaryColorTexture());
+    batch.setResourceTexture(1, framebufferCache->getPrimaryNormalTexture());
+    batch.setResourceTexture(2, framebufferCache->getPrimarySpecularTexture());
+    batch.setResourceTexture(3, framebufferCache->getPrimaryDepthTexture());
+
+    // THe main viewport is assumed to be the mono viewport (or the 2 stereo faces side by side within that viewport)
+    auto monoViewport = args->_viewport;
+    float sMin = args->_viewport.x / (float)framebufferSize.width();
+    float sWidth = args->_viewport.z / (float)framebufferSize.width();
+    float tMin = args->_viewport.y / (float)framebufferSize.height();
+    float tHeight = args->_viewport.w / (float)framebufferSize.height();
+
+    // The view frustum is the mono frustum base
+    auto viewFrustum = args->_viewFrustum;
+
+    // Eval the mono projection
+    mat4 monoProjMat;
+    viewFrustum->evalProjectionMatrix(monoProjMat);
+
+    // The mono view transform
+    Transform monoViewTransform;
+    viewFrustum->evalViewTransform(monoViewTransform);
+
+    // THe mono view matrix coming from the mono view transform
+    glm::mat4 monoViewMat;
+    monoViewTransform.getMatrix(monoViewMat);
+
+    // Running in stero ?
+    bool isStereo = args->_context->isStereo();
+    int numPasses = 1;
+
+    mat4 projMats[2];
+    Transform viewTransforms[2];
+    ivec4 viewports[2];
+    vec4 clipQuad[2];
+    vec2 screenBottomLeftCorners[2];
+    vec2 screenTopRightCorners[2];
+    vec4 fetchTexcoordRects[2];
+
+    DeferredTransform deferredTransforms[2];
+    auto geometryCache = DependencyManager::get<GeometryCache>();
+
+    if (isStereo) {
+        numPasses = 2;
+
+        mat4 eyeViews[2];
+        args->_context->getStereoProjections(projMats);
+        args->_context->getStereoViews(eyeViews);
+
+        float halfWidth = 0.5 * sWidth;
+
+        for (int i = 0; i < numPasses; i++) {
+            // In stereo, the 2 sides are layout side by side in the mono viewport and their width is half
+            int sideWidth = monoViewport.z >> 1;
+            viewports[i] = ivec4(monoViewport.x + (i * sideWidth), monoViewport.y, sideWidth, monoViewport.w);
+
+            deferredTransforms[i].projection = projMats[i];
+
             auto sideViewMat =  eyeViews[i] * monoViewMat;
-            viewTransforms[i].evalFromRawMatrix(sideViewMat);
-            deferredTransforms[i].viewInverse = sideViewMat;
-
-            deferredTransforms[i].stereoSide = (i == 0 ? -1.0f : 1.0f);
-
-            clipQuad[i] = glm::vec4(sMin + i * halfWidth, tMin, halfWidth, tHeight);
-            screenBottomLeftCorners[i] = glm::vec2(-1.0f + i * 1.0f, -1.0f);
-            screenTopRightCorners[i] = glm::vec2(i * 1.0f, 1.0f);
-
-            fetchTexcoordRects[i] = glm::vec4(sMin + i * halfWidth, tMin, halfWidth, tHeight);
-        }
-    } else {
-
-        viewports[0] = monoViewport;
-        projMats[0] = monoProjMat;
-
-        deferredTransforms[0].projection = monoProjMat;
-     
-        deferredTransforms[0].viewInverse = monoViewMat;
-        viewTransforms[0] = monoViewTransform;
-
-        deferredTransforms[0].stereoSide = 0.0f;
-
-        clipQuad[0] = glm::vec4(sMin, tMin, sWidth, tHeight);
-        screenBottomLeftCorners[0] = glm::vec2(-1.0f, -1.0f);
-        screenTopRightCorners[0] = glm::vec2(1.0f, 1.0f);
-
-        fetchTexcoordRects[0] = glm::vec4(sMin, tMin, sWidth, tHeight);
-    }
-
-    auto eyePoint = viewFrustum->getPosition();
-    float nearRadius = glm::distance(eyePoint, viewFrustum->getNearTopLeft());
-
-
-    for (int side = 0; side < numPasses; side++) {
-        // Render in this side's viewport
-        batch.setViewportTransform(viewports[side]);
-        batch.setStateScissorRect(viewports[side]);
-
-        // Sync and Bind the correct DeferredTransform ubo
-        _deferredTransformBuffer[side]._buffer->setSubData(0, sizeof(DeferredTransform), (const gpu::Byte*) &deferredTransforms[side]);
-        batch.setUniformBuffer(_directionalLightLocations->deferredTransformBuffer, _deferredTransformBuffer[side]);
-
-        glm::vec2 topLeft(-1.0f, -1.0f);
-        glm::vec2 bottomRight(1.0f, 1.0f);
-        glm::vec2 texCoordTopLeft(clipQuad[side].x, clipQuad[side].y);
-        glm::vec2 texCoordBottomRight(clipQuad[side].x + clipQuad[side].z, clipQuad[side].y + clipQuad[side].w);
-
-        // First Global directional light and ambient pass
-        {
-            bool useSkyboxCubemap = (_skybox) && (_skybox->getCubemap());
-
-            auto& program = _directionalLight;
-            LightLocationsPtr locations = _directionalLightLocations;
-
-            // TODO: At some point bring back the shadows...
-            // Setup the global directional pass pipeline
-            {
-                if (useSkyboxCubemap) {
-                    program = _directionalSkyboxLight;
-                    locations = _directionalSkyboxLightLocations;
-                } else if (_ambientLightMode > -1) {
-                    program = _directionalAmbientSphereLight;
-                    locations = _directionalAmbientSphereLightLocations;
-                }
-                batch.setPipeline(program);
-            }
-
-            { // Setup the global lighting
-                auto globalLight = _allocatedLights[_globalLights.front()];
-    
-                if (locations->ambientSphere >= 0) {
-                    gpu::SphericalHarmonics sh = globalLight->getAmbientSphere();
-                    if (useSkyboxCubemap && _skybox->getCubemap()->getIrradiance()) {
-                        sh = (*_skybox->getCubemap()->getIrradiance());
-                    }
-                    for (int i =0; i <gpu::SphericalHarmonics::NUM_COEFFICIENTS; i++) {
-                       batch._glUniform4fv(locations->ambientSphere + i, 1, (const float*) (&sh) + i * 4);
-                    }
-                }
-    
-                if (useSkyboxCubemap) {
-                    batch.setResourceTexture(5, _skybox->getCubemap());
-                }
-
-                if (locations->lightBufferUnit >= 0) {
-                    batch.setUniformBuffer(locations->lightBufferUnit, globalLight->getSchemaBuffer());
-                }
-        
-                if (_atmosphere && (locations->atmosphereBufferUnit >= 0)) {
-                    batch.setUniformBuffer(locations->atmosphereBufferUnit, _atmosphere->getDataBuffer());
-                }
-            }
-
-            {
-                batch.setModelTransform(Transform());
-                batch.setProjectionTransform(glm::mat4());
-                batch.setViewTransform(Transform());
-
-                glm::vec4 color(1.0f, 1.0f, 1.0f, 1.0f);
-               geometryCache->renderQuad(batch, topLeft, bottomRight, texCoordTopLeft, texCoordBottomRight, color);
-            }
-
-            if (useSkyboxCubemap) {
-                batch.setResourceTexture(5, nullptr);
-            }
-        }
-
-        auto texcoordMat = glm::mat4();
-      /*  texcoordMat[0] = glm::vec4(sWidth / 2.0f, 0.0f, 0.0f, sMin + sWidth / 2.0f);
-        texcoordMat[1] = glm::vec4(0.0f, tHeight / 2.0f, 0.0f, tMin + tHeight / 2.0f);
-       */ texcoordMat[0] = glm::vec4(fetchTexcoordRects[side].z / 2.0f, 0.0f, 0.0f, fetchTexcoordRects[side].x + fetchTexcoordRects[side].z / 2.0f);
-        texcoordMat[1] = glm::vec4(0.0f, fetchTexcoordRects[side].w / 2.0f, 0.0f, fetchTexcoordRects[side].y + fetchTexcoordRects[side].w / 2.0f);
-        texcoordMat[2] = glm::vec4(0.0f, 0.0f, 1.0f, 0.0f);
-        texcoordMat[3] = glm::vec4(0.0f, 0.0f, 0.0f, 1.0f);
-
-        // enlarge the scales slightly to account for tesselation
-        const float SCALE_EXPANSION = 0.05f;
-
-
-        batch.setProjectionTransform(projMats[side]);
-        batch.setViewTransform(viewTransforms[side]);
-
-        // Splat Point lights
-        if (!_pointLights.empty()) {
-            batch.setPipeline(_pointLight);
-
-            batch._glUniformMatrix4fv(_pointLightLocations->texcoordMat, 1, false, reinterpret_cast< const float* >(&texcoordMat));
-
-            for (auto lightID : _pointLights) {
-                auto& light = _allocatedLights[lightID];
-                // IN DEBUG:  light->setShowContour(true);
-                if (_pointLightLocations->lightBufferUnit >= 0) {
-                    batch.setUniformBuffer(_pointLightLocations->lightBufferUnit, light->getSchemaBuffer());
-                }
-
-                float expandedRadius = light->getMaximumRadius() * (1.0f + SCALE_EXPANSION);
-                // TODO: We shouldn;t have to do that test and use a different volume geometry for when inside the vlight volume,
-                // we should be able to draw thre same geometry use DepthClamp but for unknown reason it's s not working...
-                if (glm::distance(eyePoint, glm::vec3(light->getPosition())) < expandedRadius + nearRadius) {
-                    Transform model;
-                    model.setTranslation(glm::vec3(0.0f, 0.0f, -1.0f));
-                    batch.setModelTransform(model);
-                    batch.setViewTransform(Transform());
-                    batch.setProjectionTransform(glm::mat4());
-
-                    glm::vec4 color(1.0f, 1.0f, 1.0f, 1.0f);
-                    DependencyManager::get<GeometryCache>()->renderQuad(batch, topLeft, bottomRight, texCoordTopLeft, texCoordBottomRight, color);
-                
-                    batch.setProjectionTransform(projMats[side]);
-                    batch.setViewTransform(viewTransforms[side]);
-                } else {
-                    Transform model;
-                    model.setTranslation(glm::vec3(light->getPosition().x, light->getPosition().y, light->getPosition().z));
-                    batch.setModelTransform(model);
-                    geometryCache->renderSphere(batch, expandedRadius, 32, 32, glm::vec4(1.0f, 1.0f, 1.0f, 1.0f));
-                }
-            }
-        }
-    
-        // Splat spot lights
-        if (!_spotLights.empty()) {
-            batch.setPipeline(_spotLight);
-
-            batch._glUniformMatrix4fv(_spotLightLocations->texcoordMat, 1, false, reinterpret_cast< const float* >(&texcoordMat));
-
-            for (auto lightID : _spotLights) {
-                auto light = _allocatedLights[lightID];
-                // IN DEBUG:  light->setShowContour(true);
-
-                batch.setUniformBuffer(_spotLightLocations->lightBufferUnit, light->getSchemaBuffer());
-
-                auto eyeLightPos = eyePoint - light->getPosition();
-                auto eyeHalfPlaneDistance = glm::dot(eyeLightPos, light->getDirection());
-
-                const float TANGENT_LENGTH_SCALE = 0.666f;
-                glm::vec4 coneParam(light->getSpotAngleCosSin(), TANGENT_LENGTH_SCALE * tanf(0.5f * light->getSpotAngle()), 1.0f);
-
-                float expandedRadius = light->getMaximumRadius() * (1.0f + SCALE_EXPANSION);
-                // TODO: We shouldn;t have to do that test and use a different volume geometry for when inside the vlight volume,
-                // we should be able to draw thre same geometry use DepthClamp but for unknown reason it's s not working...
-                if ((eyeHalfPlaneDistance > -nearRadius) &&
-                    (glm::distance(eyePoint, glm::vec3(light->getPosition())) < expandedRadius + nearRadius)) {
-                    coneParam.w = 0.0f;
-                    batch._glUniform4fv(_spotLightLocations->coneParam, 1, reinterpret_cast< const float* >(&coneParam));
-
-                    Transform model;
-                    model.setTranslation(glm::vec3(0.0f, 0.0f, -1.0f));
-                    batch.setModelTransform(model);
-                    batch.setViewTransform(Transform());
-                    batch.setProjectionTransform(glm::mat4());
-                
-                    glm::vec4 color(1.0f, 1.0f, 1.0f, 1.0f);
-                    DependencyManager::get<GeometryCache>()->renderQuad(batch, topLeft, bottomRight, texCoordTopLeft, texCoordBottomRight, color);
-                
-                    batch.setProjectionTransform( projMats[side]);
-                    batch.setViewTransform(viewTransforms[side]);
-                } else {
-                    coneParam.w = 1.0f;
-                    batch._glUniform4fv(_spotLightLocations->coneParam, 1, reinterpret_cast< const float* >(&coneParam));
-
-                    Transform model;
-                    model.setTranslation(light->getPosition());
-                    model.postRotate(light->getOrientation());
-                    model.postScale(glm::vec3(expandedRadius, expandedRadius, expandedRadius));
-
-                    batch.setModelTransform(model);
-                    auto mesh = getSpotLightMesh();
-
-                    batch.setIndexBuffer(mesh->getIndexBuffer());
-                    batch.setInputBuffer(0, mesh->getVertexBuffer());
-                    batch.setInputFormat(mesh->getVertexFormat());
-
-                    auto& part = mesh->getPartBuffer().get<model::Mesh::Part>();
-
-                    batch.drawIndexed(model::Mesh::topologyToPrimitive(part._topology), part._numIndices, part._startIndex);
-                }
-            }
-        }
-    }
-
-    // Probably not necessary in the long run because the gpu layer would unbound this texture if used as render target
-    batch.setResourceTexture(0, nullptr);
-    batch.setResourceTexture(1, nullptr);
-    batch.setResourceTexture(2, nullptr);
-    batch.setResourceTexture(3, nullptr);
-    batch.setUniformBuffer(_directionalLightLocations->deferredTransformBuffer, nullptr);
-
-    args->_context->render(batch);
-
-    // End of the Lighting pass
-    if (!_pointLights.empty()) {
-        _pointLights.clear();
-    }
-    if (!_spotLights.empty()) {
-        _spotLights.clear();
-    }
-}
-
-
-void DeferredLightingEffect::copyBack(RenderArgs* args) {
-    gpu::Batch batch;
-    batch.enableStereo(false);
-    auto framebufferCache = DependencyManager::get<FramebufferCache>();
-    QSize framebufferSize = framebufferCache->getFrameBufferSize();
-
-    // TODO why doesn't this blit work?  It only seems to affect a small area below the rear view mirror.
-  //  auto destFbo = framebufferCache->getPrimaryFramebuffer();
-    auto destFbo = framebufferCache->getPrimaryFramebufferDepthColor();
-//    gpu::Vec4i vp = args->_viewport;
-//    batch.blit(_copyFBO, vp, framebufferCache->getPrimaryFramebuffer(), vp);
-    batch.setFramebuffer(destFbo);
-    batch.setViewportTransform(args->_viewport);
-    batch.setProjectionTransform(glm::mat4());
-    batch.setViewTransform(Transform());
-    {
-        float sMin = args->_viewport.x / (float)framebufferSize.width();
-        float sWidth = args->_viewport.z / (float)framebufferSize.width();
-        float tMin = args->_viewport.y / (float)framebufferSize.height();
-        float tHeight = args->_viewport.w / (float)framebufferSize.height();
-        Transform model;
-        batch.setPipeline(_blitLightBuffer);
-        model.setTranslation(glm::vec3(sMin, tMin, 0.0));
-        model.setScale(glm::vec3(sWidth, tHeight, 1.0));
-        batch.setModelTransform(model);
-    }
-
-    batch.setResourceTexture(0, _copyFBO->getRenderBuffer(0));
-    batch.draw(gpu::TRIANGLE_STRIP, 4);
-
-    args->_context->render(batch);
-    framebufferCache->releaseFramebuffer(_copyFBO);
-}
-
-void DeferredLightingEffect::setupTransparent(RenderArgs* args, int lightBufferUnit) {
-    auto globalLight = _allocatedLights[_globalLights.front()];
-    args->_batch->setUniformBuffer(lightBufferUnit, globalLight->getSchemaBuffer());
-}
-
-static void loadLightProgram(const char* vertSource, const char* fragSource, bool lightVolume, gpu::PipelinePointer& pipeline, LightLocationsPtr& locations) {
-    auto VS = gpu::ShaderPointer(gpu::Shader::createVertex(std::string(vertSource)));
-    auto PS = gpu::ShaderPointer(gpu::Shader::createPixel(std::string(fragSource)));
-    
-    gpu::ShaderPointer program = gpu::ShaderPointer(gpu::Shader::createProgram(VS, PS));
-
-    gpu::Shader::BindingSet slotBindings;
-    slotBindings.insert(gpu::Shader::Binding(std::string("diffuseMap"), 0));
-    slotBindings.insert(gpu::Shader::Binding(std::string("normalMap"), 1));
-    slotBindings.insert(gpu::Shader::Binding(std::string("specularMap"), 2));
-    slotBindings.insert(gpu::Shader::Binding(std::string("depthMap"), 3));
-    slotBindings.insert(gpu::Shader::Binding(std::string("shadowMap"), 4));
-    slotBindings.insert(gpu::Shader::Binding(std::string("skyboxMap"), 5));
-    const int LIGHT_GPU_SLOT = 3;
-    slotBindings.insert(gpu::Shader::Binding(std::string("lightBuffer"), LIGHT_GPU_SLOT));
-    const int ATMOSPHERE_GPU_SLOT = 4;
-    slotBindings.insert(gpu::Shader::Binding(std::string("atmosphereBufferUnit"), ATMOSPHERE_GPU_SLOT));
-
-    slotBindings.insert(gpu::Shader::Binding(std::string("deferredTransformBuffer"), DeferredLightingEffect::DEFERRED_TRANSFORM_BUFFER_SLOT));
-
-    gpu::Shader::makeProgram(*program, slotBindings);
-
-    locations->shadowDistances = program->getUniforms().findLocation("shadowDistances");
-    locations->shadowScale = program->getUniforms().findLocation("shadowScale");
-
-    locations->radius = program->getUniforms().findLocation("radius");
-    locations->ambientSphere = program->getUniforms().findLocation("ambientSphere.L00");
-
-    locations->texcoordMat = program->getUniforms().findLocation("texcoordMat");
-    locations->coneParam = program->getUniforms().findLocation("coneParam");
-
-    locations->lightBufferUnit = program->getBuffers().findLocation("lightBuffer");
-    locations->atmosphereBufferUnit = program->getBuffers().findLocation("atmosphereBufferUnit");
-    locations->deferredTransformBuffer = program->getBuffers().findLocation("deferredTransformBuffer");
-
-    auto state = std::make_shared<gpu::State>();
-    if (lightVolume) {
-        state->setCullMode(gpu::State::CULL_BACK);
-        
-        // No need for z test since the depth buffer is not bound state->setDepthTest(true, false, gpu::LESS_EQUAL);
-        // TODO: We should bind the true depth buffer both as RT and texture for the depth test
-        // TODO: We should use DepthClamp and avoid changing geometry for inside /outside cases
-        state->setDepthClampEnable(true);
-
-        // additive blending
-        state->setBlendFunction(true, gpu::State::ONE, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
-    } else {
-        state->setCullMode(gpu::State::CULL_BACK);
-    }
-    pipeline.reset(gpu::Pipeline::create(program, state));
-
-}
-
-void DeferredLightingEffect::setAmbientLightMode(int preset) {
-    if ((preset >= 0) && (preset < gpu::SphericalHarmonics::NUM_PRESET)) {
-        _ambientLightMode = preset;
-        auto light = _allocatedLights.front();
-        light->setAmbientSpherePreset(gpu::SphericalHarmonics::Preset(preset % gpu::SphericalHarmonics::NUM_PRESET));
-    } else {
-        // force to preset 0
-        setAmbientLightMode(0);
-    }
-}
-
-void DeferredLightingEffect::setGlobalLight(const glm::vec3& direction, const glm::vec3& diffuse, float intensity, float ambientIntensity) {
-    auto light = _allocatedLights.front();
-    light->setDirection(direction);
-    light->setColor(diffuse);
-    light->setIntensity(intensity);
-    light->setAmbientIntensity(ambientIntensity);
-}
-
-void DeferredLightingEffect::setGlobalSkybox(const model::SkyboxPointer& skybox) {
-    _skybox = skybox;
-}
-
-model::MeshPointer DeferredLightingEffect::getSpotLightMesh() {
-    if (!_spotLightMesh) {
-        _spotLightMesh = std::make_shared<model::Mesh>();
-
-        int slices = 32;
-        int rings = 3;
-        int vertices = 2 + rings * slices;
-        int originVertex = vertices - 2;
-        int capVertex = vertices - 1;
-        int verticesSize = vertices * 3 * sizeof(float);
-        int indices = 3 * slices * (1 + 1 + 2 * (rings -1));
-        int ringFloatOffset = slices * 3;
-
-
-        float* vertexData = new float[verticesSize];
-        float* vertexRing0 = vertexData;
-        float* vertexRing1 = vertexRing0 + ringFloatOffset;
-        float* vertexRing2 = vertexRing1 + ringFloatOffset;
-        
-        for (int i = 0; i < slices; i++) {
-            float theta = TWO_PI * i / slices;
-            auto cosin = glm::vec2(cosf(theta), sinf(theta));
-
-            *(vertexRing0++) = cosin.x;
-            *(vertexRing0++) = cosin.y;
-            *(vertexRing0++) = 0.0f;
-
-            *(vertexRing1++) = cosin.x;
-            *(vertexRing1++) = cosin.y;
-            *(vertexRing1++) = 0.33f;
-
-            *(vertexRing2++) = cosin.x;
-            *(vertexRing2++) = cosin.y;
-            *(vertexRing2++) = 0.66f;
-        }
-        
-        *(vertexRing2++) = 0.0f;
-        *(vertexRing2++) = 0.0f;
-        *(vertexRing2++) = -1.0f;
-        
-        *(vertexRing2++) = 0.0f;
-        *(vertexRing2++) = 0.0f;
-        *(vertexRing2++) = 1.0f;
-        
-        _spotLightMesh->setVertexBuffer(gpu::BufferView(new gpu::Buffer(verticesSize, (gpu::Byte*) vertexData), gpu::Element::VEC3F_XYZ));
-        delete[] vertexData;
-
-        gpu::uint16* indexData = new gpu::uint16[indices];
-        gpu::uint16* index = indexData;
-        for (int i = 0; i < slices; i++) {
-            *(index++) = originVertex;
-            
-            int s0 = i;
-            int s1 = ((i + 1) % slices);
-            *(index++) = s0;
-            *(index++) = s1;
-
-            int s2 = s0 + slices;
-            int s3 = s1 + slices;
-            *(index++) = s1;
-            *(index++) = s0;
-            *(index++) = s2;
-
-            *(index++) = s1;
-            *(index++) = s2;
-            *(index++) = s3;
-
-            int s4 = s2 + slices;
-            int s5 = s3 + slices;
-            *(index++) = s3;
-            *(index++) = s2;
-            *(index++) = s4;
-
-            *(index++) = s3;
-            *(index++) = s4;
-            *(index++) = s5;
-
-
-            *(index++) = s5;
-            *(index++) = s4;
-            *(index++) = capVertex;
-        }
-
-        _spotLightMesh->setIndexBuffer(gpu::BufferView(new gpu::Buffer(sizeof(unsigned short) * indices, (gpu::Byte*) indexData), gpu::Element::INDEX_UINT16));
-        delete[] indexData;
-
-        model::Mesh::Part part(0, indices, 0, model::Mesh::TRIANGLES);
-        //DEBUG: model::Mesh::Part part(0, indices, 0, model::Mesh::LINE_STRIP);
-        
-        _spotLightMesh->setPartBuffer(gpu::BufferView(new gpu::Buffer(sizeof(part), (gpu::Byte*) &part), gpu::Element::PART_DRAWCALL));
-
-        _spotLightMesh->makeBufferStream();
-    }
-    return _spotLightMesh;
-}
-
+            viewTransforms[i].evalFromRawMatrix(sideViewMat);
+            deferredTransforms[i].viewInverse = sideViewMat;
+
+            deferredTransforms[i].stereoSide = (i == 0 ? -1.0f : 1.0f);
+
+            clipQuad[i] = glm::vec4(sMin + i * halfWidth, tMin, halfWidth, tHeight);
+            screenBottomLeftCorners[i] = glm::vec2(-1.0f + i * 1.0f, -1.0f);
+            screenTopRightCorners[i] = glm::vec2(i * 1.0f, 1.0f);
+
+            fetchTexcoordRects[i] = glm::vec4(sMin + i * halfWidth, tMin, halfWidth, tHeight);
+        }
+    } else {
+
+        viewports[0] = monoViewport;
+        projMats[0] = monoProjMat;
+
+        deferredTransforms[0].projection = monoProjMat;
+     
+        deferredTransforms[0].viewInverse = monoViewMat;
+        viewTransforms[0] = monoViewTransform;
+
+        deferredTransforms[0].stereoSide = 0.0f;
+
+        clipQuad[0] = glm::vec4(sMin, tMin, sWidth, tHeight);
+        screenBottomLeftCorners[0] = glm::vec2(-1.0f, -1.0f);
+        screenTopRightCorners[0] = glm::vec2(1.0f, 1.0f);
+
+        fetchTexcoordRects[0] = glm::vec4(sMin, tMin, sWidth, tHeight);
+    }
+
+    auto eyePoint = viewFrustum->getPosition();
+    float nearRadius = glm::distance(eyePoint, viewFrustum->getNearTopLeft());
+
+
+    for (int side = 0; side < numPasses; side++) {
+        // Render in this side's viewport
+        batch.setViewportTransform(viewports[side]);
+        batch.setStateScissorRect(viewports[side]);
+
+        // Sync and Bind the correct DeferredTransform ubo
+        _deferredTransformBuffer[side]._buffer->setSubData(0, sizeof(DeferredTransform), (const gpu::Byte*) &deferredTransforms[side]);
+        batch.setUniformBuffer(_directionalLightLocations->deferredTransformBuffer, _deferredTransformBuffer[side]);
+
+        glm::vec2 topLeft(-1.0f, -1.0f);
+        glm::vec2 bottomRight(1.0f, 1.0f);
+        glm::vec2 texCoordTopLeft(clipQuad[side].x, clipQuad[side].y);
+        glm::vec2 texCoordBottomRight(clipQuad[side].x + clipQuad[side].z, clipQuad[side].y + clipQuad[side].w);
+
+        // First Global directional light and ambient pass
+        {
+            bool useSkyboxCubemap = (_skybox) && (_skybox->getCubemap());
+
+            auto& program = _directionalLight;
+            LightLocationsPtr locations = _directionalLightLocations;
+
+            // TODO: At some point bring back the shadows...
+            // Setup the global directional pass pipeline
+            {
+                if (useSkyboxCubemap) {
+                    program = _directionalSkyboxLight;
+                    locations = _directionalSkyboxLightLocations;
+                } else if (_ambientLightMode > -1) {
+                    program = _directionalAmbientSphereLight;
+                    locations = _directionalAmbientSphereLightLocations;
+                }
+                batch.setPipeline(program);
+            }
+
+            { // Setup the global lighting
+                auto globalLight = _allocatedLights[_globalLights.front()];
+    
+                if (locations->ambientSphere >= 0) {
+                    gpu::SphericalHarmonics sh = globalLight->getAmbientSphere();
+                    if (useSkyboxCubemap && _skybox->getCubemap()->getIrradiance()) {
+                        sh = (*_skybox->getCubemap()->getIrradiance());
+                    }
+                    for (int i =0; i <gpu::SphericalHarmonics::NUM_COEFFICIENTS; i++) {
+                       batch._glUniform4fv(locations->ambientSphere + i, 1, (const float*) (&sh) + i * 4);
+                    }
+                }
+    
+                if (useSkyboxCubemap) {
+                    batch.setResourceTexture(5, _skybox->getCubemap());
+                }
+
+                if (locations->lightBufferUnit >= 0) {
+                    batch.setUniformBuffer(locations->lightBufferUnit, globalLight->getSchemaBuffer());
+                }
+        
+                if (_atmosphere && (locations->atmosphereBufferUnit >= 0)) {
+                    batch.setUniformBuffer(locations->atmosphereBufferUnit, _atmosphere->getDataBuffer());
+                }
+            }
+
+            {
+                batch.setModelTransform(Transform());
+                batch.setProjectionTransform(glm::mat4());
+                batch.setViewTransform(Transform());
+
+                glm::vec4 color(1.0f, 1.0f, 1.0f, 1.0f);
+               geometryCache->renderQuad(batch, topLeft, bottomRight, texCoordTopLeft, texCoordBottomRight, color);
+            }
+
+            if (useSkyboxCubemap) {
+                batch.setResourceTexture(5, nullptr);
+            }
+        }
+
+        auto texcoordMat = glm::mat4();
+      /*  texcoordMat[0] = glm::vec4(sWidth / 2.0f, 0.0f, 0.0f, sMin + sWidth / 2.0f);
+        texcoordMat[1] = glm::vec4(0.0f, tHeight / 2.0f, 0.0f, tMin + tHeight / 2.0f);
+       */ texcoordMat[0] = glm::vec4(fetchTexcoordRects[side].z / 2.0f, 0.0f, 0.0f, fetchTexcoordRects[side].x + fetchTexcoordRects[side].z / 2.0f);
+        texcoordMat[1] = glm::vec4(0.0f, fetchTexcoordRects[side].w / 2.0f, 0.0f, fetchTexcoordRects[side].y + fetchTexcoordRects[side].w / 2.0f);
+        texcoordMat[2] = glm::vec4(0.0f, 0.0f, 1.0f, 0.0f);
+        texcoordMat[3] = glm::vec4(0.0f, 0.0f, 0.0f, 1.0f);
+
+        // enlarge the scales slightly to account for tesselation
+        const float SCALE_EXPANSION = 0.05f;
+
+
+        batch.setProjectionTransform(projMats[side]);
+        batch.setViewTransform(viewTransforms[side]);
+
+        // Splat Point lights
+        if (!_pointLights.empty()) {
+            batch.setPipeline(_pointLight);
+
+            batch._glUniformMatrix4fv(_pointLightLocations->texcoordMat, 1, false, reinterpret_cast< const float* >(&texcoordMat));
+
+            for (auto lightID : _pointLights) {
+                auto& light = _allocatedLights[lightID];
+                // IN DEBUG:  light->setShowContour(true);
+                if (_pointLightLocations->lightBufferUnit >= 0) {
+                    batch.setUniformBuffer(_pointLightLocations->lightBufferUnit, light->getSchemaBuffer());
+                }
+
+                float expandedRadius = light->getMaximumRadius() * (1.0f + SCALE_EXPANSION);
+                // TODO: We shouldn't have to do that test and use a different volume geometry for when inside the light volume,
+                // we should be able to draw the same geometry using DepthClamp but for an unknown reason it's not working...
+                if (glm::distance(eyePoint, glm::vec3(light->getPosition())) < expandedRadius + nearRadius) {
+                    Transform model;
+                    model.setTranslation(glm::vec3(0.0f, 0.0f, -1.0f));
+                    batch.setModelTransform(model);
+                    batch.setViewTransform(Transform());
+                    batch.setProjectionTransform(glm::mat4());
+
+                    glm::vec4 color(1.0f, 1.0f, 1.0f, 1.0f);
+                    DependencyManager::get<GeometryCache>()->renderQuad(batch, topLeft, bottomRight, texCoordTopLeft, texCoordBottomRight, color);
+                
+                    batch.setProjectionTransform(projMats[side]);
+                    batch.setViewTransform(viewTransforms[side]);
+                } else {
+                    Transform model;
+                    model.setTranslation(glm::vec3(light->getPosition().x, light->getPosition().y, light->getPosition().z));
+                    batch.setModelTransform(model);
+                    geometryCache->renderSphere(batch, expandedRadius, 32, 32, glm::vec4(1.0f, 1.0f, 1.0f, 1.0f));
+                }
+            }
+        }
+    
+        // Splat spot lights
+        if (!_spotLights.empty()) {
+            batch.setPipeline(_spotLight);
+
+            batch._glUniformMatrix4fv(_spotLightLocations->texcoordMat, 1, false, reinterpret_cast< const float* >(&texcoordMat));
+
+            for (auto lightID : _spotLights) {
+                auto light = _allocatedLights[lightID];
+                // IN DEBUG:  light->setShowContour(true);
+
+                batch.setUniformBuffer(_spotLightLocations->lightBufferUnit, light->getSchemaBuffer());
+
+                auto eyeLightPos = eyePoint - light->getPosition();
+                auto eyeHalfPlaneDistance = glm::dot(eyeLightPos, light->getDirection());
+
+                const float TANGENT_LENGTH_SCALE = 0.666f;
+                glm::vec4 coneParam(light->getSpotAngleCosSin(), TANGENT_LENGTH_SCALE * tanf(0.5f * light->getSpotAngle()), 1.0f);
+
+                float expandedRadius = light->getMaximumRadius() * (1.0f + SCALE_EXPANSION);
+                // TODO: We shouldn't have to do that test and use a different volume geometry for when inside the light volume,
+                // we should be able to draw the same geometry using DepthClamp but for an unknown reason it's not working...
+                if ((eyeHalfPlaneDistance > -nearRadius) &&
+                    (glm::distance(eyePoint, glm::vec3(light->getPosition())) < expandedRadius + nearRadius)) {
+                    coneParam.w = 0.0f;
+                    batch._glUniform4fv(_spotLightLocations->coneParam, 1, reinterpret_cast< const float* >(&coneParam));
+
+                    Transform model;
+                    model.setTranslation(glm::vec3(0.0f, 0.0f, -1.0f));
+                    batch.setModelTransform(model);
+                    batch.setViewTransform(Transform());
+                    batch.setProjectionTransform(glm::mat4());
+                
+                    glm::vec4 color(1.0f, 1.0f, 1.0f, 1.0f);
+                    DependencyManager::get<GeometryCache>()->renderQuad(batch, topLeft, bottomRight, texCoordTopLeft, texCoordBottomRight, color);
+                
+                    batch.setProjectionTransform( projMats[side]);
+                    batch.setViewTransform(viewTransforms[side]);
+                } else {
+                    coneParam.w = 1.0f;
+                    batch._glUniform4fv(_spotLightLocations->coneParam, 1, reinterpret_cast< const float* >(&coneParam));
+
+                    Transform model;
+                    model.setTranslation(light->getPosition());
+                    model.postRotate(light->getOrientation());
+                    model.postScale(glm::vec3(expandedRadius, expandedRadius, expandedRadius));
+
+                    batch.setModelTransform(model);
+                    auto mesh = getSpotLightMesh();
+
+                    batch.setIndexBuffer(mesh->getIndexBuffer());
+                    batch.setInputBuffer(0, mesh->getVertexBuffer());
+                    batch.setInputFormat(mesh->getVertexFormat());
+
+                    auto& part = mesh->getPartBuffer().get<model::Mesh::Part>();
+
+                    batch.drawIndexed(model::Mesh::topologyToPrimitive(part._topology), part._numIndices, part._startIndex);
+                }
+            }
+        }
+    }
+
+    // Probably not necessary in the long run because the gpu layer would unbind this texture if used as render target
+    batch.setResourceTexture(0, nullptr);
+    batch.setResourceTexture(1, nullptr);
+    batch.setResourceTexture(2, nullptr);
+    batch.setResourceTexture(3, nullptr);
+    batch.setUniformBuffer(_directionalLightLocations->deferredTransformBuffer, nullptr);
+
+    args->_context->render(batch);
+
+    // End of the Lighting pass
+    if (!_pointLights.empty()) {
+        _pointLights.clear();
+    }
+    if (!_spotLights.empty()) {
+        _spotLights.clear();
+    }
+}
+
+
+void DeferredLightingEffect::copyBack(RenderArgs* args) {
+    // Draws _copyFBO's first render buffer into the primary depth+color framebuffer, then releases _copyFBO.
+    gpu::Batch batch;
+    batch.enableStereo(false);
+    auto framebufferCache = DependencyManager::get<FramebufferCache>();
+    const QSize bufferSize = framebufferCache->getFrameBufferSize();
+    const auto& viewport = args->_viewport;
+
+    // TODO why doesn't this blit work?  It only seems to affect a small area below the rear view mirror.
+    //     gpu::Vec4i vp = args->_viewport;
+    //     batch.blit(_copyFBO, vp, framebufferCache->getPrimaryFramebuffer(), vp);
+    auto destFbo = framebufferCache->getPrimaryFramebufferDepthColor();
+    batch.setFramebuffer(destFbo);
+    batch.setViewportTransform(viewport);
+    batch.setProjectionTransform(glm::mat4());
+    batch.setViewTransform(Transform());
+    batch.setPipeline(_blitLightBuffer);
+
+    // The model transform carries the viewport rectangle normalized by the framebuffer size.
+    const float frameWidth = static_cast<float>(bufferSize.width());
+    const float frameHeight = static_cast<float>(bufferSize.height());
+    Transform model;
+    model.setTranslation(glm::vec3(viewport.x / frameWidth, viewport.y / frameHeight, 0.0));
+    model.setScale(glm::vec3(viewport.z / frameWidth, viewport.w / frameHeight, 1.0));
+    batch.setModelTransform(model);
+
+    // Source texture is bound to slot 0 and drawn as a 4-vertex triangle strip.
+    batch.setResourceTexture(0, _copyFBO->getRenderBuffer(0));
+    batch.draw(gpu::TRIANGLE_STRIP, 4);
+
+    args->_context->render(batch);
+    framebufferCache->releaseFramebuffer(_copyFBO);
+}
+
+void DeferredLightingEffect::setupTransparent(RenderArgs* args, int lightBufferUnit) {
+    // Bind the schema buffer of the first global light to the requested uniform buffer slot.
+    args->_batch->setUniformBuffer(lightBufferUnit, _allocatedLights[_globalLights.front()]->getSchemaBuffer());
+}
+
+static void loadLightProgram(const char* vertSource, const char* fragSource, bool lightVolume, gpu::PipelinePointer& pipeline, LightLocationsPtr& locations) {
+    // Create the vertex and pixel shaders from source and combine them into one program.
+    gpu::ShaderPointer vertexShader(gpu::Shader::createVertex(std::string(vertSource)));
+    gpu::ShaderPointer pixelShader(gpu::Shader::createPixel(std::string(fragSource)));
+    gpu::ShaderPointer program(gpu::Shader::createProgram(vertexShader, pixelShader));
+
+    // Named map bindings and their slot numbers.
+    gpu::Shader::BindingSet slotBindings;
+    slotBindings.insert(gpu::Shader::Binding(std::string("diffuseMap"), 0));
+    slotBindings.insert(gpu::Shader::Binding(std::string("normalMap"), 1));
+    slotBindings.insert(gpu::Shader::Binding(std::string("specularMap"), 2));
+    slotBindings.insert(gpu::Shader::Binding(std::string("depthMap"), 3));
+    slotBindings.insert(gpu::Shader::Binding(std::string("shadowMap"), 4));
+    slotBindings.insert(gpu::Shader::Binding(std::string("skyboxMap"), 5));
+
+    // Named buffer bindings and their slot numbers.
+    const int LIGHT_GPU_SLOT = 3;
+    const int ATMOSPHERE_GPU_SLOT = 4;
+    slotBindings.insert(gpu::Shader::Binding(std::string("lightBuffer"), LIGHT_GPU_SLOT));
+    slotBindings.insert(gpu::Shader::Binding(std::string("atmosphereBufferUnit"), ATMOSPHERE_GPU_SLOT));
+    slotBindings.insert(gpu::Shader::Binding(std::string("deferredTransformBuffer"), DeferredLightingEffect::DEFERRED_TRANSFORM_BUFFER_SLOT));
+
+    gpu::Shader::makeProgram(*program, slotBindings);
+
+    // Record the uniform locations in the caller's LightLocations.
+    auto&& uniforms = program->getUniforms();
+    locations->shadowDistances = uniforms.findLocation("shadowDistances");
+    locations->shadowScale = uniforms.findLocation("shadowScale");
+    locations->radius = uniforms.findLocation("radius");
+    locations->ambientSphere = uniforms.findLocation("ambientSphere.L00");
+    locations->texcoordMat = uniforms.findLocation("texcoordMat");
+    locations->coneParam = uniforms.findLocation("coneParam");
+
+    auto&& buffers = program->getBuffers();
+    locations->lightBufferUnit = buffers.findLocation("lightBuffer");
+    locations->atmosphereBufferUnit = buffers.findLocation("atmosphereBufferUnit");
+    locations->deferredTransformBuffer = buffers.findLocation("deferredTransformBuffer");
+
+    // Both variants cull back faces; light volumes additionally enable depth clamp and additive blending.
+    auto state = std::make_shared<gpu::State>();
+    state->setCullMode(gpu::State::CULL_BACK);
+    if (lightVolume) {
+        // No need for z test since the depth buffer is not bound state->setDepthTest(true, false, gpu::LESS_EQUAL);
+        // TODO: We should bind the true depth buffer both as RT and texture for the depth test
+        // TODO: We should use DepthClamp and avoid changing geometry for inside /outside cases
+        state->setDepthClampEnable(true);
+
+        // additive blending
+        state->setBlendFunction(true, gpu::State::ONE, gpu::State::BLEND_OP_ADD, gpu::State::ONE);
+    }
+    pipeline.reset(gpu::Pipeline::create(program, state));
+}
+
+void DeferredLightingEffect::setAmbientLightMode(int preset) {
+    const bool validPreset = (preset >= 0) && (preset < gpu::SphericalHarmonics::NUM_PRESET);
+    if (!validPreset) {
+        // force to preset 0
+        setAmbientLightMode(0);
+        return;
+    }
+    _ambientLightMode = preset;
+    _allocatedLights.front()->setAmbientSpherePreset(gpu::SphericalHarmonics::Preset(preset % gpu::SphericalHarmonics::NUM_PRESET));
+}
+
+void DeferredLightingEffect::setGlobalLight(const glm::vec3& direction, const glm::vec3& diffuse, float intensity, float ambientIntensity) {
+    auto&& firstLight = _allocatedLights.front(); // updated in place; presumably the global light - confirm
+    firstLight->setDirection(direction);
+    firstLight->setColor(diffuse);
+    firstLight->setIntensity(intensity);
+    firstLight->setAmbientIntensity(ambientIntensity);
+}
+
+void DeferredLightingEffect::setGlobalSkybox(const model::SkyboxPointer& skybox) {
+    _skybox = skybox; // kept for the lighting pass, which uses its cubemap (and irradiance) when present
+}
+
+model::MeshPointer DeferredLightingEffect::getSpotLightMesh() {
+    // Returns the spot light volume mesh; built lazily on the first call and cached in _spotLightMesh.
+    if (!_spotLightMesh) {
+        _spotLightMesh = std::make_shared<model::Mesh>();
+        int slices = 32;
+        int rings = 3;
+        int vertices = 2 + rings * slices;
+        int originVertex = vertices - 2;
+        int capVertex = vertices - 1;
+        int verticesSize = vertices * 3 * sizeof(float);
+        int indices = 3 * slices * (1 + 1 + 2 * (rings -1));
+        int ringFloatOffset = slices * 3;
+
+        // Size the array by element count: verticesSize is a byte count (for the gpu::Buffer), not a float count.
+        float* vertexData = new float[vertices * 3];
+        float* vertexRing0 = vertexData;
+        float* vertexRing1 = vertexRing0 + ringFloatOffset;
+        float* vertexRing2 = vertexRing1 + ringFloatOffset;
+
+        // Fill three rings of unit-circle vertices at z = 0, 0.33 and 0.66.
+        for (int i = 0; i < slices; i++) {
+            float theta = TWO_PI * i / slices;
+            auto cosin = glm::vec2(cosf(theta), sinf(theta));
+
+            *(vertexRing0++) = cosin.x;
+            *(vertexRing0++) = cosin.y;
+            *(vertexRing0++) = 0.0f;
+
+            *(vertexRing1++) = cosin.x;
+            *(vertexRing1++) = cosin.y;
+            *(vertexRing1++) = 0.33f;
+
+            *(vertexRing2++) = cosin.x;
+            *(vertexRing2++) = cosin.y;
+            *(vertexRing2++) = 0.66f;
+        }
+
+        // vertexRing2 now points just past the last ring: append the origin vertex, then the cap vertex.
+        *(vertexRing2++) = 0.0f;
+        *(vertexRing2++) = 0.0f;
+        *(vertexRing2++) = -1.0f;
+
+        *(vertexRing2++) = 0.0f;
+        *(vertexRing2++) = 0.0f;
+        *(vertexRing2++) = 1.0f;
+
+        _spotLightMesh->setVertexBuffer(gpu::BufferView(new gpu::Buffer(verticesSize, (gpu::Byte*) vertexData), gpu::Element::VEC3F_XYZ));
+        delete[] vertexData;
+
+        gpu::uint16* indexData = new gpu::uint16[indices];
+        gpu::uint16* index = indexData;
+        for (int i = 0; i < slices; i++) {
+            // One triangle from the origin vertex to ring 0.
+            *(index++) = originVertex;
+            int s0 = i;
+            int s1 = ((i + 1) % slices);
+            *(index++) = s0;
+            *(index++) = s1;
+
+            // Two triangles between ring 0 and ring 1.
+            int s2 = s0 + slices;
+            int s3 = s1 + slices;
+            *(index++) = s1;
+            *(index++) = s0;
+            *(index++) = s2;
+            *(index++) = s1;
+            *(index++) = s2;
+            *(index++) = s3;
+
+            // Two triangles between ring 1 and ring 2.
+            int s4 = s2 + slices;
+            int s5 = s3 + slices;
+            *(index++) = s3;
+            *(index++) = s2;
+            *(index++) = s4;
+            *(index++) = s3;
+            *(index++) = s4;
+            *(index++) = s5;
+
+            // One triangle from ring 2 to the cap vertex.
+            *(index++) = s5;
+            *(index++) = s4;
+            *(index++) = capVertex;
+        }
+
+        _spotLightMesh->setIndexBuffer(gpu::BufferView(new gpu::Buffer(sizeof(gpu::uint16) * indices, (gpu::Byte*) indexData), gpu::Element::INDEX_UINT16));
+        delete[] indexData;
+
+        model::Mesh::Part part(0, indices, 0, model::Mesh::TRIANGLES);
+        //DEBUG: model::Mesh::Part part(0, indices, 0, model::Mesh::LINE_STRIP);
+        _spotLightMesh->setPartBuffer(gpu::BufferView(new gpu::Buffer(sizeof(part), (gpu::Byte*) &part), gpu::Element::PART_DRAWCALL));
+        _spotLightMesh->makeBufferStream();
+    }
+    return _spotLightMesh;
+}
+
diff --git a/libraries/render-utils/src/DeferredLightingEffect.h b/libraries/render-utils/src/DeferredLightingEffect.h
index ea6f2f0ce0..83bb4c215f 100644
--- a/libraries/render-utils/src/DeferredLightingEffect.h
+++ b/libraries/render-utils/src/DeferredLightingEffect.h
@@ -37,15 +37,22 @@ public:
     void init(AbstractViewStateInterface* viewState);
 
     /// Sets up the state necessary to render static untextured geometry with the simple program.
-    void bindSimpleProgram(gpu::Batch& batch, bool textured = false, bool culled = true,
+    gpu::PipelinePointer bindSimpleProgram(gpu::Batch& batch, bool textured = false, bool culled = true,
                            bool emmisive = false, bool depthBias = false);
 
+    /// Sets up the state necessary to render static untextured geometry with the simple program.
+    void bindInstanceProgram(gpu::Batch& batch, bool textured = false, bool culled = true,
+        bool emmisive = false, bool depthBias = false);
+
     //// Renders a solid sphere with the simple program.
     void renderSolidSphere(gpu::Batch& batch, float radius, int slices, int stacks, const glm::vec4& color);
 
     //// Renders a wireframe sphere with the simple program.
     void renderWireSphere(gpu::Batch& batch, float radius, int slices, int stacks, const glm::vec4& color);
     
+    //// Renders a solid cube using instancing.  Transform should include scaling.
+    void renderSolidCubeInstance(gpu::Batch& batch, const Transform& xfm, const glm::vec4& color);
+
     //// Renders a solid cube with the simple program.
     void renderSolidCube(gpu::Batch& batch, float size, const glm::vec4& color);
 
diff --git a/libraries/render-utils/src/GeometryCache.cpp b/libraries/render-utils/src/GeometryCache.cpp
index a58df10cc6..ea05df84ef 100644
--- a/libraries/render-utils/src/GeometryCache.cpp
+++ b/libraries/render-utils/src/GeometryCache.cpp
@@ -689,28 +689,31 @@ void GeometryCache::renderVertices(gpu::Batch& batch, gpu::Primitive primitiveTy
     }
 }
 
-void GeometryCache::renderSolidCube(gpu::Batch& batch, float size, const glm::vec4& color) {
-    Vec2Pair colorKey(glm::vec2(color.x, color.y), glm::vec2(color.z, color.y));
-    const int FLOATS_PER_VERTEX = 3;
-    const int VERTICES_PER_FACE = 4;
-    const int NUMBER_OF_FACES = 6;
-    const int TRIANGLES_PER_FACE = 2;
-    const int VERTICES_PER_TRIANGLE = 3;
-    const int vertices = NUMBER_OF_FACES * VERTICES_PER_FACE;
-    const int indices = NUMBER_OF_FACES * TRIANGLES_PER_FACE * VERTICES_PER_TRIANGLE;
-    const int vertexPoints = vertices * FLOATS_PER_VERTEX;
-    const int VERTEX_STRIDE = sizeof(GLfloat) * FLOATS_PER_VERTEX * 2; // vertices and normals
-    const int NORMALS_OFFSET = sizeof(GLfloat) * FLOATS_PER_VERTEX;
+static const int FLOATS_PER_VERTEX = 3;
+static const int VERTICES_PER_TRIANGLE = 3;
 
+static const int CUBE_NUMBER_OF_FACES = 6;
+static const int CUBE_VERTICES_PER_FACE = 4;
+static const int CUBE_TRIANGLES_PER_FACE = 2;
+static const int CUBE_VERTICES = CUBE_NUMBER_OF_FACES * CUBE_VERTICES_PER_FACE; // 24: each face has its own 4 vertices
+static const int CUBE_VERTEX_POINTS = CUBE_VERTICES * FLOATS_PER_VERTEX; // 72 floats per attribute table
+static const int CUBE_INDICES = CUBE_NUMBER_OF_FACES * CUBE_TRIANGLES_PER_FACE * VERTICES_PER_TRIANGLE; // 36
+// Stream element layouts shared by the solid-cube draw paths in this file.
+static const gpu::Element CUBE_POSITION_ELEMENT{ gpu::VEC3, gpu::FLOAT, gpu::XYZ };
+static const gpu::Element CUBE_NORMAL_ELEMENT{ gpu::VEC3, gpu::FLOAT, gpu::XYZ };
+static const gpu::Element CUBE_COLOR_ELEMENT{ gpu::VEC4, gpu::NUINT8, gpu::RGBA };
+static const gpu::Element INSTANCE_XFM_ELEMENT{ gpu::MAT4, gpu::FLOAT, gpu::XYZW }; // used for the per-instance transform stream
+
+gpu::BufferPointer GeometryCache::getCubeVertices(float size) {
     if (!_solidCubeVertices.contains(size)) {
         auto verticesBuffer = std::make_shared<gpu::Buffer>();
         _solidCubeVertices[size] = verticesBuffer;
 
-        GLfloat* vertexData = new GLfloat[vertexPoints * 2]; // vertices and normals
+        GLfloat* vertexData = new GLfloat[CUBE_VERTEX_POINTS * 2]; // vertices and normals
         GLfloat* vertex = vertexData;
         float halfSize = size / 2.0f;
 
-        static GLfloat cannonicalVertices[vertexPoints] = 
+        static GLfloat cannonicalVertices[CUBE_VERTEX_POINTS] = 
                                     { 1, 1, 1,  -1, 1, 1,  -1,-1, 1,   1,-1, 1,   // v0,v1,v2,v3 (front)
                                       1, 1, 1,   1,-1, 1,   1,-1,-1,   1, 1,-1,   // v0,v3,v4,v5 (right)
                                       1, 1, 1,   1, 1,-1,  -1, 1,-1,  -1, 1, 1,   // v0,v5,v6,v1 (top)
@@ -719,7 +722,7 @@ void GeometryCache::renderSolidCube(gpu::Batch& batch, float size, const glm::ve
                                       1,-1,-1,  -1,-1,-1,  -1, 1,-1,   1, 1,-1 }; // v4,v7,v6,v5 (back)
 
         // normal array
-        static GLfloat cannonicalNormals[vertexPoints]  = 
+        static GLfloat cannonicalNormals[CUBE_VERTEX_POINTS] =
                                   { 0, 0, 1,   0, 0, 1,   0, 0, 1,   0, 0, 1,   // v0,v1,v2,v3 (front)
                                     1, 0, 0,   1, 0, 0,   1, 0, 0,   1, 0, 0,   // v0,v3,v4,v5 (right)
                                     0, 1, 0,   0, 1, 0,   0, 1, 0,   0, 1, 0,   // v0,v5,v6,v1 (top)
@@ -731,7 +734,7 @@ void GeometryCache::renderSolidCube(gpu::Batch& batch, float size, const glm::ve
         GLfloat* cannonicalVertex = &cannonicalVertices[0];
         GLfloat* cannonicalNormal = &cannonicalNormals[0];
 
-        for (int i = 0; i < vertices; i++) {
+        for (int i = 0; i < CUBE_VERTICES; i++) {
             // vertices
             *(vertex++) = halfSize * *cannonicalVertex++;
             *(vertex++) = halfSize * *cannonicalVertex++;
@@ -742,90 +745,121 @@ void GeometryCache::renderSolidCube(gpu::Batch& batch, float size, const glm::ve
             *(vertex++) = *cannonicalNormal++;
             *(vertex++) = *cannonicalNormal++;
         }
-
-        verticesBuffer->append(sizeof(GLfloat) * vertexPoints * 2, (gpu::Byte*) vertexData);
+        verticesBuffer->append(sizeof(GLfloat) * CUBE_VERTEX_POINTS * 2, (gpu::Byte*) vertexData);
     }
 
+    return _solidCubeVertices[size];
+}
+
+gpu::BufferPointer GeometryCache::getSolidCubeIndices() {
     if (!_solidCubeIndexBuffer) {
-        static GLubyte cannonicalIndices[indices]  = 
-                                    { 0, 1, 2,   2, 3, 0,      // front
+        static GLubyte cannonicalIndices[CUBE_INDICES] =                                     { 0, 1, 2,   2, 3, 0,      // front
                                       4, 5, 6,   6, 7, 4,      // right
                                       8, 9,10,  10,11, 8,      // top
                                      12,13,14,  14,15,12,      // left
                                      16,17,18,  18,19,16,      // bottom
                                      20,21,22,  22,23,20 };    // back
-        
+
         auto indexBuffer = std::make_shared<gpu::Buffer>();
         _solidCubeIndexBuffer = indexBuffer;
-    
+
         _solidCubeIndexBuffer->append(sizeof(cannonicalIndices), (gpu::Byte*) cannonicalIndices);
     }
+    return _solidCubeIndexBuffer;
+}
 
+/// Binds verticesBuffer to the batch twice: as the POSITION stream (offset 0) and as the NORMAL stream (offset NORMALS_OFFSET).
+void GeometryCache::setupCubeVertices(gpu::Batch& batch, gpu::BufferPointer& verticesBuffer) {
+    static const int VERTEX_STRIDE = sizeof(GLfloat) * FLOATS_PER_VERTEX * 2; // vertices and normals
+    static const int NORMALS_OFFSET = sizeof(GLfloat) * FLOATS_PER_VERTEX; // each normal follows its position
+    // Both views share the same buffer and stride; only the starting offset, element and slot differ.
+    gpu::BufferView verticesView(verticesBuffer, 0, verticesBuffer->getSize(), VERTEX_STRIDE, CUBE_POSITION_ELEMENT);
+    gpu::BufferView normalsView(verticesBuffer, NORMALS_OFFSET, verticesBuffer->getSize(), VERTEX_STRIDE, CUBE_NORMAL_ELEMENT);
+    batch.setInputBuffer(gpu::Stream::POSITION, verticesView);
+    batch.setInputBuffer(gpu::Stream::NORMAL, normalsView);
+}
+/// Draws count instances of the size-1 cube; transformBuffer and colorBuffer feed the per-instance attributes.
+void GeometryCache::renderSolidCubeInstances(gpu::Batch& batch, size_t count, gpu::BufferPointer transformBuffer, gpu::BufferPointer colorBuffer) {
+    static gpu::Stream::FormatPointer streamFormat; // built once, on the first call
+    if (!streamFormat) {
+        streamFormat = std::make_shared<gpu::Stream::Format>(); // 1 for everyone
+        streamFormat->setAttribute(gpu::Stream::POSITION, gpu::Stream::POSITION, CUBE_POSITION_ELEMENT, 0);
+        streamFormat->setAttribute(gpu::Stream::NORMAL, gpu::Stream::NORMAL, CUBE_NORMAL_ELEMENT);
+        streamFormat->setAttribute(gpu::Stream::COLOR, gpu::Stream::COLOR, CUBE_COLOR_ELEMENT, 0, gpu::Stream::PER_INSTANCE);
+        streamFormat->setAttribute(gpu::Stream::INSTANCE_XFM, gpu::Stream::INSTANCE_XFM, INSTANCE_XFM_ELEMENT, 0, gpu::Stream::PER_INSTANCE);
+    }
+    batch.setInputFormat(streamFormat);
+    // Color stream, declared PER_INSTANCE in the format above.
+    gpu::BufferView colorView(colorBuffer, CUBE_COLOR_ELEMENT);
+    batch.setInputBuffer(gpu::Stream::COLOR, colorView);
+    // Transform stream, also PER_INSTANCE: MAT4 float elements covering the whole transformBuffer.
+    gpu::BufferView instanceXfmView(transformBuffer, 0, transformBuffer->getSize(), INSTANCE_XFM_ELEMENT);
+    batch.setInputBuffer(gpu::Stream::INSTANCE_XFM, instanceXfmView);
+    // Geometry is the cached size-1.0 cube; any scaling is expected to come from the instance transform.
+    gpu::BufferPointer verticesBuffer = getCubeVertices(1.0);
+    setupCubeVertices(batch, verticesBuffer);
+    batch.setIndexBuffer(gpu::UINT8, getSolidCubeIndices(), 0);
+    batch.drawIndexedInstanced(count, gpu::TRIANGLES, CUBE_INDICES);
+}
+
+
+void GeometryCache::renderSolidCube(gpu::Batch& batch, float size, const glm::vec4& color) {
+    Vec2Pair colorKey(glm::vec2(color.x, color.y), glm::vec2(color.z, color.w)); // key on all four channels; alpha is color.w
     if (!_solidCubeColors.contains(colorKey)) {
         auto colorBuffer = std::make_shared<gpu::Buffer>();
         _solidCubeColors[colorKey] = colorBuffer;
 
-        const int NUM_COLOR_SCALARS_PER_CUBE = 24;
         int compactColor = ((int(color.x * 255.0f) & 0xFF)) |
                             ((int(color.y * 255.0f) & 0xFF) << 8) |
                             ((int(color.z * 255.0f) & 0xFF) << 16) |
                             ((int(color.w * 255.0f) & 0xFF) << 24);
-        int colors[NUM_COLOR_SCALARS_PER_CUBE] = { compactColor, compactColor, compactColor, compactColor,
-                                                   compactColor, compactColor, compactColor, compactColor,
-                                                   compactColor, compactColor, compactColor, compactColor,
-                                                   compactColor, compactColor, compactColor, compactColor,
-                                                   compactColor, compactColor, compactColor, compactColor,
-                                                   compactColor, compactColor, compactColor, compactColor };
-
+        int colors[CUBE_VERTICES] = { 
+            compactColor, compactColor, compactColor, compactColor,
+            compactColor, compactColor, compactColor, compactColor,
+            compactColor, compactColor, compactColor, compactColor,
+            compactColor, compactColor, compactColor, compactColor,
+            compactColor, compactColor, compactColor, compactColor,
+            compactColor, compactColor, compactColor, compactColor 
+        };
         colorBuffer->append(sizeof(colors), (gpu::Byte*) colors);
     }
-    gpu::BufferPointer verticesBuffer = _solidCubeVertices[size];
     gpu::BufferPointer colorBuffer = _solidCubeColors[colorKey];
 
-    const int VERTICES_SLOT = 0;
-    const int NORMALS_SLOT = 1;
-    const int COLOR_SLOT = 2;
     static gpu::Stream::FormatPointer streamFormat;
-    static gpu::Element positionElement, normalElement, colorElement;
     if (!streamFormat) {
         streamFormat = std::make_shared<gpu::Stream::Format>(); // 1 for everyone
-        streamFormat->setAttribute(gpu::Stream::POSITION, VERTICES_SLOT, gpu::Element(gpu::VEC3, gpu::FLOAT, gpu::XYZ), 0);
-        streamFormat->setAttribute(gpu::Stream::NORMAL, NORMALS_SLOT, gpu::Element(gpu::VEC3, gpu::FLOAT, gpu::XYZ));
-        streamFormat->setAttribute(gpu::Stream::COLOR, COLOR_SLOT, gpu::Element(gpu::VEC4, gpu::NUINT8, gpu::RGBA));
-        positionElement = streamFormat->getAttributes().at(gpu::Stream::POSITION)._element;
-        normalElement = streamFormat->getAttributes().at(gpu::Stream::NORMAL)._element;
-        colorElement = streamFormat->getAttributes().at(gpu::Stream::COLOR)._element;
+        streamFormat->setAttribute(gpu::Stream::POSITION, gpu::Stream::POSITION, CUBE_POSITION_ELEMENT, 0);
+        streamFormat->setAttribute(gpu::Stream::NORMAL, gpu::Stream::NORMAL, CUBE_NORMAL_ELEMENT);
+        streamFormat->setAttribute(gpu::Stream::COLOR, gpu::Stream::COLOR, CUBE_COLOR_ELEMENT);
     }
-    
-    
-    gpu::BufferView verticesView(verticesBuffer, 0, verticesBuffer->getSize(), VERTEX_STRIDE, positionElement);
-    gpu::BufferView normalsView(verticesBuffer, NORMALS_OFFSET, verticesBuffer->getSize(), VERTEX_STRIDE, normalElement);
-    gpu::BufferView colorView(colorBuffer, streamFormat->getAttributes().at(gpu::Stream::COLOR)._element);
-    
     batch.setInputFormat(streamFormat);
-    batch.setInputBuffer(VERTICES_SLOT, verticesView);
-    batch.setInputBuffer(NORMALS_SLOT, normalsView);
-    batch.setInputBuffer(COLOR_SLOT, colorView);
-    batch.setIndexBuffer(gpu::UINT8, _solidCubeIndexBuffer, 0);
-    batch.drawIndexed(gpu::TRIANGLES, indices);
+
+    gpu::BufferView colorView(colorBuffer, CUBE_COLOR_ELEMENT);
+    batch.setInputBuffer(gpu::Stream::COLOR, colorView);
+
+    gpu::BufferPointer verticesBuffer = getCubeVertices(size);
+    setupCubeVertices(batch, verticesBuffer);
+
+    batch.setIndexBuffer(gpu::UINT8, getSolidCubeIndices(), 0);
+    batch.drawIndexed(gpu::TRIANGLES, CUBE_INDICES);
 }
 
+
 void GeometryCache::renderWireCube(gpu::Batch& batch, float size, const glm::vec4& color) {
     Vec2Pair colorKey(glm::vec2(color.x, color.y),glm::vec2(color.z, color.y));
-    const int FLOATS_PER_VERTEX = 3;
-    const int VERTICES_PER_EDGE = 2;
-    const int TOP_EDGES = 4;
-    const int BOTTOM_EDGES = 4;
-    const int SIDE_EDGES = 4;
-    const int vertices = 8;
-    const int indices = (TOP_EDGES + BOTTOM_EDGES + SIDE_EDGES) * VERTICES_PER_EDGE;
+    static const int WIRE_CUBE_VERTICES_PER_EDGE = 2;
+    static const int WIRE_CUBE_TOP_EDGES = 4;
+    static const int WIRE_CUBE_BOTTOM_EDGES = 4;
+    static const int WIRE_CUBE_SIDE_EDGES = 4;
+    static const int WIRE_CUBE_VERTICES = 8;
+    static const int WIRE_CUBE_INDICES = (WIRE_CUBE_TOP_EDGES + WIRE_CUBE_BOTTOM_EDGES + WIRE_CUBE_SIDE_EDGES) * WIRE_CUBE_VERTICES_PER_EDGE;
 
     if (!_cubeVerticies.contains(size)) {
         auto verticesBuffer = std::make_shared<gpu::Buffer>();
         _cubeVerticies[size] = verticesBuffer;
 
-        int vertexPoints = vertices * FLOATS_PER_VERTEX;
-        GLfloat* vertexData = new GLfloat[vertexPoints]; // only vertices, no normals because we're a wire cube
+        static const int WIRE_CUBE_VERTEX_POINTS = WIRE_CUBE_VERTICES * FLOATS_PER_VERTEX;
+        GLfloat* vertexData = new GLfloat[WIRE_CUBE_VERTEX_POINTS]; // only vertices, no normals because we're a wire cube
         GLfloat* vertex = vertexData;
         float halfSize = size / 2.0f;
         
@@ -834,15 +868,15 @@ void GeometryCache::renderWireCube(gpu::Batch& batch, float size, const glm::vec
                                       1,-1, 1,   1,-1,-1,  -1,-1,-1,  -1,-1, 1    // v4, v5, v6, v7 (bottom)
                                     };
 
-        for (int i = 0; i < vertexPoints; i++) {
+        for (int i = 0; i < WIRE_CUBE_VERTEX_POINTS; i++) {
             vertex[i] = cannonicalVertices[i] * halfSize;
         }
 
-        verticesBuffer->append(sizeof(GLfloat) * vertexPoints, (gpu::Byte*) vertexData); // I'm skeptical that this is right
+        verticesBuffer->append(sizeof(GLfloat) * WIRE_CUBE_VERTEX_POINTS, (gpu::Byte*) vertexData); // I'm skeptical that this is right
     }
 
     if (!_wireCubeIndexBuffer) {
-        static GLubyte cannonicalIndices[indices]  = { 
+        static GLubyte cannonicalIndices[WIRE_CUBE_INDICES] = {
                                       0, 1,  1, 2,  2, 3,  3, 0, // (top)
                                       4, 5,  5, 6,  6, 7,  7, 4, // (bottom)
                                       0, 4,  1, 5,  2, 6,  3, 7, // (side edges)
@@ -890,7 +924,7 @@ void GeometryCache::renderWireCube(gpu::Batch& batch, float size, const glm::vec
     batch.setInputBuffer(VERTICES_SLOT, verticesView);
     batch.setInputBuffer(COLOR_SLOT, colorView);
     batch.setIndexBuffer(gpu::UINT8, _wireCubeIndexBuffer, 0);
-    batch.drawIndexed(gpu::LINES, indices);
+    batch.drawIndexed(gpu::LINES, WIRE_CUBE_INDICES);
 }
 
 void GeometryCache::renderBevelCornersRect(gpu::Batch& batch, int x, int y, int width, int height, int bevelDistance, const glm::vec4& color, int id) {
diff --git a/libraries/render-utils/src/GeometryCache.h b/libraries/render-utils/src/GeometryCache.h
index 3820b58baf..9ba2658a9c 100644
--- a/libraries/render-utils/src/GeometryCache.h
+++ b/libraries/render-utils/src/GeometryCache.h
@@ -131,6 +131,11 @@ public:
     virtual QSharedPointer<Resource> createResource(const QUrl& url, const QSharedPointer<Resource>& fallback,
                                                     bool delayLoad, const void* extra);
 
+    gpu::BufferPointer getCubeVertices(float size); // cached interleaved position/normal buffer for a cube of edge `size`
+    void setupCubeVertices(gpu::Batch& batch, gpu::BufferPointer& verticesBuffer); // binds the POSITION and NORMAL inputs
+    // Index buffer for the solid cube (8-bit indices); created on first use and then reused.
+    gpu::BufferPointer getSolidCubeIndices();
+
     void renderSphere(gpu::Batch& batch, float radius, int slices, int stacks, const glm::vec3& color, bool solid = true, int id = UNKNOWN_ID) 
                 { renderSphere(batch, radius, slices, stacks, glm::vec4(color, 1.0f), solid, id); }
                 
@@ -139,6 +144,7 @@ public:
     void renderGrid(gpu::Batch& batch, int xDivisions, int yDivisions, const glm::vec4& color);
     void renderGrid(gpu::Batch& batch, int x, int y, int width, int height, int rows, int cols, const glm::vec4& color, int id = UNKNOWN_ID);
 
+    void renderSolidCubeInstances(gpu::Batch& batch, size_t count, gpu::BufferPointer transformBuffer, gpu::BufferPointer colorBuffer);
     void renderSolidCube(gpu::Batch& batch, float size, const glm::vec4& color);
     void renderWireCube(gpu::Batch& batch, float size, const glm::vec4& color);
     void renderBevelCornersRect(gpu::Batch& batch, int x, int y, int width, int height, int bevelDistance, const glm::vec4& color, int id = UNKNOWN_ID);
diff --git a/libraries/render-utils/src/simple.slv b/libraries/render-utils/src/simple.slv
index b22bb36d83..e7fed4a6b4 100644
--- a/libraries/render-utils/src/simple.slv
+++ b/libraries/render-utils/src/simple.slv
@@ -18,6 +18,7 @@
 
 <$declareStandardTransform()$>
 
+uniform bool Instanced = false;
 // the interpolated normal
 
 out vec3 _normal;
@@ -33,6 +34,11 @@ void main(void) {
     // standard transform
     TransformCamera cam = getTransformCamera();
     TransformObject obj = getTransformObject();
-    <$transformModelToClipPos(cam, obj, inPosition, gl_Position)$>
-    <$transformModelToEyeDir(cam, obj, inNormal.xyz, _normal)$>
+    if (Instanced) {
+        <$transformInstancedModelToClipPos(cam, obj, inPosition, gl_Position)$>
+        <$transformInstancedModelToEyeDir(cam, obj, inNormal.xyz, _normal)$>
+    } else {
+        <$transformModelToClipPos(cam, obj, inPosition, gl_Position)$>
+        <$transformModelToEyeDir(cam, obj, inNormal.xyz, _normal)$>
+    }
 }
\ No newline at end of file