Merge pull request #9309 from kencooke/audio-hdr-volume

High Dynamic Range Audio injectors
2025-07-06 15:09:15 +02:00 · 2017-01-05 08:10:54 -08:00 · 2017-01-05 08:10:54 -08:00 · 825fc1cfa1
commit 825fc1cfa1
parent e78582ebae 719c89e800
7 changed files with 104 additions and 65 deletions
--- a/assignment-client/src/audio/AudioMixerSlave.cpp
+++ b/assignment-client/src/audio/AudioMixerSlave.cpp
@ -32,6 +32,7 @@
 #include "AudioMixerClientData.h"
 #include "AvatarAudioStream.h"
 #include "InjectedAudioStream.h"
+#include "AudioHelpers.h"

 #include "AudioMixerSlave.h"

@ -406,63 +407,6 @@ void AudioMixerSlave::addStreamToMix(AudioMixerClientData& listenerNodeData, con
                AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL);
 }

-const int IEEE754_MANT_BITS = 23;
-const int IEEE754_EXPN_BIAS = 127;
-
-//
-// for x  > 0.0f, returns log2(x)
-// for x <= 0.0f, returns large negative value
-//
-// abs |error| < 8e-3, smooth (exact for x=2^N) for NPOLY=3
-// abs |error| < 2e-4, smooth (exact for x=2^N) for NPOLY=5
-// rel |error| < 0.4 from precision loss very close to 1.0f
-//
-static inline float fastlog2(float x) {
-
-    union { float f; int32_t i; } mant, bits = { x };
-
-    // split into mantissa and exponent
-    mant.i = (bits.i & ((1 << IEEE754_MANT_BITS) - 1)) | (IEEE754_EXPN_BIAS << IEEE754_MANT_BITS);
-    int32_t expn = (bits.i >> IEEE754_MANT_BITS) - IEEE754_EXPN_BIAS;
-
-    mant.f -= 1.0f;
-
-    // polynomial for log2(1+x) over x=[0,1]
-    //x = (-0.346555386f * mant.f + 1.346555386f) * mant.f;
-    x = (((-0.0821307180f * mant.f + 0.321188984f) * mant.f - 0.677784014f) * mant.f + 1.43872575f) * mant.f;
-
-    return x + expn;
-}
-
-//
-// for -126 <= x < 128, returns exp2(x)
-//
-// rel |error| < 3e-3, smooth (exact for x=N) for NPOLY=3
-// rel |error| < 9e-6, smooth (exact for x=N) for NPOLY=5
-//
-static inline float fastexp2(float x) {
-
-    union { float f; int32_t i; } xi;
-
-    // bias such that x > 0
-    x += IEEE754_EXPN_BIAS;
-    //x = MAX(x, 1.0f);
-    //x = MIN(x, 254.9999f);
-
-    // split into integer and fraction
-    xi.i = (int32_t)x;
-    x -= xi.i;
-
-    // construct exp2(xi) as a float
-    xi.i <<= IEEE754_MANT_BITS;
-
-    // polynomial for exp2(x) over x=[0,1]
-    //x = (0.339766028f * x + 0.660233972f) * x + 1.0f;
-    x = (((0.0135557472f * x + 0.0520323690f) * x + 0.241379763f) * x + 0.693032121f) * x + 1.0f;
-
-    return x * xi.f;
-}
-
 float AudioMixerSlave::gainForSource(const AvatarAudioStream& listeningNodeStream, const PositionalAudioStream& streamToAdd,
        const glm::vec3& relativePosition, bool isEcho) {
    float gain = 1.0f;
@ -514,7 +458,7 @@ float AudioMixerSlave::gainForSource(const AvatarAudioStream& listeningNodeStrea
        g = (g > 1.0f) ? 1.0f : g;

        // calculate the distance coefficient using the distance to this node
-        float distanceCoefficient = fastexp2(fastlog2(g) * fastlog2(distanceBetween/ATTENUATION_BEGINS_AT_DISTANCE));
+        float distanceCoefficient = fastExp2f(fastLog2f(g) * fastLog2f(distanceBetween/ATTENUATION_BEGINS_AT_DISTANCE));

        // multiply the current attenuation coefficient by the distance coefficient
        gain *= distanceCoefficient;
--- a/libraries/audio/src/AudioInjector.cpp
+++ b/libraries/audio/src/AudioInjector.cpp
@ -25,6 +25,7 @@
 #include "AudioLogging.h"
 #include "SoundCache.h"
 #include "AudioSRC.h"
+#include "AudioHelpers.h"

 int audioInjectorPtrMetaTypeId = qRegisterMetaType<AudioInjector*>();

@ -187,7 +188,7 @@ bool AudioInjector::injectLocally() {
    return success;
 }

-const uchar MAX_INJECTOR_VOLUME = 0xFF;
+const uchar MAX_INJECTOR_VOLUME = packFloatGainToByte(1.0f);
 static const int64_t NEXT_FRAME_DELTA_ERROR_OR_FINISHED = -1;
 static const int64_t NEXT_FRAME_DELTA_IMMEDIATELY = 0;

@ -333,7 +334,7 @@ int64_t AudioInjector::injectNextFrame() {
    _currentPacket->writePrimitive(_options.position);
    _currentPacket->writePrimitive(_options.orientation);

-    quint8 volume = MAX_INJECTOR_VOLUME * _options.volume;
+    quint8 volume = packFloatGainToByte(_options.volume);
    _currentPacket->seek(volumeOptionOffset);
    _currentPacket->writePrimitive(volume);

--- a/libraries/audio/src/AudioInjector.h
+++ b/libraries/audio/src/AudioInjector.h
@ -63,7 +63,7 @@ public:
    AudioFOA& getLocalFOA() { return _localFOA; }

    bool isLocalOnly() const { return _options.localOnly; }
-    float getVolume() const { return glm::clamp(_options.volume, 0.0f, 1.0f); }
+    float getVolume() const { return _options.volume; }
    glm::vec3 getPosition() const { return _options.position; }
    glm::quat getOrientation() const { return _options.orientation; }
    bool isStereo() const { return _options.stereo; }
--- a/libraries/audio/src/InjectedAudioStream.cpp
+++ b/libraries/audio/src/InjectedAudioStream.cpp
@ -18,6 +18,7 @@
 #include <UUID.h>

 #include "InjectedAudioStream.h"
+#include "AudioHelpers.h"

 InjectedAudioStream::InjectedAudioStream(const QUuid& streamIdentifier, bool isStereo, int numStaticJitterFrames) :
    PositionalAudioStream(PositionalAudioStream::Injector, isStereo, numStaticJitterFrames),
@ -25,8 +26,6 @@ InjectedAudioStream::InjectedAudioStream(const QUuid& streamIdentifier, bool isS
    _radius(0.0f),
    _attenuationRatio(0) {} 

-const uchar MAX_INJECTOR_VOLUME = 255;
-
 int InjectedAudioStream::parseStreamProperties(PacketType type,
                                               const QByteArray& packetAfterSeqNum,
                                               int& numAudioSamples) {
@ -62,7 +61,7 @@ int InjectedAudioStream::parseStreamProperties(PacketType type,

    quint8 attenuationByte = 0;
    packetStream >> attenuationByte;
-    _attenuationRatio = attenuationByte / (float)MAX_INJECTOR_VOLUME;
+    _attenuationRatio = unpackFloatGainFromByte(attenuationByte);
    
    packetStream >> _ignorePenumbra;
    
--- a/libraries/networking/src/udt/PacketHeaders.cpp
+++ b/libraries/networking/src/udt/PacketHeaders.cpp
@ -78,7 +78,7 @@ PacketVersion versionForPacketType(PacketType packetType) {
        case PacketType::MicrophoneAudioNoEcho:
        case PacketType::MicrophoneAudioWithEcho:
        case PacketType::AudioStreamStats:
-            return static_cast<PacketVersion>(AudioVersion::SpaceBubbleChanges);
+            return static_cast<PacketVersion>(AudioVersion::HighDynamicRangeVolume);

        default:
            return 17;
--- a/libraries/networking/src/udt/PacketHeaders.h
+++ b/libraries/networking/src/udt/PacketHeaders.h
@ -245,6 +245,7 @@ enum class AudioVersion : PacketVersion {
    TerminatingStreamStats,
    SpaceBubbleChanges,
    HasPersonalMute,
+    HighDynamicRangeVolume,
 };

 #endif // hifi_PacketHeaders_h
--- a/libraries/shared/src/AudioHelpers.h
+++ b/libraries/shared/src/AudioHelpers.h
@ -0,0 +1,94 @@
+//
+//  AudioHelpers.h
+//  libraries/shared/src
+//
+//  Created by Ken Cooke on 1/4/17.
+//  Copyright 2017 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+
+#ifndef hifi_AudioHelpers_h
+#define hifi_AudioHelpers_h
+
+#include <stdint.h>
+
+const int IEEE754_MANT_BITS = 23;
+const int IEEE754_EXPN_BIAS = 127;
+
+// Fast approximate log2(x): exponent read from the float's bit pattern plus a polynomial on the mantissa.
+// for x  > 0.0f, returns log2(x)
+// for x <= 0.0f, returns large negative value
+//
+// abs |error| < 2e-4, smooth (exact for x=2^N)
+// rel |error| < 0.4 from precision loss very close to 1.0f
+// NOTE(review): assumes float is 32-bit IEEE-754 and that the compiler supports union type punning
+static inline float fastLog2f(float x) {
+
+    union { float f; int32_t i; } mant, bits = { x };
+
+    // split into mantissa and exponent: mant keeps the 23 fraction bits under a biased exponent of 127, so mant.f is in [1,2)
+    mant.i = (bits.i & ((1 << IEEE754_MANT_BITS) - 1)) | (IEEE754_EXPN_BIAS << IEEE754_MANT_BITS);
+    int32_t expn = (bits.i >> IEEE754_MANT_BITS) - IEEE754_EXPN_BIAS;  // unbiased exponent; sign bit is not masked off
+
+    mant.f -= 1.0f;  // fraction in [0,1)
+
+    // polynomial for log2(1+x) over x=[0,1], evaluated in Horner form (zero at x=0, so powers of two are exact)
+    x = (((-0.0821307180f * mant.f + 0.321188984f) * mant.f - 0.677784014f) * mant.f + 1.43872575f) * mant.f;
+
+    return x + expn;  // log2(x) = exponent + log2(1 + fraction)
+}
+
+// Fast approximate exp2(x): 2^(integer part) is built in the float exponent field, times a polynomial on the fraction.
+// for -126 <= x < 128, returns exp2(x)
+// for -127 <= x < -126, returns 0.0f (exponent field is zero); otherwise, returns undefined
+//
+// rel |error| < 9e-6, smooth (exact for x=N)
+// NOTE(review): assumes float is 32-bit IEEE-754 and that the compiler supports union type punning
+static inline float fastExp2f(float x) {
+
+    union { float f; int32_t i; } xi;
+
+    // bias such that x >= 0 for in-range input, so the integer part is the biased exponent
+    x += IEEE754_EXPN_BIAS;
+
+    // split into integer and fraction
+    xi.i = (int32_t)x;  // conversion truncates toward zero
+    x -= xi.i;          // fraction in [0,1) for in-range input
+
+    // construct exp2(xi) as a float: shift the biased integer part into the exponent field, mantissa bits zero
+    xi.i <<= IEEE754_MANT_BITS;
+
+    // polynomial for exp2(x) over x=[0,1], evaluated in Horner form (equals 1.0f at x=0, so integer inputs are exact)
+    x = (((0.0135557472f * x + 0.0520323690f) * x + 0.241379763f) * x + 0.693032121f) * x + 1.0f;
+
+    return x * xi.f;  // 2^fraction * 2^integer
+}
+
+//
+// Quantize a non-negative gain value to the nearest 0.5dB, and pack to a byte.
+// Encoding: byte = round(log2(gain) * 12.04 + 195), so +0dB packs to 195 and +30dB packs to 255.
+// Values above +30dB are clamped to +30dB
+// Values below -97dB are clamped to -inf
+// Value of 1.0 (+0dB) is reconstructed exactly
+//
+const float GAIN_CONVERSION_RATIO = 2.0f * 6.02059991f; // scale log2 to 0.5dB steps (6.0206dB per doubling of gain)
+const float GAIN_CONVERSION_OFFSET = 255 - 60.0f;       // translate +30dB (60 half-dB steps above 0dB) to max byte value
+
+static inline uint8_t packFloatGainToByte(float gain) {
+
+    float f = fastLog2f(gain) * GAIN_CONVERSION_RATIO + GAIN_CONVERSION_OFFSET;  // gain in half-dB steps, offset so 0dB = 195
+    int32_t i = (int32_t)(f + 0.5f);                    // quantize: round to nearest (conversion truncates toward zero)
+    
+    uint8_t byte = (i < 0) ? 0 : ((i > 255) ? 255 : i); // clamp to [0, 255]; 0 means zero gain
+    return byte;
+}
+
+static inline float unpackFloatGainFromByte(uint8_t byte) {
+    // byte 0 decodes to zero gain; any other byte is a count of 0.5dB steps (offset by 195), converted back to linear via exp2
+    float gain = (byte == 0) ? 0.0f : fastExp2f((byte - GAIN_CONVERSION_OFFSET) * (1.0f/GAIN_CONVERSION_RATIO));
+    return gain;
+}
+
+#endif // hifi_AudioHelpers_h