new audio edit buffer object / click removal / apply linear fade to sound clip endpoints

2025-04-21 18:44:00 +02:00 · 2014-09-20 04:45:58 -07:00 · 2014-09-20 04:45:58 -07:00 · 47c6ba708f
commit 47c6ba708f
parent 8e6d5da8fd
3 changed files with 133 additions and 1 deletions
--- a/libraries/audio/src/AudioEditBuffer.h
+++ b/libraries/audio/src/AudioEditBuffer.h
@ -0,0 +1,108 @@
+//
+//  AudioEditBuffer.h
+//  hifi
+//
+//  Created by Craig Hansen-Sturm on 8/29/14.
+//  Copyright 2014 High Fidelity, Inc.
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+
+#ifndef hifi_AudioEditBuffer_h
+#define hifi_AudioEditBuffer_h
+
+// Audio frame buffer with simple in-place editing helpers layered on top of
+// AudioFrameBuffer<T>: a search for near-zero frames and gain fades.
+template< typename T >
+class AudioEditBuffer : public AudioFrameBuffer<T> {
+
+public:
+
+    AudioEditBuffer() : AudioFrameBuffer<T>() { }
+
+    AudioEditBuffer(const uint32_t channelCount, const uint32_t frameCount)
+        : AudioFrameBuffer<T>(channelCount, frameCount) { }
+
+    ~AudioEditBuffer() { }
+
+    // Looks for the first frame, beginning at start, in which a channel's sample lies within
+    // [-epsilon, +epsilon]. direction == true walks towards higher frame indices, false walks
+    // towards lower ones. On success the frame index is stored in zero and true is returned;
+    // otherwise zero is set to the frame count and false is returned.
+    bool getZeroCrossing(uint32_t start, bool direction, float32_t epsilon, uint32_t& zero);
+
+    // Scales frames [start, stop) by a linearly changing gain: rising from 0.0 when slope is
+    // true, falling from 1.0 when slope is false.
+    void linearFade(uint32_t start, uint32_t stop, bool slope);
+
+    // Not implemented yet.
+    void exponentialFade(uint32_t start, uint32_t stop, bool slope);
+};
+
+// Searches for the first frame, beginning at start, in which at least one channel holds a
+// sample within [-epsilon, +epsilon].
+//
+//   start     - frame index at which the search begins
+//   direction - true scans towards the end of the buffer, false scans towards frame 0
+//   epsilon   - half-width of the band around zero that counts as a hit
+//   zero      - receives the index of the matching frame, or _frameCount when there is none
+//
+// Returns true when a matching frame was found. A start at or beyond _frameCount never matches.
+template< typename T >
+bool AudioEditBuffer<T>::getZeroCrossing(uint32_t start, bool direction, float32_t epsilon, uint32_t& zero) {
+
+    zero = this->_frameCount;
+
+    if (start >= this->_frameCount) {
+        return false;
+    }
+
+    if (direction) { // scan forward from start to the last frame
+        for (uint32_t i = start; i < this->_frameCount; ++i) {
+            for (uint32_t j = 0; j < this->_channelCount; ++j) {
+                if (this->_frameBuffer[j][i] >= -epsilon && this->_frameBuffer[j][i] <= epsilon) {
+                    zero = i;
+                    return true;
+                }
+            }
+        }
+    }
+    else { // scan backward from start down to and including frame 0
+        // i stays one past the frame under test so that frame 0 is examined as well
+        // without the unsigned index wrapping around
+        for (uint32_t i = start + 1; i != 0; --i) {
+            const uint32_t frame = i - 1;
+            for (uint32_t j = 0; j < this->_channelCount; ++j) {
+                if (this->_frameBuffer[j][frame] >= -epsilon && this->_frameBuffer[j][frame] <= epsilon) {
+                    zero = frame;
+                    return true;
+                }
+            }
+        }
+    }
+
+    return false;
+}
+
+// Applies a linear gain ramp in place to frames [start, stop) of every channel.
+//
+//   start - first frame of the fade (inclusive)
+//   stop  - one past the last frame of the fade (exclusive)
+//   slope - true ramps the gain up from 0.0 in steps of 1/(stop - start) (fade in),
+//           false ramps it down from 1.0 in steps of the same size (fade out)
+//
+// Does nothing when the range is empty, reversed, or extends past _frameCount.
+template< typename T >
+void AudioEditBuffer<T>::linearFade(uint32_t start, uint32_t stop, bool slope) {
+
+    // start < stop <= _frameCount also guarantees that start itself is in range
+    if (start >= stop || stop > this->_frameCount) {
+        return;
+    }
+
+    const uint32_t count = stop - start;
+    const float32_t delta = (slope ? 1.0f : -1.0f) / static_cast<float32_t>(count);
+    float32_t gain = slope ? 0.0f : 1.0f;
+
+    for (uint32_t i = start; i < stop; ++i) {
+        for (uint32_t j = 0; j < this->_channelCount; ++j) {
+            this->_frameBuffer[j][i] *= gain;
+        }
+        // advance once per frame (not once per channel) so that all channels of a frame
+        // share the same gain and the ramp stays within [0.0, 1.0]
+        gain += delta;
+    }
+}
+
+template< typename T >
+void AudioEditBuffer<T>::exponentialFade(uint32_t start, uint32_t stop, bool slope) {
+    // TBD: not implemented yet - currently a no-op that ignores its arguments and leaves the buffer untouched
+}
+
+typedef AudioEditBuffer< float32_t > AudioEditBufferFloat32;  // float32_t instantiation (the one Sound::trimFrames uses)
+typedef AudioEditBuffer< int32_t > AudioEditBufferSInt32;  // int32_t instantiation
+
+#endif // hifi_AudioEditBuffer_h
+
--- a/libraries/audio/src/Sound.cpp
+++ b/libraries/audio/src/Sound.cpp
@ -24,6 +24,9 @@
 #include <SharedUtil.h>

 #include "AudioRingBuffer.h"
+#include "AudioFormat.h"
+#include "AudioBuffer.h"
+#include "AudioEditBuffer.h"
 #include "Sound.h"

 // procedural audio version of Sound
@ -120,6 +123,7 @@ void Sound::replyFinished() {
            //  Process as RAW file
            downSample(rawAudioByteArray);
        }
+        trimFrames();
    } else {
        qDebug() << "Network reply without 'Content-Type'.";
    }
@ -133,7 +137,6 @@ void Sound::replyError(QNetworkReply::NetworkError code) {
 }

 void Sound::downSample(const QByteArray& rawAudioByteArray) {
-
    // assume that this was a RAW file and is now an array of samples that are
    // signed, 16-bit, 48Khz, mono

@ -155,6 +158,26 @@ void Sound::downSample(const QByteArray& rawAudioByteArray) {
    }
 }

+// Fades the start and the end of the clip held in _byteArray to remove clicks at its endpoints.
+// _byteArray is treated as a single channel of int16_t samples; clips that are not longer than
+// both fades combined are left unchanged.
+void Sound::trimFrames() {
+
+    const uint32_t fadeFrameCount = 1024;  // length of the fade applied to each end of the clip
+    const uint32_t totalFrameCount = _byteArray.size() / sizeof(int16_t);
+
+    if (totalFrameCount > (2 * fadeFrameCount)) {
+        int16_t* samples = reinterpret_cast<int16_t*>(_byteArray.data());
+
+        // stage the samples in a float32 edit buffer, ramp both ends, then write them back in place
+        AudioEditBufferFloat32 fadeBuffer(1, totalFrameCount);
+        fadeBuffer.copyFrames(1, totalFrameCount, samples, false /*copy in*/);
+
+        fadeBuffer.linearFade(0, fadeFrameCount, true);
+        fadeBuffer.linearFade(totalFrameCount - fadeFrameCount, totalFrameCount, false);
+
+        fadeBuffer.copyFrames(1, totalFrameCount, samples, true /*copy out*/);
+    }
+}
+
 //
 // Format description from https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
 //
--- a/libraries/audio/src/Sound.h
+++ b/libraries/audio/src/Sound.h
@ -33,6 +33,7 @@ private:
    QByteArray _byteArray;
    bool _hasDownloaded;
    
+    void trimFrames();
    void downSample(const QByteArray& rawAudioByteArray);
    void interpretAsWav(const QByteArray& inputAudioByteArray, QByteArray& outputAudioByteArray);