Merge pull request #6403 from jherico/rec3

Recording fixes
2025-04-20 18:23:54 +02:00 · 2015-11-17 17:52:15 -08:00 · 2015-11-17 17:52:15 -08:00 · 733d93039f
commit 733d93039f
parent b340a99d69 4e57c9114c
8 changed files with 154 additions and 135 deletions
--- a/interface/src/scripting/RecordingScriptingInterface.cpp
+++ b/interface/src/scripting/RecordingScriptingInterface.cpp
@ -47,19 +47,19 @@ RecordingScriptingInterface::RecordingScriptingInterface() {
    connect(audioClient.data(), &AudioClient::inputReceived, this, &RecordingScriptingInterface::processAudioInput);
 }

-bool RecordingScriptingInterface::isPlaying() {
+bool RecordingScriptingInterface::isPlaying() const {
    return _player->isPlaying();
 }

-bool RecordingScriptingInterface::isPaused() {
+bool RecordingScriptingInterface::isPaused() const {
    return _player->isPaused();
 }

-float RecordingScriptingInterface::playerElapsed() {
+float RecordingScriptingInterface::playerElapsed() const {
    return _player->position();
 }

-float RecordingScriptingInterface::playerLength() {
+float RecordingScriptingInterface::playerLength() const {
    return _player->length();
 }

@ -103,6 +103,10 @@ void RecordingScriptingInterface::setPlayerAudioOffset(float audioOffset) {
 }

 void RecordingScriptingInterface::setPlayerTime(float time) {
+    if (QThread::currentThread() != thread()) {
+        QMetaObject::invokeMethod(this, "setPlayerTime", Qt::BlockingQueuedConnection, Q_ARG(float, time));
+        return;
+    }
    _player->seek(time);
 }

@ -130,23 +134,27 @@ void RecordingScriptingInterface::setPlayerUseSkeletonModel(bool useSkeletonMode
    _useSkeletonModel = useSkeletonModel;
 }

-void RecordingScriptingInterface::play() {
-    _player->play();
-}
-
 void RecordingScriptingInterface::pausePlayer() {
+    if (QThread::currentThread() != thread()) {
+        QMetaObject::invokeMethod(this, "pausePlayer", Qt::BlockingQueuedConnection);
+        return;
+    }
    _player->pause();
 }

 void RecordingScriptingInterface::stopPlaying() {
+    if (QThread::currentThread() != thread()) {
+        QMetaObject::invokeMethod(this, "stopPlaying", Qt::BlockingQueuedConnection);
+        return;
+    }
    _player->stop();
 }

-bool RecordingScriptingInterface::isRecording() {
+bool RecordingScriptingInterface::isRecording() const {
    return _recorder->isRecording();
 }

-float RecordingScriptingInterface::recorderElapsed() {
+float RecordingScriptingInterface::recorderElapsed() const {
    return _recorder->position();
 }

@ -162,26 +170,15 @@ void RecordingScriptingInterface::startRecording() {
    }

    _recordingEpoch = Frame::epochForFrameTime(0);
-
-    auto myAvatar = DependencyManager::get<AvatarManager>()->getMyAvatar();
-    myAvatar->setRecordingBasis();
+    DependencyManager::get<AvatarManager>()->getMyAvatar()->setRecordingBasis();
    _recorder->start();
 }

 void RecordingScriptingInterface::stopRecording() {
    _recorder->stop();
-
    _lastClip = _recorder->getClip();
-    // post-process the audio into discreet chunks based on times of received samples
    _lastClip->seek(0);
-    Frame::ConstPointer frame;
-    while (frame = _lastClip->nextFrame()) {
-        qDebug() << "Frame time " << frame->timeOffset << " size " << frame->data.size();
-    }
-    _lastClip->seek(0);
-
-    auto myAvatar = DependencyManager::get<AvatarManager>()->getMyAvatar();
-    myAvatar->clearRecordingBasis();
+    DependencyManager::get<AvatarManager>()->getMyAvatar()->clearRecordingBasis();
 }

 void RecordingScriptingInterface::saveRecording(const QString& filename) {
--- a/interface/src/scripting/RecordingScriptingInterface.h
+++ b/interface/src/scripting/RecordingScriptingInterface.h
@ -25,12 +25,17 @@ public:
    RecordingScriptingInterface();

 public slots:
-    bool isPlaying();
-    bool isPaused();
-    float playerElapsed();
-    float playerLength();
    void loadRecording(const QString& filename);
+
    void startPlaying();
+    void pausePlayer();
+    void stopPlaying();
+    bool isPlaying() const;
+    bool isPaused() const;
+
+    float playerElapsed() const;
+    float playerLength() const;
+
    void setPlayerVolume(float volume);
    void setPlayerAudioOffset(float audioOffset);
    void setPlayerTime(float time);
@ -40,13 +45,13 @@ public slots:
    void setPlayerUseAttachments(bool useAttachments);
    void setPlayerUseHeadModel(bool useHeadModel);
    void setPlayerUseSkeletonModel(bool useSkeletonModel);
-    void play();
-    void pausePlayer();
-    void stopPlaying();
-    bool isRecording();
-    float recorderElapsed();
+
    void startRecording();
    void stopRecording();
+    bool isRecording() const;
+
+    float recorderElapsed() const;
+
    void saveRecording(const QString& filename);
    void loadLastRecording();

--- a/libraries/audio-client/src/AudioClient.cpp
+++ b/libraries/audio-client/src/AudioClient.cpp
@ -743,19 +743,9 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
 }

 void AudioClient::handleAudioInput() {
-    if (!_audioPacket) {
-        // we don't have an audioPacket yet - set that up now
-        _audioPacket = NLPacket::create(PacketType::MicrophoneAudioNoEcho);
-    }
-
    const float inputToNetworkInputRatio = calculateDeviceToNetworkInputRatio();
-    
    const int inputSamplesRequired = (int)((float)AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * inputToNetworkInputRatio);
    const auto inputAudioSamples = std::unique_ptr<int16_t[]>(new int16_t[inputSamplesRequired]);
-
-    static const int leadingBytes = sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
-    int16_t* const networkAudioSamples = (int16_t*)(_audioPacket->getPayload() + leadingBytes);
-
    QByteArray inputByteArray = _inputDevice->readAll();

    //  Add audio source injection if enabled
@ -784,30 +774,30 @@ void AudioClient::handleAudioInput() {
    float audioInputMsecsRead = inputByteArray.size() / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC));
    _stats.updateInputMsecsRead(audioInputMsecsRead);

-    while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) {
+    const int numNetworkBytes = _isStereoInput
+        ? AudioConstants::NETWORK_FRAME_BYTES_STEREO
+        : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
+    const int numNetworkSamples = _isStereoInput
+        ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO
+        : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;

-        const int numNetworkBytes = _isStereoInput
-            ? AudioConstants::NETWORK_FRAME_BYTES_STEREO
-            : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
-        const int numNetworkSamples = _isStereoInput
-            ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO
-            : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
+    static int16_t networkAudioSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
+
+    while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) {

        if (!_muted) {

-            // zero out the monoAudioSamples array and the locally injected audio
-            memset(networkAudioSamples, 0, numNetworkBytes);

            //  Increment the time since the last clip
            if (_timeSinceLastClip >= 0.0f) {
-                _timeSinceLastClip += (float) numNetworkSamples / (float) AudioConstants::SAMPLE_RATE;
+                _timeSinceLastClip += (float)numNetworkSamples / (float)AudioConstants::SAMPLE_RATE;
            }

            _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired);
            possibleResampling(_inputToNetworkResampler,
-                               inputAudioSamples.get(), networkAudioSamples,
-                               inputSamplesRequired, numNetworkSamples,
-                               _inputFormat, _desiredInputFormat);
+                inputAudioSamples.get(), networkAudioSamples,
+                inputSamplesRequired, numNetworkSamples,
+                _inputFormat, _desiredInputFormat);

            //  Remove DC offset
            if (!_isStereoInput && !_audioSourceInjectEnabled) {
@ -829,7 +819,7 @@ void AudioClient::handleAudioInput() {

                for (int i = 0; i < numNetworkSamples; i++) {
                    int thisSample = std::abs(networkAudioSamples[i]);
-                    loudness += (float) thisSample;
+                    loudness += (float)thisSample;

                    if (thisSample > (AudioConstants::MAX_SAMPLE_VALUE * AudioNoiseGate::CLIPPING_THRESHOLD)) {
                        _timeSinceLastClip = 0.0f;
@ -839,7 +829,7 @@ void AudioClient::handleAudioInput() {
                _lastInputLoudness = fabs(loudness / numNetworkSamples);
            }

-            emit inputReceived({reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes});
+            emit inputReceived({ reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes });

        } else {
            // our input loudness is 0, since we're muted
@ -849,14 +839,38 @@ void AudioClient::handleAudioInput() {
            _inputRingBuffer.shiftReadPosition(inputSamplesRequired);
        }

-        auto nodeList = DependencyManager::get<NodeList>();
-        SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer);
+        emitAudioPacket(networkAudioSamples);
+    }
+}

-        if (audioMixer && audioMixer->getActiveSocket()) {
-            glm::vec3 headPosition = _positionGetter();
-            glm::quat headOrientation = _orientationGetter();
-            quint8 isStereo = _isStereoInput ? 1 : 0;
+void AudioClient::emitAudioPacket(const int16_t* audioData, PacketType packetType) {
+    static std::mutex _mutex;
+    using Locker = std::unique_lock<std::mutex>;

+    // FIXME recorded audio isn't guaranteed to have the same stereo state
+    // as the current system
+    const int numNetworkBytes = _isStereoInput
+        ? AudioConstants::NETWORK_FRAME_BYTES_STEREO
+        : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
+    const int numNetworkSamples = _isStereoInput
+        ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO
+        : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
+
+    auto nodeList = DependencyManager::get<NodeList>();
+    SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer);
+
+    if (audioMixer && audioMixer->getActiveSocket()) {
+        Locker lock(_mutex);
+        if (!_audioPacket) {
+            // we don't have an audioPacket yet - set that up now
+            _audioPacket = NLPacket::create(PacketType::MicrophoneAudioWithEcho);
+        }
+
+        glm::vec3 headPosition = _positionGetter();
+        glm::quat headOrientation = _orientationGetter();
+        quint8 isStereo = _isStereoInput ? 1 : 0;
+
+        if (packetType == PacketType::Unknown) {
            if (_lastInputLoudness == 0) {
                _audioPacket->setType(PacketType::SilentAudioFrame);
            } else {
@ -866,70 +880,52 @@ void AudioClient::handleAudioInput() {
                    _audioPacket->setType(PacketType::MicrophoneAudioNoEcho);
                }
            }
-
-            // reset the audio packet so we can start writing
-            _audioPacket->reset();
-
-            // write sequence number
-            _audioPacket->writePrimitive(_outgoingAvatarAudioSequenceNumber);
-
-            if (_audioPacket->getType() == PacketType::SilentAudioFrame) {
-                // pack num silent samples
-                quint16 numSilentSamples = numNetworkSamples;
-                _audioPacket->writePrimitive(numSilentSamples);
-            } else {
-                // set the mono/stereo byte
-                _audioPacket->writePrimitive(isStereo);
-            }
-
-            // pack the three float positions
-            _audioPacket->writePrimitive(headPosition);
-
-            // pack the orientation
-            _audioPacket->writePrimitive(headOrientation);
-            
-            if (_audioPacket->getType() != PacketType::SilentAudioFrame) {
-                // audio samples have already been packed (written to networkAudioSamples)
-                _audioPacket->setPayloadSize(_audioPacket->getPayloadSize() + numNetworkBytes);
-            }
-            
-            _stats.sentPacket();
-
-            nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::SendAudioPacket);
-
-            nodeList->sendUnreliablePacket(*_audioPacket, *audioMixer);
-
-            _outgoingAvatarAudioSequenceNumber++;
+        } else {
+            _audioPacket->setType(packetType);
        }
+
+        // reset the audio packet so we can start writing
+        _audioPacket->reset();
+
+        // write sequence number
+        _audioPacket->writePrimitive(_outgoingAvatarAudioSequenceNumber);
+
+        if (_audioPacket->getType() == PacketType::SilentAudioFrame) {
+            // pack num silent samples
+            quint16 numSilentSamples = numNetworkSamples;
+            _audioPacket->writePrimitive(numSilentSamples);
+        } else {
+            // set the mono/stereo byte
+            _audioPacket->writePrimitive(isStereo);
+        }
+
+        // pack the three float positions
+        _audioPacket->writePrimitive(headPosition);
+
+        // pack the orientation
+        _audioPacket->writePrimitive(headOrientation);
+
+        if (_audioPacket->getType() != PacketType::SilentAudioFrame) {
+            // audio samples have already been packed (written to networkAudioSamples)
+            _audioPacket->setPayloadSize(_audioPacket->getPayloadSize() + numNetworkBytes);
+        }
+
+        static const int leadingBytes = sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
+        int16_t* const networkAudioSamples = (int16_t*)(_audioPacket->getPayload() + leadingBytes);
+        memcpy(networkAudioSamples, audioData, numNetworkBytes);
+
+        _stats.sentPacket();
+
+        nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::SendAudioPacket);
+
+        nodeList->sendUnreliablePacket(*_audioPacket, *audioMixer);
+
+        _outgoingAvatarAudioSequenceNumber++;
    }
 }

 void AudioClient::handleRecordedAudioInput(const QByteArray& audio) {
-    if (!_audioPacket) {
-        // we don't have an audioPacket yet - set that up now
-        _audioPacket = NLPacket::create(PacketType::MicrophoneAudioWithEcho);
-    }
-
-    // FIXME either discard stereo in the recording or record a stereo flag
-
-    auto nodeList = DependencyManager::get<NodeList>();
-    SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer);
-    if (audioMixer && audioMixer->getActiveSocket()) {
-        glm::vec3 headPosition = _positionGetter();
-        glm::quat headOrientation = _orientationGetter();
-        quint8 isStereo = _isStereoInput ? 1 : 0;
-        _audioPacket->reset();
-        _audioPacket->setType(PacketType::MicrophoneAudioWithEcho);
-        _audioPacket->writePrimitive(_outgoingAvatarAudioSequenceNumber);
-        _audioPacket->writePrimitive(isStereo);
-        _audioPacket->writePrimitive(headPosition);
-        _audioPacket->writePrimitive(headOrientation);
-        _audioPacket->write(audio);
-        _stats.sentPacket();
-        nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::SendAudioPacket);
-        nodeList->sendUnreliablePacket(*_audioPacket, *audioMixer);
-        _outgoingAvatarAudioSequenceNumber++;
-    }
+    emitAudioPacket((int16_t*)audio.data(), PacketType::MicrophoneAudioWithEcho);
 }

 void AudioClient::processReceivedSamples(const QByteArray& inputBuffer, QByteArray& outputBuffer) {
--- a/libraries/audio-client/src/AudioClient.h
+++ b/libraries/audio-client/src/AudioClient.h
@ -212,6 +212,7 @@ protected:
    }

 private:
+    void emitAudioPacket(const int16_t* audioData, PacketType packetType = PacketType::Unknown);
    void outputFormatChanged();

    QByteArray firstInputFrame;
--- a/libraries/avatars/CMakeLists.txt
+++ b/libraries/avatars/CMakeLists.txt
@ -1,3 +1,3 @@
 set(TARGET_NAME avatars)
 setup_hifi_library(Network Script)
-link_hifi_libraries(audio shared networking recording)
+link_hifi_libraries(shared networking)
--- a/libraries/avatars/src/AvatarData.cpp
+++ b/libraries/avatars/src/AvatarData.cpp
@ -33,7 +33,6 @@
 #include <StreamUtils.h>
 #include <UUID.h>
 #include <shared/JSONHelpers.h>
-#include <recording/Frame.h>

 #include "AvatarLogging.h"

@ -1443,14 +1442,10 @@ QByteArray AvatarData::toFrame(const AvatarData& avatar) {

    auto recordingBasis = avatar.getRecordingBasis();
    if (recordingBasis) {
+        root[JSON_AVATAR_BASIS] = Transform::toJson(*recordingBasis);
        // Find the relative transform
        auto relativeTransform = recordingBasis->relativeTransform(avatar.getTransform());
-
-        // if the resulting relative basis is identity, we shouldn't record anything
-        if (!relativeTransform.isIdentity()) {
-            root[JSON_AVATAR_RELATIVE] = Transform::toJson(relativeTransform);
-            root[JSON_AVATAR_BASIS] = Transform::toJson(*recordingBasis);
-        }
+        root[JSON_AVATAR_RELATIVE] = Transform::toJson(relativeTransform);
    } else {
        root[JSON_AVATAR_RELATIVE] = Transform::toJson(avatar.getTransform());
    }
@ -1484,6 +1479,9 @@ QByteArray AvatarData::toFrame(const AvatarData& avatar) {

 void AvatarData::fromFrame(const QByteArray& frameData, AvatarData& result) {
    QJsonDocument doc = QJsonDocument::fromBinaryData(frameData);
+#ifdef WANT_JSON_DEBUG
+    qDebug() << doc.toJson(QJsonDocument::JsonFormat::Indented);
+#endif
    QJsonObject root = doc.object();

    if (root.contains(JSON_AVATAR_HEAD_MODEL)) {
--- a/libraries/avatars/src/AvatarData.h
+++ b/libraries/avatars/src/AvatarData.h
@ -50,7 +50,6 @@ typedef unsigned long long quint64;
 #include <Node.h>
 #include <RegisteredMetaTypes.h>
 #include <SimpleMovingAverage.h>
-#include <recording/Forward.h>

 #include "AABox.h"
 #include "HandData.h"
--- a/libraries/recording/src/recording/Deck.cpp
+++ b/libraries/recording/src/recording/Deck.cpp
@ -8,6 +8,8 @@

 #include "Deck.h"
 
+#include <QtCore/QThread>
+
 #include <NumericalConstants.h>
 #include <SharedUtil.h>

@ -101,9 +103,13 @@ float Deck::position() const {
 }

 static const Frame::Time MIN_FRAME_WAIT_INTERVAL = Frame::secondsToFrameTime(0.001f);
-static const Frame::Time MAX_FRAME_PROCESSING_TIME = Frame::secondsToFrameTime(0.002f);
+static const Frame::Time MAX_FRAME_PROCESSING_TIME = Frame::secondsToFrameTime(0.004f);

 void Deck::processFrames() {
+    if (qApp->thread() != QThread::currentThread()) {
+        qWarning() << "Processing frames must only happen on the main thread.";
+        return;
+    }
    Locker lock(_mutex);
    if (_pause) {
        return;
@ -115,10 +121,17 @@ void Deck::processFrames() {
    // FIXME add code to start dropping frames if we fall behind.
    // Alternatively, add code to cache frames here and then process only the last frame of a given type
    // ... the latter will work for Avatar, but not well for audio I suspect.
+    bool overLimit = false;
    for (nextClip = getNextClip(); nextClip; nextClip = getNextClip()) {
        auto currentPosition = Frame::frameTimeFromEpoch(_startEpoch);
        if ((currentPosition - startingPosition) >= MAX_FRAME_PROCESSING_TIME) {
            qCWarning(recordingLog) << "Exceeded maximum frame processing time, breaking early";
+#ifdef WANT_RECORDING_DEBUG            
+            qCDebug(recordingLog) << "Starting: " << currentPosition;
+            qCDebug(recordingLog) << "Current:  " << startingPosition;
+            qCDebug(recordingLog) << "Trigger:  " << triggerPosition;
+#endif
+            overLimit = true;
            break;
        }

@ -150,9 +163,19 @@ void Deck::processFrames() {

    // If we have more clip frames available, set the timer for the next one
    _position = Frame::frameTimeFromEpoch(_startEpoch);
-    auto nextFrameTime = nextClip->positionFrameTime();
-    auto interval = Frame::frameTimeToMilliseconds(nextFrameTime - _position);
-    _timer.singleShot(interval, [this] {
+    int nextInterval = 1;
+    if (!overLimit) {
+        auto nextFrameTime = nextClip->positionFrameTime();
+        nextInterval = (int)Frame::frameTimeToMilliseconds(nextFrameTime - _position);
+#ifdef WANT_RECORDING_DEBUG
+        qCDebug(recordingLog) << "Now " << _position;
+        qCDebug(recordingLog) << "Next frame time " << nextInterval;
+#endif
+    }
+#ifdef WANT_RECORDING_DEBUG
+    qCDebug(recordingLog) << "Setting timer for next processing " << nextInterval;
+#endif
+    _timer.singleShot(nextInterval, [this] {
        processFrames();
    });
 }