Merge pull request #6403 from jherico/rec3

Recording fixes
Brad Hefta-Gaub committed 2015-11-17 17:52:15 -08:00
commit 733d93039f
8 changed files with 154 additions and 135 deletions

RecordingScriptingInterface.cpp

@@ -47,19 +47,19 @@ RecordingScriptingInterface::RecordingScriptingInterface() {
     connect(audioClient.data(), &AudioClient::inputReceived, this, &RecordingScriptingInterface::processAudioInput);
 }
 
-bool RecordingScriptingInterface::isPlaying() {
+bool RecordingScriptingInterface::isPlaying() const {
     return _player->isPlaying();
 }
 
-bool RecordingScriptingInterface::isPaused() {
+bool RecordingScriptingInterface::isPaused() const {
     return _player->isPaused();
 }
 
-float RecordingScriptingInterface::playerElapsed() {
+float RecordingScriptingInterface::playerElapsed() const {
     return _player->position();
 }
 
-float RecordingScriptingInterface::playerLength() {
+float RecordingScriptingInterface::playerLength() const {
     return _player->length();
 }
@@ -103,6 +103,10 @@ void RecordingScriptingInterface::setPlayerAudioOffset(float audioOffset) {
 }
 
 void RecordingScriptingInterface::setPlayerTime(float time) {
+    if (QThread::currentThread() != thread()) {
+        QMetaObject::invokeMethod(this, "setPlayerTime", Qt::BlockingQueuedConnection, Q_ARG(float, time));
+        return;
+    }
     _player->seek(time);
 }
@@ -130,23 +134,27 @@ void RecordingScriptingInterface::setPlayerUseSkeletonModel(bool useSkeletonMode
     _useSkeletonModel = useSkeletonModel;
 }
 
-void RecordingScriptingInterface::play() {
-    _player->play();
-}
-
 void RecordingScriptingInterface::pausePlayer() {
+    if (QThread::currentThread() != thread()) {
+        QMetaObject::invokeMethod(this, "pausePlayer", Qt::BlockingQueuedConnection);
+        return;
+    }
     _player->pause();
 }
 
 void RecordingScriptingInterface::stopPlaying() {
+    if (QThread::currentThread() != thread()) {
+        QMetaObject::invokeMethod(this, "stopPlaying", Qt::BlockingQueuedConnection);
+        return;
+    }
     _player->stop();
 }
 
-bool RecordingScriptingInterface::isRecording() {
+bool RecordingScriptingInterface::isRecording() const {
     return _recorder->isRecording();
 }
 
-float RecordingScriptingInterface::recorderElapsed() {
+float RecordingScriptingInterface::recorderElapsed() const {
     return _recorder->position();
 }
@@ -162,26 +170,15 @@ void RecordingScriptingInterface::startRecording() {
     }
 
     _recordingEpoch = Frame::epochForFrameTime(0);
-    DependencyManager::get<AvatarManager>()->getMyAvatar()->setRecordingBasis();
+    auto myAvatar = DependencyManager::get<AvatarManager>()->getMyAvatar();
+    myAvatar->setRecordingBasis();
     _recorder->start();
 }
 
 void RecordingScriptingInterface::stopRecording() {
     _recorder->stop();
     _lastClip = _recorder->getClip();
-
-    // post-process the audio into discreet chunks based on times of received samples
     _lastClip->seek(0);
-    Frame::ConstPointer frame;
-    while (frame = _lastClip->nextFrame()) {
-        qDebug() << "Frame time " << frame->timeOffset << " size " << frame->data.size();
-    }
-    _lastClip->seek(0);
-
-    auto myAvatar = DependencyManager::get<AvatarManager>()->getMyAvatar();
-    myAvatar->clearRecordingBasis();
+    DependencyManager::get<AvatarManager>()->getMyAvatar()->clearRecordingBasis();
 }
 
 void RecordingScriptingInterface::saveRecording(const QString& filename) {
void RecordingScriptingInterface::saveRecording(const QString& filename) { void RecordingScriptingInterface::saveRecording(const QString& filename) {

RecordingScriptingInterface.h

@@ -25,12 +25,17 @@ public:
     RecordingScriptingInterface();
 
 public slots:
-    bool isPlaying();
-    bool isPaused();
-    float playerElapsed();
-    float playerLength();
     void loadRecording(const QString& filename);
 
     void startPlaying();
+    void pausePlayer();
+    void stopPlaying();
+
+    bool isPlaying() const;
+    bool isPaused() const;
+    float playerElapsed() const;
+    float playerLength() const;
     void setPlayerVolume(float volume);
     void setPlayerAudioOffset(float audioOffset);
     void setPlayerTime(float time);
@@ -40,13 +45,13 @@ public slots:
     void setPlayerUseAttachments(bool useAttachments);
     void setPlayerUseHeadModel(bool useHeadModel);
     void setPlayerUseSkeletonModel(bool useSkeletonModel);
-    void play();
-    void pausePlayer();
-    void stopPlaying();
 
-    bool isRecording();
-    float recorderElapsed();
     void startRecording();
     void stopRecording();
+    bool isRecording() const;
+    float recorderElapsed() const;
     void saveRecording(const QString& filename);
     void loadLastRecording();

AudioClient.cpp

@@ -743,19 +743,9 @@ void AudioClient::handleLocalEchoAndReverb(QByteArray& inputByteArray) {
 }
 
 void AudioClient::handleAudioInput() {
-    if (!_audioPacket) {
-        // we don't have an audioPacket yet - set that up now
-        _audioPacket = NLPacket::create(PacketType::MicrophoneAudioNoEcho);
-    }
-
     const float inputToNetworkInputRatio = calculateDeviceToNetworkInputRatio();
     const int inputSamplesRequired = (int)((float)AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL * inputToNetworkInputRatio);
     const auto inputAudioSamples = std::unique_ptr<int16_t[]>(new int16_t[inputSamplesRequired]);
-    static const int leadingBytes = sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
-    int16_t* const networkAudioSamples = (int16_t*)(_audioPacket->getPayload() + leadingBytes);
 
     QByteArray inputByteArray = _inputDevice->readAll();
 
     // Add audio source injection if enabled
@@ -784,30 +774,30 @@ void AudioClient::handleAudioInput() {
     float audioInputMsecsRead = inputByteArray.size() / (float)(_inputFormat.bytesForDuration(USECS_PER_MSEC));
     _stats.updateInputMsecsRead(audioInputMsecsRead);
 
-    while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) {
+    const int numNetworkBytes = _isStereoInput
+        ? AudioConstants::NETWORK_FRAME_BYTES_STEREO
+        : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
+    const int numNetworkSamples = _isStereoInput
+        ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO
+        : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
 
-        const int numNetworkBytes = _isStereoInput
-            ? AudioConstants::NETWORK_FRAME_BYTES_STEREO
-            : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
-        const int numNetworkSamples = _isStereoInput
-            ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO
-            : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
+    static int16_t networkAudioSamples[AudioConstants::NETWORK_FRAME_SAMPLES_STEREO];
+
+    while (_inputRingBuffer.samplesAvailable() >= inputSamplesRequired) {
 
         if (!_muted) {
+            // zero out the monoAudioSamples array and the locally injected audio
+            memset(networkAudioSamples, 0, numNetworkBytes);
 
             // Increment the time since the last clip
             if (_timeSinceLastClip >= 0.0f) {
-                _timeSinceLastClip += (float) numNetworkSamples / (float) AudioConstants::SAMPLE_RATE;
+                _timeSinceLastClip += (float)numNetworkSamples / (float)AudioConstants::SAMPLE_RATE;
             }
 
             _inputRingBuffer.readSamples(inputAudioSamples.get(), inputSamplesRequired);
             possibleResampling(_inputToNetworkResampler,
                 inputAudioSamples.get(), networkAudioSamples,
                 inputSamplesRequired, numNetworkSamples,
                 _inputFormat, _desiredInputFormat);
 
             // Remove DC offset
             if (!_isStereoInput && !_audioSourceInjectEnabled) {
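Moving the sample buffer out of the packet payload and into a function-local static means it must be sized before the per-call channel count is known, so it is sized for the stereo worst case and shared by both modes. A sketch of that arithmetic, with assumed constant values (the real figures live in AudioConstants):

    #include <cstdint>

    // Assumed values for illustration only; see AudioConstants in the repo.
    constexpr int NETWORK_FRAME_SAMPLES_PER_CHANNEL = 240;
    constexpr int NETWORK_FRAME_SAMPLES_STEREO = 2 * NETWORK_FRAME_SAMPLES_PER_CHANNEL;

    // One static buffer sized for the worst case (stereo) also covers mono;
    // the memset above zeroes only the numNetworkBytes actually in use.
    static int16_t networkAudioSamples[NETWORK_FRAME_SAMPLES_STEREO];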
@@ -829,7 +819,7 @@ void AudioClient::handleAudioInput() {
             for (int i = 0; i < numNetworkSamples; i++) {
                 int thisSample = std::abs(networkAudioSamples[i]);
-                loudness += (float) thisSample;
+                loudness += (float)thisSample;
 
                 if (thisSample > (AudioConstants::MAX_SAMPLE_VALUE * AudioNoiseGate::CLIPPING_THRESHOLD)) {
                     _timeSinceLastClip = 0.0f;
@@ -839,7 +829,7 @@ void AudioClient::handleAudioInput() {
                 _lastInputLoudness = fabs(loudness / numNetworkSamples);
             }
 
-            emit inputReceived({reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes});
+            emit inputReceived({ reinterpret_cast<char*>(networkAudioSamples), numNetworkBytes });
         } else {
             // our input loudness is 0, since we're muted
@@ -849,14 +839,38 @@ void AudioClient::handleAudioInput() {
         _inputRingBuffer.shiftReadPosition(inputSamplesRequired);
     }
 
-    auto nodeList = DependencyManager::get<NodeList>();
-    SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer);
-
-    if (audioMixer && audioMixer->getActiveSocket()) {
-        glm::vec3 headPosition = _positionGetter();
-        glm::quat headOrientation = _orientationGetter();
-        quint8 isStereo = _isStereoInput ? 1 : 0;
+    emitAudioPacket(networkAudioSamples);
+}
+
+void AudioClient::emitAudioPacket(const int16_t* audioData, PacketType packetType) {
+    static std::mutex _mutex;
+    using Locker = std::unique_lock<std::mutex>;
+    // FIXME recorded audio isn't guaranteed to have the same stereo state
+    // as the current system
+    const int numNetworkBytes = _isStereoInput
+        ? AudioConstants::NETWORK_FRAME_BYTES_STEREO
+        : AudioConstants::NETWORK_FRAME_BYTES_PER_CHANNEL;
+    const int numNetworkSamples = _isStereoInput
+        ? AudioConstants::NETWORK_FRAME_SAMPLES_STEREO
+        : AudioConstants::NETWORK_FRAME_SAMPLES_PER_CHANNEL;
 
+    auto nodeList = DependencyManager::get<NodeList>();
+    SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer);
+
+    if (audioMixer && audioMixer->getActiveSocket()) {
+        Locker lock(_mutex);
+        if (!_audioPacket) {
+            // we don't have an audioPacket yet - set that up now
+            _audioPacket = NLPacket::create(PacketType::MicrophoneAudioWithEcho);
+        }
+
+        glm::vec3 headPosition = _positionGetter();
+        glm::quat headOrientation = _orientationGetter();
+        quint8 isStereo = _isStereoInput ? 1 : 0;
+
+        if (packetType == PacketType::Unknown) {
             if (_lastInputLoudness == 0) {
                 _audioPacket->setType(PacketType::SilentAudioFrame);
             } else {
@@ -866,70 +880,52 @@ void AudioClient::handleAudioInput() {
                 _audioPacket->setType(PacketType::MicrophoneAudioNoEcho);
             }
         }
-
-        // reset the audio packet so we can start writing
-        _audioPacket->reset();
-
-        // write sequence number
-        _audioPacket->writePrimitive(_outgoingAvatarAudioSequenceNumber);
-
-        if (_audioPacket->getType() == PacketType::SilentAudioFrame) {
-            // pack num silent samples
-            quint16 numSilentSamples = numNetworkSamples;
-            _audioPacket->writePrimitive(numSilentSamples);
-        } else {
-            // set the mono/stereo byte
-            _audioPacket->writePrimitive(isStereo);
-        }
-
-        // pack the three float positions
-        _audioPacket->writePrimitive(headPosition);
-
-        // pack the orientation
-        _audioPacket->writePrimitive(headOrientation);
-
-        if (_audioPacket->getType() != PacketType::SilentAudioFrame) {
-            // audio samples have already been packed (written to networkAudioSamples)
-            _audioPacket->setPayloadSize(_audioPacket->getPayloadSize() + numNetworkBytes);
-        }
-
-        _stats.sentPacket();
-
-        nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::SendAudioPacket);
-
-        nodeList->sendUnreliablePacket(*_audioPacket, *audioMixer);
-
-        _outgoingAvatarAudioSequenceNumber++;
-    }
+        } else {
+            _audioPacket->setType(packetType);
+        }
+
+        // reset the audio packet so we can start writing
+        _audioPacket->reset();
+
+        // write sequence number
+        _audioPacket->writePrimitive(_outgoingAvatarAudioSequenceNumber);
+
+        if (_audioPacket->getType() == PacketType::SilentAudioFrame) {
+            // pack num silent samples
+            quint16 numSilentSamples = numNetworkSamples;
+            _audioPacket->writePrimitive(numSilentSamples);
+        } else {
+            // set the mono/stereo byte
+            _audioPacket->writePrimitive(isStereo);
+        }
+
+        // pack the three float positions
+        _audioPacket->writePrimitive(headPosition);
+
+        // pack the orientation
+        _audioPacket->writePrimitive(headOrientation);
+
+        if (_audioPacket->getType() != PacketType::SilentAudioFrame) {
+            // audio samples have already been packed (written to networkAudioSamples)
+            _audioPacket->setPayloadSize(_audioPacket->getPayloadSize() + numNetworkBytes);
+        }
+
+        static const int leadingBytes = sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
+        int16_t* const networkAudioSamples = (int16_t*)(_audioPacket->getPayload() + leadingBytes);
+        memcpy(networkAudioSamples, audioData, numNetworkBytes);
+
+        _stats.sentPacket();
+
+        nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::SendAudioPacket);
+
+        nodeList->sendUnreliablePacket(*_audioPacket, *audioMixer);
+
+        _outgoingAvatarAudioSequenceNumber++;
+    }
 }
 
 void AudioClient::handleRecordedAudioInput(const QByteArray& audio) {
-    if (!_audioPacket) {
-        // we don't have an audioPacket yet - set that up now
-        _audioPacket = NLPacket::create(PacketType::MicrophoneAudioWithEcho);
-    }
-
-    // FIXME either discard stereo in the recording or record a stereo flag
-    auto nodeList = DependencyManager::get<NodeList>();
-    SharedNodePointer audioMixer = nodeList->soloNodeOfType(NodeType::AudioMixer);
-
-    if (audioMixer && audioMixer->getActiveSocket()) {
-        glm::vec3 headPosition = _positionGetter();
-        glm::quat headOrientation = _orientationGetter();
-        quint8 isStereo = _isStereoInput ? 1 : 0;
-        _audioPacket->reset();
-        _audioPacket->setType(PacketType::MicrophoneAudioWithEcho);
-        _audioPacket->writePrimitive(_outgoingAvatarAudioSequenceNumber);
-        _audioPacket->writePrimitive(isStereo);
-        _audioPacket->writePrimitive(headPosition);
-        _audioPacket->writePrimitive(headOrientation);
-        _audioPacket->write(audio);
-        _stats.sentPacket();
-        nodeList->flagTimeForConnectionStep(LimitedNodeList::ConnectionStep::SendAudioPacket);
-        nodeList->sendUnreliablePacket(*_audioPacket, *audioMixer);
-        _outgoingAvatarAudioSequenceNumber++;
-    }
+    emitAudioPacket((int16_t*)audio.data(), PacketType::MicrophoneAudioWithEcho);
 }
 
 void AudioClient::processReceivedSamples(const QByteArray& inputBuffer, QByteArray& outputBuffer) {
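For reference, the field order emitAudioPacket writes is the same one handleRecordedAudioInput used to write by hand: sequence number, stereo flag (or silent-sample count), head position, head orientation, then the samples. A hypothetical stand-alone packer illustrating that layout, not the engine's NLPacket API:

    #include <cstdint>
    #include <cstring>
    #include <vector>
    #include <glm/vec3.hpp>
    #include <glm/gtc/quaternion.hpp>

    // Illustrative helper mirroring the writePrimitive calls above.
    std::vector<char> packAudioFrame(uint16_t seq, uint8_t isStereo,
                                     const glm::vec3& pos, const glm::quat& rot,
                                     const int16_t* samples, int numBytes) {
        std::vector<char> out;
        auto put = [&out](const void* p, size_t n) {
            const char* c = static_cast<const char*>(p);
            out.insert(out.end(), c, c + n);
        };
        put(&seq, sizeof(seq));           // sequence number
        put(&isStereo, sizeof(isStereo)); // mono/stereo byte (silent frames carry
                                          // a quint16 sample count here instead)
        put(&pos, sizeof(pos));           // head position (3 floats)
        put(&rot, sizeof(rot));           // head orientation (4 floats)
        put(samples, numBytes);           // raw network-rate samples
        return out;
    }

The leadingBytes constant in the diff (quint16 + vec3 + quat + quint8) is simply the size of everything that precedes the samples.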

AudioClient.h

@@ -212,6 +212,7 @@ protected:
     }
 
 private:
+    void emitAudioPacket(const int16_t* audioData, PacketType packetType = PacketType::Unknown);
     void outputFormatChanged();
 
     QByteArray firstInputFrame;

CMakeLists.txt (avatars library)

@@ -1,3 +1,3 @@
 set(TARGET_NAME avatars)
 setup_hifi_library(Network Script)
-link_hifi_libraries(audio shared networking recording)
+link_hifi_libraries(shared networking)

AvatarData.cpp

@@ -33,7 +33,6 @@
 #include <StreamUtils.h>
 #include <UUID.h>
 #include <shared/JSONHelpers.h>
-#include <recording/Frame.h>
 
 #include "AvatarLogging.h"
@@ -1443,14 +1442,10 @@ QByteArray AvatarData::toFrame(const AvatarData& avatar) {
     auto recordingBasis = avatar.getRecordingBasis();
     if (recordingBasis) {
+        root[JSON_AVATAR_BASIS] = Transform::toJson(*recordingBasis);
         // Find the relative transform
         auto relativeTransform = recordingBasis->relativeTransform(avatar.getTransform());
-
-        // if the resulting relative basis is identity, we shouldn't record anything
-        if (!relativeTransform.isIdentity()) {
-            root[JSON_AVATAR_RELATIVE] = Transform::toJson(relativeTransform);
-            root[JSON_AVATAR_BASIS] = Transform::toJson(*recordingBasis);
-        }
+        root[JSON_AVATAR_RELATIVE] = Transform::toJson(relativeTransform);
     } else {
         root[JSON_AVATAR_RELATIVE] = Transform::toJson(avatar.getTransform());
     }
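The old code skipped writing anything when the relative transform was identity, which also dropped the basis from the frame; the new code always records both, so an identity pose still round-trips on playback. A minimal sketch of the assumed math, using plain glm matrices rather than the engine's Transform type:

    #include <glm/glm.hpp>

    // Assumed math, not the engine's Transform API: recording both the basis B
    // and the avatar transform relative to B lets playback rebuild the world
    // transform, even when the relative transform is identity.
    glm::mat4 recordRelative(const glm::mat4& basis, const glm::mat4& world) {
        return glm::inverse(basis) * world;   // stored as JSON_AVATAR_RELATIVE
    }

    glm::mat4 playbackWorld(const glm::mat4& basis, const glm::mat4& relative) {
        return basis * relative;              // reconstructed world transform
    }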
@@ -1484,6 +1479,9 @@ QByteArray AvatarData::toFrame(const AvatarData& avatar) {
 
 void AvatarData::fromFrame(const QByteArray& frameData, AvatarData& result) {
     QJsonDocument doc = QJsonDocument::fromBinaryData(frameData);
+#ifdef WANT_JSON_DEBUG
+    qDebug() << doc.toJson(QJsonDocument::JsonFormat::Indented);
+#endif
     QJsonObject root = doc.object();
 
     if (root.contains(JSON_AVATAR_HEAD_MODEL)) {

AvatarData.h

@@ -50,7 +50,6 @@ typedef unsigned long long quint64;
 #include <Node.h>
 #include <RegisteredMetaTypes.h>
 #include <SimpleMovingAverage.h>
-#include <recording/Forward.h>
 
 #include "AABox.h"
 #include "HandData.h"

Deck.cpp

@@ -8,6 +8,8 @@
 
 #include "Deck.h"
 
+#include <QtCore/QThread>
+
 #include <NumericalConstants.h>
 #include <SharedUtil.h>
@@ -101,9 +103,13 @@ float Deck::position() const {
 }
 
 static const Frame::Time MIN_FRAME_WAIT_INTERVAL = Frame::secondsToFrameTime(0.001f);
-static const Frame::Time MAX_FRAME_PROCESSING_TIME = Frame::secondsToFrameTime(0.002f);
+static const Frame::Time MAX_FRAME_PROCESSING_TIME = Frame::secondsToFrameTime(0.004f);
 
 void Deck::processFrames() {
+    if (qApp->thread() != QThread::currentThread()) {
+        qWarning() << "Processing frames must only happen on the main thread.";
+        return;
+    }
     Locker lock(_mutex);
     if (_pause) {
         return;
@@ -115,10 +121,17 @@ void Deck::processFrames() {
     // FIXME add code to start dropping frames if we fall behind.
     // Alternatively, add code to cache frames here and then process only the last frame of a given type
     // ... the latter will work for Avatar, but not well for audio I suspect.
+    bool overLimit = false;
     for (nextClip = getNextClip(); nextClip; nextClip = getNextClip()) {
         auto currentPosition = Frame::frameTimeFromEpoch(_startEpoch);
         if ((currentPosition - startingPosition) >= MAX_FRAME_PROCESSING_TIME) {
             qCWarning(recordingLog) << "Exceeded maximum frame processing time, breaking early";
+#ifdef WANT_RECORDING_DEBUG
+            qCDebug(recordingLog) << "Starting: " << currentPosition;
+            qCDebug(recordingLog) << "Current: " << startingPosition;
+            qCDebug(recordingLog) << "Trigger: " << triggerPosition;
+#endif
+            overLimit = true;
             break;
         }
@@ -150,9 +163,19 @@ void Deck::processFrames() {
 
     // If we have more clip frames available, set the timer for the next one
     _position = Frame::frameTimeFromEpoch(_startEpoch);
-    auto nextFrameTime = nextClip->positionFrameTime();
-    auto interval = Frame::frameTimeToMilliseconds(nextFrameTime - _position);
-    _timer.singleShot(interval, [this] {
+    int nextInterval = 1;
+    if (!overLimit) {
+        auto nextFrameTime = nextClip->positionFrameTime();
+        nextInterval = (int)Frame::frameTimeToMilliseconds(nextFrameTime - _position);
+#ifdef WANT_RECORDING_DEBUG
+        qCDebug(recordingLog) << "Now " << _position;
+        qCDebug(recordingLog) << "Next frame time " << nextInterval;
+#endif
+    }
+#ifdef WANT_RECORDING_DEBUG
+    qCDebug(recordingLog) << "Setting timer for next processing " << nextInterval;
+#endif
+    _timer.singleShot(nextInterval, [this] {
         processFrames();
     });
 }
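The rescheduling change above is the heart of the fix: previously, even after breaking out of the loop for exceeding MAX_FRAME_PROCESSING_TIME, the deck computed its next wake-up from the next frame's timestamp; now an over-budget pass reschedules itself after roughly a millisecond so it resumes draining frames promptly. A simplified sketch of that policy (a hypothetical free function, not the Deck API):

    #include <QtCore/QTimer>
    #include <functional>

    // When the frame loop exceeds its time budget, yield back to the event
    // loop almost immediately instead of waiting until the next frame is due.
    void scheduleNext(bool overLimit, int msToNextFrame,
                      std::function<void()> processFrames) {
        const int nextInterval = overLimit ? 1 : msToNextFrame;
        QTimer::singleShot(nextInterval, processFrames);
    }

Keeping the interval at 1 ms rather than 0 still lets other queued events run between passes, which matters given that processFrames must stay on the main thread.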