mirror of
https://github.com/overte-org/overte.git
synced 2025-07-13 04:58:43 +02:00
449 lines
16 KiB
C++
449 lines
16 KiB
C++
//
|
|
// Sound.cpp
|
|
// libraries/audio/src
|
|
//
|
|
// Created by Stephen Birarda on 1/2/2014.
|
|
// Copyright 2014 High Fidelity, Inc.
|
|
// Copyright 2023 Overte e.V.
|
|
//
|
|
// Distributed under the Apache License, Version 2.0.
|
|
// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
#include "Sound.h"
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <glm/glm.hpp>
|
|
|
|
#include <QRunnable>
|
|
#include <QThreadPool>
|
|
#include <QDataStream>
|
|
#include <QtCore/QDebug>
|
|
#include <QtNetwork/QNetworkRequest>
|
|
#include <QtNetwork/QNetworkReply>
|
|
#include <qendian.h>
|
|
|
|
#include <LimitedNodeList.h>
|
|
#include <NetworkAccessManager.h>
|
|
#include <SharedUtil.h>
|
|
#include <ScriptEngine.h>
|
|
#include <ScriptValue.h>
|
|
|
|
#include "AudioRingBuffer.h"
|
|
#include "AudioLogging.h"
|
|
#include "AudioSRC.h"
|
|
|
|
#include "flump3dec.h"
|
|
|
|
int audioDataPointerMetaTypeID = qRegisterMetaType<AudioDataPointer>("AudioDataPointer");
|
|
|
|
using AudioConstants::AudioSample;
|
|
|
|
// Factory for AudioData: allocates the object header and its sample buffer
// as ONE contiguous block, so a single free() in the custom deleter
// releases both.
//
// @param numSamples  total number of samples (across all channels)
// @param numChannels channel count the samples are interleaved for
// @param samples     source samples copied into the new buffer
// @return shared pointer owning the combined allocation, or an empty
//         pointer if allocation fails
AudioDataPointer AudioData::make(uint32_t numSamples, uint32_t numChannels,
                                 const AudioSample* samples) {
    // Compute the amount of memory required for the audio data object
    const size_t bufferSize = numSamples * sizeof(AudioSample);
    const size_t memorySize = sizeof(AudioData) + bufferSize;

    // Allocate the memory for the audio data object and the buffer
    void* memory = ::calloc(1, memorySize);
    if (memory == nullptr) {
        // BUGFIX: previously an allocation failure fell straight through to
        // placement-new/memcpy on a null pointer. Return an empty pointer
        // and let the caller deal with it.
        return AudioDataPointer();
    }

    auto audioData = reinterpret_cast<AudioData*>(memory);
    // The sample buffer lives immediately after the AudioData object
    auto buffer = reinterpret_cast<AudioSample*>(audioData + 1);
    assert(((char*)buffer - (char*)audioData) == sizeof(AudioData));

    // Use placement new to construct the audio data object at the memory allocated
    ::new(audioData) AudioData(numSamples, numChannels, buffer);

    // Copy the samples to the buffer
    memcpy(buffer, samples, bufferSize);

    // Return shared_ptr whose deleter mirrors the placement-new: destruct
    // explicitly, then release the raw allocation.
    return AudioDataPointer(audioData, [](AudioData* ptr) {
        ptr->~AudioData();
        ::free(ptr);
    });
}
|
|
|
|
|
|
// Stores a non-owning view over `samples`: AudioData::make() places this
// object and its sample buffer inside a single allocation and frees both
// together via the shared_ptr deleter.
AudioData::AudioData(uint32_t numSamples, uint32_t numChannels, const AudioSample* samples)
    : _numSamples(numSamples), _numChannels(numChannels), _data(samples) {
}
|
|
|
|
void Sound::downloadFinished(const QByteArray& data) {
|
|
if (!_self) {
|
|
soundProcessError(301, "Sound object has gone out of scope");
|
|
return;
|
|
}
|
|
|
|
// this is a QRunnable, will delete itself after it has finished running
|
|
auto soundProcessor = new SoundProcessor(_self, data);
|
|
connect(soundProcessor, &SoundProcessor::onSuccess, this, &Sound::soundProcessSuccess);
|
|
connect(soundProcessor, &SoundProcessor::onError, this, &Sound::soundProcessError);
|
|
QThreadPool::globalInstance()->start(soundProcessor);
|
|
}
|
|
|
|
// Slot invoked when SoundProcessor finishes decoding successfully.
// Takes ownership of the decoded samples, marks the resource loaded,
// then notifies listeners via ready().
void Sound::soundProcessSuccess(AudioDataPointer audioData) {
    qCDebug(audio) << "Setting ready state for sound file" << _url.fileName();

    // move: AudioDataPointer is a shared pointer; avoid a refcount bump
    _audioData = std::move(audioData);
    finishedLoading(true);

    emit ready();
}
|
|
|
|
// Slot invoked when sound processing fails.
// `error` is an internal code (300 = bad/unsupported file type,
// 301 = the Sound object went out of scope before processing ran);
// `str` is a human-readable reason. Emits a generic network failure
// code to listeners and marks the resource as failed to load.
void Sound::soundProcessError(int error, QString str) {
    qCCritical(audio) << "Failed to process sound file: code =" << error << str;
    emit failed(QNetworkReply::UnknownContentError);
    finishedLoading(false);
}
|
|
|
|
|
|
// Captures a weak reference to the owning Sound plus the raw downloaded
// bytes; the actual decoding happens later on a pool thread in run().
SoundProcessor::SoundProcessor(QWeakPointer<Resource> sound, QByteArray data)
    : _sound(sound), _data(data) {
}
|
|
|
|
// QRunnable entry point, executed on a QThreadPool worker thread
// (queued by Sound::downloadFinished). Dispatches on the file extension,
// decodes/interprets the raw bytes into PCM, resamples to the mixer rate,
// and reports the result through onSuccess()/onError().
void SoundProcessor::run() {
    // The owning Sound may have been destroyed while we sat in the queue.
    auto sound = qSharedPointerCast<Sound>(_sound.lock());
    if (!sound) {
        emit onError(301, "Sound object has gone out of scope");
        return;
    }

    auto url = sound->getURL();
    QString fileName = url.fileName().toLower();
    qCDebug(audio) << "Processing sound file" << fileName;

    // Supported formats, selected purely by file extension.
    static const QString WAV_EXTENSION = ".wav";
    static const QString MP3_EXTENSION = ".mp3";
    static const QString RAW_EXTENSION = ".raw";
    static const QString STEREO_RAW_EXTENSION = ".stereo.raw";
    QString fileType;

    QByteArray outputAudioByteArray;
    AudioProperties properties;

    if (fileName.endsWith(WAV_EXTENSION)) {
        fileType = "WAV";
        properties = interpretAsWav(_data, outputAudioByteArray);
    } else if (fileName.endsWith(MP3_EXTENSION)) {
        fileType = "MP3";
        properties = interpretAsMP3(_data, outputAudioByteArray);
    } else if (fileName.endsWith(STEREO_RAW_EXTENSION)) {
        // check if this was a stereo raw file
        // since it's raw the only way for us to know that is if the file was called .stereo.raw
        // (this branch must come before the plain ".raw" test below)
        qCDebug(audio) << "Processing sound of" << _data.size() << "bytes from" << fileName << "as stereo audio file.";
        // Process as 48khz stereo RAW file; the payload is already PCM
        properties.numChannels = 2;
        properties.sampleRate = 48000;
        outputAudioByteArray = _data;
    } else if (fileName.endsWith(RAW_EXTENSION)) {
        // Process as 48khz mono RAW file; the payload is already PCM
        properties.numChannels = 1;
        properties.sampleRate = 48000;
        outputAudioByteArray = _data;
    } else {
        qCWarning(audio) << "Unknown sound file type";
        emit onError(300, "Failed to load sound file, reason: unknown sound file type");
        return;
    }

    // interpretAsWav/interpretAsMP3 return a default AudioProperties on
    // failure; this sampleRate check detects that.
    if (properties.sampleRate == 0) {
        qCWarning(audio) << "Unsupported" << fileType << "file type";
        emit onError(300, "Failed to load sound file, reason: unsupported " + fileType + " file type");
        return;
    }

    // Resample to the audio-mixer's rate if the source rate differs.
    auto data = downSample(outputAudioByteArray, properties);

    int numSamples = data.size() / AudioConstants::SAMPLE_SIZE;
    auto audioData = AudioData::make(numSamples, properties.numChannels,
                                     (const AudioSample*)data.constData());
    emit onSuccess(audioData);
}
|
|
|
|
// Resamples raw PCM to the format the audio-mixer wants: signed 16-bit at
// AudioConstants::SAMPLE_RATE. Returns the input unchanged when it is
// already at the target rate.
//
// @param rawAudioByteArray interleaved 16-bit PCM at properties.sampleRate
// @param properties        source sample rate and channel count
// @return resampled PCM byte array at the mixer rate
QByteArray SoundProcessor::downSample(const QByteArray& rawAudioByteArray,
                                      AudioProperties properties) {

    if (properties.sampleRate == AudioConstants::SAMPLE_RATE) {
        // no resampling needed
        return rawAudioByteArray;
    }

    AudioSRC resampler(properties.sampleRate, AudioConstants::SAMPLE_RATE,
                       properties.numChannels);

    // resize to max possible output
    int numSourceFrames = rawAudioByteArray.size() / (properties.numChannels * AudioConstants::SAMPLE_SIZE);
    int maxDestinationFrames = resampler.getMaxOutput(numSourceFrames);
    int maxDestinationBytes = maxDestinationFrames * properties.numChannels * AudioConstants::SAMPLE_SIZE;
    QByteArray data(maxDestinationBytes, Qt::Uninitialized);

    int numDestinationFrames = resampler.render((int16_t*)rawAudioByteArray.data(),
                                                (int16_t*)data.data(),
                                                numSourceFrames);

    // truncate to actual output
    // (use SAMPLE_SIZE consistently with the frame computations above;
    // previously this line alone used sizeof(AudioSample))
    int numDestinationBytes = numDestinationFrames * properties.numChannels * AudioConstants::SAMPLE_SIZE;
    data.resize(numDestinationBytes);

    return data;
}
|
|
|
|
//
|
|
// Format description from https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
|
|
//
|
|
// The header for a WAV file looks like this:
|
|
// Positions Sample Value Description
|
|
// 00-03 "RIFF" Marks the file as a riff file. Characters are each 1 byte long.
|
|
// 04-07 File size (int) Size of the overall file - 8 bytes, in bytes (32-bit integer).
|
|
// 08-11 "WAVE" File Type Header. For our purposes, it always equals "WAVE".
|
|
// 12-15 "fmt " Format chunk marker.
|
|
// 16-19 16 Length of format data as listed above
|
|
// 20-21 1 Type of format: (1=PCM, 257=Mu-Law, 258=A-Law, 259=ADPCM) - 2 byte integer
|
|
// 22-23 2 Number of Channels - 2 byte integer
|
|
// 24-27 44100 Sample Rate - 32 byte integer. Sample Rate = Number of Samples per second, or Hertz.
|
|
// 28-31 176400 (Sample Rate * BitsPerSample * Channels) / 8.
|
|
// 32-33 4 (BitsPerSample * Channels) / 8 - 8 bit mono2 - 8 bit stereo/16 bit mono4 - 16 bit stereo
|
|
// 34-35 16 Bits per sample
|
|
// 36-39 "data" Chunk header. Marks the beginning of the data section.
|
|
// 40-43 File size (int) Size of the data section.
|
|
// 44-?? Actual sound data
|
|
// Sample values are given above for a 16-bit stereo source.
|
|
//
|
|
|
|
// Generic RIFF chunk header: a 4-byte ASCII tag followed by the payload
// size. Instances are filled directly from the stream with readRawData(),
// so the field layout must match the on-disk format exactly — do not
// reorder or add members.
struct chunk {
    char id[4];     // chunk tag, e.g. "fmt " or "data" (not NUL-terminated)
    quint32 size;   // payload size in bytes; little-endian on disk
};
|
|
|
|
// The 12-byte preamble of a WAV file, read raw from the stream;
// layout must match the on-disk format exactly.
struct RIFFHeader {
    chunk descriptor; // "RIFF" + overall size
    char type[4];     // "WAVE"
};
|
|
|
|
// Audio-format codes from the WAVE "fmt " chunk that we accept
// (constexpr: these are pure compile-time constants).
static constexpr int WAVEFORMAT_PCM = 1;             // uncompressed integer PCM
static constexpr int WAVEFORMAT_EXTENSIBLE = 0xfffe; // WAVE_FORMAT_EXTENSIBLE wrapper
|
|
|
|
// The fixed 16-byte body of the "fmt " chunk; any extra bytes (e.g. for
// WAVE_FORMAT_EXTENSIBLE) are skipped by interpretAsWav(). Read raw from
// the stream, so the layout must match the on-disk format exactly.
// All multi-byte fields are little-endian on disk.
struct WAVEFormat {
    quint16 audioFormat;   // format type: 1=PCM, 257=Mu-Law, 258=A-Law, 259=ADPCM
    quint16 numChannels;   // number of channels: 1=mono, 2=stereo
    quint32 sampleRate;    // samples per second, e.g. 44100
    quint32 byteRate;      // sampleRate * numChannels * bitsPerSample / 8
    quint16 blockAlign;    // (numChannels * bitsPerSample) / 8
    quint16 bitsPerSample; // bits per sample; only 16 is supported here
};
|
|
|
|
// returns wavfile sample rate, used for resampling
|
|
SoundProcessor::AudioProperties SoundProcessor::interpretAsWav(const QByteArray& inputAudioByteArray,
|
|
QByteArray& outputAudioByteArray) {
|
|
AudioProperties properties;
|
|
|
|
// Create a data stream to analyze the data
|
|
QDataStream waveStream(const_cast<QByteArray *>(&inputAudioByteArray), QIODevice::ReadOnly);
|
|
|
|
// Read the "RIFF" chunk
|
|
RIFFHeader riff;
|
|
if (waveStream.readRawData((char*)&riff, sizeof(RIFFHeader)) != sizeof(RIFFHeader)) {
|
|
qCWarning(audio) << "Not a valid WAVE file.";
|
|
return AudioProperties();
|
|
}
|
|
|
|
// Parse the "RIFF" chunk
|
|
if (strncmp(riff.descriptor.id, "RIFF", 4) == 0) {
|
|
waveStream.setByteOrder(QDataStream::LittleEndian);
|
|
} else {
|
|
qCWarning(audio) << "Currently not supporting big-endian audio files.";
|
|
return AudioProperties();
|
|
}
|
|
if (strncmp(riff.type, "WAVE", 4) != 0) {
|
|
qCWarning(audio) << "Not a valid WAVE file.";
|
|
return AudioProperties();
|
|
}
|
|
|
|
// Read chunks until the "fmt " chunk is found
|
|
chunk fmt;
|
|
while (true) {
|
|
if (waveStream.readRawData((char*)&fmt, sizeof(chunk)) != sizeof(chunk)) {
|
|
qCWarning(audio) << "Not a valid WAVE file.";
|
|
return AudioProperties();
|
|
}
|
|
if (strncmp(fmt.id, "fmt ", 4) == 0) {
|
|
break;
|
|
}
|
|
waveStream.skipRawData(qFromLittleEndian<quint32>(fmt.size)); // next chunk
|
|
}
|
|
|
|
// Read the "fmt " chunk
|
|
WAVEFormat wave;
|
|
if (waveStream.readRawData((char*)&wave, sizeof(WAVEFormat)) != sizeof(WAVEFormat)) {
|
|
qCWarning(audio) << "Not a valid WAVE file.";
|
|
return AudioProperties();
|
|
}
|
|
|
|
// Parse the "fmt " chunk
|
|
if (qFromLittleEndian<quint16>(wave.audioFormat) != WAVEFORMAT_PCM &&
|
|
qFromLittleEndian<quint16>(wave.audioFormat) != WAVEFORMAT_EXTENSIBLE) {
|
|
qCWarning(audio) << "Currently not supporting non PCM audio files.";
|
|
return AudioProperties();
|
|
}
|
|
|
|
properties.numChannels = qFromLittleEndian<quint16>(wave.numChannels);
|
|
if (properties.numChannels != 1 &&
|
|
properties.numChannels != 2 &&
|
|
properties.numChannels != 4) {
|
|
qCWarning(audio) << "Currently not supporting audio files with other than 1/2/4 channels.";
|
|
return AudioProperties();
|
|
}
|
|
if (qFromLittleEndian<quint16>(wave.bitsPerSample) != 16) {
|
|
qCWarning(audio) << "Currently not supporting non 16bit audio files.";
|
|
return AudioProperties();
|
|
}
|
|
|
|
// Skip any extra data in the "fmt " chunk
|
|
waveStream.skipRawData(qFromLittleEndian<quint32>(fmt.size) - sizeof(WAVEFormat));
|
|
|
|
// Read chunks until the "data" chunk is found
|
|
chunk data;
|
|
while (true) {
|
|
if (waveStream.readRawData((char*)&data, sizeof(chunk)) != sizeof(chunk)) {
|
|
qCWarning(audio) << "Not a valid WAVE file.";
|
|
return AudioProperties();
|
|
}
|
|
if (strncmp(data.id, "data", 4) == 0) {
|
|
break;
|
|
}
|
|
waveStream.skipRawData(qFromLittleEndian<quint32>(data.size)); // next chunk
|
|
}
|
|
|
|
// Read the "data" chunk
|
|
quint32 outputAudioByteArraySize = qFromLittleEndian<quint32>(data.size);
|
|
outputAudioByteArray.resize(outputAudioByteArraySize);
|
|
auto bytesRead = waveStream.readRawData(outputAudioByteArray.data(), outputAudioByteArraySize);
|
|
if (bytesRead != (int)outputAudioByteArraySize) {
|
|
qCWarning(audio) << "Error reading WAV file";
|
|
return AudioProperties();
|
|
}
|
|
|
|
properties.sampleRate = wave.sampleRate;
|
|
return properties;
|
|
}
|
|
|
|
// Decodes an MP3 stream into interleaved 16-bit PCM using the flump3dec
// library, appending the decoded frames to outputAudioByteArray.
// Returns the stream's sample rate / channel count (taken from the first
// frame header), used later for resampling; returns a default
// AudioProperties on failure.
SoundProcessor::AudioProperties SoundProcessor::interpretAsMP3(const QByteArray& inputAudioByteArray,
                                                               QByteArray& outputAudioByteArray) {
    AudioProperties properties;

    using namespace flump3dec;

    // Worst-case decoded size of a single MPEG frame (1152 samples,
    // 2 channels, 16-bit), used to size the per-frame scratch buffer.
    static const int MP3_SAMPLES_MAX = 1152;
    static const int MP3_CHANNELS_MAX = 2;
    static const int MP3_BUFFER_SIZE = MP3_SAMPLES_MAX * MP3_CHANNELS_MAX * sizeof(int16_t);
    uint8_t mp3Buffer[MP3_BUFFER_SIZE];

    // create bitstream
    Bit_stream_struc *bitstream = bs_new();
    if (bitstream == nullptr) {
        return AudioProperties();
    }

    // create decoder (freed together with the bitstream below)
    mp3tl *decoder = mp3tl_new(bitstream, MP3TL_MODE_16BIT);
    if (decoder == nullptr) {
        bs_free(bitstream);
        return AudioProperties();
    }

    // initialize the bitstream with the raw input bytes
    bs_set_data(bitstream, (uint8_t*)inputAudioByteArray.data(), inputAudioByteArray.size());
    int frameCount = 0;

    // skip ID3 tag, if present
    Mp3TlRetcode result = mp3tl_skip_id3(decoder);

    // Decode frame-by-frame until the decoder runs out of data or can no
    // longer find a sync word.
    while (!(result == MP3TL_ERR_NO_SYNC || result == MP3TL_ERR_NEED_DATA)) {

        mp3tl_sync(decoder);

        // find MP3 header
        const fr_header *header = nullptr;
        result = mp3tl_decode_header(decoder, &header);

        if (result == MP3TL_ERR_OK) {

            if (frameCount++ == 0) {
                // First frame: record the stream parameters used for resampling

                qCDebug(audio) << "Decoding MP3 with bitrate =" << header->bitrate
                               << "sample rate =" << header->sample_rate
                               << "channels =" << header->channels;

                // save header info
                properties.sampleRate = header->sample_rate;
                properties.numChannels = header->channels;

                // skip Xing header, if present
                result = mp3tl_skip_xing(decoder, header);
            }

            // decode MP3 frame
            if (result == MP3TL_ERR_OK) {

                result = mp3tl_decode_frame(decoder, mp3Buffer, MP3_BUFFER_SIZE);

                // fill bad frames with silence instead of dropping them,
                // keeping the output time-aligned
                int len = header->frame_samples * header->channels * sizeof(int16_t);
                if (result == MP3TL_ERR_BAD_FRAME) {
                    memset(mp3Buffer, 0, len);
                }

                if (result == MP3TL_ERR_OK || result == MP3TL_ERR_BAD_FRAME) {
                    outputAudioByteArray.append((char*)mp3Buffer, len);
                }
            }
        }
    }

    // free decoder
    mp3tl_free(decoder);

    // free bitstream
    bs_free(bitstream);

    // No frame ever decoded successfully — treat as a failed file.
    if (outputAudioByteArray.isEmpty()) {
        qCWarning(audio) << "Error decoding MP3 file";
        return AudioProperties();
    }

    return properties;
}
|
|
|
|
|
|
// Wraps a SharedSoundPointer in a SoundScriptingInterface so scripts can
// hold it; the script engine takes ownership of the wrapper object.
ScriptValue soundSharedPointerToScriptValue(ScriptEngine* engine, const SharedSoundPointer& in) {
    auto* wrapper = new SoundScriptingInterface(in);
    return engine->newQObject(wrapper, ScriptEngine::ScriptOwnership);
}
|
|
|
|
bool soundSharedPointerFromScriptValue(const ScriptValue& object, SharedSoundPointer& out) {
|
|
if (auto soundInterface = qobject_cast<SoundScriptingInterface*>(object.toQObject())) {
|
|
out = soundInterface->getSound();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Forwards the wrapped Sound's ready() signal to script listeners.
SoundScriptingInterface::SoundScriptingInterface(const SharedSoundPointer& sound) : _sound(sound) {
    // During shutdown we can sometimes be handed an empty sound pointer
    if (!_sound) {
        return;
    }
    QObject::connect(_sound.data(), &Sound::ready, this, &SoundScriptingInterface::ready);
}
|
|
|
|
// Derives the channel count from the flags: ambisonic implies 4 channels
// and takes precedence over stereo (2); otherwise mono (1).
Sound::Sound(const QUrl& url, bool isStereo, bool isAmbisonic) : Resource(url) {
    if (isAmbisonic) {
        _numChannels = 4;
    } else if (isStereo) {
        _numChannels = 2;
    } else {
        _numChannels = 1;
    }
}
|