Mirror of https://github.com/HifiExperiments/overte.git
silent audio packet type generalized

Commit: 4825457f4d (parent 7e59723522)
10 changed files with 87 additions and 81 deletions
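Summary, as read from the hunks below: PacketTypeSilentAudioFrame loses its per-stream encodings and gets one shared layout. A silent frame now carries only the packet header, the quint16 sequence number, and a quint16 count of silent samples; the stereo flag, head position, head orientation, and sample payload are written for audible frames only. A minimal sketch of the two layouts implied by the diff (the helper name and byte walk are illustrative, not code from this commit):

    #include <cstdint>
    #include <cstring>

    // audible frame: [header][quint16 seq][quint8 isStereo][glm::vec3 position][glm::quat orientation][int16_t samples...]
    // silent frame:  [header][quint16 seq][quint16 numSilentSamples]

    // Hypothetical packer for the silent payload (everything after the packet header).
    int packSilentPayload(char* dest, uint16_t sequence, uint16_t numSilentSamples) {
        char* p = dest;
        memcpy(p, &sequence, sizeof(sequence));                 p += sizeof(sequence);
        memcpy(p, &numSilentSamples, sizeof(numSilentSamples)); p += sizeof(numSilentSamples);
        return int(p - dest); // 4 bytes, matching the sender-side arithmetic in Audio.cpp below
    }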
AvatarAudioStream.cpp
@@ -38,26 +38,9 @@ int AvatarAudioStream::parseStreamProperties(PacketType type, const QByteArray&
     // read the positional data
     readBytes += parsePositionalData(packetAfterSeqNum.mid(readBytes));
 
-    if (type == PacketTypeSilentAudioFrame) {
-        int16_t numSilentSamples;
-        memcpy(&numSilentSamples, packetAfterSeqNum.data() + readBytes, sizeof(int16_t));
-        readBytes += sizeof(int16_t);
-
-        numAudioSamples = numSilentSamples;
-    } else {
-        int numAudioBytes = packetAfterSeqNum.size() - readBytes;
-        numAudioSamples = numAudioBytes / sizeof(int16_t);
-    }
-    return readBytes;
-}
-
-int AvatarAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) {
-    int readBytes = 0;
-    if (type == PacketTypeSilentAudioFrame) {
-        writeDroppableSilentSamples(numAudioSamples);
-    } else {
-        // there is audio data to read
-        readBytes += _ringBuffer.writeData(packetAfterStreamProperties.data(), numAudioSamples * sizeof(int16_t));
-    }
+    // calculate how many samples are in this packet
+    int numAudioBytes = packetAfterSeqNum.size() - readBytes;
+    numAudioSamples = numAudioBytes / sizeof(int16_t);
+
     return readBytes;
 }
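With the silent branch gone, AvatarAudioStream::parseStreamProperties only derives the sample count from the bytes left after the positional data, and the parseAudioData override moves into the InboundAudioStream base class (see below). Worth noting: the old code read the silent-sample count as a signed int16_t, while the generalized path reads it as quint16. A minimal sketch of the surviving sample-count rule, assuming a mono int16_t payload:

    #include <cstdint>

    // Whatever follows the positional data is payload, so the count is bytes / sample size.
    inline int samplesInAudiblePayload(int bytesAfterSeqNum, int positionalBytes) {
        return (bytesAfterSeqNum - positionalBytes) / (int)sizeof(int16_t);
    }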
AvatarAudioStream.h
@@ -26,7 +26,6 @@ private:
     AvatarAudioStream& operator= (const AvatarAudioStream&);
 
     int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples);
-    int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples);
 };
 
 #endif // hifi_AvatarAudioStream_h
Audio.cpp
@@ -453,9 +453,12 @@ void Audio::handleAudioInput() {
     static char audioDataPacket[MAX_PACKET_SIZE];
 
     static int numBytesPacketHeader = numBytesForPacketHeaderGivenPacketType(PacketTypeMicrophoneAudioNoEcho);
-    static int leadingBytes = numBytesPacketHeader + sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
 
-    static int16_t* networkAudioSamples = (int16_t*) (audioDataPacket + leadingBytes);
+    // NOTE: we assume PacketTypeMicrophoneAudioWithEcho has same size headers as
+    // PacketTypeMicrophoneAudioNoEcho. If not, then networkAudioSamples will be pointing to the wrong place for writing
+    // audio samples with echo.
+    static int leadingBytes = numBytesPacketHeader + sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
+    static int16_t* networkAudioSamples = (int16_t*)(audioDataPacket + leadingBytes);
 
     float inputToNetworkInputRatio = calculateDeviceToNetworkInputRatio(_numInputCallbackBytes);
 
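The NOTE added here documents an assumption rather than enforcing it. If desired, it could be checked once at startup; a hedged sketch using the helper already called above (the check itself is not part of this commit, and it assumes the PacketHeaders.h declarations used in the diff):

    #include <cassert>

    void checkEchoHeaderSizeAssumption() {
        // networkAudioSamples points past a NoEcho-sized header, so the WithEcho header must match.
        assert(numBytesForPacketHeaderGivenPacketType(PacketTypeMicrophoneAudioWithEcho)
               == numBytesForPacketHeaderGivenPacketType(PacketTypeMicrophoneAudioNoEcho));
    }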
@@ -666,19 +669,13 @@ void Audio::handleAudioInput() {
         glm::vec3 headPosition = interfaceAvatar->getHead()->getPosition();
         glm::quat headOrientation = interfaceAvatar->getHead()->getFinalOrientationInWorldFrame();
         quint8 isStereo = _isStereoInput ? 1 : 0;
 
-        int numAudioBytes = 0;
+        int numPacketBytes = 0;
 
         PacketType packetType;
         if (_lastInputLoudness == 0) {
             packetType = PacketTypeSilentAudioFrame;
-
-            // we need to indicate how many silent samples this is to the audio mixer
-            networkAudioSamples[0] = numNetworkSamples;
-            numAudioBytes = sizeof(int16_t);
         } else {
-            numAudioBytes = numNetworkBytes;
-
             if (Menu::getInstance()->isOptionChecked(MenuOption::EchoServerAudio)) {
                 packetType = PacketTypeMicrophoneAudioWithEcho;
             } else {
@@ -687,27 +684,38 @@ void Audio::handleAudioInput() {
         }
 
         char* currentPacketPtr = audioDataPacket + populatePacketHeader(audioDataPacket, packetType);
 
         // pack sequence number
         memcpy(currentPacketPtr, &_outgoingAvatarAudioSequenceNumber, sizeof(quint16));
         currentPacketPtr += sizeof(quint16);
 
-        // set the mono/stereo byte
-        *currentPacketPtr++ = isStereo;
+        if (packetType == PacketTypeSilentAudioFrame) {
+            // pack num silent samples
+            quint16 numSilentSamples = numNetworkSamples;
+            memcpy(currentPacketPtr, &numSilentSamples, sizeof(quint16));
+            currentPacketPtr += sizeof(quint16);
+        } else {
+            // set the mono/stereo byte
+            *currentPacketPtr++ = isStereo;
 
-        // memcpy the three float positions
-        memcpy(currentPacketPtr, &headPosition, sizeof(headPosition));
-        currentPacketPtr += (sizeof(headPosition));
+            // memcpy the three float positions
+            memcpy(currentPacketPtr, &headPosition, sizeof(headPosition));
+            currentPacketPtr += (sizeof(headPosition));
 
-        // memcpy our orientation
-        memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
-        currentPacketPtr += sizeof(headOrientation);
 
-        nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer);
+            // memcpy our orientation
+            memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
+            currentPacketPtr += sizeof(headOrientation);
+
+            // audio samples have already been packed (written to networkAudioSamples)
+            currentPacketPtr += numNetworkBytes;
+        }
+
+        int packetBytes = currentPacketPtr - audioDataPacket;
+        nodeList->writeDatagram(audioDataPacket, packetBytes, audioMixer);
         _outgoingAvatarAudioSequenceNumber++;
 
         Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO)
-            .updateValue(numAudioBytes + leadingBytes);
+            .updateValue(packetBytes);
     }
     delete[] inputAudioSamples;
 }
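Replacing `numAudioBytes + leadingBytes` with `currentPacketPtr - audioDataPacket` makes the datagram length fall out of the same pointer walk that packed the fields, so the two branches can no longer disagree with the actual layout. A worked version of that arithmetic (hypothetical helper; `headerBytes` stands in for populatePacketHeader()'s return value):

    #include <cstddef>
    #include <cstdint>

    size_t expectedPacketBytes(bool silent, size_t headerBytes, size_t numNetworkBytes) {
        const size_t seqBytes = sizeof(uint16_t);
        if (silent) {
            return headerBytes + seqBytes + sizeof(uint16_t);     // numSilentSamples
        }
        return headerBytes + seqBytes
             + sizeof(uint8_t)                                    // isStereo
             + 3 * sizeof(float) + 4 * sizeof(float)              // glm::vec3 + glm::quat
             + numNetworkBytes;                                   // already-packed samples
    }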
InboundAudioStream.cpp
@@ -67,34 +67,52 @@ int InboundAudioStream::parseData(const QByteArray& packet) {
 
     // parse header
     int numBytesHeader = numBytesForPacketHeader(packet);
-    const char* sequenceAt = packet.constData() + numBytesHeader;
+    const char* dataAt = packet.constData() + numBytesHeader;
     int readBytes = numBytesHeader;
 
     // parse sequence number and track it
-    quint16 sequence = *(reinterpret_cast<const quint16*>(sequenceAt));
+    quint16 sequence = *(reinterpret_cast<const quint16*>(dataAt));
+    dataAt += sizeof(quint16);
     readBytes += sizeof(quint16);
     SequenceNumberStats::ArrivalInfo arrivalInfo = frameReceivedUpdateNetworkStats(sequence, senderUUID);
 
-    // parse the info after the seq number and before the audio data.(the stream properties)
+    // TODO: handle generalized silent packet here?????
+
     int numAudioSamples;
-    readBytes += parseStreamProperties(packetType, packet.mid(readBytes), numAudioSamples);
+    if (packetType == PacketTypeSilentAudioFrame) {
+        // this is a general silent packet; parse the number of silent samples
+        quint16 numSilentSamples = *(reinterpret_cast<const quint16*>(dataAt));
+        dataAt += sizeof(quint16);
+        readBytes += sizeof(quint16);
+
+        numAudioSamples = numSilentSamples;
+    } else {
+        // parse the info after the seq number and before the audio data (the stream properties)
+        readBytes += parseStreamProperties(packetType, packet.mid(readBytes), numAudioSamples);
+    }
 
     // handle this packet based on its arrival status.
-    // For now, late packets are ignored. It may be good in the future to insert the late audio frame
-    // into the ring buffer to fill in the missing frame if it hasn't been mixed yet.
     switch (arrivalInfo._status) {
         case SequenceNumberStats::Early: {
             // Packet is early; write droppable silent samples for each of the skipped packets.
             // NOTE: we assume that each dropped packet contains the same number of samples
             // as the packet we just received.
             int packetsDropped = arrivalInfo._seqDiffFromExpected;
             writeSamplesForDroppedPackets(packetsDropped * numAudioSamples);
 
             // fall through to OnTime case
         }
         case SequenceNumberStats::OnTime: {
-            readBytes += parseAudioData(packetType, packet.mid(readBytes), numAudioSamples);
+            // Packet is on time; parse its data to the ringbuffer
+            if (packetType == PacketTypeSilentAudioFrame) {
+                writeDroppableSilentSamples(numAudioSamples);
+            } else {
+                readBytes += parseAudioData(packetType, packet.mid(readBytes), numAudioSamples);
+            }
             break;
         }
         default: {
+            // For now, late packets are ignored. It may be good in the future to insert the late audio packet data
+            // into the ring buffer to fill in the missing frame if it hasn't been mixed yet.
            break;
         }
     }
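Two details in this hunk are easy to miss: the Early case deliberately falls through to OnTime after padding the ring buffer for the skipped packets, and silent frames are now consumed directly in parseData, so parseStreamProperties and parseAudioData never see them. (The "TODO: handle generalized silent packet here?????" comment looks superseded by the branch added right after it.) A standalone sketch of the silent-payload read, assuming the same raw host-endian reads as the reinterpret_casts above:

    #include <cstdint>
    #include <cstring>

    // Reads [quint16 seq][quint16 numSilentSamples] from the bytes after the packet header.
    int parseSilentHeader(const char* dataAt, uint16_t& sequence, uint16_t& numSilentSamples) {
        const char* p = dataAt;
        memcpy(&sequence, p, sizeof(sequence));                 p += sizeof(sequence);
        memcpy(&numSilentSamples, p, sizeof(numSilentSamples)); p += sizeof(numSilentSamples);
        return int(p - dataAt);
    }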
@@ -108,6 +126,10 @@ int InboundAudioStream::parseData(const QByteArray& packet) {
     return readBytes;
 }
 
+int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) {
+    return _ringBuffer.writeData(packetAfterStreamProperties.data(), numAudioSamples * sizeof(int16_t));
+}
+
 bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) {
     int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
     if (_isStarved) {
@@ -119,6 +141,8 @@ bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) {
         // we have enough samples to pop, so we're good to mix
         _lastPopOutput = _ringBuffer.nextOutput();
         _ringBuffer.shiftReadPosition(numSamplesRequested);
 
+        _framesAvailableStats.update(_ringBuffer.framesAvailable());
+
         _hasStarted = true;
         _lastPopSucceeded = true;
@@ -132,6 +156,7 @@ bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) {
             _lastPopSucceeded = false;
         }
     }
+
     return _lastPopSucceeded;
 }
 
@@ -145,6 +170,8 @@ void InboundAudioStream::starved() {
     _isStarved = true;
     _consecutiveNotMixedCount = 0;
     _starveCount++;
+
+    _framesAvailableStats.reset();
 }
 
 void InboundAudioStream::overrideDesiredJitterBufferFramesTo(int desired) {
InboundAudioStream.h
@@ -113,7 +113,7 @@ protected:
     virtual int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) = 0;
 
     /// parses the audio data in the network packet
-    virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) = 0;
+    virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples);
 
     int writeDroppableSilentSamples(int numSilentSamples);
 
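parseAudioData drops from pure virtual to a regular virtual with the ring-buffer default shown in InboundAudioStream.cpp above, which is what lets AvatarAudioStream and InjectedAudioStream delete their identical overrides below. A stream type with a different payload could still override it; a hypothetical sketch (the subclass, the decode() helper, and the constructor wiring are not in this commit):

    // Hypothetical subclass: decode compressed payload before writing to the ring buffer.
    class CompressedAudioStream : public InboundAudioStream {
    protected:
        int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) override {
            QByteArray decoded = decode(packetAfterStreamProperties, numAudioSamples); // hypothetical decoder
            _ringBuffer.writeData(decoded.data(), numAudioSamples * sizeof(int16_t));
            return packetAfterStreamProperties.size();
        }
    };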
InjectedAudioStream.cpp
@@ -58,10 +58,6 @@ int InjectedAudioStream::parseStreamProperties(PacketType type, const QByteArray
     return packetStream.device()->pos();
 }
 
-int InjectedAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) {
-    return _ringBuffer.writeData(packetAfterStreamProperties.data(), numAudioSamples * sizeof(int16_t));
-}
-
 AudioStreamStats InjectedAudioStream::getAudioStreamStats() const {
     AudioStreamStats streamStats = PositionalAudioStream::getAudioStreamStats();
     streamStats._streamIdentifier = _streamIdentifier;
InjectedAudioStream.h
@@ -32,7 +32,6 @@ private:
 
     AudioStreamStats getAudioStreamStats() const;
     int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples);
-    int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples);
 
     const QUuid _streamIdentifier;
     float _radius;
PositionalAudioStream.h
@@ -50,13 +50,6 @@ protected:
     PositionalAudioStream(const PositionalAudioStream&);
     PositionalAudioStream& operator= (const PositionalAudioStream&);
 
-    /// parses the info between the seq num and the audio data in the network packet and calculates
-    /// how many audio samples this packet contains
-    virtual int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) = 0;
-
-    /// parses the audio data in the network packet
-    virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) = 0;
-
     int parsePositionalData(const QByteArray& positionalByteArray);
 
 protected:
PacketHeaders.cpp
@@ -49,8 +49,9 @@ PacketVersion versionForPacketType(PacketType type) {
     switch (type) {
         case PacketTypeMicrophoneAudioNoEcho:
         case PacketTypeMicrophoneAudioWithEcho:
-        case PacketTypeSilentAudioFrame:
             return 2;
+        case PacketTypeSilentAudioFrame:
+            return 3;
         case PacketTypeMixedAudio:
             return 1;
         case PacketTypeAvatarData:
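Bumping PacketTypeSilentAudioFrame from version 2 to 3 is what protects the wire change: a silent frame's payload means something different after this commit, so peers on the old layout must not parse the new one. An illustrative gate (the codebase's actual version check lives elsewhere; this sketch assumes the PacketHeaders.h declarations above):

    // Drop a silent frame whose sender used a different layout version.
    bool silentFrameVersionAcceptable(PacketVersion sentVersion) {
        return sentVersion == versionForPacketType(PacketTypeSilentAudioFrame); // 3 after this commit
    }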
ScriptEngine.cpp
@@ -490,14 +490,6 @@ void ScriptEngine::run() {
             // pack a placeholder value for sequence number for now, will be packed when destination node is known
             int numPreSequenceNumberBytes = audioPacket.size();
             packetStream << (quint16) 0;
 
-            // assume scripted avatar audio is mono and set channel flag to zero
-            packetStream << (quint8) 0;
-
-            // use the orientation and position of this avatar for the source of this audio
-            packetStream.writeRawData(reinterpret_cast<const char*>(&_avatarData->getPosition()), sizeof(glm::vec3));
-            glm::quat headOrientation = _avatarData->getHeadOrientation();
-            packetStream.writeRawData(reinterpret_cast<const char*>(&headOrientation), sizeof(glm::quat));
-
             if (silentFrame) {
                 if (!_isListeningToAudioStream) {
@@ -507,12 +499,20 @@ void ScriptEngine::run() {
 
                 // write the number of silent samples so the audio-mixer can uphold timing
                 packetStream.writeRawData(reinterpret_cast<const char*>(&SCRIPT_AUDIO_BUFFER_SAMPLES), sizeof(int16_t));
-            } else if (nextSoundOutput) {
-                // write the raw audio data
-                packetStream.writeRawData(reinterpret_cast<const char*>(nextSoundOutput),
-                    numAvailableSamples * sizeof(int16_t));
-            }
+            } else if (nextSoundOutput) {
+                // assume scripted avatar audio is mono and set channel flag to zero
+                packetStream << (quint8)0;
+
+                // use the orientation and position of this avatar for the source of this audio
+                packetStream.writeRawData(reinterpret_cast<const char*>(&_avatarData->getPosition()), sizeof(glm::vec3));
+                glm::quat headOrientation = _avatarData->getHeadOrientation();
+                packetStream.writeRawData(reinterpret_cast<const char*>(&headOrientation), sizeof(glm::quat));
+
+                // write the raw audio data
+                packetStream.writeRawData(reinterpret_cast<const char*>(nextSoundOutput), numAvailableSamples * sizeof(int16_t));
+            }
 
             // write audio packet to AudioMixer nodes
             NodeList* nodeList = NodeList::getInstance();
             foreach(const SharedNodePointer& node, nodeList->getNodeHash()) {
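After these two hunks, scripted avatars emit the same generalized frames as the interface client: the channel flag, position, and orientation are streamed only for audible frames, while a silent frame carries just the placeholder sequence number and the sample count. A condensed sketch of the branch (Qt stream setup and the `silentFrame` flag assumed from surrounding code; the buffer-size constant's value here is hypothetical):

    #include <QDataStream>
    #include <cstdint>

    void packScriptedAudioPayload(QDataStream& packetStream, bool silentFrame,
                                  const int16_t* samples, int numSamples) {
        static const int16_t SCRIPT_AUDIO_BUFFER_SAMPLES = 480; // hypothetical value, for illustration

        packetStream << (quint16)0; // placeholder sequence number, rewritten per destination node

        if (silentFrame) {
            // generalized silent frame: just the number of silent samples
            packetStream.writeRawData(reinterpret_cast<const char*>(&SCRIPT_AUDIO_BUFFER_SAMPLES), sizeof(int16_t));
        } else if (samples) {
            packetStream << (quint8)0; // mono channel flag
            // ... position and orientation writes, then the raw samples, as in the diff:
            packetStream.writeRawData(reinterpret_cast<const char*>(samples), numSamples * sizeof(int16_t));
        }
    }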