silent audio packet type generalized

wangyix 2014-07-28 16:49:53 -07:00
parent 7e59723522
commit 4825457f4d
10 changed files with 87 additions and 81 deletions

assignment-client/src/audio/AvatarAudioStream.cpp

@@ -38,26 +38,9 @@ int AvatarAudioStream::parseStreamProperties(PacketType type, const QByteArray&
     // read the positional data
     readBytes += parsePositionalData(packetAfterSeqNum.mid(readBytes));
 
-    if (type == PacketTypeSilentAudioFrame) {
-        int16_t numSilentSamples;
-        memcpy(&numSilentSamples, packetAfterSeqNum.data() + readBytes, sizeof(int16_t));
-        readBytes += sizeof(int16_t);
-        numAudioSamples = numSilentSamples;
-    } else {
-        int numAudioBytes = packetAfterSeqNum.size() - readBytes;
-        numAudioSamples = numAudioBytes / sizeof(int16_t);
-    }
-
-    return readBytes;
-}
-
-int AvatarAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) {
-    int readBytes = 0;
-    if (type == PacketTypeSilentAudioFrame) {
-        writeDroppableSilentSamples(numAudioSamples);
-    } else {
-        // there is audio data to read
-        readBytes += _ringBuffer.writeData(packetAfterStreamProperties.data(), numAudioSamples * sizeof(int16_t));
-    }
+    // calculate how many samples are in this packet
+    int numAudioBytes = packetAfterSeqNum.size() - readBytes;
+    numAudioSamples = numAudioBytes / sizeof(int16_t);
+
     return readBytes;
 }

assignment-client/src/audio/AvatarAudioStream.h

@@ -26,7 +26,6 @@ private:
     AvatarAudioStream& operator= (const AvatarAudioStream&);
 
     int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples);
-    int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples);
 };
 
 #endif // hifi_AvatarAudioStream_h

interface/src/Audio.cpp

@@ -453,9 +453,12 @@ void Audio::handleAudioInput() {
     static char audioDataPacket[MAX_PACKET_SIZE];
 
     static int numBytesPacketHeader = numBytesForPacketHeaderGivenPacketType(PacketTypeMicrophoneAudioNoEcho);
-    static int leadingBytes = numBytesPacketHeader + sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
-    static int16_t* networkAudioSamples = (int16_t*) (audioDataPacket + leadingBytes);
+
+    // NOTE: we assume PacketTypeMicrophoneAudioWithEcho has same size headers as
+    // PacketTypeMicrophoneAudioNoEcho. If not, then networkAudioSamples will be pointing to the wrong place for writing
+    // audio samples with echo.
+    static int leadingBytes = numBytesPacketHeader + sizeof(quint16) + sizeof(glm::vec3) + sizeof(glm::quat) + sizeof(quint8);
+    static int16_t* networkAudioSamples = (int16_t*)(audioDataPacket + leadingBytes);
 
     float inputToNetworkInputRatio = calculateDeviceToNetworkInputRatio(_numInputCallbackBytes);
@@ -666,19 +669,13 @@ void Audio::handleAudioInput() {
             glm::vec3 headPosition = interfaceAvatar->getHead()->getPosition();
             glm::quat headOrientation = interfaceAvatar->getHead()->getFinalOrientationInWorldFrame();
             quint8 isStereo = _isStereoInput ? 1 : 0;
 
-            int numAudioBytes = 0;
+            int numPacketBytes = 0;
 
             PacketType packetType;
             if (_lastInputLoudness == 0) {
                 packetType = PacketTypeSilentAudioFrame;
-
-                // we need to indicate how many silent samples this is to the audio mixer
-                networkAudioSamples[0] = numNetworkSamples;
-                numAudioBytes = sizeof(int16_t);
             } else {
-                numAudioBytes = numNetworkBytes;
-
                 if (Menu::getInstance()->isOptionChecked(MenuOption::EchoServerAudio)) {
                     packetType = PacketTypeMicrophoneAudioWithEcho;
                 } else {
@@ -687,27 +684,38 @@ void Audio::handleAudioInput() {
             }
 
             char* currentPacketPtr = audioDataPacket + populatePacketHeader(audioDataPacket, packetType);
 
             // pack sequence number
             memcpy(currentPacketPtr, &_outgoingAvatarAudioSequenceNumber, sizeof(quint16));
             currentPacketPtr += sizeof(quint16);
 
-            // set the mono/stereo byte
-            *currentPacketPtr++ = isStereo;
+            if (packetType == PacketTypeSilentAudioFrame) {
+                // pack num silent samples
+                quint16 numSilentSamples = numNetworkSamples;
+                memcpy(currentPacketPtr, &numSilentSamples, sizeof(quint16));
+                currentPacketPtr += sizeof(quint16);
+            } else {
+                // set the mono/stereo byte
+                *currentPacketPtr++ = isStereo;
 
-            // memcpy the three float positions
-            memcpy(currentPacketPtr, &headPosition, sizeof(headPosition));
-            currentPacketPtr += (sizeof(headPosition));
+                // memcpy the three float positions
+                memcpy(currentPacketPtr, &headPosition, sizeof(headPosition));
+                currentPacketPtr += (sizeof(headPosition));
 
-            // memcpy our orientation
-            memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
-            currentPacketPtr += sizeof(headOrientation);
-
-            nodeList->writeDatagram(audioDataPacket, numAudioBytes + leadingBytes, audioMixer);
+                // memcpy our orientation
+                memcpy(currentPacketPtr, &headOrientation, sizeof(headOrientation));
+                currentPacketPtr += sizeof(headOrientation);
+
+                // audio samples have already been packed (written to networkAudioSamples)
+                currentPacketPtr += numNetworkBytes;
+            }
+
+            int packetBytes = currentPacketPtr - audioDataPacket;
+            nodeList->writeDatagram(audioDataPacket, packetBytes, audioMixer);
             _outgoingAvatarAudioSequenceNumber++;
 
             Application::getInstance()->getBandwidthMeter()->outputStream(BandwidthMeter::AUDIO)
-                .updateValue(numAudioBytes + leadingBytes);
+                .updateValue(packetBytes);
         }
 
         delete[] inputAudioSamples;
     }
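Note: after this hunk, a silent frame from the interface client is just the packet header, a quint16 sequence number, and a quint16 count of silent samples; the stereo flag, head position, and orientation travel only in audible packets. A minimal sketch of that packing path, reusing the populatePacketHeader() call shown above (packSilentAudioFrame is a hypothetical name for illustration, not part of this commit):

// Sketch only: mirrors the PacketTypeSilentAudioFrame branch of handleAudioInput().
int packSilentAudioFrame(char* packet, quint16 sequenceNumber, quint16 numSilentSamples) {
    char* ptr = packet + populatePacketHeader(packet, PacketTypeSilentAudioFrame);
    memcpy(ptr, &sequenceNumber, sizeof(quint16));    // sequence number comes first
    ptr += sizeof(quint16);
    memcpy(ptr, &numSilentSamples, sizeof(quint16));  // then the silent-sample count
    ptr += sizeof(quint16);
    return ptr - packet;                              // total bytes to hand to writeDatagram()
}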

libraries/audio/src/InboundAudioStream.cpp

@@ -67,34 +67,52 @@ int InboundAudioStream::parseData(const QByteArray& packet) {
 
     // parse header
     int numBytesHeader = numBytesForPacketHeader(packet);
-    const char* sequenceAt = packet.constData() + numBytesHeader;
+    const char* dataAt = packet.constData() + numBytesHeader;
     int readBytes = numBytesHeader;
 
     // parse sequence number and track it
-    quint16 sequence = *(reinterpret_cast<const quint16*>(sequenceAt));
+    quint16 sequence = *(reinterpret_cast<const quint16*>(dataAt));
+    dataAt += sizeof(quint16);
     readBytes += sizeof(quint16);
     SequenceNumberStats::ArrivalInfo arrivalInfo = frameReceivedUpdateNetworkStats(sequence, senderUUID);
 
-    // TODO: handle generalized silent packet here?????
-
-    // parse the info after the seq number and before the audio data.(the stream properties)
     int numAudioSamples;
-    readBytes += parseStreamProperties(packetType, packet.mid(readBytes), numAudioSamples);
+    if (packetType == PacketTypeSilentAudioFrame) {
+        // this is a general silent packet; parse the number of silent samples
+        quint16 numSilentSamples = *(reinterpret_cast<const quint16*>(dataAt));
+        dataAt += sizeof(quint16);
+        readBytes += sizeof(quint16);
+        numAudioSamples = numSilentSamples;
+    } else {
+        // parse the info after the seq number and before the audio data (the stream properties)
+        readBytes += parseStreamProperties(packetType, packet.mid(readBytes), numAudioSamples);
+    }
 
-    // For now, late packets are ignored. It may be good in the future to insert the late audio frame
-    // into the ring buffer to fill in the missing frame if it hasn't been mixed yet.
+    // handle this packet based on its arrival status.
     switch (arrivalInfo._status) {
         case SequenceNumberStats::Early: {
+            // Packet is early; write droppable silent samples for each of the skipped packets.
+            // NOTE: we assume that each dropped packet contains the same number of samples
+            // as the packet we just received.
            int packetsDropped = arrivalInfo._seqDiffFromExpected;
            writeSamplesForDroppedPackets(packetsDropped * numAudioSamples);
            // fall through to OnTime case
        }
        case SequenceNumberStats::OnTime: {
-            readBytes += parseAudioData(packetType, packet.mid(readBytes), numAudioSamples);
+            // Packet is on time; parse its data to the ringbuffer
+            if (packetType == PacketTypeSilentAudioFrame) {
+                writeDroppableSilentSamples(numAudioSamples);
+            } else {
+                readBytes += parseAudioData(packetType, packet.mid(readBytes), numAudioSamples);
+            }
            break;
        }
        default: {
+            // For now, late packets are ignored. It may be good in the future to insert the late audio packet data
+            // into the ring buffer to fill in the missing frame if it hasn't been mixed yet.
            break;
        }
    }
@@ -108,6 +126,10 @@ int InboundAudioStream::parseData(const QByteArray& packet) {
     return readBytes;
 }
 
+int InboundAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) {
+    return _ringBuffer.writeData(packetAfterStreamProperties.data(), numAudioSamples * sizeof(int16_t));
+}
+
 bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) {
     int numSamplesRequested = numFrames * _ringBuffer.getNumFrameSamples();
     if (_isStarved) {
@@ -119,6 +141,8 @@ bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) {
             // we have enough samples to pop, so we're good to mix
             _lastPopOutput = _ringBuffer.nextOutput();
             _ringBuffer.shiftReadPosition(numSamplesRequested);
+
+            _framesAvailableStats.update(_ringBuffer.framesAvailable());
 
             _hasStarted = true;
             _lastPopSucceeded = true;
@@ -132,6 +156,7 @@ bool InboundAudioStream::popFrames(int numFrames, bool starveOnFail) {
             _lastPopSucceeded = false;
         }
     }
+
     return _lastPopSucceeded;
 }
@@ -145,6 +170,8 @@ void InboundAudioStream::starved() {
     _isStarved = true;
     _consecutiveNotMixedCount = 0;
     _starveCount++;
+
+    _framesAvailableStats.reset();
 }
 
 void InboundAudioStream::overrideDesiredJitterBufferFramesTo(int desired) {
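Note: with the silent case handled inline in parseData(), the quint16 silent-sample header is read identically for every stream type, and subclasses only implement parseStreamProperties()/parseAudioData() for audible packets. A minimal sketch of that shared step, assuming Qt integer types (readSilentFrameHeader is a hypothetical helper, not code from this commit):

// Sketch only: the generalized silent-frame read that parseData() now performs.
int readSilentFrameHeader(const char*& dataAt, int& numAudioSamples) {
    quint16 numSilentSamples = *(reinterpret_cast<const quint16*>(dataAt));
    dataAt += sizeof(quint16);           // advance past the sample count
    numAudioSamples = numSilentSamples;  // the count doubles as this frame's sample total
    return sizeof(quint16);              // bytes consumed after the sequence number
}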

libraries/audio/src/InboundAudioStream.h

@@ -113,7 +113,7 @@ protected:
     virtual int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) = 0;
 
     /// parses the audio data in the network packet
-    virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) = 0;
+    virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples);
 
     int writeDroppableSilentSamples(int numSilentSamples);

libraries/audio/src/InjectedAudioStream.cpp

@@ -58,10 +58,6 @@ int InjectedAudioStream::parseStreamProperties(PacketType type, const QByteArray
     return packetStream.device()->pos();
 }
 
-int InjectedAudioStream::parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) {
-    return _ringBuffer.writeData(packetAfterStreamProperties.data(), numAudioSamples * sizeof(int16_t));
-}
-
 AudioStreamStats InjectedAudioStream::getAudioStreamStats() const {
     AudioStreamStats streamStats = PositionalAudioStream::getAudioStreamStats();
     streamStats._streamIdentifier = _streamIdentifier;

libraries/audio/src/InjectedAudioStream.h

@@ -32,7 +32,6 @@ private:
     AudioStreamStats getAudioStreamStats() const;
     int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples);
-    int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples);
 
     const QUuid _streamIdentifier;
     float _radius;

libraries/audio/src/PositionalAudioStream.h

@@ -50,13 +50,6 @@ protected:
     PositionalAudioStream(const PositionalAudioStream&);
     PositionalAudioStream& operator= (const PositionalAudioStream&);
 
-    /// parses the info between the seq num and the audio data in the network packet and calculates
-    /// how many audio samples this packet contains
-    virtual int parseStreamProperties(PacketType type, const QByteArray& packetAfterSeqNum, int& numAudioSamples) = 0;
-
-    /// parses the audio data in the network packet
-    virtual int parseAudioData(PacketType type, const QByteArray& packetAfterStreamProperties, int numAudioSamples) = 0;
-
     int parsePositionalData(const QByteArray& positionalByteArray);
 
 protected:

libraries/networking/src/PacketHeaders.cpp

@@ -49,8 +49,9 @@ PacketVersion versionForPacketType(PacketType type) {
     switch (type) {
         case PacketTypeMicrophoneAudioNoEcho:
         case PacketTypeMicrophoneAudioWithEcho:
-        case PacketTypeSilentAudioFrame:
             return 2;
+        case PacketTypeSilentAudioFrame:
+            return 3;
         case PacketTypeMixedAudio:
             return 1;
         case PacketTypeAvatarData:
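Note: PacketTypeSilentAudioFrame leaves the shared `return 2;` case because its payload changed shape in this commit, while the two microphone-audio types did not. Bumping only its version lets peers reject stale silent frames instead of misparsing them; a hedged sketch of such a guard (the check itself is an assumption for illustration, not code from this commit):

// Sketch only: drop a silent frame whose sender still speaks the old layout.
bool canParseSilentFrame(PacketVersion senderVersion) {
    return senderVersion == versionForPacketType(PacketTypeSilentAudioFrame);  // == 3 after this change
}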

libraries/script-engine/src/ScriptEngine.cpp

@@ -490,14 +490,6 @@ void ScriptEngine::run() {
             // pack a placeholder value for sequence number for now, will be packed when destination node is known
             int numPreSequenceNumberBytes = audioPacket.size();
             packetStream << (quint16) 0;
 
-            // assume scripted avatar audio is mono and set channel flag to zero
-            packetStream << (quint8) 0;
-
-            // use the orientation and position of this avatar for the source of this audio
-            packetStream.writeRawData(reinterpret_cast<const char*>(&_avatarData->getPosition()), sizeof(glm::vec3));
-            glm::quat headOrientation = _avatarData->getHeadOrientation();
-            packetStream.writeRawData(reinterpret_cast<const char*>(&headOrientation), sizeof(glm::quat));
-
             if (silentFrame) {
                 if (!_isListeningToAudioStream) {
@@ -507,12 +499,20 @@ void ScriptEngine::run() {
                 // write the number of silent samples so the audio-mixer can uphold timing
                 packetStream.writeRawData(reinterpret_cast<const char*>(&SCRIPT_AUDIO_BUFFER_SAMPLES), sizeof(int16_t));
-            } else if (nextSoundOutput) {
-                // write the raw audio data
-                packetStream.writeRawData(reinterpret_cast<const char*>(nextSoundOutput),
-                    numAvailableSamples * sizeof(int16_t));
-            }
+            } else if (nextSoundOutput) {
+                // assume scripted avatar audio is mono and set channel flag to zero
+                packetStream << (quint8)0;
+
+                // use the orientation and position of this avatar for the source of this audio
+                packetStream.writeRawData(reinterpret_cast<const char*>(&_avatarData->getPosition()), sizeof(glm::vec3));
+                glm::quat headOrientation = _avatarData->getHeadOrientation();
+                packetStream.writeRawData(reinterpret_cast<const char*>(&headOrientation), sizeof(glm::quat));
+
+                // write the raw audio data
+                packetStream.writeRawData(reinterpret_cast<const char*>(nextSoundOutput), numAvailableSamples * sizeof(int16_t));
+            }
 
             // write audio packet to AudioMixer nodes
             NodeList* nodeList = NodeList::getInstance();
             foreach(const SharedNodePointer& node, nodeList->getNodeHash()) {
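Note: scripted silent frames now match the interface client's layout, with the mono flag and positional data reserved for audible frames. A condensed sketch of the silent path above (byteArrayWithPopulatedHeader() is assumed from the hifi networking library; this is an illustration, not the commit's full loop):

// Sketch only: the bytes a scripted silent frame carries after this change.
QByteArray audioPacket = byteArrayWithPopulatedHeader(PacketTypeSilentAudioFrame);
QDataStream packetStream(&audioPacket, QIODevice::Append);
packetStream << (quint16)0;  // sequence number placeholder, rewritten per destination node
packetStream.writeRawData(reinterpret_cast<const char*>(&SCRIPT_AUDIO_BUFFER_SAMPLES),
                          sizeof(int16_t));  // silent-sample count so the mixer can uphold timing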