//
// Audio.cpp
// interface
//
// Created by Stephen Birarda on 1/22/13.
// Copyright (c) 2013 High Fidelity, Inc. All rights reserved.
//

#ifndef _WIN32

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>

#include <pthread.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <StdDev.h>
#include <UDPSocket.h>
#include <SharedUtil.h>
#include <PacketHeaders.h>
#include <AgentList.h>
#include <AgentTypes.h>

#include "Application.h"
#include "Audio.h"
#include "Util.h"
#include "Log.h"

const int NUM_AUDIO_CHANNELS = 2;

const int PACKET_LENGTH_BYTES = 1024;
const int PACKET_LENGTH_BYTES_PER_CHANNEL = PACKET_LENGTH_BYTES / 2;
const int PACKET_LENGTH_SAMPLES = PACKET_LENGTH_BYTES / sizeof(int16_t);
const int PACKET_LENGTH_SAMPLES_PER_CHANNEL = PACKET_LENGTH_SAMPLES / 2;

const int BUFFER_LENGTH_BYTES = 512;
const int BUFFER_LENGTH_SAMPLES = BUFFER_LENGTH_BYTES / sizeof(int16_t);

const int RING_BUFFER_FRAMES = 10;
const int RING_BUFFER_SAMPLES = RING_BUFFER_FRAMES * BUFFER_LENGTH_SAMPLES;

const int PHASE_DELAY_AT_90 = 20;
const float AMPLITUDE_RATIO_AT_90 = 0.5;

const int MIN_FLANGE_EFFECT_THRESHOLD = 600;
const int MAX_FLANGE_EFFECT_THRESHOLD = 1500;
const float FLANGE_BASE_RATE = 4;
const float MAX_FLANGE_SAMPLE_WEIGHT = 0.50;
const float MIN_FLANGE_INTENSITY = 0.25;

const int SAMPLE_RATE = 22050;
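
// The jitter buffer adds a fixed amount of headroom before playback starts:
// 12 msecs * 2 channels * 22.05 samples-per-msec ~= 529 samples that must accumulate
// beyond one full packet before the ring buffer is allowed to start playing.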
const float JITTER_BUFFER_LENGTH_MSECS = 12;
const short JITTER_BUFFER_SAMPLES = JITTER_BUFFER_LENGTH_MSECS *
                                    NUM_AUDIO_CHANNELS * (SAMPLE_RATE / 1000.0);

const float AUDIO_CALLBACK_MSECS = (float)BUFFER_LENGTH_SAMPLES / (float)SAMPLE_RATE * 1000.0;

const int AGENT_LOOPBACK_MODIFIER = 307;

int numStarves = 0;
StDev stdev;

int samplesLeftForFlange = 0;
int lastYawMeasuredMaximum = 0;
float flangeIntensity = 0;
float flangeRate = 0;
float flangeWeight = 0;

float usecsAtStartup = 0;

// audioCallback: the PortAudio callback, run on the audio thread.
//
//   inputBuffer   A pointer to an internal portaudio data buffer containing data read by portaudio.
//   outputBuffer  A pointer to an internal portaudio data buffer to be read by the configured output device.
//   frames        Number of frames that portaudio requests to be read/written.
//   timeInfo      Portaudio time info. Currently unused.
//   statusFlags   Portaudio status flags. Currently unused.
//   userData      Pointer to supplied user data (in this case, a pointer to the parent Audio object).
int audioCallback(const void* inputBuffer,
                  void* outputBuffer,
                  unsigned long frames,
                  const PaStreamCallbackTimeInfo* timeInfo,
                  PaStreamCallbackFlags statusFlags,
                  void* userData) {
    Audio* parentAudio = (Audio*) userData;
    AgentList* agentList = AgentList::getInstance();
    Application* interface = Application::getInstance();
    Avatar* interfaceAvatar = interface->getAvatar();

    int16_t* inputLeft = ((int16_t**) inputBuffer)[0];
    int16_t* outputLeft = ((int16_t**) outputBuffer)[0];
    int16_t* outputRight = ((int16_t**) outputBuffer)[1];

    // add procedural effects to the input samples
    parentAudio->addProceduralSounds(inputLeft, BUFFER_LENGTH_SAMPLES);

    // add output (@speakers) data to the scope
    parentAudio->_scope->addSamples(1, outputLeft, PACKET_LENGTH_SAMPLES_PER_CHANNEL);
    parentAudio->_scope->addSamples(2, outputRight, PACKET_LENGTH_SAMPLES_PER_CHANNEL);

    // if needed, add input/output data to echo analysis buffers
    if (parentAudio->_echoInputFrameCountdown > 0) {
        if (--parentAudio->_echoInputFrameCountdown == 0) {
            memcpy(parentAudio->_echoInputSamples, inputLeft,
                   PACKET_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int16_t));
            printLog("got input\n");
        }
    }
    if (parentAudio->_isGatheringEchoOutputFrames) {
        memcpy(parentAudio->_echoOutputSamples, outputLeft,
               PACKET_LENGTH_SAMPLES_PER_CHANNEL * sizeof(int16_t));
        parentAudio->_isGatheringEchoOutputFrames = false;
        parentAudio->_echoInputFrameCountdown = 2;
        printLog("got output\n");
    }

    if (inputLeft != NULL) {
        // measure the loudness of the signal from the microphone and store it in the audio object
        float loudness = 0;
        for (int i = 0; i < BUFFER_LENGTH_SAMPLES; i++) {
            loudness += abs(inputLeft[i]);
        }
        loudness /= BUFFER_LENGTH_SAMPLES;
        parentAudio->_lastInputLoudness = loudness;

        // add input (@microphone) data to the scope
        parentAudio->_scope->addSamples(0, inputLeft, BUFFER_LENGTH_SAMPLES);

        Agent* audioMixer = agentList->soloAgentOfType(AGENT_TYPE_AUDIO_MIXER);

        if (audioMixer) {
            // we need the amount of bytes in the buffer + 1 for type
            // + 12 for 3 floats for position + 4 for the bearing float + 1 attenuation byte
            int leadingBytes = 2 + (sizeof(float) * 4);
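
            // Resulting packet layout (leadingBytes = 18, followed by BUFFER_LENGTH_BYTES of audio):
            //
            //   [ 1 byte type ][ 12 bytes head position (3 floats) ][ 1 byte attenuation ]
            //   [ 4 bytes bearing (1 float) ][ 512 bytes of mono int16_t microphone samples ]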
            unsigned char dataPacket[BUFFER_LENGTH_BYTES + leadingBytes];

            dataPacket[0] = PACKET_HEADER_MICROPHONE_AUDIO;
            unsigned char* currentPacketPtr = dataPacket + 1;

            // memcpy the three float positions
            memcpy(currentPacketPtr, &interfaceAvatar->getHeadPosition(), sizeof(float) * 3);
            currentPacketPtr += (sizeof(float) * 3);

            // tell the mixer not to add additional attenuation to our source
            *(currentPacketPtr++) = 255;

            // memcpy the corrected render yaw
            float correctedYaw = fmodf(-1 * interfaceAvatar->getAbsoluteHeadYaw(), 360);

            if (correctedYaw > 180) {
                correctedYaw -= 360;
            } else if (correctedYaw < -180) {
                correctedYaw += 360;
            }

            if (parentAudio->_mixerLoopbackFlag) {
                correctedYaw = correctedYaw > 0
                    ? correctedYaw + AGENT_LOOPBACK_MODIFIER
                    : correctedYaw - AGENT_LOOPBACK_MODIFIER;
            }

            memcpy(currentPacketPtr, &correctedYaw, sizeof(float));
            currentPacketPtr += sizeof(float);

            // copy the audio data to the last BUFFER_LENGTH_BYTES bytes of the data packet
            memcpy(currentPacketPtr, inputLeft, BUFFER_LENGTH_BYTES);

            agentList->getAgentSocket()->send(audioMixer->getActiveSocket(), dataPacket, BUFFER_LENGTH_BYTES + leadingBytes);
        }
    }

    memset(outputLeft, 0, PACKET_LENGTH_BYTES_PER_CHANNEL);
    memset(outputRight, 0, PACKET_LENGTH_BYTES_PER_CHANNEL);

    AudioRingBuffer* ringBuffer = &parentAudio->_ringBuffer;

    // if we've been reset, and there aren't any new packets yet,
    // just play some silence
    if (ringBuffer->getEndOfLastWrite() != NULL) {
        if (!ringBuffer->isStarted() && ringBuffer->diffLastWriteNextOutput() < PACKET_LENGTH_SAMPLES + JITTER_BUFFER_SAMPLES) {
            // printLog("Held back, buffer has %d of %d samples required.\n",
            //          ringBuffer->diffLastWriteNextOutput(), PACKET_LENGTH_SAMPLES + JITTER_BUFFER_SAMPLES);
        } else if (ringBuffer->diffLastWriteNextOutput() < PACKET_LENGTH_SAMPLES) {
            ringBuffer->setStarted(false);

            ::numStarves++;
            parentAudio->_packetsReceivedThisPlayback = 0;
            // printLog("Starved #%d\n", ::numStarves);
            parentAudio->_wasStarved = 10; // Frames to render the indication that the system was starved.
        } else {
            if (!ringBuffer->isStarted()) {
                ringBuffer->setStarted(true);
                // printLog("starting playback %3.1f msecs delayed\n", (usecTimestampNow() - usecTimestamp(&firstPlaybackTimer)) / 1000.0);
            } else {
                // printLog("pushing buffer\n");
            }

            // play whatever we have in the audio buffer

            // if we haven't fired off the flange effect, check if we should
            // TODO: lastMeasuredHeadYaw is now relative to body - check if this still works.
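
            // The flange fires when the head-yaw rate peaks above MIN_FLANGE_EFFECT_THRESHOLD and
            // then starts to fall. Intensity maps the peak linearly from MIN_FLANGE_INTENSITY (at a
            // peak of 600) up to 1.0 (at 1500 or above), and scales both the sweep rate and the
            // wet/dry mix of the delayed samples below.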
            int lastYawMeasured = fabsf(interfaceAvatar->getHeadYawRate());

            if (!::samplesLeftForFlange && lastYawMeasured > MIN_FLANGE_EFFECT_THRESHOLD) {
                // we should flange for one second
                if ((::lastYawMeasuredMaximum = std::max(::lastYawMeasuredMaximum, lastYawMeasured)) != lastYawMeasured) {
                    // cap the peak at MAX_FLANGE_EFFECT_THRESHOLD so the intensity tops out at 1.0
                    ::lastYawMeasuredMaximum = std::min(::lastYawMeasuredMaximum, MAX_FLANGE_EFFECT_THRESHOLD);

                    ::samplesLeftForFlange = SAMPLE_RATE;

                    ::flangeIntensity = MIN_FLANGE_INTENSITY +
                        ((::lastYawMeasuredMaximum - MIN_FLANGE_EFFECT_THRESHOLD) /
                         (float)(MAX_FLANGE_EFFECT_THRESHOLD - MIN_FLANGE_EFFECT_THRESHOLD)) *
                        (1 - MIN_FLANGE_INTENSITY);

                    ::flangeRate = FLANGE_BASE_RATE * ::flangeIntensity;
                    ::flangeWeight = MAX_FLANGE_SAMPLE_WEIGHT * ::flangeIntensity;
                }
            }

            for (int s = 0; s < PACKET_LENGTH_SAMPLES_PER_CHANNEL; s++) {
                int leftSample = ringBuffer->getNextOutput()[s];
                int rightSample = ringBuffer->getNextOutput()[s + PACKET_LENGTH_SAMPLES_PER_CHANNEL];

                if (::samplesLeftForFlange > 0) {
                    float exponent = (SAMPLE_RATE - ::samplesLeftForFlange - (SAMPLE_RATE / ::flangeRate)) /
                                     (SAMPLE_RATE / ::flangeRate);
                    int sampleFlangeDelay = (SAMPLE_RATE / (1000 * ::flangeIntensity)) * powf(2, exponent);

                    if (::samplesLeftForFlange != SAMPLE_RATE || s >= (SAMPLE_RATE / 2000)) {
                        // we have a delayed sample to add to this sample
                        int16_t* flangeFrame = ringBuffer->getNextOutput();
                        int flangeIndex = s - sampleFlangeDelay;

                        if (flangeIndex < 0) {
                            // we need to grab the flange sample from earlier in the buffer
                            flangeFrame = ringBuffer->getNextOutput() != ringBuffer->getBuffer()
                                ? ringBuffer->getNextOutput() - PACKET_LENGTH_SAMPLES
                                : ringBuffer->getNextOutput() + RING_BUFFER_SAMPLES - PACKET_LENGTH_SAMPLES;

                            flangeIndex = PACKET_LENGTH_SAMPLES_PER_CHANNEL + (s - sampleFlangeDelay);
                        }

                        int16_t leftFlangeSample = flangeFrame[flangeIndex];
                        int16_t rightFlangeSample = flangeFrame[flangeIndex + PACKET_LENGTH_SAMPLES_PER_CHANNEL];

                        leftSample = (1 - ::flangeWeight) * leftSample + (::flangeWeight * leftFlangeSample);
                        rightSample = (1 - ::flangeWeight) * rightSample + (::flangeWeight * rightFlangeSample);

                        ::samplesLeftForFlange--;

                        if (::samplesLeftForFlange == 0) {
                            ::lastYawMeasuredMaximum = 0;
                        }
                    }
                }

                outputLeft[s] = leftSample;
                outputRight[s] = rightSample;
            }

            ringBuffer->setNextOutput(ringBuffer->getNextOutput() + PACKET_LENGTH_SAMPLES);

            if (ringBuffer->getNextOutput() == ringBuffer->getBuffer() + RING_BUFFER_SAMPLES) {
                ringBuffer->setNextOutput(ringBuffer->getBuffer());
            }
        }
    }
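
    // The ping below is a pure tone: sinf(s / PING_PITCH) completes one cycle every
    // 2 * pi * PING_PITCH ~= 25 samples, i.e. roughly 22050 / 25 ~= 880 Hz at this sample rate.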
    if (parentAudio->_isSendingEchoPing) {
        const float PING_PITCH = 4.f;
        const float PING_VOLUME = 32000.f;

        for (int s = 0; s < PACKET_LENGTH_SAMPLES_PER_CHANNEL; s++) {
            outputLeft[s] = outputRight[s] = (int16_t)(sinf((float) s / PING_PITCH) * PING_VOLUME);
        }

        printLog("Send echo ping\n");
        parentAudio->_isSendingEchoPing = false;
        parentAudio->_isGatheringEchoOutputFrames = true;
    }

    gettimeofday(&parentAudio->_lastCallbackTime, NULL);
    return paContinue;
}

void outputPortAudioError(PaError error) {
    if (error != paNoError) {
        printLog("-- portaudio termination error --\n");
        printLog("PortAudio error (%d): %s\n", error, Pa_GetErrorText(error));
    }
}

Audio::Audio(Oscilloscope* scope) :
    _stream(NULL),
    _ringBuffer(RING_BUFFER_SAMPLES, PACKET_LENGTH_SAMPLES),
    _scope(scope),
    _averagedLatency(0.0),
    _measuredJitter(0),
    _jitterBufferLengthMsecs(12.0),
    _jitterBufferSamples(_jitterBufferLengthMsecs *
                         NUM_AUDIO_CHANNELS * (SAMPLE_RATE / 1000.0)),
    _wasStarved(0),
    _lastInputLoudness(0),
    _mixerLoopbackFlag(false),
    _lastVelocity(0),
    _lastAcceleration(0),
    _totalPacketsReceived(0),
    _firstPlaybackTime(),
    _packetsReceivedThisPlayback(0),
    _shouldStartEcho(false),
    _isSendingEchoPing(false),
    _echoInputFrameCountdown(0),
    _isGatheringEchoOutputFrames(false)
{
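    // Note: the stream is opened with paNonInterleaved, so the input/output buffers handed to
    // audioCallback are arrays of per-channel pointers - hence ((int16_t**) buffer)[0] / [1] there.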
    outputPortAudioError(Pa_Initialize());

    outputPortAudioError(Pa_OpenDefaultStream(&_stream,
                                              2,
                                              2,
                                              (paInt16 | paNonInterleaved),
                                              SAMPLE_RATE,
                                              BUFFER_LENGTH_SAMPLES,
                                              audioCallback,
                                              (void*) this));

    // start the stream now that sources are good to go
    outputPortAudioError(Pa_StartStream(_stream));

    // one frame of mono samples for each side of the echo comparison
    _echoInputSamples = new int16_t[BUFFER_LENGTH_SAMPLES];
    _echoOutputSamples = new int16_t[BUFFER_LENGTH_SAMPLES];
    memset(_echoInputSamples, 0, BUFFER_LENGTH_SAMPLES * sizeof(int16_t));
    memset(_echoOutputSamples, 0, BUFFER_LENGTH_SAMPLES * sizeof(int16_t));

    gettimeofday(&_lastReceiveTime, NULL);
}

Audio::~Audio() {
    if (_stream) {
        outputPortAudioError(Pa_CloseStream(_stream));
        outputPortAudioError(Pa_Terminate());
    }
}

void Audio::renderEchoCompare() {
    const int XPOS = 0;
    const int YPOS = 500;
    const int YSCALE = 500;
    const int XSCALE = 2;

    glPointSize(1.0);
    glLineWidth(1.0);
    glDisable(GL_LINE_SMOOTH);

    // the gathered input samples in white, the gathered output samples in cyan
    glColor3f(1, 1, 1);
    glBegin(GL_LINE_STRIP);
    for (int i = 0; i < BUFFER_LENGTH_SAMPLES; i++) {
        glVertex2f(XPOS + i * XSCALE, YPOS + _echoInputSamples[i] / YSCALE);
    }
    glEnd();

    glColor3f(0, 1, 1);
    glBegin(GL_LINE_STRIP);
    for (int i = 0; i < BUFFER_LENGTH_SAMPLES; i++) {
        glVertex2f(XPOS + i * XSCALE, YPOS + _echoOutputSamples[i] / YSCALE);
    }
    glEnd();
}

// Take a pointer to the acquired microphone input samples and add procedural sounds
void Audio::addProceduralSounds(int16_t* inputBuffer, int numSamples) {
    const float MAX_AUDIBLE_VELOCITY = 6.0;
    const float MIN_AUDIBLE_VELOCITY = 0.1;
    const int VOLUME_BASELINE = 400;
    const float SOUND_PITCH = 8.f;

    float speed = glm::length(_lastVelocity);
    float volume = VOLUME_BASELINE * (1.f - speed / MAX_AUDIBLE_VELOCITY);

    // Add a noise-modulated sinewave with volume that tapers off as speed increases
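    // (the applied amplitude is volume * speed, so it rises from silence near MIN_AUDIBLE_VELOCITY,
    // peaks at half of MAX_AUDIBLE_VELOCITY, and falls back to zero at MAX_AUDIBLE_VELOCITY; the
    // sine frequency also scales with speed, and randFloat() noise-modulates every sample)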
    if ((speed > MIN_AUDIBLE_VELOCITY) && (speed < MAX_AUDIBLE_VELOCITY)) {
        for (int i = 0; i < numSamples; i++) {
            inputBuffer[i] += (int16_t)((sinf((float) i / SOUND_PITCH * speed) * randFloat()) * volume * speed);
        }
    }
}

void Audio::startEchoTest() {
    _shouldStartEcho = true;
    _isSendingEchoPing = true;
    _isGatheringEchoOutputFrames = false;
}

void Audio::analyzeEcho(int16_t* inputBuffer, int16_t* outputBuffer, int numSamples) {
    // Compare output and input streams, looking for evidence of correlation needing echo cancellation
    //
    // OFFSET_RANGE tells us how many samples to vary the analysis window when looking for correlation,
    // and should cover the largest physical distance between speaker and microphone, where
    // OFFSET_RANGE = distance (meters) / (speedOfSound (meters / sec) / SamplingRate (samples / sec))
    //
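    // At 22050 samples/sec and ~343 m/sec for the speed of sound, one sample covers ~1.6 cm of
    // travel, so OFFSET_RANGE = 10 corresponds to a speaker-to-microphone distance of ~16 cm.
    //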
    const int OFFSET_RANGE = 10;
    const int SIGNAL_FLOOR = 1000;

    float correlation[2 * OFFSET_RANGE + 1];
    int numChecked = 0;
    bool foundSignal = false;

    memset(correlation, 0, sizeof(float) * (2 * OFFSET_RANGE + 1));

    for (int offset = -OFFSET_RANGE; offset <= OFFSET_RANGE; offset++) {
        for (int i = 0; i < numSamples; i++) {
            if ((i + offset >= 0) && (i + offset < numSamples)) {
                correlation[offset + OFFSET_RANGE] +=
                    (float) abs(inputBuffer[i] - outputBuffer[i + offset]);
                numChecked++;
                foundSignal |= (inputBuffer[i] > SIGNAL_FLOOR);
            }
        }
        correlation[offset + OFFSET_RANGE] /= numChecked;
        numChecked = 0;

        if (foundSignal) {
            printLog("%4.2f, ", correlation[offset + OFFSET_RANGE]);
        }
    }
    if (foundSignal) {
        printLog("\n");
    }
}

void Audio::addReceivedAudioToBuffer(unsigned char* receivedData, int receivedBytes) {
    const int NUM_INITIAL_PACKETS_DISCARD = 3;

    timeval currentReceiveTime;
    gettimeofday(&currentReceiveTime, NULL);
    _totalPacketsReceived++;

    double timeDiff = diffclock(&_lastReceiveTime, &currentReceiveTime);

    // Discard first few received packets for computing jitter (often they pile up on start)
    if (_totalPacketsReceived > NUM_INITIAL_PACKETS_DISCARD) {
        ::stdev.addValue(timeDiff);
    }
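
    // Jitter here is the standard deviation of inter-packet arrival intervals; with packets
    // arriving roughly once per callback (~11.6 ms), a 500-packet window refreshes the
    // measurement about every 6 seconds.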
    if (::stdev.getSamples() > 500) {
        _measuredJitter = ::stdev.getStDev();
        ::stdev.reset();
    }

    if (!_ringBuffer.isStarted()) {
        _packetsReceivedThisPlayback++;
    }

    if (_packetsReceivedThisPlayback == 1) {
        gettimeofday(&_firstPlaybackTime, NULL);
    }

    _ringBuffer.parseData(receivedData, PACKET_LENGTH_BYTES);

    _lastReceiveTime = currentReceiveTime;
}

void Audio::render(int screenWidth, int screenHeight) {
    if (_stream) {
        glLineWidth(2.0);
        glBegin(GL_LINES);
        glColor3f(1, 1, 1);

        int startX = 20.0;
        int currentX = startX;
        int topY = screenHeight - 40;
        int bottomY = screenHeight - 20;
        float frameWidth = 20.0;
        float halfY = topY + ((bottomY - topY) / 2.0);

        // draw the lines for the base of the ring buffer
        glVertex2f(currentX, topY);
        glVertex2f(currentX, bottomY);

        for (int i = 0; i < RING_BUFFER_FRAMES; i++) {
            glVertex2f(currentX, halfY);
            glVertex2f(currentX + frameWidth, halfY);
            currentX += frameWidth;
            glVertex2f(currentX, topY);
            glVertex2f(currentX, bottomY);
        }
        glEnd();

        // Show a bar with the amount of audio remaining in ring buffer beyond current playback
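        // (each frameWidth segment of the meter represents one buffered frame, i.e.
        // 256 samples / 22050 Hz ~= 11.6 ms of audio, matching AUDIO_CALLBACK_MSECS)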
        float remainingBuffer = 0;
        timeval currentTime;
        gettimeofday(&currentTime, NULL);
        float timeLeftInCurrentBuffer = 0;
        if (_lastCallbackTime.tv_usec > 0) {
            timeLeftInCurrentBuffer = AUDIO_CALLBACK_MSECS - diffclock(&_lastCallbackTime, &currentTime);
        }

        if (_ringBuffer.getEndOfLastWrite() != NULL) {
            // cast to float so the sample count isn't truncated by integer division
            remainingBuffer = _ringBuffer.diffLastWriteNextOutput() / (float)PACKET_LENGTH_SAMPLES * AUDIO_CALLBACK_MSECS;
        }

        if (_wasStarved == 0) {
            glColor3f(0, 1, 0);
        } else {
            glColor3f(0.5 + (_wasStarved / 20.0f), 0, 0);
            _wasStarved--;
        }

        glBegin(GL_QUADS);
        glVertex2f(startX, topY + 2);
        glVertex2f(startX + (remainingBuffer + timeLeftInCurrentBuffer) / AUDIO_CALLBACK_MSECS * frameWidth, topY + 2);
        glVertex2f(startX + (remainingBuffer + timeLeftInCurrentBuffer) / AUDIO_CALLBACK_MSECS * frameWidth, bottomY - 2);
        glVertex2f(startX, bottomY - 2);
        glEnd();

        if (_averagedLatency == 0.0) {
            _averagedLatency = remainingBuffer + timeLeftInCurrentBuffer;
        } else {
            _averagedLatency = 0.99f * _averagedLatency + 0.01f * (remainingBuffer + timeLeftInCurrentBuffer);
        }

        // Show a yellow bar with the averaged msecs latency you are hearing (from time of packet receipt)
        glColor3f(1, 1, 0);
        glBegin(GL_QUADS);
        glVertex2f(startX + _averagedLatency / AUDIO_CALLBACK_MSECS * frameWidth - 2, topY - 2);
        glVertex2f(startX + _averagedLatency / AUDIO_CALLBACK_MSECS * frameWidth + 2, topY - 2);
        glVertex2f(startX + _averagedLatency / AUDIO_CALLBACK_MSECS * frameWidth + 2, bottomY + 2);
        glVertex2f(startX + _averagedLatency / AUDIO_CALLBACK_MSECS * frameWidth - 2, bottomY + 2);
        glEnd();

        char out[40];
        sprintf(out, "%3.0f\n", _averagedLatency);
        drawtext(startX + _averagedLatency / AUDIO_CALLBACK_MSECS * frameWidth - 10, topY - 10, 0.10, 0, 1, 0, out, 1, 1, 0);
        // drawtext(startX + 0, topY - 10, 0.08, 0, 1, 0, out, 1, 1, 0);

        // Show a cyan bar with the most recently measured jitter stdev
        int jitterPels = _measuredJitter / ((1000.0f * PACKET_LENGTH_SAMPLES / SAMPLE_RATE)) * frameWidth;

        glColor3f(0, 1, 1);
        glBegin(GL_QUADS);
        glVertex2f(startX + jitterPels - 2, topY - 2);
        glVertex2f(startX + jitterPels + 2, topY - 2);
        glVertex2f(startX + jitterPels + 2, bottomY + 2);
        glVertex2f(startX + jitterPels - 2, bottomY + 2);
        glEnd();

        sprintf(out, "%3.1f\n", _measuredJitter);
        drawtext(startX + jitterPels - 5, topY - 10, 0.10, 0, 1, 0, out, 0, 1, 1);

        sprintf(out, "%3.1fms\n", JITTER_BUFFER_LENGTH_MSECS);
        drawtext(startX - 10, bottomY + 15, 0.1, 0, 1, 0, out, 1, 0, 0);
    }
}
#endif