From a8bd627cda1c9b35d5c2c9b1701fc664c64293c1 Mon Sep 17 00:00:00 2001 From: Dale Glass Date: Tue, 7 Apr 2020 20:53:52 +0200 Subject: [PATCH] Add Prometheus exporter Exports domain statistics for the domain on port 9703 (officially reserved) --- assignment-client/src/assets/AssetServer.cpp | 11 +- assignment-client/src/octree/OctreeServer.cpp | 14 +- assignment-client/src/octree/OctreeServer.h | 2 + .../resources/prometheus_exporter/index.html | 14 + domain-server/src/DomainServer.cpp | 4 +- domain-server/src/DomainServer.h | 5 +- domain-server/src/DomainServerExporter.cpp | 450 ++++++++++++++++++ domain-server/src/DomainServerExporter.h | 55 +++ libraries/networking/src/DomainHandler.h | 17 +- 9 files changed, 561 insertions(+), 11 deletions(-) create mode 100644 domain-server/resources/prometheus_exporter/index.html create mode 100644 domain-server/src/DomainServerExporter.cpp create mode 100644 domain-server/src/DomainServerExporter.h diff --git a/assignment-client/src/assets/AssetServer.cpp b/assignment-client/src/assets/AssetServer.cpp index 502cf15aa2..b3344e3832 100644 --- a/assignment-client/src/assets/AssetServer.cpp +++ b/assignment-client/src/assets/AssetServer.cpp @@ -176,7 +176,7 @@ std::pair AssetServer::getAssetStatus(const A } else if (loaded && meta.failedLastBake) { return { AssetUtils::Error, meta.lastBakeErrors }; } - + return { AssetUtils::Pending, "" }; } @@ -199,7 +199,7 @@ void AssetServer::maybeBake(const AssetUtils::AssetPath& path, const AssetUtils: void AssetServer::createEmptyMetaFile(const AssetUtils::AssetHash& hash) { QString metaFilePath = "atp:/" + hash + "/meta.json"; QFile metaFile { metaFilePath }; - + if (!metaFile.exists()) { qDebug() << "Creating metafile for " << hash; if (metaFile.open(QFile::WriteOnly)) { @@ -285,7 +285,7 @@ void updateConsumedCores() { auto coreCount = std::thread::hardware_concurrency(); if (isInterfaceRunning) { coreCount = coreCount > MIN_CORES_FOR_MULTICORE ? 
CPU_AFFINITY_COUNT_HIGH : CPU_AFFINITY_COUNT_LOW; - } + } qCDebug(asset_server) << "Setting max consumed cores to " << coreCount; setMaxCores(coreCount); } @@ -931,6 +931,9 @@ void AssetServer::sendStatsPacket() { connectionStats["5. Period (us)"] = stats.packetSendPeriod; connectionStats["6. Up (Mb/s)"] = stats.sentBytes * megabitsPerSecPerByte; connectionStats["7. Down (Mb/s)"] = stats.receivedBytes * megabitsPerSecPerByte; + connectionStats["last_heard_time_msecs"] = date.toUTC().toMSecsSinceEpoch(); + connectionStats["last_heard_ago_msecs"] = date.msecsTo(QDateTime::currentDateTime()); + nodeStats["Connection Stats"] = connectionStats; using Events = udt::ConnectionStats::Stats::Event; @@ -1147,7 +1150,7 @@ bool AssetServer::deleteMappings(const AssetUtils::AssetPathList& paths) { hashesToCheckForDeletion << it->second; qCDebug(asset_server) << "Deleted a mapping:" << path << "=>" << it->second; - + _fileMappings.erase(it); } else { qCDebug(asset_server) << "Unable to delete a mapping that was not found:" << path; diff --git a/assignment-client/src/octree/OctreeServer.cpp b/assignment-client/src/octree/OctreeServer.cpp index 80e0060299..c1cf3d2297 100644 --- a/assignment-client/src/octree/OctreeServer.cpp +++ b/assignment-client/src/octree/OctreeServer.cpp @@ -1197,7 +1197,7 @@ void OctreeServer::domainSettingsRequestComplete() { } else { beginRunning(); } -} +} void OctreeServer::beginRunning() { auto nodeList = DependencyManager::get(); @@ -1344,6 +1344,11 @@ QString OctreeServer::getUptime() { return formattedUptime; } +double OctreeServer::getUptimeSeconds() +{ + return (usecTimestampNow() - _startedUSecs) / 1000000.0; +} + QString OctreeServer::getFileLoadTime() { QString result; if (isInitialLoadComplete()) { @@ -1386,6 +1391,11 @@ QString OctreeServer::getFileLoadTime() { return result; } +double OctreeServer::getFileLoadTimeSeconds() +{ + return getLoadElapsedTime() / 1000000.0; +} + QString OctreeServer::getConfiguration() { QString result; for (int i 
= 1; i < _argc; i++) { @@ -1421,6 +1431,8 @@ void OctreeServer::sendStatsPacket() { statsArray1["4. persistFileLoadTime"] = getFileLoadTime(); statsArray1["5. clients"] = getCurrentClientCount(); statsArray1["6. threads"] = threadsStats; + statsArray1["uptime_seconds"] = getUptimeSeconds(); + statsArray1["persistFileLoadTime_seconds"] = getFileLoadTimeSeconds(); // Octree Stats QJsonObject octreeStats; diff --git a/assignment-client/src/octree/OctreeServer.h b/assignment-client/src/octree/OctreeServer.h index 07b1e334b1..3ae4dddee9 100644 --- a/assignment-client/src/octree/OctreeServer.h +++ b/assignment-client/src/octree/OctreeServer.h @@ -158,7 +158,9 @@ protected: void initHTTPManager(int port); void resetSendingStats(); QString getUptime(); + double getUptimeSeconds(); QString getFileLoadTime(); + double getFileLoadTimeSeconds(); QString getConfiguration(); QString getStatusLink(); diff --git a/domain-server/resources/prometheus_exporter/index.html b/domain-server/resources/prometheus_exporter/index.html new file mode 100644 index 0000000000..5a23c78858 --- /dev/null +++ b/domain-server/resources/prometheus_exporter/index.html @@ -0,0 +1,14 @@ + + + Vircadia Prometheus exporter + + + +

Vircadia Prometheus exporter

+ +

This is the Prometheus exporter, used to export stats about the domain server for graphing and analysis.

+

+ Metrics +

+ + diff --git a/domain-server/src/DomainServer.cpp b/domain-server/src/DomainServer.cpp index 9fea49d2da..9c6361faef 100644 --- a/domain-server/src/DomainServer.cpp +++ b/domain-server/src/DomainServer.cpp @@ -163,7 +163,8 @@ bool DomainServer::forwardMetaverseAPIRequest(HTTPConnection* connection, DomainServer::DomainServer(int argc, char* argv[]) : QCoreApplication(argc, argv), _gatekeeper(this), - _httpManager(QHostAddress::AnyIPv4, DOMAIN_SERVER_HTTP_PORT, QString("%1/resources/web/").arg(QCoreApplication::applicationDirPath()), this) + _httpManager(QHostAddress::AnyIPv4, DOMAIN_SERVER_HTTP_PORT, QString("%1/resources/web/").arg(QCoreApplication::applicationDirPath()), this), + _httpExporterManager(QHostAddress::Any, DOMAIN_SERVER_EXPORTER_PORT, QString("%1/resources/prometheus_exporter/").arg(QCoreApplication::applicationDirPath()), &_exporter) { if (_parentPID != -1) { watchParentProcess(_parentPID); @@ -1977,6 +1978,7 @@ bool DomainServer::handleHTTPRequest(HTTPConnection* connection, const QUrl& url const QString URI_API_BACKUPS_ID = "/api/backups/"; const QString URI_API_BACKUPS_DOWNLOAD_ID = "/api/backups/download/"; const QString URI_API_BACKUPS_RECOVER = "/api/backups/recover/"; + const QString URI_EXPORTER_= "/metrics"; const QString UUID_REGEX_STRING = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"; diff --git a/domain-server/src/DomainServer.h b/domain-server/src/DomainServer.h index 95b4b784cb..f6bb9bc7ae 100644 --- a/domain-server/src/DomainServer.h +++ b/domain-server/src/DomainServer.h @@ -36,6 +36,7 @@ #include "DomainContentBackupManager.h" #include "PendingAssignedNodeData.h" +#include "DomainServerExporter.h" #include @@ -115,7 +116,7 @@ private slots: void sendHeartbeatToIceServer(); void nodePingMonitor(); - void handleConnectedNode(SharedNodePointer newNode, quint64 requestReceiveTime); + void handleConnectedNode(SharedNodePointer newNode, quint64 requestReceiveTime); void handleTempDomainSuccess(QNetworkReply* 
requestReply); void handleTempDomainError(QNetworkReply* requestReply); @@ -234,8 +235,10 @@ private: std::vector _replicatedUsernames; DomainGatekeeper _gatekeeper; + DomainServerExporter _exporter; HTTPManager _httpManager; + HTTPManager _httpExporterManager; std::unique_ptr _httpsManager; QHash _allAssignments; diff --git a/domain-server/src/DomainServerExporter.cpp b/domain-server/src/DomainServerExporter.cpp new file mode 100644 index 0000000000..3ecccfc8b4 --- /dev/null +++ b/domain-server/src/DomainServerExporter.cpp @@ -0,0 +1,450 @@ +// +// DomainServerExporter.cpp +// domain-server/src +// +// Created by Dale Glass on 3 Apr 2020. +// Copyright 2020 Dale Glass +// +// Prometheus exporter +// +// Distributed under the Apache License, Version 2.0. +// See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html +// + + +// TODO: +// +// Look into the data provided by OctreeServer::handleHTTPRequest in assignment-client/src/octree/OctreeServer.cpp +// Turns out the octree server (entity server) can optionally deliver additional statistics via another HTTP server +// that is disabled by default. This functionality can be enabled by setting statusPort to a port number. +// +// Look into what appears in Audio Mixer -> z_listeners -> jitter -> injectors, so far it's been an empty list. 
+ +#include +#include +#include +#include +#include +#include + +#include "DomainServerExporter.h" +#include "DependencyManager.h" +#include "LimitedNodeList.h" +#include "HTTPConnection.h" +#include "DomainServerNodeData.h" + +Q_LOGGING_CATEGORY(domain_server_exporter, "hifi.domain_server.exporter") + + + +static const QMap TYPE_MAP { + { "asset_server_assignment_stats_num_queued_check_ins" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_connection_stats_cw_p" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_connection_stats_down_mb_s" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_connection_stats_est_max_p_s" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_connection_stats_last_heard_ago_msecs" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_connection_stats_last_heard_time_msecs" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_connection_stats_period_us" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_connection_stats_rtt_ms" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_connection_stats_up_mb_s" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_downstream_stats_duplicates" , DomainServerExporter::MetricType::Counter }, + { "asset_server_downstream_stats_recvd_packets" , DomainServerExporter::MetricType::Counter }, + { "asset_server_downstream_stats_recvd_p_s" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_downstream_stats_sent_ack" , DomainServerExporter::MetricType::Counter }, + { "asset_server_io_stats_inbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_io_stats_inbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_io_stats_outbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_io_stats_outbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "asset_server_upstream_stats_procd_ack" , DomainServerExporter::MetricType::Counter }, + { 
"asset_server_upstream_stats_recvd_ack" , DomainServerExporter::MetricType::Counter }, + { "asset_server_upstream_stats_retransmitted" , DomainServerExporter::MetricType::Counter }, + { "asset_server_upstream_stats_sent_packets" , DomainServerExporter::MetricType::Counter }, + { "asset_server_upstream_stats_sent_p_s" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_assignment_stats_num_queued_check_ins" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_listeners_per_frame" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_listeners_silent_per_frame" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_streams_per_frame" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_check_time" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_check_time_trailing" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_events" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_events_trailing" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_frame" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_frame_trailing" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_mix" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_mix_trailing" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_packets" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_packets_trailing" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_sleep" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_sleep_trailing" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_tic" , 
DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_avg_timing_stats_us_per_tic_trailing" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_io_stats_inbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_io_stats_inbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_io_stats_outbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_io_stats_outbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_listeners_jitter_downstream_available" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_listeners_jitter_downstream_available_avg_10s" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_listeners_jitter_downstream_desired" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_listeners_jitter_downstream_lost_percent" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_listeners_jitter_downstream_lost_percent_30s" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_listeners_jitter_downstream_not_mixed" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_listeners_jitter_downstream_overflows" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_listeners_jitter_downstream_starves" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_listeners_jitter_downstream_unplayed" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_listeners_jitter_injectors" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_listeners_outbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_mix_stats_active_streams" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_mix_stats_active_to_inactive" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_active_to_skippped" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_avg_mixes_per_block" , DomainServerExporter::MetricType::Gauge }, + { 
"audio_mixer_mix_stats_hrtf_renders" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_hrtf_resets" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_hrtf_updates" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_inactive_streams" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_mix_stats_inactive_to_active" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_inactive_to_skippped" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_percent_hrtf_mixes" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_mix_stats_percent_manual_echo_mixes" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_mix_stats_percent_manual_stereo_mixes" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_mix_stats_skipped_streams" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_skippped_to_active" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_skippped_to_inactive" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_mix_stats_total_mixes" , DomainServerExporter::MetricType::Counter }, + { "audio_mixer_silent_packets_per_frame" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_threads" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_throttling_ratio" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_trailing_mix_ratio" , DomainServerExporter::MetricType::Gauge }, + { "audio_mixer_use_dynamic_jitter_buffers" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_assignment_stats_num_queued_check_ins" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_av_data_receive_rate" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_avg_other_av_skips_per_second" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_avg_other_av_starves_per_second" , 
DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_delta_full_vs_avatar_data_kbps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_inbound_av_data_kbps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_inbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_num_avs_sent_last_frame" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_outbound_av_data_kbps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_outbound_av_traits_kbps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_outbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_recent_other_av_in_view" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_recent_other_av_out_of_view" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_avatars_total_num_out_of_order_sends" , DomainServerExporter::MetricType::Counter }, + { "avatar_mixer_average_listeners_last_second" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_broadcast_loop_rate" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_io_stats_inbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_io_stats_inbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_io_stats_outbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_io_stats_outbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_parallel_tasks_broadcast_avatar_data_functor" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_parallel_tasks_broadcast_avatar_data_innner" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_parallel_tasks_broadcast_avatar_data_lock_wait" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_parallel_tasks_broadcast_avatar_data_node_transform" , DomainServerExporter::MetricType::Gauge }, + { 
"avatar_mixer_parallel_tasks_broadcast_avatar_data_total" , DomainServerExporter::MetricType::Counter }, + { "avatar_mixer_parallel_tasks_display_name_management_total" , DomainServerExporter::MetricType::Counter }, + { "avatar_mixer_parallel_tasks_process_queued_avatar_data_packets_lock_wait" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_parallel_tasks_process_queued_avatar_data_packets_total" , DomainServerExporter::MetricType::Counter }, + { "avatar_mixer_single_core_tasks_incoming_packets_handle_avatar_identity_packet" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_single_core_tasks_incoming_packets_handle_avatar_query_packet" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_single_core_tasks_incoming_packets_handle_kill_avatar_packet" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_single_core_tasks_incoming_packets_handle_node_ignore_request_packet" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_single_core_tasks_incoming_packets_handle_radius_ignore_request_packet" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_single_core_tasks_incoming_packets_handle_requests_domain_list_data_packet" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_single_core_tasks_process_events" , DomainServerExporter::MetricType::Counter }, + { "avatar_mixer_single_core_tasks_queue_incoming_packet" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_single_core_tasks_send_stats" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_received_1_nodes_processed" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_sent_1_nodes_broadcasted_to" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_sent_2_average_others_included" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_sent_3_average_over_budget_avatars" , 
DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_sent_4_average_data_bytes" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_sent_5_average_traits_bytes" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_sent_6_average_identity_bytes" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_sent_7_average_hero_avatars" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_timing_1_process_incoming_packets" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_timing_2_ignore_calculation" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_timing_3_to_byte_array" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_timing_4_avatar_data_packing" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_timing_5_packet_sending" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_slaves_aggregate_per_frame_timing_6_job_elapsed_time" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_threads" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_throttling_ratio" , DomainServerExporter::MetricType::Gauge }, + { "avatar_mixer_trailing_mix_ratio" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_assignment_stats_num_queued_check_ins" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_io_stats_inbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_io_stats_inbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_io_stats_outbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_io_stats_outbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_nodes_inbound_kbit_s" 
, DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_nodes_outbound_kbit_s" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_nodes_reliable_packet_s" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_nodes_unreliable_packet_s" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_octree_stats_element_count" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_octree_stats_internal_element_count" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_octree_stats_leaf_element_count" , DomainServerExporter::MetricType::Gauge }, + { "entity_script_server_script_engine_stats_number_running_scripts" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_assignment_stats_num_queued_check_ins" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_inbound_data_packet_queue" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_inbound_data_total_elements" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_inbound_data_total_packets" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_inbound_timing_avg_lock_wait_time_per_element" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_inbound_timing_avg_lock_wait_time_per_packet" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_inbound_timing_avg_process_time_per_element" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_inbound_timing_avg_process_time_per_packet" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_inbound_timing_avg_transit_time_per_packet" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_misc_clients" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_misc_persist_file_load_time_seconds" , DomainServerExporter::MetricType::Gauge 
}, + { "entity_server_entity_server_misc_threads_handle_pacekt_send" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_misc_threads_packet_distributor" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_misc_threads_processing" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_misc_threads_write_datagram" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_misc_uptime_seconds" , DomainServerExporter::MetricType::Counter }, + { "entity_server_entity_server_octree_element_count" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_octree_internal_element_count" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_octree_leaf_element_count" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_outbound_data_total_bytes" , DomainServerExporter::MetricType::Counter }, + { "entity_server_entity_server_outbound_data_total_bytes_bit_masks" , DomainServerExporter::MetricType::Counter }, + { "entity_server_entity_server_outbound_data_total_bytes_octal_codes" , DomainServerExporter::MetricType::Counter }, + { "entity_server_entity_server_outbound_data_total_bytes_wasted" , DomainServerExporter::MetricType::Counter }, + { "entity_server_entity_server_outbound_data_total_packets" , DomainServerExporter::MetricType::Counter }, + { "entity_server_entity_server_outbound_timing_avg_compress_and_write_time" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_outbound_timing_avg_encode_time" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_outbound_timing_avg_inside_time" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_outbound_timing_avg_loop_time" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_outbound_timing_avg_send_time" , DomainServerExporter::MetricType::Gauge }, + { 
"entity_server_entity_server_outbound_timing_avg_tree_traverse_time" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_entity_server_outbound_timing_node_wait_time" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_io_stats_inbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_io_stats_inbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_io_stats_outbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "entity_server_io_stats_outbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "messages_mixer_assignment_stats_num_queued_check_ins" , DomainServerExporter::MetricType::Gauge }, + { "messages_mixer_io_stats_inbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "messages_mixer_io_stats_inbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "messages_mixer_io_stats_outbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "messages_mixer_io_stats_outbound_pps" , DomainServerExporter::MetricType::Gauge }, + { "messages_mixer_messages_inbound_kbps" , DomainServerExporter::MetricType::Gauge }, + { "messages_mixer_messages_outbound_kbps" , DomainServerExporter::MetricType::Gauge } +}; + + +// Things we're not going to convert for various reasons, such as containing text, +// or having a value followed by a unit ("5.2 seconds"). +// +// Text such as usernames has no place in the Prometheus model, so it can be skipped. +// +// For numeric values with a unit, instead of trying to parse them, the stats need to +// have a second copy of the metric added, with the value expressed as a plain number, and the original +// blacklisted here. 
+
+// Metrics that are never exported because their values are not plain numbers (see the
+// note on each entry). Matched against the complete, escaped metric name.
+static const QSet<QString> BLACKLIST = {
+    "asset_server_connection_stats_last_heard", // Timestamp as a string
+    "asset_server_username", // Username
+    "audio_mixer_listeners_jitter_downstream_avg_gap", // Number as string with unit name
+    "audio_mixer_listeners_jitter_downstream_avg_gap_30s", // Number as string with unit name
+    "audio_mixer_listeners_jitter_downstream_max_gap", // Number as string with unit name
+    "audio_mixer_listeners_jitter_downstream_max_gap_30s", // Number as string with unit name
+    "audio_mixer_listeners_jitter_downstream_min_gap", // Number as string with unit name
+    "audio_mixer_listeners_jitter_downstream_min_gap_30s", // Number as string with unit name
+    "audio_mixer_listeners_jitter_injectors", // Array, empty. TODO: check if this ever contains anything.
+    "audio_mixer_listeners_jitter_upstream", // Number as string with unit name
+    "audio_mixer_listeners_username", // Username
+    "avatar_mixer_avatars_display_name", // Username
+    "avatar_mixer_avatars_username", // Username
+    "entity_script_server_nodes_node_type", // Username
+    "entity_script_server_nodes_username", // Username
+    "entity_server_entity_server_misc_configuration", // Text
+    "entity_server_entity_server_misc_detailed_stats_url", // URL
+    "entity_server_entity_server_misc_persist_file_load_time", // Number as string with unit name
+    "entity_server_entity_server_misc_uptime", // Number as string with unit name
+    "messages_mixer_messages_username" // Username
+};
+
+DomainServerExporter::DomainServerExporter() = default;
+
+// Answers requests for "/metrics" with the metrics of every node in the node list, as
+// plain text. Returns true when the request was handled here and false otherwise.
+bool DomainServerExporter::handleHTTPRequest(HTTPConnection *connection, const QUrl &url, bool skipSubHandler)
+{
+    const QString URI_METRICS = "/metrics";
+    const QString EXPORTER_MIME_TYPE = "text/plain";
+
+    qCDebug(domain_server_exporter) << "Request on URL " << url;
+
+    if (url.path() != URI_METRICS) {
+        return false;
+    }
+
+    auto nodeList = DependencyManager::get<LimitedNodeList>();
+    QString output;
+    QTextStream out_stream(&output);
+
+    nodeList->eachNode([this, &out_stream](const SharedNodePointer& node) {
+        generateMetricsForNode(out_stream, node);
+    });
+
+    connection->respond(HTTPConnection::StatusCode200, output.toUtf8(), qPrintable(EXPORTER_MIME_TYPE));
+    return true;
+}
+
+// Turns a stats key into a string that is usable as part of a Prometheus metric name:
+// ordering prefixes are dropped, mixedCase becomes snake_case, anything outside of
+// [A-Za-z0-9_] becomes "_", and the result is lower-cased.
+QString DomainServerExporter::escapeName(const QString &name)
+{
+    QString ret = name;
+
+    // If a key is named something like: "6. threads", turn it into just "threads"
+    ret.replace(QRegularExpression("^\\d+\\. "), "");
+    ret.replace(QRegularExpression("^\\d+_"), "");
+
+    // If a key is named something like "z_listeners", turn it into just "listeners"
+    ret.replace(QRegularExpression("^z_"), "");
+
+    // If a key is named something like "lost%", change it to "lost_percent_".
+    // redundant underscores will be removed below.
+    ret.replace(QRegularExpression("%"), "_percent_");
+
+    // change mixedCaseNames to mixed_case_names
+    ret.replace(QRegularExpression("([a-z])([A-Z])"), "\\1_\\2");
+
+    // Replace all invalid characters with a _
+    ret.replace(QRegularExpression("[^A-Za-z0-9_]"), "_");
+
+    // Remove any "_" characters at the beginning or end
+    ret.replace(QRegularExpression("^_+"), "");
+    ret.replace(QRegularExpression("_+$"), "");
+
+    // Replace any duplicated _ characters with a single one
+    ret.replace(QRegularExpression("_+"), "_");
+
+    return ret.toLower();
+}
+
+// Writes a banner comment naming the node's type, followed by every metric found in the
+// node's stats JSON. Nodes without linked data are skipped.
+void DomainServerExporter::generateMetricsForNode( QTextStream &stream, const SharedNodePointer &node )
+{
+    auto nodeData = static_cast<DomainServerNodeData*>(node->getLinkedData());
+    if (!nodeData) {
+        // No linked data on this node, so there are no stats to read from it.
+        return;
+    }
+
+    QString node_type = NodeType::getNodeTypeName(static_cast<NodeType_t>(node->getType()));
+
+    stream << "\n\n\n";
+    stream << "###############################################################\n";
+    stream << "# " << node_type << "\n";
+    stream << "###############################################################\n";
+
+    generateMetricsFromJson(stream, node_type, escapeName(node_type), QHash<QString, QString>(), nodeData->getStatsJSONObject());
+}
+
+// Recursively walks a stats JSON object and writes one sample for every value that can
+// be expressed as a number. A nested object extends the metric name with its key, unless
+// the key is a UUID: then its contents are exported under the current name, labelled
+// with that uuid.
+void DomainServerExporter::generateMetricsFromJson(QTextStream &stream, QString original_path, QString path, QHash<QString, QString> labels, const QJsonObject &obj)
+{
+    for (auto iter = obj.constBegin(); iter != obj.constEnd(); ++iter) {
+        const QString key = escapeName(iter.key());
+        const QJsonValue val = iter.value();
+        const QString metric_name = path + "_" + key;
+        const QString orig_metric_name = original_path + " -> " + iter.key();
+
+        if (val.isObject()) {
+            QUuid possible_uuid = QUuid::fromString(iter.key());
+
+            if (possible_uuid.isNull()) {
+                generateMetricsFromJson(stream, orig_metric_name, metric_name, labels, val.toObject());
+            } else {
+                // Label a copy: the uuid must not leak into the entries that follow this one.
+                auto child_labels = labels;
+                child_labels.insert("uuid", possible_uuid.toString(QUuid::WithoutBraces));
+                generateMetricsFromJson(stream, original_path, path, child_labels, val.toObject());
+            }
+
+            continue;
+        }
+
+        if (BLACKLIST.contains(metric_name)) {
+            continue;
+        }
+
+        // Work out the numeric value before writing anything, so that a value which can't
+        // be exported never leaves a HELP line or a sample without a value behind.
+        double converted = 0;
+
+        if (val.isBool()) {
+            converted = val.toBool() ? 1 : 0;
+        } else if (val.isDouble()) {
+            converted = val.toDouble();
+        } else if (val.isString()) {
+            // Prometheus only deals with numeric values. See if this string contains a valid one
+            bool conversion_ok = false;
+            QString tmp = val.toString();
+            converted = tmp.toDouble(&conversion_ok);
+
+            if (!conversion_ok) {
+                qCWarning(domain_server_exporter) << "Failed to convert value of " << orig_metric_name << " (" << metric_name << ") to double: '" << tmp << "'";
+                continue;
+            }
+        } else {
+            qCWarning(domain_server_exporter) << "Can't convert metric " << orig_metric_name << "(" << metric_name << ") with value " << val;
+            continue;
+        }
+
+        stream << QString("\n# HELP %1 %2 -> %3\n").arg(metric_name, original_path, iter.key());
+
+        if (TYPE_MAP.contains(metric_name)) {
+            stream << "# TYPE " << metric_name << " ";
+            switch (TYPE_MAP[metric_name]) {
+                case DomainServerExporter::MetricType::Untyped:
+                    stream << "untyped"; break;
+                case DomainServerExporter::MetricType::Counter:
+                    stream << "counter"; break;
+                case DomainServerExporter::MetricType::Gauge:
+                    stream << "gauge"; break;
+                case DomainServerExporter::MetricType::Histogram:
+                    stream << "histogram"; break;
+                case DomainServerExporter::MetricType::Summary:
+                    stream << "summary"; break;
+            }
+            stream << "\n";
+        } else {
+            qCWarning(domain_server_exporter) << "Type for metric " << orig_metric_name << " (" << metric_name << ") not known.";
+        }
+
+        stream << metric_name;
+        if (!labels.isEmpty()) {
+            stream << "{";
+
+            bool is_first = true;
+            for (auto label = labels.constBegin(); label != labels.constEnd(); ++label) {
+                if (!is_first) {
+                    stream << ",";
+                }
+
+                // Label values escape backslash, double quote and line feed as \\, \" and \n
+                QString value = label.value();
+                value.replace("\\", "\\\\");
+                value.replace("\"", "\\\"");
+                value.replace("\n", "\\n");
+
+                stream << label.key() << "=\"" << value << "\"";
+
+                is_first = false;
+            }
+
+            stream << "}";
+        }
+
+        // QTextStream writes doubles with 6 significant digits by default, which rounds large
+        // counters and millisecond timestamps. 17 digits reproduce any double exactly.
+        stream << " " << QString::number(converted, 'g', 17) << "\n";
+    }
+}
+
diff --git a/domain-server/src/DomainServerExporter.h b/domain-server/src/DomainServerExporter.h
new file mode 100644
index 0000000000..d818ad8114
--- /dev/null
+++ b/domain-server/src/DomainServerExporter.h
@@ -0,0 +1,55 @@
+//
+//  DomainServerExporter.h
+//  domain-server/src
+//
+//  Created by Dale Glass on 3 Apr 2020.
+//  Copyright 2020 Dale Glass
+//
+//  Prometheus exporter
+//
+//  Distributed under the Apache License, Version 2.0.
+//  See the accompanying file LICENSE or http://www.apache.org/licenses/LICENSE-2.0.html
+//
+
+#ifndef DOMAINSERVEREXPORTER_H
+#define DOMAINSERVEREXPORTER_H
+
+#include <QObject>
+#include "HTTPManager.h"
+#include "Node.h"
+#include <QString>
+#include <QTextStream>
+#include <QJsonObject>
+#include <QHash>
+
+
+
+/**
+ * @brief Prometheus exporter for domain stats
+ *
+ * This class exports the statistics that can be seen on the domain's page in
+ * a format that can be parsed by Prometheus. This is useful for troubleshooting,
+ * monitoring performance, and making pretty graphs.
+ */
+class DomainServerExporter : public HTTPRequestHandler
+{
+public:
+    typedef enum {
+        Untyped,   /* Works the same as Gauge, with the difference of signalling that the actual type is unknown */
+        Counter,   /* Value only goes up. Eg, number of packets received */
+        Gauge,     /* Current numerical value that can go up or down. Current temperature, memory usage, etc */
+        Histogram, /* Samples sorted in buckets gathered over time */
+        Summary
+    } MetricType;
+
+    DomainServerExporter();
+    ~DomainServerExporter() = default;
+    bool handleHTTPRequest(HTTPConnection* connection, const QUrl& url, bool skipSubHandler = false) override;
+
+private:
+    QString escapeName(const QString &name);
+    void generateMetricsForNode( QTextStream &stream, const SharedNodePointer &node );
+    void generateMetricsFromJson(QTextStream &stream, QString original_path, QString path, QHash<QString, QString> labels, const QJsonObject &obj);
+};
+
+#endif // DOMAINSERVEREXPORTER_H
diff --git a/libraries/networking/src/DomainHandler.h b/libraries/networking/src/DomainHandler.h
index 68059fb158..178c56c34a 100644
--- a/libraries/networking/src/DomainHandler.h
+++ b/libraries/networking/src/DomainHandler.h
@@ -33,7 +33,7 @@
 #include "NetworkingConstants.h"
 #include "MetaverseAPI.h"
 
-const unsigned short DEFAULT_DOMAIN_SERVER_PORT = 
+const unsigned short DEFAULT_DOMAIN_SERVER_PORT =
     QProcessEnvironment::systemEnvironment()
     .contains("HIFI_DOMAIN_SERVER_PORT")
         ? QProcessEnvironment::systemEnvironment()
@@ -41,7 +41,7 @@ const unsigned short DEFAULT_DOMAIN_SERVER_PORT =
             .toUShort()
         : 40102;
 
-const unsigned short DEFAULT_DOMAIN_SERVER_DTLS_PORT = 
+const unsigned short DEFAULT_DOMAIN_SERVER_DTLS_PORT =
     QProcessEnvironment::systemEnvironment()
     .contains("HIFI_DOMAIN_SERVER_DTLS_PORT")
         ? QProcessEnvironment::systemEnvironment()
@@ -49,7 +49,7 @@ const unsigned short DEFAULT_DOMAIN_SERVER_DTLS_PORT =
             .toUShort()
         : 40103;
 
-const quint16 DOMAIN_SERVER_HTTP_PORT = 
+const quint16 DOMAIN_SERVER_HTTP_PORT =
     QProcessEnvironment::systemEnvironment()
     .contains("HIFI_DOMAIN_SERVER_HTTP_PORT")
         ? QProcessEnvironment::systemEnvironment()
@@ -57,7 +57,7 @@ const quint16 DOMAIN_SERVER_HTTP_PORT =
             .toUInt()
         : 40100;
 
-const quint16 DOMAIN_SERVER_HTTPS_PORT = 
+const quint16 DOMAIN_SERVER_HTTPS_PORT =
     QProcessEnvironment::systemEnvironment()
     .contains("HIFI_DOMAIN_SERVER_HTTPS_PORT")
         ? QProcessEnvironment::systemEnvironment()
@@ -65,6 +65,15 @@ const quint16 DOMAIN_SERVER_HTTPS_PORT =
             .toUInt()
         : 40101;
 
+const quint16 DOMAIN_SERVER_EXPORTER_PORT =
+    QProcessEnvironment::systemEnvironment()
+    .contains("VIRCADIA_DOMAIN_SERVER_EXPORTER_PORT")
+        ? QProcessEnvironment::systemEnvironment()
+            .value("VIRCADIA_DOMAIN_SERVER_EXPORTER_PORT")
+            .toUInt()
+        : 9703;
+
+
 const int MAX_SILENT_DOMAIN_SERVER_CHECK_INS = 5;
 
 class DomainHandler : public QObject {