initial commit

Hampus Kraft
2026-01-01 20:42:59 +00:00
commit 2f557eda8c
9029 changed files with 1490197 additions and 0 deletions

2007
fluxer_metrics/Cargo.lock generated Normal file

File diff suppressed because it is too large

48
fluxer_metrics/Cargo.toml Normal file

@@ -0,0 +1,48 @@
[package]
name = "fluxer_metrics"
version = "0.1.0"
edition = "2024"
license = "AGPL-3.0-or-later"
description = "Metrics collection and time-series storage service for Fluxer"
[dependencies]
# Async runtime
tokio = { version = "1", features = ["full"] }
# HTTP server
axum = { version = "0.8", features = ["macros"] }
tower = "0.5"
tower-http = { version = "0.6", features = ["cors", "trace"] }
# Serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# Time handling
time = { version = "0.3", features = ["serde", "macros", "formatting", "parsing"] }
# Unique IDs
ulid = { version = "1", features = ["serde"] }
# HTTP client for webhooks
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
# Error handling
thiserror = "2"
anyhow = "1"
# Async trait support
async-trait = "0.1"
# Hashing
sha2 = "0.10"
# Logging/tracing
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
# Environment configuration
dotenvy = "0.15"
# ClickHouse client
clickhouse = { version = "0.13", features = ["lz4", "time"] }

41
fluxer_metrics/Dockerfile Normal file

@@ -0,0 +1,41 @@
FROM rustlang/rust:nightly-slim AS builder
WORKDIR /app
RUN apt-get update && apt-get install -y \
pkg-config \
libssl-dev \
&& rm -rf /var/lib/apt/lists/*
# Dependency-caching trick: build once against a stub main.rs so the compiled
# dependency layer is reused when only application code changes. The committed
# lockfile pins dependency versions for reproducible layers.
COPY Cargo.toml Cargo.lock ./
RUN mkdir -p src && echo "fn main() {}" > src/main.rs
RUN cargo build --release 2>/dev/null || true
RUN rm -rf src
COPY src ./src
# touch invalidates the cached stub so the real main.rs is rebuilt
RUN touch src/main.rs && cargo build --release
# Runtime stage: slim image with only the binary, CA certificates, and curl
# for the healthcheck.
FROM debian:bookworm-slim
WORKDIR /app
RUN apt-get update && apt-get install -y \
ca-certificates \
curl \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/target/release/fluxer_metrics /app/fluxer_metrics
RUN useradd -r -s /bin/false fluxer
USER fluxer
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8080/_health || exit 1
EXPOSE 8080
CMD ["/app/fluxer_metrics"]

118
fluxer_metrics/src/alerts/discord.rs Normal file

@@ -0,0 +1,118 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
use anyhow::Result;
use serde::Serialize;
use time::OffsetDateTime;
use time::format_description::well_known::Rfc3339;
use tracing::info;
use crate::db::CrashEventData;
#[derive(Serialize)]
struct DiscordWebhookPayload {
embeds: Vec<DiscordEmbed>,
}
#[derive(Serialize)]
struct DiscordEmbed {
title: String,
description: String,
color: u32,
fields: Vec<DiscordField>,
timestamp: String,
footer: DiscordFooter,
}
#[derive(Serialize)]
struct DiscordField {
name: String,
value: String,
inline: bool,
}
#[derive(Serialize)]
struct DiscordFooter {
text: String,
}
pub async fn send_discord_crash_alert(
webhook_url: &str,
crash: &CrashEventData,
admin_endpoint: Option<&str>,
) -> Result<()> {
let timestamp =
OffsetDateTime::from_unix_timestamp_nanos(i128::from(crash.timestamp) * 1_000_000)
.unwrap_or_else(|_| OffsetDateTime::now_utc());
    // Truncate long stacktraces, backing up to a char boundary so the slice
    // cannot panic on multi-byte UTF-8.
    let stacktrace = if crash.stacktrace.len() > 1000 {
        let mut end = 1000;
        while !crash.stacktrace.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}...\n\n(truncated)", &crash.stacktrace[..end])
    } else {
        crash.stacktrace.clone()
    };
let guild_link = admin_endpoint.map_or_else(
|| format!("Guild ID: {}", crash.guild_id),
|ep| format!("{ep}/guilds/{}", crash.guild_id),
);
let payload = DiscordWebhookPayload {
embeds: vec![DiscordEmbed {
title: "Guild Crash Detected".to_string(),
description: format!(
"A guild process has crashed on the gateway.\n\n**Guild:** {guild_link}"
),
color: 0x00ED_4245,
fields: vec![
DiscordField {
name: "Guild ID".to_string(),
value: format!("`{}`", crash.guild_id),
inline: true,
},
DiscordField {
name: "Crash ID".to_string(),
value: format!("`{}`", crash.id),
inline: true,
},
DiscordField {
name: "Stacktrace".to_string(),
value: format!("```\n{stacktrace}\n```"),
inline: false,
},
],
timestamp: timestamp.format(&Rfc3339).unwrap_or_default(),
footer: DiscordFooter {
text: "Fluxer Metrics".to_string(),
},
}],
};
let client = reqwest::Client::new();
let response = client.post(webhook_url).json(&payload).send().await?;
if response.status().is_success() {
info!("Discord crash alert sent for guild {}", crash.guild_id);
} else {
let status = response.status();
let body = response.text().await.unwrap_or_default();
anyhow::bail!("Discord webhook failed with status {status}: {body}");
}
Ok(())
}

22
fluxer_metrics/src/alerts/mod.rs Normal file

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
pub mod discord;
pub use discord::*;

155
fluxer_metrics/src/api/ingest.rs Normal file

@@ -0,0 +1,155 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
use std::sync::Arc;
use axum::{
Json,
extract::State,
http::StatusCode,
response::{IntoResponse, Response},
};
use tracing::{error, info, warn};
use crate::alerts::send_discord_crash_alert;
use crate::config::Config;
use crate::db::{CounterRequest, CrashRequest, GaugeRequest, HistogramRequest, Storage};
pub struct AppState {
pub storage: Box<dyn Storage>,
pub config: Config,
}
pub async fn ingest_counter(
State(state): State<Arc<AppState>>,
Json(req): Json<CounterRequest>,
) -> StatusCode {
match state.storage.insert_counter(req).await {
Ok(()) => StatusCode::ACCEPTED,
Err(e) => {
error!("Failed to insert counter: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
}
}
}
pub async fn ingest_gauge(
State(state): State<Arc<AppState>>,
Json(req): Json<GaugeRequest>,
) -> StatusCode {
match state.storage.insert_gauge(req).await {
Ok(()) => StatusCode::ACCEPTED,
Err(e) => {
error!("Failed to insert gauge: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
}
}
}
pub async fn ingest_histogram(
State(state): State<Arc<AppState>>,
Json(req): Json<HistogramRequest>,
) -> StatusCode {
match state.storage.insert_histogram(req).await {
Ok(()) => StatusCode::ACCEPTED,
Err(e) => {
error!("Failed to insert histogram: {}", e);
StatusCode::INTERNAL_SERVER_ERROR
}
}
}
#[allow(clippy::cognitive_complexity)]
pub async fn ingest_crash(
State(state): State<Arc<AppState>>,
Json(req): Json<CrashRequest>,
) -> Response {
let guild_id = req.guild_id.clone();
match state.storage.insert_crash(req).await {
Ok(event) => {
info!("Recorded crash for guild {}", guild_id);
if let Some(webhook_url) = &state.config.alert_webhook_url {
let admin_endpoint = state.config.admin_endpoint.as_deref();
match send_discord_crash_alert(webhook_url, &event, admin_endpoint).await {
Ok(()) => {
if let Err(e) = state.storage.mark_crash_notified(&event.id).await {
warn!("Failed to mark crash as notified: {}", e);
}
}
Err(e) => {
error!("Failed to send Discord alert: {}", e);
}
}
}
StatusCode::ACCEPTED.into_response()
}
Err(e) => {
error!("Failed to insert crash: {}", e);
StatusCode::INTERNAL_SERVER_ERROR.into_response()
}
}
}
#[derive(serde::Deserialize)]
pub struct BatchRequest {
#[serde(default)]
pub counters: Vec<CounterRequest>,
#[serde(default)]
pub gauges: Vec<GaugeRequest>,
#[serde(default)]
pub histograms: Vec<HistogramRequest>,
}
#[allow(clippy::cognitive_complexity)]
pub async fn ingest_batch(
State(state): State<Arc<AppState>>,
Json(req): Json<BatchRequest>,
) -> StatusCode {
let mut had_error = false;
for counter in req.counters {
if let Err(e) = state.storage.insert_counter(counter).await {
error!("Failed to insert counter in batch: {}", e);
had_error = true;
}
}
for gauge in req.gauges {
if let Err(e) = state.storage.insert_gauge(gauge).await {
error!("Failed to insert gauge in batch: {}", e);
had_error = true;
}
}
for histogram in req.histograms {
if let Err(e) = state.storage.insert_histogram(histogram).await {
error!("Failed to insert histogram in batch: {}", e);
had_error = true;
}
}
if had_error {
StatusCode::PARTIAL_CONTENT
} else {
StatusCode::ACCEPTED
}
}
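
As a usage illustration (not part of this commit): a minimal client sketch that reports metrics through the /metrics/batch route above, using the same reqwest/serde_json/tokio stack this crate already depends on. The host and metric names are hypothetical.

use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // Hypothetical metric names; the service accepts any name string.
    let payload = json!({
        "counters": [{ "name": "guild.message_count", "dimensions": { "guild_id": "123" }, "value": 1 }],
        "gauges": [{ "name": "guild.member_count", "dimensions": { "guild_id": "123" }, "value": 42.0 }]
    });
    let status = reqwest::Client::new()
        .post("http://localhost:8080/metrics/batch") // assumed local deployment
        .json(&payload)
        .send()
        .await?
        .status();
    // 202 Accepted on full success; 206 Partial Content if any insert failed.
    println!("batch ingest -> {status}");
    Ok(())
}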

21
fluxer_metrics/src/api/mod.rs Normal file

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
pub mod ingest;
pub mod query;

535
fluxer_metrics/src/api/query.rs Normal file

@@ -0,0 +1,535 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
use std::sync::Arc;
use axum::{
Json,
extract::{Query, State},
http::StatusCode,
response::{IntoResponse, Response},
};
use serde::{Deserialize, Serialize};
use time::{
Duration, OffsetDateTime, format_description::well_known::Rfc3339, macros::format_description,
};
use tracing::error;
use crate::api::ingest::AppState;
use crate::db::{
QueryParams, QueryResponse, Resolution, TopEntry, TopQueryParams, TopQueryResponse,
};
const GAUGE_METRICS: &[(&str, &str)] = &[
("guild.member_count", "guild_id"),
("user.guild_membership_count", "user_id"),
];
const METRIC_TYPE_COUNTER: &str = "counter";
const METRIC_TYPE_GAUGE: &str = "gauge";
const METRIC_TYPE_HISTOGRAM: &str = "histogram";
fn parse_datetime(s: &str) -> Option<OffsetDateTime> {
if let Ok(dt) = OffsetDateTime::parse(s, &Rfc3339) {
return Some(dt);
}
let date_format = format_description!("[year]-[month]-[day]");
if let Ok(date) = time::Date::parse(s, &date_format) {
return Some(date.midnight().assume_utc());
}
None
}
fn to_millis(dt: OffsetDateTime) -> i64 {
(dt.unix_timestamp_nanos() / 1_000_000) as i64
}
fn infer_metric_type(metric: &str) -> &'static str {
let is_percentile_metric = metric.ends_with(".p50")
|| metric.ends_with(".p95")
|| metric.ends_with(".p99")
|| metric.ends_with(".avg")
|| metric.ends_with(".min")
|| metric.ends_with(".max")
|| metric.ends_with(".count");
if metric.starts_with("gateway.") || is_percentile_metric {
return METRIC_TYPE_GAUGE;
}
if metric.contains("latency") || metric.ends_with(".histogram") {
return METRIC_TYPE_HISTOGRAM;
}
METRIC_TYPE_COUNTER
}
#[allow(clippy::cognitive_complexity)]
pub async fn query_metrics(
State(state): State<Arc<AppState>>,
Query(params): Query<QueryParams>,
) -> Response {
let now = OffsetDateTime::now_utc();
let default_start = now - Duration::days(7);
let start_ms = params
.start
.as_ref()
.and_then(|s| parse_datetime(s))
.map_or_else(|| to_millis(default_start), to_millis);
let end_ms = params
.end
.as_ref()
.and_then(|s| parse_datetime(s))
.map_or_else(|| to_millis(now), to_millis);
let metric_type = params
.metric_type
.as_deref()
.unwrap_or_else(|| infer_metric_type(&params.metric));
let data = match metric_type {
METRIC_TYPE_GAUGE => {
match state
.storage
.query_gauges(&params.metric, start_ms, end_ms)
.await
{
Ok(d) => d,
Err(e) => {
error!("Query failed for {}: {}", params.metric, e);
return (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": format!("Query failed: {}", e)})),
)
.into_response();
}
}
}
METRIC_TYPE_HISTOGRAM => {
match state
.storage
.query_histograms(&params.metric, start_ms, end_ms)
.await
{
Ok(d) => d,
Err(e) => {
error!("Query failed for {}: {}", params.metric, e);
return (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": format!("Query failed: {}", e)})),
)
.into_response();
}
}
}
_ => {
let resolution = Resolution::from_str(params.resolution.as_deref());
match state
.storage
.query_counters(
&params.metric,
start_ms,
end_ms,
params.group_by.as_deref(),
resolution,
)
.await
{
Ok(d) => d,
Err(e) => {
error!("Query failed for {}: {}", params.metric, e);
return (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": format!("Query failed: {}", e)})),
)
.into_response();
}
}
}
};
Json(QueryResponse {
metric: params.metric,
data,
})
.into_response()
}
/// GET /query/aggregate - Get aggregated totals
#[derive(Deserialize)]
pub struct AggregateParams {
pub metric: String,
pub start: Option<String>,
pub end: Option<String>,
pub group_by: Option<String>,
}
#[derive(Serialize)]
pub struct AggregateResponse {
pub metric: String,
pub total: f64,
#[serde(skip_serializing_if = "Option::is_none")]
pub breakdown: Option<Vec<TopEntry>>,
}
pub async fn query_aggregate(
State(state): State<Arc<AppState>>,
Query(params): Query<AggregateParams>,
) -> Response {
let now = OffsetDateTime::now_utc();
let default_start = now - Duration::days(365);
let start_ms = params
.start
.as_ref()
.and_then(|s| parse_datetime(s))
.map_or_else(|| to_millis(default_start), to_millis);
let end_ms = params
.end
.as_ref()
.and_then(|s| parse_datetime(s))
.map_or_else(|| to_millis(now), to_millis);
let data = match state
.storage
.query_counters(
&params.metric,
start_ms,
end_ms,
params.group_by.as_deref(),
Resolution::Raw,
)
.await
{
Ok(d) => d,
Err(e) => {
error!("Aggregate query failed for {}: {}", params.metric, e);
return (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": format!("Query failed: {}", e)})),
)
.into_response();
}
};
if params.group_by.is_some() {
let mut groups: std::collections::HashMap<String, f64> = std::collections::HashMap::new();
for point in &data {
if let Some(dims) = &point.dimensions {
let key = dims
.values()
.filter_map(|v| v.as_str())
.collect::<Vec<_>>()
.join(",");
*groups.entry(key).or_insert(0.0) += point.value;
}
}
let mut breakdown: Vec<TopEntry> = groups
.into_iter()
.map(|(label, value)| TopEntry { label, value })
.collect();
breakdown.sort_by(|a, b| {
b.value
.partial_cmp(&a.value)
.unwrap_or(std::cmp::Ordering::Equal)
});
let total = breakdown.iter().map(|e| e.value).sum();
Json(AggregateResponse {
metric: params.metric,
total,
breakdown: Some(breakdown),
})
.into_response()
} else {
let total = data.iter().map(|d| d.value).sum();
Json(AggregateResponse {
metric: params.metric,
total,
breakdown: None,
})
.into_response()
}
}
#[allow(clippy::too_many_lines)]
pub async fn query_top(
State(state): State<Arc<AppState>>,
Query(params): Query<TopQueryParams>,
) -> Response {
let limit = params.limit.unwrap_or(10);
let now = OffsetDateTime::now_utc();
let start_ms = to_millis(now - Duration::days(365));
let end_ms = to_millis(now);
let gauge_metric = GAUGE_METRICS
.iter()
.find(|(name, _)| *name == params.metric);
let mut entries: Vec<TopEntry> = if let Some((_, dimension_key)) = gauge_metric {
match state
.storage
.query_latest_gauges(&params.metric, Some(dimension_key))
.await
{
Ok(summaries) => summaries
.into_iter()
.map(|summary| {
let mut label = summary.label.clone();
if params.metric == "guild.member_count" {
if let Some(name) = summary
.dimensions
.get("guild_name")
.and_then(|v| v.as_str())
{
label = format!("{} ({})", name, summary.label);
}
} else if params.metric == "user.guild_membership_count"
&& let Some(user_id) =
summary.dimensions.get("user_id").and_then(|v| v.as_str())
{
label = user_id.to_string();
}
TopEntry {
label,
value: summary.value,
}
})
.collect(),
Err(e) => {
error!("Top gauge query failed for {}: {}", params.metric, e);
return (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": format!("Query failed: {}", e)})),
)
.into_response();
}
}
} else {
let group_by = if params.metric.starts_with("guild.") {
Some("guild_id")
} else if params.metric.starts_with("user.") {
Some("user_id")
} else if params.metric.contains("referrer") {
Some("referrer")
} else {
None
};
let data = match state
.storage
.query_counters(&params.metric, start_ms, end_ms, group_by, Resolution::Raw)
.await
{
Ok(d) => d,
Err(e) => {
error!("Query failed for {}: {}", params.metric, e);
return (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": format!("Query failed: {}", e)})),
)
.into_response();
}
};
let mut groups: std::collections::HashMap<String, f64> = std::collections::HashMap::new();
for point in &data {
if let Some(dims) = &point.dimensions {
let key = dims
.values()
.filter_map(|v| v.as_str())
.collect::<Vec<_>>()
.join(",");
if !key.is_empty() {
*groups.entry(key).or_insert(0.0) += point.value;
}
}
}
        let counter_entries: Vec<TopEntry> = groups
            .into_iter()
            .map(|(label, value)| TopEntry { label, value })
            .collect();
        counter_entries
    };
entries.sort_by(|a, b| {
b.value
.partial_cmp(&a.value)
.unwrap_or(std::cmp::Ordering::Equal)
});
entries.truncate(limit);
Json(TopQueryResponse {
metric: params.metric,
entries,
})
.into_response()
}
/// GET /query/crashes - Get recent crashes
#[derive(Deserialize)]
pub struct CrashesParams {
pub limit: Option<usize>,
}
#[derive(Serialize)]
pub struct CrashesResponse {
pub crashes: Vec<CrashEventResponse>,
}
#[derive(Serialize)]
pub struct CrashEventResponse {
pub id: String,
pub timestamp: i64,
pub guild_id: String,
pub stacktrace: String,
pub notified: bool,
}
pub async fn query_crashes(
State(state): State<Arc<AppState>>,
Query(params): Query<CrashesParams>,
) -> Response {
let limit = params.limit.unwrap_or(50);
let crashes = match state.storage.get_recent_crashes(limit).await {
Ok(c) => c,
Err(e) => {
error!("Crashes query failed: {}", e);
return (
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": format!("Query failed: {}", e)})),
)
.into_response();
}
};
let response: Vec<CrashEventResponse> = crashes
.into_iter()
.map(|c| CrashEventResponse {
id: c.id.clone(),
timestamp: c.timestamp,
guild_id: c.guild_id.clone(),
stacktrace: c.stacktrace.clone(),
notified: c.notified,
})
.collect();
Json(CrashesResponse { crashes: response }).into_response()
}
#[derive(Deserialize)]
pub struct PercentilesParams {
pub metric: String,
pub start: Option<String>,
pub end: Option<String>,
}
#[derive(Serialize)]
pub struct PercentilesResponse {
pub metric: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub percentiles: Option<PercentilesData>,
}
#[derive(Serialize)]
pub struct PercentilesData {
pub count: u64,
pub avg: f64,
pub min: f64,
pub max: f64,
pub p50: f64,
pub p95: f64,
pub p99: f64,
}
pub async fn query_percentiles(
State(state): State<Arc<AppState>>,
Query(params): Query<PercentilesParams>,
) -> Response {
let now = OffsetDateTime::now_utc();
let default_start = now - Duration::days(7);
let start_ms = params
.start
.as_ref()
.and_then(|s| parse_datetime(s))
.map_or_else(|| to_millis(default_start), to_millis);
let end_ms = params
.end
.as_ref()
.and_then(|s| parse_datetime(s))
.map_or_else(|| to_millis(now), to_millis);
match state
.storage
.query_histogram_percentiles(&params.metric, start_ms, end_ms)
.await
{
Ok(Some(p)) => Json(PercentilesResponse {
metric: params.metric,
percentiles: Some(PercentilesData {
count: p.count,
avg: p.avg,
min: p.min,
max: p.max,
p50: p.p50,
p95: p.p95,
p99: p.p99,
}),
})
.into_response(),
Ok(None) => Json(PercentilesResponse {
metric: params.metric,
percentiles: None,
})
.into_response(),
Err(e) => {
error!("Percentiles query failed for {}: {}", params.metric, e);
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({"error": format!("Query failed: {}", e)})),
)
.into_response()
}
}
}
pub async fn health_check(State(state): State<Arc<AppState>>) -> Response {
match state.storage.check_health().await {
Ok(()) => StatusCode::OK.into_response(),
Err(e) => (
StatusCode::SERVICE_UNAVAILABLE,
Json(serde_json::json!({"error": e.to_string()})),
)
.into_response(),
}
}
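
For the read side, a sketch of calling the /query endpoint defined above, again assuming a local deployment; the metric name is hypothetical, and a bare date is accepted because parse_datetime falls back to [year]-[month]-[day] at midnight UTC.

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let body: serde_json::Value = reqwest::Client::new()
        .get("http://localhost:8080/query")
        .query(&[
            ("metric", "guild.message_count"), // hypothetical metric
            ("start", "2026-01-01"),           // parsed as midnight UTC
            ("resolution", "daily"),           // served from counters_daily
        ])
        .send()
        .await?
        .json()
        .await?;
    println!("{body}");
    Ok(())
}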

105
fluxer_metrics/src/config.rs Normal file

@@ -0,0 +1,105 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
use std::env;
use std::str::FromStr;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MetricsMode {
#[default]
ClickHouse,
NoOp,
}
impl FromStr for MetricsMode {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"noop" | "no-op" | "no_op" | "none" | "disabled" => Ok(Self::NoOp),
"clickhouse" | "ch" | "" => Ok(Self::ClickHouse),
_ => Err(()),
}
}
}
#[derive(Debug, Clone)]
pub struct Config {
pub port: u16,
pub metrics_mode: MetricsMode,
pub clickhouse_url: String,
pub clickhouse_database: String,
pub clickhouse_user: String,
pub clickhouse_password: String,
pub alert_webhook_url: Option<String>,
pub admin_endpoint: Option<String>,
}
impl Config {
pub fn from_env() -> Result<Self, ConfigError> {
let metrics_mode = env::var("METRICS_MODE")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or_default();
let clickhouse_url =
env::var("CLICKHOUSE_URL").unwrap_or_else(|_| "http://localhost:8123".into());
let clickhouse_database =
env::var("CLICKHOUSE_DATABASE").unwrap_or_else(|_| "fluxer_metrics".into());
let clickhouse_user = env::var("CLICKHOUSE_USER").unwrap_or_else(|_| "default".into());
let clickhouse_password = env::var("CLICKHOUSE_PASSWORD").unwrap_or_default();
if metrics_mode == MetricsMode::ClickHouse && clickhouse_url.is_empty() {
return Err(ConfigError::EmptyEnvVar("CLICKHOUSE_URL"));
}
Ok(Self {
port: env::var("METRICS_PORT")
.ok()
.and_then(|p| p.parse().ok())
.unwrap_or(8080),
metrics_mode,
clickhouse_url,
clickhouse_database,
clickhouse_user,
clickhouse_password,
alert_webhook_url: env::var("ALERT_WEBHOOK_URL").ok().filter(|s| !s.is_empty()),
admin_endpoint: env::var("FLUXER_ADMIN_ENDPOINT")
.ok()
.filter(|s| !s.is_empty()),
})
}
}
#[derive(Debug)]
pub enum ConfigError {
EmptyEnvVar(&'static str),
}
impl std::fmt::Display for ConfigError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::EmptyEnvVar(var) => {
write!(f, "required environment variable {var} is empty")
}
}
}
}
impl std::error::Error for ConfigError {}

104
fluxer_metrics/src/db/migrations.rs Normal file

@@ -0,0 +1,104 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
use anyhow::Result;
use clickhouse::Client;
use tracing::info;
const MIGRATIONS: &[(&str, &str)] = &[
(
"001_initial_schema",
include_str!("migrations/001_initial_schema.sql"),
),
(
"002_materialized_views",
include_str!("migrations/002_materialized_views.sql"),
),
];
pub async fn run_migrations(client: &Client, database: &str) -> Result<()> {
info!("Running database migrations...");
client
.query(&format!("CREATE DATABASE IF NOT EXISTS `{database}`"))
.execute()
.await?;
let client = client.clone().with_database(database);
client
.query(
r#"
CREATE TABLE IF NOT EXISTS _migrations (
name String,
applied_at DateTime64(3, 'UTC') DEFAULT now64(3)
)
ENGINE = MergeTree()
ORDER BY name
"#,
)
.execute()
.await?;
for (name, sql) in MIGRATIONS {
if migration_applied(&client, name).await? {
info!("Migration {} already applied, skipping", name);
continue;
}
info!("Applying migration: {}", name);
        // Naive statement splitting: assumes the bundled migration SQL never
        // contains ';' inside a string literal.
        for statement in sql.split(';').filter(|s| !s.trim().is_empty()) {
let statement: String = statement
.lines()
.filter(|line| !line.trim_start().starts_with("--"))
.collect::<Vec<_>>()
.join("\n");
let statement = statement.trim();
if statement.is_empty() {
continue;
}
client.query(statement).execute().await?;
}
mark_applied(&client, name).await?;
info!("Migration {} applied successfully", name);
}
info!("All migrations completed");
Ok(())
}
async fn migration_applied(client: &Client, name: &str) -> Result<bool> {
let count: u64 = client
.query("SELECT count() FROM _migrations WHERE name = ?")
.bind(name)
.fetch_one()
.await?;
Ok(count > 0)
}
async fn mark_applied(client: &Client, name: &str) -> Result<()> {
client
.query("INSERT INTO _migrations (name) VALUES (?)")
.bind(name)
.execute()
.await?;
Ok(())
}

90
fluxer_metrics/src/db/migrations/001_initial_schema.sql Normal file

@@ -0,0 +1,90 @@
CREATE TABLE IF NOT EXISTS counters (
metric_name LowCardinality(String),
timestamp DateTime64(3, 'UTC') CODEC(DoubleDelta, ZSTD(1)),
timestamp_bucket DateTime64(3, 'UTC') MATERIALIZED toStartOfInterval(timestamp, INTERVAL 1 minute),
dimensions_hash String,
dimensions Map(String, String) CODEC(ZSTD(1)),
value Int64 CODEC(Delta, ZSTD(1)),
INDEX idx_dimensions_hash dimensions_hash TYPE bloom_filter GRANULARITY 4
)
ENGINE = SummingMergeTree(value)
PARTITION BY toDate(timestamp_bucket)
ORDER BY (metric_name, timestamp_bucket, dimensions_hash)
TTL toDateTime(timestamp) + toIntervalDay(7)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
CREATE TABLE IF NOT EXISTS gauges (
id String,
metric_name LowCardinality(String),
timestamp DateTime64(3, 'UTC') CODEC(DoubleDelta, ZSTD(1)),
dimensions_hash String,
dimensions Map(String, String) CODEC(ZSTD(1)),
value Float64 CODEC(Gorilla, ZSTD(1)),
INDEX idx_dimensions_hash dimensions_hash TYPE bloom_filter GRANULARITY 4
)
ENGINE = MergeTree()
PARTITION BY toDate(timestamp)
ORDER BY (metric_name, dimensions_hash, timestamp, id)
TTL toDateTime(timestamp) + toIntervalDay(7)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
CREATE TABLE IF NOT EXISTS histogram_raw (
id String,
metric_name LowCardinality(String),
timestamp DateTime64(3, 'UTC') CODEC(DoubleDelta, ZSTD(1)),
timestamp_bucket DateTime64(3, 'UTC') MATERIALIZED toStartOfInterval(timestamp, INTERVAL 1 minute),
dimensions_hash String,
dimensions Map(String, String) CODEC(ZSTD(1)),
value_ms Float64 CODEC(Gorilla, ZSTD(1)),
INDEX idx_dimensions_hash dimensions_hash TYPE bloom_filter GRANULARITY 4
)
ENGINE = MergeTree()
PARTITION BY toDate(timestamp_bucket)
ORDER BY (metric_name, timestamp_bucket, dimensions_hash, id)
TTL toDateTime(timestamp) + toIntervalDay(7)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
CREATE TABLE IF NOT EXISTS crashes (
id String,
timestamp DateTime64(3, 'UTC') CODEC(DoubleDelta, ZSTD(1)),
guild_id LowCardinality(String),
stacktrace String,
notified UInt8 DEFAULT 0,
updated_at DateTime64(3, 'UTC') DEFAULT now64(3),
INDEX idx_guild_id guild_id TYPE bloom_filter GRANULARITY 4
)
ENGINE = ReplacingMergeTree(updated_at)
PARTITION BY toDate(timestamp)
ORDER BY (timestamp, id)
TTL toDateTime(timestamp) + toIntervalDay(90)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
CREATE TABLE IF NOT EXISTS counters_hourly (
metric_name LowCardinality(String),
period_start DateTime64(3, 'UTC') CODEC(DoubleDelta, ZSTD(1)),
dimensions_hash String,
dimensions Map(String, String) CODEC(ZSTD(1)),
total_value Int64 CODEC(Delta, ZSTD(1)),
sample_count UInt64 CODEC(Delta, ZSTD(1)),
INDEX idx_dimensions_hash dimensions_hash TYPE bloom_filter GRANULARITY 4
)
ENGINE = SummingMergeTree((total_value, sample_count))
PARTITION BY toYYYYMM(period_start)
ORDER BY (metric_name, period_start, dimensions_hash)
TTL toDateTime(period_start) + toIntervalDay(90)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
CREATE TABLE IF NOT EXISTS counters_daily (
metric_name LowCardinality(String),
period_start DateTime64(3, 'UTC') CODEC(DoubleDelta, ZSTD(1)),
dimensions_hash String,
dimensions Map(String, String) CODEC(ZSTD(1)),
total_value Int64 CODEC(Delta, ZSTD(1)),
sample_count UInt64 CODEC(Delta, ZSTD(1)),
INDEX idx_dimensions_hash dimensions_hash TYPE bloom_filter GRANULARITY 4
)
ENGINE = SummingMergeTree((total_value, sample_count))
PARTITION BY toYYYYMM(period_start)
ORDER BY (metric_name, period_start, dimensions_hash)
TTL toDateTime(period_start) + toIntervalDay(365)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1

23
fluxer_metrics/src/db/migrations/002_materialized_views.sql Normal file

@@ -0,0 +1,23 @@
CREATE MATERIALIZED VIEW IF NOT EXISTS counters_hourly_mv
TO counters_hourly
AS SELECT
metric_name,
toStartOfHour(timestamp_bucket) AS period_start,
dimensions_hash,
anyLast(dimensions) AS dimensions,
sum(value) AS total_value,
count() AS sample_count
FROM counters
GROUP BY metric_name, period_start, dimensions_hash;
CREATE MATERIALIZED VIEW IF NOT EXISTS counters_daily_mv
TO counters_daily
AS SELECT
metric_name,
toStartOfDay(period_start) AS period_start,
dimensions_hash,
anyLast(dimensions) AS dimensions,
sum(total_value) AS total_value,
sum(sample_count) AS sample_count
FROM counters_hourly
GROUP BY metric_name, period_start, dimensions_hash

70
fluxer_metrics/src/db/mod.rs Normal file

@@ -0,0 +1,70 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
mod migrations;
pub mod schemas;
pub mod storage;
use anyhow::Result;
use async_trait::async_trait;
pub use schemas::*;
pub use storage::{ClickHouseStorage, CrashEventData, LatestGaugeSummary, NoOpStorage, Resolution};
#[async_trait]
pub trait Storage: Send + Sync {
async fn check_health(&self) -> Result<()>;
async fn insert_counter(&self, req: CounterRequest) -> Result<()>;
async fn insert_gauge(&self, req: GaugeRequest) -> Result<()>;
async fn insert_histogram(&self, req: HistogramRequest) -> Result<()>;
async fn insert_crash(&self, req: CrashRequest) -> Result<CrashEventData>;
async fn mark_crash_notified(&self, id: &str) -> Result<()>;
async fn query_counters(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
group_by: Option<&str>,
resolution: Resolution,
) -> Result<Vec<DataPoint>>;
async fn query_gauges(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Vec<DataPoint>>;
async fn query_histograms(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Vec<DataPoint>>;
async fn query_histogram_percentiles(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Option<HistogramPercentiles>>;
async fn get_recent_crashes(&self, limit: usize) -> Result<Vec<CrashEventData>>;
async fn query_latest_gauges(
&self,
metric_name: &str,
group_by: Option<&str>,
) -> Result<Vec<LatestGaugeSummary>>;
}

179
fluxer_metrics/src/db/schemas.rs Normal file

@@ -0,0 +1,179 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
use clickhouse::Row;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
#[derive(Row, Serialize, Deserialize, Debug, Clone)]
pub struct CounterMetric {
pub metric_name: String,
#[serde(with = "clickhouse::serde::time::datetime64::millis")]
pub timestamp: OffsetDateTime,
pub dimensions_hash: String,
pub dimensions: Vec<(String, String)>,
pub value: i64,
}
#[derive(Row, Serialize, Deserialize, Debug, Clone)]
pub struct GaugeMetric {
pub id: String,
pub metric_name: String,
#[serde(with = "clickhouse::serde::time::datetime64::millis")]
pub timestamp: OffsetDateTime,
pub dimensions_hash: String,
pub dimensions: Vec<(String, String)>,
pub value: f64,
}
#[derive(Row, Serialize, Deserialize, Debug, Clone)]
pub struct HistogramRaw {
pub id: String,
pub metric_name: String,
#[serde(with = "clickhouse::serde::time::datetime64::millis")]
pub timestamp: OffsetDateTime,
pub dimensions_hash: String,
pub dimensions: Vec<(String, String)>,
pub value_ms: f64,
}
#[derive(Row, Serialize, Deserialize, Debug, Clone)]
pub struct CrashEvent {
pub id: String,
#[serde(with = "clickhouse::serde::time::datetime64::millis")]
pub timestamp: OffsetDateTime,
pub guild_id: String,
pub stacktrace: String,
pub notified: u8,
#[serde(with = "clickhouse::serde::time::datetime64::millis")]
pub updated_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CounterRequest {
pub name: String,
#[serde(default)]
pub dimensions: serde_json::Map<String, serde_json::Value>,
#[serde(default = "default_counter_value")]
pub value: i64,
}
const fn default_counter_value() -> i64 {
1
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GaugeRequest {
pub name: String,
#[serde(default)]
pub dimensions: serde_json::Map<String, serde_json::Value>,
pub value: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistogramRequest {
pub name: String,
#[serde(default)]
pub dimensions: serde_json::Map<String, serde_json::Value>,
pub value_ms: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrashRequest {
pub guild_id: String,
pub stacktrace: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryParams {
pub metric: String,
pub start: Option<String>,
pub end: Option<String>,
pub group_by: Option<String>,
pub resolution: Option<String>,
#[serde(rename = "type")]
pub metric_type: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopQueryParams {
pub metric: String,
pub limit: Option<usize>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataPoint {
pub timestamp: i64,
pub value: f64,
#[serde(skip_serializing_if = "Option::is_none")]
pub dimensions: Option<serde_json::Map<String, serde_json::Value>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryResponse {
pub metric: String,
pub data: Vec<DataPoint>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopEntry {
pub label: String,
pub value: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TopQueryResponse {
pub metric: String,
pub entries: Vec<TopEntry>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HistogramPercentiles {
pub count: u64,
pub avg: f64,
pub min: f64,
pub max: f64,
pub p50: f64,
pub p95: f64,
pub p99: f64,
}
pub fn convert_dimensions(
dimensions: &serde_json::Map<String, serde_json::Value>,
) -> Vec<(String, String)> {
dimensions
.iter()
.map(|(k, v)| {
let value = match v {
serde_json::Value::String(s) => s.clone(),
other => other.to_string(),
};
(k.clone(), value)
})
.collect()
}
pub fn dimensions_to_json(
dimensions: &[(String, String)],
) -> serde_json::Map<String, serde_json::Value> {
dimensions
.iter()
.map(|(k, v)| (k.clone(), serde_json::Value::String(v.clone())))
.collect()
}
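
A small round-trip sketch for the dimension helpers above (assuming convert_dimensions and dimensions_to_json are in scope); it shows that non-string values survive the round trip only as strings.

fn main() {
    let mut dims = serde_json::Map::new();
    dims.insert("guild_id".to_string(), serde_json::json!("123"));
    dims.insert("shard".to_string(), serde_json::json!(7)); // stringified to "7"
    let pairs = convert_dimensions(&dims);
    assert_eq!(
        pairs,
        vec![
            ("guild_id".to_string(), "123".to_string()),
            ("shard".to_string(), "7".to_string()),
        ]
    );
    // The reverse direction yields string values for every key.
    let back = dimensions_to_json(&pairs);
    assert_eq!(back.get("shard"), Some(&serde_json::json!("7")));
}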

848
fluxer_metrics/src/db/storage.rs Normal file

@@ -0,0 +1,848 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
use anyhow::{Result, ensure};
use async_trait::async_trait;
use clickhouse::{Client, Row};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use time::OffsetDateTime;
use tracing::info;
use ulid::Ulid;
use super::Storage;
use super::migrations::run_migrations;
use super::schemas::{
CounterMetric, CounterRequest, CrashEvent, CrashRequest, DataPoint, GaugeMetric, GaugeRequest,
HistogramPercentiles, HistogramRaw, HistogramRequest, convert_dimensions, dimensions_to_json,
};
use crate::config::Config;
/// Deterministic 16-hex-char digest of a dimension map. `serde_json::Map` is
/// BTreeMap-backed by default (keys sorted), so equal maps serialize, and
/// therefore hash, identically.
pub fn hash_dimensions(dimensions: &serde_json::Map<String, serde_json::Value>) -> String {
if dimensions.is_empty() {
return String::new();
}
let json = serde_json::to_string(dimensions).unwrap_or_default();
let mut hasher = Sha256::new();
hasher.update(json.as_bytes());
format!("{:x}", hasher.finalize())[..16].to_string()
}
fn sanitize_dimension_key(key: &str) -> Option<String> {
if key.is_empty() || key.len() > 64 {
return None;
}
let sanitized: String = key
.chars()
.filter(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '.' || *c == '-')
.collect();
if sanitized.is_empty() || sanitized != key {
None
} else {
Some(sanitized)
}
}
fn validate_identifier(name: &str) -> Result<()> {
ensure!(
name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_'),
"Invalid ClickHouse identifier: {name}"
);
Ok(())
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Resolution {
Raw,
Hourly,
Daily,
}
impl Resolution {
pub fn from_str(s: Option<&str>) -> Self {
match s {
Some("hourly") => Self::Hourly,
Some("daily") => Self::Daily,
_ => Self::Raw,
}
}
}
#[derive(Clone)]
pub struct ClickHouseStorage {
client: Client,
database: String,
}
#[derive(Debug, Clone)]
pub struct LatestGaugeSummary {
pub dimensions: serde_json::Map<String, serde_json::Value>,
pub value: f64,
pub label: String,
}
#[derive(Debug, Clone)]
pub struct CrashEventData {
pub id: String,
pub timestamp: i64,
pub guild_id: String,
pub stacktrace: String,
pub notified: bool,
}
#[derive(Row, Serialize, Deserialize)]
struct CounterQueryRow {
timestamp_bucket: i64,
group_key: String,
total: i64,
}
#[derive(Row, Serialize, Deserialize)]
struct AggregatedCounterQueryRow {
period_start: i64,
group_key: String,
total: i64,
}
#[derive(Row, Serialize, Deserialize)]
struct GaugeQueryRow {
timestamp: i64,
value: f64,
dimensions: Vec<(String, String)>,
}
#[derive(Row, Serialize, Deserialize)]
struct HistogramQueryRow {
timestamp_bucket: i64,
avg_value: f64,
}
#[derive(Row, Serialize, Deserialize)]
struct LatestGaugeRow {
dimensions_hash: String,
timestamp: i64,
value: f64,
dimensions: Vec<(String, String)>,
label: String,
}
#[derive(Row, Serialize, Deserialize)]
struct PercentilesRow {
count: u64,
avg: f64,
min: f64,
max: f64,
p50: f64,
p95: f64,
p99: f64,
}
#[async_trait]
impl Storage for ClickHouseStorage {
async fn check_health(&self) -> Result<()> {
self.client.query("SELECT 1").execute().await?;
Ok(())
}
async fn insert_counter(&self, req: CounterRequest) -> Result<()> {
self.insert_counter_impl(req).await
}
async fn insert_gauge(&self, req: GaugeRequest) -> Result<()> {
self.insert_gauge_impl(req).await
}
async fn insert_histogram(&self, req: HistogramRequest) -> Result<()> {
self.insert_histogram_impl(req).await
}
async fn insert_crash(&self, req: CrashRequest) -> Result<CrashEventData> {
self.insert_crash_impl(req).await
}
async fn mark_crash_notified(&self, id: &str) -> Result<()> {
self.mark_crash_notified_impl(id).await
}
async fn query_counters(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
group_by: Option<&str>,
resolution: Resolution,
) -> Result<Vec<DataPoint>> {
self.query_counters_impl(metric_name, start_ms, end_ms, group_by, resolution)
.await
}
async fn query_gauges(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Vec<DataPoint>> {
self.query_gauges_impl(metric_name, start_ms, end_ms).await
}
async fn query_histograms(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Vec<DataPoint>> {
self.query_histograms_impl(metric_name, start_ms, end_ms)
.await
}
async fn query_histogram_percentiles(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Option<HistogramPercentiles>> {
self.query_histogram_percentiles_impl(metric_name, start_ms, end_ms)
.await
}
async fn get_recent_crashes(&self, limit: usize) -> Result<Vec<CrashEventData>> {
self.get_recent_crashes_impl(limit).await
}
async fn query_latest_gauges(
&self,
metric_name: &str,
group_by: Option<&str>,
) -> Result<Vec<LatestGaugeSummary>> {
self.query_latest_gauges_impl(metric_name, group_by).await
}
}
impl ClickHouseStorage {
pub async fn new(config: &Config) -> Result<Self> {
info!(
"Initializing ClickHouse storage at {}",
config.clickhouse_url
);
validate_identifier(&config.clickhouse_database)?;
let migration_client = Client::default()
.with_url(&config.clickhouse_url)
.with_user(&config.clickhouse_user)
.with_password(&config.clickhouse_password);
run_migrations(&migration_client, &config.clickhouse_database).await?;
let client = migration_client.with_database(&config.clickhouse_database);
info!("ClickHouse storage initialized successfully");
Ok(Self {
client,
database: config.clickhouse_database.clone(),
})
}
async fn insert_counter_impl(&self, req: CounterRequest) -> Result<()> {
let now = OffsetDateTime::now_utc();
let dimensions_hash = hash_dimensions(&req.dimensions);
let dimensions = convert_dimensions(&req.dimensions);
let metric = CounterMetric {
metric_name: req.name,
timestamp: now,
dimensions_hash,
dimensions,
value: req.value,
};
let mut insert = self.client.insert("counters")?;
insert.write(&metric).await?;
insert.end().await?;
Ok(())
}
async fn insert_gauge_impl(&self, req: GaugeRequest) -> Result<()> {
let now = OffsetDateTime::now_utc();
let dimensions_hash = hash_dimensions(&req.dimensions);
let dimensions = convert_dimensions(&req.dimensions);
let metric = GaugeMetric {
id: Ulid::new().to_string(),
metric_name: req.name,
timestamp: now,
dimensions_hash,
dimensions,
value: req.value,
};
let mut insert = self.client.insert("gauges")?;
insert.write(&metric).await?;
insert.end().await?;
Ok(())
}
async fn insert_histogram_impl(&self, req: HistogramRequest) -> Result<()> {
let now = OffsetDateTime::now_utc();
let dimensions_hash = hash_dimensions(&req.dimensions);
let dimensions = convert_dimensions(&req.dimensions);
let raw = HistogramRaw {
id: Ulid::new().to_string(),
metric_name: req.name,
timestamp: now,
dimensions_hash,
dimensions,
value_ms: req.value_ms,
};
let mut insert = self.client.insert("histogram_raw")?;
insert.write(&raw).await?;
insert.end().await?;
Ok(())
}
async fn insert_crash_impl(&self, req: CrashRequest) -> Result<CrashEventData> {
let now = OffsetDateTime::now_utc();
let id = Ulid::new().to_string();
let event = CrashEvent {
id: id.clone(),
timestamp: now,
guild_id: req.guild_id.clone(),
stacktrace: req.stacktrace.clone(),
notified: 0,
updated_at: now,
};
let mut insert = self.client.insert("crashes")?;
insert.write(&event).await?;
insert.end().await?;
Ok(CrashEventData {
id,
timestamp: (now.unix_timestamp_nanos() / 1_000_000) as i64,
guild_id: req.guild_id,
stacktrace: req.stacktrace,
notified: false,
})
}
async fn mark_crash_notified_impl(&self, id: &str) -> Result<()> {
let query = format!(
r#"
INSERT INTO {}.crashes
SELECT
id,
timestamp,
guild_id,
stacktrace,
1 AS notified,
now64(3) AS updated_at
FROM {}.crashes
WHERE id = ?
ORDER BY updated_at DESC
LIMIT 1
"#,
self.database, self.database
);
self.client.query(&query).bind(id).execute().await?;
Ok(())
}
async fn query_counters_impl(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
group_by: Option<&str>,
resolution: Resolution,
) -> Result<Vec<DataPoint>> {
match resolution {
Resolution::Raw => {
self.query_counters_raw(metric_name, start_ms, end_ms, group_by)
.await
}
Resolution::Hourly => {
self.query_counters_aggregated(
metric_name,
start_ms,
end_ms,
group_by,
"counters_hourly",
)
.await
}
Resolution::Daily => {
self.query_counters_aggregated(
metric_name,
start_ms,
end_ms,
group_by,
"counters_daily",
)
.await
}
}
}
async fn query_counters_raw(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
group_by: Option<&str>,
) -> Result<Vec<DataPoint>> {
let group_expr = group_by
.and_then(sanitize_dimension_key)
.map_or("''".to_string(), |g| format!("dimensions['{g}']"));
let query = format!(
r#"
SELECT
toUnixTimestamp64Milli(timestamp_bucket) AS timestamp_bucket,
{group_expr} AS group_key,
sum(value) AS total
FROM {}.counters
WHERE metric_name = ?
AND timestamp_bucket >= fromUnixTimestamp64Milli(?)
AND timestamp_bucket <= fromUnixTimestamp64Milli(?)
GROUP BY timestamp_bucket, group_key
ORDER BY timestamp_bucket
"#,
self.database
);
let rows: Vec<CounterQueryRow> = self
.client
.query(&query)
.bind(metric_name)
.bind(start_ms)
.bind(end_ms)
.fetch_all()
.await?;
let data: Vec<DataPoint> = rows
.into_iter()
.map(|r| {
let dims = if r.group_key.is_empty() {
None
} else {
let mut map = serde_json::Map::new();
map.insert(
group_by.unwrap_or("group").to_string(),
serde_json::Value::String(r.group_key),
);
Some(map)
};
DataPoint {
timestamp: r.timestamp_bucket,
value: r.total as f64,
dimensions: dims,
}
})
.collect();
Ok(data)
}
async fn query_counters_aggregated(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
group_by: Option<&str>,
table_name: &str,
) -> Result<Vec<DataPoint>> {
let group_expr = group_by
.and_then(sanitize_dimension_key)
.map_or("''".to_string(), |g| format!("dimensions['{g}']"));
let query = format!(
r#"
SELECT
toUnixTimestamp64Milli(period_start) AS period_start,
{group_expr} AS group_key,
sum(total_value) AS total
FROM {}.{table_name}
WHERE metric_name = ?
AND period_start >= fromUnixTimestamp64Milli(?)
AND period_start <= fromUnixTimestamp64Milli(?)
GROUP BY period_start, group_key
ORDER BY period_start
"#,
self.database
);
let rows: Vec<AggregatedCounterQueryRow> = self
.client
.query(&query)
.bind(metric_name)
.bind(start_ms)
.bind(end_ms)
.fetch_all()
.await?;
let data: Vec<DataPoint> = rows
.into_iter()
.map(|r| {
let dims = if r.group_key.is_empty() {
None
} else {
let mut map = serde_json::Map::new();
map.insert(
group_by.unwrap_or("group").to_string(),
serde_json::Value::String(r.group_key),
);
Some(map)
};
DataPoint {
timestamp: r.period_start,
value: r.total as f64,
dimensions: dims,
}
})
.collect();
Ok(data)
}
async fn query_gauges_impl(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Vec<DataPoint>> {
let query = format!(
r#"
SELECT
toUnixTimestamp64Milli(timestamp) AS timestamp,
value,
dimensions
FROM {}.gauges
WHERE metric_name = ?
AND timestamp >= fromUnixTimestamp64Milli(?)
AND timestamp <= fromUnixTimestamp64Milli(?)
ORDER BY timestamp
"#,
self.database
);
let rows: Vec<GaugeQueryRow> = self
.client
.query(&query)
.bind(metric_name)
.bind(start_ms)
.bind(end_ms)
.fetch_all()
.await?;
let data: Vec<DataPoint> = rows
.into_iter()
.map(|r| {
let dims = if r.dimensions.is_empty() {
None
} else {
Some(dimensions_to_json(&r.dimensions))
};
DataPoint {
timestamp: r.timestamp,
value: r.value,
dimensions: dims,
}
})
.collect();
Ok(data)
}
async fn query_histograms_impl(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Vec<DataPoint>> {
let query = format!(
r#"
SELECT
toUnixTimestamp64Milli(timestamp_bucket) AS timestamp_bucket,
avg(value_ms) AS avg_value
FROM {}.histogram_raw
WHERE metric_name = ?
AND timestamp_bucket >= fromUnixTimestamp64Milli(?)
AND timestamp_bucket <= fromUnixTimestamp64Milli(?)
GROUP BY timestamp_bucket
ORDER BY timestamp_bucket
"#,
self.database
);
let rows: Vec<HistogramQueryRow> = self
.client
.query(&query)
.bind(metric_name)
.bind(start_ms)
.bind(end_ms)
.fetch_all()
.await?;
let data: Vec<DataPoint> = rows
.into_iter()
.map(|r| DataPoint {
timestamp: r.timestamp_bucket,
value: r.avg_value,
dimensions: None,
})
.collect();
Ok(data)
}
async fn query_histogram_percentiles_impl(
&self,
metric_name: &str,
start_ms: i64,
end_ms: i64,
) -> Result<Option<HistogramPercentiles>> {
let query = format!(
r#"
SELECT
count() AS count,
avg(value_ms) AS avg,
min(value_ms) AS min,
max(value_ms) AS max,
quantile(0.50)(value_ms) AS p50,
quantile(0.95)(value_ms) AS p95,
quantile(0.99)(value_ms) AS p99
FROM {}.histogram_raw
WHERE metric_name = ?
AND timestamp_bucket >= fromUnixTimestamp64Milli(?)
AND timestamp_bucket <= fromUnixTimestamp64Milli(?)
"#,
self.database
);
let result: Option<PercentilesRow> = self
.client
.query(&query)
.bind(metric_name)
.bind(start_ms)
.bind(end_ms)
.fetch_optional()
.await?;
match result {
Some(r) if r.count > 0 => Ok(Some(HistogramPercentiles {
count: r.count,
avg: r.avg,
min: r.min,
max: r.max,
p50: r.p50,
p95: r.p95,
p99: r.p99,
})),
_ => Ok(None),
}
}
async fn get_recent_crashes_impl(&self, limit: usize) -> Result<Vec<CrashEventData>> {
let query = format!(
r#"
SELECT
id,
argMax(crashes.timestamp, crashes.updated_at) AS timestamp,
argMax(guild_id, crashes.updated_at) AS guild_id,
argMax(stacktrace, crashes.updated_at) AS stacktrace,
argMax(notified, crashes.updated_at) AS notified,
max(crashes.updated_at) AS updated_at
FROM {}.crashes
GROUP BY id
ORDER BY timestamp DESC
LIMIT ?
"#,
self.database
);
let rows: Vec<CrashEvent> = self.client.query(&query).bind(limit).fetch_all().await?;
let crashes: Vec<CrashEventData> = rows
.into_iter()
.map(|r| CrashEventData {
id: r.id,
timestamp: (r.timestamp.unix_timestamp_nanos() / 1_000_000) as i64,
guild_id: r.guild_id,
stacktrace: r.stacktrace,
notified: r.notified != 0,
})
.collect();
Ok(crashes)
}
async fn query_latest_gauges_impl(
&self,
metric_name: &str,
group_by: Option<&str>,
) -> Result<Vec<LatestGaugeSummary>> {
let label_expr = group_by
.and_then(sanitize_dimension_key)
.map_or("dimensions_hash".to_string(), |g| {
format!("argMax(gauges.dimensions['{g}'], gauges.timestamp)")
});
let query = format!(
r#"
SELECT
dimensions_hash,
max(gauges.timestamp) AS timestamp,
argMax(gauges.value, gauges.timestamp) AS value,
argMax(gauges.dimensions, gauges.timestamp) AS dimensions,
{label_expr} AS label
FROM {}.gauges
WHERE metric_name = ?
GROUP BY dimensions_hash
ORDER BY value DESC
"#,
self.database
);
let rows: Vec<LatestGaugeRow> = self
.client
.query(&query)
.bind(metric_name)
.fetch_all()
.await?;
let summaries: Vec<LatestGaugeSummary> = rows
.into_iter()
.map(|r| LatestGaugeSummary {
dimensions: dimensions_to_json(&r.dimensions),
value: r.value,
label: r.label,
})
.collect();
Ok(summaries)
}
}
#[derive(Clone)]
pub struct NoOpStorage;
impl NoOpStorage {
pub fn new() -> Self {
info!("Initializing NoOp storage (metrics will be discarded)");
Self
}
}
impl Default for NoOpStorage {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Storage for NoOpStorage {
async fn check_health(&self) -> Result<()> {
Ok(())
}
async fn insert_counter(&self, _req: CounterRequest) -> Result<()> {
Ok(())
}
async fn insert_gauge(&self, _req: GaugeRequest) -> Result<()> {
Ok(())
}
async fn insert_histogram(&self, _req: HistogramRequest) -> Result<()> {
Ok(())
}
async fn insert_crash(&self, req: CrashRequest) -> Result<CrashEventData> {
let now = OffsetDateTime::now_utc();
let id = Ulid::new().to_string();
Ok(CrashEventData {
id,
timestamp: (now.unix_timestamp_nanos() / 1_000_000) as i64,
guild_id: req.guild_id,
stacktrace: req.stacktrace,
notified: false,
})
}
async fn mark_crash_notified(&self, _id: &str) -> Result<()> {
Ok(())
}
async fn query_counters(
&self,
_metric_name: &str,
_start_ms: i64,
_end_ms: i64,
_group_by: Option<&str>,
_resolution: Resolution,
) -> Result<Vec<DataPoint>> {
Ok(Vec::new())
}
async fn query_gauges(
&self,
_metric_name: &str,
_start_ms: i64,
_end_ms: i64,
) -> Result<Vec<DataPoint>> {
Ok(Vec::new())
}
async fn query_histograms(
&self,
_metric_name: &str,
_start_ms: i64,
_end_ms: i64,
) -> Result<Vec<DataPoint>> {
Ok(Vec::new())
}
async fn query_histogram_percentiles(
&self,
_metric_name: &str,
_start_ms: i64,
_end_ms: i64,
) -> Result<Option<HistogramPercentiles>> {
Ok(None)
}
async fn get_recent_crashes(&self, _limit: usize) -> Result<Vec<CrashEventData>> {
Ok(Vec::new())
}
async fn query_latest_gauges(
&self,
_metric_name: &str,
_group_by: Option<&str>,
) -> Result<Vec<LatestGaugeSummary>> {
Ok(Vec::new())
}
}

121
fluxer_metrics/src/main.rs Normal file

@@ -0,0 +1,121 @@
/*
* Copyright (C) 2026 Fluxer Contributors
*
* This file is part of Fluxer.
*
* Fluxer is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Fluxer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with Fluxer. If not, see <https://www.gnu.org/licenses/>.
*/
mod alerts;
mod api;
mod config;
mod db;
use std::net::SocketAddr;
use std::sync::Arc;
use axum::{
Router,
routing::{get, post},
};
use tower_http::cors::{Any, CorsLayer};
use tower_http::trace::TraceLayer;
use tracing::info;
use tracing_subscriber::{EnvFilter, fmt, prelude::*};
use api::ingest::{
AppState, ingest_batch, ingest_counter, ingest_crash, ingest_gauge, ingest_histogram,
};
use api::query::{
health_check, query_aggregate, query_crashes, query_metrics, query_percentiles, query_top,
};
use config::{Config, MetricsMode};
use db::{ClickHouseStorage, NoOpStorage, Storage};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let _ = dotenvy::dotenv();
tracing_subscriber::registry()
.with(fmt::layer().with_target(true))
.with(
            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
)
.init();
info!("Starting fluxer_metrics service");
let config = Config::from_env().unwrap_or_else(|e| {
tracing::error!("Configuration error: {}", e);
std::process::exit(1);
});
let mode_str = match config.metrics_mode {
MetricsMode::ClickHouse => "clickhouse",
MetricsMode::NoOp => "noop",
};
info!(
"Configuration loaded: port={}, mode={}, alert_webhook={}",
config.port,
mode_str,
config.alert_webhook_url.is_some()
);
if config.metrics_mode == MetricsMode::ClickHouse {
info!(
"ClickHouse config: url={}, database={}",
config.clickhouse_url, config.clickhouse_database
);
}
let storage: Box<dyn Storage> = match config.metrics_mode {
MetricsMode::ClickHouse => Box::new(ClickHouseStorage::new(&config).await?),
MetricsMode::NoOp => Box::new(NoOpStorage::new()),
};
let state = Arc::new(AppState {
storage,
config: config.clone(),
});
let app = Router::new()
.route("/metrics/counter", post(ingest_counter))
.route("/metrics/gauge", post(ingest_gauge))
.route("/metrics/histogram", post(ingest_histogram))
.route("/metrics/crash", post(ingest_crash))
.route("/metrics/batch", post(ingest_batch))
.route("/query", get(query_metrics))
.route("/query/aggregate", get(query_aggregate))
.route("/query/percentiles", get(query_percentiles))
.route("/query/top", get(query_top))
.route("/query/crashes", get(query_crashes))
.route("/_health", get(health_check))
.layer(
CorsLayer::new()
.allow_origin(Any)
.allow_methods(Any)
.allow_headers(Any),
)
.layer(TraceLayer::new_for_http())
.with_state(state);
let addr = SocketAddr::from(([0, 0, 0, 0], config.port));
info!("Listening on {}", addr);
let listener = tokio::net::TcpListener::bind(addr).await?;
axum::serve(listener, app).await?;
Ok(())
}
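
End to end, another Fluxer service would report a crash with a POST matching CrashRequest. A hedged sketch, assuming the metrics service is reachable at localhost:8080; the guild ID and stacktrace are illustrative.

use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let resp = reqwest::Client::new()
        .post("http://localhost:8080/metrics/crash") // route registered in main() above
        .json(&json!({
            "guild_id": "123",
            "stacktrace": "thread 'main' panicked at ..."
        }))
        .send()
        .await?;
    // 202 Accepted; the service also fires the Discord alert if ALERT_WEBHOOK_URL is set.
    println!("crash report -> {}", resp.status());
    Ok(())
}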