Files
chat-deploy/deployments/observability/config/prometheus-agent-entrypoint.sh
kim ea386977ba
Some checks failed
itom-platform auto build image / build (push) Has been cancelled
修复业务侧日志采集失败问题
2026-01-21 12:08:32 +08:00

237 lines
7.5 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env sh
set -eu
# ------------------------------
# chat-deploy metrics collection: Prometheus Agent entrypoint
# - Generates /prometheus/prometheus.yml and /prometheus/targets.json from
#   environment variables.
# - Then starts Prometheus (intended to run in agent mode; remote_write pushes
#   the samples to the itom-platform center).
#
# Key environment variables (from config.env):
# - METRICS_REMOTE_WRITE_URL=http(s)://<OBS_HOST>/api/v1/write
# - METRICS_TARGETS=name=host:port,name2=host:port
# - OBS_AUTH_ENABLE=false/true   whether the center requires authentication
# - OBS_AUTH_TOKEN=xxxxx         required when OBS_AUTH_ENABLE=true
# - OBS_PROJECT / OBS_ENV        written into labels so the center can filter
# ------------------------------

# Give every setting a defined value so later reads are safe under `set -u`.
# `:=` assigns the fallback only when the variable is unset or empty.
: "${METRICS_REMOTE_WRITE_URL:=}"
: "${METRICS_TARGETS:=}"
: "${OBS_AUTH_ENABLE:=false}"
: "${OBS_AUTH_TOKEN:=}"
: "${OBS_PROJECT:=chat-deploy}"
: "${OBS_ENV:=prod}"
: "${OBS_SERVICE:=chat-deploy}"

# The service label falls back to OBS_SERVICE, then to a fixed literal.
: "${OBS_SERVICE_NAME:=$OBS_SERVICE}"
: "${OBS_SERVICE_NAME:=chat-deploy}"
# is_truthy VALUE
# Return 0 when VALUE is one of 1 / true / yes / on, compared
# case-insensitively; return 1 for anything else, including an empty or
# missing argument.
# Mixed-case spellings such as "True" or "Yes" are accepted so that a flag
# like OBS_AUTH_ENABLE=True is not silently treated as disabled.
is_truthy() {
  case "${1:-}" in
    1 | [Tt][Rr][Uu][Ee] | [Yy][Ee][Ss] | [Oo][Nn]) return 0 ;;
  esac
  return 1
}
# Fail fast (exit code 2) when a mandatory setting is missing.

# The remote_write endpoint is always required.
[ -n "$METRICS_REMOTE_WRITE_URL" ] || {
  echo "[prometheus-agent] FAIL: METRICS_REMOTE_WRITE_URL 为空" >&2
  exit 2
}

# A token is required only when authentication is switched on.
if is_truthy "$OBS_AUTH_ENABLE"; then
  if [ -z "$OBS_AUTH_TOKEN" ]; then
    echo "[prometheus-agent] FAIL: OBS_AUTH_ENABLE=true 但 OBS_AUTH_TOKEN 为空" >&2
    exit 2
  fi
fi
# Make sure the data directory exists and is writable.
mkdir -p /prometheus/data
# Best effort only: a chmod failure is deliberately ignored.
chmod -R 777 /prometheus 2>/dev/null || true

# Build the file_sd target list from METRICS_TARGETS ("name=host:port,...").
# Each valid entry becomes one JSON object carrying service/project/env labels.
TARGETS_JSON="/prometheus/targets.json"

# json_escape STRING
# Print STRING with backslashes and double quotes escaped so that it can be
# embedded inside a JSON string literal without producing invalid JSON.
json_escape() {
  printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'
}

# These two labels are identical for every entry, so escape them once.
project_json="$(json_escape "$OBS_PROJECT")"
env_json="$(json_escape "$OBS_ENV")"

echo "[" > "$TARGETS_JSON"
first=1   # 1 until the first entry is written; decides whether a "," is needed
IFS=','
for item in $METRICS_TARGETS; do
  item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
  [ -n "$item" ] || continue

  # Text before the first "=" is the service name, the rest is the target.
  name="$(printf '%s' "$item" | cut -d= -f1 | tr -d '[:space:]')"
  target="$(printf '%s' "$item" | cut -d= -f2- | tr -d '[:space:]')"
  [ -n "$name" ] || continue
  [ -n "$target" ] || continue

  # Keep host:port only: drop a leading http(s):// and any path.
  target="$(printf '%s' "$target" | sed -E 's#^https?://##; s#/.*$##')"
  # An entry such as "svc=http://" is empty after stripping; skip it.
  [ -n "$target" ] || continue

  if [ "$first" -eq 1 ]; then
    first=0
  else
    echo "," >> "$TARGETS_JSON"
  fi
  printf '{"targets":["%s"],"labels":{"service":"%s","project":"%s","env":"%s"}}\n' \
    "$(json_escape "$target")" "$(json_escape "$name")" \
    "$project_json" "$env_json" >> "$TARGETS_JSON"
done
unset IFS
echo "]" >> "$TARGETS_JSON"

if [ "$first" -eq 1 ]; then
  echo "[prometheus-agent] WARN: METRICS_TARGETS 为空或格式无效,仅采集 prometheus-agent 自身指标和 Exporter 指标" >&2
fi
# Write the base Prometheus configuration: global settings, a self-scrape job
# and a file_sd job that reads the generated targets file. Exporter jobs and
# the remote_write section are appended to the same file afterwards.
# The nesting below is significant: Prometheus expects scrape_interval and
# external_labels under "global", and job fields under each list item.
CONFIG="/prometheus/prometheus.yml"
cat > "$CONFIG" <<EOF
global:
  scrape_interval: 15s
  external_labels:
    project: '$OBS_PROJECT'
    env: '$OBS_ENV'
    service: '$OBS_SERVICE_NAME'

scrape_configs:
  # Prometheus Agent 自身指标(用于验证采集链路是否正常)
  - job_name: "prometheus-agent"
    static_configs:
      - targets: ["localhost:9090"]

  # 业务服务指标(通过 METRICS_TARGETS 或 file_sd_configs 配置)
  - job_name: "services"
    metrics_path: /metrics
    file_sd_configs:
      - files: ["$TARGETS_JSON"]
        refresh_interval: 10s
EOF
# Separator and heading comment for the exporter jobs appended further down
# (auto-discovered or manually specified exporters).
echo "" >> "$CONFIG"
echo "  # 自动发现/手动指定的 Exporter 指标" >> "$CONFIG"
# trim STRING
# Print STRING with leading and trailing whitespace removed.
trim() {
  printf '%s' "$1" \
    | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}
# normalize_target RAW DEFAULT_PORT
# Print RAW reduced to "host:port":
#   - surrounding whitespace is removed,
#   - a leading http:// or https:// and anything from the first "/" on is
#     dropped,
#   - ":DEFAULT_PORT" is appended when the result contains no ":".
# Returns 1 and prints nothing when nothing is left after cleaning.
normalize_target() {
  # Trim first: list items usually follow ", " and the scheme pattern below
  # is anchored at the start, so leading blanks would otherwise hide it.
  raw="$(trim "$1")"
  default_port="$2"
  raw="$(printf '%s' "$raw" | sed -E 's#^https?://##; s#/.*$##')"
  # Trim again: removing a path can expose trailing blanks ("host /metrics").
  raw="$(trim "$raw")"
  [ -n "$raw" ] || return 1
  case "$raw" in
    *:*) printf '%s' "$raw" ;;
    *) printf '%s:%s' "$raw" "$default_port" ;;
  esac
}
# append_exporter_job JOB TARGETS DEFAULT_PORT SERVICE_LABEL
# Append one static scrape job to $CONFIG.
#   JOB            job_name to emit
#   TARGETS        comma-separated targets; each one goes through
#                  normalize_target, and entries it rejects are dropped
#   DEFAULT_PORT   port handed to normalize_target for entries without one
#   SERVICE_LABEL  value of the "service" label on the job
# Reads the globals CONFIG and OBS_PROJECT. Writes nothing and returns 0 when
# no usable target remains.
# The emitted lines are indented so the job nests as a list item under the
# "scrape_configs:" key already present in $CONFIG.
append_exporter_job() {
  job="$1"
  targets="$2"
  default_port="$3"
  service_label="$4"
  target_lines=""

  IFS=','
  for item in $targets; do
    # "|| true" keeps `set -e` from aborting on a rejected entry.
    item="$(normalize_target "$item" "$default_port" || true)"
    [ -n "$item" ] || continue
    target_lines="${target_lines}          - '${item}'
"
  done
  unset IFS

  if [ -z "$target_lines" ]; then
    return 0
  fi

  {
    printf "  - job_name: '%s'\n" "$job"
    printf '    static_configs:\n'
    printf '      - targets:\n'
    printf '%s' "$target_lines"
    printf '        labels:\n'
    printf "          project: '%s'\n" "$OBS_PROJECT"
    printf "          service: '%s'\n" "$service_label"
  } >> "$CONFIG"
}
# Redis Exporter (port 9121)
# Targets come from REDIS_EXPORTER_TARGETS plus, when one of the known
# exporter hostnames resolves, that host on port 9121.
REDIS_EXPORTER_TARGETS="${REDIS_EXPORTER_TARGETS:-}"
REDIS_EXPORTER_SERVICE="${REDIS_EXPORTER_SERVICE:-$OBS_SERVICE_NAME}"
REDIS_EXPORTER_HOST=""

# The first hostname that resolves wins.
for candidate in redis-exporter chat-deploy-redis-exporter; do
  if getent hosts "$candidate" >/dev/null 2>&1; then
    REDIS_EXPORTER_HOST="$candidate"
    break
  fi
done

# Append the discovered host, comma-separated when the list is non-empty.
if [ -n "$REDIS_EXPORTER_HOST" ]; then
  REDIS_EXPORTER_TARGETS="${REDIS_EXPORTER_TARGETS:+${REDIS_EXPORTER_TARGETS},}${REDIS_EXPORTER_HOST}:9121"
fi

append_exporter_job "redis" "$REDIS_EXPORTER_TARGETS" "9121" "$REDIS_EXPORTER_SERVICE"
[ -z "$REDIS_EXPORTER_TARGETS" ] \
  || echo "[prometheus-agent] Redis Exporter 采集目标已配置project=${OBS_PROJECT} service=${REDIS_EXPORTER_SERVICE}"
# MongoDB Exporter (port 9216) - chat-deploy uses MongoDB
# Targets come from MONGODB_EXPORTER_TARGETS plus, when one of the known
# exporter hostnames resolves, that host on port 9216.
MONGODB_EXPORTER_TARGETS="${MONGODB_EXPORTER_TARGETS:-}"
MONGODB_EXPORTER_SERVICE="${MONGODB_EXPORTER_SERVICE:-$OBS_SERVICE_NAME}"
MONGODB_EXPORTER_HOST=""

# The first hostname that resolves wins.
for candidate in mongodb-exporter chat-deploy-mongodb-exporter; do
  if getent hosts "$candidate" >/dev/null 2>&1; then
    MONGODB_EXPORTER_HOST="$candidate"
    break
  fi
done

# Append the discovered host, comma-separated when the list is non-empty.
if [ -n "$MONGODB_EXPORTER_HOST" ]; then
  MONGODB_EXPORTER_TARGETS="${MONGODB_EXPORTER_TARGETS:+${MONGODB_EXPORTER_TARGETS},}${MONGODB_EXPORTER_HOST}:9216"
fi

append_exporter_job "mongodb" "$MONGODB_EXPORTER_TARGETS" "9216" "$MONGODB_EXPORTER_SERVICE"
[ -z "$MONGODB_EXPORTER_TARGETS" ] \
  || echo "[prometheus-agent] MongoDB Exporter 采集目标已配置project=${OBS_PROJECT} service=${MONGODB_EXPORTER_SERVICE}"
# Node Exporter (port 9100) - system-level metrics (CPU/Memory/Disk/Network/IO)
# Targets come from NODE_EXPORTER_TARGETS plus, when one of the known
# exporter hostnames resolves, that host on port 9100.
NODE_EXPORTER_TARGETS="${NODE_EXPORTER_TARGETS:-}"
NODE_EXPORTER_SERVICE="${NODE_EXPORTER_SERVICE:-$OBS_SERVICE_NAME}"
NODE_EXPORTER_HOST=""

# The first hostname that resolves wins.
for candidate in node-exporter chat-deploy-node-exporter; do
  if getent hosts "$candidate" >/dev/null 2>&1; then
    NODE_EXPORTER_HOST="$candidate"
    break
  fi
done

# Append the discovered host, comma-separated when the list is non-empty.
if [ -n "$NODE_EXPORTER_HOST" ]; then
  NODE_EXPORTER_TARGETS="${NODE_EXPORTER_TARGETS:+${NODE_EXPORTER_TARGETS},}${NODE_EXPORTER_HOST}:9100"
fi

append_exporter_job "node" "$NODE_EXPORTER_TARGETS" "9100" "$NODE_EXPORTER_SERVICE"
[ -z "$NODE_EXPORTER_TARGETS" ] \
  || echo "[prometheus-agent] Node Exporter 采集目标已配置project=${OBS_PROJECT} service=${NODE_EXPORTER_SERVICE}"
# Append the remote_write section. The url is a field of the first list item,
# and bearer_token (when enabled) must line up with it to belong to that item.
cat >> "$CONFIG" <<EOF
remote_write:
  - url: "$METRICS_REMOTE_WRITE_URL"
EOF
if is_truthy "$OBS_AUTH_ENABLE"; then
  # printf, not echo: some sh implementations make echo interpret backslash
  # sequences, which would corrupt a token that contains a backslash.
  printf '    bearer_token: "%s"\n' "$OBS_AUTH_TOKEN" >> "$CONFIG"
  echo "[prometheus-agent] remote_write 鉴权已启用" >&2
else
  echo "[prometheus-agent] remote_write 鉴权未启用" >&2
fi

# Print the generated configuration for troubleshooting, with the token value
# masked so the secret does not end up in the container log.
echo "[prometheus-agent] 配置文件已生成:"
sed 's/^\([[:space:]]*bearer_token:\).*$/\1 "<redacted>"/' "$CONFIG"
echo ""

# Prometheus 3.x no longer needs --enable-feature=agent.
# NOTE(review): the file header describes agent mode, but no agent flag is
# passed here and --storage.tsdb.path is used; confirm whether the Prometheus
# version in the image needs an explicit agent flag to run as an agent.
exec /bin/prometheus --config.file=/prometheus/prometheus.yml --storage.tsdb.path=/prometheus/data --web.enable-lifecycle