修复业务侧日志采集失败问题
Some checks failed
itom-platform auto build image / build (push) Has been cancelled

This commit is contained in:
kim
2026-01-21 12:08:32 +08:00
parent e018f742ad
commit ea386977ba
5 changed files with 677 additions and 0 deletions

View File

@@ -0,0 +1,236 @@
#!/usr/bin/env sh
set -eu
# ------------------------------
# chat-deploy metrics collection: Prometheus Agent entrypoint
# - Generates /prometheus/prometheus.yml and /prometheus/targets.json from
#   environment variables.
# - Then starts Prometheus, which pushes samples to the itom-platform
#   centre through remote_write.
#
# Key environment variables (from config.env):
# - METRICS_REMOTE_WRITE_URL=http(s)://<OBS_HOST>/api/v1/write
# - METRICS_TARGETS=name=host:port,name2=host:port
# - OBS_AUTH_ENABLE=false/true   whether the centre requires authentication
# - OBS_AUTH_TOKEN=xxxxx         required when OBS_AUTH_ENABLE=true
# - OBS_PROJECT / OBS_ENV        written as labels so the centre can filter
# ------------------------------

# Give every setting a value so later expansions are safe under `set -u`.
# `:=` assigns the default only when the variable is unset or empty.
: "${METRICS_REMOTE_WRITE_URL:=}"
: "${OBS_AUTH_ENABLE:=false}"
: "${OBS_AUTH_TOKEN:=}"
: "${METRICS_TARGETS:=}"
: "${OBS_PROJECT:=chat-deploy}"
: "${OBS_ENV:=prod}"
: "${OBS_SERVICE:=chat-deploy}"

# The service name falls back to OBS_SERVICE, then to a literal default.
: "${OBS_SERVICE_NAME:=$OBS_SERVICE}"
: "${OBS_SERVICE_NAME:=chat-deploy}"
# is_truthy VALUE
# Succeeds (status 0) when VALUE is an affirmative switch: 1, true, yes or on,
# in any letter case. Every other value, including an empty or missing
# argument, yields status 1.
is_truthy() {
  # ${1:-} keeps a call without arguments from aborting under `set -u`.
  case "${1:-}" in
    1 | [Tt][Rr][Uu][Ee] | [Yy][Ee][Ss] | [Oo][Nn]) return 0 ;;
    *) return 1 ;;
  esac
}
# Fail fast (exit status 2) when a mandatory remote_write setting is missing.
[ -n "$METRICS_REMOTE_WRITE_URL" ] || {
  echo "[prometheus-agent] FAIL: METRICS_REMOTE_WRITE_URL 为空" >&2
  exit 2
}
# A token is mandatory as soon as authentication is switched on.
if [ -z "$OBS_AUTH_TOKEN" ] && is_truthy "$OBS_AUTH_ENABLE"; then
  echo "[prometheus-agent] FAIL: OBS_AUTH_ENABLE=true 但 OBS_AUTH_TOKEN 为空" >&2
  exit 2
fi
# Make sure the data directory exists and is writable.
mkdir -p /prometheus/data
# Best effort only: a failing chmod is deliberately ignored.
chmod -R 777 /prometheus 2>/dev/null || true

TARGETS_JSON="/prometheus/targets.json"

# json_escape VALUE
# Prints VALUE with backslashes and double quotes escaped so that it can be
# embedded inside a JSON string literal.
json_escape() {
  printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g'
}

# Build the file_sd target list from METRICS_TARGETS (name=host:port,...).
echo "[" > "$TARGETS_JSON"
first=1
project_json="$(json_escape "$OBS_PROJECT")"
env_json="$(json_escape "$OBS_ENV")"

# Split on commas with parameter expansion instead of IFS word splitting, so
# entries are never subject to pathname expansion and IFS is left untouched.
remaining="${METRICS_TARGETS},"
while [ -n "$remaining" ]; do
  item="${remaining%%,*}"
  remaining="${remaining#*,}"

  item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
  [ -n "$item" ] || continue

  name="$(printf '%s' "$item" | cut -d= -f1 | tr -d '[:space:]')"
  target="$(printf '%s' "$item" | cut -d= -f2- | tr -d '[:space:]')"
  [ -n "$name" ] || continue
  [ -n "$target" ] || continue

  # Keep only host[:port]: drop an http(s):// scheme and any path.
  target="$(printf '%s' "$target" | sed -E 's#^https?://##; s#/.*$##')"
  # Stripping can leave nothing (e.g. "name=/metrics"); never emit "".
  [ -n "$target" ] || continue

  if [ "$first" -eq 1 ]; then
    first=0
  else
    echo "," >> "$TARGETS_JSON"
  fi
  printf '{"targets":["%s"],"labels":{"service":"%s","project":"%s","env":"%s"}}\n' \
    "$(json_escape "$target")" "$(json_escape "$name")" \
    "$project_json" "$env_json" >> "$TARGETS_JSON"
done
echo "]" >> "$TARGETS_JSON"

# first is still 1 when no entry produced a usable target.
if [ "$first" -eq 1 ]; then
  echo "[prometheus-agent] WARN: METRICS_TARGETS 为空或格式无效,仅采集 prometheus-agent 自身指标和 Exporter 指标" >&2
fi
CONFIG="/prometheus/prometheus.yml"

# Write the base configuration. The heredoc body is YAML, so its indentation
# is significant: children of `global` and each `scrape_configs` entry must be
# nested, otherwise Prometheus rejects the file.
cat > "$CONFIG" <<EOF
global:
  scrape_interval: 15s
  external_labels:
    project: $OBS_PROJECT
    env: $OBS_ENV
    service: $OBS_SERVICE_NAME
scrape_configs:
  # Prometheus Agent 自身指标(用于验证采集链路是否正常)
  - job_name: "prometheus-agent"
    static_configs:
      - targets: ["localhost:9090"]
  # 业务服务指标(通过 METRICS_TARGETS 或 file_sd_configs 配置)
  - job_name: "services"
    metrics_path: /metrics
    file_sd_configs:
      - files: ["$TARGETS_JSON"]
        refresh_interval: 10s
EOF

# Exporter jobs (Redis, MongoDB, ...) are appended after this marker and stay
# part of the scrape_configs list opened above.
echo "" >> "$CONFIG"
echo "  # 自动发现/手动指定的 Exporter 指标" >> "$CONFIG"
# trim STRING
# Prints STRING with leading and trailing whitespace removed.
trim() {
  printf '%s' "$1" |
    sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}
# normalize_target RAW DEFAULT_PORT
# Reduces RAW to a scrape address and prints it without a trailing newline:
#   - surrounding whitespace, an http(s):// scheme and any /path are removed;
#   - DEFAULT_PORT is appended when the result contains no ':'.
# Returns 1 and prints nothing when nothing is left after cleaning.
normalize_target() {
  # Whitespace is trimmed BEFORE the scheme is stripped; otherwise an entry
  # such as " http://host:9121/x" (typical after a ", " separator) keeps its
  # scheme and is cut down to "http:" by the path rule.
  nt_addr="$(printf '%s' "$1" | sed -E \
    's/^[[:space:]]*//; s#^https?://##; s#/.*$##; s/^[[:space:]]*//; s/[[:space:]]*$//')"
  [ -n "$nt_addr" ] || return 1
  case "$nt_addr" in
    *:*) printf '%s' "$nt_addr" ;;
    *) printf '%s:%s' "$nt_addr" "$2" ;;
  esac
}
# append_exporter_job JOB TARGETS DEFAULT_PORT SERVICE_LABEL
# Appends one scrape job named JOB to $CONFIG as an entry of the
# scrape_configs list.
#   TARGETS       comma-separated addresses; each one is passed through
#                 normalize_target with DEFAULT_PORT, empty results are
#                 skipped and repeated addresses are listed only once
#   SERVICE_LABEL value of the `service` label attached to the targets
# Reads the globals CONFIG and OBS_PROJECT. Writes nothing and returns 0 when
# no usable target remains.
append_exporter_job() {
  job="$1"
  targets="$2"
  default_port="$3"
  service_label="$4"
  targets_list=""

  # Split on commas with parameter expansion: no IFS changes, no globbing.
  aej_rest="${targets},"
  while [ -n "$aej_rest" ]; do
    item="${aej_rest%%,*}"
    aej_rest="${aej_rest#*,}"
    item="$(normalize_target "$item" "$default_port" || true)"
    [ -n "$item" ] || continue
    # Skip an address that is already in the list (callers may append an
    # auto-discovered host to a user-supplied list that already names it).
    case ",${targets_list}," in
      *",${item},"*) continue ;;
    esac
    targets_list="${targets_list:+${targets_list},}${item}"
  done

  [ -n "$targets_list" ] || return 0

  # The YAML indentation below is significant: the job is a list item under
  # scrape_configs, and targets/labels are nested inside static_configs.
  {
    printf "  - job_name: '%s'\n" "$job"
    printf '    static_configs:\n'
    printf '      - targets:\n'
    aej_rest="${targets_list},"
    while [ -n "$aej_rest" ]; do
      printf "          - '%s'\n" "${aej_rest%%,*}"
      aej_rest="${aej_rest#*,}"
    done
    printf '        labels:\n'
    printf "          project: '%s'\n" "$OBS_PROJECT"
    printf "          service: '%s'\n" "$service_label"
  } >> "$CONFIG"
}
# Redis Exporter (default port 9121).
# Targets are REDIS_EXPORTER_TARGETS plus, when one of the hostnames below
# resolves through getent, that host on port 9121.
REDIS_EXPORTER_TARGETS="${REDIS_EXPORTER_TARGETS:-}"
REDIS_EXPORTER_SERVICE="${REDIS_EXPORTER_SERVICE:-$OBS_SERVICE_NAME}"
REDIS_EXPORTER_HOST=""
# The first resolvable name wins.
for candidate in redis-exporter chat-deploy-redis-exporter; do
  if getent hosts "$candidate" >/dev/null 2>&1; then
    REDIS_EXPORTER_HOST="$candidate"
    break
  fi
done
if [ -n "$REDIS_EXPORTER_HOST" ]; then
  # Append to an existing list with a comma, or start the list.
  REDIS_EXPORTER_TARGETS="${REDIS_EXPORTER_TARGETS:+${REDIS_EXPORTER_TARGETS},}${REDIS_EXPORTER_HOST}:9121"
fi
append_exporter_job "redis" "$REDIS_EXPORTER_TARGETS" "9121" "$REDIS_EXPORTER_SERVICE"
[ -z "$REDIS_EXPORTER_TARGETS" ] ||
  echo "[prometheus-agent] Redis Exporter 采集目标已配置project=${OBS_PROJECT} service=${REDIS_EXPORTER_SERVICE}"
# MongoDB Exporter (default port 9216) - chat-deploy uses MongoDB.
# Targets are MONGODB_EXPORTER_TARGETS plus, when one of the hostnames below
# resolves through getent, that host on port 9216.
MONGODB_EXPORTER_TARGETS="${MONGODB_EXPORTER_TARGETS:-}"
MONGODB_EXPORTER_SERVICE="${MONGODB_EXPORTER_SERVICE:-$OBS_SERVICE_NAME}"
MONGODB_EXPORTER_HOST=""
# The first resolvable name wins.
for candidate in mongodb-exporter chat-deploy-mongodb-exporter; do
  if getent hosts "$candidate" >/dev/null 2>&1; then
    MONGODB_EXPORTER_HOST="$candidate"
    break
  fi
done
if [ -n "$MONGODB_EXPORTER_HOST" ]; then
  # Append to an existing list with a comma, or start the list.
  MONGODB_EXPORTER_TARGETS="${MONGODB_EXPORTER_TARGETS:+${MONGODB_EXPORTER_TARGETS},}${MONGODB_EXPORTER_HOST}:9216"
fi
append_exporter_job "mongodb" "$MONGODB_EXPORTER_TARGETS" "9216" "$MONGODB_EXPORTER_SERVICE"
[ -z "$MONGODB_EXPORTER_TARGETS" ] ||
  echo "[prometheus-agent] MongoDB Exporter 采集目标已配置project=${OBS_PROJECT} service=${MONGODB_EXPORTER_SERVICE}"
# Node Exporter (default port 9100) - system-level metrics
# (CPU / memory / disk / network / IO).
# Targets are NODE_EXPORTER_TARGETS plus, when one of the hostnames below
# resolves through getent, that host on port 9100.
NODE_EXPORTER_TARGETS="${NODE_EXPORTER_TARGETS:-}"
NODE_EXPORTER_SERVICE="${NODE_EXPORTER_SERVICE:-$OBS_SERVICE_NAME}"
NODE_EXPORTER_HOST=""
# The first resolvable name wins.
for candidate in node-exporter chat-deploy-node-exporter; do
  if getent hosts "$candidate" >/dev/null 2>&1; then
    NODE_EXPORTER_HOST="$candidate"
    break
  fi
done
if [ -n "$NODE_EXPORTER_HOST" ]; then
  # Append to an existing list with a comma, or start the list.
  NODE_EXPORTER_TARGETS="${NODE_EXPORTER_TARGETS:+${NODE_EXPORTER_TARGETS},}${NODE_EXPORTER_HOST}:9100"
fi
append_exporter_job "node" "$NODE_EXPORTER_TARGETS" "9100" "$NODE_EXPORTER_SERVICE"
[ -z "$NODE_EXPORTER_TARGETS" ] ||
  echo "[prometheus-agent] Node Exporter 采集目标已配置project=${OBS_PROJECT} service=${NODE_EXPORTER_SERVICE}"
# Append the remote_write section. The heredoc body is YAML: `url` belongs to
# a list item under remote_write, so the indentation must be kept.
cat >> "$CONFIG" <<EOF
remote_write:
  - url: "$METRICS_REMOTE_WRITE_URL"
EOF
if is_truthy "$OBS_AUTH_ENABLE"; then
  # Four spaces align bearer_token with `url` inside the same list item.
  echo "    bearer_token: \"$OBS_AUTH_TOKEN\"" >> "$CONFIG"
  echo "[prometheus-agent] remote_write 鉴权已启用" >&2
else
  echo "[prometheus-agent] remote_write 鉴权未启用" >&2
fi

# Print the generated configuration for troubleshooting, with the bearer
# token masked so the secret does not end up in the container log.
echo "[prometheus-agent] 配置文件已生成:"
sed 's/^\([[:space:]]*bearer_token:\).*$/\1 "<redacted>"/' "$CONFIG"
echo ""

# Prometheus 3.x no longer needs --enable-feature=agent.
# NOTE(review): no --agent flag is passed and --storage.tsdb.path is used
# below, while the file header describes agent mode - confirm which mode is
# intended before changing these flags.
# exec replaces the shell so Prometheus receives container signals directly.
exec /bin/prometheus --config.file=/prometheus/prometheus.yml --storage.tsdb.path=/prometheus/data --web.enable-lifecycle