#!/bin/bash
#
# health-check.sh - health check for the IoT base-station service stack.
#
# Probes PostgreSQL, InfluxDB, Redis, NATS, MQTT, Grafana, Prometheus and the
# three application HTTP endpoints on localhost, then prints container, disk
# and listening-socket summaries. Every probe is best-effort: a failing or
# missing tool is reported and the remaining checks still run.
#
# Optional environment:
#   INFLUXDB_TOKEN      token sent to the InfluxDB orgs API (default: influxdb-token)
#   INFLUXDB_ORG        organisation expected in the reply  (default: iot-org)
#   HTTP_TIMEOUT        seconds allowed per curl request    (default: 5)
#   MQTT_TIMEOUT        seconds mosquitto_sub may wait      (default: 3)
#   MQTT_CLIENTS_TOPIC  topic carrying the connected-client count
#                       (default: $SYS/broker/clients/connected)

# -e is deliberately not set: one unavailable tool (jq, docker, netstat) must
# not abort the report half-way. pipefail is left off so that a producer
# exiting non-zero after printing useful output does not discard that output.
set -u

readonly INFLUXDB_TOKEN="${INFLUXDB_TOKEN:-influxdb-token}"
readonly INFLUXDB_ORG="${INFLUXDB_ORG:-iot-org}"
readonly HTTP_TIMEOUT="${HTTP_TIMEOUT:-5}"
readonly MQTT_TIMEOUT="${MQTT_TIMEOUT:-3}"
# shellcheck disable=SC2016 # $SYS is a literal MQTT topic prefix, not a shell variable
readonly MQTT_CLIENTS_TOPIC=${MQTT_CLIENTS_TOPIC:-'$SYS/broker/clients/connected'}

#######################################
# Run a Compose sub-command, preferring the standalone docker-compose binary
# and falling back to the "docker compose" plugin.
# Arguments: passed through to Compose unchanged
# Returns:   exit status of the Compose invocation
#######################################
compose() {
  if command -v docker-compose >/dev/null 2>&1; then
    docker-compose "$@"
  else
    docker compose "$@"
  fi
}

#######################################
# GET a URL and write the response body to stdout.
# --fail turns HTTP status >= 400 into a non-zero exit so an erroring service
# is not mistaken for a healthy one; --max-time bounds a hung endpoint.
# Arguments: curl arguments (extra options and the URL)
# Returns:   curl's exit status
#######################################
http_get() {
  curl --fail --silent --max-time "$HTTP_TIMEOUT" "$@" 2>/dev/null
}

#######################################
# Succeed when a URL answers with a non-error HTTP status.
# Arguments: curl arguments (extra options and the URL)
#######################################
http_ok() {
  http_get "$@" >/dev/null
}

#######################################
# Probe one HTTP health endpoint and print its status line.
# Arguments: $1 - label shown in the report
#            $2 - URL to probe
# Outputs:   "✓ <label>: 健康" or "✗ <label>: 不可用"
# Returns:   0 when healthy, 1 otherwise
#######################################
check_http_service() {
  local label=$1 url=$2
  if http_ok "$url"; then
    echo "✓ ${label}: 健康"
    return 0
  fi
  echo "✗ ${label}: 不可用"
  return 1
}

#######################################
# Apply a jq filter to the JSON document on stdin.
# Arguments: $1 - jq filter
# Outputs:   the filter result, or "未知" when jq is missing, fails, or
#            produces no output
#######################################
json_field() {
  local filter=$1 value
  value=$(jq -r "$filter" 2>/dev/null) || value=""
  printf '%s\n' "${value:-未知}"
}

#######################################
# Extract the used_memory_human value from "redis-cli info memory" output on
# stdin, dropping the trailing carriage return Redis puts on each line.
#######################################
redis_used_memory() {
  awk -F: '$1 == "used_memory_human" { sub(/\r$/, "", $2); print $2 }'
}

# Check PostgreSQL: server readiness first, then a real query against the
# iot_base_station database.
check_postgres() {
  echo "检查PostgreSQL..."
  if compose exec -T postgres pg_isready -U postgres >/dev/null 2>&1; then
    echo "✓ PostgreSQL: 健康"
    if compose exec -T postgres psql -U postgres -d iot_base_station \
      -c "SELECT 1;" >/dev/null 2>&1; then
      echo " - 数据库连接: 正常"
    else
      echo " - 数据库连接: 异常"
    fi
  else
    echo "✗ PostgreSQL: 不可用"
  fi
}

# Check InfluxDB: health endpoint, then confirm the expected organisation is
# listed by the orgs API.
check_influxdb() {
  local orgs
  echo "检查InfluxDB..."
  check_http_service "InfluxDB" "http://localhost:8086/health" || return 0
  orgs=$(http_get -H "Authorization: Token ${INFLUXDB_TOKEN}" \
    "http://localhost:8086/api/v2/orgs") || orgs=""
  if grep -qF -- "$INFLUXDB_ORG" <<<"$orgs"; then
    echo " - 组织配置: 正常"
  else
    echo " - 组织配置: 异常"
  fi
}

# Check Redis through Compose, falling back to a redis-cli on this host, and
# report memory usage for whichever one answered.
check_redis() {
  echo "检查Redis..."
  if compose exec -T redis redis-cli ping >/dev/null 2>&1; then
    echo "✓ Redis: 健康"
    printf ' - 内存使用: %s\n' \
      "$(compose exec -T redis redis-cli info memory 2>/dev/null | redis_used_memory)"
  elif redis-cli ping >/dev/null 2>&1; then
    echo "✓ Redis (本地): 健康"
    printf ' - 内存使用: %s\n' \
      "$(redis-cli info memory 2>/dev/null | redis_used_memory)"
  else
    echo "✗ Redis: 不可用"
  fi
}

# Check NATS via its HTTP monitoring endpoint and report the connection count.
check_nats() {
  echo "检查NATS..."
  check_http_service "NATS" "http://localhost:8222/varz" || return 0
  printf ' - 当前连接数: %s\n' \
    "$(http_get "http://localhost:8222/varz" | json_field '.connections // 0')"
}

# Check MQTT through Compose, falling back to a mosquitto_pub on this host.
# The publish is used only to prove that a connection can be established.
check_mqtt() {
  local raw connections=0
  echo "检查MQTT..."
  # shellcheck disable=SC2016 # $SYS is a literal MQTT topic prefix
  if compose exec -T mqtt mosquitto_pub -h localhost \
    -t '$SYS/broker/version' -m 'test' >/dev/null 2>&1; then
    echo "✓ MQTT: 健康"
    # -W bounds the wait: with -C 1 alone mosquitto_sub blocks forever when
    # the broker never publishes on the topic.
    raw=$(compose exec -T mqtt mosquitto_sub -h localhost \
      -t "$MQTT_CLIENTS_TOPIC" -C 1 -W "$MQTT_TIMEOUT" 2>/dev/null) || raw=""
    if [[ "$raw" =~ [0-9]+ ]]; then
      connections=${BASH_REMATCH[0]}
    fi
    printf ' - 当前连接数: %s\n' "$connections"
  elif mosquitto_pub -h localhost -t '$SYS/broker/version' -m 'test' \
    >/dev/null 2>&1; then
    echo "✓ MQTT (本地): 健康"
  else
    echo "✗ MQTT: 不可用"
  fi
}

# Check Prometheus and report how many scrape targets are active.
check_prometheus() {
  echo "检查Prometheus..."
  check_http_service "Prometheus" "http://localhost:9090/-/healthy" || return 0
  printf ' - 活跃目标数: %s\n' \
    "$(http_get "http://localhost:9090/api/v1/targets" \
      | json_field '.data.activeTargets | length')"
}

# List listening TCP/UDP sockets with netstat, or ss where net-tools is absent.
list_sockets() {
  if command -v netstat >/dev/null 2>&1; then
    netstat -tuln
  else
    ss -tuln
  fi
}

# Print container resource usage, disk usage and listening sockets. Each
# section degrades to a one-line notice instead of stopping the script.
report_resources() {
  echo ""
  echo "=== 资源使用情况 ==="
  docker stats --no-stream \
    --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}" \
    || echo "无法获取容器资源信息"

  echo ""
  echo "=== 磁盘使用情况 ==="
  # LC_ALL=C keeps the header in English so the "Filesystem" match holds.
  LC_ALL=C df -h | grep -E "(Filesystem|/dev/)" || echo "无法获取磁盘使用信息"

  echo ""
  echo "=== 网络连接 ==="
  list_sockets 2>/dev/null \
    | grep -E "(LISTEN|5432|8086|6379|4222|1883|3000|9090|8080|8081|8082)" \
    || echo "无法获取网络连接信息"
}

# Run every check in the original order, then the resource summaries.
main() {
  echo "=== 物联网基站服务健康检查 ==="
  check_postgres
  check_influxdb
  check_redis
  check_nats
  check_mqtt

  echo "检查Grafana..."
  check_http_service "Grafana" "http://localhost:3000/api/health"

  check_prometheus

  echo "检查应用服务..."
  check_http_service "主服务器" "http://localhost:8080/health"
  check_http_service "数据网关" "http://localhost:8081/health"
  check_http_service "监控服务" "http://localhost:8082/health"

  echo ""
  echo "=== 健康检查完成 ==="

  report_resources
  return 0
}

main "$@"