使用docker部署grafana+prometheus配置

作者:runzhao 时间:2022-05-02 15:36:33 

docker-compose-monitor.yml


# Compose stack for monitoring: InfluxDB + Telegraf, and
# Prometheus + Alertmanager + Grafana + node-exporter + cAdvisor.
# Every service is nested under `services:` with a uniform 2-space indent;
# a service key written at column 0 would be read as an unknown top-level
# key instead of a service.
version: '2'

networks:
  # Bridge network joined by the Prometheus-side services below.
  monitor:
    driver: bridge

services:
  influxdb:
    image: influxdb:latest
    container_name: tig-influxdb
    ports:
      - '18083:8083'
      - '18086:8086'
      - '18090:8090'
    env_file:
      - 'env.influxdb'
    volumes:
      # Data persistency
      # sudo mkdir -p ./influxdb/data
      - ./influxdb/data:/var/lib/influxdb
      # Set the time inside the container to UTC+8 (China Standard Time).
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    restart: unless-stopped  # restart automatically unless explicitly stopped

  telegraf:
    image: telegraf:latest
    container_name: tig-telegraf
    links:
      - influxdb
    volumes:
      - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    restart: unless-stopped

  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # Main configuration and the alert rule file it references.
      - /home/qa/docker/grafana/prometheus.yml:/etc/prometheus/prometheus.yml
      - /home/qa/docker/grafana/node_down.yml:/etc/prometheus/node_down.yml
    ports:
      - '9090:9090'
    networks:
      - monitor

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    volumes:
      - /home/qa/docker/grafana/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - '9093:9093'
    networks:
      - monitor

  grafana:
    # NOTE(review): no volume is mounted for this service, so Grafana state
    # presumably does not survive container re-creation - confirm intended.
    image: grafana/grafana:6.7.4
    container_name: grafana
    hostname: grafana
    restart: always
    ports:
      - '13000:3000'
    networks:
      - monitor

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    ports:
      - '9100:9100'
    networks:
      - monitor

  cadvisor:
    image: google/cadvisor:latest
    container_name: cadvisor
    hostname: cadvisor
    restart: always
    volumes:
      # Host paths exposed to the container; all read-only except /var/run.
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - '18080:8080'
    networks:
      - monitor

alertmanager.yml


# Alertmanager configuration: route every alert to a single e-mail receiver.
global:
  resolve_timeout: 5m
  # Placeholder value: replace with the sender mailbox address.
  smtp_from: '邮箱'
  smtp_smarthost: 'smtp.exmail.qq.com:25'
  # Placeholder values: replace with the SMTP account and its password.
  smtp_auth_username: '邮箱'
  smtp_auth_password: '密码'
  # NOTE(review): TLS is not required here, so the SMTP credentials may be
  # sent unencrypted - confirm this is acceptable for the target network.
  smtp_require_tls: false
  smtp_hello: 'qq.com'

route:
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: 'email'

receivers:
  # `email_configs` must be a sibling of `name` inside the same list item,
  # i.e. indented to the same column as `name`.
  - name: 'email'
    email_configs:
      # Placeholder value: replace with the recipient mailbox address.
      - to: '收件邮箱'
        send_resolved: true

inhibit_rules:
  # An alert with severity 'critical' inhibits one with severity 'warning'
  # when the labels listed in `equal` match on both.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

prometheus.yml


# Prometheus server configuration: global timings, Alertmanager endpoint,
# rule files, and the scrape jobs for node-exporter and cAdvisor hosts.
global:
  scrape_interval: 15s  # Scrape every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. Default is 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['192.168.32.117:9093']
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global
# 'evaluation_interval'.
rule_files:
  - 'node_down.yml'
  # - "node-exporter-alert-rules.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape jobs. The job name is added as a label `job=<job_name>` to any
# timeseries scraped from that job.
scrape_configs:
  # IO storage node group
  - job_name: 'io'
    scrape_interval: 8s
    # Ports are the ones node-exporter listens on.
    # The comment sits on its own line: `static_configs:#...` without
    # spaces is not parsed as a key followed by a comment.
    static_configs:
      - targets:
          - '192.168.32.117:9100'
          - '192.168.32.196:9100'
          - '192.168.32.136:9100'
          - '192.168.32.193:9100'
          - '192.168.32.153:9100'
          - '192.168.32.185:9100'
          - '192.168.32.190:19100'
          - '192.168.32.192:9100'

  - job_name: 'cadvisor'
    # Ports are the ones cAdvisor listens on.
    static_configs:
      - targets:
          - '192.168.32.117:18080'
          - '192.168.32.193:8080'
          - '192.168.32.153:8080'
          - '192.168.32.185:8080'
          - '192.168.32.190:18080'
          - '192.168.32.192:18080'

node_down.yml


# Prometheus alerting rules: target availability plus host memory, disk
# and CPU thresholds.
groups:
  - name: node_down
    rules:
      # A scrape target has reported up == 0 for one minute.
      - alert: InstanceDown
        expr: 'up == 0'
        for: 1m
        labels:
          user: test
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."

      # Less than 10% of memory is still available.
      - alert: '剩余内存小于10%'
        expr: 'node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Host out of memory (instance {{ $labels.instance }})'
          description: |-
            Node memory is filling up (< 10% left)
              VALUE = {{ $value }}
              LABELS = {{ $labels }}

      # Less than 10% of disk space is left on a writable filesystem.
      - alert: '剩余磁盘小于10%'
        expr: '(node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Host out of disk space (instance {{ $labels.instance }})'
          description: |-
            Disk is almost full (< 10% left)
              VALUE = {{ $value }}
              LABELS = {{ $labels }}

      # Non-idle CPU share, averaged per instance over 2 minutes, exceeds 80%.
      - alert: 'CPU负载 > 80%'
        expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Host high CPU load (instance {{ $labels.instance }})'
          description: |-
            CPU load is > 80%
              VALUE = {{ $value }}
              LABELS = {{ $labels }}

告警规则参考:https://awesome-prometheus-alerts.grep.to/rules#prometheus-self-monitoring

官网仪表盘:https://grafana.com/grafana/dashboards/

来源:https://www.cnblogs.com/runzhao/p/15716274.html

标签:docker,grafana,prometheus
0
投稿

猜你喜欢

  • 网络安全之小技巧教你保护(IIS)Web服务器

    2009-09-20 20:04:00
  • 关闭selinux(防火墙)方法分享

    2022-11-03 20:49:35
  • VMware使用方法(图文详解)

    2022-02-09 12:28:10
  • 域名经济时代资源稀少 域名选择经验再分享

    2009-01-07 14:19:00
  • eAccelerator参数详解

    2010-02-10 12:24:00
  • Discuz X链接伪静态的.htaccess重定向规则

    2011-08-18 18:31:05
  • 新手开店:心态是第一决定要素

    2009-07-09 14:11:00
  • CentOS 7下用firewall-cmd控制端口与端口转发详解

    2023-11-01 05:33:21
  • 金山年终游戏软件双引擎加速 多款新品将问世

    2009-10-31 16:03:00
  • 使用参数化查询防止SQL注入漏洞

    2010-06-26 12:56:00
  • Linux常用命令mkdir详解

    2023-08-17 23:02:27
  • 交互性与口碑传播 看博客的广告两大策略

    2009-01-20 10:37:00
  • 如何在Exchange 2003中恢复删除的邮件?

    2010-02-20 18:44:00
  • GoDaddy:如何压缩文件

    2010-04-26 13:00:00
  • 博雅立方:指引传统B2C的突围之路

    2009-10-27 13:48:00
  • 国内最新模板引擎xingTemplate介绍

    2009-10-28 11:04:00
  • php.ini中文版

    2008-02-23 10:12:00
  • 站长赚钱必读:选择优秀广告联盟的技巧

    2009-02-19 10:42:00
  • 网站重建如何保护已有排名

    2008-02-17 14:09:00
  • 通盘考虑 搜索引擎优化SEO文案写作技巧

    2009-01-06 09:38:00
  • asp之家 网站运营 m.aspxhome.com