1. prometheus-添加监控linux服务器

  • prometheus添加监控linux服务器

  • 安装部署

    • 下载node_exporter包
      image

      https://github.com/prometheus/node_exporter/releases/download/v1.1.2/node_exporter-1.1.2.linux-amd64.tar.gz
      
    • 在被监控端主机上下载

      [root@VM-0-17-centos ~]# wget https://github.com/prometheus/node_exporter/releases/download/v1.1.2/node_exporter-1.1.2.linux-amd64.tar.gz
      
    • 解压

      [root@VM-0-17-centos ~]# tar  -xvf node_exporter-1.1.2.linux-amd64.tar.gz 
      node_exporter-1.1.2.linux-amd64/
      node_exporter-1.1.2.linux-amd64/LICENSE
      node_exporter-1.1.2.linux-amd64/NOTICE
      node_exporter-1.1.2.linux-amd64/node_exporter
      
    • 移动到/opt目录下存放,并重命名为node_exporter

      [root@VM-0-17-centos ~]# mv node_exporter-1.1.2.linux-amd64 /opt/
      [root@VM-0-17-centos ~]# ll /opt/node_exporter-1.1.2.linux-amd64/
      total 18748
      -rw-r--r-- 1 3434 3434    11357 Mar  5 17:41 LICENSE
      -rwxr-xr-x 1 3434 3434 19178528 Mar  5 17:29 node_exporter
      -rw-r--r-- 1 3434 3434      463 Mar  5 17:41 NOTICE
      [root@VM-0-17-centos ~]# mv /opt/node_exporter-1.1.2.linux-amd64 /opt/node_exporter
      
    • 切换目录

      [root@VM-0-17-centos ~]# cd /opt/node_exporter/
      [root@VM-0-17-centos node_exporter]# ll
      total 18748
      -rw-r--r-- 1 3434 3434    11357 Mar  5 17:41 LICENSE
      -rwxr-xr-x 1 3434 3434 19178528 Mar  5 17:29 node_exporter
      -rw-r--r-- 1 3434 3434      463 Mar  5 17:41 NOTICE
      
    • 尝试启动服务

      [root@VM-0-17-centos node_exporter]# ./node_exporter 
      level=info ts=2021-05-19T07:00:05.583Z caller=node_exporter.go:178 msg="Starting node_exporter" version="(version=1.1.2, branch=HEAD, revision=b597c1244d7bef49e6f3359c87a56dd7707f6719)"
      level=info ts=2021-05-19T07:00:05.583Z caller=node_exporter.go:179 msg="Build context" build_context="(go=go1.15.8, user=root@f07de8ca602a, date=20210305-09:29:10)"
      level=warn ts=2021-05-19T07:00:05.583Z caller=node_exporter.go:181 msg="Node Exporter is running as root user. This exporter is designed to run as unpriviledged user, root is not required."
      level=info ts=2021-05-19T07:00:05.583Z caller=filesystem_common.go:74 collector=filesystem msg="Parsed flag --collector.filesystem.ignored-mount-points" flag=^/(dev|proc|sys|var/lib/docker/.+)($|/)
      level=info ts=2021-05-19T07:00:05.583Z caller=filesystem_common.go:76 collector=filesystem msg="Parsed flag --collector.filesystem.ignored-fs-types" flag=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:106 msg="Enabled collectors"
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=arp
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=bcache
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=bonding
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=btrfs
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=conntrack
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=cpu
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=cpufreq
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=diskstats
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=edac
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=entropy
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=fibrechannel
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=filefd
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=filesystem
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=hwmon
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=infiniband
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=ipvs
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=loadavg
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=mdadm
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=meminfo
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=netclass
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=netdev
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=netstat
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=nfs
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=nfsd
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=powersupplyclass
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=pressure
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=rapl
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=schedstat
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=sockstat
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=softnet
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=stat
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=textfile
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=thermal_zone
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=time
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=timex
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=udp_queues
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=uname
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=vmstat
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=xfs
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:113 collector=zfs
      level=info ts=2021-05-19T07:00:05.584Z caller=node_exporter.go:195 msg="Listening on" address=:9100
      level=info ts=2021-05-19T07:00:05.584Z caller=tls_config.go:191 msg="TLS is disabled." http2=false
      
      
  • 配置为系统服务管理

    • 编写系统配置服务

      [root@iZj6cbgktk3zjpge312vq1Z node_exporter]# vim  /usr/lib/systemd/system/node_exporter.service
      [root@iZj6cbgktk3zjpge312vq1Z node_exporter]# cat  /usr/lib/systemd/system/node_exporter.service
      [Unit]
      Description=node_exporter
      [Service]
      # 通过--web.config指定认证配置文件/opt/node_exporter/config.yml;如不需要认证,可去掉该参数
      ExecStart=/opt/node_exporter/node_exporter --web.config=/opt/node_exporter/config.yml   
      ExecReload=/bin/kill -HUP $MAINPID
      KillMode=process
      Restart=on-failure
      [Install]
      WantedBy=multi-user.target
      
    • 生成config配置文件所需的密码哈希

      [root@VM-0-17-centos node_exporter]# yum install httpd-tools -y
      [root@VM-0-17-centos node_exporter]# htpasswd -nBC 12 '' | tr -d ':\n'
      New password:       # 这里输入的是123456
      Re-type new password:    # 这里输入的是123456
      $2y$12$.YGKNPkYfSOsm.JataWRUe4vWdTS8nW6YtPQI0Jr14eTv6E5Fpdga          # 这段是生成的bcrypt密码哈希,需填入config.yml
      
    • 编写启动配置文件

      [root@iZj6cbgktk3zjpge312vq1Z node_exporter]# vim config.yml
      [root@iZj6cbgktk3zjpge312vq1Z node_exporter]# cat config.yml
      basic_auth_users:
        prometheus: $2y$12$.YGKNPkYfSOsm.JataWRUe4vWdTS8nW6YtPQI0Jr14eTv6E5Fpdga
      
    • 启动服务

      systemctl daemon-reload
      systemctl start node_exporter
      systemctl enable node_exporter
      
  • 在prometheus添加主机

    • 添加配置文件

      [root@iZj6cbgktk3zjpge312vq2Z prometheus]# vim prometheus.yml
      [root@iZj6cbgktk3zjpge312vq2Z prometheus]# cat prometheus.yml
      # my global config
      global:
        scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
        evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
        # scrape_timeout is set to the global default (10s).
      
      # Alertmanager configuration
      alerting:
        alertmanagers:
        - static_configs:
          - targets:
            # - alertmanager:9093
      
      # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
      rule_files:
        # - "first_rules.yml"
        # - "second_rules.yml"
      
      # A scrape configuration containing exactly one endpoint to scrape:
      # Here it's Prometheus itself.
      scrape_configs:
        # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
        - job_name: 'prometheus'
      
          # metrics_path defaults to '/metrics'
          # scheme defaults to 'http'.
      
          static_configs:
          - targets: ['127.0.0.1:9090']
        - job_name: linux
          # 添加监控主机的用户和密码
          basic_auth:
            username: prometheus
            password: 123456
          static_configs:
            - targets: ['10.1.2.211:9100']
              labels:
                prod: web
      
      
    • 重启服务

      [root@iZj6cbgktk3zjpge312vq2Z prometheus]# /bin/systemctl restart prometheus
      
  • 验证被监控端是否已成功添加到Prometheus

    • 验证prometheus配置文件
      image

    • 验证发现已经有数据了
      image

  • 使用Grafana展示node_exporter数据指标,仪表盘ID:9276

    • 导入仪表盘
      image
      image
      image

    • 添加
      image
      image