Alpine Linux 部署 SmokePing 网络监控

本文介绍如何在 Alpine Linux 系统上部署 SmokePing 网络延迟监控工具,使用 Caddy 作为 Web 服务器,FCGIWrap 处理 CGI 脚本。

系统要求

  • Alpine Linux 3.18+
  • 2GB+ RAM(推荐 4GB+)
  • 20GB+ 磁盘空间
  • 网络连接正常

安装步骤

1. 安装必要软件包

# 更新软件包索引
apk update

# 安装 SmokePing 及相关组件
apk add fcgiwrap fcgiwrap-openrc caddy smokeping smokeping-openrc

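# 确认 fcgiwrap 服务用户存在(通常由 fcgiwrap 软件包自动创建;Alpine 默认使用 BusyBox 的 adduser,没有 useradd)
id fcgiwrap >/dev/null 2>&1 || adduser -S -D -H -s /sbin/nologin fcgiwrap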

2. 创建数据目录

# 创建 SmokePing 数据目录和图片缓存目录
mkdir -p /var/lib/smokeping/Ping
mkdir -p /var/lib/smokeping/DNS
mkdir -p /var/lib/smokeping/images/Ping
mkdir -p /var/lib/smokeping/images/DNS

# 设置权限
chown smokeping:smokeping /var/lib/smokeping/Ping
chown smokeping:smokeping /var/lib/smokeping/DNS
chmod 777 /var/lib/smokeping/images/Ping
chmod 777 /var/lib/smokeping/images/DNS

3. 配置 Caddy Web 服务器

创建 Caddy 配置文件 /etc/caddy/Caddyfile

{
    log {
        output file /var/log/caddy/access.log {
            roll_size 100MB  # 单个日志文件最大 100MB
            roll_keep 10     # 保留最近 10 个日志文件
            roll_keep_for 720h  # 保留 30 天(720 小时)
        }
        format console  # 易读的格式,也可改用 json 格式(format json)
    }
}

http://192.168.1.100 {
    # 处理静态资源
    handle /js/* {
        root * /usr/share/webapps/smokeping/
        file_server
    }
    handle /css/* {
        root * /usr/share/webapps/smokeping/
        file_server
    }
    handle /imgcache/* {
        root * /usr/share/webapps/smokeping/
        file_server
    }
    handle_path /images/* {
        root * /var/lib/smokeping/images
        file_server browse
    }

    # 处理 CGI 请求
    handle {
        root * /usr/share/webapps/smokeping/
        reverse_proxy unix//run/fcgiwrap/fcgiwrap.sock {
            transport fastcgi {
                env SCRIPT_FILENAME /usr/share/webapps/smokeping/smokeping.cgi
                split ""
            }
        }
    }
}

4. 配置 SmokePing

创建配置文件 /etc/smokeping/config

*** General ***

owner    = Demo User
contact  = admin@example.com
mailhost = mail.example.com
sendmail = /usr/sbin/sendmail
# NOTE: do not put the Image Cache below cgi-bin
# since all files under cgi-bin will be executed ... this is not
# good for images.
imgcache = /var/lib/smokeping/images
imgurl   = /images
datadir  = /var/lib/smokeping
piddir   = /run/smokeping
cgiurl   = http://192.168.1.100/smokeping.cgi
smokemail = /etc/smokeping/smokemail
tmail     = /etc/smokeping/tmail
# specify this to get syslog logging
syslogfacility = local0
# each probe is now run in its own process
# disable this to revert to the old behaviour
# concurrentprobes = no

5. 配置告警规则

*** Alerts ***
to = admin@example.com
from = smokeping@example.com

# 定义各种告警规则
+hostdown
type = loss
# in percent
pattern = ==0%,==0%,==0%,==U
comment = 对端无响应

+bigloss
type = loss
# in percent
pattern = ==0%,==0%,==0%,==0%,>20%,>20%,>20%
comment = 连续3次采样-丢包率超过20%

+lossdetect
type = loss
# in percent
pattern = ==0%,==0%,==0%,==0%,>0%,>0%,>0%
comment = 连续3次采样-存在丢包

+someloss
type = loss
# in percent
pattern = >0%,*12*,>0%,*12*,>0%
comment = 间断性丢包

+rttdetect
type = rtt
# in milli seconds
pattern = <100,<100,<100,<100,<100,<150,>150,>150,>150
comment = 连续3次采样延迟增大-超过150ms

6. 配置数据库设置

*** Database ***

step     = 300
pings    = 20

# consfn mrhb steps total

AVERAGE  0.5   1  28800
AVERAGE  0.5  12   9600
    MIN  0.5  12   9600
    MAX  0.5  12   9600
AVERAGE  0.5 144   2400
    MAX  0.5 144   2400
    MIN  0.5 144   2400

7. 配置显示模板

*** Presentation ***

template = /etc/smokeping/basepage.html
htmltitle = yes
graphborders = no
# If enabled, treat all filter menu queries as literal strings instead of regex
literalsearch = no

+ charts

menu = Charts
title = The most interesting destinations

++ stddev
sorter = StdDev(entries=>4)
title = Top Standard Deviation
menu = Std Deviation
format = Standard Deviation %f

++ max
sorter = Max(entries=>5)
title = Top Max Roundtrip Time
menu = by Max
format = Max Roundtrip Time %f seconds

++ loss
sorter = Loss(entries=>5)
title = Top Packet Loss
menu = Loss
format = Packets Lost %f

++ median
sorter = Median(entries=>5)
title = Top Median Roundtrip Time
menu = by Median
format = Median RTT %f seconds

8. 配置探针

*** Probes ***

+ FPing

binary = /usr/sbin/fping

+ DNS
binary = /usr/bin/dig
lookup = g.cn
pings = 5
step = 180

9. 配置主从模式(可选)

*** Slaves ***
secrets=/etc/smokeping/smokeping_secrets
+boomer
display_name=boomer
color=0000ff

+slave2
display_name=another
color=00ff00

10. 配置监控目标

*** Targets ***

alerts = someloss
probe = FPing

menu = Top
title = Network Latency Grapher
remark = Welcome to the SmokePing website of <b>Example Company</b>. \
         Here you will learn all about the latency of our network.

+ Ping
menu = Ping
title = 10.0.0.14 Pings
alerts = hostdown,bigloss,lossdetect,someloss,rttdetect

++ server1
menu = 192.168.1.50
host = 192.168.1.50

++ server2
menu = 192.168.1.51
host = 192.168.1.51

+ DNS
menu = DNS
probe = DNS
alerts = hostdown,bigloss,lossdetect,someloss,rttdetect

++ dns-server
menu = 192.168.1.200
host = 192.168.1.200

启动服务

1. 启动并启用服务

# 启动 Caddy 服务并设置开机自启
rc-service caddy start
rc-update add caddy default

# 启动 SmokePing 服务并设置开机自启
rc-service smokeping start
rc-update add smokeping default

# 启动 fcgiwrap 服务并设置开机自启
rc-service fcgiwrap start
rc-update add fcgiwrap default

2. 检查服务状态

# 检查 Caddy 状态
rc-service caddy status

# 检查 SmokePing 状态
rc-service smokeping status

# 检查 fcgiwrap 状态
rc-service fcgiwrap status

验证部署

访问 http://192.168.1.100/smokeping.cgi 验证 SmokePing 是否正常工作。

常见问题

1. Caddy 启动失败

症状:rc-service caddy start 失败。解决方法:

# 检查配置文件语法
caddy validate --config /etc/caddy/Caddyfile

# 检查端口占用
netstat -tlnp | grep 80
netstat -tlnp | grep 443

2. SmokePing 无法生成图表

症状:访问 CGI 脚本显示空白或错误。解决方法:

# 检查 fcgiwrap 服务
rc-service fcgiwrap status

# 检查 CGI 脚本权限
ls -la /usr/share/webapps/smokeping/smokeping.cgi

# 检查数据目录权限
ls -la /var/lib/smokeping/

3. 权限问题

症状:无法创建数据或图片。解决方法:

# 确保所有目录权限正确
chown -R smokeping:smokeping /var/lib/smokeping/
chmod -R 755 /var/lib/smokeping/
chmod -R 777 /var/lib/smokeping/images/

4. 网络连接问题

症状:SmokePing 无法 ping 目标主机。解决方法:

# 测试 fping 命令
fping -c 5 192.168.1.50

# 检查防火墙设置
iptables -L -n

日志管理

1. Caddy 日志

# 查看 Caddy 访问日志
tail -f /var/log/caddy/access.log

# 查看系统日志
tail -f /var/log/messages | grep caddy

2. SmokePing 日志

# 查看 SmokePing 日志
tail -f /var/log/messages | grep smokeping

# 查看最近的 SmokePing 日志(Alpine 默认使用 OpenRC,没有 systemd 的 journalctl)
grep smokeping /var/log/messages | tail -n 50

定期维护

1. 数据清理

SmokePing 会自动管理数据文件,但如果需要手动清理:

# 查看磁盘使用情况
du -sh /var/lib/smokeping/

# 清理旧的图片文件(如果需要)
find /var/lib/smokeping/images -type f -mtime +90 -delete

2. 服务重启

# 重启所有服务
rc-service caddy restart
rc-service fcgiwrap restart
rc-service smokeping restart

3. 配置备份

# 备份配置文件
tar -czf /backup/smokeping-config-$(date +%Y%m%d).tar.gz \
  /etc/caddy/Caddyfile \
  /etc/smokeping/config

扩展功能

1. 添加更多监控目标

在配置文件的 *** Targets *** 部分添加新的目标:

+ new-host
menu = New Host
host = 192.168.1.100
alerts = hostdown,bigloss

2. 配置自定义告警

根据业务需求自定义告警规则:

+ custom-alert
type = rtt
pattern = >1000,<1000,<1000,<1000,<1000,<1000,>2000
comment = 延迟异常增高

性能优化

1. 调整采样间隔

根据网络状况调整 step 参数:

step = 600  # 10 分钟采样一次,减少资源占用

2. 限制并发探针

如果资源有限,可以禁用并发探针:

# disable concurrent probes
concurrentprobes = no

通过以上配置,您将在 Alpine Linux 上成功部署一个功能完整的 SmokePing 网络监控系统,提供实时的网络延迟、丢包率和抖动监控。

参考资料