crans_bcfg2/Python/etc/monit/services
Pierre-Elliott Bécue e00c869c2f Wheezy commence à arriver, et lenny dégage.
Ignore-this: 48bb60befbcca9f66a7b2f5ebb9d8201

darcs-hash:20121205211802-b6762-28880c06f512803baa322a695e8154a7b6192be3.gz
2012-12-05 22:18:02 +01:00

299 lines
8.9 KiB
Python

# -*- coding: utf-8; mode: python -*-
import re
# NOTE(review): include(), info and header() are not defined in this file;
# presumably they are supplied by the templating environment -- confirm.
# Pull in the "ip" include (presumably the source of admip()/pubip() used
# further down -- confirm).
include("ip")
# Owner, group and permission bits (octal 0644) recorded in the info mapping.
info["owner"] = "root"
info["group"] = "root"
info["perms"] = 0644
header()
import sys
def service(group, **kw):
    """Print a monit ``check process`` stanza for one service.

    The stanza is printed only when ``group`` is None (unconditional) or
    when ``has(group)`` is true; otherwise the call prints nothing.

    Optional keyword arguments:
        name: monit process name (defaults to ``group``).
        init: init script name under /etc/init.d (defaults to ``name``).
        pidf: pidfile base name, expanded to /var/run/<pidf>.pid
            (defaults to ``name``).
        pidp: full pidfile path; when given it replaces the path built
            from ``pidf``.
        start_cmd: start command (defaults to "/etc/init.d/<init> start").
        stop_cmd: stop command (defaults to "/etc/init.d/<init> stop").
        extra: iterable of additional stanza lines; falsy entries are
            skipped, which lets callers write ``has(x) and "..."``.
        max_restart: restart count used in the final timeout rule
            (defaults to 5).
    """
    if group is not None and not has(group):
        return

    name = kw.get('name', group)
    init = kw.get('init', name)
    fields = {
        'name': name,
        'pidp': kw.get('pidp', "/var/run/%s.pid" % kw.get('pidf', name)),
        'start_cmd': kw.get('start_cmd', '/etc/init.d/%s start' % init),
        'stop_cmd': kw.get('stop_cmd', '/etc/init.d/%s stop' % init),
    }
    max_restart = kw.get('max_restart', 5)

    stanza = '\n'.join([
        '# %(name)s',
        'check process %(name)s with pidfile %(pidp)s',
        'start program = "%(start_cmd)s"',
        'stop program = "%(stop_cmd)s"',
    ])
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2, which is what the rest of this file is written for.
    print(stanza % fields)
    for line in kw.get('extra', []):
        if line:
            print("%s %s" % (" ", line))
    print(" if %d restarts within 5 cycles then timeout" % max_restart)
    # Blank line separating this stanza from the next one.
    print("")
# Static service declarations. Each service() call prints a stanza only when
# its first argument is None or has(<first argument>) is true, so this list
# can name every known service and still yield per-host output.
# For apache, the first three "extra" entries are written as
# `has(...) and "<rule>"`: when has(...) is falsy the entry is falsy and
# service() skips it.
service("apache",
name="apache2",
extra=[ has("http-server") and "if failed host localhost port 80 protocol http timeout 30 seconds then restart",
has("https-server") and "if failed host localhost port 443 type tcpssl protocol http timeout 30 seconds then restart",
has("intranet-server") and "if failed host intranet.crans.org port 443 type tcpssl protocol http timeout 30 seconds then restart",
"if cpu is greater than 60% for 2 cycles then alert",
"if cpu > 80% for 5 cycles then restart",
"if totalmem > 500.0 MB for 5 cycles then restart",
"if children > 250 then restart",
"if loadavg(5min) greater than 10 for 8 cycles then restart",
],
max_restart=3)
service("at",
init="atd",
pidf="atd")
service("cherrypy",
name="intranet")
service("backuppc-server",
name="backuppc",
pidf="backuppc/BackupPC")
# bind's pidfile has changed on squeeze
service("bind",
init="bind9",
pidf="named/named")
service("cups",
pidf="cups/cupsd")
service("openntpd",
name="openntpd",
pidf="openntpd/ntpd")
service("cron",
pidf="crond")
service("dhcp-detect")
# Services declared with group None bypass the has() test inside service().
# NOTE(review): indentation is missing in this copy, so it is unclear whether
# the two netacct services below are also guarded by has("firewall") --
# confirm against the original file.
if has("firewall"):
service(None,
name="filtrage_firewall")
service(None,
name="netacct-crans-ens",
pidf="netacct-crans-ens",
init="netacct-crans-ens")
service(None,
name="netacct-crans-sixxs2",
pidf="netacct-crans-sixxs2",
init="netacct-crans-sixxs2")
service("freeradius",
pidf="freeradius/freeradius")
service("inn",
pidf="news/innd",
init="inn2")
service("mailman",
pidf="mailman/mailman")
service("monit-ovh")
service("mysql",
pidf="mysqld/mysqld")
service("munin-node",
pidf="munin/munin-node")
service("nscd",
pidf="nscd/nscd",
extra=["if failed unixsocket /var/run/nscd/socket then restart"])
service("ntp", pidf="ntpd")
# The three openvpn instances share the "openvpn" init script and differ
# only by pidfile.
service("openvpn-ovh",
pidf="openvpn.ovh",
init="openvpn")
service("openvpn-komaz",
pidf="openvpn.komaz",
init="openvpn")
service("openvpn-freebox",
pidf="openvpn.freebox",
init="openvpn",
extra=["depends on openvpn-komaz"])
# pidp gives the full pidfile path, bypassing the /var/run/<pidf>.pid default.
service("pgsql-server",
name="postgresql",
init="postgresql",
pidp="/var/lib/postgresql/8.4/main/postmaster.pid",
extra=["if failed port 5432 timeout 30 seconds then restart"])
service("postfix",
pidp="/var/spool/postfix/pid/master.pid",
extra=["if failed port 25 protocol smtp timeout 30 seconds then restart"])
service("privoxy",
extra=["if failed host localhost port 8118 timeout 30 seconds then restart"])
service("proftpd",
extra=["if failed port 21 protocol ftp timeout 30 seconds then restart"])
service("rsync")
service("slapd",
pidp="/var/run/slapd/slapd.pid",
extra=["if failed host localhost port 389 protocol ldap3 timeout 30 seconds then restart"])
service("spamassassin",
name="spamd",
init="spamassassin")
service("sqlgrey")
service("ssh",
pidf="sshd",
extra=["if failed port 22 protocol ssh timeout 30 seconds then restart",
"if children > 200 then restart"])
# The dependency target "file/var/log/syslog" is the name of the file check
# emitted later in this template.
service("syslog-ng",
extra=["depend on file/var/log/syslog"])
service("ups-monitor",
name="upsmon",
pidf="nut/upsmon")
service("ups-server",
name="upsd",
pidf="nut/upsd")
# vsftpd is probed on the address returned by admip() when the "adm-only"
# group is present, otherwise on the one returned by pubip().
if has('adm-only'):
vsftpd_ip = admip()
else:
vsftpd_ip = pubip()
service("vsftpd",
pidf="vsftpd/vsftpd",
extra=["if failed host %s port 21 protocol ftp timeout 30 seconds then restart" % vsftpd_ip])
service("vsftpd-federez",
extra=["if failed host 138.231.136.129 port 21 protocol ftp timeout 30 seconds then restart"])
# One mumudvb service per enabled DVB card.
# The "cartesdvb" probe gives the number of cards; cards are numbered from 0.
dernierecarte = int(metadata.Probes["cartesdvb"])

# The "cartesdvbdesactivees_local" probe lists disabled card numbers separated
# by single spaces. This is best-effort: a missing or malformed probe means
# "no card disabled". Only Exception is caught so that SystemExit and
# KeyboardInterrupt are no longer swallowed.
try:
    cartesdesactivees = [
        int(x)
        for x in metadata.Probes["cartesdvbdesactivees_local"].split(' ')
    ]
except Exception:
    cartesdesactivees = []

if dernierecarte:
    print("# Il y a %d carte(s) DVB sur ce serveur dont %d cartes desactivee(s)\n" % (dernierecarte, len(cartesdesactivees)))

# With zero cards the range is empty, so this loop needs no extra guard.
for i in range(dernierecarte):
    if i in cartesdesactivees:
        continue
    # Each card gets its own pidfile and configuration file; the explicit
    # start/stop commands replace the /etc/init.d defaults built by service().
    service("mumudvb",
            name="mumudvb%d" % i,
            init="mumudvb",
            pidf="mumudvb/mumudvb_adapter%d_tuner0" % i,
            start_cmd="""/sbin/start-stop-daemon --start --oknodo --pidfile /var/run/mumudvb/mumudvb_adapter%d_tuner0.pid --chuid _mumudvb --exec /usr/bin/mumudvb -- -c /etc/sat/carte%d.conf""" % (i, i),
            stop_cmd="""/sbin/start-stop-daemon --stop --pidfile /var/run/mumudvb/mumudvb_adapter%d_tuner0.pid""" % i,
            )
# Literal monit stanzas.
# NOTE(review): lines starting with "@" are not Python; presumably the
# templating layer emits the text after "@" as output -- confirm.
# File check on /var/log/syslog that alerts when the timestamp is older than
# 15 minutes; the syslog-ng service declares a dependency on this check's name.
@check file file/var/log/syslog with path /var/log/syslog
@ if timestamp > 15 minutes then alert
@
# gunicorn and igmpproxy stanzas, emitted only on host "routeur".
# NOTE(review): `hostname` is not defined in this file -- presumably provided
# by the templating environment.
if hostname in ['routeur']:
@# gunicorn
@check process gunicorn with pidfile /var/run/gunicorn/blacklist.pid
@ start program = "/etc/init.d/gunicorn start"
@ stop program = "/etc/init.d/gunicorn stop"
@ if 5 restarts within 5 cycles then timeout
@
@# igmpproxy
@check process igmpproxy with pidfile /var/run/igmpproxy.pid
@ start program = "/etc/init.d/igmpproxy start"
@ stop program = "/etc/init.d/igmpproxy stop"
@ if 5 restarts within 5 cycles then timeout
@
# nginx stanza, emitted on hosts "routeur" and "komaz".
if hostname in ['routeur', 'komaz']:
@# nginx
@check process nginx with pidfile /var/run/nginx.pid
@ start program = "/etc/init.d/nginx start"
@ stop program = "/etc/init.d/nginx stop"
@ if 5 restarts within 5 cycles then timeout
@
# Disks on canard are deliberately not monitored.
# NOTE(review): done() is defined outside this file; presumably it ends
# generation at this point -- confirm.
if hostname in ['canard']:
    done()

# Substitutions for the first fstab column: each line of the "blkid" probe is
# "<key> <device>", and an fstab entry whose first field equals <key> is
# checked as <device> instead.
disques = {}
if has('blkid'):
    for line in metadata.Probes["blkid"].splitlines():
        label, disque = line.strip().split()
        disques[label] = disque

# Space-usage alert thresholds (percent) for specific (host, mount point)
# pairs; None disables the space-usage rule for that partition.
seuils_particuliers = {
    ('babar', '/backup'): 90,
    ('sable', '/var/spool/squid1'): None,
    ('sable', '/var/spool/squid2'): None,
    ('sable', '/var/log/squid'): 92,
    ('charybde', '/pubftp'): 80,
    ('news', '/var'): 90,
}

# File system types that are never monitored.
types_ignores = ['swap', 'sw', 'proc', 'tmpfs', 'sysfs', 'nfs', 'devpts']

for line in metadata.Probes["fstab_local"].splitlines():
    # Drop surrounding whitespace, then skip blank lines and comments.
    line = line.strip()
    if not line or line[0] == "#":
        continue

    # Split on runs of spaces/tabs. The pattern must not match the empty
    # string ('[ \t]*' does), and only the first four fields are required:
    # the trailing dump/pass fields are optional in fstab and unused here.
    champs = re.split('[ \t]+', line)
    fs, mntpoint, fstype, options = champs[:4]
    fs = disques.get(fs, fs)
    options = options.split(",")

    # Skip partitions not mounted at boot and bind mounts.
    if "noauto" in options or "bind" in options:
        continue
    # Skip uninteresting file system types.
    if fstype in types_ignores:
        continue

    # General configuration lines for this partition.
    comment("partition %s" % mntpoint)
    # LVM volumes are checked as a filesystem, through their mount point.
    if fs.startswith('/dev/mapper'):
        print('check filesystem fs%s with path %s' % (mntpoint, mntpoint))
    else:
        # NOTE(review): the original indentation was lost; the permission
        # and uid rules are assumed to belong to the raw-device branch only
        # (mode 660 cannot hold for a mount-point directory) -- confirm.
        print('check device fs%s with path %s' % (mntpoint, fs))
        print(' if failed permission 660 then alert')
        print(' if failed uid root then alert')

    # Space usage: explicit override first, then 90% for a few known mount
    # points or when has('domu') is true, 80% otherwise.
    cle = (hostname, mntpoint)
    if cle in seuils_particuliers:
        alert_level = seuils_particuliers[cle]
    elif mntpoint in ('/usr', '/var/lib/mailman', '/localhome', '/home') or has('domu'):
        alert_level = 90
    else:
        alert_level = 80
    if alert_level:
        print(' if space usage > %d%% then alert' % alert_level)

    # Inode usage rule, skipped for reiserfs.
    if fstype != 'reiserfs':
        print(' if inode usage > 80% then alert')
    print(' mode passive')
    # Blank line separating this stanza from the next one.
    print("")