crans_bcfg2/Python/etc/monit/services
2015-11-10 22:42:16 +01:00

422 lines
13 KiB
Python

# -*- coding: utf-8; mode: python -*-
# Template that writes the monit "check ..." stanzas for the current host.
# NOTE(review): include, info, header, has, out, comment, metadata and hostname
# are not defined in this file; presumably the templating environment injects
# them -- confirm.
import re
# NOTE(review): admip() and pubip() are called further down but never defined
# here; presumably the "ip" include provides them -- confirm.
include("ip")
# Attributes of the generated file: root:root, mode 0644 (Python 2 octal literal).
info["owner"] = "root"
info["group"] = "root"
info["mode"] = 0644
header()
import sys
def service(group=None, **kw):
    """Emit a monit ``check process`` stanza for one service.

    Nothing is written when ``group`` is given and ``has(group)`` is false.
    With ``group=None`` the stanza is always written.

    Optional keyword arguments:
      name         process name used in the stanza (default: ``group``)
      init         init-script name handed to /usr/sbin/service
                   (default: ``name``)
      pidp         full path of the pid file; takes precedence over ``pidf``
      pidf         pid file base name, expanded to /var/run/<pidf>.pid
                   (default: ``name``); unused when ``pidp`` or ``matching``
                   is given
      matching     when true, add a ``matching "<name>"`` line; if no ``pidp``
                   is given the pid file clause is dropped altogether
      start_cmd    start command (default: /usr/sbin/service <init> start)
      stop_cmd     stop command (default: /usr/sbin/service <init> stop)
      extra        iterable of additional stanza lines; falsy entries are
                   skipped, so callers may write ``has(x) and "..."``
      max_restart  number of restarts within 5 cycles after which monit
                   times the service out (default: 5)
      depends_on   iterable of check names joined into a ``depends on`` line
    """
    # Guard clause: a named group that the host does not belong to emits
    # nothing.  has() is deliberately not called when group is None.
    if group is not None and not has(group):
        return

    name = kw.get('name', group)
    init = kw.get('init', name)
    matching = kw.get('matching', False)

    # How monit identifies the process: explicit pid file path, process-name
    # matching only, or the conventional /var/run/<pidf>.pid.
    pid_path = kw.get('pidp', '')
    if pid_path:
        pid_clause = "with pidfile %s" % (pid_path,)
    elif matching:
        pid_clause = ""
    else:
        pid_clause = "with pidfile /var/run/%s.pid" % (kw.get('pidf', name),)

    start_cmd = kw.get('start_cmd', '/usr/sbin/service %s start' % (init,))
    stop_cmd = kw.get('stop_cmd', '/usr/sbin/service %s stop' % (init,))

    out("# %s\ncheck process %s %s" % (name, name, pid_clause))
    if matching:
        out(' matching "%s"' % (name,))
    out(' start program = "%s"\nstop program = "%s"' % (start_cmd, stop_cmd))

    # "or ()" tolerates an explicit extra=None.
    for line in kw.get('extra') or ():
        if line:
            out(" %s" % (line,))

    out(" if %d restarts within 5 cycles then timeout" % (kw.get('max_restart', 5),))
    if 'depends_on' in kw:
        out(" depends on %s" % (', '.join(kw['depends_on']),))
    out()
# Apache: the HTTP/HTTPS probes are only added for the ports this host serves.
apache_checks = []
if has("http-server"):
    apache_checks.append("if failed host localhost port 80 protocol http timeout 30 seconds then restart")
if has("https-server"):
    apache_checks.append("if failed host localhost port 443 type tcpssl protocol http timeout 30 seconds then restart")
apache_checks.append("if cpu is greater than 60% for 2 cycles then alert")
apache_checks.append("if cpu > 80% for 5 cycles then restart")
# Hosts in the "users" group get a 4 GB memory ceiling instead of 500 MB.
if has("users"):
    apache_checks.append("if totalmem > 4 GB for 5 cycles then restart")
else:
    apache_checks.append("if totalmem > 500.0 MB for 5 cycles then restart")
apache_checks.append("if children > 250 then restart")
apache_checks.append("if loadavg(5min) greater than 10 for 8 cycles then restart")
service(
    "apache",
    name="apache2",
    extra=apache_checks,
    max_restart=3,
    # The pid file sits directly in /var/run on wheezy, in a subdirectory otherwise.
    pidf="apache2" if has("wheezy") else "apache2/apache2",
)

service("nginx")

service(
    "sogo",
    pidf="sogo/sogo",
    extra=[
        "if failed host localhost port 20000 protocol http timeout 20 seconds for 5 cycles then restart",
    ],
)

service("ejabberd", pidf="ejabberd/ejabberd")
# php5-fpm and fcgiwrap checks, guarded by has('nginx').
# NOTE(review): this copy of the file has lost its indentation. Presumably the
# has('php') and has('cgi') tests are nested under has('nginx'), and the else
# pairs with has('wheezy') -- confirm against the original before editing.
if has('nginx'):
if has('php'):
# php5-fpm is probed over HTTP on port 80 through the /php_ping request.
service(name='php5-fpm', extra=["if failed host localhost port 80 protocol http and request '/php_ping' timeout 20 seconds for 5 cycles then restart"])
if has('cgi'):
if has('wheezy'):
# With 'wheezy', fcgiwrap is tracked through the /var/run/fcgiwrap.pids pid file.
service(name='fcgiwrap', pidp='/var/run/fcgiwrap.pids')
else:
# Otherwise the stanza is written by hand and matches on the process name.
out("""# fcgiwrap
check process fcgiwrap matching fcgiwrap
start program = "/usr/sbin/service fcgiwrap start"
stop program = "/usr/sbin/service fcgiwrap stop"
if 5 restarts within 5 cycles then timeout""")
# Radio daemons: both checks are unconditional (group=None) once the host is
# in the 'radio' group, and each uses its own init script name.
if has("radio"):
    service(
        None,
        name="radio-vlc",
        start_cmd="/usr/sbin/service radio start",
        stop_cmd="/usr/sbin/service radio stop",
        pidp="/tmp/multicast.pid",
    )
    service(
        None,
        name="radio-igmp",
        start_cmd="/usr/sbin/service igmp start",
        stop_cmd="/usr/sbin/service igmp stop",
        pidf="igmp",
    )

# intranet2 is checked under the name "intranet2" for the group
# "intranet2-service".
service(
    "intranet2-service",
    name="intranet2",
    start_cmd="/usr/sbin/service intranet2 start",
    stop_cmd="/usr/sbin/service intranet2 stop",
)
if has("gunicorn"):
    # The gunicorn-debian probe lists the active sites found in
    # /etc/gunicorn.d/; entries matching re_ignore are skipped.
    re_ignore = re.compile(r'(^_|\.(dpkg-(old|dist|new|tmp)|example)$|\.pyc|\.comc$)')
    for raw_site in metadata.Probes["gunicorn-debian"].splitlines():
        site = raw_site.strip()
        if not re_ignore.search(site):
            extra = []
            # TODO: make this more generic
            if site == "intranet":
                extra.append(
                    "if failed unixsocket /tmp/gunicorn-intranet.sock "
                    "protocol http and request '/heartbeat' "
                    "timeout 20 seconds for 5 cycles then restart"
                )
            # One monit check per site, each with its own pid file.
            service(
                None,
                name="gunicorn_%s" % (site,),
                start_cmd="/usr/sbin/service gunicorn start %s" % (site,),
                stop_cmd="/usr/sbin/service gunicorn stop %s" % (site,),
                pidf="gunicorn/%s" % (site,),
                extra=extra,
            )
# sms_queuing has no group of its own, so it is guarded by the asterisk group.
if has("asterisk-server"):
    service(name="sms_queuing")

# The check is named after the group; the init script is plain "asterisk".
service(
    "asterisk-server",
    pidp="/var/run/asterisk/asterisk.pid",
    start_cmd="/usr/sbin/service asterisk start",
    stop_cmd="/usr/sbin/service asterisk stop",
)

service("at", init="atd", pidf="atd")

service("cherrypy", name="intranet")

service("backuppc-server", name="backuppc", pidf="backuppc/BackupPC")

service("isc-dhcp-server", pidf="dhcpd")
# arpwatch: one monit check per watched VLAN interface, plus arpwatch_sendmail.
if has('arpwatch'):
# NOTE(review): watched_vlans is not defined in this file; presumably the
# "arpwatch" include provides it -- confirm.
include("arpwatch")
# TODO: model this as a single service to avoid silly races
# at restart time ...
for vlan in watched_vlans:
# Each check is named after the VLAN sub-interface of eth0.
iface = 'eth0.%d' % vlan
service(None, name='arpwatch-%s' % iface,
init='arpwatch',
)
# NOTE(review): group=None makes this check unconditional wherever the line
# runs. With indentation lost it is unclear whether it sits inside the loop,
# inside the has('arpwatch') test, or at top level -- confirm.
service(name="arpwatch_sendmail", group=None)
# Plain daemons: (group, keyword options) pairs, emitted in this order.
for daemon_group, daemon_opts in (
    ("bind", {"init": "bind9", "pidf": "named/named"}),
    ("cups-service", {"name": "cups", "pidf": "cups/cupsd"}),
    ("cron", {"pidf": "crond"}),
    ("dhcp-detect", {}),
    ("radvd", {"pidf": "radvd/radvd"}),
    ("ramond", {}),
    ("getlogwifi", {}),
):
    service(daemon_group, **daemon_opts)
# Disabled branch: the condition is hard-coded to False; the trailing comment
# keeps the former has("komaz") test.
# NOTE(review): indentation was lost in this copy, so it is unclear how many of
# the calls below belong to the disabled branch -- confirm against the original.
if False: #has("komaz"):
service(None,
name="filtrage_firewall")
# Three checks share the 'netacct-crans' group; each one has its own init
# script and pid file, both named after the check.
service('netacct-crans',
name="netacct-crans-ens",
pidf="netacct-crans-ens",
init="netacct-crans-ens")
# This one additionally declares a monit dependency on the aiccu check.
service('netacct-crans',
name="netacct-crans-sixxs2",
pidf="netacct-crans-sixxs2",
init="netacct-crans-sixxs2",
depends_on=['aiccu'],
)
service('netacct-crans',
name="mac_ip",
pidf="mac_ip",
init="mac_ip")
service("aiccu")
# Template for the "restart when the unix socket stops answering" rule.
socket_check = "if failed unixsocket %s then restart"

service("freeradius", pidf="freeradius/freeradius")

service("digicode", name="digicode_server", pidf="digicode")

service("inn", pidf="news/innd", init="inn2")

service("mailman", pidf="mailman/mailman")

service("monit-ovh")

service("mysql", pidf="mysqld/mysqld")

service("munin-node", pidf="munin/munin-node")

# The munin-server group yields two checks: the fastcgi graph daemon and rrdcached.
service(
    "munin-server",
    name="munin-fastcgi",
    pidf="munin/fastcgi-munin-graph",
    extra=[
        socket_check % ("/var/run/munin/fastcgi-munin-graph.sock",),
        socket_check % ("/var/run/munin/fastcgi-munin-html.sock",),
    ],
)
service(
    "munin-server",
    name="rrdcached",
    extra=[socket_check % ("/var/run/rrdcached.sock",)],
)

service(
    "nslcd",
    pidf="nslcd/nslcd",
    extra=[socket_check % ("/var/run/nslcd/socket",)],
)

service(
    "nscd",
    pidf="nscd/nscd",
    extra=[socket_check % ("/var/run/nscd/socket",)],
)

service("ntp", pidf="ntpd")

# Every OpenVPN instance goes through the shared "openvpn" init script and
# has its own pid file.
service("openvpn-ovh", pidf="openvpn.ovh", init="openvpn")
service("openvpn-odlyd", pidf="openvpn.odlyd", init="openvpn")
service(
    "openvpn-freebox",
    pidf="openvpn.freebox",
    init="openvpn",
    extra=["depends on openvpn-odlyd"],
)
# PostgreSQL keeps its pid file inside the versioned cluster directory.
pg_version = "9.1"
service(
    "pgsql-server",
    name="postgresql",
    init="postgresql",
    pidp="/var/lib/postgresql/%s/main/postmaster.pid" % (pg_version,),
    extra=["if failed port 5432 timeout 30 seconds then restart"],
)

# oidentd is identified by process name rather than by a pid file.
service("identd", name="oidentd", matching=True)

service(
    "postfix",
    pidp="/var/spool/postfix/pid/master.pid",
    extra=["if failed port 25 protocol smtp timeout 30 seconds then restart"],
)

service(
    "privoxy",
    extra=["if failed host localhost port 8118 timeout 30 seconds then restart"],
)

service(
    "proftpd",
    extra=["if failed port 21 protocol ftp timeout 30 seconds then restart"],
)
# rsync: the default pid file name is used on wheezy, "rsyncd" elsewhere.
rsync_opts = {}
if not has("wheezy"):
    rsync_opts["pidf"] = "rsyncd"
service("rsync", **rsync_opts)

service(
    "slapd",
    pidp="/var/run/slapd/slapd.pid",
    extra=["if failed host localhost port 389 protocol ldap3 timeout 30 seconds then restart"],
)

service(
    "spamassassin",
    pidp="/var/run/spamassassin.pid",
    name="spamd",
    init="spamassassin",
)

service("sqlgrey")

service(
    "ssh",
    pidf="sshd",
    extra=[
        "if failed port 22 protocol ssh timeout 30 seconds then restart",
        "if children > 200 then restart",
    ],
)

# rsyslog is checked on both clients and servers; the check is tied to the
# "file/var/log/syslog" file check.
if has("rsyslog-client") or has("rsyslog-server"):
    service(
        None,
        name="rsyslog",
        pidf="rsyslogd",
        extra=["depend on file/var/log/syslog"],
    )

service("ups-monitor", name="upsmon", pidf="nut/upsmon")

service("ups-server", name="upsd", pidf="nut/upsd")
# Hosts outside the "nfs" group run the repository check script and alert on
# a non-zero exit status.
if not has("nfs"):
    out(
        'check program git_repos with path "/usr/scripts/utils/check_repos.sh"\n'
        'if status != 0 then alert\n'
    )

# Address probed for vsftpd: admip() for 'non-vlan-adherent' hosts, pubip()
# for the others.
vsftpd_ip = admip() if has("non-vlan-adherent") else pubip()
service(
    "vsftpd",
    pidf="vsftpd/vsftpd",
    extra=["if failed host %s port 21 protocol ftp timeout 30 seconds then restart" % (vsftpd_ip,)],
)

service(
    "vsftpd-federez",
    extra=["if failed host 138.231.136.129 port 21 protocol ftp timeout 30 seconds then restart"],
)
# Number of DVB cards, as reported by the "cartesdvb" probe.
dernierecarte = int(metadata.Probes["cartesdvb"])

# Indices of the cards to leave unmonitored, read from the
# "cartesdvbdesactivees_local" probe.  split() with no argument tolerates
# repeated or trailing blanks; split(' ') produced an empty field there, whose
# int() failure silently re-enabled every card.  A missing probe or a
# non-numeric entry still means "no card disabled".
try:
    cartesdesactivees = [
        int(carte)
        for carte in metadata.Probes["cartesdvbdesactivees_local"].split()
    ]
except (KeyError, ValueError):
    cartesdesactivees = []

if dernierecarte:
    out("# Il y a %d carte(s) DVB sur ce serveur dont %d cartes desactivee(s)\n" % (dernierecarte, len(cartesdesactivees)))

# One mumudvb check per enabled card; each instance is started directly with
# start-stop-daemon using its own pid file and /etc/sat/carte<N>.conf.
for i in range(dernierecarte):
    if i not in cartesdesactivees:
        service(
            "mumudvb",
            name="mumudvb%d" % i,
            init="mumudvb",
            pidf="mumudvb/mumudvb_adapter%d_tuner0" % i,
            start_cmd="""/sbin/start-stop-daemon --start --oknodo --pidfile /var/run/mumudvb/mumudvb_adapter%d_tuner0.pid --chuid _mumudvb --exec /usr/bin/mumudvb -- -c /etc/sat/carte%d.conf""" % (i, i),
            stop_cmd="""/sbin/start-stop-daemon --stop --pidfile /var/run/mumudvb/mumudvb_adapter%d_tuner0.pid""" % i,
        )
# Alert when /var/log/syslog has not been written to for 15 minutes.
out(
    "check file file/var/log/syslog with path /var/log/syslog\n"
    "if timestamp > 15 minutes then alert\n"
)

service("igmpproxy")
# Lookup table built from the "blkid" probe: the first field of each line maps
# to the second one.  It stays empty on hosts outside the 'blkid' group.
disques = {}
if has("blkid"):
    disques.update(
        entry.split() for entry in metadata.Probes["blkid"].splitlines()
    )
# Per-(host, mount point) overrides of the space-usage alert threshold, in
# percent.  None disables the space-usage rule for that mount point.
space_alert_overrides = {
    ('babar', '/backup'): 90,
    ('sable', '/var/spool/squid1'): None,
    ('sable', '/var/spool/squid2'): None,
    ('sable', '/var/log/squid'): 92,
    ('charybde', '/pubftp'): 95,
    ('news', '/var'): 90,
}
# Mount points that alert at 90% instead of the default 80%.
high_watermark_mntpoints = ('/usr', '/var/lib/mailman', '/localhome', '/var/lib/postgresql')
# Filesystem types that get no check at all.
ignored_fstypes = ('swap', 'sw', 'proc', 'tmpfs', 'sysfs', 'nfs', 'devpts')

# One passive monit check per relevant line of the "fstab_local" probe.
for line in metadata.Probes["fstab_local"].splitlines():
    line = line.strip()
    # Skip blank lines and comments.
    if not line or line.startswith("#"):
        continue

    # Only the first four fields are used.  The dump and pass fields are
    # optional in fstab(5), so they are not required here; a line with fewer
    # than four fields still raises ValueError.
    fs, mntpoint, fstype, options = line.split()[:4]
    # Translate the spec through the disques table (built from the blkid
    # probe earlier in this file) when it has an entry for it.
    fs = disques.get(fs, fs)
    options = options.split(",")

    # Skip partitions not mounted at boot, and bind mounts.
    if "noauto" in options or "bind" in options:
        continue
    # Skip uninteresting filesystem types.
    if fstype in ignored_fstypes:
        continue

    comment("partition %s" % mntpoint)

    # Expected permission of the checked path.
    perm = '1660' if has('wheezy') else '0660'

    # /dev/mapper volumes are checked through their mount point, everything
    # else through the device itself.
    if fs.startswith('/dev/mapper'):
        out('check filesystem fs%s with path %s' % (mntpoint, mntpoint))
    else:
        out('check device fs%s with path %s' % (mntpoint, fs))
    out(' if failed permission %s then alert' % (perm,))
    out(' if failed uid root then alert')

    # Space usage threshold: explicit override first, then the 90% cases
    # (listed mount points, any 'domu' host, /home-adh*), else 80%.
    if (hostname, mntpoint) in space_alert_overrides:
        alert_level = space_alert_overrides[(hostname, mntpoint)]
    elif (mntpoint in high_watermark_mntpoints or has('domu')
          or mntpoint.startswith('/home-adh')):
        alert_level = 90
    else:
        alert_level = 80
    if alert_level:
        out(' if space usage > %d%% for 3 cycles then alert' % (alert_level,))

    # Inode usage rule; omitted on reiserfs (presumably because it has no
    # fixed inode table -- confirm).
    if fstype != 'reiserfs':
        out(' if inode usage > 80% then alert')
    out(' mode passive')
    out()