# -*- coding: utf-8; mode: python -*- import sre include("ip") info["owner"] = "root" info["group"] = "root" info["perms"] = 0644 header() import sys def service(group, name=None, init=None, pidf=None, pidp=None, extra=[], max_restart=5): if group == None or has(group): name = name or group init = init or name pidf = pidf or name pidp = pidp or "/var/run/%s.pid" % pidf print ('''# %(name)s check process %(name)s with pidfile %(pidp)s start program = "/etc/init.d/%(init)s start" stop program = "/etc/init.d/%(init)s stop"''' % { "name": name, "pidp": pidp, "init": init }) for line in extra: if line: print " ", line print " if %d restarts within 5 cycles then timeout" % max_restart print service("apache", name="apache2", extra=[ has("http-server") and "if failed host localhost port 80 protocol http timeout 30 seconds then restart", has("https-server") and "if failed host localhost port 443 type tcpssl protocol http timeout 30 seconds then restart", has("intranet-server") and "if failed host intranet.crans.org port 443 type tcpssl protocol http timeout 30 seconds then restart", "if cpu is greater than 60% for 2 cycles then alert", "if cpu > 80% for 5 cycles then restart", "if totalmem > 500.0 MB for 5 cycles then restart", "if children > 250 then restart", "if loadavg(5min) greater than 10 for 8 cycles then restart", ], max_restart=3) service("at", init="atd", pidf="atd") service("cherrypy", name="intranet") service("backuppc-server", name="backuppc", pidf="backuppc/BackupPC") service("bind", init="bind9", pidf="bind/run/named") service("cups", init="cupsys", pidf="cups/cupsd") service("cron", pidf="crond") if has("firewall"): service(None, name="netacct", pidf="nacctd", init="net-acct") service(None, name="filtrage_netacct") service(None, name="filtrage_firewall") service("dhcp-detect") service("freeradius", pidf="freeradius/freeradius") service("inn", pidf="news/innd", init="inn2") service("mailman", pidf="mailman/mailman") service("monit-ovh") service("mysql", pidf="mysqld/mysqld") service("munin-node", pidf="munin/munin-node") service("nscd", pidf="nscd/nscd", extra=["if failed unixsocket /var/run/nscd/socket then restart"]) service("postfix", pidp="/var/spool/postfix/pid/master.pid", extra=["if failed port 25 protocol smtp timeout 30 seconds then restart"]) service("openvpn-ovh", pidf="openvpn.ovh", init="openvpn") service("openvpn-komaz", pidf="openvpn.komaz", init="openvpn") service("openvpn-freebox", pidf="openvpn.freebox", init="openvpn", extra=["depends on openvpn-komaz"]) service("pgsql", name="postgresql", init="postgresql-7.4", pidp="/var/lib/postgres/data/postmaster.pid", extra=["if failed port 5432 timeout 30 seconds then restart"]) service("privoxy", extra=["if failed host localhost port 8117 timeout 30 seconds then restart"]) service("proftpd", extra=["if failed port 21 protocol ftp timeout 30 seconds then restart"]) service("rsync") service("slapd", pidp="/var/run/slapd/slapd.pid", extra=["if failed host localhost port 389 protocol ldap3 timeout 30 seconds then restart"]) service("spamassassin", name="spamd", init="spamassassin") service("sqlgrey") service("ssh", pidf="sshd", extra=["if failed port 22 protocol ssh timeout 30 seconds then restart", "if children > 200 then restart"]) service("syslog-ng", extra=["depend on file/var/log/syslog"]) if has('adm-only'): vsftpd_ip = admip() else: vsftpd_ip = pubip() service("vsftpd", pidf="vsftpd/vsftpd", extra=["if failed host %s port 21 protocol ftp timeout 30 seconds then restart" % vsftpd_ip]) service("vsftpd-federez", extra=["if failed host 138.231.136.129 port 21 protocol ftp timeout 30 seconds then restart"]) dernierecarte=int(metadata.probes["cartesdvb"]) cartesdesactivees=map(lambda x : int(x),metadata.probes["cartesdvbdesactivees_local"].split(' ')) if dernierecarte: print "# Il y a %d carte(s) DVB sur ce serveur dont %d cartes desactivee(s)\n" % (dernierecarte,len(cartesdesactivees)) for i in range(0,dernierecarte): if not i in cartesdesactivees: service("mumudvb", name = "mumudvb%d" % i, init = "mumudvb", pidf = "mumudvb/mumudvb_carte%d" % i) @check file file/var/log/syslog with path /var/log/syslog @ if timestamp > 15 minutes then alert @ # on ne monitore pas les disques de canard if hostname in ['canard'] : done() for line in metadata.probes["fstab_local"].splitlines(): # on supprime les espaces line = line.strip() # on saute les lignes inintérassantes if not line : continue if line[0] == "#" : continue # on découpe la ligne [fs, mntpoint, type, options, dump, pass_] = sre.split('[ \t]*',line) options = options.split(",") # on saute si c'est une partition non montée au démarrage if "noauto" in options: continue # on saute si c'est une partition bind if "bind" in options: continue # on saute les système pas intéressants if type in ['swap','proc','tmpfs','sysfs', 'nfs', 'devpts']: continue # on ajoute les lignes de configuration générale comment("partition %s" % mntpoint) print 'check device fs%s with path %s' % (mntpoint, fs) print ' if failed permission 660 then alert' print ' if failed uid root then alert' # place sur les disques alert_level = { ('sable','/var/spool/squid1') : None, ('sable','/var/spool/squid2') : None, ('egon','/pubftp') : 95, ('sable','/var/log/squid') : 92, ('sila','/pubftp') : 92, ('news','/var') : 90 }.get((hostname, mntpoint), -1) if alert_level == -1: if mntpoint in ('/usr', '/var/lib/mailman', '/localhome' , '/home') or has('domu'): alert_level = 90 else: alert_level = 80 if alert_level: print ' if space usage > %d%% then alert' % alert_level # inodes pour les disques if type != 'reiserfs' : print ' if inode usage > 80% then alert' print ' mode passive' print