#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build a combined robots.txt from a base file and per-user robots.txt files.

The rules of ``base_file`` are copied unchanged.  For every name returned by
``get_users()``, the rules found in ``~<user>/www/robots.txt`` (when that file
exists) are added twice: once prefixed with ``/<user>`` and once prefixed with
``/~<user>``.  The result is written to ``robots_file`` through a temporary
file that is renamed into place.
"""
import os
import sys
import collections

try:
    import robotparser  # Python 2 module name
except ImportError:
    from urllib import robotparser  # Python 3 location of the same module

base_file = '/var/www/perso/robots.txt'
robots_file = '/usr/scripts/var/perso/robots.txt'

# Maps "User-agent: <name>" -> {"Allow": [paths], "Disallow": [paths]}.
robots = collections.defaultdict(lambda: collections.defaultdict(list))


def _parse_robots_file(path):
    """Parse the robots.txt at *path* and return the filled RobotFileParser."""
    rp = robotparser.RobotFileParser()
    # Close the handle deterministically instead of leaking it.
    with open(path) as handle:
        rp.parse(handle)
    return rp


def _merge_rules(rp, robots, prefixes):
    """Append every rule of *rp* to *robots*, once per entry of *prefixes*."""
    entries = ([rp.default_entry] if rp.default_entry else []) + rp.entries
    for entry in entries:
        for user_agent in entry.useragents:
            key = "User-agent: %s" % user_agent
            for rule in entry.rulelines:
                what = "Allow" if rule.allowance else "Disallow"
                # setdefault keeps this working for plain dicts as well as
                # for the nested defaultdict used by this module.
                rules = robots.setdefault(key, collections.defaultdict(list))
                for prefix in prefixes:
                    rules.setdefault(what, []).append(prefix + rule.path)


def add_robots(user, robots):
    """Merge the rules of ``~<user>/www/robots.txt`` into *robots*.

    Each rule path is recorded twice, under ``/<user>`` and under ``/~<user>``.
    Nothing happens when the user's robots.txt does not exist.

    :param user: user name, expanded through ``~<user>``.
    :param robots: mapping ``"User-agent: X" -> {"Allow"/"Disallow": [paths]}``
        that is updated in place.
    """
    # The original called os.expanduser, which does not exist; the resulting
    # AttributeError was swallowed by the caller, so no user rule was merged.
    robots_path = os.path.expanduser('~%s/www/robots.txt' % user)
    if not os.path.exists(robots_path):
        return
    rp = _parse_robots_file(robots_path)
    _merge_rules(rp, robots, ("/%s" % user, "/~%s" % user))


def write_robots(file, robots):
    """Write *robots* to the writable file object *file* in robots.txt syntax.

    For each user agent the header line is written first, then all Disallow
    rules, then all Allow rules, followed by a blank line.

    :param file: object with a ``write`` method accepting text.
    :param robots: mapping ``"User-agent: X" -> {"Allow"/"Disallow": [paths]}``.
    """
    for user_agent, whats in robots.items():
        # Write to the parameter; the original wrote to a global named ``f``.
        file.write(user_agent + "\n")
        for rule in whats.get("Disallow", []):
            file.write("Disallow: %s\n" % rule)
        for rule in whats.get("Allow", []):
            file.write("Allow: %s\n" % rule)
        file.write("\n")


def get_users():
    """Return the names of the entries found in ``/home/mail``."""
    return os.listdir('/home/mail')


def main():
    """Regenerate ``robots_file`` from ``base_file`` and the user files."""
    tmp_file = robots_file + '.tmp'

    # Rules from the base file are copied without any path prefix.
    _merge_rules(_parse_robots_file(base_file), robots, ("",))

    for user in get_users():
        try:
            add_robots(user, robots)
        except Exception as e:
            # Best effort: one broken user file must not block the others.
            sys.stderr.write("%r\n" % e)

    # Write to a temporary file, then rename it over the target once closed.
    with open(tmp_file, 'w') as f:
        write_robots(f, robots)
    os.rename(tmp_file, robots_file)


if __name__ == '__main__':
    main()