#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build the aggregated robots.txt served for personal web pages.

The rules of the site-wide file (``base_file``) are copied unchanged, then the
rules of every ``<home>/<user>/www/robots.txt`` are added with their paths
prefixed by ``/<user>`` and ``/~<user>``.  The merged result is written to a
temporary file which is then renamed onto ``robots_file``.

Source path of this script: utils/robots_perso.py
"""

import os
import sys
import collections

try:
    import robotparser  # Python 2 module name
except ImportError:
    # Python 3 moved the same parser into the urllib package.
    from urllib import robotparser

base_file = '/var/www/perso/robots.txt'

robots_file = '/usr/scripts/var/perso/robots.txt'

# Maps "User-agent: <name>" -> {"Allow": [paths], "Disallow": [paths]}.
robots = collections.defaultdict(lambda: collections.defaultdict(list))


def _read_rules(path):
    """Parse the robots.txt at *path* into (user_agent, directive, rule_path) tuples.

    *directive* is ``"Allow"`` or ``"Disallow"``.  The file is closed before
    returning.  Raises whatever ``open`` raises if *path* cannot be read.
    """
    rp = robotparser.RobotFileParser()
    with open(path) as handle:
        rp.parse(handle)
    # The parser keeps the "*" entry apart from the others; walk it first.
    entries = ([rp.default_entry] if rp.default_entry else []) + rp.entries
    rules = []
    for entry in entries:
        for user_agent in entry.useragents:
            for rule in entry.rulelines:
                what = "Allow" if rule.allowance else "Disallow"
                rules.append((user_agent, what, rule.path))
    return rules


def add_robots(user, robots, home_dir='/home'):
    """Add the rules of *user*'s personal robots.txt to *robots*.

    Reads ``<home_dir>/<user>/www/robots.txt``; does nothing if that file does
    not exist.  Each rule path is recorded twice, once under ``/<user>`` and
    once under ``/~<user>``.

    :param user: login name, also used as the URL prefix.
    :param robots: mapping updated in place; it must create missing entries on
        lookup, like the module-level ``robots`` defaultdict.
    :param home_dir: directory containing the users' home directories.
    """
    path = os.path.join(home_dir, user, 'www', 'robots.txt')
    if not os.path.exists(path):
        return
    # The file is fully parsed before anything is appended, so a parse error
    # leaves *robots* untouched for this user.
    for user_agent, what, rule_path in _read_rules(path):
        rules = robots["User-agent: %s" % user_agent][what]
        rules.append("/%s%s" % (user, rule_path))
        rules.append("/~%s%s" % (user, rule_path))


def write_robots(file, robots):
    """Write *robots* to the open, writable file object *file*.

    For every user agent the header line is written, followed by its
    ``Disallow`` rules, then its ``Allow`` rules, then a blank line.
    """
    for user_agent, whats in robots.items():
        # Write to the file object passed in, not to a module-level name.
        file.write(user_agent + "\n")
        for rule in whats.get("Disallow", []):
            file.write("Disallow: %s\n" % rule)
        for rule in whats.get("Allow", []):
            file.write("Allow: %s\n" % rule)
        file.write("\n")


def get_users(home_dir='/home/'):
    """Return the names of the entries found in *home_dir*."""
    return os.listdir(home_dir)


def main():
    """Regenerate ``robots_file`` from ``base_file`` and the users' files."""
    tmp_file = robots_file + '.tmp'
    for user_agent, what, rule_path in _read_rules(base_file):
        robots["User-agent: %s" % user_agent][what].append("%s" % (rule_path))
    for user in get_users():
        try:
            add_robots(user, robots)
        except Exception as e:
            # Best effort: one unreadable or broken user file must not stop
            # the generation for everybody else.
            sys.stderr.write("%r\n" % e)
    # Write to a temporary file, then rename it over the target.
    with open(tmp_file, 'w') as f:
        write_robots(f, robots)
    os.rename(tmp_file, robots_file)


if __name__ == '__main__':
    main()