# coding:utf-8
import piexel  # NOTE(review): unused in this file and possibly misspelled ("pixel"?) — confirm before removing
import re


class Tokenizer:
    """Match configured regex tokens against filenames to extract metadata.

    Tokens are fetched from the API and grouped into ordered "steps"; each
    token is a dict with at least the keys 'step', 'token' (a regex pattern),
    'case_sensitive', 'lang', 'quality' and 'subtitle' (schema assumed from
    usage — TODO confirm against the API).
    """

    def __init__(self, conf, api):
        self.conf = conf
        self.api = api
        self.reload_tokens()

    def reload_tokens(self):
        """Refresh the token list from the API and rebuild the sorted step list."""
        self.tk = self.api.get_tokens()
        self.steps = sorted(set(t['step'] for t in self.tk))

    def get_tokens_step(self, step):
        """Return all tokens belonging to the given step."""
        return [t for t in self.tk if t['step'] == step]

    def tokenise(self, filename):
        """Scan *filename* with every token, step by step.

        Returns a tuple ``(filename, found)`` where ``filename`` has each
        matched token pattern replaced by a space, and ``found`` is a dict
        that may contain 'lang', 'quality' and 'subtitles' taken from the
        matching tokens (later matches overwrite earlier ones).
        """
        found = {}
        for step in self.steps:
            for tok in self.get_tokens_step(step):
                # BUGFIX: original referenced undefined name `tk` here,
                # raising NameError; the loop variable is `tok`.
                if not bool(tok['case_sensitive']):
                    reg = re.compile(tok['token'], re.IGNORECASE)
                else:
                    reg = re.compile(tok['token'])
                # NOTE: re.match only anchors at the start of the string —
                # presumably intentional; confirm tokens are prefix patterns.
                if reg.match(filename):
                    if tok['lang']:
                        found['lang'] = tok['lang']
                    if tok['quality']:
                        found['quality'] = tok['quality']
                    if tok['subtitle']:
                        found['subtitles'] = tok['subtitle']
                    # BUGFIX: re.sub returns a new string; the original
                    # discarded the result, so the token was never removed.
                    filename = reg.sub(' ', filename)
        return filename, found