# coding: utf-8
"""Filename tokenizer: matches API-configured tokens against release
filenames to extract language, quality and subtitle metadata."""
import re

import piexel  # NOTE(review): imported but never used in this module — confirm before removing


class Tokenizer:
    """Scans filenames with regex tokens fetched from the API.

    Tokens are grouped by ``step``; each step's tokens are applied in
    order, and every match strips the matched text from the filename so
    later steps see a progressively cleaned name.
    """

    def __init__(self, conf, api):
        self.conf = conf
        self.api = api
        self.reload_tokens()

    def reload_tokens(self):
        """Fetch tokens from the API and cache the sorted list of distinct steps."""
        self.tk = self.api.get_tokens()
        # sorted set comprehension replaces list(set(...)) + .sort()
        self.steps = sorted({t['step'] for t in self.tk})

    def get_tokens_step(self, step):
        """Return all cached tokens belonging to *step*."""
        return [t for t in self.tk if t['step'] == step]

    def tokenize(self, filename):
        """Match every token against *filename*, collecting metadata.

        Returns a ``(filename, found)`` tuple where *filename* has each
        matched token replaced by a space and *found* maps
        ``'lang'``/``'quality'``/``'subtitle'`` to lists of matched values.
        """
        found = {'lang': [], 'quality': [], 'subtitle': []}
        for step in self.steps:
            for tok in self.get_tokens_step(step):
                # 'case_sensitive' arrives as a "0"/"1" string; non-zero
                # means the token must match case-sensitively.
                flags = 0 if int(tok['case_sensitive']) else re.IGNORECASE
                reg = re.compile(tok['token'], flags)
                if reg.search(filename):
                    for tok_lang in tok['languages']:
                        found['lang'].append(tok_lang['value'])
                    for tok_qual in tok['qualities']:
                        found['quality'].append(tok_qual['value'])
                    for tok_sub in tok['subtitle_languages']:
                        found['subtitle'].append(tok_sub['value'])
                    # Strip the matched token so later tokens see a
                    # cleaned filename.
                    filename = reg.sub(' ', filename)
        return filename, found