piexel-indexer/tokenizer.py
2017-05-27 18:03:47 +02:00

41 lines
894 B
Python

#coding:utf-8
import piexel
import re
class Tokenizer:
    """Match filename tokens (language / quality / subtitles) against
    regex patterns fetched from the API, and strip matched tokens from
    the filename."""

    def __init__(self, conf, api):
        """
        conf -- configuration object (kept for later use).
        api  -- client exposing get_tokens(); each token is a dict with
                keys 'step', 'token' (regex), 'case_sensitive', 'lang',
                'quality', 'subtitle'.  -- schema inferred from usage
                below; confirm against the API.
        """
        self.conf = conf
        self.api = api
        self.reload_tokens()

    def reload_tokens(self):
        """Re-fetch the token list and recompute the sorted set of steps."""
        self.tk = self.api.get_tokens()
        # Deduplicate steps, then sort so tokenise() applies them in order.
        self.steps = sorted(set(t['step'] for t in self.tk))

    def get_tokens_step(self, step):
        """Return all tokens belonging to the given step."""
        return [t for t in self.tk if t['step'] == step]

    def tokenise(self, filename):
        """Run every token regex against *filename*, in step order.

        Returns (filename, found):
          filename -- the input with every matched token pattern
                      replaced by a single space,
          found    -- dict of collected metadata ('lang', 'quality',
                      'subtitles') from matching tokens; later matches
                      overwrite earlier ones for the same key.
        """
        found = {}
        for step in self.steps:
            for tok in self.get_tokens_step(step):
                # BUG FIX: original read bare `tk['case_sensitive']`
                # (NameError) instead of the loop variable `tok`.
                flags = 0 if bool(tok['case_sensitive']) else re.IGNORECASE
                reg = re.compile(tok['token'], flags)
                # match() anchors at the start of the filename.
                if reg.match(filename):
                    if tok['lang']:
                        found['lang'] = tok['lang']
                    if tok['quality']:
                        found['quality'] = tok['quality']
                    if tok['subtitle']:
                        found['subtitles'] = tok['subtitle']
                    # BUG FIX: re.sub returns a new string; the original
                    # discarded it, so tokens were never stripped.
                    filename = reg.sub(' ', filename)
        return filename, found