#coding:utf-8

import piexel

import re


class Tokenizer:
    """Split media filenames into language/quality/subtitle tags.

    Token definitions are fetched from the API; each definition carries a
    regex fragment, a processing step, a case-sensitivity flag, and the
    lists of values it contributes when matched.
    """

    def __init__(self, conf, api):
        """Keep the configuration and API handle, then load token data.

        conf -- application configuration object (stored as-is)
        api  -- client exposing get_tokens()
        """
        self.conf = conf
        self.api = api
        self.reload_tokens()

    def reload_tokens(self):
        """Re-fetch token definitions and rebuild the sorted step list."""
        self.tk = self.api.get_tokens()
        # Deduplicate the step numbers and keep them in ascending order so
        # tokenize() processes steps deterministically.
        self.steps = sorted({entry['step'] for entry in self.tk})

    def get_tokens_step(self, step):
        """Return every token definition that belongs to *step*."""
        return [entry for entry in self.tk if entry['step'] == step]

    def tokenize(self, filename):
        """Scan *filename* for known tokens and strip each match.

        Returns a tuple ``(remaining_filename, found)`` where *found* maps
        'lang' / 'quality' / 'subtitle' to the lists of values contributed
        by matched tokens, with 'N/A' placeholders filtered out.

        NOTE(review): every pattern is padded with a single space on each
        side, so a token only matches when it is surrounded by spaces in
        *filename* — callers presumably pre-pad / space-normalize the name;
        confirm before relying on boundary matches.
        """
        found = {'lang': [], 'quality': [], 'subtitle': []}
        for step in self.steps:
            for tok in self.get_tokens_step(step):
                pattern = r' ' + tok['token'] + r' '
                if int(tok['case_sensitive']):
                    reg = re.compile(pattern)
                else:
                    reg = re.compile(pattern, re.IGNORECASE)
                if not reg.search(filename):
                    continue
                for entry in tok['languages']:
                    found['lang'].append(entry['value'])
                for entry in tok['qualities']:
                    found['quality'].append(entry['value'])
                for entry in tok['subtitle_languages']:
                    found['subtitle'].append(entry['value'])
                # Erase every occurrence before later steps run, so a token
                # consumed here cannot be re-matched downstream.
                filename = reg.sub(' ', filename)
        # Drop the 'N/A' placeholder entries from each category.
        for category in found:
            found[category] = [v for v in found[category] if v != 'N/A']
        return filename, found