New handling method
This commit is contained in:
parent
defcc3d618
commit
b305f63b46
7 changed files with 172 additions and 122 deletions
41
tokenizer.py
Normal file
41
tokenizer.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
#coding:utf-8
|
||||
import piexel
|
||||
import re
|
||||
|
||||
|
||||
class Tokenizer:
    """Classify filenames by matching them against token patterns
    fetched from an API.

    Each token is a dict with keys 'step', 'token' (a regex pattern),
    'case_sensitive', 'lang', 'quality' and 'subtitle'.  Matching runs
    step by step in ascending step order; every matching token's
    metadata is merged into the result and the matched text is blanked
    out of the filename before later steps run.
    """

    def __init__(self, conf, api):
        """Store configuration and API handle, then load the tokens.

        conf -- configuration object (kept for later use)
        api  -- object exposing get_tokens() -> list of token dicts
        """
        self.conf = conf
        self.api = api
        self.reload_tokens()

    def reload_tokens(self):
        """(Re)fetch the token list from the API and rebuild the
        sorted, de-duplicated list of processing steps."""
        self.tk = self.api.get_tokens()
        # sorted(set(...)) replaces the original list(set(...)) + .sort()
        self.steps = sorted(set(t['step'] for t in self.tk))

    def get_tokens_step(self, step):
        """Return the tokens belonging to the given processing step."""
        return [t for t in self.tk if t['step'] == step]

    def tokenise(self, filename):
        """Match *filename* against every token, step by step.

        Returns a (cleaned_filename, found) tuple, where found maps
        'lang' / 'quality' / 'subtitles' to the values carried by the
        matched tokens, and cleaned_filename has each matched token
        replaced by a single space.

        Note: re.match anchors at the start of the (current) filename,
        so a token only fires when it matches at position 0.
        """
        found = {}
        for step in self.steps:
            for tok in self.get_tokens_step(step):
                # BUG FIX: the original read tk['case_sensitive'] --
                # 'tk' is undefined in this scope (NameError at runtime);
                # the loop variable is 'tok'.
                if not bool(tok['case_sensitive']):
                    reg = re.compile(tok['token'], re.IGNORECASE)
                else:
                    reg = re.compile(tok['token'])
                if reg.match(filename):
                    if tok['lang']:
                        found['lang'] = tok['lang']
                    if tok['quality']:
                        found['quality'] = tok['quality']
                    if tok['subtitle']:
                        # NOTE(review): result key is 'subtitles'
                        # (plural) while the token key is 'subtitle' --
                        # kept as-is for caller compatibility.
                        found['subtitles'] = tok['subtitle']
                    # BUG FIX: re.sub returns the new string; the
                    # original discarded it, so the filename returned
                    # to the caller was never cleaned.
                    filename = reg.sub(' ', filename)
        return filename, found
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue