New handling method

2017-05-27 18:03:47 +02:00 · 2017-05-27 18:03:47 +02:00 · b305f63b46
commit b305f63b46
parent defcc3d618
7 changed files with 172 additions and 122 deletions
--- a/config.py
+++ b/config.py
@ -1,3 +1,4 @@
+#coding:utf-8
 from configobj import ConfigObj
 import shutil
 import posixpath
@ -24,10 +25,6 @@ class Config:
        self.check_config()
        print('config loaded')

-    @property
-    def domains(self):
-        return list(self.obj['domains'].keys())
-
    @property
    def app(self):
        return self.obj['api']['app']
@ -40,36 +37,16 @@ class Config:
    def server(self):
        return self.obj['api']['server']

+    @property
+    def extensions(self):
+        """Value of the 'extensions' entry of the 'api' config section."""
+        return self.obj['api']['extensions']
+        
    def check_config(self):
        if not 'api' in self.obj:
            raise ConfigError("no 'api' section in config file")
        for k in ['app', 'token', 'extensions', 'server']:
            if not k in self.obj['api']:
                raise ConfigError("no '"+k+"' value in config file, subsection 'api'")
-        if not 'domains' in self.obj:
-            raise ConfigError("no 'domains' section in config file")
-        if len(self.obj['domains']) == 0:
-            raise ConfigError("no domains in config file, subsection domains")
-        for d in self.obj['domains']:
-            print('check dom:', d)
-            for k in ['movies', 'series']:
-                if not k in self.obj['domains'][d]:
-                    raise ConfigError("no '"+k+"' value in config file, subsection 'domains/"+d+"'")
-        
-    def get_serie_dir(self, domain):
-        return self.obj['domains'][domain]['series']
-
-    def get_excluded_serie_dir(self, domain):
-        return self.obj['domains'][domain]['no_series']
-
-    def get_movie_dir(self, domain):
-        return self.obj['domains'][domain]['movies']
-
-    def get_excluded_movie_dir(self, domain):
-        return self.obj['domains'][domain]['no_movies']
-
-    def get_domain(self, domain):
-        return self.obj['domains'][domain]['domain']
    
    def is_valid_file(self, name):
        _, ext = posixpath.splitext(name)
--- a/default.conf
+++ b/default.conf
@ -8,17 +8,3 @@ token = ''
 extensions = 'avi','mkv','mp4','m2ts','rmvb'
 # Serveur où appeler l'API
 server = 'http://piexel.rez'
-
-[domains]
-
-[[example]]
-  # Nom du domaine
-  domain='example.ftp'
-
-  [[[directories]]]
-  # Dossiers contenant des films
-  movies = ,
-  # Dossiers contenant des séries
-  series = ,
-
-
--- a/file.py
+++ b/file.py
@ -1,3 +1,4 @@
+#coding:utf-8
 import posixpath

 class File:
@ -7,12 +8,14 @@ class File:
    def __init__(self, path, name,api_id=None):
        self.path = path
        self.name = name
+        self.fullname = path+name
+        '''
        self.info_lang = []
        self.info_quality = []
        self.info_subtitles = []
-        self.year = self._find_year()
-        self.title = self._get_title()
+        self.title = ''
        self.simple_name = self.title.replace(' ', '')
+        '''
        self.api_id = api_id

    def get_ext(self):
@ -23,36 +26,15 @@ class File:
        _, ext = posixpath.splitext(self.name)
        return ext

-    def _get_title(self):
-        fname, ext = posixpath.splitext(self.name)
+    def _extract_title(self, title, tok):
+        # extrait un titre de film depuis la variable #TITLE#
+        fname, ext = posixpath.splitext(title)
        # 1) séparateurs
        fname = fname.replace('.', ' ')
        fname = fname.replace('_', ' ')
        fname = fname.replace('-', ' ')
-        # 2.1) marqueurs de qualitée
-        for m in ['HDRiP', 'HDRip', 'HDTS', 'HD', 'DVDRIP', 'DvDRIP', '720p', '1080p', 'DVDSCR', 'BluRay', 'BrRip', 'BRRip']:
-            if m in fname:
-                self.info_quality.append(m)
-            fname = fname.replace(m, '')
-        # 2.2) marqueur de sous-titres
-        for m in ['SRT FR', 'STFR', 'STEN']:
-            if m in fname:
-                self.info_subtitles.append(m)
-            fname = fname.replace(m, '')
-        # 2.3) marqueurs de langues & sous-titres
-        for m in ['VOSTFR', 'VOST-FR', 'VOSTMulti']:
-            if m in fname:
-                self.info_lang.append(m)
-                self.info_subtitles.append(m)
-            fname = fname.replace(m, '')
-        # 2.4) marqueurs de langues
-        for m in ['VO', 'VF','FRENCH', 'FR', 'ENG', '[Eng]', 'VJAP']:
-            if m in fname:
-                self.info_lang.append(m)
-            fname = fname.replace(m, '')
-        # 2.5) marqueurs autres
-        for m in ['UNCENSORED', 'X264', 'x264', 'X265', '[www Cpasbien com]', '[www newpct1 com]', 'YIFY', 'JYK']:
-            fname = fname.replace(m, '')
+        # 2) marqueurs
+        fname, info = tok.tokenize(fname)
        # 3) minuscule
        fname = fname.lower()
        # 4) année
@ -61,20 +43,12 @@ class File:
            fname = fname.replace('[ '+str(self.year)+' ]', '')
            fname = fname.replace('['+str(self.year)+' ]', '')
            fname = fname.replace('[ '+str(self.year)+']', '')
-        # 5) espaces en bout et centraux
+        # 5) espaces en bout, centraux et rajoutés
        fname = fname.lstrip().rstrip()
        while '  ' in fname:
            fname = fname.replace('  ', ' ')
        return fname

-    def _find_year(self):
-        fname, ext = posixpath.splitext(self.name)
-        fname = fname.replace(' ', '')
-        for i in range(1800, 2100):
-            if '['+str(i)+']' in fname:
-                return i
-        return None
-
    def filename_same(self, other):
        # Compare les noms de fichiers de self et de other
        # En supprimant les espaces, la date, et les marques de qualitée
@ -89,7 +63,7 @@ class File:
            for m in ['VOSTFR', 'VOST-FR', 'VOSTMulti', 'VO']:
                if m in self.info_lang:
                    info['lang'] = 'VO'
-            for m in ['ENG', '[Eng]']:
+            for m in ['ENG', '[Eng]', 'VEN']:
                pass # notation?
            for m in ['VJAP']:
                pass # notation?
@ -97,10 +71,10 @@ class File:
                if m in self.info_lang:
                    info['lang'] = 'FR'
        if len(self.info_quality) > 0:
-            for m in ['HDRiP', 'HDTS', 'HD', '720p', 'BluRay']:
+            for m in ['HDRiP', 'HDRip', 'HDrip', 'HDTS', 'HD', '720p', 'BluRay']:
                if m in self.info_quality:
                    info['quality'] = 'HD'
-            for m in ['DVDRIP', 'DVDSCR']:
+            for m in ['DVDRIP', 'DvDrip', 'DvDRIP', 'DVDrip', 'DVDSCR']:
                pass # qualité?
            for m in ['1080p']:
                if m in self.info_quality:
--- a/filerule.py
+++ b/filerule.py
@ -0,0 +1,36 @@
+import re
+
+
+def match_rules(filename, rules):
+	"""Return the first rule that matches *filename*, with its captures.
+
+	:param filename: full path of the file to test
+	:param rules: iterable of objects exposing ``apply(filename)``, which
+		returns a dict of captured markers, or None when the rule does not match
+	:return: a ``(rule, captures)`` tuple for the first matching rule,
+		or None when no rule matches
+	"""
+	for r in rules:
+		match = r.apply(filename)
+		# Compare against None: a rule without any marker matches with an
+		# empty (falsy) dict and must still count as a match.
+		if match is not None:
+			return r, match
+	return None
+
+class FileRule:
+	"""A path rule: a regex template whose #NAME# markers become capture groups.
+
+	``patterns`` holds the marker names in order of appearance and ``rule``
+	the compiled regular expression.
+	"""
+
+	# finds the #NAME# markers inside a rule template
+	_MARKER = re.compile(r'#(.+?)#')
+
+	def __init__(self, rule, conf):
+		"""Compile *rule* into a regular expression.
+
+		:param rule: regex template; every ``#NAME#`` marker is replaced by
+			the capture group returned by :meth:`regFor`
+		:param conf: object exposing ``extensions`` (used for ``#EXT#``)
+		:raises re.error: if the resulting expression is not a valid regex
+		"""
+		# collect the markers of the rule, then swap each one for its group
+		self.patterns = self._MARKER.findall(rule)
+		rg = self._MARKER.sub(lambda m: self.regFor(match=m, conf=conf), rule)
+		self.rule = re.compile(rg)
+		print(self.patterns, self.rule)
+
+	@staticmethod
+	def regFor(match, conf):
+		"""Return the capture group that replaces one marker.
+
+		:param match: regex match object whose ``group(0)`` is the full marker
+		:param conf: object exposing ``extensions``
+		:return: an alternation of the configured extensions for ``#EXT#``,
+			otherwise a group matching one path component (no '/')
+		"""
+		if match.group(0) == '#EXT#':
+			extensions = conf.extensions
+			if isinstance(extensions, str):
+				# a lone value must not be split into characters by join()
+				extensions = [extensions]
+			# escape each value so regex metacharacters are taken literally
+			return '(' + '|'.join(re.escape(e) for e in extensions) + ')'
+		return '([^/]+)'
+
+	def apply(self, filename):
+		"""Apply the rule to a file path.
+
+		The expression is matched from the start of *filename*; text after
+		the end of the match is not checked.
+
+		:param filename: path to test
+		:return: dict mapping each marker name to the captured text, or None
+			when the rule does not match
+		"""
+		match = self.rule.match(filename)
+		if match is None:
+			return None
+		# markers and groups were created in the same order: pair them up
+		return dict(zip(self.patterns, match.groups()))
--- a/main.py
+++ b/main.py
@ -1,3 +1,4 @@
+#coding:utf-8
 '''

 Pierre Cadart
@ -11,6 +12,8 @@ import posixpath
 import re
 import config
 import piexel
+import tokenizer
+import filerule
 from ftplib import FTP
 import time
 import file
@ -39,36 +42,39 @@ def ftpwalk(directory, ftp):
        # mais retourne les résultats intermédiaires
        yield (current, Ldirs, Lfiles) 

-def visit_server(domain, conf, api):
+def visit_folder(domain, api, rules):
    # Connection au serveur
-    print('connect to:', conf.get_domain(domain))
-    ftp = FTP(conf.get_domain(domain), user='rez', passwd='rez')
+    print('connect to:', domain['server'])
+    ftp = FTP(domain['server'][6:], user=domain['username'], passwd=domain['password'])
    ftp.encoding = 'UTF-8'
    # Initialisation des listes de mises à jour
    L_missing = []      # fichiers non trouvés sur le serveur FTP
    L_unreferenced = [] # fichiers non référencés dans l'API
    L_moved = []        # fichiers déplacés sur le serveur FTP
-    # Lecture à distance des deux BDD
-    for directory in conf.get_movie_dir(domain):
-        # Visite l'arborescence de chaque dossier
+    # Lecture des fichiers sur le serveur FTP
    Lloc = []
-        for path, _, files in ftpwalk(directory, ftp):
-            # Vérifie si le parcours du dossier est autorisé
-            if not any(path.startswith(p) for p in conf.get_excluded_movie_dir(domain)):
+    for path, _, files in ftpwalk(domain['path'], ftp):
        # Ajoute les fichiers correspondants aux extensions
        for f in files:
-                    if conf.is_valid_file(f):
-                        F = file.File(path, f)
+            match = filerule.match_rules(path+'/'+f, rules)
+            if match:
+                print('got match:',match[1], 'name:',path+'/'+f)
+                F = file.File(path+'/'+f, match[1])
                Lloc.append(F)
+    ftp.close()
+    print('total:',len(Lloc))
+    exit(0)
+    # Application des règles de chemins
+
    # Récupère les fichiers de l'api
    Lapi = []
-        for info in api.get_files(path='ftp://'+conf.get_domain(domain)+directory, like=1):
-            Lapi.append(file.File(info['path'][len('ftp://'+conf.get_domain(domain)):], info['name'], api_id=info['filable_id']))
-    ftp.close()
+    for info in api.get_files(path='ftp://'+domain['server']+domain['path'], like=1):
+        Lapi.append(file.File(info['path'][len('ftp://'+domain['server']):], info['name'], api_id=info['filable_id']))

    # supprime les dossiers de l'api
    Lapi = [f for f in Lapi if conf.is_valid_file(f.name)]

+    # TODO: gérer ces noms
    # supprime les noms avec un '+'
    Lloc = [f for f in Lloc if '+' not in f.name]
    
@ -123,11 +129,11 @@ def visit_server(domain, conf, api):
    for f, _ in Llink2:
        Lunref.remove(f)

-    print('missing:',Lmissing)
+    print('missing:',[str(e.api_id)+':'+repr(e) for e in Lmissing])
    print('\n'*3)
    print('unreferenced:','\n'.join(str(f) for f in Lunref))
    print('\n'*3)
-    print('unreferenced titles:', '\n'.join([f.title for f in Lunref]))
+    print('unreferenced titles:', '\n'.join(sorted([f.title for f in Lunref])))

    # Put les renommages / déplacements
    i = 0
@ -175,28 +181,33 @@ def visit_server(domain, conf, api):
            raise Exception('end')
        
    # Poste tout les films locaux
+    """
    i = 0
    for film in Lunref:
        i += 1
-        print('['+str(i)+'/'+str(len(Lunref))+']'+'post:', film.title)
+        print('['+str(i)+'/'+str(len(Lunref))+']'+'post:', film.title, str(film.year))
        try:
-            api.debug_print = True
+            posted = False
            if film.year is not None:
                resp = api.post_film(title=film.title, year=film.year)
-            else:
+                if "id" in resp:
+                    resp = api.post_file(path='ftp://'+conf.get_domain(domain)+film.path, name=film.name, type='Film', type_id=resp["id"], **film.additional_info())
+                    posted = True
+            if not posted:
                resp = api.post_film(title=film.title)
                if "id" in resp:
-                api.post_file(path='ftp://'+conf.get_domain(domain)+film.path, name=film.name, type='Film', type_id=resp["id"], **film.additional_info())
-            api.debug_print = False
+                    resp = api.post_file(path='ftp://'+conf.get_domain(domain)+film.path, name=film.name, type='Film', type_id=resp["id"], **film.additional_info())
+            
+            print('response:', resp)
            time.sleep(1)
        except Exception as e:
            print(e)
            print('film '+film.title+' not posted')
            raise Exception('end')
-
+    """
+    '''
    # Marque comme broken les films référencés non présents
    # TODO: màj des broken_links
-    '''
    i = 0
    for film in Lmissing:
        i += 1
@ -208,8 +219,13 @@ def visit_server(domain, conf, api):
 def main():
    conf = config.Config()
    api = piexel.Piexel(conf.server, conf.app, conf.token)
-    for dom in conf.domains:
-        visit_server(dom, conf, api)
+    tokens = tokenizer.Tokenizer(conf, api)
+    folders = api.get_folders()
+    rules = api.get_paths()
+
+    for fold in folders:
+        applicable = [filerule.FileRule(re.escape(fold['path'])+'\\/'+r['regex'], conf) for r in rules if int(r['indexer_folder_id']) == fold['id']]
+        visit_folder(fold, api, applicable)

 if __name__ == '__main__':
    main()
--- a/piexel.py
+++ b/piexel.py
@ -1,3 +1,4 @@
+#coding:utf-8
 import requests

 class PiexelErrors(Exception):
@ -9,13 +10,15 @@ class InvalidToken(PiexelErrors):
 class ParameterError(PiexelErrors):
    pass

+class InvalidResponse(PiexelErrors):
+    """Raised when the body of an API response cannot be decoded as JSON."""
+    pass
+
 class Piexel:
    def __init__(self, domain, app='', token='', endpoint='/api/'):
        self.app = app
        self.token = token
        self.domain = domain
        self.endpoint = endpoint
-        self.debug_print = False

    def _get_response(self, controller, fields, request_type='get'): 
        """
@ -34,11 +37,12 @@ class Piexel:
            response = requests.put(url, fields)
        elif request_type == 'delete':
            response = requests.delete(url, data=fields)
-        if self.debug_print:
-            print('resp:',response.text)
        response.encoding = 'utf-8'
        code = response.status_code
+        try:
            data = response.json()
+        except:
+            raise InvalidResponse(response.text)
        if code == 403:  # FORBIDDEN
            raise InvalidToken(data['message'])
        elif code == 400:
@ -82,13 +86,29 @@ class Piexel:
        fields = self._get_request(['id', 'serie_id', 'title', 'imdb_id', 'limit', 'first', 'first', 'episodes'], [], **params)
        return self._get_response('episodes', fields)

-    def get_files(self, **params):
+    def get_tokens(self, **params):
        """
        Récupère les fichiers
        :param params: paramètres à passer
        """
-        fields = self._get_request(['id', 'path', 'name', 'limit', 'first', 'filable'], [], **params)
-        return self._get_response('files', fields)
+        fields = self._get_request([], [], **params)
+        return self._get_response('indexer/tokens', fields)
+
+    def get_folders(self, **params):
+        """
+        Fetch the indexer folders (controller 'indexer/folders').
+        :param params: parameters to pass along with the request
+        """
+        return self._get_response('indexer/folders', self._get_request([], [], **params))
+
+    def get_paths(self, **params):
+        """
+        Fetch the indexer path rules (controller 'indexer/paths').
+        :param params: parameters to pass along with the request
+        """
+        return self._get_response('indexer/paths', self._get_request([], [], **params))

    def get_actors(self, **params):
        """
@ -96,7 +116,7 @@ class Piexel:
        :param params: paramètres à passer
        """
        fields = self._get_request(['id', 'name', 'imdb_id', 'tmdb_id', 'limit', 'first', 'films', 'series'], [], **params)
-        return self._get_response('files', fields)
+        return self._get_response('actors', fields)

    def get_files(self, **params):
        """
--- a/tokenizer.py
+++ b/tokenizer.py
@ -0,0 +1,41 @@
+#coding:utf-8
+import piexel
+import re
+
+
+class Tokenizer:
+	"""Finds and strips known markers (language, quality, subtitles) in a name.
+
+	The token list comes from ``api.get_tokens()``. Each token is read as a
+	mapping with the keys 'step', 'token' (a regular expression),
+	'case_sensitive', 'lang', 'quality' and 'subtitle'.
+	"""
+
+	def __init__(self, conf, api):
+		"""Store *conf* and *api*, then load the tokens from the API."""
+		self.conf = conf
+		self.api = api
+		self.reload_tokens()
+
+	def reload_tokens(self):
+		"""Fetch the tokens again and rebuild the sorted list of distinct steps."""
+		self.tk = self.api.get_tokens()
+		self.steps = sorted({t['step'] for t in self.tk})
+
+	def get_tokens_step(self, step):
+		"""Return the tokens whose 'step' equals *step*, in API order."""
+		return [t for t in self.tk if t['step'] == step]
+
+	def tokenise(self, filename):
+		"""Extract the known markers from *filename*.
+
+		Tokens are tried step by step, in increasing step order. Every token
+		found in the name records its non-empty 'lang', 'quality' and
+		'subtitle' values (a later token overrides an earlier one) and each
+		of its occurrences is replaced by a single space.
+
+		:param filename: name to analyse
+		:return: ``(cleaned_name, found)`` where *found* may hold the keys
+			'lang', 'quality' and 'subtitles'
+		:raises re.error: if a token is not a valid regular expression
+		"""
+		found = {}
+		for step in self.steps:
+			for tok in self.get_tokens_step(step):
+				flags = 0 if tok['case_sensitive'] else re.IGNORECASE
+				reg = re.compile(tok['token'], flags)
+				# search, not match: a marker can sit anywhere in the name
+				if not reg.search(filename):
+					continue
+				if tok['lang']:
+					found['lang'] = tok['lang']
+				if tok['quality']:
+					found['quality'] = tok['quality']
+				if tok['subtitle']:
+					found['subtitles'] = tok['subtitle']
+				# strings are immutable: keep the result of the substitution
+				filename = reg.sub(' ', filename)
+		return filename, found
+
+	# file.py calls tok.tokenize(...): keep both spellings working
+	tokenize = tokenise
+
+
+
+
+