New handling method

2017-05-27 18:03:47 +02:00 · 2017-05-27 18:03:47 +02:00 · b305f63b46
commit b305f63b46
parent defcc3d618
7 changed files with 172 additions and 122 deletions
--- a/config.py
+++ b/config.py
@ -1,3 +1,4 @@
 #coding:utf-8
 from configobj import ConfigObj
 import shutil
 import posixpath
@ -24,10 +25,6 @@ class Config:
        self.check_config()
        print('config loaded')
    @property
    def domains(self):
        """Names of the entries of the 'domains' section of ``self.obj``, as a new list."""
        domain_section = self.obj['domains']
        return [name for name in domain_section.keys()]
    @property
    def app(self):
        """Value stored under the 'app' key of the 'api' section of ``self.obj``."""
        api_section = self.obj['api']
        return api_section['app']
@ -39,6 +36,10 @@ class Config:
    @property
    def server(self):
        """Value stored under the 'server' key of the 'api' section of ``self.obj``."""
        api_section = self.obj['api']
        return api_section['server']
    @property
    def extensions(self):
        """Value stored under the 'extensions' key of the 'api' section of ``self.obj``."""
        api_section = self.obj['api']
        return api_section['extensions']
    def check_config(self):
        if not 'api' in self.obj:
@ -46,30 +47,6 @@ class Config:
        for k in ['app', 'token', 'extensions', 'server']:
            if not k in self.obj['api']:
                raise ConfigError("no '"+k+"' value in config file, subsection 'api'")
        if not 'domains' in self.obj:
            raise ConfigError("no 'domains' section in config file")
        if len(self.obj['domains']) == 0:
            raise ConfigError("no domains in config file, subsection domains")
        for d in self.obj['domains']:
            print('check dom:', d)
            for k in ['movies', 'series']:
                if not k in self.obj['domains'][d]:
                    raise ConfigError("no '"+k+"' value in config file, subsection 'domains/"+d+"'")
    def get_serie_dir(self, domain):
        """Return the 'series' entry stored for ``domain`` in the 'domains' section."""
        domain_conf = self.obj['domains'][domain]
        return domain_conf['series']
    def get_excluded_serie_dir(self, domain):
        """Return the 'no_series' entry stored for ``domain`` in the 'domains' section."""
        domain_conf = self.obj['domains'][domain]
        return domain_conf['no_series']
    def get_movie_dir(self, domain):
        """Return the 'movies' entry stored for ``domain`` in the 'domains' section."""
        domain_conf = self.obj['domains'][domain]
        return domain_conf['movies']
    def get_excluded_movie_dir(self, domain):
        """Return the 'no_movies' entry stored for ``domain`` in the 'domains' section."""
        domain_conf = self.obj['domains'][domain]
        return domain_conf['no_movies']
    def get_domain(self, domain):
        """Return the 'domain' entry stored for ``domain`` in the 'domains' section."""
        domain_conf = self.obj['domains'][domain]
        return domain_conf['domain']
    def is_valid_file(self, name):
        _, ext = posixpath.splitext(name)
--- a/default.conf
+++ b/default.conf
@ -8,17 +8,3 @@ token = ''
 extensions = 'avi','mkv','mp4','m2ts','rmvb'
 # Serveur où appeler l'API
 server = 'http://piexel.rez'
 [domains]
 [[example]]
  # Nom du domaine
  domain='example.ftp'
  [[[directories]]]
  # Dossiers contenant des films
  movies = ,
  # Dossiers contenant des séries
  series = ,
--- a/file.py
+++ b/file.py
@ -1,3 +1,4 @@
 #coding:utf-8
 import posixpath
 class File:
@ -7,12 +8,14 @@ class File:
    def __init__(self, path, name,api_id=None):
        self.path = path
        self.name = name
        self.fullname = path+name
        '''
        self.info_lang = []
        self.info_quality = []
        self.info_subtitles = []
-        self.year = self._find_year()
+        self.title = ''
        self.title = self._get_title()
        self.simple_name = self.title.replace(' ', '')
        '''
        self.api_id = api_id
    def get_ext(self):
@ -23,36 +26,15 @@ class File:
        _, ext = posixpath.splitext(self.name)
        return ext
-    def _get_title(self):
+    def _extract_title(self, title, tok):
-        fname, ext = posixpath.splitext(self.name)
+        # extrait un titre de film depuis la variable #TITLE#
        fname, ext = posixpath.splitext(title)
        # 1) séparateurs
        fname = fname.replace('.', ' ')
        fname = fname.replace('_', ' ')
        fname = fname.replace('-', ' ')
-        # 2.1) marqueurs de qualitée
+        # 2) marqueurs
-        for m in ['HDRiP', 'HDRip', 'HDTS', 'HD', 'DVDRIP', 'DvDRIP', '720p', '1080p', 'DVDSCR', 'BluRay', 'BrRip', 'BRRip']:
+        fname, info = tok.tokenize(fname)
            if m in fname:
                self.info_quality.append(m)
            fname = fname.replace(m, '')
        # 2.2) marqueur de sous-titres
        for m in ['SRT FR', 'STFR', 'STEN']:
            if m in fname:
                self.info_subtitles.append(m)
            fname = fname.replace(m, '')
        # 2.3) marqueurs de langues & sous-titres
        for m in ['VOSTFR', 'VOST-FR', 'VOSTMulti']:
            if m in fname:
                self.info_lang.append(m)
                self.info_subtitles.append(m)
            fname = fname.replace(m, '')
        # 2.4) marqueurs de langues
        for m in ['VO', 'VF','FRENCH', 'FR', 'ENG', '[Eng]', 'VJAP']:
            if m in fname:
                self.info_lang.append(m)
            fname = fname.replace(m, '')
        # 2.5) marqueurs autres
        for m in ['UNCENSORED', 'X264', 'x264', 'X265', '[www Cpasbien com]', '[www newpct1 com]', 'YIFY', 'JYK']:
            fname = fname.replace(m, '')
        # 3) minuscule
        fname = fname.lower()
        # 4) année
@ -61,20 +43,12 @@ class File:
            fname = fname.replace('[ '+str(self.year)+' ]', '')
            fname = fname.replace('['+str(self.year)+' ]', '')
            fname = fname.replace('[ '+str(self.year)+']', '')
-        # 5) espaces en bout et centraux
+        # 5) espaces en bout, centraux et rajoutés
        fname = fname.lstrip().rstrip()
        while '  ' in fname:
            fname = fname.replace('  ', ' ')
        return fname
    def _find_year(self):
        fname, ext = posixpath.splitext(self.name)
        fname = fname.replace(' ', '')
        for i in range(1800, 2100):
            if '['+str(i)+']' in fname:
                return i
        return None
    def filename_same(self, other):
        # Compare les noms de fichiers de self et de other
        # En supprimant les espaces, la date, et les marques de qualitée
@ -89,7 +63,7 @@ class File:
            for m in ['VOSTFR', 'VOST-FR', 'VOSTMulti', 'VO']:
                if m in self.info_lang:
                    info['lang'] = 'VO'
-            for m in ['ENG', '[Eng]']:
+            for m in ['ENG', '[Eng]', 'VEN']:
                pass # notation?
            for m in ['VJAP']:
                pass # notation?
@ -97,10 +71,10 @@ class File:
                if m in self.info_lang:
                    info['lang'] = 'FR'
        if len(self.info_quality) > 0:
-            for m in ['HDRiP', 'HDTS', 'HD', '720p', 'BluRay']:
+            for m in ['HDRiP', 'HDRip', 'HDrip', 'HDTS', 'HD', '720p', 'BluRay']:
                if m in self.info_quality:
                    info['quality'] = 'HD'
-            for m in ['DVDRIP', 'DVDSCR']:
+            for m in ['DVDRIP', 'DvDrip', 'DvDRIP', 'DVDrip', 'DVDSCR']:
                pass # qualité?
            for m in ['1080p']:
                if m in self.info_quality:
--- a/filerule.py
+++ b/filerule.py
@ -0,0 +1,36 @@
 import re
 def match_rules(filename, rules):
 	"""Return the first rule matching ``filename`` together with its extracted markers.
 	:param filename: path to test; it is handed to each rule's ``apply`` method
 	:param rules: iterable of objects exposing ``apply(filename)``, which returns ``None`` when the rule does not match
 	:return: ``(rule, markers)`` for the first rule that matches, ``None`` when no rule matches
 	"""
 	for r in rules:
 		match = r.apply(filename)
 		# An empty dict is a valid match (rule without any #MARKER#),
 		# so compare against None instead of relying on truthiness.
 		if match is not None:
 			return r, match
 	return None
 class FileRule:
 	"""A path rule in which ``#NAME#`` markers stand for captured path segments.
 	The rule text is turned into a regular expression: ``#EXT#`` becomes an
 	alternation of the configured extensions, every other marker becomes
 	``([^/]+)``. The rest of the rule text is used as regex source unchanged.
 	"""
 	def __init__(self, rule, conf):
 		"""Compile ``rule`` into ``self.rule`` and record marker names in ``self.patterns``.
 		:param rule: rule text containing ``#NAME#`` markers
 		:param conf: object exposing ``extensions`` (iterable of extension strings)
 		"""
 		# find the markers used in the rule, in order of appearance
 		reg = re.compile(r'#(.+?)#')
 		self.patterns = reg.findall(rule)
 		rg = reg.sub(lambda m: self.regFor(match=m, conf=conf), rule)
 		self.rule = re.compile(rg)
 		print(self.patterns, self.rule)
 	@staticmethod
 	def regFor(match, conf):
 		"""Return the capturing group that replaces one marker.
 		:param match: match object whose ``group(0)`` is the full marker, e.g. ``'#EXT#'``
 		:param conf: object exposing ``extensions``
 		:return: regex source of a single capturing group
 		"""
 		if match.group(0) == '#EXT#':
 			# escape each extension so that regex metacharacters in it are taken literally
 			return '(' + '|'.join(re.escape(ext) for ext in conf.extensions) + ')'
 		return '([^/]+)'
 	def apply(self, filename):
 		"""Match ``filename`` against the rule (anchored at the start only).
 		:param filename: path to test
 		:return: dict mapping each marker name to its captured text, or ``None`` when the rule does not match
 		"""
 		match = self.rule.match(filename)
 		if not match:
 			return None
 		# the i-th marker corresponds to the i-th capturing group;
 		# a marker name used twice keeps its last captured value
 		return dict(zip(self.patterns, match.groups()))
--- a/main.py
+++ b/main.py
@ -1,3 +1,4 @@
 #coding:utf-8
 '''
 Pierre Cadart
@ -11,6 +12,8 @@ import posixpath
 import re
 import config
 import piexel
 import tokenizer
 import filerule
 from ftplib import FTP
 import time
 import file
@ -39,36 +42,39 @@ def ftpwalk(directory, ftp):
        # mais retourne les résultats intermédiaires
        yield (current, Ldirs, Lfiles) 
-def visit_server(domain, conf, api):
+def visit_folder(domain, api, rules):
    # Connection au serveur
-    print('connect to:', conf.get_domain(domain))
+    print('connect to:', domain['server'])
-    ftp = FTP(conf.get_domain(domain), user='rez', passwd='rez')
+    ftp = FTP(domain['server'][6:], user=domain['username'], passwd=domain['password'])
    ftp.encoding = 'UTF-8'
    # Initialisation des listes de mises à jour
    L_missing = []      # fichiers non trouvés sur le serveur FTP
    L_unreferenced = [] # fichiers non référencés dans l'API
    L_moved = []        # fichiers déplacés sur le serveur FTP
-    # Lecture à distance des deux BDD
+    # Lecture des fichiers sur le serveur FTP
-    for directory in conf.get_movie_dir(domain):
+    Lloc = []
-        # Visite l'arborescence de chaque dossier
+    for path, _, files in ftpwalk(domain['path'], ftp):
-        Lloc = []
+        # Ajoute les fichiers correspondants aux extensions
-        for path, _, files in ftpwalk(directory, ftp):
+        for f in files:
-            # Vérifie si le parcours du dossier est autorisé
+            match = filerule.match_rules(path+'/'+f, rules)
-            if not any(path.startswith(p) for p in conf.get_excluded_movie_dir(domain)):
+            if match:
-                # Ajoute les fichiers correspondants aux extensions
+                print('got match:',match[1], 'name:',path+'/'+f)
-                for f in files:
+                F = file.File(path+'/'+f, match[1])
-                    if conf.is_valid_file(f):
+                Lloc.append(F)
                        F = file.File(path, f)
                        Lloc.append(F)
        # Récupère les fichiers de l'api
        Lapi = []
        for info in api.get_files(path='ftp://'+conf.get_domain(domain)+directory, like=1):
            Lapi.append(file.File(info['path'][len('ftp://'+conf.get_domain(domain)):], info['name'], api_id=info['filable_id']))
    ftp.close()
-    
+    print('total:',len(Lloc))
    exit(0)
    # Application des règles de chemins
    # Récupère les fichiers de l'api
    Lapi = []
    for info in api.get_files(path='ftp://'+domain['server']+domain['path'], like=1):
        Lapi.append(file.File(info['path'][len('ftp://'+domain['server']):], info['name'], api_id=info['filable_id']))
    # supprime les dossiers de l'api
    Lapi = [f for f in Lapi if conf.is_valid_file(f.name)]
    # TODO: gérer ces noms
    # supprime les noms avec un '+'
    Lloc = [f for f in Lloc if '+' not in f.name]
@ -123,11 +129,11 @@ def visit_server(domain, conf, api):
    for f, _ in Llink2:
        Lunref.remove(f)
-    print('missing:',Lmissing)
+    print('missing:',[str(e.api_id)+':'+repr(e) for e in Lmissing])
    print('\n'*3)
    print('unreferenced:','\n'.join(str(f) for f in Lunref))
    print('\n'*3)
-    print('unreferenced titles:', '\n'.join([f.title for f in Lunref]))
+    print('unreferenced titles:', '\n'.join(sorted([f.title for f in Lunref])))
    # Put les renommages / déplacements
    i = 0
@ -175,28 +181,33 @@ def visit_server(domain, conf, api):
            raise Exception('end')
    # Poste tout les films locaux
    """
    i = 0
    for film in Lunref:
        i += 1
-        print('['+str(i)+'/'+str(len(Lunref))+']'+'post:', film.title)
+        print('['+str(i)+'/'+str(len(Lunref))+']'+'post:', film.title, str(film.year))
        try:
-            api.debug_print = True
+            posted = False
            if film.year is not None:
                resp = api.post_film(title=film.title, year=film.year)
-            else:
+                if "id" in resp:
                    resp = api.post_file(path='ftp://'+conf.get_domain(domain)+film.path, name=film.name, type='Film', type_id=resp["id"], **film.additional_info())
                    posted = True
            if not posted:
                resp = api.post_film(title=film.title)
-            if "id" in resp:
+                if "id" in resp:
-                api.post_file(path='ftp://'+conf.get_domain(domain)+film.path, name=film.name, type='Film', type_id=resp["id"], **film.additional_info())
+                    resp = api.post_file(path='ftp://'+conf.get_domain(domain)+film.path, name=film.name, type='Film', type_id=resp["id"], **film.additional_info())
-            api.debug_print = False
+            
            print('response:', resp)
            time.sleep(1)
        except Exception as e:
            print(e)
            print('film '+film.title+' not posted')
            raise Exception('end')
-
+    """
    '''
    # Marque comme broken les films référencés non présents
    # TODO: màj des broken_links
    '''
    i = 0
    for film in Lmissing:
        i += 1
@ -208,8 +219,13 @@ def visit_server(domain, conf, api):
 def main():
    conf = config.Config()
    api = piexel.Piexel(conf.server, conf.app, conf.token)
-    for dom in conf.domains:
+    tokens = tokenizer.Tokenizer(conf, api)
-        visit_server(dom, conf, api)
+    folders = api.get_folders()
    rules = api.get_paths()
    for fold in folders:
        applicable = [filerule.FileRule(re.escape(fold['path'])+'\\/'+r['regex'], conf) for r in rules if int(r['indexer_folder_id']) == fold['id']]
        visit_folder(fold, api, applicable)
 if __name__ == '__main__':
    main()
--- a/piexel.py
+++ b/piexel.py
@ -1,3 +1,4 @@
 #coding:utf-8
 import requests
 class PiexelErrors(Exception):
@ -9,13 +10,15 @@ class InvalidToken(PiexelErrors):
 class ParameterError(PiexelErrors):
    """PiexelErrors subclass; presumably signals invalid request parameters (usage not visible here)."""
 class InvalidResponse(PiexelErrors):
    """PiexelErrors subclass raised when a response body cannot be decoded as JSON."""
 class Piexel:
    def __init__(self, domain, app='', token='', endpoint='/api/'):
        self.app = app
        self.token = token
        self.domain = domain
        self.endpoint = endpoint
        self.debug_print = False
    def _get_response(self, controller, fields, request_type='get'): 
        """
@ -34,11 +37,12 @@ class Piexel:
            response = requests.put(url, fields)
        elif request_type == 'delete':
            response = requests.delete(url, data=fields)
        if self.debug_print:
            print('resp:',response.text)
        response.encoding = 'utf-8'
        code = response.status_code
-        data = response.json()
+        try:
            data = response.json()
        except:
            raise InvalidResponse(response.text)
        if code == 403:  # FORBIDDEN
            raise InvalidToken(data['message'])
        elif code == 400:
@ -82,13 +86,29 @@ class Piexel:
        fields = self._get_request(['id', 'serie_id', 'title', 'imdb_id', 'limit', 'first', 'first', 'episodes'], [], **params)
        return self._get_response('episodes', fields)
-    def get_files(self, **params):
+    def get_tokens(self, **params):
        """
        Récupère les fichiers
        :param params: paramètres à passer
        """
-        fields = self._get_request(['id', 'path', 'name', 'limit', 'first', 'filable'], [], **params)
+        fields = self._get_request([], [], **params)
-        return self._get_response('files', fields)
+        return self._get_response('indexer/tokens', fields)
    def get_folders(self, **params):
        """
        Query the 'indexer/folders' controller.
        :param params: keyword arguments forwarded to ``self._get_request``
        :return: whatever ``self._get_response`` returns for that controller
        """
        return self._get_response('indexer/folders', self._get_request([], [], **params))
    def get_paths(self, **params):
        """
        Query the 'indexer/paths' controller.
        :param params: keyword arguments forwarded to ``self._get_request``
        :return: whatever ``self._get_response`` returns for that controller
        """
        return self._get_response('indexer/paths', self._get_request([], [], **params))
    def get_actors(self, **params):
        """
@ -96,7 +116,7 @@ class Piexel:
        :param params: paramètres à passer
        """
        fields = self._get_request(['id', 'name', 'imdb_id', 'tmdb_id', 'limit', 'first', 'films', 'series'], [], **params)
-        return self._get_response('files', fields)
+        return self._get_response('actors', fields)
    def get_files(self, **params):
        """
--- a/tokenizer.py
+++ b/tokenizer.py
@ -0,0 +1,41 @@
 #coding:utf-8
 import piexel
 import re
 class Tokenizer:
 	"""Finds marker tokens in file names and strips them out.
 	Tokens are dicts obtained from ``api.get_tokens()``. The keys read here are
 	'step', 'token' (a regular expression), 'case_sensitive', 'lang',
 	'quality' and 'subtitle'.
 	"""
 	def __init__(self, conf, api):
 		"""
 		:param conf: configuration object, stored as ``self.conf``
 		:param api: object exposing ``get_tokens()``
 		"""
 		self.conf = conf
 		self.api = api
 		self.reload_tokens()
 	def reload_tokens(self):
 		"""Fetch the tokens again and recompute the sorted list of distinct steps."""
 		self.tk = self.api.get_tokens()
 		self.steps = sorted(set(t['step'] for t in self.tk))
 	def get_tokens_step(self, step):
 		"""Return the tokens whose 'step' equals ``step``, in their original order."""
 		return [t for t in self.tk if t['step'] == step]
 	def tokenise(self, filename):
 		"""Extract marker information from ``filename``.
 		Tokens are applied step by step, in increasing step order. Every token
 		found anywhere in the name contributes its non-empty 'lang', 'quality'
 		and 'subtitle' values, and each of its occurrences is replaced by a space.
 		:param filename: name to analyse
 		:return: ``(cleaned_name, found)`` where ``found`` may hold the keys 'lang', 'quality' and 'subtitles'
 		"""
 		found = {}
 		for step in self.steps:
 			for tok in self.get_tokens_step(step):
 				flags = 0 if bool(tok['case_sensitive']) else re.IGNORECASE
 				reg = re.compile(tok['token'], flags)
 				# search, not match: a marker can sit anywhere in the name
 				if reg.search(filename):
 					if tok['lang']:
 						found['lang'] = tok['lang']
 					if tok['quality']:
 						found['quality'] = tok['quality']
 					if tok['subtitle']:
 						found['subtitles'] = tok['subtitle']
 					# str is immutable: keep the substituted result
 					filename = reg.sub(' ', filename)
 		return filename, found
 	# file.py calls ``tok.tokenize(...)``; keep both spellings available
 	tokenize = tokenise