From 15e3aaeabfcb0baeb99d694c0b44310c86742f82 Mon Sep 17 00:00:00 2001 From: gryf Date: Wed, 2 Apr 2014 11:27:15 +0200 Subject: [PATCH] Update of scan object, added commandline interface --- cmdcatalog.py | 215 ++++++++++++++++++++++++++++++++++++++++ pygtktalog/__init__.py | 10 +- pygtktalog/dbcommon.py | 2 +- pygtktalog/dbobjects.py | 33 +++--- pygtktalog/logger.py | 46 +++++++-- pygtktalog/scan.py | 103 +++++++++++++++++-- pygtktalog/thumbnail.py | 2 +- pygtktalog/video.py | 2 +- src/lib/img.py | 5 +- src/lib/thumbnail.py | 4 +- 10 files changed, 375 insertions(+), 47 deletions(-) create mode 100755 cmdcatalog.py diff --git a/cmdcatalog.py b/cmdcatalog.py new file mode 100755 index 0000000..9442071 --- /dev/null +++ b/cmdcatalog.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +import os +import sys +from argparse import ArgumentParser + +from pygtktalog import scan +from pygtktalog.dbobjects import File +from pygtktalog.dbcommon import connect, Session + +BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) + +RESET_SEQ = "\033[0m" +COLOR_SEQ = "\033[1;%dm" +BOLD_SEQ = "\033[1m" + + +def cprint(txt, color): + color_map = {"black": BLACK, + "red": RED, + "green": GREEN, + "yellow": YELLOW, + "blue": BLUE, + "magenta": MAGENTA, + "cyan": CYAN, + "white": WHITE} + print COLOR_SEQ % (30 + color_map[color]) + txt + RESET_SEQ + + +class Iface(object): + def __init__(self, dbname, pretend=False, debug=False): + self.engine = connect(dbname) + self.sess = Session() + self.dry_run = pretend + self.root = None + if debug: + scan.LOG.setLevel("DEBUG") + + def close(self): + self.sess.commit() + self.sess.close() + + # def create(self): + # self.sess.commit() + # self.sess.close() + + def _resolve_path(self, path): + if not path.startswith("/"): + raise AttributeError("Path have to start with slash (/)") + + last_node = self.root + for part in path.split("/"): + if not part.strip(): + continue + + for node in last_node.children: + if node.filename == part: + last_node = node + break + else: + raise AttributeError("No such path: %s" % path) + + return last_node + + def _make_path(self, node): + if node.parent == node: + return "/" + + path = [] + path.append(node.filename) + while node.parent != self.root: + path.append(node.parent.filename) + node = node.parent + + return "/".join([""] + path[::-1]) + + def list(self, path=None): + self.root = self.sess.query(File).filter(File.type==0).first() + if path: + node = self._resolve_path(path) + msg = "Content of path `%s':" % path + else: + node = self.root + msg = "Content of path `/':" + + cprint(msg, "white") + for node in node.children: + if node != self.root: + #if __debug__: + # print " %d:" % node.id, self._make_path(node) + #else: + print " ", self._make_path(node) + + def update(self, path, dir_to_update=None): + self.root = self.sess.query(File).filter(File.type==0).first() + node = self._resolve_path(path) + if node == self.root: + cprint("Cannot update entire db, since root was provided as path.", + "red") + return + + if not dir_to_update: + dir_to_update = os.path.join(node.filepath, node.filename) + + if not os.path.exists(dir_to_update): + raise OSError("Path to updtate doesn't exists: %s", dir_to_update) + + cprint("Updating node `%s' against directory " + "`%s'" % (path, dir_to_update), "white") + if not self.dry_run: + scanob = scan.Scan(dir_to_update) + # scanob.update_files(node.id) + scanob.update_files(node.id, self.engine) + + def create(self, dir_to_add): + self.root = File() + self.root.id = 1 + self.root.filename = 'root' + self.root.size = 0 + self.root.source = 0 + self.root.type = 0 + self.root.parent_id = 1 + if not self.dry_run: + self.sess.add(self.root) + self.sess.commit() + + cprint("Creating new db against directory `%s'" % dir_to_add, "white") + if not self.dry_run: + scanob = scan.Scan(dir_to_add) + scanob.add_files(self.engine) + + +def list_db(args): + if not os.path.exists(args.db): + cprint("File `%s' does not exists!" % args.db, "red") + sys.exit(1) + + obj = Iface(args.db, False, args.debug) + obj.list(path=args.path) + obj.close() + + +def update_db(args): + if not os.path.exists(args.db): + cprint("File `%s' does not exists!" % args.db, "red") + sys.exit(1) + + obj = Iface(args.db, args.pretend, args.debug) + obj.update(args.path, dir_to_update=args.dir_to_update) + obj.close() + +def create_db(args): + if os.path.exists(args.db): + cprint("File `%s' exists!" % args.db, "yellow") + + obj = Iface(args.db, args.pretend, args.debug) + obj.create(args.dir_to_add) + obj.close() + + +if __name__ == "__main__": + parser = ArgumentParser() + + subparser = parser.add_subparsers() + list_ = subparser.add_parser("list") + list_.add_argument("db") + list_.add_argument("path", nargs="?") + list_.add_argument("-d", "--debug", help="Turn on debug", + action="store_true", default=False) + list_.set_defaults(func=list_db) + + update = subparser.add_parser("update") + update.add_argument("db") + update.add_argument("path") + update.add_argument("dir_to_update", nargs="?") + update.add_argument("-p", "--pretend", help="Don't do the action, just " + "give the info what would gonna to happen.", + action="store_true", default=False) + update.add_argument("-d", "--debug", help="Turn on debug", + action="store_true", default=False) + update.set_defaults(func=update_db) + + create = subparser.add_parser("create") + create.add_argument("db") + create.add_argument("dir_to_add") + create.add_argument("-p", "--pretend", help="Don't do the action, just " + "give the info what would gonna to happen.", + action="store_true", default=False) + create.add_argument("-d", "--debug", help="Turn on debug", + action="store_true", default=False) + create.set_defaults(func=create_db) + + args = parser.parse_args() + args.func(args) + + +""" +db_file = "/home/gryf/spisy/xxx.sqlite" +connect(db_file) +sess = Session() + +#if not sess.query(File).get(1): +# root = File() +# root.id = 1 +# root.filename = 'root' +# root.size = 0 +# root.source = 0 +# t.type = 0 +# root.parent_id = 1 +# sess.add(root) +# sess.commit() + +f = "/mnt/hardtwo/XXX/" +scanob = scan.Scan(f) +scanob.update_files(2) +""" diff --git a/pygtktalog/__init__.py b/pygtktalog/__init__.py index 4775c87..0446619 100644 --- a/pygtktalog/__init__.py +++ b/pygtktalog/__init__.py @@ -21,7 +21,7 @@ import __builtin__ import gtk.glade -from logger import get_logger +from pygtktalog.logger import get_logger __all__ = ['controllers', @@ -59,7 +59,7 @@ for module in gtk.glade, gettext: __builtin__._ = gettext.gettext # wrap errors into usefull message -def log_exception(exc_type, exc_val, traceback): - get_logger(__name__).error(exc_val) - -sys.excepthook = log_exception +#def log_exception(exc_type, exc_val, traceback): +# get_logger(__name__).error(exc_val) +# +#sys.excepthook = log_exception diff --git a/pygtktalog/dbcommon.py b/pygtktalog/dbcommon.py index 65151f6..1471863 100644 --- a/pygtktalog/dbcommon.py +++ b/pygtktalog/dbcommon.py @@ -42,4 +42,4 @@ def connect(filename=None): engine = create_engine(connect_string) Meta.bind = engine Meta.create_all(checkfirst=True) - + return engine diff --git a/pygtktalog/dbobjects.py b/pygtktalog/dbobjects.py index 986474a..853b7a8 100644 --- a/pygtktalog/dbobjects.py +++ b/pygtktalog/dbobjects.py @@ -52,16 +52,16 @@ def mk_paths(fname): class File(Base): __tablename__ = "files" id = Column(Integer, Sequence("file_id_seq"), primary_key=True) - parent_id = Column(Integer, ForeignKey("files.id")) + parent_id = Column(Integer, ForeignKey("files.id"), index=True) filename = Column(Text) filepath = Column(Text) date = Column(DateTime) size = Column(Integer) - type = Column(Integer) + type = Column(Integer, index=True) source = Column(Integer) note = Column(Text) description = Column(Text) - checksum = Column(Text) + # checksum = Column(Text) children = relation('File', backref=backref('parent', remote_side="File.id"), @@ -102,16 +102,17 @@ class File(Base): else: return [] - def mk_checksum(self): - if not (self.filename and self.filepath): - return + # def mk_checksum(self): + # if not (self.filename and self.filepath): + # return - full_name = os.path.join(self.filepath, self.filename) + # full_name = os.path.join(self.filepath, self.filename) - if os.path.isfile(full_name): - fd = open(full_name) - self.checksum = sha256(fd.read(10*1024*1024)).hexdigest() - fd.close() + # SLOW! + # if os.path.isfile(full_name): + # fd = open(full_name) + # self.checksum = sha256(fd.read(10*1024*1024)).hexdigest() + # fd.close() class Group(Base): __tablename__ = "groups" @@ -130,7 +131,7 @@ class Group(Base): class Tag(Base): __tablename__ = "tags" id = Column(Integer, Sequence("tags_id_seq"), primary_key=True) - group_id = Column(Integer, ForeignKey("groups.id")) + group_id = Column(Integer, ForeignKey("groups.id"), index=True) tag = Column(Text) group = relation('Group', backref=backref('tags', remote_side="Group.id")) @@ -147,7 +148,7 @@ class Tag(Base): class Thumbnail(Base): __tablename__ = "thumbnails" id = Column(Integer, Sequence("thumbnail_id_seq"), primary_key=True) - file_id = Column(Integer, ForeignKey("files.id")) + file_id = Column(Integer, ForeignKey("files.id"), index=True) filename = Column(Text) def __init__(self, filename=None, file_obj=None): @@ -183,7 +184,7 @@ class Thumbnail(Base): class Image(Base): __tablename__ = "images" id = Column(Integer, Sequence("images_id_seq"), primary_key=True) - file_id = Column(Integer, ForeignKey("files.id")) + file_id = Column(Integer, ForeignKey("files.id"), index=True) filename = Column(Text) def __init__(self, filename=None, file_obj=None, move=True): @@ -256,7 +257,7 @@ class Image(Base): class Exif(Base): __tablename__ = "exif" id = Column(Integer, Sequence("exif_id_seq"), primary_key=True) - file_id = Column(Integer, ForeignKey("files.id")) + file_id = Column(Integer, ForeignKey("files.id"), index=True) camera = Column(Text) date = Column(Text) aperture = Column(Text) @@ -293,7 +294,7 @@ class Exif(Base): class Gthumb(Base): __tablename__ = "gthumb" id = Column(Integer, Sequence("gthumb_id_seq"), primary_key=True) - file_id = Column(Integer, ForeignKey("files.id")) + file_id = Column(Integer, ForeignKey("files.id"), index=True) note = Column(Text) place = Column(Text) date = Column(DateTime) diff --git a/pygtktalog/logger.py b/pygtktalog/logger.py index e445dba..36fb674 100644 --- a/pygtktalog/logger.py +++ b/pygtktalog/logger.py @@ -27,6 +27,23 @@ COLORS = {'WARNING': YELLOW, 'CRITICAL': WHITE, 'ERROR': RED} +def cprint(txt, color): + color_map = {"black": BLACK, + "red": RED, + "green": GREEN, + "yellow": YELLOW, + "blue": BLUE, + "magenta": MAGENTA, + "cyan": CYAN, + "white": WHITE} + print COLOR_SEQ % (30 + color_map[color]) + txt + RESET_SEQ + + +class DummyFormater(logging.Formatter): + """Just don't output anything""" + def format(self, record): + return "" + class ColoredFormatter(logging.Formatter): def __init__(self, msg, use_color=True): @@ -41,10 +58,12 @@ class ColoredFormatter(logging.Formatter): record.levelname = levelname_color return logging.Formatter.format(self, record) +log_obj = None #def get_logger(module_name, level='INFO', to_file=False): -def get_logger(module_name, level='DEBUG', to_file=True): -#def get_logger(module_name, level='INFO', to_file=True): +#def get_logger(module_name, level='DEBUG', to_file=True): +def get_logger(module_name, level='INFO', to_file=True, to_console=True): +# def get_logger(module_name, level='DEBUG', to_file=True, to_console=True): #def get_logger(module_name, level='DEBUG', to_file=False): """ Prepare and return log object. Standard formatting is used for all logs. @@ -59,18 +78,21 @@ def get_logger(module_name, level='DEBUG', to_file=True): """ path = os.path.join(os.path.expanduser("~"), ".pygtktalog", "app.log") - #path = "/dev/null" + log = logging.getLogger(module_name) log.setLevel(LEVEL[level]) - console_handler = logging.StreamHandler(sys.stderr) - console_formatter = ColoredFormatter("%(filename)s:%(lineno)s - " - "%(levelname)s - %(message)s") - console_handler.setFormatter(console_formatter) + if to_console: + #path = "/dev/null" - log.addHandler(console_handler) + console_handler = logging.StreamHandler(sys.stderr) + console_formatter = ColoredFormatter("%(filename)s:%(lineno)s - " + "%(levelname)s - %(message)s") + console_handler.setFormatter(console_formatter) - if to_file: + log.addHandler(console_handler) + + elif to_file: file_handler = logging.FileHandler(path) file_formatter = logging.Formatter("%(asctime)s %(levelname)6s " "%(filename)s: %(lineno)s - " @@ -78,5 +100,11 @@ def get_logger(module_name, level='DEBUG', to_file=True): file_handler.setFormatter(file_formatter) file_handler.setLevel(LEVEL[level]) log.addHandler(file_handler) + else: + devnull = open(os.devnull, "w") + dummy_handler = logging.StreamHandler(devnull) + dummy_formatter = DummyFormater("") + dummy_handler.setFormatter(dummy_formatter) + log.addHandler(dummy_handler) return log diff --git a/pygtktalog/scan.py b/pygtktalog/scan.py index 91ef245..626841f 100644 --- a/pygtktalog/scan.py +++ b/pygtktalog/scan.py @@ -55,7 +55,7 @@ class Scan(object): self.files_count = self._get_files_count() self.current_count = 0 - def add_files(self): + def add_files(self, engine=None): """ Returns list, which contain object, modification date and file size. @@ -83,7 +83,41 @@ class Scan(object): self._session.commit() return self._files - def update_files(self, node_id): + def get_all_children(self, node_id, engine): + """ + Get children by pure SQL + + Starting from sqlite 3.8.3 it is possile to do this operation as a + one query using WITH statement. For now on it has to be done in + application. + """ + SQL = "select id from files where parent_id=? and type=1" + SQL2 = "select id from files where parent_id in (%s)" + + row = ((node_id,),) + all_ids = [] + def req(obj): + for line in obj: + all_ids.append(line[0]) + res = engine.execute(SQL, (line[0],)).fetchall() + if res: + req(res) + + req(row) + + sql = SQL2 % ",".join("?" * len(all_ids)) + res = engine.execute(sql, tuple(all_ids)).fetchall() + + all_obj = [] + for row in res: + all_obj.append(self._session + .query(File) + .filter(File.id == row[0]) + .first()) + + return all_obj + + def update_files(self, node_id, engine=None): """ Updtate DB contents of provided node. """ @@ -95,16 +129,30 @@ class Scan(object): parent = old_node.parent self._files = [] - self._existing_branch = old_node.get_all_children() + + if engine: + LOG.debug("Getting all File objects via SQL") + self._existing_branch = self.get_all_children(node_id, engine) + else: + LOG.debug("Getting all File objects via ORM (yeah, it SLOW)") + self._existing_branch = old_node.get_all_children() + self._existing_branch.insert(0, old_node) # Break the chain of parent-children relations + LOG.debug("Make them orphans") for fobj in self._existing_branch: fobj.parent = None update_path = os.path.join(old_node.filepath, old_node.filename) + # gimme a string. unicode can't handle strange filenames in paths, so + # in case of such, better get me a byte string. It is not perfect + # though, since it WILL crash if the update_path would contain some + # unconvertable characters. + update_path = update_path.encode("utf-8") # refresh objects + LOG.debug("Refreshing objects") self._get_all_files() LOG.debug("path for update: %s" % update_path) @@ -142,11 +190,11 @@ class Scan(object): for root, dirs, files in os.walk(path): for fname in files: try: - size += os.stat(os.path.join(root, fname)).st_size + size += os.lstat(os.path.join(root, fname)).st_size except OSError: LOG.warning("Cannot access file " "%s" % os.path.join(root, fname)) - + LOG.debug("_get_dirsize, %s: %d", path, size) return size def _gather_information(self, fobj): @@ -175,7 +223,7 @@ class Scan(object): elif ext and ext in extdict: mimedict[extdict[ext]](fobj, fp) else: - LOG.debug("Filetype not supported " + str(mimeinfo) + " " + fp) + LOG.debug("Filetype not supported %s %s", str(mimeinfo), fp) pass def _audio(self, fobj, filepath): @@ -250,8 +298,10 @@ class Scan(object): """ fullpath = os.path.join(path, fname) - fname = fname.decode(sys.getfilesystemencoding()) - path = path.decode(sys.getfilesystemencoding()) + fname = fname.decode(sys.getfilesystemencoding(), + errors="replace") + path = path.decode(sys.getfilesystemencoding(), + errors="replace") if ftype == TYPE['link']: fname = fname + " -> " + os.readlink(fullpath) @@ -276,7 +326,8 @@ class Scan(object): fobj.type = fob['ftype'] else: fobj = File(**fob) - fobj.mk_checksum() + # SLOW. Don;t do this. Checksums has no value eventually + # fobj.mk_checksum() if parent is None: fobj.parent_id = 1 @@ -287,6 +338,33 @@ class Scan(object): return fobj + def _non_recursive(self, parent, fname, path, size): + """ + Do the walk through the file system. Non recursively, since it's + slow as hell. + @Arguments: + @parent - directory File object which is parent for the current + scope + @fname - string that hold filename + @path - full path for further scanning + @size - size of the object + """ + fullpath = os.path.join(path, fname) + parent = self._mk_file(fname, path, parent, TYPE['dir']) + parent.size = 0 + parent.type = TYPE['dir'] + + for root, dirs, files in os.walk(fullpath): + for dir_ in dirs: + pass + + for file_ in files: + self.current_count += 1 + stat = os.lstat(os.path.join(root, file_)) + parent.size += stat.st_size + + # TODO: finish that up + def _recursive(self, parent, fname, path, size): """ Do the walk through the file system @@ -307,6 +385,9 @@ class Scan(object): parent.size = self._get_dirsize(fullpath) parent.type = TYPE['dir'] + LOG.info("Scanning `%s' [%s/%s]", fullpath, self.current_count, + self.files_count) + root, dirs, files = os.walk(fullpath).next() for fname in files: fpath = os.path.join(root, fname) @@ -402,7 +483,7 @@ class Scan(object): def _get_files_count(self): count = 0 - for root, dirs, files in os.walk(self.path): + for root, dirs, files in os.walk(str(self.path)): count += len(files) LOG.debug("count of files: %s", count) return count @@ -470,7 +551,7 @@ class asdScan(object): try: root, dirs, files = os.walk(path).next() except: - LOG.warning("Cannot access ", path) + LOG.warning("Cannot access %s", path) return 0 ############# diff --git a/pygtktalog/thumbnail.py b/pygtktalog/thumbnail.py index c0bf7fb..f496500 100644 --- a/pygtktalog/thumbnail.py +++ b/pygtktalog/thumbnail.py @@ -9,7 +9,7 @@ import os from tempfile import mkstemp -import Image +from PIL import Image from pygtktalog.logger import get_logger from pygtktalog import EXIF diff --git a/pygtktalog/video.py b/pygtktalog/video.py index 694692d..65b7b8a 100644 --- a/pygtktalog/video.py +++ b/pygtktalog/video.py @@ -11,7 +11,7 @@ import shutil from tempfile import mkdtemp, mkstemp import math -import Image +from PIL import Image from pygtktalog.misc import float_to_string from pygtktalog.logger import get_logger diff --git a/src/lib/img.py b/src/lib/img.py index 014ff60..1fa4a75 100644 --- a/src/lib/img.py +++ b/src/lib/img.py @@ -26,7 +26,8 @@ from shutil import copy from os import path from hashlib import sha512 -import Image +from PIL import Image + class Img(object): @@ -46,7 +47,7 @@ class Img(object): image_filename = path.join(self.base, self.sha512) thumbnail = path.join(self.base, self.sha512 + "_t") - + # check wheter image already exists if path.exists(image_filename) and path.exists(thumbnail): if __debug__: diff --git a/src/lib/thumbnail.py b/src/lib/thumbnail.py index ae608dc..e3cd3cf 100644 --- a/src/lib/thumbnail.py +++ b/src/lib/thumbnail.py @@ -28,8 +28,10 @@ from shutil import move from os import path import sys +from PIL import Image + from lib import EXIF -import Image + class Thumbnail(object): """Class for generate/extract thumbnail from image file"""