From de247e9bc8f2ef9a6a9837cb5d0a0a065f114813 Mon Sep 17 00:00:00 2001 From: gryf Date: Tue, 21 Feb 2012 21:13:47 +0100 Subject: [PATCH] Added branch with implementation for images in db --- convert_1.x_to_2.x.py | 8 +- pavement.py | 21 +++- pygtktalog/__init__.py | 9 ++ pygtktalog/dbobjects.py | 195 ++++++++++++++++++----------- pygtktalog/logger.py | 68 +++++----- pygtktalog/scan.py | 268 +++++++++++++++++++++++++++++++++------- pygtktalog/thumbnail.py | 44 +++---- pygtktalog/video.py | 76 ++++++++++-- src/lib/thumbnail.py | 94 +++++--------- test/unit/scan_test.py | 20 +-- 10 files changed, 535 insertions(+), 268 deletions(-) diff --git a/convert_1.x_to_2.x.py b/convert_1.x_to_2.x.py index c988757..301d85b 100755 --- a/convert_1.x_to_2.x.py +++ b/convert_1.x_to_2.x.py @@ -19,6 +19,7 @@ from pygtktalog.dbobjects import File, Exif, Group, Gthumb from pygtktalog.dbobjects import Image, Tag, Thumbnail from pygtktalog.dbcommon import connect + def create_schema(cur): pass @@ -29,16 +30,18 @@ def create_temporary_db_file(): os.close(fd) return fname + def connect_to_db(filename): """initialize db connection and store it in class attributes""" - db_connection = sqlite.connect(filename, detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES) + db_connection = sqlite.connect(filename, \ + detect_types=sqlite.PARSE_DECLTYPES | sqlite.PARSE_COLNAMES) db_cursor = db_connection.cursor() return db_connection, db_cursor + def opendb(filename=None): """try to open db file""" db_tmp_path = create_temporary_db_file() - compressed = False try: test_file = open(filename).read(15) @@ -57,7 +60,6 @@ def opendb(filename=None): curdb.write(open_file.read()) curdb.close() open_file.close() - compressed = True except IOError: # file is not bz2 os.unlink(db_tmp_path) diff --git a/pavement.py b/pavement.py index b6b99a0..0b1b841 100644 --- a/pavement.py +++ b/pavement.py @@ -1,7 +1,7 @@ """ Project: pyGTKtalog Description: Makefile and setup.py replacement. Used python packages - - paver, nosetests. External commands - xgettext, intltool-extract, svn, + paver, nosetests. External commands - xgettext, intltool-extract, hg, grep. Type: management Author: Roman 'gryf' Dobosz, gryf73@gmail.com @@ -37,7 +37,7 @@ msgstr "" "Content-Transfer-Encoding: utf-8\\n" """ -REV = os.popen("svn info 2>/dev/null|grep ^Revis|cut -d ' ' -f 2").readlines() +REV = os.popen("hg sum 2>/dev/null|grep ^Revis|cut -d ' ' -f 2").readlines() if REV: REV = "r" + REV[0].strip() else: @@ -77,7 +77,7 @@ setup( exclude_package_data={'': ['*.patch']}, packages=["pygtktalog"], scripts=['bin/gtktalog.py'], - test_suite = 'nose.collector' + test_suite='nose.collector' ) options(sphinx=Bunch(builddir="build", sourcedir="source")) @@ -89,6 +89,7 @@ def sdist(): """sdist with message catalogs""" call_task("setuptools.command.sdist") + @task @needs(['locale_gen']) def build(): @@ -103,11 +104,13 @@ def clean(): for root, dummy, files in os.walk("."): for fname in files: if fname.endswith(".pyc") or fname.endswith(".pyo") or \ - fname.endswith("~") or fname.endswith(".h"): + fname.endswith("~") or fname.endswith(".h") or \ + fname == '.coverage': fdel = os.path.join(root, fname) os.unlink(fdel) print "deleted", fdel + @task @needs(["clean"]) def distclean(): @@ -123,6 +126,7 @@ def distclean(): os.unlink(filename) print "deleted", filename + @task def run(): """run application""" @@ -130,6 +134,7 @@ def run(): #import gtktalog #gtktalog.run() + @task def pot(): """generate 'pot' file out of python/glade files""" @@ -150,7 +155,8 @@ def pot(): sh(cmd % (POTFILE, os.path.join(root, fname))) elif fname.endswith(".glade"): sh(cmd_glade % os.path.join(root, fname)) - sh(cmd % (POTFILE, os.path.join(root, fname+".h"))) + sh(cmd % (POTFILE, os.path.join(root, fname + ".h"))) + @task @needs(['pot']) @@ -165,6 +171,7 @@ def locale_merge(): else: shutil.copy(potfile, msg_catalog) + @task @needs(['locale_merge']) def locale_gen(): @@ -183,6 +190,7 @@ def locale_gen(): msg_catalog = os.path.join('locale', "%s.po" % lang) sh('msgfmt %s -o %s' % (msg_catalog, catalog_file)) + if HAVE_LINT: @task def pylint(): @@ -190,6 +198,7 @@ if HAVE_LINT: pylintopts = ['pygtktalog'] dry('pylint %s' % (" ".join(pylintopts)), lint.Run, pylintopts) + @task @cmdopts([('coverage', 'c', 'display coverage information')]) def test(options): @@ -199,6 +208,7 @@ def test(options): cmd += " --with-coverage --cover-package pygtktalog" os.system(cmd) + @task @needs(['locale_gen']) def runpl(): @@ -216,4 +226,3 @@ def _setup_env(): sys.path.insert(0, this_path) return this_path - diff --git a/pygtktalog/__init__.py b/pygtktalog/__init__.py index 6d61e92..4775c87 100644 --- a/pygtktalog/__init__.py +++ b/pygtktalog/__init__.py @@ -14,12 +14,15 @@ __web__ = "http://bitbucket.org/gryf" __logo_img__ = "views/pixmaps/Giant Worms.png" import os +import sys import locale import gettext import __builtin__ import gtk.glade +from logger import get_logger + __all__ = ['controllers', 'models', @@ -54,3 +57,9 @@ for module in gtk.glade, gettext: # register the gettext function for the whole interpreter as "_" __builtin__._ = gettext.gettext + +# wrap errors into usefull message +def log_exception(exc_type, exc_val, traceback): + get_logger(__name__).error(exc_val) + +sys.excepthook = log_exception diff --git a/pygtktalog/dbobjects.py b/pygtktalog/dbobjects.py index dde3ffe..5b1342a 100644 --- a/pygtktalog/dbobjects.py +++ b/pygtktalog/dbobjects.py @@ -6,16 +6,15 @@ Created: 2009-08-07 """ import os -import errno -import shutil -import uuid +from cStringIO import StringIO +from hashlib import sha256 -from sqlalchemy import Column, Table, Integer, Text -from sqlalchemy import DateTime, ForeignKey, Sequence +from sqlalchemy import Column, Table, Integer, Text, Binary, \ + DateTime, ForeignKey, Sequence from sqlalchemy.orm import relation, backref from pygtktalog.dbcommon import Base -from pygtktalog import thumbnail +from pygtktalog.thumbnail import ThumbCreator IMG_PATH = "/home/gryf/.pygtktalog/imgs/" # FIXME: should be configurable @@ -24,6 +23,7 @@ tags_files = Table("tags_files", Base.metadata, Column("file_id", Integer, ForeignKey("files.id")), Column("tag_id", Integer, ForeignKey("tags.id"))) +TYPE = {'root': 0, 'dir': 1, 'file': 2, 'link': 3} class File(Base): __tablename__ = "files" @@ -37,13 +37,15 @@ class File(Base): source = Column(Integer) note = Column(Text) description = Column(Text) + checksum = Column(Text) + thumbnail = Column(Binary) children = relation('File', backref=backref('parent', remote_side="File.id"), order_by=[type, filename]) tags = relation("Tag", secondary=tags_files, order_by="Tag.tag") - thumbnail = relation("Thumbnail", backref="file") - images = relation("Image", backref="file", order_by="Image.filename") + #thumbnail = relation("Thumbnail", backref="file") + images = relation("Image", backref="file") def __init__(self, filename=None, path=None, date=None, size=None, ftype=None, src=None): @@ -58,6 +60,35 @@ class File(Base): def __repr__(self): return "" % (str(self.filename), str(self.id)) + def generate_checksum(self): + """ + Generate checksum of first 10MB of the file + """ + if self.type != TYPE['file']: + return + + buf = open(os.path.join(self.filepath, self.filename)).read(10485760) + self.checksum = sha256(buf).hexdigest() + + def get_all_children(self): + """ + Return list of all node direct and indirect children + """ + def _recursive(node): + children = [] + if node.children: + for child in node.children: + children += _recursive(child) + if node != self: + children.append(node) + + return children + + if self.children: + return _recursive(self) + else: + return [] + class Group(Base): __tablename__ = "groups" @@ -90,54 +121,66 @@ class Tag(Base): return "" % (str(self.tag), str(self.id)) -class Thumbnail(Base): - __tablename__ = "thumbnails" - id = Column(Integer, Sequence("thumbnail_id_seq"), primary_key=True) - file_id = Column(Integer, ForeignKey("files.id")) - filename = Column(Text) - - def __init__(self, filename=None, file_obj=None): - self.filename = filename - self.file = file_obj - if self.filename: - self.save(self.filename) - - def save(self, fname): - """ - Create file related thumbnail, add it to the file object. - """ - new_name = str(uuid.uuid1()).split("-") - try: - os.makedirs(os.path.join(IMG_PATH, *new_name[:-1])) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - - ext = os.path.splitext(self.filename)[1] - if ext: - new_name.append("".join([new_name.pop(), ext])) - - thumb = thumbnail.Thumbnail(self.filename).save() - name, ext = os.path.splitext(new_name.pop()) - new_name.append("".join([name, "_t", ext])) - self.filename = os.path.sep.join(new_name) - shutil.move(thumb.save(), os.path.join(IMG_PATH, *new_name)) - - def __repr__(self): - return "" % (str(self.filename), str(self.id)) - +#class Thumbnail(Base): +# __tablename__ = "thumbnails" +# id = Column(Integer, Sequence("thumbnail_id_seq"), primary_key=True) +# file_id = Column(Integer, ForeignKey("files.id")) +# filename = Column(Text) +# +# def __init__(self, filename=None, file_obj=None): +# self.filename = filename +# self.file = file_obj +# if self.filename: +# self.save(self.filename) +# +# def save(self, fname): +# """ +# Create file related thumbnail, add it to the file object. +# """ +# new_name = sha1(str(uuid1())).hexdigest() +# new_name = [new_name[start:start+10] for start in range(0, +# len(new_name), +# 10)] +# try: +# os.makedirs(os.path.join(IMG_PATH, *new_name[:-1])) +# except OSError as exc: +# if exc.errno != errno.EEXIST: +# raise +# +# ext = os.path.splitext(self.filename)[1] +# if ext: +# new_name.append("".join([new_name.pop(), ext])) +# +# thumb = thumbnail.Thumbnail(self.filename) +# thumb_tmp_name = thumb.save() +# name, ext = os.path.splitext(new_name.pop()) +# new_name.append("".join([name, "_t", '.jpg'])) +# self.filename = os.path.sep.join(new_name) +# shutil.move(thumb_tmp_name, os.path.join(IMG_PATH, *new_name)) +# +# def get_copy(self): +# """ +# Create the very same object as self with exception of id field +# """ +# thumb = Thumbnail() +# thumb.filename = self.filename +# return thumb +# +# def __repr__(self): +# return "" % (str(self.filename), str(self.id)) +# class Image(Base): __tablename__ = "images" id = Column(Integer, Sequence("images_id_seq"), primary_key=True) file_id = Column(Integer, ForeignKey("files.id")) - filename = Column(Text) + image = Column(Binary) + thumb = Column(Binary) + checksum = Column(Text) def __init__(self, filename=None, file_obj=None): - self.filename = None self.file = file_obj if filename: - self.filename = filename self.save(filename) def save(self, fname): @@ -145,52 +188,60 @@ class Image(Base): Save and create coressponding thumbnail (note: it differs from file related thumbnail!) """ - new_name = str(uuid.uuid1()).split("-") - try: - os.makedirs(os.path.join(IMG_PATH, *new_name[:-1])) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise + file_buffer = StringIO() - ext = os.path.splitext(self.filename)[1] - if ext: - new_name.append("".join([new_name.pop(), ext])) + with open(fname) as f: + file_buffer.write(f.read()) - shutil.move(self.filename, os.path.join(IMG_PATH, *new_name)) + self.image = file_buffer.getvalue() + self.checksum = sha256(file_buffer.getvalue()).hexdigest() - self.filename = os.path.sep.join(new_name) + file_buffer.seek(0) + thumb = ThumbCreator(fname).generate() + if thumb: + self.thumb = thumb.getvalue() + thumb.close() - thumb = thumbnail.Thumbnail(os.path.join(IMG_PATH, self.filename)) - name, ext = os.path.splitext(new_name.pop()) - new_name.append("".join([name, "_t", ext])) - shutil.move(thumb.save(), os.path.join(IMG_PATH, *new_name)) + file_buffer.close() def get_copy(self): """ Create the very same object as self with exception of id field """ img = Image() - img.filename = self.filename + img.image = self.image + img.thumb = self.thumb + img.checksum = self.checksum return img @property - def thumbpath(self): + def fthumb(self): """ - Return full path to thumbnail of this image + Return file-like object with thumbnail """ - path, fname = os.path.split(self.filename) - base, ext = os.path.splitext(fname) - return os.path.join(IMG_PATH, path, base + "_t" + ext) + if self.thumb: + buf = StringIO() + buf.write(self.thumb) + buf.seek(0) + return buf + else: + return None @property - def imagepath(self): + def fimage(self): """ - Return full path to image + Return file-like object with image """ - return os.path.join(IMG_PATH, self.filename) + if self.image: + buf = StringIO() + buf.write(self.image) + buf.seek(0) + return buf + else: + return None def __repr__(self): - return "" % (str(self.filename), str(self.id)) + return "" % str(self.id) class Exif(Base): diff --git a/pygtktalog/logger.py b/pygtktalog/logger.py index 968488e..4b9fee3 100644 --- a/pygtktalog/logger.py +++ b/pygtktalog/logger.py @@ -9,32 +9,27 @@ import os import sys import logging +LEVEL = {'DEBUG': logging.DEBUG, + 'INFO': logging.INFO, + 'WARN': logging.WARN, + 'ERROR': logging.ERROR, + 'CRITICAL': logging.CRITICAL} + BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) -# The background is set with 40 plus the number of the color, and the -# foreground with 30 - -#These are the sequences need to get colored ouput RESET_SEQ = "\033[0m" COLOR_SEQ = "\033[1;%dm" BOLD_SEQ = "\033[1m" -def formatter_message(message, use_color = True): - if use_color: - message = message.replace("$RESET", RESET_SEQ).replace("$BOLD", - BOLD_SEQ) - else: - message = message.replace("$RESET", "").replace("$BOLD", "") - return message - COLORS = {'WARNING': YELLOW, 'INFO': GREEN, 'DEBUG': BLUE, 'CRITICAL': WHITE, 'ERROR': RED} + class ColoredFormatter(logging.Formatter): - def __init__(self, msg, use_color = True): + def __init__(self, msg, use_color=True): logging.Formatter.__init__(self, msg) self.use_color = use_color @@ -45,45 +40,42 @@ class ColoredFormatter(logging.Formatter): + levelname + RESET_SEQ record.levelname = levelname_color return logging.Formatter.format(self, record) -LEVEL = {'DEBUG': logging.DEBUG, - 'INFO': logging.INFO, - 'WARN': logging.WARN, - 'ERROR': logging.ERROR, - 'CRITICAL': logging.CRITICAL} -#def get_logger(module_name, level=None, to_file=True): -def get_logger(module_name, level=None, to_file=False): + +#def get_logger(module_name, level='INFO', to_file=False): +#def get_logger(module_name, level='DEBUG', to_file=True): +def get_logger(module_name, level='DEBUG', to_file=False): """ Prepare and return log object. Standard formatting is used for all logs. Arguments: @module_name - String name for Logger object. @level - Log level (as string), one of DEBUG, INFO, WARN, ERROR and CRITICAL. - @to_file - If True, stores log in file inside .pygtktalog config - directory, otherwise log is redirected to stderr. + @to_file - If True, additionally stores full log in file inside + .pygtktalog config directory and to stderr, otherwise log + is only redirected to stderr. Returns: object of logging.Logger class """ path = os.path.join(os.path.expanduser("~"), ".pygtktalog", "app.log") - path = "/dev/null" + #path = "/dev/null" log = logging.getLogger(module_name) + log.setLevel(LEVEL[level]) - if not level: - #log.setLevel(LEVEL['WARN']) - log.setLevel(LEVEL['DEBUG']) - else: - log.setLevel(LEVEL[level]) + console_handler = logging.StreamHandler(sys.stderr) + console_formatter = ColoredFormatter("%(filename)s:%(lineno)s - " + "%(levelname)s - %(message)s") + console_handler.setFormatter(console_formatter) + + log.addHandler(console_handler) if to_file: - log_handler = logging.FileHandler(path) - formatter = logging.Formatter("%(asctime)s %(filename)s:%(lineno)s - " - "%(levelname)s - %(message)s") - else: - log_handler = logging.StreamHandler(sys.stderr) - formatter = ColoredFormatter("%(filename)s:%(lineno)s - " - "%(levelname)s - %(message)s") + file_handler = logging.FileHandler(path) + file_formatter = logging.Formatter("%(asctime)s %(levelname)6s " + "%(filename)s: %(lineno)s - " + "%(message)s") + file_handler.setFormatter(file_formatter) + file_handler.setLevel(LEVEL[level]) + log.addHandler(file_handler) - log_handler.setFormatter(formatter) - log.addHandler(log_handler) return log - diff --git a/pygtktalog/scan.py b/pygtktalog/scan.py index b726695..c690d48 100644 --- a/pygtktalog/scan.py +++ b/pygtktalog/scan.py @@ -7,16 +7,30 @@ """ import os import sys +import re from datetime import datetime import mimetypes -from pygtktalog.dbobjects import File, Image +from pygtktalog.dbobjects import File, Image, TYPE +from pygtktalog.thumbnail import ThumbCreator from pygtktalog.dbcommon import Session from pygtktalog.logger import get_logger from pygtktalog.video import Video LOG = get_logger(__name__) +PAT = re.compile("(\[[^\]]*\]" + ".*\(\d\d\d\d\))" + "\s[^\[]*\[.{8}\]" + ".[a-zA-Z0-9]*$") + +#PAT = re.compile(r'(?P\[[^\]]*\]\s)?' +# r'(?P.*)\s' +# r'(?P<year>\(\d{4}\))\s' +# r'(?P<kind>.*)' +# r'(?P<checksum>\[[A-Z0-9]{8}\])' +# r'\.(?P<extension>(avi|asf|mpeg|mpg|mp4|ogm|ogv|mkv|mov|wmv' +# r'|rm|rmvb|flv|jpg|png|gif|nfo))\.?(conf)?$') class NoAccessError(Exception): @@ -36,8 +50,11 @@ class Scan(object): self.abort = False self.path = path.rstrip(os.path.sep) self._files = [] - self._existing_files = [] + self._existing_files = [] # for re-use purpose in adding + self._existing_branch = [] # for branch storage, mainly for updating self._session = Session() + self.files_count = self._get_files_count() + self.current_count = 0 def add_files(self): """ @@ -45,6 +62,7 @@ class Scan(object): size. """ self._files = [] + self._existing_branch = [] LOG.debug("given path: %s" % self.path) # See, if file exists. If not it would raise OSError exception @@ -56,7 +74,8 @@ class Scan(object): directory = os.path.basename(self.path) path = os.path.dirname(self.path) - if not self._recursive(None, directory, path, 0, 0, 1): + + if not self._recursive(None, directory, path, 0): return None # add only first item from _files, because it is a root of the other, @@ -65,6 +84,52 @@ class Scan(object): self._session.commit() return self._files + def update_files(self, node_id): + """ + Updtate DB contents of provided node. + """ + self.current_count = 0 + old_node = self._session.query(File).get(node_id) + if old_node is None: + LOG.warning("No such object in db: %s", node_id) + return + parent = old_node.parent + + self._files = [] + self._existing_branch = old_node.get_all_children() + self._existing_branch.insert(0, old_node) + + # Break the chain of parent-children relations + for fobj in self._existing_branch: + fobj.parent = None + + update_path = os.path.join(old_node.filepath, old_node.filename) + + # refresh objects + self._get_all_files() + + LOG.debug("path for update: %s" % update_path) + + # See, if file exists. If not it would raise OSError exception + os.stat(update_path) + + if not os.access(update_path, os.R_OK | os.X_OK) \ + or not os.path.isdir(update_path): + raise NoAccessError("Access to %s is forbidden" % update_path) + + directory = os.path.basename(update_path) + path = os.path.dirname(update_path) + + if not self._recursive(parent, directory, path, 0): + return None + + # update branch + #self._session.merge(self._files[0]) + self._session.query(File).filter(File.parent==None).delete() + + self._session.commit() + return self._files + def _get_dirsize(self, path): """ Returns sum of all files under specified path (also in subdirs) @@ -77,8 +142,8 @@ class Scan(object): try: size += os.stat(os.path.join(root, fname)).st_size except OSError: - LOG.info("Cannot access file %s" % \ - os.path.join(root, fname)) + LOG.warning("Cannot access file " + "%s" % os.path.join(root, fname)) return size @@ -96,30 +161,77 @@ class Scan(object): if mimeinfo[0] and mimeinfo[0].split("/")[0] in mimedict.keys(): mimedict[mimeinfo[0].split("/")[0]](fobj, fp) else: - #LOG.info("Filetype not supported " + str(mimeinfo) + " " + fp) + LOG.debug("Filetype not supported " + str(mimeinfo) + " " + fp) pass def _audio(self, fobj, filepath): - #LOG.warning('audio') return def _image(self, fobj, filepath): - #LOG.warning('image') + #Thumbnail(filepath, fobj) return def _video(self, fobj, filepath): """ Make captures for a movie. Save it under uniq name. """ + result = PAT.search(fobj.filename) + if result: + self._check_related(fobj, result.groups()[0]) + vid = Video(filepath) + fobj.description = vid.get_formatted_tags() + preview_fn = vid.capture() - Image(preview_fn, fobj) + if preview_fn: + Image(preview_fn, fobj) + + def _check_related(self, fobj, pattern): + """ + Try to search for related files which belongs to specified File + object and pattern. If found, additional objects are created. + """ + for filen in os.listdir(fobj.filepath): + if pattern in filen and \ + os.path.splitext(filen)[1] in (".jpg", ".png", ".gif"): + full_fname = os.path.join(fobj.filepath, filen) + LOG.debug('found cover file: %s' % full_fname) + + Image(full_fname, fobj) + + if not fobj.thumbnail: + fthumb = ThumbCreator(full_fname).generate() + fobj.thumbnail = fthumb.getvalue() + fthumb.close() + + def _name_matcher(self, fpath, fname, media=False): + """ + Try to match special pattern to filename which may be looks like this: + [aXXo] Batman (1989) [D3ADBEEF].avi + [aXXo] Batman (1989) [D3ADBEEF].avi.conf + [aXXo] Batman (1989) cover [BEEFD00D].jpg + [aXXo] Batman (1989) cover2 [FEEDD00D].jpg + [aXXo] Batman (1989) trailer [B00B1337].avi + or + Batman (1989) [D3ADBEEF].avi (and so on) + + For media=False it will return True for filename, that matches + pattern, and there are at least one corresponding media files (avi, + mpg, mov and so on) _in case the filename differs from media_. This is + usfull for not storing covers, nfo, conf files in the db. + + For kind == 2 it will return all images and other files that should be + gather due to video file examinig as a dict of list (conf, nfo and + images). + """ + # TODO: dokonczyc to na podstawie tego cudowanego patternu u gory. + return def _get_all_files(self): self._existing_files = self._session.query(File).all() - def _mk_file(self, fname, path, parent): + def _mk_file(self, fname, path, parent, ftype=TYPE['file']): """ Create and return File object """ @@ -127,19 +239,41 @@ class Scan(object): fname = fname.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding()) - fob = File(filename=fname, path=path) - fob.date = datetime.fromtimestamp(os.stat(fullpath).st_mtime) - fob.size = os.stat(fullpath).st_size - fob.parent = parent - fob.type = 2 + + if ftype == TYPE['link']: + fname = fname + " -> " + os.readlink(fullpath) + + fob = {'filename': fname, + 'path': path, + 'ftype': ftype} + try: + fob['date'] = datetime.fromtimestamp(os.stat(fullpath).st_mtime) + fob['size'] = os.stat(fullpath).st_size + except OSError: + # in case of dead softlink, we will have no time and size + fob['date'] = None + fob['size'] = 0 + + fobj = self._get_old_file(fob, ftype) + + if fobj: + LOG.debug("found existing file in db: %s" % str(fobj)) + fobj.size = fob['size'] # TODO: update whole tree sizes (for directories/discs) + fobj.filepath = fob['path'] + fobj.type = fob['ftype'] + else: + fobj = File(**fob) if parent is None: - fob.parent_id = 1 + fobj.parent_id = 1 + else: + fobj.parent = parent - self._files.append(fob) - return fob + self._files.append(fobj) - def _recursive(self, parent, fname, path, date, size, ftype): + return fobj + + def _recursive(self, parent, fname, path, size): """ Do the walk through the file system @Arguments: @@ -147,41 +281,61 @@ class Scan(object): scope @fname - string that hold filename @path - full path for further scanning - @date - @size - size of the object - @ftype - """ if self.abort: return False - LOG.debug("args: fname: %s, path: %s" % (fname, path)) fullpath = os.path.join(path, fname) - parent = self._mk_file(fname, path, parent) - parent.size = self._get_dirsize(fullpath) - parent.type = 1 + parent = self._mk_file(fname, path, parent, TYPE['dir']) + + parent.size = self._get_dirsize(fullpath) + parent.type = TYPE['dir'] - self._get_all_files() root, dirs, files = os.walk(fullpath).next() for fname in files: fpath = os.path.join(root, fname) - fob = self._mk_file(fname, root, parent) + self.current_count += 1 + LOG.debug("Processing %s [%s/%s]", fname, self.current_count, + self.files_count) + + result = PAT.search(fname) + test_ = False + + if result and os.path.splitext(fpath)[1] in ('.jpg', '.gif', + '.png'): + newpat = result.groups()[0] + matching_files = [] + for fn_ in os.listdir(root): + if newpat in fn_: + matching_files.append(fn_) + + if len(matching_files) > 1: + LOG.debug('found cover "%s" in group: %s, skipping', fname, + str(matching_files)) + test_ = True + if test_: + continue + if os.path.islink(fpath): - fob.filename = fob.filename + " -> " + os.readlink(fpath) - fob.type = 3 + fob = self._mk_file(fname, root, parent, TYPE['link']) else: + fob = self._mk_file(fname, root, parent) existing_obj = self._object_exists(fob) + if existing_obj: fob.tags = existing_obj.tags - fob.thumbnail = [th.get_copy \ - for th in existing_obj.thumbnail] + fob.thumbnail = existing_obj.thumbnail fob.images = [img.get_copy() \ - for img in existing_obj.images] + for img in existing_obj.images] # TODO: many-to-many? else: - LOG.debug("gather information") + LOG.debug("gather information for %s", + os.path.join(root, fname)) self._gather_information(fob) size += fob.size - self._existing_files.append(fob) + if fob not in self._existing_files: + self._existing_files.append(fob) for dirname in dirs: dirpath = os.path.join(root, dirname) @@ -191,16 +345,36 @@ class Scan(object): continue if os.path.islink(dirpath): - fob = self._mk_file(dirname, root, parent) - fob.filename = fob.filename + " -> " + os.readlink(dirpath) - fob.type = 3 + fob = self._mk_file(dirname, root, parent, TYPE['link']) else: - LOG.debug("going into %s" % dirname) - self._recursive(parent, dirname, fullpath, date, size, ftype) + LOG.debug("going into %s" % os.path.join(root, dirname)) + self._recursive(parent, dirname, fullpath, size) LOG.debug("size of items: %s" % parent.size) return True + def _get_old_file(self, fdict, ftype): + """ + Search for object with provided data in dictionary in stored branch + (which is updating). Return such object on success, remove it from + list. + """ + for index, obj in enumerate(self._existing_branch): + if ftype == TYPE['link'] and fdict['filename'] == obj.filename: + return self._existing_branch.pop(index) + elif fdict['filename'] == obj.filename and \ + fdict['date'] == obj.date and \ + ftype == TYPE['file'] and \ + fdict['size'] in (obj.size, 0): + obj = self._existing_branch.pop(index) + obj.size = fdict['size'] + return obj + elif fdict['filename'] == obj.filename: + obj = self._existing_branch.pop(index) + obj.size = fdict['date'] + return obj + return False + def _object_exists(self, fobj): """ Perform check if current File object already exists in collection. If @@ -209,16 +383,24 @@ class Scan(object): for efobj in self._existing_files: if efobj.size == fobj.size \ and efobj.type == fobj.type \ - and efobj.date == fobj.date: + and efobj.date == fobj.date \ + and efobj.filename == fobj.filename: return efobj return None + def _get_files_count(self): + count = 0 + for root, dirs, files in os.walk(self.path): + count += len(files) + LOG.debug("count of files: %s", count) + return count + + class asdScan(object): """ Retrieve and identify all files recursively on given path """ def __init__(self, path, tree_model): - LOG.debug("initialization") self.path = path self.abort = False self.label = None @@ -232,7 +414,7 @@ class asdScan(object): self.busy = True # count files in directory tree - LOG.info("Calculating number of files in directory tree...") + LOG.debug("Calculating number of files in directory tree...") step = 0 try: @@ -276,7 +458,7 @@ class asdScan(object): try: root, dirs, files = os.walk(path).next() except: - LOG.debug("cannot access ", path) + LOG.warning("Cannot access ", path) return 0 ############# diff --git a/pygtktalog/thumbnail.py b/pygtktalog/thumbnail.py index 3bb44c8..0e22131 100644 --- a/pygtktalog/thumbnail.py +++ b/pygtktalog/thumbnail.py @@ -1,14 +1,13 @@ """ Project: pyGTKtalog - Description: Create thumbnail for sepcified image + Description: Create thumbnail for sepcified image by its filename Type: lib Author: Roman 'gryf' Dobosz, gryf73@gmail.com Created: 2011-05-15 """ - import os import sys -import shutil +from cStringIO import StringIO from tempfile import mkstemp import Image @@ -20,7 +19,7 @@ from pygtktalog import EXIF LOG = get_logger(__name__) -class Thumbnail(object): +class ThumbCreator(object): """ Class for generate/extract thumbnail from image file """ @@ -28,11 +27,12 @@ class Thumbnail(object): def __init__(self, filename): self.thumb_x = 160 self.thumb_y = 160 - self.filename = filename + self.filename = filename.decode(sys.getfilesystemencoding()) + self.fobj = StringIO() - def save(self): + def generate(self): """ - Save thumbnail into temporary file + Generate and return file-like object with thumbnail """ exif = {} orientations = {2: Image.FLIP_LEFT_RIGHT, # Mirrored horizontal @@ -51,28 +51,30 @@ class Thumbnail(object): os.close(file_desc) if 'JPEGThumbnail' not in exif: - LOG.debug("no exif thumb") + LOG.debug("no exif thumb for file %s; creating." % self.filename) thumb = self._scale_image() if thumb: - thumb.save(thumb_fn, "JPEG") + thumb.save(self.fobj, "JPEG") else: LOG.debug("exif thumb for filename %s" % self.filename) exif_thumbnail = exif['JPEGThumbnail'] - thumb = open(thumb_fn, 'wb') - thumb.write(exif_thumbnail) - thumb.close() + self.fobj.write(exif_thumbnail) + self.fobj.seek(0) - if 'Image Orientation' in exif: - orient = exif['Image Orientation'].values[0] - if orient > 1 and orient in orientations: - thumb_image = Image.open(self.thumb_fn) - tmp_thumb_img = thumb_image.transpose(orientations[orient]) + if 'Image Orientation' in exif: + orient = exif['Image Orientation'].values[0] + if orient > 1 and orient in orientations: + thumb_image = Image.open(self.fobj) + tmp_thumb_img = thumb_image.transpose(orientations[orient]) - if orient in flips: - tmp_thumb_img = tmp_thumb_img.transpose(flips[orient]) + if orient in flips: + tmp_thumb_img = tmp_thumb_img.transpose(flips[orient]) - tmp_thumb_img.save(thumb_fn, 'JPEG') - return thumb_fn + self.fobj.seek(0) + self.fobj.truncate() + tmp_thumb_img.save(self.fobj, 'JPEG') + + return self.fobj def _get_exif(self): """ diff --git a/pygtktalog/video.py b/pygtktalog/video.py index 14dde2e..610743e 100644 --- a/pygtktalog/video.py +++ b/pygtktalog/video.py @@ -13,6 +13,10 @@ import math import Image from pygtktalog.misc import float_to_string +from pygtktalog.logger import get_logger + + +LOG = get_logger("Video") class Video(object): @@ -38,12 +42,13 @@ class Video(object): 'ID_VIDEO_HEIGHT': ['height', int], # length is in seconds 'ID_LENGTH': ['length', lambda x: int(x.split(".")[0])], + 'ID_START_TIME': ['start', self._get_start_pos], 'ID_DEMUXER': ['container', self._return_lower], 'ID_VIDEO_FORMAT': ['video_format', self._return_lower], 'ID_VIDEO_CODEC': ['video_codec', self._return_lower], 'ID_AUDIO_CODEC': ['audio_codec', self._return_lower], 'ID_AUDIO_FORMAT': ['audio_format', self._return_lower], - 'ID_AUDIO_NCH': ['audio_no_channels', int],} + 'ID_AUDIO_NCH': ['audio_no_channels', int]} # TODO: what about audio/subtitle language/existence? for key in output: @@ -51,8 +56,10 @@ class Video(object): self.tags[attrs[key][0]] = attrs[key][1](output[key]) if 'length' in self.tags and self.tags['length'] > 0: - hours = self.tags['length'] / 3600 - seconds = self.tags['length'] - hours * 3600 + start = self.tags.get('start', 0) + length = self.tags['length'] - start + hours = length / 3600 + seconds = length - hours * 3600 minutes = seconds / 60 seconds -= minutes * 60 length_str = "%02d:%02d:%02d" % (hours, minutes, seconds) @@ -70,11 +77,11 @@ class Video(object): other place, otherwise it stays in filesystem. """ - if not (self.tags.has_key('length') and self.tags.has_key('width')): + if not ('length' in self.tags and 'width' in self.tags): # no length or width return None - if not (self.tags['length'] >0 and self.tags['width'] >0): + if not (self.tags['length'] > 0 and self.tags['width'] > 0): # zero length or wight return None @@ -88,7 +95,7 @@ class Video(object): no_pictures = self.tags['length'] / scale if no_pictures > 8: - no_pictures = (no_pictures / 8 ) * 8 # only multiple of 8, please. + no_pictures = (no_pictures / 8) * 8 # only multiple of 8, please. else: # for really short movies no_pictures = 4 @@ -102,6 +109,38 @@ class Video(object): shutil.rmtree(tempdir) return image_fn + def get_formatted_tags(self): + """ + Return formatted tags as a string + """ + out_tags = u'' + if 'container' in self.tags: + out_tags += u"Container: %s\n" % self.tags['container'] + + if 'width' in self.tags and 'height' in self.tags: + out_tags += u"Resolution: %sx%s\n" % (self.tags['width'], + self.tags['height']) + + if 'duration' in self.tags: + out_tags += u"Duration: %s\n" % self.tags['duration'] + + if 'video_codec' in self.tags: + out_tags += "Video codec: %s\n" % self.tags['video_codec'] + + if 'video_format' in self.tags: + out_tags += "Video format: %s\n" % self.tags['video_format'] + + if 'audio_codec' in self.tags: + out_tags += "Audio codec: %s\n" % self.tags['audio_codec'] + + if 'audio_format' in self.tags: + out_tags += "Audio format: %s\n" % self.tags['audio_format'] + + if 'audio_no_channels' in self.tags: + out_tags += "Audio channels: %s\n" % self.tags['audio_no_channels'] + + return out_tags + def _get_movie_info(self): """ Gather movie file information with midentify shell command. @@ -139,18 +178,23 @@ class Video(object): @directory - full output directory name @no_pictures - number of pictures to take """ - step = float(self.tags['length']/(no_pictures + 1)) + step = float(self.tags['length'] / (no_pictures + 1)) current_time = 0 for dummy in range(1, no_pictures + 1): current_time += step time = float_to_string(current_time) - cmd = "mplayer \"%s\" -ao null -brightness 0 -hue 0 " \ + cmd = "mplayer \"%s\" -ao null -brightness 0 -hue 0 " \ "-saturation 0 -contrast 0 -vf-clr -vo jpeg:outdir=\"%s\" -ss %s" \ " -frames 1 2>/dev/null" os.popen(cmd % (self.filename, directory, time)).readlines() - shutil.move(os.path.join(directory, "00000001.jpg"), - os.path.join(directory, "picture_%s.jpg" % time)) + try: + shutil.move(os.path.join(directory, "00000001.jpg"), + os.path.join(directory, "picture_%s.jpg" % time)) + except IOError, (errno, strerror): + LOG.error('error capturing file from movie "%s" at position ' + '%s. Errors: %s, %s', self.filename, time, errno, + strerror) def _make_montage(self, directory, image_fn, no_pictures): """ @@ -199,7 +243,7 @@ class Video(object): for irow in range(no_pictures * row_length): for icol in range(row_length): - left = 1 + icol*(dim[0] + 1) + left = 1 + icol * (dim[0] + 1) right = left + dim[0] upper = 1 + irow * (dim[1] + 1) lower = upper + dim[1] @@ -221,9 +265,17 @@ class Video(object): """ return str(chain).lower() + def _get_start_pos(self, chain): + """ + Return integer for starting point of the movie + """ + try: + return int(chain.split(".")[0]) + except: + return 0 + def __str__(self): str_out = '' for key in self.tags: str_out += "%20s: %s\n" % (key, self.tags[key]) return str_out - diff --git a/src/lib/thumbnail.py b/src/lib/thumbnail.py index ae608dc..31cc832 100644 --- a/src/lib/thumbnail.py +++ b/src/lib/thumbnail.py @@ -1,50 +1,31 @@ -# This Python file uses the following encoding: utf-8 -# -# Author: Roman 'gryf' Dobosz gryf@elysium.pl -# -# Copyright (C) 2007 by Roman 'gryf' Dobosz -# -# This file is part of pyGTKtalog. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -# ------------------------------------------------------------------------- - -from tempfile import mkstemp -from hashlib import sha512 -from shutil import move -from os import path -import sys +""" + Project: pyGTKtalog + Description: Thumbnail helper + Type: library + Author: Roman 'gryf' Dobosz, gryf73@gmail.com + Created: 2012-02-19 +""" +from hashlib import sha256 +from cStringIO import StringIO from lib import EXIF import Image + class Thumbnail(object): """Class for generate/extract thumbnail from image file""" - def __init__(self, filename=None, base=''): + def __init__(self, fp, base=''): self.thumb_x = 160 self.thumb_y = 160 - self.filename = filename self.base = base - self.sha512 = sha512(open(filename).read()).hexdigest() - self.thumbnail_path = path.join(self.base, self.sha512 + "_t") + self.sha256 = sha256(fp.read(10485760)).hexdigest() + fp.seek(0) + self.fp = fp def save(self): """Save thumbnail into specific directory structure - return filename base and exif object or None""" + return exif obj and fp to thumbnail""" exif = {} orientations = {2: Image.FLIP_LEFT_RIGHT, # Mirrored horizontal 3: Image.ROTATE_180, # Rotated 180 @@ -57,61 +38,46 @@ class Thumbnail(object): 8: Image.ROTATE_90} # Rotated 90 CCW flips = {7: Image.FLIP_LEFT_RIGHT, 5: Image.FLIP_LEFT_RIGHT} - image_file = open(self.filename, 'rb') try: - exif = EXIF.process_file(image_file) + exif = EXIF.process_file(self.fp) except: - if __debug__: - print "exception", sys.exc_info()[0], "raised with file:" - print self.filename - finally: - image_file.close() - - if path.exists(self.thumbnail_path): - if __debug__: - print "file", self.filename, "with hash", self.sha512, "exists" - return self.sha512, exif + self.fp.seek(0) + thumb_file = StringIO() if 'JPEGThumbnail' in exif: - if __debug__: - print self.filename, "exif thumb" - exif_thumbnail = exif['JPEGThumbnail'] - thumb_file = open(self.thumbnail_path, 'wb') - thumb_file.write(exif_thumbnail) - thumb_file.close() + thumb_file.write(exif['JPEGThumbnail']) if 'Image Orientation' in exif: orient = exif['Image Orientation'].values[0] if orient > 1 and orient in orientations: - fd, temp_image_path = mkstemp() - os.close(fd) + tmp_thumb_img = StringIO() - thumb_image = Image.open(self.thumbnail_path) + thumb_image = Image.open(self.fp) tmp_thumb_img = thumb_image.transpose(orientations[orient]) if orient in flips: tmp_thumb_img = tmp_thumb_img.transpose(flips[orient]) if tmp_thumb_img: - tmp_thumb_img.save(temp_image_path, 'JPEG') - move(temp_image_path, self.thumbnail_path) - return self.sha512, exif + thumb_file.seek(0) + tmp_thumb_img.save(thumb_file, 'JPEG') + tmp_thumb_img.close() + else: - if __debug__: - print self.filename, "no exif thumb" thumb = self.__scale_image() if thumb: thumb.save(self.thumbnail_path, "JPEG") - return self.sha512, exif - return None, exif + + return exif, thumb_file def __scale_image(self): """create thumbnail. returns image object or None""" try: - image_thumb = Image.open(self.filename).convert('RGB') + image_thumb = Image.open(self.fp).convert('RGB') except: return None it_x, it_y = image_thumb.size if it_x > self.thumb_x or it_y > self.thumb_y: - image_thumb.thumbnail((self.thumb_x, self.thumb_y), Image.ANTIALIAS) + image_thumb.thumbnail((self.thumb_x, self.thumb_y), + Image.ANTIALIAS) return image_thumb diff --git a/test/unit/scan_test.py b/test/unit/scan_test.py index 3de86c9..c3d0a0e 100644 --- a/test/unit/scan_test.py +++ b/test/unit/scan_test.py @@ -13,10 +13,12 @@ from pygtktalog.dbobjects import File from pygtktalog.dbcommon import connect, Session +TEST_DIR = "/home/share/_test_/test_dir" +TEST_DIR_PERMS = "/home/share/_test_/test_dir_permissions/" class TestScan(unittest.TestCase): """ - Testcases for scan functionality + Test cases for scan functionality 1. execution scan function: 1.1 simple case - should pass @@ -53,7 +55,7 @@ class TestScan(unittest.TestCase): """ scanob = scan.Scan(os.path.abspath(os.path.join(__file__, "../../../mocks"))) - scanob = scan.Scan("/mnt/data/_test_/test_dir") + scanob = scan.Scan(TEST_DIR) result_list = scanob.add_files() self.assertEqual(len(result_list), 143) self.assertEqual(len(result_list[0].children), 8) @@ -76,28 +78,28 @@ class TestScan(unittest.TestCase): # dir contains some non accessable items. Should just pass, and on # logs should be messages about it - scanobj.path = "/mnt/data/_test_/test_dir_permissions/" + scanobj.path = TEST_DIR_PERMS scanobj.add_files() def test_abort_functionality(self): - scanobj = scan.Scan("/mnt/data/_test_/test_dir") + scanobj = scan.Scan(TEST_DIR) scanobj.abort = True self.assertEqual(None, scanobj.add_files()) - def test_rescan(self): + def test_double_scan(self): """ Do the scan twice. """ ses = Session() self.assertEqual(len(ses.query(File).all()), 1) - scanob = scan.Scan("/mnt/data/_test_/test_dir") + scanob = scan.Scan(TEST_DIR) scanob.add_files() # note: we have 144 elements in db, because of root element self.assertEqual(len(ses.query(File).all()), 144) - scanob2 = scan.Scan("/mnt/data/_test_/test_dir") + scanob2 = scan.Scan(TEST_DIR) scanob2.add_files() # it is perfectly ok, since we don't update collection, but just added # same directory twice. @@ -106,14 +108,14 @@ class TestScan(unittest.TestCase): file2_ob = scanob2._files[2] # File objects are different - self.assertTrue(file_ob.id != file2_ob.id) + self.assertTrue(file_ob is not file2_ob) # While Image objects points to the same file self.assertTrue(file_ob.images[0].filename == \ file2_ob.images[0].filename) # they are different objects - self.assertTrue(file_ob.images[0].id != file2_ob.images[0].id) + self.assertTrue(file_ob.images[0] is not file2_ob.images[0]) ses.close()