From 25740ea1dc5de04e17f8f42ddb93377db62723ef Mon Sep 17 00:00:00 2001 From: gryf Date: Thu, 17 Nov 2016 18:46:46 +0100 Subject: [PATCH] Added new command 'fsck', which will: - search for Image objects which have no File relation - same goes for Thumbnail objects - search for files which exist neither as Thumbnail nor as Image objects - remove those files from the filesystem - remove empty directories The last step will need improvements, since it looks only for leaf directories in the filesystem tree. A workaround is to repeat the fsck command until it reports 0 empty directories removed. --- README.rst | 1 + scripts/cmdcatalog.py | 297 +++++++++++++++++++++++++++++------------- 2 files changed, 208 insertions(+), 90 deletions(-) diff --git a/README.rst b/README.rst index d5a324f..cea5552 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,7 @@ New version of pyGTKtalog was meant to use multiple interfaces. * update it * list * find files + * fsck (for maintenance for orphaned thumbs/images) #. ``gtktalog.py``. This is written from scratch frontend in pygtk. Still work in progress. 
diff --git a/scripts/cmdcatalog.py b/scripts/cmdcatalog.py index f24f228..5a33d70 100755 --- a/scripts/cmdcatalog.py +++ b/scripts/cmdcatalog.py @@ -14,27 +14,39 @@ from pygtktalog import scan from pygtktalog import misc from pygtktalog import dbobjects as dbo from pygtktalog.dbcommon import connect, Session +from pygtktalog import logger BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(30, 38) -RESET_SEQ = "\033[0m" -COLOR_SEQ = "\033[1;%dm" -BOLD_SEQ = "\033[1m" +RESET_SEQ = '\033[0m' +COLOR_SEQ = '\033[1;%dm' +BOLD_SEQ = '\033[1m' +LOG = logger.get_logger(__name__) def colorize(txt, color): """Pretty print with colors to console.""" - color_map = {"black": BLACK, - "red": RED, - "green": GREEN, - "yellow": YELLOW, - "blue": BLUE, - "magenta": MAGENTA, - "cyan": CYAN, - "white": WHITE} + color_map = {'black': BLACK, + 'red': RED, + 'green': GREEN, + 'yellow': YELLOW, + 'blue': BLUE, + 'magenta': MAGENTA, + 'cyan': CYAN, + 'white': WHITE} return COLOR_SEQ % color_map[color] + txt + RESET_SEQ -TYPE_MAP = {0: "d", 1: "d", 2: "f", 3: "l"} + +def asserdb(func): + def wrapper(args): + if not os.path.exists(args.db): + print colorize("File `%s' does not exists!" 
% args.db, 'red') + sys.exit(1) + func(args) + return wrapper + + +TYPE_MAP = {0: 'd', 1: 'd', 2: 'f', 3: 'l'} class Iface(object): @@ -47,15 +59,16 @@ class Iface(object): self.root = None self._dbname = dbname if debug: - scan.LOG.setLevel("DEBUG") + scan.LOG.setLevel('DEBUG') + LOG.setLevel('DEBUG') def _resolve_path(self, path): """Identify path in the DB""" if not path.startswith("/"): - raise AttributeError("Path have to start with slash (/)") + raise AttributeError('Path have to start with slash (/)') last_node = self.root - for part in path.split("/"): + for part in path.split('/'): if not part.strip(): continue @@ -64,7 +77,7 @@ class Iface(object): last_node = node break else: - raise AttributeError("No such path: %s" % path) + raise AttributeError('No such path: %s' % path) return last_node @@ -77,17 +90,17 @@ class Iface(object): path.insert(0, parent.filename) parent = parent.parent - return u"/" + u"/".join(path) + return u'/' + u'/'.join(path) def _make_path(self, node): """Make the path to the item in the DB""" orig_node = node if node.parent == node: - return {u"/": (u' ', 0, u' ')} + return {u'/': (u' ', 0, u' ')} - ext = "" + ext = '' if node.parent.type == dbo.TYPE['root']: - ext = colorize(" (%s)" % node.filepath, "white") + ext = colorize(' (%s)' % node.filepath, 'white') path = [] path.append(node.filename) @@ -95,7 +108,7 @@ class Iface(object): path.append(node.parent.filename) node = node.parent - path = "/".join([""] + path[::-1]) + ext + path = '/'.join([''] + path[::-1]) + ext return {path: (TYPE_MAP[orig_node.type], orig_node.size, @@ -138,7 +151,7 @@ class Iface(object): node = self.root msg = "Content of path `/':" - print colorize(msg, "white") + print colorize(msg, 'white') if recursive: items = self._walk(node) @@ -155,7 +168,7 @@ class Iface(object): else: filenames = sorted(items.keys()) - print "\n".join(filenames) + print '\n'.join(filenames) def update(self, path, dir_to_update=None): """ @@ -166,8 +179,8 @@ class 
Iface(object): self.root = self.root.filter(dbo.File.type == dbo.TYPE['root']).first() node = self._resolve_path(path) if node == self.root: - print colorize("Cannot update entire db, since root was provided " - "as path.", "red") + print colorize('Cannot update entire db, since root was provided ' + 'as path.', 'red') return if not dir_to_update: @@ -177,7 +190,7 @@ class Iface(object): raise OSError("Path to updtate doesn't exists: %s", dir_to_update) print colorize("Updating node `%s' against directory " - "`%s'" % (path, dir_to_update), "white") + "`%s'" % (path, dir_to_update), 'white') if not self.dry_run: scanob = scan.Scan(dir_to_update) # scanob.update_files(node.id) @@ -194,7 +207,7 @@ class Iface(object): self.root.parent_id = 1 config = dbo.Config() - config.key = "image_path" + config.key = 'image_path' config.value = data_dir if not self.dry_run: @@ -203,9 +216,9 @@ class Iface(object): self.sess.commit() print colorize("Creating new db against directory `%s'" % dir_to_add, - "white") + 'white') if not self.dry_run: - if data_dir == ":same_as_db:": + if data_dir == ':same_as_db:': misc.calculate_image_path(None, True) else: misc.calculate_image_path(data_dir, True) @@ -221,7 +234,7 @@ class Iface(object): if not os.path.exists(dir_to_add): raise OSError("Path to add doesn't exists: %s", dir_to_add) - print colorize("Adding directory `%s'" % dir_to_add, "white") + print colorize("Adding directory `%s'" % dir_to_add, 'white') if not self.dry_run: scanob = scan.Scan(dir_to_add) scanob.add_files() @@ -260,140 +273,244 @@ class Iface(object): result = [] for word in search_words: - phrase = u"%%%s%%" % word.decode('utf-8') + phrase = u'%%%s%%' % word.decode('utf-8') query = query.filter(dbo.File.filename.like(phrase)) for item in query.all(): result.append(self._get_full_path(item)) if not result: - print "No results for `%s'" % " ".join(search_words) + print "No results for `%s'" % ' '.join(search_words) return result.sort() for item in result: print 
self._annotate(item, search_words) + def fsck(self): + """Fsck orphaned images/thumbs""" + image_path = self.sess.query(dbo.Config).\ + filter(dbo.Config.key=='image_path').one().value + + if image_path == ':same_as_db:': + image_path = misc.calculate_image_path(None, False) + + files_to_remove = [] + obj_to_remove = [] + + # remove images/thumbnails which doesn't have file relation + for name, obj in (("images", dbo.Image), + ("thumbnails", dbo.Thumbnail)): + self._purge_orphaned_objects(obj, "Scanning %s " % name) + + # find all image files not associate with either Image (image/thumb) + # or Thumbnail (thumb) objects + sys.stdout.write(40 * " " + "\r") + count = 0 + for root, dirs, files in os.walk(image_path): + for fname in files: + sys.stdout.write("Scanning files " + + "| / - \\".split()[count % 4] + "\r") + sys.stdout.flush() + count += 1 + + fname_ = os.path.join(root.split(image_path)[1], + fname).lstrip('/') + + if '_t' in fname: + obj = self.sess.query(dbo.Thumbnail)\ + .filter(dbo.Thumbnail.filename==fname_).all() + if obj: + continue + + obj = self.sess.query(dbo.Image)\ + .filter(dbo.Image.filename==\ + fname_.replace('_t.', '.')).all() + if obj: + continue + + else: + obj = self.sess.query(dbo.Image)\ + .filter(dbo.Image.filename==fname_).all() + if obj: + continue + + files_to_remove.append(os.path.join(root, fname)) + + LOG.debug("Found %d orphaned files", len(files_to_remove)) + sys.stdout.write(40 * " " + "\r") + sys.stdout.flush() + + if self.dry_run: + print "Following files are not associated to any items in the DB:" + for filename in sorted(files_to_remove): + print filename + self.sess.rollback() + else: + _remove_files(image_path, files_to_remove) + self.sess.commit() + + def _purge_orphaned_objects(self, sa_class, msg): + """Return tuple of lists of images that are orphaned""" + + ids_to_remove = [] + + for count, item in enumerate(self.sess.query(sa_class).all()): + sys.stdout.write(msg + "| / - \\".split()[count % 4] + "\r") + if not 
item.file: + self.sess.delete(item) + ids_to_remove.append(item.id) + del item + sys.stdout.flush() + + LOG.debug("Found %d orphaned object of class %s", + len(ids_to_remove), sa_class.__name__) + self.sess.flush() + + +def _remove_files(image_path, filenames): + """Remove files and empty directories in provided location""" + + count = 0 + for count, fname in enumerate(filenames, start=1): + os.unlink(fname) + + LOG.info("Removed %d orphaned files", count) + + count = 0 + for root, dirs, _ in os.walk(image_path): + for dirname in dirs: + try: + os.rmdir(os.path.join(root, dirname)) + count += 1 + except OSError: + pass + LOG.info("Removed %d empty directories", count) + def _get_highest_size_length(item_dict): highest = len(str(sorted([i[1] for i in item_dict.values()])[-1])) return highest + highest / 3 +@asserdb def list_db(args): """List""" - if not os.path.exists(args.db): - print colorize("File `%s' does not exists!" % args.db, "red") - sys.exit(1) - obj = Iface(args.db, False, args.debug) obj.list(path=args.path, recursive=args.recursive, long_=args.long) obj.close() +@asserdb def update_db(args): """Update""" - if not os.path.exists(args.db): - print colorize("File `%s' does not exists!" % args.db, "red") - sys.exit(1) - obj = Iface(args.db, args.pretend, args.debug) obj.update(args.path, dir_to_update=args.dir_to_update) obj.close() +@asserdb def add_dir(args): """Add""" - if not os.path.exists(args.db): - print colorize("File `%s' does not exists!" % args.db, "red") - sys.exit(1) - obj = Iface(args.db, args.pretend, args.debug) obj.add(args.dir_to_add) obj.close() +@asserdb def create_db(args): """List""" - if os.path.exists(args.db): - print colorize("File `%s' exists!" % args.db, "yellow") - obj = Iface(args.db, args.pretend, args.debug) obj.create(args.dir_to_add, args.imagedir) obj.close() +@asserdb def search(args): - if not os.path.exists(args.db): - print colorize("File `%s' does not exists!" 
% args.db, "red") - sys.exit(1) - + """Find""" obj = Iface(args.db, False, args.debug) obj.find(args.search_words) obj.close() +@asserdb +def cleanup(args): + """Cleanup""" + obj = Iface(args.db, False, args.debug) + obj.fsck() + obj.close() + + def main(): """Main""" parser = argparse.ArgumentParser() subparser = parser.add_subparsers() - list_ = subparser.add_parser("list") - list_.add_argument("db") - list_.add_argument("path", nargs="?") - list_.add_argument("-l", "--long", help="Show size, date and type", - action="store_true", default=False) - list_.add_argument("-r", "--recursive", help="list items in " - "subdirectories", action="store_true", default=False) - list_.add_argument("-d", "--debug", help="Turn on debug", - action="store_true", default=False) + list_ = subparser.add_parser('list') + list_.add_argument('db') + list_.add_argument('path', nargs='?') + list_.add_argument('-l', '--long', help='Show size, date and type', + action='store_true', default=False) + list_.add_argument('-r', '--recursive', help='list items in ' + 'subdirectories', action='store_true', default=False) + list_.add_argument('-d', '--debug', help='Turn on debug', + action='store_true', default=False) list_.set_defaults(func=list_db) - update = subparser.add_parser("update") - update.add_argument("db") - update.add_argument("path") - update.add_argument("dir_to_update", nargs="?") - update.add_argument("-p", "--pretend", help="Don't do the action, just " + update = subparser.add_parser('update') + update.add_argument('db') + update.add_argument('path') + update.add_argument('dir_to_update', nargs='?') + update.add_argument('-p', '--pretend', help="Don't do the action, just " "give the info what would gonna to happen.", - action="store_true", default=False) - update.add_argument("-d", "--debug", help="Turn on debug", - action="store_true", default=False) + action='store_true', default=False) + update.add_argument('-d', '--debug', help='Turn on debug', + action='store_true', 
default=False) update.set_defaults(func=update_db) - create = subparser.add_parser("create") - create.add_argument("db") - create.add_argument("dir_to_add") - create.add_argument("-i", "--imagedir", help="Directory where to put " + create = subparser.add_parser('create') + create.add_argument('db') + create.add_argument('dir_to_add') + create.add_argument('-i', '--imagedir', help="Directory where to put " "images for the database. Popular, but deprecated " "choice is `~/.pygtktalog/images'. Currnet default " "is special string `:same_as_db:' which will try to " "create directory with the same name as the db with " - "data suffix", default=":same_as_db:") - create.add_argument("-p", "--pretend", help="Don't do the action, just " + "data suffix", default=':same_as_db:') + create.add_argument('-p', '--pretend', help="Don't do the action, just " "give the info what would gonna to happen.", - action="store_true", default=False) - create.add_argument("-d", "--debug", help="Turn on debug", - action="store_true", default=False) + action='store_true', default=False) + create.add_argument('-d', '--debug', help='Turn on debug', + action='store_true', default=False) create.set_defaults(func=create_db) - add = subparser.add_parser("add") - add.add_argument("db") - add.add_argument("dir_to_add") - add.add_argument("-p", "--pretend", help="Don't do the action, just " + add = subparser.add_parser('add') + add.add_argument('db') + add.add_argument('dir_to_add') + add.add_argument('-p', '--pretend', help="Don't do the action, just " "give the info what would gonna to happen.", - action="store_true", default=False) - add.add_argument("-d", "--debug", help="Turn on debug", - action="store_true", default=False) + action='store_true', default=False) + add.add_argument('-d', '--debug', help='Turn on debug', + action='store_true', default=False) add.set_defaults(func=add_dir) - find = subparser.add_parser("find") - find.add_argument("db") - find.add_argument("search_words", nargs="+") - 
find.add_argument("-d", "--debug", help="Turn on debug", - action="store_true", default=False) + find = subparser.add_parser('find') + find.add_argument('db') + find.add_argument('search_words', nargs='+') + find.add_argument('-d', '--debug', help='Turn on debug', + action='store_true', default=False) find.set_defaults(func=search) + fsck = subparser.add_parser('fsck') + fsck.add_argument('db') + fsck.add_argument('-p', '--pretend', help="Don't do the action, just give" + " the info what would gonna to happen.", + action='store_true', default=False) + fsck.add_argument('-d', '--debug', help='Turn on debug', + action='store_true', default=False) + fsck.set_defaults(func=cleanup) + args = parser.parse_args() args.func(args) -if __name__ == "__main__": +if __name__ == '__main__': main()