From 5d0114d0e979d69932f58b631650676727fcdd5d Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 10:05:26 +0100 Subject: [PATCH 01/19] hash diff for #10 --- README.md | 5 +++-- catcli/noder.py | 31 +++++++++++++++++++++------- catcli/utils.py | 2 +- catcli/walker.py | 21 +++++++++---------- tests/helpers.py | 20 ++++++++++++++++++ tests/test_update.py | 48 ++++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 104 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index ea6c0a4..7e4e452 100644 --- a/README.md +++ b/README.md @@ -200,8 +200,9 @@ Storage entry can be edited with following catcli commands: ## Update catalog The catalog can be updated with the `update` command. -Updates are based on the access time of each of the files. If using -`-c --hash`, only new files are re-hashed. +Updates are based on the access time of each of the files and on the +hash checksum if present (catalog was indexed with `-c --hash` and +`update` is called with the switch `-c --hash`). # Examples diff --git a/catcli/noder.py b/catcli/noder.py index ba92479..bd99449 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -67,20 +67,29 @@ class Noder: Logger.err('No node at path \"{}\"'.format(path)) return None - def get_node_if_newer(self, top, path, maccess): - '''return the node (if any) and if path is newer''' + def get_node_if_changed(self, top, path): + '''return the node (if any) and if it has changed''' treepath = path.lstrip(os.sep) node = self.get_node(top, treepath, quiet=True) + # node does not exist if not node: - # node does not exist return None, True + # force re-indexing if no maccess + maccess = os.path.getmtime(path) if not self._has_attr(node, 'maccess') or \ not node.maccess: - # force re-indexing if no maccess return node, True + # maccess changed old_maccess = node.maccess if float(maccess) > float(old_maccess): + self._debug('macess changed for \"{}\"'.format(path)) return node, True + # test hash + if self.hash and node.md5: + md5 = self._get_hash(path) + if md5 != node.md5: + self._debug('checksum changed for \"{}\"'.format(path)) + return node, True return node, False def get_meta_node(self, top): @@ -96,8 +105,7 @@ class Noder: recursively traverse tree and return size @store: store the size in the node ''' - if self.verbose: - Logger.info('getting node size recursively') + self._debug('getting node size recursively') if node.type == self.TYPE_FILE: return node.size size = 0 @@ -168,7 +176,7 @@ class Noder: return None md5 = None if self.hash: - md5 = utils.md5sum(path) + md5 = self._get_hash(path) relpath = os.sep.join([storagepath, name]) maccess = os.path.getmtime(path) @@ -461,3 +469,12 @@ class Noder: if parent: return os.sep.join([parent, node.name]) return node.name + + def _get_hash(self, path): + """return md5 hash of node""" + return utils.md5sum(path) + + def _debug(self, string): + if not self.verbose: + return + Logger.info('getting node size recursively') diff --git a/catcli/utils.py b/catcli/utils.py index 5267737..3fe7c0d 100644 --- a/catcli/utils.py +++ b/catcli/utils.py @@ -19,7 +19,7 @@ def md5sum(path): '''calculate md5 sum of a file''' p = os.path.realpath(path) if not os.path.exists(p): - Logger.err('\nunable to get md5sum on {}'.format(path)) + Logger.err('\nmd5sum - file does not exist: {}'.format(p)) return None try: with open(p, mode='rb') as f: diff --git a/catcli/walker.py b/catcli/walker.py index 80e99b3..f1e8678 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -17,7 +17,8 @@ class Walker: def __init__(self, noder, nohash=False, debug=False): self.noder = noder - self.noder.set_hashing(not nohash) + self.nohash = nohash + self.noder.set_hashing(not self.nohash) self.debug = debug def index(self, path, parent, name, storagepath=''): @@ -66,8 +67,7 @@ class Walker: for f in files: self._debug('found file {} under {}'.format(f, path)) sub = os.path.join(root, f) - maccess = os.path.getmtime(sub) - reindex, n = self._need_reindex(parent, f, maccess) + reindex, n = self._need_reindex(parent, sub) if not reindex: self._debug('\tignore file {}'.format(sub)) self.noder.flag(n) @@ -82,8 +82,7 @@ class Walker: self._debug('found dir {} under {}'.format(d, path)) base = os.path.basename(d) sub = os.path.join(root, d) - maccess = os.path.getmtime(sub) - reindex, dummy = self._need_reindex(parent, base, maccess) + reindex, dummy = self._need_reindex(parent, sub) if reindex: self._debug('\tre-index directory {}'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, storagepath) @@ -99,19 +98,19 @@ class Walker: self._log(None) return cnt - def _need_reindex(self, top, path, maccess): + def _need_reindex(self, top, path): '''test if node needs re-indexing''' - cnode, newer = self.noder.get_node_if_newer(top, path, maccess) + cnode, changed = self.noder.get_node_if_changed(top, path) if not cnode: self._debug('\tdoes not exist') return True, cnode - if cnode and not newer: + if cnode and not changed: # ignore this node - self._debug('\tis not newer') + self._debug('\thas not changed') return False, cnode - if cnode and newer: + if cnode and changed: # remove this node and re-add - self._debug('\tis newer') + self._debug('\thas changed') self._debug('\tremoving node {}'.format(cnode)) cnode.parent = None self._debug('\tis to be re-indexed') diff --git a/tests/helpers.py b/tests/helpers.py index ae5624b..7c7a0c7 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -11,6 +11,7 @@ import random import tempfile import shutil import subprocess +import hashlib TMPSUFFIX = '.catcli' @@ -25,6 +26,25 @@ def get_rnd_string(length): return ''.join(random.choice(alpha) for _ in range(length)) +def md5sum(path): + '''calculate md5 sum of a file''' + p = os.path.realpath(path) + if not os.path.exists(p): + return None + try: + with open(p, mode='rb') as f: + d = hashlib.md5() + while True: + buf = f.read(4096) + if not buf: + break + d.update(buf) + return d.hexdigest() + except PermissionError: + pass + return None + + def clean(path): '''Delete file or folder.''' if not os.path.exists(path): diff --git a/tests/test_update.py b/tests/test_update.py index 39eb02c..2f54b27 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -12,7 +12,7 @@ from catcli.catcli import cmd_index, cmd_update from catcli.noder import Noder from catcli.catalog import Catalog from tests.helpers import create_dir, create_rnd_file, get_tempdir, \ - clean, unix_tree, edit_file, read_from_file + clean, unix_tree, edit_file, read_from_file, md5sum import anytree @@ -31,6 +31,7 @@ class TestIndexing(unittest.TestCase): f1 = create_rnd_file(dirpath, 'file1') f2 = create_rnd_file(dirpath, 'file2') f3 = create_rnd_file(dirpath, 'file3') + f4 = create_rnd_file(dirpath, 'file4') # create 2 directories d1 = create_dir(dirpath, 'dir1') @@ -45,6 +46,12 @@ class TestIndexing(unittest.TestCase): top = noder.new_top_node() catalog = Catalog(catalogpath, force=True, verbose=False) + # get checksums + f4_md5 = md5sum(f4) + self.assertTrue(f4_md5) + d1f1_md5 = md5sum(d1f1) + self.assertTrue(d1f1_md5) + # create fake args tmpdirname = 'tmpdir' args = {'': dirpath, '': tmpdirname, @@ -56,6 +63,13 @@ class TestIndexing(unittest.TestCase): cmd_index(args, noder, catalog, top, debug=True) self.assertTrue(os.stat(catalogpath).st_size != 0) + # ensure md5 sum are in + nods = noder.find_name(top, os.path.basename(f4)) + self.assertTrue(len(nods) == 1) + nod = nods[0] + self.assertTrue(nod) + self.assertTrue(nod.md5 == f4_md5) + # print catalog noder.print_tree(top) @@ -70,6 +84,19 @@ class TestIndexing(unittest.TestCase): # modify files EDIT = 'edited' edit_file(d1f1, EDIT) + d1f1_md5_new = md5sum(d1f1) + self.assertTrue(d1f1_md5_new) + self.assertTrue(d1f1_md5_new != d1f1_md5) + + # change file without mtime + maccess = os.path.getmtime(f4) + EDIT = 'edited' + edit_file(f4, EDIT) + # reset edit time + os.utime(f4, (maccess, maccess)) + f4_md5_new = md5sum(d1f1) + self.assertTrue(f4_md5_new) + self.assertTrue(f4_md5_new != f4_md5) # update storage cmd_update(args, noder, catalog, top, debug=True) @@ -81,7 +108,23 @@ class TestIndexing(unittest.TestCase): # explore the top node to find all nodes self.assertTrue(len(top.children) == 1) storage = top.children[0] - self.assertTrue(len(storage.children) == 7) + self.assertTrue(len(storage.children) == 8) + + # ensure d1f1 md5 sum has changed in catalog + nods = noder.find_name(top, os.path.basename(d1f1)) + self.assertTrue(len(nods) == 1) + nod = nods[0] + self.assertTrue(nod) + self.assertTrue(nod.md5 != d1f1_md5) + self.assertTrue(nod.md5 == d1f1_md5_new) + + # ensure f4 md5 sum has changed in catalog + nods = noder.find_name(top, os.path.basename(f4)) + self.assertTrue(len(nods) == 1) + nod = nods[0] + self.assertTrue(nod) + self.assertTrue(nod.md5 != f4_md5) + self.assertTrue(nod.md5 == f4_md5_new) # ensures files and directories are in names = [node.name for node in anytree.PreOrderIter(storage)] @@ -89,6 +132,7 @@ class TestIndexing(unittest.TestCase): self.assertTrue(os.path.basename(f1) in names) self.assertTrue(os.path.basename(f2) in names) self.assertTrue(os.path.basename(f3) in names) + self.assertTrue(os.path.basename(f4) in names) self.assertTrue(os.path.basename(d1) in names) self.assertTrue(os.path.basename(d1f1) in names) self.assertTrue(os.path.basename(d1f2) in names) From aec895038025fa97d7406856a514947f94979807 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 14:19:59 +0100 Subject: [PATCH 02/19] fix reindexing for #10 --- catcli/noder.py | 15 +++++++++++---- catcli/walker.py | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/catcli/noder.py b/catcli/noder.py index bd99449..8bc644e 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -67,17 +67,24 @@ class Noder: Logger.err('No node at path \"{}\"'.format(path)) return None - def get_node_if_changed(self, top, path): - '''return the node (if any) and if it has changed''' - treepath = path.lstrip(os.sep) + def get_node_if_changed(self, top, path, treepath): + ''' + return the node (if any) and if it has changed + @top: top node (storage) + @path: abs path to file + @treepath: rel path from indexed directory + ''' + treepath = treepath.lstrip(os.sep) node = self.get_node(top, treepath, quiet=True) # node does not exist if not node: + self._debug('node does not exist') return None, True # force re-indexing if no maccess maccess = os.path.getmtime(path) if not self._has_attr(node, 'maccess') or \ not node.maccess: + self._debug('changed as no maccess found') return node, True # maccess changed old_maccess = node.maccess @@ -477,4 +484,4 @@ class Noder: def _debug(self, string): if not self.verbose: return - Logger.info('getting node size recursively') + Logger.info(string) diff --git a/catcli/walker.py b/catcli/walker.py index f1e8678..f2220d2 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -55,19 +55,25 @@ class Walker: def reindex(self, path, parent, top): '''reindex a directory and store in tree''' - cnt = self._reindex(path, parent, top, '') + cnt = self._reindex(path, parent, top) cnt += self.noder.clean_not_flagged(parent) return cnt - def _reindex(self, path, parent, top, storagepath): - '''reindex a directory and store in tree''' + def _reindex(self, path, parent, top, storagepath=''): + ''' + reindex a directory and store in tree + @path: directory path to re-index + @top: top node (storage) + @storagepath: rel path relative to indexed directory + ''' self._debug('reindexing starting at {}'.format(path)) cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: - self._debug('found file {} under {}'.format(f, path)) + self._debug('found file \"{}\" under {}'.format(f, path)) sub = os.path.join(root, f) - reindex, n = self._need_reindex(parent, sub) + treepath = os.path.join(storagepath, f) + reindex, n = self._need_reindex(parent, sub, treepath) if not reindex: self._debug('\tignore file {}'.format(sub)) self.noder.flag(n) @@ -79,10 +85,11 @@ class Walker: self.noder.flag(n) cnt += 1 for d in dirs: - self._debug('found dir {} under {}'.format(d, path)) + self._debug('found dir \"{}\" under {}'.format(d, path)) base = os.path.basename(d) sub = os.path.join(root, d) - reindex, dummy = self._need_reindex(parent, sub) + treepath = os.path.join(storagepath, d) + reindex, dummy = self._need_reindex(parent, sub, treepath) if reindex: self._debug('\tre-index directory {}'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, storagepath) @@ -98,22 +105,27 @@ class Walker: self._log(None) return cnt - def _need_reindex(self, top, path): - '''test if node needs re-indexing''' - cnode, changed = self.noder.get_node_if_changed(top, path) + def _need_reindex(self, top, path, treepath): + ''' + test if node needs re-indexing + @top: top node (storage) + @path: abs path to file + @treepath: rel path from indexed directory + ''' + cnode, changed = self.noder.get_node_if_changed(top, path, treepath) if not cnode: - self._debug('\tdoes not exist') + self._debug('{} does not exist'.format(path)) return True, cnode if cnode and not changed: # ignore this node - self._debug('\thas not changed') + self._debug('{} has not changed'.format(path)) return False, cnode if cnode and changed: # remove this node and re-add - self._debug('\thas changed') - self._debug('\tremoving node {}'.format(cnode)) + self._debug('{} has changed'.format(path)) + self._debug('removing node {} for {}'.format(cnode, path)) cnode.parent = None - self._debug('\tis to be re-indexed') + self._debug('{} is to be re-indexed'.format(path)) return True, cnode def _debug(self, string): From 6d7f95a6ae26a290f38f6dc7367a5f1ae420e2da Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 14:25:06 +0100 Subject: [PATCH 03/19] fix oserror in md5sum --- catcli/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/catcli/utils.py b/catcli/utils.py index 3fe7c0d..798e784 100644 --- a/catcli/utils.py +++ b/catcli/utils.py @@ -32,6 +32,8 @@ def md5sum(path): return d.hexdigest() except PermissionError: pass + except OSError as e: + Logger.err('md5sum error: {}'.format(e)) return None From f0495e6d00f6d70a45c3e844acdadb1717354865 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 14:26:54 +0100 Subject: [PATCH 04/19] refactor logging --- catcli/catcli.py | 4 ++-- catcli/logger.py | 10 +++++----- catcli/noder.py | 2 +- catcli/walker.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index af01244..edd3738 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -212,8 +212,8 @@ def cmd_edit(args, noder, catalog, top): def banner(): - Logger.log(BANNER) - Logger.log("") + Logger.out(BANNER) + Logger.out("") def main(): diff --git a/catcli/logger.py b/catcli/logger.py index 66a0134..c7fc611 100644 --- a/catcli/logger.py +++ b/catcli/logger.py @@ -71,17 +71,17 @@ class Logger: # generic output ###################################################################### def out(string): - '''to stdout''' + '''to stdout no color''' sys.stdout.write('{}\n'.format(string)) - def log(string): - '''to stderr''' + def debug(string): + '''to stderr no color''' sys.stderr.write('{}\n'.format(string)) def info(string): - '''to stderr in color''' + '''to stdout in color''' s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET) - sys.stderr.write('{}\n'.format(s)) + sys.stdout.write('{}\n'.format(s)) def err(string): '''to stderr in RED''' diff --git a/catcli/noder.py b/catcli/noder.py index 8bc644e..29e772f 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -484,4 +484,4 @@ class Noder: def _debug(self, string): if not self.verbose: return - Logger.info(string) + Logger.debug(string) diff --git a/catcli/walker.py b/catcli/walker.py index f2220d2..fd72723 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -131,7 +131,7 @@ class Walker: def _debug(self, string): if not self.debug: return - Logger.log(string) + Logger.debug(string) def _log(self, string): if self.debug: From 16bf5f99d47d8c1ff28fc8a42dfbd43a767819db Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 14:33:06 +0100 Subject: [PATCH 05/19] refactor verbose in debug in code --- catcli/catalog.py | 33 ++++++++++++++++++++------------- catcli/catcli.py | 6 +++--- catcli/noder.py | 12 +++++------- tests/test_find.py | 2 +- tests/test_graph.py | 2 +- tests/test_index.py | 2 +- tests/test_ls.py | 2 +- tests/test_rm.py | 2 +- tests/test_tree.py | 2 +- tests/test_update.py | 2 +- 10 files changed, 35 insertions(+), 30 deletions(-) diff --git a/catcli/catalog.py b/catcli/catalog.py index 9eea7c5..5c7542b 100644 --- a/catcli/catalog.py +++ b/catcli/catalog.py @@ -17,10 +17,16 @@ from catcli.logger import Logger class Catalog: - def __init__(self, path, pickle=False, verbose=False, force=False): - self.path = path # catalog path - self.verbose = verbose # verbosity - self.force = force # force overwrite if exists + def __init__(self, path, pickle=False, debug=False, force=False): + ''' + @path: catalog path + @pickle: use pickle + @debug: debug mode + @force: force overwrite if exists + ''' + self.path = path + self.debug = debug + self.force = force self.metanode = None self.pickle = pickle @@ -60,19 +66,22 @@ class Catalog: return self._save_pickle(node) return self._save_json(node) + def _debug(self, text): + if not self.debug: + return + Logger.debug(text) + def _save_pickle(self, node): '''pickle the catalog''' pickle.dump(node, open(self.path, 'wb')) - if self.verbose: - Logger.info('Catalog saved to pickle \"{}\"'.format(self.path)) + self._debug('Catalog saved to pickle \"{}\"'.format(self.path)) return True def _restore_pickle(self): '''restore the pickled tree''' root = pickle.load(open(self.path, 'rb')) - if self.verbose: - m = 'Catalog imported from pickle \"{}\"'.format(self.path) - Logger.info(m) + m = 'Catalog imported from pickle \"{}\"'.format(self.path) + self._debug(m) return root def _save_json(self, node): @@ -80,14 +89,12 @@ class Catalog: exp = JsonExporter(indent=2, sort_keys=True) with open(self.path, 'w') as f: exp.write(node, f) - if self.verbose: - Logger.info('Catalog saved to json \"{}\"'.format(self.path)) + self._debug('Catalog saved to json \"{}\"'.format(self.path)) return True def _restore_json(self, string): '''restore the tree from json''' imp = JsonImporter() root = imp.import_(string) - if self.verbose: - Logger.info('Catalog imported from json \"{}\"'.format(self.path)) + self._debug('Catalog imported from json \"{}\"'.format(self.path)) return root diff --git a/catcli/catcli.py b/catcli/catcli.py index edd3738..98fa7ea 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -62,7 +62,7 @@ Options: -c --hash Calculate md5 hash [default: False]. -r --recursive Recursive [default: False]. -P --parent Ignore stored relpath [default: True]. - -V --verbose Be verbose [default: False]. + -V --erbose Be verbose [default: False]. -v --version Show version. -h --help Show this screen. """.format(BANNER, NAME, CATALOGPATH) @@ -230,10 +230,10 @@ def main(): banner() # init noder - noder = Noder(verbose=args['--verbose'], sortsize=args['--sortsize'], + noder = Noder(debug=args['--verbose'], sortsize=args['--sortsize'], arc=args['--archive']) # init catalog - catalog = Catalog(args['--catalog'], verbose=args['--verbose'], + catalog = Catalog(args['--catalog'], debug=args['--verbose'], force=args['--force']) # init top node top = catalog.restore() diff --git a/catcli/noder.py b/catcli/noder.py index 29e772f..fc49110 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -36,9 +36,9 @@ class Noder: TYPE_STORAGE = 'storage' TYPE_META = 'meta' - def __init__(self, verbose=False, sortsize=False, arc=False): + def __init__(self, debug=False, sortsize=False, arc=False): self.hash = True - self.verbose = verbose + self.debug = debug self.sortsize = sortsize self.arc = arc if self.arc: @@ -352,8 +352,7 @@ class Noder: script=False, directory=False, startpath=None, parentfromtree=False): '''find files based on their names''' - if self.verbose: - Logger.info('searching for \"{}\"'.format(key)) + self._debug('searching for \"{}\"'.format(key)) start = root if startpath: start = self.get_node(root, startpath) @@ -390,8 +389,7 @@ class Noder: ############################################################### def walk(self, root, path, rec=False): '''walk the tree for ls based on names''' - if self.verbose: - Logger.info('walking path: \"{}\"'.format(path)) + self._debug('walking path: \"{}\"'.format(path)) r = anytree.resolver.Resolver('name') found = [] try: @@ -482,6 +480,6 @@ class Noder: return utils.md5sum(path) def _debug(self, string): - if not self.verbose: + if not self.debug: return Logger.debug(string) diff --git a/tests/test_find.py b/tests/test_find.py index 6bbef99..cb5a584 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -17,7 +17,7 @@ class TestFind(unittest.TestCase): def test_find(self): # init - catalog = Catalog('fake', force=True, verbose=False) + catalog = Catalog('fake', force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_graph.py b/tests/test_graph.py index 68b3750..4d68dbc 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -23,7 +23,7 @@ class TestGraph(unittest.TestCase): gpath = tempfile.gettempdir() + os.sep + 'graph.dot' self.addCleanup(clean, path) self.addCleanup(clean, gpath) - catalog = Catalog(path, force=True, verbose=False) + catalog = Catalog(path, force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_index.py b/tests/test_index.py index c5396ff..966acc0 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -42,7 +42,7 @@ class TestIndexing(unittest.TestCase): noder = Noder() top = noder.new_top_node() - catalog = Catalog(catalogpath, force=True, verbose=False) + catalog = Catalog(catalogpath, force=True, debug=False) # create fake args tmpdirname = 'tmpdir' diff --git a/tests/test_ls.py b/tests/test_ls.py index 95d19eb..7b2061a 100644 --- a/tests/test_ls.py +++ b/tests/test_ls.py @@ -19,7 +19,7 @@ class TestWalking(unittest.TestCase): # init path = 'fake' self.addCleanup(clean, path) - catalog = Catalog(path, force=True, verbose=False) + catalog = Catalog(path, force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_rm.py b/tests/test_rm.py index d81d5cd..81f96ef 100644 --- a/tests/test_rm.py +++ b/tests/test_rm.py @@ -19,7 +19,7 @@ class TestRm(unittest.TestCase): # init path = 'fake' self.addCleanup(clean, path) - catalog = Catalog(path, force=True, verbose=False) + catalog = Catalog(path, force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_tree.py b/tests/test_tree.py index 238243f..66bb618 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -19,7 +19,7 @@ class TestTree(unittest.TestCase): # init path = 'fake' self.addCleanup(clean, path) - catalog = Catalog(path, force=True, verbose=False) + catalog = Catalog(path, force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_update.py b/tests/test_update.py index 2f54b27..7e33c8e 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -44,7 +44,7 @@ class TestIndexing(unittest.TestCase): noder = Noder() top = noder.new_top_node() - catalog = Catalog(catalogpath, force=True, verbose=False) + catalog = Catalog(catalogpath, force=True, debug=False) # get checksums f4_md5 = md5sum(f4) From c4a13209611123547a4684d90c5bbc41483fd1ca Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 14:37:23 +0100 Subject: [PATCH 06/19] fix typo --- catcli/catcli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 98fa7ea..c8fe56c 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -62,7 +62,7 @@ Options: -c --hash Calculate md5 hash [default: False]. -r --recursive Recursive [default: False]. -P --parent Ignore stored relpath [default: True]. - -V --erbose Be verbose [default: False]. + -V --verbose Be verbose [default: False]. -v --version Show version. -h --help Show this screen. """.format(BANNER, NAME, CATALOGPATH) From f23549786de05284bb814ca9a771142ee0d78a41 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 14:54:47 +0100 Subject: [PATCH 07/19] fix typo --- catcli/noder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/noder.py b/catcli/noder.py index fc49110..c550235 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -89,7 +89,7 @@ class Noder: # maccess changed old_maccess = node.maccess if float(maccess) > float(old_maccess): - self._debug('macess changed for \"{}\"'.format(path)) + self._debug('maccess changed for \"{}\"'.format(path)) return node, True # test hash if self.hash and node.md5: From a8e3c3f77d10b15c0cac73cc03c2c0e704e6bb00 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 14:57:28 +0100 Subject: [PATCH 08/19] fix maccess comparison --- catcli/noder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/noder.py b/catcli/noder.py index c550235..2a10139 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -88,7 +88,7 @@ class Noder: return node, True # maccess changed old_maccess = node.maccess - if float(maccess) > float(old_maccess): + if float(maccess) != float(old_maccess): self._debug('maccess changed for \"{}\"'.format(path)) return node, True # test hash From 74e74a6f9f2d714de11ae76475f1e7297709d5b2 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 15:39:25 +0100 Subject: [PATCH 09/19] refactoring --- catcli/noder.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/catcli/noder.py b/catcli/noder.py index 2a10139..a78b64f 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -78,25 +78,26 @@ class Noder: node = self.get_node(top, treepath, quiet=True) # node does not exist if not node: - self._debug('node does not exist') + self._debug('change: node does not exist') return None, True # force re-indexing if no maccess maccess = os.path.getmtime(path) if not self._has_attr(node, 'maccess') or \ not node.maccess: - self._debug('changed as no maccess found') + self._debug('change: no maccess found') return node, True # maccess changed old_maccess = node.maccess if float(maccess) != float(old_maccess): - self._debug('maccess changed for \"{}\"'.format(path)) + self._debug('change: maccess changed for \"{}\"'.format(path)) return node, True # test hash if self.hash and node.md5: md5 = self._get_hash(path) if md5 != node.md5: - self._debug('checksum changed for \"{}\"'.format(path)) + self._debug('change: checksum changed for \"{}\"'.format(path)) return node, True + self._debug('change: no change for \"{}\"'.format(path)) return node, False def get_meta_node(self, top): @@ -112,9 +113,11 @@ class Noder: recursively traverse tree and return size @store: store the size in the node ''' - self._debug('getting node size recursively') if node.type == self.TYPE_FILE: + self._debug('getting node size for \"{}\"'.format(node.name)) return node.size + m = 'getting node size recursively for \"{}\"'.format(node.name) + self._debug(m) size = 0 for i in node.children: if node.type == self.TYPE_DIR: From 7cb0e2050a3e8011020788bd0901dd9708ff5582 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 27 Mar 2020 15:39:33 +0100 Subject: [PATCH 10/19] fix tests --- tests/helpers.py | 5 +---- tests/test_update.py | 4 +++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 7c7a0c7..2d4c6d6 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -58,10 +58,7 @@ def clean(path): def edit_file(path, newcontent): - if not os.path.exists(path): - write_to_file(path, newcontent) - else: - write_to_file(path, newcontent) + return write_to_file(path, newcontent) def unix_tree(path): diff --git a/tests/test_update.py b/tests/test_update.py index 7e33c8e..ea05b33 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -42,7 +42,8 @@ class TestIndexing(unittest.TestCase): d1f2 = create_rnd_file(d1, 'dir1file2') d2f1 = create_rnd_file(d2, 'dir2file1') - noder = Noder() + noder = Noder(debug=True) + noder.set_hashing(True) top = noder.new_top_node() catalog = Catalog(catalogpath, force=True, debug=False) @@ -92,6 +93,7 @@ class TestIndexing(unittest.TestCase): maccess = os.path.getmtime(f4) EDIT = 'edited' edit_file(f4, EDIT) + # reset edit time os.utime(f4, (maccess, maccess)) f4_md5_new = md5sum(d1f1) From 99d29c272f605640303a7ad91b6d3940f7705c64 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sat, 28 Mar 2020 09:19:14 +0100 Subject: [PATCH 11/19] fix re-indexing and refactor debug output --- catcli/logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/logger.py b/catcli/logger.py index c7fc611..a056d01 100644 --- a/catcli/logger.py +++ b/catcli/logger.py @@ -76,7 +76,7 @@ class Logger: def debug(string): '''to stderr no color''' - sys.stderr.write('{}\n'.format(string)) + sys.stderr.write('[DBG] {}\n'.format(string)) def info(string): '''to stdout in color''' From 6e4c3784fb9e007ccffbf8a7238b193df8dc636a Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sat, 28 Mar 2020 09:19:29 +0100 Subject: [PATCH 12/19] fix re-indexing and refactor debug output --- catcli/noder.py | 16 +++++++++------- catcli/walker.py | 12 ++++++------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/catcli/noder.py b/catcli/noder.py index a78b64f..c753eee 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -59,12 +59,14 @@ class Noder: def get_node(self, top, path, quiet=False): '''get the node by internal tree path''' + print(path) r = anytree.resolver.Resolver('name') try: - return r.get(top, path) + p = os.path.basename(path) + return r.get(top, p) except anytree.resolver.ChildResolverError: if not quiet: - Logger.err('No node at path \"{}\"'.format(path)) + Logger.err('No node at path \"{}\"'.format(p)) return None def get_node_if_changed(self, top, path, treepath): @@ -78,26 +80,26 @@ class Noder: node = self.get_node(top, treepath, quiet=True) # node does not exist if not node: - self._debug('change: node does not exist') + self._debug('\tchange: node does not exist') return None, True # force re-indexing if no maccess maccess = os.path.getmtime(path) if not self._has_attr(node, 'maccess') or \ not node.maccess: - self._debug('change: no maccess found') + self._debug('\tchange: no maccess found') return node, True # maccess changed old_maccess = node.maccess if float(maccess) != float(old_maccess): - self._debug('change: maccess changed for \"{}\"'.format(path)) + self._debug('\tchange: maccess changed for \"{}\"'.format(path)) return node, True # test hash if self.hash and node.md5: md5 = self._get_hash(path) if md5 != node.md5: - self._debug('change: checksum changed for \"{}\"'.format(path)) + self._debug('\tchange: checksum changed for \"{}\"'.format(path)) return node, True - self._debug('change: no change for \"{}\"'.format(path)) + self._debug('\tchange: no change for \"{}\"'.format(path)) return node, False def get_meta_node(self, top): diff --git a/catcli/walker.py b/catcli/walker.py index fd72723..73afbbb 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -75,7 +75,7 @@ class Walker: treepath = os.path.join(storagepath, f) reindex, n = self._need_reindex(parent, sub, treepath) if not reindex: - self._debug('\tignore file {}'.format(sub)) + self._debug('\tskip file {}'.format(sub)) self.noder.flag(n) continue self._debug('\tre-index file {}'.format(sub)) @@ -114,18 +114,18 @@ class Walker: ''' cnode, changed = self.noder.get_node_if_changed(top, path, treepath) if not cnode: - self._debug('{} does not exist'.format(path)) + self._debug('\t{} does not exist'.format(path)) return True, cnode if cnode and not changed: # ignore this node - self._debug('{} has not changed'.format(path)) + self._debug('\t{} has not changed'.format(path)) return False, cnode if cnode and changed: # remove this node and re-add - self._debug('{} has changed'.format(path)) - self._debug('removing node {} for {}'.format(cnode, path)) + self._debug('\t{} has changed'.format(path)) + self._debug('\tremoving node {} for {}'.format(cnode, path)) cnode.parent = None - self._debug('{} is to be re-indexed'.format(path)) + self._debug('\t{} is to be re-indexed'.format(path)) return True, cnode def _debug(self, string): From 818e4fa5e05a19b8b71aaf8c5d57f798e93c8bd3 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sat, 28 Mar 2020 09:31:04 +0100 Subject: [PATCH 13/19] ignore maccess change on directory for faster re-indexing --- catcli/noder.py | 3 ++- catcli/walker.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/catcli/noder.py b/catcli/noder.py index c753eee..60b5208 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -59,7 +59,6 @@ class Noder: def get_node(self, top, path, quiet=False): '''get the node by internal tree path''' - print(path) r = anytree.resolver.Resolver('name') try: p = os.path.basename(path) @@ -82,6 +81,8 @@ class Noder: if not node: self._debug('\tchange: node does not exist') return None, True + if os.path.isdir(path): + return node, False # force re-indexing if no maccess maccess = os.path.getmtime(path) if not self._has_attr(node, 'maccess') or \ diff --git a/catcli/walker.py b/catcli/walker.py index 73afbbb..626793b 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -123,7 +123,7 @@ class Walker: if cnode and changed: # remove this node and re-add self._debug('\t{} has changed'.format(path)) - self._debug('\tremoving node {} for {}'.format(cnode, path)) + self._debug('\tremoving node {} for {}'.format(cnode.name, path)) cnode.parent = None self._debug('\t{} is to be re-indexed'.format(path)) return True, cnode From 7d770611413f13da69c7d20a174b1b370fae3d25 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sat, 28 Mar 2020 09:33:05 +0100 Subject: [PATCH 14/19] fix pep8 --- catcli/noder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/catcli/noder.py b/catcli/noder.py index 60b5208..e931682 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -98,7 +98,8 @@ class Noder: if self.hash and node.md5: md5 = self._get_hash(path) if md5 != node.md5: - self._debug('\tchange: checksum changed for \"{}\"'.format(path)) + m = '\tchange: checksum changed for \"{}\"'.format(path) + self._debug(m) return node, True self._debug('\tchange: no change for \"{}\"'.format(path)) return node, False From e10a65f91c86152435c92d5916ad63d74fd038b3 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sat, 28 Mar 2020 09:48:55 +0100 Subject: [PATCH 15/19] update feedback without verbose --- catcli/walker.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/catcli/walker.py b/catcli/walker.py index 626793b..f2db5bc 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -78,8 +78,7 @@ class Walker: self._debug('\tskip file {}'.format(sub)) self.noder.flag(n) continue - self._debug('\tre-index file {}'.format(sub)) - self._log(f) + Logger.out('- new file \"{}\"'.format(sub)) n = self.noder.file_node(os.path.basename(f), sub, parent, storagepath) self.noder.flag(n) @@ -91,7 +90,7 @@ class Walker: treepath = os.path.join(storagepath, d) reindex, dummy = self._need_reindex(parent, sub, treepath) if reindex: - self._debug('\tre-index directory {}'.format(sub)) + Logger.out('- new directory \"{}\"'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, storagepath) cnt += 1 self.noder.flag(dummy) @@ -125,7 +124,7 @@ class Walker: self._debug('\t{} has changed'.format(path)) self._debug('\tremoving node {} for {}'.format(cnode.name, path)) cnode.parent = None - self._debug('\t{} is to be re-indexed'.format(path)) + Logger.out('- update \"{}\"'.format(path)) return True, cnode def _debug(self, string): From bf0ecf83ccacb7835613c70dba18faf8a71efa86 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sat, 28 Mar 2020 09:54:56 +0100 Subject: [PATCH 16/19] update tests --- catcli/walker.py | 5 ++--- tests/test_update.py | 26 ++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/catcli/walker.py b/catcli/walker.py index f2db5bc..df738a9 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -78,7 +78,7 @@ class Walker: self._debug('\tskip file {}'.format(sub)) self.noder.flag(n) continue - Logger.out('- new file \"{}\"'.format(sub)) + Logger.out('- update catalag for \"{}\"'.format(sub)) n = self.noder.file_node(os.path.basename(f), sub, parent, storagepath) self.noder.flag(n) @@ -90,7 +90,7 @@ class Walker: treepath = os.path.join(storagepath, d) reindex, dummy = self._need_reindex(parent, sub, treepath) if reindex: - Logger.out('- new directory \"{}\"'.format(sub)) + Logger.out('- update catalog for \"{}\"'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, storagepath) cnt += 1 self.noder.flag(dummy) @@ -124,7 +124,6 @@ class Walker: self._debug('\t{} has changed'.format(path)) self._debug('\tremoving node {} for {}'.format(cnode.name, path)) cnode.parent = None - Logger.out('- update \"{}\"'.format(path)) return True, cnode def _debug(self, string): diff --git a/tests/test_update.py b/tests/test_update.py index ea05b33..c881862 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -41,6 +41,7 @@ class TestIndexing(unittest.TestCase): d1f1 = create_rnd_file(d1, 'dir1file1') d1f2 = create_rnd_file(d1, 'dir1file2') d2f1 = create_rnd_file(d2, 'dir2file1') + d2f2 = create_rnd_file(d2, 'dir2file2') noder = Noder(debug=True) noder.set_hashing(True) @@ -52,6 +53,8 @@ class TestIndexing(unittest.TestCase): self.assertTrue(f4_md5) d1f1_md5 = md5sum(d1f1) self.assertTrue(d1f1_md5) + d2f2_md5 = md5sum(d2f2) + self.assertTrue(d2f2_md5) # create fake args tmpdirname = 'tmpdir' @@ -93,13 +96,22 @@ class TestIndexing(unittest.TestCase): maccess = os.path.getmtime(f4) EDIT = 'edited' edit_file(f4, EDIT) - # reset edit time os.utime(f4, (maccess, maccess)) f4_md5_new = md5sum(d1f1) self.assertTrue(f4_md5_new) self.assertTrue(f4_md5_new != f4_md5) + # change file without mtime + maccess = os.path.getmtime(d2f2) + EDIT = 'edited' + edit_file(d2f2, EDIT) + # reset edit time + os.utime(d2f2, (maccess, maccess)) + d2f2_md5_new = md5sum(d2f2) + self.assertTrue(d2f2_md5_new) + self.assertTrue(d2f2_md5_new != d2f2_md5) + # update storage cmd_update(args, noder, catalog, top, debug=True) @@ -128,6 +140,14 @@ class TestIndexing(unittest.TestCase): self.assertTrue(nod.md5 != f4_md5) self.assertTrue(nod.md5 == f4_md5_new) + # ensure d2f2 md5 sum has changed in catalog + nods = noder.find_name(top, os.path.basename(d2f2)) + self.assertTrue(len(nods) == 1) + nod = nods[0] + self.assertTrue(nod) + self.assertTrue(nod.md5 != d2f2_md5) + self.assertTrue(nod.md5 == d2f2_md5_new) + # ensures files and directories are in names = [node.name for node in anytree.PreOrderIter(storage)] print(names) @@ -150,7 +170,7 @@ class TestIndexing(unittest.TestCase): if node.name == os.path.basename(d1): self.assertTrue(len(node.children) == 3) elif node.name == os.path.basename(d2): - self.assertTrue(len(node.children) == 2) + self.assertTrue(len(node.children) == 3) elif node.name == os.path.basename(new3): self.assertTrue(len(node.children) == 0) elif node.name == os.path.basename(new4): @@ -172,11 +192,13 @@ class TestIndexing(unittest.TestCase): self.assertTrue(os.path.basename(f1) in names) self.assertTrue(os.path.basename(f2) in names) self.assertTrue(os.path.basename(f3) in names) + self.assertTrue(os.path.basename(f4) in names) self.assertTrue(os.path.basename(d1) in names) self.assertTrue(os.path.basename(d1f1) not in names) self.assertTrue(os.path.basename(d1f2) in names) self.assertTrue(os.path.basename(d2) not in names) self.assertTrue(os.path.basename(d2f1) not in names) + self.assertTrue(os.path.basename(d2f1) not in names) self.assertTrue(os.path.basename(new1) in names) self.assertTrue(os.path.basename(new2) not in names) self.assertTrue(os.path.basename(new3) in names) From 4af351d1448e974e46c5ef8bbce014545a8325e7 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sat, 28 Mar 2020 15:14:23 +0100 Subject: [PATCH 17/19] doc --- catcli/catcli.py | 8 ++++---- catcli/noder.py | 10 +++++++++- catcli/walker.py | 20 +++++++++++++------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index c8fe56c..7aa6a98 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -71,7 +71,7 @@ Options: def cmd_index(args, noder, catalog, top, debug=False): path = args[''] name = args[''] - nohash = not args['--hash'] + hash = args['--hash'] subsize = not args['--no-subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) @@ -87,7 +87,7 @@ def cmd_index(args, noder, catalog, top, debug=False): node = noder.get_storage_node(top, name) node.parent = None start = datetime.datetime.now() - walker = Walker(noder, nohash=nohash, debug=debug) + walker = Walker(noder, hash=hash, debug=debug) attr = noder.format_storage_attr(args['--meta']) root = noder.storage_node(name, path, parent=top, attr=attr) _, cnt = walker.index(path, root, name) @@ -102,7 +102,7 @@ def cmd_index(args, noder, catalog, top, debug=False): def cmd_update(args, noder, catalog, top, debug=False): path = args[''] name = args[''] - nohash = not args['--hash'] + hash = args['--hash'] subsize = not args['--no-subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) @@ -112,7 +112,7 @@ def cmd_update(args, noder, catalog, top, debug=False): Logger.err('storage named \"{}\" does not exist'.format(name)) return start = datetime.datetime.now() - walker = Walker(noder, nohash=nohash, debug=debug) + walker = Walker(noder, hash=hash, debug=debug) cnt = walker.reindex(path, root, top) if subsize: noder.rec_size(root) diff --git a/catcli/noder.py b/catcli/noder.py index e931682..5677c7a 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -37,6 +37,11 @@ class Noder: TYPE_META = 'meta' def __init__(self, debug=False, sortsize=False, arc=False): + ''' + @debug: debug mode + @sortsize: sort nodes by size + @arch: handle archive + ''' self.hash = True self.debug = debug self.sortsize = sortsize @@ -222,6 +227,7 @@ class Noder: return cnt def flag(self, node): + '''flag a node''' node.flag = True def _clean(self, node): @@ -416,7 +422,7 @@ class Noder: return found ############################################################### - # tree creationg + # tree creation ############################################################### def _add_entry(self, name, top, resolv): '''add an entry to the tree''' @@ -449,6 +455,7 @@ class Noder: return sorted(items, key=self._sort, reverse=self.sortsize) def _sort(self, x): + '''sort a list''' if self.sortsize: return self._sort_size(x) return self._sort_fs(x) @@ -487,6 +494,7 @@ class Noder: return utils.md5sum(path) def _debug(self, string): + '''print debug''' if not self.debug: return Logger.debug(string) diff --git a/catcli/walker.py b/catcli/walker.py index df738a9..45f0bad 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -15,10 +15,15 @@ class Walker: MAXLINE = 80 - 15 - def __init__(self, noder, nohash=False, debug=False): + def __init__(self, noder, hash=True, debug=False): + ''' + @noder: the noder to use + @hash: calculate hash of nodes + @debug: debug mode + ''' self.noder = noder - self.nohash = nohash - self.noder.set_hashing(not self.nohash) + self.hash = hash + self.noder.set_hashing(self.hash) self.debug = debug def index(self, path, parent, name, storagepath=''): @@ -32,7 +37,7 @@ class Walker: for f in files: self._debug('found file {} under {}'.format(f, path)) sub = os.path.join(root, f) - self._log(f) + self._progress(f) self._debug('index file {}'.format(sub)) self.noder.file_node(os.path.basename(f), sub, parent, storagepath) @@ -50,7 +55,7 @@ class Walker: _, cnt2 = self.index(sub, dummy, base, nstoragepath) cnt += cnt2 break - self._log(None) + self._progress(None) return parent, cnt def reindex(self, path, parent, top): @@ -101,7 +106,6 @@ class Walker: cnt2 = self._reindex(sub, dummy, top, nstoragepath) cnt += cnt2 break - self._log(None) return cnt def _need_reindex(self, top, path, treepath): @@ -127,11 +131,13 @@ class Walker: return True, cnode def _debug(self, string): + '''print to debug''' if not self.debug: return Logger.debug(string) - def _log(self, string): + def _progress(self, string): + '''print progress''' if self.debug: return if not string: From c2510924b1a67239f95b9fa0a8cd97b649130dcd Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sun, 29 Mar 2020 10:40:34 +0200 Subject: [PATCH 18/19] log updates to file for #10 --- catcli/catcli.py | 37 ++++++++++++++++++++----------------- catcli/logger.py | 7 +++++++ catcli/walker.py | 16 +++++++++++++--- tests/test_update.py | 3 ++- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 7aa6a98..9f41c9c 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -37,7 +37,7 @@ USAGE = """ Usage: {1} index [--catalog=] [--meta=...] [-acfnV] - {1} update [--catalog=] [-acfnV] + {1} update [--catalog=] [-acfnV] [--lpath=] {1} ls [--catalog=] [-arVS] [] {1} find [--catalog=] [-abdVP] [--path=] {1} rm [--catalog=] [-fV] @@ -50,21 +50,22 @@ Usage: {1} --version Options: - --catalog= Path to the catalog [default: {2}]. - --meta= Additional attribute to store [default: ]. - -p --path= Start path. - -n --no-subsize Do not store size of directories [default: False]. - -a --archive Handle archive file [default: False]. - -f --force Do not ask when updating the catalog [default: False]. - -d --directory Only directory (default: False). - -b --script Output script to manage found file(s) [default: False]. - -S --sortsize Sort by size, largest first [default: False]. - -c --hash Calculate md5 hash [default: False]. - -r --recursive Recursive [default: False]. - -P --parent Ignore stored relpath [default: True]. - -V --verbose Be verbose [default: False]. - -v --version Show version. - -h --help Show this screen. + --catalog= Path to the catalog [default: {2}]. + --meta= Additional attribute to store [default: ]. + -p --path= Start path. + -l --lpath= Path where changes are logged [default: ] + -n --no-subsize Do not store size of directories [default: False]. + -a --archive Handle archive file [default: False]. + -f --force Do not ask when updating the catalog [default: False]. + -d --directory Only directory (default: False). + -b --script Output script to manage found file(s) [default: False]. + -S --sortsize Sort by size, largest first [default: False]. + -c --hash Calculate md5 hash [default: False]. + -r --recursive Recursive [default: False]. + -P --parent Ignore stored relpath [default: True]. + -V --verbose Be verbose [default: False]. + -v --version Show version. + -h --help Show this screen. """.format(BANNER, NAME, CATALOGPATH) @@ -103,6 +104,7 @@ def cmd_update(args, noder, catalog, top, debug=False): path = args[''] name = args[''] hash = args['--hash'] + logpath = args['--lpath'] subsize = not args['--no-subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) @@ -112,7 +114,8 @@ def cmd_update(args, noder, catalog, top, debug=False): Logger.err('storage named \"{}\" does not exist'.format(name)) return start = datetime.datetime.now() - walker = Walker(noder, hash=hash, debug=debug) + walker = Walker(noder, hash=hash, debug=debug, + logpath=logpath) cnt = walker.reindex(path, root, top) if subsize: noder.rec_size(root) diff --git a/catcli/logger.py b/catcli/logger.py index a056d01..64ded23 100644 --- a/catcli/logger.py +++ b/catcli/logger.py @@ -96,3 +96,10 @@ class Logger: def bold(string): '''make it bold''' return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET) + + def flog(path, string, append=True): + mode = 'w' + if append: + mode = 'a' + with open(path, mode) as f: + f.write(string) diff --git a/catcli/walker.py b/catcli/walker.py index 45f0bad..ebc6fbb 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -15,16 +15,19 @@ class Walker: MAXLINE = 80 - 15 - def __init__(self, noder, hash=True, debug=False): + def __init__(self, noder, hash=True, debug=False, + logpath=None): ''' @noder: the noder to use @hash: calculate hash of nodes @debug: debug mode + @logpath: path where to log catalog changes on reindex ''' self.noder = noder self.hash = hash self.noder.set_hashing(self.hash) self.debug = debug + self.lpath = logpath def index(self, path, parent, name, storagepath=''): '''index a directory and store in tree''' @@ -83,7 +86,7 @@ class Walker: self._debug('\tskip file {}'.format(sub)) self.noder.flag(n) continue - Logger.out('- update catalag for \"{}\"'.format(sub)) + self._log2file('update catalog for \"{}\"'.format(sub)) n = self.noder.file_node(os.path.basename(f), sub, parent, storagepath) self.noder.flag(n) @@ -95,7 +98,7 @@ class Walker: treepath = os.path.join(storagepath, d) reindex, dummy = self._need_reindex(parent, sub, treepath) if reindex: - Logger.out('- update catalog for \"{}\"'.format(sub)) + self._log2file('update catalog for \"{}\"'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, storagepath) cnt += 1 self.noder.flag(dummy) @@ -147,3 +150,10 @@ class Walker: if len(string) > self.MAXLINE: string = string[:self.MAXLINE] + '...' Logger.progr('indexing: {:80}'.format(string)) + + def _log2file(self, string): + '''log to file''' + if not self.lpath: + return + line = '{}\n'.format(string) + Logger.flog(self.lpath, line, append=True) diff --git a/tests/test_update.py b/tests/test_update.py index c881862..7bcdb66 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -60,7 +60,8 @@ class TestIndexing(unittest.TestCase): tmpdirname = 'tmpdir' args = {'': dirpath, '': tmpdirname, '--hash': True, '--meta': ['some meta'], - '--no-subsize': False, '--verbose': True} + '--no-subsize': False, '--verbose': True, + '--lpath': None} # index the directory unix_tree(dirpath) From 09d86f0902e910d1352aa80b59b8b0e06460886a Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Sun, 29 Mar 2020 10:45:53 +0200 Subject: [PATCH 19/19] refactoring --- catcli/catcli.py | 12 +++++++----- catcli/noder.py | 19 ++++++++++--------- tests/test_update.py | 6 +++--- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 9f41c9c..401a610 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -69,10 +69,11 @@ Options: """.format(BANNER, NAME, CATALOGPATH) -def cmd_index(args, noder, catalog, top, debug=False): +def cmd_index(args, noder, catalog, top): path = args[''] name = args[''] hash = args['--hash'] + debug = args['--verbose'] subsize = not args['--no-subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) @@ -100,11 +101,12 @@ def cmd_index(args, noder, catalog, top, debug=False): catalog.save(top) -def cmd_update(args, noder, catalog, top, debug=False): +def cmd_update(args, noder, catalog, top): path = args[''] name = args[''] hash = args['--hash'] logpath = args['--lpath'] + debug = args['--verbose'] subsize = not args['--no-subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) @@ -244,14 +246,14 @@ def main(): top = noder.new_top_node() # handle the meta node - meta = noder.update_metanode(noder.get_meta_node(top)) + meta = noder.update_metanode(top) catalog.set_metanode(meta) # parse command if args['index']: - cmd_index(args, noder, catalog, top, debug=args['--verbose']) + cmd_index(args, noder, catalog, top) if args['update']: - cmd_update(args, noder, catalog, top, debug=args['--verbose']) + cmd_update(args, noder, catalog, top) elif args['find']: cmd_find(args, noder, top) elif args['tree']: diff --git a/catcli/noder.py b/catcli/noder.py index 5677c7a..1043b1d 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -109,14 +109,6 @@ class Noder: self._debug('\tchange: no change for \"{}\"'.format(path)) return node, False - def get_meta_node(self, top): - '''return the meta node if any''' - try: - return next(filter(lambda x: x.type == self.TYPE_META, - top.children)) - except StopIteration: - return None - def _rec_size(self, node, store=True): ''' recursively traverse tree and return size @@ -169,8 +161,9 @@ class Noder: '''create a new top node''' return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP) - def update_metanode(self, meta): + def update_metanode(self, top): '''create or update meta node information''' + meta = self._get_meta_node(top) epoch = int(time.time()) if not meta: attr = {} @@ -182,6 +175,14 @@ class Noder: meta.attr['access_version'] = VERSION return meta + def _get_meta_node(self, top): + '''return the meta node if any''' + try: + return next(filter(lambda x: x.type == self.TYPE_META, + top.children)) + except StopIteration: + return None + def file_node(self, name, path, parent, storagepath): '''create a new node representing a file''' if not os.path.exists(path): diff --git a/tests/test_update.py b/tests/test_update.py index 7bcdb66..5d4d4de 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -65,7 +65,7 @@ class TestIndexing(unittest.TestCase): # index the directory unix_tree(dirpath) - cmd_index(args, noder, catalog, top, debug=True) + cmd_index(args, noder, catalog, top) self.assertTrue(os.stat(catalogpath).st_size != 0) # ensure md5 sum are in @@ -114,7 +114,7 @@ class TestIndexing(unittest.TestCase): self.assertTrue(d2f2_md5_new != d2f2_md5) # update storage - cmd_update(args, noder, catalog, top, debug=True) + cmd_update(args, noder, catalog, top) # print catalog # print(read_from_file(catalogpath)) @@ -185,7 +185,7 @@ class TestIndexing(unittest.TestCase): clean(new4) # update storage - cmd_update(args, noder, catalog, top, debug=True) + cmd_update(args, noder, catalog, top) # ensures files and directories are (not) in names = [node.name for node in anytree.PreOrderIter(storage)]