merge hashdiff

pull/19/head
deadc0de6 4 years ago
commit 76eab12499

@ -200,8 +200,9 @@ Storage entry can be edited with following catcli commands:
## Update catalog ## Update catalog
The catalog can be updated with the `update` command. The catalog can be updated with the `update` command.
Updates are based on the access time of each of the files. If using Updates are based on the modification time of each of the files and on the
`-c --hash`, only new files are re-hashed. hash checksum if present (catalog was indexed with `-c --hash` and
`update` is called with the switch `-c --hash`).
# Examples # Examples

@ -17,10 +17,16 @@ from catcli.logger import Logger
class Catalog: class Catalog:
def __init__(self, path, pickle=False, verbose=False, force=False): def __init__(self, path, pickle=False, debug=False, force=False):
self.path = path # catalog path '''
self.verbose = verbose # verbosity @path: catalog path
self.force = force # force overwrite if exists @pickle: use pickle
@debug: debug mode
@force: force overwrite if exists
'''
self.path = path
self.debug = debug
self.force = force
self.metanode = None self.metanode = None
self.pickle = pickle self.pickle = pickle
@ -60,19 +66,22 @@ class Catalog:
return self._save_pickle(node) return self._save_pickle(node)
return self._save_json(node) return self._save_json(node)
def _debug(self, text):
if not self.debug:
return
Logger.debug(text)
def _save_pickle(self, node): def _save_pickle(self, node):
'''pickle the catalog''' '''pickle the catalog'''
pickle.dump(node, open(self.path, 'wb')) pickle.dump(node, open(self.path, 'wb'))
if self.verbose: self._debug('Catalog saved to pickle \"{}\"'.format(self.path))
Logger.info('Catalog saved to pickle \"{}\"'.format(self.path))
return True return True
def _restore_pickle(self): def _restore_pickle(self):
'''restore the pickled tree''' '''restore the pickled tree'''
root = pickle.load(open(self.path, 'rb')) root = pickle.load(open(self.path, 'rb'))
if self.verbose: m = 'Catalog imported from pickle \"{}\"'.format(self.path)
m = 'Catalog imported from pickle \"{}\"'.format(self.path) self._debug(m)
Logger.info(m)
return root return root
def _save_json(self, node): def _save_json(self, node):
@ -80,14 +89,12 @@ class Catalog:
exp = JsonExporter(indent=2, sort_keys=True) exp = JsonExporter(indent=2, sort_keys=True)
with open(self.path, 'w') as f: with open(self.path, 'w') as f:
exp.write(node, f) exp.write(node, f)
if self.verbose: self._debug('Catalog saved to json \"{}\"'.format(self.path))
Logger.info('Catalog saved to json \"{}\"'.format(self.path))
return True return True
def _restore_json(self, string): def _restore_json(self, string):
'''restore the tree from json''' '''restore the tree from json'''
imp = JsonImporter() imp = JsonImporter()
root = imp.import_(string) root = imp.import_(string)
if self.verbose: self._debug('Catalog imported from json \"{}\"'.format(self.path))
Logger.info('Catalog imported from json \"{}\"'.format(self.path))
return root return root

@ -37,7 +37,7 @@ USAGE = """
Usage: Usage:
{1} index [--catalog=<path>] [--meta=<meta>...] [-acfnV] <name> <path> {1} index [--catalog=<path>] [--meta=<meta>...] [-acfnV] <name> <path>
{1} update [--catalog=<path>] [-acfnV] <name> <path> {1} update [--catalog=<path>] [-acfnV] [--lpath=<path>] <name> <path>
{1} ls [--catalog=<path>] [-arVS] [<path>] {1} ls [--catalog=<path>] [-arVS] [<path>]
{1} find [--catalog=<path>] [-abdVP] [--path=<path>] <term> {1} find [--catalog=<path>] [-abdVP] [--path=<path>] <term>
{1} rm [--catalog=<path>] [-fV] <storage> {1} rm [--catalog=<path>] [-fV] <storage>
@ -50,28 +50,30 @@ Usage:
{1} --version {1} --version
Options: Options:
--catalog=<path> Path to the catalog [default: {2}]. --catalog=<path> Path to the catalog [default: {2}].
--meta=<meta> Additional attribute to store [default: ]. --meta=<meta> Additional attribute to store [default: ].
-p --path=<path> Start path. -p --path=<path> Start path.
-n --no-subsize Do not store size of directories [default: False]. -l --lpath=<path> Path where changes are logged [default: ]
-a --archive Handle archive file [default: False]. -n --no-subsize Do not store size of directories [default: False].
-f --force Do not ask when updating the catalog [default: False]. -a --archive Handle archive file [default: False].
-d --directory Only directory (default: False). -f --force Do not ask when updating the catalog [default: False].
-b --script Output script to manage found file(s) [default: False]. -d --directory Only directory (default: False).
-S --sortsize Sort by size, largest first [default: False]. -b --script Output script to manage found file(s) [default: False].
-c --hash Calculate md5 hash [default: False]. -S --sortsize Sort by size, largest first [default: False].
-r --recursive Recursive [default: False]. -c --hash Calculate md5 hash [default: False].
-P --parent Ignore stored relpath [default: True]. -r --recursive Recursive [default: False].
-V --verbose Be verbose [default: False]. -P --parent Ignore stored relpath [default: True].
-v --version Show version. -V --verbose Be verbose [default: False].
-h --help Show this screen. -v --version Show version.
-h --help Show this screen.
""".format(BANNER, NAME, CATALOGPATH) """.format(BANNER, NAME, CATALOGPATH)
def cmd_index(args, noder, catalog, top, debug=False): def cmd_index(args, noder, catalog, top):
path = args['<path>'] path = args['<path>']
name = args['<name>'] name = args['<name>']
nohash = not args['--hash'] hash = args['--hash']
debug = args['--verbose']
subsize = not args['--no-subsize'] subsize = not args['--no-subsize']
if not os.path.exists(path): if not os.path.exists(path):
Logger.err('\"{}\" does not exist'.format(path)) Logger.err('\"{}\" does not exist'.format(path))
@ -87,7 +89,7 @@ def cmd_index(args, noder, catalog, top, debug=False):
node = noder.get_storage_node(top, name) node = noder.get_storage_node(top, name)
node.parent = None node.parent = None
start = datetime.datetime.now() start = datetime.datetime.now()
walker = Walker(noder, nohash=nohash, debug=debug) walker = Walker(noder, hash=hash, debug=debug)
attr = noder.format_storage_attr(args['--meta']) attr = noder.format_storage_attr(args['--meta'])
root = noder.storage_node(name, path, parent=top, attr=attr) root = noder.storage_node(name, path, parent=top, attr=attr)
_, cnt = walker.index(path, root, name) _, cnt = walker.index(path, root, name)
@ -99,10 +101,12 @@ def cmd_index(args, noder, catalog, top, debug=False):
catalog.save(top) catalog.save(top)
def cmd_update(args, noder, catalog, top, debug=False): def cmd_update(args, noder, catalog, top):
path = args['<path>'] path = args['<path>']
name = args['<name>'] name = args['<name>']
nohash = not args['--hash'] hash = args['--hash']
logpath = args['--lpath']
debug = args['--verbose']
subsize = not args['--no-subsize'] subsize = not args['--no-subsize']
if not os.path.exists(path): if not os.path.exists(path):
Logger.err('\"{}\" does not exist'.format(path)) Logger.err('\"{}\" does not exist'.format(path))
@ -112,7 +116,8 @@ def cmd_update(args, noder, catalog, top, debug=False):
Logger.err('storage named \"{}\" does not exist'.format(name)) Logger.err('storage named \"{}\" does not exist'.format(name))
return return
start = datetime.datetime.now() start = datetime.datetime.now()
walker = Walker(noder, nohash=nohash, debug=debug) walker = Walker(noder, hash=hash, debug=debug,
logpath=logpath)
cnt = walker.reindex(path, root, top) cnt = walker.reindex(path, root, top)
if subsize: if subsize:
noder.rec_size(root) noder.rec_size(root)
@ -212,8 +217,8 @@ def cmd_edit(args, noder, catalog, top):
def banner(): def banner():
Logger.log(BANNER) Logger.out(BANNER)
Logger.log("") Logger.out("")
def main(): def main():
@ -230,10 +235,10 @@ def main():
banner() banner()
# init noder # init noder
noder = Noder(verbose=args['--verbose'], sortsize=args['--sortsize'], noder = Noder(debug=args['--verbose'], sortsize=args['--sortsize'],
arc=args['--archive']) arc=args['--archive'])
# init catalog # init catalog
catalog = Catalog(args['--catalog'], verbose=args['--verbose'], catalog = Catalog(args['--catalog'], debug=args['--verbose'],
force=args['--force']) force=args['--force'])
# init top node # init top node
top = catalog.restore() top = catalog.restore()
@ -241,14 +246,14 @@ def main():
top = noder.new_top_node() top = noder.new_top_node()
# handle the meta node # handle the meta node
meta = noder.update_metanode(noder.get_meta_node(top)) meta = noder.update_metanode(top)
catalog.set_metanode(meta) catalog.set_metanode(meta)
# parse command # parse command
if args['index']: if args['index']:
cmd_index(args, noder, catalog, top, debug=args['--verbose']) cmd_index(args, noder, catalog, top)
if args['update']: if args['update']:
cmd_update(args, noder, catalog, top, debug=args['--verbose']) cmd_update(args, noder, catalog, top)
elif args['find']: elif args['find']:
cmd_find(args, noder, top) cmd_find(args, noder, top)
elif args['tree']: elif args['tree']:

@ -71,17 +71,17 @@ class Logger:
# generic output # generic output
###################################################################### ######################################################################
def out(string): def out(string):
'''to stdout''' '''to stdout no color'''
sys.stdout.write('{}\n'.format(string)) sys.stdout.write('{}\n'.format(string))
def log(string): def debug(string):
'''to stderr''' '''to stderr no color'''
sys.stderr.write('{}\n'.format(string)) sys.stderr.write('[DBG] {}\n'.format(string))
def info(string): def info(string):
'''to stderr in color''' '''to stdout in color'''
s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET) s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET)
sys.stderr.write('{}\n'.format(s)) sys.stdout.write('{}\n'.format(s))
def err(string): def err(string):
'''to stderr in RED''' '''to stderr in RED'''
@ -96,3 +96,10 @@ class Logger:
def bold(string): def bold(string):
'''make it bold''' '''make it bold'''
return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET) return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET)
def flog(path, string, append=True):
mode = 'w'
if append:
mode = 'a'
with open(path, mode) as f:
f.write(string)

@ -36,9 +36,14 @@ class Noder:
TYPE_STORAGE = 'storage' TYPE_STORAGE = 'storage'
TYPE_META = 'meta' TYPE_META = 'meta'
def __init__(self, verbose=False, sortsize=False, arc=False): def __init__(self, debug=False, sortsize=False, arc=False):
'''
@debug: debug mode
@sortsize: sort nodes by size
@arc: handle archive
'''
self.hash = True self.hash = True
self.verbose = verbose self.debug = debug
self.sortsize = sortsize self.sortsize = sortsize
self.arc = arc self.arc = arc
if self.arc: if self.arc:
@ -61,45 +66,59 @@ class Noder:
'''get the node by internal tree path''' '''get the node by internal tree path'''
r = anytree.resolver.Resolver('name') r = anytree.resolver.Resolver('name')
try: try:
return r.get(top, path) p = os.path.basename(path)
return r.get(top, p)
except anytree.resolver.ChildResolverError: except anytree.resolver.ChildResolverError:
if not quiet: if not quiet:
Logger.err('No node at path \"{}\"'.format(path)) Logger.err('No node at path \"{}\"'.format(p))
return None return None
def get_node_if_newer(self, top, path, maccess): def get_node_if_changed(self, top, path, treepath):
'''return the node (if any) and if path is newer''' '''
treepath = path.lstrip(os.sep) return the node (if any) and if it has changed
@top: top node (storage)
@path: abs path to file
@treepath: rel path from indexed directory
'''
treepath = treepath.lstrip(os.sep)
node = self.get_node(top, treepath, quiet=True) node = self.get_node(top, treepath, quiet=True)
# node does not exist
if not node: if not node:
# node does not exist self._debug('\tchange: node does not exist')
return None, True return None, True
if os.path.isdir(path):
return node, False
# force re-indexing if no maccess
maccess = os.path.getmtime(path)
if not self._has_attr(node, 'maccess') or \ if not self._has_attr(node, 'maccess') or \
not node.maccess: not node.maccess:
# force re-indexing if no maccess self._debug('\tchange: no maccess found')
return node, True return node, True
# maccess changed
old_maccess = node.maccess old_maccess = node.maccess
if float(maccess) > float(old_maccess): if float(maccess) != float(old_maccess):
self._debug('\tchange: maccess changed for \"{}\"'.format(path))
return node, True return node, True
# test hash
if self.hash and node.md5:
md5 = self._get_hash(path)
if md5 != node.md5:
m = '\tchange: checksum changed for \"{}\"'.format(path)
self._debug(m)
return node, True
self._debug('\tchange: no change for \"{}\"'.format(path))
return node, False return node, False
def get_meta_node(self, top):
'''return the meta node if any'''
try:
return next(filter(lambda x: x.type == self.TYPE_META,
top.children))
except StopIteration:
return None
def _rec_size(self, node, store=True): def _rec_size(self, node, store=True):
''' '''
recursively traverse tree and return size recursively traverse tree and return size
@store: store the size in the node @store: store the size in the node
''' '''
if self.verbose:
Logger.info('getting node size recursively')
if node.type == self.TYPE_FILE: if node.type == self.TYPE_FILE:
self._debug('getting node size for \"{}\"'.format(node.name))
return node.size return node.size
m = 'getting node size recursively for \"{}\"'.format(node.name)
self._debug(m)
size = 0 size = 0
for i in node.children: for i in node.children:
if node.type == self.TYPE_DIR: if node.type == self.TYPE_DIR:
@ -142,8 +161,9 @@ class Noder:
'''create a new top node''' '''create a new top node'''
return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP) return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP)
def update_metanode(self, meta): def update_metanode(self, top):
'''create or update meta node information''' '''create or update meta node information'''
meta = self._get_meta_node(top)
epoch = int(time.time()) epoch = int(time.time())
if not meta: if not meta:
attr = {} attr = {}
@ -155,6 +175,14 @@ class Noder:
meta.attr['access_version'] = VERSION meta.attr['access_version'] = VERSION
return meta return meta
def _get_meta_node(self, top):
'''return the meta node if any'''
try:
return next(filter(lambda x: x.type == self.TYPE_META,
top.children))
except StopIteration:
return None
def file_node(self, name, path, parent, storagepath): def file_node(self, name, path, parent, storagepath):
'''create a new node representing a file''' '''create a new node representing a file'''
if not os.path.exists(path): if not os.path.exists(path):
@ -168,7 +196,7 @@ class Noder:
return None return None
md5 = None md5 = None
if self.hash: if self.hash:
md5 = utils.md5sum(path) md5 = self._get_hash(path)
relpath = os.sep.join([storagepath, name]) relpath = os.sep.join([storagepath, name])
maccess = os.path.getmtime(path) maccess = os.path.getmtime(path)
@ -200,6 +228,7 @@ class Noder:
return cnt return cnt
def flag(self, node): def flag(self, node):
'''flag a node'''
node.flag = True node.flag = True
def _clean(self, node): def _clean(self, node):
@ -337,8 +366,7 @@ class Noder:
script=False, directory=False, script=False, directory=False,
startpath=None, parentfromtree=False): startpath=None, parentfromtree=False):
'''find files based on their names''' '''find files based on their names'''
if self.verbose: self._debug('searching for \"{}\"'.format(key))
Logger.info('searching for \"{}\"'.format(key))
start = root start = root
if startpath: if startpath:
start = self.get_node(root, startpath) start = self.get_node(root, startpath)
@ -375,8 +403,7 @@ class Noder:
############################################################### ###############################################################
def walk(self, root, path, rec=False): def walk(self, root, path, rec=False):
'''walk the tree for ls based on names''' '''walk the tree for ls based on names'''
if self.verbose: self._debug('walking path: \"{}\"'.format(path))
Logger.info('walking path: \"{}\"'.format(path))
r = anytree.resolver.Resolver('name') r = anytree.resolver.Resolver('name')
found = [] found = []
try: try:
@ -396,7 +423,7 @@ class Noder:
return found return found
############################################################### ###############################################################
# tree creationg # tree creation
############################################################### ###############################################################
def _add_entry(self, name, top, resolv): def _add_entry(self, name, top, resolv):
'''add an entry to the tree''' '''add an entry to the tree'''
@ -429,6 +456,7 @@ class Noder:
return sorted(items, key=self._sort, reverse=self.sortsize) return sorted(items, key=self._sort, reverse=self.sortsize)
def _sort(self, x): def _sort(self, x):
'''return the sort key for an item (by size or by filesystem order)'''
if self.sortsize: if self.sortsize:
return self._sort_size(x) return self._sort_size(x)
return self._sort_fs(x) return self._sort_fs(x)
@ -461,3 +489,13 @@ class Noder:
if parent: if parent:
return os.sep.join([parent, node.name]) return os.sep.join([parent, node.name])
return node.name return node.name
def _get_hash(self, path):
"""return md5 hash of the file at path"""
return utils.md5sum(path)
def _debug(self, string):
'''print debug'''
if not self.debug:
return
Logger.debug(string)

@ -19,7 +19,7 @@ def md5sum(path):
'''calculate md5 sum of a file''' '''calculate md5 sum of a file'''
p = os.path.realpath(path) p = os.path.realpath(path)
if not os.path.exists(p): if not os.path.exists(p):
Logger.err('\nunable to get md5sum on {}'.format(path)) Logger.err('\nmd5sum - file does not exist: {}'.format(p))
return None return None
try: try:
with open(p, mode='rb') as f: with open(p, mode='rb') as f:
@ -32,6 +32,8 @@ def md5sum(path):
return d.hexdigest() return d.hexdigest()
except PermissionError: except PermissionError:
pass pass
except OSError as e:
Logger.err('md5sum error: {}'.format(e))
return None return None

@ -15,10 +15,19 @@ class Walker:
MAXLINE = 80 - 15 MAXLINE = 80 - 15
def __init__(self, noder, nohash=False, debug=False): def __init__(self, noder, hash=True, debug=False,
logpath=None):
'''
@noder: the noder to use
@hash: calculate hash of nodes
@debug: debug mode
@logpath: path where to log catalog changes on reindex
'''
self.noder = noder self.noder = noder
self.noder.set_hashing(not nohash) self.hash = hash
self.noder.set_hashing(self.hash)
self.debug = debug self.debug = debug
self.lpath = logpath
def index(self, path, parent, name, storagepath=''): def index(self, path, parent, name, storagepath=''):
''' '''
@ -44,7 +53,7 @@ class Walker:
sub = os.path.join(root, f) sub = os.path.join(root, f)
if not os.path.exists(sub): if not os.path.exists(sub):
continue continue
self._log(f) self._progress(f)
self._debug('index file {}'.format(sub)) self._debug('index file {}'.format(sub))
n = self.noder.file_node(os.path.basename(f), sub, n = self.noder.file_node(os.path.basename(f), sub,
parent, storagepath) parent, storagepath)
@ -67,43 +76,47 @@ class Walker:
_, cnt2 = self.index(sub, dummy, base, nstoragepath) _, cnt2 = self.index(sub, dummy, base, nstoragepath)
cnt += cnt2 cnt += cnt2
break break
self._log(None) self._progress(None)
return parent, cnt return parent, cnt
def reindex(self, path, parent, top): def reindex(self, path, parent, top):
'''reindex a directory and store in tree''' '''reindex a directory and store in tree'''
cnt = self._reindex(path, parent, top, '') cnt = self._reindex(path, parent, top)
cnt += self.noder.clean_not_flagged(parent) cnt += self.noder.clean_not_flagged(parent)
return cnt return cnt
def _reindex(self, path, parent, top, storagepath): def _reindex(self, path, parent, top, storagepath=''):
'''reindex a directory and store in tree''' '''
reindex a directory and store in tree
@path: directory path to re-index
@top: top node (storage)
@storagepath: rel path relative to indexed directory
'''
self._debug('reindexing starting at {}'.format(path)) self._debug('reindexing starting at {}'.format(path))
cnt = 0 cnt = 0
for (root, dirs, files) in os.walk(path): for (root, dirs, files) in os.walk(path):
for f in files: for f in files:
self._debug('found file {} under {}'.format(f, path)) self._debug('found file \"{}\" under {}'.format(f, path))
sub = os.path.join(root, f) sub = os.path.join(root, f)
maccess = os.path.getmtime(sub) treepath = os.path.join(storagepath, f)
need_reindex, n = self._need_reindex(parent, f, maccess) reindex, n = self._need_reindex(parent, sub, treepath)
if not need_reindex: if not reindex:
self._debug('\tignore file {}'.format(sub)) self._debug('\tskip file {}'.format(sub))
self.noder.flag(n) self.noder.flag(n)
continue continue
self._debug('\tre-index file {}'.format(sub)) self._log2file('update catalog for \"{}\"'.format(sub))
self._log(f)
n = self.noder.file_node(os.path.basename(f), sub, n = self.noder.file_node(os.path.basename(f), sub,
parent, storagepath) parent, storagepath)
self.noder.flag(n) self.noder.flag(n)
cnt += 1 cnt += 1
for d in dirs: for d in dirs:
self._debug('found dir {} under {}'.format(d, path)) self._debug('found dir \"{}\" under {}'.format(d, path))
base = os.path.basename(d) base = os.path.basename(d)
sub = os.path.join(root, d) sub = os.path.join(root, d)
maccess = os.path.getmtime(sub) treepath = os.path.join(storagepath, d)
need_reindex, dummy = self._need_reindex(parent, base, maccess) reindex, dummy = self._need_reindex(parent, sub, treepath)
if need_reindex: if reindex:
self._debug('\tre-index directory {}'.format(sub)) self._log2file('update catalog for \"{}\"'.format(sub))
dummy = self.noder.dir_node(base, sub, parent, storagepath) dummy = self.noder.dir_node(base, sub, parent, storagepath)
cnt += 1 cnt += 1
self.noder.flag(dummy) self.noder.flag(dummy)
@ -114,33 +127,38 @@ class Walker:
cnt2 = self._reindex(sub, dummy, top, nstoragepath) cnt2 = self._reindex(sub, dummy, top, nstoragepath)
cnt += cnt2 cnt += cnt2
break break
self._log(None)
return cnt return cnt
def _need_reindex(self, top, path, maccess): def _need_reindex(self, top, path, treepath):
'''test if node needs re-indexing''' '''
cnode, newer = self.noder.get_node_if_newer(top, path, maccess) test if node needs re-indexing
@top: top node (storage)
@path: abs path to file
@treepath: rel path from indexed directory
'''
cnode, changed = self.noder.get_node_if_changed(top, path, treepath)
if not cnode: if not cnode:
self._debug('\tdoes not exist') self._debug('\t{} does not exist'.format(path))
return True, cnode return True, cnode
if cnode and not newer: if cnode and not changed:
# ignore this node # ignore this node
self._debug('\tis not newer') self._debug('\t{} has not changed'.format(path))
return False, cnode return False, cnode
if cnode and newer: if cnode and changed:
# remove this node and re-add # remove this node and re-add
self._debug('\tis newer') self._debug('\t{} has changed'.format(path))
self._debug('\tremoving node {}'.format(cnode)) self._debug('\tremoving node {} for {}'.format(cnode.name, path))
cnode.parent = None cnode.parent = None
self._debug('\tis to be re-indexed')
return True, cnode return True, cnode
def _debug(self, string): def _debug(self, string):
'''print to debug'''
if not self.debug: if not self.debug:
return return
Logger.log(string) Logger.debug(string)
def _log(self, string): def _progress(self, string):
'''print progress'''
if self.debug: if self.debug:
return return
if not string: if not string:
@ -150,3 +168,10 @@ class Walker:
if len(string) > self.MAXLINE: if len(string) > self.MAXLINE:
string = string[:self.MAXLINE] + '...' string = string[:self.MAXLINE] + '...'
Logger.progr('indexing: {:80}'.format(string)) Logger.progr('indexing: {:80}'.format(string))
def _log2file(self, string):
'''log to file'''
if not self.lpath:
return
line = '{}\n'.format(string)
Logger.flog(self.lpath, line, append=True)

@ -11,6 +11,7 @@ import random
import tempfile import tempfile
import shutil import shutil
import subprocess import subprocess
import hashlib
TMPSUFFIX = '.catcli' TMPSUFFIX = '.catcli'
@ -25,6 +26,25 @@ def get_rnd_string(length):
return ''.join(random.choice(alpha) for _ in range(length)) return ''.join(random.choice(alpha) for _ in range(length))
def md5sum(path):
'''calculate md5 sum of a file'''
p = os.path.realpath(path)
if not os.path.exists(p):
return None
try:
with open(p, mode='rb') as f:
d = hashlib.md5()
while True:
buf = f.read(4096)
if not buf:
break
d.update(buf)
return d.hexdigest()
except PermissionError:
pass
return None
def clean(path): def clean(path):
'''Delete file or folder.''' '''Delete file or folder.'''
if not os.path.exists(path): if not os.path.exists(path):
@ -38,10 +58,7 @@ def clean(path):
def edit_file(path, newcontent): def edit_file(path, newcontent):
if not os.path.exists(path): return write_to_file(path, newcontent)
write_to_file(path, newcontent)
else:
write_to_file(path, newcontent)
def unix_tree(path): def unix_tree(path):

@ -17,7 +17,7 @@ class TestFind(unittest.TestCase):
def test_find(self): def test_find(self):
# init # init
catalog = Catalog('fake', force=True, verbose=False) catalog = Catalog('fake', force=True, debug=False)
top = catalog._restore_json(get_fakecatalog()) top = catalog._restore_json(get_fakecatalog())
noder = Noder() noder = Noder()

@ -23,7 +23,7 @@ class TestGraph(unittest.TestCase):
gpath = tempfile.gettempdir() + os.sep + 'graph.dot' gpath = tempfile.gettempdir() + os.sep + 'graph.dot'
self.addCleanup(clean, path) self.addCleanup(clean, path)
self.addCleanup(clean, gpath) self.addCleanup(clean, gpath)
catalog = Catalog(path, force=True, verbose=False) catalog = Catalog(path, force=True, debug=False)
top = catalog._restore_json(get_fakecatalog()) top = catalog._restore_json(get_fakecatalog())
noder = Noder() noder = Noder()

@ -42,7 +42,7 @@ class TestIndexing(unittest.TestCase):
noder = Noder() noder = Noder()
top = noder.new_top_node() top = noder.new_top_node()
catalog = Catalog(catalogpath, force=True, verbose=False) catalog = Catalog(catalogpath, force=True, debug=False)
# create fake args # create fake args
tmpdirname = 'tmpdir' tmpdirname = 'tmpdir'

@ -19,7 +19,7 @@ class TestWalking(unittest.TestCase):
# init # init
path = 'fake' path = 'fake'
self.addCleanup(clean, path) self.addCleanup(clean, path)
catalog = Catalog(path, force=True, verbose=False) catalog = Catalog(path, force=True, debug=False)
top = catalog._restore_json(get_fakecatalog()) top = catalog._restore_json(get_fakecatalog())
noder = Noder() noder = Noder()

@ -19,7 +19,7 @@ class TestRm(unittest.TestCase):
# init # init
path = 'fake' path = 'fake'
self.addCleanup(clean, path) self.addCleanup(clean, path)
catalog = Catalog(path, force=True, verbose=False) catalog = Catalog(path, force=True, debug=False)
top = catalog._restore_json(get_fakecatalog()) top = catalog._restore_json(get_fakecatalog())
noder = Noder() noder = Noder()

@ -19,7 +19,7 @@ class TestTree(unittest.TestCase):
# init # init
path = 'fake' path = 'fake'
self.addCleanup(clean, path) self.addCleanup(clean, path)
catalog = Catalog(path, force=True, verbose=False) catalog = Catalog(path, force=True, debug=False)
top = catalog._restore_json(get_fakecatalog()) top = catalog._restore_json(get_fakecatalog())
noder = Noder() noder = Noder()

@ -12,7 +12,7 @@ from catcli.catcli import cmd_index, cmd_update
from catcli.noder import Noder from catcli.noder import Noder
from catcli.catalog import Catalog from catcli.catalog import Catalog
from tests.helpers import create_dir, create_rnd_file, get_tempdir, \ from tests.helpers import create_dir, create_rnd_file, get_tempdir, \
clean, unix_tree, edit_file, read_from_file clean, unix_tree, edit_file, read_from_file, md5sum
import anytree import anytree
@ -31,6 +31,7 @@ class TestIndexing(unittest.TestCase):
f1 = create_rnd_file(dirpath, 'file1') f1 = create_rnd_file(dirpath, 'file1')
f2 = create_rnd_file(dirpath, 'file2') f2 = create_rnd_file(dirpath, 'file2')
f3 = create_rnd_file(dirpath, 'file3') f3 = create_rnd_file(dirpath, 'file3')
f4 = create_rnd_file(dirpath, 'file4')
# create 2 directories # create 2 directories
d1 = create_dir(dirpath, 'dir1') d1 = create_dir(dirpath, 'dir1')
@ -40,22 +41,40 @@ class TestIndexing(unittest.TestCase):
d1f1 = create_rnd_file(d1, 'dir1file1') d1f1 = create_rnd_file(d1, 'dir1file1')
d1f2 = create_rnd_file(d1, 'dir1file2') d1f2 = create_rnd_file(d1, 'dir1file2')
d2f1 = create_rnd_file(d2, 'dir2file1') d2f1 = create_rnd_file(d2, 'dir2file1')
d2f2 = create_rnd_file(d2, 'dir2file2')
noder = Noder() noder = Noder(debug=True)
noder.set_hashing(True)
top = noder.new_top_node() top = noder.new_top_node()
catalog = Catalog(catalogpath, force=True, verbose=False) catalog = Catalog(catalogpath, force=True, debug=False)
# get checksums
f4_md5 = md5sum(f4)
self.assertTrue(f4_md5)
d1f1_md5 = md5sum(d1f1)
self.assertTrue(d1f1_md5)
d2f2_md5 = md5sum(d2f2)
self.assertTrue(d2f2_md5)
# create fake args # create fake args
tmpdirname = 'tmpdir' tmpdirname = 'tmpdir'
args = {'<path>': dirpath, '<name>': tmpdirname, args = {'<path>': dirpath, '<name>': tmpdirname,
'--hash': True, '--meta': ['some meta'], '--hash': True, '--meta': ['some meta'],
'--no-subsize': False, '--verbose': True} '--no-subsize': False, '--verbose': True,
'--lpath': None}
# index the directory # index the directory
unix_tree(dirpath) unix_tree(dirpath)
cmd_index(args, noder, catalog, top, debug=True) cmd_index(args, noder, catalog, top)
self.assertTrue(os.stat(catalogpath).st_size != 0) self.assertTrue(os.stat(catalogpath).st_size != 0)
# ensure md5 sums are stored in the catalog
nods = noder.find_name(top, os.path.basename(f4))
self.assertTrue(len(nods) == 1)
nod = nods[0]
self.assertTrue(nod)
self.assertTrue(nod.md5 == f4_md5)
# print catalog # print catalog
noder.print_tree(top) noder.print_tree(top)
@ -70,9 +89,32 @@ class TestIndexing(unittest.TestCase):
# modify files # modify files
EDIT = 'edited' EDIT = 'edited'
edit_file(d1f1, EDIT) edit_file(d1f1, EDIT)
d1f1_md5_new = md5sum(d1f1)
self.assertTrue(d1f1_md5_new)
self.assertTrue(d1f1_md5_new != d1f1_md5)
# change file without mtime
maccess = os.path.getmtime(f4)
EDIT = 'edited'
edit_file(f4, EDIT)
# reset edit time
os.utime(f4, (maccess, maccess))
f4_md5_new = md5sum(d1f1)
self.assertTrue(f4_md5_new)
self.assertTrue(f4_md5_new != f4_md5)
# change file without mtime
maccess = os.path.getmtime(d2f2)
EDIT = 'edited'
edit_file(d2f2, EDIT)
# reset edit time
os.utime(d2f2, (maccess, maccess))
d2f2_md5_new = md5sum(d2f2)
self.assertTrue(d2f2_md5_new)
self.assertTrue(d2f2_md5_new != d2f2_md5)
# update storage # update storage
cmd_update(args, noder, catalog, top, debug=True) cmd_update(args, noder, catalog, top)
# print catalog # print catalog
# print(read_from_file(catalogpath)) # print(read_from_file(catalogpath))
@ -81,7 +123,31 @@ class TestIndexing(unittest.TestCase):
# explore the top node to find all nodes # explore the top node to find all nodes
self.assertTrue(len(top.children) == 1) self.assertTrue(len(top.children) == 1)
storage = top.children[0] storage = top.children[0]
self.assertTrue(len(storage.children) == 7) self.assertTrue(len(storage.children) == 8)
# ensure d1f1 md5 sum has changed in catalog
nods = noder.find_name(top, os.path.basename(d1f1))
self.assertTrue(len(nods) == 1)
nod = nods[0]
self.assertTrue(nod)
self.assertTrue(nod.md5 != d1f1_md5)
self.assertTrue(nod.md5 == d1f1_md5_new)
# ensure f4 md5 sum has changed in catalog
nods = noder.find_name(top, os.path.basename(f4))
self.assertTrue(len(nods) == 1)
nod = nods[0]
self.assertTrue(nod)
self.assertTrue(nod.md5 != f4_md5)
self.assertTrue(nod.md5 == f4_md5_new)
# ensure d2f2 md5 sum has changed in catalog
nods = noder.find_name(top, os.path.basename(d2f2))
self.assertTrue(len(nods) == 1)
nod = nods[0]
self.assertTrue(nod)
self.assertTrue(nod.md5 != d2f2_md5)
self.assertTrue(nod.md5 == d2f2_md5_new)
# ensures files and directories are in # ensures files and directories are in
names = [node.name for node in anytree.PreOrderIter(storage)] names = [node.name for node in anytree.PreOrderIter(storage)]
@ -89,6 +155,7 @@ class TestIndexing(unittest.TestCase):
self.assertTrue(os.path.basename(f1) in names) self.assertTrue(os.path.basename(f1) in names)
self.assertTrue(os.path.basename(f2) in names) self.assertTrue(os.path.basename(f2) in names)
self.assertTrue(os.path.basename(f3) in names) self.assertTrue(os.path.basename(f3) in names)
self.assertTrue(os.path.basename(f4) in names)
self.assertTrue(os.path.basename(d1) in names) self.assertTrue(os.path.basename(d1) in names)
self.assertTrue(os.path.basename(d1f1) in names) self.assertTrue(os.path.basename(d1f1) in names)
self.assertTrue(os.path.basename(d1f2) in names) self.assertTrue(os.path.basename(d1f2) in names)
@ -104,7 +171,7 @@ class TestIndexing(unittest.TestCase):
if node.name == os.path.basename(d1): if node.name == os.path.basename(d1):
self.assertTrue(len(node.children) == 3) self.assertTrue(len(node.children) == 3)
elif node.name == os.path.basename(d2): elif node.name == os.path.basename(d2):
self.assertTrue(len(node.children) == 2) self.assertTrue(len(node.children) == 3)
elif node.name == os.path.basename(new3): elif node.name == os.path.basename(new3):
self.assertTrue(len(node.children) == 0) self.assertTrue(len(node.children) == 0)
elif node.name == os.path.basename(new4): elif node.name == os.path.basename(new4):
@ -118,7 +185,7 @@ class TestIndexing(unittest.TestCase):
clean(new4) clean(new4)
# update storage # update storage
cmd_update(args, noder, catalog, top, debug=True) cmd_update(args, noder, catalog, top)
# ensures files and directories are (not) in # ensures files and directories are (not) in
names = [node.name for node in anytree.PreOrderIter(storage)] names = [node.name for node in anytree.PreOrderIter(storage)]
@ -126,11 +193,13 @@ class TestIndexing(unittest.TestCase):
self.assertTrue(os.path.basename(f1) in names) self.assertTrue(os.path.basename(f1) in names)
self.assertTrue(os.path.basename(f2) in names) self.assertTrue(os.path.basename(f2) in names)
self.assertTrue(os.path.basename(f3) in names) self.assertTrue(os.path.basename(f3) in names)
self.assertTrue(os.path.basename(f4) in names)
self.assertTrue(os.path.basename(d1) in names) self.assertTrue(os.path.basename(d1) in names)
self.assertTrue(os.path.basename(d1f1) not in names) self.assertTrue(os.path.basename(d1f1) not in names)
self.assertTrue(os.path.basename(d1f2) in names) self.assertTrue(os.path.basename(d1f2) in names)
self.assertTrue(os.path.basename(d2) not in names) self.assertTrue(os.path.basename(d2) not in names)
self.assertTrue(os.path.basename(d2f1) not in names) self.assertTrue(os.path.basename(d2f1) not in names)
self.assertTrue(os.path.basename(d2f1) not in names)
self.assertTrue(os.path.basename(new1) in names) self.assertTrue(os.path.basename(new1) in names)
self.assertTrue(os.path.basename(new2) not in names) self.assertTrue(os.path.basename(new2) not in names)
self.assertTrue(os.path.basename(new3) in names) self.assertTrue(os.path.basename(new3) in names)

Loading…
Cancel
Save