Skip to content

Implementation based on the Dirhash Standard #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 46 commits into from
Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
96300a6
relatively complete draft of dirhash standard
andhus Feb 20, 2019
32f217d
add DIRSUM, change property separator to null char, general additions
andhus Feb 21, 2019
9a50a92
separate content -> data, dirhash for clarity, formalize cyclic links…
andhus Feb 22, 2019
871e18b
complete and separate DIRHASH STANDARD from README
andhus Feb 23, 2019
e26203c
move cyclic links examples to appendix
andhus Feb 23, 2019
9abb336
minor fixes
andhus Feb 23, 2019
77c0035
Use double null char as dir entry separator, fix comments by Jim-Holm…
andhus Apr 11, 2020
6b36806
add general traverse logic
andhus Feb 25, 2019
0da6b06
temp, complete general traverse logic and tentative dirhash use
andhus Mar 1, 2019
32b41d2
add recursion filters, compat module, get_included_paths and tests fo…
andhus Mar 2, 2019
4b5efb2
add include empty option, first tests for traverse
andhus Mar 3, 2019
eea58a2
complete test coverage for traverse
andhus Mar 6, 2019
445951b
add allow_cyclic_links option, fix check for empty issue, more tests
andhus Mar 9, 2019
0568d14
major refactor wip: new standard and based on general traverse logic
andhus Mar 9, 2019
dd88f77
fix empty root dir handling
andhus Mar 11, 2019
cb1079c
update dirhash tests according new API and protocol
andhus Mar 11, 2019
30d0c2d
update cli according new API, NOTE some todo:s left
andhus Mar 11, 2019
c967490
use scantree external package, remove old TODOs re ignorefile
andhus Mar 30, 2019
3491869
loosen requirements for multiproc speed-up test, due to travis overhead
andhus Mar 30, 2019
03a4170
rename filter_options -> filtering and protocol_options -> protocol
andhus Apr 10, 2020
56d9120
complete test coverage + some clean up
andhus Apr 10, 2020
7fbd165
update arg in benchmark/run.py
andhus Apr 10, 2020
1968d6a
change entry descriptor separator to repeated null char, avoid collisi…
andhus Apr 10, 2020
39ea304
change order of funcs
andhus Apr 12, 2020
b787d84
fix typo in DIRHASH_STANDARD
andhus Apr 12, 2020
49a157d
rename match_patterns -> match
andhus Apr 12, 2020
abcc8b0
update docs of dirhash
andhus Apr 12, 2020
4999e5b
update docs, rename get_included_path -> included_paths, add __all__
andhus Apr 13, 2020
4bc3c71
replace on_cyclic_link -> allow_cyclic_links
andhus Apr 13, 2020
febbedb
update docs
andhus Apr 14, 2020
6104c5a
update CLI
andhus Apr 18, 2020
5e22d5b
update README
andhus Apr 18, 2020
bd76084
fix bug in default arg
andhus Apr 18, 2020
45eaa38
rename dirsum properties
andhus Apr 18, 2020
3d23002
update reference according separation of standard and python implemen…
andhus Apr 18, 2020
a4c267e
update repo ref and bump version in setup.py
andhus Apr 18, 2020
a1ba7c1
draft of changelog and update of benchmark/run.py
andhus Apr 19, 2020
39c4785
fixes in changelog
andhus Apr 19, 2020
9806bfb
flatten args to dirhash, add dirhash_impl for passing filter_ and pro…
andhus Apr 19, 2020
53ed900
update docs according flat args
andhus Apr 20, 2020
1b500c1
update README
andhus Apr 20, 2020
429f0b9
minor cleanup
andhus Apr 20, 2020
e15a37a
add results from new benchmark runs
andhus Apr 20, 2020
2d821b3
add back removed tests
andhus Apr 20, 2020
1f2643b
update changelog
andhus Apr 20, 2020
aa4cd7f
rename Filter arg match -> match_patterns to reflect Dirhash Standard
andhus Apr 20, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
complete test coverage for traverse
  • Loading branch information
andhus committed Apr 12, 2020
commit eea58a22309545c32231e834d136e0af896768d8
101 changes: 55 additions & 46 deletions src/dirhash/traverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def identity(x):
#


def traverse(
def traverse( # TODO rename scantree
directory,
recursion_filter=identity,
file_apply=identity,
Expand Down Expand Up @@ -137,9 +137,6 @@ def _traverse_multiprocess(**kwargs):
file_paths = []

def extract_paths(path):
# hacky way to support pickling
# (__set/getstate__ does not work for slotted attrs classes)
path._dir_entry = DirEntryReplacement.from_dir_entry(path._dir_entry)
result_idx = len(file_paths)
file_paths.append(path)
return result_idx
Expand All @@ -164,7 +161,7 @@ def _cached_by_realpath(file_apply):
def file_apply_cached(path):
if path.real not in cache:
cache[path.real] = file_apply(path)
return cache[path]
return cache[path.real]

return file_apply_cached

Expand Down Expand Up @@ -295,17 +292,24 @@ def inode(self):
def __fspath__(self):
return self.real

# TODO below has no effect when pickling?
# def __getstate__(self):
# return (
# self.root,
# self.relative,
# self.real,
# DirEntryReplacement.from_dir_entry(self._dir_entry)
# )
#
# def __setstate__(self, state):
# self.root, self.relative, self.real, self._dir_entry = state
@staticmethod
def _getstate(self):
return (
self.root,
self.relative,
self.real,
DirEntryReplacement.from_dir_entry(self._dir_entry)
)

@staticmethod
def _setstate(self, state):
self.root, self.relative, self.real, self._dir_entry = state


# Attrs overrides __get/setstate__ for slotted classes, see:
# https://github.com/python-attrs/attrs/issues/512
RecursionPath.__getstate__ = RecursionPath._getstate
RecursionPath.__setstate__ = RecursionPath._setstate


@attr.s(slots=True, cmp=False)
Expand Down Expand Up @@ -390,7 +394,7 @@ def __eq__(self, other):
if not this_res == other_res:
return False

return True
return True


@attr.s(slots=True, frozen=True)
Expand Down Expand Up @@ -423,43 +427,48 @@ class CyclicLinkedDir(object):
target_path = attr.ib(validator=attr.validators.instance_of(RecursionPath))


class RecursionFilterABC(with_metaclass(abc.ABCMeta)):
class RecursionFilter(object):

@abc.abstractmethod
def __call__(self, paths):
pass
def __init__(
self,
linked_dirs=True,
linked_files=True,
match=None,
):
self.linked_dirs = linked_dirs
self.linked_files = linked_files
self._match_patterns = tuple('*') if match is None else tuple(match)
if self._match_patterns != tuple('*'):
self._path_spec = PathSpec.from_lines(
GitWildMatchPattern,
self.match_patterns
)
else:
self._path_spec = None

@property
def match_patterns(self):
return self._match_patterns

class RecursionFilterBase(RecursionFilterABC):
def include(self, recursion_path):
if recursion_path.is_symlink():
if recursion_path.is_dir() and not self.linked_dirs:
return False
if recursion_path.is_file() and not self.linked_files:
return False

def __init__(self, linked_dirs=True, linked_files=True):
self.linked_dirs = linked_dirs
self.linked_files = linked_files
if recursion_path.is_dir():
# only filepaths matched against patterns
return True

def include(self, path):
if not path.is_symlink():
return self.match_file(recursion_path.relative)

def match_file(self, filepath):
if self._path_spec is None:
return True
if path.is_dir():
return self.linked_dirs
else:
return self.linked_files
return match_file(self._path_spec.patterns, normalize_file(filepath))

def __call__(self, paths):
for path in paths:
if self.include(path):
yield path


class MatchPatterns(RecursionFilterBase):

def __init__(self, match_patterns=None, **kwargs):
super(MatchPatterns, self).__init__(**kwargs)
match_patterns = match_patterns or ['*']
self.path_spec = PathSpec.from_lines(GitWildMatchPattern, match_patterns)

def include(self, path):
if not super(MatchPatterns, self).include(path):
return False
if path.is_dir():
return True
return match_file(self.path_spec.patterns, normalize_file(path.relative))
Loading