filter-repo: add option --report-dir to set custom analysis dir

--analyze is hardcoded to write to a subdirectory inside GIT_DIR.

When practicing filtering runs on a large repo, it is desirable to keep
an unchanged, read-only copy to reduce the chance of user error. It is
also desirable to be able to analyze that read-only repo without having
to clone it, which would save a lot of time and space.

Add a --report-dir option to set a non-default destination directory for
the analysis output.

Signed-off-by: rndbit <rndbit@filter.bitman.net>
[en: fixed the existing regression test, which broke now that the
     analysis directory is no longer overwritten unconditionally, and
     also added a new test of the new behavior for code coverage.]
Signed-off-by: Elijah Newren <newren@gmail.com>
pull/299/head
rndbit 3 years ago committed by Elijah Newren
parent c5af37f82c
commit e9d5ab3529

@ -1758,6 +1758,12 @@ EXAMPLES
help=_("Analyze repository history and create a report that may be "
"useful in determining what to filter in a subsequent run. "
"Will not modify your repo."))
# Optional destination for the --analyze report. The value is converted with
# os.fsencode, so args.report_dir is a bytes path like the bytes paths the
# analysis code joins with os.path.join. When the option is omitted, the
# report is written under GIT_DIR/filter-repo/analysis.
analyze.add_argument('--report-dir',
    metavar='DIR_OR_FILE',
    type=os.fsencode,
    dest='report_dir',
    # Adjacent string literals are concatenated verbatim, so the first piece
    # must end with a space; the default path uses a hyphen ("filter-repo"),
    # matching the directory the analysis code actually creates.
    help=_("Directory to write report, defaults to GIT_DIR/filter-repo/analysis, "
           "refuses to run if exists, --force delete existing dir first."))
path = parser.add_argument_group(title=_("Filtering based on paths "
"(see also --filename-callback)"))
@ -2641,15 +2647,25 @@ class RepoAnalyze(object):
@staticmethod
def run(args):
git_dir = GitUtils.determine_git_dir(b'.')
if args.report_dir:
reportdir = args.report_dir
else:
git_dir = GitUtils.determine_git_dir(b'.')
# Create the report directory as necessary
results_tmp_dir = os.path.join(git_dir, b'filter-repo')
if not os.path.isdir(results_tmp_dir):
os.mkdir(results_tmp_dir)
reportdir = os.path.join(results_tmp_dir, b"analysis")
if not args.force and os.path.isdir(reportdir):
shutil.rmtree(reportdir)
results_tmp_dir = os.path.join(git_dir, b'filter-repo')
if not os.path.isdir(results_tmp_dir):
os.mkdir(results_tmp_dir)
reportdir = os.path.join(results_tmp_dir, b"analysis")
if os.path.isdir(reportdir):
if args.force:
sys.stdout.write(_("Warning: Removing recursively: \"%s\"") % decode(reportdir))
shutil.rmtree(reportdir)
else:
sys.stdout.write(_("Error: dir already exists (use --force to delete): \"%s\"\n") % decode(reportdir))
sys.exit(1)
os.mkdir(reportdir)
# Gather the data we need

@ -722,8 +722,11 @@ test_expect_success C_LOCALE_OUTPUT '--analyze' '
git filter-repo --analyze &&
# It should work and overwrite report if run again
git filter-repo --analyze &&
# It should not work again without a --force
test_must_fail git filter-repo --analyze &&
# With a --force, another run should succeed
git filter-repo --analyze --force &&
test -d .git/filter-repo/analysis &&
cd .git/filter-repo/analysis &&
@ -824,6 +827,38 @@ test_expect_success C_LOCALE_OUTPUT '--analyze' '
)
'
# Exercise `git filter-repo --analyze --report-dir DIR`.
#
# Inside the analyze_me repository (presumably created by setup_analyze_me;
# that helper is defined elsewhere), the test removes any existing
# .git/filter-repo directory and then checks that:
#   1. a first run with --report-dir foobar succeeds;
#   2. a second run against the same directory fails without --force;
#   3. a run with --force succeeds again;
#   4. nothing was written to the default .git/filter-repo/analysis location;
#   5. foobar exists and contains the expected report files.
# C_LOCALE_OUTPUT is passed as the prerequisite argument of the test.
test_expect_success C_LOCALE_OUTPUT '--analyze --report-dir' '
setup_analyze_me &&
(
cd analyze_me &&
rm -rf .git/filter-repo &&
git filter-repo --analyze --report-dir foobar &&
# It should not work again without a --force
test_must_fail git filter-repo --analyze --report-dir foobar &&
# With a --force, though, it should overwrite
git filter-repo --analyze --report-dir foobar --force &&
test ! -d .git/filter-repo/analysis &&
test -d foobar &&
cd foobar &&
# Very simple tests because already tested above.
test_path_is_file renames.txt &&
test_path_is_file README &&
test_path_is_file blob-shas-and-paths.txt &&
test_path_is_file directories-all-sizes.txt &&
test_path_is_file directories-deleted-sizes.txt &&
test_path_is_file extensions-all-sizes.txt &&
test_path_is_file extensions-deleted-sizes.txt &&
test_path_is_file path-all-sizes.txt &&
test_path_is_file path-deleted-sizes.txt
)
'
test_expect_success '--replace-text all options' '
setup_analyze_me &&
(

Loading…
Cancel
Save