diff --git a/requirements.d/development.txt b/requirements.d/development.txt index 10d7b55bf0..09a7416599 100644 --- a/requirements.d/development.txt +++ b/requirements.d/development.txt @@ -10,6 +10,7 @@ pytest pytest-xdist pytest-cov pytest-benchmark +freezegun Cython pre-commit bandit[toml] diff --git a/src/borg/archiver/prune_cmd.py b/src/borg/archiver/prune_cmd.py index a4922b46a0..1c112c4d5b 100644 --- a/src/borg/archiver/prune_cmd.py +++ b/src/borg/archiver/prune_cmd.py @@ -1,16 +1,17 @@ import argparse from collections import OrderedDict -from datetime import datetime, timezone, timedelta +from datetime import datetime, timezone import logging from operator import attrgetter import os +import itertools from ._common import with_repository, Highlander from ..archive import Archive from ..cache import Cache from ..constants import * # NOQA -from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error -from ..helpers import archivename_validator +from ..helpers import interval, int_or_interval, sig_int, archivename_validator +from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error from ..manifest import Manifest from ..logger import create_logger @@ -18,20 +19,23 @@ logger = create_logger() -def prune_within(archives, seconds, kept_because): - target = datetime.now(timezone.utc) - timedelta(seconds=seconds) - kept_counter = 0 - result = [] - for a in archives: - if a.ts > target: - kept_counter += 1 - kept_because[a.id] = ("within", kept_counter) - result.append(a) - return result +# The *_period_func group of functions create period grouping keys to group together archives falling within a certain +# period. Among archives in each of these groups, only the latest (by creation timestamp) is kept. 
-def default_period_func(pattern): +def unique_period_func(): + counter = itertools.count() + + def unique_values(_a): + """Group archives by an incrementing counter, practically making each archive a group of 1""" + return next(counter) + + return unique_values + + +def pattern_period_func(pattern): def inner(a): + """Group archives by extracting given strftime-pattern from their creation timestamp""" # compute in local timezone return a.ts.astimezone().strftime(pattern) @@ -39,6 +43,7 @@ def inner(a): def quarterly_13weekly_period_func(a): + """Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp""" (year, week, _) = a.ts.astimezone().isocalendar() # local time if week <= 13: # Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7) @@ -60,6 +65,7 @@ def quarterly_13weekly_period_func(a): def quarterly_3monthly_period_func(a): + """Group archives by extracting the 3-month quarter from their creation timestamp""" lt = a.ts.astimezone() # local time if lt.month <= 3: # 1-1 to 3-31 @@ -77,42 +83,64 @@ def quarterly_3monthly_period_func(a): PRUNING_PATTERNS = OrderedDict( [ - ("secondly", default_period_func("%Y-%m-%d %H:%M:%S")), - ("minutely", default_period_func("%Y-%m-%d %H:%M")), - ("hourly", default_period_func("%Y-%m-%d %H")), - ("daily", default_period_func("%Y-%m-%d")), - ("weekly", default_period_func("%G-%V")), - ("monthly", default_period_func("%Y-%m")), + # Each archive is considered for keeping + ("within", unique_period_func()), + ("last", unique_period_func()), + ("keep", unique_period_func()), + # Last archive (by creation timestamp) within period group is consiedered for keeping + ("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")), + ("minutely", pattern_period_func("%Y-%m-%d %H:%M")), + ("hourly", pattern_period_func("%Y-%m-%d %H")), + ("daily", pattern_period_func("%Y-%m-%d")), + ("weekly", pattern_period_func("%G-%V")), + ("monthly", pattern_period_func("%Y-%m")), ("quarterly_13weekly", 
# Earliest timezone-aware (UTC) timestamp that ``datetime`` can represent.
# NOTE(review): no safety margin is applied here, so converting this value to a zone behind UTC would
# fall below ``datetime.min``. Confirm whether adding one day was intended.
DATETIME_MIN_WITH_ZONE = datetime.min.replace(tzinfo=timezone.utc)


def prune_split(archives, rule, n_or_interval, base_timestamp, kept_because=None):
    """Select the archives to keep for one pruning *rule*.

    Archives are visited from newest to oldest. Each archive is mapped to a period key by
    ``PRUNING_PATTERNS[rule]``; the newest archive of every distinct period is kept, unless its id is
    already present in *kept_because* (i.e. it was claimed by an earlier rule).

    :param archives: iterable of objects exposing ``ts`` (creation timestamp) and ``id``.
    :param rule: key into ``PRUNING_PATTERNS``; also used as the reason recorded in *kept_because*.
    :param n_or_interval: either an ``int`` (number of archives to keep; ``0`` keeps nothing, a negative
        number means "no limit") or a timedelta-like interval (keep archives newer than
        ``base_timestamp - n_or_interval``).
    :param base_timestamp: reference "now" for interval mode; unused when *n_or_interval* is an ``int``.
    :param kept_because: dict ``archive id -> (reason, index)`` shared between rules; updated in place.
        A fresh dict is used when omitted.
    :return: list of kept archives, newest first, optionally followed by the oldest archive.
    """
    # A mutable default would be shared between calls and make later calls skip archives.
    if kept_because is None:
        kept_because = {}

    if isinstance(n_or_interval, int):
        n, earliest_timestamp = n_or_interval, None
    else:
        n, earliest_timestamp = None, base_timestamp - n_or_interval

    keep = []
    sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True)
    if n == 0 or not sorted_archives:
        return keep

    def may_consider(archive):
        """Return True while the retention target still allows looking at *archive*."""
        if n is None:
            return archive.ts > earliest_timestamp
        # A negative count never exhausts the budget ("keep all periods").
        return n < 0 or len(keep) < n

    period_func = PRUNING_PATTERNS[rule]
    last = None
    for a in sorted_archives:
        if not may_consider(a):
            break
        period = period_func(a)
        if period != last:
            last = period
            if a.id not in kept_because:
                keep.append(a)
                kept_because[a.id] = (rule, len(keep))

    # Keep the oldest archive if the retention target was not reached: fewer than n archives were
    # kept (count mode), or the archives do not reach back to the start of the interval.
    oldest = sorted_archives[-1]
    if n is None:
        target_unmet = oldest.ts > earliest_timestamp
    else:
        target_unmet = len(keep) < n
    if target_unmet and oldest.id not in kept_because:
        keep.append(oldest)
        kept_because[oldest.id] = (rule + "[oldest]", len(keep))

    return keep
args.secondly, args.minutely, args.hourly, @@ -131,11 +163,10 @@ def do_prune(self, args, repository, manifest): args.quarterly_13weekly, args.quarterly_3monthly, args.yearly, - args.within, ) ): raise CommandError( - 'At least one of the "keep-within", "keep-last", ' + 'At least one of the "keep", "keep-within", "keep-last", ' '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", ' 'or "keep-yearly" settings must be specified.' @@ -159,15 +190,12 @@ def do_prune(self, args, repository, manifest): # (, ) kept_because = {} - # find archives which need to be kept because of the keep-within rule - if args.within: - keep += prune_within(archives, args.within, kept_because) - + base_timestamp = datetime.now().astimezone() # find archives which need to be kept because of the various time period rules for rule in PRUNING_PATTERNS.keys(): - num = getattr(args, rule, None) - if num is not None: - keep += prune_split(archives, rule, num, kept_because) + num_or_interval = getattr(args, rule, None) + if num_or_interval is not None: + keep += prune_split(archives, rule, num_or_interval, base_timestamp, kept_because) to_delete = set(archives) - set(keep) with Cache(repository, manifest, iec=args.iec) as cache: @@ -310,81 +338,81 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser): help="keep all archives within this time interval", ) subparser.add_argument( - "--keep-last", + "--keep-last", dest="last", type=int, action=Highlander, help="number of archives to keep" + ) + subparser.add_argument( + "--keep", + dest="keep", + type=int_or_interval, + action=Highlander, + help="number or time interval of archives to keep", + ) + subparser.add_argument( "--keep-secondly", dest="secondly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of secondly archives to keep", + help="number or time interval of secondly archives to keep", ) 
subparser.add_argument( "--keep-minutely", dest="minutely", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of minutely archives to keep", + help="number or time interval of minutely archives to keep", ) subparser.add_argument( "-H", "--keep-hourly", dest="hourly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of hourly archives to keep", + help="number or time interval of hourly archives to keep", ) subparser.add_argument( "-d", "--keep-daily", dest="daily", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of daily archives to keep", + help="number or time interval of daily archives to keep", ) subparser.add_argument( "-w", "--keep-weekly", dest="weekly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of weekly archives to keep", + help="number or time interval of weekly archives to keep", ) subparser.add_argument( "-m", "--keep-monthly", dest="monthly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of monthly archives to keep", + help="number or time interval of monthly archives to keep", ) quarterly_group = subparser.add_mutually_exclusive_group() quarterly_group.add_argument( "--keep-13weekly", dest="quarterly_13weekly", - type=int, - default=0, - help="number of quarterly archives to keep (13 week strategy)", + type=int_or_interval, + help="number or time interval of quarterly archives to keep (13 week strategy)", ) quarterly_group.add_argument( "--keep-3monthly", dest="quarterly_3monthly", - type=int, - default=0, - help="number of quarterly archives to keep (3 month strategy)", + type=int_or_interval, + help="number or time interval of quarterly archives to keep (3 month strategy)", ) subparser.add_argument( "-y", "--keep-yearly", dest="yearly", - type=int, - default=0, + type=int_or_interval, action=Highlander, - help="number of yearly archives to keep", + help="number or time 
interval of yearly archives to keep", ) define_archive_filters_group(subparser, sort_by=False, first_last=False) subparser.add_argument( diff --git a/src/borg/constants.py b/src/borg/constants.py index 911a8f1bef..3fed99c131 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -137,7 +137,9 @@ EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no ISO_FORMAT_NO_USECS = "%Y-%m-%dT%H:%M:%S" +ISO_FORMAT_NO_USECS_ZONE = ISO_FORMAT_NO_USECS + "%z" ISO_FORMAT = ISO_FORMAT_NO_USECS + ".%f" +ISO_FORMAT_ZONE = ISO_FORMAT + "%z" DASHES = "-" * 78 diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index 6473f7dc69..b6902d01e3 100644 --- a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -28,7 +28,7 @@ from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper from .parseformat import bin_to_hex, hex_to_bin, safe_encode, safe_decode from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd -from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval +from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval, int_or_interval from .parseformat import PathSpec, SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper from .parseformat import format_file_size, parse_file_size, FileSize from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Location, text_validator diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index cb481685ab..6cde35dd45 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -12,7 +12,7 @@ import uuid from typing import ClassVar, Any, TYPE_CHECKING, Literal from collections import OrderedDict -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from functools import partial from string import Formatter @@ -154,12 +154,24 @@ def 
def int_or_interval(s):
    """Parse *s* as either a plain integer or a time interval.

    A string accepted by ``int()`` (surrounding whitespace and a sign are allowed) is returned as an
    ``int``. Anything else is handed to ``interval()`` and its result, a number of seconds, is
    returned as a ``timedelta``.

    :raises argparse.ArgumentTypeError: if *s* is neither an integer nor a valid interval; the error
        raised by ``interval()`` is attached as the cause.
    """
    try:
        return int(s)
    except ValueError:
        pass  # not a plain integer, try the interval syntax next

    try:
        return timedelta(seconds=interval(s))
    except argparse.ArgumentTypeError as e:
        raise argparse.ArgumentTypeError(f"Value is neither an integer nor an interval: {e}") from e
import cmd, RK_ENCRYPTION, src_dir, generate_archiver_tests -from ...helpers import interval pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA -def _create_archive_ts(archiver, name, y, m, d, H=0, M=0, S=0): +def _create_archive_ts(archiver, name, y, m, d, H=0, M=0, S=0, us=0, tzinfo=None): cmd( archiver, "create", "--timestamp", - datetime(y, m, d, H, M, S, 0).strftime(ISO_FORMAT_NO_USECS), # naive == local time / local tz + datetime(y, m, d, H, M, S, us, tzinfo=tzinfo).strftime(ISO_FORMAT_ZONE), name, src_dir, ) @@ -256,7 +256,7 @@ def test_prune_ignore_protected(archivers, request): cmd(archiver, "create", "archive3", archiver.input_path) output = cmd(archiver, "prune", "--list", "--keep-last=1", "--match-archives=sh:archive*") assert "archive1" not in output # @PROT archives are completely ignored. - assert re.search(r"Keeping archive \(rule: secondly #1\):\s+archive3", output) + assert re.search(r"Keeping archive \(rule: last #1\):\s+archive3", output) assert re.search(r"Pruning archive \(.*?\):\s+archive2", output) output = cmd(archiver, "repo-list") assert "archive1" in output # @PROT protected archive1 from deletion @@ -265,56 +265,14 @@ def test_prune_ignore_protected(archivers, request): class MockArchive: def __init__(self, ts, id): - self.ts = ts + # Real archive objects have UTC zoned timestamps + self.ts = ts.replace(tzinfo=timezone.utc) self.id = id def __repr__(self): return f"{self.id}: {self.ts.isoformat()}" -# This is the local timezone of the system running the tests. -# We need this e.g. to construct archive timestamps for the prune tests, -# because borg prune operates in the local timezone (it first converts the -# archive timestamp to the local timezone). So, if we want the y/m/d/h/m/s -# values which prune uses to be exactly the ones we give [and NOT shift them -# by tzoffset], we need to give the timestamps in the same local timezone. 
-# Please note that the timestamps in a real borg archive or manifest are -# stored in UTC timezone. -local_tz = datetime.now(tz=timezone.utc).astimezone(tz=None).tzinfo - - -def test_prune_within(): - def subset(lst, indices): - return {lst[i] for i in indices} - - def dotest(test_archives, within, indices): - for ta in test_archives, reversed(test_archives): - kept_because = {} - keep = prune_within(ta, interval(within), kept_because) - assert set(keep) == subset(test_archives, indices) - assert all("within" == kept_because[a.id][0] for a in keep) - - # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours - test_offsets = [60, 90 * 60, 150 * 60, 210 * 60, 25 * 60 * 60, 49 * 60 * 60] - now = datetime.now(timezone.utc) - test_dates = [now - timedelta(seconds=s) for s in test_offsets] - test_archives = [MockArchive(date, i) for i, date in enumerate(test_dates)] - - dotest(test_archives, "15S", []) - dotest(test_archives, "2M", [0]) - dotest(test_archives, "1H", [0]) - dotest(test_archives, "2H", [0, 1]) - dotest(test_archives, "3H", [0, 1, 2]) - dotest(test_archives, "24H", [0, 1, 2, 3]) - dotest(test_archives, "26H", [0, 1, 2, 3, 4]) - dotest(test_archives, "2d", [0, 1, 2, 3, 4]) - dotest(test_archives, "50H", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "3d", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1w", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1m", [0, 1, 2, 3, 4, 5]) - dotest(test_archives, "1y", [0, 1, 2, 3, 4, 5]) - - @pytest.mark.parametrize( "rule,num_to_keep,expected_ids", [ @@ -334,26 +292,26 @@ def subset(lst, ids): archives = [ # years apart - MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=local_tz), 1), - MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=local_tz), 2), - MockArchive(datetime(2017, 1, 1, 10, 0, 0, tzinfo=local_tz), 3), + MockArchive(datetime(2015, 1, 1, 10, 0, 0), 1), + MockArchive(datetime(2016, 1, 1, 10, 0, 0), 2), + MockArchive(datetime(2017, 1, 1, 10, 0, 0), 3), # months apart - MockArchive(datetime(2017, 2, 1, 10, 
0, 0, tzinfo=local_tz), 4), - MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=local_tz), 5), + MockArchive(datetime(2017, 2, 1, 10, 0, 0), 4), + MockArchive(datetime(2017, 3, 1, 10, 0, 0), 5), # days apart - MockArchive(datetime(2017, 3, 2, 10, 0, 0, tzinfo=local_tz), 6), - MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=local_tz), 7), - MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=local_tz), 8), + MockArchive(datetime(2017, 3, 2, 10, 0, 0), 6), + MockArchive(datetime(2017, 3, 3, 10, 0, 0), 7), + MockArchive(datetime(2017, 3, 4, 10, 0, 0), 8), # minutes apart - MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=local_tz), 9), - MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=local_tz), 10), + MockArchive(datetime(2017, 10, 1, 9, 45, 0), 9), + MockArchive(datetime(2017, 10, 1, 9, 55, 0), 10), # seconds apart - MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=local_tz), 11), - MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=local_tz), 12), - MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=local_tz), 13), + MockArchive(datetime(2017, 10, 1, 10, 0, 1), 11), + MockArchive(datetime(2017, 10, 1, 10, 0, 3), 12), + MockArchive(datetime(2017, 10, 1, 10, 0, 5), 13), ] kept_because = {} - keep = prune_split(archives, rule, num_to_keep, kept_because) + keep = prune_split(archives, rule, num_to_keep, None, kept_because) assert set(keep) == subset(archives, expected_ids) for item in keep: @@ -366,17 +324,17 @@ def subset(lst, ids): archives = [ # oldest backup, but not last in its year - MockArchive(datetime(2018, 1, 1, 10, 0, 0, tzinfo=local_tz), 1), + MockArchive(datetime(2018, 1, 1, 10, 0, 0), 1), # an interim backup - MockArchive(datetime(2018, 12, 30, 10, 0, 0, tzinfo=local_tz), 2), + MockArchive(datetime(2018, 12, 30, 10, 0, 0), 2), # year-end backups - MockArchive(datetime(2018, 12, 31, 10, 0, 0, tzinfo=local_tz), 3), - MockArchive(datetime(2019, 12, 31, 10, 0, 0, tzinfo=local_tz), 4), + MockArchive(datetime(2018, 12, 31, 10, 0, 0), 3), + 
MockArchive(datetime(2019, 12, 31, 10, 0, 0), 4), ] # Keep oldest when retention target can't otherwise be met kept_because = {} - keep = prune_split(archives, "yearly", 3, kept_because) + keep = prune_split(archives, "yearly", 3, None, kept_because) assert set(keep) == subset(archives, [1, 3, 4]) assert kept_because[1][0] == "yearly[oldest]" @@ -385,7 +343,7 @@ def subset(lst, ids): # Otherwise, prune it kept_because = {} - keep = prune_split(archives, "yearly", 2, kept_because) + keep = prune_split(archives, "yearly", 2, None, kept_because) assert set(keep) == subset(archives, [3, 4]) assert kept_because[3][0] == "yearly" @@ -396,7 +354,229 @@ def test_prune_split_no_archives(): archives = [] kept_because = {} - keep = prune_split(archives, "yearly", 3, kept_because) + keep = prune_split(archives, "yearly", 3, None, kept_because) assert keep == [] assert kept_because == {} + + +def test_prune_keep_last_same_second(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + cmd(archiver, "create", "test1", src_dir) + cmd(archiver, "create", "test2", src_dir) + output = cmd(archiver, "prune", "--list", "--dry-run", "--keep-last=2") + # Both archives are kept even though they have the same timestamp to the second. Would previously have failed with + # old behavior of --keep-last. Archives sorted on seconds, order is undefined. 
+ assert re.search(r"Keeping archive \(rule: last #\d\):\s+test1", output) + assert re.search(r"Keeping archive \(rule: last #\d\):\s+test2", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) # Non-leap year ending on a Sunday +def test_prune_keep_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 31, 23, 59, 59) + _create_archive_ts(archiver, "test-2", 2023, 12, 31, 23, 59, 59) + _create_archive_ts(archiver, "test-3", 2023, 12, 31, 23, 59, 58) + for keep_arg in ["--keep=2", "--keep=1S"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: keep #\d\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) +def test_prune_keep_int_or_interval_zero(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test", 2023, 12, 31, 23, 59, 59) + for keep_arg in ["--keep=0", "--keep=0S"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Would prune:\s+test", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 59, tzinfo=None)) +def test_prune_keep_secondly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 31, 23, 59, 58) + _create_archive_ts(archiver, "test-2", 2023, 12, 31, 23, 59, 57, 1) + _create_archive_ts(archiver, "test-3", 2023, 12, 31, 23, 59, 57) + _create_archive_ts(archiver, "test-4", 2023, 12, 31, 23, 59, 56, 999999) + for keep_arg in ["--keep-secondly=2", "--keep-secondly=2S"]: + output = cmd(archiver, "prune", "--list", 
"--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: secondly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: secondly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Would prune:\s+test-4", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 59, 0, tzinfo=None)) +def test_prune_keep_minutely_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 31, 23, 58) + _create_archive_ts(archiver, "test-2", 2023, 12, 31, 23, 57, 1) + _create_archive_ts(archiver, "test-3", 2023, 12, 31, 23, 57) + _create_archive_ts(archiver, "test-4", 2023, 12, 31, 23, 56, 0, 1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 12, 31, 23, 56) + for keep_arg in ["--keep-minutely=3", "--keep-minutely=3M"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: minutely #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: minutely #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: minutely #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 23, 0, 0, tzinfo=None)) +def test_prune_keep_hourly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 31, 22) + _create_archive_ts(archiver, "test-2", 2023, 12, 31, 21, us=1) + _create_archive_ts(archiver, "test-3", 2023, 12, 31, 21) + _create_archive_ts(archiver, "test-4", 2023, 12, 31, 20, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 12, 31, 20) + for keep_arg in ["--keep-hourly=3", "--keep-hourly=3H"]: + output = cmd(archiver, "prune", 
"--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: hourly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: hourly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: hourly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_daily_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 30) + _create_archive_ts(archiver, "test-2", 2023, 12, 29, S=1) + _create_archive_ts(archiver, "test-3", 2023, 12, 29) + _create_archive_ts(archiver, "test-4", 2023, 12, 28, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 12, 28) + for keep_arg in ["--keep-daily=3", "--keep-daily=3d"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: daily #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: daily #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: daily #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_weekly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 12, 24) + _create_archive_ts(archiver, "test-2", 2023, 12, 17, us=1) + _create_archive_ts(archiver, "test-3", 2023, 12, 17) + _create_archive_ts(archiver, "test-4", 2023, 12, 10, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 12, 10) + for keep_arg in ["--keep-weekly=3", "--keep-weekly=3w"]: + output = cmd(archiver, "prune", 
"--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: weekly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: weekly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: weekly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_monthly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 11, 30) + _create_archive_ts(archiver, "test-2", 2023, 10, 30, us=1) # Month defined as 31 days, so not Oct 31st + _create_archive_ts(archiver, "test-3", 2023, 10, 30) + _create_archive_ts(archiver, "test-4", 2023, 9, 29, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 9, 29) + for keep_arg in ["--keep-monthly=3", "--keep-monthly=3m"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: monthly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: monthly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: monthly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +# 2023-12-31 is Sunday, week 52. Makes these week calculations a little easier. 
+@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_13weekly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 10, 1) + _create_archive_ts(archiver, "test-2", 2023, 7, 2, us=1) + _create_archive_ts(archiver, "test-3", 2023, 7, 2) + _create_archive_ts(archiver, "test-4", 2023, 4, 2, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 4, 2) + for keep_arg in ["--keep-13weekly=3", "--keep-13weekly=39w"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: quarterly_13weekly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_3monthly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2023, 9, 30) + _create_archive_ts(archiver, "test-2", 2023, 6, 30, us=1) + _create_archive_ts(archiver, "test-3", 2023, 6, 30) + _create_archive_ts(archiver, "test-4", 2023, 3, 31, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2023, 3, 31) + for keep_arg in ["--keep-3monthly=3", f"--keep-3monthly={(datetime.now()-datetime(2023, 3, 31)).days}d"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: quarterly_3monthly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert 
re.search(r"Keeping archive \(rule: quarterly_3monthly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +@freeze_time(datetime(2023, 12, 31, 0, 0, 0, tzinfo=None)) +def test_prune_keep_yearly_int_or_interval(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + _create_archive_ts(archiver, "test-1", 2022, 12, 31) + _create_archive_ts(archiver, "test-2", 2021, 12, 31, us=1) + _create_archive_ts(archiver, "test-3", 2021, 12, 31) + _create_archive_ts(archiver, "test-4", 2020, 12, 31, us=1) # Last possible microsecond + _create_archive_ts(archiver, "test-5", 2020, 12, 31) + for keep_arg in ["--keep-yearly=3", "--keep-yearly=3y"]: + output = cmd(archiver, "prune", "--list", "--dry-run", keep_arg) + assert re.search(r"Keeping archive \(rule: yearly #1\):\s+test-1", output) + assert re.search(r"Keeping archive \(rule: yearly #2\):\s+test-2", output) + assert re.search(r"Would prune:\s+test-3", output) + assert re.search(r"Keeping archive \(rule: yearly #3\):\s+test-4", output) + assert re.search(r"Would prune:\s+test-5", output) + + +def test_prune_no_args(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + with pytest.raises(CommandError) as error: + cmd(archiver, "prune") + output = str(error.value) + assert re.search(r"At least one of the .* settings must be specified.", output) + assert re.search(r"keep(?!-)", output) + flags = [ + "last", + "within", + "secondly", + "minutely", + "hourly", + "daily", + "weekly", + "monthly", + "yearly", + "13weekly", + "3monthly", + ] + for flag in flags: + assert f"keep-{flag}" in output diff --git a/src/borg/testsuite/helpers/parseformat_test.py b/src/borg/testsuite/helpers/parseformat_test.py index ef39e6714c..256bc07199 100644 --- a/src/borg/testsuite/helpers/parseformat_test.py +++ b/src/borg/testsuite/helpers/parseformat_test.py @@ -1,7 +1,8 @@ import base64 import os 
+import re from argparse import ArgumentTypeError -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone import pytest @@ -16,6 +17,7 @@ format_file_size, parse_file_size, interval, + int_or_interval, partial_format, clean_lines, format_line, @@ -351,6 +353,7 @@ def test_format_timedelta(): @pytest.mark.parametrize( "timeframe, num_secs", [ + ("0S", 0), ("5S", 5), ("2M", 2 * 60), ("1H", 60 * 60), @@ -367,9 +370,9 @@ def test_interval(timeframe, num_secs): @pytest.mark.parametrize( "invalid_interval, error_tuple", [ - ("H", ('Invalid number "": expected positive integer',)), - ("-1d", ('Invalid number "-1": expected positive integer',)), - ("food", ('Invalid number "foo": expected positive integer',)), + ("H", ('Invalid number "": expected nonnegative integer',)), + ("-1d", ('Invalid number "-1": expected nonnegative integer',)), + ("food", ('Invalid number "foo": expected nonnegative integer',)), ], ) def test_interval_time_unit(invalid_interval, error_tuple): @@ -378,10 +381,49 @@ def test_interval_time_unit(invalid_interval, error_tuple): assert exc.value.args == error_tuple -def test_interval_number(): +@pytest.mark.parametrize( + "invalid_input, error_regex", + [ + ("x", r'^Unexpected time unit "x": choose from'), + ("-1t", r'^Unexpected time unit "t": choose from'), + ("fool", r'^Unexpected time unit "l": choose from'), + ("abc", r'^Unexpected time unit "c": choose from'), + (" abc ", r'^Unexpected time unit " ": choose from'), + ], +) +def test_interval_invalid_time_format(invalid_input, error_regex): + with pytest.raises(ArgumentTypeError) as exc: + interval(invalid_input) + assert re.search(error_regex, exc.value.args[0]) + + +@pytest.mark.parametrize( + "input, result", + [ + ("0", 0), + ("5", 5), + (" 999 ", 999), + ("0S", timedelta(seconds=0)), + ("5S", timedelta(seconds=5)), + ("1m", timedelta(days=31)), + ], +) +def test_int_or_interval(input, result): + assert int_or_interval(input) == result + + 
+@pytest.mark.parametrize( + "invalid_input, error_regex", + [ + ("H", r"Value is neither an integer nor an interval:"), + ("-1d", r"Value is neither an integer nor an interval:"), + ("food", r"Value is neither an integer nor an interval:"), + ], +) +def test_int_or_interval_time_unit(invalid_input, error_regex): with pytest.raises(ArgumentTypeError) as exc: - interval("5") - assert exc.value.args == ('Unexpected time unit "5": choose from y, m, w, d, H, M, S',) + int_or_interval(invalid_input) + assert re.search(error_regex, exc.value.args[0]) def test_parse_timestamp():