Skip to content

Prune by either int or interval for all retention policies #8775

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.d/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ pytest
pytest-xdist
pytest-cov
pytest-benchmark
freezegun
Cython
pre-commit
150 changes: 87 additions & 63 deletions src/borg/archiver/prune_cmd.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,49 @@
import argparse
from collections import OrderedDict
from datetime import datetime, timezone, timedelta
from datetime import datetime, timezone
import logging
from operator import attrgetter
import os
import itertools

from ._common import with_repository, Highlander
from ..archive import Archive
from ..cache import Cache
from ..constants import * # NOQA
from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error
from ..helpers import archivename_validator
from ..helpers import interval, int_or_interval, sig_int, archivename_validator
from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error
from ..manifest import Manifest

from ..logger import create_logger

logger = create_logger()


def prune_within(archives, seconds, kept_because):
    """Return the archives whose timestamp is newer than *seconds* seconds ago.

    Every returned archive is also recorded in *kept_because*, keyed by its id,
    as ``("within", position)`` where position is its 1-based rank among the
    archives kept by this rule (in iteration order of *archives*).
    """
    cutoff = datetime.now(timezone.utc) - timedelta(seconds=seconds)
    recent = [archive for archive in archives if archive.ts > cutoff]
    for position, archive in enumerate(recent, start=1):
        kept_because[archive.id] = ("within", position)
    return recent
# The *_period_func functions create period grouping keys that group together archives falling within a certain
# period. Among the archives in each of these groups, only the latest (by creation timestamp) is kept.


def default_period_func(pattern):
def unique_period_func():
    """Build a period function that returns a fresh key on every call.

    The returned callable ignores its argument and hands out consecutive
    integers from a private counter, so no two archives share a period key.
    """
    serial_numbers = itertools.count()

    def unique_values(_archive):
        """Return the next counter value, so that each archive ends up in a group of its own"""
        return next(serial_numbers)

    return unique_values


def pattern_period_func(pattern):
    """Build a period function that formats an archive's timestamp with *pattern*.

    *pattern* is a strftime format string; archives whose formatted local
    timestamps are equal fall into the same period group.
    """

    def inner(archive):
        """Return the archive's creation timestamp rendered with the given strftime pattern"""
        # convert to the local timezone before formatting
        local_ts = archive.ts.astimezone()
        return local_ts.strftime(pattern)

    return inner


def quarterly_13weekly_period_func(a):
"""Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp"""
(year, week, _) = a.ts.astimezone().isocalendar() # local time
if week <= 13:
# Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7)
Expand All @@ -60,6 +65,7 @@ def quarterly_13weekly_period_func(a):


def quarterly_3monthly_period_func(a):
"""Group archives by extracting the 3-month quarter from their creation timestamp"""
lt = a.ts.astimezone() # local time
if lt.month <= 3:
# 1-1 to 3-31
Expand All @@ -77,20 +83,36 @@ def quarterly_3monthly_period_func(a):

PRUNING_PATTERNS = OrderedDict(
[
("secondly", default_period_func("%Y-%m-%d %H:%M:%S")),
("minutely", default_period_func("%Y-%m-%d %H:%M")),
("hourly", default_period_func("%Y-%m-%d %H")),
("daily", default_period_func("%Y-%m-%d")),
("weekly", default_period_func("%G-%V")),
("monthly", default_period_func("%Y-%m")),
# Each archive is considered for keeping
("within", unique_period_func()),
("last", unique_period_func()),
("keep", unique_period_func()),
# The last archive (by creation timestamp) within each period group is considered for keeping
("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")),
("minutely", pattern_period_func("%Y-%m-%d %H:%M")),
("hourly", pattern_period_func("%Y-%m-%d %H")),
("daily", pattern_period_func("%Y-%m-%d")),
("weekly", pattern_period_func("%G-%V")),
("monthly", pattern_period_func("%Y-%m")),
("quarterly_13weekly", quarterly_13weekly_period_func),
("quarterly_3monthly", quarterly_3monthly_period_func),
("yearly", default_period_func("%Y")),
("yearly", pattern_period_func("%Y")),
]
)


def prune_split(archives, rule, n, kept_because=None):
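# Datetime cannot represent times before datetime.min; this timezone-aware minimum serves as the earliest cutoff.
# NOTE(review): this comment used to say a day is added to allow for time zone offset, but no day is added here.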
DATETIME_MIN_WITH_ZONE = datetime.min.replace(tzinfo=timezone.utc)


def prune_split(archives, rule, n_or_interval, base_timestamp, kept_because=None):
if isinstance(n_or_interval, int):
# If no interval, assume given interval is "infinite"
n, earliest_timestamp = n_or_interval, DATETIME_MIN_WITH_ZONE
else:
# If no n, assume given n is "infinite"
n, earliest_timestamp = -1, base_timestamp - n_or_interval

last = None
keep = []
period_func = PRUNING_PATTERNS[rule]
Expand All @@ -101,27 +123,33 @@ def prune_split(archives, rule, n, kept_because=None):

a = None
for a in sorted(archives, key=attrgetter("ts"), reverse=True):
if a.ts <= earliest_timestamp or len(keep) == n:
break
period = period_func(a)
if period != last:
last = period
if a.id not in kept_because:
keep.append(a)
kept_because[a.id] = (rule, len(keep))
if len(keep) == n:
break

# Keep oldest archive if we didn't reach the target retention count
if a is not None and len(keep) < n and a.id not in kept_because:
keep.append(a)
kept_because[a.id] = (rule + "[oldest]", len(keep))

return keep


class PruneMixIn:
@with_repository(compatibility=(Manifest.Operation.DELETE,))
def do_prune(self, args, repository, manifest):
"""Prune repository archives according to specified rules"""
if not any(
(
if all(
e is None
for e in (
args.keep,
args.within,
args.last,
args.secondly,
args.minutely,
args.hourly,
Expand All @@ -131,11 +159,10 @@ def do_prune(self, args, repository, manifest):
args.quarterly_13weekly,
args.quarterly_3monthly,
args.yearly,
args.within,
)
):
raise CommandError(
'At least one of the "keep-within", "keep-last", '
'At least one of the "keep", "keep-within", "keep-last", '
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
'"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
'or "keep-yearly" settings must be specified.'
Expand All @@ -159,15 +186,12 @@ def do_prune(self, args, repository, manifest):
# (<rulename>, <how many archives were kept by this rule so far >)
kept_because = {}

# find archives which need to be kept because of the keep-within rule
if args.within:
keep += prune_within(archives, args.within, kept_because)

base_timestamp = datetime.now().astimezone()
# find archives which need to be kept because of the various time period rules
for rule in PRUNING_PATTERNS.keys():
num = getattr(args, rule, None)
if num is not None:
keep += prune_split(archives, rule, num, kept_because)
num_or_interval = getattr(args, rule, None)
if num_or_interval is not None:
keep += prune_split(archives, rule, num_or_interval, base_timestamp, kept_because)

to_delete = set(archives) - set(keep)
with Cache(repository, manifest, iec=args.iec) as cache:
Expand Down Expand Up @@ -310,81 +334,81 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
help="keep all archives within this time interval",
)
subparser.add_argument(
"--keep-last",
"--keep-last", dest="last", type=int, action=Highlander, help="number of archives to keep"
)
subparser.add_argument(
"--keep",
dest="keep",
type=int_or_interval,
action=Highlander,
help="number or time interval of archives to keep",
)
subparser.add_argument(
"--keep-secondly",
dest="secondly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of secondly archives to keep",
help="number or time interval of secondly archives to keep",
)
subparser.add_argument(
"--keep-minutely",
dest="minutely",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of minutely archives to keep",
help="number or time interval of minutely archives to keep",
)
subparser.add_argument(
"-H",
"--keep-hourly",
dest="hourly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of hourly archives to keep",
help="number or time interval of hourly archives to keep",
)
subparser.add_argument(
"-d",
"--keep-daily",
dest="daily",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of daily archives to keep",
help="number or time interval of daily archives to keep",
)
subparser.add_argument(
"-w",
"--keep-weekly",
dest="weekly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of weekly archives to keep",
help="number or time interval of weekly archives to keep",
)
subparser.add_argument(
"-m",
"--keep-monthly",
dest="monthly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of monthly archives to keep",
help="number or time interval of monthly archives to keep",
)
quarterly_group = subparser.add_mutually_exclusive_group()
quarterly_group.add_argument(
"--keep-13weekly",
dest="quarterly_13weekly",
type=int,
default=0,
help="number of quarterly archives to keep (13 week strategy)",
type=int_or_interval,
help="number or time interval of quarterly archives to keep (13 week strategy)",
)
quarterly_group.add_argument(
"--keep-3monthly",
dest="quarterly_3monthly",
type=int,
default=0,
help="number of quarterly archives to keep (3 month strategy)",
type=int_or_interval,
help="number or time interval of quarterly archives to keep (3 month strategy)",
)
subparser.add_argument(
"-y",
"--keep-yearly",
dest="yearly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of yearly archives to keep",
help="number or time interval of yearly archives to keep",
)
define_archive_filters_group(subparser, sort_by=False, first_last=False)
subparser.add_argument(
Expand Down
2 changes: 2 additions & 0 deletions src/borg/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@
EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no

ISO_FORMAT_NO_USECS = "%Y-%m-%dT%H:%M:%S"
ISO_FORMAT_NO_USECS_ZONE = ISO_FORMAT_NO_USECS + "%z"
ISO_FORMAT = ISO_FORMAT_NO_USECS + ".%f"
ISO_FORMAT_ZONE = ISO_FORMAT + "%z"

DASHES = "-" * 78

Expand Down
2 changes: 1 addition & 1 deletion src/borg/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper
from .parseformat import bin_to_hex, hex_to_bin, safe_encode, safe_decode
from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd
from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval
from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval, int_or_interval
from .parseformat import PathSpec, SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper
from .parseformat import format_file_size, parse_file_size, FileSize, parse_storage_quota
from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Location, text_validator
Expand Down
18 changes: 15 additions & 3 deletions src/borg/helpers/parseformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import uuid
from typing import Dict, Set, Tuple, ClassVar, Any, TYPE_CHECKING, Literal
from collections import OrderedDict
from datetime import datetime, timezone
from datetime import datetime, timezone, timedelta
from functools import partial
from string import Formatter

Expand Down Expand Up @@ -154,12 +154,24 @@ def interval(s):
except ValueError:
seconds = -1

if seconds <= 0:
raise argparse.ArgumentTypeError(f'Invalid number "{number}": expected positive integer')
if seconds < 0:
raise argparse.ArgumentTypeError(f'Invalid number "{number}": expected nonnegative integer')

return seconds


def int_or_interval(s):
    """Parse *s* as a plain integer or, failing that, as a time interval.

    Returns an ``int`` when ``int(s)`` succeeds. Otherwise returns a
    ``timedelta`` built from the number of seconds that ``interval(s)`` yields.

    Raises argparse.ArgumentTypeError if *s* is neither an integer nor an interval.
    """
    try:
        return int(s)
    except ValueError:
        pass  # not an integer, fall through to interval parsing

    try:
        return timedelta(seconds=interval(s))
    except argparse.ArgumentTypeError as e:
        # chain explicitly so the underlying interval error stays attached as __cause__
        raise argparse.ArgumentTypeError(f"Value is neither an integer nor an interval: {e}") from e


def ChunkerParams(s):
params = s.strip().split(",")
count = len(params)
Expand Down
Loading
Loading