From 31e91e8bcdb6a43f3002138a7d73f6c3f12a30c7 Mon Sep 17 00:00:00 2001 From: Axel Burri Date: Tue, 29 Sep 2015 14:07:58 +0200 Subject: [PATCH] btrbk: enforce preserve of all raw incremental images with new FORCE_PRESERVE tag; changed raw target names; added "timestamp_format short|long" configuration option (YYYYMMDD or YYYYMMDDHHMM) --- ChangeLog | 1 + btrbk | 178 +++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 125 insertions(+), 54 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9c983fb..d0b7b15 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,7 @@ btrbk-current * Added configuration option "ssh_cipher_spec" (close: #47). * Added "target raw", with GnuPG and compression support (experimental). + * Added configuration option "timestamp_format short|long". * Bugfix: correctly handle "incremental no" option. * Hardened ssh_filter_btrbk.sh script: fine-grained access control, restrict-path option, sudo option (close: #45). diff --git a/btrbk b/btrbk index 41c3eb2..68559ea 100755 --- a/btrbk +++ b/btrbk @@ -43,7 +43,7 @@ use strict; use warnings FATAL => qw( all ); use Carp qw(confess); -use Date::Calc qw(Today Delta_Days Day_of_Week); +use Date::Calc qw(Today_and_Now Delta_Days Day_of_Week); use Getopt::Long qw(GetOptions); use Data::Dumper; @@ -59,8 +59,9 @@ my $ip_addr_match = qr/(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([ my $host_name_match = qr/(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])/; my $file_match = qr/[0-9a-zA-Z_@\+\-\.\/]+/; # note: ubuntu uses '@' in the subvolume layout: my $ssh_prefix_match = qr/ssh:\/\/($ip_addr_match|$host_name_match)/; -my $snapshot_postfix_match = qr/\.[0-9]{8}(_[0-9]+)?/; -my $uuid_match = qr/[0-9a-f\-]+/; # simple, also matches empty ('-') uuid +my $uuid_match = qr/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/;
+my $timestamp_postfix_match = qr/\.(?<YYYY>[0-9]{4})(?<MM>[0-9]{2})(?<DD>[0-9]{2})(T(?<hh>[0-9]{2})(?<mm>[0-9]{2}))?(_(?<NN>[0-9]+))?/; # matches ".YYYYMMDD[Thhmm][_NN]", captures are read via %+ in parse_filename() +my $raw_postfix_match = qr/--(?<received_uuid>$uuid_match)(\@(?<parent_uuid>$uuid_match))?\.btrfs?(\.(?<compress>(gz|bz2|xz)))?(\.(?<encrypt>gpg))?/; # matches "--<received_uuid>[@<parent_uuid>].btrfs[.gz|bz2|xz][.gpg]"; the "@<parent_uuid>" part is optional (absent on full, non-incremental images) my $group_match = qr/[a-zA-Z0-9_:-]+/; my $ssh_cipher_match = qr/[a-z0-9][a-z0-9@.-]+/; @@ -69,6 +70,7 @@ my %day_of_week_map = ( monday => 1, tuesday => 2, wednesday => 3, thursday => 4 my %config_options = ( # NOTE: the parser always maps "no" to undef # NOTE: keys "volume", "subvolume" and "target" are hardcoded + timestamp_format => { default => "short", accept => [ "short", "long" ], context => [ "root", "volume", "subvolume" ] }, snapshot_dir => { default => undef, accept_file => { relative => 1 } }, snapshot_name => { default => undef, accept_file => { name_only => 1 }, context => [ "subvolume" ] }, # NOTE: defaults to the subvolume name (hardcoded) snapshot_create => { default => "always", accept => [ "no", "always", "ondemand", "onchange" ] }, @@ -239,7 +241,7 @@ sub vinfo($$) my $name = $url; $name =~ s/^.*\///; my %info = ( - URL => $url, + URL => $url, NAME => $name, ); @@ -1007,14 +1009,16 @@ sub btrfs_send_to_file($$$$;@) my $snapshot_path = $snapshot->{PATH} // die; my $target_path = $target->{PATH} // die; my $parent_path = $parent ? $parent->{PATH} : undef; - my $parent_uuid = $parent ? $parent->{uuid} : "-" ; + my $parent_uuid = $parent ? $parent->{uuid} : undef ; my $received_uuid = $snapshot->{uuid}; - $received_uuid = "__INSERT_SNAPSHOT_UUID_HERE__" if((not $received_uuid) && $dryrun); - die unless($parent_uuid); + $received_uuid = "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" if((not $received_uuid) && $dryrun); die unless($received_uuid); + die if($parent && !$parent_uuid); my $target_filename = $snapshot->{NAME} || die; - $target_filename .= ".$received_uuid.$parent_uuid.btrfs"; + $target_filename .= "--$received_uuid"; + $target_filename .= '@' . 
$parent_uuid if($parent_uuid); + $target_filename .= ".btrfs"; my %compress = ( gzip => { pipe => { cmd => [ 'gzip' ], name => 'gzip' }, postfix => '.gz' }, bzip2 => { pipe => { cmd => [ 'bzip2' ], name => 'bzip2' }, postfix => '.bz2' }, @@ -1272,7 +1276,7 @@ sub macro_send_receive($@) DEBUG "Fetching uuid of new snapshot: $snapshot->{PRINT}"; my $detail = btrfs_subvolume_detail($snapshot); die unless($detail->{uuid}); - vinfo_set_detail($snapshot, { uuid => $detail->{uuid} }); # TODO: add complete detail? + vinfo_set_detail($snapshot, { uuid => $detail->{uuid} }); } } @@ -1308,16 +1312,31 @@ sub macro_send_receive($@) } -sub get_date_tag($) +# returns { btrbk_date => [ yyyy, mm, dd, hh, mm, NN ] } or undef if the filename does not match +# fixed array length of 6; hh, mm and NN (postfix counter) individually default to 0 +sub parse_filename($$;$) { - my $name = shift; - $name =~ s/_([0-9]+)$//; - my $postfix_counter = $1 // 0; - my $date = undef; - if($name =~ /\.([0-9]{4})([0-9]{2})([0-9]{2})$/) { - $date = [ $1, $2, $3 ]; + my $file = shift; + my $name_match = shift; + my $raw_format = shift || 0; + my %raw_info; + if($raw_format) + { + return undef unless($file =~ /^\Q$name_match\E$timestamp_postfix_match$raw_postfix_match$/); + die unless($+{YYYY} && $+{MM} && $+{DD}); + return { btrbk_date => [ $+{YYYY}, $+{MM}, $+{DD}, ($+{hh} // 0), ($+{mm} // 0), ($+{NN} // 0) ], + received_uuid => $+{received_uuid} // die, + parent_uuid => $+{parent_uuid} // '-', + ENCRYPT => $+{encrypt} // "", + COMPRESS => $+{compress} // "", + }; + } + else + { + return undef unless($file =~ /^\Q$name_match\E$timestamp_postfix_match$/); + die unless($+{YYYY} && $+{MM} && $+{DD}); + return { btrbk_date => [ $+{YYYY}, $+{MM}, $+{DD}, ($+{hh} // 0), ($+{mm} // 0), ($+{NN} // 0) ] }; } - return ($date, $postfix_counter); } @@ -1482,10 +1501,12 @@ sub schedule(@) } # sort the schedule, ascending by date - my @sorted_schedule = sort { ($a->{date}->[0] <=> $b->{date}->[0]) || - ($a->{date}->[1] <=> $b->{date}->[1]) || - ($a->{date}->[2] <=> 
$b->{date}->[2]) || - ($a->{date_ext} <=> $b->{date_ext}) + my @sorted_schedule = sort { ($a->{btrbk_date}->[0] <=> $b->{btrbk_date}->[0]) || + ($a->{btrbk_date}->[1] <=> $b->{btrbk_date}->[1]) || + ($a->{btrbk_date}->[2] <=> $b->{btrbk_date}->[2]) || + ($a->{btrbk_date}->[3] <=> $b->{btrbk_date}->[3]) || + ($a->{btrbk_date}->[4] <=> $b->{btrbk_date}->[4]) || + ($a->{btrbk_date}->[5] <=> $b->{btrbk_date}->[5]) } @$schedule; # first, do our calendar calculations @@ -1495,7 +1516,7 @@ sub schedule(@) TRACE "last day before next $preserve_day_of_week is in $delta_days_to_eow_from_today days"; foreach my $href (@sorted_schedule) { - my @date = @{$href->{date}}; + my @date = @{$href->{btrbk_date}}[0..2]; # Date::Calc takes: @date = ( yy, mm, dd ) my $delta_days = Delta_Days(@date, @today); my $delta_days_to_eow = $delta_days + $delta_days_to_eow_from_today; { @@ -1510,7 +1531,7 @@ sub schedule(@) if($preserve_latest && (scalar @sorted_schedule)) { my $href = $sorted_schedule[-1]; - $href->{preserve} ||= "preserve forced: latest in list"; + $href->{preserve} ||= $preserve_latest; } # filter daily, weekly, monthly @@ -1603,7 +1624,8 @@ MAIN: Getopt::Long::Configure qw(gnu_getopt); $Data::Dumper::Sortkeys = 1; my $start_time = time; - my @today = Today(); + my @today_and_now = Today_and_Now(); + my @today = @today_and_now[0..2]; my ($config_cmdline, $quiet, $verbose, $preserve_backups, $resume_only); @@ -2120,6 +2142,7 @@ MAIN: } my %subvol_list; + my %parent_uuid_list; foreach my $file (split("\n", $ret)) { unless($file =~ /^$file_match$/) { @@ -2130,16 +2153,22 @@ MAIN: $config_target->{ABORTED} = "Unexpected result from 'find': file \"$file\" is not under \"$droot->{PATH}\""; last; } - unless($file =~ /^\Q$snapshot_basename\E$snapshot_postfix_match\.(?$uuid_match)\.(?$uuid_match)\.btrfs/) { - DEBUG "Skipping unrecognized file: \"$file\""; + my $filename_info = parse_filename($file, $snapshot_basename, 1); + unless($filename_info) { + DEBUG "Skipping file (not btrbk 
raw): \"$file\""; next; } - my $detail = { received_uuid => $+{received_uuid}, - parent_uuid => $+{parent_uuid}, - }; + + # Fake btrfs subvolume information (received_uuid, parent_uuid) from filename info. + # + # NOTE: parent_uuid in $filename_info is the "parent of the source subvolume", NOT the + # "parent of the received subvolume". We fake the real parent_uuid with the one from + # the filename here. my $subvol = vinfo_child($droot, $file); - vinfo_set_detail($subvol, $detail); + vinfo_set_detail($subvol, $filename_info); + $subvol_list{$file} = $subvol; + $parent_uuid_list{$filename_info->{parent_uuid}} = $subvol if($filename_info->{parent_uuid} ne '-'); } if($config_target->{ABORTED}) { WARN "Skipping target \"$droot->{PRINT}\": $config_target->{ABORTED}"; @@ -2149,6 +2178,25 @@ MAIN: $droot->{SUBVOL_LIST} = \%subvol_list; $droot->{REAL_URL} = $droot->{URL}; # ignore links here + # Make sure that incremental backup chains are never broken: + foreach my $subvol (values %subvol_list) + { + # If restoring a backup from raw btrfs images (using "incremental yes|strict"): + # "btrfs send -p parent source > svol.btrfs", the backups + # on the target will get corrupted (unusable!) as soon as + # any file in the chain gets deleted. 
+ # + # We need to make sure btrbk will NEVER delete those: + # - svol.<timestamp>--<received_uuid>.btrfs : root (full) image + # - svol.<timestamp>--<received_uuid>[@<parent_uuid>].btrfs : incremental image + + if(my $child = $parent_uuid_list{$subvol->{received_uuid}}) { + DEBUG "Found parent/child partners, forcing preserve of: \"$subvol->{PRINT}\", \"$child->{PRINT}\""; + $subvol->{FORCE_PRESERVE} = "preserve forced: parent of another raw target"; + $child->{FORCE_PRESERVE} ||= "preserve forced: child of another raw target"; + } + } + # TRACE(Data::Dumper->Dump([\%subvol_list], ["vinfo_raw_subvol_list{$droot}"])); } $config_target->{droot} = $droot; @@ -2290,7 +2338,6 @@ MAIN: # # create snapshots # - my $timestamp = sprintf("%04d%02d%02d", @today); foreach my $config_vol (@{$config->{VOLUME}}) { next if($config_vol->{ABORTED}); @@ -2341,6 +2388,9 @@ MAIN: } # find unique snapshot name + my $timestamp = ((config_key($config_subvol, "timestamp_format") eq "short") ? + sprintf("%04d%02d%02d", @today) : + sprintf("%04d%02d%02dT%02d%02d", @today_and_now[0..4])); # slice off seconds: a surplus sprintf argument warns, which is fatal under "use warnings FATAL" my @unconfirmed_target_name; my @lookup = keys %{vinfo_subvol_list($sroot)}; @lookup = grep s/^\Q$snapdir\E// , @lookup; @@ -2409,6 +2459,9 @@ MAIN: foreach my $child (sort { $a->{cgen} <=> $b->{cgen} } get_snapshot_children($sroot, $svol)) { + my $filename_info = parse_filename($child->{SUBVOL_PATH}, $snapdir . 
$snapshot_basename); + next unless($filename_info); # ignore non-btrbk files + if(scalar get_receive_targets($droot, $child)) { DEBUG "Found matching receive target, skipping: $child->{PRINT}"; } @@ -2420,9 +2473,10 @@ MAIN: } # check if the target would be preserved - my ($date, $date_ext) = get_date_tag($child->{SUBVOL_PATH}); - next unless($date && ($child->{SUBVOL_PATH} =~ /^\Q$snapdir$snapshot_basename\E$snapshot_postfix_match$/)); - push(@schedule, { value => $child, date => $date, date_ext => $date_ext }), + push(@schedule, { value => $child, + btrbk_date => $filename_info->{btrbk_date}, + preserve => $child->{FORCE_PRESERVE}, + }), } } @@ -2432,10 +2486,12 @@ MAIN: # add all present backups to schedule, with no value # these are needed for correct results of schedule() foreach my $vol (values %{vinfo_subvol_list($droot)}) { - next unless($vol->{SUBVOL_PATH} =~ /^\Q$snapshot_basename\E$snapshot_postfix_match$/); - my ($date, $date_ext) = get_date_tag($vol->{NAME}); - next unless($date); - push(@schedule, { value => undef, date => $date, date_ext => $date_ext }); + my $filename_info = parse_filename($vol->{SUBVOL_PATH}, $snapshot_basename, ($config_target->{target_type} eq "raw")); + next unless($filename_info); # ignore non-btrbk files + push(@schedule, { value => undef, + btrbk_date => $filename_info->{btrbk_date}, + preserve => $vol->{FORCE_PRESERVE}, + }); } my ($preserve, undef) = schedule( schedule => \@schedule, @@ -2517,7 +2573,8 @@ MAIN: my $svol = $config_subvol->{svol} || die; my $snapdir = config_key($config_subvol, "snapshot_dir", postfix => '/') // ""; my $snapshot_basename = config_key($config_subvol, "snapshot_name") // die; - my $preserve_latest = $config_subvol->{SNAPSHOT} ? 0 : 1; + my $preserve_latest_snapshot = $config_subvol->{SNAPSHOT} ? 
0 : "preserve forced: latest in list"; + my $preserve_latest_backup = $preserve_latest_snapshot; my $target_aborted = 0; foreach my $config_target (@{$config_subvol->{TARGET}}) @@ -2530,12 +2587,19 @@ MAIN: } next; } - if($config_target->{target_type} eq "raw") { - WARN "Preserving all backups (target_type=raw) in: $config_target->{droot}->{PRINT}"; - $target_aborted = 1; - next; - } my $droot = $config_target->{droot} || die; + if($config_target->{target_type} eq "raw") { + if(config_key($config_target, "incremental")) { + # In incremental mode, the latest backup is most certainly our parent. + # (see note on FORCE_PRESERVE above) + $preserve_latest_backup ||= "preserve forced: possibly parent of latest backup"; + # Note that we could check against $config_subvol->{SNAPSHOT}->{parent_uuid} to be certain, + # but this information is not available in $dryrun: + # foreach my $vol (values %{vinfo_subvol_list($droot)}) { + # $vol->{FORCE_PRESERVE} = 1 if($vol->{received_uuid} eq $config_subvol->{SNAPSHOT}->{parent_uuid}); + # } + } + } # # delete backups @@ -2543,15 +2607,19 @@ MAIN: INFO "Cleaning backups of subvolume \"$svol->{PRINT}\": $droot->{PRINT}/$snapshot_basename.*"; my @schedule; foreach my $vol (values %{vinfo_subvol_list($droot)}) { - next unless($vol->{SUBVOL_PATH} =~ /^\Q$snapshot_basename\E$snapshot_postfix_match$/); + my $filename_info = parse_filename($vol->{SUBVOL_PATH}, $snapshot_basename, ($config_target->{target_type} eq "raw")); + next unless($filename_info); # ignore non-btrbk files + # NOTE: checking received_uuid does not make much sense, as this received_uuid is propagated to snapshots # if($vol->{received_uuid} && ($vol->{received_uuid} eq '-')) { # INFO "Target subvolume is not a received backup, skipping deletion of: $vol->{PRINT}"; # next; # } - my ($date, $date_ext) = get_date_tag($vol->{NAME}); - next unless($date); - push(@schedule, { value => $vol, name => $vol->{PRINT}, date => $date, date_ext => $date_ext }); + push(@schedule, { 
value => $vol, + name => $vol->{PRINT}, + btrbk_date => $filename_info->{btrbk_date}, + preserve => $vol->{FORCE_PRESERVE} + }); } my (undef, $delete) = schedule( schedule => \@schedule, @@ -2560,7 +2628,7 @@ MAIN: preserve_daily => config_key($config_target, "target_preserve_daily"), preserve_weekly => config_key($config_target, "target_preserve_weekly"), preserve_monthly => config_key($config_target, "target_preserve_monthly"), - preserve_latest => $preserve_latest, + preserve_latest => $preserve_latest_backup, log_verbose => 1, ); my $ret = btrfs_subvolume_delete($delete, commit => config_key($config_target, "btrfs_commit_delete")); @@ -2588,10 +2656,12 @@ MAIN: INFO "Cleaning snapshots: $sroot->{PRINT}/$snapdir$snapshot_basename.*"; my @schedule; foreach my $vol (values %{vinfo_subvol_list($sroot)}) { - next unless($vol->{SUBVOL_PATH} =~ /^\Q$snapdir$snapshot_basename\E$snapshot_postfix_match$/); - my ($date, $date_ext) = get_date_tag($vol->{NAME}); - next unless($date); - push(@schedule, { value => $vol, name => $vol->{PRINT}, date => $date, date_ext => $date_ext }); + my $filename_info = parse_filename($vol->{SUBVOL_PATH}, $snapdir . $snapshot_basename); + next unless($filename_info); # ignore non-btrbk files + push(@schedule, { value => $vol, + name => $vol->{PRINT}, + btrbk_date => $filename_info->{btrbk_date} + }); } my (undef, $delete) = schedule( schedule => \@schedule, @@ -2600,7 +2670,7 @@ MAIN: preserve_daily => config_key($config_subvol, "snapshot_preserve_daily"), preserve_weekly => config_key($config_subvol, "snapshot_preserve_weekly"), preserve_monthly => config_key($config_subvol, "snapshot_preserve_monthly"), - preserve_latest => $preserve_latest, + preserve_latest => $preserve_latest_snapshot, log_verbose => 1, ); my $ret = btrfs_subvolume_delete($delete, commit => config_key($config_subvol, "btrfs_commit_delete"));