From d6271709d2deb980804f92e75f9b5cb600dc42ed Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Mon, 24 Oct 2016 12:54:48 +0200 Subject: [PATCH 1/2] non-recursive extract and list Sometimes it makes sense to extract or list a directory contained in an archive without also doing the same for the content of the directory, i.e. allowing -n (= --no-recursion) in combination with the x and t modes. bsdtar uses the match functionality in libarchive to track include matches. A new libarchive API call archive_match_include_directories_recursively() gets introduced to influence the matching behavior, with the default behavior as before. Non-recursive matching can be achieved by anchoring the path match at both start and end. Asking for a directory which itself isn't in the archive when in non-recursive mode is an error and handled by the existing mechanism for tracking unused inclusion entries. Upstream-Status: Submitted [https://github.com/libarchive/libarchive/pull/812] Signed-off-by: Patrick Ohly --- libarchive/archive.h | 2 ++ libarchive/archive_match.c | 30 +++++++++++++++++++++++++++++- tar/bsdtar.1 | 3 +-- tar/bsdtar.c | 12 ++++++++++-- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/libarchive/archive.h b/libarchive/archive.h index ff401e9..38d8746 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -1085,6 +1085,8 @@ __LA_DECL int archive_match_excluded(struct archive *, */ __LA_DECL int archive_match_path_excluded(struct archive *, struct archive_entry *); +/* Control recursive inclusion of directory content when directory is included. Default on. */ +__LA_DECL int archive_match_include_directories_recursively(struct archive *, int _enabled); /* Add exclusion pathname pattern. */ __LA_DECL int archive_match_exclude_pattern(struct archive *, const char *); __LA_DECL int archive_match_exclude_pattern_w(struct archive *, diff --git a/libarchive/archive_match.c b/libarchive/archive_match.c index 0719cbd..6d03a65 100644 --- a/libarchive/archive_match.c +++ b/libarchive/archive_match.c @@ -93,6 +93,9 @@ struct archive_match { /* exclusion/inclusion set flag. */ int setflag; + /* Recursively include directory content? */ + int recursive_include; + /* * Matching filename patterns. */ @@ -223,6 +226,7 @@ archive_match_new(void) return (NULL); a->archive.magic = ARCHIVE_MATCH_MAGIC; a->archive.state = ARCHIVE_STATE_NEW; + a->recursive_include = 1; match_list_init(&(a->inclusions)); match_list_init(&(a->exclusions)); __archive_rb_tree_init(&(a->exclusion_tree), &rb_ops_mbs); @@ -471,6 +475,28 @@ archive_match_path_excluded(struct archive *_a, } /* + * When recursive inclusion of directory content is enabled, + * an inclusion pattern that matches a directory will also + * include everything beneath that directory. Enabled by default. + * + * For compatibility with GNU tar, exclusion patterns always + * match if a subset of the full patch matches (i.e., they are + * are not rooted at the beginning of the path) and thus there + * is no corresponding non-recursive exclusion mode. + */ +int +archive_match_include_directories_recursively(struct archive *_a, int _enabled) +{ + struct archive_match *a; + + archive_check_magic(_a, ARCHIVE_MATCH_MAGIC, + ARCHIVE_STATE_NEW, "archive_match_include_directories_recursively"); + a = (struct archive_match *)_a; + a->recursive_include = _enabled; + return (ARCHIVE_OK); +} + +/* * Utilty functions to get statistic information for inclusion patterns. */ int @@ -781,7 +807,9 @@ static int match_path_inclusion(struct archive_match *a, struct match *m, int mbs, const void *pn) { - int flag = PATHMATCH_NO_ANCHOR_END; + int flag = a->recursive_include ? + PATHMATCH_NO_ANCHOR_END : /* Prefix match is good enough. */ + 0; /* Full match required. */ int r; if (mbs) { diff --git a/tar/bsdtar.1 b/tar/bsdtar.1 index 9eadaaf..f5d6457 100644 --- a/tar/bsdtar.1 +++ b/tar/bsdtar.1 @@ -346,8 +346,7 @@ In extract or list modes, this option is ignored. Do not extract modification time. By default, the modification time is set to the time stored in the archive. .It Fl n , Fl Fl norecurse , Fl Fl no-recursion -(c, r, u modes only) -Do not recursively archive the contents of directories. +Do not recursively archive (c, r, u), extract (x) or list (t) the contents of directories. .It Fl Fl newer Ar date (c, r, u modes only) Only include files and directories newer than the specified date. diff --git a/tar/bsdtar.c b/tar/bsdtar.c index 93bf60a..001d5ed 100644 --- a/tar/bsdtar.c +++ b/tar/bsdtar.c @@ -738,8 +738,6 @@ main(int argc, char **argv) break; } } - if (bsdtar->option_no_subdirs) - only_mode(bsdtar, "-n", "cru"); if (bsdtar->option_stdout) only_mode(bsdtar, "-O", "xt"); if (bsdtar->option_unlink_first) @@ -788,6 +786,16 @@ main(int argc, char **argv) only_mode(bsdtar, buff, "cru"); } + /* + * When creating an archive from a directory tree, the directory + * walking code will already avoid entering directories when + * recursive inclusion of directory content is disabled, therefore + * changing the matching behavior has no effect for creation modes. + * It is relevant for extraction or listing. + */ + archive_match_include_directories_recursively(bsdtar->matching, + !bsdtar->option_no_subdirs); + /* Filename "-" implies stdio. */ if (strcmp(bsdtar->filename, "-") == 0) bsdtar->filename = NULL; -- 2.1.4