git: f00f8b4fbd26 - main - cp: fix -R recursion detection

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Wed, 06 Apr 2022 01:42:05 UTC
The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=f00f8b4fbd268a212687984e44daa3e0d0a16b87

commit f00f8b4fbd268a212687984e44daa3e0d0a16b87
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2022-04-06 01:40:53 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2022-04-06 01:41:00 +0000

    cp: fix -R recursion detection
    
    The correct logic is a lot simpler than the previous iteration.  We
    record the base fts_name to avoid having to worry about whether we
    needed the root symlink name or not (as applicable), then we can simply
    shift all of that logic to after path translation to make it less
    fragile.
    
    If we're copying to DNE, then we'll have swapped out the NULL root_stat
    pointer and then attempted to recurse on it.  The previously nonexistent
    directory shouldn't exist at all in the new structure, so just back out
    from that tree entirely and move on.
    
    The tests have been amended to indicate our expectations better with
    subdirectory recursion.  If we copy A to A/B, then we expect to copy
    everything from A/B/* into A/B/A/B, with exception to the A that we
    create in A/B.
    
    Reviewed by:    bapt
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D34655
---
 bin/cp/cp.c             | 84 ++++++++++++++++++++++++++-----------------------
 bin/cp/tests/cp_test.sh | 15 +++++++--
 2 files changed, 57 insertions(+), 42 deletions(-)

diff --git a/bin/cp/cp.c b/bin/cp/cp.c
index f132bb940c09..dbae4b535843 100644
--- a/bin/cp/cp.c
+++ b/bin/cp/cp.c
@@ -292,6 +292,7 @@ copy_stat(const char *path, struct stat *sb)
 static int
 copy(char *argv[], enum op type, int fts_options, struct stat *root_stat)
 {
+	char rootname[NAME_MAX];
 	struct stat created_root_stat, to_stat;
 	FTS *ftsp;
 	FTSENT *curr;
@@ -328,45 +329,15 @@ copy(char *argv[], enum op type, int fts_options, struct stat *root_stat)
 			;
 		}
 
-		if (curr->fts_info == FTS_D && type != FILE_TO_FILE &&
-		    root_stat != NULL &&
-		    root_stat->st_dev == curr->fts_statp->st_dev &&
-		    root_stat->st_ino == curr->fts_statp->st_ino) {
-			assert(recurse_path == NULL);
-			if (curr->fts_level > FTS_ROOTLEVEL) {
-				/*
-				 * If the recursion isn't at the immediate
-				 * level, we can just not traverse into this
-				 * directory.
-				 */
-				fts_set(ftsp, curr, FTS_SKIP);
-				continue;
-			} else {
-				const char *slash;
-
-				/*
-				 * Grab the last path component and double it,
-				 * to make life easier later and ensure that
-				 * we work even with fts_level == 0 is a couple
-				 * of components deep in fts_path.  No path
-				 * separators are fine and expected in the
-				 * common case, though.
-				 */
-				slash = strrchr(curr->fts_path, '/');
-				if (slash != NULL)
-					slash++;
-				else
-					slash = curr->fts_path;
-				if (asprintf(&recurse_path, "%s/%s",
-				    curr->fts_path, slash) == -1)
-					err(1, "asprintf");
-			}
-		}
-
-		if (recurse_path != NULL &&
-		    strcmp(curr->fts_path, recurse_path) == 0) {
-			fts_set(ftsp, curr, FTS_SKIP);
-			continue;
+		/*
+		 * Stash the root basename off for detecting recursion later.
+		 *
+		 * This will be essential if the root is a symlink and we're
+		 * rolling with -L or -H.  The later bits will need this bit in
+		 * particular.
+		 */
+		if (curr->fts_level == FTS_ROOTLEVEL) {
+			strlcpy(rootname, curr->fts_name, sizeof(rootname));
 		}
 
 		/*
@@ -422,6 +393,41 @@ copy(char *argv[], enum op type, int fts_options, struct stat *root_stat)
 			to.p_end = target_mid + nlen;
 			*to.p_end = 0;
 			STRIP_TRAILING_SLASH(to);
+
+			/*
+			 * We're on the verge of recursing on ourselves.  Either
+			 * we need to stop right here (we knowingly just created
+			 * it), or we will in an immediate descendant.  Record
+			 * the path of the immediate descendant to make our
+			 * lives a little less complicated looking.
+			 */
+			if (curr->fts_info == FTS_D && root_stat != NULL &&
+			    root_stat->st_dev == curr->fts_statp->st_dev &&
+			    root_stat->st_ino == curr->fts_statp->st_ino) {
+				assert(recurse_path == NULL);
+
+				if (root_stat == &created_root_stat) {
+					/*
+					 * This directory didn't exist when we
+					 * started, we created it as part of
+					 * traversal.  Stop right here before we
+					 * do something silly.
+					 */
+					fts_set(ftsp, curr, FTS_SKIP);
+					continue;
+				}
+
+
+				if (asprintf(&recurse_path, "%s/%s", to.p_path,
+				    rootname) == -1)
+					err(1, "asprintf");
+			}
+
+			if (recurse_path != NULL &&
+			    strcmp(to.p_path, recurse_path) == 0) {
+				fts_set(ftsp, curr, FTS_SKIP);
+				continue;
+			}
 		}
 
 		if (curr->fts_info == FTS_DP) {
diff --git a/bin/cp/tests/cp_test.sh b/bin/cp/tests/cp_test.sh
index adb6ea24d9e7..7362168d7303 100755
--- a/bin/cp/tests/cp_test.sh
+++ b/bin/cp/tests/cp_test.sh
@@ -95,16 +95,25 @@ matching_srctgt_contained_body()
 	# Let's do the same thing, except we'll try to recursively copy foo into
 	# one of its subdirectories.
 	mkdir foo
+	ln -s foo coo
 	echo "qux" > foo/bar
-	mkdir foo/loo
 	mkdir foo/moo
-	mkdir foo/roo
+	touch foo/moo/roo
 	cp foo/bar foo/zoo
 
 	atf_check cp -R foo foo/moo
+	atf_check cp -RH coo foo/moo
 	atf_check -o inline:"qux\n" cat foo/moo/foo/bar
+	atf_check -o inline:"qux\n" cat foo/moo/coo/bar
 	atf_check -o inline:"qux\n" cat foo/moo/foo/zoo
-	atf_check -e not-empty -s not-exit:0 stat foo/moo/foo/moo
+	atf_check -o inline:"qux\n" cat foo/moo/coo/zoo
+
+	# We should have copied the contents of foo/moo before foo, coo started
+	# getting copied in.
+	atf_check -o not-empty stat foo/moo/foo/moo/roo
+	atf_check -o not-empty stat foo/moo/coo/moo/roo
+	atf_check -e not-empty -s not-exit:0 stat foo/moo/foo/moo/foo
+	atf_check -e not-empty -s not-exit:0 stat foo/moo/coo/moo/coo
 }
 
 atf_test_case matching_srctgt_link