From cee42b726446f1cc35aef44685a5d7b4504b2810 Mon Sep 17 00:00:00 2001 From: barryhughes <3594411+barryhughes@users.noreply.github.com> Date: Thu, 24 Mar 2022 17:14:00 -0700 Subject: [PATCH 1/6] Enhance directory traversal handling/resolution. --- .../src/Internal/Utilities/URL.php | 81 ++++++++++++++++--- .../php/src/Internal/Utilities/URLTest.php | 65 +++++++++++++-- 2 files changed, 128 insertions(+), 18 deletions(-) diff --git a/plugins/woocommerce/src/Internal/Utilities/URL.php b/plugins/woocommerce/src/Internal/Utilities/URL.php index bf35943617e..0b64c3c0b8b 100644 --- a/plugins/woocommerce/src/Internal/Utilities/URL.php +++ b/plugins/woocommerce/src/Internal/Utilities/URL.php @@ -124,8 +124,11 @@ class URL { * without touching the filesystem. */ private function process_path() { - $segments = explode( '/', $this->components['path'] ); - $this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/'; + $segments = explode( '/', $this->components['path'] ); + $this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/' || ! empty( $this->components['host'] ); + $is_directory = substr( $this->components['path'], -1, 1 ) === '/' && strlen( $this->components['path'] ) > 1; + $resolve_traversals = 'file' !== $this->components['scheme'] || $this->is_absolute; + $retain_traversals = false; // Clean the path. foreach ( $segments as $part ) { @@ -137,19 +140,39 @@ class URL { // Directory traversals created with percent-encoding syntax should also be detected. $is_traversal = str_ireplace( '%2e', '.', $part ) === '..'; - // Unwind directory traversals. - if ( $is_traversal && count( $this->path_parts ) > 0 ) { - $this->path_parts = array_slice( $this->path_parts, 0, count( $this->path_parts ) - 1 ); - continue; + // Resolve directory traversals (if allowed: see further comment relating to this). + if ( $resolve_traversals && $is_traversal ) { + if ( count( $this->path_parts ) > 0 && ! $retain_traversals ) { + $this->path_parts = array_slice( $this->path_parts, 0, count( $this->path_parts ) - 1 ); + continue; + } elseif ( $this->is_absolute ) { + continue; + } } + /* + * Consider allowing directory traversals to be resolved (ie, the process that converts 'foo/bar/../baz' to + * 'foo/baz'). + * + * 1. We are only concerned with file URLs, for all other types unwinding of traversals is already allowed. + * 2. This is a 'one time' and unidirectional operation. We only wish to flip from false to true, and we + * never wish to do this more than once. + * 3. We only flip the switch after we have examined all leading '..' traversal segments. + */ + if ( false === $resolve_traversals && '..' !== $part && 'file' === $this->components['scheme'] && ! $this->is_absolute ) { + $resolve_traversals = true; + } + + // At this point, if we are committing a traversal to the path then we will wish to retain the next traversal, too. + $retain_traversals = $resolve_traversals && '..' === $part; + // Retain this part of the path. $this->path_parts[] = $part; } // Reform the path from the processed segments, appending a leading slash if it is absolute and restoring // the Windows drive letter if we have one. - $this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ); + $this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $is_directory ? '/' : '' ); } /** @@ -187,16 +210,47 @@ class URL { * this is set to 1 (parent). 2 will yield the grand-parent, 3 will yield the great * grand-parent, etc. * + * If a level is specified that exceeds the number of path segments, this method will + * return false. + * * @param int $level Used to indicate the level of parent. * - * @return string + * @return string|false */ - public function get_parent_url( int $level = 1 ): string { + public function get_parent_url( int $level = 1 ) { if ( $level < 1 ) { $level = 1; } - $parent_path = implode( '/', array_slice( $this->path_parts, 0, count( $this->path_parts ) - $level ) ) . '/'; + $parent_path_parts_to_keep = count( $this->path_parts ) - $level; + + /* + * With the exception of file URLs, we do not allow obtaining (grand-)parent directories that require + * us to describe them using directory traversals. For example, given "http://hostname/foo/bar/baz.png" we do + * not permit determining anything more than 2 levels up (we cannot go beyond "http://hostname/"). + */ + if ( 'file' !== $this->components['scheme'] && $parent_path_parts_to_keep < 0 ) { + return false; + } + + // In the specific case of an absolute filepath describing the root directory, there can be no parent. + if ( 'file' === $this->components['scheme'] && $this->is_absolute && empty( $this->path_parts ) ) { + return false; + } + + if ( $parent_path_parts_to_keep >= 0 ) { + $parent_path = implode( '/', array_slice( $this->path_parts, 0, $parent_path_parts_to_keep ) ); + } else { + // For relative filepaths only, we use traversals to describe the requested parent. + $parent_path = untrailingslashit( str_repeat( '../', $parent_path_parts_to_keep * -1 ) ); + } + + if ( $this->is_relative() && '' === $parent_path ) { + $parent_path = '.'; + } + + // Append a trailing slash, since a parent is always a directory. The only exception is the current working directory. + $parent_path .= '/'; // For absolute paths, apply a leading slash (does not apply if we have a root path). if ( $this->is_absolute && 0 !== strpos( $parent_path, '/' ) ) { @@ -219,12 +273,17 @@ class URL { $scheme = null !== $this->components['scheme'] ? $this->components['scheme'] . '://' : ''; $host = null !== $this->components['host'] ? $this->components['host'] : ''; $port = null !== $this->components['port'] ? ':' . $this->components['port'] : ''; + $path = $path_override ?? $this->get_path(); + + // Special handling for hostless URLs (typically, filepaths) referencing the current working directory. + if ( '' === $host && ( '' === $path || '.' === $path ) ) { + $path = './'; + } $user = null !== $this->components['user'] ? $this->components['user'] : ''; $pass = null !== $this->components['pass'] ? ':' . $this->components['pass'] : ''; $user_pass = ( ! empty( $user ) || ! empty( $pass ) ) ? $user . $pass . '@' : ''; - $path = $path_override ?? $this->get_path(); $query = null !== $this->components['query'] ? '?' . $this->components['query'] : ''; $fragment = null !== $this->components['fragment'] ? '#' . $this->components['fragment'] : ''; diff --git a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php index 415e12e2a2d..d265330f534 100644 --- a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php +++ b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php @@ -42,7 +42,19 @@ class URLTest extends WC_Unit_Test_Case { $this->assertEquals( '../should/remain/relative', ( new URL( 'relative/../../should/remain/relative' ) )->get_path(), - 'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem).' + 'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem - scenario #1).' + ); + + $this->assertEquals( + '../../should/remain/relative', + ( new URL( 'relative/../../../should/remain/relative' ) )->get_path(), + 'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem - scenario #2).' + ); + + $this->assertEquals( + 'file:///foo/bar/baz', + ( new URL( '/../foo/bar/baz/bazooka/../../baz' ) )->get_url(), + 'Directory traversals are appropriately resolved even in complex cases with multiple separate traversals. When the original path is absolute, the output will be absolute.' ); } @@ -119,10 +131,9 @@ class URLTest extends WC_Unit_Test_Case { } public function test_can_obtain_parent_url() { - $this->assertEquals( - 'file:///', + $this->assertFalse( ( new URL( '/' ) )->get_parent_url(), - 'The parent of root directory "/" is "/".' + 'Root directory "/" is considered to have no parent.' ); $this->assertEquals( @@ -131,10 +142,9 @@ class URLTest extends WC_Unit_Test_Case { 'The parent URL will be trailingslashed.' ); - $this->assertEquals( - 'https://example.com/', + $this->assertFalse( ( new URL( 'https://example.com' ) )->get_parent_url(), - 'The host name (for non-file URLs) is distinct from the path and will not be removed.' + 'In the case of non-file URLs, if we only have a host name and no path then the parent cannot be derived.' ); } @@ -174,4 +184,45 @@ class URLTest extends WC_Unit_Test_Case { 'All parent URLs can be derived for a filepath, up to and including the root directory plus drive letter (Windows).' ); } + + public function test_obtaining_parent_urls_from_relative_urls() { + $this->assertEquals( + array( + 'file://relative/to/', + 'file://relative/', + 'file://./', + ), + ( new URL( 'relative/to/abspath' ) )->get_all_parent_urls(), + 'When obtaining all parent URLs for a relative filepath, we never return the root directory and never return a URL containing traversals. ' + ); + + $this->assertEquals( + 'file://./', + ( new URL( 'just-a-file.png' ) )->get_parent_url(), + 'The parent URL of an unqualified, relative file is simply an empty relative path (generally, though not always, this is equivalent to ABSPATH).' + ); + + $this->assertEquals( + 'file://../../', + ( new URL( '../../relatively-placed-file.pdf' ) )->get_parent_url() + ); + + $this->assertEquals( + 'file://../', + ( new URL( 'relatively-placed-file.pdf' ) )->get_parent_url( 2 ), + 'For filepaths, we can successfully determine the (grand-)parent directories of relative filepaths (when explicitly requested).' + ); + + $this->assertEquals( + 'file://../../', + ( new URL( 'relatively-placed-file.pdf' ) )->get_parent_url( 3 ), + 'For filepaths, we can successfully determine the (grand-)parent directories of relative filepaths (when explicitly requested).' + ); + + $this->assertEquals( + 'file://../foo/bar/baz', + ( new URL( '../foo/bar/cat/dog/../../baz' ) )->get_url(), + 'Directory traversals are appropriately resolved even in complex cases with multiple separate traversals.' + ); + } } From 1aecb64be5b89f58b4063f700691be4454ea38f3 Mon Sep 17 00:00:00 2001 From: barryhughes <3594411+barryhughes@users.noreply.github.com> Date: Fri, 25 Mar 2022 04:07:15 -0700 Subject: [PATCH 2/6] Cover further scenarios relating to relative URLs and traversals. --- .../src/Internal/Utilities/URL.php | 38 ++++++++++++++---- .../php/src/Internal/Utilities/URLTest.php | 39 ++++++++++++++++++- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/plugins/woocommerce/src/Internal/Utilities/URL.php b/plugins/woocommerce/src/Internal/Utilities/URL.php index 0b64c3c0b8b..a9ad45c9118 100644 --- a/plugins/woocommerce/src/Internal/Utilities/URL.php +++ b/plugins/woocommerce/src/Internal/Utilities/URL.php @@ -34,6 +34,13 @@ class URL { */ private $is_absolute; + /** + * If the URL (or filepath) represents a directory. + * + * @var bool + */ + private $is_directory; + /** * The components of the URL's path. * @@ -126,7 +133,7 @@ class URL { private function process_path() { $segments = explode( '/', $this->components['path'] ); $this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/' || ! empty( $this->components['host'] ); - $is_directory = substr( $this->components['path'], -1, 1 ) === '/' && strlen( $this->components['path'] ) > 1; + $this->is_directory = substr( $this->components['path'], -1, 1 ) === '/' && strlen( $this->components['path'] ) > 1; $resolve_traversals = 'file' !== $this->components['scheme'] || $this->is_absolute; $retain_traversals = false; @@ -172,7 +179,7 @@ class URL { // Reform the path from the processed segments, appending a leading slash if it is absolute and restoring // the Windows drive letter if we have one. - $this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $is_directory ? '/' : '' ); + $this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_directory ? '/' : '' ); } /** @@ -193,6 +200,15 @@ class URL { $max_parent = count( $this->path_parts ); $parents = array(); + /* + * If we are looking at a relative path that begins with at least one traversal (example: "../../foo") + * then we should only return one parent URL (otherwise, we'd potentially have to return an infinite + * number of parent URLs since we can't know how far the tree extends). + */ + if ( $max_parent > 0 && ! $this->is_absolute && '..' === $this->path_parts[0] ) { + $max_parent = 1; + } + for ( $level = 1; $level <= $max_parent; $level++ ) { $parents[] = $this->get_parent_url( $level ); } @@ -222,7 +238,8 @@ class URL { $level = 1; } - $parent_path_parts_to_keep = count( $this->path_parts ) - $level; + $parts_count = count( $this->path_parts ); + $parent_path_parts_to_keep = $parts_count - $level; /* * With the exception of file URLs, we do not allow obtaining (grand-)parent directories that require @@ -238,11 +255,16 @@ class URL { return false; } - if ( $parent_path_parts_to_keep >= 0 ) { - $parent_path = implode( '/', array_slice( $this->path_parts, 0, $parent_path_parts_to_keep ) ); - } else { + if ( $parts_count > 0 && '..' === $this->path_parts[0] ) { + // In the case where we have a filepath already starting with one or more traversals, we need to add additional traversals. + $last_traversal = max( array_keys( $this->path_parts, '..', true ) ) + ( $this->is_directory ? 1 : 0 ); + $parent_path = str_repeat( '../', $level ) . join( '/', array_slice( $this->path_parts, 0, $last_traversal ) ); + } elseif ( $parent_path_parts_to_keep < 0 ) { // For relative filepaths only, we use traversals to describe the requested parent. $parent_path = untrailingslashit( str_repeat( '../', $parent_path_parts_to_keep * -1 ) ); + } else { + // Otherwise, in a very simple case, we just remove existing parts. + $parent_path = implode( '/', array_slice( $this->path_parts, 0, $parent_path_parts_to_keep ) ); } if ( $this->is_relative() && '' === $parent_path ) { @@ -257,7 +279,9 @@ class URL { $parent_path = '/' . $parent_path; } - return $this->get_url( $this->get_path( $parent_path ) ); + // Form the parent URL, then process it exactly as we would any other URL for consistency. + $parent_url = $this->get_url( $this->get_path( $parent_path ) ); + return ( new self( $parent_url ) )->get_url(); } /** diff --git a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php index d265330f534..84103e7fde5 100644 --- a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php +++ b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php @@ -133,7 +133,12 @@ class URLTest extends WC_Unit_Test_Case { public function test_can_obtain_parent_url() { $this->assertFalse( ( new URL( '/' ) )->get_parent_url(), - 'Root directory "/" is considered to have no parent.' + 'Root directory "/" is considered to have no parent (scenario #1).' + ); + + $this->assertFalse( + ( new URL( '/' ) )->get_parent_url( 2 ), + 'Root directory "/" is considered to have no parent (scenario #2).' ); $this->assertEquals( @@ -196,6 +201,38 @@ class URLTest extends WC_Unit_Test_Case { 'When obtaining all parent URLs for a relative filepath, we never return the root directory and never return a URL containing traversals. ' ); + $this->assertEquals( + array( + 'file://../../' + ), + ( new URL( '../../some.file' ) )->get_all_parent_urls(), + 'When obtaining all parent URLs for a path that begins with directory traversals, we only go up one more level.' + ); + + $this->assertEquals( + 'file://../../', + ( new URL( '../' ) )->get_parent_url(), + 'If a relative *directory* beginning with a traversal is provided, we can successfully derive its parent (scenario #1).' + ); + + $this->assertEquals( + 'file://../../../', + ( new URL( '../' ) )->get_parent_url( 2 ), + 'If a relative *directory* beginning with a traversal is provided, we can successfully derive its parent (scenario #2).' + ); + + $this->assertEquals( + 'file://../../../', + ( new URL( '../../some.file' ) )->get_parent_url( 2 ), + 'If the grandparent of a relative path that begins with one or more traversals is requested, we should receive the expected result (scenario #1).' + ); + + $this->assertEquals( + 'file://../../../../', + ( new URL( '../../some.file' ) )->get_parent_url( 3 ), + 'If the grandparent of a relative path that begins with one or more traversals is requested, we should receive the expected result (scenario #2).' + ); + $this->assertEquals( 'file://./', ( new URL( 'just-a-file.png' ) )->get_parent_url(), From 114480c23c37018ba9a5179d8b680b81e20a4ca7 Mon Sep 17 00:00:00 2001 From: barryhughes <3594411+barryhughes@users.noreply.github.com> Date: Fri, 25 Mar 2022 04:12:46 -0700 Subject: [PATCH 3/6] Tweak commentary. --- plugins/woocommerce/src/Internal/Utilities/URL.php | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/plugins/woocommerce/src/Internal/Utilities/URL.php b/plugins/woocommerce/src/Internal/Utilities/URL.php index a9ad45c9118..16a367550a7 100644 --- a/plugins/woocommerce/src/Internal/Utilities/URL.php +++ b/plugins/woocommerce/src/Internal/Utilities/URL.php @@ -161,7 +161,8 @@ class URL { * Consider allowing directory traversals to be resolved (ie, the process that converts 'foo/bar/../baz' to * 'foo/baz'). * - * 1. We are only concerned with file URLs, for all other types unwinding of traversals is already allowed. + * 1. For this decision point, we are only concerned with relative filepaths (in all other cases, + * $resolve_traversals will already be true). * 2. This is a 'one time' and unidirectional operation. We only wish to flip from false to true, and we * never wish to do this more than once. * 3. We only flip the switch after we have examined all leading '..' traversal segments. @@ -170,7 +171,10 @@ class URL { $resolve_traversals = true; } - // At this point, if we are committing a traversal to the path then we will wish to retain the next traversal, too. + /* + * Set a flag indicating that traversals should be retained. This is done to ensure we don't prematurely + * discard traversals at the start of the path. + */ $retain_traversals = $resolve_traversals && '..' === $part; // Retain this part of the path. From 80397d4f73d2258b6368d672971af10897c3c8c2 Mon Sep 17 00:00:00 2001 From: barryhughes <3594411+barryhughes@users.noreply.github.com> Date: Fri, 25 Mar 2022 04:14:49 -0700 Subject: [PATCH 4/6] Improve test description. --- .../woocommerce/tests/php/src/Internal/Utilities/URLTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php index 84103e7fde5..168b5920781 100644 --- a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php +++ b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php @@ -198,7 +198,7 @@ class URLTest extends WC_Unit_Test_Case { 'file://./', ), ( new URL( 'relative/to/abspath' ) )->get_all_parent_urls(), - 'When obtaining all parent URLs for a relative filepath, we never return the root directory and never return a URL containing traversals. ' + 'When obtaining all parent URLs for a relative filepath, we never return the root directory and never return a URL containing traversals if there were none to begin with.' ); $this->assertEquals( From a41ae705dd75fa97166c1ec5f7bffd73407c7509 Mon Sep 17 00:00:00 2001 From: barryhughes <3594411+barryhughes@users.noreply.github.com> Date: Fri, 25 Mar 2022 10:47:05 -0700 Subject: [PATCH 5/6] Rename the is_directory property to give a better sense of its role. --- .../src/Internal/Utilities/URL.php | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/plugins/woocommerce/src/Internal/Utilities/URL.php b/plugins/woocommerce/src/Internal/Utilities/URL.php index 16a367550a7..aa4102d92ed 100644 --- a/plugins/woocommerce/src/Internal/Utilities/URL.php +++ b/plugins/woocommerce/src/Internal/Utilities/URL.php @@ -35,11 +35,15 @@ class URL { private $is_absolute; /** - * If the URL (or filepath) represents a directory. + * If the URL (or filepath) represents a directory other than the root directory. + * + * This is useful at different points in the process, when deciding whether to re-apply + * a trailing slash at the end of processing or when we need to calculate how many + * directory traversals are needed to form a (grand-)parent URL. * * @var bool */ - private $is_directory; + private $is_non_root_directory; /** * The components of the URL's path. @@ -131,11 +135,11 @@ class URL { * without touching the filesystem. */ private function process_path() { - $segments = explode( '/', $this->components['path'] ); - $this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/' || ! empty( $this->components['host'] ); - $this->is_directory = substr( $this->components['path'], -1, 1 ) === '/' && strlen( $this->components['path'] ) > 1; - $resolve_traversals = 'file' !== $this->components['scheme'] || $this->is_absolute; - $retain_traversals = false; + $segments = explode( '/', $this->components['path'] ); + $this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/' || ! empty( $this->components['host'] ); + $this->is_non_root_directory = substr( $this->components['path'], -1, 1 ) === '/' && strlen( $this->components['path'] ) > 1; + $resolve_traversals = 'file' !== $this->components['scheme'] || $this->is_absolute; + $retain_traversals = false; // Clean the path. foreach ( $segments as $part ) { @@ -183,7 +187,7 @@ class URL { // Reform the path from the processed segments, appending a leading slash if it is absolute and restoring // the Windows drive letter if we have one. - $this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_directory ? '/' : '' ); + $this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_non_root_directory ? '/' : '' ); } /** @@ -261,7 +265,7 @@ class URL { if ( $parts_count > 0 && '..' === $this->path_parts[0] ) { // In the case where we have a filepath already starting with one or more traversals, we need to add additional traversals. - $last_traversal = max( array_keys( $this->path_parts, '..', true ) ) + ( $this->is_directory ? 1 : 0 ); + $last_traversal = max( array_keys( $this->path_parts, '..', true ) ) + ( $this->is_non_root_directory ? 1 : 0 ); $parent_path = str_repeat( '../', $level ) . join( '/', array_slice( $this->path_parts, 0, $last_traversal ) ); } elseif ( $parent_path_parts_to_keep < 0 ) { // For relative filepaths only, we use traversals to describe the requested parent. From b46f28f4e21395f96c3210e93a4a5ce9ef618c4b Mon Sep 17 00:00:00 2001 From: barryhughes <3594411+barryhughes@users.noreply.github.com> Date: Tue, 29 Mar 2022 14:09:53 -0700 Subject: [PATCH 6/6] Improve path resolution, add additional tests, clean-up tests (use data-providers). --- .../src/Internal/Utilities/URL.php | 21 +- .../php/src/Internal/Utilities/URLTest.php | 356 +++++++----------- 2 files changed, 162 insertions(+), 215 deletions(-) diff --git a/plugins/woocommerce/src/Internal/Utilities/URL.php b/plugins/woocommerce/src/Internal/Utilities/URL.php index aa4102d92ed..340caa71c34 100644 --- a/plugins/woocommerce/src/Internal/Utilities/URL.php +++ b/plugins/woocommerce/src/Internal/Utilities/URL.php @@ -144,7 +144,7 @@ class URL { // Clean the path. foreach ( $segments as $part ) { // Drop empty segments. - if ( strlen( $part ) === 0 ) { + if ( strlen( $part ) === 0 || '.' === $part ) { continue; } @@ -185,6 +185,12 @@ class URL { $this->path_parts[] = $part; } + // Protect against empty relative paths. + if ( count( $this->path_parts ) === 0 && ! $this->is_absolute ) { + $this->path_parts = array( '.' ); + $this->is_non_root_directory = true; + } + // Reform the path from the processed segments, appending a leading slash if it is absolute and restoring // the Windows drive letter if we have one. $this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_non_root_directory ? '/' : '' ); @@ -263,9 +269,16 @@ class URL { return false; } - if ( $parts_count > 0 && '..' === $this->path_parts[0] ) { - // In the case where we have a filepath already starting with one or more traversals, we need to add additional traversals. - $last_traversal = max( array_keys( $this->path_parts, '..', true ) ) + ( $this->is_non_root_directory ? 1 : 0 ); + // Handle cases where the path starts with one or more 'dot segments'. Since the path has already been + // processed, we can be confident that any such segments are at the start of the path. + if ( $parts_count > 0 && ( '.' === $this->path_parts[0] || '..' === $this->path_parts[0] ) ) { + // Determine the index of the last dot segment (ex: given the path '/../../foo' it would be 1). + $single_dots = array_keys( $this->path_parts, '.', true ); + $double_dots = array_keys( $this->path_parts, '..', true ); + $max_dot_index = max( array_merge( $single_dots, $double_dots ) ); + + // Prepend the required number of traversals and discard unnessary trailing segments. + $last_traversal = $max_dot_index + ( $this->is_non_root_directory ? 1 : 0 ); $parent_path = str_repeat( '../', $level ) . join( '/', array_slice( $this->path_parts, 0, $last_traversal ) ); } elseif ( $parent_path_parts_to_keep < 0 ) { // For relative filepaths only, we use traversals to describe the requested parent. diff --git a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php index 168b5920781..659da935540 100644 --- a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php +++ b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php @@ -26,240 +26,174 @@ class URLTest extends WC_Unit_Test_Case { ); } - public function test_directory_traversal_resolution() { - $this->assertEquals( - '/var/foo/foobar', - ( new URL( '/var/foo/bar/baz/../../foobar' ) )->get_path(), - 'Correctly resolves a path containing a directory traversal.' - ); + /** + * @dataProvider path_expectations + * + * @param string $source_path + * @param string $expected_resolution + */ + public function test_path_resolution( $source_path, $expected_resolution ) { + $this->assertEquals( $expected_resolution, ( new URL( $source_path ) )->get_path() ); + } - $this->assertEquals( - '/bazbar', - ( new URL( '/var/foo/../../../../bazbar' ) )->get_path(), - 'Correctly resolves a path containing a directory traversal, even if the traversals attempt to backtrack beyond the root directory.' - ); - - $this->assertEquals( - '../should/remain/relative', - ( new URL( 'relative/../../should/remain/relative' ) )->get_path(), - 'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem - scenario #1).' - ); - - $this->assertEquals( - '../../should/remain/relative', - ( new URL( 'relative/../../../should/remain/relative' ) )->get_path(), - 'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem - scenario #2).' - ); - - $this->assertEquals( - 'file:///foo/bar/baz', - ( new URL( '/../foo/bar/baz/bazooka/../../baz' ) )->get_url(), - 'Directory traversals are appropriately resolved even in complex cases with multiple separate traversals. When the original path is absolute, the output will be absolute.' + /** + * Expectations when requesting the path of a URL. + * + * @return string[][] + */ + public function path_expectations(): array { + return array( + array( '/var/foo/bar/baz/../../foobar', '/var/foo/foobar' ), + array( '/var/foo/../../../../bazbar', '/bazbar' ), + array( '././././.', './' ), + array( 'empty/segments//are/stripped', 'empty/segments/are/stripped' ), + array( '///nonempty/ /whitespace/ /is//kept', '/nonempty/ /whitespace/ /is/kept' ), + array( 'relative/../../should/remain/relative', '../should/remain/relative' ), + array( 'relative/../../../should/remain/relative', '../../should/remain/relative' ), + array( 'c:\\Windows\Server\HTTP\dump.xml', 'c:/Windows/Server/HTTP/dump.xml') ); } - public function test_can_get_normalized_string_representation() { - $this->assertEquals( - 'foo/bar/baz', - ( new URL( 'foo/bar//baz' ) )->get_path(), - 'Empty segments are discarded, remains as a relative path.' - ); + /** + * @dataProvider url_expectations + * + * @param string $source_url + * @param string $expected_resolution + */ + public function test_url_resolution( $source_url, $expected_resolution ) { + $this->assertEquals( $expected_resolution, ( new URL( $source_url ) )->get_url() ); + } - $this->assertEquals( - '/foo/ /bar/ /baz/foobarbaz', - ( new URL( '///foo/ /bar/ /baz//foobarbaz' ) )->get_path(), - 'Empty segments are discarded, non-empty segments containing only whitespace are preserved, remains as an absolute path.' - ); - - $this->assertEquals( - 'c:/Windows/Server/HTTP/dump.xml', - ( new URL( 'c:\\Windows\Server\HTTP\dump.xml' ) )->get_path(), - 'String representations of Windows filepaths have forward slash separators and preserve the drive letter.' + /** + * Expectations when resolving URLs. + * + * @return string[][] + */ + public function url_expectations(): array { + return array( + array( '/../foo/bar/baz/bazooka/../../baz', 'file:///foo/bar/baz' ), + array( './a/b/c/./../././../b/c', 'file://a/b/c' ), + array( 'relative/path', 'file://relative/path' ), + array( '/absolute/path', 'file:///absolute/path' ), + array( '/var/www/network/%2econfig', 'file:///var/www/network/%2econfig' ), + array( '///foo', 'file:///foo' ), + array( '~/foo.txt', 'file://~/foo.txt' ), + array( 'baz///foo', 'file://baz/foo' ), + array( 'file:///etc/foo/bar', 'file:///etc/foo/bar' ), + array( 'foo://bar', 'foo://bar/' ), + array( 'foo://bar/baz-file', 'foo://bar/baz-file' ), + array( 'foo://bar/baz-dir/', 'foo://bar/baz-dir/' ), + array( 'https://foo.bar/parent/.%2e/asset.txt', 'https://foo.bar/asset.txt' ), + array( 'https://foo.bar/parent/%2E./asset.txt', 'https://foo.bar/asset.txt' ), + array( 'https://foo.bar/parent/%2E%2e/asset.txt', 'https://foo.bar/asset.txt' ), + array( 'https://foo.bar/parent/%2E.%2fasset.txt', 'https://foo.bar/parent/%2E.%2fasset.txt' ), + array( 'http://localhost?../../bar', 'http://localhost/?../../bar' ), ); } - public function test_can_get_normalized_url_representation() { - $this->assertEquals( - 'file://relative/path', - ( new URL( 'relative/path' ) )->get_url(), - 'Can obtain a URL representation of a relative filepath, even when the initial string was a plain filepath.' - ); + /** + * @dataProvider parent_url_expectations + * + * @param string $source_path + * @param int $parent_level + * @param string|false $expectation + */ + public function test_can_obtain_parent_url( string $source_path, int $parent_level, $expectation ) { + $this->assertEquals( $expectation, ( new URL( $source_path ) )->get_parent_url( $parent_level ) ); + } - $this->assertEquals( - 'file:///absolute/path', - ( new URL( '/absolute/path' ) )->get_url(), - 'Can obtain a URL representation of an absolute filepath, even when the initial string was a plain filepath.' - ); - - $this->assertEquals( - 'file:///etc/foo/bar', - ( new URL( 'file:///etc/foo/bar' ) )->get_url(), - 'Can obtain a URL representation of a filepath, when the source filepath was also expressed as a URL.' + /** + * Expectations when resolving (grand-)parent URLs. + * + * @return array[] + */ + public function parent_url_expectations(): array { + return array( + array( '/', 1, false ), + array( '/', 2, false ), + array( './', 1, 'file://../' ), + array( '../', 1, 'file://../../' ), + array( 'relative-file.png', 1, 'file://./' ), + array( 'relative-file.png', 2, 'file://../' ), + array( '/var/dev/', 1, 'file:///var/' ), + array( '/var/../dev/./../foo/bar', 1, 'file:///foo/' ), + array( 'https://example.com', 1, false ), + array( 'https://example.com/foo', 1, 'https://example.com/' ), + array( 'https://example.com/foo/bar/baz/../cat/', 2, 'https://example.com/foo/' ), + array( 'https://example.com/foo/bar/baz/%2E%2E/dog/', 2, 'https://example.com/foo/' ), + array( 'file://./', 1, 'file://../' ), + array( 'file://./', 2, 'file://../../' ), + array( 'file://../../foo', 1, 'file://../../' ), + array( 'file://../../foo', 2, 'file://../../../' ), + array( 'file://../../', 1, 'file://../../../' ), + array( 'file://./../', 2, 'file://../../../' ), ); } - public function test_handling_of_percent_encoded_periods() { - $this->assertEquals( - 'https://foo.bar/asset.txt', - ( new URL( 'https://foo.bar/parent/.%2e/asset.txt' ) )->get_url(), - 'Directory traversals expressed using percent-encoding are still resolved (lowercase, one encoded period).' - ); - - $this->assertEquals( - 'https://foo.bar/asset.txt', - ( new URL( 'https://foo.bar/parent/%2E./asset.txt' ) )->get_url(), - 'Directory traversals expressed using percent-encoding are still resolved (uppercase, one encoded period).' - ); - - $this->assertEquals( - 'https://foo.bar/asset.txt', - ( new URL( 'https://foo.bar/parent/%2E%2e/asset.txt' ) )->get_url(), - 'Directory traversals expressed using percent-encoding are still resolved (mixed case, both periods encoded).' - ); - - $this->assertEquals( - 'https://foo.bar/parent/%2E.%2fasset.txt', - ( new URL( 'https://foo.bar/parent/%2E.%2fasset.txt' ) )->get_url(), - 'If the forward slash after a double period is URL encoded, there is no directory traversal (since this means the slash is a part of the segment and is not a separator).' - ); - - $this->assertEquals( - 'file:///var/www/network/%2econfig', - ( new URL( '/var/www/network/%2econfig' ) )->get_url(), - 'Use of percent-encoding in URLs is accepted and unnecessary conversion does not take place.' - ); + /** + * @dataProvider all_parent_url_expectations + * + * @param string $source_path + * @param array $expectation + */ + public function test_can_obtain_all_parent_urls( string $source_path, array $expectation ) { + $this->assertEquals( $expectation, ( new URL( $source_path ) )->get_all_parent_urls() ); } - public function test_can_obtain_parent_url() { - $this->assertFalse( - ( new URL( '/' ) )->get_parent_url(), - 'Root directory "/" is considered to have no parent (scenario #1).' - ); - - $this->assertFalse( - ( new URL( '/' ) )->get_parent_url( 2 ), - 'Root directory "/" is considered to have no parent (scenario #2).' - ); - - $this->assertEquals( - 'file:///var/', - ( new URL( '/var/dev/' ) )->get_parent_url(), - 'The parent URL will be trailingslashed.' - ); - - $this->assertFalse( - ( new URL( 'https://example.com' ) )->get_parent_url(), - 'In the case of non-file URLs, if we only have a host name and no path then the parent cannot be derived.' - ); - } - - public function test_can_obtain_all_parent_urls() { - $this->assertEquals( + /** + * Expectations when obtaining all possible parent URLs of a given URL/path. + * + * @return array[] + */ + public function all_parent_urL_expectations(): array { + return array( array( - 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/', - 'https://local.web/wp-content/uploads/woocommerce_uploads/', - 'https://local.web/wp-content/uploads/', - 'https://local.web/wp-content/', - 'https://local.web/', + 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/secret-sauce.pdf', + array( + 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/', + 'https://local.web/wp-content/uploads/woocommerce_uploads/', + 'https://local.web/wp-content/uploads/', + 'https://local.web/wp-content/', + 'https://local.web/', + ), + ), + array( + '/srv/websites/my.wp.site/public/test-file.doc', + array( + 'file:///srv/websites/my.wp.site/public/', + 'file:///srv/websites/my.wp.site/', + 'file:///srv/websites/', + 'file:///srv/', + 'file:///', + ), + ), + array( + 'C:\\Documents\\Web\\TestSite\\BackgroundTrack.mp3', + array( + 'file://C:/Documents/Web/TestSite/', + 'file://C:/Documents/Web/', + 'file://C:/Documents/', + 'file://C:/', + ), ), - ( new URL( 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/secret-sauce.pdf' ) )->get_all_parent_urls(), - 'All parent URLs can be derived, but the host name is never stripped.' - ); - - $this->assertEquals( array( - 'file:///srv/websites/my.wp.site/public/', - 'file:///srv/websites/my.wp.site/', - 'file:///srv/websites/', - 'file:///srv/', 'file:///', + array(), ), - ( new URL( '/srv/websites/my.wp.site/public/test-file.doc' ) )->get_all_parent_urls(), - 'All parent URLs can be derived for a filepath, up to and including the root directory.' - ); - - $this->assertEquals( array( - 'file://C:/Documents/Web/TestSite/', - 'file://C:/Documents/Web/', - 'file://C:/Documents/', - 'file://C:/', + 'relative/to/abspath', + array( + 'file://relative/to/', + 'file://relative/', + 'file://./', + ), ), - ( new URL( 'C:\\Documents\\Web\\TestSite\\BackgroundTrack.mp3' ) )->get_all_parent_urls(), - 'All parent URLs can be derived for a filepath, up to and including the root directory plus drive letter (Windows).' - ); - } - - public function test_obtaining_parent_urls_from_relative_urls() { - $this->assertEquals( array( - 'file://relative/to/', - 'file://relative/', - 'file://./', + '../../some.file', + array( + 'file://../../' + ), ), - ( new URL( 'relative/to/abspath' ) )->get_all_parent_urls(), - 'When obtaining all parent URLs for a relative filepath, we never return the root directory and never return a URL containing traversals if there were none to begin with.' - ); - - $this->assertEquals( - array( - 'file://../../' - ), - ( new URL( '../../some.file' ) )->get_all_parent_urls(), - 'When obtaining all parent URLs for a path that begins with directory traversals, we only go up one more level.' - ); - - $this->assertEquals( - 'file://../../', - ( new URL( '../' ) )->get_parent_url(), - 'If a relative *directory* beginning with a traversal is provided, we can successfully derive its parent (scenario #1).' - ); - - $this->assertEquals( - 'file://../../../', - ( new URL( '../' ) )->get_parent_url( 2 ), - 'If a relative *directory* beginning with a traversal is provided, we can successfully derive its parent (scenario #2).' - ); - - $this->assertEquals( - 'file://../../../', - ( new URL( '../../some.file' ) )->get_parent_url( 2 ), - 'If the grandparent of a relative path that begins with one or more traversals is requested, we should receive the expected result (scenario #1).' - ); - - $this->assertEquals( - 'file://../../../../', - ( new URL( '../../some.file' ) )->get_parent_url( 3 ), - 'If the grandparent of a relative path that begins with one or more traversals is requested, we should receive the expected result (scenario #2).' - ); - - $this->assertEquals( - 'file://./', - ( new URL( 'just-a-file.png' ) )->get_parent_url(), - 'The parent URL of an unqualified, relative file is simply an empty relative path (generally, though not always, this is equivalent to ABSPATH).' - ); - - $this->assertEquals( - 'file://../../', - ( new URL( '../../relatively-placed-file.pdf' ) )->get_parent_url() - ); - - $this->assertEquals( - 'file://../', - ( new URL( 'relatively-placed-file.pdf' ) )->get_parent_url( 2 ), - 'For filepaths, we can successfully determine the (grand-)parent directories of relative filepaths (when explicitly requested).' - ); - - $this->assertEquals( - 'file://../../', - ( new URL( 'relatively-placed-file.pdf' ) )->get_parent_url( 3 ), - 'For filepaths, we can successfully determine the (grand-)parent directories of relative filepaths (when explicitly requested).' - ); - - $this->assertEquals( - 'file://../foo/bar/baz', - ( new URL( '../foo/bar/cat/dog/../../baz' ) )->get_url(), - 'Directory traversals are appropriately resolved even in complex cases with multiple separate traversals.' ); } }