From b46f28f4e21395f96c3210e93a4a5ce9ef618c4b Mon Sep 17 00:00:00 2001 From: barryhughes <3594411+barryhughes@users.noreply.github.com> Date: Tue, 29 Mar 2022 14:09:53 -0700 Subject: [PATCH] Improve path resolution, add additional tests, clean-up tests (use data-providers). --- .../src/Internal/Utilities/URL.php | 21 +- .../php/src/Internal/Utilities/URLTest.php | 356 +++++++----------- 2 files changed, 162 insertions(+), 215 deletions(-) diff --git a/plugins/woocommerce/src/Internal/Utilities/URL.php b/plugins/woocommerce/src/Internal/Utilities/URL.php index aa4102d92ed..340caa71c34 100644 --- a/plugins/woocommerce/src/Internal/Utilities/URL.php +++ b/plugins/woocommerce/src/Internal/Utilities/URL.php @@ -144,7 +144,7 @@ class URL { // Clean the path. foreach ( $segments as $part ) { // Drop empty segments. - if ( strlen( $part ) === 0 ) { + if ( strlen( $part ) === 0 || '.' === $part ) { continue; } @@ -185,6 +185,12 @@ class URL { $this->path_parts[] = $part; } + // Protect against empty relative paths. + if ( count( $this->path_parts ) === 0 && ! $this->is_absolute ) { + $this->path_parts = array( '.' ); + $this->is_non_root_directory = true; + } + // Reform the path from the processed segments, appending a leading slash if it is absolute and restoring // the Windows drive letter if we have one. $this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_non_root_directory ? '/' : '' ); @@ -263,9 +269,16 @@ class URL { return false; } - if ( $parts_count > 0 && '..' === $this->path_parts[0] ) { - // In the case where we have a filepath already starting with one or more traversals, we need to add additional traversals. - $last_traversal = max( array_keys( $this->path_parts, '..', true ) ) + ( $this->is_non_root_directory ? 1 : 0 ); + // Handle cases where the path starts with one or more 'dot segments'. Since the path has already been + // processed, we can be confident that any such segments are at the start of the path. + if ( $parts_count > 0 && ( '.' === $this->path_parts[0] || '..' === $this->path_parts[0] ) ) { + // Determine the index of the last dot segment (ex: given the path '/../../foo' it would be 1). + $single_dots = array_keys( $this->path_parts, '.', true ); + $double_dots = array_keys( $this->path_parts, '..', true ); + $max_dot_index = max( array_merge( $single_dots, $double_dots ) ); + + // Prepend the required number of traversals and discard unnessary trailing segments. + $last_traversal = $max_dot_index + ( $this->is_non_root_directory ? 1 : 0 ); $parent_path = str_repeat( '../', $level ) . join( '/', array_slice( $this->path_parts, 0, $last_traversal ) ); } elseif ( $parent_path_parts_to_keep < 0 ) { // For relative filepaths only, we use traversals to describe the requested parent. diff --git a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php index 168b5920781..659da935540 100644 --- a/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php +++ b/plugins/woocommerce/tests/php/src/Internal/Utilities/URLTest.php @@ -26,240 +26,174 @@ class URLTest extends WC_Unit_Test_Case { ); } - public function test_directory_traversal_resolution() { - $this->assertEquals( - '/var/foo/foobar', - ( new URL( '/var/foo/bar/baz/../../foobar' ) )->get_path(), - 'Correctly resolves a path containing a directory traversal.' - ); + /** + * @dataProvider path_expectations + * + * @param string $source_path + * @param string $expected_resolution + */ + public function test_path_resolution( $source_path, $expected_resolution ) { + $this->assertEquals( $expected_resolution, ( new URL( $source_path ) )->get_path() ); + } - $this->assertEquals( - '/bazbar', - ( new URL( '/var/foo/../../../../bazbar' ) )->get_path(), - 'Correctly resolves a path containing a directory traversal, even if the traversals attempt to backtrack beyond the root directory.' - ); - - $this->assertEquals( - '../should/remain/relative', - ( new URL( 'relative/../../should/remain/relative' ) )->get_path(), - 'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem - scenario #1).' - ); - - $this->assertEquals( - '../../should/remain/relative', - ( new URL( 'relative/../../../should/remain/relative' ) )->get_path(), - 'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem - scenario #2).' - ); - - $this->assertEquals( - 'file:///foo/bar/baz', - ( new URL( '/../foo/bar/baz/bazooka/../../baz' ) )->get_url(), - 'Directory traversals are appropriately resolved even in complex cases with multiple separate traversals. When the original path is absolute, the output will be absolute.' + /** + * Expectations when requesting the path of a URL. + * + * @return string[][] + */ + public function path_expectations(): array { + return array( + array( '/var/foo/bar/baz/../../foobar', '/var/foo/foobar' ), + array( '/var/foo/../../../../bazbar', '/bazbar' ), + array( '././././.', './' ), + array( 'empty/segments//are/stripped', 'empty/segments/are/stripped' ), + array( '///nonempty/ /whitespace/ /is//kept', '/nonempty/ /whitespace/ /is/kept' ), + array( 'relative/../../should/remain/relative', '../should/remain/relative' ), + array( 'relative/../../../should/remain/relative', '../../should/remain/relative' ), + array( 'c:\\Windows\Server\HTTP\dump.xml', 'c:/Windows/Server/HTTP/dump.xml') ); } - public function test_can_get_normalized_string_representation() { - $this->assertEquals( - 'foo/bar/baz', - ( new URL( 'foo/bar//baz' ) )->get_path(), - 'Empty segments are discarded, remains as a relative path.' - ); + /** + * @dataProvider url_expectations + * + * @param string $source_url + * @param string $expected_resolution + */ + public function test_url_resolution( $source_url, $expected_resolution ) { + $this->assertEquals( $expected_resolution, ( new URL( $source_url ) )->get_url() ); + } - $this->assertEquals( - '/foo/ /bar/ /baz/foobarbaz', - ( new URL( '///foo/ /bar/ /baz//foobarbaz' ) )->get_path(), - 'Empty segments are discarded, non-empty segments containing only whitespace are preserved, remains as an absolute path.' - ); - - $this->assertEquals( - 'c:/Windows/Server/HTTP/dump.xml', - ( new URL( 'c:\\Windows\Server\HTTP\dump.xml' ) )->get_path(), - 'String representations of Windows filepaths have forward slash separators and preserve the drive letter.' + /** + * Expectations when resolving URLs. + * + * @return string[][] + */ + public function url_expectations(): array { + return array( + array( '/../foo/bar/baz/bazooka/../../baz', 'file:///foo/bar/baz' ), + array( './a/b/c/./../././../b/c', 'file://a/b/c' ), + array( 'relative/path', 'file://relative/path' ), + array( '/absolute/path', 'file:///absolute/path' ), + array( '/var/www/network/%2econfig', 'file:///var/www/network/%2econfig' ), + array( '///foo', 'file:///foo' ), + array( '~/foo.txt', 'file://~/foo.txt' ), + array( 'baz///foo', 'file://baz/foo' ), + array( 'file:///etc/foo/bar', 'file:///etc/foo/bar' ), + array( 'foo://bar', 'foo://bar/' ), + array( 'foo://bar/baz-file', 'foo://bar/baz-file' ), + array( 'foo://bar/baz-dir/', 'foo://bar/baz-dir/' ), + array( 'https://foo.bar/parent/.%2e/asset.txt', 'https://foo.bar/asset.txt' ), + array( 'https://foo.bar/parent/%2E./asset.txt', 'https://foo.bar/asset.txt' ), + array( 'https://foo.bar/parent/%2E%2e/asset.txt', 'https://foo.bar/asset.txt' ), + array( 'https://foo.bar/parent/%2E.%2fasset.txt', 'https://foo.bar/parent/%2E.%2fasset.txt' ), + array( 'http://localhost?../../bar', 'http://localhost/?../../bar' ), ); } - public function test_can_get_normalized_url_representation() { - $this->assertEquals( - 'file://relative/path', - ( new URL( 'relative/path' ) )->get_url(), - 'Can obtain a URL representation of a relative filepath, even when the initial string was a plain filepath.' - ); + /** + * @dataProvider parent_url_expectations + * + * @param string $source_path + * @param int $parent_level + * @param string|false $expectation + */ + public function test_can_obtain_parent_url( string $source_path, int $parent_level, $expectation ) { + $this->assertEquals( $expectation, ( new URL( $source_path ) )->get_parent_url( $parent_level ) ); + } - $this->assertEquals( - 'file:///absolute/path', - ( new URL( '/absolute/path' ) )->get_url(), - 'Can obtain a URL representation of an absolute filepath, even when the initial string was a plain filepath.' - ); - - $this->assertEquals( - 'file:///etc/foo/bar', - ( new URL( 'file:///etc/foo/bar' ) )->get_url(), - 'Can obtain a URL representation of a filepath, when the source filepath was also expressed as a URL.' + /** + * Expectations when resolving (grand-)parent URLs. + * + * @return array[] + */ + public function parent_url_expectations(): array { + return array( + array( '/', 1, false ), + array( '/', 2, false ), + array( './', 1, 'file://../' ), + array( '../', 1, 'file://../../' ), + array( 'relative-file.png', 1, 'file://./' ), + array( 'relative-file.png', 2, 'file://../' ), + array( '/var/dev/', 1, 'file:///var/' ), + array( '/var/../dev/./../foo/bar', 1, 'file:///foo/' ), + array( 'https://example.com', 1, false ), + array( 'https://example.com/foo', 1, 'https://example.com/' ), + array( 'https://example.com/foo/bar/baz/../cat/', 2, 'https://example.com/foo/' ), + array( 'https://example.com/foo/bar/baz/%2E%2E/dog/', 2, 'https://example.com/foo/' ), + array( 'file://./', 1, 'file://../' ), + array( 'file://./', 2, 'file://../../' ), + array( 'file://../../foo', 1, 'file://../../' ), + array( 'file://../../foo', 2, 'file://../../../' ), + array( 'file://../../', 1, 'file://../../../' ), + array( 'file://./../', 2, 'file://../../../' ), ); } - public function test_handling_of_percent_encoded_periods() { - $this->assertEquals( - 'https://foo.bar/asset.txt', - ( new URL( 'https://foo.bar/parent/.%2e/asset.txt' ) )->get_url(), - 'Directory traversals expressed using percent-encoding are still resolved (lowercase, one encoded period).' - ); - - $this->assertEquals( - 'https://foo.bar/asset.txt', - ( new URL( 'https://foo.bar/parent/%2E./asset.txt' ) )->get_url(), - 'Directory traversals expressed using percent-encoding are still resolved (uppercase, one encoded period).' - ); - - $this->assertEquals( - 'https://foo.bar/asset.txt', - ( new URL( 'https://foo.bar/parent/%2E%2e/asset.txt' ) )->get_url(), - 'Directory traversals expressed using percent-encoding are still resolved (mixed case, both periods encoded).' - ); - - $this->assertEquals( - 'https://foo.bar/parent/%2E.%2fasset.txt', - ( new URL( 'https://foo.bar/parent/%2E.%2fasset.txt' ) )->get_url(), - 'If the forward slash after a double period is URL encoded, there is no directory traversal (since this means the slash is a part of the segment and is not a separator).' - ); - - $this->assertEquals( - 'file:///var/www/network/%2econfig', - ( new URL( '/var/www/network/%2econfig' ) )->get_url(), - 'Use of percent-encoding in URLs is accepted and unnecessary conversion does not take place.' - ); + /** + * @dataProvider all_parent_url_expectations + * + * @param string $source_path + * @param array $expectation + */ + public function test_can_obtain_all_parent_urls( string $source_path, array $expectation ) { + $this->assertEquals( $expectation, ( new URL( $source_path ) )->get_all_parent_urls() ); } - public function test_can_obtain_parent_url() { - $this->assertFalse( - ( new URL( '/' ) )->get_parent_url(), - 'Root directory "/" is considered to have no parent (scenario #1).' - ); - - $this->assertFalse( - ( new URL( '/' ) )->get_parent_url( 2 ), - 'Root directory "/" is considered to have no parent (scenario #2).' - ); - - $this->assertEquals( - 'file:///var/', - ( new URL( '/var/dev/' ) )->get_parent_url(), - 'The parent URL will be trailingslashed.' - ); - - $this->assertFalse( - ( new URL( 'https://example.com' ) )->get_parent_url(), - 'In the case of non-file URLs, if we only have a host name and no path then the parent cannot be derived.' - ); - } - - public function test_can_obtain_all_parent_urls() { - $this->assertEquals( + /** + * Expectations when obtaining all possible parent URLs of a given URL/path. + * + * @return array[] + */ + public function all_parent_urL_expectations(): array { + return array( array( - 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/', - 'https://local.web/wp-content/uploads/woocommerce_uploads/', - 'https://local.web/wp-content/uploads/', - 'https://local.web/wp-content/', - 'https://local.web/', + 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/secret-sauce.pdf', + array( + 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/', + 'https://local.web/wp-content/uploads/woocommerce_uploads/', + 'https://local.web/wp-content/uploads/', + 'https://local.web/wp-content/', + 'https://local.web/', + ), + ), + array( + '/srv/websites/my.wp.site/public/test-file.doc', + array( + 'file:///srv/websites/my.wp.site/public/', + 'file:///srv/websites/my.wp.site/', + 'file:///srv/websites/', + 'file:///srv/', + 'file:///', + ), + ), + array( + 'C:\\Documents\\Web\\TestSite\\BackgroundTrack.mp3', + array( + 'file://C:/Documents/Web/TestSite/', + 'file://C:/Documents/Web/', + 'file://C:/Documents/', + 'file://C:/', + ), ), - ( new URL( 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/secret-sauce.pdf' ) )->get_all_parent_urls(), - 'All parent URLs can be derived, but the host name is never stripped.' - ); - - $this->assertEquals( array( - 'file:///srv/websites/my.wp.site/public/', - 'file:///srv/websites/my.wp.site/', - 'file:///srv/websites/', - 'file:///srv/', 'file:///', + array(), ), - ( new URL( '/srv/websites/my.wp.site/public/test-file.doc' ) )->get_all_parent_urls(), - 'All parent URLs can be derived for a filepath, up to and including the root directory.' - ); - - $this->assertEquals( array( - 'file://C:/Documents/Web/TestSite/', - 'file://C:/Documents/Web/', - 'file://C:/Documents/', - 'file://C:/', + 'relative/to/abspath', + array( + 'file://relative/to/', + 'file://relative/', + 'file://./', + ), ), - ( new URL( 'C:\\Documents\\Web\\TestSite\\BackgroundTrack.mp3' ) )->get_all_parent_urls(), - 'All parent URLs can be derived for a filepath, up to and including the root directory plus drive letter (Windows).' - ); - } - - public function test_obtaining_parent_urls_from_relative_urls() { - $this->assertEquals( array( - 'file://relative/to/', - 'file://relative/', - 'file://./', + '../../some.file', + array( + 'file://../../' + ), ), - ( new URL( 'relative/to/abspath' ) )->get_all_parent_urls(), - 'When obtaining all parent URLs for a relative filepath, we never return the root directory and never return a URL containing traversals if there were none to begin with.' - ); - - $this->assertEquals( - array( - 'file://../../' - ), - ( new URL( '../../some.file' ) )->get_all_parent_urls(), - 'When obtaining all parent URLs for a path that begins with directory traversals, we only go up one more level.' - ); - - $this->assertEquals( - 'file://../../', - ( new URL( '../' ) )->get_parent_url(), - 'If a relative *directory* beginning with a traversal is provided, we can successfully derive its parent (scenario #1).' - ); - - $this->assertEquals( - 'file://../../../', - ( new URL( '../' ) )->get_parent_url( 2 ), - 'If a relative *directory* beginning with a traversal is provided, we can successfully derive its parent (scenario #2).' - ); - - $this->assertEquals( - 'file://../../../', - ( new URL( '../../some.file' ) )->get_parent_url( 2 ), - 'If the grandparent of a relative path that begins with one or more traversals is requested, we should receive the expected result (scenario #1).' - ); - - $this->assertEquals( - 'file://../../../../', - ( new URL( '../../some.file' ) )->get_parent_url( 3 ), - 'If the grandparent of a relative path that begins with one or more traversals is requested, we should receive the expected result (scenario #2).' - ); - - $this->assertEquals( - 'file://./', - ( new URL( 'just-a-file.png' ) )->get_parent_url(), - 'The parent URL of an unqualified, relative file is simply an empty relative path (generally, though not always, this is equivalent to ABSPATH).' - ); - - $this->assertEquals( - 'file://../../', - ( new URL( '../../relatively-placed-file.pdf' ) )->get_parent_url() - ); - - $this->assertEquals( - 'file://../', - ( new URL( 'relatively-placed-file.pdf' ) )->get_parent_url( 2 ), - 'For filepaths, we can successfully determine the (grand-)parent directories of relative filepaths (when explicitly requested).' - ); - - $this->assertEquals( - 'file://../../', - ( new URL( 'relatively-placed-file.pdf' ) )->get_parent_url( 3 ), - 'For filepaths, we can successfully determine the (grand-)parent directories of relative filepaths (when explicitly requested).' - ); - - $this->assertEquals( - 'file://../foo/bar/baz', - ( new URL( '../foo/bar/cat/dog/../../baz' ) )->get_url(), - 'Directory traversals are appropriately resolved even in complex cases with multiple separate traversals.' ); } }