Merge pull request #32350 from woocommerce/fix/relative-directory-handling

Enhance directory traversal handling/resolution.
This commit is contained in:
Barry Hughes 2022-04-05 11:24:49 -07:00 committed by GitHub
commit dfbc71974b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 264 additions and 138 deletions

View File

@ -34,6 +34,17 @@ class URL {
*/
private $is_absolute;
/**
* Whether the URL (or filepath) represents a directory other than the root directory.
*
* This is useful at different points in the process, when deciding whether to re-apply
* a trailing slash at the end of processing or when we need to calculate how many
* directory traversals are needed to form a (grand-)parent URL.
*
* @var bool
*/
private $is_non_root_directory;
/**
* The components of the URL's path.
*
@ -124,32 +135,65 @@ class URL {
* without touching the filesystem.
*/
private function process_path() {
$segments = explode( '/', $this->components['path'] );
$this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/';
$segments = explode( '/', $this->components['path'] );
// A path counts as absolute when it starts with a slash or when the URL carries a host component.
$this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/' || ! empty( $this->components['host'] );
// A trailing slash on any path longer than one character marks a directory other than the root.
$this->is_non_root_directory = substr( $this->components['path'], -1, 1 ) === '/' && strlen( $this->components['path'] ) > 1;
// Traversals are resolved from the outset for everything except relative 'file' paths.
$resolve_traversals = 'file' !== $this->components['scheme'] || $this->is_absolute;
$retain_traversals = false;
// Clean the path.
foreach ( $segments as $part ) {
// Drop empty segments and single-dot (current directory) segments.
if ( strlen( $part ) === 0 ) {
if ( strlen( $part ) === 0 || '.' === $part ) {
continue;
}
// Directory traversals created with percent-encoding syntax should also be detected.
$is_traversal = str_ireplace( '%2e', '.', $part ) === '..';
// Unwind directory traversals.
if ( $is_traversal && count( $this->path_parts ) > 0 ) {
$this->path_parts = array_slice( $this->path_parts, 0, count( $this->path_parts ) - 1 );
continue;
// Resolve directory traversals (if allowed: see further comment relating to this).
if ( $resolve_traversals && $is_traversal ) {
if ( count( $this->path_parts ) > 0 && ! $retain_traversals ) {
$this->path_parts = array_slice( $this->path_parts, 0, count( $this->path_parts ) - 1 );
continue;
} elseif ( $this->is_absolute ) {
continue;
}
}
/*
* Consider allowing directory traversals to be resolved (ie, the process that converts 'foo/bar/../baz' to
* 'foo/baz').
*
* 1. For this decision point, we are only concerned with relative filepaths (in all other cases,
* $resolve_traversals will already be true).
* 2. This is a 'one time' and unidirectional operation. We only wish to flip from false to true, and we
* never wish to do this more than once.
* 3. We only flip the switch after we have examined all leading '..' traversal segments.
*/
if ( false === $resolve_traversals && '..' !== $part && 'file' === $this->components['scheme'] && ! $this->is_absolute ) {
$resolve_traversals = true;
}
/*
* Set a flag indicating that traversals should be retained. This is done to ensure we don't prematurely
* discard traversals at the start of the path.
*/
$retain_traversals = $resolve_traversals && '..' === $part;
// Retain this part of the path.
$this->path_parts[] = $part;
}
// Protect against empty relative paths.
if ( count( $this->path_parts ) === 0 && ! $this->is_absolute ) {
$this->path_parts = array( '.' );
$this->is_non_root_directory = true;
}
// Reform the path from the processed segments, prepending a leading slash if it is absolute and restoring
// the Windows drive letter if we have one.
// A trailing slash is re-applied when the path was identified as a non-root directory.
$this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts );
$this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_non_root_directory ? '/' : '' );
}
/**
@ -170,6 +214,15 @@ class URL {
$max_parent = count( $this->path_parts );
$parents = array();
/*
* If we are looking at a relative path that begins with at least one traversal (example: "../../foo")
* then we should only return one parent URL (otherwise, we'd potentially have to return an infinite
* number of parent URLs since we can't know how far the tree extends).
*/
if ( $max_parent > 0 && ! $this->is_absolute && '..' === $this->path_parts[0] ) {
$max_parent = 1;
}
for ( $level = 1; $level <= $max_parent; $level++ ) {
$parents[] = $this->get_parent_url( $level );
}
@ -187,23 +240,69 @@ class URL {
* this is set to 1 (parent). 2 will yield the grand-parent, 3 will yield the great
* grand-parent, etc.
*
* If a level is specified that exceeds the number of path segments, this method will
* return false.
*
* @param int $level Used to indicate the level of parent.
*
* @return string
* @return string|false
*/
public function get_parent_url( int $level = 1 ): string {
public function get_parent_url( int $level = 1 ) {
// Levels below 1 are treated as a request for the immediate parent.
if ( $level < 1 ) {
$level = 1;
}
$parent_path = implode( '/', array_slice( $this->path_parts, 0, count( $this->path_parts ) - $level ) ) . '/';
$parts_count = count( $this->path_parts );
// A negative value means the requested level climbs above the first stored path segment.
$parent_path_parts_to_keep = $parts_count - $level;
/*
* With the exception of file URLs, we do not allow obtaining (grand-)parent directories that require
* us to describe them using directory traversals. For example, given "http://hostname/foo/bar/baz.png" we do
* not permit determining anything more than 2 levels up (we cannot go beyond "http://hostname/").
*/
if ( 'file' !== $this->components['scheme'] && $parent_path_parts_to_keep < 0 ) {
return false;
}
// In the specific case of an absolute filepath describing the root directory, there can be no parent.
if ( 'file' === $this->components['scheme'] && $this->is_absolute && empty( $this->path_parts ) ) {
return false;
}
// Handle cases where the path starts with one or more 'dot segments'. Since the path has already been
// processed, we can be confident that any such segments are at the start of the path.
if ( $parts_count > 0 && ( '.' === $this->path_parts[0] || '..' === $this->path_parts[0] ) ) {
// Determine the index of the last dot segment (ex: given the path '/../../foo' it would be 1).
$single_dots = array_keys( $this->path_parts, '.', true );
$double_dots = array_keys( $this->path_parts, '..', true );
$max_dot_index = max( array_merge( $single_dots, $double_dots ) );
// Prepend the required number of traversals and discard unnecessary trailing segments.
$last_traversal = $max_dot_index + ( $this->is_non_root_directory ? 1 : 0 );
$parent_path = str_repeat( '../', $level ) . join( '/', array_slice( $this->path_parts, 0, $last_traversal ) );
} elseif ( $parent_path_parts_to_keep < 0 ) {
// For relative filepaths only, we use traversals to describe the requested parent.
$parent_path = untrailingslashit( str_repeat( '../', $parent_path_parts_to_keep * -1 ) );
} else {
// Otherwise, in a very simple case, we just remove existing parts.
$parent_path = implode( '/', array_slice( $this->path_parts, 0, $parent_path_parts_to_keep ) );
}
if ( $this->is_relative() && '' === $parent_path ) {
$parent_path = '.';
}
// Append a trailing slash, since a parent is always a directory. The only exception is the current working directory.
$parent_path .= '/';
// For absolute paths, apply a leading slash (does not apply if we have a root path).
if ( $this->is_absolute && 0 !== strpos( $parent_path, '/' ) ) {
$parent_path = '/' . $parent_path;
}
return $this->get_url( $this->get_path( $parent_path ) );
// Form the parent URL, then process it exactly as we would any other URL for consistency.
$parent_url = $this->get_url( $this->get_path( $parent_path ) );
return ( new self( $parent_url ) )->get_url();
}
/**
@ -219,12 +318,17 @@ class URL {
$scheme = null !== $this->components['scheme'] ? $this->components['scheme'] . '://' : '';
$host = null !== $this->components['host'] ? $this->components['host'] : '';
$port = null !== $this->components['port'] ? ':' . $this->components['port'] : '';
$path = $path_override ?? $this->get_path();
// Special handling for hostless URLs (typically, filepaths) referencing the current working directory.
if ( '' === $host && ( '' === $path || '.' === $path ) ) {
$path = './';
}
$user = null !== $this->components['user'] ? $this->components['user'] : '';
$pass = null !== $this->components['pass'] ? ':' . $this->components['pass'] : '';
$user_pass = ( ! empty( $user ) || ! empty( $pass ) ) ? $user . $pass . '@' : '';
$path = $path_override ?? $this->get_path();
$query = null !== $this->components['query'] ? '?' . $this->components['query'] : '';
$fragment = null !== $this->components['fragment'] ? '#' . $this->components['fragment'] : '';

View File

@ -26,152 +26,174 @@ class URLTest extends WC_Unit_Test_Case {
);
}
public function test_directory_traversal_resolution() {
$this->assertEquals(
'/var/foo/foobar',
( new URL( '/var/foo/bar/baz/../../foobar' ) )->get_path(),
'Correctly resolves a path containing a directory traversal.'
);
/**
* @dataProvider path_expectations
*
* @param string $source_path
* @param string $expected_resolution
*/
public function test_path_resolution( $source_path, $expected_resolution ) {
	// Parse the raw input, then compare the path it reports against the provider's expectation.
	$url           = new URL( $source_path );
	$resolved_path = $url->get_path();

	$this->assertEquals( $expected_resolution, $resolved_path );
}
$this->assertEquals(
'/bazbar',
( new URL( '/var/foo/../../../../bazbar' ) )->get_path(),
'Correctly resolves a path containing a directory traversal, even if the traversals attempt to backtrack beyond the root directory.'
);
$this->assertEquals(
'../should/remain/relative',
( new URL( 'relative/../../should/remain/relative' ) )->get_path(),
'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem).'
/**
* Expectations when requesting the path of a URL.
*
* @return string[][]
*/
public function path_expectations(): array {
	// Each dataset is a pair: the source path, followed by the path expected back from get_path().
	$expectations = array(
		array( '/var/foo/bar/baz/../../foobar', '/var/foo/foobar' ),
		array( '/var/foo/../../../../bazbar', '/bazbar' ),
		array( '././././.', './' ),
		array( 'empty/segments//are/stripped', 'empty/segments/are/stripped' ),
		array( '///nonempty/ /whitespace/ /is//kept', '/nonempty/ /whitespace/ /is/kept' ),
		array( 'relative/../../should/remain/relative', '../should/remain/relative' ),
		array( 'relative/../../../should/remain/relative', '../../should/remain/relative' ),
		array( 'c:\\Windows\Server\HTTP\dump.xml', 'c:/Windows/Server/HTTP/dump.xml' ),
	);

	return $expectations;
}
public function test_can_get_normalized_string_representation() {
$this->assertEquals(
'foo/bar/baz',
( new URL( 'foo/bar//baz' ) )->get_path(),
'Empty segments are discarded, remains as a relative path.'
);
/**
* @dataProvider url_expectations
*
* @param string $source_url
* @param string $expected_resolution
*/
public function test_url_resolution( $source_url, $expected_resolution ) {
	// Parse the raw input, then compare the URL it reports against the provider's expectation.
	$url          = new URL( $source_url );
	$resolved_url = $url->get_url();

	$this->assertEquals( $expected_resolution, $resolved_url );
}
$this->assertEquals(
'/foo/ /bar/ /baz/foobarbaz',
( new URL( '///foo/ /bar/ /baz//foobarbaz' ) )->get_path(),
'Empty segments are discarded, non-empty segments containing only whitespace are preserved, remains as an absolute path.'
);
$this->assertEquals(
'c:/Windows/Server/HTTP/dump.xml',
( new URL( 'c:\\Windows\Server\HTTP\dump.xml' ) )->get_path(),
'String representations of Windows filepaths have forward slash separators and preserve the drive letter.'
/**
* Expectations when resolving URLs.
*
* @return string[][]
*/
public function url_expectations(): array {
	// Each dataset is a pair: the source URL or filepath, followed by the URL expected back from get_url().
	$expectations = array(
		// Plain filepaths.
		array( '/../foo/bar/baz/bazooka/../../baz', 'file:///foo/bar/baz' ),
		array( './a/b/c/./../././../b/c', 'file://a/b/c' ),
		array( 'relative/path', 'file://relative/path' ),
		array( '/absolute/path', 'file:///absolute/path' ),
		array( '/var/www/network/%2econfig', 'file:///var/www/network/%2econfig' ),
		array( '///foo', 'file:///foo' ),
		array( '~/foo.txt', 'file://~/foo.txt' ),
		array( 'baz///foo', 'file://baz/foo' ),
		// Inputs that already carry a scheme.
		array( 'file:///etc/foo/bar', 'file:///etc/foo/bar' ),
		array( 'foo://bar', 'foo://bar/' ),
		array( 'foo://bar/baz-file', 'foo://bar/baz-file' ),
		array( 'foo://bar/baz-dir/', 'foo://bar/baz-dir/' ),
		// Percent-encoded periods.
		array( 'https://foo.bar/parent/.%2e/asset.txt', 'https://foo.bar/asset.txt' ),
		array( 'https://foo.bar/parent/%2E./asset.txt', 'https://foo.bar/asset.txt' ),
		array( 'https://foo.bar/parent/%2E%2e/asset.txt', 'https://foo.bar/asset.txt' ),
		array( 'https://foo.bar/parent/%2E.%2fasset.txt', 'https://foo.bar/parent/%2E.%2fasset.txt' ),
		// Dot segments that appear only in the query string.
		array( 'http://localhost?../../bar', 'http://localhost/?../../bar' ),
	);

	return $expectations;
}
public function test_can_get_normalized_url_representation() {
$this->assertEquals(
'file://relative/path',
( new URL( 'relative/path' ) )->get_url(),
'Can obtain a URL representation of a relative filepath, even when the initial string was a plain filepath.'
);
/**
* @dataProvider parent_url_expectations
*
* @param string $source_path
* @param int $parent_level
* @param string|false $expectation
*/
public function test_can_obtain_parent_url( string $source_path, int $parent_level, $expectation ) {
	$parent_url = ( new URL( $source_path ) )->get_parent_url( $parent_level );

	/*
	 * Expectations are either a URL string or boolean false. A strict comparison is used so that an
	 * expected false is only satisfied by an actual false, and not by an empty string or another
	 * value that merely compares loosely equal to it.
	 */
	$this->assertSame( $expectation, $parent_url );
}
$this->assertEquals(
'file:///absolute/path',
( new URL( '/absolute/path' ) )->get_url(),
'Can obtain a URL representation of an absolute filepath, even when the initial string was a plain filepath.'
);
$this->assertEquals(
'file:///etc/foo/bar',
( new URL( 'file:///etc/foo/bar' ) )->get_url(),
'Can obtain a URL representation of a filepath, when the source filepath was also expressed as a URL.'
/**
* Expectations when resolving (grand-)parent URLs.
*
* @return array[]
*/
public function parent_url_expectations(): array {
	// Each dataset is a triple: source URL/path, parent level requested, expected parent URL (or false).
	$expectations = array(
		// Plain filepaths.
		array( '/', 1, false ),
		array( '/', 2, false ),
		array( './', 1, 'file://../' ),
		array( '../', 1, 'file://../../' ),
		array( 'relative-file.png', 1, 'file://./' ),
		array( 'relative-file.png', 2, 'file://../' ),
		array( '/var/dev/', 1, 'file:///var/' ),
		array( '/var/../dev/./../foo/bar', 1, 'file:///foo/' ),
		// HTTPS URLs.
		array( 'https://example.com', 1, false ),
		array( 'https://example.com/foo', 1, 'https://example.com/' ),
		array( 'https://example.com/foo/bar/baz/../cat/', 2, 'https://example.com/foo/' ),
		array( 'https://example.com/foo/bar/baz/%2E%2E/dog/', 2, 'https://example.com/foo/' ),
		// Relative paths expressed with an explicit file scheme.
		array( 'file://./', 1, 'file://../' ),
		array( 'file://./', 2, 'file://../../' ),
		array( 'file://../../foo', 1, 'file://../../' ),
		array( 'file://../../foo', 2, 'file://../../../' ),
		array( 'file://../../', 1, 'file://../../../' ),
		array( 'file://./../', 2, 'file://../../../' ),
	);

	return $expectations;
}
public function test_handling_of_percent_encoded_periods() {
$this->assertEquals(
'https://foo.bar/asset.txt',
( new URL( 'https://foo.bar/parent/.%2e/asset.txt' ) )->get_url(),
'Directory traversals expressed using percent-encoding are still resolved (lowercase, one encoded period).'
);
$this->assertEquals(
'https://foo.bar/asset.txt',
( new URL( 'https://foo.bar/parent/%2E./asset.txt' ) )->get_url(),
'Directory traversals expressed using percent-encoding are still resolved (uppercase, one encoded period).'
);
$this->assertEquals(
'https://foo.bar/asset.txt',
( new URL( 'https://foo.bar/parent/%2E%2e/asset.txt' ) )->get_url(),
'Directory traversals expressed using percent-encoding are still resolved (mixed case, both periods encoded).'
);
$this->assertEquals(
'https://foo.bar/parent/%2E.%2fasset.txt',
( new URL( 'https://foo.bar/parent/%2E.%2fasset.txt' ) )->get_url(),
'If the forward slash after a double period is URL encoded, there is no directory traversal (since this means the slash is a part of the segment and is not a separator).'
);
$this->assertEquals(
'file:///var/www/network/%2econfig',
( new URL( '/var/www/network/%2econfig' ) )->get_url(),
'Use of percent-encoding in URLs is accepted and unnecessary conversion does not take place.'
);
/**
* @dataProvider all_parent_url_expectations
*
* @param string $source_path
* @param array $expectation
*/
public function test_can_obtain_all_parent_urls( string $source_path, array $expectation ) {
	// Collect every parent URL reported for the source, then compare the list with the provider's expectation.
	$url         = new URL( $source_path );
	$all_parents = $url->get_all_parent_urls();

	$this->assertEquals( $expectation, $all_parents );
}
public function test_can_obtain_parent_url() {
$this->assertEquals(
'file:///',
( new URL( '/' ) )->get_parent_url(),
'The parent of root directory "/" is "/".'
);
$this->assertEquals(
'file:///var/',
( new URL( '/var/dev/' ) )->get_parent_url(),
'The parent URL will be trailingslashed.'
);
$this->assertEquals(
'https://example.com/',
( new URL( 'https://example.com' ) )->get_parent_url(),
'The host name (for non-file URLs) is distinct from the path and will not be removed.'
);
}
public function test_can_obtain_all_parent_urls() {
$this->assertEquals(
/**
* Expectations when obtaining all possible parent URLs of a given URL/path.
*
* @return array[]
*/
public function all_parent_url_expectations(): array {
// Each dataset pairs a source URL/path with the list of parent URLs expected from get_all_parent_urls().
// The method name is spelled in lower case throughout, matching the @dataProvider annotation that references it.
return array(
array(
'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/',
'https://local.web/wp-content/uploads/woocommerce_uploads/',
'https://local.web/wp-content/uploads/',
'https://local.web/wp-content/',
'https://local.web/',
'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/secret-sauce.pdf',
array(
'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/',
'https://local.web/wp-content/uploads/woocommerce_uploads/',
'https://local.web/wp-content/uploads/',
'https://local.web/wp-content/',
'https://local.web/',
),
),
array(
'/srv/websites/my.wp.site/public/test-file.doc',
array(
'file:///srv/websites/my.wp.site/public/',
'file:///srv/websites/my.wp.site/',
'file:///srv/websites/',
'file:///srv/',
'file:///',
),
),
array(
'C:\\Documents\\Web\\TestSite\\BackgroundTrack.mp3',
array(
'file://C:/Documents/Web/TestSite/',
'file://C:/Documents/Web/',
'file://C:/Documents/',
'file://C:/',
),
),
( new URL( 'https://local.web/wp-content/uploads/woocommerce_uploads/pdf_bucket/secret-sauce.pdf' ) )->get_all_parent_urls(),
'All parent URLs can be derived, but the host name is never stripped.'
);
$this->assertEquals(
array(
'file:///srv/websites/my.wp.site/public/',
'file:///srv/websites/my.wp.site/',
'file:///srv/websites/',
'file:///srv/',
'file:///',
array(),
),
( new URL( '/srv/websites/my.wp.site/public/test-file.doc' ) )->get_all_parent_urls(),
'All parent URLs can be derived for a filepath, up to and including the root directory.'
);
$this->assertEquals(
array(
'file://C:/Documents/Web/TestSite/',
'file://C:/Documents/Web/',
'file://C:/Documents/',
'file://C:/',
'relative/to/abspath',
array(
'file://relative/to/',
'file://relative/',
'file://./',
),
),
array(
'../../some.file',
array(
'file://../../'
),
),
( new URL( 'C:\\Documents\\Web\\TestSite\\BackgroundTrack.mp3' ) )->get_all_parent_urls(),
'All parent URLs can be derived for a filepath, up to and including the root directory plus drive letter (Windows).'
);
}
}