Enhance directory traversal handling/resolution.

This commit is contained in:
barryhughes 2022-03-24 17:14:00 -07:00
parent a98aa9c910
commit cee42b7264
2 changed files with 128 additions and 18 deletions

View File

@ -124,8 +124,11 @@ class URL {
* without touching the filesystem.
*/
private function process_path() {
$segments = explode( '/', $this->components['path'] );
$this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/';
$segments = explode( '/', $this->components['path'] );
$this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/' || ! empty( $this->components['host'] );
$is_directory = substr( $this->components['path'], -1, 1 ) === '/' && strlen( $this->components['path'] ) > 1;
$resolve_traversals = 'file' !== $this->components['scheme'] || $this->is_absolute;
$retain_traversals = false;
// Clean the path.
foreach ( $segments as $part ) {
@ -137,19 +140,39 @@ class URL {
// Directory traversals created with percent-encoding syntax should also be detected.
$is_traversal = str_ireplace( '%2e', '.', $part ) === '..';
// Unwind directory traversals.
if ( $is_traversal && count( $this->path_parts ) > 0 ) {
$this->path_parts = array_slice( $this->path_parts, 0, count( $this->path_parts ) - 1 );
continue;
// Resolve directory traversals (if allowed: see further comment relating to this).
if ( $resolve_traversals && $is_traversal ) {
if ( count( $this->path_parts ) > 0 && ! $retain_traversals ) {
$this->path_parts = array_slice( $this->path_parts, 0, count( $this->path_parts ) - 1 );
continue;
} elseif ( $this->is_absolute ) {
continue;
}
}
/*
* Consider allowing directory traversals to be resolved (ie, the process that converts 'foo/bar/../baz' to
* 'foo/baz').
*
* 1. We are only concerned with file URLs; for all other types, unwinding of traversals is already allowed.
* 2. This is a 'one time' and unidirectional operation. We only wish to flip from false to true, and we
* never wish to do this more than once.
* 3. We only flip the switch after we have examined all leading '..' traversal segments.
*/
if ( false === $resolve_traversals && '..' !== $part && 'file' === $this->components['scheme'] && ! $this->is_absolute ) {
$resolve_traversals = true;
}
// At this point, if we are committing a traversal to the path then we will wish to retain the next traversal, too.
$retain_traversals = $resolve_traversals && '..' === $part;
// Retain this part of the path.
$this->path_parts[] = $part;
}
// Re-form the path from the processed segments, prepending a leading slash if it is absolute and restoring
// the Windows drive letter if we have one.
$this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts );
$this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $is_directory ? '/' : '' );
}
/**
@ -187,16 +210,47 @@ class URL {
* this is set to 1 (parent). 2 will yield the grand-parent, 3 will yield the great
* grand-parent, etc.
*
* If a level is specified that exceeds the number of path segments, this method will
* return false.
*
* @param int $level Used to indicate the level of parent.
*
* @return string
* @return string|false
*/
public function get_parent_url( int $level = 1 ): string {
public function get_parent_url( int $level = 1 ) {
if ( $level < 1 ) {
$level = 1;
}
$parent_path = implode( '/', array_slice( $this->path_parts, 0, count( $this->path_parts ) - $level ) ) . '/';
$parent_path_parts_to_keep = count( $this->path_parts ) - $level;
/*
* With the exception of file URLs, we do not allow obtaining (grand-)parent directories that require
* us to describe them using directory traversals. For example, given "http://hostname/foo/bar/baz.png" we do
* not permit determining anything more than 2 levels up (we cannot go beyond "http://hostname/").
*/
if ( 'file' !== $this->components['scheme'] && $parent_path_parts_to_keep < 0 ) {
return false;
}
// In the specific case of an absolute filepath describing the root directory, there can be no parent.
if ( 'file' === $this->components['scheme'] && $this->is_absolute && empty( $this->path_parts ) ) {
return false;
}
if ( $parent_path_parts_to_keep >= 0 ) {
$parent_path = implode( '/', array_slice( $this->path_parts, 0, $parent_path_parts_to_keep ) );
} else {
// For relative filepaths only, we use traversals to describe the requested parent.
$parent_path = untrailingslashit( str_repeat( '../', $parent_path_parts_to_keep * -1 ) );
}
if ( $this->is_relative() && '' === $parent_path ) {
$parent_path = '.';
}
// Append a trailing slash, since a parent is always a directory. The only exception is the current working directory.
$parent_path .= '/';
// For absolute paths, apply a leading slash (does not apply if we have a root path).
if ( $this->is_absolute && 0 !== strpos( $parent_path, '/' ) ) {
@ -219,12 +273,17 @@ class URL {
$scheme = null !== $this->components['scheme'] ? $this->components['scheme'] . '://' : '';
$host = null !== $this->components['host'] ? $this->components['host'] : '';
$port = null !== $this->components['port'] ? ':' . $this->components['port'] : '';
$path = $path_override ?? $this->get_path();
// Special handling for hostless URLs (typically, filepaths) referencing the current working directory.
if ( '' === $host && ( '' === $path || '.' === $path ) ) {
$path = './';
}
$user = null !== $this->components['user'] ? $this->components['user'] : '';
$pass = null !== $this->components['pass'] ? ':' . $this->components['pass'] : '';
$user_pass = ( ! empty( $user ) || ! empty( $pass ) ) ? $user . $pass . '@' : '';
$path = $path_override ?? $this->get_path();
$query = null !== $this->components['query'] ? '?' . $this->components['query'] : '';
$fragment = null !== $this->components['fragment'] ? '#' . $this->components['fragment'] : '';

View File

@ -42,7 +42,19 @@ class URLTest extends WC_Unit_Test_Case {
$this->assertEquals(
'../should/remain/relative',
( new URL( 'relative/../../should/remain/relative' ) )->get_path(),
'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem).'
'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem - scenario #1).'
);
$this->assertEquals(
'../../should/remain/relative',
( new URL( 'relative/../../../should/remain/relative' ) )->get_path(),
'Simplifies a relative path containing directory traversals to the extent possible (without inspecting the filesystem - scenario #2).'
);
$this->assertEquals(
'file:///foo/bar/baz',
( new URL( '/../foo/bar/baz/bazooka/../../baz' ) )->get_url(),
'Directory traversals are appropriately resolved even in complex cases with multiple separate traversals. When the original path is absolute, the output will be absolute.'
);
}
@ -119,10 +131,9 @@ class URLTest extends WC_Unit_Test_Case {
}
public function test_can_obtain_parent_url() {
$this->assertEquals(
'file:///',
$this->assertFalse(
( new URL( '/' ) )->get_parent_url(),
'The parent of root directory "/" is "/".'
'Root directory "/" is considered to have no parent.'
);
$this->assertEquals(
@ -131,10 +142,9 @@ class URLTest extends WC_Unit_Test_Case {
'The parent URL will be trailingslashed.'
);
$this->assertEquals(
'https://example.com/',
$this->assertFalse(
( new URL( 'https://example.com' ) )->get_parent_url(),
'The host name (for non-file URLs) is distinct from the path and will not be removed.'
'In the case of non-file URLs, if we only have a host name and no path then the parent cannot be derived.'
);
}
@ -174,4 +184,45 @@ class URLTest extends WC_Unit_Test_Case {
'All parent URLs can be derived for a filepath, up to and including the root directory plus drive letter (Windows).'
);
}
/**
 * Checks the parent URLs (and resolved URL) produced for a range of relative filepaths.
 */
public function test_obtaining_parent_urls_from_relative_urls() {
	// Every ancestor of a nested relative path, ending at the current working directory.
	$expected_ancestors = array(
		'file://relative/to/',
		'file://relative/',
		'file://./',
	);
	$nested_path        = new URL( 'relative/to/abspath' );

	$this->assertEquals(
		$expected_ancestors,
		$nested_path->get_all_parent_urls(),
		'When obtaining all parent URLs for a relative filepath, we never return the root directory and never return a URL containing traversals. '
	);

	// A bare filename has the current working directory as its parent.
	$bare_file = new URL( 'just-a-file.png' );

	$this->assertEquals(
		'file://./',
		$bare_file->get_parent_url(),
		'The parent URL of an unqualified, relative file is simply an empty relative path (generally, though not always, this is equivalent to ABSPATH).'
	);

	// Leading traversals in the original path are expected to be carried through to the parent.
	$traversing_file = new URL( '../../relatively-placed-file.pdf' );

	$this->assertEquals(
		'file://../../',
		$traversing_file->get_parent_url()
	);

	// Requesting a level above the working directory is expected to be expressed with traversals.
	$grandparent_source = new URL( 'relatively-placed-file.pdf' );

	$this->assertEquals(
		'file://../',
		$grandparent_source->get_parent_url( 2 ),
		'For filepaths, we can successfully determine the (grand-)parent directories of relative filepaths (when explicitly requested).'
	);

	$great_grandparent_source = new URL( 'relatively-placed-file.pdf' );

	$this->assertEquals(
		'file://../../',
		$great_grandparent_source->get_parent_url( 3 ),
		'For filepaths, we can successfully determine the (grand-)parent directories of relative filepaths (when explicitly requested).'
	);

	// A leading traversal is kept, while the later 'cat/dog/../..' run collapses.
	$mixed_traversals = new URL( '../foo/bar/cat/dog/../../baz' );

	$this->assertEquals(
		'file://../foo/bar/baz',
		$mixed_traversals->get_url(),
		'Directory traversals are appropriately resolved even in complex cases with multiple separate traversals.'
	);
}
}