File "URL.php"
Full Path: /home/siazco/grocery.siazco.se/wp-content/plugins/woocommerce/src/Internal/Utilities/URL.php
File size: 13.1 KB
MIME-type: text/x-php
Charset: utf-8
<?php
namespace Automattic\WooCommerce\Internal\Utilities;
/**
* Provides an easy method of assessing URLs, including filepaths (which will be silently
* converted to a file:// URL if provided).
*/
class URL {
/**
* Components of the URL being assessed.
*
* The keys match those potentially returned by the parse_url() function, except
* that they are always defined and 'drive' (Windows drive letter) has been added.
*
* @var string|null[]
*/
private $components = array(
'drive' => null,
'fragment' => null,
'host' => null,
'pass' => null,
'path' => null,
'port' => null,
'query' => null,
'scheme' => null,
'user' => null,
);
/**
* If the URL (or filepath) is absolute.
*
* @var bool
*/
private $is_absolute;
/**
* If the URL (or filepath) represents a directory other than the root directory.
*
* This is useful at different points in the process, when deciding whether to re-apply
* a trailing slash at the end of processing or when we need to calculate how many
* directory traversals are needed to form a (grand-)parent URL.
*
* @var bool
*/
private $is_non_root_directory;
/**
* The components of the URL's path.
*
* For instance, in the case of "file:///srv/www/wp.site" (noting that a file URL has
* no host component) this would contain:
*
* [ "srv", "www", "wp.site" ]
*
* In the case of a non-file URL such as "https://example.com/foo/bar/baz" (noting the
* host is not part of the path) it would contain:
*
* [ "foo", "bar", "baz" ]
*
* @var array
*/
private $path_parts = array();
/**
* The URL.
*
* @var string
*/
private $url;
/**
* Creates and processes the provided URL (or filepath).
*
* @throws URLException If the URL (or filepath) is seriously malformed.
*
* @param string $url The URL (or filepath).
*/
public function __construct( string $url ) {
$this->url = $url;
$this->preprocess();
$this->process_path();
}
/**
* Makes all slashes forward slashes, converts filepaths to file:// URLs, and
* other processing to help with comprehension of filepaths.
*
* @throws URLException If the URL is seriously malformed.
*/
private function preprocess() {
// For consistency, all slashes should be forward slashes.
$this->url = str_replace( '\\', '/', $this->url );
// Windows: capture the drive letter if provided.
if ( preg_match( '#^(file://)?([a-z]):/(?!/).*#i', $this->url, $matches ) ) {
$this->components['drive'] = $matches[2];
}
/*
* If there is no scheme, assume and prepend "file://". An exception is made for cases where the URL simply
* starts with exactly two forward slashes, which indicates 'any scheme' (most commonly, that is used when
* there is freedom to switch between 'http' and 'https').
*/
if ( ! preg_match( '#^[a-z]+://#i', $this->url ) && ! preg_match( '#^//(?!/)#', $this->url ) ) {
$this->url = 'file://' . $this->url;
}
$parsed_components = wp_parse_url( $this->url );
// If we received a really badly formed URL, let's go no further.
if ( false === $parsed_components ) {
throw new URLException(
sprintf(
/* translators: %s is the URL. */
__( '%s is not a valid URL.', 'woocommerce' ),
$this->url
)
);
}
$this->components = array_merge( $this->components, $parsed_components );
// File URLs cannot have a host. However, the initial path segment *or* the Windows drive letter
// (if present) may be incorrectly be interpreted as the host name.
if ( 'file' === $this->components['scheme'] && ! empty( $this->components['host'] ) ) {
// If we do not have a drive letter, then simply merge the host and the path together.
if ( null === $this->components['drive'] ) {
$this->components['path'] = $this->components['host'] . ( $this->components['path'] ?? '' );
}
// Restore the host to null in this situation.
$this->components['host'] = null;
}
}
/**
* Simplifies the path if possible, by resolving directory traversals to the extent possible
* without touching the filesystem.
*/
private function process_path() {
$segments = explode( '/', $this->components['path'] );
$this->is_absolute = substr( $this->components['path'], 0, 1 ) === '/' || ! empty( $this->components['host'] );
$this->is_non_root_directory = substr( $this->components['path'], -1, 1 ) === '/' && strlen( $this->components['path'] ) > 1;
$resolve_traversals = 'file' !== $this->components['scheme'] || $this->is_absolute;
$retain_traversals = false;
// Clean the path.
foreach ( $segments as $part ) {
// Drop empty segments.
if ( strlen( $part ) === 0 || '.' === $part ) {
continue;
}
// Directory traversals created with percent-encoding syntax should also be detected.
$is_traversal = str_ireplace( '%2e', '.', $part ) === '..';
// Resolve directory traversals (if allowed: see further comment relating to this).
if ( $resolve_traversals && $is_traversal ) {
if ( count( $this->path_parts ) > 0 && ! $retain_traversals ) {
$this->path_parts = array_slice( $this->path_parts, 0, count( $this->path_parts ) - 1 );
continue;
} elseif ( $this->is_absolute ) {
continue;
}
}
/*
* Consider allowing directory traversals to be resolved (ie, the process that converts 'foo/bar/../baz' to
* 'foo/baz').
*
* 1. For this decision point, we are only concerned with relative filepaths (in all other cases,
* $resolve_traversals will already be true).
* 2. This is a 'one time' and unidirectional operation. We only wish to flip from false to true, and we
* never wish to do this more than once.
* 3. We only flip the switch after we have examined all leading '..' traversal segments.
*/
if ( false === $resolve_traversals && '..' !== $part && 'file' === $this->components['scheme'] && ! $this->is_absolute ) {
$resolve_traversals = true;
}
/*
* Set a flag indicating that traversals should be retained. This is done to ensure we don't prematurely
* discard traversals at the start of the path.
*/
$retain_traversals = $resolve_traversals && '..' === $part;
// Retain this part of the path.
$this->path_parts[] = $part;
}
// Protect against empty relative paths.
if ( count( $this->path_parts ) === 0 && ! $this->is_absolute ) {
$this->path_parts = array( '.' );
$this->is_non_root_directory = true;
}
// Reform the path from the processed segments, appending a leading slash if it is absolute and restoring
// the Windows drive letter if we have one.
$this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_non_root_directory ? '/' : '' );
}
/**
* Returns the processed URL as a string.
*
* @return string
*/
public function __toString(): string {
return $this->get_url();
}
/**
* Returns all possible parent URLs for the current URL.
*
* @return string[]
*/
public function get_all_parent_urls(): array {
$max_parent = count( $this->path_parts );
$parents = array();
/*
* If we are looking at a relative path that begins with at least one traversal (example: "../../foo")
* then we should only return one parent URL (otherwise, we'd potentially have to return an infinite
* number of parent URLs since we can't know how far the tree extends).
*/
if ( $max_parent > 0 && ! $this->is_absolute && '..' === $this->path_parts[0] ) {
$max_parent = 1;
}
for ( $level = 1; $level <= $max_parent; $level++ ) {
$parents[] = $this->get_parent_url( $level );
}
return $parents;
}
/**
* Outputs the parent URL.
*
* For example, if $this->get_url() returns "https://example.com/foo/bar/baz" then
* this method will return "https://example.com/foo/bar/".
*
* When a grand-parent is needed, the optional $level parameter can be used. By default
* this is set to 1 (parent). 2 will yield the grand-parent, 3 will yield the great
* grand-parent, etc.
*
* If a level is specified that exceeds the number of path segments, this method will
* return false.
*
* @param int $level Used to indicate the level of parent.
*
* @return string|false
*/
public function get_parent_url( int $level = 1 ) {
if ( $level < 1 ) {
$level = 1;
}
$parts_count = count( $this->path_parts );
$parent_path_parts_to_keep = $parts_count - $level;
/*
* With the exception of file URLs, we do not allow obtaining (grand-)parent directories that require
* us to describe them using directory traversals. For example, given "http://hostname/foo/bar/baz.png" we do
* not permit determining anything more than 2 levels up (we cannot go beyond "http://hostname/").
*/
if ( 'file' !== $this->components['scheme'] && $parent_path_parts_to_keep < 0 ) {
return false;
}
// In the specific case of an absolute filepath describing the root directory, there can be no parent.
if ( 'file' === $this->components['scheme'] && $this->is_absolute && empty( $this->path_parts ) ) {
return false;
}
// Handle cases where the path starts with one or more 'dot segments'. Since the path has already been
// processed, we can be confident that any such segments are at the start of the path.
if ( $parts_count > 0 && ( '.' === $this->path_parts[0] || '..' === $this->path_parts[0] ) ) {
// Determine the index of the last dot segment (ex: given the path '/../../foo' it would be 1).
$single_dots = array_keys( $this->path_parts, '.', true );
$double_dots = array_keys( $this->path_parts, '..', true );
$max_dot_index = max( array_merge( $single_dots, $double_dots ) );
// Prepend the required number of traversals and discard unnecessary trailing segments.
$last_traversal = $max_dot_index + ( $this->is_non_root_directory ? 1 : 0 );
$parent_path = str_repeat( '../', $level ) . join( '/', array_slice( $this->path_parts, 0, $last_traversal ) );
} elseif ( $parent_path_parts_to_keep < 0 ) {
// For relative filepaths only, we use traversals to describe the requested parent.
$parent_path = untrailingslashit( str_repeat( '../', $parent_path_parts_to_keep * -1 ) );
} else {
// Otherwise, in a very simple case, we just remove existing parts.
$parent_path = implode( '/', array_slice( $this->path_parts, 0, $parent_path_parts_to_keep ) );
}
if ( $this->is_relative() && '' === $parent_path ) {
$parent_path = '.';
}
// Append a trailing slash, since a parent is always a directory. The only exception is the current working directory.
$parent_path .= '/';
// For absolute paths, apply a leading slash (does not apply if we have a root path).
if ( $this->is_absolute && 0 !== strpos( $parent_path, '/' ) ) {
$parent_path = '/' . $parent_path;
}
// Form the parent URL (ditching the query and fragment, if set).
$parent_url = $this->get_url(
array(
'path' => $parent_path,
'query' => null,
'fragment' => null,
)
);
// We process the parent URL through a fresh instance of this class, for consistency.
return ( new self( $parent_url ) )->get_url();
}
/**
* Outputs the processed URL.
*
* Borrows from https://www.php.net/manual/en/function.parse-url.php#106731
*
* @param array $component_overrides If provided, these will override values set in $this->components.
*
* @return string
*/
public function get_url( array $component_overrides = array() ): string {
$components = array_merge( $this->components, $component_overrides );
$scheme = null !== $components['scheme'] ? $components['scheme'] . '://' : '//';
$host = null !== $components['host'] ? $components['host'] : '';
$port = null !== $components['port'] ? ':' . $components['port'] : '';
$path = $this->get_path( $components['path'] );
// Special handling for hostless URLs (typically, filepaths) referencing the current working directory.
if ( '' === $host && ( '' === $path || '.' === $path ) ) {
$path = './';
}
$user = null !== $components['user'] ? $components['user'] : '';
$pass = null !== $components['pass'] ? ':' . $components['pass'] : '';
$user_pass = ( ! empty( $user ) || ! empty( $pass ) ) ? $user . $pass . '@' : '';
$query = null !== $components['query'] ? '?' . $components['query'] : '';
$fragment = null !== $components['fragment'] ? '#' . $components['fragment'] : '';
return $scheme . $user_pass . $host . $port . $path . $query . $fragment;
}
/**
* Outputs the path. Especially useful if it was a a regular filepath that was passed in originally.
*
* @param string $path_override If provided this will be used as the URL path. Does not impact drive letter.
*
* @return string
*/
public function get_path( string $path_override = null ): string {
return ( $this->components['drive'] ? $this->components['drive'] . ':' : '' ) . ( $path_override ?? $this->components['path'] );
}
/**
* Indicates if the URL or filepath was absolute.
*
* @return bool True if absolute, else false.
*/
public function is_absolute(): bool {
return $this->is_absolute;
}
/**
* Indicates if the URL or filepath was relative.
*
* @return bool True if relative, else false.
*/
public function is_relative(): bool {
return ! $this->is_absolute;
}
}