$sitemap_url ) {
/**
* Fires before WP Rocket sitemap preload is called for a sitemap URL.
*
* @since 2.8
*
* @param string $sitemap_type The sitemap identifier.
* @param string $sitemap_url Sitemap URL to be crawled.
*/
do_action( 'before_run_rocket_sitemap_preload', $sitemap_type, $sitemap_url ); // phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
$urls = $this->process_sitemap( $sitemap_url, $urls );
/**
* Fires after WP Rocket sitemap preload was called for a sitemap URL.
*
* @since 2.8
*
* @param string $sitemap_type The sitemap identifier.
* @param string $sitemap_url Sitemap URL crawled.
*/
do_action( 'after_run_rocket_sitemap_preload', $sitemap_type, $sitemap_url ); // phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
}
if ( true === $this->sitemap_error ) {
// Attempt to use the fallback method.
$urls = $this->get_urls( $urls );
}
if ( ! $urls ) {
return;
}
$preload = 0;
foreach ( $urls as $item ) {
$path = wp_parse_url( $item['url'], PHP_URL_PATH );
if ( isset( $path ) && preg_match( '#^(' . \get_rocket_cache_reject_uri() . ')$#', $path ) ) {
continue;
}
$this->preload_process->push_to_queue( $item );
$preload++;
}
if ( ! $preload ) {
return;
}
set_transient( $this->get_running_transient_name(), 0 );
$this->preload_process->save()->dispatch();
}
/**
* Processes the sitemaps recursively.
*
* @since 3.2
* @since 3.5 Now private.
* @author Remy Perona
*
* @param string $sitemap_url URL of the sitemap.
* @param array $urls An array of arrays.
* @return array {
* Array values are arrays described as follows.
* Array keys are an identifier based on the URL path.
*
* @type string $url The URL to preload.
* @type bool $mobile True when we want to send a "mobile" user agent with the request. Optional.
* @type string $source An identifier related to the source of the preload (e.g. self::PRELOAD_ID).
* }
*/
private function process_sitemap( $sitemap_url, array $urls = [] ) {
	// Every call (including recursive ones) starts from a clean error state; the flag
	// therefore reflects the outcome of the most recently processed sitemap.
	$this->sitemap_error = false;

	/**
	 * Filters the arguments for the sitemap preload request.
	 *
	 * @since 2.10.8
	 * @author Remy Perona
	 *
	 * @param array $args Arguments for the request.
	 */
	$args = apply_filters(
		'rocket_preload_sitemap_request_args',
		[
			'timeout'    => 10,
			'user-agent' => 'WP Rocket/Sitemaps',
			'sslverify'  => apply_filters( 'https_local_ssl_verify', false ), // phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound
		]
	);

	$sitemap = wp_remote_get( esc_url_raw( $sitemap_url ), $args );

	// Load the errors stored so far and make sure `$errors['errors']` is an array
	// before any new message is appended to it.
	$errors           = get_transient( 'rocket_preload_errors' );
	$errors           = is_array( $errors ) ? $errors : [];
	$errors['errors'] = isset( $errors['errors'] ) && is_array( $errors['errors'] ) ? $errors['errors'] : [];

	// The request itself failed (no HTTP response to inspect).
	if ( is_wp_error( $sitemap ) ) {
		// Translators: %1$s is a XML sitemap URL, %2$s is the error message, %3$s = opening link tag, %4$s = closing link tag.
		$errors['errors'][] = sprintf( __( 'Sitemap preload encountered an error. Could not gather links on %1$s because of the following error: %2$s. %3$sLearn more%4$s.', 'rocket' ), $sitemap_url, $sitemap->get_error_message(), '', '' );

		$this->sitemap_error = true;
		set_transient( 'rocket_preload_errors', $errors );

		return $urls;
	}

	$response_code = wp_remote_retrieve_response_code( $sitemap );

	// Anything other than a 200 is reported with a message tailored to the status code.
	if ( 200 !== $response_code ) {
		switch ( $response_code ) {
			case 401:
			case 403:
				// Translators: %1$s is an URL, %2$s is the HTTP response code, %3$s = opening link tag, %4$s = closing link tag.
				$errors['errors'][] = sprintf( __( 'Sitemap preload encountered an error. %1$s is not accessible to due to the following response code: %2$s. Security measures could be preventing access. %3$sLearn more%4$s.', 'rocket' ), $sitemap_url, $response_code, '', '' );
				break;
			case 404:
				// Translators: %1$s is an URL, %2$s = opening link tag, %3$s = closing link tag.
				$errors['errors'][] = sprintf( __( 'Sitemap preload encountered an error. %1$s is not accessible to due to the following response code: 404. Please make sure you entered the correct sitemap URL and it is accessible in your browser. %2$sLearn more%3$s.', 'rocket' ), $sitemap_url, '', '' );
				break;
			case 500:
				// Translators: %1$s is an URL, %2$s = opening link tag, %3$s = closing link tag.
				$errors['errors'][] = sprintf( __( 'Sitemap preload encountered an error. %1$s is not accessible to due to the following response code: 500. Please check with your web host about server access. %2$sLearn more%3$s.', 'rocket' ), $sitemap_url, '', '' );
				break;
			default:
				// Translators: %1$s is an URL, %2$s is the HTTP response code, %3$s = opening link tag, %4$s = closing link tag.
				$errors['errors'][] = sprintf( __( 'Sitemap preload encountered an error. Could not gather links on %1$s because it returned the following response code: %2$s. %3$sLearn more%4$s.', 'rocket' ), $sitemap_url, $response_code, '', '' );
				break;
		}

		$this->sitemap_error = true;
		set_transient( 'rocket_preload_errors', $errors );

		return $urls;
	}

	$xml_data = wp_remote_retrieve_body( $sitemap );

	if ( empty( $xml_data ) ) {
		// Translators: %1$s is a XML sitemap URL, %2$s = opening link tag, %3$s = closing link tag.
		$errors['errors'][] = sprintf( __( 'Sitemap preload encountered an error. Could not collect links from %1$s because the file is empty. %2$sLearn more%3$s.', 'rocket' ), $sitemap_url, '', '' );

		$this->sitemap_error = true;
		set_transient( 'rocket_preload_errors', $errors );

		return $urls;
	}

	// Without SimpleXML the body cannot be parsed: only the flag is raised, no message is stored.
	if ( ! function_exists( 'simplexml_load_string' ) ) {
		$this->sitemap_error = true;

		return $urls;
	}

	// Collect libxml errors internally while parsing so malformed XML does not emit warnings,
	// then drop the collected errors and put the setting back the way we found it so that
	// neither the buffer nor the changed setting leaks to the rest of the request.
	$previous_libxml_setting = libxml_use_internal_errors( true );
	$xml                     = simplexml_load_string( $xml_data );

	libxml_clear_errors();
	libxml_use_internal_errors( $previous_libxml_setting );

	if ( false === $xml ) {
		$errors['errors'][] = sprintf(
			// Translators: %1$s is a XML sitemap URL, %2$s = opening link tag, %3$s = closing link tag.
			__( 'Sitemap preload encountered an error. Could not collect links from %1$s because of an error during the XML sitemap parsing. %2$sLearn more%3$s.', 'rocket' ),
			$sitemap_url,
			'',
			''
		);

		$this->sitemap_error = true;
		set_transient( 'rocket_preload_errors', $errors );

		return $urls;
	}

	// A sitemap that lists <url> entries is treated as a leaf: its URLs are collected and
	// any <sitemap> children it may also contain are not followed.
	if ( count( $xml->url ) > 0 ) {
		$mobile_preload = $this->preload_process->is_mobile_preload_enabled();

		// Iterate the nodes directly: indexed access (`$xml->url[ $i ]`) has to walk the
		// sibling list on every lookup, which becomes quadratic on large sitemaps.
		foreach ( $xml->url as $url_node ) {
			$url = (string) $url_node->loc;

			if ( ! $url ) {
				continue;
			}

			$namespaces = $url_node->getNamespaces( true );
			$path       = $this->get_url_identifier( $url );
			$mobile_key = $path . self::MOBILE_SUFFIX;

			if ( ! empty( $namespaces['mobile'] ) ) {
				// According to the sitemap, this URL is dedicated to mobile devices.
				if ( isset( $urls[ $mobile_key ] ) ) {
					continue;
				}

				$urls[ $mobile_key ] = [
					'url'    => $url,
					'mobile' => true,
					'source' => self::PRELOAD_ID,
				];
				continue;
			}

			// Regular URL: entries that are already present are never overwritten.
			if ( ! isset( $urls[ $path ] ) ) {
				$urls[ $path ] = [
					'url'    => $url,
					'mobile' => false,
					'source' => self::PRELOAD_ID,
				];
			}

			// When mobile preload is enabled, the same URL is also queued as a "mobile" entry.
			if ( $mobile_preload && ! isset( $urls[ $mobile_key ] ) ) {
				$urls[ $mobile_key ] = [
					'url'    => $url,
					'mobile' => true,
					'source' => self::PRELOAD_ID,
				];
			}
		}

		return $urls;
	}

	// No <url> entries: treat the document as a sitemap index and process each child sitemap,
	// threading the accumulated URLs through the recursive calls. Nothing happens when there
	// are no <sitemap> children either.
	foreach ( $xml->sitemap as $sitemap_node ) {
		$urls = $this->process_sitemap( (string) $sitemap_node->loc, $urls );
	}

	return $urls;
}
/**
* Get URLs from WordPress.
*
* Used as a fallback when extracting URLs from sitemap fails.
*
* @since 3.3
* @since 3.5 New $urls argument.
* @since 3.5 Now private.
* @author Arun Basil Lal
*
* @link https://github.com/wp-media/wp-rocket/issues/1306
*
* @param array $urls An array of arrays.
* @return array {
* Array values are arrays described as follows.
* Array keys are an identifier based on the URL path.
*
* @type string $url The URL to preload.
* @type bool $mobile True when we want to send a "mobile" user agent with the request. Optional.
* @type string $source An identifier related to the source of the preload (e.g. self::PRELOAD_ID).
* }
*/
private function get_urls( array $urls = [] ) {
	// Start from the public post types and keep only those accepted by is_post_type_viewable().
	$viewable_types = array_filter( get_post_types( [ 'public' => true ] ), 'is_post_type_viewable' );

	/**
	 * Filters the arguments for get_posts.
	 *
	 * @since 3.3
	 * @author Arun Basil Lal
	 *
	 * @param array $args Arguments for get_posts.
	 */
	$query_args = apply_filters(
		'rocket_preload_sitemap_fallback_request_args',
		[
			'fields'         => 'ids',
			// NOTE(review): both `numberposts` and `posts_per_page` are set — confirm which one get_posts() honors.
			'numberposts'    => 1000, // phpcs:ignore WordPress.WP.PostsPerPage.posts_per_page_numberposts
			'posts_per_page' => -1,
			'post_type'      => $viewable_types,
		]
	);

	// With the default arguments ('fields' => 'ids') these are post IDs; a filter may change that.
	$found_posts = get_posts( $query_args );
	$with_mobile = $this->preload_process->is_mobile_preload_enabled();

	foreach ( $found_posts as $found_post ) {
		$link = get_permalink( $found_post );

		// Posts without a resolvable permalink are ignored.
		if ( false !== $link ) {
			$identifier = $this->get_url_identifier( $link );
			$entry      = [
				'url'    => $link,
				'mobile' => false,
				'source' => self::PRELOAD_ID,
			];

			// The regular entry always replaces whatever was stored under this identifier.
			$urls[ $identifier ] = $entry;

			if ( $with_mobile ) {
				// Same entry, flagged for the "mobile" user agent and stored under the suffixed key.
				$entry['mobile']                           = true;
				$urls[ $identifier . self::MOBILE_SUFFIX ] = $entry;
			}
		}
	}

	return $urls;
}
}