e";
				if ( $siblingsUserAgents ) {
					foreach ( $siblingsUserAgents as $siblingUserAgent ) {
						$rules[ $siblingUserAgent ] = $rules[ $userAgent ];
					}

					$siblingsUserAgents = [];
				}
			}

			$prevDirective = $directive;
			$prevValue     = $value;
		}

		return $rules;
	}

	/**
	 * Extract the sitemap lines from a block of robots.txt text.
	 *
	 * The text is split on newlines; every non-empty line that has content after a
	 * (case-insensitive) "sitemap:" marker is returned. Note that the whole trimmed line
	 * is returned, i.e. the "Sitemap:" prefix is kept and not stripped from the URL.
	 *
	 * @since 4.0.10
	 *
	 * @param  string $lines The newline-separated text to extract from.
	 * @return array         The trimmed lines that contain a sitemap directive.
	 */
	public function extractSitemapUrls( $lines ) {
		$lines       = array_filter( array_map( 'trim', explode( "\n", (string) $lines ) ) );
		$sitemapUrls = [];

		foreach ( $lines as $line ) {
			// Index 1 only exists (and is non-empty) when something follows the "sitemap:" marker.
			$array = array_map( 'trim', explode( 'sitemap:', strtolower( $line ) ) );
			if ( ! empty( $array[1] ) ) {
				$sitemapUrls[] = trim( $line );
			}
		}

		return $sitemapUrls;
	}

	/**
	 * Sanitize the robots.txt rule directive value.
	 *
	 * The value is trimmed and URL-decoded, angle brackets are removed, and then a
	 * directive-specific cleanup is applied:
	 * - "user-agent": every character outside letters, digits, "-", "_", "*", ",", "." and whitespace is dropped.
	 * - "allow"/"disallow": a run of leading slashes is collapsed into a single slash.
	 *
	 * @since   4.0.0
	 * @version 4.4.2
	 *
	 * @param  string $directive The directive.
	 * @param  string $value     The value.
	 * @return string            The directive value.
	 */
	private function sanitizeDirectiveValue( $directive, $value ) {
		// Percent-encoded characters are stripped from our option values, so we decode.
		$value = rawurldecode( trim( $value ) );
		if ( ! $value ) {
			return $value;
		}

		$value = preg_replace( '/[><]/', '', $value );

		if ( 'user-agent' === $directive ) {
			$value = preg_replace( '/[^a-z0-9\-_*,.\s]/i', '', $value );
		}

		if ( 'allow' === $directive || 'disallow' === $directive ) {
			$value = preg_replace( '/^\/+/', '/', $value );
		}

		return $value;
	}

	/**
	 * Check if a physical robots.txt file exists, and if it does add a notice.
	 *
	 * Nothing happens when no physical file is detected or when the
	 * "robots-physical-file" notification already exists.
	 *
	 * @since 4.0.0
	 *
	 * @return void
	 */
	public function checkForPhysicalFiles() {
		if ( ! $this->hasPhysicalRobotsTxt() ) {
			return;
		}

		// Avoid stacking duplicates of the same notification.
		$notification = Models\Notification::getNotificationByName( 'robots-physical-file' );
		if ( $notification->exists() ) {
			return;
		}

		Models\Notification::addNotification( [
			'slug'              => uniqid(),
			'notification_name' => 'robots-physical-file',
			'title'             => __( 'Physical Robots.txt File Detected', 'all-in-one-seo-pack' ),
			'content'           => sprintf(
				// Translators: 1 - The plugin short name ("AIOSEO"), 2 - The plugin short name ("AIOSEO").
				__( '%1$s has detected a physical robots.txt file in the root folder of your WordPress installation. We recommend removing this file as it could cause conflicts with WordPress\' dynamically generated one. %2$s can import this file and delete it, or you can simply delete it.', 'all-in-one-seo-pack' ), // phpcs:ignore Generic.Files.LineLength.MaxExceeded
				AIOSEO_PLUGIN_SHORT_NAME,
				AIOSEO_PLUGIN_SHORT_NAME
			),
			'type'              => 'error',
			'level'             => [ 'all' ],
			'button1_label'     => __( 'Import and Delete', 'all-in-one-seo-pack' ),
			'button1_action'    => 'http://action#tools/import-robots-txt?redirect=aioseo-tools:robots-editor',
			'button2_label'     => __( 'Delete', 'all-in-one-seo-pack' ),
			'button2_action'    => 'http://action#tools/delete-robots-txt?redirect=aioseo-tools:robots-editor',
			'start'             => gmdate( 'Y-m-d H:i:s' )
		] );
	}

	/**
	 * Import physical robots.txt file.
	 *
	 * Reads "robots.txt" from the filesystem root reported by the filesystem helper and,
	 * when it has content, hands it to importRobotsTxtFromText(). An empty file is not
	 * an error.
	 *
	 * @since   4.0.0
	 * @version 4.4.2
	 *
	 * @param  bool       $network True if inside WordPress network administration pages.
	 * @throws \Exception          If the filesystem is not usable, the file is not readable, or the import fails.
	 * @return boolean             Whether or not the file imported correctly.
	 */
	public function importPhysicalRobotsTxt( $network = false ) {
		try {
			$fs           = aioseo()->core->fs;
			$errorMessage = esc_html__( 'There was an error importing the static robots.txt file.', 'all-in-one-seo-pack' );

			// Bail before touching $fs->fs, the same way the other filesystem methods of this class do.
			if ( ! $fs->isWpfsValid() ) {
				throw new \Exception( $errorMessage );
			}

			$file = trailingslashit( $fs->fs->abspath() ) . 'robots.txt';
			if ( ! $fs->isReadable( $file ) ) {
				throw new \Exception( $errorMessage );
			}

			$lines = trim( (string) $fs->getContents( $file ) );
			if ( $lines ) {
				$this->importRobotsTxtFromText( $lines, $network );
			}

			return true;
		} catch ( \Exception $e ) {
			// Re-throw with an escaped message, keeping the original exception for debugging.
			throw new \Exception( esc_html( $e->getMessage() ), (int) $e->getCode(), $e );
		}
	}

	/**
	 * Import robots.txt rules from raw text.
	 *
	 * The extracted rules are merged into the currently stored rules (site options, or
	 * network options when $network is true) and saved back.
	 *
	 * @since 4.4.2
	 *
	 * @param  string     $text    The text to import from.
	 * @param  bool       $network True if inside WordPress network administration pages.
	 * @throws \Exception          If no User-agent is found.
	 * @return boolean             Whether the file imported correctly or not.
	 */
	public function importRobotsTxtFromText( $text, $network ) {
		$ruleset = $this->extractRules( $text );
		if ( ! key( $ruleset ) ) {
			throw new \Exception( esc_html__( 'No User-agent found in the content beginning.', 'all-in-one-seo-pack' ) );
		}

		$options = aioseo()->options;
		if ( $network ) {
			$options = aioseo()->networkOptions;
		}

		$currentRules = $this->groupRulesByUserAgent( $options->tools->robots->rules );
		$ruleset      = $this->mergeRules( $currentRules, $ruleset, false, true );

		$options->tools->robots->rules = aioseo()->robotsTxt->prepareRobotsTxt( $ruleset );

		return true;
	}

	/**
	 * Import robots.txt from a URL.
	 *
	 * Fetches the URL, and merges the rules found in the response body into the currently
	 * stored rules (site options, or network options when $network is true).
	 *
	 * @since 4.4.2
	 *
	 * @param  string     $url     The URL to import from.
	 * @param  bool       $network True if inside WordPress network administration pages.
	 * @throws \Exception          If the request fails, the response status is not 2xx, or the body is empty.
	 * @return bool                Whether the import was successful or not.
	 */
	public function importRobotsTxtFromUrl( $url, $network ) {
		// NOTE(review): certificate verification is disabled here, presumably to support hosts with
		// self-signed certificates - confirm this is still wanted.
		$request = wp_remote_get( $url, [
			'timeout'   => 10,
			'sslverify' => false
		] );

		// Reject transport errors and non-2xx responses, so an error page is never parsed as rules.
		$statusCode       = (int) wp_remote_retrieve_response_code( $request );
		$robotsTxtContent = wp_remote_retrieve_body( $request );
		if ( is_wp_error( $request ) || 200 > $statusCode || 299 < $statusCode || ! $robotsTxtContent ) {
			throw new \Exception( esc_html__( 'There was an error importing the robots.txt content from the URL.', 'all-in-one-seo-pack' ) );
		}

		$options = aioseo()->options;
		if ( $network ) {
			$options = aioseo()->networkOptions;
		}

		$newRules     = $this->extractRules( $robotsTxtContent );
		$currentRules = $this->groupRulesByUserAgent( $options->tools->robots->rules );
		$newRules     = $this->mergeRules( $currentRules, $newRules, false, true );

		$options->tools->robots->rules = aioseo()->robotsTxt->prepareRobotsTxt( $newRules );

		return true;
	}

	/**
	 * Deletes the physical robots.txt file.
	 *
	 * On success the "robots-physical-file" notification is removed as well.
	 *
	 * @since 4.4.5
	 *
	 * @throws \Exception If the filesystem is not usable, or the file can't be deleted.
	 * @return true       True if the file was successfully deleted.
	 */
	public function deletePhysicalRobotsTxt() {
		try {
			$fs = aioseo()->core->fs;
			if (
				! $fs->isWpfsValid() ||
				! $fs->fs->delete( trailingslashit( $fs->fs->abspath() ) . 'robots.txt' )
			) {
				throw new \Exception( __( 'There was an error deleting the physical robots.txt file.', 'all-in-one-seo-pack' ) );
			}

			Models\Notification::deleteNotificationByName( 'robots-physical-file' );

			return true;
		} catch ( \Exception $e ) {
			// Re-throw with an escaped message, keeping the original exception for debugging.
			throw new \Exception( esc_html( $e->getMessage() ), (int) $e->getCode(), $e );
		}
	}

	/**
	 * Prepare robots.txt rules to save.
	 *
	 * Flattens the rules grouped by user agent into a list of JSON-encoded strings, each
	 * holding the keys "userAgent", "directive" and "fieldValue". Rules with an empty user
	 * agent, directive or value are dropped.
	 *
	 * @since 4.1.4
	 *
	 * @param  array $allRules Array with the rules, keyed by user agent.
	 * @return array           The prepared rules array.
	 */
	public function prepareRobotsTxt( $allRules = [] ) {
		$robots = [];
		foreach ( $allRules as $userAgent => $rules ) {
			if ( empty( $userAgent ) ) {
				continue;
			}

			foreach ( $rules as $rule ) {
				list( $directive, $value ) = $this->parseRule( $rule );
				if ( empty( $directive ) || empty( $value ) ) {
					continue;
				}

				// These two wildcard rules are never stored - presumably because they match the
				// rules WordPress outputs by default (TODO confirm).
				if (
					'*' === $userAgent &&
					(
						( 'allow' === $directive && '/wp-admin/admin-ajax.php' === $value ) ||
						( 'disallow' === $directive && '/wp-admin/' === $value )
					)
				) {
					continue;
				}

				$robots[] = wp_json_encode( [
					'userAgent'  => $userAgent,
					'directive'  => $directive,
					'fieldValue' => $value
				] );
			}
		}

		return $robots;
	}

	/**
	 * Checks if a physical robots.txt file exists.
	 *
	 * The check is only performed when the filesystem helper is valid and the filesystem
	 * method is "direct"; in every other case false is returned.
	 *
	 * @since 4.0.0
	 *
	 * @return boolean True if it does, false if not.
	 */
	public function hasPhysicalRobotsTxt() {
		$fs = aioseo()->core->fs;
		if ( ! $fs->isWpfsValid() ) {
			return false;
		}

		if ( 'direct' !== get_filesystem_method() ) {
			return false;
		}

		return $fs->exists( trailingslashit( $fs->fs->abspath() ) . 'robots.txt' );
	}

	/**
	 * Get the default Robots.txt lines (excluding our own).
	 *
	 * Runs do_robots() with our own "robots_txt" filter detached and captures its output.
	 *
	 * @since   4.1.7
	 * @version 4.4.2
	 *
	 * @return string The robots.txt content rules (excluding our own).
	 */
	public function getDefaultRobotsTxtContent() {
		// First, we need to remove our filter, so that it doesn't run unintentionally.
		$filterWasRemoved = remove_filter( 'robots_txt', [ $this, 'buildRules' ], 10000 );

		ob_start();
		do_robots();

		// NOTE(review): presumably restores the HTML content type after do_robots() changed it - confirm.
		if ( is_admin() ) {
			header( 'Content-Type: text/html; charset=utf-8' );
		}

		$rules = strval( ob_get_clean() );

		// Add the filter back, but only if it was registered to begin with.
		if ( $filterWasRemoved ) {
			add_filter( 'robots_txt', [ $this, 'buildRules' ], 10000 );
		}

		return $rules;
	}

	/**
	 * A check to see if the rewrite rules are set.
	 * This isn't perfect, but it will help us know in most cases.
	 *
	 * Requests the site's /robots.txt; only a response status above 299 is reported as
	 * "not set", anything else counts as "set".
	 *
	 * @since 4.0.0
	 *
	 * @return boolean Whether the rewrite rules are set or not.
	 */
	public function rewriteRulesExist() {
		// If we have a physical file, it's almost impossible to tell if the rewrite rules are set.
		// The only scenario is if we still get a 404.
		$response = wp_remote_get( aioseo()->helpers->getSiteUrl() . '/robots.txt' );

		return ! ( 299 < wp_remote_retrieve_response_code( $response ) );
	}
}