类文档

WP_Importer

💡 云策文档标注

概述

WP_Importer 是 WordPress 中用于数据导入的基类,提供了一系列方法以支持从外部源导入内容到 WordPress 数据库。该类主要处理文章、评论的导入管理,以及相关的辅助功能如超时调整和磁盘配额检查。

关键要点

  • WP_Importer 类包含多个核心方法,如 get_imported_posts、count_imported_posts 和 get_imported_comments,用于从数据库检索已导入的数据。
  • 辅助方法包括 bump_request_timeout 增加 HTTP 请求超时、is_user_over_quota 检查用户磁盘配额、stop_the_insanity 重置全局变量以防止内存溢出。
  • 类还提供 set_blog 和 set_user 方法用于设置博客和用户上下文,支持多站点环境。
  • 字符串辅助方法:cmpr_strlen 是按字符串长度降序(最长的在前)排序的比较回调;min_whitespace 将连续的换行、制表符和空格压缩为单个空格,可用于导入过程中的文本清理。

代码示例

/**
 * Returns array with imported permalinks from WordPress database.
 *
 * Reads the postmeta rows whose key is "{$importer_name}_{$blog_id}_permalink"
 * in chunks of 100 rows and maps every stored meta value to its post ID.
 *
 * @global wpdb $wpdb WordPress database abstraction object.
 *
 * @param string $importer_name Importer name, used as the meta key prefix.
 * @param string $blog_id       Blog identifier, used in the middle of the meta key.
 * @return array Map of meta value (the stored permalink) => post ID (int).
 */
public function get_imported_posts( $importer_name, $blog_id ) {
    global $wpdb;

    $hashtable = array();

    $limit  = 100;
    $offset = 0;

    // The meta key is the same for every chunk, so build it once.
    $meta_key = $importer_name . '_' . $blog_id . '_permalink';

    // Grab all posts in chunks.
    do {
        $results = $wpdb->get_results(
            $wpdb->prepare(
                "SELECT post_id, meta_value FROM $wpdb->postmeta WHERE meta_key = %s LIMIT %d,%d",
                $meta_key,
                $offset,
                $limit
            )
        );

        // Increment offset.
        $offset = ( $limit + $offset );

        if ( ! empty( $results ) ) {
            foreach ( $results as $r ) {
                // Set permalinks into array.
                $hashtable[ $r->meta_value ] = (int) $r->post_id;
            }
        }

        // A full chunk means there may be more rows; a short (or non-array)
        // result ends the loop. The is_array() check keeps count() from being
        // called on a non-countable value.
    } while ( is_array( $results ) && count( $results ) === $limit );

    return $hashtable;
}

注意事项

  • 在使用 set_blog 和 set_user 方法时,需确保参数有效,否则可能触发错误并退出脚本。
  • stop_the_insanity 方法重置 $wpdb->queries 和 $wp_actions 全局变量,适用于大规模导入场景以避免内存问题。
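  • get_page 方法通过 wp_safe_remote_request 发起 HTTP 请求:$head 为 true 时使用 HEAD 方法;仅当用户名和密码均非空时才附加 Basic 认证头;请求超时会通过 http_request_timeout 过滤器提高到 60 秒。密码参数带有 SensitiveParameter 标记,调用时需注意凭据的安全处理。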

📄 原文内容

WP_Importer base class

Methods

Name Description
WP_Importer::__construct Class Constructor
WP_Importer::bump_request_timeout Bumps up the request timeout for http requests.
WP_Importer::cmpr_strlen Sorts by strlen, longest string first.
WP_Importer::count_imported_posts Returns count of imported permalinks from WordPress database.
WP_Importer::get_imported_comments Sets array with imported comments from WordPress database.
WP_Importer::get_imported_posts Returns array with imported permalinks from WordPress database.
WP_Importer::get_page Gets URL.
WP_Importer::is_user_over_quota Checks if user has exceeded disk quota.
WP_Importer::min_whitespace Replaces newlines, tabs, and multiple spaces with a single space.
WP_Importer::set_blog
WP_Importer::set_user
WP_Importer::stop_the_insanity Resets global variables that grow out of control during imports.

Source

/**
 * WP_Importer base class.
 *
 * Collects helpers shared by importers: lookups of already-imported posts and
 * comments, blog/user context setup, a simple HTTP fetch helper, and small
 * string and memory housekeeping utilities.
 */
class WP_Importer {
	/**
	 * Class Constructor
	 */
	public function __construct() {}

	/**
	 * Returns array with imported permalinks from WordPress database.
	 *
	 * Reads the postmeta rows whose key is "{$importer_name}_{$blog_id}_permalink"
	 * in chunks of 100 rows and maps every stored meta value to its post ID.
	 *
	 * @global wpdb $wpdb WordPress database abstraction object.
	 *
	 * @param string $importer_name Importer name, used as the meta key prefix.
	 * @param string $blog_id       Blog identifier, used in the middle of the meta key.
	 * @return array Map of meta value (the stored permalink) => post ID (int).
	 */
	public function get_imported_posts( $importer_name, $blog_id ) {
		global $wpdb;

		$hashtable = array();

		$limit  = 100;
		$offset = 0;

		// The meta key is the same for every chunk, so build it once.
		$meta_key = $importer_name . '_' . $blog_id . '_permalink';

		// Grab all posts in chunks.
		do {
			$results = $wpdb->get_results(
				$wpdb->prepare(
					"SELECT post_id, meta_value FROM $wpdb->postmeta WHERE meta_key = %s LIMIT %d,%d",
					$meta_key,
					$offset,
					$limit
				)
			);

			// Increment offset.
			$offset = ( $limit + $offset );

			if ( ! empty( $results ) ) {
				foreach ( $results as $r ) {
					// Set permalinks into array.
					$hashtable[ $r->meta_value ] = (int) $r->post_id;
				}
			}

			// A full chunk means there may be more rows; a short (or non-array)
			// result ends the loop. The is_array() check keeps count() from
			// being called on a non-countable value.
		} while ( is_array( $results ) && count( $results ) === $limit );

		return $hashtable;
	}

	/**
	 * Returns count of imported permalinks from WordPress database.
	 *
	 * Counts the postmeta rows whose key is "{$importer_name}_{$blog_id}_permalink".
	 *
	 * @global wpdb $wpdb WordPress database abstraction object.
	 *
	 * @param string $importer_name Importer name, used as the meta key prefix.
	 * @param string $blog_id       Blog identifier, used in the middle of the meta key.
	 * @return int Number of matching rows, or 0 when the query yields no result.
	 */
	public function count_imported_posts( $importer_name, $blog_id ) {
		global $wpdb;

		$count = 0;

		// Get count of permalinks.
		$meta_key = $importer_name . '_' . $blog_id . '_permalink';
		$result   = $wpdb->get_results(
			$wpdb->prepare(
				"SELECT COUNT( post_id ) AS cnt FROM $wpdb->postmeta WHERE meta_key = %s",
				$meta_key
			)
		);

		if ( ! empty( $result ) ) {
			$count = (int) $result[0]->cnt;
		}

		return $count;
	}

	/**
	 * Sets array with imported comments from WordPress database.
	 *
	 * Walks the comments table in chunks of 100 rows. Each comment_agent value
	 * is split on "-" into a blog ID and a source comment ID; rows whose blog
	 * ID equals $blog_id are added to the result.
	 *
	 * @global wpdb $wpdb WordPress database abstraction object.
	 *
	 * @param string $blog_id Blog identifier to match against (compared as int).
	 * @return array Map of source comment ID (int) => local comment ID (int).
	 */
	public function get_imported_comments( $blog_id ) {
		global $wpdb;

		$hashtable = array();

		$limit  = 100;
		$offset = 0;

		// Grab all comments in chunks.
		do {
			$results = $wpdb->get_results(
				$wpdb->prepare(
					"SELECT comment_ID, comment_agent FROM $wpdb->comments LIMIT %d,%d",
					$offset,
					$limit
				)
			);

			// Increment offset.
			$offset = ( $limit + $offset );

			if ( ! empty( $results ) ) {
				foreach ( $results as $r ) {
					// Explode comment_agent key. Pad to two parts so a value
					// without a dash does not read a missing array index.
					list ( $comment_agent_blog_id, $source_comment_id ) = array_pad( explode( '-', $r->comment_agent ), 2, '' );

					$source_comment_id = (int) $source_comment_id;

					// Check if this comment came from this blog.
					if ( (int) $blog_id === (int) $comment_agent_blog_id ) {
						$hashtable[ $source_comment_id ] = (int) $r->comment_ID;
					}
				}
			}

			// See get_imported_posts(): stop on a short or non-array result.
		} while ( is_array( $results ) && count( $results ) === $limit );

		return $hashtable;
	}

	/**
	 * Resolves a blog ID and, on multisite, switches to that blog.
	 *
	 * A numeric argument is used as the ID directly. Anything else is treated
	 * as a blog address: an optional http(s):// scheme is stripped, the host
	 * and path are parsed, and the site is looked up with get_sites(). When the
	 * address cannot be parsed or no site matches, an error is written to
	 * STDERR and the script exits.
	 *
	 * @param int|string $blog_id Blog ID, or blog address (with or without scheme).
	 * @return int|void The resolved blog ID; nothing is returned when the script exits.
	 */
	public function set_blog( $blog_id ) {
		if ( is_numeric( $blog_id ) ) {
			$blog_id = (int) $blog_id;
		} else {
			// Normalize to a single http:// scheme so parse_url() sees a host.
			$blog   = 'http://' . preg_replace( '#^https?://#', '', $blog_id );
			$parsed = parse_url( $blog );
			if ( ! $parsed || empty( $parsed['host'] ) ) {
				fwrite( STDERR, "Error: can not determine blog_id from $blog_id\n" );
				exit;
			}
			if ( empty( $parsed['path'] ) ) {
				$parsed['path'] = '/';
			}
			$blogs = get_sites(
				array(
					'domain' => $parsed['host'],
					'number' => 1,
					'path'   => $parsed['path'],
				)
			);
			if ( ! $blogs ) {
				fwrite( STDERR, "Error: Could not find blog\n" );
				exit;
			}
			$blog    = array_shift( $blogs );
			$blog_id = (int) $blog->blog_id;
		}

		if ( function_exists( 'is_multisite' ) ) {
			if ( is_multisite() ) {
				switch_to_blog( $blog_id );
			}
		}

		return $blog_id;
	}

	/**
	 * Resolves a user ID and makes that user the current user.
	 *
	 * A numeric argument is used as the ID directly; anything else is looked up
	 * with username_exists(). When no user ID results, or wp_set_current_user()
	 * returns a falsy value, an error is written to STDERR and the script exits.
	 *
	 * @param int|string $user_id User ID or user login name.
	 * @return int|void The resolved user ID; nothing is returned when the script exits.
	 */
	public function set_user( $user_id ) {
		if ( is_numeric( $user_id ) ) {
			$user_id = (int) $user_id;
		} else {
			$user_id = (int) username_exists( $user_id );
		}

		if ( ! $user_id || ! wp_set_current_user( $user_id ) ) {
			fwrite( STDERR, "Error: can not find user\n" );
			exit;
		}

		return $user_id;
	}

	/**
	 * Sorts by strlen, longest string first.
	 *
	 * Comparison callback: the result is positive when $b is longer than $a,
	 * negative when it is shorter, and 0 when both have the same byte length.
	 *
	 * @param string $a First string.
	 * @param string $b Second string.
	 * @return int Difference strlen( $b ) - strlen( $a ).
	 */
	public function cmpr_strlen( $a, $b ) {
		return strlen( $b ) - strlen( $a );
	}

	/**
	 * Gets URL.
	 *
	 * Registers bump_request_timeout() on the 'http_request_timeout' filter and
	 * then requests the URL through wp_safe_remote_request(). A Basic
	 * Authorization header is sent only when both $username and $password are
	 * non-empty.
	 *
	 * @param string $url      URL to request.
	 * @param string $username Optional. User name for Basic authentication.
	 * @param string $password Optional. Password for Basic authentication.
	 * @param bool   $head     Optional. Pass true (strictly) to send a HEAD request.
	 * @return array Whatever wp_safe_remote_request() returns for the request.
	 */
	public function get_page(
		$url,
		$username = '',
		#[SensitiveParameter]
		$password = '',
		$head = false
	) {
		// Increase the timeout.
		add_filter( 'http_request_timeout', array( $this, 'bump_request_timeout' ) );

		$headers = array();
		$args    = array();
		if ( true === $head ) {
			$args['method'] = 'HEAD';
		}
		if ( ! empty( $username ) && ! empty( $password ) ) {
			$headers['Authorization'] = 'Basic ' . base64_encode( "$username:$password" );
		}

		$args['headers'] = $headers;

		return wp_safe_remote_request( $url, $args );
	}

	/**
	 * Bumps up the request timeout for http requests.
	 *
	 * @param int $val Incoming timeout value; ignored.
	 * @return int Always 60.
	 */
	public function bump_request_timeout( $val ) {
		return 60;
	}

	/**
	 * Checks if user has exceeded disk quota.
	 *
	 * @return bool True only when upload_is_user_over_quota() exists and
	 *              returns a truthy value; false otherwise.
	 */
	public function is_user_over_quota() {
		if ( function_exists( 'upload_is_user_over_quota' ) ) {
			if ( upload_is_user_over_quota() ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Replaces newlines, tabs, and multiple spaces with a single space.
	 *
	 * Every run of carriage returns, line feeds, tabs and spaces is collapsed
	 * into one space character.
	 *
	 * @param string $text Text to normalize.
	 * @return string Text with whitespace runs collapsed.
	 */
	public function min_whitespace( $text ) {
		return preg_replace( '|[\r\n\t ]+|', ' ', $text );
	}

	/**
	 * Resets global variables that grow out of control during imports.
	 *
	 * @since 3.0.0
	 *
	 * @global wpdb  $wpdb       WordPress database abstraction object.
	 * @global int[] $wp_actions
	 */
	public function stop_the_insanity() {
		global $wpdb, $wp_actions;
		// Or define( 'WP_IMPORTING', true );
		$wpdb->queries = array();
		// Reset $wp_actions to keep it from growing out of control.
		$wp_actions = array();
	}
}