WP_Importer
云策文档标注
概述
WP_Importer 是 WordPress 中用于数据导入的基类,提供了一系列方法以支持从外部源导入内容到 WordPress 数据库。该类主要处理文章、评论的导入管理,以及相关的辅助功能如超时调整和磁盘配额检查。
关键要点
- WP_Importer 类包含多个核心方法,如 get_imported_posts、count_imported_posts 和 get_imported_comments,用于从数据库检索已导入的数据。
- 辅助方法包括 bump_request_timeout 增加 HTTP 请求超时、is_user_over_quota 检查用户磁盘配额、stop_the_insanity 重置全局变量以防止内存溢出。
- 类还提供 set_blog 和 set_user 方法用于设置博客和用户上下文,支持多站点环境。
- cmpr_strlen 是按字符串长度降序(最长在前)排序时使用的比较函数;min_whitespace 将连续的换行符、制表符和空格合并为单个空格。二者用于导入过程中的文本清理。
代码示例
/**
 * Returns array with imported permalinks from WordPress database.
 *
 * Reads the post meta rows whose key is "{$importer_name}_{$blog_id}_permalink"
 * in chunks of 100 rows and maps every stored permalink to its post ID.
 *
 * @global wpdb $wpdb WordPress database abstraction object.
 *
 * @param string $importer_name Importer name, used as the meta key prefix.
 * @param string $blog_id       Source blog ID, used inside the meta key.
 * @return array Map of permalink (meta_value) => post ID (int).
 */
public function get_imported_posts( $importer_name, $blog_id ) {
	global $wpdb;

	$hashtable = array();
	$limit     = 100;
	$offset    = 0;

	// The meta key does not change between chunks, so build it once.
	$meta_key = $importer_name . '_' . $blog_id . '_permalink';

	// Grab all posts in chunks.
	do {
		$results = $wpdb->get_results(
			$wpdb->prepare(
				// ORDER BY keeps the LIMIT windows stable so chunks neither overlap nor skip rows.
				"SELECT post_id, meta_value FROM $wpdb->postmeta WHERE meta_key = %s ORDER BY meta_id LIMIT %d,%d",
				$meta_key,
				$offset,
				$limit
			)
		);

		// Nothing (or a failed query returning null): stop before calling count().
		if ( empty( $results ) ) {
			break;
		}

		// Increment offset.
		$offset += $limit;

		foreach ( $results as $r ) {
			// Set permalinks into array.
			$hashtable[ $r->meta_value ] = (int) $r->post_id;
		}
	} while ( count( $results ) === $limit ); // A short chunk means the last page was reached.

	return $hashtable;
}
注意事项
- 在使用 set_blog 和 set_user 方法时,需确保参数有效,否则可能触发错误并退出脚本。
- stop_the_insanity 方法重置 $wpdb->queries 和 $wp_actions 全局变量,适用于大规模导入场景以避免内存问题。
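- get_page 方法通过 wp_safe_remote_request 发起 HTTP 请求:$head 为 true 时使用 HEAD 方法;仅当用户名和密码都非空时才附加 Basic 认证头;并通过 http_request_timeout 过滤器将超时设为 60 秒。$password 参数带有 #[SensitiveParameter] 标记,PHP 8.2 及以上版本会在堆栈跟踪中隐藏其值。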
原文内容
WP_Importer base class
Methods
| Name | Description |
|---|---|
| WP_Importer::__construct | Class Constructor |
| WP_Importer::bump_request_timeout | Bumps up the request timeout for http requests. |
| WP_Importer::cmpr_strlen | Sorts by strlen, longest string first. |
| WP_Importer::count_imported_posts | Returns count of imported permalinks from WordPress database. |
| WP_Importer::get_imported_comments | Sets array with imported comments from WordPress database. |
| WP_Importer::get_imported_posts | Returns array with imported permalinks from WordPress database. |
| WP_Importer::get_page | Gets URL. |
| WP_Importer::is_user_over_quota | Checks if user has exceeded disk quota. |
| WP_Importer::min_whitespace | Replaces newlines, tabs, and multiple spaces with a single space. |
| WP_Importer::set_blog | – |
| WP_Importer::set_user | – |
| WP_Importer::stop_the_insanity | Resets global variables that grow out of control during imports. |
Source
/**
 * WP_Importer base class.
 *
 * Provides helpers for importers: lookups of already imported posts and
 * comments, blog/user context switching, HTTP fetching and small utilities.
 */
class WP_Importer {
	/**
	 * Class Constructor
	 */
	public function __construct() {}

	/**
	 * Returns array with imported permalinks from WordPress database.
	 *
	 * Reads the post meta rows whose key is "{$importer_name}_{$blog_id}_permalink"
	 * in chunks of 100 rows and maps every stored permalink to its post ID.
	 *
	 * @global wpdb $wpdb WordPress database abstraction object.
	 *
	 * @param string $importer_name Importer name, used as the meta key prefix.
	 * @param string $blog_id       Source blog ID, used inside the meta key.
	 * @return array Map of permalink (meta_value) => post ID (int).
	 */
	public function get_imported_posts( $importer_name, $blog_id ) {
		global $wpdb;

		$hashtable = array();
		$limit     = 100;
		$offset    = 0;

		// The meta key does not change between chunks, so build it once.
		$meta_key = $importer_name . '_' . $blog_id . '_permalink';

		// Grab all posts in chunks.
		do {
			$results = $wpdb->get_results(
				$wpdb->prepare(
					// ORDER BY keeps the LIMIT windows stable so chunks neither overlap nor skip rows.
					"SELECT post_id, meta_value FROM $wpdb->postmeta WHERE meta_key = %s ORDER BY meta_id LIMIT %d,%d",
					$meta_key,
					$offset,
					$limit
				)
			);

			// Nothing (or a failed query returning null): stop before calling count().
			if ( empty( $results ) ) {
				break;
			}

			// Increment offset.
			$offset += $limit;

			foreach ( $results as $r ) {
				// Set permalinks into array.
				$hashtable[ $r->meta_value ] = (int) $r->post_id;
			}
		} while ( count( $results ) === $limit ); // A short chunk means the last page was reached.

		return $hashtable;
	}

	/**
	 * Returns count of imported permalinks from WordPress database.
	 *
	 * Counts the post meta rows whose key is "{$importer_name}_{$blog_id}_permalink".
	 *
	 * @global wpdb $wpdb WordPress database abstraction object.
	 *
	 * @param string $importer_name Importer name, used as the meta key prefix.
	 * @param string $blog_id       Source blog ID, used inside the meta key.
	 * @return int Number of matching rows, 0 when the query yields nothing.
	 */
	public function count_imported_posts( $importer_name, $blog_id ) {
		global $wpdb;

		$count = 0;

		// Get count of permalinks.
		$meta_key = $importer_name . '_' . $blog_id . '_permalink';
		$result   = $wpdb->get_results(
			$wpdb->prepare(
				"SELECT COUNT( post_id ) AS cnt FROM $wpdb->postmeta WHERE meta_key = %s",
				$meta_key
			)
		);

		if ( ! empty( $result ) ) {
			$count = (int) $result[0]->cnt;
		}

		return $count;
	}

	/**
	 * Sets array with imported comments from WordPress database.
	 *
	 * Walks the comments table in chunks of 100 rows, splits each comment_agent
	 * value on "-" into a blog ID and a source comment ID, and keeps the rows
	 * whose blog ID matches $blog_id.
	 *
	 * @global wpdb $wpdb WordPress database abstraction object.
	 *
	 * @param string $blog_id Source blog ID to match against the comment_agent prefix.
	 * @return array Map of source comment ID (int) => local comment ID (int).
	 */
	public function get_imported_comments( $blog_id ) {
		global $wpdb;

		$hashtable = array();
		$limit     = 100;
		$offset    = 0;

		// Grab all comments in chunks.
		do {
			$results = $wpdb->get_results(
				$wpdb->prepare(
					// ORDER BY keeps the LIMIT windows stable so chunks neither overlap nor skip rows.
					"SELECT comment_ID, comment_agent FROM $wpdb->comments ORDER BY comment_ID LIMIT %d,%d",
					$offset,
					$limit
				)
			);

			// Nothing (or a failed query returning null): stop before calling count().
			if ( empty( $results ) ) {
				break;
			}

			// Increment offset.
			$offset += $limit;

			foreach ( $results as $r ) {
				// Explode comment_agent key.
				$parts = explode( '-', $r->comment_agent );

				// Values without a "-" cannot carry a "blog-comment" pair; skipping them
				// also avoids an "Undefined array key 1" warning when destructuring.
				if ( count( $parts ) < 2 ) {
					continue;
				}

				list ( $comment_agent_blog_id, $source_comment_id ) = $parts;

				$source_comment_id = (int) $source_comment_id;

				// Check if this comment came from this blog.
				if ( (int) $blog_id === (int) $comment_agent_blog_id ) {
					$hashtable[ $source_comment_id ] = (int) $r->comment_ID;
				}
			}
		} while ( count( $results ) === $limit ); // A short chunk means the last page was reached.

		return $hashtable;
	}

	/**
	 * Resolves a blog and, on multisite, switches to it.
	 *
	 * A numeric value is used as the blog ID directly. Any other value is
	 * treated as a site address: its scheme is normalized to http://, it is
	 * parsed, and the site is looked up by host and path via get_sites().
	 * When the address cannot be parsed or no site matches, an error is
	 * written to STDERR and the script exits.
	 *
	 * @param int|string $blog_id Blog ID, or a site domain/URL.
	 * @return int|void Resolved blog ID; nothing when the script exits.
	 */
	public function set_blog( $blog_id ) {
		if ( is_numeric( $blog_id ) ) {
			$blog_id = (int) $blog_id;
		} else {
			$blog   = 'http://' . preg_replace( '#^https?://#', '', $blog_id );
			$parsed = parse_url( $blog );

			if ( ! $parsed || empty( $parsed['host'] ) ) {
				fwrite( STDERR, "Error: can not determine blog_id from $blog_id\n" );
				exit;
			}

			// A bare domain has no path component; look it up at the root path.
			if ( empty( $parsed['path'] ) ) {
				$parsed['path'] = '/';
			}

			$blogs = get_sites(
				array(
					'domain' => $parsed['host'],
					'number' => 1,
					'path'   => $parsed['path'],
				)
			);

			if ( ! $blogs ) {
				fwrite( STDERR, "Error: Could not find blog\n" );
				exit;
			}

			$blog    = array_shift( $blogs );
			$blog_id = (int) $blog->blog_id;
		}

		if ( function_exists( 'is_multisite' ) ) {
			if ( is_multisite() ) {
				switch_to_blog( $blog_id );
			}
		}

		return $blog_id;
	}

	/**
	 * Resolves a user and makes it the current user.
	 *
	 * A numeric value is used as the user ID directly; any other value is
	 * looked up as a username. When no user is found or the current user
	 * cannot be set, an error is written to STDERR and the script exits.
	 *
	 * @param int|string $user_id User ID or username.
	 * @return int|void Resolved user ID; nothing when the script exits.
	 */
	public function set_user( $user_id ) {
		if ( is_numeric( $user_id ) ) {
			$user_id = (int) $user_id;
		} else {
			$user_id = (int) username_exists( $user_id );
		}

		if ( ! $user_id || ! wp_set_current_user( $user_id ) ) {
			fwrite( STDERR, "Error: can not find user\n" );
			exit;
		}

		return $user_id;
	}

	/**
	 * Sorts by strlen, longest string first.
	 *
	 * Comparison callback: negative when $a is longer than $b, positive when
	 * it is shorter, zero when both have the same byte length.
	 *
	 * @param string $a
	 * @param string $b
	 * @return int
	 */
	public function cmpr_strlen( $a, $b ) {
		return strlen( $b ) - strlen( $a );
	}

	/**
	 * Gets URL.
	 *
	 * Sends the request through wp_safe_remote_request(). A Basic
	 * Authorization header is added only when both $username and $password
	 * are non-empty.
	 *
	 * @param string $url      URL to request.
	 * @param string $username Optional. Username for Basic authentication.
	 * @param string $password Optional. Password for Basic authentication.
	 * @param bool   $head     Optional. Pass true to send a HEAD request.
	 * @return array Whatever wp_safe_remote_request() returns for the request.
	 */
	public function get_page(
		$url,
		$username = '',
		#[SensitiveParameter]
		$password = '',
		$head = false
	) {
		// Increase the timeout.
		add_filter( 'http_request_timeout', array( $this, 'bump_request_timeout' ) );

		$headers = array();
		$args    = array();

		if ( true === $head ) {
			$args['method'] = 'HEAD';
		}

		if ( ! empty( $username ) && ! empty( $password ) ) {
			$headers['Authorization'] = 'Basic ' . base64_encode( "$username:$password" );
		}

		$args['headers'] = $headers;

		return wp_safe_remote_request( $url, $args );
	}

	/**
	 * Bumps up the request timeout for http requests.
	 *
	 * @param int $val Incoming timeout value; not used.
	 * @return int Always 60.
	 */
	public function bump_request_timeout( $val ) {
		return 60;
	}

	/**
	 * Checks if user has exceeded disk quota.
	 *
	 * @return bool True when upload_is_user_over_quota() exists and reports
	 *              the quota as exceeded, false otherwise.
	 */
	public function is_user_over_quota() {
		if ( function_exists( 'upload_is_user_over_quota' ) ) {
			if ( upload_is_user_over_quota() ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Replaces newlines, tabs, and multiple spaces with a single space.
	 *
	 * @param string $text Text to normalize.
	 * @return string Text with every run of CR, LF, tab or space collapsed to one space.
	 */
	public function min_whitespace( $text ) {
		// The class must list the escape sequences, not the letters r, n and t.
		return preg_replace( '|[\r\n\t ]+|', ' ', $text );
	}

	/**
	 * Resets global variables that grow out of control during imports.
	 *
	 * @since 3.0.0
	 *
	 * @global wpdb  $wpdb       WordPress database abstraction object.
	 * @global int[] $wp_actions
	 */
	public function stop_the_insanity() {
		global $wpdb, $wp_actions;

		// Or define( 'WP_IMPORTING', true );
		$wpdb->queries = array();

		// Reset $wp_actions to keep it from growing out of control.
		$wp_actions = array();
	}
}