root/trunk/wp-admin/import/wordpress.php

Revision 7965, 23.5 kB (checked in by ryan, 2 months ago)

Export and import post excerpts. Props kartik_subbarao. fixes #5555

  • Property svn:eol-style set to native
Line 
1 <?php
2
3 class WP_Import {
4
    // Original post ID (from the import file) => ID of the post created locally.
    var $post_ids_processed = array ();
    // Original post ID => original parent ID, recorded when the parent has not been imported yet.
    var $orphans = array ();
    // Path of the import file opened by get_entries().
    var $file;
    // Import identifier: placed in the author form URL and passed to wp_import_cleanup().
    var $id;
    // NOTE(review): $mtnames and $newauthornames are not referenced in the visible part of this class - confirm they are still needed.
    var $mtnames = array ();
    var $newauthornames = array ();
    // Every dc:creator value collected from the file's items by process_author() (duplicates included).
    var $allauthornames = array ();

    // Author name as found in the import file => local user ID.
    var $author_ids = array ();
    // Raw contents of the <wp:tag> elements gathered by get_entries().
    var $tags = array ();
    // Raw contents of the <wp:category> elements gathered by get_entries().
    var $categories = array ();

    // NOTE(review): $j, $fetch_attachments and $url_remap are not used in the visible part of this class - presumably consumed by the attachment handling further down; confirm.
    var $j = -1;
    var $fetch_attachments = false;
    var $url_remap = array ();
20
21     function header() {
22         echo '<div class="wrap">';
23         echo '<h2>'.__('Import WordPress').'</h2>';
24     }
25
26     function footer() {
27         echo '</div>';
28     }
29
30     function unhtmlentities($string) { // From php.net for < 4.3 compat
31         $trans_tbl = get_html_translation_table(HTML_ENTITIES);
32         $trans_tbl = array_flip($trans_tbl);
33         return strtr($string, $trans_tbl);
34     }
35
36     function greet() {
37         echo '<div class="narrow">';
38         echo '<p>'.__('Howdy! Upload your WordPress eXtended RSS (WXR) file and we&#8217;ll import the posts, comments, custom fields, and categories into this blog.').'</p>';
39         echo '<p>'.__('Choose a WordPress WXR file to upload, then click Upload file and import.').'</p>';
40         wp_import_upload_form("admin.php?import=wordpress&amp;step=1");
41         echo '</div>';
42     }
43
44     function get_tag( $string, $tag ) {
45         global $wpdb;
46         preg_match("|<$tag.*?>(.*?)</$tag>|is", $string, $return);
47         $return = preg_replace('|^<!\[CDATA\[(.*)\]\]>$|s', '$1', $return[1]);
48         $return = $wpdb->escape( trim( $return ) );
49         return $return;
50     }
51
52     function has_gzip() {
53         return is_callable('gzopen');
54     }
55
56     function fopen($filename, $mode='r') {
57         if ( $this->has_gzip() )
58             return gzopen($filename, $mode);
59         return fopen($filename, $mode);
60     }
61
62     function feof($fp) {
63         if ( $this->has_gzip() )
64             return gzeof($fp);
65         return feof($fp);
66     }
67
68     function fgets($fp, $len=8192) {
69         if ( $this->has_gzip() )
70             return gzgets($fp, $len);
71         return fgets($fp, $len);
72     }
73
74     function fclose($fp) {
75         if ( $this->has_gzip() )
76             return gzclose($fp);
77         return fclose($fp);
78     }
79
80     function get_entries($process_post_func=NULL) {
81         set_magic_quotes_runtime(0);
82
83         $doing_entry = false;
84         $is_wxr_file = false;
85
86         $fp = $this->fopen($this->file, 'r');
87         if ($fp) {
88             while ( !$this->feof($fp) ) {
89                 $importline = rtrim($this->fgets($fp));
90
91                 // this doesn't check that the file is perfectly valid but will at least confirm that it's not the wrong format altogether
92                 if ( !$is_wxr_file && preg_match('|xmlns:wp="http://wordpress[.]org/export/\d+[.]\d+/"|', $importline) )
93                     $is_wxr_file = true;
94
95                 if ( false !== strpos($importline, '<wp:category>') ) {
96                     preg_match('|<wp:category>(.*?)</wp:category>|is', $importline, $category);
97                     $this->categories[] = $category[1];
98                     continue;
99                 }
100                 if ( false !== strpos($importline, '<wp:tag>') ) {
101                     preg_match('|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag);
102                     $this->tags[] = $tag[1];
103                     continue;
104                 }
105                 if ( false !== strpos($importline, '<item>') ) {
106                     $this->post = '';
107                     $doing_entry = true;
108                     continue;
109                 }
110                 if ( false !== strpos($importline, '</item>') ) {
111                     $doing_entry = false;
112                     if ($process_post_func)
113                         call_user_func($process_post_func, $this->post);
114                     continue;
115                 }
116                 if ( $doing_entry ) {
117                     $this->post .= $importline . "\n";
118                 }
119             }
120
121             $this->fclose($fp);
122         }
123
124         return $is_wxr_file;
125
126     }
127
128     function get_wp_authors() {
129         // We need to find unique values of author names, while preserving the order, so this function emulates the unique_value(); php function, without the sorting.
130         $temp = $this->allauthornames;
131         $authors[0] = array_shift($temp);
132         $y = count($temp) + 1;
133         for ($x = 1; $x < $y; $x ++) {
134             $next = array_shift($temp);
135             if (!(in_array($next, $authors)))
136                 array_push($authors, "$next");
137         }
138
139         return $authors;
140     }
141
142     function get_authors_from_post() {
143         global $current_user;
144
145         // this will populate $this->author_ids with a list of author_names => user_ids
146
147         foreach ( $_POST['author_in'] as $i => $in_author_name ) {
148
149             if ( !empty($_POST['user_select'][$i]) ) {
150                 // an existing user was selected in the dropdown list
151                 $user = get_userdata( intval($_POST['user_select'][$i]) );
152                 if ( isset($user->ID) )
153                     $this->author_ids[$in_author_name] = $user->ID;
154             }
155             elseif ( $this->allow_create_users() ) {
156                 // nothing was selected in the dropdown list, so we'll use the name in the text field
157
158                 $new_author_name = trim($_POST['user_create'][$i]);
159                 // if the user didn't enter a name, assume they want to use the same name as in the import file
160                 if ( empty($new_author_name) )
161                     $new_author_name = $in_author_name;
162
163                 $user_id = username_exists($new_author_name);
164                 if ( !$user_id ) {
165                     $user_id = wp_create_user($new_author_name, wp_generate_password());
166                 }
167
168                 $this->author_ids[$in_author_name] = $user_id;
169             }
170
171             // failsafe: if the user_id was invalid, default to the current user
172             if ( empty($this->author_ids[$in_author_name]) ) {
173                 $this->author_ids[$in_author_name] = intval($current_user->ID);
174             }
175         }
176
177     }
178
179     function wp_authors_form() {
180 ?>
181 <h2><?php _e('Assign Authors'); ?></h2>
182 <p><?php _e('To make it easier for you to edit and save the imported posts and drafts, you may want to change the name of the author of the posts. For example, you may want to import all the entries as <code>admin</code>s entries.'); ?></p>
183 <?php
184     if ( $this->allow_create_users() ) {
185         echo '<p>'.__('If a new user is created by WordPress, a password will be randomly generated. Manually change the user\'s details if necessary.')."</p>\n";
186     }
187
188
189         $authors = $this->get_wp_authors();
190         echo '<ol id="authors">';
191         echo '<form action="?import=wordpress&amp;step=2&amp;id=' . $this->id . '" method="post">';
192         wp_nonce_field('import-wordpress');
193         $j = -1;
194         foreach ($authors as $author) {
195             ++ $j;
196             echo '<li>'.__('Import author:').' <strong>'.$author.'</strong><br />';
197             $this->users_form($j, $author);
198             echo '</li>';
199         }
200
201         if ( $this->allow_fetch_attachments() ) {
202 ?>
203 </ol>
204 <h2><?php _e('Import Attachments'); ?></h2>
205 <p>
206     <input type="checkbox" value="1" name="attachments" id="import-attachments" />
207     <label for="import-attachments"><?php _e('Download and import file attachments') ?></label>
208 </p>
209
210 <?php
211         }
212
213         echo '<input type="submit" value="'.attribute_escape( __('Submit') ).'">'.'<br />';
214         echo '</form>';
215
216     }
217
218     function users_form($n, $author) {
219
220         if ( $this->allow_create_users() ) {
221             printf('<label>'.__('Create user %1$s or map to existing'), ' <input type="text" value="'.$author.'" name="'.'user_create['.intval($n).']'.'" maxlength="30"></label> <br />');
222         }
223         else {
224             echo __('Map to existing').'<br />';
225         }
226
227         // keep track of $n => $author name
228         echo '<input type="hidden" name="author_in['.intval($n).']" value="'.htmlspecialchars($author).'" />';
229
230         $users = get_users_of_blog();
231 ?><select name="user_select[<?php echo $n; ?>]">
232     <option value="0"><?php _e('- Select -'); ?></option>
233     <?php
234         foreach ($users as $user) {
235             echo '<option value="'.$user->user_id.'">'.$user->user_login.'</option>';
236         }
237 ?>
238     </select>
239     <?php
240     }
241
242     function select_authors() {
243         $is_wxr_file = $this->get_entries(array(&$this, 'process_author'));
244         if ( $is_wxr_file ) {
245             $this->wp_authors_form();
246         }
247         else {
248             echo '<h2>'.__('Invalid file').'</h2>';
249             echo '<p>'.__('Please upload a valid WXR (WordPress eXtended RSS) export file.').'</p>';
250         }
251     }
252
253     // fetch the user ID for a given author name, respecting the mapping preferences
254     function checkauthor($author) {
255         global $current_user;
256
257         if ( !empty($this->author_ids[$author]) )
258             return $this->author_ids[$author];
259
260         // failsafe: map to the current user
261         return $current_user->ID;
262     }
263
264
265
266     function process_categories() {
267         global $wpdb;
268
269         $cat_names = (array) get_terms('category', 'fields=names');
270
271         while ( $c = array_shift($this->categories) ) {
272             $cat_name = trim($this->get_tag( $c, 'wp:cat_name' ));
273
274             // If the category exists we leave it alone
275             if ( in_array($cat_name, $cat_names) )
276                 continue;
277
278             $category_nicename    = $this->get_tag( $c, 'wp:category_nicename' );
279             $posts_private        = (int) $this->get_tag( $c, 'wp:posts_private' );
280             $links_private        = (int) $this->get_tag( $c, 'wp:links_private' );
281
282             $parent = $this->get_tag( $c, 'wp:category_parent' );
283
284             if ( empty($parent) )
285                 $category_parent = '0';
286             else
287                 $category_parent = category_exists($parent);
288
289             $catarr = compact('category_nicename', 'category_parent', 'posts_private', 'links_private', 'posts_private', 'cat_name');
290
291             $cat_ID = wp_insert_category($catarr);
292         }
293     }
294
295     function process_tags() {
296         global $wpdb;
297
298         $tag_names = (array) get_terms('post_tag', 'fields=names');
299
300         while ( $c = array_shift($this->tags) ) {
301             $tag_name = trim($this->get_tag( $c, 'wp:tag_name' ));
302
303             // If the category exists we leave it alone
304             if ( in_array($tag_name, $tag_names) )
305                 continue;
306
307             $slug = $this->get_tag( $c, 'wp:tag_slug' );
308             $description = $this->get_tag( $c, 'wp:tag_description' );
309
310             $tagarr = compact('slug', 'description');
311
312             $tag_ID = wp_insert_term($tag_name, 'post_tag', $tagarr);
313         }
314     }
315
316     function process_author($post) {
317         $author = $this->get_tag( $post, 'dc:creator' );
318         if ($author)
319             $this->allauthornames[] = $author;
320     }
321
322     function process_posts() {
323         $i = -1;
324         echo '<ol>';
325
326         $this->get_entries(array(&$this, 'process_post'));
327
328         echo '</ol>';
329
330         wp_import_cleanup($this->id);
331         do_action('import_done', 'wordpress');
332
333         echo '<h3>'.sprintf(__('All done.').' <a href="%s">'.__('Have fun!').'</a>', get_option('home')).'</h3>';
334     }
335
336     function process_post($post) {
337         global $wpdb;
338
339         $post_ID = (int) $this->get_tag( $post, 'wp:post_id' );
340           if ( $post_ID && !empty($this->post_ids_processed[$post_ID]) ) // Processed already
341             return 0;
342         
343         set_time_limit( 60 );
344
345         // There are only ever one of these
346         $post_title     = $this->get_tag( $post, 'title' );
347         $post_date      = $this->get_tag( $post, 'wp:post_date' );
348         $post_date_gmt  = $this->get_tag( $post, 'wp:post_date_gmt' );
349         $comment_status = $this->get_tag( $post, 'wp:comment_status' );
350         $ping_status    = $this->get_tag( $post, 'wp:ping_status' );
351         $post_status    = $this->get_tag( $post, 'wp:status' );
352         $post_name      = $this->get_tag( $post, 'wp:post_name' );
353         $post_parent    = $this->get_tag( $post, 'wp:post_parent' );
354         $menu_order     = $this->get_tag( $post, 'wp:menu_order' );
355         $post_type      = $this->get_tag( $post, 'wp:post_type' );
356         $post_password  = $this->get_tag( $post, 'wp:post_password' );
357         $guid           = $this->get_tag( $post, 'guid' );
358         $post_author    = $this->get_tag( $post, 'dc:creator' );
359
360         $post_excerpt = $this->get_tag( $post, 'excerpt:encoded' );
361         $post_excerpt = preg_replace('|<(/?[A-Z]+)|e', "'<' . strtolower('$1')", $post_excerpt);
362         $post_excerpt = str_replace('<br>', '<br />', $post_excerpt);
363         $post_excerpt = str_replace('<hr>', '<hr />', $post_excerpt);
364
365         $post_content = $this->get_tag( $post, 'content:encoded' );
366         $post_content = preg_replace('|<(/?[A-Z]+)|e', "'<' . strtolower('$1')", $post_content);
367         $post_content = str_replace('<br>', '<br />', $post_content);
368         $post_content = str_replace('<hr>', '<hr />', $post_content);
369
370         preg_match_all('|<category domain="tag">(.*?)</category>|is', $post, $tags);
371         $tags = $tags[1];
372
373         $tag_index = 0;
374         foreach ($tags as $tag) {
375             $tags[$tag_index] = $wpdb->escape($this->unhtmlentities(str_replace(array ('<![CDATA[', ']]>'), '', $tag)));
376             $tag_index++;
377         }
378
379         preg_match_all('|<category>(.*?)</category>|is', $post, $categories);
380         $categories = $categories[1];
381
382         $cat_index = 0;
383         foreach ($categories as $category) {
384             $categories[$cat_index] = $wpdb->escape($this->unhtmlentities(str_replace(array ('<![CDATA[', ']]>'), '', $category)));
385             $cat_index++;
386         }
387
388         $post_exists = post_exists($post_title, '', $post_date);
389
390         if ( $post_exists ) {
391             echo '<li>';
392             printf(__('Post <em>%s</em> already exists.'), stripslashes($post_title));
393         } else {
394
395             // If it has parent, process parent first.
396             $post_parent = (int) $post_parent;
397             if ($post_parent) {
398                 // if we already know the parent, map it to the local ID
399                 if ( $parent = $this->post_ids_processed[$post_parent] ) {
400                     $post_parent = $parent// new ID of the parent
401                 }
402                 else {
403                     // record the parent for later
404                     $this->orphans[intval($post_ID)] = $post_parent;
405                 }
406             }
407
408             echo '<li>';
409
410             $post_author = $this->checkauthor($post_author); //just so that if a post already exists, new users are not created by checkauthor
411
412             $postdata = compact('post_author', 'post_date', 'post_date_gmt', 'post_content', 'post_excerpt', 'post_title', 'post_status', 'post_name', 'comment_status', 'ping_status', 'guid', 'post_parent', 'menu_order', 'post_type', 'post_password');
413             if ($post_type == 'attachment') {
414                 $remote_url = $this->get_tag( $post, 'wp:attachment_url' );
415                 if ( !$remote_url )
416                     $remote_url = $guid;
417
418                 $comment_post_ID = $post_id = $this->process_attachment($postdata, $remote_url);
419                 if ( !$post_id or is_wp_error($post_id) )
420                     return $post_id;
421             }
422             else {
423                 printf(__('Importing post <em>%s</em>...'), stripslashes($post_title));
424                 $comment_post_ID = $post_id = wp_insert_post($postdata);
425             }
426
427             if ( is_wp_error( $post_id ) )
428                 return $post_id;
429
430             // Memorize old and new ID.
431             if ( $post_id && $post_ID ) {
432                 $this->post_ids_processed[intval($post_ID)] = intval($post_id);
433             }
434
435             // Add categories.
436             if (count($categories) > 0) {
437                 $post_cats = array();
438                 foreach ($categories as $category) {
439                     $slug = sanitize_term_field('slug', $category, 0, 'category', 'db');
440                     $cat = get_term_by('slug', $slug, 'category');
441                     $cat_ID = 0;
442                     if ( ! empty($cat) )
443                         $cat_ID = $cat->term_id;
444                     if ($cat_ID == 0) {
445                         $category = $wpdb->escape($category);
446                         $cat_ID = wp_insert_category(array('cat_name' => $category));
447                     }
448                     $post_cats[] = $cat_ID;
449                 }
450                 wp_set_post_categories($post_id, $post_cats);
451             }
452
453             // Add tags.
454             if (count($tags) > 0) {
455                 $post_tags = array();
456                 foreach ($tags as $tag) {
457                     $slug = sanitize_term_field('slug', $tag, 0, </