root/trunk/wp-includes/formatting.php

Revision 8298, 45.3 kB (checked in by ryan, 2 weeks ago)

Hands off pre tags. Props nbachiyski. fixes #7056

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 <?php
2
/**
 * Replaces plain-text punctuation with typographic HTML entities
 * (curly quotes, dashes, ellipsis, primes, multiplication sign, ...).
 *
 * The text is split into tag/shortcode-like delimiters (anything matching
 * <...> or [...]) and the text between them. Only the in-between text is
 * converted, and conversion is skipped:
 *   - for the segment that directly follows a delimiter containing
 *     <code, <kbd, <style or <script, and
 *   - for everything between a delimiter containing <pre and one
 *     containing </pre>.
 * Independently of the above, every segment (delimiters included) has
 * bare ampersands encoded as &#038;.
 *
 * If the global $wp_cockneyreplace is set, its keys/values are used as
 * additional search/replace pairs instead of the built-in list.
 *
 * @param string $text Text to convert.
 * @return string Converted text.
 */
function wptexturize($text) {
    global $wp_cockneyreplace;
    $next = true;            // false => skip the next segment (follows <code>, <kbd>, <style>, <script>)
    $has_pre_parent = false; // true while between <pre ...> and </pre>
    $output = '';
    $curl = '';
    $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    $stop = count($textarr);

    // if a plugin has provided an autocorrect array, use it
    if ( isset($wp_cockneyreplace) ) {
        $cockney = array_keys($wp_cockneyreplace);
        $cockneyreplace = array_values($wp_cockneyreplace);
    } else {
        $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause");
        $cockneyreplace = array("&#8217;tain&#8217;t","&#8217;twere","&#8217;twas","&#8217;tis","&#8217;twill","&#8217;til","&#8217;bout","&#8217;nuff","&#8217;round","&#8217;cause");
    }

    // Literal replacements; str_replace() applies them in array order, so
    // '---' is handled before '--', and 'xn&#8211;' undoes the '--' rule
    // for strings starting with 'xn--'.
    $static_characters = array_merge(array('---', ' -- ', '--', 'xn&#8211;', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney);
    $static_replacements = array_merge(array('&#8212;', ' &#8212; ', '&#8211;', 'xn--', '&#8230;', '&#8220;', '&#8217;s', '&#8221;', ' &#8482;'), $cockneyreplace);

    // Context-sensitive replacements (quotes, primes, NxM), applied in array order.
    $dynamic_characters = array('/\'(\d\d(?:&#8217;|\')?s)/', '/(\s|\A|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A)"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/');
    $dynamic_replacements = array('&#8217;$1','$1&#8216;', '$1&#8243;', '$1&#8242;', '$1&#8217;$2', '$1&#8220;$2', '&#8221;$1', '&#8217;$1', '$1&#215;$2');

    for ( $i = 0; $i < $stop; $i++ ) {
        $curl = $textarr[$i];

        // Square-bracket offsets are used here: the curly-brace form ($curl{0})
        // is a parse error on PHP 8.
        if (isset($curl[0]) && '<' != $curl[0] && '[' != $curl[0] && $next && !$has_pre_parent) { // If it's not a tag
            // static strings
            $curl = str_replace($static_characters, $static_replacements, $curl);
            // regular expressions
            $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
        } elseif (strpos($curl, '<code') !== false || strpos($curl, '<kbd') !== false || strpos($curl, '<style') !== false || strpos($curl, '<script') !== false) {
            $next = false;
        } elseif (strpos($curl, '<pre') !== false) {
            $has_pre_parent = true;
        } elseif (strpos($curl, '</pre>') !== false) {
            $has_pre_parent = false;
        } else {
            // Any other segment (including a skipped text segment) re-enables conversion.
            $next = true;
        }

        // Encode ampersands that do not start a numeric or named entity.
        $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
        $output .= $curl;
    }

    return $output;
}
51
/**
 * Removes the <br />, <p> and </p> markup from the contents of a <pre> block.
 *
 * Accepts either the matches array handed over by preg_replace_callback()
 * in wpautop() (element 1 is the opening <pre ...> tag, element 2 the
 * contents; the closing </pre> is re-appended here) or a plain string.
 *
 * @param array|string $matches Callback matches array, or the text itself.
 * @return string Text with '<br />' and '</p>' removed and '<p>' turned into a newline.
 */
function clean_pre($matches) {
    $text = is_array($matches)
        ? $matches[1] . $matches[2] . "</pre>"
        : $matches;

    // Pairs are applied in order, exactly like three consecutive str_replace() calls.
    return str_replace(
        array('<br />', '<p>', '</p>'),
        array('', "\n", ''),
        $text
    );
}
66
/**
 * preg_replace_callback() helper for wpautop(): swaps every newline in the
 * matched text for a <WPPreserveNewline /> placeholder.
 *
 * A named function is used instead of create_function(), which no longer
 * exists on PHP 8.
 *
 * @param array $matches Matches array; element 0 is the full match.
 * @return string The full match with newlines replaced by the placeholder.
 */
function _autop_newline_preservation_helper($matches) {
    return str_replace("\n", "<WPPreserveNewline />", $matches[0]);
}

/**
 * Replaces double line breaks with paragraph elements.
 *
 * Text separated by blank lines is wrapped in <p>...</p>; the block-level
 * tags listed in $allblocks are kept out of paragraphs. When $br is truthy,
 * remaining single newlines become <br />, except inside <script> and
 * <style> elements. <pre> blocks are passed through clean_pre() at the end,
 * and paragraphs consisting only of a shortcode are unwrapped.
 *
 * @param string   $pee The text to format.
 * @param int|bool $br  Optional. Whether to convert remaining line breaks to <br />. Default 1.
 * @return string Text converted into paragraphs.
 */
function wpautop($pee, $br = 1) {
    $pee = $pee . "\n"; // just to make things a little easier, pad the end
    $pee = preg_replace('|<br />\s*<br />|', "\n\n", $pee);
    // Space things out a little
    $allblocks = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr)';
    $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee);
    $pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee);
    $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines
    if ( strpos($pee, '<object') !== false ) {
        $pee = preg_replace('|\s*<param([^>]*)>\s*|', "<param$1>", $pee); // no pee inside object/embed
        $pee = preg_replace('|\s*</embed>\s*|', '</embed>', $pee);
    }
    $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates
    $pee = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "<p>$1</p>\n", $pee); // make paragraphs, including one at the end
    $pee = preg_replace('|<p>\s*?</p>|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace
    $pee = preg_replace('!<p>([^<]+)\s*?(</(?:div|address|form)[^>]*>)!', "<p>$1</p>$2", $pee);
    // (A former "replace <p> with $1<p>" step was dropped here: its pattern had
    // no capture group, so it replaced every <p> with an identical <p>.)
    $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee); // don't pee all over a tag
    $pee = preg_replace("|<p>(<li.+?)</p>|", "$1", $pee); // problem with nested lists
    $pee = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $pee);
    $pee = str_replace('</blockquote></p>', '</p></blockquote>', $pee);
    $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)!', "$1", $pee);
    $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee);
    if ($br) {
        // Hide newlines inside <script>/<style> so they are not turned into <br />.
        $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee);
        $pee = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $pee); // optionally make line breaks
        $pee = str_replace('<WPPreserveNewline />', "\n", $pee);
    }
    $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*<br />!', "$1", $pee);
    $pee = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee);
    if (strpos($pee, '<pre') !== false)
        $pee = preg_replace_callback('!(<pre.*?>)(.*?)</pre>!is', 'clean_pre', $pee );
    $pee = preg_replace( "|\n</p>$|", '</p>', $pee );
    $pee = preg_replace('/<p>\s*?(' . get_shortcode_regex() . ')\s*<\/p>/s', '$1', $pee); // don't auto-p wrap shortcodes that stand alone

    return $pee;
}
104
105
/**
 * Checks whether a byte string is structurally consistent with UTF-8.
 *
 * Each lead byte must match one of the 1- to 6-byte lead patterns and be
 * followed by the right number of 10bbbbbb continuation bytes. Only this
 * byte structure is checked. Original routine by bmorel at ssi dot fr.
 *
 * @param string $Str The string to be checked.
 * @return bool True if every byte sequence fits the pattern, false otherwise.
 */
function seems_utf8($Str) {
    $length = strlen($Str);
    $i = 0;

    while ($i < $length) {
        $lead = ord($Str[$i]);

        if ($lead < 0x80) { // 0bbbbbbb: single-byte character
            $i++;
            continue;
        }

        // Number of continuation bytes announced by the lead byte.
        if (($lead & 0xE0) == 0xC0) {        // 110bbbbb
            $n = 1;
        } elseif (($lead & 0xF0) == 0xE0) {  // 1110bbbb
            $n = 2;
        } elseif (($lead & 0xF8) == 0xF0) {  // 11110bbb
            $n = 3;
        } elseif (($lead & 0xFC) == 0xF8) {  // 111110bb
            $n = 4;
        } elseif (($lead & 0xFE) == 0xFC) {  // 1111110b
            $n = 5;
        } else {
            return false; // Does not match any model
        }

        // The next $n bytes must all exist and look like 10bbbbbb.
        for ($j = 0; $j < $n; $j++) {
            $i++;
            if ($i == $length) {
                return false;
            }
            if ((ord($Str[$i]) & 0xC0) != 0x80) {
                return false;
            }
        }

        $i++;
    }

    return true;
}
123
/**
 * Converts special characters to HTML entities, like htmlspecialchars()
 * but without double-encoding existing entities.
 *
 * Ampersands, '<' and '>' are always handled. Quotes depend on $quotes:
 *   - 'double'          : only double quotes are encoded,
 *   - 'single'          : only single quotes are encoded,
 *   - other truthy value: both kinds are encoded,
 *   - falsy value       : quotes are left alone.
 *
 * @param string $text   The text to encode.
 * @param mixed  $quotes Optional. Quote handling mode, see above. Default 0.
 * @return string The encoded text.
 */
function wp_specialchars( $text, $quotes = 0 ) {
    // Two passes: the replacement ends in '&', which can pair up with a
    // following '&' that the first pass had not consumed.
    for ( $pass = 0; $pass < 2; $pass++ ) {
        $text = str_replace('&&', '&#038;&', $text);
    }

    // Encode ampersands that are not the start of an entity.
    $text = preg_replace('/&(?:$|([^#])(?![a-z1-4]{1,8};))/', '&#038;$1', $text);

    $text = str_replace(array('<', '>'), array('&lt;', '&gt;'), $text);

    $only_double = ( 'double' === $quotes );
    $only_single = ( 'single' === $quotes );
    $both = ( !$only_double && !$only_single && $quotes );

    if ( $only_double || $both ) {
        $text = str_replace('"', '&quot;', $text);
    }
    if ( $only_single || $both ) {
        $text = str_replace("'", '&#039;', $text);
    }

    return $text;
}
141
/**
 * Percent-encodes the non-ASCII bytes of a UTF-8 string for use in a URI.
 *
 * Bytes below 128 are copied through unchanged. Each multi-byte sequence
 * (2, 3 or 4 octets, decided by its lead byte) is emitted as a run of
 * %xx escapes with lowercase hex digits. When $length is non-zero, output
 * stops before the character that would push the encoded length past
 * $length, so a multi-byte character is never cut in half.
 *
 * @param string $utf8_string The string to encode; expected to be UTF-8.
 * @param int    $length      Optional. Maximum length of the encoded result; 0 means no limit.
 * @return string The string with multi-byte characters percent-encoded.
 */
function utf8_uri_encode( $utf8_string, $length = 0 ) {
    $unicode = '';
    $values = array();   // octets collected so far for the current multi-byte character
    $num_octets = 1;
    $unicode_length = 0; // length of $unicode in bytes

    $string_length = strlen( $utf8_string );
    for ( $i = 0; $i < $string_length; $i++ ) {

        $value = ord( $utf8_string[ $i ] );

        if ( $value < 128 ) {
            if ( $length && ( $unicode_length >= $length ) )
                break;
            $unicode .= chr($value);
            $unicode_length++;
        } else {
            // First octet of a sequence: its value tells how many octets follow.
            // Lead bytes >= 240 start a 4-octet sequence; treating them as
            // 3-octet would split the character and lose its last byte.
            if ( count( $values ) == 0 ) {
                if ( $value < 224 )
                    $num_octets = 2;
                elseif ( $value < 240 )
                    $num_octets = 3;
                else
                    $num_octets = 4;
            }

            $values[] = $value;

            // Each octet becomes three characters (%xx); stop if the whole
            // character would not fit.
            if ( $length && ( $unicode_length + ($num_octets * 3) ) > $length )
                break;
            if ( count( $values ) == $num_octets ) {
                foreach ( $values as $octet ) {
                    $unicode .= '%' . dechex($octet);
                }
                $unicode_length += $num_octets * 3;

                $values = array();
                $num_octets = 1;
            }
        }
    }

    return $unicode;
}
182
183 function remove_accents($string) {
184     if ( !preg_match('/[\x80-\xff]/', $string) )
185         return $string;
186
187     if (seems_utf8($string)) {
188         $chars = array(
189         // Decompositions for Latin-1 Supplement
190         chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
191         chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
192         chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
193         chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
194         chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
195         chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
196         chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
197         chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
198         chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
199         chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
200         chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
201         chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
202         chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
203         chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
204         chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
205         chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
206         chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
207         chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
208         chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
209         chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
210         chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
211         chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
212         chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
213         chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
214         chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
215         chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
216         chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
217         chr(195).chr(191) => 'y',
218         // Decompositions for Latin Extended-A
219         chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
220         chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
221         chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
222         chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
223         chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
224         chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
225         chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
226         chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
227         chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
228         chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
229         chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
230         chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
231         chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
232         chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
233         chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
234         chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
235         chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
236         chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
237         chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
238         chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
239         chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
240         chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
241         chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
242         chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
243         chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
244         chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
245         chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
246         chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
247         chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
248         chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
249         chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
250         chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
251         chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
252         chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
253         chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
254         chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
255         chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
256         chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
257         chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
258         chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
259         chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
260         chr(197).chr(146) => 'OE',chr(197).chr(