aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net> 2006-07-10 03:05:27 +0000
committer Ludovic Arnaud <ludovic_arnaud@users.sourceforge.net> 2006-07-10 03:05:27 +0000
commit 0fa9103a0cacbe91404c32cddf0bcfe029423d04 (patch)
tree 11208e46f9a4c26446960f24471d09ad1d6b3d09 /phpBB/includes
parent Added: UTF-8 normalizer along with all the data files required (diff)
download phpbb-0fa9103a0cacbe91404c32cddf0bcfe029423d04.tar.gz
phpbb-0fa9103a0cacbe91404c32cddf0bcfe029423d04.tar.bz2
phpbb-0fa9103a0cacbe91404c32cddf0bcfe029423d04.zip
Changed: being anal with the notation of Unicode codepoints
Fixed: moved some constants out of the if construct because they're needed by other classes, even if the utfnormal extension exists

git-svn-id: file:///svn/phpbb/trunk@6164 89ea8834-ac86-4346-8a33-228a782c2dd0
Diffstat (limited to 'phpBB/includes')
-rw-r--r-- phpBB/includes/utf/utf_normalizer.php 26
1 files changed, 13 insertions, 13 deletions
diff --git a/phpBB/includes/utf/utf_normalizer.php b/phpBB/includes/utf/utf_normalizer.php
index 45f25e534f..c985337c64 100644
--- a/phpBB/includes/utf/utf_normalizer.php
+++ b/phpBB/includes/utf/utf_normalizer.php
@@ -19,6 +19,8 @@ define('UTF8_FFFE', "\xEF\xBF\xBE");
define('UTF8_FFFF', "\xEF\xBF\xBF");
define('UTF8_SURROGATE_FIRST', "\xED\xA0\x80");
define('UTF8_SURROGATE_LAST', "\xED\xBF\xBF");
+define('UTF8_HANGUL_FIRST', "\xEA\xB0\x80");
+define('UTF8_HANGUL_LAST', "\xED\x9E\xA3");
if (function_exists('utf8_normalize'))
{
@@ -62,7 +64,7 @@ class utf_normalizer
}
/**
- * Check if there is potentially a 0xFFFE or 0xFFFF char (UTF sequence
+ * Check if there is potentially a U+FFFE or U+FFFF char (UTF sequence
* 0xEFBFBE or 0xEFBFBF) and replace them
*
* Note: we start searching at position $pos
@@ -173,8 +175,6 @@ define('UNICODE_HANGUL_NCOUNT', 588);
define('UNICODE_JAMO_L', 0);
define('UNICODE_JAMO_V', 1);
define('UNICODE_JAMO_T', 2);
-define('UTF8_HANGUL_FIRST', "\xEA\xB0\x80");
-define('UTF8_HANGUL_LAST', "\xED\x9E\xA3");
/**
* Unicode normalization routines
@@ -555,7 +555,7 @@ class utf_normalizer
if ($utf_char >= "\xED\xA0\x80")
{
/**
- * Surrogates (0xD800..0xDFFF) are not allowed in UTF-8
+ * Surrogates (U+D800..U+DFFF) are not allowed in UTF-8
* (UTF sequence 0xEDA080..0xEDBFBF)
*/
$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
@@ -572,7 +572,7 @@ class utf_normalizer
if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
{
/**
- * 0xFFFE and 0xFFFF are explicitly disallowed
+ * U+FFFE and U+FFFF are explicitly disallowed
* (UTF sequence 0xEFBFBE..0xEFBFBF)
*/
$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
@@ -587,7 +587,7 @@ class utf_normalizer
if ($utf_char <= "\xC1\xBF")
{
/**
- * Overlong sequence: Unicode char 0x00..0x7F encoded as a
+ * Overlong sequence: Unicode char U+0000..U+007F encoded as a
* double-byte UTF char
*/
$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
@@ -601,7 +601,7 @@ class utf_normalizer
if ($utf_char <= "\xE0\x9F\xBF")
{
/**
- * Unicode char 0x0000..0x07FF encoded in 3 bytes
+ * Unicode char U+0000..U+07FF encoded in 3 bytes
*/
$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
$pos += $utf_len;
@@ -614,7 +614,7 @@ class utf_normalizer
if ($utf_char <= "\xF0\x8F\xBF\xBF")
{
/**
- * Unicode char 0x0000..0xFFFF encoded in 4 bytes
+ * Unicode char U+0000..U+FFFF encoded in 4 bytes
*/
$tmp .= substr($str, $tmp_pos, $pos - $tmp_pos) . UTF8_REPLACEMENT;
$pos += $utf_len;
@@ -1608,7 +1608,7 @@ class utf_normalizer
if ($utf_char >= "\xED\xA0\x80")
{
/**
- * Surrogates (0xD800..0xDFFF) are not allowed in UTF-8
+ * Surrogates (U+D800..U+DFFF) are not allowed in UTF-8
* (UTF sequence 0xEDA080..0xEDBFBF)
*/
$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
@@ -1639,7 +1639,7 @@ class utf_normalizer
if ($utf_char == "\xEF\xBF\xBE" || $utf_char == "\xEF\xBF\xBF")
{
/**
- * 0xFFFE and 0xFFFF are explicitly disallowed
+ * U+FFFE and U+FFFF are explicitly disallowed
* (UTF sequence 0xEFBFBE..0xEFBFBF)
*/
$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
@@ -1668,7 +1668,7 @@ class utf_normalizer
if ($utf_char <= "\xC1\xBF")
{
/**
- * Overlong sequence: Unicode char 0x00..0x7F encoded as a
+ * Overlong sequence: Unicode char U+0000..U+007F encoded as a
* double-byte UTF char
*/
$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
@@ -1696,7 +1696,7 @@ class utf_normalizer
if ($utf_char <= "\xE0\x9F\xBF")
{
/**
- * Unicode char 0x0000..0x07FF encoded in 3 bytes
+ * Unicode char U+0000..U+07FF encoded in 3 bytes
*/
$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);
@@ -1723,7 +1723,7 @@ class utf_normalizer
if ($utf_char <= "\xF0\x8F\xBF\xBF")
{
/**
- * Unicode char 0x0000..0xFFFF encoded in 4 bytes
+ * Unicode char U+0000..U+FFFF encoded in 4 bytes
*/
$tmp .= substr($str, $tmp_pos, $starter_pos - $tmp_pos);