diff options
Diffstat (limited to 'dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch')
-rw-r--r-- | dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch b/dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch new file mode 100644 index 000000000000..470005804f57 --- /dev/null +++ b/dev-lang/perl/files/perl-5.8.8-CVE-2008-1927.patch @@ -0,0 +1,256 @@ +Fix a double free / segfault with utf8 regexps +Debian #454792 +[rt.cpan.org #48156] +[rt.cpan.org #40641] +upstream change 29204 + +UTF8_ALLOW_DEFAULT definition in utf8.h picked from upstream change 27688 + +diff --git a/embed.fnc b/embed.fnc +index edfbc0e..26524c7 100644 +--- a/embed.fnc ++++ b/embed.fnc +@@ -1168,6 +1168,7 @@ Es |void |reguni |NN const struct RExC_state_t *state|UV uv|NN char *s|NN STRLE + Es |regnode*|regclass |NN struct RExC_state_t *state + ERs |I32 |regcurly |NN const char * + Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op ++Es |UV |reg_recode |const char value|NULLOK SV **encp + Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp + Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode *opnd + Es |void |regoptail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val +diff --git a/embed.h b/embed.h +index 2b38fd5..372b04f 100644 +--- a/embed.h ++++ b/embed.h +@@ -1234,6 +1234,7 @@ + #define regclass S_regclass + #define regcurly S_regcurly + #define reg_node S_reg_node ++#define reg_recode S_reg_recode + #define regpiece S_regpiece + #define reginsert S_reginsert + #define regoptail S_regoptail +@@ -3277,6 +3278,7 @@ + #define regclass(a) S_regclass(aTHX_ a) + #define regcurly(a) S_regcurly(aTHX_ a) + #define reg_node(a,b) S_reg_node(aTHX_ a,b) ++#define reg_recode(a,b) S_reg_recode(aTHX_ a,b) + #define regpiece(a,b) S_regpiece(aTHX_ a,b) + #define reginsert(a,b,c) S_reginsert(aTHX_ a,b,c) + #define regoptail(a,b,c) S_regoptail(aTHX_ a,b,c) +diff --git a/pod/perldiag.pod b/pod/perldiag.pod +index 9b3134c..7d95216 100644 +--- a/pod/perldiag.pod ++++ b/pod/perldiag.pod +@@ -1900,6 +1900,15 @@ recognized by Perl or by a user-supplied handler. See L<attributes>. + (W printf) Perl does not understand the given format conversion. See + L<perlfunc/sprintf>. + ++=item Invalid escape in the specified encoding in regex; marked by <-- HERE in m/%s/ ++ ++(W regexp) The numeric escape (for example C<\xHH>) of value < 256 ++didn't correspond to a single character through the conversion ++from the encoding specified by the encoding pragma. ++The escape was replaced with REPLACEMENT CHARACTER (U+FFFD) instead. ++The <-- HERE shows in the regular expression about where the ++escape was discovered. ++ + =item Invalid [] range "%s" in regex; marked by <-- HERE in m/%s/ + + (F) The range specified in a character class had a minimum character +diff --git a/proto.h b/proto.h +index 6d185dd..ef6c0cf 100644 +--- a/proto.h ++++ b/proto.h +@@ -1748,6 +1748,7 @@ STATIC I32 S_regcurly(pTHX_ const char *) + __attribute__warn_unused_result__; + + STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op); ++STATIC UV S_reg_recode(pTHX_ const char value, SV **encp); + STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp); + STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd); + STATIC void S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val); +diff --git a/regcomp.c b/regcomp.c +index 928cf39..98d48dd 100644 +--- a/regcomp.c ++++ b/regcomp.c +@@ -2791,6 +2791,39 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp) + } + + /* ++ * reg_recode ++ * ++ * It returns the code point in utf8 for the value in *encp. ++ * value: a code value in the source encoding ++ * encp: a pointer to an Encode object ++ * ++ * If the result from Encode is not a single character, ++ * it returns U+FFFD (Replacement character) and sets *encp to NULL. ++ */ ++STATIC UV ++S_reg_recode(pTHX_ const char value, SV **encp) ++{ ++ STRLEN numlen = 1; ++ SV * const sv = sv_2mortal(newSVpvn(&value, numlen)); ++ const char * const s = encp && *encp ? sv_recode_to_utf8(sv, *encp) ++ : SvPVX(sv); ++ const STRLEN newlen = SvCUR(sv); ++ UV uv = UNICODE_REPLACEMENT; ++ ++ if (newlen) ++ uv = SvUTF8(sv) ++ ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT) ++ : *(U8*)s; ++ ++ if (!newlen || numlen != newlen) { ++ uv = UNICODE_REPLACEMENT; ++ if (encp) ++ *encp = NULL; ++ } ++ return uv; ++} ++ ++/* + - regatom - the lowest level + * + * Optimization: gobbles an entire sequence of ordinary characters so that +@@ -3182,6 +3215,8 @@ tryagain: + ender = grok_hex(p, &numlen, &flags, NULL); + p += numlen; + } ++ if (PL_encoding && ender < 0x100) ++ goto recode_encoding; + break; + case 'c': + p++; +@@ -3201,6 +3236,17 @@ tryagain: + --p; + goto loopdone; + } ++ if (PL_encoding && ender < 0x100) ++ goto recode_encoding; ++ break; ++ recode_encoding: ++ { ++ SV* enc = PL_encoding; ++ ender = reg_recode((const char)(U8)ender, &enc); ++ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) ++ vWARN(p, "Invalid escape in the specified encoding"); ++ RExC_utf8 = 1; ++ } + break; + case '\0': + if (p >= RExC_end) +@@ -3331,32 +3377,6 @@ tryagain: + break; + } + +- /* If the encoding pragma is in effect recode the text of +- * any EXACT-kind nodes. */ +- if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) { +- STRLEN oldlen = STR_LEN(ret); +- SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen)); +- +- if (RExC_utf8) +- SvUTF8_on(sv); +- if (sv_utf8_downgrade(sv, TRUE)) { +- const char * const s = sv_recode_to_utf8(sv, PL_encoding); +- const STRLEN newlen = SvCUR(sv); +- +- if (SvUTF8(sv)) +- RExC_utf8 = 1; +- if (!SIZE_ONLY) { +- DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n", +- (int)oldlen, STRING(ret), +- (int)newlen, s)); +- Copy(s, STRING(ret), newlen, char); +- STR_LEN(ret) += newlen - oldlen; +- RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen); +- } else +- RExC_size += STR_SZ(newlen) - STR_SZ(oldlen); +- } +- } +- + return(ret); + } + +@@ -3734,6 +3754,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) + value = grok_hex(RExC_parse, &numlen, &flags, NULL); + RExC_parse += numlen; + } ++ if (PL_encoding && value < 0x100) ++ goto recode_encoding; + break; + case 'c': + value = UCHARAT(RExC_parse++); +@@ -3741,13 +3763,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': +- { +- I32 flags = 0; +- numlen = 3; +- value = grok_oct(--RExC_parse, &numlen, &flags, NULL); +- RExC_parse += numlen; +- break; +- } ++ { ++ I32 flags = 0; ++ numlen = 3; ++ value = grok_oct(--RExC_parse, &numlen, &flags, NULL); ++ RExC_parse += numlen; ++ if (PL_encoding && value < 0x100) ++ goto recode_encoding; ++ break; ++ } ++ recode_encoding: ++ { ++ SV* enc = PL_encoding; ++ value = reg_recode((const char)(U8)value, &enc); ++ if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP)) ++ vWARN(RExC_parse, ++ "Invalid escape in the specified encoding"); ++ break; ++ } + default: + if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP)) + vWARN2(RExC_parse, +diff --git a/t/uni/tr_utf8.t b/t/uni/tr_utf8.t +index 606a84a..354156a 100755 +--- a/t/uni/tr_utf8.t ++++ b/t/uni/tr_utf8.t +@@ -31,7 +31,7 @@ BEGIN { + } + + use strict; +-use Test::More tests => 7; ++use Test::More tests => 8; + + use encoding 'utf8'; + +@@ -67,4 +67,12 @@ is($str, $hiragana, "s/// # hiragana -> katakana"); + $line =~ tr/bcdeghijklmnprstvwxyz$02578/בצדעגהיײקלמנפּרסטװשכיזשױתײחא/; + is($line, "aבצדעfגהיײקלמנoפqּרסuטװשכיזש1ױ34ת6ײח9", "[perl #16843]"); + } ++ ++{ ++ # [perl #40641] ++ my $str = qq/Gebääääääääääääääääääääude/; ++ my $reg = qr/Gebääääääääääääääääääääude/; ++ ok($str =~ /$reg/, "[perl #40641]"); ++} ++ + __END__ +diff --git a/utf8.h b/utf8.h +index 6d63897..3800866 100644 +--- a/utf8.h ++++ b/utf8.h +@@ -198,6 +198,8 @@ encoded character. + UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF) + #define UTF8_ALLOW_ANY 0x00FF + #define UTF8_CHECK_ONLY 0x0200 ++#define UTF8_ALLOW_DEFAULT (ckWARN(WARN_UTF8) ? 0 : \ ++ UTF8_ALLOW_ANYUV) + + #define UNICODE_SURROGATE_FIRST 0xD800 + #define UNICODE_SURROGATE_LAST 0xDFFF |