#7475: add (un)transform method to bytes/bytearray and str, add back codecs that can be used with them from Python 2.

author: Georg Brandl <georg@python.org> 2010-12-02 18:06:51 +0000
committer: Georg Brandl <georg@python.org> 2010-12-02 18:06:51 +0000
commit: 02524629f39bb70f4ea00ab8e64d694e08719227 (patch)
tree: de93598f38e1b4d84eaa743b31df1a79a21c957c /Lib/encodings
parent: #10597: fix Py_SetPythonHome docs by pointing to where the meaning of PYTHONH... (diff)
download: cpython-02524629f39bb70f4ea00ab8e64d694e08719227.tar.gz
cpython-02524629f39bb70f4ea00ab8e64d694e08719227.tar.bz2
cpython-02524629f39bb70f4ea00ab8e64d694e08719227.zip
8 files changed, 550 insertions, 18 deletions
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
index 235deb5c309..331095b1f16 100644
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -33,9 +33,9 @@ aliases = {
     'us'                 : 'ascii',
     'us_ascii'           : 'ascii',
 
-    ## base64_codec codec
-    #'base64'             : 'base64_codec',
-    #'base_64'            : 'base64_codec',
+    # base64_codec codec
+    'base64'             : 'base64_codec',
+    'base_64'            : 'base64_codec',
 
     # big5 codec
     'big5_tw'            : 'big5',
@@ -45,8 +45,8 @@ aliases = {
     'big5_hkscs'         : 'big5hkscs',
     'hkscs'              : 'big5hkscs',
 
-    ## bz2_codec codec
-    #'bz2'                : 'bz2_codec',
+    # bz2_codec codec
+    'bz2'                : 'bz2_codec',
 
     # cp037 codec
     '037'                : 'cp037',
@@ -248,8 +248,8 @@ aliases = {
     'cp936'              : 'gbk',
     'ms936'              : 'gbk',
 
-    ## hex_codec codec
-    #'hex'                : 'hex_codec',
+    # hex_codec codec
+    'hex'                : 'hex_codec',
 
     # hp_roman8 codec
     'roman8'             : 'hp_roman8',
@@ -450,13 +450,13 @@ aliases = {
     'cp154'              : 'ptcp154',
     'cyrillic_asian'     : 'ptcp154',
 
-    ## quopri_codec codec
-    #'quopri'             : 'quopri_codec',
-    #'quoted_printable'   : 'quopri_codec',
-    #'quotedprintable'    : 'quopri_codec',
+    # quopri_codec codec
+    'quopri'             : 'quopri_codec',
+    'quoted_printable'   : 'quopri_codec',
+    'quotedprintable'    : 'quopri_codec',
 
-    ## rot_13 codec
-    #'rot13'              : 'rot_13',
+    # rot_13 codec
+    'rot13'              : 'rot_13',
 
     # shift_jis codec
     'csshiftjis'         : 'shift_jis',
@@ -518,12 +518,12 @@ aliases = {
     'utf8_ucs2'          : 'utf_8',
     'utf8_ucs4'          : 'utf_8',
 
-    ## uu_codec codec
-    #'uu'                 : 'uu_codec',
+    # uu_codec codec
+    'uu'                 : 'uu_codec',
 
-    ## zlib_codec codec
-    #'zip'                : 'zlib_codec',
-    #'zlib'               : 'zlib_codec',
+    # zlib_codec codec
+    'zip'                : 'zlib_codec',
+    'zlib'               : 'zlib_codec',
 
     # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
     'x_mac_japanese'      : 'shift_jis',
diff --git a/Lib/encodings/base64_codec.py b/Lib/encodings/base64_codec.py
new file mode 100644
index 00000000000..e8b19ee237d
--- /dev/null
+++ b/Lib/encodings/base64_codec.py
@@ -0,0 +1,55 @@
+"""Python 'base64_codec' Codec - base64 content transfer encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+"""
+
+import codecs
+import base64
+
+### Codec APIs
+
+def base64_encode(input, errors='strict'):
+    assert errors == 'strict'
+    return (base64.encodestring(input), len(input))
+
+def base64_decode(input, errors='strict'):
+    assert errors == 'strict'
+    return (base64.decodestring(input), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return base64_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return base64_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        assert self.errors == 'strict'
+        return base64.encodestring(input)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        assert self.errors == 'strict'
+        return base64.decodestring(input)
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='base64',
+        encode=base64_encode,
+        decode=base64_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
diff --git a/Lib/encodings/bz2_codec.py b/Lib/encodings/bz2_codec.py
new file mode 100644
index 00000000000..e65d226bfdf
--- /dev/null
+++ b/Lib/encodings/bz2_codec.py
@@ -0,0 +1,77 @@
+"""Python 'bz2_codec' Codec - bz2 compression encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Adapted by Raymond Hettinger from zlib_codec.py which was written
+by Marc-Andre Lemburg (mal@lemburg.com).
+"""
+
+import codecs
+import bz2 # this codec needs the optional bz2 module !
+
+### Codec APIs
+
+def bz2_encode(input, errors='strict'):
+    assert errors == 'strict'
+    return (bz2.compress(input), len(input))
+
+def bz2_decode(input, errors='strict'):
+    assert errors == 'strict'
+    return (bz2.decompress(input), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return bz2_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return bz2_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.compressobj = bz2.BZ2Compressor()
+
+    def encode(self, input, final=False):
+        if final:
+            c = self.compressobj.compress(input)
+            return c + self.compressobj.flush()
+        else:
+            return self.compressobj.compress(input)
+
+    def reset(self):
+        self.compressobj = bz2.BZ2Compressor()
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.decompressobj = bz2.BZ2Decompressor()
+
+    def decode(self, input, final=False):
+        try:
+            return self.decompressobj.decompress(input)
+        except EOFError:
+            return ''
+
+    def reset(self):
+        self.decompressobj = bz2.BZ2Decompressor()
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name="bz2",
+        encode=bz2_encode,
+        decode=bz2_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
diff --git a/Lib/encodings/hex_codec.py b/Lib/encodings/hex_codec.py
new file mode 100644
index 00000000000..e003fc3ff97
--- /dev/null
+++ b/Lib/encodings/hex_codec.py
@@ -0,0 +1,55 @@
+"""Python 'hex_codec' Codec - 2-digit hex content transfer encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+"""
+
+import codecs
+import binascii
+
+### Codec APIs
+
+def hex_encode(input, errors='strict'):
+    assert errors == 'strict'
+    return (binascii.b2a_hex(input), len(input))
+
+def hex_decode(input, errors='strict'):
+    assert errors == 'strict'
+    return (binascii.a2b_hex(input), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return hex_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return hex_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        assert self.errors == 'strict'
+        return binascii.b2a_hex(input)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        assert self.errors == 'strict'
+        return binascii.a2b_hex(input)
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='hex',
+        encode=hex_encode,
+        decode=hex_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
diff --git a/Lib/encodings/quopri_codec.py b/Lib/encodings/quopri_codec.py
new file mode 100644
index 00000000000..9243fc443b0
--- /dev/null
+++ b/Lib/encodings/quopri_codec.py
@@ -0,0 +1,56 @@
+"""Codec for quoted-printable encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+"""
+
+import codecs
+import quopri
+from io import BytesIO
+
+def quopri_encode(input, errors='strict'):
+    assert errors == 'strict'
+    f = BytesIO(input)
+    g = BytesIO()
+    quopri.encode(f, g, 1)
+    return (g.getvalue(), len(input))
+
+def quopri_decode(input, errors='strict'):
+    assert errors == 'strict'
+    f = BytesIO(input)
+    g = BytesIO()
+    quopri.decode(f, g)
+    return (g.getvalue(), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return quopri_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return quopri_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        return quopri_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        return quopri_decode(input, self.errors)[0]
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+# encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='quopri',
+        encode=quopri_encode,
+        decode=quopri_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
diff --git a/Lib/encodings/rot_13.py b/Lib/encodings/rot_13.py
new file mode 100755
index 00000000000..3140c1432dc
--- /dev/null
+++ b/Lib/encodings/rot_13.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+""" Python Character Mapping Codec for ROT13.
+
+This codec de/encodes from str to str and is therefore usable with
+str.transform() and str.untransform().
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+"""
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return (input.translate(rot13_map), len(input))
+
+    def decode(self, input, errors='strict'):
+        return (input.translate(rot13_map), len(input))
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        return input.translate(rot13_map)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        return input.translate(rot13_map)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass
+
+class StreamReader(Codec,codecs.StreamReader):
+    pass
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='rot-13',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamwriter=StreamWriter,
+        streamreader=StreamReader,
+    )
+
+### Map
+
+rot13_map = codecs.make_identity_dict(range(256))
+rot13_map.update({
+   0x0041: 0x004e,
+   0x0042: 0x004f,
+   0x0043: 0x0050,
+   0x0044: 0x0051,
+   0x0045: 0x0052,
+   0x0046: 0x0053,
+   0x0047: 0x0054,
+   0x0048: 0x0055,
+   0x0049: 0x0056,
+   0x004a: 0x0057,
+   0x004b: 0x0058,
+   0x004c: 0x0059,
+   0x004d: 0x005a,
+   0x004e: 0x0041,
+   0x004f: 0x0042,
+   0x0050: 0x0043,
+   0x0051: 0x0044,
+   0x0052: 0x0045,
+   0x0053: 0x0046,
+   0x0054: 0x0047,
+   0x0055: 0x0048,
+   0x0056: 0x0049,
+   0x0057: 0x004a,
+   0x0058: 0x004b,
+   0x0059: 0x004c,
+   0x005a: 0x004d,
+   0x0061: 0x006e,
+   0x0062: 0x006f,
+   0x0063: 0x0070,
+   0x0064: 0x0071,
+   0x0065: 0x0072,
+   0x0066: 0x0073,
+   0x0067: 0x0074,
+   0x0068: 0x0075,
+   0x0069: 0x0076,
+   0x006a: 0x0077,
+   0x006b: 0x0078,
+   0x006c: 0x0079,
+   0x006d: 0x007a,
+   0x006e: 0x0061,
+   0x006f: 0x0062,
+   0x0070: 0x0063,
+   0x0071: 0x0064,
+   0x0072: 0x0065,
+   0x0073: 0x0066,
+   0x0074: 0x0067,
+   0x0075: 0x0068,
+   0x0076: 0x0069,
+   0x0077: 0x006a,
+   0x0078: 0x006b,
+   0x0079: 0x006c,
+   0x007a: 0x006d,
+})
+
+### Filter API
+
+def rot13(infile, outfile):
+    outfile.write(infile.read().encode('rot-13'))
+
+if __name__ == '__main__':
+    import sys
+    rot13(sys.stdin, sys.stdout)
diff --git a/Lib/encodings/uu_codec.py b/Lib/encodings/uu_codec.py
new file mode 100644
index 00000000000..69c6f17c7f3
--- /dev/null
+++ b/Lib/encodings/uu_codec.py
@@ -0,0 +1,99 @@
+"""Python 'uu_codec' Codec - UU content transfer encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were
+adapted from uu.py which was written by Lance Ellinghouse and
+modified by Jack Jansen and Fredrik Lundh.
+"""
+
+import codecs
+import binascii
+from io import BytesIO
+
+### Codec APIs
+
+def uu_encode(input, errors='strict', filename='<data>', mode=0o666):
+    assert errors == 'strict'
+    infile = BytesIO(input)
+    outfile = BytesIO()
+    read = infile.read
+    write = outfile.write
+
+    # Encode
+    write(('begin %o %s\n' % (mode & 0o777, filename)).encode('ascii'))
+    chunk = read(45)
+    while chunk:
+        write(binascii.b2a_uu(chunk))
+        chunk = read(45)
+    write(b' \nend\n')
+
+    return (outfile.getvalue(), len(input))
+
+def uu_decode(input, errors='strict'):
+    assert errors == 'strict'
+    infile = BytesIO(input)
+    outfile = BytesIO()
+    readline = infile.readline
+    write = outfile.write
+
+    # Find start of encoded data
+    while 1:
+        s = readline()
+        if not s:
+            raise ValueError('Missing "begin" line in input data')
+        if s[:5] == b'begin':
+            break
+
+    # Decode
+    while True:
+        s = readline()
+        if not s or s == b'end\n':
+            break
+        try:
+            data = binascii.a2b_uu(s)
+        except binascii.Error as v:
+            # Workaround for broken uuencoders by /Fredrik Lundh
+            nbytes = (((ord(s[0])-32) & 63) * 4 + 5) / 3
+            data = binascii.a2b_uu(s[:nbytes])
+            #sys.stderr.write("Warning: %s\n" % str(v))
+        write(data)
+    if not s:
+        raise ValueError('Truncated input data')
+
+    return (outfile.getvalue(), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return uu_encode(input, errors)
+
+    def decode(self, input, errors='strict'):
+        return uu_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):
+        return uu_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def decode(self, input, final=False):
+        return uu_decode(input, self.errors)[0]
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='uu',
+        encode=uu_encode,
+        decode=uu_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
diff --git a/Lib/encodings/zlib_codec.py b/Lib/encodings/zlib_codec.py
new file mode 100644
index 00000000000..e0b9cdadbcd
--- /dev/null
+++ b/Lib/encodings/zlib_codec.py
@@ -0,0 +1,77 @@
+"""Python 'zlib_codec' Codec - zlib compression encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+"""
+
+import codecs
+import zlib # this codec needs the optional zlib module !
+
+### Codec APIs
+
+def zlib_encode(input, errors='strict'):
+    assert errors == 'strict'
+    return (zlib.compress(input), len(input))
+
+def zlib_decode(input, errors='strict'):
+    assert errors == 'strict'
+    return (zlib.decompress(input), len(input))
+
+class Codec(codecs.Codec):
+    def encode(self, input, errors='strict'):
+        return zlib_encode(input, errors)
+    def decode(self, input, errors='strict'):
+        return zlib_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.compressobj = zlib.compressobj()
+
+    def encode(self, input, final=False):
+        if final:
+            c = self.compressobj.compress(input)
+            return c + self.compressobj.flush()
+        else:
+            return self.compressobj.compress(input)
+
+    def reset(self):
+        self.compressobj = zlib.compressobj()
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+    def __init__(self, errors='strict'):
+        assert errors == 'strict'
+        self.errors = errors
+        self.decompressobj = zlib.decompressobj()
+
+    def decode(self, input, final=False):
+        if final:
+            c = self.decompressobj.decompress(input)
+            return c + self.decompressobj.flush()
+        else:
+            return self.decompressobj.decompress(input)
+
+    def reset(self):
+        self.decompressobj = zlib.decompressobj()
+
+class StreamWriter(Codec, codecs.StreamWriter):
+    charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+    charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='zlib',
+        encode=zlib_encode,
+        decode=zlib_decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
author	Georg Brandl <georg@python.org>	2010-12-02 18:06:51 +0000
committer	Georg Brandl <georg@python.org>	2010-12-02 18:06:51 +0000
commit	02524629f39bb70f4ea00ab8e64d694e08719227 (patch)
tree	de93598f38e1b4d84eaa743b31df1a79a21c957c /Lib/encodings
parent	#10597: fix Py_SetPythonHome docs by pointing to where the meaning of PYTHONH... (diff)
download	cpython-02524629f39bb70f4ea00ab8e64d694e08719227.tar.gz cpython-02524629f39bb70f4ea00ab8e64d694e08719227.tar.bz2 cpython-02524629f39bb70f4ea00ab8e64d694e08719227.zip