77 files changed, 2547 insertions, 594 deletions
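The headline change in this diff is the backport of the tarfile extraction filters (PEP 706), wired through tarfile, shutil and the tarfile command line below. A minimal usage sketch of the new API, assuming a local archive and destination directory (both names are hypothetical, not part of the diff):

    import tarfile

    with tarfile.open("archive.tar") as tar:
        # The 'data' filter rejects absolute paths, links that escape the
        # destination and special files, and strips unsafe permission bits.
        tar.extractall(path="dest", filter="data")

    # A default for all extractions can be set via the TarFile.extraction_filter
    # attribute; per the change below it must be a callable such as
    # tarfile.data_filter, not a string name.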
@@ -163,3 +163,6 @@ feeb267ead3e6771d3f2f49b83e1894839f64fb7 release-pypy3.9-v7.3.11 256b3867989dd3e95eb9ce82f23909b62a362a9a release-pypy3.9-v7.3.12rc1 087971d0009803ea854af1f5121f63666f56a37a release-pypy2.7-v7.3.12rc1 633f94e004748e2a56aff28e4699dc21b4fa2219 release-pypy3.10-v7.3.12rc1 +4ac174a992a398e4f42bc48852e2fb218a403baa release-pypy2.7-v7.3.12rc2 +a6c2a04c0d03054c5a9fbc1eca282046a8c5fd70 release-pypy3.9-v7.3.12rc2 +07561e2940ea7838339a8d615f9f314a0e83bbc0 release-pypy3.10-v7.3.12rc2 diff --git a/lib-python/3/cProfile.py b/lib-python/3/cProfile.py index 22a7d0aade..9ae1fb8859 100755 --- a/lib-python/3/cProfile.py +++ b/lib-python/3/cProfile.py @@ -7,6 +7,7 @@ __all__ = ["run", "runctx", "Profile"] import _lsprof +import io import profile as _pyprofile # ____________________________________________________________ @@ -167,7 +168,7 @@ def main(): else: progname = args[0] sys.path.insert(0, os.path.dirname(progname)) - with open(progname, 'rb') as fp: + with io.open_code(progname) as fp: code = compile(fp.read(), progname, 'exec') globs = { '__file__': progname, diff --git a/lib-python/3/ensurepip/__init__.py b/lib-python/3/ensurepip/__init__.py index 981534c4a0..07065c3cb7 100644 --- a/lib-python/3/ensurepip/__init__.py +++ b/lib-python/3/ensurepip/__init__.py @@ -12,7 +12,7 @@ from . import _bundled __all__ = ["version", "bootstrap"] _SETUPTOOLS_VERSION = "58.1.0" -_PIP_VERSION = "22.0.4" +_PIP_VERSION = "23.0.1" _PROJECTS = [ ("setuptools", _SETUPTOOLS_VERSION, "py3"), ("pip", _PIP_VERSION, "py3"), diff --git a/lib-python/3/ensurepip/_bundled/pip-22.0.4-py3-none-any.whl b/lib-python/3/ensurepip/_bundled/pip-22.0.4-py3-none-any.whl Binary files differdeleted file mode 100644 index 7ba048e245..0000000000 --- a/lib-python/3/ensurepip/_bundled/pip-22.0.4-py3-none-any.whl +++ /dev/null diff --git a/lib-python/3/ensurepip/_bundled/pip-23.0.1-py3-none-any.whl b/lib-python/3/ensurepip/_bundled/pip-23.0.1-py3-none-any.whl Binary files differnew file mode 100644 index 0000000000..a855dc40e8 --- /dev/null +++ b/lib-python/3/ensurepip/_bundled/pip-23.0.1-py3-none-any.whl diff --git a/lib-python/3/http/server.py b/lib-python/3/http/server.py index cf8933c3db..969df7335f 100644 --- a/lib-python/3/http/server.py +++ b/lib-python/3/http/server.py @@ -791,7 +791,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): displaypath = urllib.parse.unquote(self.path, errors='surrogatepass') except UnicodeDecodeError: - displaypath = urllib.parse.unquote(path) + displaypath = urllib.parse.unquote(self.path) displaypath = html.escape(displaypath, quote=False) enc = sys.getfilesystemencoding() title = 'Directory listing for %s' % displaypath diff --git a/lib-python/3/profile.py b/lib-python/3/profile.py index d8599fb4ee..90c4e4c9ff 100755 --- a/lib-python/3/profile.py +++ b/lib-python/3/profile.py @@ -24,6 +24,7 @@ # governing permissions and limitations under the License. 
+import io import sys import time import marshal @@ -587,7 +588,7 @@ def main(): else: progname = args[0] sys.path.insert(0, os.path.dirname(progname)) - with open(progname, 'rb') as fp: + with io.open_code(progname) as fp: code = compile(fp.read(), progname, 'exec') globs = { '__file__': progname, diff --git a/lib-python/3/shutil.py b/lib-python/3/shutil.py index 48a60c0d28..58b8d3b9f9 100644 --- a/lib-python/3/shutil.py +++ b/lib-python/3/shutil.py @@ -1191,7 +1191,7 @@ def _unpack_zipfile(filename, extract_dir): finally: zip.close() -def _unpack_tarfile(filename, extract_dir): +def _unpack_tarfile(filename, extract_dir, *, filter=None): """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir` """ import tarfile # late import for breaking circular dependency @@ -1201,7 +1201,7 @@ def _unpack_tarfile(filename, extract_dir): raise ReadError( "%s is not a compressed or uncompressed tar file" % filename) try: - tarobj.extractall(extract_dir) + tarobj.extractall(extract_dir, filter=filter) finally: tarobj.close() @@ -1229,7 +1229,7 @@ def _find_unpack_format(filename): return name return None -def unpack_archive(filename, extract_dir=None, format=None): +def unpack_archive(filename, extract_dir=None, format=None, *, filter=None): """Unpack an archive. `filename` is the name of the archive. @@ -1243,6 +1243,9 @@ def unpack_archive(filename, extract_dir=None, format=None): was registered for that extension. In case none is found, a ValueError is raised. + + If `filter` is given, it is passed to the underlying + extraction function. """ sys.audit("shutil.unpack_archive", filename, extract_dir, format) @@ -1252,6 +1255,10 @@ def unpack_archive(filename, extract_dir=None, format=None): extract_dir = os.fspath(extract_dir) filename = os.fspath(filename) + if filter is None: + filter_kwargs = {} + else: + filter_kwargs = {'filter': filter} if format is not None: try: format_info = _UNPACK_FORMATS[format] @@ -1259,7 +1266,7 @@ def unpack_archive(filename, extract_dir=None, format=None): raise ValueError("Unknown unpack format '{0}'".format(format)) from None func = format_info[1] - func(filename, extract_dir, **dict(format_info[2])) + func(filename, extract_dir, **dict(format_info[2]), **filter_kwargs) else: # we need to look at the registered unpackers supported extensions format = _find_unpack_format(filename) @@ -1267,7 +1274,7 @@ def unpack_archive(filename, extract_dir=None, format=None): raise ReadError("Unknown archive format '{0}'".format(filename)) func = _UNPACK_FORMATS[format][1] - kwargs = dict(_UNPACK_FORMATS[format][2]) + kwargs = dict(_UNPACK_FORMATS[format][2]) | filter_kwargs func(filename, extract_dir, **kwargs) diff --git a/lib-python/3/subprocess.py b/lib-python/3/subprocess.py index 52f77c1ded..a9750bd683 100644 --- a/lib-python/3/subprocess.py +++ b/lib-python/3/subprocess.py @@ -1435,7 +1435,23 @@ class Popen(object): if shell: startupinfo.dwFlags |= _winapi.STARTF_USESHOWWINDOW startupinfo.wShowWindow = _winapi.SW_HIDE - comspec = os.environ.get("COMSPEC", "cmd.exe") + if not executable: + # gh-101283: without a fully-qualified path, before Windows + # checks the system directories, it first looks in the + # application directory, and also the current directory if + # NeedCurrentDirectoryForExePathW(ExeName) is true, so try + # to avoid executing unqualified "cmd.exe". 
+ comspec = os.environ.get('ComSpec') + if not comspec: + system_root = os.environ.get('SystemRoot', '') + comspec = os.path.join(system_root, 'System32', 'cmd.exe') + if not os.path.isabs(comspec): + raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set') + if os.path.isabs(comspec): + executable = comspec + else: + comspec = executable + args = '{} /c "{}"'.format (comspec, args) if cwd is not None: diff --git a/lib-python/3/tarfile.py b/lib-python/3/tarfile.py index 9438b08ae3..b6ad7dbe2a 100644 --- a/lib-python/3/tarfile.py +++ b/lib-python/3/tarfile.py @@ -46,6 +46,7 @@ import time import struct import copy import re +import warnings try: import pwd @@ -71,6 +72,7 @@ __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", "DEFAULT_FORMAT", "open"] + #--------------------------------------------------------- # tar constants #--------------------------------------------------------- @@ -158,6 +160,8 @@ else: def stn(s, length, encoding, errors): """Convert a string to a null-terminated bytes object. """ + if s is None: + raise ValueError("metadata cannot contain None") s = s.encode(encoding, errors) return s[:length] + (length - len(s)) * NUL @@ -708,9 +712,127 @@ class ExFileObject(io.BufferedReader): super().__init__(fileobj) #class ExFileObject + +#----------------------------- +# extraction filters (PEP 706) +#----------------------------- + +class FilterError(TarError): + pass + +class AbsolutePathError(FilterError): + def __init__(self, tarinfo): + self.tarinfo = tarinfo + super().__init__(f'member {tarinfo.name!r} has an absolute path') + +class OutsideDestinationError(FilterError): + def __init__(self, tarinfo, path): + self.tarinfo = tarinfo + self._path = path + super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, ' + + 'which is outside the destination') + +class SpecialFileError(FilterError): + def __init__(self, tarinfo): + self.tarinfo = tarinfo + super().__init__(f'{tarinfo.name!r} is a special file') + +class AbsoluteLinkError(FilterError): + def __init__(self, tarinfo): + self.tarinfo = tarinfo + super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path') + +class LinkOutsideDestinationError(FilterError): + def __init__(self, tarinfo, path): + self.tarinfo = tarinfo + self._path = path + super().__init__(f'{tarinfo.name!r} would link to {path!r}, ' + + 'which is outside the destination') + +def _get_filtered_attrs(member, dest_path, for_data=True): + new_attrs = {} + name = member.name + dest_path = os.path.realpath(dest_path) + # Strip leading / (tar's directory separator) from filenames. + # Include os.sep (target OS directory separator) as well. + if name.startswith(('/', os.sep)): + name = new_attrs['name'] = member.path.lstrip('/' + os.sep) + if os.path.isabs(name): + # Path is absolute even after stripping. + # For example, 'C:/foo' on Windows. 
+ raise AbsolutePathError(member) + # Ensure we stay in the destination + target_path = os.path.realpath(os.path.join(dest_path, name)) + if os.path.commonpath([target_path, dest_path]) != dest_path: + raise OutsideDestinationError(member, target_path) + # Limit permissions (no high bits, and go-w) + mode = member.mode + if mode is not None: + # Strip high bits & group/other write bits + mode = mode & 0o755 + if for_data: + # For data, handle permissions & file types + if member.isreg() or member.islnk(): + if not mode & 0o100: + # Clear executable bits if not executable by user + mode &= ~0o111 + # Ensure owner can read & write + mode |= 0o600 + elif member.isdir() or member.issym(): + # Ignore mode for directories & symlinks + mode = None + else: + # Reject special files + raise SpecialFileError(member) + if mode != member.mode: + new_attrs['mode'] = mode + if for_data: + # Ignore ownership for 'data' + if member.uid is not None: + new_attrs['uid'] = None + if member.gid is not None: + new_attrs['gid'] = None + if member.uname is not None: + new_attrs['uname'] = None + if member.gname is not None: + new_attrs['gname'] = None + # Check link destination for 'data' + if member.islnk() or member.issym(): + if os.path.isabs(member.linkname): + raise AbsoluteLinkError(member) + target_path = os.path.realpath(os.path.join(dest_path, member.linkname)) + if os.path.commonpath([target_path, dest_path]) != dest_path: + raise LinkOutsideDestinationError(member, target_path) + return new_attrs + +def fully_trusted_filter(member, dest_path): + return member + +def tar_filter(member, dest_path): + new_attrs = _get_filtered_attrs(member, dest_path, False) + if new_attrs: + return member.replace(**new_attrs, deep=False) + return member + +def data_filter(member, dest_path): + new_attrs = _get_filtered_attrs(member, dest_path, True) + if new_attrs: + return member.replace(**new_attrs, deep=False) + return member + +_NAMED_FILTERS = { + "fully_trusted": fully_trusted_filter, + "tar": tar_filter, + "data": data_filter, +} + #------------------ # Exported Classes #------------------ + +# Sentinel for replace() defaults, meaning "don't change the attribute" +_KEEP = object() + class TarInfo(object): """Informational class which holds the details about an archive member given by a tar header block. @@ -791,12 +913,44 @@ class TarInfo(object): def __repr__(self): return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) + def replace(self, *, + name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP, + uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP, + deep=True, _KEEP=_KEEP): + """Return a deep copy of self with the given attributes replaced. + """ + if deep: + result = copy.deepcopy(self) + else: + result = copy.copy(self) + if name is not _KEEP: + result.name = name + if mtime is not _KEEP: + result.mtime = mtime + if mode is not _KEEP: + result.mode = mode + if linkname is not _KEEP: + result.linkname = linkname + if uid is not _KEEP: + result.uid = uid + if gid is not _KEEP: + result.gid = gid + if uname is not _KEEP: + result.uname = uname + if gname is not _KEEP: + result.gname = gname + return result + def get_info(self): """Return the TarInfo's attributes as a dictionary. """ + if self.mode is None: + mode = None + else: + mode = self.mode & 0o7777 info = { "name": self.name, - "mode": self.mode & 0o7777, + "mode": mode, "uid": self.uid, "gid": self.gid, "size": self.size, @@ -819,6 +973,9 @@ class TarInfo(object): """Return a tar header as a string of 512 byte blocks. 
""" info = self.get_info() + for name, value in info.items(): + if value is None: + raise ValueError("%s may not be None" % name) if format == USTAR_FORMAT: return self.create_ustar_header(info, encoding, errors) @@ -949,6 +1106,12 @@ class TarInfo(object): devmajor = stn("", 8, encoding, errors) devminor = stn("", 8, encoding, errors) + # None values in metadata should cause ValueError. + # itn()/stn() do this for all fields except type. + filetype = info.get("type", REGTYPE) + if filetype is None: + raise ValueError("TarInfo.type must not be None") + parts = [ stn(info.get("name", ""), 100, encoding, errors), itn(info.get("mode", 0) & 0o7777, 8, format), @@ -957,7 +1120,7 @@ class TarInfo(object): itn(info.get("size", 0), 12, format), itn(info.get("mtime", 0), 12, format), b" ", # checksum field - info.get("type", REGTYPE), + filetype, stn(info.get("linkname", ""), 100, encoding, errors), info.get("magic", POSIX_MAGIC), stn(info.get("uname", ""), 32, encoding, errors), @@ -1457,6 +1620,8 @@ class TarFile(object): fileobject = ExFileObject # The file-object for extractfile(). + extraction_filter = None # The default filter for extraction. + def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, errors="surrogateescape", pax_headers=None, debug=None, @@ -1926,7 +2091,10 @@ class TarFile(object): members = self for tarinfo in members: if verbose: - _safe_print(stat.filemode(tarinfo.mode)) + if tarinfo.mode is None: + _safe_print("??????????") + else: + _safe_print(stat.filemode(tarinfo.mode)) _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid, tarinfo.gname or tarinfo.gid)) if tarinfo.ischr() or tarinfo.isblk(): @@ -1934,8 +2102,11 @@ class TarFile(object): ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor))) else: _safe_print("%10d" % tarinfo.size) - _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ - % time.localtime(tarinfo.mtime)[:6]) + if tarinfo.mtime is None: + _safe_print("????-??-?? ??:??:??") + else: + _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ + % time.localtime(tarinfo.mtime)[:6]) _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else "")) @@ -2022,32 +2193,58 @@ class TarFile(object): self.members.append(tarinfo) - def extractall(self, path=".", members=None, *, numeric_owner=False): + def _get_filter_function(self, filter): + if filter is None: + filter = self.extraction_filter + if filter is None: + return fully_trusted_filter + if isinstance(filter, str): + raise TypeError( + 'String names are not supported for ' + + 'TarFile.extraction_filter. Use a function such as ' + + 'tarfile.data_filter directly.') + return filter + if callable(filter): + return filter + try: + return _NAMED_FILTERS[filter] + except KeyError: + raise ValueError(f"filter {filter!r} not found") from None + + def extractall(self, path=".", members=None, *, numeric_owner=False, + filter=None): """Extract all members from the archive to the current working directory and set owner, modification time and permissions on directories afterwards. `path' specifies a different directory to extract to. `members' is optional and must be a subset of the list returned by getmembers(). If `numeric_owner` is True, only the numbers for user/group names are used and not the names. + + The `filter` function will be called on each member just + before extraction. + It can return a changed TarInfo or None to skip the member. + String names of common filters are accepted. 
""" directories = [] + filter_function = self._get_filter_function(filter) if members is None: members = self - for tarinfo in members: + for member in members: + tarinfo = self._get_extract_tarinfo(member, filter_function, path) + if tarinfo is None: + continue if tarinfo.isdir(): - # Extract directories with a safe mode. + # For directories, delay setting attributes until later, + # since permissions can interfere with extraction and + # extracting contents can reset mtime. directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 0o700 - # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), - numeric_owner=numeric_owner) + self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), + numeric_owner=numeric_owner) # Reverse sort directories. - directories.sort(key=lambda a: a.name) - directories.reverse() + directories.sort(key=lambda a: a.name, reverse=True) # Set correct owner, mtime and filemode on directories. for tarinfo in directories: @@ -2057,12 +2254,10 @@ class TarFile(object): self.utime(tarinfo, dirpath) self.chmod(tarinfo, dirpath) except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) + self._handle_nonfatal_error(e) - def extract(self, member, path="", set_attrs=True, *, numeric_owner=False): + def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, + filter=None): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can @@ -2070,35 +2265,70 @@ class TarFile(object): mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` is True, only the numbers for user/group names are used and not the names. + + The `filter` function will be called before extraction. + It can return a changed TarInfo or None to skip the member. + String names of common filters are accepted. """ - self._check("r") + filter_function = self._get_filter_function(filter) + tarinfo = self._get_extract_tarinfo(member, filter_function, path) + if tarinfo is not None: + self._extract_one(tarinfo, path, set_attrs, numeric_owner) + def _get_extract_tarinfo(self, member, filter_function, path): + """Get filtered TarInfo (or None) from member, which might be a str""" if isinstance(member, str): tarinfo = self.getmember(member) else: tarinfo = member + unfiltered = tarinfo + try: + tarinfo = filter_function(tarinfo, path) + except (OSError, FilterError) as e: + self._handle_fatal_error(e) + except ExtractError as e: + self._handle_nonfatal_error(e) + if tarinfo is None: + self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) + return None # Prepare the link target for makelink(). 
if tarinfo.islnk(): + tarinfo = copy.copy(tarinfo) tarinfo._link_target = os.path.join(path, tarinfo.linkname) + return tarinfo + + def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): + """Extract from filtered tarinfo to disk""" + self._check("r") try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), set_attrs=set_attrs, numeric_owner=numeric_owner) except OSError as e: - if self.errorlevel > 0: - raise - else: - if e.filename is None: - self._dbg(1, "tarfile: %s" % e.strerror) - else: - self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) + self._handle_fatal_error(e) except ExtractError as e: - if self.errorlevel > 1: - raise + self._handle_nonfatal_error(e) + + def _handle_nonfatal_error(self, e): + """Handle non-fatal error (ExtractError) according to errorlevel""" + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + + def _handle_fatal_error(self, e): + """Handle "fatal" error according to self.errorlevel""" + if self.errorlevel > 0: + raise + elif isinstance(e, OSError): + if e.filename is None: + self._dbg(1, "tarfile: %s" % e.strerror) else: - self._dbg(1, "tarfile: %s" % e) + self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) + else: + self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e)) def extractfile(self, member): """Extract a member from the archive as a file object. `member' may be @@ -2185,9 +2415,13 @@ class TarFile(object): """Make a directory called targetpath. """ try: - # Use a safe mode for the directory, the real mode is set - # later in _extract_member(). - os.mkdir(targetpath, 0o700) + if tarinfo.mode is None: + # Use the system's default mode + os.mkdir(targetpath) + else: + # Use a safe mode for the directory, the real mode is set + # later in _extract_member(). + os.mkdir(targetpath, 0o700) except FileExistsError: pass @@ -2230,6 +2464,9 @@ class TarFile(object): raise ExtractError("special devices not supported by system") mode = tarinfo.mode + if mode is None: + # Use mknod's default + mode = 0o600 if tarinfo.isblk(): mode |= stat.S_IFBLK else: @@ -2251,7 +2488,6 @@ class TarFile(object): os.unlink(targetpath) os.symlink(tarinfo.linkname, targetpath) else: - # See extract(). if os.path.exists(tarinfo._link_target): os.link(tarinfo._link_target, targetpath) else: @@ -2276,15 +2512,19 @@ class TarFile(object): u = tarinfo.uid if not numeric_owner: try: - if grp: + if grp and tarinfo.gname: g = grp.getgrnam(tarinfo.gname)[2] except KeyError: pass try: - if pwd: + if pwd and tarinfo.uname: u = pwd.getpwnam(tarinfo.uname)[2] except KeyError: pass + if g is None: + g = -1 + if u is None: + u = -1 try: if tarinfo.issym() and hasattr(os, "lchown"): os.lchown(targetpath, u, g) @@ -2296,6 +2536,8 @@ class TarFile(object): def chmod(self, tarinfo, targetpath): """Set file permissions of targetpath according to tarinfo. """ + if tarinfo.mode is None: + return try: os.chmod(targetpath, tarinfo.mode) except OSError: @@ -2304,10 +2546,13 @@ class TarFile(object): def utime(self, tarinfo, targetpath): """Set modification time of targetpath according to tarinfo. """ + mtime = tarinfo.mtime + if mtime is None: + return if not hasattr(os, 'utime'): return try: - os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) + os.utime(targetpath, (mtime, mtime)) except OSError: raise ExtractError("could not change modification time") @@ -2383,13 +2628,26 @@ class TarFile(object): members = self.getmembers() # Limit the member search list up to tarinfo. 
+ skipping = False if tarinfo is not None: - members = members[:members.index(tarinfo)] + try: + index = members.index(tarinfo) + except ValueError: + # The given starting point might be a (modified) copy. + # We'll later skip members until we find an equivalent. + skipping = True + else: + # Happy fast path + members = members[:index] if normalize: name = os.path.normpath(name) for member in reversed(members): + if skipping: + if tarinfo.offset == member.offset: + skipping = False + continue if normalize: member_name = os.path.normpath(member.name) else: @@ -2398,6 +2656,10 @@ class TarFile(object): if name == member_name: return member + if skipping: + # Starting point was not found + raise ValueError(tarinfo) + def _load(self): """Read through the entire archive file and look for readable members. @@ -2490,6 +2752,7 @@ class TarFile(object): #-------------------- # exported functions #-------------------- + def is_tarfile(name): """Return True if name points to a tar archive that we are able to handle, else return False. @@ -2516,6 +2779,10 @@ def main(): parser = argparse.ArgumentParser(description=description) parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Verbose output') + parser.add_argument('--filter', metavar='<filtername>', + choices=_NAMED_FILTERS, + help='Filter for extraction') + group = parser.add_mutually_exclusive_group(required=True) group.add_argument('-l', '--list', metavar='<tarfile>', help='Show listing of a tarfile') @@ -2527,8 +2794,12 @@ def main(): help='Create tarfile from sources') group.add_argument('-t', '--test', metavar='<tarfile>', help='Test if a tarfile is valid') + args = parser.parse_args() + if args.filter and args.extract is None: + parser.exit(1, '--filter is only valid for extraction\n') + if args.test is not None: src = args.test if is_tarfile(src): @@ -2559,7 +2830,7 @@ def main(): if is_tarfile(src): with TarFile.open(src, 'r:*') as tf: - tf.extractall(path=curdir) + tf.extractall(path=curdir, filter=args.filter) if args.verbose: if curdir == '.': msg = '{!r} file is extracted.'.format(src) diff --git a/lib-python/3/test/test_httpservers.py b/lib-python/3/test/test_httpservers.py index c0e1456218..cc46df56d1 100644 --- a/lib-python/3/test/test_httpservers.py +++ b/lib-python/3/test/test_httpservers.py @@ -417,6 +417,14 @@ class SimpleHTTPServerTestCase(BaseTestCase): self.check_status_and_reason(response, HTTPStatus.OK, data=support.TESTFN_UNDECODABLE) + def test_undecodable_parameter(self): + # sanity check using a valid parameter + response = self.request(self.base_url + '/?x=123').read() + self.assertRegex(response, f'listing for {self.base_url}/\?x=123'.encode('latin1')) + # now the bogus encoding + response = self.request(self.base_url + '/?x=%bb').read() + self.assertRegex(response, f'listing for {self.base_url}/\?x=\xef\xbf\xbd'.encode('latin1')) + def test_get_dir_redirect_location_domain_injection_bug(self): """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. 
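The shutil tests below exercise the filter pass-through added to shutil.unpack_archive above. A brief sketch of the call (the archive and directory names are hypothetical):

    import shutil

    # For tar-based formats the keyword is forwarded to TarFile.extractall();
    # unpackers that do not accept it (e.g. the zip unpacker) raise TypeError.
    shutil.unpack_archive("release.tar.gz", "dest", filter="data")

The new command-line flag works the same way, e.g. python -m tarfile -e release.tar --filter data; per the change above, --filter is only valid for extraction and other modes exit with an error.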
diff --git a/lib-python/3/test/test_shutil.py b/lib-python/3/test/test_shutil.py index 3890df93bb..9041e7aa36 100644 --- a/lib-python/3/test/test_shutil.py +++ b/lib-python/3/test/test_shutil.py @@ -31,6 +31,7 @@ except ImportError: from test import support from test.support import TESTFN, FakePath +from test.support import warnings_helper TESTFN2 = TESTFN + "2" TESTFN_SRC = TESTFN + "_SRC" @@ -1578,12 +1579,14 @@ class TestArchives(BaseTest, unittest.TestCase): ### shutil.unpack_archive - def check_unpack_archive(self, format): - self.check_unpack_archive_with_converter(format, lambda path: path) - self.check_unpack_archive_with_converter(format, pathlib.Path) - self.check_unpack_archive_with_converter(format, FakePath) + def check_unpack_archive(self, format, **kwargs): + self.check_unpack_archive_with_converter( + format, lambda path: path, **kwargs) + self.check_unpack_archive_with_converter( + format, pathlib.Path, **kwargs) + self.check_unpack_archive_with_converter(format, FakePath, **kwargs) - def check_unpack_archive_with_converter(self, format, converter): + def check_unpack_archive_with_converter(self, format, converter, **kwargs): root_dir, base_dir = self._create_files() expected = rlistdir(root_dir) expected.remove('outer') @@ -1593,36 +1596,47 @@ class TestArchives(BaseTest, unittest.TestCase): # let's try to unpack it now tmpdir2 = self.mkdtemp() - unpack_archive(converter(filename), converter(tmpdir2)) + unpack_archive(converter(filename), converter(tmpdir2), **kwargs) self.assertEqual(rlistdir(tmpdir2), expected) # and again, this time with the format specified tmpdir3 = self.mkdtemp() - unpack_archive(converter(filename), converter(tmpdir3), format=format) + unpack_archive(converter(filename), converter(tmpdir3), format=format, + **kwargs) self.assertEqual(rlistdir(tmpdir3), expected) - self.assertRaises(shutil.ReadError, unpack_archive, converter(TESTFN)) - self.assertRaises(ValueError, unpack_archive, converter(TESTFN), format='xxx') + with self.assertRaises(shutil.ReadError): + unpack_archive(converter(TESTFN), **kwargs) + with self.assertRaises(ValueError): + unpack_archive(converter(TESTFN), format='xxx', **kwargs) + + def check_unpack_tarball(self, format): + self.check_unpack_archive(format, filter='fully_trusted') + self.check_unpack_archive(format, filter='data') + with warnings_helper.check_no_warnings(self): + self.check_unpack_archive(format) def test_unpack_archive_tar(self): - self.check_unpack_archive('tar') + self.check_unpack_tarball('tar') @support.requires_zlib() def test_unpack_archive_gztar(self): - self.check_unpack_archive('gztar') + self.check_unpack_tarball('gztar') @support.requires_bz2() def test_unpack_archive_bztar(self): - self.check_unpack_archive('bztar') + self.check_unpack_tarball('bztar') @support.requires_lzma() @unittest.skipIf(AIX and not _maxdataOK(), "AIX MAXDATA must be 0x20000000 or larger") def test_unpack_archive_xztar(self): - self.check_unpack_archive('xztar') + self.check_unpack_tarball('xztar') @support.requires_zlib() def test_unpack_archive_zip(self): self.check_unpack_archive('zip') + with self.assertRaises(TypeError): + self.check_unpack_archive('zip', filter='data') def test_unpack_registry(self): diff --git a/lib-python/3/test/test_tarfile.py b/lib-python/3/test/test_tarfile.py index df634bcdc9..67ab60961c 100644 --- a/lib-python/3/test/test_tarfile.py +++ b/lib-python/3/test/test_tarfile.py @@ -5,6 +5,10 @@ from hashlib import sha256 from contextlib import contextmanager from random import Random import pathlib 
+import shutil +import re +import warnings +import stat import unittest import unittest.mock @@ -12,6 +16,7 @@ import tarfile from test import support from test.support import script_helper +from test.support import warnings_helper # Check for our compression modules. try: @@ -2386,7 +2391,12 @@ class MiscTest(unittest.TestCase): 'EmptyHeaderError', 'TruncatedHeaderError', 'EOFHeaderError', 'InvalidHeaderError', 'SubsequentHeaderError', 'ExFileObject', - 'main'} + 'main', + 'fully_trusted_filter', 'data_filter', + 'tar_filter', 'FilterError', 'AbsoluteLinkError', + 'OutsideDestinationError', 'SpecialFileError', + 'AbsolutePathError', 'LinkOutsideDestinationError', + } support.check__all__(self, tarfile, blacklist=blacklist) @@ -2409,6 +2419,15 @@ class CommandLineTest(unittest.TestCase): for tardata in files: tf.add(tardata, arcname=os.path.basename(tardata)) + def make_evil_tarfile(self, tar_name): + files = [support.findfile('tokenize_tests.txt')] + self.addCleanup(support.unlink, tar_name) + with tarfile.open(tar_name, 'w') as tf: + benign = tarfile.TarInfo('benign') + tf.addfile(benign, fileobj=io.BytesIO(b'')) + evil = tarfile.TarInfo('../evil') + tf.addfile(evil, fileobj=io.BytesIO(b'')) + def test_bad_use(self): rc, out, err = self.tarfilecmd_failure() self.assertEqual(out, b'') @@ -2565,6 +2584,25 @@ class CommandLineTest(unittest.TestCase): finally: support.rmtree(tarextdir) + def test_extract_command_filter(self): + self.make_evil_tarfile(tmpname) + # Make an inner directory, so the member named '../evil' + # is still extracted into `tarextdir` + destdir = os.path.join(tarextdir, 'dest') + os.mkdir(tarextdir) + try: + with support.temp_cwd(destdir): + self.tarfilecmd_failure('-e', tmpname, + '-v', + '--filter', 'data') + out = self.tarfilecmd('-e', tmpname, + '-v', + '--filter', 'fully_trusted', + PYTHONIOENCODING='utf-8') + self.assertIn(b' file is extracted.', out) + finally: + support.rmtree(tarextdir) + def test_extract_command_different_directory(self): self.make_simple_tarfile(tmpname) try: @@ -2838,6 +2876,893 @@ class NumericOwnerTest(unittest.TestCase): tarfl.extract, filename_1, TEMPDIR, False, True) +class ReplaceTests(ReadTest, unittest.TestCase): + def test_replace_name(self): + member = self.tar.getmember('ustar/regtype') + replaced = member.replace(name='misc/other') + self.assertEqual(replaced.name, 'misc/other') + self.assertEqual(member.name, 'ustar/regtype') + self.assertEqual(self.tar.getmember('ustar/regtype').name, + 'ustar/regtype') + + def test_replace_deep(self): + member = self.tar.getmember('pax/regtype1') + replaced = member.replace() + replaced.pax_headers['gname'] = 'not-bar' + self.assertEqual(member.pax_headers['gname'], 'bar') + self.assertEqual( + self.tar.getmember('pax/regtype1').pax_headers['gname'], 'bar') + + def test_replace_shallow(self): + member = self.tar.getmember('pax/regtype1') + replaced = member.replace(deep=False) + replaced.pax_headers['gname'] = 'not-bar' + self.assertEqual(member.pax_headers['gname'], 'not-bar') + self.assertEqual( + self.tar.getmember('pax/regtype1').pax_headers['gname'], 'not-bar') + + def test_replace_all(self): + member = self.tar.getmember('ustar/regtype') + for attr_name in ('name', 'mtime', 'mode', 'linkname', + 'uid', 'gid', 'uname', 'gname'): + with self.subTest(attr_name=attr_name): + replaced = member.replace(**{attr_name: None}) + self.assertEqual(getattr(replaced, attr_name), None) + self.assertNotEqual(getattr(member, attr_name), None) + + def test_replace_internal(self): + member = 
self.tar.getmember('ustar/regtype') + with self.assertRaises(TypeError): + member.replace(offset=123456789) + + +class NoneInfoExtractTests(ReadTest): + # These mainly check that all kinds of members are extracted successfully + # if some metadata is None. + # Some of the methods do additional spot checks. + + # We also test that the default filters can deal with None. + + extraction_filter = None + + @classmethod + def setUpClass(cls): + tar = tarfile.open(tarname, mode='r', encoding="iso8859-1") + cls.control_dir = pathlib.Path(TEMPDIR) / "extractall_ctrl" + tar.errorlevel = 0 + tar.extractall(cls.control_dir, filter=cls.extraction_filter) + tar.close() + cls.control_paths = set( + p.relative_to(cls.control_dir) + for p in pathlib.Path(cls.control_dir).glob('**/*')) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.control_dir) + + def check_files_present(self, directory): + got_paths = set( + p.relative_to(directory) + for p in pathlib.Path(directory).glob('**/*')) + self.assertEqual(self.control_paths, got_paths) + + @contextmanager + def extract_with_none(self, *attr_names): + DIR = pathlib.Path(TEMPDIR) / "extractall_none" + self.tar.errorlevel = 0 + for member in self.tar.getmembers(): + for attr_name in attr_names: + setattr(member, attr_name, None) + with support.temp_dir(DIR): + self.tar.extractall(DIR, filter='fully_trusted') + self.check_files_present(DIR) + yield DIR + + def test_extractall_none_mtime(self): + # mtimes of extracted files should be later than 'now' -- the mtime + # of a previously created directory. + now = pathlib.Path(TEMPDIR).stat().st_mtime + with self.extract_with_none('mtime') as DIR: + for path in pathlib.Path(DIR).glob('**/*'): + with self.subTest(path=path): + try: + mtime = path.stat().st_mtime + except OSError: + # Some systems can't stat symlinks, ignore those + if not path.is_symlink(): + raise + else: + self.assertGreaterEqual(path.stat().st_mtime, now) + + def test_extractall_none_mode(self): + # modes of directories and regular files should match the mode + # of a "normally" created directory or regular file + dir_mode = pathlib.Path(TEMPDIR).stat().st_mode + regular_file = pathlib.Path(TEMPDIR) / 'regular_file' + regular_file.write_text('') + regular_file_mode = regular_file.stat().st_mode + with self.extract_with_none('mode') as DIR: + for path in pathlib.Path(DIR).glob('**/*'): + with self.subTest(path=path): + if path.is_dir(): + self.assertEqual(path.stat().st_mode, dir_mode) + elif path.is_file(): + self.assertEqual(path.stat().st_mode, + regular_file_mode) + + def test_extractall_none_uid(self): + with self.extract_with_none('uid'): + pass + + def test_extractall_none_gid(self): + with self.extract_with_none('gid'): + pass + + def test_extractall_none_uname(self): + with self.extract_with_none('uname'): + pass + + def test_extractall_none_gname(self): + with self.extract_with_none('gname'): + pass + + def test_extractall_none_ownership(self): + with self.extract_with_none('uid', 'gid', 'uname', 'gname'): + pass + +class NoneInfoExtractTests_Data(NoneInfoExtractTests, unittest.TestCase): + extraction_filter = 'data' + +class NoneInfoExtractTests_FullyTrusted(NoneInfoExtractTests, + unittest.TestCase): + extraction_filter = 'fully_trusted' + +class NoneInfoExtractTests_Tar(NoneInfoExtractTests, unittest.TestCase): + extraction_filter = 'tar' + +class NoneInfoExtractTests_Default(NoneInfoExtractTests, + unittest.TestCase): + extraction_filter = None + +class NoneInfoTests_Misc(unittest.TestCase): + def test_add(self): + # When 
addfile() encounters None metadata, it raises a ValueError + bio = io.BytesIO() + for tarformat in (tarfile.USTAR_FORMAT, tarfile.GNU_FORMAT, + tarfile.PAX_FORMAT): + with self.subTest(tarformat=tarformat): + tar = tarfile.open(fileobj=bio, mode='w', format=tarformat) + tarinfo = tar.gettarinfo(tarname) + try: + tar.addfile(tarinfo) + except Exception: + if tarformat == tarfile.USTAR_FORMAT: + # In the old, limited format, adding might fail for + # reasons like the UID being too large + pass + else: + raise + else: + for attr_name in ('mtime', 'mode', 'uid', 'gid', + 'uname', 'gname'): + with self.subTest(attr_name=attr_name): + replaced = tarinfo.replace(**{attr_name: None}) + with self.assertRaisesRegex(ValueError, + f"{attr_name}"): + tar.addfile(replaced) + + def test_list(self): + # Change some metadata to None, then compare list() output + # word-for-word. We want list() to not raise, and to only change + # printout for the affected piece of metadata. + # (n.b.: some contents of the test archive are hardcoded.) + for attr_names in ({'mtime'}, {'mode'}, {'uid'}, {'gid'}, + {'uname'}, {'gname'}, + {'uid', 'uname'}, {'gid', 'gname'}): + with (self.subTest(attr_names=attr_names), + tarfile.open(tarname, encoding="iso8859-1") as tar): + tio_prev = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') + with support.swap_attr(sys, 'stdout', tio_prev): + tar.list() + for member in tar.getmembers(): + for attr_name in attr_names: + setattr(member, attr_name, None) + tio_new = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') + with support.swap_attr(sys, 'stdout', tio_new): + tar.list() + for expected, got in zip(tio_prev.detach().getvalue().split(), + tio_new.detach().getvalue().split()): + if attr_names == {'mtime'} and re.match(rb'2003-01-\d\d', expected): + self.assertEqual(got, b'????-??-??') + elif attr_names == {'mtime'} and re.match(rb'\d\d:\d\d:\d\d', expected): + self.assertEqual(got, b'??:??:??') + elif attr_names == {'mode'} and re.match( + rb'.([r-][w-][x-]){3}', expected): + self.assertEqual(got, b'??????????') + elif attr_names == {'uname'} and expected.startswith( + (b'tarfile/', b'lars/', b'foo/')): + exp_user, exp_group = expected.split(b'/') + got_user, got_group = got.split(b'/') + self.assertEqual(got_group, exp_group) + self.assertRegex(got_user, b'[0-9]+') + elif attr_names == {'gname'} and expected.endswith( + (b'/tarfile', b'/users', b'/bar')): + exp_user, exp_group = expected.split(b'/') + got_user, got_group = got.split(b'/') + self.assertEqual(got_user, exp_user) + self.assertRegex(got_group, b'[0-9]+') + elif attr_names == {'uid'} and expected.startswith( + (b'1000/')): + exp_user, exp_group = expected.split(b'/') + got_user, got_group = got.split(b'/') + self.assertEqual(got_group, exp_group) + self.assertEqual(got_user, b'None') + elif attr_names == {'gid'} and expected.endswith((b'/100')): + exp_user, exp_group = expected.split(b'/') + got_user, got_group = got.split(b'/') + self.assertEqual(got_user, exp_user) + self.assertEqual(got_group, b'None') + elif attr_names == {'uid', 'uname'} and expected.startswith( + (b'tarfile/', b'lars/', b'foo/', b'1000/')): + exp_user, exp_group = expected.split(b'/') + got_user, got_group = got.split(b'/') + self.assertEqual(got_group, exp_group) + self.assertEqual(got_user, b'None') + elif attr_names == {'gname', 'gid'} and expected.endswith( + (b'/tarfile', b'/users', b'/bar', b'/100')): + exp_user, exp_group = expected.split(b'/') + got_user, got_group = got.split(b'/') + self.assertEqual(got_user, exp_user) + 
self.assertEqual(got_group, b'None') + else: + # In other cases the output should be the same + self.assertEqual(expected, got) + +def _filemode_to_int(mode): + """Inverse of `stat.filemode` (for permission bits) + + Using mode strings rather than numbers makes the later tests more readable. + """ + str_mode = mode[1:] + result = ( + {'r': stat.S_IRUSR, '-': 0}[str_mode[0]] + | {'w': stat.S_IWUSR, '-': 0}[str_mode[1]] + | {'x': stat.S_IXUSR, '-': 0, + 's': stat.S_IXUSR | stat.S_ISUID, + 'S': stat.S_ISUID}[str_mode[2]] + | {'r': stat.S_IRGRP, '-': 0}[str_mode[3]] + | {'w': stat.S_IWGRP, '-': 0}[str_mode[4]] + | {'x': stat.S_IXGRP, '-': 0, + 's': stat.S_IXGRP | stat.S_ISGID, + 'S': stat.S_ISGID}[str_mode[5]] + | {'r': stat.S_IROTH, '-': 0}[str_mode[6]] + | {'w': stat.S_IWOTH, '-': 0}[str_mode[7]] + | {'x': stat.S_IXOTH, '-': 0, + 't': stat.S_IXOTH | stat.S_ISVTX, + 'T': stat.S_ISVTX}[str_mode[8]] + ) + # check we did this right + assert stat.filemode(result)[1:] == mode[1:] + + return result + +class ArchiveMaker: + """Helper to create a tar file with specific contents + + Usage: + + with ArchiveMaker() as t: + t.add('filename', ...) + + with t.open() as tar: + ... # `tar` is now a TarFile with 'filename' in it! + """ + def __init__(self): + self.bio = io.BytesIO() + + def __enter__(self): + self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio) + return self + + def __exit__(self, *exc): + self.tar_w.close() + self.contents = self.bio.getvalue() + self.bio = None + + def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, + mode=None, **kwargs): + """Add a member to the test archive. Call within `with`.""" + name = str(name) + tarinfo = tarfile.TarInfo(name).replace(**kwargs) + if mode: + tarinfo.mode = _filemode_to_int(mode) + if symlink_to is not None: + type = tarfile.SYMTYPE + tarinfo.linkname = str(symlink_to) + if hardlink_to is not None: + type = tarfile.LNKTYPE + tarinfo.linkname = str(hardlink_to) + if name.endswith('/') and type is None: + type = tarfile.DIRTYPE + if type is not None: + tarinfo.type = type + if tarinfo.isreg(): + fileobj = io.BytesIO(bytes(tarinfo.size)) + else: + fileobj = None + self.tar_w.addfile(tarinfo, fileobj) + + def open(self, **kwargs): + """Open the resulting archive as TarFile. Call after `with`.""" + bio = io.BytesIO(self.contents) + return tarfile.open(fileobj=bio, **kwargs) + + +class TestExtractionFilters(unittest.TestCase): + + # A temporary directory for the extraction results. + # All files that "escape" the destination path should still end + # up in this directory. + outerdir = pathlib.Path(TEMPDIR) / 'outerdir' + + # The destination for the extraction, within `outerdir` + destdir = outerdir / 'dest' + + @contextmanager + def check_context(self, tar, filter): + """Extracts `tar` to `self.destdir` and allows checking the result + + If an error occurs, it must be checked using `expect_exception` + + Otherwise, all resulting files must be checked using `expect_file`, + except the destination directory itself and parent directories of + other files. + When checking directories, do so before their contents. 
+ """ + with support.temp_dir(self.outerdir): + try: + tar.extractall(self.destdir, filter=filter) + except Exception as exc: + self.raised_exception = exc + self.expected_paths = set() + else: + self.raised_exception = None + self.expected_paths = set(self.outerdir.glob('**/*')) + self.expected_paths.discard(self.destdir) + try: + yield + finally: + tar.close() + if self.raised_exception: + raise self.raised_exception + self.assertEqual(self.expected_paths, set()) + + def expect_file(self, name, type=None, symlink_to=None, mode=None): + """Check a single file. See check_context.""" + if self.raised_exception: + raise self.raised_exception + # use normpath() rather than resolve() so we don't follow symlinks + path = pathlib.Path(os.path.normpath(self.destdir / name)) + self.assertIn(path, self.expected_paths) + self.expected_paths.remove(path) + + # When checking mode, ignore Windows (which can only set user read and + # user write bits). Newer versions of Python use `os_helper.can_chmod()` + # instead of hardcoding Windows. + if mode is not None and sys.platform != 'win32': + got = stat.filemode(stat.S_IMODE(path.stat().st_mode)) + self.assertEqual(got, mode) + + if type is None and isinstance(name, str) and name.endswith('/'): + type = tarfile.DIRTYPE + if symlink_to is not None: + got = (self.destdir / name).readlink() + expected = pathlib.Path(symlink_to) + # The symlink might be the same (textually) as what we expect, + # but some systems change the link to an equivalent path, so + # we fall back to samefile(). + if expected != got: + self.assertTrue(got.samefile(expected)) + elif type == tarfile.REGTYPE or type is None: + self.assertTrue(path.is_file()) + elif type == tarfile.DIRTYPE: + self.assertTrue(path.is_dir()) + elif type == tarfile.FIFOTYPE: + self.assertTrue(path.is_fifo()) + else: + raise NotImplementedError(type) + for parent in path.parents: + self.expected_paths.discard(parent) + + def expect_exception(self, exc_type, message_re='.'): + with self.assertRaisesRegex(exc_type, message_re): + if self.raised_exception is not None: + raise self.raised_exception + self.raised_exception = None + + def test_benign_file(self): + with ArchiveMaker() as arc: + arc.add('benign.txt') + for filter in 'fully_trusted', 'tar', 'data': + with self.check_context(arc.open(), filter): + self.expect_file('benign.txt') + + def test_absolute(self): + # Test handling a member with an absolute path + # Inspired by 'absolute1' in https://github.com/jwilk/traversal-archives + with ArchiveMaker() as arc: + arc.add(self.outerdir / 'escaped.evil') + + with self.check_context(arc.open(), 'fully_trusted'): + self.expect_file('../escaped.evil') + + for filter in 'tar', 'data': + with self.check_context(arc.open(), filter): + if str(self.outerdir).startswith('/'): + # We strip leading slashes, as e.g. GNU tar does + # (without --absolute-filenames). + outerdir_stripped = str(self.outerdir).lstrip('/') + self.expect_file(f'{outerdir_stripped}/escaped.evil') + else: + # On this system, absolute paths don't have leading + # slashes. + # So, there's nothing to strip. We refuse to unpack + # to an absolute path, nonetheless. 
+ self.expect_exception( + tarfile.AbsolutePathError, + """['"].*escaped.evil['"] has an absolute path""") + + def test_parent_symlink(self): + # Test interplaying symlinks + # Inspired by 'dirsymlink2a' in jwilk/traversal-archives + with ArchiveMaker() as arc: + arc.add('current', symlink_to='.') + arc.add('parent', symlink_to='current/..') + arc.add('parent/evil') + + if support.can_symlink(): + with self.check_context(arc.open(), 'fully_trusted'): + if self.raised_exception is not None: + # Windows will refuse to create a file that's a symlink to itself + # (and tarfile doesn't swallow that exception) + self.expect_exception(FileExistsError) + # The other cases will fail with this error too. + # Skip the rest of this test. + return + else: + self.expect_file('current', symlink_to='.') + self.expect_file('parent', symlink_to='current/..') + self.expect_file('../evil') + + with self.check_context(arc.open(), 'tar'): + self.expect_exception( + tarfile.OutsideDestinationError, + """'parent/evil' would be extracted to ['"].*evil['"], """ + + "which is outside the destination") + + with self.check_context(arc.open(), 'data'): + self.expect_exception( + tarfile.LinkOutsideDestinationError, + """'parent' would link to ['"].*outerdir['"], """ + + "which is outside the destination") + + else: + # No symlink support. The symlinks are ignored. + with self.check_context(arc.open(), 'fully_trusted'): + self.expect_file('parent/evil') + with self.check_context(arc.open(), 'tar'): + self.expect_file('parent/evil') + with self.check_context(arc.open(), 'data'): + self.expect_file('parent/evil') + + def test_parent_symlink2(self): + # Test interplaying symlinks + # Inspired by 'dirsymlink2b' in jwilk/traversal-archives + with ArchiveMaker() as arc: + arc.add('current', symlink_to='.') + arc.add('current/parent', symlink_to='..') + arc.add('parent/evil') + + with self.check_context(arc.open(), 'fully_trusted'): + if support.can_symlink(): + self.expect_file('current', symlink_to='.') + self.expect_file('parent', symlink_to='..') + self.expect_file('../evil') + else: + self.expect_file('current/') + self.expect_file('parent/evil') + + with self.check_context(arc.open(), 'tar'): + if support.can_symlink(): + self.expect_exception( + tarfile.OutsideDestinationError, + "'parent/evil' would be extracted to " + + """['"].*evil['"], which is outside """ + + "the destination") + else: + self.expect_file('current/') + self.expect_file('parent/evil') + + with self.check_context(arc.open(), 'data'): + self.expect_exception( + tarfile.LinkOutsideDestinationError, + """'current/parent' would link to ['"].*['"], """ + + "which is outside the destination") + + def test_absolute_symlink(self): + # Test symlink to an absolute path + # Inspired by 'dirsymlink' in jwilk/traversal-archives + with ArchiveMaker() as arc: + arc.add('parent', symlink_to=self.outerdir) + arc.add('parent/evil') + + with self.check_context(arc.open(), 'fully_trusted'): + if support.can_symlink(): + self.expect_file('parent', symlink_to=self.outerdir) + self.expect_file('../evil') + else: + self.expect_file('parent/evil') + + with self.check_context(arc.open(), 'tar'): + if support.can_symlink(): + self.expect_exception( + tarfile.OutsideDestinationError, + "'parent/evil' would be extracted to " + + """['"].*evil['"], which is outside """ + + "the destination") + else: + self.expect_file('parent/evil') + + with self.check_context(arc.open(), 'data'): + self.expect_exception( + tarfile.AbsoluteLinkError, + "'parent' is a symlink to an absolute path") 
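Stepping back from the individual traversal cases: as these tests show, a filter is just a callable taking (member, dest_path) and returning a TarInfo (possibly a modified copy) or None to skip the member. A custom-filter sketch, not taken from the diff (the 'unpacked/' prefix and file names are hypothetical):

    import tarfile

    def lenient_data_filter(member, dest_path):
        # Start from the strict 'data' policy, but skip offending members
        # instead of aborting the whole extraction.
        try:
            member = tarfile.data_filter(member, dest_path)
        except tarfile.FilterError:
            return None
        # replace() returns a copy with the new name; the archive's own
        # member list is left untouched.
        return member.replace(name="unpacked/" + member.name, deep=False)

    with tarfile.open("archive.tar") as tar:
        tar.extractall("dest", filter=lenient_data_filter)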
+ + def test_sly_relative0(self): + # Inspired by 'relative0' in jwilk/traversal-archives + with ArchiveMaker() as arc: + arc.add('../moo', symlink_to='..//tmp/moo') + + try: + with self.check_context(arc.open(), filter='fully_trusted'): + if support.can_symlink(): + if isinstance(self.raised_exception, FileExistsError): + # XXX TarFile happens to fail creating a parent + # directory. + # This might be a bug, but fixing it would hurt + # security. + # Note that e.g. GNU `tar` rejects '..' components, + # so you could argue this is an invalid archive and we + # just raise an bad type of exception. + self.expect_exception(FileExistsError) + else: + self.expect_file('../moo', symlink_to='..//tmp/moo') + else: + # The symlink can't be extracted and is ignored + pass + except FileExistsError: + pass + + for filter in 'tar', 'data': + with self.check_context(arc.open(), filter): + self.expect_exception( + tarfile.OutsideDestinationError, + "'../moo' would be extracted to " + + "'.*moo', which is outside " + + "the destination") + + def test_sly_relative2(self): + # Inspired by 'relative2' in jwilk/traversal-archives + with ArchiveMaker() as arc: + arc.add('tmp/') + arc.add('tmp/../../moo', symlink_to='tmp/../..//tmp/moo') + + with self.check_context(arc.open(), 'fully_trusted'): + self.expect_file('tmp', type=tarfile.DIRTYPE) + if support.can_symlink(): + self.expect_file('../moo', symlink_to='tmp/../../tmp/moo') + + for filter in 'tar', 'data': + with self.check_context(arc.open(), filter): + self.expect_exception( + tarfile.OutsideDestinationError, + "'tmp/../../moo' would be extracted to " + + """['"].*moo['"], which is outside the """ + + "destination") + + def test_modes(self): + # Test how file modes are extracted + # (Note that the modes are ignored on platforms without working chmod) + with ArchiveMaker() as arc: + arc.add('all_bits', mode='?rwsrwsrwt') + arc.add('perm_bits', mode='?rwxrwxrwx') + arc.add('exec_group_other', mode='?rw-rwxrwx') + arc.add('read_group_only', mode='?---r-----') + arc.add('no_bits', mode='?---------') + arc.add('dir/', mode='?---rwsrwt') + + # On some systems, setting the sticky bit is a no-op. + # Check if that's the case. 
+ tmp_filename = os.path.join(TEMPDIR, "tmp.file") + with open(tmp_filename, 'w'): + pass + os.chmod(tmp_filename, os.stat(tmp_filename).st_mode | stat.S_ISVTX) + have_sticky_files = (os.stat(tmp_filename).st_mode & stat.S_ISVTX) + os.unlink(tmp_filename) + + os.mkdir(tmp_filename) + os.chmod(tmp_filename, os.stat(tmp_filename).st_mode | stat.S_ISVTX) + have_sticky_dirs = (os.stat(tmp_filename).st_mode & stat.S_ISVTX) + os.rmdir(tmp_filename) + + with self.check_context(arc.open(), 'fully_trusted'): + if have_sticky_files: + self.expect_file('all_bits', mode='?rwsrwsrwt') + else: + self.expect_file('all_bits', mode='?rwsrwsrwx') + self.expect_file('perm_bits', mode='?rwxrwxrwx') + self.expect_file('exec_group_other', mode='?rw-rwxrwx') + self.expect_file('read_group_only', mode='?---r-----') + self.expect_file('no_bits', mode='?---------') + if have_sticky_dirs: + self.expect_file('dir/', mode='?---rwsrwt') + else: + self.expect_file('dir/', mode='?---rwsrwx') + + with self.check_context(arc.open(), 'tar'): + self.expect_file('all_bits', mode='?rwxr-xr-x') + self.expect_file('perm_bits', mode='?rwxr-xr-x') + self.expect_file('exec_group_other', mode='?rw-r-xr-x') + self.expect_file('read_group_only', mode='?---r-----') + self.expect_file('no_bits', mode='?---------') + self.expect_file('dir/', mode='?---r-xr-x') + + with self.check_context(arc.open(), 'data'): + normal_dir_mode = stat.filemode(stat.S_IMODE( + self.outerdir.stat().st_mode)) + self.expect_file('all_bits', mode='?rwxr-xr-x') + self.expect_file('perm_bits', mode='?rwxr-xr-x') + self.expect_file('exec_group_other', mode='?rw-r--r--') + self.expect_file('read_group_only', mode='?rw-r-----') + self.expect_file('no_bits', mode='?rw-------') + self.expect_file('dir/', mode=normal_dir_mode) + + def test_pipe(self): + # Test handling of a special file + with ArchiveMaker() as arc: + arc.add('foo', type=tarfile.FIFOTYPE) + + for filter in 'fully_trusted', 'tar': + with self.check_context(arc.open(), filter): + if hasattr(os, 'mkfifo'): + self.expect_file('foo', type=tarfile.FIFOTYPE) + else: + # The pipe can't be extracted and is skipped. + pass + + with self.check_context(arc.open(), 'data'): + self.expect_exception( + tarfile.SpecialFileError, + "'foo' is a special file") + + def test_special_files(self): + # Creating device files is tricky. Instead of attempting that let's + # only check the filter result. + for special_type in tarfile.FIFOTYPE, tarfile.CHRTYPE, tarfile.BLKTYPE: + tarinfo = tarfile.TarInfo('foo') + tarinfo.type = special_type + trusted = tarfile.fully_trusted_filter(tarinfo, '') + self.assertIs(trusted, tarinfo) + tar = tarfile.tar_filter(tarinfo, '') + self.assertEqual(tar.type, special_type) + with self.assertRaises(tarfile.SpecialFileError) as cm: + tarfile.data_filter(tarinfo, '') + self.assertIsInstance(cm.exception.tarinfo, tarfile.TarInfo) + self.assertEqual(cm.exception.tarinfo.name, 'foo') + + def test_fully_trusted_filter(self): + # The 'fully_trusted' filter returns the original TarInfo objects. + with tarfile.TarFile.open(tarname) as tar: + for tarinfo in tar.getmembers(): + filtered = tarfile.fully_trusted_filter(tarinfo, '') + self.assertIs(filtered, tarinfo) + + def test_tar_filter(self): + # The 'tar' filter returns TarInfo objects with the same name/type. + # (It can also fail for particularly "evil" input, but we don't have + # that in the test archive.) 
+ with tarfile.TarFile.open(tarname) as tar: + for tarinfo in tar.getmembers(): + filtered = tarfile.tar_filter(tarinfo, '') + self.assertIs(filtered.name, tarinfo.name) + self.assertIs(filtered.type, tarinfo.type) + + def test_data_filter(self): + # The 'data' filter either raises, or returns TarInfo with the same + # name/type. + with tarfile.TarFile.open(tarname) as tar: + for tarinfo in tar.getmembers(): + try: + filtered = tarfile.data_filter(tarinfo, '') + except tarfile.FilterError: + continue + self.assertIs(filtered.name, tarinfo.name) + self.assertIs(filtered.type, tarinfo.type) + + def test_default_filter_warns_not(self): + """Ensure the default filter does not warn (like in 3.12)""" + with ArchiveMaker() as arc: + arc.add('foo') + with warnings_helper.check_no_warnings(self): + with self.check_context(arc.open(), None): + self.expect_file('foo') + + def test_change_default_filter_on_instance(self): + tar = tarfile.TarFile(tarname, 'r') + def strict_filter(tarinfo, path): + if tarinfo.name == 'ustar/regtype': + return tarinfo + else: + return None + tar.extraction_filter = strict_filter + with self.check_context(tar, None): + self.expect_file('ustar/regtype') + + def test_change_default_filter_on_class(self): + def strict_filter(tarinfo, path): + if tarinfo.name == 'ustar/regtype': + return tarinfo + else: + return None + tar = tarfile.TarFile(tarname, 'r') + with support.swap_attr(tarfile.TarFile, 'extraction_filter', + staticmethod(strict_filter)): + with self.check_context(tar, None): + self.expect_file('ustar/regtype') + + def test_change_default_filter_on_subclass(self): + class TarSubclass(tarfile.TarFile): + def extraction_filter(self, tarinfo, path): + if tarinfo.name == 'ustar/regtype': + return tarinfo + else: + return None + + tar = TarSubclass(tarname, 'r') + with self.check_context(tar, None): + self.expect_file('ustar/regtype') + + def test_change_default_filter_to_string(self): + tar = tarfile.TarFile(tarname, 'r') + tar.extraction_filter = 'data' + with self.check_context(tar, None): + self.expect_exception(TypeError) + + def test_custom_filter(self): + def custom_filter(tarinfo, path): + self.assertIs(path, self.destdir) + if tarinfo.name == 'move_this': + return tarinfo.replace(name='moved') + if tarinfo.name == 'ignore_this': + return None + return tarinfo + + with ArchiveMaker() as arc: + arc.add('move_this') + arc.add('ignore_this') + arc.add('keep') + with self.check_context(arc.open(), custom_filter): + self.expect_file('moved') + self.expect_file('keep') + + def test_bad_filter_name(self): + with ArchiveMaker() as arc: + arc.add('foo') + with self.check_context(arc.open(), 'bad filter name'): + self.expect_exception(ValueError) + + def test_stateful_filter(self): + # Stateful filters should be possible. + # (This doesn't really test tarfile. Rather, it demonstrates + # that third parties can implement a stateful filter.) 
+ class StatefulFilter: + def __enter__(self): + self.num_files_processed = 0 + return self + + def __call__(self, tarinfo, path): + try: + tarinfo = tarfile.data_filter(tarinfo, path) + except tarfile.FilterError: + return None + self.num_files_processed += 1 + return tarinfo + + def __exit__(self, *exc_info): + self.done = True + + with ArchiveMaker() as arc: + arc.add('good') + arc.add('bad', symlink_to='/') + arc.add('good') + with StatefulFilter() as custom_filter: + with self.check_context(arc.open(), custom_filter): + self.expect_file('good') + self.assertEqual(custom_filter.num_files_processed, 2) + self.assertEqual(custom_filter.done, True) + + def test_errorlevel(self): + def extracterror_filter(tarinfo, path): + raise tarfile.ExtractError('failed with ExtractError') + def filtererror_filter(tarinfo, path): + raise tarfile.FilterError('failed with FilterError') + def oserror_filter(tarinfo, path): + raise OSError('failed with OSError') + def tarerror_filter(tarinfo, path): + raise tarfile.TarError('failed with base TarError') + def valueerror_filter(tarinfo, path): + raise ValueError('failed with ValueError') + + with ArchiveMaker() as arc: + arc.add('file') + + # If errorlevel is 0, errors affected by errorlevel are ignored + + with self.check_context(arc.open(errorlevel=0), extracterror_filter): + self.expect_file('file') + + with self.check_context(arc.open(errorlevel=0), filtererror_filter): + self.expect_file('file') + + with self.check_context(arc.open(errorlevel=0), oserror_filter): + self.expect_file('file') + + with self.check_context(arc.open(errorlevel=0), tarerror_filter): + self.expect_exception(tarfile.TarError) + + with self.check_context(arc.open(errorlevel=0), valueerror_filter): + self.expect_exception(ValueError) + + # If 1, all fatal errors are raised + + with self.check_context(arc.open(errorlevel=1), extracterror_filter): + self.expect_file('file') + + with self.check_context(arc.open(errorlevel=1), filtererror_filter): + self.expect_exception(tarfile.FilterError) + + with self.check_context(arc.open(errorlevel=1), oserror_filter): + self.expect_exception(OSError) + + with self.check_context(arc.open(errorlevel=1), tarerror_filter): + self.expect_exception(tarfile.TarError) + + with self.check_context(arc.open(errorlevel=1), valueerror_filter): + self.expect_exception(ValueError) + + # If 2, all non-fatal errors are raised as well. + + with self.check_context(arc.open(errorlevel=2), extracterror_filter): + self.expect_exception(tarfile.ExtractError) + + with self.check_context(arc.open(errorlevel=2), filtererror_filter): + self.expect_exception(tarfile.FilterError) + + with self.check_context(arc.open(errorlevel=2), oserror_filter): + self.expect_exception(OSError) + + with self.check_context(arc.open(errorlevel=2), tarerror_filter): + self.expect_exception(tarfile.TarError) + + with self.check_context(arc.open(errorlevel=2), valueerror_filter): + self.expect_exception(ValueError) + + # We only handle ExtractionError, FilterError & OSError specially. 
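For completeness, a sketch of how a caller combines a custom filter callable with errorlevel, matching the behaviour the errorlevel tests above verify; the archive and destination names are placeholders.

    import tarfile

    def skip_logs(member, path):
        # A filter receives (TarInfo, destination path) and returns a possibly
        # replaced TarInfo, or None to skip the member entirely.
        if member.name.endswith('.log'):
            return None
        return tarfile.data_filter(member, path)     # delegate the safety checks

    # errorlevel=0 only logs FilterError/OSError, errorlevel=1 (the default)
    # raises them, and errorlevel=2 additionally raises ExtractError.
    with tarfile.open('archive.tar', errorlevel=2) as tf:
        tf.extractall('dest', filter=skip_logs)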
+ + with self.check_context(arc.open(errorlevel='boo!'), filtererror_filter): + self.expect_exception(TypeError) # errorlevel is not int + + def setUpModule(): support.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/lib-python/3/test/test_threading_local.py b/lib-python/3/test/test_threading_local.py index 5ddb8c41c5..8b24e3a28b 100644 --- a/lib-python/3/test/test_threading_local.py +++ b/lib-python/3/test/test_threading_local.py @@ -193,6 +193,22 @@ class BaseLocalTest: self.assertIsNone(wr()) + def test_threading_local_clear_race(self): + # See https://github.com/python/cpython/issues/100892 + + try: + import _testcapi + except ImportError: + unittest.skip("requires _testcapi") + + _testcapi.call_in_temporary_c_thread(lambda: None, False) + + for _ in range(1000): + _ = threading.local() + + _testcapi.join_temporary_c_thread() + + class ThreadLocalTest(unittest.TestCase, BaseLocalTest): _local = _thread._local diff --git a/lib-python/3/test/test_urlparse.py b/lib-python/3/test/test_urlparse.py index 31943f357f..574da5bd69 100644 --- a/lib-python/3/test/test_urlparse.py +++ b/lib-python/3/test/test_urlparse.py @@ -649,6 +649,65 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(p.scheme, "http") self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") + def test_urlsplit_strip_url(self): + noise = bytes(range(0, 0x20 + 1)) + base_url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" + + url = noise.decode("utf-8") + base_url + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, "http") + self.assertEqual(p.netloc, "User:Pass@www.python.org:080") + self.assertEqual(p.path, "/doc/") + self.assertEqual(p.query, "query=yes") + self.assertEqual(p.fragment, "frag") + self.assertEqual(p.username, "User") + self.assertEqual(p.password, "Pass") + self.assertEqual(p.hostname, "www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), base_url) + + url = noise + base_url.encode("utf-8") + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"query=yes") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, b"User") + self.assertEqual(p.password, b"Pass") + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), base_url.encode("utf-8")) + + # Test that trailing space is preserved as some applications rely on + # this within query strings. + query_spaces_url = "https://www.python.org:88/doc/?query= " + p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url) + self.assertEqual(p.scheme, "https") + self.assertEqual(p.netloc, "www.python.org:88") + self.assertEqual(p.path, "/doc/") + self.assertEqual(p.query, "query= ") + self.assertEqual(p.port, 88) + self.assertEqual(p.geturl(), query_spaces_url) + + p = urllib.parse.urlsplit("www.pypi.org ") + # That "hostname" gets considered a "path" due to the + # trailing space and our existing logic... YUCK... + # and re-assembles via geturl aka unurlsplit into the original. + # django.core.validators.URLValidator (at least through v3.2) relies on + # this, for better or worse, to catch it in a ValidationError via its + # regular expressions. + # Here we test the basic round trip concept of such a trailing space. 
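A small sketch of the stripping rules these assertions encode, on an interpreter that includes the urllib.parse change appearing later in this diff: leading C0 controls and spaces are removed, tab/CR/LF are removed anywhere, and trailing spaces are kept.

    from urllib.parse import urlsplit

    p = urlsplit("\x00\x1f https://www.python.org/doc/?query= ")
    assert p.scheme == "https"
    assert p.netloc == "www.python.org"
    assert p.query == "query= "            # the trailing space survives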
+ self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ") + + # with scheme as cache-key + url = "//www.python.org/" + scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8") + for _ in range(2): + p = urllib.parse.urlsplit(url, scheme=scheme) + self.assertEqual(p.scheme, "https") + self.assertEqual(p.geturl(), "https://www.python.org/") + def test_attributes_bad_port(self): """Check handling of invalid ports.""" for bytes in (False, True): @@ -656,7 +715,7 @@ class UrlParseTestCase(unittest.TestCase): for port in ("foo", "1.5", "-1", "0x10"): with self.subTest(bytes=bytes, parse=parse, port=port): netloc = "www.example.net:" + port - url = "http://" + netloc + url = "http://" + netloc + "/" if bytes: netloc = netloc.encode("ascii") url = url.encode("ascii") diff --git a/lib-python/3/test/test_uu.py b/lib-python/3/test/test_uu.py index 4c639b7bd5..410eb8e392 100644 --- a/lib-python/3/test/test_uu.py +++ b/lib-python/3/test/test_uu.py @@ -145,6 +145,34 @@ class UUTest(unittest.TestCase): uu.encode(inp, out, filename) self.assertIn(safefilename, out.getvalue()) + def test_no_directory_traversal(self): + relative_bad = b"""\ +begin 644 ../../../../../../../../tmp/test1 +$86)C"@`` +` +end +""" + with self.assertRaisesRegex(uu.Error, 'directory'): + uu.decode(io.BytesIO(relative_bad)) + if os.altsep: + relative_bad_bs = relative_bad.replace(b'/', b'\\') + with self.assertRaisesRegex(uu.Error, 'directory'): + uu.decode(io.BytesIO(relative_bad_bs)) + + absolute_bad = b"""\ +begin 644 /tmp/test2 +$86)C"@`` +` +end +""" + with self.assertRaisesRegex(uu.Error, 'directory'): + uu.decode(io.BytesIO(absolute_bad)) + if os.altsep: + absolute_bad_bs = absolute_bad.replace(b'/', b'\\') + with self.assertRaisesRegex(uu.Error, 'directory'): + uu.decode(io.BytesIO(absolute_bad_bs)) + + class UUStdIOTest(unittest.TestCase): def setUp(self): diff --git a/lib-python/3/trace.py b/lib-python/3/trace.py index 96a78e7a82..78484cda01 100755 --- a/lib-python/3/trace.py +++ b/lib-python/3/trace.py @@ -49,6 +49,7 @@ Sample use, programmatically """ __all__ = ['Trace', 'CoverageResults'] +import io import linecache import os import sys @@ -725,7 +726,7 @@ def main(): sys.argv = [opts.progname, *opts.arguments] sys.path[0] = os.path.dirname(opts.progname) - with open(opts.progname, 'rb') as fp: + with io.open_code(opts.progname) as fp: code = compile(fp.read(), opts.progname, 'exec') # try to emulate __main__ namespace as much as possible globs = { diff --git a/lib-python/3/urllib/parse.py b/lib-python/3/urllib/parse.py index bd26813649..f5d3662313 100644 --- a/lib-python/3/urllib/parse.py +++ b/lib-python/3/urllib/parse.py @@ -25,6 +25,10 @@ currently not entirely compliant with this RFC due to defacto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. The testcases in test_urlparse.py provides a good indicator of parsing behavior. + +The WHATWG URL Parser spec should also be considered. We are not compliant with +it either due to existing user code API behavior expectations (Hyrum's Law). +It serves as a useful guide when making changes. """ import re @@ -78,6 +82,10 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz' '0123456789' '+-.') +# Leading and trailing C0 control and space to be stripped per WHATWG spec. 
+# == "".join([chr(i) for i in range(0, 0x20 + 1)]) +_WHATWG_C0_CONTROL_OR_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f ' + # Unsafe bytes to be removed per WHATWG spec _UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] @@ -456,6 +464,10 @@ def urlsplit(url, scheme='', allow_fragments=True): """ url, scheme, _coerce_result = _coerce_args(url, scheme) + # Only lstrip url as some applications rely on preserving trailing space. + # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both) + url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE) + scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE) for b in _UNSAFE_URL_BYTES_TO_REMOVE: url = url.replace(b, "") diff --git a/lib-python/3/uu.py b/lib-python/3/uu.py index 9f1f37f1a6..9fe252a639 100755..100644 --- a/lib-python/3/uu.py +++ b/lib-python/3/uu.py @@ -130,7 +130,14 @@ def decode(in_file, out_file=None, mode=None, quiet=False): # If the filename isn't ASCII, what's up with that?!? out_file = hdrfields[2].rstrip(b' \t\r\n\f').decode("ascii") if os.path.exists(out_file): - raise Error('Cannot overwrite existing file: %s' % out_file) + raise Error(f'Cannot overwrite existing file: {out_file}') + if (out_file.startswith(os.sep) or + f'..{os.sep}' in out_file or ( + os.altsep and + (out_file.startswith(os.altsep) or + f'..{os.altsep}' in out_file)) + ): + raise Error(f'Refusing to write to {out_file} due to directory traversal') if mode is None: mode = int(hdrfields[1], 8) # diff --git a/lib-python/stdlib-version.txt b/lib-python/stdlib-version.txt index a06a79627d..a8fd53e9e8 100644 --- a/lib-python/stdlib-version.txt +++ b/lib-python/stdlib-version.txt @@ -1,4 +1,4 @@ in here there are copies of the Lib/ directory of the CPython Git repository at https://github.com/python/cpython -the output for git status is: v3.9.16 +the output for git status is: v3.9.17 diff --git a/lib_pypy/hpy.dist-info/LICENSE b/lib_pypy/hpy-0.0.4.dist-info/LICENSE index 80e7f55908..80e7f55908 100644 --- a/lib_pypy/hpy.dist-info/LICENSE +++ b/lib_pypy/hpy-0.0.4.dist-info/LICENSE diff --git a/lib_pypy/hpy.dist-info/METADATA b/lib_pypy/hpy-0.0.4.dist-info/METADATA index cfcc963ad3..cfcc963ad3 100644 --- a/lib_pypy/hpy.dist-info/METADATA +++ b/lib_pypy/hpy-0.0.4.dist-info/METADATA diff --git a/lib_pypy/hpy.dist-info/entry_points.txt b/lib_pypy/hpy-0.0.4.dist-info/entry_points.txt index c714c2cb79..c714c2cb79 100644 --- a/lib_pypy/hpy.dist-info/entry_points.txt +++ b/lib_pypy/hpy-0.0.4.dist-info/entry_points.txt diff --git a/lib_pypy/hpy.dist-info/top_level.txt b/lib_pypy/hpy-0.0.4.dist-info/top_level.txt index 2009c86f18..2009c86f18 100644 --- a/lib_pypy/hpy.dist-info/top_level.txt +++ b/lib_pypy/hpy-0.0.4.dist-info/top_level.txt diff --git a/lib_pypy/pypy_tools/build_cffi_imports.py b/lib_pypy/pypy_tools/build_cffi_imports.py index 30400cc35e..f094755bbe 100644 --- a/lib_pypy/pypy_tools/build_cffi_imports.py +++ b/lib_pypy/pypy_tools/build_cffi_imports.py @@ -63,43 +63,36 @@ configure_args = ['./configure', # without an _ssl module, but the OpenSSL download site redirect HTTP # to HTTPS cffi_dependencies = { - '_ssl1': ('http://artfiles.org/openssl.org/source/openssl-1.1.1t.tar.gz', - '8dee9b24bdb1dcbf0c3d1e9b02fb8f6bf22165e807f45adeb7c9677536859d3b', + '_ssl1': ('http://artfiles.org/openssl.org/source/openssl-1.1.1u.tar.gz', + 'e2f8d84b523eecd06c7be7626830370300fbcc15386bf5142d72758f6963ebc6', [ ['./config', '--prefix=/usr', 'no-shared'], ['make', '-s', '-j', 
str(multiprocessing.cpu_count())], ['make', 'install', 'DESTDIR={}/'.format(deps_destdir)], ]), - '_ssl3': ('http://artfiles.org/openssl.org/source/openssl-3.0.8.tar.gz', - '6c13d2bf38fdf31eac3ce2a347073673f5d63263398f1f69d0df4a41253e4b3e', + '_ssl3': ('http://artfiles.org/openssl.org/source/openssl-3.0.9.tar.gz', + 'eb1ab04781474360f77c318ab89d8c5a03abc38e63d65a603cabbf1b00a1dc90', [ ['./config', '--prefix=/usr', 'no-shared', 'enable-fips'], ['make', '-s', '-j', str(multiprocessing.cpu_count())], ['make', 'install', 'DESTDIR={}/'.format(deps_destdir)], ]), -} - -cffi_dependencies['_ssl'] = cffi_dependencies['_ssl1'] - -if sys.platform == 'darwin' or platform.machine() == 'aarch64': - # TODO: use these on x86 after upgrading Docker images to manylinux2014 - cffi_dependencies['_gdbm'] = ( - # this does not compile on the x86 buildbot, linker is missing '_history_list' - 'http://distfiles.macports.org/gdbm/gdbm-1.19.tar.gz', - '37ed12214122b972e18a0d94995039e57748191939ef74115b1d41d8811364bc', - # this does not compile on the linux buildbot, linker is missing '_history_list' - [configure_args + ['--without-readline'], + 'lzma': ( + 'http://distfiles.macports.org/xz/xz-5.2.10.tar.bz2', + '01b71df61521d9da698ce3c33148bff06a131628ff037398c09482f3a26e5408', + [configure_args, ['make', '-s', '-j', str(multiprocessing.cpu_count())], ['make', 'install', 'DESTDIR={}/'.format(deps_destdir)], - ]) - cffi_dependencies['lzma'] = ( - # this does not compile on the linux64 buildbot, needs -fPIC - 'http://distfiles.macports.org/xz/xz-5.2.5.tar.bz2', - '5117f930900b341493827d63aa910ff5e011e0b994197c3b71c08a20228a42df', - [configure_args, + ]), + '_gdbm': ('http://distfiles.macports.org/gdbm/gdbm-1.23.tar.gz', + '74b1081d21fff13ae4bd7c16e5d6e504a4c26f7cde1dca0d963a484174bbcacd', + [configure_args + ['--without-readline'], ['make', '-s', '-j', str(multiprocessing.cpu_count())], ['make', 'install', 'DESTDIR={}/'.format(deps_destdir)], - ]) + ]), +} + +cffi_dependencies['_ssl'] = cffi_dependencies['_ssl1'] def _unpack_tarfile(filename, extract_dir): """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir` diff --git a/pypy/doc/release-v7.3.12.rst b/pypy/doc/release-v7.3.12.rst index b338dc0005..03bb2ab8b6 100644 --- a/pypy/doc/release-v7.3.12.rst +++ b/pypy/doc/release-v7.3.12.rst @@ -3,17 +3,19 @@ PyPy v7.3.12: release of python 2.7, 3.9, and 3.10 ================================================== .. - Changelog up to commit 365683e7da0d + Changelog up to commit 24beab5e4a50 -.. note_:: +.. note:: This is a pre-release announcement. When the release actually happens, it will be announced on the `PyPy blog`_ .. _`PyPy blog`: https://pypy.org/blog The PyPy team is proud to release version 7.3.12 of PyPy. -There are only minimal bugfixes since the last release. We did implement -support for symlinks in Windows, and are releasing a Python3.10 version. +This release includes a new string-to-int algorithm (also appearing in CPython +3.12) that is faster than the older one; support for symlinks in Windows; and +our first Python3.10 version. + The release includes three different interpreters: - PyPy2.7, which is an interpreter supporting the syntax and the features of @@ -24,11 +26,11 @@ The release includes three different interpreters: Python 3.9, including the stdlib for CPython 3.9.16. - PyPy3.10, which is an interpreter supporting the syntax and the features of - Python 3.10, including the stdlib for CPython 3.10.9. This is our first + Python 3.10, including the stdlib for CPython 3.10.11. 
This is our first release of 3.10, but based on past experience we are quite confident in its compatibility with upstream. Of course, we recommend testing your code with this new version before putting it into production. Note it does - require a new version of cython that has yet to be released. + require at least cython 0.29.35 or cython 3.0.0b3 The interpreters are based on much the same codebase, thus the multiple release. This is a micro release, all APIs are compatible with the other 7.3 @@ -122,11 +124,16 @@ Bugfixes ``ppc_64-linux-gnu`` platform tags (issue 3834_) - Fix various cases where a mutating ``__index__`` method could crash the interpreter (issue 3917_) +- Use C99 ``NAN`` for ``Py_NAN`` + Speedups and enhancements ~~~~~~~~~~~~~~~~~~~~~~~~~ - Do less string copies in the bytecode compiler - Add missing CJK range in unicodedata version 13 +- Implement the base 10 ``string-to-int`` conversion using a divide an conquer + algorithm with complexity ``O(n**1.58)``. The algorithm is due to Bjorn + Martinsson and is part of CPython 3.12. Python 3.9+ ----------- @@ -152,6 +159,12 @@ Bugfixes name in some random function (issue 3925_) - Fix wrong implementation of ``_copy_base`` for slices of n-dimensional buffers (issue 3520_) +- Trim the scope ID from IPV6 addresses (issue 3938_, reversing the decision in + issue 3628_) +- Add ``_hashlib.scrypt`` (issue 3921_) +- Properly create a C-level wrapper that calls ``tp_finalize`` when ``__del__`` + is called, which allows us to use ``CYTHON_USE_TP_FINALIZE`` in cython +- Move ``hpy.dist-info`` to ``hpy-0.0.4.dist-info`` (issue 3579_) Speedups and enhancements ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -167,11 +180,15 @@ Speedups and enhancements observable applevel difference between ``W_IntObject`` and ``W_LongObject`` - Provide ``_PyEval_GetAsyncGenFirstiter`` and ``_PyEval_GetAsyncGenFinalizer`` for use by cython, towards fixing issue 3280_ +- Squeeze a little more accuracy out of windows ``time.time()``, to make a + cython test pass .. _bpo-37648: https://bugs.python.org/issue37648 .. _GH-100242: https://github.com/python/cpython/issues/100242 .. _3280: https://foss.heptapod.net/pypy/pypy/-/issues/3280 .. _3520: https://foss.heptapod.net/pypy/pypy/-/issues/3520 +.. _3579: https://foss.heptapod.net/pypy/pypy/-/issues/3579 +.. _3628: https://foss.heptapod.net/pypy/pypy/-/issues/3628 .. _3834: https://foss.heptapod.net/pypy/pypy/-/issues/3834 .. _3874: https://foss.heptapod.net/pypy/pypy/-/issues/3874 .. _3881: https://foss.heptapod.net/pypy/pypy/-/issues/3881 @@ -182,4 +199,6 @@ Speedups and enhancements .. _3892: https://foss.heptapod.net/pypy/pypy/-/issues/3892 .. _3906: https://foss.heptapod.net/pypy/pypy/-/issues/3906 .. _3917: https://foss.heptapod.net/pypy/pypy/-/issues/3917 +.. _3921: https://foss.heptapod.net/pypy/pypy/-/issues/3921 .. _3925: https://foss.heptapod.net/pypy/pypy/-/issues/3925 +.. 
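A rough pure-Python sketch (an illustration, not PyPy's rbigint code) of the divide-and-conquer idea behind the string-to-int item above: split the digit string in half, convert both halves recursively, and combine them with one large multiplication, so with Karatsuba multiplication the total work is O(n**1.58). Real implementations also cache the needed powers of ten.

    def str_to_int(s, cutoff=64):
        # assumes a plain unsigned decimal string
        if len(s) <= cutoff:
            return int(s)                  # small pieces: the quadratic path is fine
        mid = len(s) // 2
        hi, lo = s[:mid], s[mid:]
        return str_to_int(hi) * 10 ** len(lo) + str_to_int(lo)

    assert str_to_int("9" * 1000) == int("9" * 1000)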
_3938: https://foss.heptapod.net/pypy/pypy/-/issues/3938 diff --git a/pypy/module/cpyext/include/patchlevel.h b/pypy/module/cpyext/include/patchlevel.h index 0c1e3253da..460095fc7b 100644 --- a/pypy/module/cpyext/include/patchlevel.h +++ b/pypy/module/cpyext/include/patchlevel.h @@ -21,12 +21,12 @@ /* Version parsed out into numeric values */ #define PY_MAJOR_VERSION 3 #define PY_MINOR_VERSION 9 -#define PY_MICRO_VERSION 16 +#define PY_MICRO_VERSION 17 #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL #define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "3.9.16" +#define PY_VERSION "3.9.17" /* PyPy version as a string: make sure to keep this in sync with: * module/sys/version.py diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py index 2dde3d3c82..b889e3ac3b 100644 --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -378,7 +378,7 @@ def make_ref(space, w_obj, w_userdata=None, immortal=False): # XXX: adapt for pypy3 state = space.fromcache(State) intval = space.int_w(w_obj) - return state.ccall("PyInt_FromLong", intval) + return state.ccall("PyLong_FromLong", intval) return get_pyobj_and_incref(space, w_obj, w_userdata, immortal=False) @specialize.ll() diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py index 9537f21cc8..612c5ef78e 100644 --- a/pypy/module/cpyext/slotdefs.py +++ b/pypy/module/cpyext/slotdefs.py @@ -12,7 +12,7 @@ from pypy.module.cpyext.typeobjectdefs import ( getattrfunc, getattrofunc, setattrofunc, lenfunc, ssizeargfunc, inquiry, ssizessizeargfunc, ssizeobjargproc, iternextfunc, initproc, richcmpfunc, cmpfunc, hashfunc, descrgetfunc, descrsetfunc, objobjproc, objobjargproc, - getbufferproc, ssizessizeobjargproc) + getbufferproc, ssizessizeobjargproc, destructor) from pypy.module.cpyext.pyobject import make_ref, from_ref, as_pyobj, decref from pypy.module.cpyext.pyerrors import PyErr_Occurred from pypy.module.cpyext.memoryobject import fill_Py_buffer @@ -474,6 +474,16 @@ class wrap_cmpfunc(W_PyCWrapperObject): return space.newint(generic_cpy_call(space, func_target, w_self, w_other)) +class wrap_del(W_PyCWrapperObject): + def call(self, space, w_self, __args__): + from pypy.module.cpyext.api import generic_cpy_call_expect_null + self.check_args(__args__, 0) + func = self.get_func_to_call() + func_target = rffi.cast(destructor, func) + generic_cpy_call(space, func_target, w_self) + return space.w_None + + SLOT_FACTORIES = {} def slot_factory(tp_name): def decorate(func): @@ -837,21 +847,6 @@ def slot_from_buffer_w(space, typedef): return 0 return buff_w -def _make_missing_wrapper(name): - assert name not in globals() - class missing_wrapper(W_PyCWrapperObject): - def call(self, space, w_self, __args__): - msg = "cpyext: missing slot wrapper %s for class %s" %( - name, space.getfulltypename(w_self)) - print msg - raise NotImplementedError("Slot wrapper " + name) - missing_wrapper.__name__ = name - globals()[name] = missing_wrapper - -missing_wrappers = ['wrap_del'] -for name in missing_wrappers: - _make_missing_wrapper(name) - def make_missing_slot(space, typedef, name, attr): return None @@ -1009,7 +1004,7 @@ static slotdef slotdefs[] = { TPSLOT("__new__", tp_new, slot_tp_new, NULL, "__new__(type, /, *args, **kwargs)\n--\n\n" "Create and return new object. 
See help(type) for accurate signature."), - TPSLOT("__del__", tp_finalize, slot_tp_finalize, (wrapperfunc)wrap_del, ""), + TPSLOT("__del__", tp_finalize, slot_tp_finalize, wrap_del, ""), AMSLOT("__await__", am_await, slot_am_await, wrap_unaryfunc, "__await__($self, /)\n--\n\nReturn an iterator to be used in await expression."), diff --git a/pypy/module/cpyext/src/intobject.c b/pypy/module/cpyext/src/intobject.c index 39b6878bcc..2a7c1decf6 100644 --- a/pypy/module/cpyext/src/intobject.c +++ b/pypy/module/cpyext/src/intobject.c @@ -54,46 +54,6 @@ fill_free_list(void) return p + N_INTOBJECTS - 1; } -#ifndef NSMALLPOSINTS -#define NSMALLPOSINTS 257 -#endif -#ifndef NSMALLNEGINTS -#define NSMALLNEGINTS 5 -#endif -#if NSMALLNEGINTS + NSMALLPOSINTS > 0 -/* References to small integers are saved in this array so that they - can be shared. - The integers that are saved are those in the range - -NSMALLNEGINTS (inclusive) to NSMALLPOSINTS (not inclusive). -*/ -static PyIntObject *small_ints[NSMALLNEGINTS + NSMALLPOSINTS]; -#endif - -PyObject * -PyInt_FromLong(long ival) -{ - register PyIntObject *v; - /* -#if NSMALLNEGINTS + NSMALLPOSINTS > 0 - if (-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS) { - v = small_ints[ival + NSMALLNEGINTS]; - Py_INCREF(v); - return (PyObject *) v; - } -#endif - */ - if (free_list == NULL) { - if ((free_list = fill_free_list()) == NULL) - return NULL; - } - /* Inline PyObject_New */ - v = free_list; - free_list = (PyIntObject *)Py_TYPE(v); - (void)PyObject_INIT(v, &PyInt_Type); - v->ob_ival = ival; - return (PyObject *) v; -} - /* this is CPython's int_dealloc */ #ifdef CPYEXT_TESTS #define _Py_int_dealloc _cpyexttest_int_dealloc diff --git a/pypy/module/cpyext/test/foo.c b/pypy/module/cpyext/test/foo.c index 2b9439de8c..b57736a91b 100644 --- a/pypy/module/cpyext/test/foo.c +++ b/pypy/module/cpyext/test/foo.c @@ -691,7 +691,7 @@ static PyTypeObject TupleLike = { static PyObject *size_of_instances(PyObject *self, PyObject *t) { - return PyInt_FromLong(((PyTypeObject *)t)->tp_basicsize); + return PyLong_FromSsize_t(((PyTypeObject *)t)->tp_basicsize); } @@ -699,8 +699,8 @@ static PyObject * is_TupleLike(PyObject *self, PyObject * t) { int tf = t->ob_type == &TupleLike; if (t->ob_type->tp_itemsize == 0) - return PyInt_FromLong(-1); - return PyInt_FromLong(tf); + return PyLong_FromLong(-1); + return PyLong_FromLong(tf); } static PyTypeObject GetType1 = { @@ -758,13 +758,6 @@ static struct PyModuleDef moduledef = { }; #define INITERROR return NULL -/* Initialize this module. 
*/ -#ifdef __GNUC__ -extern __attribute__((visibility("default"))) -#else -extern __declspec(dllexport) -#endif - PyMODINIT_FUNC PyInit_foo(void) diff --git a/pypy/module/cpyext/test/test_async_iter.py b/pypy/module/cpyext/test/test_async_iter.py index 6910254ea0..4480e67d72 100644 --- a/pypy/module/cpyext/test/test_async_iter.py +++ b/pypy/module/cpyext/test/test_async_iter.py @@ -60,4 +60,3 @@ class AppTestAsyncIter(AppTestCpythonExtensionBase): g = module.gen() result = module.test_last_yield(g) assert result == 123 -"" diff --git a/pypy/module/cpyext/test/test_datetime.py b/pypy/module/cpyext/test/test_datetime.py index 3c62f89563..6e546335f9 100644 --- a/pypy/module/cpyext/test/test_datetime.py +++ b/pypy/module/cpyext/test/test_datetime.py @@ -471,3 +471,20 @@ class AppTestDatetime(AppTestCpythonExtensionBase): o = tzinfo() assert module.checks(o) == (False,) * 8 + (True,) * 2 + def test_getsystemclock(self): + # Taken from the cython time_pxd.pyx test_time() test + module = self.import_extension('foo', [ + ("getsystemclock", "METH_NOARGS", + """ PyDateTime_IMPORT; + _PyTime_t tic = _PyTime_GetSystemClock(); + double r = _PyTime_AsSecondsDouble(tic); + return PyFloat_FromDouble(r); + """)], prologue='#include "datetime.h"\n') + import time + # warm up the time.time function + for _ in range(2000): + tic1 = time.time() + tic2 = module.getsystemclock() + tic3 = time.time() + assert tic1 <= tic2, "%s, %s" % (tic1, tic2) + assert tic2 <= tic3, "%s, %s" % (tic2, tic3) diff --git a/pypy/module/cpyext/test/test_module.py b/pypy/module/cpyext/test/test_module.py index cd8907453b..0bb4be84dc 100644 --- a/pypy/module/cpyext/test/test_module.py +++ b/pypy/module/cpyext/test/test_module.py @@ -271,7 +271,7 @@ class AppTestMultiPhase2(AppTestCpythonExtensionBase): raises(module.error, 'raise module.error()') assert module.int_const == 1969 assert module.str_const == 'something different' - del ex + ex.__del__() import gc for i in range(3): gc.collect() diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py index bb48228a63..5f83f72932 100644 --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -106,8 +106,8 @@ class AppTestTypeObject(AppTestCpythonExtensionBase): raises(TypeError, "obj.char_member = 'spam'") raises(TypeError, "obj.char_member = 42") # - import sys - bignum = sys.maxsize - 42 + import sys, struct + bignum = struct.unpack_from("@L", b"\xFF" * 8)[0]//2 - 42 obj.short_member = -12345; assert obj.short_member == -12345 obj.long_member = -bignum; assert obj.long_member == -bignum obj.ushort_member = 45678; assert obj.ushort_member == 45678 diff --git a/pypy/module/pypyjit/test_pypy_c/test_misc.py b/pypy/module/pypyjit/test_pypy_c/test_misc.py index 9c9e401b29..3f4e771c24 100644 --- a/pypy/module/pypyjit/test_pypy_c/test_misc.py +++ b/pypy/module/pypyjit/test_pypy_c/test_misc.py @@ -237,6 +237,8 @@ class TestMisc(BaseTestPyPyC): guard_nonnull_class(p17, ..., descr=...) guard_not_invalidated? i21 = getfield_gc_i(p17, descr=<FieldS .*W_Array.*.inst_len .*>) + i22 = int_lt(i21, 0) + guard_false(i22, descr=...) i23 = int_lt(0, i21) guard_true(i23, descr=...) i24 = getfield_gc_i(p17, descr=<FieldU .*W_ArrayBase.inst__buffer .*>) diff --git a/pypy/module/sys/version.py b/pypy/module/sys/version.py index 6606bdb3ae..eda4a9d3e0 100644 --- a/pypy/module/sys/version.py +++ b/pypy/module/sys/version.py @@ -8,7 +8,7 @@ Version numbers exposed by PyPy through the 'sys' module. 
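On the test_typeobject change above: bignum is now derived with struct rather than sys.maxsize, presumably because sys.maxsize tracks Py_ssize_t while the long_member slot is a C long, whose width differs on 64-bit Windows. A sketch of what the new expression computes:

    import struct

    # Unpack a native unsigned long made of 0xFF bytes (4 or 8 of them,
    # depending on the platform's C long), then halve it to get LONG_MAX.
    ulong_max = struct.unpack_from("@L", b"\xff" * 8)[0]
    long_max = ulong_max // 2
    print(hex(long_max))    # 0x7fffffffffffffff on a typical 64-bit Linux build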
import os #XXX # the release serial 42 is not in range(16) -CPYTHON_VERSION = (3, 9, 16, "final", 0) +CPYTHON_VERSION = (3, 9, 17, "final", 0) #XXX # sync CPYTHON_VERSION with patchlevel.h, package.py CPYTHON_API_VERSION = 1013 #XXX # sync with include/modsupport.h diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py index a41d241a50..b163d32810 100644 --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -277,9 +277,10 @@ if _WIN: # We can't use that big number when translating for # 32-bit system (which windows always is currently) # XXX: Need to come up with a better solution - offset = (r_ulonglong(16384) * r_ulonglong(27) * r_ulonglong(390625) - * r_ulonglong(79) * r_ulonglong(853)) - microseconds = quad_part / 10 - offset + offset = (r_ulonglong(16384) * r_ulonglong(27) * + r_ulonglong(390625) * r_ulonglong(79) * + r_ulonglong(853) * r_ulonglong(10)) + microseconds10x = quad_part - offset if w_info: with lltype.scoped_alloc(LPDWORD.TO, 1) as time_adjustment, \ lltype.scoped_alloc(LPDWORD.TO, 1) as time_increment, \ @@ -290,11 +291,9 @@ if _WIN: _setinfo(space, w_info, "GetSystemTimeAsFileTime()", intmask(time_increment[0]) * 1e-7, False, True) if return_ns: - return space.newint(tolong(microseconds) * 10**3) + return space.newint(tolong(microseconds10x) * 10**2) else: - tv_sec = microseconds / 10**6 - tv_usec = microseconds % 10**6 - return space.newfloat(tv_sec + tv_usec * 1e-6) + return space.newfloat(float(microseconds10x) / 1e7) else: if HAVE_GETTIMEOFDAY: if GETTIMEOFDAY_NO_TZ: diff --git a/pypy/objspace/descroperation.py b/pypy/objspace/descroperation.py index b08da865b3..d00b17e4b2 100644 --- a/pypy/objspace/descroperation.py +++ b/pypy/objspace/descroperation.py @@ -394,7 +394,7 @@ class DescrOperation(object): w_obj, w_res) return w_res - def pow(space, w_obj1, w_obj2, w_obj3): + def pow_binary(space, w_obj1, w_obj2): w_typ1 = space.type(w_obj1) w_typ2 = space.type(w_obj2) w_left_src, w_left_impl = space.lookup_in_type_where(w_typ1, '__pow__') @@ -411,23 +411,28 @@ class DescrOperation(object): w_obj1, w_obj2 = w_obj2, w_obj1 w_left_impl, w_right_impl = w_right_impl, w_left_impl if w_left_impl is not None: - if space.is_w(w_obj3, space.w_None): - w_res = space.get_and_call_function(w_left_impl, w_obj1, w_obj2) - else: - w_res = space.get_and_call_function(w_left_impl, w_obj1, w_obj2, w_obj3) + w_res = space.get_and_call_function(w_left_impl, w_obj1, w_obj2) if _check_notimplemented(space, w_res): return w_res if w_right_impl is not None: - if space.is_w(w_obj3, space.w_None): - w_res = space.get_and_call_function(w_right_impl, w_obj2, w_obj1) - else: - w_res = space.get_and_call_function(w_right_impl, w_obj2, w_obj1, - w_obj3) + w_res = space.get_and_call_function(w_right_impl, w_obj2, w_obj1) if _check_notimplemented(space, w_res): return w_res raise oefmt(space.w_TypeError, "operands do not support **") + def pow(space, w_obj1, w_obj2, w_obj3): + if space.is_w(w_obj3, space.w_None): + return space.pow_binary(w_obj1, w_obj2) + # Three-arg power does not use __rpow__ + w_typ1 = space.type(w_obj1) + w_left_src, w_left_impl = space.lookup_in_type_where(w_typ1, '__pow__') + if w_left_impl is not None: + w_res = space.get_and_call_function(w_left_impl, w_obj1, w_obj2, w_obj3) + if _check_notimplemented(space, w_res): + return w_res + raise oefmt(space.w_TypeError, "operands do not support pow()") + def inplace_pow(space, w_lhs, w_rhs): w_impl = space.lookup(w_lhs, '__ipow__') if w_impl is not None: diff --git 
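A quick illustrative example (plain Python, not the RPython code above) of the rule the pow() refactoring encodes, which the issue-3912 test in test_longobject further down also depends on: three-argument pow() never falls back to the right operand's __rpow__.

    class Right:
        def __rpow__(self, base, mod=None):
            return "rpow"

    print(pow(2, Right()))      # 'rpow': binary pow does fall back to __rpow__
    try:
        pow(2, Right(), 7)      # ternary pow skips __rpow__ ...
    except TypeError as exc:
        print(exc)              # ... and fails with a TypeError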
a/pypy/objspace/std/complexobject.py b/pypy/objspace/std/complexobject.py index 2144db30d1..3c275a58fb 100644 --- a/pypy/objspace/std/complexobject.py +++ b/pypy/objspace/std/complexobject.py @@ -5,6 +5,7 @@ from rpython.rlib.rarithmetic import intmask, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rfloat import ( DTSF_STR_PRECISION, formatd, string_to_float) +from rpython.rlib.special_value import NAN from rpython.rlib.rstring import ParseStringError from rpython.tool.sourcetools import func_with_new_name @@ -55,7 +56,7 @@ def _split_complex(s): realstop = i - # return appropriate strings is only one number is there + # return appropriate strings if only one number is there if i >= slen: newstop = realstop - 1 if newstop < 0: @@ -194,6 +195,31 @@ def unpackcomplex(space, w_complex, strict_typing=True, firstarg=True): "complex() second argument must be a number, not '%T'", w_complex) +# Python3 semantics of c_pow have changed slightly + +def c_pow(x, y): + (a_r, a_i), (b_r, b_i) = x, y + if b_r == 0 and b_i == 0: + rr, ir = (1., 0.) + elif (a_r == 0. or math.isinf(a_r)) and a_i == 0: + if b_i != 0. or b_r < 0: + raise ZeroDivisionError + rr, ir = (0.0, 0.0) + else: + vabs = math.hypot(a_r, a_i) + len = math.pow(vabs, b_r) + at = math.atan2(a_i, a_r) + phase = at * b_r + if b_i != 0.0: + len /= math.exp(at * b_i) + phase += b_i * math.log(vabs) + try: + rr = len * math.cos(phase) + ir = len * math.sin(phase) + except ValueError: + rr = NAN + ir = NAN + return (rr, ir) class W_ComplexObject(W_Root): @@ -227,7 +253,7 @@ class W_ComplexObject(W_Root): return W_ComplexObject(rr, ir) def pow(self, other): - rr, ir = rcomplex.c_pow(self.as_tuple(), other.as_tuple()) + rr, ir = c_pow(self.as_tuple(), other.as_tuple()) return W_ComplexObject(rr, ir) def pow_small_int(self, n): diff --git a/pypy/objspace/std/intobject.py b/pypy/objspace/std/intobject.py index 46466634c7..4382b3bcfe 100644 --- a/pypy/objspace/std/intobject.py +++ b/pypy/objspace/std/intobject.py @@ -500,11 +500,11 @@ def _make_ovf2long(opname, ovf2small=None): return W_SmallLongObject(op(a, b)) from pypy.objspace.std.longobject import W_LongObject, W_AbstractLongObject - if w_x is None or not isinstance(w_x, W_AbstractLongObject): - w_x = W_LongObject.fromint(space, x) - if w_y is None or not isinstance(w_y, W_AbstractLongObject): - w_y = W_LongObject.fromint(space, y) - + w_x = W_LongObject.fromint(space, x) + assert w_y is not None + # call the W_LongObject implementation with the unconverted w_y. 
+ # W_LongObject can deal with W_IntObject arguments just fine, and it + # has a slightly better code path for long/int combinations return getattr(w_x, 'descr_' + opname)(space, w_y) return ovf2long @@ -713,15 +713,25 @@ class W_IntObject(W_AbstractIntObject): def _make_descr_cmp(opname): op = getattr(operator, opname) descr_name = 'descr_' + opname + + if opname == 'lt': revopname = 'gt' + elif opname == 'le': revopname = 'ge' + elif opname == 'eq': revopname = 'eq' + elif opname == 'ne': revopname = 'ne' + elif opname == 'gt': revopname = 'lt' + elif opname == 'ge': revopname = 'le' + bigintintrevop = getattr(rbigint, "int_" + revopname) + @func_renamer(descr_name) def descr_cmp(self, space, w_other): + from pypy.objspace.std.longobject import W_LongObject, W_AbstractLongObject if isinstance(w_other, W_IntObject): i = self.intval j = w_other.intval return space.newbool(op(i, j)) elif isinstance(w_other, W_AbstractIntObject): - self = self.as_w_long(space) - return getattr(self, descr_name)(space, w_other) + assert isinstance(w_other, W_LongObject) + return space.newbool(bigintintrevop(w_other.asbigint(), self.intval)) return space.w_NotImplemented return descr_cmp diff --git a/pypy/objspace/std/longobject.py b/pypy/objspace/std/longobject.py index fe71caa6a0..43b2643188 100644 --- a/pypy/objspace/std/longobject.py +++ b/pypy/objspace/std/longobject.py @@ -259,13 +259,15 @@ class W_LongObject(W_AbstractLongObject): op = getattr(rbigint, opname) intop = getattr(rbigint, "int_" + opname) + @func_renamer('descr_' + opname) def descr_impl(self, space, w_other): if isinstance(w_other, W_IntObject): return space.newbool(intop(self.num, w_other.int_w(space))) elif not isinstance(w_other, W_AbstractLongObject): return space.w_NotImplemented return space.newbool(op(self.num, w_other.asbigint())) - return func_with_new_name(descr_impl, "descr_" + opname) + + return descr_impl descr_lt = _make_descr_cmp('lt') descr_le = _make_descr_cmp('le') diff --git a/pypy/objspace/std/test/apptest_complexobject.py b/pypy/objspace/std/test/apptest_complexobject.py index f65823e010..a08e824a85 100644 --- a/pypy/objspace/std/test/apptest_complexobject.py +++ b/pypy/objspace/std/test/apptest_complexobject.py @@ -170,8 +170,12 @@ def test_pow(): b = 5.1+2.3j raises(ValueError, pow, a, b, 0) - b = complex(float('inf'), 0.0) ** complex(10., 3.) - assert repr(b) == "(nan+nanj)" + raises(ZeroDivisionError, pow, complex(float('inf'), 0.0), complex(10., 3.)) + + # issue 3944 + b = pow(1, -1.j) + assert repr(b.imag) == "-0.0" + def test_boolcontext(): for i in range(100): diff --git a/pypy/objspace/std/test/test_intobject.py b/pypy/objspace/std/test/test_intobject.py index badf10e33c..b2655a1d9b 100644 --- a/pypy/objspace/std/test/test_intobject.py +++ b/pypy/objspace/std/test/test_intobject.py @@ -83,6 +83,37 @@ class TestW_IntObject: assert space.isinstance_w(v, space.w_int) assert space.bigint_w(v).eq(rbigint.fromlong(x + y)) + def test_add_ovf_int_op_shortcut(self, monkeypatch): + from pypy.objspace.std.longobject import W_LongObject, rbigint + @staticmethod + def fromint(space, x): + assert x == sys.maxint # only the maxint is converted, not the 1! 
+ return W_LongObject(rbigint.fromint(x)) + + monkeypatch.setattr(W_LongObject, 'fromint', fromint) + + space = self.space + x = sys.maxint + y = 1 + f1 = iobj.W_IntObject(x) + f2 = iobj.W_IntObject(y) + v = f1.descr_add(space, f2) + assert space.isinstance_w(v, space.w_long) + assert space.bigint_w(v).eq(rbigint.fromlong(x + y)) + + def test_lt_int_long_no_conversion(self, monkeypatch): + from pypy.objspace.std.longobject import W_LongObject, rbigint + + space = self.space + x = sys.maxint + y = 1 + f1 = iobj.W_IntObject(x) + f2 = iobj.W_IntObject(y).as_w_long(space) + + monkeypatch.setattr(iobj.W_IntObject, 'as_w_long', None) + v = f1.descr_gt(space, f2) # does *not* convert f1 to a bigint + assert space.is_true(v) + def test_sub(self): space = self.space x = 1 diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py index b0bd1b1aa8..853f18c34e 100644 --- a/pypy/objspace/std/test/test_longobject.py +++ b/pypy/objspace/std/test/test_longobject.py @@ -252,6 +252,15 @@ class AppTestLong: assert pow(x, 2, 2) == long(0) assert pow(x, 2, long(3)) == long(1) + def test_issue_3912(self): + + class A(int): + # A class that uses the default 0 value but does not override __pow__ + def __rpow__(self, other, modulo): + return None + + raises(ValueError, pow, 1, A(), A()) + def test_getnewargs(self): assert self._long(0) .__getnewargs__() == (self._long(0),) assert (-self._long(1)) .__getnewargs__() == (-self._long(1),) diff --git a/pypy/tool/release/check_versions.py b/pypy/tool/release/check_versions.py index b43163dd38..233c75bc63 100644 --- a/pypy/tool/release/check_versions.py +++ b/pypy/tool/release/check_versions.py @@ -34,6 +34,9 @@ def assert_in(a, b): pypy_versions = { + '7.3.12rc2': {'python_version': ['3.10.11', '3.9.16', '2.7.18'], + 'date': '2023-05-28', + }, '7.3.12rc1': {'python_version': ['3.10.9', '3.9.16', '2.7.18'], 'date': '2023-05-13', }, diff --git a/pypy/tool/release/repackage.sh b/pypy/tool/release/repackage.sh index 3c5341a8c7..1fc0161e6f 100644 --- a/pypy/tool/release/repackage.sh +++ b/pypy/tool/release/repackage.sh @@ -1,12 +1,12 @@ #! 
/bin/bash # Edit these appropriately before running this script -pmaj=2 # python main version: 2 or 3 -pmin=7 # python minor version +pmaj=3 # python main version: 2 or 3 +pmin=9 # python minor version maj=7 min=3 rev=12 -rc=rc1 # comment this line for actual release +rc=rc2 # comment this line for actual release function maybe_exit { if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then diff --git a/pypy/tool/release/versions.json b/pypy/tool/release/versions.json index bc579cc0c8..d05edd083a 100644 --- a/pypy/tool/release/versions.json +++ b/pypy/tool/release/versions.json @@ -1,5 +1,158 @@ [ { + "pypy_version": "7.3.12rc2", + "python_version": "3.10.11", + "stable": false, + "latest_pypy": false, + "date": "2023-05-28", + "files": [ + { + "filename": "pypy3.10-v7.3.12rc2-aarch64.tar.bz2", + "arch": "aarch64", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy3.10-v7.3.12rc2-aarch64.tar.bz2" + }, + { + "filename": "pypy3.10-v7.3.12rc2-linux32.tar.bz2", + "arch": "i686", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy3.10-v7.3.12rc2-linux32.tar.bz2" + }, + { + "filename": "pypy3.10-v7.3.12rc2-linux64.tar.bz2", + "arch": "x64", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy3.10-v7.3.12rc2-linux64.tar.bz2" + }, + { + "filename": "pypy3.10-v7.3.12rc2-macos_x86_64.tar.bz2", + "arch": "x64", + "platform": "darwin", + "download_url": "https://downloads.python.org/pypy/pypy3.10-v7.3.12rc2-macos_x86_64.tar.bz2" + }, + { + "filename": "pypy3.10-v7.3.12rc2-macos_arm64.tar.bz2", + "arch": "arm64", + "platform": "darwin", + "download_url": "https://downloads.python.org/pypy/pypy3.10-v7.3.12rc2-macos_arm64.tar.bz2" + }, + { + "filename": "pypy3.10-v7.3.12rc2-win64.zip", + "arch": "x64", + "platform": "win64", + "download_url": "https://downloads.python.org/pypy/pypy3.10-v7.3.12rc2-win64.zip" + }, + { + "filename": "pypy3.10-v7.3.12rc2-s390x.tar.bz2", + "arch": "s390x", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy3.10-v7.3.12rc2-s390x.tar.bz2" + } + ] + }, + { + "pypy_version": "7.3.12rc2", + "python_version": "3.9.16", + "stable": false, + "latest_pypy": false, + "date": "2023-05-28", + "files": [ + { + "filename": "pypy3.9-v7.3.12rc2-aarch64.tar.bz2", + "arch": "aarch64", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy3.9-v7.3.12rc2-aarch64.tar.bz2" + }, + { + "filename": "pypy3.9-v7.3.12rc2-linux32.tar.bz2", + "arch": "i686", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy3.9-v7.3.12rc2-linux32.tar.bz2" + }, + { + "filename": "pypy3.9-v7.3.12rc2-linux64.tar.bz2", + "arch": "x64", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy3.9-v7.3.12rc2-linux64.tar.bz2" + }, + { + "filename": "pypy3.9-v7.3.12rc2-macos_x86_64.tar.bz2", + "arch": "x64", + "platform": "darwin", + "download_url": "https://downloads.python.org/pypy/pypy3.9-v7.3.12rc2-macos_x86_64.tar.bz2" + }, + { + "filename": "pypy3.9-v7.3.12rc2-macos_arm64.tar.bz2", + "arch": "arm64", + "platform": "darwin", + "download_url": "https://downloads.python.org/pypy/pypy3.9-v7.3.12rc2-macos_arm64.tar.bz2" + }, + { + "filename": "pypy3.9-v7.3.12rc2-win64.zip", + "arch": "x64", + "platform": "win64", + "download_url": "https://downloads.python.org/pypy/pypy3.9-v7.3.12rc2-win64.zip" + }, + { + "filename": "pypy3.9-v7.3.12rc2-s390x.tar.bz2", + "arch": "s390x", + "platform": "linux", + "download_url": 
"https://downloads.python.org/pypy/pypy3.9-v7.3.12rc2-s390x.tar.bz2" + } + ] + }, + { + "pypy_version": "7.3.12rc2", + "python_version": "2.7.18", + "stable": false, + "latest_pypy": false, + "date": "2023-05-28", + "files": [ + { + "filename": "pypy2.7-v7.3.12rc2-aarch64.tar.bz2", + "arch": "aarch64", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy2.7-v7.3.12rc2-aarch64.tar.bz2" + }, + { + "filename": "pypy2.7-v7.3.12rc2-linux32.tar.bz2", + "arch": "i686", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy2.7-v7.3.12rc2-linux32.tar.bz2" + }, + { + "filename": "pypy2.7-v7.3.12rc2-linux64.tar.bz2", + "arch": "x64", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy2.7-v7.3.12rc2-linux64.tar.bz2" + }, + { + "filename": "pypy2.7-v7.3.12rc2-macos_x86_64.tar.bz2", + "arch": "x64", + "platform": "darwin", + "download_url": "https://downloads.python.org/pypy/pypy2.7-v7.3.12rc2-macos_x86_64.tar.bz2" + }, + { + "filename": "pypy2.7-v7.3.12rc2-macos_arm64.tar.bz2", + "arch": "arm64", + "platform": "darwin", + "download_url": "https://downloads.python.org/pypy/pypy2.7-v7.3.12rc2-macos_arm64.tar.bz2" + }, + { + "filename": "pypy2.7-v7.3.12rc2-win64.zip", + "arch": "x64", + "platform": "win64", + "download_url": "https://downloads.python.org/pypy/pypy2.7-v7.3.12rc2-win64.zip" + }, + { + "filename": "pypy2.7-v7.3.12rc2-s390x.tar.bz2", + "arch": "s390x", + "platform": "linux", + "download_url": "https://downloads.python.org/pypy/pypy2.7-v7.3.12rc2-s390x.tar.bz2" + } + ] + }, + { "pypy_version": "7.3.12rc1", "python_version": "3.10.9", "stable": false, diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py index d387560cf8..72a0131962 100644 --- a/rpython/jit/backend/llgraph/runner.py +++ b/rpython/jit/backend/llgraph/runner.py @@ -3,7 +3,7 @@ from rpython.jit.backend import model from rpython.jit.backend.llgraph import support from rpython.jit.backend.llsupport import symbolic from rpython.jit.backend.llsupport.vector_ext import VectorExt -from rpython.jit.metainterp.history import AbstractDescr +from rpython.jit.metainterp.history import BackendDescr from rpython.jit.metainterp.history import Const, getkind from rpython.jit.metainterp.history import INT, REF, FLOAT, VOID from rpython.jit.metainterp.resoperation import rop @@ -72,7 +72,7 @@ class LLTrace(object): newop.setfailargs(map(mapping, op.getfailargs())) self.operations.append(newop) -class WeakrefDescr(AbstractDescr): +class WeakrefDescr(BackendDescr): def __init__(self, realdescr): self.realdescrref = weakref.ref(realdescr) self.final_descr = getattr(realdescr, 'final_descr', False) @@ -86,7 +86,7 @@ class Jump(Exception): self.jump_target = jump_target self.args = args -class CallDescr(AbstractDescr): +class CallDescr(BackendDescr): def __init__(self, RESULT, ARGS, extrainfo, ABI=FFI_DEFAULT_ABI): self.RESULT = RESULT self.ARGS = ARGS @@ -118,7 +118,7 @@ class TypeIDSymbolic(Symbolic): def __ne__(self, other): return not self == other -class SizeDescr(AbstractDescr): +class SizeDescr(BackendDescr): def __init__(self, S, vtable, runner): assert not isinstance(vtable, bool) self.S = S @@ -149,7 +149,7 @@ class SizeDescr(AbstractDescr): def __repr__(self): return 'SizeDescr(%r)' % (self.S,) -class FieldDescr(AbstractDescr): +class FieldDescr(BackendDescr): def __init__(self, S, fieldname): self.S = S self.fieldname = fieldname @@ -207,7 +207,7 @@ def _is_signed_kind(TYPE): return (TYPE is not lltype.Bool and 
isinstance(TYPE, lltype.Number) and rffi.cast(TYPE, -1) == -1) -class ArrayDescr(AbstractDescr): +class ArrayDescr(BackendDescr): all_interiorfielddescrs = None def __init__(self, A, runner): @@ -269,7 +269,7 @@ class ArrayDescr(AbstractDescr): return TypeIDSymbolic(self.A) # integer-like symbolic -class InteriorFieldDescr(AbstractDescr): +class InteriorFieldDescr(BackendDescr): def __init__(self, A, fieldname, runner): self.A = A self.fieldname = fieldname diff --git a/rpython/jit/backend/llsupport/descr.py b/rpython/jit/backend/llsupport/descr.py index 77e48277ef..7c27d29614 100644 --- a/rpython/jit/backend/llsupport/descr.py +++ b/rpython/jit/backend/llsupport/descr.py @@ -2,7 +2,7 @@ import py from rpython.rtyper.lltypesystem import lltype, rffi, llmemory from rpython.rtyper.lltypesystem.lloperation import llop from rpython.jit.backend.llsupport import symbolic, support -from rpython.jit.metainterp.history import AbstractDescr, getkind, FLOAT, INT +from rpython.jit.metainterp.history import BackendDescr, getkind, FLOAT, INT from rpython.jit.metainterp import history from rpython.jit.metainterp.support import ptr2int, int2adr from rpython.jit.codewriter import heaptracker, longlong @@ -57,7 +57,7 @@ class GcCache(object): # ____________________________________________________________ # SizeDescrs -class SizeDescr(AbstractDescr): +class SizeDescr(BackendDescr): size = 0 # help translation tid = llop.combine_ushort(lltype.Signed, 0, 0) vtable = lltype.nullptr(rclass.OBJECT_VTABLE) @@ -137,7 +137,7 @@ FLAG_SIGNED = 'S' FLAG_STRUCT = 'X' FLAG_VOID = 'V' -class ArrayOrFieldDescr(AbstractDescr): +class ArrayOrFieldDescr(BackendDescr): vinfo = None def get_vinfo(self): @@ -381,7 +381,7 @@ def get_array_descr(gccache, ARRAY_OR_STRUCT): # ____________________________________________________________ # InteriorFieldDescr -class InteriorFieldDescr(AbstractDescr): +class InteriorFieldDescr(BackendDescr): arraydescr = ArrayDescr(0, 0, None, '\x00') # workaround for the annotator fielddescr = FieldDescr('', 0, 0, '\x00') @@ -447,7 +447,7 @@ def _missing_call_stub_r(func, args_i, args_r, args_f): def _missing_call_stub_f(func, args_i, args_r, args_f): return longlong.ZEROF -class CallDescr(AbstractDescr): +class CallDescr(BackendDescr): arg_classes = '' # <-- annotation hack result_type = '\x00' result_flag = '\x00' diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py index 3f8c8aaf68..480dfd84cb 100644 --- a/rpython/jit/backend/x86/runner.py +++ b/rpython/jit/backend/x86/runner.py @@ -80,7 +80,7 @@ class AbstractX86CPU(AbstractLLCPU): """ NOT_RPYTHON """ - from rpython.jit.backend.x86.tool.viewcode import machine_code_dump + from rpython.jit.backend.tool.viewcode import machine_code_dump data = [] label_list = [(offset, name) for name, offset in looptoken._x86_ops_offset.iteritems()] diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py index 03fe12064d..71b15719fc 100644 --- a/rpython/jit/codewriter/effectinfo.py +++ b/rpython/jit/codewriter/effectinfo.py @@ -210,24 +210,24 @@ class EffectInfo(object): def check_readonly_descr_field(self, fielddescr): return bitstring.bitcheck(self.bitstring_readonly_descrs_fields, - fielddescr.ei_index) + fielddescr.get_ei_index()) def check_write_descr_field(self, fielddescr): return bitstring.bitcheck(self.bitstring_write_descrs_fields, - fielddescr.ei_index) + fielddescr.get_ei_index()) def check_readonly_descr_array(self, arraydescr): return bitstring.bitcheck(self.bitstring_readonly_descrs_arrays, 
- arraydescr.ei_index) + arraydescr.get_ei_index()) def check_write_descr_array(self, arraydescr): return bitstring.bitcheck(self.bitstring_write_descrs_arrays, - arraydescr.ei_index) + arraydescr.get_ei_index()) def check_readonly_descr_interiorfield(self, interiorfielddescr): # NOTE: this is not used so far return bitstring.bitcheck(self.bitstring_readonly_descrs_interiorfields, - interiorfielddescr.ei_index) + interiorfielddescr.get_ei_index()) def check_write_descr_interiorfield(self, interiorfielddescr): # NOTE: this is not used so far return bitstring.bitcheck(self.bitstring_write_descrs_interiorfields, - interiorfielddescr.ei_index) + interiorfielddescr.get_ei_index()) def check_can_raise(self, ignore_memoryerror=False): if ignore_memoryerror: diff --git a/rpython/jit/codewriter/liveness.py b/rpython/jit/codewriter/liveness.py index c4802fde30..18410f79ed 100644 --- a/rpython/jit/codewriter/liveness.py +++ b/rpython/jit/codewriter/liveness.py @@ -119,7 +119,7 @@ def remove_repeated_live(ssarepr): # ____________________________________________________________ # helper functions for compactly encoding and decoding liveness info -# liveness is encoded as a 4 byte offset into the single string all_liveness +# liveness is encoded as a 2 byte offset into the single string all_liveness # (which is stored on the metainterp_sd) OFFSET_SIZE = 2 diff --git a/rpython/jit/codewriter/test/test_effectinfo.py b/rpython/jit/codewriter/test/test_effectinfo.py index 81f5f54483..598f604844 100644 --- a/rpython/jit/codewriter/test/test_effectinfo.py +++ b/rpython/jit/codewriter/test/test_effectinfo.py @@ -144,11 +144,14 @@ class TestVirtualizableAnalyzer(object): def test_compute_bitstrings(): - class FDescr: + class D(object): + def get_ei_index(self): + return self.ei_index + class FDescr(D): pass - class ADescr: + class ADescr(D): pass - class CDescr: + class CDescr(D): def __init__(self, ei): self._ei = ei def get_extra_info(self): diff --git a/rpython/jit/metainterp/heapcache.py b/rpython/jit/metainterp/heapcache.py index 7c6ccf030b..2a066e952e 100644 --- a/rpython/jit/metainterp/heapcache.py +++ b/rpython/jit/metainterp/heapcache.py @@ -142,18 +142,6 @@ class FieldUpdater(object): def setfield(self, fieldbox): self.cache.do_write_with_aliasing(self.ref_box, fieldbox) -class DummyFieldUpdater(FieldUpdater): - def __init__(self): - self.currfieldbox = None - - def getfield_now_known(self, fieldbox): - pass - - def setfield(self, fieldbox): - pass - -dummy_field_updater = DummyFieldUpdater() - class HeapCache(object): def __init__(self): @@ -530,6 +518,7 @@ class HeapCache(object): return cache.read(box) return None + @always_inline def get_field_updater(self, box, descr): cache = self.heap_cache.get(descr, None) if cache is None: diff --git a/rpython/jit/metainterp/history.py b/rpython/jit/metainterp/history.py index 8dffe5f031..d39f37b43d 100644 --- a/rpython/jit/metainterp/history.py +++ b/rpython/jit/metainterp/history.py @@ -91,10 +91,15 @@ def repr_rpython(box, typechars): class AbstractDescr(AbstractValue): - __slots__ = ('descr_index', 'ei_index') + _attrs_ = [] + __slots__ = () llopaque = True - descr_index = -1 - ei_index = sys.maxint + + def get_descr_index(self): + return -1 + + def get_ei_index(self): + return sys.maxint def repr_of_descr(self): return '%r' % (self,) @@ -654,11 +659,6 @@ class FrontendOp(AbstractResOp): p = rffi.cast(rffi.INT, self.position_and_flags) return intmask(p) >> FO_POSITION_SHIFT - def set_position(self, new_pos): - assert new_pos >= 0 - self.position_and_flags 
&= ~FO_POSITION_MASK - self.position_and_flags |= r_uint(new_pos << FO_POSITION_SHIFT) - def is_replaced_with_const(self): return bool(self.position_and_flags & FO_REPLACED_WITH_CONST) @@ -671,14 +671,18 @@ class FrontendOp(AbstractResOp): class IntFrontendOp(IntOp, FrontendOp): _attrs_ = ('position_and_flags', '_resint') - def copy_value_from(self, other): - self._resint = other.getint() + def __init__(self, pos, value): + FrontendOp.__init__(self, pos) + self._resint = value + class FloatFrontendOp(FloatOp, FrontendOp): _attrs_ = ('position_and_flags', '_resfloat') - def copy_value_from(self, other): - self._resfloat = other.getfloatstorage() + def __init__(self, pos, value): + FrontendOp.__init__(self, pos) + self._resfloat = value + class RefFrontendOp(RefOp, FrontendOp): _attrs_ = ('position_and_flags', '_resref', '_heapc_deps') @@ -687,8 +691,9 @@ class RefFrontendOp(RefOp, FrontendOp): _heapc_flags = r_uint(0) # high 32 bits of 'position_and_flags' _heapc_deps = None - def copy_value_from(self, other): - self._resref = other.getref_base() + def __init__(self, pos, value): + FrontendOp.__init__(self, pos) + self._resref = value if LONG_BIT == 32: def _get_heapc_flags(self): @@ -707,23 +712,13 @@ class RefFrontendOp(RefOp, FrontendOp): class History(object): trace = None - def __init__(self): - self.descr_cache = {} - self.descrs = {} - self.consts = [] - self._cache = [] - - def set_inputargs(self, inpargs, metainterp_sd): + def __init__(self, max_num_inputargs, metainterp_sd): from rpython.jit.metainterp.opencoder import Trace + self.trace = Trace(max_num_inputargs, metainterp_sd) - self.trace = Trace(inpargs, metainterp_sd) + def set_inputargs(self, inpargs): + self.trace.set_inputargs(inpargs) self.inputargs = inpargs - if self._cache is not None: - # hack to record the ops *after* we know our inputargs - for (opnum, argboxes, op, descr) in self._cache: - pos = self.trace.record_op(opnum, argboxes, descr) - op.set_position(pos) - self._cache = None def length(self): return self.trace._count - len(self.trace.inputargs) @@ -749,57 +744,32 @@ class History(object): @specialize.argtype(3) def record(self, opnum, argboxes, value, descr=None): - if self.trace is None: - pos = 2**14 - 1 - else: - pos = self._record_op(opnum, argboxes, descr) + pos = self._record_op(opnum, argboxes, descr) op = self._make_op(pos, value) - if self.trace is None: - self._cache.append((opnum, argboxes, op, descr)) return op @specialize.argtype(2) def record0(self, opnum, value, descr=None): - if self.trace is None: - pos = 2**14 - 1 - else: - pos = self.trace.record_op0(opnum, descr) + pos = self.trace.record_op0(opnum, descr) op = self._make_op(pos, value) - if self.trace is None: - self._cache.append((opnum, [], op, descr)) return op @specialize.argtype(3) def record1(self, opnum, argbox1, value, descr=None): - if self.trace is None: - pos = 2**14 - 1 - else: - pos = self.trace.record_op1(opnum, argbox1, descr) + pos = self.trace.record_op1(opnum, argbox1, descr) op = self._make_op(pos, value) - if self.trace is None: - self._cache.append((opnum, [argbox1], op, descr)) return op @specialize.argtype(4) def record2(self, opnum, argbox1, argbox2, value, descr=None): - if self.trace is None: - pos = 2**14 - 1 - else: - pos = self.trace.record_op2(opnum, argbox1, argbox2, descr) + pos = self.trace.record_op2(opnum, argbox1, argbox2, descr) op = self._make_op(pos, value) - if self.trace is None: - self._cache.append((opnum, [argbox1, argbox2], op, descr)) return op @specialize.argtype(5) def record3(self, opnum, 
argbox1, argbox2, argbox3, value, descr=None): - if self.trace is None: - pos = 2**14 - 1 - else: - pos = self.trace.record_op3(opnum, argbox1, argbox2, argbox3, descr) + pos = self.trace.record_op3(opnum, argbox1, argbox2, argbox3, descr) op = self._make_op(pos, value) - if self.trace is None: - self._cache.append((opnum, [argbox1, argbox2, argbox3], op, descr)) return op @specialize.argtype(2) @@ -807,37 +777,37 @@ class History(object): if value is None: op = FrontendOp(pos) elif isinstance(value, bool): - op = IntFrontendOp(pos) - op.setint(int(value)) + op = IntFrontendOp(pos, int(value)) elif lltype.typeOf(value) == lltype.Signed: - op = IntFrontendOp(pos) - op.setint(value) + op = IntFrontendOp(pos, value) elif lltype.typeOf(value) is longlong.FLOATSTORAGE: - op = FloatFrontendOp(pos) - op.setfloatstorage(value) + op = FloatFrontendOp(pos, value) else: - op = RefFrontendOp(pos) assert lltype.typeOf(value) == llmemory.GCREF - op.setref_base(value) + op = RefFrontendOp(pos, value) return op - def record_nospec(self, opnum, argboxes, descr=None): + def record_nospec(self, opnum, argboxes, valueconst, descr=None): tp = opclasses[opnum].type pos = self._record_op(opnum, argboxes, descr) if tp == 'v': + assert valueconst is None return FrontendOp(pos) elif tp == 'i': - return IntFrontendOp(pos) + return IntFrontendOp(pos, valueconst.getint()) elif tp == 'f': - return FloatFrontendOp(pos) + return FloatFrontendOp(pos, valueconst.getfloatstorage()) assert tp == 'r' - return RefFrontendOp(pos) + return RefFrontendOp(pos, valueconst.getref_base()) - def record_default_val(self, opnum, argboxes, descr=None): - assert rop.is_same_as(opnum) - op = self.record_nospec(opnum, argboxes, descr) - op.copy_value_from(argboxes[0]) - return op + def record_same_as(self, box): + if box.type == 'i': + return self.record1(rop.SAME_AS_I, box, box.getint()) + elif box.type == 'r': + return self.record1(rop.SAME_AS_R, box, box.getref_base()) + else: + assert box.type == 'f' + return self.record1(rop.SAME_AS_F, box, box.getfloatstorage()) # ____________________________________________________________ @@ -1117,3 +1087,14 @@ class Entry(ExtRegistryEntry): def specialize_call(self, hop): hop.exception_cannot_occur() + + +class BackendDescr(AbstractDescr): + descr_index = -1 + + def get_descr_index(self): + return self.descr_index + + def get_ei_index(self): + return self.ei_index + diff --git a/rpython/jit/metainterp/opencoder.py b/rpython/jit/metainterp/opencoder.py index 8be272f5e2..611c0f8959 100644 --- a/rpython/jit/metainterp/opencoder.py +++ b/rpython/jit/metainterp/opencoder.py @@ -118,7 +118,7 @@ class TraceIterator(BaseTrace): self.inputargs = [rop.inputarg_from_tp(arg.type) for arg in self.trace.inputargs] for i, arg in enumerate(self.inputargs): - self._cache[i] = arg + self._cache[self.trace.inputargs[i].get_position()] = arg self.start = start self.pos = start self._count = start @@ -283,7 +283,7 @@ class TopSnapshot(Snapshot): class Trace(BaseTrace): _deadranges = (-1, None) - def __init__(self, inputargs, metainterp_sd): + def __init__(self, max_num_inputargs, metainterp_sd): self.metainterp_sd = metainterp_sd self._ops = [rffi.cast(get_model(self).STORAGE_TP, 0)] * get_model(self).INIT_SIZE self._pos = 0 @@ -299,15 +299,26 @@ class Trace(BaseTrace): self._bigints_dict = {} self._floats = [] self._snapshots = [] - for i, inparg in enumerate(inputargs): - inparg.set_position(i) - self._count = len(inputargs) # total count - self._index = len(inputargs) # "position" of resulting resops - self._start = 
len(inputargs) + if not we_are_translated() and isinstance(max_num_inputargs, list): # old api for tests + self.inputargs = max_num_inputargs + for i, box in enumerate(max_num_inputargs): + box.position_and_flags = r_uint(i << 1) + max_num_inputargs = len(max_num_inputargs) + + self.max_num_inputargs = max_num_inputargs + self._count = max_num_inputargs # total count + self._index = max_num_inputargs # "position" of resulting resops + self._start = max_num_inputargs self._pos = self._start - self.inputargs = inputargs self.tag_overflow = False + def set_inputargs(self, inputargs): + self.inputargs = inputargs + if not we_are_translated(): + set_positions = {box.get_position() for box in inputargs} + assert len(set_positions) == len(inputargs) + assert not set_positions or max(set_positions) < self.max_num_inputargs + def append(self, v): model = get_model(self) if self._pos >= len(self._ops): @@ -476,8 +487,9 @@ class Trace(BaseTrace): return pos def _encode_descr(self, descr): - if descr.descr_index != -1: - return descr.descr_index + 1 + descr_index = descr.get_descr_index() + if descr_index != -1: + return descr_index + 1 self._descrs.append(descr) return len(self._descrs) - 1 + len(self.metainterp_sd.all_descrs) + 1 diff --git a/rpython/jit/metainterp/optimizeopt/bridgeopt.py b/rpython/jit/metainterp/optimizeopt/bridgeopt.py index 644f20968d..302419ba8d 100644 --- a/rpython/jit/metainterp/optimizeopt/bridgeopt.py +++ b/rpython/jit/metainterp/optimizeopt/bridgeopt.py @@ -95,13 +95,13 @@ def serialize_optimizer_knowledge(optimizer, numb_state, liveboxes, liveboxes_fr # metainterp_sd.all_descrs numb_state.append_int(len(triples_struct)) for box1, descr, box2 in triples_struct: - descr_index = descr.descr_index + descr_index = descr.get_descr_index() numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) numb_state.append_int(descr_index) numb_state.append_short(tag_box(box2, liveboxes_from_env, memo)) numb_state.append_int(len(triples_array)) for box1, index, descr, box2 in triples_array: - descr_index = descr.descr_index + descr_index = descr.get_descr_index() numb_state.append_short(tag_box(box1, liveboxes_from_env, memo)) numb_state.append_int(index) numb_state.append_int(descr_index) diff --git a/rpython/jit/metainterp/optimizeopt/heap.py b/rpython/jit/metainterp/optimizeopt/heap.py index 837510f6a1..42e057626d 100644 --- a/rpython/jit/metainterp/optimizeopt/heap.py +++ b/rpython/jit/metainterp/optimizeopt/heap.py @@ -696,7 +696,7 @@ class OptHeap(Optimization): def serialize_optheap(self, available_boxes): result_getfield = [] for descr, cf in self.cached_fields.iteritems(): - if descr.descr_index == -1: + if descr.get_descr_index() == -1: continue # not reachable via metainterp_sd.all_descrs if cf._lazy_set: continue # XXX safe default for now @@ -717,7 +717,7 @@ class OptHeap(Optimization): result_getfield.append((box1, descr, box2)) result_array = [] for descr, indexdict in self.cached_arrayitems.iteritems(): - if descr.descr_index == -1: + if descr.get_descr_index() == -1: continue # not reachable via metainterp_sd.all_descrs for index, cf in indexdict.iteritems(): if cf._lazy_set: diff --git a/rpython/jit/metainterp/optimizeopt/test/test_util.py b/rpython/jit/metainterp/optimizeopt/test/test_util.py index c884e1917e..4a6b21af86 100644 --- a/rpython/jit/metainterp/optimizeopt/test/test_util.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_util.py @@ -564,13 +564,9 @@ class BaseTest(LLtypeMixin): r = [] for arg, v in zip(inpargs, values): if arg.type == 'i': - n = 
IntFrontendOp(0) - if v is not None: - n.setint(v) + n = IntFrontendOp(0, v) else: - n = RefFrontendOp(0) - if v is not None: - n.setref_base(v) + n = RefFrontendOp(0, v) assert arg.type == 'r' r.append(n) return r diff --git a/rpython/jit/metainterp/pyjitpl.py b/rpython/jit/metainterp/pyjitpl.py index 34e7333e90..441a4faa97 100644 --- a/rpython/jit/metainterp/pyjitpl.py +++ b/rpython/jit/metainterp/pyjitpl.py @@ -39,6 +39,16 @@ def arguments(*args): return func return decorate + +special_handlers = {} + +def special_handler(argcodes): + def decorate(func): + assert func.__name__.startswith("special_") + special_handlers[func.__name__[len("special_"):], argcodes] = func + return func + return decorate + # ____________________________________________________________ FASTPATHS_SAME_BOXES = { @@ -265,6 +275,38 @@ class MIFrame(object): return self.execute(rop.%s, b1, b2) ''' % (_opimpl, _opimpl.upper())).compile()) + @special_handler("ic>i") + def special_int_add(self, position): + from rpython.jit.metainterp.blackhole import signedord + from rpython.jit.metainterp.history import IntFrontendOp + # bit of a micro-optimization: int_add with a constant argument is one + # of the most common opcodes in PyPy, and this way we + # - allocate one ConstInt fewer in the (common) non-recorded case + # - don't wrap and unwrap + # - only check one of the arguments for constness + assert position >= 0 + code = self.bytecode + position += 1 + regs = self.registers_i + b1 = regs[ord(code[position])] + position += 1 + + c = signedord(code[position]) + position += 1 + profiler = self.metainterp.staticdata.profiler + profiler.count_ops(rop.INT_ADD) + if b1.is_constant(): + assert isinstance(b1, ConstInt) + val = b1.getint() + resvalue = val + c + resbox = ConstInt(resvalue) + else: + assert isinstance(b1, IntFrontendOp) + resvalue = b1.getint() + c + resbox = self.metainterp._record_helper(rop.INT_ADD, resvalue, None, b1, ConstInt(c)) + regs[ord(code[position])] = resbox + self.pc = position + 1 + for _opimpl in ['int_eq', 'int_ne', 'int_lt', 'int_le', 'int_gt', 'int_ge', 'ptr_eq', 'ptr_ne', 'instance_ptr_eq', 'instance_ptr_ne']: @@ -1780,10 +1822,20 @@ class MIFrame(object): # changes, due to a call or a return. 
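The hunk above introduces a registry of hand-written opcode handlers that is consulted before a generic implementation is generated, plus a fast path for int_add with a constant argument. A minimal standalone sketch of that registration pattern, with a simplified handler body and a made-up make_generic fallback (the real special_int_add also records INT_ADD in the trace when its argument is not constant):

SPECIAL_HANDLERS = {}

def special_handler(argcodes):
    # register hand-written fast paths keyed by (opcode name, argument codes)
    def decorate(func):
        assert func.__name__.startswith("special_")
        SPECIAL_HANDLERS[func.__name__[len("special_"):], argcodes] = func
        return func
    return decorate

@special_handler("ic>i")
def special_int_add(value, constant):
    # simplified stand-in for the fast path described above: add a register
    # value and an immediate constant without boxing the constant first
    return value + constant

def get_opimpl(name, argcodes, make_generic):
    # lookup order mirrors _get_opimpl_method: special case first, then the
    # generically generated implementation
    key = (name, argcodes)
    if key in SPECIAL_HANDLERS:
        return SPECIAL_HANDLERS[key]
    return make_generic(name, argcodes)

impl = get_opimpl("int_add", "ic>i", lambda n, a: None)
assert impl(40, 2) == 42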
try: staticdata = self.metainterp.staticdata + pc = self.pc while True: - pc = self.pc - op = ord(self.bytecode[pc]) + bytecode = self.bytecode + op = ord(bytecode[pc]) + if op == staticdata.op_live: + pc += OFFSET_SIZE + 1 + self.pc = pc + continue + elif op == staticdata.op_goto: + pc = ord(bytecode[pc + 1]) | (ord(bytecode[pc + 2])<<8) + self.pc = pc + continue staticdata.opcode_implementations[op](self, pc) + pc = self.pc except ChangeFrame: pass @@ -1873,7 +1925,6 @@ class MIFrame(object): effectinfo = descr.get_extra_info() if effectinfo.oopspecindex == effectinfo.OS_NOT_IN_TRACE: return self.metainterp.do_not_in_trace_call(allboxes, descr) - cut_pos = self.metainterp.history.get_trace_position() if (assembler_call or effectinfo.check_forces_virtual_or_virtualizable()): @@ -1923,30 +1974,28 @@ class MIFrame(object): # from the effectinfo and the 'assembler_call' flag if assembler_call: vablebox, resbox = self.metainterp.direct_assembler_call( - allboxes, descr, assembler_call_jd) + allboxes, c_result, descr, assembler_call_jd) else: vablebox = None resbox = None if effectinfo.oopspecindex == effectinfo.OS_LIBFFI_CALL: - resbox = self.metainterp.direct_libffi_call(allboxes, descr) + resbox = self.metainterp.direct_libffi_call(allboxes, c_result, descr) # ^^^ may return None to mean "can't handle it myself" if resbox is None: if effectinfo.is_call_release_gil(): resbox = self.metainterp.direct_call_release_gil( - allboxes, descr) + allboxes, c_result, descr) else: resbox = self.metainterp.direct_call_may_force( - allboxes, descr) + allboxes, c_result, descr) # 5. invalidate the heapcache based on the CALL_MAY_FORCE # operation executed above in step 2 self.metainterp.heapcache.invalidate_caches_varargs(opnum1, descr, allboxes) - # 6. put 'c_result' back into the recorded operation if resbox.type == 'v': resbox = None # for void calls, must return None below else: - resbox.copy_value_from(c_result) self.make_result_of_lastop(resbox) self.metainterp.vable_after_residual_call(funcbox) self.metainterp.generate_guard(rop.GUARD_NOT_FORCED) @@ -2108,6 +2157,7 @@ class MetaInterpStaticData(object): opimpl = _get_opimpl_method(name, argcodes) self.opcode_implementations[value] = opimpl self.op_live = insns.get('live/', -1) + self.op_goto = insns.get('goto/L', -1) self.op_catch_exception = insns.get('catch_exception/L', -1) self.op_rvmprof_code = insns.get('rvmprof_code/ii', -1) @@ -2468,10 +2518,14 @@ class MetaInterp(object): if self.framestack: self.framestack[-1].pc = saved_pc - def create_empty_history(self): - self.history = history.History() + def create_empty_history(self, inputargs): + self.history = history.History(len(inputargs), self.staticdata) + self.history.set_inputargs(inputargs) self.staticdata.stats.set_history(self.history) + def create_history(self, max_num_inputargs): + self.history = history.History(max_num_inputargs, self.staticdata) + def _all_constants(self, *boxes): if len(boxes) == 0: return True @@ -2744,9 +2798,7 @@ class MetaInterp(object): self.resumekey = compile.ResumeFromInterpDescr(original_greenkey) self.seen_loop_header_for_jdindex = -1 try: - self.create_empty_history() - self.history.set_inputargs(original_boxes[num_green_args:], - self.staticdata) + self.create_empty_history(original_boxes[num_green_args:]) self.interpret() except SwitchToBlackhole as stb: self.run_blackhole_interp_to_cancel_tracing(stb) @@ -2763,9 +2815,11 @@ class MetaInterp(object): if self.resumekey_original_loop_token is None: raise compile.giveup() # should be rare 
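The rewritten dispatch loop above keeps pc in a local and handles the live and goto opcodes inline instead of calling through opcode_implementations, saving an indirect call for the cheapest opcodes. A toy sketch of that loop shape, assuming hypothetical opcode values and the same 2-byte little-endian jump encoding; every other opcode still goes through a handler (represented here by a single ADD1 case):

OP_LIVE, OP_GOTO, OP_ADD1, OP_STOP = 0, 1, 2, 3
OFFSET_SIZE = 2   # bytes of operand data attached to a 'live' opcode

def run(bytecode):
    acc = 0
    pc = 0
    while True:
        op = ord(bytecode[pc])
        if op == OP_LIVE:
            pc += OFFSET_SIZE + 1                         # skip opcode + operand
            continue
        elif op == OP_GOTO:
            pc = ord(bytecode[pc + 1]) | (ord(bytecode[pc + 2]) << 8)
            continue
        elif op == OP_ADD1:                               # stands in for the table call
            acc += 1
            pc += 1
        else:
            assert op == OP_STOP
            return acc

# live, add1, goto 8, add1, stop: the goto jumps over the second add1
program = "".join(map(chr, [OP_LIVE, 0, 0, OP_ADD1, OP_GOTO, 8, 0, OP_ADD1, OP_STOP]))
assert run(program) == 1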
self.staticdata.try_to_free_some_loops() + self.create_history(resume.get_max_num_inputargs(key)) try: + excdata = self._prepare_exception_resumption(deadframe, resumedescr) inputargs = self.initialize_state_from_guard_failure(key, deadframe) - return self._handle_guard_failure(resumedescr, key, inputargs, deadframe) + return self._handle_guard_failure(resumedescr, key, inputargs, deadframe, excdata) except SwitchToBlackhole as stb: self.run_blackhole_interp_to_cancel_tracing(stb) finally: @@ -2773,13 +2827,13 @@ class MetaInterp(object): self.staticdata.profiler.end_tracing() debug_stop('jit-tracing') - def _handle_guard_failure(self, resumedescr, key, inputargs, deadframe): + def _handle_guard_failure(self, resumedescr, key, inputargs, deadframe, excdata): self.current_merge_points = [] self.resumekey = resumedescr self.seen_loop_header_for_jdindex = -1 if isinstance(key, compile.ResumeAtPositionDescr): self.seen_loop_header_for_jdindex = self.jitdriver_sd.index - self.prepare_resume_from_failure(deadframe, inputargs, resumedescr) + self.prepare_resume_from_failure(deadframe, inputargs, resumedescr, excdata) if self.resumekey_original_loop_token is None: # very rare case raise SwitchToBlackhole(Counters.ABORT_BRIDGE) self.interpret() @@ -2798,9 +2852,7 @@ class MetaInterp(object): for i in range(endindex): box = boxes[i] if isinstance(box, Const) or box in duplicates: - opnum = OpHelpers.same_as_for_type(box.type) - op = self.history.record_default_val(opnum, [box]) - boxes[i] = op + boxes[i] = self.history.record_same_as(box) else: duplicates[box] = None @@ -2963,7 +3015,7 @@ class MetaInterp(object): jitcell_token = target_token.targeting_jitcell_token self.raise_continue_running_normally(live_arg_boxes, jitcell_token) - def prepare_resume_from_failure(self, deadframe, inputargs, resumedescr): + def _prepare_exception_resumption(self, deadframe, resumedescr): exception = self.cpu.grab_exc_value(deadframe) if (isinstance(resumedescr, compile.ResumeGuardExcDescr) or isinstance(resumedescr, compile.ResumeGuardCopiedExcDescr)): @@ -2972,11 +3024,11 @@ class MetaInterp(object): # the history aleady contains operations from resume.py. # The optimizer should remove these operations. However, # 'test_guard_no_exception_incorrectly_removed_from_bridge' - # shows a corner case in which just putting GuARD_NO_EXCEPTION + # shows a corner case in which just putting GUARD_NO_EXCEPTION # here is a bad idea: the optimizer might remove it too. - # So we put a SAVE_EXCEPTION at the start, and a - # RESTORE_EXCEPTION just before the guard. (rewrite.py will - # remove the two if they end up consecutive.) + # So we put a SAVE_EXCEPTION at the start, and a RESTORE_EXCEPTION + # just before the guard (done in prepare_resume_from_failure). + # rewrite.py will remove the two if they end up consecutive. 
# XXX too much jumps between older and newer models; clean up # by killing SAVE_EXC_CLASS, RESTORE_EXCEPTION and GUARD_EXCEPTION @@ -2986,13 +3038,22 @@ class MetaInterp(object): exc_class = ptr2int(exception_obj.typeptr) else: exc_class = 0 - assert self.history.trace is None - i = len(self.history._cache) + assert self.history.trace._pos == self.history.trace._start op1 = self.history.record0(rop.SAVE_EXC_CLASS, exc_class) op2 = self.history.record0(rop.SAVE_EXCEPTION, exception) - self.history._cache = self.history._cache[i:] + self.history._cache[:i] + return exception, op1, op2 + else: + assert not exception + return exception, None, None + + def prepare_resume_from_failure(self, deadframe, inputargs, resumedescr, excdata): + if (isinstance(resumedescr, compile.ResumeGuardExcDescr) or + isinstance(resumedescr, compile.ResumeGuardCopiedExcDescr)): + exception, op1, op2 = excdata + exception_obj = lltype.cast_opaque_ptr(rclass.OBJECTPTR, exception) + self.history.record2(rop.RESTORE_EXCEPTION, op1, op2, None) - self.history.set_inputargs(inputargs, self.staticdata) + self.history.set_inputargs(inputargs) if exception_obj: self.execute_ll_raised(exception_obj) else: @@ -3002,8 +3063,7 @@ class MetaInterp(object): except ChangeFrame: pass else: - self.history.set_inputargs(inputargs, self.staticdata) - assert not exception + self.history.set_inputargs(inputargs) def get_procedure_token(self, greenkey): JitCell = self.jitdriver_sd.warmstate.JitCell @@ -3105,21 +3165,20 @@ class MetaInterp(object): def initialize_original_boxes(self, jitdriver_sd, *args): original_boxes = [None] * len(args) self._fill_original_boxes(jitdriver_sd, original_boxes, 0, - jitdriver_sd.num_green_args, *args) + *args) return original_boxes @specialize.arg(1) @always_inline def _fill_original_boxes(self, jitdriver_sd, original_boxes, - position, - num_green_args, *args): + position, *args): if args: from rpython.jit.metainterp.warmstate import wrap - box = wrap(self.cpu, args[0], num_green_args > 0) + box = wrap(self.cpu, args[0], + position - jitdriver_sd.num_green_args) original_boxes[position] = box self._fill_original_boxes(jitdriver_sd, original_boxes, - position + 1, - num_green_args-1, *args[1:]) + position + 1, *args[1:]) def initialize_state_from_start(self, original_boxes): # ----- make a new frame ----- @@ -3139,7 +3198,6 @@ class MetaInterp(object): rstack._stack_criticalcode_start() try: self.portal_call_depth = -1 # always one portal around - self.history = history.History() inputargs_and_holes = self.rebuild_state_after_failure(resumedescr, deadframe) return [box for box in inputargs_and_holes if box] @@ -3157,8 +3215,10 @@ class MetaInterp(object): vinfo.clear_vable_token(virtualizable) # The field 'virtualizable_boxes' is not even present # if 'virtualizable_info' is None. Check for that first. 
+ startindex = len(original_boxes) - self.jitdriver_sd.num_green_args self.virtualizable_boxes = vinfo.read_boxes(self.cpu, - virtualizable) + virtualizable, + startindex) original_boxes += self.virtualizable_boxes self.virtualizable_boxes.append(virtualizable_box) self.check_synchronized_virtualizable() @@ -3315,7 +3375,8 @@ class MetaInterp(object): virtualizable_box = self.virtualizable_boxes[-1] virtualizable = vinfo.unwrap_virtualizable_box(virtualizable_box) self.virtualizable_boxes = vinfo.read_boxes(self.cpu, - virtualizable) + virtualizable, + 0) self.virtualizable_boxes.append(virtualizable_box) def gen_store_back_in_vable(self, box): @@ -3431,19 +3492,18 @@ class MetaInterp(object): return op # but COND_CALL_VALUE remains opnum = OpHelpers.call_pure_for_descr(descr) self.history.cut(patch_pos) - newop = self.history.record_nospec(opnum, argboxes, descr) - newop.copy_value_from(op) + newop = self.history.record_nospec(opnum, argboxes, resbox_as_const, descr) return newop - def direct_call_may_force(self, argboxes, calldescr): + def direct_call_may_force(self, argboxes, valueconst, calldescr): """ Common case: record in the history a CALL_MAY_FORCE with 'c_result' as the result of that call. (The actual call has already been done.) """ opnum = rop.call_may_force_for_descr(calldescr) - return self.history.record_nospec(opnum, argboxes, calldescr) + return self.history.record_nospec(opnum, argboxes, valueconst, calldescr) - def direct_assembler_call(self, arglist, calldescr, targetjitdriver_sd): + def direct_assembler_call(self, arglist, valueconst, calldescr, targetjitdriver_sd): """ Record in the history a direct call to assembler for portal entry point. """ @@ -3454,7 +3514,7 @@ class MetaInterp(object): warmrunnerstate = targetjitdriver_sd.warmstate token = warmrunnerstate.get_assembler_token(greenargs) opnum = OpHelpers.call_assembler_for_descr(calldescr) - op = self.history.record_nospec(opnum, args, descr=token) + op = self.history.record_nospec(opnum, args, valueconst, descr=token) # # To fix an obscure issue, make sure the vable stays alive # longer than the CALL_ASSEMBLER operation. 
We do it by @@ -3465,7 +3525,7 @@ class MetaInterp(object): else: return None, op - def direct_libffi_call(self, argboxes, orig_calldescr): + def direct_libffi_call(self, argboxes, valueconst, orig_calldescr): """Generate a direct call to C code using jit_ffi_call() """ # an 'assert' that constant-folds away the rest of this function @@ -3521,11 +3581,11 @@ class MetaInterp(object): assert opnum == rop.call_release_gil_for_descr(calldescr) return self.history.record_nospec(opnum, [c_saveall, argboxes[2]] + arg_boxes, - calldescr) + valueconst, calldescr) # note that the result is written back to the exchange_buffer by the # following operation, which should be a raw_store - def direct_call_release_gil(self, argboxes, calldescr): + def direct_call_release_gil(self, argboxes, valueconst, calldescr): if not we_are_translated(): # for llgraph calldescr._original_func_ = argboxes[0].getint() effectinfo = calldescr.get_extra_info() @@ -3535,7 +3595,7 @@ class MetaInterp(object): opnum = rop.call_release_gil_for_descr(calldescr) return self.history.record_nospec(opnum, [savebox, funcbox] + argboxes[1:], - calldescr) + valueconst, calldescr) def do_not_in_trace_call(self, allboxes, descr): self.clear_exception() @@ -3559,6 +3619,8 @@ class ChangeFrame(jitexc.JitException): def _get_opimpl_method(name, argcodes): from rpython.jit.metainterp.blackhole import signedord + if (name, argcodes) in special_handlers: + return special_handlers[name, argcodes] # def handler(self, position): assert position >= 0 diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py index b9dddafddb..1a16e6b190 100644 --- a/rpython/jit/metainterp/resoperation.py +++ b/rpython/jit/metainterp/resoperation.py @@ -574,9 +574,6 @@ class IntOp(object): def setint(self, intval): self._resint = intval - def copy_value_from(self, other): - self.setint(other.getint()) - def constbox(self): from rpython.jit.metainterp import history return history.ConstInt(self.getint()) @@ -603,9 +600,6 @@ class FloatOp(object): assert lltype.typeOf(floatval) is longlong.FLOATSTORAGE self._resfloat = floatval - def copy_value_from(self, other): - self.setfloatstorage(other.getfloatstorage()) - def constbox(self): from rpython.jit.metainterp import history return history.ConstFloat(self.getfloatstorage()) @@ -638,9 +632,6 @@ class RefOp(object): return lltype.cast_opaque_ptr(PTR, self.getref_base()) getref._annspecialcase_ = 'specialize:arg(1)' - def copy_value_from(self, other): - self.setref_base(other.getref_base()) - def nonnull(self): return bool(self._resref) diff --git a/rpython/jit/metainterp/resume.py b/rpython/jit/metainterp/resume.py index 798af789be..a38ac7cfcc 100644 --- a/rpython/jit/metainterp/resume.py +++ b/rpython/jit/metainterp/resume.py @@ -1080,6 +1080,12 @@ def rebuild_from_resumedata(metainterp, storage, deadframe, return resumereader.liveboxes, virtualizable_boxes, virtualref_boxes +def get_max_num_inputargs(storage): + reader = resumecode.Reader(storage.rd_numb) + reader.next_item() + return reader.next_item() + + class ResumeDataBoxReader(AbstractResumeDataReader): unique_id = lambda: None VirtualCache = get_VirtualCache_class('BoxReader') @@ -1283,15 +1289,14 @@ class ResumeDataBoxReader(AbstractResumeDataReader): if num < 0: num += len(self.liveboxes) assert num >= 0 + # we create *FrontendOp instances with numbers in the range + # 0..self.count if kind == INT: - box = IntFrontendOp(0) - box.setint(self.cpu.get_int_value(self.deadframe, num)) + box = IntFrontendOp(num, 
self.cpu.get_int_value(self.deadframe, num)) elif kind == REF: - box = RefFrontendOp(0) - box.setref_base(self.cpu.get_ref_value(self.deadframe, num)) + box = RefFrontendOp(num, self.cpu.get_ref_value(self.deadframe, num)) elif kind == FLOAT: - box = FloatFrontendOp(0) - box.setfloatstorage(self.cpu.get_float_value(self.deadframe, num)) + box = FloatFrontendOp(num, self.cpu.get_float_value(self.deadframe, num)) else: assert 0, "bad kind: %d" % ord(kind) self.liveboxes[num] = box diff --git a/rpython/jit/metainterp/test/test_compile.py b/rpython/jit/metainterp/test/test_compile.py index 630a97af65..2b444bfbbc 100644 --- a/rpython/jit/metainterp/test/test_compile.py +++ b/rpython/jit/metainterp/test/test_compile.py @@ -97,7 +97,7 @@ def test_compile_loop(): metainterp = FakeMetaInterp() metainterp.staticdata = staticdata metainterp.cpu = cpu - metainterp.history = History() + metainterp.history = History(len(loop.inputargs), staticdata) t = convert_loop_to_trace(loop, staticdata) metainterp.history.inputargs = t.inputargs metainterp.history.trace = t diff --git a/rpython/jit/metainterp/test/test_heapcache.py b/rpython/jit/metainterp/test/test_heapcache.py index 14aae47324..ded7fda5df 100644 --- a/rpython/jit/metainterp/test/test_heapcache.py +++ b/rpython/jit/metainterp/test/test_heapcache.py @@ -2,9 +2,12 @@ import py from rpython.jit.metainterp.heapcache import HeapCache from rpython.jit.metainterp.resoperation import rop, InputArgInt from rpython.jit.metainterp.history import ConstInt, ConstPtr, BasicFailDescr -from rpython.jit.metainterp.history import IntFrontendOp, RefFrontendOp +from rpython.jit.metainterp.history import IntFrontendOp, RefFrontendOp as OrigRefFrontendOp from rpython.rtyper.lltypesystem import llmemory, rffi +def RefFrontendOp(pos): + return OrigRefFrontendOp(pos, OrigRefFrontendOp._resref) + descr1 = object() descr2 = object() descr3 = object() @@ -339,8 +342,8 @@ class TestHeapCache(object): h = HeapCache() box1 = RefFrontendOp(1) box2 = RefFrontendOp(2) - lengthbox1 = IntFrontendOp(11) - lengthbox2 = IntFrontendOp(12) + lengthbox1 = IntFrontendOp(11, 11) + lengthbox2 = IntFrontendOp(12, 11) h.new_array(box1, lengthbox1) assert h.arraylen(box1) is lengthbox1 @@ -485,16 +488,14 @@ class TestHeapCache(object): def test_replace_box_with_const_in_array(self): h = HeapCache() box1 = RefFrontendOp(1) - lengthbox2 = IntFrontendOp(2) - lengthbox2.setint(10) + lengthbox2 = IntFrontendOp(2, 10) h.arraylen_now_known(box1, lengthbox2) assert h.arraylen(box1) is lengthbox2 c10 = ConstInt(10) h.replace_box(lengthbox2, c10) assert c10.same_constant(h.arraylen(box1)) - box2 = IntFrontendOp(2) - box2.setint(12) + box2 = IntFrontendOp(2, 12) h.setarrayitem(box1, index2, box2, descr1) assert h.getarrayitem(box1, index2, descr1) is box2 c12 = ConstInt(12) @@ -508,8 +509,8 @@ class TestHeapCache(object): box3 = RefFrontendOp(3) box4 = RefFrontendOp(4) box5 = RefFrontendOp(5) - lengthbox1 = IntFrontendOp(11) - lengthbox2 = IntFrontendOp(12) + lengthbox1 = IntFrontendOp(11, 12) + lengthbox2 = IntFrontendOp(12, 178) h.new_array(box1, lengthbox1) h.setarrayitem(box1, index1, box2, descr1) h.new_array(box2, lengthbox1) @@ -541,7 +542,7 @@ class TestHeapCache(object): box1 = RefFrontendOp(1) box2 = RefFrontendOp(2) box3 = RefFrontendOp(3) - lengthbox2 = IntFrontendOp(12) + lengthbox2 = IntFrontendOp(12, 12) h.setarrayitem(box1, index1, box2, descr2) assert h.getarrayitem(box1, index1, descr2) is box2 h.new_array(box2, lengthbox2) @@ -585,7 +586,7 @@ class TestHeapCache(object): box2 = 
RefFrontendOp(2) box3 = RefFrontendOp(3) box4 = RefFrontendOp(4) - lengthbox1 = IntFrontendOp(11) + lengthbox1 = IntFrontendOp(11, 435) h.new_array(box1, lengthbox1) h.setarrayitem(box3, index1, box4, descr1) h.invalidate_caches_varargs( @@ -598,8 +599,8 @@ class TestHeapCache(object): h = HeapCache() box1 = RefFrontendOp(1) box2 = RefFrontendOp(2) - lengthbox1 = IntFrontendOp(11) - lengthbox2 = IntFrontendOp(12) + lengthbox1 = IntFrontendOp(11, 1) + lengthbox2 = IntFrontendOp(12, 3) h.new_array(box1, index2) h.new_array(box2, index2) assert h.is_unescaped(box1) @@ -682,8 +683,8 @@ class TestHeapCache(object): h = HeapCache() box1 = RefFrontendOp(1) box2 = RefFrontendOp(2) - lengthbox1 = IntFrontendOp(11) - lengthbox2 = IntFrontendOp(12) + lengthbox1 = IntFrontendOp(11, 3) + lengthbox2 = IntFrontendOp(12, 3) h.new_array(box1, index2) assert h.is_unescaped(box1) h.invalidate_caches(rop.SETARRAYITEM_GC, None, box1, index1, box2) @@ -722,7 +723,7 @@ class TestHeapCache(object): h = HeapCache() box1 = RefFrontendOp(1) box3 = RefFrontendOp(3) - lengthbox1 = IntFrontendOp(11) + lengthbox1 = IntFrontendOp(11, 3) h.new_array(box1, index2) assert h.is_unescaped(box1) h.setarrayitem(box1, index1, box3, descr1) @@ -840,7 +841,7 @@ class TestHeapCache(object): h.new_array(box1, index2) assert h.is_likely_virtual(box1) box2 = RefFrontendOp(2) - lengthbox = IntFrontendOp(11) + lengthbox = IntFrontendOp(11, 3) # arrays are only virtual if the length is constant h.new_array(box2, lengthbox) assert not h.is_likely_virtual(box2) diff --git a/rpython/jit/metainterp/test/test_history.py b/rpython/jit/metainterp/test/test_history.py index b82e385c1d..640528f1bd 100644 --- a/rpython/jit/metainterp/test/test_history.py +++ b/rpython/jit/metainterp/test/test_history.py @@ -67,8 +67,6 @@ def test_frontendop(): assert f.get_position() == 42 f = FrontendOp(-56) assert f.get_position() == -56 - f.set_position(6519) - assert f.get_position() == 6519 def fresh_ref(): S = lltype.GcStruct('S') diff --git a/rpython/jit/metainterp/test/test_opencoder.py b/rpython/jit/metainterp/test/test_opencoder.py index 9fe9a1b407..fbf377f9ff 100644 --- a/rpython/jit/metainterp/test/test_opencoder.py +++ b/rpython/jit/metainterp/test/test_opencoder.py @@ -66,7 +66,7 @@ class TestOpencoder(object): return iter.inputargs, l, iter def test_simple_iterator(self): - i0, i1 = IntFrontendOp(0), IntFrontendOp(0) + i0, i1 = IntFrontendOp(0, 0), IntFrontendOp(1, 0) t = Trace([i0, i1], metainterp_sd) add = FakeOp(t.record_op(rop.INT_ADD, [i0, i1])) t.record_op(rop.INT_ADD, [add, ConstInt(1)]) @@ -80,7 +80,7 @@ class TestOpencoder(object): assert l[0].getarg(1) is i1 def test_rd_snapshot(self): - i0, i1 = IntFrontendOp(0), IntFrontendOp(0) + i0, i1 = IntFrontendOp(0, 0), IntFrontendOp(1, 0) t = Trace([i0, i1], metainterp_sd) add = FakeOp(t.record_op(rop.INT_ADD, [i0, i1])) t.record_op(rop.GUARD_FALSE, [add]) @@ -104,7 +104,7 @@ class TestOpencoder(object): assert fstack[1].boxes == [i0, i0, l[0]] def test_read_snapshot_interface(self): - i0, i1, i2 = IntFrontendOp(0), IntFrontendOp(0), IntFrontendOp(0) + i0, i1, i2 = IntFrontendOp(0, 0), IntFrontendOp(1, 0), IntFrontendOp(2, 0) t = Trace([i0, i1, i2], metainterp_sd) t.record_op(rop.GUARD_TRUE, [i1]) frame0 = FakeFrame(1, JitCode(2), [i0, i1]) @@ -158,7 +158,7 @@ class TestOpencoder(object): BaseTest.assert_equal(loop1, loop2) def test_cut_trace_from(self): - i0, i1, i2 = IntFrontendOp(0), IntFrontendOp(0), IntFrontendOp(0) + i0, i1, i2 = IntFrontendOp(0, 0), IntFrontendOp(1, 0), IntFrontendOp(2, 0) 
t = Trace([i0, i1, i2], metainterp_sd) add1 = FakeOp(t.record_op(rop.INT_ADD, [i0, i1])) cut_point = t.cut_point() @@ -173,7 +173,7 @@ class TestOpencoder(object): assert l[0].getarglist() == [i0, i1] def test_virtualizable_virtualref(self): - i0, i1, i2 = IntFrontendOp(0), IntFrontendOp(0), IntFrontendOp(0) + i0, i1, i2 = IntFrontendOp(0, 0), IntFrontendOp(1, 0), IntFrontendOp(2, 0) t = Trace([i0, i1, i2], metainterp_sd) p0 = FakeOp(t.record_op(rop.NEW_WITH_VTABLE, [], descr=SomeDescr())) t.record_op(rop.GUARD_TRUE, [i0]) @@ -184,7 +184,7 @@ class TestOpencoder(object): assert l[1].vref_boxes == [l[0], i1] def test_liveranges(self): - i0, i1, i2 = IntFrontendOp(0), IntFrontendOp(0), IntFrontendOp(0) + i0, i1, i2 = IntFrontendOp(0, 0), IntFrontendOp(1, 0), IntFrontendOp(2, 0) t = Trace([i0, i1, i2], metainterp_sd) p0 = FakeOp(t.record_op(rop.NEW_WITH_VTABLE, [], descr=SomeDescr())) t.record_op(rop.GUARD_TRUE, [i0]) @@ -192,7 +192,7 @@ class TestOpencoder(object): assert t.get_live_ranges() == [4, 4, 4, 4] def test_deadranges(self): - i0, i1, i2 = IntFrontendOp(0), IntFrontendOp(0), IntFrontendOp(0) + i0, i1, i2 = IntFrontendOp(0, 0), IntFrontendOp(1, 0), IntFrontendOp(2, 0) t = Trace([i0, i1, i2], metainterp_sd) p0 = FakeOp(t.record_op(rop.NEW_WITH_VTABLE, [], descr=SomeDescr())) t.record_op(rop.GUARD_TRUE, [i0]) diff --git a/rpython/jit/metainterp/test/test_pyjitpl.py b/rpython/jit/metainterp/test/test_pyjitpl.py index 6bcc469200..df2978ff66 100644 --- a/rpython/jit/metainterp/test/test_pyjitpl.py +++ b/rpython/jit/metainterp/test/test_pyjitpl.py @@ -82,15 +82,13 @@ def test_remove_consts_and_duplicates(): assert box.getint() == referencebox.getint() return True metainterp = pyjitpl.MetaInterp(FakeStaticData(), None) - metainterp.history = History() - b1 = IntFrontendOp(1) - b1.setint(1) - b2 = IntFrontendOp(2) - b2.setint(2) + metainterp.history = History(4, FakeStaticData()) + b1 = IntFrontendOp(1, 1) + b2 = IntFrontendOp(2, 2) c3 = ConstInt(3) boxes = [b1, b2, b1, c3] dup = {} - metainterp.history.set_inputargs([b1, b2], FakeStaticData()) + metainterp.history.set_inputargs([b1, b2]) metainterp.remove_consts_and_duplicates(boxes, 4, dup) assert boxes[0] is b1 assert boxes[1] is b2 diff --git a/rpython/jit/metainterp/test/test_resume.py b/rpython/jit/metainterp/test/test_resume.py index 4d0bc1901b..1c7519ef0f 100644 --- a/rpython/jit/metainterp/test/test_resume.py +++ b/rpython/jit/metainterp/test/test_resume.py @@ -36,6 +36,8 @@ class Storage: rd_virtuals = None rd_pendingfields = None +dummyref = RefFrontendOp._resref + class FakeOptimizer(object): metainterp_sd = None @@ -183,10 +185,9 @@ class MyMetaInterp: def execute_and_record(self, opnum, descr, *argboxes): resvalue = executor.execute(self.cpu, None, opnum, descr, *argboxes) if isinstance(resvalue, int): - op = IntFrontendOp(0) + op = IntFrontendOp(0, resvalue) else: - op = RefFrontendOp(0) - setvalue(op, resvalue) + op = RefFrontendOp(0, resvalue) self.trace.append((opnum, list(argboxes), resvalue, descr)) return op @@ -600,8 +601,8 @@ class Frame(object): return a def test_ResumeDataLoopMemo_number(): - b1, b2, b3, b4, b5 = [IntFrontendOp(0), IntFrontendOp(1), IntFrontendOp(2), - RefFrontendOp(3), RefFrontendOp(4)] + b1, b2, b3, b4, b5 = [IntFrontendOp(0, 0), IntFrontendOp(1, 0), IntFrontendOp(2, 0), + RefFrontendOp(3, dummyref), RefFrontendOp(4, dummyref)] c1, c2, c3, c4 = [ConstInt(1), ConstInt(2), ConstInt(3), ConstInt(4)] env = [b1, c1, b2, b1, c2] @@ -710,7 +711,7 @@ def test_ResumeDataLoopMemo_number(): ] + [0, 0] 
@given(strategies.lists( - strategies.builds(IntFrontendOp, strategies.just(0)) | intconsts, + strategies.builds(IntFrontendOp, strategies.just(0), strategies.just(1)) | intconsts, min_size=1)) def test_ResumeDataLoopMemo_random(lst): inpargs = [box for box in lst if not isinstance(box, Const)] @@ -741,7 +742,7 @@ def test_ResumeDataLoopMemo_random(lst): def test_ResumeDataLoopMemo_number_boxes(): memo = ResumeDataLoopMemo(FakeMetaInterpStaticData()) - b1, b2 = [IntFrontendOp(0), IntFrontendOp(0)] + b1, b2 = [IntFrontendOp(0, 0), IntFrontendOp(0, 0)] assert memo.num_cached_boxes() == 0 boxes = [] num = memo.assign_number_to_box(b1, boxes) @@ -770,7 +771,7 @@ def test_ResumeDataLoopMemo_number_boxes(): def test_ResumeDataLoopMemo_number_virtuals(): memo = ResumeDataLoopMemo(FakeMetaInterpStaticData()) - b1, b2 = [IntFrontendOp(0), IntFrontendOp(0)] + b1, b2 = [IntFrontendOp(0, 0), IntFrontendOp(0, 0)] assert memo.num_cached_virtuals() == 0 num = memo.assign_number_to_virtual(b1) assert num == -1 @@ -790,8 +791,8 @@ def test_ResumeDataLoopMemo_number_virtuals(): assert memo.num_cached_virtuals() == 0 def test_register_virtual_fields(): - b1, b2 = IntFrontendOp(0), IntFrontendOp(1) - vbox = RefFrontendOp(2) + b1, b2 = IntFrontendOp(0, 0), IntFrontendOp(1, 0) + vbox = RefFrontendOp(2, dummyref) modifier = ResumeDataVirtualAdder(FakeOptimizer(), None, None, None, None) modifier.liveboxes_from_env = {} modifier.liveboxes = {} @@ -976,8 +977,8 @@ def test_virtual_adder_make_constant(): def test_virtual_adder_make_virtual(): - b2s, b3s, b4s, b5s = [RefFrontendOp(0), IntFrontendOp(0), RefFrontendOp(0), - RefFrontendOp(0)] + b2s, b3s, b4s, b5s = [RefFrontendOp(0, dummyref), IntFrontendOp(0, 0), RefFrontendOp(0, dummyref), + RefFrontendOp(0, dummyref)] c1s = ConstInt(111) storage = Storage() memo = ResumeDataLoopMemo(FakeMetaInterpStaticData()) @@ -1007,7 +1008,7 @@ def test_virtual_adder_make_virtual(): storage.rd_consts = memo.consts[:] storage.rd_numb = Numbering([0]) # resume - b3t, b5t = [IntFrontendOp(0), RefFrontendOp(0)] + b3t, b5t = [IntFrontendOp(0, 0), RefFrontendOp(0, dummyref)] b5t.setref_base(demo55o) b3t.setint(33) newboxes = _resume_remap(liveboxes, [#b2s -- virtual @@ -1058,7 +1059,7 @@ class CompareableConsts(object): del Const.__eq__ def test_virtual_adder_make_varray(): - b2s, b4s = [RefFrontendOp(0), IntFrontendOp(0)] + b2s, b4s = [RefFrontendOp(0, dummyref), IntFrontendOp(0, 0)] b4s.setint(4) c1s = ConstInt(111) storage = Storage() @@ -1078,7 +1079,7 @@ def test_virtual_adder_make_varray(): storage.rd_consts = memo.consts[:] storage.rd_numb = Numbering([0]) # resume - b1t, b3t, b4t = [IntFrontendOp(0), IntFrontendOp(0), IntFrontendOp(0)] + b1t, b3t, b4t = [IntFrontendOp(0, 0), IntFrontendOp(0, 0), IntFrontendOp(0, 0)] b1t.setint(11) b3t.setint(33) b4t.setint(44) @@ -1111,7 +1112,7 @@ def test_virtual_adder_make_varray(): def test_virtual_adder_make_vstruct(): - b2s, b4s = [RefFrontendOp(0), RefFrontendOp(0)] + b2s, b4s = [RefFrontendOp(0, dummyref), RefFrontendOp(0, dummyref)] c1s = ConstInt(111) storage = Storage() memo = ResumeDataLoopMemo(FakeMetaInterpStaticData()) @@ -1130,7 +1131,7 @@ def test_virtual_adder_make_vstruct(): dump_storage(storage, liveboxes) storage.rd_consts = memo.consts[:] storage.rd_numb = Numbering([0]) - b4t = RefFrontendOp(0) + b4t = RefFrontendOp(0, dummyref) newboxes = _resume_remap(liveboxes, [#b2s -- virtual b4s], b4t) # @@ -1158,7 +1159,7 @@ def test_virtual_adder_make_vstruct(): def test_virtual_adder_pending_fields(): - b2s, b4s = 
[RefFrontendOp(0), RefFrontendOp(0)] + b2s, b4s = [RefFrontendOp(0, dummyref), RefFrontendOp(0, dummyref)] storage = Storage() memo = ResumeDataLoopMemo(FakeMetaInterpStaticData()) modifier = ResumeDataVirtualAdder(None, storage, storage, None, memo) @@ -1178,9 +1179,9 @@ def test_virtual_adder_pending_fields(): storage.rd_numb = Numbering([0]) # resume demo55.next = lltype.nullptr(LLtypeMixin.NODE) - b2t = RefFrontendOp(0) + b2t = RefFrontendOp(0, dummyref) b2t.setref_base(demo55o) - b4t = RefFrontendOp(0) + b4t = RefFrontendOp(0, dummyref) b4t.setref_base(demo66o) newboxes = _resume_remap(liveboxes, [b2s, b4s], b2t, b4t) @@ -1210,8 +1211,8 @@ def test_virtual_adder_pending_fields_and_arrayitems(): field_a = FieldDescr() storage = Storage() modifier = ResumeDataVirtualAdder(None, storage, storage, None, None) - a = IntFrontendOp(0) - b = IntFrontendOp(0) + a = IntFrontendOp(0, 0) + b = IntFrontendOp(0, 0) modifier.liveboxes_from_env = {a: rffi.cast(rffi.SHORT, 1042), b: rffi.cast(rffi.SHORT, 1061)} modifier._add_pending_fields( @@ -1227,10 +1228,10 @@ def test_virtual_adder_pending_fields_and_arrayitems(): array_a = FieldDescr() storage = Storage() modifier = ResumeDataVirtualAdder(None, storage, storage, None, None) - a42 = IntFrontendOp(0) - a61 = IntFrontendOp(0) - a62 = IntFrontendOp(0) - a63 = IntFrontendOp(0) + a42 = IntFrontendOp(0, 0) + a61 = IntFrontendOp(0, 0) + a62 = IntFrontendOp(0, 0) + a63 = IntFrontendOp(0, 0) modifier.liveboxes_from_env = {a42: rffi.cast(rffi.SHORT, 1042), a61: rffi.cast(rffi.SHORT, 1061), a62: rffi.cast(rffi.SHORT, 1062), diff --git a/rpython/jit/metainterp/test/test_warmstate.py b/rpython/jit/metainterp/test/test_warmstate.py index d8ac6fe458..c0431f5fee 100644 --- a/rpython/jit/metainterp/test/test_warmstate.py +++ b/rpython/jit/metainterp/test/test_warmstate.py @@ -33,18 +33,15 @@ def test_unwrap(): def test_wrap(): def InputArgInt(a): - i = IntFrontendOp(0) - i.setint(a) + i = IntFrontendOp(0, a) return i def InputArgFloat(a): - i = FloatFrontendOp(0) - i.setfloatstorage(a) + i = FloatFrontendOp(0, a) return i def InputArgRef(a): - i = RefFrontendOp(0) - i.setref_base(a) + i = RefFrontendOp(0, a) return i def boxfloat(x): @@ -55,24 +52,24 @@ def test_wrap(): box1.getvalue() == box2.getvalue()) p = lltype.malloc(lltype.GcStruct('S')) po = lltype.cast_opaque_ptr(llmemory.GCREF, p) - assert _is(wrap(None, 42), InputArgInt(42)) - assert _is(wrap(None, 42.5), boxfloat(42.5)) - assert _is(wrap(None, p), InputArgRef(po)) - assert _is(wrap(None, 42, in_const_box=True), ConstInt(42)) - assert _is(wrap(None, 42.5, in_const_box=True), constfloat(42.5)) - assert _is(wrap(None, p, in_const_box=True), ConstPtr(po)) + assert _is(wrap(None, 42, 0), InputArgInt(42)) + assert _is(wrap(None, 42.5, 0), boxfloat(42.5)) + assert _is(wrap(None, p, 0), InputArgRef(po)) + assert _is(wrap(None, 42, -1), ConstInt(42)) + assert _is(wrap(None, 42.5, -1), constfloat(42.5)) + assert _is(wrap(None, p, -1), ConstPtr(po)) if longlong.supports_longlong: import sys from rpython.rlib.rarithmetic import r_longlong, r_ulonglong value = r_longlong(-sys.maxint*17) - assert _is(wrap(None, value), InputArgFloat(value)) - assert _is(wrap(None, value, in_const_box=True), ConstFloat(value)) + assert _is(wrap(None, value, 0), InputArgFloat(value)) + assert _is(wrap(None, value, -1), ConstFloat(value)) value_unsigned = r_ulonglong(-sys.maxint*17) - assert _is(wrap(None, value_unsigned), InputArgFloat(value)) + assert _is(wrap(None, value_unsigned, 0), InputArgFloat(value)) sfval = 
r_singlefloat(42.5) ival = longlong.singlefloat2int(sfval) - assert _is(wrap(None, sfval), InputArgInt(ival)) - assert _is(wrap(None, sfval, in_const_box=True), ConstInt(ival)) + assert _is(wrap(None, sfval, 0), InputArgInt(ival)) + assert _is(wrap(None, sfval, -1), ConstInt(ival)) def test_specialize_value(): assert specialize_value(lltype.Char, 0x41) == '\x41' diff --git a/rpython/jit/metainterp/virtualizable.py b/rpython/jit/metainterp/virtualizable.py index 535c11e89a..a5eec52696 100644 --- a/rpython/jit/metainterp/virtualizable.py +++ b/rpython/jit/metainterp/virtualizable.py @@ -83,17 +83,19 @@ class VirtualizableInfo(object): self.array_field_by_descrs = dict( [(descr, i) for (i, descr) in enumerate(self.array_field_descrs)]) - def read_boxes(cpu, virtualizable): + def read_boxes(cpu, virtualizable, startindex): assert lltype.typeOf(virtualizable) == llmemory.GCREF virtualizable = cast_gcref_to_vtype(virtualizable) boxes = [] for _, fieldname in unroll_static_fields: x = getattr(virtualizable, fieldname) - boxes.append(wrap(cpu, x)) + boxes.append(wrap(cpu, x, startindex)) + startindex += 1 for _, fieldname in unroll_array_fields: lst = getattr(virtualizable, fieldname) for i in range(len(lst)): - boxes.append(wrap(cpu, lst[i])) + boxes.append(wrap(cpu, lst[i], startindex + i)) + startindex += len(lst) return boxes def write_boxes(virtualizable, boxes): diff --git a/rpython/jit/metainterp/warmstate.py b/rpython/jit/metainterp/warmstate.py index 828f684ffc..863a1cc808 100644 --- a/rpython/jit/metainterp/warmstate.py +++ b/rpython/jit/metainterp/warmstate.py @@ -70,16 +70,15 @@ def unwrap(TYPE, box): return lltype.cast_primitive(TYPE, box.getint()) @specialize.ll() -def wrap(cpu, value, in_const_box=False): +def wrap(cpu, value, inputarg_position_or_neg): + assert isinstance(inputarg_position_or_neg, int) if isinstance(lltype.typeOf(value), lltype.Ptr): if lltype.typeOf(value).TO._gckind == 'gc': value = lltype.cast_opaque_ptr(llmemory.GCREF, value) - if in_const_box: + if inputarg_position_or_neg < 0: return history.ConstPtr(value) else: - res = history.RefFrontendOp(0) - res.setref_base(value) - return res + return history.RefFrontendOp(inputarg_position_or_neg, value) else: value = ptr2int(value) # fall through to the end of the function @@ -89,12 +88,10 @@ def wrap(cpu, value, in_const_box=False): value = longlong.getfloatstorage(value) else: value = rffi.cast(lltype.SignedLongLong, value) - if in_const_box: + if inputarg_position_or_neg < 0: return history.ConstFloat(value) else: - res = history.FloatFrontendOp(0) - res.setfloatstorage(value) - return res + return history.FloatFrontendOp(inputarg_position_or_neg, value) elif isinstance(value, str) or isinstance(value, unicode): assert len(value) == 1 # must be a character value = ord(value) @@ -102,12 +99,10 @@ def wrap(cpu, value, in_const_box=False): value = longlong.singlefloat2int(value) else: value = intmask(value) - if in_const_box: + if inputarg_position_or_neg < 0: return history.ConstInt(value) else: - res = history.IntFrontendOp(0) - res.setint(value) - return res + return history.IntFrontendOp(inputarg_position_or_neg, value) @specialize.arg(0) def equal_whatever(TYPE, x, y): diff --git a/rpython/jit/tool/oparser.py b/rpython/jit/tool/oparser.py index 7ab0a287ac..f2bea00644 100644 --- a/rpython/jit/tool/oparser.py +++ b/rpython/jit/tool/oparser.py @@ -418,12 +418,12 @@ def pick_cls(inp): from rpython.jit.metainterp import history if inp.type == 'i': - return history.IntFrontendOp + return lambda pos: 
history.IntFrontendOp(pos, -1) elif inp.type == 'r': - return history.RefFrontendOp + return lambda pos: history.RefFrontendOp(pos, history.RefFrontendOp._resref) else: assert inp.type == 'f' - return history.FloatFrontendOp + return lambda pos: history.FloatFrontendOp(pos, -3.14) def convert_loop_to_trace(loop, metainterp_sd, skip_last=False): from rpython.jit.metainterp.opencoder import Trace @@ -438,12 +438,15 @@ def convert_loop_to_trace(loop, metainterp_sd, skip_last=False): class jitcode: index = 200 - inputargs = [pick_cls(inparg)(i) for i, inparg in - enumerate(loop.inputargs)] + inputargs = [] + for i, inparg in enumerate(loop.inputargs): + inputargs.append(pick_cls(inparg)(i * 2)) + inputargs.append(None) # emulate "holes" mapping = {} - for one, two in zip(loop.inputargs, inputargs): + for one, two in zip(loop.inputargs, inputargs[::2]): mapping[one] = two - trace = Trace(inputargs, metainterp_sd) + trace = Trace(len(inputargs), metainterp_sd) + trace.set_inputargs([x for x in inputargs if x is not None]) ops = loop.operations if skip_last: ops = ops[:-1] diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py index 592bff3728..ce19b7e377 100644 --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -760,8 +760,6 @@ class rbigint(object): if selfsize <= i: result = _x_mul(self, other) - """elif 2 * selfsize <= othersize: - result = _k_lopsided_mul(self, other)""" else: result = _k_mul(self, other) else: @@ -809,19 +807,8 @@ class rbigint(object): div = _bigint_true_divide(self, other) return div - @jit.elidable def floordiv(self, other): - if other.numdigits() == 1: - otherint = other.digit(0) * other.sign - assert int_in_valid_range(otherint) - return self.int_floordiv(otherint) - - div, mod = _divrem(self, other) - if mod.sign * other.sign == -1: - if div.sign == 0: - return ONENEGATIVERBIGINT - div = div.int_sub(1) - + div, mod = self.divmod(other) return div def div(self, other): @@ -858,21 +845,8 @@ class rbigint(object): def int_div(self, iother): return self.int_floordiv(iother) - @jit.elidable def mod(self, other): - if other.sign == 0: - raise ZeroDivisionError("long division or modulo by zero") - if self.sign == 0: - return NULLRBIGINT - - if other.numdigits() == 1: - otherint = other.digit(0) * other.sign - assert int_in_valid_range(otherint) - return self.int_mod(otherint) - else: - div, mod = _divrem(self, other) - if mod.sign * other.sign == -1: - mod = mod.add(other) + div, mod = self.divmod(other) return mod @jit.elidable @@ -953,6 +927,15 @@ class rbigint(object): have different signs. We then subtract one from the 'div' part of the outcome to keep the invariant intact. """ + if other.sign == 0: + raise ZeroDivisionError("long division or modulo by zero") + if self.sign == 0: + return TWO_NULLRBIGINTS + if other.numdigits() == 1 and not (-1 == other.sign != self.sign): + otherint = other.digit(0) * other.sign + assert int_in_valid_range(otherint) + return self.int_divmod(otherint) + if self.numdigits() > 1.2 * other.numdigits() and \ other.numdigits() > HOLDER.DIV_LIMIT * 2: # * 2 to offset setup cost res = divmod_big(self, other) @@ -1507,6 +1490,8 @@ ONERBIGINT = rbigint([ONEDIGIT], 1, 1) ONENEGATIVERBIGINT = rbigint([ONEDIGIT], -1, 1) NULLRBIGINT = rbigint() +TWO_NULLRBIGINTS = (NULLRBIGINT, NULLRBIGINT) + _jmapping = [(5 * SHIFT) % 5, (4 * SHIFT) % 5, (3 * SHIFT) % 5, @@ -1865,19 +1850,31 @@ def _k_mul(a, b): # By picking X to be a power of 2, "*X" is just shifting, and it's # been reduced to 3 multiplies on numbers half the size. 
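The comment above summarizes the classic Karatsuba reduction. A minimal sketch of it on plain non-negative Python ints, ignoring signs and the lopsided fast path the code below adds, with an arbitrary bit cutoff (the real code cuts off at KARATSUBA_CUTOFF digits):

CUTOFF_BITS = 64   # arbitrary; below this, plain multiplication wins

def karatsuba(a, b):
    if a < (1 << CUTOFF_BITS) or b < (1 << CUTOFF_BITS):
        return a * b
    shift = max(a.bit_length(), b.bit_length()) // 2      # X = 2**shift
    ah, al = a >> shift, a & ((1 << shift) - 1)           # a == ah*X + al
    bh, bl = b >> shift, b & ((1 << shift) - 1)           # b == bh*X + bl
    t1 = karatsuba(ah, bh)                                # high halves
    t2 = karatsuba(al, bl)                                # low halves
    t3 = karatsuba(ah + al, bh + bl) - t1 - t2            # == ah*bl + al*bh
    # a*b == t1*X**2 + t3*X + t2, and *X is just a shift
    return (t1 << (2 * shift)) + (t3 << shift) + t2

import random
x, y = random.getrandbits(3000), random.getrandbits(2000)
assert karatsuba(x, y) == x * y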
+ # allocate result for both paths, asize + bsize is always enough + ret = rbigint([NULLDIGIT] * (asize + bsize), 1) + # Split a & b into hi & lo pieces. shift = bsize >> 1 - ah, al = _kmul_split(a, shift) - if ah.sign == 0: - # This may happen now that _k_lopsided_mul ain't catching it. - return _x_mul(a, b) - #assert ah.sign == 1 # the split isn't degenerate - + bh, bl = _kmul_split(b, shift) if a is b: - bh = ah - bl = al + ah = bh + al = bl + elif asize <= shift: + # a is more than 2x smaller than b. it's important that we still use + # .mul to get karatsuba for sub-parts. the computation is just: + # a*(bh*X+bl) = a*bh*X + a*bl + + # multiply lower bits, copy into result + t1 = a.mul(bl) + for i in range(t1.numdigits()): + ret._digits[i] = t1._digits[i] + t2 = a.mul(bh) + i = ret.numdigits() - shift # digits after shift + carry = _v_iadd(ret, shift, i, t2, t2.numdigits()) + ret._normalize() + return ret else: - bh, bl = _kmul_split(b, shift) + ah, al = _kmul_split(a, shift) # The plan: # 1. Allocate result space (asize + bsize digits: that's always @@ -1894,8 +1891,7 @@ def _k_mul(a, b): # 6. Compute (ah+al)*(bh+bl), and add it into the result starting # at shift. - # 1. Allocate result space. - ret = rbigint([NULLDIGIT] * (asize + bsize), 1) + # 1. Allocate result space. (done, see above) # 2. t1 <- ah*bh, and copy into high digits of result. t1 = ah.mul(bh) @@ -1936,6 +1932,52 @@ def _k_mul(a, b): ret._normalize() return ret +""" (*) Why adding t3 can't "run out of room" above. + +Let f(x) mean the floor of x and c(x) mean the ceiling of x. Some facts +to start with: + +1. For any integer i, i = c(i/2) + f(i/2). In particular, + bsize = c(bsize/2) + f(bsize/2). +2. shift = f(bsize/2) +3. asize <= bsize +4. Since we call k_lopsided_mul if asize*2 <= bsize, asize*2 > bsize in this + routine, so asize > bsize/2 >= f(bsize/2) in this routine. + +We allocated asize + bsize result digits, and add t3 into them at an offset +of shift. This leaves asize+bsize-shift allocated digit positions for t3 +to fit into, = (by #1 and #2) asize + f(bsize/2) + c(bsize/2) - f(bsize/2) = +asize + c(bsize/2) available digit positions. + +bh has c(bsize/2) digits, and bl at most f(size/2) digits. So bh+hl has +at most c(bsize/2) digits + 1 bit. + +If asize == bsize, ah has c(bsize/2) digits, else ah has at most f(bsize/2) +digits, and al has at most f(bsize/2) digits in any case. So ah+al has at +most (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 1 bit. + +The product (ah+al)*(bh+bl) therefore has at most + + c(bsize/2) + (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits + +and we have asize + c(bsize/2) available digit positions. We need to show +this is always enough. An instance of c(bsize/2) cancels out in both, so +the question reduces to whether asize digits is enough to hold +(asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits. If asize < bsize, +then we're asking whether asize digits >= f(bsize/2) digits + 2 bits. By #4, +asize is at least f(bsize/2)+1 digits, so this in turn reduces to whether 1 +digit is enough to hold 2 bits. This is so since SHIFT=15 >= 2. If +asize == bsize, then we're asking whether bsize digits is enough to hold +c(bsize/2) digits + 2 bits, or equivalently (by #1) whether f(bsize/2) digits +is enough to hold 2 bits. This is so if bsize >= 2, which holds because +bsize >= KARATSUBA_CUTOFF >= 2. 
+ +Note that since there's always enough room for (ah+al)*(bh+bl), and that's +clearly >= each of ah*bh and al*bl, there's always enough room to subtract +ah*bh and al*bl too. +""" + + def _inplace_divrem1(pout, pin, n): """ Divide bigint pin by non-zero digit n, storing quotient @@ -2217,11 +2259,13 @@ def _divrem(a, b): -class DivLimitHolder: +class LimitHolder: pass -HOLDER = DivLimitHolder() +HOLDER = LimitHolder() HOLDER.DIV_LIMIT = 21 +HOLDER.STR2INT_LIMIT = 2048 +HOLDER.MINSIZE_STR2INT = 4000 def _extract_digits(a, startindex, numdigits): @@ -2402,8 +2446,8 @@ def divmod_big(a, b): elif a.sign < 0: q, r = divmod_big(a.invert(), b) return q.invert(), b.add(r.invert()) - elif a.eq(NULLRBIGINT): - return NULLRBIGINT, NULLRBIGINT + elif a.sign == 0: + return TWO_NULLRBIGINTS else: return _divmod_fast_pos(a, b) @@ -3146,11 +3190,12 @@ BASE_MAX = [0, 1] + [digits_max_for_base(_base) for _base in range(2, 37)] DEC_MAX = digits_max_for_base(10) assert DEC_MAX == BASE_MAX[10] -def _decimalstr_to_bigint(s): +def _decimalstr_to_bigint(s, start=0, lim=-1): # a string that has been already parsed to be decimal and valid, # is turned into a bigint - p = 0 - lim = len(s) + p = start + if lim < 0: + lim = len(s) sign = False if s[p] == '-': sign = True @@ -3167,7 +3212,11 @@ def _decimalstr_to_bigint(s): p += 1 tens *= 10 if tens == DEC_MAX or p == lim: - a = _muladd1(a, tens, dig) + if a is not None: + a = _muladd1(a, tens, dig) + else: + assert dig & MASK == dig + a = rbigint([_store_digit(dig)], int(dig != 0)) tens = 1 dig = 0 if sign and a.sign == 1: @@ -3179,23 +3228,88 @@ def parse_digit_string(parser): base = parser.base if (base & (base - 1)) == 0 and base >= 2: return parse_string_from_binary_base(parser) + if base == 10 and (parser.end - parser.start) > HOLDER.MINSIZE_STR2INT: + # check for errors and potentially remove underscores + s, start, end = parser._all_digits10() + a = _str_to_int_big_base10(s, start, end, HOLDER.STR2INT_LIMIT) + a.sign *= parser.sign + return a a = NULLRBIGINT digitmax = BASE_MAX[base] - tens, dig = 1, 0 + baseexp, dig = 1, 0 while True: digit = parser.next_digit() - if tens == digitmax or digit < 0: - a = _muladd1(a, tens, dig) + if baseexp == digitmax or digit < 0: + if a is not None: + a = _muladd1(a, baseexp, dig) + else: + assert dig & MASK == dig + a = rbigint([_store_digit(dig)], int(dig != 0)) if digit < 0: break dig = digit - tens = base + baseexp = base else: dig = dig * base + digit - tens *= base + baseexp *= base a.sign *= parser.sign return a + +FIVERBIGINT = rbigint.fromint(5) + +def _str_to_int_big_w5pow(w, mem, limit): + """Return 5**w and store the result. + Also possibly save some intermediate results. In context, these + are likely to be reused across various levels of the conversion + to 'int'. + """ + result = mem.get(w, None) + if result is None: + if w <= limit: + result = FIVERBIGINT.int_pow(w) + elif w - 1 in mem: + result = mem[w - 1].int_mul(5) + else: + w2 = w >> 1 + # If w happens to be odd, w-w2 is one larger then w2 + # now. Recurse on the smaller first (w2), so that it's + # in the cache and the larger (w-w2) can be handled by + # the cheaper `w-1 in mem` branch instead. 
+ result = _str_to_int_big_w5pow(w2, mem, limit).mul( + _str_to_int_big_w5pow(w - w2, mem, limit)) + mem[w] = result + return result + +def _str_to_int_big_inner10(s, a, b, mem, limit): + diff = b - a + if diff <= limit: + return _decimalstr_to_bigint(s, a, b) + # choose the midpoint rounding up, as that yields slightly fewer entries in + # mem, see comment in _str_to_int_big_w5pow too + mid = a + (diff + 1) // 2 + right = _str_to_int_big_inner10(s, mid, b, mem, limit) + left = _str_to_int_big_inner10(s, a, mid, mem, limit) + left = left.mul(_str_to_int_big_w5pow(b - mid, mem, limit)).lshift(b - mid) + return right.add(left) + +def _str_to_int_big_base10(s, start, end, limit=20): + """Asymptotically fast conversion of a 'str' to an 'int'.""" + + # Function due to Bjorn Martinsson. See GH issue #90716 for details. + # https://github.com/python/cpython/issues/90716 + # + # The implementation in longobject.c of base conversion algorithms + # between power-of-2 and non-power-of-2 bases are quadratic time. + # This function implements a divide-and-conquer algorithm making use + # of Python's built in big int multiplication. Since Python uses the + # Karatsuba algorithm for multiplication, the time complexity + # of this function is O(len(s)**1.58). + + mem = {} + result = _str_to_int_big_inner10(s, start, end, mem, limit) + return result + def parse_string_from_binary_base(parser): # The point to this routine is that it takes time linear in the number of # string characters. diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py index 238cd2f05c..1f572f9155 100644 --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -737,6 +737,28 @@ class NumberStringParser: else: return -1 + def _all_digits10(self): + for index in range(self.start, self.end): + c = self.s[index] + if not ('0' <= c <= '9'): + if c == "_" and self.allow_underscores: + break + else: + self.error() + else: + # don't need a copy, no underscores + return self.s, self.start, self.end + assert self.allow_underscores + + builder = StringBuilder(self.end - self.start) + i = 0 + while True: + d = self.next_digit() + if d < 0: + return builder.build(), 0, i + builder.append(chr(d + ord('0'))) + i += 1 + def prev_digit(self): # After exhausting all n digits in next_digit(), you can walk them # again in reverse order by calling prev_digit() exactly n times diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py index 292641cf51..828cbefc18 100644 --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -16,10 +16,13 @@ from rpython.rlib import rbigint as lobj from rpython.rlib.rarithmetic import r_uint, r_longlong, r_ulonglong, intmask, LONG_BIT from rpython.rlib.rbigint import (rbigint, SHIFT, MASK, KARATSUBA_CUTOFF, _store_digit, _mask_digit, InvalidEndiannessError, InvalidSignednessError, - gcd_lehmer, lehmer_xgcd, gcd_binary, divmod_big, ONERBIGINT, MaxIntError) + gcd_lehmer, lehmer_xgcd, gcd_binary, divmod_big, ONERBIGINT, MaxIntError, + _str_to_int_big_w5pow, _str_to_int_big_base10, _str_to_int_big_inner10) +from rpython.rlib.rbigint import HOLDER from rpython.rlib.rfloat import NAN from rpython.rtyper.test.test_llinterp import interpret from rpython.translator.c.test.test_standalone import StandaloneTests +from rpython.rtyper.tool.rfficache import platform from hypothesis import given, strategies, example, settings @@ -34,9 +37,74 @@ def makelong(data): return -r return r +def makelong_long_sequences(data, ndigits): + """ From CPython: + Get quasi-random long 
consisting of ndigits digits (in base BASE). + quasi == the most-significant digit will not be 0, and the number + is constructed to contain long strings of 0 and 1 bits. These are + more likely than random bits to provoke digit-boundary errors. + The sign of the number is also random. + """ + nbits_hi = ndigits * SHIFT + nbits_lo = nbits_hi - SHIFT + 1 + answer = 0L + nbits = 0 + r = data.draw(strategies.integers(0, SHIFT * 2 - 1)) | 1 # force 1 bits to start + while nbits < nbits_lo: + bits = (r >> 1) + 1 + bits = min(bits, nbits_hi - nbits) + assert 1 <= bits <= SHIFT + nbits = nbits + bits + answer = answer << bits + if r & 1: + answer = answer | ((1 << bits) - 1) + r = data.draw(strategies.integers(0, SHIFT * 2 - 1)) + assert nbits_lo <= nbits <= nbits_hi + if data.draw(strategies.booleans()): + answer = -answer + return answer + + +MAXDIGITS = 15 +digitsizes = strategies.sampled_from( + range(1, MAXDIGITS+1) + + range(KARATSUBA_CUTOFF, KARATSUBA_CUTOFF + 14) + + [KARATSUBA_CUTOFF * 3, KARATSUBA_CUTOFF * 1000] +) + +def make_biglongs_for_division(data): + size1 = data.draw(digitsizes) + val1 = makelong_long_sequences(data, size1) + size2 = data.draw(digitsizes) + val2 = makelong_long_sequences(data, size2) + return val1, val2 + +tuples_biglongs_for_division = strategies.builds( + make_biglongs_for_division, + strategies.data()) + biglongs = strategies.builds(makelong, strategies.data()) +def makerarithint(data): + classlist = platform.numbertype_to_rclass.values() + cls = data.draw(strategies.sampled_from(classlist)) + if cls is int: + minimum = -sys.maxint-1 + maximum = sys.maxint + else: + BITS = cls.BITS + if cls.SIGNED: + minimum = -2 ** (BITS - 1) + maximum = 2 ** (BITS - 1) - 1 + else: + minimum = 0 + maximum = 2 ** BITS - 1 + value = data.draw(strategies.integers(minimum, maximum)) + return cls(value) +rarith_ints = strategies.builds(makerarithint, strategies.data()) + + def gen_signs(l): for s in l: if s == 0: @@ -71,7 +139,6 @@ class TestRLong(object): for op in "add sub mul".split(): r1 = getattr(rl_op1, op)(rl_op2) r2 = getattr(operator, op)(op1, op2) - print op, op1, op2 assert r1.tolong() == r2 def test_frombool(self): @@ -242,7 +309,6 @@ class TestRLong(object): for op3 in gen_signs([1, 2, 5, 1000, 12312312312312235659969696l]): if not op3: continue - print op1, op2, op3 r3 = rl_op1.pow(rl_op2, rbigint.fromlong(op3)) r4 = pow(op1, op2, op3) assert r3.tolong() == r4 @@ -260,7 +326,6 @@ class TestRLong(object): continue r3 = rl_op1.int_pow(op2, rbigint.fromlong(op3)) r4 = pow(op1, op2, op3) - print op1, op2, op3 assert r3.tolong() == r4 def test_int_pow_big(self): @@ -301,12 +366,41 @@ class TestRLong(object): fa = rbigint.fromlong(a) fb = rbigint.fromlong(b) div, mod = divmod_big(fa, fb) - return div.mul(fb).add(mod).eq(fa) + assert div.mul(fb).add(mod).eq(fa) check(2, 3) check(3, 2) check((2 << 1000) - 1, (2 << (65 * 3 + 2)) - 1) check((2 + 5 * 2 ** SHIFT) << (100 * SHIFT), 5 << (100 * SHIFT)) + def test_divmod_big_is_used(self, monkeypatch): + # make sure that the big divmod path is actually hit + monkeypatch.setattr(rbigint, "_divmod_small", None) + fa = rbigint.fromlong(3 ** (SHIFT * HOLDER.DIV_LIMIT * 2)) + fb = rbigint.fromlong(5 ** (SHIFT * HOLDER.DIV_LIMIT)) + div, mod = fa.divmod(fb) + assert div.mul(fb).add(mod).eq(fa) + + def test_karatsuba_not_used_bug(self): + a = rbigint.fromlong(2 ** 2000 + 1) + b = rbigint.fromlong(2 ** 5000 + 7) + assert a.mul(b).tolong() == a.tolong() * b.tolong() + + def test_lopsided_bug(self): + la = 
0x1b8e499a888235ea66f6497e3640bc118592a4ecb800e53e0121af9b2dede38c9323dc160ad564c10ff34095fcc89ecefde3116e7ad99bd5a5b785d811a1e930ae0b0a919623569c99d6c1e779aa5345609a14fc64a83970991d7df672d3bf2fe800766932291b2593382495d1b2a9de1a212d0e517d35764a8a30d060d4218f034807c59728a009683887c3f239f6b958216fd6e36db778bf350941be6ee987f87ea6460ba77f1db154fff175d20117107b5ebd48305b4190d082433419f3daace778d9ce9975ca33293c8b7ad7dd253321e208c22e1bf3833535dd4c76395117e6f32444254fdb9e77cd0b5f8d98c31dafaab720067ef925 + a = rbigint.fromlong(la) + lb = 0x30fcf4a0f2ae98bd28d249c3eeabf902b492ec4f8001978aacada9f76e18b0f9e9234e6013427a3ac705c82716b9fde1c35ac9a7f6d8317bd14643473bca821da73012c9ee77b66bbc287529bbd97797c82e5e327a0e9f0110346e27e894e21c471d44493cbadaed7780410a585a118ad91e88fd02a5b4608483e500ac23c9e1ccf1d4ed7e811c8280647f953cd8d3109cad389a77df7f0f8cd01074e0c52d6380e12798f84637513b41c7029891c90c8f1436a5d5ab4ce656c80405b1f53fbda529ba66c49f0a4b059ea4862fb8a5977758ae4875a74e22b05e98a5dd43f41e6361b0407925e34d8b7fa5698d6d815adf712f7e71d2a8d75ee7749e22e558157d73c1ed1089063dd7a29c915990836b5a951aa77917847bd9807d6c89b4262871127d17ca5a84e2b23bc5eb66137cce412dcbd88622b55b05b710258affcc845a8e1b99d33c187a237eacd21e9628063948f711b2e5617b647f3fe7c28bac1989612a66d6be34d59ffee63e15e0cdf10d43c6f6301c47e7c7f3ca71dc4e312873633957a6054f25d4db49dcc401aba272ff7c23e077c143510a040f5eb80fe096384c3a4ab0604d951710956f84cdefb631a2ed806ad8f5fef5ef1223dbea4b8a7b49309e9672e77c763dbb698432c77cfff875ab5c97d24f4441b5a3704deda8835135e3e6314be281a97963b49eccf06571b634efa16605a0ec2eda8148a6537e24da5fb128cfbde3ea6c28d850eac3815dd2a0a72844a14590124a6e9062befbdf7fb14c7783ee5096481a5ef0ef9dabf4bc831213afc469a5256818e1dba97cae6f63d6cf2b9584361f36b1b8fa60286fe6bc010129b7f99ee250907ed0a134900513bd3c38555de3b085e7e86 + b = rbigint.fromlong(lb) + x = a.mul(b) + assert x.tolong() == la * lb + + def test_mul_bug(self): + x = -0x1fffffffffffe00000000000007fffffffffffffffffe0000000000000fffffffffffffc0000000000000003fffffffffff1fffffffffffffffffffff8000000000000000ff80000000000000fffffff000000000000000000000fff800000000000003fffffffffffffffffffffffffffffffe000000000000000000fffffffffffffffffffffffffffffffffffffffffffffc3ffffffffffffff80000000003fffffffffffffe000000000000003fffffffffffffffffffffffffffffffffffffc000000000000000007ffc00000007fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000fffffffffe0000000000000000000000000000000007ffffffffff8000000000000000000000000007ffffffffe00000000000001ffffffff00000000007fffffffffc0000000000000000000007fffffffffffffe0000000000ffffffffffffffffffffffffffffff0000000000000000000000000000004000000000000000000007fffffffffffffffc00fffffffff80000001fffffffffffe0000000007ffffffffffffffffc000000000000000000000003f00fffffff000000001fffffffffffffffffffffffffffffffffe000000000000003ffffffffffffffc000000000000000000000000000000000000000000000000fffffffffffff8000001ffffffffffffffffffffffffe00000000000003ffffffffffffffffffffffff00000000fffffffffff000000000L + y = 
-0x3fffffffffffc0000000000000000007ffffffffffff800000000000000000001ffffffffffffffc0000000000000000000000ffffffffffffffffffffffffffffc000000000000000000001ffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000007fffffffffff000000000000000000000000fffffffffffffffffffffffffffffffffffffffff0000000003e007fffffffffffffffffff80000000000000000003fffffffffc000000000000007fffc0000000007ffffffffffffff0000000000010000000000000001fffffffffffffffffffffffffffffffffe000000000000fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0000000000001ffff007fff0000000000000000000000001f000000000001fffffffffffffffffc00000000001fffffffffffffffffffffffffffffffffffffff0000000000000000001ffffffffffff00000000000000000000000000000000000003fffffffff00003fffffffe00000000000000000000ffffffffffffffffffffff800001ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000000000000001ffe000001ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff00000000000fffffffffff800000000000000000fffffffffffffffffffe00000000003ffffffffffffffffffffffffffffffffffffffffc000000000000000006000001fffffffe0000000000ffffffffffffffffffffffffff8003fffffffffffffffffffffffffffe0000007fffc0000000000000000000000001ffffffffffffffffffffffffffffffffffff0000000000001fffe00000000000000000000000000000000000000000000000000000003fffffff0000000000007ffffff8000000000000001fffffffffffffffff80001fffffffffffffffffffffffffff800000000000000000001ffffe00000000000000000003fffffffffffffffffffffffff000000000000000fffffffffffffffffffffffffffffc0000000000000003fffffe0000000000000000000000001ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0000000003fff00001ffffffffffffffffffff0000000000001fffffffffffffc0000000000007ffffffffffffffffffffc000000000007fffffffffffffffffff80000000000003ffffffffffffc0000000000000000000000000000000000000000000000ffffe000000000000000000000000000001ffffffffffffffffffffffffffffffffffffe007ffffffffffff000000000000003fffffffffffffffffff800000000000000ff0000000000000000000000000000001ffffffffffffe00000000000007ffffffffffffff8000000000000001ffffffffffffc0000000000007ff000003fffffffffffffffffffffffffffffffffffffe00000007ffffffffffffffffffffe00000007ffffff0000000000000000ffffc00000000000000000ffffffffff8000000000000000fffffe0000000000000000000007fffffffffc000000fe0000000000000000000001ffffff800000000000000001ffffffffff00000000000000000000000000000000000000000000000ffffffffffffffffff000000000000000000000007fffffffffffffc0000fffffffffffffffffffffffffe000003ffffffffffff800000000000001fffffffffffffc000000000000000000000000001fff8000000000000000000000000000fffffffffffffffffffffffff0000000000000000003fe00000003fffffffffffffffff00000000000000ffffffffffe07fffffffffffffffc000000000000000000000003fffffff800000000000000000000003fffffffffffc0000000000000000000000003fffffffffffffffffc0000000000ffffffffffffffffffffffffffffffffffffffffffffffffffe000ffffffffffffffffc000000000000000000000000000000000000000000ffffffffffffffff8000000000000000000000000000000000000000000000000000000000fffffffffffffffc00000000000000003fffffffffffffffffffffffffffffffffffffe00003fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe0000000000003fffffff00000000007ffffffffffc0007ffffffffe00000ffffc000700000000000000fffffffffff80000000000000000000000L + xl = rbigint.fromlong(x) + yl = rbigint.fromlong(y) + assert xl.mul(yl).tolong() == x * y + assert yl.mul(xl).tolong() == x * y + def bigint(lst, sign): for digit in lst: @@ -392,6 +486,8 @@ class Test_rbigint(object): assert 
rbigint.fromstr('123L', 22).tolong() == 10648 + 968 + 66 + 21 assert rbigint.fromstr('123L', 21).tolong() == 441 + 42 + 3 assert rbigint.fromstr('1891234174197319').tolong() == 1891234174197319 + assert rbigint.fromstr('1891_234_17_4_19731_9', allow_underscores=True).tolong() == 1891234174197319 + assert rbigint.fromstr('1_1' * 6000, allow_underscores=True).tolong() == int('11' * 6000) def test__from_numberstring_parser_rewind_bug(self): from rpython.rlib.rstring import NumberStringParser @@ -412,9 +508,13 @@ class Test_rbigint(object): res = p.next_digit() assert res == -1 - @given(longs) - def test_fromstr_hypothesis(self, l): - assert rbigint.fromstr(str(l)).tolong() == l + def test_fromstr_huge(self): + assert _str_to_int_big_base10("1" * 1000, 0, 1000).tolong() == int("1" * 1000) + mem = {} + + result = _str_to_int_big_inner10('123952' * 1000, 0, 6000, mem, 20) + assert len(mem) == 13 + assert result def test_from_numberstring_parser(self): from rpython.rlib.rstring import NumberStringParser @@ -869,7 +969,7 @@ class Test_rbigint(object): finally: rbigint.pow = oldpow - def test_overzelous_assertion(self): + def test_overzealous_assertion(self): a = rbigint.fromlong(-1<<10000) b = rbigint.fromlong(-1<<3000) assert a.mul(b).tolong() == (-1<<10000)*(-1<<3000) @@ -948,14 +1048,6 @@ class Test_rbigint(object): with pytest.raises(OverflowError): i.tobytes(2, 'little', signed=True) - @given(strategies.binary(), strategies.booleans(), strategies.booleans()) - def test_frombytes_tobytes_hypothesis(self, s, big, signed): - # check the roundtrip from binary strings to bigints and back - byteorder = 'big' if big else 'little' - bigint = rbigint.frombytes(s, byteorder=byteorder, signed=signed) - t = bigint.tobytes(len(s), byteorder=byteorder, signed=signed) - assert s == t - def test_gcd(self): assert gcd_binary(2*3*7**2, 2**2*7) == 2*7 pytest.raises(ValueError, gcd_binary, 2*3*7**2, -2**2*7) @@ -1079,7 +1171,6 @@ class TestInternalFunctions(object): b = -0x131313131313131313d0 ra = rbigint.fromlong(a) rb = rbigint.fromlong(b) - from rpython.rlib.rbigint import HOLDER oldval = HOLDER.DIV_LIMIT try: HOLDER.DIV_LIMIT = 2 # set limit low to test divmod_big more @@ -1104,7 +1195,6 @@ class TestInternalFunctions(object): div, rem = f1.int_divmod(sy) div1, rem1 = f1.divmod(rbigint.fromlong(sy)) _div, _rem = divmod(sx, sy) - print sx, sy, " | ", div.tolong(), rem.tolong() assert div1.tolong() == _div assert rem1.tolong() == _rem assert div.tolong() == _div @@ -1197,6 +1287,8 @@ class TestInternalFunctions(object): self.sign = sign self.i = 0 self._digits = digits + self.start = 0 + self.end = len(digits) def next_digit(self): i = self.i if i == len(self._digits): @@ -1264,11 +1356,11 @@ class TestTranslatable(object): assert res def test_args_from_rarith_int(self): - from rpython.rtyper.tool.rfficache import platform from rpython.rlib.rarithmetic import r_int + from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.lltypesystem.rffi import r_int_real classlist = platform.numbertype_to_rclass.values() - fnlist = [] + cases = [] # tuples of (values, strvalues, typename) for r in classlist: if r in (r_int, r_int_real): # and also r_longlong on 64-bit continue @@ -1282,16 +1374,19 @@ class TestTranslatable(object): if not signed: values = [x & mask for x in values] values = [r(x) for x in values] - - def fn(i): - n = rbigint.fromrarith_int(values[i]) - return n.str() - - for i in range(len(values)): - res = fn(i) - assert res == str(long(values[i])) - res = interpret(fn, [i]) - assert 
''.join(res.chars) == str(long(values[i])) + results = [str(long(x)) for x in values] + cases.append((values, results, str(r))) + cases = unrolling_iterable(cases) + def fn(): + for values, results, typname in cases: + for i in range(len(values)): + n = rbigint.fromrarith_int(values[i]) + n = rbigint.fromrarith_int(values[i]) + if n.str() != results[i]: + return typname + str(i) + return None + res = interpret(fn, []) + assert not res def test_truediv_overflow(self): overflowing = 2**1024 - 2**(1024-53-1) @@ -1361,7 +1456,6 @@ class TestHypothesis(object): with pytest.raises(type(e)): f1.divmod(f2) else: - print x, y a, b = f1.divmod(f2) assert (a.tolong(), b.tolong()) == res @@ -1392,7 +1486,6 @@ class TestHypothesis(object): @example(17, 257) @example(510439143470502793407446782273075179618477362188870662225920L, 108089693021945158982483698831267549521L) def test_divmod_big(self, x, y): - from rpython.rlib.rbigint import HOLDER oldval = HOLDER.DIV_LIMIT try: HOLDER.DIV_LIMIT = 2 # set limit low to test divmod_big more @@ -1411,12 +1504,31 @@ class TestHypothesis(object): with pytest.raises(type(e)): divmod_big(f1, f2) else: - print x, y a, b = divmod_big(f1, f2) assert (a.tolong(), b.tolong()) == res finally: HOLDER.DIV_LIMIT = oldval + @given(tuples_biglongs_for_division) + def test_divmod_consistency(self, tup): + lx, ly = tup + ly = ly or 1 + x = rbigint.fromlong(lx) + y = rbigint.fromlong(ly) + q, r = x.divmod(y) + q2, r2 = x.floordiv(y), x.mod(y) + pab, pba = x.mul(y), y.mul(x) + assert pab.eq(pba) + assert q.eq(q2) + assert r.eq(r2) + assert x.eq(q.mul(y).add(r)) + if y.int_gt(0): + assert r.lt(y) + assert r.int_ge(0) + else: + assert y.lt(r) + assert y.int_le(0) + @given(biglongs, ints) def test_int_divmod(self, x, iy): f1 = rbigint.fromlong(x) @@ -1426,7 +1538,6 @@ class TestHypothesis(object): with pytest.raises(type(e)): f1.int_divmod(iy) else: - print x, iy a, b = f1.int_divmod(iy) assert (a.tolong(), b.tolong()) == res @@ -1434,7 +1545,7 @@ class TestHypothesis(object): def test_hash(self, x): # hash of large integers: should be equal to the hash of the # integer reduced modulo 2**64-1, to make decimal.py happy - x = randint(0, sys.maxint**5) + x = abs(x) y = x % (2**64-1) assert rbigint.fromlong(x).hash() == rbigint.fromlong(y).hash() assert rbigint.fromlong(-x).hash() == rbigint.fromlong(-y).hash() @@ -1521,11 +1632,9 @@ class TestHypothesis(object): 99887766554433221113) @settings(max_examples=10) def test_gcd(self, x, y, z): - print(x, y, z) x, y, z = abs(x), abs(y), abs(z) def test(a, b, res): - print(rbigint.fromlong(a)) g = rbigint.fromlong(a).gcd(rbigint.fromlong(b)).tolong() assert g == res @@ -1610,10 +1719,94 @@ class TestHypothesis(object): r1 = rx.abs_rshift_and_mask(r_ulonglong(shift), mask) assert r1 == (abs(x) >> shift) & mask + @given(biglongs, strategies.integers(min_value=1, max_value=10000)) + def test_str_to_int_big_base10(self, l, limit): + l = abs(l) + s = str(l) + assert _str_to_int_big_base10(str(l), 0, len(s), limit).tolong() == l + + @given(biglongs) + def test_fromstr(self, l): + assert rbigint.fromstr(str(l)).tolong() == l + + @given(biglongs) + def test_fromstr_str_consistency(self, l): + assert rbigint.fromstr(rbigint.fromlong(l).str()).tolong() == l + + @given(biglongs) + def test_fromstr_small_limit(self, l): + # set limits to 2 to stress the recursive algorithm some more + oldval = HOLDER.STR2INT_LIMIT + oldval2 = HOLDER.MINSIZE_STR2INT + try: + HOLDER.STR2INT_LIMIT = 2 + HOLDER.MINSIZE_STR2INT = 1 + assert rbigint.fromstr(str(l)).tolong() == 
l + assert rbigint.fromstr(str(l) + "_1", allow_underscores=True).tolong() == int(str(l) + '1') + finally: + HOLDER.STR2INT_LIMIT = oldval + HOLDER.MINSIZE_STR2INT = 1 + + @given(strategies.integers(min_value=1, max_value=10000), strategies.integers(min_value=1, max_value=10000)) + @settings(max_examples=10) + def test_str_to_int_big_w5pow(self, exp, limit): + mem = {} + assert (_str_to_int_big_w5pow(exp, mem, limit).tolong() == 5 ** exp == + rbigint.fromint(5).int_pow(exp).tolong()) + @given(biglongs) def test_bit_count(self, val): assert rbigint.fromlong(val).bit_count() == bin(abs(val)).count("1") + @given(strategies.binary(), strategies.booleans(), strategies.booleans()) + def test_frombytes_tobytes_hypothesis(self, s, big, signed): + # check the roundtrip from binary strings to bigints and back + byteorder = 'big' if big else 'little' + bigint = rbigint.frombytes(s, byteorder=byteorder, signed=signed) + t = bigint.tobytes(len(s), byteorder=byteorder, signed=signed) + assert s == t + + @given(biglongs, biglongs, biglongs) + def test_distributive(self, a, b, c): + la = rbigint.fromlong(a) + lb = rbigint.fromlong(b) + lc = rbigint.fromlong(c) + # a * (b + c) == a * b + a * c + assert la.mul(lb.add(lc)).eq(la.mul(lb).add(la.mul(lc))) + + @given(biglongs, biglongs, biglongs) + def test_associative(self, a, b, c): + la = rbigint.fromlong(a) + lb = rbigint.fromlong(b) + lc = rbigint.fromlong(c) + # a * (b * c) == (a * b) * c + assert la.mul(lb.mul(lc)).eq(la.mul(lb).mul(lc)) + # a + (b + c) == (a + b) + c + assert la.add(lb.add(lc)).eq(la.add(lb).add(lc)) + + @given(biglongs, biglongs) + def test_commutative(self, a, b): + la = rbigint.fromlong(a) + lb = rbigint.fromlong(b) + # a * b == b * a + assert la.mul(lb).eq(lb.mul(la)) + # a + b == b + a + assert la.add(lb).eq(lb.add(la)) + + @given(longs, strategies.integers(0, 100), strategies.integers(0, 100)) + @settings(max_examples=10) + def test_pow_mul(self, a, b, c): + la = rbigint.fromlong(a) + lb = rbigint.fromlong(b) + lc = rbigint.fromlong(c) + # a ** (b + c) == a ** b * a ** c + assert la.pow(lb.add(lc)).eq(la.pow(lb).mul(la.pow(lc))) + + @given(rarith_ints) + def test_args_from_rarith_int(self, i): + li = rbigint.fromrarith_int(i) + assert li.tolong() == int(i) + @pytest.mark.parametrize(['methname'], [(methodname, ) for methodname in dir(TestHypothesis) if methodname.startswith("test_")]) def test_hypothesis_small_shift(methname): @@ -1625,12 +1818,13 @@ def test_hypothesis_small_shift(methname): env = os.environ.copy() parent = os.path.dirname env['PYTHONPATH'] = parent(parent(parent(parent(__file__)))) - p = subprocess.Popen([sys.executable, os.path.abspath(__file__), methname], + p = subprocess.Popen(" ".join([sys.executable, os.path.abspath(__file__), methname]), stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env=env) stdout, stderr = p.communicate() - print stdout - print stderr + if p.returncode: + print stdout + print stderr assert not p.returncode def _get_hacked_rbigint(shift): @@ -1648,7 +1842,7 @@ def _get_hacked_rbigint(shift): def run(): shift = 9 - print "USING SHIFT", shift + print "USING SHIFT", shift, sys.argv[1] _hacked_rbigint = _get_hacked_rbigint(shift) globals().update(_hacked_rbigint.__dict__) # emulate import * assert SHIFT == shift diff --git a/rpython/rlib/unicodedata/test/test_unicodedata.py b/rpython/rlib/unicodedata/test/test_unicodedata.py index 7a8ad886c1..b2c0f0a430 100644 --- a/rpython/rlib/unicodedata/test/test_unicodedata.py +++ b/rpython/rlib/unicodedata/test/test_unicodedata.py @@ 
-251,6 +251,6 @@ def test_named_sequence():
 
 def test_cjk_13_missing_range_bug():
     assert unicodedb_13_0_0.name(0x30000) == 'CJK UNIFIED IDEOGRAPH-30000'
-    assert unicodedb_13_0_0.name(0x3134a) == 'CJK UNIFIED IDEOGRAPH-3134a'
-    assert unicodedb_13_0_0.name(0x3104f) == 'CJK UNIFIED IDEOGRAPH-3134a'
+    assert unicodedb_13_0_0.name(0x3134a) == 'CJK UNIFIED IDEOGRAPH-3134A'
+    assert unicodedb_13_0_0.name(0x3104f) == 'CJK UNIFIED IDEOGRAPH-3104F'
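
For readers following the rbigint changes above: the new base-10 string-to-int path (_str_to_int_big_base10, _str_to_int_big_inner10 and _str_to_int_big_w5pow) is easier to see in a plain-Python sketch. The names below mirror those helpers, but ordinary Python ints stand in for rbigint and the cutoff that the RPython code reads from HOLDER.STR2INT_LIMIT is passed as a plain argument; this is an illustration of the algorithm, not the implementation in the patch.

def w5pow(w, mem, limit):
    # Return 5**w, caching every width seen so far in `mem` so that repeated
    # widths coming out of the recursion are only computed once.
    result = mem.get(w)
    if result is None:
        if w <= limit:
            result = 5 ** w
        elif w - 1 in mem:
            result = mem[w - 1] * 5
        else:
            w2 = w >> 1
            # recurse on the smaller half (w2) first, so the larger half
            # (w - w2) can often be served by the cheap `w - 1 in mem` branch
            result = w5pow(w2, mem, limit) * w5pow(w - w2, mem, limit)
        mem[w] = result
    return result

def str_to_int_inner(s, a, b, mem, limit):
    # Convert the decimal digits s[a:b]: split the digit string in half,
    # convert both halves recursively, then recombine as
    #   value = left_half * 10**(length of right half) + right_half
    # where 10**k is computed as (5**k) << k.
    if b - a <= limit:
        return int(s[a:b])             # short pieces: plain conversion
    mid = a + (b - a + 1) // 2         # midpoint rounded up, as in the patch
    right = str_to_int_inner(s, mid, b, mem, limit)
    left = str_to_int_inner(s, a, mid, mem, limit)
    return (left * w5pow(b - mid, mem, limit) << (b - mid)) + right

def str_to_int(s, limit=2048):
    return str_to_int_inner(s, 0, len(s), {}, limit)

assert str_to_int("123952" * 1000) == int("123952" * 1000)

Caching the 5**k factors in one shared dictionary and splitting on digit counts is what brings the conversion down to roughly the cost of the underlying Karatsuba multiplication, as the comment in the patch notes.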
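
The NumberStringParser._all_digits10 helper added in rpython/rlib/rstring.py feeds that fast path. A rough plain-Python sketch of its contract follows; the real method walks the parser's own buffer via next_digit(), which also enforces the underscore placement rules, so this sketch only shows the copy-avoidance idea and uses a hypothetical free-standing signature.

def all_digits10(s, start, end, allow_underscores):
    # Fast path: the slice is already pure digits; return it without copying.
    for i in range(start, end):
        c = s[i]
        if '0' <= c <= '9':
            continue
        if c == '_' and allow_underscores:
            break                      # fall through to the copying slow path
        raise ValueError('invalid literal')
    else:
        return s, start, end
    # Slow path: rebuild the digit string with the underscores dropped.
    digits = [c for c in s[start:end] if c != '_']
    if not all('0' <= c <= '9' for c in digits):
        raise ValueError('invalid literal')
    return ''.join(digits), 0, len(digits)

assert all_digits10('1891234', 0, 7, True) == ('1891234', 0, 7)
assert all_digits10('18_91_234', 0, 9, True) == ('1891234', 0, 7)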
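
The new hypothesis test test_divmod_consistency exercises the usual floor-division invariants at the rbigint level. Restated on plain Python ints with arbitrary sample values (not taken from the patch):

x, y = 3 ** 700, -(5 ** 300)           # arbitrary nonzero operands
q, r = divmod(x, y)
assert q * y + r == x                  # quotient/remainder identity
assert q == x // y and r == x % y      # divmod agrees with floordiv/mod
# the remainder is bounded by y and has y's sign (or is zero)
assert (0 <= r < y) if y > 0 else (y < r <= 0)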