aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVictor Stinner <vstinner@python.org>2020-12-26 02:58:33 +0100
committerGitHub <noreply@github.com>2020-12-26 02:58:33 +0100
commitea251806b8dffff11b30d2182af1e589caf88acf (patch)
tree10d413170c8e71dd19079b3d22eea4ebd5ddf7a5 /Objects
parentbpo-42694: Prevent creating _curses_panel.panel (GH-23948) (diff)
downloadcpython-ea251806b8dffff11b30d2182af1e589caf88acf.tar.gz
cpython-ea251806b8dffff11b30d2182af1e589caf88acf.tar.bz2
cpython-ea251806b8dffff11b30d2182af1e589caf88acf.zip
bpo-40521: Per-interpreter interned strings (GH-20085)
Make the Unicode dictionary of interned strings compatible with subinterpreters. Remove the INTERN_NAME_STRINGS macro in typeobject.c: names are always now interned (even if EXPERIMENTAL_ISOLATED_SUBINTERPRETERS macro is defined). _PyUnicode_ClearInterned() now uses PyDict_Next() to no longer allocate memory, to ensure that the interned dictionary is cleared.
Diffstat (limited to 'Objects')
-rw-r--r--Objects/typeobject.c22
-rw-r--r--Objects/unicodeobject.c89
2 files changed, 28 insertions, 83 deletions
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 661ccb71ab0..43c499a0452 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -48,11 +48,6 @@ typedef struct PySlot_Offset {
} PySlot_Offset;
-/* bpo-40521: Interned strings are shared by all subinterpreters */
-#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
-# define INTERN_NAME_STRINGS
-#endif
-
/* alphabetical order */
_Py_IDENTIFIER(__abstractmethods__);
_Py_IDENTIFIER(__class__);
@@ -3527,7 +3522,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
if (name == NULL)
return -1;
}
-#ifdef INTERN_NAME_STRINGS
if (!PyUnicode_CHECK_INTERNED(name)) {
PyUnicode_InternInPlace(&name);
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -3537,7 +3531,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
return -1;
}
}
-#endif
}
else {
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -7683,17 +7676,10 @@ _PyTypes_InitSlotDefs(void)
for (slotdef *p = slotdefs; p->name; p++) {
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
assert(!p[1].name || p->offset <= p[1].offset);
-#ifdef INTERN_NAME_STRINGS
p->name_strobj = PyUnicode_InternFromString(p->name);
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
return _PyStatus_NO_MEMORY();
}
-#else
- p->name_strobj = PyUnicode_FromString(p->name);
- if (!p->name_strobj) {
- return _PyStatus_NO_MEMORY();
- }
-#endif
}
slotdefs_initialized = 1;
return _PyStatus_OK();
@@ -7718,24 +7704,16 @@ update_slot(PyTypeObject *type, PyObject *name)
int offset;
assert(PyUnicode_CheckExact(name));
-#ifdef INTERN_NAME_STRINGS
assert(PyUnicode_CHECK_INTERNED(name));
-#endif
assert(slotdefs_initialized);
pp = ptrs;
for (p = slotdefs; p->name; p++) {
assert(PyUnicode_CheckExact(p->name_strobj));
assert(PyUnicode_CheckExact(name));
-#ifdef INTERN_NAME_STRINGS
if (p->name_strobj == name) {
*pp++ = p;
}
-#else
- if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
- *pp++ = p;
- }
-#endif
}
*pp = NULL;
for (pp = ptrs; *pp; pp++) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3238d1e692a..a03ca9a10d1 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -206,22 +206,6 @@ extern "C" {
# define OVERALLOCATE_FACTOR 4
#endif
-/* bpo-40521: Interned strings are shared by all interpreters. */
-#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
-# define INTERNED_STRINGS
-#endif
-
-/* This dictionary holds all interned unicode strings. Note that references
- to strings in this dictionary are *not* counted in the string's ob_refcnt.
- When the interned string reaches a refcnt of 0 the string deallocation
- function will delete the reference from this dictionary.
-
- Another way to look at this is that to say that the actual reference
- count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
-*/
-#ifdef INTERNED_STRINGS
-static PyObject *interned = NULL;
-#endif
static struct _Py_unicode_state*
get_unicode_state(void)
@@ -1946,7 +1930,8 @@ unicode_dealloc(PyObject *unicode)
break;
case SSTATE_INTERNED_MORTAL:
-#ifdef INTERNED_STRINGS
+ {
+ struct _Py_unicode_state *state = get_unicode_state();
/* Revive the dead object temporarily. PyDict_DelItem() removes two
references (key and value) which were ignored by
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
@@ -1954,14 +1939,14 @@ unicode_dealloc(PyObject *unicode)
PyDict_DelItem(). */
assert(Py_REFCNT(unicode) == 0);
Py_SET_REFCNT(unicode, 3);
- if (PyDict_DelItem(interned, unicode) != 0) {
+ if (PyDict_DelItem(state->interned, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL);
}
assert(Py_REFCNT(unicode) == 1);
Py_SET_REFCNT(unicode, 0);
-#endif
break;
+ }
case SSTATE_INTERNED_IMMORTAL:
_PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
@@ -11536,12 +11521,11 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
if (PyUnicode_CHECK_INTERNED(left))
return 0;
-#ifdef INTERNED_STRINGS
assert(_PyUnicode_HASH(right_uni) != -1);
Py_hash_t hash = _PyUnicode_HASH(left);
- if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
+ if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
return 0;
-#endif
+ }
return unicode_compare_eq(left, right_uni);
}
@@ -15765,23 +15749,21 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}
-#ifdef INTERNED_STRINGS
if (PyUnicode_READY(s) == -1) {
PyErr_Clear();
return;
}
- if (interned == NULL) {
- interned = PyDict_New();
- if (interned == NULL) {
+ struct _Py_unicode_state *state = get_unicode_state();
+ if (state->interned == NULL) {
+ state->interned = PyDict_New();
+ if (state->interned == NULL) {
PyErr_Clear(); /* Don't leave an exception */
return;
}
}
- PyObject *t;
- t = PyDict_SetDefault(interned, s, s);
-
+ PyObject *t = PyDict_SetDefault(state->interned, s, s);
if (t == NULL) {
PyErr_Clear();
return;
@@ -15798,13 +15780,9 @@ PyUnicode_InternInPlace(PyObject **p)
this. */
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
-#else
- // PyDict expects that interned strings have their hash
- // (PyASCIIObject.hash) already computed.
- (void)unicode_hash(s);
-#endif
}
+
void
PyUnicode_InternImmortal(PyObject **p)
{
@@ -15838,35 +15816,25 @@ PyUnicode_InternFromString(const char *cp)
void
_PyUnicode_ClearInterned(PyThreadState *tstate)
{
- if (!_Py_IsMainInterpreter(tstate)) {
- // interned dict is shared by all interpreters
- return;
- }
-
- if (interned == NULL) {
- return;
- }
- assert(PyDict_CheckExact(interned));
-
- PyObject *keys = PyDict_Keys(interned);
- if (keys == NULL) {
- PyErr_Clear();
+ struct _Py_unicode_state *state = &tstate->interp->unicode;
+ if (state->interned == NULL) {
return;
}
- assert(PyList_CheckExact(keys));
+ assert(PyDict_CheckExact(state->interned));
/* Interned unicode strings are not forcibly deallocated; rather, we give
them their stolen references back, and then clear and DECREF the
interned dict. */
- Py_ssize_t n = PyList_GET_SIZE(keys);
#ifdef INTERNED_STATS
- fprintf(stderr, "releasing %zd interned strings\n", n);
+ fprintf(stderr, "releasing %zd interned strings\n",
+ PyDict_GET_SIZE(state->interned));
Py_ssize_t immortal_size = 0, mortal_size = 0;
#endif
- for (Py_ssize_t i = 0; i < n; i++) {
- PyObject *s = PyList_GET_ITEM(keys, i);
+ Py_ssize_t pos = 0;
+ PyObject *s, *ignored_value;
+ while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
assert(PyUnicode_IS_READY(s));
switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15896,10 +15864,9 @@ _PyUnicode_ClearInterned(PyThreadState *tstate)
"total size of all interned strings: %zd/%zd mortal/immortal\n",
mortal_size, immortal_size);
#endif
- Py_DECREF(keys);
- PyDict_Clear(interned);
- Py_CLEAR(interned);
+ PyDict_Clear(state->interned);
+ Py_CLEAR(state->interned);
}
@@ -16269,19 +16236,19 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
void
_PyUnicode_Fini(PyThreadState *tstate)
{
+ struct _Py_unicode_state *state = &tstate->interp->unicode;
+
// _PyUnicode_ClearInterned() must be called before
+ assert(state->interned == NULL);
- struct _Py_unicode_state *state = &tstate->interp->unicode;
+ _PyUnicode_FiniEncodings(&state->fs_codec);
- Py_CLEAR(state->empty_string);
+ unicode_clear_identifiers(tstate);
for (Py_ssize_t i = 0; i < 256; i++) {
Py_CLEAR(state->latin1[i]);
}
-
- unicode_clear_identifiers(tstate);
-
- _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
+ Py_CLEAR(state->empty_string);
}