You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
97 lines
4.1 KiB
97 lines
4.1 KiB
From 36ae9beb04763d498df2114657bfbbcfe58bf913 Mon Sep 17 00:00:00 2001 |
|
From: Brandt Bucher <brandt@python.org> |
|
Date: Mon, 23 Aug 2021 18:34:17 -0700 |
|
Subject: [PATCH] Serialize set and frozenset elements deterministically |
|
|
|
--- |
|
Lib/test/test_marshal.py | 25 +++++++++++++++ |
|
.../2021-08-23-21-39-59.bpo-37596.ojRcwB.rst | 2 ++ |
|
Python/marshal.c | 32 +++++++++++++++++++ |
|
3 files changed, 59 insertions(+) |
|
create mode 100644 Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst |
|
|
|
--- a/Lib/test/test_marshal.py |
|
+++ b/Lib/test/test_marshal.py |
|
@@ -1,5 +1,6 @@ |
|
from test import support |
|
from test.support import os_helper |
|
+from test.support.script_helper import assert_python_ok |
|
import array |
|
import io |
|
import marshal |
|
@@ -318,6 +319,31 @@ class BugsTestCase(unittest.TestCase): |
|
for i in range(len(data)): |
|
self.assertRaises(EOFError, marshal.loads, data[0: i]) |
|
|
|
+ def test_deterministic_sets(self): |
|
+ # bpo-37596: To support reproducible builds, sets and frozensets need to |
|
+ # have their elements serialized in a consistent order (even when they |
|
+ # have been scrambled by hash randomization): |
|
+ for kind in ("set", "frozenset"): |
|
+ for elements in ( |
|
+ "float('nan'), b'a', b'b', b'c', 'x', 'y', 'z'", |
|
+ # Also test for bad interactions with backreferencing: |
|
+ "('string', 1), ('string', 2), ('string', 3)", |
|
+ ): |
|
+ s = f"{kind}([{elements}])" |
|
+ with self.subTest(s): |
|
+ # First, make sure that our test case still has different |
|
+ # orders under hash seeds 0 and 1. If this check fails, we |
|
+ # need to update this test with different elements: |
|
+ args = ["-c", f"print({s})"] |
|
+ _, repr_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0") |
|
+ _, repr_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") |
|
+ self.assertNotEqual(repr_0, repr_1) |
|
+ # Then, perform the actual test: |
|
+ args = ["-c", f"import marshal; print(marshal.dumps({s}))"] |
|
+ _, dump_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0") |
|
+ _, dump_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") |
|
+ self.assertEqual(dump_0, dump_1) |
|
+ |
|
LARGE_SIZE = 2**31 |
|
pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 |
|
|
|
--- a/Python/marshal.c |
|
+++ b/Python/marshal.c |
|
@@ -502,9 +502,41 @@ w_complex_object(PyObject *v, char flag, |
|
W_TYPE(TYPE_SET, p); |
|
n = PySet_GET_SIZE(v); |
|
W_SIZE(n, p); |
|
+ // bpo-37596: To support reproducible builds, sets and frozensets need |
|
+ // to have their elements serialized in a consistent order (even when |
|
+ // they have been scrambled by hash randomization). To ensure this, we |
|
+ // use an order equivalent to sorted(v, key=marshal.dumps): |
|
+ PyObject *pairs = PyList_New(0); |
|
+ if (pairs == NULL) { |
|
+ p->error = WFERR_NOMEMORY; |
|
+ return; |
|
+ } |
|
while (_PySet_NextEntry(v, &pos, &value, &hash)) { |
|
+ PyObject *dump = PyMarshal_WriteObjectToString(value, p->version); |
|
+ if (dump == NULL) { |
|
+ p->error = WFERR_UNMARSHALLABLE; |
|
+ goto anyset_done; |
|
+ } |
|
+ PyObject *pair = PyTuple_Pack(2, dump, value); |
|
+ Py_DECREF(dump); |
|
+ if (pair == NULL || PyList_Append(pairs, pair)) { |
|
+ p->error = WFERR_NOMEMORY; |
|
+ Py_XDECREF(pair); |
|
+ goto anyset_done; |
|
+ } |
|
+ Py_DECREF(pair); |
|
+ } |
|
+ if (PyList_Sort(pairs)) { |
|
+ p->error = WFERR_NOMEMORY; |
|
+ goto anyset_done; |
|
+ } |
|
+ for (Py_ssize_t i = 0; i < n; i++) { |
|
+ PyObject *pair = PyList_GET_ITEM(pairs, i); |
|
+ value = PyTuple_GET_ITEM(pair, 1); |
|
w_object(value, p); |
|
} |
|
+ anyset_done: |
|
+ Py_DECREF(pairs); |
|
} |
|
else if (PyCode_Check(v)) { |
|
PyCodeObject *co = (PyCodeObject *)v;
|
|
|