From 6e1023c850ef6df3bac7f82d78f1ff0b7656db9e Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Wed, 1 Jul 2026 10:14:27 +0800 Subject: [PATCH 1/5] gh-152718: Reject oversized table counts in the profiling binary reader --- .../test_binary_format.py | 82 +++++++++++++++++++ ...-07-01-12-00-00.gh-issue-152718.pr0f1l.rst | 4 + Modules/_remote_debugging/binary_io.h | 5 ++ Modules/_remote_debugging/binary_io_reader.c | 20 +++++ 4 files changed, 111 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py index 5efc60a9211175..42f96e3b7870b6 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py @@ -980,6 +980,9 @@ class TestBinaryFormatValidation(BinaryFormatTestBase): HDR_OFF_STR_TABLE = 36 HDR_OFF_FRAME_TABLE = 44 FILE_HEADER_PLACEHOLDER_SIZE = 64 + FILE_FOOTER_SIZE = 32 + FTR_OFF_STRINGS = 0 + FTR_OFF_FRAMES = 4 def test_replay_rejects_more_threads_than_declared(self): """Replay rejects files with more unique threads than the header declares.""" @@ -1041,6 +1044,85 @@ def test_replay_rejects_trailing_partial_sample_header(self): reader.replay_samples(RawCollector()) self.assertEqual(str(cm.exception), "Truncated sample data: 1 trailing bytes") + # Minimum on-disk size of one table entry (see binary_io.h). + MIN_STRING_ENTRY_SIZE = 1 + MIN_FRAME_ENTRY_SIZE = 7 + + def _read_offset(self, filename, hdr_off): + with open(filename, "rb") as raw: + raw.seek(hdr_off) + return struct.unpack("=Q", raw.read(8))[0] + + def _patch_footer_count(self, filename, ftr_off, value): + size = os.path.getsize(filename) + with open(filename, "r+b") as raw: + raw.seek(size - self.FILE_FOOTER_SIZE + ftr_off) + raw.write(struct.pack("=I", value)) + + def test_open_rejects_string_count_larger_than_file(self): + """Open rejects a footer string count larger than the file.""" + samples = [[make_interpreter(0, [ + make_thread(1, [make_frame("s.py", 10, "s")]) + ])]] + filename = self.create_binary_file(samples, compression="none") + size = os.path.getsize(filename) + str_off = self._read_offset(filename, self.HDR_OFF_STR_TABLE) + max_strings = (size - str_off) // self.MIN_STRING_ENTRY_SIZE + self._patch_footer_count(filename, self.FTR_OFF_STRINGS, 0xFFFFFFFF) + + with self.assertRaises(ValueError) as cm: + with BinaryReader(filename): + pass + self.assertEqual( + str(cm.exception), + f"Invalid string count 4294967295 exceeds maximum " + f"possible {max_strings}", + ) + + def test_open_rejects_frame_count_larger_than_file(self): + """Open rejects a footer frame count larger than the file.""" + samples = [[make_interpreter(0, [ + make_thread(1, [make_frame("f.py", 10, "f")]) + ])]] + filename = self.create_binary_file(samples, compression="none") + size = os.path.getsize(filename) + frame_off = self._read_offset(filename, self.HDR_OFF_FRAME_TABLE) + max_frames = (size - frame_off) // self.MIN_FRAME_ENTRY_SIZE + self._patch_footer_count(filename, self.FTR_OFF_FRAMES, 0xFFFFFFFF) + + with self.assertRaises(ValueError) as cm: + with BinaryReader(filename): + pass + self.assertEqual( + str(cm.exception), + f"Invalid frame count 4294967295 exceeds maximum " + f"possible {max_frames}", + ) + + def test_open_accepts_frame_count_at_capacity_boundary(self): + """A frame count at the file-size cap opens; one more is rejected.""" + samples = [[make_interpreter(0, [ + make_thread(1, [make_frame("f.py", 10, "f")]) + ])]] + filename = self.create_binary_file(samples, compression="none") + size = os.path.getsize(filename) + frame_off = self._read_offset(filename, self.HDR_OFF_FRAME_TABLE) + max_frames = (size - frame_off) // self.MIN_FRAME_ENTRY_SIZE + + self._patch_footer_count(filename, self.FTR_OFF_FRAMES, max_frames) + with BinaryReader(filename): + pass + + self._patch_footer_count(filename, self.FTR_OFF_FRAMES, max_frames + 1) + with self.assertRaises(ValueError) as cm: + with BinaryReader(filename): + pass + self.assertEqual( + str(cm.exception), + f"Invalid frame count {max_frames + 1} exceeds maximum " + f"possible {max_frames}", + ) + class TestBinaryEncodings(BinaryFormatTestBase): """Tests specifically targeting different stack encodings.""" diff --git a/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst b/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst new file mode 100644 index 00000000000000..3002fd5276a666 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst @@ -0,0 +1,4 @@ +Fix unbounded memory allocation in the :mod:`profiling.sampling` binary profile +reader, which trusted the string and frame table counts declared in a file's +footer before allocating. A malformed file can no longer make the reader +attempt a huge allocation. diff --git a/Modules/_remote_debugging/binary_io.h b/Modules/_remote_debugging/binary_io.h index d4188335c0b6d0..d4460e2a496237 100644 --- a/Modules/_remote_debugging/binary_io.h +++ b/Modules/_remote_debugging/binary_io.h @@ -91,6 +91,11 @@ static_assert(SAMPLE_HEADER_FIXED_SIZE == 13, static_assert(FILE_FOOTER_SIZE == 32, "FILE_FOOTER_SIZE must remain 32"); +/* Minimum on-disk bytes per table entry: a string is a >=1-byte length + * varint; a frame is six >=1-byte varints plus the opcode byte. */ +#define MIN_STRING_ENTRY_SIZE 1 +#define MIN_FRAME_ENTRY_SIZE 7 + /* Buffer sizes: 512KB balances syscall amortization against memory use, * and aligns well with filesystem block sizes and zstd dictionary windows */ #define WRITE_BUFFER_SIZE (512 * 1024) diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index ce1c3d232c94e0..2bdce10ac1252f 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -240,6 +240,16 @@ reader_decompress_samples(BinaryReader *reader, const uint8_t *data) static inline int reader_parse_string_table(BinaryReader *reader, const uint8_t *data, size_t file_size) { + /* Reject a count larger than the remaining bytes can hold. */ + size_t max_strings = + (file_size - reader->string_table_offset) / MIN_STRING_ENTRY_SIZE; + if (reader->strings_count > max_strings) { + PyErr_Format(PyExc_ValueError, + "Invalid string count %u exceeds maximum possible %zu", + reader->strings_count, max_strings); + return -1; + } + reader->strings = PyMem_Calloc(reader->strings_count, sizeof(PyObject *)); if (!reader->strings && reader->strings_count > 0) { PyErr_NoMemory(); @@ -280,6 +290,16 @@ reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_ } #endif + /* Reject a count larger than the remaining bytes can hold. */ + size_t max_frames = + (file_size - reader->frame_table_offset) / MIN_FRAME_ENTRY_SIZE; + if (reader->frames_count > max_frames) { + PyErr_Format(PyExc_ValueError, + "Invalid frame count %u exceeds maximum possible %zu", + reader->frames_count, max_frames); + return -1; + } + size_t alloc_size = (size_t)reader->frames_count * sizeof(FrameEntry); reader->frames = PyMem_Malloc(alloc_size); if (!reader->frames && reader->frames_count > 0) { From ffe26437edc2432510b7db1f9bef026823430af6 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Wed, 1 Jul 2026 10:22:56 +0800 Subject: [PATCH 2/5] gh-152718: Tighten comments and shorten the NEWS entry --- .../Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst | 4 +--- Modules/_remote_debugging/binary_io.h | 3 +-- Modules/_remote_debugging/binary_io_reader.c | 2 -- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst b/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst index 3002fd5276a666..63d9d7f878cb07 100644 --- a/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst +++ b/Misc/NEWS.d/next/Library/2026-07-01-12-00-00.gh-issue-152718.pr0f1l.rst @@ -1,4 +1,2 @@ Fix unbounded memory allocation in the :mod:`profiling.sampling` binary profile -reader, which trusted the string and frame table counts declared in a file's -footer before allocating. A malformed file can no longer make the reader -attempt a huge allocation. +reader when a file declares more string or frame entries than it contains. diff --git a/Modules/_remote_debugging/binary_io.h b/Modules/_remote_debugging/binary_io.h index d4460e2a496237..a4d6c52321e951 100644 --- a/Modules/_remote_debugging/binary_io.h +++ b/Modules/_remote_debugging/binary_io.h @@ -91,8 +91,7 @@ static_assert(SAMPLE_HEADER_FIXED_SIZE == 13, static_assert(FILE_FOOTER_SIZE == 32, "FILE_FOOTER_SIZE must remain 32"); -/* Minimum on-disk bytes per table entry: a string is a >=1-byte length - * varint; a frame is six >=1-byte varints plus the opcode byte. */ +/* Minimum on-disk bytes of a string (1) and frame (7) table entry. */ #define MIN_STRING_ENTRY_SIZE 1 #define MIN_FRAME_ENTRY_SIZE 7 diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index 2bdce10ac1252f..0b8490699da17c 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -240,7 +240,6 @@ reader_decompress_samples(BinaryReader *reader, const uint8_t *data) static inline int reader_parse_string_table(BinaryReader *reader, const uint8_t *data, size_t file_size) { - /* Reject a count larger than the remaining bytes can hold. */ size_t max_strings = (file_size - reader->string_table_offset) / MIN_STRING_ENTRY_SIZE; if (reader->strings_count > max_strings) { @@ -290,7 +289,6 @@ reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_ } #endif - /* Reject a count larger than the remaining bytes can hold. */ size_t max_frames = (file_size - reader->frame_table_offset) / MIN_FRAME_ENTRY_SIZE; if (reader->frames_count > max_frames) { From 572947c8b64723282c0fd0a1ff8fa0fa4db2bf2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?tonghuaroot=20=28=E7=AB=A5=E8=AF=9D=29?= Date: Thu, 2 Jul 2026 10:24:55 +0800 Subject: [PATCH 3/5] gh-152718: Factor the reader count checks into reader_validate_count Addresses review feedback: centralize the string/frame/RLE oversized-count validations behind one helper. --- Modules/_remote_debugging/binary_io_reader.c | 48 +++++++++++--------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index 0b8490699da17c..73c4f5b6250d89 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -237,15 +237,29 @@ reader_decompress_samples(BinaryReader *reader, const uint8_t *data) } #endif +/* Reject a table/run count whose entries cannot fit in the bytes still + * available; a malicious file could otherwise drive a huge allocation. + * Each entry occupies at least min_entry_size bytes. */ +static int +reader_validate_count(const char *what, uint32_t count, + size_t available_bytes, size_t min_entry_size) +{ + size_t max_possible = available_bytes / min_entry_size; + if (count > max_possible) { + PyErr_Format(PyExc_ValueError, + "Invalid %s count %u exceeds maximum possible %zu", + what, count, max_possible); + return -1; + } + return 0; +} + static inline int reader_parse_string_table(BinaryReader *reader, const uint8_t *data, size_t file_size) { - size_t max_strings = - (file_size - reader->string_table_offset) / MIN_STRING_ENTRY_SIZE; - if (reader->strings_count > max_strings) { - PyErr_Format(PyExc_ValueError, - "Invalid string count %u exceeds maximum possible %zu", - reader->strings_count, max_strings); + if (reader_validate_count("string", reader->strings_count, + file_size - reader->string_table_offset, + MIN_STRING_ENTRY_SIZE) < 0) { return -1; } @@ -289,12 +303,9 @@ reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_ } #endif - size_t max_frames = - (file_size - reader->frame_table_offset) / MIN_FRAME_ENTRY_SIZE; - if (reader->frames_count > max_frames) { - PyErr_Format(PyExc_ValueError, - "Invalid frame count %u exceeds maximum possible %zu", - reader->frames_count, max_frames); + if (reader_validate_count("frame", reader->frames_count, + file_size - reader->frame_table_offset, + MIN_FRAME_ENTRY_SIZE) < 0) { return -1; } @@ -1071,15 +1082,10 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre return -1; } - /* Validate RLE count to prevent DoS from malicious files. - * Each RLE sample needs at least 2 bytes (1 byte min varint + 1 status byte). - * Also reject absurdly large counts that would exhaust memory. */ - size_t remaining_data = reader->sample_data_size - offset; - size_t max_possible_samples = remaining_data / 2; - if (count > max_possible_samples) { - PyErr_Format(PyExc_ValueError, - "Invalid RLE count %u exceeds maximum possible %zu for remaining data", - count, max_possible_samples); + /* Reject a count larger than the remaining bytes can hold; each + * RLE sample needs at least 2 bytes (1-byte min varint + status). */ + if (reader_validate_count("RLE", count, + reader->sample_data_size - offset, 2) < 0) { return -1; } if ((uint64_t)count > (uint64_t)PY_SSIZE_T_MAX - (uint64_t)replayed) { From 7c23d37fcb3daeca580a253ea27004fea84b9f23 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Sat, 4 Jul 2026 21:37:47 +0800 Subject: [PATCH 4/5] Accept the 32-bit allocation-overflow rejection in the frame-count test On 32-bit builds a footer frame count of 0xFFFFFFFF overflows the allocation size, so the size_t overflow guard raises OverflowError before the count-vs-file-size guard raises ValueError. Accept either rejection. --- .../test_binary_format.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py index 42f96e3b7870b6..67d562910bfc78 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py @@ -1090,14 +1090,19 @@ def test_open_rejects_frame_count_larger_than_file(self): max_frames = (size - frame_off) // self.MIN_FRAME_ENTRY_SIZE self._patch_footer_count(filename, self.FTR_OFF_FRAMES, 0xFFFFFFFF) - with self.assertRaises(ValueError) as cm: + # 0xFFFFFFFF frames exceed the file on every platform. On a 32-bit + # build it also overflows the allocation size, so the size_t overflow + # guard (OverflowError) fires before the count-vs-file-size guard + # (ValueError); either rejection is correct. + with self.assertRaises((ValueError, OverflowError)) as cm: with BinaryReader(filename): pass - self.assertEqual( - str(cm.exception), - f"Invalid frame count 4294967295 exceeds maximum " - f"possible {max_frames}", - ) + if isinstance(cm.exception, ValueError): + self.assertEqual( + str(cm.exception), + f"Invalid frame count 4294967295 exceeds maximum " + f"possible {max_frames}", + ) def test_open_accepts_frame_count_at_capacity_boundary(self): """A frame count at the file-size cap opens; one more is rejected.""" From ce6b05672c613ee7edf283ae139f8c32210ed228 Mon Sep 17 00:00:00 2001 From: tonghuaroot Date: Sat, 4 Jul 2026 22:29:50 +0800 Subject: [PATCH 5/5] Trim the comment on the frame-count rejection test --- .../test_sampling_profiler/test_binary_format.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py index 67d562910bfc78..156662296ebfa9 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py @@ -1090,10 +1090,8 @@ def test_open_rejects_frame_count_larger_than_file(self): max_frames = (size - frame_off) // self.MIN_FRAME_ENTRY_SIZE self._patch_footer_count(filename, self.FTR_OFF_FRAMES, 0xFFFFFFFF) - # 0xFFFFFFFF frames exceed the file on every platform. On a 32-bit - # build it also overflows the allocation size, so the size_t overflow - # guard (OverflowError) fires before the count-vs-file-size guard - # (ValueError); either rejection is correct. + # On a 32-bit build this count overflows the allocation size, so the + # size_t overflow guard (OverflowError) fires before the count check. with self.assertRaises((ValueError, OverflowError)) as cm: with BinaryReader(filename): pass