From 23d37b438f3d2a2ed53ed3f914563f5f0004da53 Mon Sep 17 00:00:00 2001 From: Carl Friedrich Bolz-Tereick Date: Sat, 29 Apr 2023 13:39:57 +0200 Subject: #3917 fix various cases where a mutating __index__ method could crash the interpreter --- pypy/interpreter/baseobjspace.py | 26 ++++++++++++++++++-------- pypy/module/_collections/interp_deque.py | 6 +++--- pypy/module/_collections/test/apptest_deque.py | 11 +++++++++++ pypy/module/array/interp_array.py | 13 +++++++------ pypy/module/array/test/test_array.py | 18 ++++++++++++++++++ pypy/module/micronumpy/compile.py | 2 +- pypy/module/micronumpy/flatiter.py | 4 ++-- pypy/module/micronumpy/ndarray.py | 2 +- pypy/module/micronumpy/strides.py | 4 ++-- pypy/module/mmap/interp_mmap.py | 9 ++++----- pypy/module/mmap/test/test_mmap.py | 21 +++++++++++++++++++++ pypy/objspace/fake/objspace.py | 6 +----- pypy/objspace/std/bufferobject.py | 6 ++---- pypy/objspace/std/memoryobject.py | 6 ++++-- pypy/objspace/std/test/test_memoryobject.py | 16 +++++++++++++++- 15 files changed, 110 insertions(+), 40 deletions(-) diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py index 5dd38e33e5..66a0f9306c 100644 --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1499,15 +1499,19 @@ class ObjSpace(object): if op == 'ge': return self.ge(w_x1, w_x2) assert False, "bad value for op" - def decode_index(self, w_index_or_slice, seqlength): + def decode_index4_unsafe(self, w_index_or_slice, seqlength): """Helper for custom sequence implementations - -> (index, 0, 0) or - (start, stop, step) + -> (index, 0, 0, 1) or + (start, stop, step, slice_length) + + UNSAFE: if the __index__ method of one of the slice's start/stop/step + fields changes the sequence, then the result can be incorrect """ if self.isinstance_w(w_index_or_slice, self.w_slice): from pypy.objspace.std.sliceobject import W_SliceObject assert isinstance(w_index_or_slice, W_SliceObject) - start, stop, step = w_index_or_slice.indices3(self, seqlength) + start, stop, step, length = w_index_or_slice.indices4(self, + seqlength) else: start = self.getindex_w(w_index_or_slice, self.w_IndexError) if start < 0: @@ -1516,9 +1520,10 @@ class ObjSpace(object): raise oefmt(self.w_IndexError, "index out of range") stop = 0 step = 0 - return start, stop, step + length = 1 + return start, stop, step, length - def decode_index4(self, w_index_or_slice, seqlength): + def decode_index4(self, w_index_or_slice, w_seq): """Helper for custom sequence implementations -> (index, 0, 0, 1) or (start, stop, step, slice_length) @@ -1526,10 +1531,14 @@ class ObjSpace(object): if self.isinstance_w(w_index_or_slice, self.w_slice): from pypy.objspace.std.sliceobject import W_SliceObject assert isinstance(w_index_or_slice, W_SliceObject) - start, stop, step, length = w_index_or_slice.indices4(self, - seqlength) + # it's important to first unpack the slice and then read the length + # of the sequence, because the former can change the latter + start, stop, step = w_index_or_slice.unpack(self) + seqlength = self.len_w(w_seq) + start, stop, step, length = w_index_or_slice.adjust_indices(start, stop, step, seqlength) else: start = self.getindex_w(w_index_or_slice, self.w_IndexError) + seqlength = self.len_w(w_seq) if start < 0: start += seqlength if not (0 <= start < seqlength): @@ -1539,6 +1548,7 @@ class ObjSpace(object): length = 1 return start, stop, step, length + def getindex_w(self, w_obj, w_exception, objdescr=None, errmsg=None): """Return w_obj.__index__() as an RPython int. If w_exception is None, silently clamp in case of overflow; diff --git a/pypy/module/_collections/interp_deque.py b/pypy/module/_collections/interp_deque.py index 43bff2bdaf..f2580e7d66 100644 --- a/pypy/module/_collections/interp_deque.py +++ b/pypy/module/_collections/interp_deque.py @@ -374,7 +374,7 @@ class W_Deque(W_Root): def getitem(self, w_index): space = self.space - start, stop, step = space.decode_index(w_index, self.len) + start, stop, step, _ = space.decode_index4(w_index, self) if step == 0: # index only b, i = self.locate(start) return b.data[i] @@ -383,7 +383,7 @@ class W_Deque(W_Root): def setitem(self, w_index, w_newobj): space = self.space - start, stop, step = space.decode_index(w_index, self.len) + start, stop, step, _ = space.decode_index4(w_index, self) if step == 0: # index only b, i = self.locate(start) b.data[i] = w_newobj @@ -392,7 +392,7 @@ class W_Deque(W_Root): def delitem(self, w_index): space = self.space - start, stop, step = space.decode_index(w_index, self.len) + start, stop, step, _ = space.decode_index4(w_index, self) if step == 0: # index only self.del_item(start) else: diff --git a/pypy/module/_collections/test/apptest_deque.py b/pypy/module/_collections/test/apptest_deque.py index 111e145f76..38e8aa27ff 100644 --- a/pypy/module/_collections/test/apptest_deque.py +++ b/pypy/module/_collections/test/apptest_deque.py @@ -287,3 +287,14 @@ def test_index_method(): return 1 assert d[A()] == 2 +def test_index_method_mutates(): + d = deque([1, 2, 3, 4, 5]) + class A(object): + def __index__(self): + d.clear() + return 1 + with raises(IndexError): + d[A()] + d = deque([1, 2, 3, 4, 5]) + with raises(IndexError): + d[A()] = 2 diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py index 5ebac5a3e4..63af36e4cd 100644 --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -568,8 +568,8 @@ class W_ArrayBase(W_Root): def descr_getitem(self, space, w_idx): "x.__getitem__(y) <==> x[y]" if not space.isinstance_w(w_idx, space.w_slice): - idx, stop, step = space.decode_index(w_idx, self.len) - assert step == 0 + idx, stop, step, slicelength = space.decode_index4(w_idx, self) + assert step == 0 and slicelength == 1 return self.w_getitem(space, idx) else: return self.getitem_slice(space, w_idx) @@ -590,7 +590,7 @@ class W_ArrayBase(W_Root): w_item) def descr_delitem(self, space, w_idx): - start, stop, step, size = self.space.decode_index4(w_idx, self.len) + start, stop, step, size = self.space.decode_index4(w_idx, self) if step != 1: # I don't care about efficiency of that so far w_lst = self.descr_tolist(space) @@ -1129,7 +1129,7 @@ def make_array(mytype): keepalive_until_here(self) def getitem_slice(self, space, w_idx): - start, stop, step, size = space.decode_index4(w_idx, self.len) + start, stop, step, size = space.decode_index4(w_idx, self) w_a = mytype.w_class(self.space) w_a.setlen(size, overallocate=False) assert step != 0 @@ -1144,7 +1144,8 @@ def make_array(mytype): return w_a def setitem(self, space, w_idx, w_item): - idx, stop, step = space.decode_index(w_idx, self.len) + idx, stop, step, slicelength = space.decode_index4(w_idx, self) + assert slicelength == 1 if step != 0: raise oefmt(self.space.w_TypeError, "can only assign array to array slice") @@ -1156,7 +1157,7 @@ def make_array(mytype): if not isinstance(w_item, W_Array): raise oefmt(space.w_TypeError, "can only assign to a slice array") - start, stop, step, size = self.space.decode_index4(w_idx, self.len) + start, stop, step, size = self.space.decode_index4(w_idx, self) assert step != 0 if w_item.len != size or self is w_item: if start == self.len and step > 0: diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py index 1242ae3fc2..10e54a1b94 100644 --- a/pypy/module/array/test/test_array.py +++ b/pypy/module/array/test/test_array.py @@ -1093,3 +1093,21 @@ class AppTestArray(object): def test_fresh_array_buffer_str(self): assert str(buffer(self.array('i'))) == '' + + def test_mutate_while_slice(self): + class X: + def __index__(self): + del a[:] + return 1 + + a = self.array('i', [1, 2, 3, 4, 5, 6]) + length = len(a[:X():2]) + assert length == 0 + + a = self.array('i', [1, 2, 3, 4, 5, 6]) + length = len(a[:X():2]) + assert length == 0 + + a = self.array('i', [1, 2, 3, 4, 5, 6]) + length = len(a[:X():2]) + assert length == 0 diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py index 3da09391d5..8296cb871c 100644 --- a/pypy/module/micronumpy/compile.py +++ b/pypy/module/micronumpy/compile.py @@ -156,7 +156,7 @@ class FakeSpace(ObjSpace): raise NotImplementedError - def decode_index4(self, w_idx, size): + def decode_index4_unsafe(self, w_idx, size): if isinstance(w_idx, IntObject): return (self.int_w(w_idx), 0, 0, 1) else: diff --git a/pypy/module/micronumpy/flatiter.py b/pypy/module/micronumpy/flatiter.py index 30c92659bc..f35c2e418b 100644 --- a/pypy/module/micronumpy/flatiter.py +++ b/pypy/module/micronumpy/flatiter.py @@ -67,7 +67,7 @@ class W_FlatIterator(W_NDimArray): space.isinstance_w(w_idx, space.w_slice)): raise oefmt(space.w_IndexError, 'unsupported iterator index') try: - start, stop, step, length = space.decode_index4(w_idx, self.iter.size) + start, stop, step, length = space.decode_index4_unsafe(w_idx, self.iter.size) state = self.iter.goto(start) if length == 1: return self.iter.getitem(state) @@ -82,7 +82,7 @@ class W_FlatIterator(W_NDimArray): if not (space.isinstance_w(w_idx, space.w_int) or space.isinstance_w(w_idx, space.w_slice)): raise oefmt(space.w_IndexError, 'unsupported iterator index') - start, stop, step, length = space.decode_index4(w_idx, self.iter.size) + start, stop, step, length = space.decode_index4_unsafe(w_idx, self.iter.size) try: state = self.iter.goto(start) dtype = self.base.get_dtype() diff --git a/pypy/module/micronumpy/ndarray.py b/pypy/module/micronumpy/ndarray.py index 681729555e..f543fa1921 100644 --- a/pypy/module/micronumpy/ndarray.py +++ b/pypy/module/micronumpy/ndarray.py @@ -184,7 +184,7 @@ class __extend__(W_NDimArray): arr_index_in_shape = True else: if space.isinstance_w(w_item, space.w_slice): - lgt = space.decode_index4(w_item, self.get_shape()[i])[3] + lgt = space.decode_index4_unsafe(w_item, self.get_shape()[i])[3] if not arr_index_in_shape: prefix.append(w_item) res_shape.append(lgt) diff --git a/pypy/module/micronumpy/strides.py b/pypy/module/micronumpy/strides.py index c311db605c..f33a5efe88 100644 --- a/pypy/module/micronumpy/strides.py +++ b/pypy/module/micronumpy/strides.py @@ -40,7 +40,7 @@ class IntegerChunk(BaseChunk): self.w_idx = w_idx def compute(self, space, base_length, base_stride): - start, _, _, _ = space.decode_index4(self.w_idx, base_length) + start, _, _, _ = space.decode_index4_unsafe(self.w_idx, base_length) return start, 0, 0, 0 @@ -52,7 +52,7 @@ class SliceChunk(BaseChunk): self.w_slice = w_slice def compute(self, space, base_length, base_stride): - start, stop, step, length = space.decode_index4(self.w_slice, base_length) + start, stop, step, length = space.decode_index4_unsafe(self.w_slice, base_length) stride = base_stride * step backstride = base_stride * max(0, length - 1) * step return start, length, stride, backstride diff --git a/pypy/module/mmap/interp_mmap.py b/pypy/module/mmap/interp_mmap.py index 975551f317..d6adeda268 100644 --- a/pypy/module/mmap/interp_mmap.py +++ b/pypy/module/mmap/interp_mmap.py @@ -144,6 +144,7 @@ class W_MMap(W_Root): self.space.newtext(e.message)) def __len__(self): + self.check_valid() return self.space.newint(self.mmap.size) def check_valid(self): @@ -165,10 +166,9 @@ class W_MMap(W_Root): raise mmap_error(self.space, v) def descr_getitem(self, w_index): - self.check_valid() - space = self.space - start, stop, step, length = space.decode_index4(w_index, self.mmap.size) + start, stop, step, length = space.decode_index4(w_index, self) + self.check_valid() # decode_index4 can have closed the mmap if step == 0: # index only return space.newbytes(self.mmap.getitem(start)) elif step == 1: @@ -186,11 +186,10 @@ class W_MMap(W_Root): def descr_setitem(self, w_index, w_value): space = self.space value = space.realtext_w(w_value) + start, stop, step, length = space.decode_index4(w_index, self) self.check_valid() - self.check_writeable() - start, stop, step, length = space.decode_index4(w_index, self.mmap.size) if step == 0: # index only if len(value) != 1: raise oefmt(space.w_ValueError, diff --git a/pypy/module/mmap/test/test_mmap.py b/pypy/module/mmap/test/test_mmap.py index a949e16fd0..7e55c9e493 100644 --- a/pypy/module/mmap/test/test_mmap.py +++ b/pypy/module/mmap/test/test_mmap.py @@ -106,6 +106,7 @@ class AppTestMMap: m = mmap(f.fileno(), 1) m.close() raises(ValueError, m.read, 1) + raises(ValueError, len, m) def test_read_byte(self): from mmap import mmap @@ -903,3 +904,23 @@ class AppTestMMap: raises(ValueError, m.write, 'abc') assert m.tell() == 5000 m.close() + + def test_close_while_indexing(self): + import mmap + with open(self.tmpname + "_crash", 'w+b') as f: + f.write(b"foobar") + f.flush() + + class X(object): + def __index__(self): + m.close() + return 1 + + m = mmap.mmap(f.fileno(), 6, access=mmap.ACCESS_READ) + assert m[1] == b"o" + with raises(ValueError): + m[X()] + + m = mmap.mmap(f.fileno(), 6, access=mmap.ACCESS_READ) + with raises(ValueError): + m[X()] = b"u" diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py index 90baf658d0..7aba9d6559 100644 --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -338,11 +338,7 @@ class FakeObjSpace(ObjSpace): is_root(w_subtype) return instantiate(cls) - def decode_index(self, w_index_or_slice, seqlength): - is_root(w_index_or_slice) - return (NonConstant(42), NonConstant(42), NonConstant(42)) - - def decode_index4(self, w_index_or_slice, seqlength): + def decode_index4(self, w_index_or_slice, w_obj): is_root(w_index_or_slice) return (NonConstant(42), NonConstant(42), NonConstant(42), NonConstant(42)) diff --git a/pypy/objspace/std/bufferobject.py b/pypy/objspace/std/bufferobject.py index 62f65cfae9..6b10575db8 100644 --- a/pypy/objspace/std/bufferobject.py +++ b/pypy/objspace/std/bufferobject.py @@ -40,8 +40,7 @@ class W_AbstractBuffer(W_Root): return space.newint(self.buf.getlength()) def descr_getitem(self, space, w_index): - start, stop, step, size = space.decode_index4(w_index, - self.buf.getlength()) + start, stop, step, size = space.decode_index4(w_index, self) if step == 0: # index only return space.newbytes(self.buf.getitem(start)) res = self.buf.getslice(start, step, size) @@ -50,8 +49,7 @@ class W_AbstractBuffer(W_Root): def descr_setitem(self, space, w_index, w_obj): if self.buf.readonly: raise oefmt(space.w_TypeError, "buffer is read-only") - start, stop, step, size = space.decode_index4(w_index, - self.buf.getlength()) + start, stop, step, size = space.decode_index4(w_index, self) value = space.readbuf_w(w_obj) if step == 0: # index only if value.getlength() != 1: diff --git a/pypy/objspace/std/memoryobject.py b/pypy/objspace/std/memoryobject.py index 4d2a417bdd..657ca1e3ee 100644 --- a/pypy/objspace/std/memoryobject.py +++ b/pypy/objspace/std/memoryobject.py @@ -106,7 +106,9 @@ class W_MemoryView(W_Root): count = 1 else: count = shape[0] - return space.decode_index4(w_index, count) + # it's ok to use 'unsafe' here, because the index error checking + # happens a level deeper on the view access + return space.decode_index4_unsafe(w_index, count) def descr_getitem(self, space, w_index): is_slice = space.isinstance_w(w_index, space.w_slice) @@ -138,7 +140,7 @@ class W_MemoryView(W_Root): self._check_released(space) if self.view.readonly: raise oefmt(space.w_TypeError, "cannot modify read-only memory") - start, stop, step, size = space.decode_index4(w_index, self.getlength()) + start, stop, step, size = space.decode_index4(w_index, self) if step not in (0, 1): raise oefmt(space.w_NotImplementedError, "") is_slice = space.isinstance_w(w_index, space.w_slice) diff --git a/pypy/objspace/std/test/test_memoryobject.py b/pypy/objspace/std/test/test_memoryobject.py index 115b3d7c0a..623dae0933 100644 --- a/pypy/objspace/std/test/test_memoryobject.py +++ b/pypy/objspace/std/test/test_memoryobject.py @@ -63,4 +63,18 @@ class AppTestMemoryView: a = memoryview(b"foobar")._pypy_raw_address() assert a != 0 b = memoryview(bytearray(b"foobar"))._pypy_raw_address() - assert b != 0 + + def test_mutate_memoryview_while_indexing(self): + class A(object): + def __index__(self): + del data[:] + return 1 + data = bytearray(b'abcefg') + v = memoryview(data) + with raises(IndexError): + v[A()] + + data = bytearray(b'abcefg') + v = memoryview(data) + with raises(IndexError): + v[A()] = b'z' -- cgit v1.2.3-65-gdbad