From f8b22f8fea181d2564267282e6c8249fde01bc13 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Tue, 13 Oct 2015 18:16:15 +0000 Subject: Fix ref counting of Python objects. PythonObjects were being incorrectly ref-counted. This problem was pervasive throughout the codebase, leading to an unknown number of memory leaks and potentially use-after-free. The issue stems from the fact that Python native methods can either return "borrowed" references or "owned" references. For the former category, you *must* incref it prior to decrefing it. And for the latter category, you should not incref it before decrefing it. This is mostly an issue when a Python C API method returns a `PyObject` to you, but it can also happen with a method accepts a `PyObject`. Notably, this happens in `PyList_SetItem`, which is documented to "steal" the reference that you give it. So if you pass something to `PyList_SetItem`, you cannot hold onto it unless you incref it first. But since this is one of only two exceptions in the entire API, it's confusing and difficult to remember. Our `PythonObject` class was indiscriminantely increfing every object it received, which means that if you passed it an owned reference, you now have a dangling reference since owned references should not be increfed. We were doing this in quite a few places. There was also a fair amount of manual increfing and decrefing prevalent throughout the codebase, which is easy to get wrong. This patch solves the problem by making any construction of a `PythonObject` from a `PyObject` take a flag which indicates whether it is an owned reference or a borrowed reference. There is no way to construct a `PythonObject` without this flag, and it does not offer a default value, forcing the user to make an explicit decision every time. All manual uses of `PyObject` have been cleaned up throughout the codebase and replaced with `PythonObject` in order to make RAII the predominant pattern when dealing with native Python objects. Differential Revision: http://reviews.llvm.org/D13617 Reviewed By: Greg Clayton llvm-svn: 250195 --- .../ScriptInterpreter/Python/PythonDataObjects.h | 369 +++++++++++---------- 1 file changed, 197 insertions(+), 172 deletions(-) (limited to 'lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h') diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h index 3faaca37c06..8872b11488d 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h @@ -25,7 +25,6 @@ namespace lldb_private { class PythonString; class PythonList; class PythonDictionary; -class PythonObject; class PythonInteger; class StructuredPythonObject : public StructuredData::Generic @@ -71,214 +70,240 @@ enum class PyObjectType String }; - class PythonObject +enum class PyRefType +{ + Borrowed, // We are not given ownership of the incoming PyObject. + // We cannot safely hold it without calling Py_INCREF. + Owned // We have ownership of the incoming PyObject. We should + // not call Py_INCREF. +}; + +enum class PyInitialValue +{ + Invalid, + Empty +}; + +class PythonObject +{ +public: + PythonObject() + : m_py_obj(nullptr) { - public: - PythonObject () : - m_py_obj(NULL) - { - } - - explicit PythonObject (PyObject* py_obj) : - m_py_obj(NULL) - { - Reset (py_obj); - } - - PythonObject (const PythonObject &rhs) : - m_py_obj(NULL) - { - Reset (rhs.m_py_obj); - } - - virtual - ~PythonObject () - { - Reset (NULL); - } - - bool - Reset (const PythonObject &object) - { - return Reset(object.get()); - } - - virtual bool - Reset (PyObject* py_obj = NULL) - { - if (py_obj != m_py_obj) - { - if (Py_IsInitialized()) - Py_XDECREF(m_py_obj); - m_py_obj = py_obj; - if (Py_IsInitialized()) - Py_XINCREF(m_py_obj); - } - return true; - } - - void - Dump () const - { - if (m_py_obj) - _PyObject_Dump (m_py_obj); - else - puts ("NULL"); - } - - void - Dump (Stream &strm) const; + } - PyObject* - get () const - { - return m_py_obj; - } + PythonObject(PyRefType type, PyObject *py_obj) + : m_py_obj(nullptr) + { + Reset(type, py_obj); + } - PyObjectType GetObjectType() const; + PythonObject(const PythonObject &rhs) + : m_py_obj(nullptr) + { + Reset(rhs); + } - PythonString - Repr (); - - PythonString - Str (); + virtual ~PythonObject() { Reset(); } + + void + Reset() + { + // Avoid calling the virtual method since it's not necessary + // to actually validate the type of the PyObject if we're + // just setting to null. + if (Py_IsInitialized()) + Py_XDECREF(m_py_obj); + m_py_obj = nullptr; + } + + void + Reset(const PythonObject &rhs) + { + // Avoid calling the virtual method if it's not necessary + // to actually validate the type of the PyObject. + if (!rhs.get()) + Reset(); + else + Reset(PyRefType::Borrowed, rhs.m_py_obj); + } + + // PythonObject is implicitly convertible to PyObject *, which will call the + // wrong overload. We want to explicitly disallow this, since a PyObject + // *always* owns its reference. Therefore the overload which takes a + // PyRefType doesn't make sense, and the copy constructor should be used. + void + Reset(PyRefType type, const PythonObject &ref) = delete; + + virtual void + Reset(PyRefType type, PyObject *py_obj) + { + if (py_obj == m_py_obj) + return; + + if (Py_IsInitialized()) + Py_XDECREF(m_py_obj); + + m_py_obj = py_obj; + + // If this is a borrowed reference, we need to convert it to + // an owned reference by incrementing it. If it is an owned + // reference (for example the caller allocated it with PyDict_New() + // then we must *not* increment it. + if (Py_IsInitialized() && type == PyRefType::Borrowed) + Py_XINCREF(m_py_obj); + } - explicit operator bool () const - { - return m_py_obj != NULL; - } + void + Dump () const + { + if (m_py_obj) + _PyObject_Dump (m_py_obj); + else + puts ("NULL"); + } - bool - IsNULLOrNone () const; + void + Dump (Stream &strm) const; - StructuredData::ObjectSP CreateStructuredObject() const; + PyObject* + get () const + { + return m_py_obj; + } - protected: - PyObject* m_py_obj; - }; + PyObjectType GetObjectType() const; - class PythonString: public PythonObject + PythonString + Repr (); + + PythonString + Str (); + + PythonObject & + operator=(const PythonObject &other) { - public: - PythonString (); - PythonString (PyObject *o); - PythonString (const PythonObject &object); - PythonString (llvm::StringRef string); - PythonString (const char *string); - virtual ~PythonString (); + Reset(PyRefType::Borrowed, other.get()); + return *this; + } - static bool Check(PyObject *py_obj); + bool + IsValid() const; - virtual bool - Reset (PyObject* py_obj = NULL); + bool + IsAllocated() const; - llvm::StringRef - GetString() const; + bool + IsNone() const; - size_t - GetSize() const; + StructuredData::ObjectSP CreateStructuredObject() const; - void SetString(llvm::StringRef string); +protected: + PyObject* m_py_obj; +}; - StructuredData::StringSP CreateStructuredString() const; - }; +class PythonString : public PythonObject +{ +public: + PythonString(); + PythonString(PyRefType type, PyObject *o); + PythonString(const PythonString &object); + explicit PythonString(llvm::StringRef string); + explicit PythonString(const char *string); + ~PythonString() override; - class PythonInteger: public PythonObject - { - public: - - PythonInteger (); - PythonInteger (PyObject* py_obj); - PythonInteger (const PythonObject &object); - PythonInteger (int64_t value); - virtual ~PythonInteger (); + static bool Check(PyObject *py_obj); - static bool Check(PyObject *py_obj); + // Bring in the no-argument base class version + using PythonObject::Reset; - virtual bool - Reset (PyObject* py_obj = NULL); + void Reset(PyRefType type, PyObject *py_obj) override; - int64_t GetInteger() const; + llvm::StringRef + GetString() const; - void - SetInteger (int64_t value); + size_t + GetSize() const; - StructuredData::IntegerSP CreateStructuredInteger() const; - }; - - class PythonList: public PythonObject - { - public: - PythonList(); - PythonList (PyObject* py_obj); - PythonList (const PythonObject &object); - virtual ~PythonList (); + void SetString(llvm::StringRef string); - static bool Check(PyObject *py_obj); + StructuredData::StringSP CreateStructuredString() const; +}; - virtual bool - Reset (PyObject* py_obj = NULL); +class PythonInteger : public PythonObject +{ +public: + PythonInteger(); + PythonInteger(PyRefType type, PyObject *o); + PythonInteger(const PythonInteger &object); + explicit PythonInteger(int64_t value); + ~PythonInteger() override; - uint32_t GetSize() const; + static bool Check(PyObject *py_obj); - PythonObject GetItemAtIndex(uint32_t index) const; + // Bring in the no-argument base class version + using PythonObject::Reset; - void - SetItemAtIndex (uint32_t index, const PythonObject &object); - - void - AppendItem (const PythonObject &object); + void Reset(PyRefType type, PyObject *py_obj) override; - StructuredData::ArraySP CreateStructuredArray() const; - }; - - class PythonDictionary: public PythonObject - { - public: - PythonDictionary(); - PythonDictionary (PyObject* object); - PythonDictionary (const PythonObject &object); - virtual ~PythonDictionary (); + int64_t GetInteger() const; - static bool Check(PyObject *py_obj); + void + SetInteger (int64_t value); - virtual bool - Reset (PyObject* object = NULL); + StructuredData::IntegerSP CreateStructuredInteger() const; +}; - uint32_t GetSize() const; +class PythonList : public PythonObject +{ +public: + PythonList(PyInitialValue value); + PythonList(PyRefType type, PyObject *o); + PythonList(const PythonList &list); + ~PythonList() override; - PythonObject - GetItemForKey (const PythonString &key) const; - - const char * - GetItemForKeyAsString (const PythonString &key, const char *fail_value = NULL) const; + static bool Check(PyObject *py_obj); - int64_t - GetItemForKeyAsInteger (const PythonString &key, int64_t fail_value = 0) const; + // Bring in the no-argument base class version + using PythonObject::Reset; - PythonObject - GetItemForKey (const char *key) const; + void Reset(PyRefType type, PyObject *py_obj) override; - typedef bool (*DictionaryIteratorCallback)(PythonString* key, PythonDictionary* dict); - - PythonList - GetKeys () const; - - PythonString - GetKeyAtPosition (uint32_t pos) const; - - PythonObject - GetValueAtPosition (uint32_t pos) const; - - void - SetItemForKey (const PythonString &key, PyObject *value); + uint32_t GetSize() const; + + PythonObject GetItemAtIndex(uint32_t index) const; - void - SetItemForKey (const PythonString &key, const PythonObject& value); + void SetItemAtIndex(uint32_t index, const PythonObject &object); - StructuredData::DictionarySP CreateStructuredDictionary() const; - }; - + void AppendItem(const PythonObject &object); + + StructuredData::ArraySP CreateStructuredArray() const; +}; + +class PythonDictionary : public PythonObject +{ +public: + PythonDictionary(PyInitialValue value); + PythonDictionary(PyRefType type, PyObject *o); + PythonDictionary(const PythonDictionary &dict); + ~PythonDictionary() override; + + static bool Check(PyObject *py_obj); + + // Bring in the no-argument base class version + using PythonObject::Reset; + + void Reset(PyRefType type, PyObject *py_obj) override; + + uint32_t GetSize() const; + + PythonList GetKeys() const; + + PythonObject GetItemForKey(const PythonObject &key) const; + void SetItemForKey(const PythonObject &key, const PythonObject &value); + + StructuredData::DictionarySP CreateStructuredDictionary() const; +}; } // namespace lldb_private #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H -- cgit v1.2.3