From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../site-packages/greenlet/TThreadState.hpp | 497 +++++++++++++++++++++ 1 file changed, 497 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/greenlet/TThreadState.hpp (limited to '.venv/lib/python3.12/site-packages/greenlet/TThreadState.hpp') diff --git a/.venv/lib/python3.12/site-packages/greenlet/TThreadState.hpp b/.venv/lib/python3.12/site-packages/greenlet/TThreadState.hpp new file mode 100644 index 00000000..e4e6f6cb --- /dev/null +++ b/.venv/lib/python3.12/site-packages/greenlet/TThreadState.hpp @@ -0,0 +1,497 @@ +#ifndef GREENLET_THREAD_STATE_HPP +#define GREENLET_THREAD_STATE_HPP + +#include +#include + +#include "greenlet_internal.hpp" +#include "greenlet_refs.hpp" +#include "greenlet_thread_support.hpp" + +using greenlet::refs::BorrowedObject; +using greenlet::refs::BorrowedGreenlet; +using greenlet::refs::BorrowedMainGreenlet; +using greenlet::refs::OwnedMainGreenlet; +using greenlet::refs::OwnedObject; +using greenlet::refs::OwnedGreenlet; +using greenlet::refs::OwnedList; +using greenlet::refs::PyErrFetchParam; +using greenlet::refs::PyArgParseParam; +using greenlet::refs::ImmortalString; +using greenlet::refs::CreatedModule; +using greenlet::refs::PyErrPieces; +using greenlet::refs::NewReference; + +namespace greenlet { +/** + * Thread-local state of greenlets. + * + * Each native thread will get exactly one of these objects, + * automatically accessed through the best available thread-local + * mechanism the compiler supports (``thread_local`` for C++11 + * compilers or ``__thread``/``declspec(thread)`` for older GCC/clang + * or MSVC, respectively.) + * + * Previously, we kept thread-local state mostly in a bunch of + * ``static volatile`` variables in the main greenlet file.. This had + * the problem of requiring extra checks, loops, and great care + * accessing these variables if we potentially invoked any Python code + * that could release the GIL, because the state could change out from + * under us. Making the variables thread-local solves this problem. + * + * When we detected that a greenlet API accessing the current greenlet + * was invoked from a different thread than the greenlet belonged to, + * we stored a reference to the greenlet in the Python thread + * dictionary for the thread the greenlet belonged to. This could lead + * to memory leaks if the thread then exited (because of a reference + * cycle, as greenlets referred to the thread dictionary, and deleting + * non-current greenlets leaked their frame plus perhaps arguments on + * the C stack). If a thread exited while still having running + * greenlet objects (perhaps that had just switched back to the main + * greenlet), and did not invoke one of the greenlet APIs *in that + * thread, immediately before it exited, without some other thread + * then being invoked*, such a leak was guaranteed. + * + * This can be partly solved by using compiler thread-local variables + * instead of the Python thread dictionary, thus avoiding a cycle. + * + * To fully solve this problem, we need a reliable way to know that a + * thread is done and we should clean up the main greenlet. On POSIX, + * we can use the destructor function of ``pthread_key_create``, but + * there's nothing similar on Windows; a C++11 thread local object + * reliably invokes its destructor when the thread it belongs to exits + * (non-C++11 compilers offer ``__thread`` or ``declspec(thread)`` to + * create thread-local variables, but they can't hold C++ objects that + * invoke destructors; the C++11 version is the most portable solution + * I found). When the thread exits, we can drop references and + * otherwise manipulate greenlets and frames that we know can no + * longer be switched to. For compilers that don't support C++11 + * thread locals, we have a solution that uses the python thread + * dictionary, though it may not collect everything as promptly as + * other compilers do, if some other library is using the thread + * dictionary and has a cycle or extra reference. + * + * There are two small wrinkles. The first is that when the thread + * exits, it is too late to actually invoke Python APIs: the Python + * thread state is gone, and the GIL is released. To solve *this* + * problem, our destructor uses ``Py_AddPendingCall`` to transfer the + * destruction work to the main thread. (This is not an issue for the + * dictionary solution.) + * + * The second is that once the thread exits, the thread local object + * is invalid and we can't even access a pointer to it, so we can't + * pass it to ``Py_AddPendingCall``. This is handled by actually using + * a second object that's thread local (ThreadStateCreator) and having + * it dynamically allocate this object so it can live until the + * pending call runs. + */ + + + +class ThreadState { +private: + // As of commit 08ad1dd7012b101db953f492e0021fb08634afad + // this class needed 56 bytes in o Py_DEBUG build + // on 64-bit macOS 11. + // Adding the vector takes us up to 80 bytes () + + /* Strong reference to the main greenlet */ + OwnedMainGreenlet main_greenlet; + + /* Strong reference to the current greenlet. */ + OwnedGreenlet current_greenlet; + + /* Strong reference to the trace function, if any. */ + OwnedObject tracefunc; + + typedef std::vector > deleteme_t; + /* A vector of raw PyGreenlet pointers representing things that need + deleted when this thread is running. The vector owns the + references, but you need to manually INCREF/DECREF as you use + them. We don't use a vector because we + make copy of this vector, and that would become O(n) as all the + refcounts are incremented in the copy. + */ + deleteme_t deleteme; + +#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED + void* exception_state; +#endif + + static std::clock_t _clocks_used_doing_gc; + static ImmortalString get_referrers_name; + static PythonAllocator allocator; + + G_NO_COPIES_OF_CLS(ThreadState); + + + // Allocates a main greenlet for the thread state. If this fails, + // exits the process. Called only during constructing a ThreadState. + MainGreenlet* alloc_main() + { + PyGreenlet* gmain; + + /* create the main greenlet for this thread */ + gmain = reinterpret_cast(PyType_GenericAlloc(&PyGreenlet_Type, 0)); + if (gmain == NULL) { + throw PyFatalError("alloc_main failed to alloc"); //exits the process + } + + MainGreenlet* const main = new MainGreenlet(gmain, this); + + assert(Py_REFCNT(gmain) == 1); + assert(gmain->pimpl == main); + return main; + } + + +public: + static void* operator new(size_t UNUSED(count)) + { + return ThreadState::allocator.allocate(1); + } + + static void operator delete(void* ptr) + { + return ThreadState::allocator.deallocate(static_cast(ptr), + 1); + } + + static void init() + { + ThreadState::get_referrers_name = "get_referrers"; + ThreadState::_clocks_used_doing_gc = 0; + } + + ThreadState() + { + +#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED + this->exception_state = slp_get_exception_state(); +#endif + + // XXX: Potentially dangerous, exposing a not fully + // constructed object. + MainGreenlet* const main = this->alloc_main(); + this->main_greenlet = OwnedMainGreenlet::consuming( + main->self() + ); + assert(this->main_greenlet); + this->current_greenlet = main->self(); + // The main greenlet starts with 1 refs: The returned one. We + // then copied it to the current greenlet. + assert(this->main_greenlet.REFCNT() == 2); + } + + inline void restore_exception_state() + { +#ifdef GREENLET_NEEDS_EXCEPTION_STATE_SAVED + // It's probably important this be inlined and only call C + // functions to avoid adding an SEH frame. + slp_set_exception_state(this->exception_state); +#endif + } + + inline bool has_main_greenlet() const noexcept + { + return bool(this->main_greenlet); + } + + // Called from the ThreadStateCreator when we're in non-standard + // threading mode. In that case, there is an object in the Python + // thread state dictionary that points to us. The main greenlet + // also traverses into us, in which case it's crucial not to + // traverse back into the main greenlet. + int tp_traverse(visitproc visit, void* arg, bool traverse_main=true) + { + if (traverse_main) { + Py_VISIT(main_greenlet.borrow_o()); + } + if (traverse_main || current_greenlet != main_greenlet) { + Py_VISIT(current_greenlet.borrow_o()); + } + Py_VISIT(tracefunc.borrow()); + return 0; + } + + inline BorrowedMainGreenlet borrow_main_greenlet() const noexcept + { + assert(this->main_greenlet); + assert(this->main_greenlet.REFCNT() >= 2); + return this->main_greenlet; + }; + + inline OwnedMainGreenlet get_main_greenlet() const noexcept + { + return this->main_greenlet; + } + + /** + * In addition to returning a new reference to the currunt + * greenlet, this performs any maintenance needed. + */ + inline OwnedGreenlet get_current() + { + /* green_dealloc() cannot delete greenlets from other threads, so + it stores them in the thread dict; delete them now. */ + this->clear_deleteme_list(); + //assert(this->current_greenlet->main_greenlet == this->main_greenlet); + //assert(this->main_greenlet->main_greenlet == this->main_greenlet); + return this->current_greenlet; + } + + /** + * As for non-const get_current(); + */ + inline BorrowedGreenlet borrow_current() + { + this->clear_deleteme_list(); + return this->current_greenlet; + } + + /** + * Does no maintenance. + */ + inline OwnedGreenlet get_current() const + { + return this->current_greenlet; + } + + template + inline bool is_current(const refs::PyObjectPointer& obj) const + { + return this->current_greenlet.borrow_o() == obj.borrow_o(); + } + + inline void set_current(const OwnedGreenlet& target) + { + this->current_greenlet = target; + } + +private: + /** + * Deref and remove the greenlets from the deleteme list. Must be + * holding the GIL. + * + * If *murder* is true, then we must be called from a different + * thread than the one that these greenlets were running in. + * In that case, if the greenlet was actually running, we destroy + * the frame reference and otherwise make it appear dead before + * proceeding; otherwise, we would try (and fail) to raise an + * exception in it and wind up right back in this list. + */ + inline void clear_deleteme_list(const bool murder=false) + { + if (!this->deleteme.empty()) { + // It's possible we could add items to this list while + // running Python code if there's a thread switch, so we + // need to defensively copy it before that can happen. + deleteme_t copy = this->deleteme; + this->deleteme.clear(); // in case things come back on the list + for(deleteme_t::iterator it = copy.begin(), end = copy.end(); + it != end; + ++it ) { + PyGreenlet* to_del = *it; + if (murder) { + // Force each greenlet to appear dead; we can't raise an + // exception into it anymore anyway. + to_del->pimpl->murder_in_place(); + } + + // The only reference to these greenlets should be in + // this list, decreffing them should let them be + // deleted again, triggering calls to green_dealloc() + // in the correct thread (if we're not murdering). + // This may run arbitrary Python code and switch + // threads or greenlets! + Py_DECREF(to_del); + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(nullptr); + PyErr_Clear(); + } + } + } + } + +public: + + /** + * Returns a new reference, or a false object. + */ + inline OwnedObject get_tracefunc() const + { + return tracefunc; + }; + + + inline void set_tracefunc(BorrowedObject tracefunc) + { + assert(tracefunc); + if (tracefunc == BorrowedObject(Py_None)) { + this->tracefunc.CLEAR(); + } + else { + this->tracefunc = tracefunc; + } + } + + /** + * Given a reference to a greenlet that some other thread + * attempted to delete (has a refcount of 0) store it for later + * deletion when the thread this state belongs to is current. + */ + inline void delete_when_thread_running(PyGreenlet* to_del) + { + Py_INCREF(to_del); + this->deleteme.push_back(to_del); + } + + /** + * Set to std::clock_t(-1) to disable. + */ + inline static std::clock_t& clocks_used_doing_gc() + { + return ThreadState::_clocks_used_doing_gc; + } + + ~ThreadState() + { + if (!PyInterpreterState_Head()) { + // We shouldn't get here (our callers protect us) + // but if we do, all we can do is bail early. + return; + } + + // We should not have an "origin" greenlet; that only exists + // for the temporary time during a switch, which should not + // be in progress as the thread dies. + //assert(!this->switching_state.origin); + + this->tracefunc.CLEAR(); + + // Forcibly GC as much as we can. + this->clear_deleteme_list(true); + + // The pending call did this. + assert(this->main_greenlet->thread_state() == nullptr); + + // If the main greenlet is the current greenlet, + // then we "fell off the end" and the thread died. + // It's possible that there is some other greenlet that + // switched to us, leaving a reference to the main greenlet + // on the stack, somewhere uncollectible. Try to detect that. + if (this->current_greenlet == this->main_greenlet && this->current_greenlet) { + assert(this->current_greenlet->is_currently_running_in_some_thread()); + // Drop one reference we hold. + this->current_greenlet.CLEAR(); + assert(!this->current_greenlet); + // Only our reference to the main greenlet should be left, + // But hold onto the pointer in case we need to do extra cleanup. + PyGreenlet* old_main_greenlet = this->main_greenlet.borrow(); + Py_ssize_t cnt = this->main_greenlet.REFCNT(); + this->main_greenlet.CLEAR(); + if (ThreadState::_clocks_used_doing_gc != std::clock_t(-1) + && cnt == 2 && Py_REFCNT(old_main_greenlet) == 1) { + // Highly likely that the reference is somewhere on + // the stack, not reachable by GC. Verify. + // XXX: This is O(n) in the total number of objects. + // TODO: Add a way to disable this at runtime, and + // another way to report on it. + std::clock_t begin = std::clock(); + NewReference gc(PyImport_ImportModule("gc")); + if (gc) { + OwnedObject get_referrers = gc.PyRequireAttr(ThreadState::get_referrers_name); + OwnedList refs(get_referrers.PyCall(old_main_greenlet)); + if (refs && refs.empty()) { + assert(refs.REFCNT() == 1); + // We found nothing! So we left a dangling + // reference: Probably the last thing some + // other greenlet did was call + // 'getcurrent().parent.switch()' to switch + // back to us. Clean it up. This will be the + // case on CPython 3.7 and newer, as they use + // an internal calling conversion that avoids + // creating method objects and storing them on + // the stack. + Py_DECREF(old_main_greenlet); + } + else if (refs + && refs.size() == 1 + && PyCFunction_Check(refs.at(0)) + && Py_REFCNT(refs.at(0)) == 2) { + assert(refs.REFCNT() == 1); + // Ok, we found a C method that refers to the + // main greenlet, and its only referenced + // twice, once in the list we just created, + // once from...somewhere else. If we can't + // find where else, then this is a leak. + // This happens in older versions of CPython + // that create a bound method object somewhere + // on the stack that we'll never get back to. + if (PyCFunction_GetFunction(refs.at(0).borrow()) == (PyCFunction)green_switch) { + BorrowedObject function_w = refs.at(0); + refs.clear(); // destroy the reference + // from the list. + // back to one reference. Can *it* be + // found? + assert(function_w.REFCNT() == 1); + refs = get_referrers.PyCall(function_w); + if (refs && refs.empty()) { + // Nope, it can't be found so it won't + // ever be GC'd. Drop it. + Py_CLEAR(function_w); + } + } + } + std::clock_t end = std::clock(); + ThreadState::_clocks_used_doing_gc += (end - begin); + } + } + } + + // We need to make sure this greenlet appears to be dead, + // because otherwise deallocing it would fail to raise an + // exception in it (the thread is dead) and put it back in our + // deleteme list. + if (this->current_greenlet) { + this->current_greenlet->murder_in_place(); + this->current_greenlet.CLEAR(); + } + + if (this->main_greenlet) { + // Couldn't have been the main greenlet that was running + // when the thread exited (because we already cleared this + // pointer if it was). This shouldn't be possible? + + // If the main greenlet was current when the thread died (it + // should be, right?) then we cleared its self pointer above + // when we cleared the current greenlet's main greenlet pointer. + // assert(this->main_greenlet->main_greenlet == this->main_greenlet + // || !this->main_greenlet->main_greenlet); + // // self reference, probably gone + // this->main_greenlet->main_greenlet.CLEAR(); + + // This will actually go away when the ivar is destructed. + this->main_greenlet.CLEAR(); + } + + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(NULL); + PyErr_Clear(); + } + + } + +}; + +ImmortalString ThreadState::get_referrers_name(nullptr); +PythonAllocator ThreadState::allocator; +std::clock_t ThreadState::_clocks_used_doing_gc(0); + + + + + +}; // namespace greenlet + +#endif -- cgit v1.2.3