From aa3d310aa257f0ef0a8636272883c3c4e6855a1c Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 3 Dec 2023 09:43:06 -0600 Subject: Moving files --- topics/python/profiling_python_code.gmi | 28 +++++ topics/python/pytest-pudb.gmi | 12 ++ .../running_pudb_from_external_environment.gmi | 30 +++++ topics/python/using-pdb-to-troubleshoot.gmi | 131 +++++++++++++++++++++ topics/python/using-pudb.gmi | 26 ++++ 5 files changed, 227 insertions(+) create mode 100644 topics/python/profiling_python_code.gmi create mode 100644 topics/python/pytest-pudb.gmi create mode 100644 topics/python/running_pudb_from_external_environment.gmi create mode 100644 topics/python/using-pdb-to-troubleshoot.gmi create mode 100644 topics/python/using-pudb.gmi (limited to 'topics/python') diff --git a/topics/python/profiling_python_code.gmi b/topics/python/profiling_python_code.gmi new file mode 100644 index 0000000..9052364 --- /dev/null +++ b/topics/python/profiling_python_code.gmi @@ -0,0 +1,28 @@ +# Profiling Python code + +As part of improving the system, there is a need to identify and fix/improve the performance bottlenecks in the code. This document details examples of how one would run various profilers, for both GeneNetwork2 and GeneNetwork3. + +## GeneNetwork3 + +### cProfile + +Syntax: + +* env [various-env-vars] python3 -m cProfile the-script.py + +where + +* `[various-env-vars]` is a number of environment variables that might be needed for the running of the script, e.g. `SQL_URI` which is used to define how to connect to the database. +* `the-script.py` is the name of the python script to be run under the profiler + +The output can be redirected, e.g. 
+ +* env [various-env-vars] python3 -m cProfile the-script.py 2>error.log 1>performance.log + +which should help with separating errors from the performance data, for easier analysis + + +## Tags + +* type: documentation +* keywords: profiling, python diff --git a/topics/python/pytest-pudb.gmi b/topics/python/pytest-pudb.gmi new file mode 100644 index 0000000..4a9b685 --- /dev/null +++ b/topics/python/pytest-pudb.gmi @@ -0,0 +1,12 @@ +# Using pudb with pytest + +``` +pytest --pdbcls pudb.debugger:Debugger --pdb --capture=no +``` + +=> https://documen.tician.de/pudb/starting.html?highlight=pytest#usage-with-pytest + +## Tags + +* type: documentation +* keywords: pudb, pytest diff --git a/topics/python/running_pudb_from_external_environment.gmi b/topics/python/running_pudb_from_external_environment.gmi new file mode 100644 index 0000000..a533e22 --- /dev/null +++ b/topics/python/running_pudb_from_external_environment.gmi @@ -0,0 +1,30 @@ +# Running pudb From an External Environment + +This documentation is now obsolete since pudb was added as a package in GNU Guix. + +Currently (22nd December 2021), it is a tad difficult to get pudb running under the various guix environments without having to update a lot of stuff. This documents a workaround that seems to work for me (@fredm). + +## GeneNetwork3 + +1) Install pudb in a virtual environment: + +* $ python -m venv pudb_env + +2) Activate the environment and install pudb: + +* $ source pudb_env/bin/activate +* $ pip install pudb + +3) Run the GN3 container, exposing the new virtual environment with something like: + +* env GUIX_PACKAGE_PATH=~/genenetwork/guix-bioinformatics/ ~/.config/guix/current/bin/guix environment -C --network --expose="$HOME/genotype_files/"="${HOME}/genotype_files" --expose="${HOME}/genenetwork/pudb_env/"="${HOME}/pudb_env" --load=guix.scm + +4) Inside the container, run the debugger with something like: + +* env PYTHONPATH="${PYTHONPATH}:${HOME}/pudb_env/lib/python3.8/site-packages" ... 
python3 -m pudb script.py + +## Tags + +* type: documentation +* keywords: pudb +* status: obsolete diff --git a/topics/python/using-pdb-to-troubleshoot.gmi b/topics/python/using-pdb-to-troubleshoot.gmi new file mode 100644 index 0000000..294486f --- /dev/null +++ b/topics/python/using-pdb-to-troubleshoot.gmi @@ -0,0 +1,131 @@ +# Using PDB to Troubleshoot Python Code + +## Tags +* type: documentation +* keywords: debugging, python + +Historically, debugging GeneNetwork code has been a bit of a pain where you would have `print' and `logging' statements to help view offending code chunks. This is not efficient, and we can do better! One painful side-effect wrt logging worth mentioning is that our logs grow quite fast and we need to rotate them, atm manually: + +Here are examples of some logging that we do: + +``` +@app.route("/n/logout") +def logout(): + logger.debug("Logging out...") + UserSession().delete_session() + flash("You are now logged out. We hope you come back soon!") + response = make_response(redirect(url_for('index_page'))) + # Delete the cookie + response.set_cookie(UserSession.user_cookie_name, '', expires=0) + return response +``` + +``` +@app.route("/tmp/") def +tmp_page(img_path): logger.info("In tmp_page") +logger.info("img_path:", img_path) +logger.info(request.url) initial_start_vars = +request.form logger.info("initial_start_vars:", +initial_start_vars) imgfile = +open(GENERATED_IMAGE_DIR + img_path, 'rb') imgdata += imgfile.read() imgB64 = +base64.b64encode(imgdata) bytesarray = +array.array('B', imgB64) return +render_template("show_image.html", +img_base64=bytesarray) +``` + +Earlier this year, one of our members introduced us to pudb---a console-based graphical debugger for Python. I have gravitated away from this because it adds yet another dependency in our toolchain; in addition to it being ncurses-based, lacking guarantees in how it behaves in different terminals. 
It also lacks support in different OSes, thereby forcing end-users to SSH into one of our remote servers to troubleshoot. + +Python PDB ships with Python, and as such, works well in different setups. There are multiple ways of getting into a pdb session, the easiest being to set a `breakpoint()'. Assume we are trouble-shooting this function: + +``` +from typing import List + + +def avg(numbers: List) -> int: + return sum(numbers)/len(numbers) + + +print(avg([20, 21])) +``` + +This will fail for a list that contains a non-integer value, say a list containing ["1", "2"]. The first step to troubleshoot, assuming we have no test, would be to set a `breakpoint()' as such: + +``` +from typing import List + + +def avg(numbers: List) -> int: + breakpoint() + return sum(numbers)/len(numbers) + + +print(avg([20, "21"])) +``` + +Useful commands while you are in pdb: + +* "l ." --- show where you are in the context +* "n" --- go to the next line +* "s" --- step into a function +* "c" --- continues execution until a breakpoint is encountered +* "p" --- print a variable +* "pp" --- pretty print a variable + +When we step into our debug session, we can view all the variables in a local scope using: "locals()"; and the global scope using: "globals()". With this information, we can quickly work out where our problem is by just inspecting the variables we have at hand. + +Another cool trick/pattern when debugging is to tell pdb to jump to where the error occurred in a try/except block using `import pdb; pdb.post_mortem()' like so: + +``` +from typing import List + + +def avg(numbers: List) -> int: + try: + return sum(numbers)/len(numbers) + except Exception: + import pdb; pdb.post_mortem() + + +print(avg([20, "21"])) +``` + +With regards to testing, pdb is also integrated with test-runners. 
To use pdb with pytest, simply run: + +,---- +| pytest --pdb +`---- + +## Running Flask Applications Under pdb + +To troubleshoot a Flask application (and any other application running via an +application server of sorts), you might need to start the application server under +the debugger, otherwise, you will get an error like: + +``` +BdbQuit +``` + +Ideally, you shouldn't need to, as the terminal where you started the +application server (Flask) should drop you into the debugger automatically. + +If you run the application under other application servers like gunicorn, then +you might need to increase the timeout period to prevent gunicorn from killing +the process, leading to the error above. Generally speaking, you **SHOULD NOT** +be running the debugger in production anyway, and therefore you should not need +to deal with the gunicorn issues. + +That said, you can start the Flask application under pdb with something like: + +``` +python3 -m pdb flask run [OPTIONAL-ARGUMENTS-TO-FLASK] +``` + + +## Useful Tutorials + +To learn more about pdb, you can check out: + +=> https://www.youtube.com/watch?v=0LPuG825eAk python debugger crash course: pdb/breakpoint +=> https://www.youtube.com/watch?v=s8Nx2frW4ps postmortem debugging in python diff --git a/topics/python/using-pudb.gmi b/topics/python/using-pudb.gmi new file mode 100644 index 0000000..7cac743 --- /dev/null +++ b/topics/python/using-pudb.gmi @@ -0,0 +1,26 @@ +# Using pudb + +## Setting Breakpoints + +You can add the following line anywhere in your code to start a pudb breakpoint: + +``` +import pudb; pudb.set_trace() +``` + +You can also press `b` in pudb to add a breakpoint with a keyboard shortcut. + +## pdb and pudb API similarity + +The standard pdb docs should work for pudb with one small exception. + +> At the programming language level, PuDB displays the same interface +> as Python’s built-in pdb module. Just replace pdb with pudb. (One +> exception: run is called runstatement.) 
+ +=> https://docs.python.org/3/library/pdb.html?highlight=pdb#module-pdb + +## Tags + +* type: documentation +* keywords: pudb -- cgit v1.2.3