diff options
Diffstat (limited to 'uploader/dbinsert.py')
-rw-r--r-- | uploader/dbinsert.py | 406 |
1 files changed, 0 insertions, 406 deletions
diff --git a/uploader/dbinsert.py b/uploader/dbinsert.py deleted file mode 100644 index 2116031..0000000 --- a/uploader/dbinsert.py +++ /dev/null @@ -1,406 +0,0 @@ -"Handle inserting data into the database" -import os -import json -from typing import Union -from functools import reduce -from datetime import datetime - -from redis import Redis -from MySQLdb.cursors import DictCursor -from flask import ( - flash, request, url_for, Blueprint, redirect, render_template, - current_app as app) - -from uploader.authorisation import require_login -from uploader.population.models import populations_by_species -from uploader.db_utils import with_db_connection, database_connection -from uploader.species.models import species_by_id, all_species as species - -from . import jobs - -dbinsertbp = Blueprint("dbinsert", __name__) - -def render_error(error_msg): - "Render the generic error page" - return render_template("dbupdate_error.html", error_message=error_msg), 400 - -def make_menu_items_grouper(grouping_fn=lambda item: item): - "Build function to be used to group menu items." - def __grouper__(acc, row): - grouping = grouping_fn(row[2]) - row_values = (row[0].strip(), row[1].strip()) - if acc.get(grouping) is None: - return {**acc, grouping: (row_values,)} - return {**acc, grouping: (acc[grouping] + (row_values,))} - return __grouper__ - -def genechips(): - "Retrieve the genechip information from the database" - def __organise_by_species__(acc, chip): - speciesid = chip["SpeciesId"] - if acc.get(speciesid) is None: - return {**acc, speciesid: (chip,)} - return {**acc, speciesid: acc[speciesid] + (chip,)} - - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute("SELECT * FROM GeneChip ORDER BY GeneChipName ASC") - return reduce(__organise_by_species__, cursor.fetchall(), {}) - - return {} - -def platform_by_id(genechipid:int) -> Union[dict, None]: - "Retrieve the gene platform by id" - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute( - "SELECT * FROM GeneChip WHERE GeneChipId=%s", - (genechipid,)) - return cursor.fetchone() - -def studies_by_species_and_platform(speciesid:int, genechipid:int) -> tuple: - "Retrieve the studies by the related species and gene platform" - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - query = ( - "SELECT Species.SpeciesId, ProbeFreeze.* " - "FROM Species INNER JOIN InbredSet " - "ON Species.SpeciesId=InbredSet.SpeciesId " - "INNER JOIN ProbeFreeze " - "ON InbredSet.InbredSetId=ProbeFreeze.InbredSetId " - "WHERE Species.SpeciesId = %s " - "AND ProbeFreeze.ChipId = %s") - cursor.execute(query, (speciesid, genechipid)) - return tuple(cursor.fetchall()) - - return tuple() - -def organise_groups_by_family(acc:dict, group:dict) -> dict: - "Organise the group (InbredSet) information by the group field" - family = group["Family"] - if acc.get(family): - return {**acc, family: acc[family] + (group,)} - return {**acc, family: (group,)} - -def tissues() -> tuple: - "Retrieve type (Tissue) information from the database." - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute("SELECT * FROM Tissue ORDER BY Name") - return tuple(cursor.fetchall()) - - return tuple() - -@dbinsertbp.route("/platform", methods=["POST"]) -@require_login -def select_platform(): - "Select the platform (GeneChipId) used for the data." - job_id = request.form["job_id"] - with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, - database_connection(app.config["SQL_URI"]) as conn): - job = jobs.job(rconn, jobs.jobsnamespace(), job_id) - if job: - filename = job["filename"] - filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}" - if os.path.exists(filepath): - default_species = 1 - gchips = genechips() - return render_template( - "select_platform.html", filename=filename, - filetype=job["filetype"], totallines=int(job["currentline"]), - default_species=default_species, species=species(conn), - genechips=gchips[default_species], - genechips_data=json.dumps(gchips)) - return render_error(f"File '{filename}' no longer exists.") - return render_error(f"Job '{job_id}' no longer exists.") - return render_error("Unknown error") - -@dbinsertbp.route("/study", methods=["POST"]) -@require_login -def select_study(): - "View to select/create the study (ProbeFreeze) associated with the data." - form = request.form - try: - assert form.get("filename"), "filename" - assert form.get("filetype"), "filetype" - assert form.get("species"), "species" - assert form.get("genechipid"), "platform" - - speciesid = form["species"] - genechipid = form["genechipid"] - - the_studies = studies_by_species_and_platform(speciesid, genechipid) - the_groups = reduce( - organise_groups_by_family, - with_db_connection( - lambda conn: populations_by_species(conn, speciesid)), - {}) - return render_template( - "select_study.html", filename=form["filename"], - filetype=form["filetype"], totallines=form["totallines"], - species=speciesid, genechipid=genechipid, studies=the_studies, - groups=the_groups, tissues = tissues(), - selected_group=int(form.get("inbredsetid", -13)), - selected_tissue=int(form.get("tissueid", -13))) - except AssertionError as aserr: - return render_error(f"Missing data: {aserr.args[0]}") - -@dbinsertbp.route("/create-study", methods=["POST"]) -@require_login -def create_study(): - "Create a new study (ProbeFreeze)." - form = request.form - try: - assert form.get("filename"), "filename" - assert form.get("filetype"), "filetype" - assert form.get("species"), "species" - assert form.get("genechipid"), "platform" - assert form.get("studyname"), "study name" - assert form.get("inbredsetid"), "group" - assert form.get("tissueid"), "type/tissue" - - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - values = ( - form["genechipid"], - form["tissueid"], - form["studyname"], - form.get("studyfullname", ""), - form.get("studyshortname", ""), - datetime.now().date().strftime("%Y-%m-%d"), - form["inbredsetid"]) - query = ( - "INSERT INTO ProbeFreeze(" - "ChipId, TissueId, Name, FullName, ShortName, CreateTime, " - "InbredSetId" - ") VALUES (%s, %s, %s, %s, %s, %s, %s)") - cursor.execute(query, values) - new_studyid = cursor.lastrowid - cursor.execute( - "UPDATE ProbeFreeze SET ProbeFreezeId=%s WHERE Id=%s", - (new_studyid, new_studyid)) - flash("Study created successfully", "alert-success") - return render_template( - "continue_from_create_study.html", - filename=form["filename"], filetype=form["filetype"], - totallines=form["totallines"], species=form["species"], - genechipid=form["genechipid"], studyid=new_studyid) - except AssertionError as aserr: - flash(f"Missing data: {aserr.args[0]}", "alert-error") - return redirect(url_for("dbinsert.select_study"), code=307) - -def datasets_by_study(studyid:int) -> tuple: - "Retrieve datasets associated with a study with the ID `studyid`." - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - query = "SELECT * FROM ProbeSetFreeze WHERE ProbeFreezeId=%s" - cursor.execute(query, (studyid,)) - return tuple(cursor.fetchall()) - - return tuple() - -def averaging_methods() -> tuple: - "Retrieve averaging methods from database" - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute("SELECT * FROM AvgMethod") - return tuple(cursor.fetchall()) - - return tuple() - -def dataset_datascales() -> tuple: - "Retrieve datascales from database" - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor() as cursor: - cursor.execute( - 'SELECT DISTINCT DataScale FROM ProbeSetFreeze ' - 'WHERE DataScale IS NOT NULL AND DataScale != ""') - return tuple( - item for item in - (res[0].strip() for res in cursor.fetchall()) - if (item is not None and item != "")) - - return tuple() - -@dbinsertbp.route("/dataset", methods=["POST"]) -@require_login -def select_dataset(): - "Select the dataset to add the file contents against" - form = request.form - try: - assert form.get("filename"), "filename" - assert form.get("filetype"), "filetype" - assert form.get("species"), "species" - assert form.get("genechipid"), "platform" - assert form.get("studyid"), "study" - - studyid = form["studyid"] - datasets = datasets_by_study(studyid) - return render_template( - "select_dataset.html", **{**form, "studyid": studyid}, - datasets=datasets, avgmethods=averaging_methods(), - datascales=dataset_datascales()) - except AssertionError as aserr: - return render_error(f"Missing data: {aserr.args[0]}") - -@dbinsertbp.route("/create-dataset", methods=["POST"]) -@require_login -def create_dataset(): - "Select the dataset to add the file contents against" - form = request.form - try: - assert form.get("filename"), "filename" - assert form.get("filetype"), "filetype" - assert form.get("species"), "species" - assert form.get("genechipid"), "platform" - assert form.get("studyid"), "study" - assert form.get("avgid"), "averaging method" - assert form.get("datasetname2"), "Dataset Name 2" - assert form.get("datasetfullname"), "Dataset Full Name" - assert form.get("datasetshortname"), "Dataset Short Name" - assert form.get("datasetpublic"), "Dataset public specification" - assert form.get("datasetconfidentiality"), "Dataset confidentiality" - assert form.get("datasetdatascale"), "Dataset Datascale" - - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - datasetname = form["datasetname"] - cursor.execute("SELECT * FROM ProbeSetFreeze WHERE Name=%s", - (datasetname,)) - results = cursor.fetchall() - if bool(results): - flash("A dataset with that name already exists.", - "alert-error") - return redirect(url_for("dbinsert.select_dataset"), code=307) - values = ( - form["studyid"], form["avgid"], - datasetname, form["datasetname2"], - form["datasetfullname"], form["datasetshortname"], - datetime.now().date().strftime("%Y-%m-%d"), - form["datasetpublic"], form["datasetconfidentiality"], - "williamslab", form["datasetdatascale"]) - query = ( - "INSERT INTO ProbeSetFreeze(" - "ProbeFreezeId, AvgID, Name, Name2, FullName, " - "ShortName, CreateTime, OrderList, public, " - "confidentiality, AuthorisedUsers, DataScale) " - "VALUES" - "(%s, %s, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s)") - cursor.execute(query, values) - new_datasetid = cursor.lastrowid - return render_template( - "continue_from_create_dataset.html", - filename=form["filename"], filetype=form["filetype"], - species=form["species"], genechipid=form["genechipid"], - studyid=form["studyid"], datasetid=new_datasetid, - totallines=form["totallines"]) - except AssertionError as aserr: - flash(f"Missing data {aserr.args[0]}", "alert-error") - return redirect(url_for("dbinsert.select_dataset"), code=307) - -def study_by_id(studyid:int) -> Union[dict, None]: - "Get a study by its Id" - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute( - "SELECT * FROM ProbeFreeze WHERE Id=%s", - (studyid,)) - return cursor.fetchone() - -def dataset_by_id(datasetid:int) -> Union[dict, None]: - "Retrieve a dataset by its id" - with database_connection(app.config["SQL_URI"]) as conn: - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute( - ("SELECT AvgMethod.Name AS AvgMethodName, ProbeSetFreeze.* " - "FROM ProbeSetFreeze INNER JOIN AvgMethod " - "ON ProbeSetFreeze.AvgId=AvgMethod.AvgMethodId " - "WHERE ProbeSetFreeze.Id=%s"), - (datasetid,)) - return cursor.fetchone() - -def selected_keys(original: dict, keys: tuple) -> dict: - "Return a new dict from the `original` dict with only `keys` present." - return {key: value for key,value in original.items() if key in keys} - -@dbinsertbp.route("/final-confirmation", methods=["POST"]) -@require_login -def final_confirmation(): - "Preview the data before triggering entry into the database" - form = request.form - try: - assert form.get("filename"), "filename" - assert form.get("filetype"), "filetype" - assert form.get("species"), "species" - assert form.get("genechipid"), "platform" - assert form.get("studyid"), "study" - assert form.get("datasetid"), "dataset" - - speciesid = form["species"] - genechipid = form["genechipid"] - studyid = form["studyid"] - datasetid=form["datasetid"] - return render_template( - "final_confirmation.html", filename=form["filename"], - filetype=form["filetype"], totallines=form["totallines"], - species=speciesid, genechipid=genechipid, studyid=studyid, - datasetid=datasetid, the_species=selected_keys( - with_db_connection(lambda conn: species_by_id(conn, speciesid)), - ("SpeciesName", "Name", "MenuName")), - platform=selected_keys( - platform_by_id(genechipid), - ("GeneChipName", "Name", "GeoPlatform", "Title", "GO_tree_value")), - study=selected_keys( - study_by_id(studyid), ("Name", "FullName", "ShortName")), - dataset=selected_keys( - dataset_by_id(datasetid), - ("AvgMethodName", "Name", "Name2", "FullName", "ShortName", - "DataScale"))) - except AssertionError as aserr: - return render_error(f"Missing data: {aserr.args[0]}") - -@dbinsertbp.route("/insert-data", methods=["POST"]) -@require_login -def insert_data(): - "Trigger data insertion" - form = request.form - try: - assert form.get("filename"), "filename" - assert form.get("filetype"), "filetype" - assert form.get("species"), "species" - assert form.get("genechipid"), "platform" - assert form.get("studyid"), "study" - assert form.get("datasetid"), "dataset" - - filename = form["filename"] - filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}" - redisurl = app.config["REDIS_URL"] - if os.path.exists(filepath): - with Redis.from_url(redisurl, decode_responses=True) as rconn: - job = jobs.launch_job( - jobs.data_insertion_job( - rconn, filepath, form["filetype"], form["totallines"], - form["species"], form["genechipid"], form["datasetid"], - app.config["SQL_URI"], redisurl, - app.config["JOBS_TTL_SECONDS"]), - redisurl, f"{app.config['UPLOAD_FOLDER']}/job_errors") - - return redirect(url_for("dbinsert.insert_status", job_id=job["jobid"])) - return render_error(f"File '{filename}' no longer exists.") - except AssertionError as aserr: - return render_error(f"Missing data: {aserr.args[0]}") - -@dbinsertbp.route("/status/<job_id>", methods=["GET"]) -def insert_status(job_id: str): - "Retrieve status of data insertion." - with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: - job = jobs.job(rconn, jobs.jobsnamespace(), job_id) - - if job: - job_status = job["status"] - if job_status == "success": - return render_template("insert_success.html", job=job) - if job["status"] == "error": - return render_template("insert_error.html", job=job) - return render_template("insert_progress.html", job=job) - return render_template("no_such_job.html", job_id=job_id), 400 |