aboutsummaryrefslogtreecommitdiff
path: root/uploader/dbinsert.py
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/dbinsert.py')
-rw-r--r--uploader/dbinsert.py406
1 files changed, 0 insertions, 406 deletions
diff --git a/uploader/dbinsert.py b/uploader/dbinsert.py
deleted file mode 100644
index 2116031..0000000
--- a/uploader/dbinsert.py
+++ /dev/null
@@ -1,406 +0,0 @@
-"Handle inserting data into the database"
-import os
-import json
-from typing import Union
-from functools import reduce
-from datetime import datetime
-
-from redis import Redis
-from MySQLdb.cursors import DictCursor
-from flask import (
- flash, request, url_for, Blueprint, redirect, render_template,
- current_app as app)
-
-from uploader.authorisation import require_login
-from uploader.population.models import populations_by_species
-from uploader.db_utils import with_db_connection, database_connection
-from uploader.species.models import species_by_id, all_species as species
-
-from . import jobs
-
-dbinsertbp = Blueprint("dbinsert", __name__)
-
-def render_error(error_msg):
- "Render the generic error page"
- return render_template("dbupdate_error.html", error_message=error_msg), 400
-
-def make_menu_items_grouper(grouping_fn=lambda item: item):
- "Build function to be used to group menu items."
- def __grouper__(acc, row):
- grouping = grouping_fn(row[2])
- row_values = (row[0].strip(), row[1].strip())
- if acc.get(grouping) is None:
- return {**acc, grouping: (row_values,)}
- return {**acc, grouping: (acc[grouping] + (row_values,))}
- return __grouper__
-
-def genechips():
- "Retrieve the genechip information from the database"
- def __organise_by_species__(acc, chip):
- speciesid = chip["SpeciesId"]
- if acc.get(speciesid) is None:
- return {**acc, speciesid: (chip,)}
- return {**acc, speciesid: acc[speciesid] + (chip,)}
-
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute("SELECT * FROM GeneChip ORDER BY GeneChipName ASC")
- return reduce(__organise_by_species__, cursor.fetchall(), {})
-
- return {}
-
-def platform_by_id(genechipid:int) -> Union[dict, None]:
- "Retrieve the gene platform by id"
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute(
- "SELECT * FROM GeneChip WHERE GeneChipId=%s",
- (genechipid,))
- return cursor.fetchone()
-
-def studies_by_species_and_platform(speciesid:int, genechipid:int) -> tuple:
- "Retrieve the studies by the related species and gene platform"
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- query = (
- "SELECT Species.SpeciesId, ProbeFreeze.* "
- "FROM Species INNER JOIN InbredSet "
- "ON Species.SpeciesId=InbredSet.SpeciesId "
- "INNER JOIN ProbeFreeze "
- "ON InbredSet.InbredSetId=ProbeFreeze.InbredSetId "
- "WHERE Species.SpeciesId = %s "
- "AND ProbeFreeze.ChipId = %s")
- cursor.execute(query, (speciesid, genechipid))
- return tuple(cursor.fetchall())
-
- return tuple()
-
-def organise_groups_by_family(acc:dict, group:dict) -> dict:
- "Organise the group (InbredSet) information by the group field"
- family = group["Family"]
- if acc.get(family):
- return {**acc, family: acc[family] + (group,)}
- return {**acc, family: (group,)}
-
-def tissues() -> tuple:
- "Retrieve type (Tissue) information from the database."
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute("SELECT * FROM Tissue ORDER BY Name")
- return tuple(cursor.fetchall())
-
- return tuple()
-
-@dbinsertbp.route("/platform", methods=["POST"])
-@require_login
-def select_platform():
- "Select the platform (GeneChipId) used for the data."
- job_id = request.form["job_id"]
- with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
- database_connection(app.config["SQL_URI"]) as conn):
- job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
- if job:
- filename = job["filename"]
- filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}"
- if os.path.exists(filepath):
- default_species = 1
- gchips = genechips()
- return render_template(
- "select_platform.html", filename=filename,
- filetype=job["filetype"], totallines=int(job["currentline"]),
- default_species=default_species, species=species(conn),
- genechips=gchips[default_species],
- genechips_data=json.dumps(gchips))
- return render_error(f"File '{filename}' no longer exists.")
- return render_error(f"Job '{job_id}' no longer exists.")
- return render_error("Unknown error")
-
-@dbinsertbp.route("/study", methods=["POST"])
-@require_login
-def select_study():
- "View to select/create the study (ProbeFreeze) associated with the data."
- form = request.form
- try:
- assert form.get("filename"), "filename"
- assert form.get("filetype"), "filetype"
- assert form.get("species"), "species"
- assert form.get("genechipid"), "platform"
-
- speciesid = form["species"]
- genechipid = form["genechipid"]
-
- the_studies = studies_by_species_and_platform(speciesid, genechipid)
- the_groups = reduce(
- organise_groups_by_family,
- with_db_connection(
- lambda conn: populations_by_species(conn, speciesid)),
- {})
- return render_template(
- "select_study.html", filename=form["filename"],
- filetype=form["filetype"], totallines=form["totallines"],
- species=speciesid, genechipid=genechipid, studies=the_studies,
- groups=the_groups, tissues = tissues(),
- selected_group=int(form.get("inbredsetid", -13)),
- selected_tissue=int(form.get("tissueid", -13)))
- except AssertionError as aserr:
- return render_error(f"Missing data: {aserr.args[0]}")
-
-@dbinsertbp.route("/create-study", methods=["POST"])
-@require_login
-def create_study():
- "Create a new study (ProbeFreeze)."
- form = request.form
- try:
- assert form.get("filename"), "filename"
- assert form.get("filetype"), "filetype"
- assert form.get("species"), "species"
- assert form.get("genechipid"), "platform"
- assert form.get("studyname"), "study name"
- assert form.get("inbredsetid"), "group"
- assert form.get("tissueid"), "type/tissue"
-
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- values = (
- form["genechipid"],
- form["tissueid"],
- form["studyname"],
- form.get("studyfullname", ""),
- form.get("studyshortname", ""),
- datetime.now().date().strftime("%Y-%m-%d"),
- form["inbredsetid"])
- query = (
- "INSERT INTO ProbeFreeze("
- "ChipId, TissueId, Name, FullName, ShortName, CreateTime, "
- "InbredSetId"
- ") VALUES (%s, %s, %s, %s, %s, %s, %s)")
- cursor.execute(query, values)
- new_studyid = cursor.lastrowid
- cursor.execute(
- "UPDATE ProbeFreeze SET ProbeFreezeId=%s WHERE Id=%s",
- (new_studyid, new_studyid))
- flash("Study created successfully", "alert-success")
- return render_template(
- "continue_from_create_study.html",
- filename=form["filename"], filetype=form["filetype"],
- totallines=form["totallines"], species=form["species"],
- genechipid=form["genechipid"], studyid=new_studyid)
- except AssertionError as aserr:
- flash(f"Missing data: {aserr.args[0]}", "alert-error")
- return redirect(url_for("dbinsert.select_study"), code=307)
-
-def datasets_by_study(studyid:int) -> tuple:
- "Retrieve datasets associated with a study with the ID `studyid`."
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- query = "SELECT * FROM ProbeSetFreeze WHERE ProbeFreezeId=%s"
- cursor.execute(query, (studyid,))
- return tuple(cursor.fetchall())
-
- return tuple()
-
-def averaging_methods() -> tuple:
- "Retrieve averaging methods from database"
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute("SELECT * FROM AvgMethod")
- return tuple(cursor.fetchall())
-
- return tuple()
-
-def dataset_datascales() -> tuple:
- "Retrieve datascales from database"
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor() as cursor:
- cursor.execute(
- 'SELECT DISTINCT DataScale FROM ProbeSetFreeze '
- 'WHERE DataScale IS NOT NULL AND DataScale != ""')
- return tuple(
- item for item in
- (res[0].strip() for res in cursor.fetchall())
- if (item is not None and item != ""))
-
- return tuple()
-
-@dbinsertbp.route("/dataset", methods=["POST"])
-@require_login
-def select_dataset():
- "Select the dataset to add the file contents against"
- form = request.form
- try:
- assert form.get("filename"), "filename"
- assert form.get("filetype"), "filetype"
- assert form.get("species"), "species"
- assert form.get("genechipid"), "platform"
- assert form.get("studyid"), "study"
-
- studyid = form["studyid"]
- datasets = datasets_by_study(studyid)
- return render_template(
- "select_dataset.html", **{**form, "studyid": studyid},
- datasets=datasets, avgmethods=averaging_methods(),
- datascales=dataset_datascales())
- except AssertionError as aserr:
- return render_error(f"Missing data: {aserr.args[0]}")
-
-@dbinsertbp.route("/create-dataset", methods=["POST"])
-@require_login
-def create_dataset():
- "Select the dataset to add the file contents against"
- form = request.form
- try:
- assert form.get("filename"), "filename"
- assert form.get("filetype"), "filetype"
- assert form.get("species"), "species"
- assert form.get("genechipid"), "platform"
- assert form.get("studyid"), "study"
- assert form.get("avgid"), "averaging method"
- assert form.get("datasetname2"), "Dataset Name 2"
- assert form.get("datasetfullname"), "Dataset Full Name"
- assert form.get("datasetshortname"), "Dataset Short Name"
- assert form.get("datasetpublic"), "Dataset public specification"
- assert form.get("datasetconfidentiality"), "Dataset confidentiality"
- assert form.get("datasetdatascale"), "Dataset Datascale"
-
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- datasetname = form["datasetname"]
- cursor.execute("SELECT * FROM ProbeSetFreeze WHERE Name=%s",
- (datasetname,))
- results = cursor.fetchall()
- if bool(results):
- flash("A dataset with that name already exists.",
- "alert-error")
- return redirect(url_for("dbinsert.select_dataset"), code=307)
- values = (
- form["studyid"], form["avgid"],
- datasetname, form["datasetname2"],
- form["datasetfullname"], form["datasetshortname"],
- datetime.now().date().strftime("%Y-%m-%d"),
- form["datasetpublic"], form["datasetconfidentiality"],
- "williamslab", form["datasetdatascale"])
- query = (
- "INSERT INTO ProbeSetFreeze("
- "ProbeFreezeId, AvgID, Name, Name2, FullName, "
- "ShortName, CreateTime, OrderList, public, "
- "confidentiality, AuthorisedUsers, DataScale) "
- "VALUES"
- "(%s, %s, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s)")
- cursor.execute(query, values)
- new_datasetid = cursor.lastrowid
- return render_template(
- "continue_from_create_dataset.html",
- filename=form["filename"], filetype=form["filetype"],
- species=form["species"], genechipid=form["genechipid"],
- studyid=form["studyid"], datasetid=new_datasetid,
- totallines=form["totallines"])
- except AssertionError as aserr:
- flash(f"Missing data {aserr.args[0]}", "alert-error")
- return redirect(url_for("dbinsert.select_dataset"), code=307)
-
-def study_by_id(studyid:int) -> Union[dict, None]:
- "Get a study by its Id"
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute(
- "SELECT * FROM ProbeFreeze WHERE Id=%s",
- (studyid,))
- return cursor.fetchone()
-
-def dataset_by_id(datasetid:int) -> Union[dict, None]:
- "Retrieve a dataset by its id"
- with database_connection(app.config["SQL_URI"]) as conn:
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute(
- ("SELECT AvgMethod.Name AS AvgMethodName, ProbeSetFreeze.* "
- "FROM ProbeSetFreeze INNER JOIN AvgMethod "
- "ON ProbeSetFreeze.AvgId=AvgMethod.AvgMethodId "
- "WHERE ProbeSetFreeze.Id=%s"),
- (datasetid,))
- return cursor.fetchone()
-
-def selected_keys(original: dict, keys: tuple) -> dict:
- "Return a new dict from the `original` dict with only `keys` present."
- return {key: value for key,value in original.items() if key in keys}
-
-@dbinsertbp.route("/final-confirmation", methods=["POST"])
-@require_login
-def final_confirmation():
- "Preview the data before triggering entry into the database"
- form = request.form
- try:
- assert form.get("filename"), "filename"
- assert form.get("filetype"), "filetype"
- assert form.get("species"), "species"
- assert form.get("genechipid"), "platform"
- assert form.get("studyid"), "study"
- assert form.get("datasetid"), "dataset"
-
- speciesid = form["species"]
- genechipid = form["genechipid"]
- studyid = form["studyid"]
- datasetid=form["datasetid"]
- return render_template(
- "final_confirmation.html", filename=form["filename"],
- filetype=form["filetype"], totallines=form["totallines"],
- species=speciesid, genechipid=genechipid, studyid=studyid,
- datasetid=datasetid, the_species=selected_keys(
- with_db_connection(lambda conn: species_by_id(conn, speciesid)),
- ("SpeciesName", "Name", "MenuName")),
- platform=selected_keys(
- platform_by_id(genechipid),
- ("GeneChipName", "Name", "GeoPlatform", "Title", "GO_tree_value")),
- study=selected_keys(
- study_by_id(studyid), ("Name", "FullName", "ShortName")),
- dataset=selected_keys(
- dataset_by_id(datasetid),
- ("AvgMethodName", "Name", "Name2", "FullName", "ShortName",
- "DataScale")))
- except AssertionError as aserr:
- return render_error(f"Missing data: {aserr.args[0]}")
-
-@dbinsertbp.route("/insert-data", methods=["POST"])
-@require_login
-def insert_data():
- "Trigger data insertion"
- form = request.form
- try:
- assert form.get("filename"), "filename"
- assert form.get("filetype"), "filetype"
- assert form.get("species"), "species"
- assert form.get("genechipid"), "platform"
- assert form.get("studyid"), "study"
- assert form.get("datasetid"), "dataset"
-
- filename = form["filename"]
- filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}"
- redisurl = app.config["REDIS_URL"]
- if os.path.exists(filepath):
- with Redis.from_url(redisurl, decode_responses=True) as rconn:
- job = jobs.launch_job(
- jobs.data_insertion_job(
- rconn, filepath, form["filetype"], form["totallines"],
- form["species"], form["genechipid"], form["datasetid"],
- app.config["SQL_URI"], redisurl,
- app.config["JOBS_TTL_SECONDS"]),
- redisurl, f"{app.config['UPLOAD_FOLDER']}/job_errors")
-
- return redirect(url_for("dbinsert.insert_status", job_id=job["jobid"]))
- return render_error(f"File '{filename}' no longer exists.")
- except AssertionError as aserr:
- return render_error(f"Missing data: {aserr.args[0]}")
-
-@dbinsertbp.route("/status/<job_id>", methods=["GET"])
-def insert_status(job_id: str):
- "Retrieve status of data insertion."
- with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
- job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
-
- if job:
- job_status = job["status"]
- if job_status == "success":
- return render_template("insert_success.html", job=job)
- if job["status"] == "error":
- return render_template("insert_error.html", job=job)
- return render_template("insert_progress.html", job=job)
- return render_template("no_such_job.html", job_id=job_id), 400