diff options
author | Frederick Muriuki Muriithi | 2024-09-09 16:39:50 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2024-09-09 16:41:46 -0500 |
commit | 3c8ec9af68d593e7e5eef52aca3d001e22c8e0f7 (patch) | |
tree | 6ac71fb5d40d5ae8dc4b9e93b8d8b1b9510b29bc | |
parent | 9cd33ddac3d6848c5443962d66494635feadef51 (diff) | |
download | gn-uploader-3c8ec9af68d593e7e5eef52aca3d001e22c8e0f7.tar.gz |
Enable samples uploads.
-rw-r--r-- | scripts/insert_samples.py | 2 | ||||
-rw-r--r-- | uploader/samples/models.py | 85 | ||||
-rw-r--r-- | uploader/samples/views.py | 173 | ||||
-rw-r--r-- | uploader/templates/samples/list-samples.html | 5 | ||||
-rw-r--r-- | uploader/templates/samples/upload-failure.html | 37 | ||||
-rw-r--r-- | uploader/templates/samples/upload-progress.html | 31 | ||||
-rw-r--r-- | uploader/templates/samples/upload-samples.html | 155 | ||||
-rw-r--r-- | uploader/templates/samples/upload-success.html | 36 |
8 files changed, 408 insertions, 116 deletions
diff --git a/scripts/insert_samples.py b/scripts/insert_samples.py index d107c25..e3577b6 100644 --- a/scripts/insert_samples.py +++ b/scripts/insert_samples.py @@ -11,7 +11,7 @@ from uploader.db_utils import database_connection from uploader.check_connections import check_db, check_redis from uploader.species.models import species_by_id from uploader.population.models import population_by_id -from uploader.expression_data.samples import ( +from uploader.samples.models import ( save_samples_data, read_samples_file, cross_reference_samples) diff --git a/uploader/samples/models.py b/uploader/samples/models.py index 15e509e..d7d5384 100644 --- a/uploader/samples/models.py +++ b/uploader/samples/models.py @@ -1,7 +1,12 @@ """Functions for handling samples.""" +import csv +from typing import Iterator + import MySQLdb as mdb from MySQLdb.cursors import DictCursor +from functional_tools import take + def samples_by_species_and_population( conn: mdb.Connection, species_id: int, @@ -17,3 +22,83 @@ def samples_by_species_and_population( "AND iset.InbredSetId=%(population_id)s", {"species_id": species_id, "population_id": population_id}) return tuple(cursor.fetchall()) + + +def read_samples_file(filepath, separator: str, firstlineheading: bool, **kwargs) -> Iterator[dict]: + """Read the samples file.""" + with open(filepath, "r", encoding="utf-8") as inputfile: + reader = csv.DictReader( + inputfile, + fieldnames=( + None if firstlineheading + else ("Name", "Name2", "Symbol", "Alias")), + delimiter=separator, + quotechar=kwargs.get("quotechar", '"')) + for row in reader: + yield row + + +def save_samples_data(conn: mdb.Connection, + speciesid: int, + file_data: Iterator[dict]): + """Save the samples to DB.""" + data = ({**row, "SpeciesId": speciesid} for row in file_data) + total = 0 + with conn.cursor() as cursor: + while True: + batch = take(data, 5000) + if len(batch) == 0: + break + cursor.executemany( + "INSERT INTO Strain(Name, Name2, SpeciesId, Symbol, Alias) " + "VALUES(" + " %(Name)s, %(Name2)s, %(SpeciesId)s, %(Symbol)s, %(Alias)s" + ") ON DUPLICATE KEY UPDATE Name=Name", + batch) + total += len(batch) + print(f"\tSaved {total} samples total so far.") + + +def cross_reference_samples(conn: mdb.Connection, + species_id: int, + population_id: int, + strain_names: Iterator[str]): + """Link samples to their population.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT MAX(OrderId) AS loid FROM StrainXRef WHERE InbredSetId=%s", + (population_id,)) + last_order_id = (cursor.fetchone()["loid"] or 10) + total = 0 + while True: + batch = take(strain_names, 5000) + if len(batch) == 0: + break + params_str = ", ".join(["%s"] * len(batch)) + ## This query is slow -- investigate. + cursor.execute( + "SELECT s.Id FROM Strain AS s LEFT JOIN StrainXRef AS sx " + "ON s.Id = sx.StrainId WHERE s.SpeciesId=%s AND s.Name IN " + f"({params_str}) AND sx.StrainId IS NULL", + (species_id,) + tuple(batch)) + strain_ids = (sid["Id"] for sid in cursor.fetchall()) + params = tuple({ + "pop_id": population_id, + "strain_id": strain_id, + "order_id": last_order_id + (order_id * 10), + "mapping": "N", + "pedigree": None + } for order_id, strain_id in enumerate(strain_ids, start=1)) + cursor.executemany( + "INSERT INTO StrainXRef( " + " InbredSetId, StrainId, OrderId, Used_for_mapping, PedigreeStatus" + ")" + "VALUES (" + " %(pop_id)s, %(strain_id)s, %(order_id)s, %(mapping)s, " + " %(pedigree)s" + ")", + params) + last_order_id += (len(params) * 10) + total += len(batch) + print(f"\t{total} total samples cross-referenced to the population " + "so far.") diff --git a/uploader/samples/views.py b/uploader/samples/views.py index 6af90f4..6e3dc4b 100644 --- a/uploader/samples/views.py +++ b/uploader/samples/views.py @@ -1,7 +1,6 @@ """Code regarding samples""" import os import sys -import csv import uuid from pathlib import Path from typing import Iterator @@ -18,8 +17,6 @@ from flask import ( render_template, current_app as app) -from functional_tools import take - from uploader import jobs from uploader.files import save_file from uploader.datautils import order_by_family @@ -34,7 +31,8 @@ from uploader.species.models import (all_species, order_species_by_family) from uploader.population.models import(save_population, population_by_id, - populations_by_species) + populations_by_species, + population_by_species_and_id) from .models import samples_by_species_and_population @@ -123,86 +121,6 @@ def list_samples(species_id: int, population_id: int): activelink="list-samples") -def read_samples_file(filepath, separator: str, firstlineheading: bool, **kwargs) -> Iterator[dict]: - """Read the samples file.""" - with open(filepath, "r", encoding="utf-8") as inputfile: - reader = csv.DictReader( - inputfile, - fieldnames=( - None if firstlineheading - else ("Name", "Name2", "Symbol", "Alias")), - delimiter=separator, - quotechar=kwargs.get("quotechar", '"')) - for row in reader: - yield row - - -def save_samples_data(conn: mdb.Connection, - speciesid: int, - file_data: Iterator[dict]): - """Save the samples to DB.""" - data = ({**row, "SpeciesId": speciesid} for row in file_data) - total = 0 - with conn.cursor() as cursor: - while True: - batch = take(data, 5000) - if len(batch) == 0: - break - cursor.executemany( - "INSERT INTO Strain(Name, Name2, SpeciesId, Symbol, Alias) " - "VALUES(" - " %(Name)s, %(Name2)s, %(SpeciesId)s, %(Symbol)s, %(Alias)s" - ") ON DUPLICATE KEY UPDATE Name=Name", - batch) - total += len(batch) - print(f"\tSaved {total} samples total so far.") - - -def cross_reference_samples(conn: mdb.Connection, - species_id: int, - population_id: int, - strain_names: Iterator[str]): - """Link samples to their population.""" - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute( - "SELECT MAX(OrderId) AS loid FROM StrainXRef WHERE InbredSetId=%s", - (population_id,)) - last_order_id = (cursor.fetchone()["loid"] or 10) - total = 0 - while True: - batch = take(strain_names, 5000) - if len(batch) == 0: - break - params_str = ", ".join(["%s"] * len(batch)) - ## This query is slow -- investigate. - cursor.execute( - "SELECT s.Id FROM Strain AS s LEFT JOIN StrainXRef AS sx " - "ON s.Id = sx.StrainId WHERE s.SpeciesId=%s AND s.Name IN " - f"({params_str}) AND sx.StrainId IS NULL", - (species_id,) + tuple(batch)) - strain_ids = (sid["Id"] for sid in cursor.fetchall()) - params = tuple({ - "pop_id": population_id, - "strain_id": strain_id, - "order_id": last_order_id + (order_id * 10), - "mapping": "N", - "pedigree": None - } for order_id, strain_id in enumerate(strain_ids, start=1)) - cursor.executemany( - "INSERT INTO StrainXRef( " - " InbredSetId, StrainId, OrderId, Used_for_mapping, PedigreeStatus" - ")" - "VALUES (" - " %(pop_id)s, %(strain_id)s, %(order_id)s, %(mapping)s, " - " %(pedigree)s" - ")", - params) - last_order_id += (len(params) * 10) - total += len(batch) - print(f"\t{total} total samples cross-referenced to the population " - "so far.") - - def build_sample_upload_job(# pylint: disable=[too-many-arguments] speciesid: int, populationid: int, @@ -219,14 +137,15 @@ def build_sample_upload_job(# pylint: disable=[too-many-arguments] ] + (["--firstlineheading"] if firstlineheading else []) -@samplesbp.route("/upload/species/<int:species_id>/populations/<int:population_id>/samples", +@samplesbp.route("<int:species_id>/populations/<int:population_id>/upload-samples", methods=["GET", "POST"]) @require_login def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements] """Upload the samples.""" - samples_uploads_page = redirect(url_for("samples.upload_samples", - species_id=species_id, - population_id=population_id)) + samples_uploads_page = redirect(url_for( + "species.populations.samples.upload_samples", + species_id=species_id, + population_id=population_id)) if not is_integer_input(species_id): flash("You did not provide a valid species. Please select one to " "continue.", @@ -241,14 +160,14 @@ def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-ma flash("You did not provide a valid population. Please select one " "to continue.", "alert-danger") - return redirect(url_for("samples.select_population", + return redirect(url_for("species.populations.samples.select_population", species_id=species_id), code=307) population = with_db_connection( lambda conn: population_by_id(conn, int(population_id))) if not bool(population): flash("Invalid grouping/population!", "alert-error") - return redirect(url_for("samples.select_population", + return redirect(url_for("species.populations.samples.select_population", species_id=species_id), code=307) @@ -296,34 +215,62 @@ def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-ma redisuri, f"{app.config['UPLOAD_FOLDER']}/job_errors") return redirect(url_for( - "samples.upload_status", job_id=the_job["jobid"])) + "species.populations.samples.upload_status", + species_id=species_id, + population_id=population_id, + job_id=the_job["jobid"])) + -@samplesbp.route("/upload/status/<uuid:job_id>", methods=["GET"]) -def upload_status(job_id: uuid.UUID): +@samplesbp.route("<int:species_id>/populations/<int:population_id>/" + "upload-samples/status/<uuid:job_id>", + methods=["GET"]) +def upload_status(species_id: int, population_id: int, job_id: uuid.UUID): """Check on the status of a samples upload job.""" - job = with_redis_connection(lambda rconn: jobs.job( - rconn, jobs.jobsnamespace(), job_id)) - if job: - status = job["status"] - if status == "success": - return render_template("samples/upload-success.html", job=job) - - if status == "error": - return redirect(url_for("samples.upload_failure", job_id=job_id)) - - error_filename = Path(jobs.error_filename( - job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")) - if error_filename.exists(): - stat = os.stat(error_filename) - if stat.st_size > 0: + with database_connection(app.config["SQL_URI"]) as conn: + species = species_by_id(conn, species_id) + if not bool(species): + flash("You must provide a valid species.", "alert-danger") + return redirect(url_for("species.populations.samples.index")) + + population = population_by_species_and_id( + conn, species_id, population_id) + if not bool(population): + flash("You must provide a valid population.", "alert-danger") + return redirect(url_for( + "species.populations.samples.select_population", + species_id=species_id)) + + job = with_redis_connection(lambda rconn: jobs.job( + rconn, jobs.jobsnamespace(), job_id)) + if job: + status = job["status"] + if status == "success": + return render_template("samples/upload-success.html", + job=job, + species=species, + population=population,) + + if status == "error": return redirect(url_for( - "samples.upload_failure", job_id=job_id)) + "species.populations.samples.upload_failure", job_id=job_id)) - return render_template( - "samples/upload-progress.html", - job=job) # maybe also handle this? + error_filename = Path(jobs.error_filename( + job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")) + if error_filename.exists(): + stat = os.stat(error_filename) + if stat.st_size > 0: + return redirect(url_for( + "samples.upload_failure", job_id=job_id)) - return render_template("no_such_job.html", job_id=job_id), 400 + return render_template("samples/upload-progress.html", + species=species, + population=population, + job=job) # maybe also handle this? + + return render_template("no_such_job.html", + job_id=job_id, + species=species, + population=population), 400 @samplesbp.route("/upload/failure/<uuid:job_id>", methods=["GET"]) def upload_failure(job_id: uuid.UUID): diff --git a/uploader/templates/samples/list-samples.html b/uploader/templates/samples/list-samples.html index a29dc1c..8f1bf16 100644 --- a/uploader/templates/samples/list-samples.html +++ b/uploader/templates/samples/list-samples.html @@ -97,7 +97,9 @@ </p> <p> - <a href="#" + <a href="{{url_for('species.populations.samples.upload_samples', + species_id=species.SpeciesId, + population_id=population.Id)}}" title="Add samples for population '{{population.FullName}}' from species '{{species.FullName}}'." class="btn btn-primary"> @@ -111,4 +113,3 @@ {%block sidebarcontents%} {{display_population_card(species, population)}} {%endblock%} - diff --git a/uploader/templates/samples/upload-failure.html b/uploader/templates/samples/upload-failure.html new file mode 100644 index 0000000..458ab55 --- /dev/null +++ b/uploader/templates/samples/upload-failure.html @@ -0,0 +1,37 @@ +{%extends "base.html"%} +{%from "cli-output.html" import cli_output%} +{%from "populations/macro-display-population-card.html" import display_population_card%} + +{%block title%}Samples Upload Failure{%endblock%} + +{%block contents%} +<div class="row"> +<h2 class="heading">{{job.job_name[0:50]}}…</h2> + +<p>There was a failure attempting to upload the samples.</p> + +<p>Here is some information to help with debugging the issue. Provide this + information to the developer/maintainer.</p> + +<h3>Debugging Information</h3> +<ul> + <li><strong>job id</strong>: {{job.job_id}}</li> + <li><strong>status</strong>: {{job.status}}</li> + <li><strong>job type</strong>: {{job["job-type"]}}</li> +</ul> +</div> + +<div class="row"> +<h4>stdout</h4> +{{cli_output(job, "stdout")}} +</div> + +<div class="row"> +<h4>stderr</h4> +{{cli_output(job, "stderr")}} +</div> +{%endblock%} + +{%block sidebarcontents%} +{{display_population_card(species, population)}} +{%endblock%} diff --git a/uploader/templates/samples/upload-progress.html b/uploader/templates/samples/upload-progress.html new file mode 100644 index 0000000..677d457 --- /dev/null +++ b/uploader/templates/samples/upload-progress.html @@ -0,0 +1,31 @@ +{%extends "samples/base.html"%} +{%from "cli-output.html" import cli_output%} +{%from "populations/macro-display-population-card.html" import display_population_card%} + +{%block extrameta%} +<meta http-equiv="refresh" content="5"> +{%endblock%} + +{%block title%}Job Status{%endblock%} + +{%block contents%} +<div class="row" style="overflow-x: clip;"> +<h2 class="heading">{{job.job_name[0:50]}}…</h2> + +<p> +<strong>status</strong>: +<span>{{job["status"]}} ({{job.get("message", "-")}})</span><br /> +</p> + +<p>saving to database...</p> +</div> + +<div class="row"> + {{cli_output(job, "stdout")}} +</div> + +{%endblock%} + +{%block sidebarcontents%} +{{display_population_card(species, population)}} +{%endblock%} diff --git a/uploader/templates/samples/upload-samples.html b/uploader/templates/samples/upload-samples.html new file mode 100644 index 0000000..b101b2e --- /dev/null +++ b/uploader/templates/samples/upload-samples.html @@ -0,0 +1,155 @@ +{%extends "samples/base.html"%} +{%from "flash_messages.html" import flash_all_messages%} +{%from "populations/macro-select-population.html" import select_population_form%} +{%from "populations/macro-display-population-card.html" import display_population_card%} + +{%block title%}Samples — Upload Samples{%endblock%} + +{%block pagetitle%}Samples — Upload Samples{%endblock%} + +{%block lvl4_breadcrumbs%} +<li {%if activelink=="uploade-samples"%} + class="breadcrumb-item active" + {%else%} + class="breadcrumb-item" + {%endif%}> + <a href="{{url_for('species.populations.samples.upload_samples', + species_id=species.SpeciesId, + population_id=population.Id)}}">List</a> +</li> +{%endblock%} + +{%block contents%} +{{flash_all_messages()}} + +<div class="row"> + <p>You can now upload a character-separated value (CSV) file that contains + details about your samples. The CSV file should have the following fields: + <dl> + <dt>Name</dt> + <dd>The primary name/identifier for the sample/individual.</dd> + + <dt>Name2</dt> + <dd>A secondary name for the sample. This can simply be the same as + <strong>Name</strong> above. This field <strong>MUST</strong> contain a + value.</dd> + + <dt>Symbol</dt> + <dd>A symbol for the sample. This can be a strain name, e.g. 'BXD60' for + species that have strains. This field can be left empty for species like + Humans that do not have strains..</dd> + + <dt>Alias</dt> + <dd>An alias for the sample. Can be an empty field, or take on the same + value as that of the Symbol.</dd> + </dl> + </p> +</div> + +<div class="row"> + <form id="form-samples" + method="POST" + action="{{url_for('species.populations.samples.upload_samples', + species_id=species.SpeciesId, + population_id=population.InbredSetId)}}" + enctype="multipart/form-data"> + <legend class="heading">upload samples</legend> + + <input type="hidden" name="species_id" value="{{species.SpeciesId}}" /> + <input type="hidden" name="population_id" value="{{population.Id}}" /> + + <div class="form-group"> + <label for="file-samples" class="form-label">select file</label> + <input type="file" name="samples_file" id="file:samples" + accept="text/csv, text/tab-separated-values" + class="form-control" /> + </div> + + <div class="form-group"> + <label for="select:separator" class="form-label">field separator</label> + <select id="select:separator" + name="separator" + required="required" + class="form-control"> + <option value="">Select separator for your file: (default is comma)</option> + <option value="	">TAB</option> + <option value=" ">Space</option> + <option value=",">Comma</option> + <option value=";">Semicolon</option> + <option value="other">Other</option> + </select> + <input id="txt:separator" + type="text" + name="other_separator" + class="form-control" /> + <small class="form-text text-muted"> + If you select '<strong>Other</strong>' for the field separator value, + enter the character that separates the fields in your CSV file in the form + field below. + </small> + </div> + + <div class="form-group form-check"> + <input id="chk:heading" + type="checkbox" + name="first_line_heading" + class="form-check-input" /> + <label for="chk:heading" class="form-check-label"> + first line is a heading?</label> + <small class="form-text text-muted"> + Select this if the first line in your file contains headings for the + columns. + </small> + </div> + + <div class="form-group"> + <label for="txt:delimiter" class="form-label">field delimiter</label> + <input id="txt:delimiter" + type="text" + name="field_delimiter" + maxlength="1" + class="form-control" /> + <small class="form-text text-muted"> + If there is a character delimiting the string texts within particular + fields in your CSV, provide the character here. This can be left blank if + no such delimiters exist in your file. + </small> + </div> + + <button type="submit" + class="btn btn-primary">upload samples file</button> + </form> +</div> + +<div class="row"> + <h3>Preview File Content</h3> + + <table id="tbl:samples-preview" class="table"> + <caption class="heading">preview content</caption> + + <thead> + <tr> + <th>Name</th> + <th>Name2</th> + <th>Symbol</th> + <th>Alias</th> + </tr> + </thead> + + <tbody> + <tr id="default-row"> + <td colspan="4"> + Please make some selections in the form above to preview the data.</td> + </tr> + </tbody> + </table> +</div> +{%endblock%} + +{%block sidebarcontents%} +{{display_population_card(species, population)}} +{%endblock%} + +{%block javascript%} +<script src="/static/js/upload_samples.js" type="text/javascript"></script> +{%endblock%} diff --git a/uploader/templates/samples/upload-success.html b/uploader/templates/samples/upload-success.html new file mode 100644 index 0000000..881d466 --- /dev/null +++ b/uploader/templates/samples/upload-success.html @@ -0,0 +1,36 @@ +{%extends "samples/base.html"%} +{%from "cli-output.html" import cli_output%} +{%from "populations/macro-display-population-card.html" import display_population_card%} + +{%block title%}Job Status{%endblock%} + +{%block contents%} + +<div class="row" style="overflow-x: clip;"> + <h2 class="heading">{{job.job_name[0:50]}}…</h2> + + <p> + <strong>status</strong>: + <span>{{job["status"]}} ({{job.get("message", "-")}})</span><br /> + </p> + + <p>Successfully uploaded the samples.</p> + <p> + <a href="{{url_for('species.populations.samples.list_samples', + species_id=species.SpeciesId, + population_id=population.Id)}}" + title="View population samples"> + View samples + </a> + </p> +</div> + +<div class="row"> + {{cli_output(job, "stdout")}} +</div> + +{%endblock%} + +{%block sidebarcontents%} +{{display_population_card(species, population)}} +{%endblock%} |