about | summary | refs | log | tree | commit | diff
path: root/uploader/samples
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/samples')
-rw-r--r-- uploader/samples/models.py 85
-rw-r--r-- uploader/samples/views.py 173
2 files changed, 145 insertions, 113 deletions
diff --git a/uploader/samples/models.py b/uploader/samples/models.py
index 15e509e..d7d5384 100644
--- a/uploader/samples/models.py
+++ b/uploader/samples/models.py
@@ -1,7 +1,12 @@
"""Functions for handling samples."""
+import csv
+from typing import Iterator
+
import MySQLdb as mdb
from MySQLdb.cursors import DictCursor
+from functional_tools import take
+
def samples_by_species_and_population(
conn: mdb.Connection,
species_id: int,
@@ -17,3 +22,83 @@ def samples_by_species_and_population(
"AND iset.InbredSetId=%(population_id)s",
{"species_id": species_id, "population_id": population_id})
return tuple(cursor.fetchall())
+
+
+def read_samples_file(filepath, separator: str, firstlineheading: bool, **kwargs) -> Iterator[dict]:
+ """Lazily read the samples file, yielding one dict per data row.
+
+ `separator` is the field delimiter. When `firstlineheading` is true the
+ keys come from the file's first line; otherwise the columns are taken to
+ be Name, Name2, Symbol and Alias, in that order. The only keyword
+ argument consulted is `quotechar` (default: the double-quote character).
+ The file is held open until the generator is exhausted or closed.
+ """
+ # The csv module expects files opened with newline="" so that line endings
+ # inside quoted fields reach the parser untranslated.
+ with open(filepath, "r", encoding="utf-8", newline="") as inputfile:
+ yield from csv.DictReader(
+ inputfile,
+ fieldnames=(
+ None if firstlineheading
+ else ("Name", "Name2", "Symbol", "Alias")),
+ delimiter=separator,
+ quotechar=kwargs.get("quotechar", '"'))
+
+
+def save_samples_data(conn: mdb.Connection,
+ speciesid: int,
+ file_data: Iterator[dict]):
+ """Save the samples to DB.
+
+ Every row from `file_data` gets a "SpeciesId" key set to `speciesid` and
+ is inserted into the Strain table; the statement reads the keys Name,
+ Name2, Symbol and Alias from each row. Rows are sent in batches and a
+ running total is printed after each batch. Nothing is returned, and no
+ commit is issued here.
+ """
+ data = ({**row, "SpeciesId": speciesid} for row in file_data)
+ total = 0
+ with conn.cursor() as cursor:
+ while True:
+ batch = take(data, 5000) # presumably at most 5000 rows per call -- confirm against functional_tools.take
+ if len(batch) == 0:
+ break
+ cursor.executemany(
+ "INSERT INTO Strain(Name, Name2, SpeciesId, Symbol, Alias) "
+ "VALUES("
+ " %(Name)s, %(Name2)s, %(SpeciesId)s, %(Symbol)s, %(Alias)s"
+ ") ON DUPLICATE KEY UPDATE Name=Name", # self-assignment: a row that hits a duplicate key is left as it is
+ batch)
+ total += len(batch)
+ print(f"\tSaved {total} samples total so far.")
+
+
+def cross_reference_samples(conn: mdb.Connection,
+ species_id: int,
+ population_id: int,
+ strain_names: Iterator[str]):
+ """Link samples to their population.
+
+ For each batch of names from `strain_names`, look up the ids of Strain
+ rows of species `species_id` that have no StrainXRef row, and insert a
+ StrainXRef row tying each one to `population_id`. OrderId values continue
+ in steps of 10 from the population's current maximum. New rows get
+ Used_for_mapping "N" and a NULL PedigreeStatus. Progress is printed after
+ each batch; nothing is returned, and no commit is issued here.
+ """
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(
+ "SELECT MAX(OrderId) AS loid FROM StrainXRef WHERE InbredSetId=%s",
+ (population_id,))
+ # MAX() yields NULL when the population has no rows yet; start from 10 then.
+ last_order_id = (cursor.fetchone()["loid"] or 10)
+ total = 0
+ while True:
+ batch = take(strain_names, 5000) # presumably at most 5000 names per call -- confirm against functional_tools.take
+ if len(batch) == 0:
+ break
+ # One placeholder per name in the batch, for the IN (...) list.
+ params_str = ", ".join(["%s"] * len(batch))
+ ## This query is slow -- investigate.
+ # NOTE(review): the join is not restricted to `population_id`, so a
+ # strain already linked to any population is excluded -- confirm intended.
+ cursor.execute(
+ "SELECT s.Id FROM Strain AS s LEFT JOIN StrainXRef AS sx "
+ "ON s.Id = sx.StrainId WHERE s.SpeciesId=%s AND s.Name IN "
+ f"({params_str}) AND sx.StrainId IS NULL",
+ (species_id,) + tuple(batch))
+ strain_ids = (sid["Id"] for sid in cursor.fetchall())
+ params = tuple({
+ "pop_id": population_id,
+ "strain_id": strain_id,
+ "order_id": last_order_id + (order_id * 10),
+ "mapping": "N",
+ "pedigree": None
+ } for order_id, strain_id in enumerate(strain_ids, start=1))
+ cursor.executemany(
+ "INSERT INTO StrainXRef( "
+ " InbredSetId, StrainId, OrderId, Used_for_mapping, PedigreeStatus"
+ ")"
+ "VALUES ("
+ " %(pop_id)s, %(strain_id)s, %(order_id)s, %(mapping)s, "
+ " %(pedigree)s"
+ ")",
+ params)
+ # Advance past the order ids just handed out in this batch.
+ last_order_id += (len(params) * 10)
+ # NOTE(review): counts the names processed, not the rows inserted.
+ total += len(batch)
+ print(f"\t{total} total samples cross-referenced to the population "
+ "so far.")
diff --git a/uploader/samples/views.py b/uploader/samples/views.py
index 6af90f4..6e3dc4b 100644
--- a/uploader/samples/views.py
+++ b/uploader/samples/views.py
@@ -1,7 +1,6 @@
"""Code regarding samples"""
import os
import sys
-import csv
import uuid
from pathlib import Path
from typing import Iterator
@@ -18,8 +17,6 @@ from flask import (
render_template,
current_app as app)
-from functional_tools import take
-
from uploader import jobs
from uploader.files import save_file
from uploader.datautils import order_by_family
@@ -34,7 +31,8 @@ from uploader.species.models import (all_species,
order_species_by_family)
from uploader.population.models import(save_population,
population_by_id,
- populations_by_species)
+ populations_by_species,
+ population_by_species_and_id)
from .models import samples_by_species_and_population
@@ -123,86 +121,6 @@ def list_samples(species_id: int, population_id: int):
activelink="list-samples")
-def read_samples_file(filepath, separator: str, firstlineheading: bool, **kwargs) -> Iterator[dict]:
- """Read the samples file."""
- with open(filepath, "r", encoding="utf-8") as inputfile:
- reader = csv.DictReader(
- inputfile,
- fieldnames=(
- None if firstlineheading
- else ("Name", "Name2", "Symbol", "Alias")),
- delimiter=separator,
- quotechar=kwargs.get("quotechar", '"'))
- for row in reader:
- yield row
-
-
-def save_samples_data(conn: mdb.Connection,
- speciesid: int,
- file_data: Iterator[dict]):
- """Save the samples to DB."""
- data = ({**row, "SpeciesId": speciesid} for row in file_data)
- total = 0
- with conn.cursor() as cursor:
- while True:
- batch = take(data, 5000)
- if len(batch) == 0:
- break
- cursor.executemany(
- "INSERT INTO Strain(Name, Name2, SpeciesId, Symbol, Alias) "
- "VALUES("
- " %(Name)s, %(Name2)s, %(SpeciesId)s, %(Symbol)s, %(Alias)s"
- ") ON DUPLICATE KEY UPDATE Name=Name",
- batch)
- total += len(batch)
- print(f"\tSaved {total} samples total so far.")
-
-
-def cross_reference_samples(conn: mdb.Connection,
- species_id: int,
- population_id: int,
- strain_names: Iterator[str]):
- """Link samples to their population."""
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute(
- "SELECT MAX(OrderId) AS loid FROM StrainXRef WHERE InbredSetId=%s",
- (population_id,))
- last_order_id = (cursor.fetchone()["loid"] or 10)
- total = 0
- while True:
- batch = take(strain_names, 5000)
- if len(batch) == 0:
- break
- params_str = ", ".join(["%s"] * len(batch))
- ## This query is slow -- investigate.
- cursor.execute(
- "SELECT s.Id FROM Strain AS s LEFT JOIN StrainXRef AS sx "
- "ON s.Id = sx.StrainId WHERE s.SpeciesId=%s AND s.Name IN "
- f"({params_str}) AND sx.StrainId IS NULL",
- (species_id,) + tuple(batch))
- strain_ids = (sid["Id"] for sid in cursor.fetchall())
- params = tuple({
- "pop_id": population_id,
- "strain_id": strain_id,
- "order_id": last_order_id + (order_id * 10),
- "mapping": "N",
- "pedigree": None
- } for order_id, strain_id in enumerate(strain_ids, start=1))
- cursor.executemany(
- "INSERT INTO StrainXRef( "
- " InbredSetId, StrainId, OrderId, Used_for_mapping, PedigreeStatus"
- ")"
- "VALUES ("
- " %(pop_id)s, %(strain_id)s, %(order_id)s, %(mapping)s, "
- " %(pedigree)s"
- ")",
- params)
- last_order_id += (len(params) * 10)
- total += len(batch)
- print(f"\t{total} total samples cross-referenced to the population "
- "so far.")
-
-
def build_sample_upload_job(# pylint: disable=[too-many-arguments]
speciesid: int,
populationid: int,
@@ -219,14 +137,15 @@ def build_sample_upload_job(# pylint: disable=[too-many-arguments]
] + (["--firstlineheading"] if firstlineheading else [])
-@samplesbp.route("/upload/species/<int:species_id>/populations/<int:population_id>/samples",
+@samplesbp.route("<int:species_id>/populations/<int:population_id>/upload-samples",
methods=["GET", "POST"])
@require_login
def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements]
"""Upload the samples."""
- samples_uploads_page = redirect(url_for("samples.upload_samples",
- species_id=species_id,
- population_id=population_id))
+ samples_uploads_page = redirect(url_for(
+ "species.populations.samples.upload_samples",
+ species_id=species_id,
+ population_id=population_id))
if not is_integer_input(species_id):
flash("You did not provide a valid species. Please select one to "
"continue.",
@@ -241,14 +160,14 @@ def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-ma
flash("You did not provide a valid population. Please select one "
"to continue.",
"alert-danger")
- return redirect(url_for("samples.select_population",
+ return redirect(url_for("species.populations.samples.select_population",
species_id=species_id),
code=307)
population = with_db_connection(
lambda conn: population_by_id(conn, int(population_id)))
if not bool(population):
flash("Invalid grouping/population!", "alert-error")
- return redirect(url_for("samples.select_population",
+ return redirect(url_for("species.populations.samples.select_population",
species_id=species_id),
code=307)
@@ -296,34 +215,62 @@ def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-ma
redisuri,
f"{app.config['UPLOAD_FOLDER']}/job_errors")
return redirect(url_for(
- "samples.upload_status", job_id=the_job["jobid"]))
+ "species.populations.samples.upload_status",
+ species_id=species_id,
+ population_id=population_id,
+ job_id=the_job["jobid"]))
+
-@samplesbp.route("/upload/status/<uuid:job_id>", methods=["GET"])
-def upload_status(job_id: uuid.UUID):
+@samplesbp.route("<int:species_id>/populations/<int:population_id>/"
+ "upload-samples/status/<uuid:job_id>",
+ methods=["GET"])
+def upload_status(species_id: int, population_id: int, job_id: uuid.UUID):
"""Check on the status of a samples upload job."""
- job = with_redis_connection(lambda rconn: jobs.job(
- rconn, jobs.jobsnamespace(), job_id))
- if job:
- status = job["status"]
- if status == "success":
- return render_template("samples/upload-success.html", job=job)
-
- if status == "error":
- return redirect(url_for("samples.upload_failure", job_id=job_id))
-
- error_filename = Path(jobs.error_filename(
- job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors"))
- if error_filename.exists():
- stat = os.stat(error_filename)
- if stat.st_size > 0:
+ # Validate the species and population from the URL before consulting the job.
+ with database_connection(app.config["SQL_URI"]) as conn:
+ species = species_by_id(conn, species_id)
+ if not bool(species):
+ flash("You must provide a valid species.", "alert-danger")
+ return redirect(url_for("species.populations.samples.index"))
+
+ population = population_by_species_and_id(
+ conn, species_id, population_id)
+ if not bool(population):
+ flash("You must provide a valid population.", "alert-danger")
+ return redirect(url_for(
+ "species.populations.samples.select_population",
+ species_id=species_id))
+
+ job = with_redis_connection(lambda rconn: jobs.job(
+ rconn, jobs.jobsnamespace(), job_id))
+ if job:
+ status = job["status"]
+ if status == "success":
+ return render_template("samples/upload-success.html",
+ job=job,
+ species=species,
+ population=population,)
+
+ if status == "error":
return redirect(url_for(
- "samples.upload_failure", job_id=job_id))
+ "species.populations.samples.upload_failure", job_id=job_id))
- return render_template(
- "samples/upload-progress.html",
- job=job) # maybe also handle this?
+ # For any other status, a non-empty error file is treated as a failure.
+ error_filename = Path(jobs.error_filename(
+ job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors"))
+ if error_filename.exists():
+ stat = os.stat(error_filename)
+ if stat.st_size > 0:
+ return redirect(url_for(
+ "species.populations.samples.upload_failure", job_id=job_id))
- return render_template("no_such_job.html", job_id=job_id), 400
+ return render_template("samples/upload-progress.html",
+ species=species,
+ population=population,
+ job=job) # maybe also handle this?
+
+ return render_template("no_such_job.html",
+ job_id=job_id,
+ species=species,
+ population=population), 400
@samplesbp.route("/upload/failure/<uuid:job_id>", methods=["GET"])
def upload_failure(job_id: uuid.UUID):