about summary refs log tree commit diff
path: root/uploader/samples
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/samples')
-rw-r--r-- uploader/samples/__init__.py 1
-rw-r--r-- uploader/samples/models.py 103
-rw-r--r-- uploader/samples/views.py 185
3 files changed, 289 insertions, 0 deletions
diff --git a/uploader/samples/__init__.py b/uploader/samples/__init__.py
new file mode 100644
index 0000000..1bd6d2d
--- /dev/null
+++ b/uploader/samples/__init__.py
@@ -0,0 +1 @@
+"""Samples package. Handle samples uploads and editing."""
diff --git a/uploader/samples/models.py b/uploader/samples/models.py
new file mode 100644
index 0000000..1e9293f
--- /dev/null
+++ b/uploader/samples/models.py
@@ -0,0 +1,103 @@
+"""Functions for handling samples."""
+import csv
+from typing import Iterator
+
+import MySQLdb as mdb
+from MySQLdb.cursors import DictCursor
+
+from functional_tools import take
+
+def samples_by_species_and_population(
+        conn: mdb.Connection,
+        species_id: int,
+        population_id: int
+) -> tuple[dict, ...]:
+    """
+    Retrieve the samples (strains) linked to a population of a species.
+
+    The query joins `InbredSet` to `Strain` through `StrainXRef` and keeps the
+    rows whose `Strain.SpeciesId` equals `species_id` and whose
+    `InbredSet.InbredSetId` equals `population_id`.
+
+    Returns a tuple of dict rows; each row carries `InbredSetId` together
+    with every column of `Strain`. The tuple is empty when nothing matches.
+    """
+    query = (
+        "SELECT InbredSet.InbredSetId, Strain.* FROM InbredSet "
+        "INNER JOIN StrainXRef ON InbredSet.InbredSetId=StrainXRef.InbredSetId "
+        "INNER JOIN Strain ON StrainXRef.StrainId=Strain.Id "
+        "WHERE Strain.SpeciesId=%(species_id)s "
+        "AND InbredSet.InbredSetId=%(population_id)s")
+    query_args = {"species_id": species_id, "population_id": population_id}
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(query, query_args)
+        rows = cursor.fetchall()
+        return tuple(rows)
+
+
+def read_samples_file(filepath, separator: str, firstlineheading: bool, **kwargs) -> Iterator[dict]:
+    """
+    Lazily read the delimited samples file, yielding one dict per data row.
+
+    filepath: path to the UTF-8 encoded samples file.
+    separator: the single character that delimits the fields.
+    firstlineheading: when true, the first line of the file provides the
+        field names; otherwise the columns are taken, in order, to be
+        "Name", "Name2", "Symbol" and "Alias".
+    kwargs: only `quotechar` is consulted (defaults to the double-quote).
+
+    This is a generator: the file is opened on first iteration and is only
+    closed once the generator is exhausted or closed.
+    """
+    fieldnames = (
+        None if firstlineheading
+        else ("Name", "Name2", "Symbol", "Alias"))
+    # The `csv` module requires files to be opened with `newline=""`;
+    # without it, newlines embedded inside quoted fields are translated by
+    # the text layer before the reader ever sees them.
+    with open(filepath, "r", encoding="utf-8", newline="") as inputfile:
+        yield from csv.DictReader(
+            inputfile,
+            fieldnames=fieldnames,
+            delimiter=separator,
+            quotechar=kwargs.get("quotechar", '"'))
+
+
+def save_samples_data(conn: mdb.Connection,
+                      speciesid: int,
+                      file_data: Iterator[dict]):
+    """
+    Insert the samples read from the file into the `Strain` table.
+
+    Every row from `file_data` is tagged with `speciesid` (as `SpeciesId`)
+    and the rows are written in batches via `take(..., 5000)`. Rows hitting
+    an existing key are left as they are (`ON DUPLICATE KEY UPDATE Name=Name`
+    is a no-op update). Progress is printed after each batch.
+
+    No commit is issued here; that is left to the caller.
+    """
+    insert_query = (
+        "INSERT INTO Strain(Name, Name2, SpeciesId, Symbol, Alias) "
+        "VALUES("
+        "    %(Name)s, %(Name2)s, %(SpeciesId)s, %(Symbol)s, %(Alias)s"
+        ") ON DUPLICATE KEY UPDATE Name=Name")
+    rows = ({**row, "SpeciesId": speciesid} for row in file_data)
+    saved = 0
+    with conn.cursor() as cursor:
+        # An empty batch signals that the input has been exhausted.
+        while len(chunk := take(rows, 5000)) > 0:
+            cursor.executemany(insert_query, chunk)
+            saved += len(chunk)
+            print(f"\tSaved {saved} samples total so far.")
+
+
+def cross_reference_samples(conn: mdb.Connection,
+                            species_id: int,
+                            population_id: int,
+                            strain_names: Iterator[str]):
+    """
+    Link samples to their population.
+
+    For each batch of up to 5000 names from `strain_names`, look up the
+    matching `Strain` rows of the species that have no `StrainXRef` row, and
+    insert a `StrainXRef` row tying each of them to `population_id`.
+
+    New rows get `OrderId` values continuing from the population's current
+    maximum in steps of 10, `Used_for_mapping` set to "N" and a NULL
+    `PedigreeStatus`. Progress is printed after every batch.
+
+    No commit is issued here; that is left to the caller.
+    """
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(
+            "SELECT MAX(OrderId) AS loid FROM StrainXRef WHERE InbredSetId=%s",
+            (population_id,))
+        # MAX() is NULL when the population has no cross-references yet (and
+        # a maximum of 0 is falsy too); in both cases start from 10.
+        last_order_id = (cursor.fetchone()["loid"] or 10)
+        total = 0
+        while True:
+            batch = take(strain_names, 5000)
+            if len(batch) == 0:
+                break
+            # One `%s` placeholder per name in the batch, for the IN (...) list.
+            params_str = ", ".join(["%s"] * len(batch))
+            ## This query is slow -- investigate.
+            # NOTE(review): the anti-join is not restricted to `population_id`,
+            # so a strain already linked to *any* population is skipped here.
+            # Confirm that is the intended behaviour.
+            cursor.execute(
+                "SELECT s.Id FROM Strain AS s LEFT JOIN StrainXRef AS sx "
+                "ON s.Id = sx.StrainId WHERE s.SpeciesId=%s AND s.Name IN "
+                f"({params_str}) AND sx.StrainId IS NULL",
+                (species_id,) + tuple(batch))
+            strain_ids = (sid["Id"] for sid in cursor.fetchall())
+            # Order ids are spaced 10 apart, continuing after `last_order_id`.
+            params = tuple({
+                "pop_id": population_id,
+                "strain_id": strain_id,
+                "order_id": last_order_id + (order_id * 10),
+                "mapping": "N",
+                "pedigree": None
+            } for order_id, strain_id in enumerate(strain_ids, start=1))
+            cursor.executemany(
+                "INSERT INTO StrainXRef( "
+                "  InbredSetId, StrainId, OrderId, Used_for_mapping, PedigreeStatus"
+                ")"
+                "VALUES ("
+                "  %(pop_id)s, %(strain_id)s, %(order_id)s, %(mapping)s, "
+                "  %(pedigree)s"
+                ")",
+                params)
+            # Advance past the order ids consumed by this batch.
+            last_order_id += (len(params) * 10)
+            # NOTE(review): this counts the names processed, not the rows
+            # actually inserted (`len(params)`); the two differ whenever some
+            # names were filtered out by the query above.
+            total += len(batch)
+            print(f"\t{total} total samples cross-referenced to the population "
+                  "so far.")
diff --git a/uploader/samples/views.py b/uploader/samples/views.py
new file mode 100644
index 0000000..f318bf0
--- /dev/null
+++ b/uploader/samples/views.py
@@ -0,0 +1,185 @@
+"""Code regarding samples"""
+import os
+import sys
+import uuid
+import logging
+from pathlib import Path
+
+from flask import (flash,
+                   request,
+                   redirect,
+                   Blueprint,
+                   current_app as app)
+
+from gn_libs import sqlite3
+from gn_libs import jobs as jobs
+
+from uploader import session
+from uploader.files import save_file
+from uploader.flask_extensions import url_for
+from uploader.ui import make_template_renderer
+from uploader.authorisation import require_login
+from uploader.input_validation import is_integer_input
+from uploader.population.models import population_by_id
+from uploader.route_utils import generic_select_population
+from uploader.datautils import safe_int, enumerate_sequence
+from uploader.species.models import all_species, species_by_id
+from uploader.request_checks import with_species, with_population
+from uploader.db_utils import (with_db_connection,
+                               database_connection,
+                               with_redis_connection)
+
+from .models import samples_by_species_and_population
+
+# Blueprint that the samples routes defined below are attached to.
+samplesbp = Blueprint("samples", __name__)
+# Module-wide template renderer, built by `make_template_renderer` for
+# "samples"; it is called with a template path plus the template variables.
+render_template = make_template_renderer("samples")
+
+@samplesbp.route("/samples", methods=["GET"])
+@require_login
+def index():
+    """
+    Entry-point into the samples pages.
+
+    Behaviour depends on the `species_id` query parameter:
+    - missing/empty: render the index page with the list of all species,
+    - "CREATE-SPECIES": redirect to the species creation page,
+    - an existing species: redirect to the population selection page,
+    - anything else: flash an error and redirect back to this page.
+    """
+    with database_connection(app.config["SQL_URI"]) as conn:
+        species_id = request.args.get("species_id")
+        if not species_id:
+            return render_template(
+                "samples/index.html",
+                species=all_species(conn),
+                activelink="samples")
+
+        if species_id == "CREATE-SPECIES":
+            return redirect(url_for(
+                "species.create_species",
+                return_to="species.populations.samples.select_population"))
+
+        species = species_by_id(conn, species_id)
+        if species:
+            return redirect(url_for(
+                "species.populations.samples.select_population",
+                species_id=species["SpeciesId"]))
+
+        flash("No such species!", "alert-danger")
+        return redirect(url_for("species.populations.samples.index"))
+
+
+@samplesbp.route("<int:species_id>/samples/select-population", methods=["GET"])
+@require_login
+@with_species(redirect_uri="species.populations.samples.index")
+def select_population(species: dict, **kwargs):# pylint: disable=[unused-argument]
+    """
+    Let the user pick the population whose samples they want to work with.
+
+    The work is delegated to `generic_select_population`, which receives the
+    species, the template to render, the `population_id` query parameter
+    (an empty string when absent) and the endpoint names and message used by
+    this flow.
+    """
+    selected_population = request.args.get("population_id") or ""
+    return generic_select_population(
+        species,
+        "samples/select-population.html",
+        selected_population,
+        "species.populations.samples.select_population",
+        "species.populations.samples.list_samples",
+        "samples",
+        "Population not found!")
+
+@samplesbp.route("<int:species_id>/populations/<int:population_id>/samples")
+@require_login
+@with_population(
+    species_redirect_uri="species.populations.samples.index",
+    redirect_uri="species.populations.samples.select_population")
+def list_samples(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument]
+    """
+    List the samples in a particular population and give the ability to upload
+    new ones.
+
+    Paging is driven by two optional query parameters:
+    - `from`: zero-based offset of the first sample to show (never negative),
+    - `count`: number of samples per page; defaults to 20, and falls back to
+      20 when the value is not an integer or is less than one.
+    """
+    default_count = 20
+    with database_connection(app.config["SQL_URI"]) as conn:
+        all_samples = enumerate_sequence(samples_by_species_and_population(
+            conn, species["SpeciesId"], population["Id"]))
+        total_samples = len(all_samples)
+        offset = max(safe_int(request.args.get("from") or 0), 0)
+        # `type=int` makes the lookup return the default, rather than raise,
+        # when the user-supplied value cannot be converted to an integer.
+        count = request.args.get("count", default=default_count, type=int)
+        if count < 1:
+            # A zero or negative page size would produce an empty or
+            # nonsensical slice below.
+            count = default_count
+        return render_template("samples/list-samples.html",
+                               species=species,
+                               population=population,
+                               samples=all_samples[offset:offset+count],
+                               offset=offset,
+                               count=count,
+                               total_samples=total_samples,
+                               activelink="list-samples")
+
+
+@samplesbp.route("<int:species_id>/populations/<int:population_id>/upload-samples",
+               methods=["GET", "POST"])
+@require_login
+def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements]
+    """
+    Upload the samples.
+
+    On GET (or on a POST that carries no `samples_file`) the upload form is
+    rendered. On a POST with a file, the file is saved to the upload folder
+    and a background job running `scripts.insert_samples` is launched; the
+    user is then redirected to that job's status page.
+
+    Form fields read: `samples_file`, `first_line_heading`, `separator`,
+    `other_separator` and `field_delimiter`.
+
+    Invalid or unknown species/population values, a missing file or an empty
+    separator result in a flashed message and a redirect.
+    """
+    # Built once up-front; reused by the error paths further down.
+    samples_uploads_page = redirect(url_for(
+        "species.populations.samples.upload_samples",
+        species_id=species_id,
+        population_id=population_id))
+    # NOTE(review): the route uses `<int:...>` converters, so these values
+    # should already be integers here -- confirm whether the
+    # `is_integer_input` checks are still needed.
+    if not is_integer_input(species_id):
+        flash("You did not provide a valid species. Please select one to "
+              "continue.",
+              "alert-danger")
+        # NOTE(review): this endpoint name differs from the
+        # "species.populations.samples.index" used by the other views in this
+        # module -- confirm it exists.
+        return redirect(url_for("expression-data.samples.select_species"))
+    species = with_db_connection(lambda conn: species_by_id(conn, species_id))
+    if not bool(species):
+        flash("Species with given ID was not found.", "alert-danger")
+        return redirect(url_for("expression-data.samples.select_species"))
+
+    if not is_integer_input(population_id):
+        flash("You did not provide a valid population. Please select one "
+              "to continue.",
+              "alert-danger")
+        # 307 asks the client to repeat the request with the same method.
+        return redirect(url_for("species.populations.samples.select_population",
+                                species_id=species_id),
+                        code=307)
+    population = with_db_connection(
+        lambda conn: population_by_id(conn, int(population_id)))
+    if not bool(population):
+        # NOTE(review): "alert-error" here vs "alert-danger" above -- confirm
+        # both categories are styled by the templates.
+        flash("Invalid grouping/population!", "alert-error")
+        return redirect(url_for("species.populations.samples.select_population",
+                                species_id=species_id),
+                        code=307)
+
+    # Nothing to process: show the upload form.
+    if request.method == "GET" or request.files.get("samples_file") is None:
+        return render_template("samples/upload-samples.html",
+                               species=species,
+                               population=population)
+
+    try:
+        samples_file = save_file(request.files["samples_file"],
+                                 Path(app.config["UPLOAD_FOLDER"]))
+    except AssertionError:
+        # presumably `save_file` asserts that a usable file was provided --
+        # verify against its definition.
+        flash("You need to provide a file with the samples data.",
+              "alert-error")
+        return samples_uploads_page
+
+    firstlineheading = request.form.get("first_line_heading") == "on"
+
+    # "other" means the actual separator is in the `other_separator` field.
+    separator = request.form.get("separator", ",")
+    if separator == "other":
+        separator = request.form.get("other_separator", ",")
+    if not bool(separator):
+        flash("You need to provide a separator character.", "alert-error")
+        return samples_uploads_page
+
+    # An empty `field_delimiter` falls back to the double-quote character.
+    quotechar = (request.form.get("field_delimiter", '"') or '"')
+
+    _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
+    with sqlite3.connection(_jobs_db) as conn:
+        # The command runs `scripts.insert_samples` with the current Python
+        # interpreter. Positional arguments, in order: SQL URI, species id,
+        # population id, samples file path, separator; then the options.
+        job = jobs.launch_job(
+            jobs.initialise_job(
+                conn,
+                str(uuid.uuid4()),
+                [
+                    sys.executable, "-m", "scripts.insert_samples",
+                    app.config["SQL_URI"],
+                    str(species["SpeciesId"]),
+                    str(population["InbredSetId"]),
+                    str(samples_file.absolute()),
+                    separator,
+                    f"--quotechar={quotechar}"
+                ] + (["--firstlineheading"] if firstlineheading else []),
+                "samples_upload",
+                extra_meta={
+                    "job_name": f"Samples Upload: {samples_file.name}"
+                },
+                external_id=session.logged_in_user_id()),
+            _jobs_db,
+            Path(f"{app.config['UPLOAD_FOLDER']}/job_errors").absolute(),
+            # Pass the application's effective log level, lower-cased.
+            loglevel=logging.getLevelName(
+                app.logger.getEffectiveLevel()).lower())
+        return redirect(
+            url_for("background-jobs.job_status", job_id=job["job_id"]))