about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-09-09 16:39:50 -0500
committerFrederick Muriuki Muriithi2024-09-09 16:41:46 -0500
commit3c8ec9af68d593e7e5eef52aca3d001e22c8e0f7 (patch)
tree6ac71fb5d40d5ae8dc4b9e93b8d8b1b9510b29bc
parent9cd33ddac3d6848c5443962d66494635feadef51 (diff)
downloadgn-uploader-3c8ec9af68d593e7e5eef52aca3d001e22c8e0f7.tar.gz
Enable samples uploads.
-rw-r--r--scripts/insert_samples.py2
-rw-r--r--uploader/samples/models.py85
-rw-r--r--uploader/samples/views.py173
-rw-r--r--uploader/templates/samples/list-samples.html5
-rw-r--r--uploader/templates/samples/upload-failure.html37
-rw-r--r--uploader/templates/samples/upload-progress.html31
-rw-r--r--uploader/templates/samples/upload-samples.html155
-rw-r--r--uploader/templates/samples/upload-success.html36
8 files changed, 408 insertions, 116 deletions
diff --git a/scripts/insert_samples.py b/scripts/insert_samples.py
index d107c25..e3577b6 100644
--- a/scripts/insert_samples.py
+++ b/scripts/insert_samples.py
@@ -11,7 +11,7 @@ from uploader.db_utils import database_connection
 from uploader.check_connections import check_db, check_redis
 from uploader.species.models import species_by_id
 from uploader.population.models import population_by_id
-from uploader.expression_data.samples import (
+from uploader.samples.models import (
     save_samples_data,
     read_samples_file,
     cross_reference_samples)
diff --git a/uploader/samples/models.py b/uploader/samples/models.py
index 15e509e..d7d5384 100644
--- a/uploader/samples/models.py
+++ b/uploader/samples/models.py
@@ -1,7 +1,12 @@
 """Functions for handling samples."""
+import csv
+from typing import Iterator
+
 import MySQLdb as mdb
 from MySQLdb.cursors import DictCursor
 
+from functional_tools import take
+
 def samples_by_species_and_population(
         conn: mdb.Connection,
         species_id: int,
@@ -17,3 +22,83 @@ def samples_by_species_and_population(
             "AND iset.InbredSetId=%(population_id)s",
             {"species_id": species_id, "population_id": population_id})
         return tuple(cursor.fetchall())
+
+
+def read_samples_file(filepath, separator: str, firstlineheading: bool, **kwargs) -> Iterator[dict]:
+    """Read the samples file."""
+    with open(filepath, "r", encoding="utf-8") as inputfile:
+        reader = csv.DictReader(
+            inputfile,
+            fieldnames=(
+                None if firstlineheading
+                else ("Name", "Name2", "Symbol", "Alias")),
+            delimiter=separator,
+            quotechar=kwargs.get("quotechar", '"'))
+        for row in reader:
+            yield row
+
+
+def save_samples_data(conn: mdb.Connection,
+                      speciesid: int,
+                      file_data: Iterator[dict]):
+    """Save the samples to DB."""
+    data = ({**row, "SpeciesId": speciesid} for row in file_data)
+    total = 0
+    with conn.cursor() as cursor:
+        while True:
+            batch = take(data, 5000)
+            if len(batch) == 0:
+                break
+            cursor.executemany(
+                "INSERT INTO Strain(Name, Name2, SpeciesId, Symbol, Alias) "
+                "VALUES("
+                "    %(Name)s, %(Name2)s, %(SpeciesId)s, %(Symbol)s, %(Alias)s"
+                ") ON DUPLICATE KEY UPDATE Name=Name",
+                batch)
+            total += len(batch)
+            print(f"\tSaved {total} samples total so far.")
+
+
+def cross_reference_samples(conn: mdb.Connection,
+                            species_id: int,
+                            population_id: int,
+                            strain_names: Iterator[str]):
+    """Link samples to their population."""
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(
+            "SELECT MAX(OrderId) AS loid FROM StrainXRef WHERE InbredSetId=%s",
+            (population_id,))
+        last_order_id = (cursor.fetchone()["loid"] or 10)
+        total = 0
+        while True:
+            batch = take(strain_names, 5000)
+            if len(batch) == 0:
+                break
+            params_str = ", ".join(["%s"] * len(batch))
+            ## This query is slow -- investigate.
+            cursor.execute(
+                "SELECT s.Id FROM Strain AS s LEFT JOIN StrainXRef AS sx "
+                "ON s.Id = sx.StrainId WHERE s.SpeciesId=%s AND s.Name IN "
+                f"({params_str}) AND sx.StrainId IS NULL",
+                (species_id,) + tuple(batch))
+            strain_ids = (sid["Id"] for sid in cursor.fetchall())
+            params = tuple({
+                "pop_id": population_id,
+                "strain_id": strain_id,
+                "order_id": last_order_id + (order_id * 10),
+                "mapping": "N",
+                "pedigree": None
+            } for order_id, strain_id in enumerate(strain_ids, start=1))
+            cursor.executemany(
+                "INSERT INTO StrainXRef( "
+                "  InbredSetId, StrainId, OrderId, Used_for_mapping, PedigreeStatus"
+                ")"
+                "VALUES ("
+                "  %(pop_id)s, %(strain_id)s, %(order_id)s, %(mapping)s, "
+                "  %(pedigree)s"
+                ")",
+                params)
+            last_order_id += (len(params) * 10)
+            total += len(batch)
+            print(f"\t{total} total samples cross-referenced to the population "
+                  "so far.")
diff --git a/uploader/samples/views.py b/uploader/samples/views.py
index 6af90f4..6e3dc4b 100644
--- a/uploader/samples/views.py
+++ b/uploader/samples/views.py
@@ -1,7 +1,6 @@
 """Code regarding samples"""
 import os
 import sys
-import csv
 import uuid
 from pathlib import Path
 from typing import Iterator
@@ -18,8 +17,6 @@ from flask import (
     render_template,
     current_app as app)
 
-from functional_tools import take
-
 from uploader import jobs
 from uploader.files import save_file
 from uploader.datautils import order_by_family
@@ -34,7 +31,8 @@ from uploader.species.models import (all_species,
                                      order_species_by_family)
 from uploader.population.models import(save_population,
                                        population_by_id,
-                                       populations_by_species)
+                                       populations_by_species,
+                                       population_by_species_and_id)
 
 from .models import samples_by_species_and_population
 
@@ -123,86 +121,6 @@ def list_samples(species_id: int, population_id: int):
                                activelink="list-samples")
 
 
-def read_samples_file(filepath, separator: str, firstlineheading: bool, **kwargs) -> Iterator[dict]:
-    """Read the samples file."""
-    with open(filepath, "r", encoding="utf-8") as inputfile:
-        reader = csv.DictReader(
-            inputfile,
-            fieldnames=(
-                None if firstlineheading
-                else ("Name", "Name2", "Symbol", "Alias")),
-            delimiter=separator,
-            quotechar=kwargs.get("quotechar", '"'))
-        for row in reader:
-            yield row
-
-
-def save_samples_data(conn: mdb.Connection,
-                      speciesid: int,
-                      file_data: Iterator[dict]):
-    """Save the samples to DB."""
-    data = ({**row, "SpeciesId": speciesid} for row in file_data)
-    total = 0
-    with conn.cursor() as cursor:
-        while True:
-            batch = take(data, 5000)
-            if len(batch) == 0:
-                break
-            cursor.executemany(
-                "INSERT INTO Strain(Name, Name2, SpeciesId, Symbol, Alias) "
-                "VALUES("
-                "    %(Name)s, %(Name2)s, %(SpeciesId)s, %(Symbol)s, %(Alias)s"
-                ") ON DUPLICATE KEY UPDATE Name=Name",
-                batch)
-            total += len(batch)
-            print(f"\tSaved {total} samples total so far.")
-
-
-def cross_reference_samples(conn: mdb.Connection,
-                            species_id: int,
-                            population_id: int,
-                            strain_names: Iterator[str]):
-    """Link samples to their population."""
-    with conn.cursor(cursorclass=DictCursor) as cursor:
-        cursor.execute(
-            "SELECT MAX(OrderId) AS loid FROM StrainXRef WHERE InbredSetId=%s",
-            (population_id,))
-        last_order_id = (cursor.fetchone()["loid"] or 10)
-        total = 0
-        while True:
-            batch = take(strain_names, 5000)
-            if len(batch) == 0:
-                break
-            params_str = ", ".join(["%s"] * len(batch))
-            ## This query is slow -- investigate.
-            cursor.execute(
-                "SELECT s.Id FROM Strain AS s LEFT JOIN StrainXRef AS sx "
-                "ON s.Id = sx.StrainId WHERE s.SpeciesId=%s AND s.Name IN "
-                f"({params_str}) AND sx.StrainId IS NULL",
-                (species_id,) + tuple(batch))
-            strain_ids = (sid["Id"] for sid in cursor.fetchall())
-            params = tuple({
-                "pop_id": population_id,
-                "strain_id": strain_id,
-                "order_id": last_order_id + (order_id * 10),
-                "mapping": "N",
-                "pedigree": None
-            } for order_id, strain_id in enumerate(strain_ids, start=1))
-            cursor.executemany(
-                "INSERT INTO StrainXRef( "
-                "  InbredSetId, StrainId, OrderId, Used_for_mapping, PedigreeStatus"
-                ")"
-                "VALUES ("
-                "  %(pop_id)s, %(strain_id)s, %(order_id)s, %(mapping)s, "
-                "  %(pedigree)s"
-                ")",
-                params)
-            last_order_id += (len(params) * 10)
-            total += len(batch)
-            print(f"\t{total} total samples cross-referenced to the population "
-                  "so far.")
-
-
 def build_sample_upload_job(# pylint: disable=[too-many-arguments]
         speciesid: int,
         populationid: int,
@@ -219,14 +137,15 @@ def build_sample_upload_job(# pylint: disable=[too-many-arguments]
     ] + (["--firstlineheading"] if firstlineheading else [])
 
 
-@samplesbp.route("/upload/species/<int:species_id>/populations/<int:population_id>/samples",
+@samplesbp.route("<int:species_id>/populations/<int:population_id>/upload-samples",
                methods=["GET", "POST"])
 @require_login
 def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements]
     """Upload the samples."""
-    samples_uploads_page = redirect(url_for("samples.upload_samples",
-                                            species_id=species_id,
-                                            population_id=population_id))
+    samples_uploads_page = redirect(url_for(
+        "species.populations.samples.upload_samples",
+        species_id=species_id,
+        population_id=population_id))
     if not is_integer_input(species_id):
         flash("You did not provide a valid species. Please select one to "
               "continue.",
@@ -241,14 +160,14 @@ def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-ma
         flash("You did not provide a valid population. Please select one "
               "to continue.",
               "alert-danger")
-        return redirect(url_for("samples.select_population",
+        return redirect(url_for("species.populations.samples.select_population",
                                 species_id=species_id),
                         code=307)
     population = with_db_connection(
         lambda conn: population_by_id(conn, int(population_id)))
     if not bool(population):
         flash("Invalid grouping/population!", "alert-error")
-        return redirect(url_for("samples.select_population",
+        return redirect(url_for("species.populations.samples.select_population",
                                 species_id=species_id),
                         code=307)
 
@@ -296,34 +215,62 @@ def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-ma
             redisuri,
             f"{app.config['UPLOAD_FOLDER']}/job_errors")
         return redirect(url_for(
-            "samples.upload_status", job_id=the_job["jobid"]))
+            "species.populations.samples.upload_status",
+            species_id=species_id,
+            population_id=population_id,
+            job_id=the_job["jobid"]))
+
 
-@samplesbp.route("/upload/status/<uuid:job_id>", methods=["GET"])
-def upload_status(job_id: uuid.UUID):
+@samplesbp.route("<int:species_id>/populations/<int:population_id>/"
+                 "upload-samples/status/<uuid:job_id>",
+                 methods=["GET"])
+def upload_status(species_id: int, population_id: int, job_id: uuid.UUID):
     """Check on the status of a samples upload job."""
-    job = with_redis_connection(lambda rconn: jobs.job(
-        rconn, jobs.jobsnamespace(), job_id))
-    if job:
-        status = job["status"]
-        if status == "success":
-            return render_template("samples/upload-success.html", job=job)
-
-        if status == "error":
-            return redirect(url_for("samples.upload_failure", job_id=job_id))
-
-        error_filename = Path(jobs.error_filename(
-            job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors"))
-        if error_filename.exists():
-            stat = os.stat(error_filename)
-            if stat.st_size > 0:
+    with database_connection(app.config["SQL_URI"]) as conn:
+        species = species_by_id(conn, species_id)
+        if not bool(species):
+            flash("You must provide a valid species.", "alert-danger")
+            return redirect(url_for("species.populations.samples.index"))
+
+        population = population_by_species_and_id(
+            conn, species_id, population_id)
+        if not bool(population):
+            flash("You must provide a valid population.", "alert-danger")
+            return redirect(url_for(
+                "species.populations.samples.select_population",
+                species_id=species_id))
+
+        job = with_redis_connection(lambda rconn: jobs.job(
+            rconn, jobs.jobsnamespace(), job_id))
+        if job:
+            status = job["status"]
+            if status == "success":
+                return render_template("samples/upload-success.html",
+                                       job=job,
+                                       species=species,
+                                       population=population,)
+
+            if status == "error":
                 return redirect(url_for(
-                    "samples.upload_failure", job_id=job_id))
+                    "species.populations.samples.upload_failure", job_id=job_id))
 
-        return render_template(
-            "samples/upload-progress.html",
-            job=job) # maybe also handle this?
+            error_filename = Path(jobs.error_filename(
+                job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors"))
+            if error_filename.exists():
+                stat = os.stat(error_filename)
+                if stat.st_size > 0:
+                    return redirect(url_for(
+                        "samples.upload_failure", job_id=job_id))
 
-    return render_template("no_such_job.html", job_id=job_id), 400
+            return render_template("samples/upload-progress.html",
+                                   species=species,
+                                   population=population,
+                                   job=job) # maybe also handle this?
+
+        return render_template("no_such_job.html",
+                               job_id=job_id,
+                               species=species,
+                               population=population), 400
 
 @samplesbp.route("/upload/failure/<uuid:job_id>", methods=["GET"])
 def upload_failure(job_id: uuid.UUID):
diff --git a/uploader/templates/samples/list-samples.html b/uploader/templates/samples/list-samples.html
index a29dc1c..8f1bf16 100644
--- a/uploader/templates/samples/list-samples.html
+++ b/uploader/templates/samples/list-samples.html
@@ -97,7 +97,9 @@
   </p>
 
   <p>
-    <a href="#"
+    <a href="{{url_for('species.populations.samples.upload_samples',
+             species_id=species.SpeciesId,
+             population_id=population.Id)}}"
        title="Add samples for population '{{population.FullName}}' from species
               '{{species.FullName}}'."
        class="btn btn-primary">
@@ -111,4 +113,3 @@
 {%block sidebarcontents%}
 {{display_population_card(species, population)}}
 {%endblock%}
-
diff --git a/uploader/templates/samples/upload-failure.html b/uploader/templates/samples/upload-failure.html
new file mode 100644
index 0000000..458ab55
--- /dev/null
+++ b/uploader/templates/samples/upload-failure.html
@@ -0,0 +1,37 @@
+{%extends "base.html"%}
+{%from "cli-output.html" import cli_output%}
+{%from "populations/macro-display-population-card.html" import display_population_card%}
+
+{%block title%}Samples Upload Failure{%endblock%}
+
+{%block contents%}
+<div class="row">
+<h2 class="heading">{{job.job_name[0:50]}}&hellip;</h2>
+
+<p>There was a failure attempting to upload the samples.</p>
+
+<p>Here is some information to help with debugging the issue. Provide this
+  information to the developer/maintainer.</p>
+
+<h3>Debugging Information</h3>
+<ul>
+  <li><strong>job id</strong>: {{job.job_id}}</li>
+  <li><strong>status</strong>: {{job.status}}</li>
+  <li><strong>job type</strong>: {{job["job-type"]}}</li>
+</ul>
+</div>
+
+<div class="row">
+<h4>stdout</h4>
+{{cli_output(job, "stdout")}}
+</div>
+
+<div class="row">
+<h4>stderr</h4>
+{{cli_output(job, "stderr")}}
+</div>
+{%endblock%}
+
+{%block sidebarcontents%}
+{{display_population_card(species, population)}}
+{%endblock%}
diff --git a/uploader/templates/samples/upload-progress.html b/uploader/templates/samples/upload-progress.html
new file mode 100644
index 0000000..677d457
--- /dev/null
+++ b/uploader/templates/samples/upload-progress.html
@@ -0,0 +1,31 @@
+{%extends "samples/base.html"%}
+{%from "cli-output.html" import cli_output%}
+{%from "populations/macro-display-population-card.html" import display_population_card%}
+
+{%block extrameta%}
+<meta http-equiv="refresh" content="5">
+{%endblock%}
+
+{%block title%}Job Status{%endblock%}
+
+{%block contents%}
+<div class="row" style="overflow-x: clip;">
+<h2 class="heading">{{job.job_name[0:50]}}&hellip;</h2>
+
+<p>
+<strong>status</strong>:
+<span>{{job["status"]}} ({{job.get("message", "-")}})</span><br />
+</p>
+
+<p>saving to database...</p>
+</div>
+
+<div class="row">
+  {{cli_output(job, "stdout")}}
+</div>
+
+{%endblock%}
+
+{%block sidebarcontents%}
+{{display_population_card(species, population)}}
+{%endblock%}
diff --git a/uploader/templates/samples/upload-samples.html b/uploader/templates/samples/upload-samples.html
new file mode 100644
index 0000000..b101b2e
--- /dev/null
+++ b/uploader/templates/samples/upload-samples.html
@@ -0,0 +1,155 @@
+{%extends "samples/base.html"%}
+{%from "flash_messages.html" import flash_all_messages%}
+{%from "populations/macro-select-population.html" import select_population_form%}
+{%from "populations/macro-display-population-card.html" import display_population_card%}
+
+{%block title%}Samples &mdash; Upload Samples{%endblock%}
+
+{%block pagetitle%}Samples &mdash; Upload Samples{%endblock%}
+
+{%block lvl4_breadcrumbs%}
+<li {%if activelink=="uploade-samples"%}
+    class="breadcrumb-item active"
+    {%else%}
+    class="breadcrumb-item"
+    {%endif%}>
+  <a href="{{url_for('species.populations.samples.upload_samples',
+           species_id=species.SpeciesId,
+           population_id=population.Id)}}">List</a>
+</li>
+{%endblock%}
+
+{%block contents%}
+{{flash_all_messages()}}
+
+<div class="row">
+  <p>You can now upload a character-separated value (CSV) file that contains
+    details about your samples. The CSV file should have the following fields:
+    <dl>
+      <dt>Name</dt>
+      <dd>The primary name/identifier for the sample/individual.</dd>
+
+      <dt>Name2</dt>
+      <dd>A secondary name for the sample. This can simply be the same as
+        <strong>Name</strong> above. This field <strong>MUST</strong> contain a
+        value.</dd>
+
+      <dt>Symbol</dt>
+      <dd>A symbol for the sample. This can be a strain name, e.g. 'BXD60' for
+        species that have strains. This field can be left empty for species like
+        Humans that do not have strains..</dd>
+
+      <dt>Alias</dt>
+      <dd>An alias for the sample. Can be an empty field, or take on the same
+        value as that of the Symbol.</dd>
+    </dl>
+  </p>
+</div>
+
+<div class="row">
+  <form id="form-samples"
+        method="POST"
+        action="{{url_for('species.populations.samples.upload_samples',
+                species_id=species.SpeciesId,
+                population_id=population.InbredSetId)}}"
+        enctype="multipart/form-data">
+    <legend class="heading">upload samples</legend>
+
+    <input type="hidden" name="species_id" value="{{species.SpeciesId}}" />
+    <input type="hidden" name="population_id" value="{{population.Id}}" />
+
+    <div class="form-group">
+      <label for="file-samples" class="form-label">select file</label>
+      <input type="file" name="samples_file" id="file:samples"
+	     accept="text/csv, text/tab-separated-values"
+	     class="form-control" />
+    </div>
+
+    <div class="form-group">
+      <label for="select:separator" class="form-label">field separator</label>
+      <select id="select:separator"
+	      name="separator"
+	      required="required"
+	      class="form-control">
+        <option value="">Select separator for your file: (default is comma)</option>
+        <option value="&#x0009;">TAB</option>
+        <option value="&#x0020;">Space</option>
+        <option value=",">Comma</option>
+        <option value=";">Semicolon</option>
+        <option value="other">Other</option>
+      </select>
+      <input id="txt:separator"
+	     type="text"
+	     name="other_separator"
+	     class="form-control" />
+      <small class="form-text text-muted">
+        If you select '<strong>Other</strong>' for the field separator value,
+        enter the character that separates the fields in your CSV file in the form
+        field below.
+      </small>
+    </div>
+
+    <div class="form-group form-check">
+      <input id="chk:heading"
+	     type="checkbox"
+	     name="first_line_heading"
+	     class="form-check-input" />
+      <label for="chk:heading" class="form-check-label">
+        first line is a heading?</label>
+      <small class="form-text text-muted">
+        Select this if the first line in your file contains headings for the
+        columns.
+      </small>
+    </div>
+
+    <div class="form-group">
+      <label for="txt:delimiter" class="form-label">field delimiter</label>
+      <input id="txt:delimiter"
+	     type="text"
+	     name="field_delimiter"
+	     maxlength="1"
+	     class="form-control" />
+      <small class="form-text text-muted">
+        If there is a character delimiting the string texts within particular
+        fields in your CSV, provide the character here. This can be left blank if
+        no such delimiters exist in your file.
+      </small>
+    </div>
+
+    <button type="submit"
+	    class="btn btn-primary">upload samples file</button>
+  </form>
+</div>
+
+<div class="row">
+  <h3>Preview File Content</h3>
+
+  <table id="tbl:samples-preview" class="table">
+    <caption class="heading">preview content</caption>
+
+    <thead>
+      <tr>
+        <th>Name</th>
+        <th>Name2</th>
+        <th>Symbol</th>
+        <th>Alias</th>
+      </tr>
+    </thead>
+
+    <tbody>
+      <tr id="default-row">
+        <td colspan="4">
+	  Please make some selections in the form above to preview the data.</td>
+      </tr>
+    </tbody>
+  </table>
+</div>
+{%endblock%}
+
+{%block sidebarcontents%}
+{{display_population_card(species, population)}}
+{%endblock%}
+
+{%block javascript%}
+<script src="/static/js/upload_samples.js" type="text/javascript"></script>
+{%endblock%}
diff --git a/uploader/templates/samples/upload-success.html b/uploader/templates/samples/upload-success.html
new file mode 100644
index 0000000..881d466
--- /dev/null
+++ b/uploader/templates/samples/upload-success.html
@@ -0,0 +1,36 @@
+{%extends "samples/base.html"%}
+{%from "cli-output.html" import cli_output%}
+{%from "populations/macro-display-population-card.html" import display_population_card%}
+
+{%block title%}Job Status{%endblock%}
+
+{%block contents%}
+
+<div class="row" style="overflow-x: clip;">
+  <h2 class="heading">{{job.job_name[0:50]}}&hellip;</h2>
+
+  <p>
+    <strong>status</strong>:
+    <span>{{job["status"]}} ({{job.get("message", "-")}})</span><br />
+  </p>
+
+  <p>Successfully uploaded the samples.</p>
+  <p>
+    <a href="{{url_for('species.populations.samples.list_samples',
+             species_id=species.SpeciesId,
+             population_id=population.Id)}}"
+       title="View population samples">
+      View samples
+    </a>
+  </p>
+</div>
+
+<div class="row">
+  {{cli_output(job, "stdout")}}
+</div>
+
+{%endblock%}
+
+{%block sidebarcontents%}
+{{display_population_card(species, population)}}
+{%endblock%}