Enable downloading of dataset's phenotype's data.

author: Frederick Muriuki Muriithi 2025-03-18 13:11:41 -0500
committer: Frederick Muriuki Muriithi 2025-03-18 13:11:41 -0500
commit: a675d462d2771f9e6c497c2385c985c2a6bb8806 (patch)
tree: cbeb1e7a1f341840565b472c076e051e870d46df
parent: a4b32dee1b1b638ab60c56df27ace221d6a77c3a (diff)
download: gn-uploader-a675d462d2771f9e6c497c2385c985c2a6bb8806.tar.gz
4 files changed, 158 insertions, 1 deletions
diff --git a/uploader/default_settings.py b/uploader/default_settings.py
index f07f89e..1136ff8 100644
--- a/uploader/default_settings.py
+++ b/uploader/default_settings.py
@@ -7,6 +7,7 @@ import hashlib
 LOG_LEVEL = "WARNING"
 SECRET_KEY = b"<Please! Please! Please! Change This!>"
 UPLOAD_FOLDER = "/tmp/qc_app_files"
+TEMPORARY_DIRECTORY = "/tmp/gn-uploader-tmpdir"
 REDIS_URL = "redis://"
 JOBS_TTL_SECONDS = 1209600 # 14 days
 GNQC_REDIS_PREFIX="gn-uploader"
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index 98278e5..ce7720c 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -254,3 +254,29 @@ def save_new_dataset(cursor: Cursor,
         params)
     debug_query(cursor, app.logger)
     return {**params, "Id": cursor.lastrowid}
+
+
+def phenotypes_data_by_ids(
+        conn: mdb.Connection,
+        inbred_pheno_xref: dict[str, int]
+) -> tuple[dict, ...]:
+    """Fetch all phenotype data, filtered by the `inbred_pheno_xref` mapping."""
+    _paramstr = ",".join(["(%s, %s, %s)"] * len(inbred_pheno_xref))
+    _query = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode "
+              "FROM Phenotype AS pheno "
+              "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+              "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id "
+              "INNER JOIN Strain AS str ON pd.StrainId=str.Id "
+              "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId "
+              "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId "
+              "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId "
+              f"WHERE (pxr.InbredSetId, pheno.Id, pxr.Id) IN ({_paramstr}) "
+              "ORDER BY pheno.Id")
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(_query, tuple(item for row in inbred_pheno_xref
+                                     for item in (row["population_id"],
+                                                  row["phenoid"],
+                                                  row["xref_id"])))
+        debug_query(cursor, app.logger)
+        return tuple(
+            reduce(__organise_by_phenotype__, cursor.fetchall(), {}).values())
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index dc2df8f..407493c 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -1,8 +1,10 @@
 """Views handling ('classical') phenotypes."""
 import sys
+import csv
 import uuid
 import json
 import datetime
+import tempfile
 from typing import Any
 from pathlib import Path
 from zipfile import ZipFile
@@ -13,6 +15,7 @@ from redis import Redis
 from pymonad.either import Left
 from requests.models import Response
 from MySQLdb.cursors import DictCursor
+from werkzeug.utils import secure_filename
 from gn_libs.mysqldb import database_connection
 from flask import (flash,
                    request,
@@ -20,6 +23,7 @@ from flask import (flash,
                    jsonify,
                    redirect,
                    Blueprint,
+                   send_file,
                    current_app as app)
 
 # from r_qtl import r_qtl2 as rqtl2
@@ -36,6 +40,7 @@ from uploader.datautils import safe_int, enumerate_sequence
 from uploader.species.models import all_species, species_by_id
 from uploader.monadic_requests import make_either_error_handler
 from uploader.request_checks import with_species, with_population
+from uploader.samples.models import samples_by_species_and_population
 from uploader.input_validation import (encode_errors,
                                        decode_errors,
                                        is_valid_representative_name)
@@ -46,6 +51,7 @@ from .models import (dataset_by_id,
                      save_new_dataset,
                      dataset_phenotypes,
                      datasets_by_population,
+                     phenotypes_data_by_ids,
                      phenotype_publication_data)
 
 phenotypesbp = Blueprint("phenotypes", __name__)
@@ -844,3 +850,84 @@ def edit_phenotype_data(# pylint: disable=[unused-argument]
             population_id=population["Id"],
             dataset_id=dataset["Id"],
             xref_id=xref_id))
+
+
+def process_phenotype_data_for_download(pheno: dict) -> dict:
+    """Sanitise data for download."""
+    return {
+        "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}",
+        **{
+            key: val for key, val in pheno.items()
+            if key not in ("Id", "xref_id", "data", "Units")
+        },
+        **{
+            data_item["StrainName"]: data_item["value"]
+            for data_item in pheno.get("data", {}).values()
+        }
+    }
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/download",
+    methods=["POST"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def download_phenotype_data(# pylint: disable=[unused-argument]
+        species: dict,
+        population: dict,
+        dataset: dict,
+        **kwargs
+):
+    formdata = request.json
+    with database_connection(app.config["SQL_URI"]) as conn:
+        samples_list = [
+            sample["Name"] for sample in samples_by_species_and_population(
+                conn, species["SpeciesId"], population["Id"])]
+        data = (
+            process_phenotype_data_for_download(pheno)
+            for pheno in phenotypes_data_by_ids(conn, tuple({
+                    "population_id": population["Id"],
+                    "phenoid": row["phenotype_id"],
+                    "xref_id": row["xref_id"]
+            } for row in formdata)))
+
+        with (tempfile.TemporaryDirectory(
+                prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir):
+            filename = Path(tmpdir).joinpath("tempfile.tsv")
+            with open(filename, mode="w") as outfile:
+                outfile.write(
+                    "# **DO NOT** delete the 'UniqueIdentifier' field. It is used "
+                    "by the system to identify and edit the correct row(s) in the "
+                    "database.\n")
+                outfile.write(
+                    "# The '…_description' fields are useful for you to figure out "
+                    "what row you are working on. Changing any of this fields will "
+                    "also update the database, so do be careful.\n")
+                outfile.write(
+                    "# Leave a field empty to delete the value in the database.\n")
+                outfile.write(
+                    "# Any line beginning with a '#' character is considered a "
+                    "comment line. This line, and all the lines above it, are "
+                    "all comment lines. Comment lines will be ignored.\n")
+                writer = csv.DictWriter(outfile,
+                                        fieldnames=[
+                                            "UniqueIdentifier",
+                                            "Post_publication_description",
+                                            "Pre_publication_abbreviation",
+                                            "Pre_publication_description",
+                                            "Original_description",
+                                            "Post_publication_abbreviation"
+                                        ] + samples_list,
+                                        dialect="excel-tab")
+                writer.writeheader()
+                writer.writerows(data)
+                outfile.flush()
+
+            return send_file(
+                filename,
+                mimetype="text/csv",
+                as_attachment=True,
+                download_name=secure_filename(f"{dataset['Name']}_data"))
diff --git a/uploader/templates/phenotypes/view-dataset.html b/uploader/templates/phenotypes/view-dataset.html
index 10fd428..fa1044b 100644
--- a/uploader/templates/phenotypes/view-dataset.html
+++ b/uploader/templates/phenotypes/view-dataset.html
@@ -89,7 +89,12 @@
 {%block javascript%}
 <script type="text/javascript">
   $(function() {
+      var species_id = {{species.SpeciesId}};
+      var population_id = {{population.Id}};
+      var dataset_id = {{dataset.Id}};
+      var dataset_name = "{{dataset.Name}}";
       var data = {{phenotypes | tojson}};
+
       var dtPhenotypesList = buildDataTable(
           "#tbl-phenotypes-list",
           data,
@@ -143,7 +148,45 @@
                           {
                               text: "Bulk Edit (Download Data)",
                               action: (event, dt, node, config) => {
-                                  alert("Not implemented yet!");
+                                  var phenoids = [];
+                                  var selected = dt.rows({selected: true, page: "all"}).data();
+                                  for(var idx = 0; idx < selected.length; idx++) {
+                                      phenoids.push({
+                                          phenotype_id: selected[idx].Id,
+                                          xref_id: selected[idx].xref_id
+                                      });
+                                  }
+                                  if(phenoids.length == 0) {
+                                      alert("No record selected. Nothing to do!");
+                                      return false;
+                                  }
+                                  $.ajax(
+                                      (`/species/${species_id}/populations/` +
+                                       `${population_id}/phenotypes/datasets/` +
+                                       `${dataset_id}/download`),
+                                      {
+                                          method: "POST",
+                                          data: JSON.stringify(phenoids),
+                                          xhrFields: {
+                                              responseType: "blob"
+                                          },
+                                          success: (data, textStatus, jqXHR) => {
+                                              var link = document.createElement("a");
+                                              uri = window.URL.createObjectURL(data);
+                                              link.href = uri;
+                                              link.download = `${dataset_name}_data.tsv`;
+
+                                              document.body.appendChild(link);
+                                              link.click();
+                                              window.URL.revokeObjectURL(uri);
+                                              link.remove();
+                                          },
+                                          error: (jQXHR, textStatus, errorThrown) => {
+                                              console.log("Experienced an error: ", textStatus);
+                                              console.log("The ERROR: ", errorThrown);
+                                          },
+                                          contentType: "application/json"
+                                      });
                               },
                               className: "btn btn-info",
                               titleAttr: "Click to download data for editing."
author	Frederick Muriuki Muriithi	2025-03-18 13:11:41 -0500
committer	Frederick Muriuki Muriithi	2025-03-18 13:11:41 -0500
commit	a675d462d2771f9e6c497c2385c985c2a6bb8806 (patch)
tree	cbeb1e7a1f341840565b472c076e051e870d46df
parent	a4b32dee1b1b638ab60c56df27ace221d6a77c3a (diff)
download	gn-uploader-a675d462d2771f9e6c497c2385c985c2a6bb8806.tar.gz