about summary refs log tree commit diff
path: root/uploader/phenotypes/views.py
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/phenotypes/views.py')
-rw-r--r--uploader/phenotypes/views.py382
1 files changed, 185 insertions, 197 deletions
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index 430a3fb..556b5ff 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -1,46 +1,52 @@
-"""Views handling ('classical') phenotypes."""
+"""Views handling ('classical') phenotypes."""# pylint: disable=[too-many-lines]
 import sys
-import csv
 import uuid
 import json
-import datetime
-import tempfile
+import logging
 from typing import Any
 from pathlib import Path
 from zipfile import ZipFile
 from functools import wraps, reduce
-from logging import INFO, ERROR, DEBUG, FATAL, CRITICAL, WARNING
+from urllib.parse import urljoin, urlparse, ParseResult, urlunparse, urlencode
+
+import datetime
 
 from redis import Redis
 from pymonad.either import Left
 from requests.models import Response
 from MySQLdb.cursors import DictCursor
-from werkzeug.utils import secure_filename
+
+from gn_libs import sqlite3
+from gn_libs import jobs as gnlibs_jobs
+from gn_libs.jobs.jobs import JobNotFound
 from gn_libs.mysqldb import database_connection
+
 from flask import (flash,
                    request,
                    url_for,
                    jsonify,
                    redirect,
                    Blueprint,
-                   send_file,
                    current_app as app)
 
-# from r_qtl import r_qtl2 as rqtl2
 from r_qtl import r_qtl2_qc as rqc
 from r_qtl import exceptions as rqe
 
+
 from uploader import jobs
+from uploader import session
 from uploader.files import save_file#, fullpath
 from uploader.ui import make_template_renderer
 from uploader.oauth2.client import oauth2_post
+from uploader.oauth2.tokens import request_token
 from uploader.authorisation import require_login
+from uploader.oauth2 import client as oauth2client
 from uploader.route_utils import generic_select_population
 from uploader.datautils import safe_int, enumerate_sequence
 from uploader.species.models import all_species, species_by_id
 from uploader.monadic_requests import make_either_error_handler
+from uploader.publications.models import fetch_publication_by_id
 from uploader.request_checks import with_species, with_population
-from uploader.samples.models import samples_by_species_and_population
 from uploader.input_validation import (encode_errors,
                                        decode_errors,
                                        is_valid_representative_name)
@@ -51,7 +57,6 @@ from .models import (dataset_by_id,
                      save_new_dataset,
                      dataset_phenotypes,
                      datasets_by_population,
-                     phenotypes_data_by_ids,
                      phenotype_publication_data)
 
 phenotypesbp = Blueprint("phenotypes", __name__)
@@ -247,12 +252,7 @@ def view_phenotype(# pylint: disable=[unused-argument]
                 if (key in ("PubMed_ID", "Authors", "Title", "Journal")
                     and __non_empty__(val))
             },
-            privileges=(privileges
-                        ### For demo! Do not commit this part
-                            + ("group:resource:edit-resource",
-                               "group:resource:delete-resource",)
-                        ### END: For demo! Do not commit this part
-                            ),
+            privileges=privileges,
             activelink="view-phenotype")
 
     def __fail__(error):
@@ -359,10 +359,17 @@ def process_phenotypes_individual_files(error_uri):
     bundlepath = Path(app.config["UPLOAD_FOLDER"],
                       f"{str(uuid.uuid4()).replace('-', '')}.zip")
     with ZipFile(bundlepath,mode="w") as zfile:
-        for rqtlkey, formkey in (("phenocovar", "phenotype-descriptions"),
-                                 ("pheno", "phenotype-data"),
-                                 ("phenose", "phenotype-se"),
-                                 ("phenonum", "phenotype-n")):
+        for rqtlkey, formkey, _type in (
+                ("phenocovar", "phenotype-descriptions", "mandatory"),
+                ("pheno", "phenotype-data", "mandatory"),
+                ("phenose", "phenotype-se", "optional"),
+                ("phenonum", "phenotype-n", "optional")):
+            if _type == "optional" and not bool(form.get(formkey)):
+                continue # skip if an optional key does not exist.
+
+            cdata[f"{rqtlkey}_transposed"] = (
+                (form.get(f"{formkey}-transposed") or "off") == "on")
+
             if form.get("resumable-upload", False):
                 # Chunked upload of large files was used
                 filedata = json.loads(form[formkey])
@@ -371,7 +378,7 @@ def process_phenotypes_individual_files(error_uri):
                     arcname=filedata["original-name"])
                 cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filedata["original-name"]]
             else:
-                # TODO: Check this path: fix any bugs.
+                # T0DO: Check this path: fix any bugs.
                 _sentfile = request.files[formkey]
                 if not bool(_sentfile):
                     flash(f"Expected file ('{formkey}') was not provided.",
@@ -385,6 +392,7 @@ def process_phenotypes_individual_files(error_uri):
                     arcname=filepath.name)
                 cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name]
 
+
         zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2))
 
     return bundlepath
@@ -409,10 +417,7 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p
         dataset_id=dataset["Id"]))
     _redisuri = app.config["REDIS_URL"]
     _sqluri = app.config["SQL_URI"]
-    with (Redis.from_url(_redisuri, decode_responses=True) as rconn,
-          # database_connection(_sqluri) as conn,
-          # conn.cursor(cursorclass=DictCursor) as cursor
-          ):
+    with Redis.from_url(_redisuri, decode_responses=True) as rconn:
         if request.method == "GET":
             today = datetime.date.today()
             return render_template(
@@ -447,24 +452,20 @@ def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# p
                 [sys.executable, "-m", "scripts.rqtl2.phenotypes_qc", _sqluri,
                  _redisuri, _namespace, str(_jobid), str(species["SpeciesId"]),
                  str(population["Id"]),
-                 # str(dataset["Id"]),
              str(phenobundle),
                  "--loglevel",
-                 {
-                     INFO: "INFO",
-                     ERROR: "ERROR",
-                     DEBUG: "DEBUG",
-                     FATAL: "FATAL",
-                     CRITICAL: "CRITICAL",
-                     WARNING: "WARNING"
-                 }[app.logger.getEffectiveLevel()],
+                 logging.getLevelName(
+                     app.logger.getEffectiveLevel()
+                 ).lower(),
                  "--redisexpiry",
                  str(_ttl_seconds)], "phenotype_qc", _ttl_seconds,
                 {"job-metadata": json.dumps({
                     "speciesid": species["SpeciesId"],
                     "populationid": population["Id"],
                     "datasetid": dataset["Id"],
-                    "bundle": str(phenobundle.absolute())})}),
+                    "bundle": str(phenobundle.absolute()),
+                    **({"publicationid": request.form["publication-id"]}
+                       if request.form.get("publication-id") else {})})}),
             _redisuri,
             f"{app.config['UPLOAD_FOLDER']}/job_errors")
 
@@ -537,7 +538,8 @@ def review_job_data(
         **kwargs
 ):# pylint: disable=[unused-argument]
     """Review data one more time before entering it into the database."""
-    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+          database_connection(app.config["SQL_URI"]) as conn):
         try:
             job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
         except jobs.JobNotFound as _jnf:
@@ -585,6 +587,7 @@ def review_job_data(
             filetype: __summarise__(filetype, meta)
             for filetype,meta in metadata.items()
         }
+        _job_metadata = json.loads(job["job-metadata"])
         return render_template("phenotypes/review-job-data.html",
                                species=species,
                                population=population,
@@ -592,9 +595,103 @@ def review_job_data(
                                job_id=job_id,
                                job=job,
                                summary=summary,
+                               publication=(
+                                   fetch_publication_by_id(
+                                       conn, int(_job_metadata["publicationid"]))
+                                   if _job_metadata.get("publicationid")
+                                   else None),
                                activelink="add-phenotypes")
 
 
+def load_phenotypes_success_handler(job):
+    """Handle loading new phenotypes into the database successfully."""
+    return redirect(url_for(
+        "species.populations.phenotypes.load_data_success",
+        species_id=job["metadata"]["species_id"],
+        population_id=job["metadata"]["population_id"],
+        dataset_id=job["metadata"]["dataset_id"],
+        job_id=job["job_id"]))
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/load-data-to-database",
+    methods=["POST"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def load_data_to_database(
+        species: dict,
+        population: dict,
+        dataset: dict,
+        **kwargs
+):# pylint: disable=[unused-argument]
+    """Load the data from the given QC job into the database."""
+    _jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
+    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+          sqlite3.connection(_jobs_db) as conn):
+        # T0DO: Maybe break the connection between the jobs here, pass:
+        # - the bundle name (rebuild the full path here.)
+        # - publication details, where separate
+        # - details about the files: e.g. total lines, etc
+        qc_job = jobs.job(rconn, jobs.jobsnamespace(), request.form["data-qc-job-id"])
+        _meta = json.loads(qc_job["job-metadata"])
+        _load_job_id = uuid.uuid4()
+        _loglevel = logging.getLevelName(app.logger.getEffectiveLevel()).lower()
+        command = [
+            sys.executable,
+            "-u",
+            "-m",
+            "scripts.load_phenotypes_to_db",
+            app.config["SQL_URI"],
+            _jobs_db,
+            str(_load_job_id),
+            "--log-level",
+            _loglevel
+        ]
+
+        def __handle_error__(resp):
+            return render_template("http-error.html", *resp.json())
+
+        def __handle_success__(load_job):
+            app.logger.debug("The phenotypes loading job: %s", load_job)
+            return redirect(url_for(
+                "background-jobs.job_status", job_id=load_job["job_id"]))
+
+
+        return request_token(
+            token_uri=urljoin(oauth2client.authserver_uri(), "auth/token"),
+            user_id=session.user_details()["user_id"]
+        ).then(
+            lambda token: gnlibs_jobs.initialise_job(
+                conn,
+                _load_job_id,
+                command,
+                "load-new-phenotypes-data",
+                extra_meta={
+                    "species_id": species["SpeciesId"],
+                    "population_id": population["Id"],
+                    "dataset_id": dataset["Id"],
+                    "bundle_file": _meta["bundle"],
+                    "publication_id": _meta["publicationid"],
+                    "authserver": oauth2client.authserver_uri(),
+                    "token": token["access_token"],
+                    "success_handler": (
+                        "uploader.phenotypes.views"
+                        ".load_phenotypes_success_handler")
+                })
+        ).then(
+            lambda job: gnlibs_jobs.launch_job(
+                job,
+                _jobs_db,
+                Path(f"{app.config['UPLOAD_FOLDER']}/job_errors"),
+                worker_manager="gn_libs.jobs.launcher",
+                loglevel=_loglevel)
+        ).either(__handle_error__, __handle_success__)
+
+
 def update_phenotype_metadata(conn, metadata: dict):
     """Update a phenotype's basic metadata values."""
     with conn.cursor(cursorclass=DictCursor) as cursor:
@@ -747,12 +844,7 @@ def edit_phenotype_data(# pylint: disable=[unused-argument]
     def __render__(**kwargs):
         processed_kwargs = {
             **kwargs,
-            "privileges": (kwargs.get("privileges", tuple())
-                           ### For demo! Do not commit this part
-                            + ("group:resource:edit-resource",
-                               "group:resource:delete-resource",)
-                           ### END: For demo! Do not commit this part
-                            )
+            "privileges": kwargs.get("privileges", tuple())
         }
         return render_template(
             "phenotypes/edit-phenotype.html",
@@ -852,170 +944,66 @@ def edit_phenotype_data(# pylint: disable=[unused-argument]
             xref_id=xref_id))
 
 
-def process_phenotype_data_for_download(pheno: dict) -> dict:
-    """Sanitise data for download."""
-    return {
-        "UniqueIdentifier": f"phId:{pheno['Id']}::xrId:{pheno['xref_id']}",
-        **{
-            key: val for key, val in pheno.items()
-            if key not in ("Id", "xref_id", "data", "Units")
-        },
-        **{
-            data_item["StrainName"]: data_item["value"]
-            for data_item in pheno.get("data", {}).values()
-        }
-    }
-
-
-@phenotypesbp.route(
-    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
-    "/<int:dataset_id>/edit-download",
-    methods=["POST"])
-@require_login
-@with_dataset(
-    species_redirect_uri="species.populations.phenotypes.index",
-    population_redirect_uri="species.populations.phenotypes.select_population",
-    redirect_uri="species.populations.phenotypes.list_datasets")
-def edit_download_phenotype_data(# pylint: disable=[unused-argument]
-        species: dict,
-        population: dict,
-        dataset: dict,
-        **kwargs
-):
-    formdata = request.json
-    with database_connection(app.config["SQL_URI"]) as conn:
-        samples_list = [
-            sample["Name"] for sample in samples_by_species_and_population(
-                conn, species["SpeciesId"], population["Id"])]
-        data = (
-            process_phenotype_data_for_download(pheno)
-            for pheno in phenotypes_data_by_ids(conn, tuple({
-                    "population_id": population["Id"],
-                    "phenoid": row["phenotype_id"],
-                    "xref_id": row["xref_id"]
-            } for row in formdata)))
-
-        with (tempfile.TemporaryDirectory(
-                prefix=app.config["TEMPORARY_DIRECTORY"]) as tmpdir):
-            filename = Path(tmpdir).joinpath("tempfile.tsv")
-            with open(filename, mode="w") as outfile:
-                outfile.write(
-                    "# **DO NOT** delete the 'UniqueIdentifier' field. It is used "
-                    "by the system to identify and edit the correct row(s) in the "
-                    "database.\n")
-                outfile.write(
-                    "# The '…_description' fields are useful for you to figure out "
-                    "what row you are working on. Changing any of this fields will "
-                    "also update the database, so do be careful.\n")
-                outfile.write(
-                    "# Leave a field empty to delete the value in the database.\n")
-                outfile.write(
-                    "# Any line beginning with a '#' character is considered a "
-                    "comment line. This line, and all the lines above it, are "
-                    "all comment lines. Comment lines will be ignored.\n")
-                writer = csv.DictWriter(outfile,
-                                        fieldnames=[
-                                            "UniqueIdentifier",
-                                            "Post_publication_description",
-                                            "Pre_publication_abbreviation",
-                                            "Pre_publication_description",
-                                            "Original_description",
-                                            "Post_publication_abbreviation",
-                                            "PubMed_ID"
-                                        ] + samples_list,
-                                        dialect="excel-tab")
-                writer.writeheader()
-                writer.writerows(data)
-                outfile.flush()
-
-            return send_file(
-                filename,
-                mimetype="text/csv",
-                as_attachment=True,
-                download_name=secure_filename(f"{dataset['Name']}_data"))
-
-
 @phenotypesbp.route(
     "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
-    "/<int:dataset_id>/edit-upload",
-    methods=["GET", "POST"])
+    "/<int:dataset_id>/load-data-success/<uuid:job_id>",
+    methods=["GET"])
 @require_login
 @with_dataset(
     species_redirect_uri="species.populations.phenotypes.index",
     population_redirect_uri="species.populations.phenotypes.select_population",
     redirect_uri="species.populations.phenotypes.list_datasets")
-def edit_upload_phenotype_data(# pylint: disable=[unused-argument]
+def load_data_success(
         species: dict,
         population: dict,
         dataset: dict,
+        job_id: uuid.UUID,
         **kwargs
-):
-    if request.method == "GET":
-        return render_template(
-            "phenotypes/bulk-edit-upload.html",
-            species=species,
-            population=population,
-            dataset=dataset,
-            activelink="edit-phenotype")
-
-    edit_file = save_file(request.files["file-upload-bulk-edit-upload"],
-                          Path(app.config["UPLOAD_FOLDER"]))
-
-    from gn_libs import jobs as gnlibs_jobs
-    from gn_libs import sqlite3
-    jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
-    with sqlite3.connection(jobs_db) as conn:
-        job_id = uuid.uuid4()
-        _job = gnlibs_jobs.launch_job(
-            gnlibs_jobs.initialise_job(conn,
-                                       job_id,
-                                       [
-                                           sys.executable, "-u",
-                                           "-m", "scripts.phenotypes_bulk_edit",
-                                           jobs_db,
-                                           str(job_id)
-                                       ],
-                                       "phenotype-bulk-edit",
-                                       extra_meta = {
-                                           "edit-file": str(edit_file)
+):# pylint: disable=[unused-argument]
+    """Display success page if loading data to database was successful."""
+    with (database_connection(app.config["SQL_URI"]) as conn,
+          sqlite3.connection(app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"])
+          as jobsconn):
+        try:
+            gn2_uri = urlparse(app.config["GN2_SERVER_URL"])
+            job = gnlibs_jobs.job(jobsconn, job_id, fulldetails=True)
+            app.logger.debug("THE JOB: %s", job)
+            _xref_ids = tuple(
+                str(item) for item
+                in json.loads(job["metadata"].get("xref_ids", "[]")))
+            _publication = fetch_publication_by_id(
+                conn, int(job["metadata"].get("publication_id", "0")))
+            _search_terms = (item for item in
+                             (str(_publication["PubMed_ID"] or ""),
+                              _publication["Authors"],
+                              (_publication["Title"] or ""))
+                             if item != "")
+            return render_template("phenotypes/load-phenotypes-success.html",
+                                   species=species,
+                                   population=population,
+                                   dataset=dataset,
+                                   job=job,
+                                   search_page_uri=urlunparse(ParseResult(
+                                       scheme=gn2_uri.scheme,
+                                       netloc=gn2_uri.netloc,
+                                       path="/search",
+                                       params="",
+                                       query=urlencode({
+                                           "species": species["Name"],
+                                           "group": population["Name"],
+                                           "type": "Phenotypes",
+                                           "dataset": dataset["Name"],
+                                           "search_terms_or": (
+                                               # Very long URLs will cause
+                                               # errors.
+                                               " ".join(_xref_ids)
+                                               if len(_xref_ids) <= 100
+                                               else ""),
+                                           "search_terms_and": " ".join(
+                                               _search_terms).strip(),
+                                           "accession_id": "None",
+                                           "FormID": "searchResult"
                                        }),
-            jobs_db,
-            f"{app.config['UPLOAD_FOLDER']}/job_errors",
-            worker_manager="gn_libs.jobs.launcher")
-
-
-    return """
-    <p>The following steps need to be performed:
-    <ol>
-    <li>Check that all IDs exist</li>
-    <li>Check for mandatory values</li>
-    <li>Update descriptions in the database (where changed)</li>
-    <li>Update publications in the database (where changed):
-    <ol>
-    <li>If <strong>PubMed_ID</strong> exists in our database, simply update the
-    'PublicationId' value in the 'PublishXRef' table.</li>
-    <li>If <strong>PubMed_ID</strong> does not exists in our database:
-    <ol>
-    <li>fetch the publication's details from PubMed using the new
-    <strong>PubMed_ID</strong> value.</li>
-    <li>create a new publication in our database using the fetched data</li>
-    <li>Update 'PublicationId' value in 'PublishXRef' with ID of newly created
-    publication</li>
-    </ol>
-    </ol>
-    </li>
-    <li>Update values in the database (where changed)</li>
-    </ol>
-    </p>
-
-    <p><strong>Note:</strong>
-    <ul>
-    <li>If a strain that did not have a value is given a value, then we need to
-    add a new cross-reference for the new DataId created.</li>
-    <li>If a strain that had a value has its value deleted and left blank, we
-    need to remove the cross-reference for the existing DataId — or, should we
-    enter the NULL value instead? Removing the cross-reference might be more
-    trouble than it is worth.</li>
-    </ul>
-    </p>
-    """
+                                       fragment="")))
+        except JobNotFound as _jnf:
+            return render_template("jobs/job-not-found.html", job_id=job_id)