2 files changed, 101 insertions, 11 deletions
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
index 3946a0f..3d656d2 100644
--- a/uploader/phenotypes/models.py
+++ b/uploader/phenotypes/models.py
@@ -96,20 +96,34 @@ def dataset_phenotypes(# pylint: disable=[too-many-arguments, too-many-positiona
         xref_ids: tuple[int, ...] = tuple()
 ) -> tuple[dict, ...]:
     """Fetch the actual phenotypes."""
-    _query = (
-        "SELECT pheno.*, pxr.Id AS xref_id, pxr.InbredSetId, ist.InbredSetCode "
+    _narrow_by_ids = (
+            f" AND pxr.Id IN ({', '.join(['%s'] * len(xref_ids))})"
+            if len(xref_ids) > 0 else "")
+    _narrow_by_limit = (
+        f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "")
+    _pub_query = (
+        "SELECT pub.* "
+        "FROM PublishXRef AS pxr "
+        "INNER JOIN  Publication AS pub ON pxr.PublicationId=pub.Id "
+        "WHERE pxr.InbredSetId=%s") + _narrow_by_ids
+    _pheno_query = ((
+        "SELECT pheno.*, pxr.Id AS xref_id, pxr.InbredSetId, pxr.PublicationId, "
+        "ist.InbredSetCode "
         "FROM Phenotype AS pheno "
         "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
         "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId "
         "INNER JOIN InbredSet AS ist ON pf.InbredSetId=ist.Id "
-        "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + (
-            f" AND pxr.Id IN ({', '.join(['%s'] * len(xref_ids))})"
-            if len(xref_ids) > 0 else "") + (
-            f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "")
+        "WHERE pxr.InbredSetId=%s AND pf.Id=%s") +
+                    _narrow_by_ids +
+                    _narrow_by_limit)
     with conn.cursor(cursorclass=DictCursor) as cursor:
-        cursor.execute(_query, (population_id, dataset_id) + xref_ids)
+        cursor.execute(_pub_query, (population_id,) + xref_ids)
         debug_query(cursor, logger)
-        return tuple(dict(row) for row in cursor.fetchall())
+        _pubs = {row["Id"]: dict(row) for row in cursor.fetchall()}
+        cursor.execute(_pheno_query, (population_id, dataset_id) + xref_ids)
+        debug_query(cursor, logger)
+        return tuple({**dict(row), "publication": _pubs[row["PublicationId"]]}
+                     for row in cursor.fetchall())
 
 
 def __phenotype_se__(cursor: BaseCursor, xref_id, dataids_and_strainids):
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index ab3b3d2..c03f3f5 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -1,4 +1,6 @@
 """Views handling ('classical') phenotypes."""# pylint: disable=[too-many-lines]
+import io
+import csv
 import sys
 import uuid
 import json
@@ -21,12 +23,14 @@ from gn_libs import jobs as gnlibs_jobs
 from gn_libs.jobs.jobs import JobNotFound
 from gn_libs.mysqldb import database_connection
 
+from werkzeug.datastructures import Headers
 from flask import (flash,
                    request,
                    jsonify,
                    redirect,
                    Blueprint,
-                   current_app as app)
+                   current_app as app,
+                   Response as FlaskResponse)
 
 from r_qtl import r_qtl2_qc as rqc
 from r_qtl import exceptions as rqe
@@ -313,6 +317,11 @@ def create_dataset(species: dict, population: dict, **kwargs):# pylint: disable=
         dataset_shortname = (
             form["dataset-shortname"] or form["dataset-name"]).strip()
         _pheno_dataset = save_new_dataset(
+            # It's not necessary to update the authorisation server to register
+            # new phenotype resource here, since each phenotype trait can, in
+            # theory, have its own access control allowing/disallowing access to
+            # it. In practice, however, we tend to gather multiple traits into a
+            # single resource for access control.
             cursor,
             population["Id"],
             form["dataset-name"].strip(),
@@ -522,6 +531,65 @@ def job_status(
 
 @phenotypesbp.route(
     "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/job/<uuid:job_id>/download-errors",
+    methods=["GET"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def download_errors(
+        species: dict,
+        population: dict,
+        dataset: dict,
+        job_id: uuid.UUID,
+        **kwargs):# pylint: disable=[unused-argument]
+    """Download the list of errors as a CSV file."""
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        try:
+            job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
+            _prefix_ = jobs.jobsnamespace()
+            _jobid_ = job['jobid']
+            def __generate_chunks__():
+                _errors_ = (
+                    json.loads(error)
+                    for key in rconn.keys(
+                            f"{_prefix_}:{str(_jobid_)}:*:errors:*")
+                    for error in rconn.lrange(key, 0, -1))
+                _chunk_no_ = 0
+                _all_errors_printed_ = False
+                while not _all_errors_printed_:
+                    _chunk_ = []
+                    try:
+                        for _ in range(0, 1000):
+                            _chunk_.append(next(_errors_))
+                    except StopIteration:
+                        _all_errors_printed_ = True
+                        if len(_chunk_) <= 0:
+                            raise
+
+                    _out_ = io.StringIO()
+                    _writer_ = csv.DictWriter(_out_, fieldnames=tuple(_chunk_[0].keys()))
+                    if _chunk_no_ == 0:
+                        _writer_.writeheader()
+                    _writer_.writerows(_chunk_)
+                    _chunk_no_ += 1
+                    yield _out_.getvalue()
+                    if _all_errors_printed_:
+                        return
+
+            headers = Headers()
+            headers.set("Content-Disposition",
+                        "attachment",
+                        filename=f"{job['job-type']}_{job['jobid']}.csv")
+            return FlaskResponse(
+                __generate_chunks__(), mimetype="text/csv", headers=headers)
+        except jobs.JobNotFound as _jnf:
+            return render_template("jobs/job-not-found.html", job_id=job_id)
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
     "/<int:dataset_id>/job/<uuid:job_id>/review",
     methods=["GET"])
 @require_login
@@ -599,6 +667,8 @@ def review_job_data(
                                        conn, int(_job_metadata["publicationid"]))
                                    if _job_metadata.get("publicationid")
                                    else None),
+                               user=session.user_details(),
+                               timestamp=datetime.datetime.now().isoformat(),
                                activelink="add-phenotypes")
 
 
@@ -678,9 +748,15 @@ def load_data_to_database(
                     "publication_id": _meta["publicationid"],
                     "authserver": oauth2client.authserver_uri(),
                     "token": token["access_token"],
+                    "dataname": request.form["data_name"].strip(),
                     "success_handler": (
                         "uploader.phenotypes.views"
-                        ".load_phenotypes_success_handler")
+                        ".load_phenotypes_success_handler"),
+                    **{
+                        key: request.form[key]
+                        for key in ("data_description",)
+                        if key in request.form.keys()
+                    }
                 },
                 external_id=session.logged_in_user_id())
         ).then(
@@ -1262,7 +1338,7 @@ def delete_phenotypes(# pylint: disable=[unused-argument, too-many-locals]
                         loglevel=_loglevel)
                 ).either(__handle_error__, proceed_to_job_status)
             case _:
-                _phenos = tuple()
+                _phenos: tuple[dict, ...] = tuple()
                 if len(xref_ids) > 0:
                     _phenos = dataset_phenotypes(
                         conn, population["Id"], dataset["Id"], xref_ids=xref_ids)