about | summary | refs | log | tree | commit | diff
path: root/uploader/phenotypes
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/phenotypes')
-rw-r--r-- uploader/phenotypes/misc.py 26
-rw-r--r-- uploader/phenotypes/views.py 102
2 files changed, 115 insertions, 13 deletions
diff --git a/uploader/phenotypes/misc.py b/uploader/phenotypes/misc.py
new file mode 100644
index 0000000..cbe3b7f
--- /dev/null
+++ b/uploader/phenotypes/misc.py
@@ -0,0 +1,26 @@
+"""Miscellaneous functions handling phenotypes and phenotypes data."""
+import logging
+
+logger = logging.getLogger(__name__)
+
+
def phenotypes_data_differences(
        filedata: tuple[dict, ...], dbdata: tuple[dict, ...]
) -> tuple[dict, ...]:
    """Compute differences between file data and db data.

    Each row in `filedata` is matched to the row in `dbdata` that has the
    same phenotype ID and cross-reference ID, then compared sample by
    sample.

    Parameters:
        filedata: Rows read from the uploaded file. Each row is indexed here
            by the keys "phenotype_id", "xref_id" and "data", where "data"
            maps a sample/strain name to its value.
        dbdata: Rows read from the database. Each row is indexed here by the
            keys "PhenotypeId", "xref_id", "DataId" and "data", where "data"
            maps a sample/strain name to a dict that is read for "value" and
            "StrainId".

    Returns:
        A tuple with one dict per (row, sample) whose file value differs from
        the database value. Each dict has the keys "PhenotypeId", "xref_id",
        "DataId", "StrainId", "StrainName" and "value" (the value from the
        file). "StrainId" is None when the database row has no entry for
        that sample. Results follow the file rows sorted by
        (phenotype_id, xref_id).

    File rows without a matching database row are logged and skipped.
    """
    def _key(phenotype_id, xref_id) -> tuple[str, str]:
        # Compare identifiers as text so that an ID read from a file as "12"
        # still matches the same ID stored as the integer 12.
        return (str(phenotype_id), str(xref_id))

    # Match rows by identity, not by position: pairing the two sorted
    # sequences positionally misaligns every subsequent row as soon as one
    # side has a row that the other lacks.
    dbrows = {
        _key(row["PhenotypeId"], row["xref_id"]): row for row in dbdata
    }

    diff = []
    for filerow in sorted(
            filedata,
            key=lambda item: (item["phenotype_id"], item["xref_id"])):
        dbrow = dbrows.get(_key(filerow["phenotype_id"], filerow["xref_id"]))
        if dbrow is None:
            logging.getLogger(__name__).warning(
                "No database row for phenotype_id=%s, xref_id=%s; skipping.",
                filerow["phenotype_id"],
                filerow["xref_id"])
            continue

        for samplename, value in filerow["data"].items():
            dbsample = dbrow["data"].get(samplename, {})
            if value != dbsample.get("value"):
                diff.append({
                    "PhenotypeId": filerow["phenotype_id"],
                    "xref_id": filerow["xref_id"],
                    "DataId": dbrow["DataId"],
                    "StrainId": dbsample.get("StrainId"),
                    "StrainName": samplename,
                    "value": value
                })

    return tuple(diff)
diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py
index a50a8e7..a18c44d 100644
--- a/uploader/phenotypes/views.py
+++ b/uploader/phenotypes/views.py
@@ -3,6 +3,7 @@ import sys
import csv
import uuid
import json
+import logging
import datetime
import tempfile
from typing import Any
@@ -867,6 +868,17 @@ def process_phenotype_data_for_download(pheno: dict) -> dict:
}
+BULK_EDIT_COMMON_FIELDNAMES = [
+ "UniqueIdentifier",
+ "Post_publication_description",
+ "Pre_publication_abbreviation",
+ "Pre_publication_description",
+ "Original_description",
+ "Post_publication_abbreviation",
+ "PubMed_ID"
+]
+
+
@phenotypesbp.route(
"<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
"/<int:dataset_id>/edit-download",
@@ -900,9 +912,9 @@ def edit_download_phenotype_data(# pylint: disable=[unused-argument]
filename = Path(tmpdir).joinpath("tempfile.tsv")
with open(filename, mode="w") as outfile:
outfile.write(
- "# **DO NOT** delete the 'UniqueIdentifier' field. It is used "
- "by the system to identify and edit the correct row(s) in the "
- "database.\n")
+ "# **DO NOT** delete the 'UniqueIdentifier' row. It is used "
+ "by the system to identify and edit the correct rows and "
+ "columns in the database.\n")
outfile.write(
"# The '…_description' fields are useful for you to figure out "
"what row you are working on. Changing any of this fields will "
@@ -914,15 +926,9 @@ def edit_download_phenotype_data(# pylint: disable=[unused-argument]
"comment line. This line, and all the lines above it, are "
"all comment lines. Comment lines will be ignored.\n")
writer = csv.DictWriter(outfile,
- fieldnames=[
- "UniqueIdentifier",
- "Post_publication_description",
- "Pre_publication_abbreviation",
- "Pre_publication_description",
- "Original_description",
- "Post_publication_abbreviation",
- "PubMed_ID"
- ] + samples_list,
+ fieldnames= (
+ BULK_EDIT_COMMON_FIELDNAMES +
+ samples_list),
dialect="excel-tab")
writer.writeheader()
writer.writerows(data)
@@ -958,4 +964,74 @@ def edit_upload_phenotype_data(# pylint: disable=[unused-argument]
dataset=dataset,
activelink="edit-phenotype")
- return "NOT Implemented: Would do actual edit."
+ edit_file = save_file(request.files["file-upload-bulk-edit-upload"],
+ Path(app.config["UPLOAD_FOLDER"]))
+
+ from gn_libs import jobs as gnlibs_jobs
+ from gn_libs import sqlite3
+ jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"]
+ with sqlite3.connection(jobs_db) as conn:
+ job_id = uuid.uuid4()
+ job_cmd = [
+ sys.executable, "-u",
+ "-m", "scripts.phenotypes_bulk_edit",
+ app.config["SQL_URI"],
+ jobs_db,
+ str(job_id),
+ "--log-level",
+ logging.getLevelName(
+ app.logger.getEffectiveLevel()
+ ).lower()
+ ]
+ app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd)
+ _job = gnlibs_jobs.launch_job(
+ gnlibs_jobs.initialise_job(conn,
+ job_id,
+ job_cmd,
+ "phenotype-bulk-edit",
+ extra_meta = {
+ "edit-file": str(edit_file),
+ "species-id": species["SpeciesId"],
+ "population-id": population["Id"],
+ "dataset-id": dataset["Id"]
+ }),
+ jobs_db,
+ f"{app.config['UPLOAD_FOLDER']}/job_errors",
+ worker_manager="gn_libs.jobs.launcher")
+
+
+ return """
+ <p>The following steps need to be performed:
+ <ol>
+ <li>Check that all IDs exist</li>
+ <li>Check for mandatory values</li>
+ <li>Update descriptions in the database (where changed)</li>
+ <li>Update publications in the database (where changed):
+ <ol>
+ <li>If <strong>PubMed_ID</strong> exists in our database, simply update the
+ 'PublicationId' value in the 'PublishXRef' table.</li>
+ <li>If <strong>PubMed_ID</strong> does not exists in our database:
+ <ol>
+ <li>fetch the publication's details from PubMed using the new
+ <strong>PubMed_ID</strong> value.</li>
+ <li>create a new publication in our database using the fetched data</li>
+ <li>Update 'PublicationId' value in 'PublishXRef' with ID of newly created
+ publication</li>
+ </ol>
+ </ol>
+ </li>
+ <li>Update values in the database (where changed)</li>
+ </ol>
+ </p>
+
+ <p><strong>Note:</strong>
+ <ul>
+ <li>If a strain that did not have a value is given a value, then we need to
+ add a new cross-reference for the new DataId created.</li>
+ <li>If a strain that had a value has its value deleted and left blank, we
+ need to remove the cross-reference for the existing DataId — or, should we
+ enter the NULL value instead? Removing the cross-reference might be more
+ trouble than it is worth.</li>
+ </ul>
+ </p>
+ """