diff options
Diffstat (limited to 'uploader/phenotypes')
-rw-r--r-- | uploader/phenotypes/misc.py | 26 | ||||
-rw-r--r-- | uploader/phenotypes/views.py | 102 |
2 files changed, 115 insertions, 13 deletions
"""Miscellaneous functions handling phenotypes and phenotypes data."""
import logging

logger = logging.getLogger(__name__)


def phenotypes_data_differences(
        filedata: tuple[dict, ...], dbdata: tuple[dict, ...]
) -> tuple[dict, ...]:
    """Compute differences between file data and db data.

    Each row in `filedata` is matched to the row in `dbdata` that has the
    same `(phenotype_id, xref_id)` / `(PhenotypeId, xref_id)` key, and every
    sample value in the file row is compared with the stored value.

    Parameters:
        filedata: rows read from the uploaded file. Each row is read for the
            keys "phenotype_id", "xref_id" and "data", where "data" maps a
            sample name to its value.
        dbdata: rows read from the database. Each row is read for the keys
            "PhenotypeId", "xref_id", "DataId" and "data", where "data" maps
            a sample name to a dict that is read for "value" and "StrainId".

    Returns:
        A tuple with one dict per changed sample value, ordered by the file
        rows' `(phenotype_id, xref_id)`. A sample that is absent from the
        database row is reported with a "StrainId" of `None`, unless its file
        value is also `None`.
    """
    # Match rows by key rather than by position: pairing two independently
    # sorted sequences with `zip` compares unrelated rows as soon as either
    # side has a row the other one lacks.
    dbrows_by_key = {}
    for dbrow in dbdata:
        dbrows_by_key.setdefault(
            (dbrow["PhenotypeId"], dbrow["xref_id"]), dbrow)

    diff = []
    for filerow in sorted(
            filedata,
            key=lambda item: (item["phenotype_id"], item["xref_id"])):
        dbrow = dbrows_by_key.get(
            (filerow["phenotype_id"], filerow["xref_id"]))
        if dbrow is None:
            # Without a database row there is no "DataId" to report against.
            logger.warning(
                "No database row for phenotype_id=%s, xref_id=%s; skipping.",
                filerow["phenotype_id"],
                filerow["xref_id"])
            continue

        for samplename, value in filerow["data"].items():
            dbsample = dbrow["data"].get(samplename, {})
            if value != dbsample.get("value"):
                diff.append({
                    "PhenotypeId": filerow["phenotype_id"],
                    "xref_id": filerow["xref_id"],
                    "DataId": dbrow["DataId"],
                    "StrainId": dbsample.get("StrainId"),
                    "StrainName": samplename,
                    "value": value
                })

    return tuple(diff)
outfile.write( - "# **DO NOT** delete the 'UniqueIdentifier' field. It is used " - "by the system to identify and edit the correct row(s) in the " - "database.\n") + "# **DO NOT** delete the 'UniqueIdentifier' row. It is used " + "by the system to identify and edit the correct rows and " + "columns in the database.\n") outfile.write( "# The '…_description' fields are useful for you to figure out " "what row you are working on. Changing any of this fields will " @@ -914,15 +926,9 @@ def edit_download_phenotype_data(# pylint: disable=[unused-argument] "comment line. This line, and all the lines above it, are " "all comment lines. Comment lines will be ignored.\n") writer = csv.DictWriter(outfile, - fieldnames=[ - "UniqueIdentifier", - "Post_publication_description", - "Pre_publication_abbreviation", - "Pre_publication_description", - "Original_description", - "Post_publication_abbreviation", - "PubMed_ID" - ] + samples_list, + fieldnames= ( + BULK_EDIT_COMMON_FIELDNAMES + + samples_list), dialect="excel-tab") writer.writeheader() writer.writerows(data) @@ -958,4 +964,74 @@ def edit_upload_phenotype_data(# pylint: disable=[unused-argument] dataset=dataset, activelink="edit-phenotype") - return "NOT Implemented: Would do actual edit." 
+ edit_file = save_file(request.files["file-upload-bulk-edit-upload"], + Path(app.config["UPLOAD_FOLDER"])) + + from gn_libs import jobs as gnlibs_jobs + from gn_libs import sqlite3 + jobs_db = app.config["ASYNCHRONOUS_JOBS_SQLITE_DB"] + with sqlite3.connection(jobs_db) as conn: + job_id = uuid.uuid4() + job_cmd = [ + sys.executable, "-u", + "-m", "scripts.phenotypes_bulk_edit", + app.config["SQL_URI"], + jobs_db, + str(job_id), + "--log-level", + logging.getLevelName( + app.logger.getEffectiveLevel() + ).lower() + ] + app.logger.debug("Phenotype-edit, bulk-upload command: %s", job_cmd) + _job = gnlibs_jobs.launch_job( + gnlibs_jobs.initialise_job(conn, + job_id, + job_cmd, + "phenotype-bulk-edit", + extra_meta = { + "edit-file": str(edit_file), + "species-id": species["SpeciesId"], + "population-id": population["Id"], + "dataset-id": dataset["Id"] + }), + jobs_db, + f"{app.config['UPLOAD_FOLDER']}/job_errors", + worker_manager="gn_libs.jobs.launcher") + + + return """ + <p>The following steps need to be performed: + <ol> + <li>Check that all IDs exist</li> + <li>Check for mandatory values</li> + <li>Update descriptions in the database (where changed)</li> + <li>Update publications in the database (where changed): + <ol> + <li>If <strong>PubMed_ID</strong> exists in our database, simply update the + 'PublicationId' value in the 'PublishXRef' table.</li> + <li>If <strong>PubMed_ID</strong> does not exists in our database: + <ol> + <li>fetch the publication's details from PubMed using the new + <strong>PubMed_ID</strong> value.</li> + <li>create a new publication in our database using the fetched data</li> + <li>Update 'PublicationId' value in 'PublishXRef' with ID of newly created + publication</li> + </ol> + </ol> + </li> + <li>Update values in the database (where changed)</li> + </ol> + </p> + + <p><strong>Note:</strong> + <ul> + <li>If a strain that did not have a value is given a value, then we need to + add a new cross-reference for the new DataId created.</li> 
+ <li>If a strain that had a value has its value deleted and left blank, we + need to remove the cross-reference for the existing DataId — or, should we + enter the NULL value instead? Removing the cross-reference might be more + trouble than it is worth.</li> + </ul> + </p> + """ |