aboutsummaryrefslogtreecommitdiff
path: root/scripts/phenotypes_bulk_edit.py
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-04-14 12:34:52 -0500
committer: Frederick Muriuki Muriithi, 2025-04-14 12:35:25 -0500
commit: c58105c64920625b5e01d820169c884dc136d260 (patch)
tree: ab6bb2b95a3a39808c277b37b53fe5d33aa18362 /scripts/phenotypes_bulk_edit.py
parent: 49f1b2fa06da54d2f839ef6d4ca3061b92883417 (diff)
download: gn-uploader-c58105c64920625b5e01d820169c884dc136d260.tar.gz
Move difference computation to `run()` function.
Diffstat (limited to 'scripts/phenotypes_bulk_edit.py')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 97
1 file changed, 46 insertions, 51 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 1d8124e..bbce60d 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -10,6 +10,7 @@ from MySQLdb.cursors import DictCursor
from gn_libs import jobs, mysqldb, sqlite3
+from uploader.phenotypes.models import phenotypes_data_by_ids
import uploader.publications.pubmed as pmed
from uploader.publications.misc import publications_differences
from uploader.phenotypes.views import BULK_EDIT_COMMON_FIELDNAMES
@@ -87,52 +88,6 @@ def descriptions_differences(file_data, db_data) -> dict[str, str]:
return diff
-def compute_differences(
- conn,
- file_contents,
- pheno_ids,
- pheno_xref_ids,
- pubmed_ids
-) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
- """Compute differences between data in DB and edited data."""
- logger.info("Computing differences.")
- # 1. Basic Phenotype data differences
- # a. Descriptions differences
- desc_diff = descriptions_differences(
- file_contents, __fetch_phenotypes__(conn, pheno_ids))
- logger.debug("DESCRIPTIONS DIFFERENCES: %s", desc_diff)
-
- # b. Publications differences
- db_publications = fetch_phenotype_publications(conn, pheno_xref_ids)
-
- _pubmed_map = {
- (int(row["PubMed_ID"]) if bool(row["PubMed_ID"]) else None): f"{row['phenotype_id']}::{row['xref_id']}"
- for row in file_contents
- }
- _pub_id_map = {
- f"{pub['PhenotypeId']}::{pub['xref_id']}": pub["PublicationId"]
- for pub in db_publications
- }
-
- new_publications = update_publications(
- conn, tuple({
- **pub, "publication_id": _pub_id_map[_pubmed_map[pub["pubmed_id"]]]
- } for pub in pmed.fetch_publications(tuple(
- pubmed_id for pubmed_id in pubmed_ids
- if pubmed_id not in
- tuple(row["PubMed_ID"] for row in db_publications)))))
- logger.debug("New Publications: %s", new_publications)
- pub_diff = publications_differences(
- file_contents, db_publications, {
- row["PubMed_ID" if "PubMed_ID" in row else "pubmed_id"]: row[
- "PublicationId" if "PublicationId" in row else "publication_id"]
- for row in db_publications + new_publications})
- logger.debug("Publications diff: %s", pub_diff)
- # 2. Data differences
- # data_diff = data_differences(...)
- pass
-
-
def update_descriptions():
"""Update descriptions in the database"""
logger.info("Updating descriptions")
@@ -216,11 +171,51 @@ def run(conn, job):
check_ids(conn, pheno_xref_ids)
check_for_mandatory_fields()
# stop running here if any errors are found.
- compute_differences(conn,
- file_contents,
- pheno_ids,
- pheno_xref_ids,
- pubmed_ids)
+
+ ### Compute differences
+ logger.info("Computing differences.")
+ # 1. Basic Phenotype data differences
+ # a. Descriptions differences
+ _desc_diff = descriptions_differences(
+ file_contents, __fetch_phenotypes__(conn, pheno_ids))
+ logger.debug("DESCRIPTIONS DIFFERENCES: %s", _desc_diff)
+
+ # b. Publications differences
+ _db_publications = fetch_phenotype_publications(conn, pheno_xref_ids)
+ logger.debug("DB PUBLICATIONS: %s", _db_publications)
+
+ _pubmed_map = {
+ (int(row["PubMed_ID"]) if bool(row["PubMed_ID"]) else None): f"{row['phenotype_id']}::{row['xref_id']}"
+ for row in file_contents
+ }
+ _pub_id_map = {
+ f"{pub['PhenotypeId']}::{pub['xref_id']}": pub["PublicationId"]
+ for pub in _db_publications
+ }
+
+ _new_publications = update_publications(
+ conn, tuple({
+ **pub, "publication_id": _pub_id_map[_pubmed_map[pub["pubmed_id"]]]
+ } for pub in pmed.fetch_publications(tuple(
+ pubmed_id for pubmed_id in pubmed_ids
+ if pubmed_id not in
+ tuple(row["PubMed_ID"] for row in _db_publications)))))
+ _pub_diff = publications_differences(
+ file_contents, _db_publications, {
+ row["PubMed_ID" if "PubMed_ID" in row else "pubmed_id"]: row[
+ "PublicationId" if "PublicationId" in row else "publication_id"]
+ for row in _db_publications + _new_publications})
+ logger.debug("Publications diff: %s", _pub_diff)
+ # 2. Data differences
+ # _db_pheno_data = phenotypes_data_by_ids(conn, tuple({
+ # "population_id": job["metadata"]["population-id"],
+ # "phenoid": row[0],
+ # "xref_id": row[1]
+ # } for row in pheno_xref_ids))
+ # logger.debug("Phenotype Data in Database: %s", _db_pheno_data)
+
+ # data_diff = data_differences(...)
+ ### END: Compute differences
update_descriptions()
link_publications()
update_values()