diff options
author | Frederick Muriuki Muriithi | 2025-05-19 13:43:20 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-05-19 14:34:55 -0500 |
commit | 5bd5f3cf1ae30467df5e4a040fec2acb20be6cee (patch) | |
tree | 2dc3f25f21e61f847ae86e094fab342310e919db | |
parent | 7a742c2290c2530c0be070590b199fb11799cf16 (diff) | |
download | gn-uploader-5bd5f3cf1ae30467df5e4a040fec2acb20be6cee.tar.gz |
Simplify handling of publications: Assume a maximum of one.
Adding publication details to the R/qtl2 bundle might not be easy,
so for now, assume that all the phenotypes uploaded in a particular
session are published in a single publication.
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 37 |
1 file changed, 3 insertions, 34 deletions
```diff
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 8ff8ab4..7e1112e 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -21,29 +21,6 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 
-def save_publications(
-        conn: mysqldb.Connection,
-        pubfiles,
-        standalone_publication_id=Optional[int] = None
-) -> tuple[dict[str, Union[int, str]]]:
-    """Read the `pubfiles` and save the publications therein."""
-    _publications = tuple()
-    if standalone_publication_id:
-        # HACK: This is a hack. Remove once we update bundle creation to include
-        # publication(s) in the bundle
-        _publications = _publications + (
-            fetch_publication_by_id(conn, standalone_publication_id),)
-    # -> check whether the publication-id exists?
-    # -> perhaps setup the bundle with the appropriate publications
-    # -> gn-data -> (phenotypes-se, phenotypes-n)
-    # -> gn-metadata -> (pheno, geno)
-    if len(pubfiles) > 0:# TODO: check for presence of data — improve this check.
-        logger.info("Saving publications.")
-        _publications = _publication + create_new_publications(conn, pubs)
-        # Check for PubMed IDs, perhaps?
-    return _publications
-
-
 def save_phenotypes(
         cursor: mysqldb.Connection,
         control_data: dict[str, Any]
@@ -261,17 +238,9 @@ def load_data(conn, job):
             }
             for dataid, row in enumerate(_phenos, start=__fetch_next_dataid__(conn))
         }
-        # 2. Save any new publications (in multi-file bundle):
-        # -> return publication IDS: TODO: Figure out how to link file IDs to
-        # DB IDs.
-        publications = save_publications(cursor,
-                                         _control_data.get(
-                                             "metadata", {}).get(
-                                                 "publications"),
-                                         _job_metadata.get("publicationid"))
-        _pubidmap = {
-            # TODO: Map the pheno ids to the publication ids
-        }
+        # 2. Just retrive the publication: Don't create publications for now.
+        publication = fetch_publication_by_id(
+            conn, int(_job_metadata.get("publicationid", "0"))) or {"Id": 0}
         # 3. a. Fetch the strain names and IDS: create name->ID map
         samples = {
             row["Name"]: row
```