diff options
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 37 |
1 files changed, 3 insertions, 34 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index 8ff8ab4..7e1112e 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -21,29 +21,6 @@ logging.basicConfig( logger = logging.getLogger(__name__) -def save_publications( - conn: mysqldb.Connection, - pubfiles, - standalone_publication_id=Optional[int] = None -) -> tuple[dict[str, Union[int, str]]]: - """Read the `pubfiles` and save the publications therein.""" - _publications = tuple() - if standalone_publication_id: - # HACK: This is a hack. Remove once we update bundle creation to include - # publication(s) in the bundle - _publications = _publications + ( - fetch_publication_by_id(conn, standalone_publication_id),) - # -> check whether the publication-id exists? - # -> perhaps setup the bundle with the appropriate publications - # -> gn-data -> (phenotypes-se, phenotypes-n) - # -> gn-metadata -> (pheno, geno) - if len(pubfiles) > 0:# TODO: check for presence of data — improve this check. - logger.info("Saving publications.") - _publications = _publication + create_new_publications(conn, pubs) - # Check for PubMed IDs, perhaps? - return _publications - - def save_phenotypes( cursor: mysqldb.Connection, control_data: dict[str, Any] @@ -261,17 +238,9 @@ def load_data(conn, job): } for dataid, row in enumerate(_phenos, start=__fetch_next_dataid__(conn)) } - # 2. Save any new publications (in multi-file bundle): - # -> return publication IDS: TODO: Figure out how to link file IDs to - # DB IDs. - publications = save_publications(cursor, - _control_data.get( - "metadata", {}).get( - "publications"), - _job_metadata.get("publicationid")) - _pubidmap = { - # TODO: Map the pheno ids to the publication ids - } + # 2. Just retrieve the publication: Don't create publications for now. + publication = fetch_publication_by_id( + conn, int(_job_metadata.get("publicationid", "0"))) or {"Id": 0} # 3. a. 
Fetch the strain names and IDS: create name->ID map samples = { row["Name"]: row |