-rw-r--r--  scripts/load_phenotypes_to_db.py  108
1 file changed, 54 insertions(+), 54 deletions(-)
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index d48084e..753494b 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -38,28 +38,31 @@ def save_publications(
return _publications
-def save_phenotypes(cursor: Cursor, phenofiles):
+def save_phenotypes(
+        conn: mysqldb.Connection,
+        control_data: dict[str, Any]
+) -> tuple[dict, ...]:
"""Read `phenofiles` and save the phenotypes therein."""
pass
-def save_phenotypes_data(cursor: Cursor, dataidmap, samples, datafiles):
+def save_phenotypes_data(conn: mysqldb.Connection, dataidmap, samples, datafiles):
"""Read the `datafiles` and save the data in the database."""
pass
-def save_phenotype_se(cursor: Cursor, dataidmap, samples, sefiles):
+def save_phenotype_se(conn: mysqldb.Connection, dataidmap, samples, sefiles):
"""Read the `sefiles` and save the data in the database."""
pass
-def save_phenotype_n(cursor: Cursor, dataidmap, samples, nfiles):
+def save_phenotype_n(conn: mysqldb.Connection, dataidmap, samples, nfiles):
"""Read the `nfiles` and save the data in the database."""
pass
def cross_reference_phenotypes_publications_and_data(
- cursor: Cursor, xref_data: tuple[dict, ...]):
+ conn: mysqldb.Connection, xref_data: tuple[dict, ...]):
"""Crossreference the phenotypes, publication and data."""
pass
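
The new signatures replace the shared `Cursor` with a `mysqldb.Connection`, leaving each function to open and close its own cursor. As a reference point, here is a minimal sketch of how the new `save_phenotypes` could look; the `Phenotype(Original_description, Units)` table layout and the `"phenotypes"` key in `control_data` are illustrative assumptions, not part of this patch:

from typing import Any

import MySQLdb as mysqldb
from MySQLdb.cursors import DictCursor


def save_phenotypes(
        conn: mysqldb.Connection,
        control_data: dict[str, Any]
) -> tuple[dict, ...]:
    """Save the phenotypes in `control_data`; return them with their new IDs."""
    phenotypes = control_data.get("phenotypes", ())  # assumed key
    if not phenotypes:
        return tuple()
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.executemany(
            "INSERT INTO Phenotype(Original_description, Units) "
            "VALUES (%(description)s, %(units)s)",
            phenotypes)
        # A multi-row INSERT is assigned consecutive AUTO_INCREMENT ids;
        # `lastrowid` is the id of the first row in the batch.
        first_id = cursor.lastrowid
    return tuple({**pheno, "phenotype_id": first_id + offset}
                 for offset, pheno in enumerate(phenotypes))

Returning the saved rows together with their new ids is what lets `load_data` below build `dataidmap` by enumerating from `fetch_next_dataid`.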
@@ -67,56 +70,53 @@ def cross_reference_phenotypes_publications_and_data(
def load_data(conn, job):
"""Load the data attached in the given job."""
_job_metadata = json.loads(job["job-metadata"])
- with conn.cursor(cursorclass=DictCursor) as cursor:
- # Steps
- # 0. Read data from the files: can be multiple files per type
- #
- # 1. Save all new phenotypes:
- # -> return phenotype IDs
- _control_data = rqtl.control_data(job["job-metadata"]["bundle-file"])
- logger.info("Saving basic phenotype data.")
-
- _phenos = save_phenotypes(cursor, _control_data)
- _next_data_id = fetch_next_dataid(...)
- dataidmap = {
- row["phenotype_id"]: {
- "phenotype_id": row["phenotype_id"],
- "data_id": _nextid
- }
- for _nextid, row in enumerate(_phenos, start=_next_data_id)
+ # Steps
+ # 0. Read data from the files: can be multiple files per type
+ #
+ # 1. Save all new phenotypes:
+ # -> return phenotype IDs
+    _control_data = rqtl.control_data(_job_metadata["bundle-file"])
+ logger.info("Saving basic phenotype data.")
+
+    _phenos = save_phenotypes(conn, _control_data)
+ _next_data_id = fetch_next_dataid(...)
+ dataidmap = {
+ row["phenotype_id"]: {
+ "phenotype_id": row["phenotype_id"],
+ "data_id": _nextid
}
- # 2. Save any new publications (in multi-file bundle):
- # -> return publication IDS
- publications = publications + save_publications(
- cursor,
- _control_data.get(
- "metadata", {}).get(
- "publications"),
- _job_metadata.get("publicationid"))
- _pubidmap = {
- # TODO: Map the pheno ids to the publication ids
- }
- # 3. a. Fetch the strain names and IDS: create name->ID map
- samples = samples_by_species_and_population(
- # from uploader.samples.models import samples_by_species_and_population
- conn, species["SpeciesId"], population["PopulationId"])
- # b. Save all the data items (DataIds are vibes), return new IDs
- data = save_phenotypes_data(
- cursor, dataidmap, samples, , _control_data["pheno"])
- # c. If standard errors and N exist, save them too
- # (use IDs returned in `b` above).
- data_se = save_phenotypes_data(
- cursor, dataidmap, samples, , _control_data["phenose"])
- data_n = save_phenotypes_n(
- cursor, dataidmap, samples, , _control_data["phenonum"])
- # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
- xrefs = cross_reference_phenotypes_publications_and_data(
- cursor, __merge_map_with_publications__(dataidmap))
- # 5. If entirely new data, update authorisations (break this down)
- update_auth(_user, _species, _population, _dataset, _phenos)
- return 0
-
- return 1
+ for _nextid, row in enumerate(_phenos, start=_next_data_id)
+ }
+ # 2. Save any new publications (in multi-file bundle):
+    #    -> return publication IDs
+    publications = save_publications(
+        conn,
+        _control_data.get("metadata", {}).get("publications"),
+        _job_metadata.get("publicationid"))
+ _pubidmap = {
+ # TODO: Map the pheno ids to the publication ids
+ }
+    # 3. a. Fetch the strain names and IDs: create name->ID map
+    #    (samples_by_species_and_population comes from uploader.samples.models)
+    samples = samples_by_species_and_population(
+        conn, species["SpeciesId"], population["PopulationId"])
+    #    b. Save all the data items (DataIds are provisional), return new IDs
+    data = save_phenotypes_data(
+        conn, dataidmap, samples, _control_data["pheno"])
+    #    c. If standard errors and N exist, save them too
+    #       (use IDs returned in `b` above).
+    data_se = save_phenotype_se(
+        conn, dataidmap, samples, _control_data["phenose"])
+    data_n = save_phenotype_n(
+        conn, dataidmap, samples, _control_data["phenonum"])
+ # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
+    xrefs = cross_reference_phenotypes_publications_and_data(
+        conn, __merge_map_with_publications__(dataidmap))
+ # 5. If entirely new data, update authorisations (break this down)
+ update_auth(_user, _species, _population, _dataset, _phenos)
+ return 0
if __name__ == "__main__":
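
For step 3 b., a sketch of the bulk insert, under stated assumptions: a hypothetical `read_pheno_files` helper yields `(phenotype_id, {sample_name: value})` pairs from the file list, `samples` maps sample names to rows carrying a `"StrainId"`, and the values land in a `PublishData(Id, StrainId, value)` table:

import MySQLdb as mysqldb
from MySQLdb.cursors import DictCursor


def save_phenotypes_data(conn: mysqldb.Connection, dataidmap, samples, datafiles):
    """Read the `datafiles` and save the data in the database."""
    rows = tuple(
        {
            "data_id": dataidmap[pheno_id]["data_id"],
            "strain_id": samples[sample_name]["StrainId"],
            "value": value
        }
        for pheno_id, values in read_pheno_files(datafiles)  # hypothetical reader
        for sample_name, value in values.items()
        if value is not None)  # drop missing ("NA") values
    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.executemany(
            "INSERT INTO PublishData(Id, StrainId, value) "
            "VALUES (%(data_id)s, %(strain_id)s, %(value)s)",
            rows)
    return rows

`save_phenotype_se` and `save_phenotype_n` would follow the same shape against their respective standard-error and N tables.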