about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/load_phenotypes_to_db.py108
1 files changed, 54 insertions, 54 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index d48084e..753494b 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -38,28 +38,31 @@ def save_publications(
     return _publications
 
 
-def save_phenotypes(cursor: Cursor, phenofiles):
+def save_phenotypes(
+        conn: mysqldb.Connection,
+        control_data: dict[str, Any]
+) -> tuple[dict, ...]:
     """Read `phenofiles` and save the phenotypes therein."""
     pass
 
 
-def save_phenotypes_data(cursor: Cursor, dataidmap, samples, datafiles):
+def save_phenotypes_data(conn: mysqldb.Connection, dataidmap, samples, datafiles):
     """Read the `datafiles` and save the data in the database."""
     pass
 
 
-def save_phenotype_se(cursor: Cursor, dataidmap, samples, sefiles):
+def save_phenotype_se(conn: mysqldb.Connection, dataidmap, samples, sefiles):
     """Read the `sefiles` and save the data in the database."""
     pass
 
 
-def save_phenotype_n(cursor: Cursor, dataidmap, samples, nfiles):
+def save_phenotype_n(conn: mysqldb.Connection, dataidmap, samples, nfiles):
     """Read the `nfiles` and save the data in the database."""
     pass
 
 
 def cross_reference_phenotypes_publications_and_data(
-        cursor: Cursor, xref_data: tuple[dict, ...]):
+        conn: mysqldb.Connection, xref_data: tuple[dict, ...]):
     """Crossreference the phenotypes, publication and data."""
     pass
 
@@ -67,56 +70,53 @@ def cross_reference_phenotypes_publications_and_data(
 def load_data(conn, job):
     """Load the data attached in the given job."""
     _job_metadata = json.loads(job["job-metadata"])
-    with conn.cursor(cursorclass=DictCursor) as cursor:
-        # Steps
-        # 0. Read data from the files: can be multiple files per type
-        #
-        # 1. Save all new phenotypes:
-        #     -> return phenotype IDs
-        _control_data = rqtl.control_data(job["job-metadata"]["bundle-file"])
-        logger.info("Saving basic phenotype data.")
-
-        _phenos = save_phenotypes(cursor, _control_data)
-        _next_data_id = fetch_next_dataid(...)
-        dataidmap = {
-            row["phenotype_id"]: {
-                "phenotype_id": row["phenotype_id"],
-                "data_id": _nextid
-            }
-            for _nextid, row in enumerate(_phenos, start=_next_data_id)
+    # Steps
+    # 0. Read data from the files: can be multiple files per type
+    #
+    # 1. Save all new phenotypes:
+    #     -> return phenotype IDs
+    _control_data = rqtl.control_data(_job_metadata["bundle-file"])
+    logger.info("Saving basic phenotype data.")
+
+    _phenos = save_phenotypes(conn, _control_data)
+    _next_data_id = fetch_next_dataid(...)
+    dataidmap = {
+        row["phenotype_id"]: {
+            "phenotype_id": row["phenotype_id"],
+            "data_id": _nextid
         }
-        # 2. Save any new publications (in multi-file bundle):
-        #     -> return publication IDS
-        publications = publications + save_publications(
-            cursor,
-            _control_data.get(
-                "metadata", {}).get(
-                    "publications"),
-            _job_metadata.get("publicationid"))
-        _pubidmap = {
-            # TODO: Map the pheno ids to the publication ids
-        }
-        # 3. a. Fetch the strain names and IDS: create name->ID map
-        samples = samples_by_species_and_population(
-            # from uploader.samples.models import samples_by_species_and_population
-            conn, species["SpeciesId"], population["PopulationId"])
-        #    b. Save all the data items (DataIds are vibes), return new IDs
-        data = save_phenotypes_data(
-            cursor, dataidmap, samples, , _control_data["pheno"])
-        #    c. If standard errors and N exist, save them too
-        #       (use IDs returned in `b` above).
-        data_se = save_phenotypes_data(
-            cursor, dataidmap, samples, , _control_data["phenose"])
-        data_n = save_phenotypes_n(
-            cursor, dataidmap, samples, , _control_data["phenonum"])
-        # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
-        xrefs = cross_reference_phenotypes_publications_and_data(
-            cursor, __merge_map_with_publications__(dataidmap))
-        # 5. If entirely new data, update authorisations (break this down)
-        update_auth(_user, _species, _population, _dataset, _phenos)
-        return 0
-
-    return 1
+        for _nextid, row in enumerate(_phenos, start=_next_data_id)
+    }
+    # 2. Save any new publications (in multi-file bundle):
+    #     -> return publication IDS
+    publications = save_publications(
+        conn,
+        _control_data.get(
+            "metadata", {}).get(
+                "publications"),
+        _job_metadata.get("publicationid"))
+    _pubidmap = {
+        # TODO: Map the pheno ids to the publication ids
+    }
+    # 3. a. Fetch the strain names and IDS: create name->ID map
+    samples = samples_by_species_and_population(
+        # from uploader.samples.models import samples_by_species_and_population
+        conn, species["SpeciesId"], population["PopulationId"])
+    #    b. Save all the data items (DataIds are vibes), return new IDs
+    data = save_phenotypes_data(
+        conn, dataidmap, samples, _control_data["pheno"])
+    #    c. If standard errors and N exist, save them too
+    #       (use IDs returned in `b` above).
+    data_se = save_phenotype_se(
+        conn, dataidmap, samples, _control_data["phenose"])
+    data_n = save_phenotype_n(
+        conn, dataidmap, samples, _control_data["phenonum"])
+    # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
+    xrefs = cross_reference_phenotypes_publications_and_data(
+        conn, __merge_map_with_publications__(dataidmap))
+    # 5. If entirely new data, update authorisations (break this down)
+    update_auth(_user, _species, _population, _dataset, _phenos)
+    return 0
 
 
 if __name__ == "__main__":