about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/load_phenotypes_to_db.py 29
1 files changed, 15 insertions, 14 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 980aa94..9ba91a0 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -26,18 +26,18 @@ logger = logging.getLogger(__name__)
 
 def save_phenotypes(
         cursor: mysqldb.Connection,
-        control_data: dict[str, Any]
+        control_data: dict[str, Any],
+        filesdir: Path
 ) -> tuple[dict, ...]:
     """Read `phenofiles` and save the phenotypes therein."""
     logger.info("Saving new phenotypes.")
-    logger.debug("Processing %s 'pheno' files.", len(phenofiles))
     ## TODO: Replace with something like this: ##
     # phenofiles = control_data["phenocovar"] + control_data.get(
     #     "gn-metadata", {}).get("pheno", [])
     #
     # This is meant to load (and merge) data from the "phenocovar" and
     # "gn-metadata -> pheno" files into a single collection of phenotypes.
-    phenofiles = control_data["phenocovar"]
+    phenofiles = tuple(filesdir.joinpath(_file) for _file in control_data["phenocovar"])
     if len(phenofiles) <= 0:
         return tuple()
 
@@ -48,9 +48,9 @@ def save_phenotypes(
                 _file,
                 build_line_splitter(control_data),
                 build_line_joiner(control_data))
-            for _file in control_data["phenocovar"])
+            for _file in phenofiles)
 
-    _headers = rqtl2.read_csv_file_headers(control_data["phenocovar"][0],
+    _headers = rqtl2.read_csv_file_headers(phenofiles[0],
                                            control_data["phenocovar_transposed"],
                                            control_data["sep"],
                                            control_data["comment.char"])
@@ -74,10 +74,12 @@ def save_pheno_data(
         conn: mysqldb.Connection,
         dataidmap: dict,
         samples: tuple[dict, ...],
-        control_data: dict
+        control_data: dict,
+        filesdir: Path
 ):
     """Read the `datafiles` and save the data in the database."""
-    phenofiles = control_data["pheno"]
+    phenofiles = tuple(
+        filesdir.joinpath(_file) for _file in control_data["pheno"])
     if len(phenofiles) <= 0:
         return tuple()
 
@@ -88,9 +90,9 @@ def save_pheno_data(
                 _file,
                 build_line_splitter(control_data),
                 build_line_joiner(control_data))
-            for _file in control_data["pheno"])
+            for _file in phenofiles)
 
-    _headers = rqtl2.read_csv_file_headers(control_data["pheno"][0],
+    _headers = rqtl2.read_csv_file_headers(phenofiles[0],
                                            control_data["pheno_transposed"],
                                            control_data["sep"],
                                            control_data["comment.char"])
@@ -251,8 +253,7 @@ def load_data(conn, job):
     with ZipFile(str(bundle), "r") as zfile:
         _files = rqtl2.extract(zfile, _outdir)
     logger.info("Saving basic phenotype data.")
-
-    _phenos = save_phenotypes(cursor, _control_data)
+    _phenos = save_phenotypes(conn, _control_data, _outdir)
     dataidmap = {
         row["phenotype_id"]: {
             "population_id": population["Id"],
@@ -268,14 +269,14 @@ def load_data(conn, job):
         for row in samples_by_species_and_population(
                 conn, species["SpeciesId"], population["PopulationId"])}
     #    b. Save all the data items (DataIds are vibes), return new IDs
-    data = save_pheno_data(conn, dataidmap, samples, _control_data)
+    data = save_pheno_data(conn, dataidmap, samples, _control_data, _outdir)
     # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
     xrefs = cross_reference_phenotypes_publications_and_data(
         conn, tuple(dataidmap.values()))
     # 5. If standard errors and N exist, save them too
     #    (use IDs returned in `3. b.` above).
-    data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data)
-    data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data)
+    data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data, _outdir)
+    data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data, _outdir)
     # 6. If entirely new data, update authorisations (break this down)
     update_auth(_user, _species, _population, _dataset, _phenos)
     return 0