aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2025-05-28 16:10:11 -0500
committerFrederick Muriuki Muriithi2025-05-28 16:10:11 -0500
commitd8ca7f5c916eadb40fb8e4ea55a853f46f065ae4 (patch)
tree30520773cfd9b7905eea2ac8a3631d61552aa207
parent9f2ddd603444f9a27bdebca2fd4435c6d5bb7411 (diff)
downloadgn-uploader-d8ca7f5c916eadb40fb8e4ea55a853f46f065ae4.tar.gz
Pass path to bundle extraction dir, and make paths relative to it.
Pass the path where the bundle was extracted to the function doing the work. Make the paths for the files being used be relative to the extraction directory thus passed.
-rw-r--r--scripts/load_phenotypes_to_db.py29
1 file changed, 15 insertions, 14 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 980aa94..9ba91a0 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -26,18 +26,18 @@ logger = logging.getLogger(__name__)
def save_phenotypes(
cursor: mysqldb.Connection,
- control_data: dict[str, Any]
+ control_data: dict[str, Any],
+ filesdir: Path
) -> tuple[dict, ...]:
"""Read `phenofiles` and save the phenotypes therein."""
logger.info("Saving new phenotypes.")
- logger.debug("Processing %s 'pheno' files.", len(phenofiles))
## TODO: Replace with something like this: ##
# phenofiles = control_data["phenocovar"] + control_data.get(
# "gn-metadata", {}).get("pheno", [])
#
# This is meant to load (and merge) data from the "phenocovar" and
# "gn-metadata -> pheno" files into a single collection of phenotypes.
- phenofiles = control_data["phenocovar"]
+ phenofiles = tuple(filesdir.joinpath(_file) for _file in control_data["phenocovar"])
if len(phenofiles) <= 0:
return tuple()
@@ -48,9 +48,9 @@ def save_phenotypes(
_file,
build_line_splitter(control_data),
build_line_joiner(control_data))
- for _file in control_data["phenocovar"])
+ for _file in phenofiles)
- _headers = rqtl2.read_csv_file_headers(control_data["phenocovar"][0],
+ _headers = rqtl2.read_csv_file_headers(phenofiles[0],
control_data["phenocovar_transposed"],
control_data["sep"],
control_data["comment.char"])
@@ -74,10 +74,12 @@ def save_pheno_data(
conn: mysqldb.Connection,
dataidmap: dict,
samples: tuple[dict, ...],
- control_data: dict
+ control_data: dict,
+ filesdir: Path
):
"""Read the `datafiles` and save the data in the database."""
- phenofiles = control_data["pheno"]
+ phenofiles = tuple(
+ filesdir.joinpath(_file) for _file in control_data["pheno"])
if len(phenofiles) <= 0:
return tuple()
@@ -88,9 +90,9 @@ def save_pheno_data(
_file,
build_line_splitter(control_data),
build_line_joiner(control_data))
- for _file in control_data["pheno"])
+ for _file in phenofiles)
- _headers = rqtl2.read_csv_file_headers(control_data["pheno"][0],
+ _headers = rqtl2.read_csv_file_headers(phenofiles[0],
control_data["pheno_transposed"],
control_data["sep"],
control_data["comment.char"])
@@ -251,8 +253,7 @@ def load_data(conn, job):
with ZipFile(str(bundle), "r") as zfile:
_files = rqtl2.extract(zfile, _outdir)
logger.info("Saving basic phenotype data.")
-
- _phenos = save_phenotypes(cursor, _control_data)
+ _phenos = save_phenotypes(conn, _control_data, _outdir)
dataidmap = {
row["phenotype_id"]: {
"population_id": population["Id"],
@@ -268,14 +269,14 @@ def load_data(conn, job):
for row in samples_by_species_and_population(
conn, species["SpeciesId"], population["PopulationId"])}
# b. Save all the data items (DataIds are vibes), return new IDs
- data = save_pheno_data(conn, dataidmap, samples, _control_data)
+ data = save_pheno_data(conn, dataidmap, samples, _control_data, _outdir)
# 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
xrefs = cross_reference_phenotypes_publications_and_data(
conn, tuple(dataidmap.values()))
# 5. If standard errors and N exist, save them too
# (use IDs returned in `3. b.` above).
- data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data)
- data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data)
+ data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data, _outdir)
+ data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data, _outdir)
# 6. If entirely new data, update authorisations (break this down)
update_auth(_user, _species, _population, _dataset, _phenos)
return 0