diff options
author | Frederick Muriuki Muriithi | 2025-06-03 12:09:59 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-06-03 12:09:59 -0500 |
commit | 9280e6232152991fca762d74f415b704a452673d (patch) | |
tree | efe5df2ee4674551d201f167e5520e579bfd9e88 /scripts | |
parent | 59b07a41ca5181a19e631f640b38e5fd33d1d550 (diff) | |
download | gn-uploader-9280e6232152991fca762d74f415b704a452673d.tar.gz |
Save data with `LOAD DATA INFILE …` query
To help speed up saving the data (for really huge files) into
the database, use the `LOAD DATA INFILE …` command if available and,
if not, fall back to saving the data with raw queries.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 40 |
1 files changed, 29 insertions, 11 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index de06d70..d2d1d2c 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -18,7 +18,8 @@ from uploader.samples.models import samples_by_species_and_population from uploader.phenotypes.models import ( dataset_by_id, save_phenotypes_data, - create_new_phenotypes) + create_new_phenotypes, + quick_save_phenotypes_data) from uploader.publications.models import ( create_new_publications, fetch_publication_by_id) @@ -157,16 +158,33 @@ def save_numeric_data( build_line_joiner(control_data)) for _file in phenofiles) - return save_phenotypes_data( - conn, - table, - __build_dataitems__( - filetype, - phenofiles, - control_data, - samples, - dataidmap, - pheno_name2id)) + try: + logger.debug("Attempt quick save with `LOAD … INFILE`.") + return quick_save_phenotypes_data( + conn, + table, + __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id), + filesdir) + except Exception as _exc: + logger.debug("Could not use `LOAD … INFILE`, using raw query", + exc_info=True) + import time;time.sleep(60) + return save_phenotypes_data( + conn, + table, + __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id)) save_pheno_data = partial(save_numeric_data, |