From 9280e6232152991fca762d74f415b704a452673d Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 3 Jun 2025 12:09:59 -0500 Subject: Save data with `LOAD DATA INFILE …` query To help speed up the saving of the data (for really huge files) into the database, use the `LOAD DATA INFILE …` command if available, and if not, fall back to the one using raw queries. --- scripts/load_phenotypes_to_db.py | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'scripts') diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index de06d70..d2d1d2c 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -18,7 +18,8 @@ from uploader.samples.models import samples_by_species_and_population from uploader.phenotypes.models import ( dataset_by_id, save_phenotypes_data, - create_new_phenotypes) + create_new_phenotypes, + quick_save_phenotypes_data) from uploader.publications.models import ( create_new_publications, fetch_publication_by_id) @@ -157,16 +158,33 @@ def save_numeric_data( build_line_joiner(control_data)) for _file in phenofiles) - return save_phenotypes_data( - conn, - table, - __build_dataitems__( - filetype, - phenofiles, - control_data, - samples, - dataidmap, - pheno_name2id)) + try: + logger.debug("Attempt quick save with `LOAD … INFILE`.") + return quick_save_phenotypes_data( + conn, + table, + __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id), + filesdir) + except Exception as _exc: + logger.debug("Could not use `LOAD … INFILE`, using raw query", + exc_info=True) + import time;time.sleep(60) + return save_phenotypes_data( + conn, + table, + __build_dataitems__( + filetype, + phenofiles, + control_data, + samples, + dataidmap, + pheno_name2id)) save_pheno_data = partial(save_numeric_data, -- cgit v1.2.3