diff options
author | Frederick Muriuki Muriithi | 2025-05-19 10:23:35 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-05-19 10:26:30 -0500 |
commit | 318287f726e167b5f08c14e88bfbba8b81f3b38c (patch) | |
tree | 578d4582ef4feee71c094adf0eacc06dffad579d | |
parent | 94fe97b01a3887209f4785a3d7bce6291ed3cf3d (diff) | |
download | gn-uploader-318287f726e167b5f08c14e88bfbba8b81f3b38c.tar.gz |
Save the basic phenotypes data into the database.
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 30 |
1 files changed, 29 insertions, 1 deletions
```diff
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 753494b..fdf711b 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -9,7 +9,11 @@ from MySQLdb.cursors import Cursor, DictCursor
 
 from gn_libs import jobs, mysqldb, sqlite3
 
+from r_qtl import r_qtl2 as rqtl2
 from uploader.publications.models import create_new_publications
+
+from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter
+
 
 logging.basicConfig(
     format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
 logger = logging.getLogger(__name__)
@@ -43,7 +47,31 @@ def save_phenotypes(
         control_data: dict[str, Any]
 ) -> tuple[dict, ...]:
     """Read `phenofiles` and save the phenotypes therein."""
-    pass
+    logger.info("Saving new phenotypes.")
+    logger.debug("Processing %s 'pheno' files.", len(phenofiles))
+    phenofiles = control_data["pheno"]
+    if len(phenofiles) <= 0:
+        return tuple()
+
+    if control_data["pheno_transposed"]:
+        logger.info("Undoing transposition of the files rows and columns.")
+        phenofiles = (
+            transpose_csv_with_rename(
+                _file
+                build_line_splitter(control_data)
+                build_line_joiner(control_data))
+            for _file in control_data["pheno"])
+
+    _headers = rqtl2.read_csv_file_headers(control_data["pheno"][0],
+                                           control_data["pheno_transposed"],
+                                           control_data["sep"],
+                                           control_data["comment.char"])
+    return create_new_phenotypes(
+        cursor,
+        (dict(zip(_headers, line)) for filecontent
+         in (rqtl2.read_csv_file(path) for path in phenofiles)
+         for idx, line in enumerate(filecontent)
+         if idx != 0))
 
 
 def save_phenotypes_data(conn: mysqldb.Connection, dataidmap, samples, datafiles):
```