about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-05-19 10:23:35 -0500
committer Frederick Muriuki Muriithi 2025-05-19 10:26:30 -0500
commit 318287f726e167b5f08c14e88bfbba8b81f3b38c (patch)
tree 578d4582ef4feee71c094adf0eacc06dffad579d
parent 94fe97b01a3887209f4785a3d7bce6291ed3cf3d (diff)
download gn-uploader-318287f726e167b5f08c14e88bfbba8b81f3b38c.tar.gz
Save the basic phenotypes data into the database.
-rw-r--r-- scripts/load_phenotypes_to_db.py 30
1 files changed, 29 insertions, 1 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 753494b..fdf711b 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -9,7 +9,11 @@ from MySQLdb.cursors import Cursor, DictCursor
from gn_libs import jobs, mysqldb, sqlite3
+from r_qtl import r_qtl2 as rqtl2
from uploader.publications.models import create_new_publications
+
+from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter
+
logging.basicConfig(
format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
logger = logging.getLogger(__name__)
@@ -43,7 +47,31 @@ def save_phenotypes(
control_data: dict[str, Any]
) -> tuple[dict, ...]:
"""Read `phenofiles` and save the phenotypes therein."""
- pass
+ logger.info("Saving new phenotypes.")
+ logger.debug("Processing %s 'pheno' files.", len(phenofiles))
+ phenofiles = control_data["pheno"]
+ if len(phenofiles) <= 0:
+ return tuple()
+
+ if control_data["pheno_transposed"]:
+ logger.info("Undoing transposition of the files rows and columns.")
+ phenofiles = (
+ transpose_csv_with_rename(
+ _file
+ build_line_splitter(control_data)
+ build_line_joiner(control_data))
+ for _file in control_data["pheno"])
+
+ _headers = rqtl2.read_csv_file_headers(control_data["pheno"][0],
+ control_data["pheno_transposed"],
+ control_data["sep"],
+ control_data["comment.char"])
+ return create_new_phenotypes(
+ cursor,
+ (dict(zip(_headers, line)) for filecontent
+ in (rqtl2.read_csv_file(path) for path in phenofiles)
+ for idx, line in enumerate(filecontent)
+ if idx != 0))
def save_phenotypes_data(conn: mysqldb.Connection, dataidmap, samples, datafiles):