about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/load_phenotypes_to_db.py 30
1 file changed, 29 insertions, 1 deletion
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 753494b..fdf711b 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -9,7 +9,11 @@ from MySQLdb.cursors import Cursor, DictCursor
 
 from gn_libs import jobs, mysqldb, sqlite3
 
+from r_qtl import r_qtl2 as rqtl2
 from uploader.publications.models import create_new_publications
+
+from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter
+
 logging.basicConfig(
     format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
 logger = logging.getLogger(__name__)
@@ -43,7 +47,31 @@ def save_phenotypes(
         control_data: dict[str, Any]
 ) -> tuple[dict, ...]:
     """Read `phenofiles` and save the phenotypes therein."""
-    pass
+    logger.info("Saving new phenotypes.")
+    logger.debug("Processing %s 'pheno' files.", len(phenofiles))
+    phenofiles = control_data["pheno"]
+    if len(phenofiles) <= 0:
+        return tuple()
+
+    if control_data["pheno_transposed"]:
+        logger.info("Undoing transposition of the files rows and columns.")
+        phenofiles = (
+            transpose_csv_with_rename(
+                _file
+                build_line_splitter(control_data)
+                build_line_joiner(control_data))
+            for _file in control_data["pheno"])
+
+    _headers = rqtl2.read_csv_file_headers(control_data["pheno"][0],
+                                           control_data["pheno_transposed"],
+                                           control_data["sep"],
+                                           control_data["comment.char"])
+    return create_new_phenotypes(
+        cursor,
+        (dict(zip(_headers, line)) for filecontent
+         in (rqtl2.read_csv_file(path) for path in phenofiles)
+         for idx, line in enumerate(filecontent)
+         if idx != 0))
 
 
 def save_phenotypes_data(conn: mysqldb.Connection, dataidmap, samples, datafiles):