about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-11 12:38:04 +0300
committer Frederick Muriuki Muriithi 2024-01-11 12:38:04 +0300
commit f89c08c392182b669d058a4c21feffde64b15ebb (patch)
tree dbc0086335c7c871677ff2f3eff38d8e5b345921
parent 2a29e3f0ed57414490f05790e664f94d89b5fdf9 (diff)
download gn-uploader-f89c08c392182b669d058a4c21feffde64b15ebb.tar.gz
Update pmap data in the database.
-rw-r--r-- scripts/rqtl2/install_genotypes.py 34
1 files changed, 23 insertions, 11 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index 1317c96..a1609a0 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -28,15 +28,30 @@ logger.addHandler(stderr_handler)
 
 def insert_markers(dbconn: mdb.Connection,
                    speciesid: int,
-                   markers: tuple[str, ...]) -> int:
+                   markers: tuple[str, ...],
+                   pmapdata: Union[Iterator[dict], None]) -> int:
     """Insert genotype and genotype values into the database."""
+    mdata = reduce(#type: ignore[var-annotated]
+        lambda acc, row: ({#type: ignore[arg-type, return-value]
+            **acc, row["id"]: {
+                key: val
+                for key,val in row.items()
+                if key != "id"
+            }
+        }),
+        (pmapdata or tuple()),
+        {})
     with dbconn.cursor() as cursor:
         cursor.executemany(
-            "INSERT INTO Geno(SpeciesId, Name, Marker_Name) "
-            "VALUES (%(speciesid)s, %(marker)s, %(marker)s) "
+            "INSERT INTO Geno(SpeciesId, Name, Marker_Name, Chr, Mb) "
+            "VALUES (%(speciesid)s, %(marker)s, %(marker)s, %(chr)s, %(pos)s) "
             "ON DUPLICATE KEY UPDATE SpeciesId=SpeciesId",
-            tuple({"speciesid": speciesid, "marker": marker}
-                  for marker in markers))
+            tuple({
+                "speciesid": speciesid,
+                "marker": marker,
+                "chr": mdata.get(marker, {}).get("chr"),
+                "pos": mdata.get(marker, {}).get("pos")
+            } for marker in markers))
         return cursor.rowcount
 
 def insert_individuals(dbconn: mdb.Connection,
@@ -175,7 +190,9 @@ def install_genotypes(dbconn: mdb.Connection,
                 insert_markers(
                     dbconn,
                     speciesid,
-                    tuple(key for key in batch[0].keys() if key != "id"))
+                    tuple(key for key in batch[0].keys() if key != "id"),
+                    (rqtl2.file_data(zfile, "pmap", cdata) if "pmap" in cdata
+                     else None))
                 individuals = tuple(row["id"] for row in batch)
                 insert_individuals(dbconn, speciesid, individuals)
                 cross_reference_individuals(
@@ -190,11 +207,6 @@ def install_genotypes(dbconn: mdb.Connection,
                     (rqtl2.file_data(zfile, "gmap", cdata)
                      if "gmap" in cdata else None))
                 count = count + len(batch)
-
-            if "pmap" in cdata:
-                logger.info("Loading physical mapping info.")
-                # TODO: load pmap files
-                logger.info("Successfully loaded physical mapping.")
         except rqtl2.InvalidFormat as exc:
             logger.error(str(exc))
             logger.info("There are no genotypes to load.")