about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-01-10 06:43:05 +0300
committer: Frederick Muriuki Muriithi, 2024-01-10 07:19:36 +0300
commit: 26ca17cf2bd08a7b75e4094e2903966cfedefb0f (patch)
tree: c30903b15cdbbba7f3fdc4aa09c7dd450747b972 /scripts
parent: 9322da0f79dfa4c3f9f899f5a861ce302ce21e9c (diff)
download: gn-uploader-26ca17cf2bd08a7b75e4094e2903966cfedefb0f.tar.gz
Insert any new markers
Insert any new markers found into the database.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/rqtl2/install_genotypes.py 32
1 files changed, 14 insertions, 18 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index f5b6eb4..88f776b 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -23,21 +23,18 @@ stderr_handler = logging.StreamHandler(stream=sys.stderr)
 logger = logging.getLogger("install_genotypes")
 logger.addHandler(stderr_handler)
 
-def insert_genotypes(dbconn: mdb.Connection,
-                     speciesid: int,
-                     populationid: int,
-                     genotypes: tuple[dict]) -> int:
+def insert_markers(dbconn: mdb.Connection,
+                   speciesid: int,
+                   markers: tuple[str, ...]) -> int:
     """Insert genotype and genotype values into the database."""
     with dbconn.cursor() as cursor:
         cursor.executemany(
             "INSERT INTO Geno(SpeciesId, Name, Marker_Name) "
             "VALUES (%(speciesid)s, %(marker)s, %(marker)s) "
-            "ON DUPLICATE KEY UPDATE "
-            "SpeciesId=VALUE(SpeciesId)",
-            tuple({"speciesid": speciesid, "marker": geno["marker"]}
-                  for geno in genotypes))
-        # TODO: Install individuals/samples/strains: Strain
-        # TODO: Cross-ref samples to population: StrainXRef
+            "ON DUPLICATE KEY UPDATE SpeciesId=SpeciesId",
+            tuple({"speciesid": speciesid, "marker": marker}
+                  for marker in markers))
+        return cursor.rowcount
         # TODO: Install geno data: GenoData
         return cursor.rowcount
 
@@ -47,7 +44,6 @@ def install_genotypes(dbconn: mdb.Connection,
                       rqtl2bundle: Path) -> int:
     """Load any existing genotypes into the database."""
     count = 0
-    installed = 0
     with ZipFile(str(rqtl2bundle.absolute()), "r") as zfile:
         try:
             logger.info("Validating bundle")
@@ -55,7 +51,7 @@ def install_genotypes(dbconn: mdb.Connection,
             logger.info("Bundle validated successfully.")
             logger.info(("Loading genotypes. This could take a while. "
                          "Please be patient."))
-            
+
             cdata = rqtl2.control_data(zfile)
             genotypes = rqtl2.file_data(zfile,
                                         "geno",
@@ -66,14 +62,14 @@ def install_genotypes(dbconn: mdb.Connection,
                 if len(batch) == 0:
                     logger.info("Loading Genotypes complete!")
                     logger.info(
-                        f"Total genotypes installed: {installed} of {count}")
+                        "Total rows processed: %s", count)
                     break
 
-                curr_installed = insert_genotypes(
-                    dbconn, speciesid, populationid, batch)
-                installed = installed + curr_installed
+                insert_markers(
+                    dbconn,
+                    speciesid,
+                    tuple(key for key in batch[0].keys() if key != "id"))
                 count = count + len(batch)
-                logger.info(f"Installed {curr_installed} genotypes")
 
             if "gmap" in cdata:
                 logger.info("Loading genetic mapping info.")
@@ -87,7 +83,7 @@ def install_genotypes(dbconn: mdb.Connection,
         except rqtl2.InvalidFormat as exc:
             logger.error(str(exc))
             logger.info("There are no genotypes to load.")
-        except Exception as exc:
+        except Exception as _exc:
             logger.error("Failing with exception: %s", traceback.format_exc())
             return 3