Update terminology: `strain` to `sample`

Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
* Update the terminology used: use `sample` in place of `strain`, according to Zachary's direction at https://github.com/genenetwork/genenetwork3/pull/37#issuecomment-926043306
author: Frederick Muriuki Muriithi 2021-09-27 04:48:53 +0300
committer: Frederick Muriuki Muriithi 2021-09-27 04:48:53 +0300
commit: 19783a18c2bc7941fc5980e593f19fb1d18c3623 (patch)
tree: f3a6e241be3c6224b9647c8258c516a7b741a28c /gn3/db
parent: 8d9bc0f29ce9208306915b079818e6f0c31785e2 (diff)
download: genenetwork3-19783a18c2bc7941fc5980e593f19fb1d18c3623.tar.gz
2 files changed, 26 insertions, 26 deletions
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9987320..8f18cac 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -14,16 +14,16 @@ def build_genotype_file(
 
 def load_genotype_samples(genotype_filename: str, file_type: str = "geno"):
     """
-    Load sample of strains from genotype files.
+    Load the list of samples from genotype files.
 
     DESCRIPTION:
-    Traits can contain a varied number of strains, some of which do not exist in
+    Traits can contain a varied number of samples, some of which do not exist in
     certain genotypes. In order to compute QTLs, GEMMAs, etc, we need to ensure
-    to pick only those strains that exist in the genotype under consideration
+    to pick only those samples that exist in the genotype under consideration
     for the traits used in the computation.
 
     This function loads a list of samples from the genotype files for use in
-    filtering out unusable strains.
+    filtering out unusable samples.
 
 
     PARAMETERS:
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 4fc47c3..c9d05d7 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -445,7 +445,7 @@ def retrieve_temp_trait_data(trait_info: dict, conn: Any):
             query,
             {"trait_name": trait_info["trait_name"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "nstrain", "id"], row))
+            ["sample_name", "value", "se_error", "nstrain", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -484,7 +484,7 @@ def retrieve_geno_trait_data(trait_info: Dict, conn: Any):
              "species_id": retrieve_species_id(
                  trait_info["db"]["riset"], conn)})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -515,7 +515,7 @@ def retrieve_publish_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_id": trait_info["db"]["dataset_id"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "nstrain", "id"], row))
+            ["sample_name", "value", "se_error", "nstrain", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -548,7 +548,7 @@ def retrieve_cellid_trait_data(trait_info: Dict, conn: Any):
              "trait_name": trait_info["trait_name"],
              "dataset_id": trait_info["db"]["dataset_id"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
@@ -577,29 +577,29 @@ def retrieve_probeset_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_name": trait_info["db"]["dataset_name"]})
         return [dict(zip(
-            ["strain_name", "value", "se_error", "id"], row))
+            ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
     return []
 
-def with_strainlist_data_setup(strainlist: Sequence[str]):
+def with_samplelist_data_setup(samplelist: Sequence[str]):
     """
-    Build function that computes the trait data from provided list of strains.
+    Build function that computes the trait data from provided list of samples.
 
     PARAMETERS
-    strainlist: (list)
-      A list of strain names
+    samplelist: (list)
+      A list of sample names
 
     RETURNS:
       Returns a function that given some data from the database, computes the
-      strain's value, variance and ndata values, only if the strain is present
-      in the provided `strainlist` variable.
+      sample's value, variance and ndata values, only if the sample is present
+      in the provided `samplelist` variable.
     """
     def setup_fn(tdata):
-        if tdata["strain_name"] in strainlist:
+        if tdata["sample_name"] in samplelist:
             val = tdata["value"]
             if val is not None:
                 return {
-                    "strain_name": tdata["strain_name"],
+                    "sample_name": tdata["sample_name"],
                     "value": val,
                     "variance": tdata["se_error"],
                     "ndata": tdata.get("nstrain", None)
@@ -607,19 +607,19 @@ def with_strainlist_data_setup(strainlist: Sequence[str]):
         return None
     return setup_fn
 
-def without_strainlist_data_setup():
+def without_samplelist_data_setup():
     """
     Build function that computes the trait data.
 
     RETURNS:
       Returns a function that given some data from the database, computes the
-      strain's value, variance and ndata values.
+      sample's value, variance and ndata values.
     """
     def setup_fn(tdata):
         val = tdata["value"]
         if val is not None:
             return {
-                "strain_name": tdata["strain_name"],
+                "sample_name": tdata["sample_name"],
                 "value": val,
                 "variance": tdata["se_error"],
                 "ndata": tdata.get("nstrain", None)
@@ -627,7 +627,7 @@ def without_strainlist_data_setup():
         return None
     return setup_fn
 
-def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tuple()):
+def retrieve_trait_data(trait: dict, conn: Any, samplelist: Sequence[str] = tuple()):
     """
     Retrieve trait data
 
@@ -650,23 +650,23 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
     if results:
         # do something with mysqlid
         mysqlid = results[0]["id"]
-        if strainlist:
+        if samplelist:
             data = [
                 item for item in
-                map(with_strainlist_data_setup(strainlist), results)
+                map(with_samplelist_data_setup(samplelist), results)
                 if item is not None]
         else:
             data = [
                 item for item in
-                map(without_strainlist_data_setup(), results)
+                map(without_samplelist_data_setup(), results)
                 if item is not None]
 
         return {
             "mysqlid": mysqlid,
             "data": dict(map(
                 lambda x: (
-                    x["strain_name"],
-                    {k:v for k, v in x.items() if x != "strain_name"}),
+                    x["sample_name"],
+                    {k:v for k, v in x.items() if x != "sample_name"}),
                 data))}
     return {}
author	Frederick Muriuki Muriithi	2021-09-27 04:48:53 +0300
committer	Frederick Muriuki Muriithi	2021-09-27 04:48:53 +0300
commit	19783a18c2bc7941fc5980e593f19fb1d18c3623 (patch)
tree	f3a6e241be3c6224b9647c8258c516a7b741a28c /gn3/db
parent	8d9bc0f29ce9208306915b079818e6f0c31785e2 (diff)
download	genenetwork3-19783a18c2bc7941fc5980e593f19fb1d18c3623.tar.gz