about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2022-07-29 04:00:41 +0300
committer Frederick Muriuki Muriithi 2022-07-29 04:00:41 +0300
commit 6b11a267084c131ac7e1be76c4eb602996fd829e (patch)
tree b5003378c4507016a13b9ac38db0e6f53f4d1629
parent e3622ac213ef6e1be8a38959f7c6ee082c112c95 (diff)
download genenetwork3-6b11a267084c131ac7e1be76c4eb602996fd829e.tar.gz
New script to run sample correlations
* README.md: update mypy's invocation
* scripts/argparse_actions.py: new file - implement custom FileCheck action
  for argparse
* scripts/sample_correlations.py: new file - implement new script to run
  sample correlations in an external process
-rw-r--r-- README.md 2
-rw-r--r-- scripts/argparse_actions.py 26
-rw-r--r-- scripts/sample_correlations.py 51
3 files changed, 78 insertions, 1 deletions
diff --git a/README.md b/README.md
index 673d154..a5f29dc 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ pylint *py tests gn3 scripts sheepdog
 Running mypy(type-checker):
 
 ```bash
-mypy .
+mypy --show-error-codes .
 ```
 
 ## Running the GN3 web service
diff --git a/scripts/argparse_actions.py b/scripts/argparse_actions.py
new file mode 100644
index 0000000..d1d1bfd
--- /dev/null
+++ b/scripts/argparse_actions.py
@@ -0,0 +1,26 @@
+"Custom actions for argparse"
+from pathlib import Path
+from typing import Any, Union, Sequence, Optional
+from argparse import Action, Namespace, ArgumentError, ArgumentParser
+
+class FileCheck(Action):
+    "argparse action that accepts a value only if it names an existing file."
+
+    def __init__(self, option_strings, dest, **kwargs):
+        "Forward all arguments, unchanged, to argparse's `Action`."
+        super().__init__(option_strings, dest, **kwargs)
+
+    def __call__(# pylint: disable=[signature-differs]
+            self, parser: ArgumentParser, namespace: Namespace,
+            values: Union[str, Sequence[Any], None],
+            option_string: Optional[str] = "") -> None:
+        """Store `values` on `namespace` under `self.dest` if it is the path
+        of an existing file; otherwise raise `ArgumentError`."""
+        the_path = str(values or "")  # a missing/empty value becomes ""
+        the_file = Path(the_path)
+        if not the_file.is_file():  # false for directories and missing paths
+            raise ArgumentError(
+                self,
+                f"The file '{values}' does not exist or is a folder/directory.")
+
+        setattr(namespace, self.dest, values)  # stored as given, not as a Path
diff --git a/scripts/sample_correlations.py b/scripts/sample_correlations.py
new file mode 100644
index 0000000..c76be95
--- /dev/null
+++ b/scripts/sample_correlations.py
@@ -0,0 +1,51 @@
+"""Run the 'sample' correlations.
+
+Converts code in
+https://github.com/genenetwork/genenetwork2/blob/a08d91a234f700043d1d31164c7e2bacda4729da/wqflask/wqflask/correlation/correlation_gn3_api.py#L211-L216
+into a script that can be run as an external process."""
+import sys
+import pickle
+from argparse import ArgumentParser
+
+from gn3.computations.correlations import compute_all_sample_correlation
+
+from scripts.argparse_actions import FileCheck
+
+# compute_all_sample_correlation(
+#     corr_method=method, this_trait=this_trait_data, target_dataset=target_dataset_data)
+
+if __name__ == "__main__":  # everything below exists only when run as a script
+    def cli_args():
+        "Parse the four positional command-line arguments and return them."
+        parser = ArgumentParser(prog="sample_correlations")
+        parser.add_argument(
+            "corrmethod", help="The correlation method to use.", type=str,
+            choices=("pearson", "spearman", "bicor"))
+        parser.add_argument(
+            "traitfile", help="Path to file with pickled trait.",
+            type=str, action=FileCheck)  # rejected unless the file exists
+        parser.add_argument(
+            "targetdataset", type=str, action=FileCheck,  # must exist too
+            help="Path to file with pickled target dataset traits.")
+        parser.add_argument(
+            "destfile", type=str,  # output path: no existence check
+            help=("Path to file with pickled results of computing the "
+                  "correlations."))
+        args = parser.parse_args()
+        return args
+
+    def main():
+        "CLI entry point: unpickle inputs, compute and pickle the correlations."
+        args = cli_args()  # NOTE(review): pickle.load below; trusted files only
+        with open(args.traitfile, "rb") as traitfile:
+            with open(args.targetdataset, "rb") as targetdataset:
+                corrs = compute_all_sample_correlation(
+                    corr_method=args.corrmethod,
+                    this_trait=pickle.load(traitfile),
+                    target_dataset=pickle.load(targetdataset))
+
+        with open(args.destfile, "wb") as dest:  # opened after inputs are closed
+            pickle.dump(corrs, dest)
+        return 0  # becomes the process exit status via sys.exit below
+
+    sys.exit(main())