From 238e3a1cfd4bc8677a530410af2767d28e93af62 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 22 Jul 2021 23:47:58 +0000
Subject: Added option for running pairscan to rqtl_wrapper.R

---
 scripts/rqtl_wrapper.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index f7e0406..98b1231 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -11,6 +11,7 @@ option_list = list(
   make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"),
   make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"),
   make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"),
+  make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"),
   make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"),
   make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"),
   make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"),
-- 
cgit 1.4.1


From c3d59414b4bd23e52823a8b7e846f732e36d8c56 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 23 Jul 2021 19:40:04 +0000
Subject: - Added scan_func function that determines whether scanone or scantwo
 (pairscan) is used - For pairscan default to using step 20 (subject to
 change, but some step is required during calc.genoprob to make it run fast
 enough) - Added some new verbose prints

---
 scripts/rqtl_wrapper.R | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index 98b1231..77ed227 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -156,6 +156,9 @@ cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file)
 if (!is.null(opt$interval)) {
   verbose_print('Calculating genotype probabilities with interval mapping\n')
   cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
+} else if (!is.null(opt$pairscan)) {
+  verbose_print('Calculating genotype probabilities with interval mapping\n')
+  cross_object <- calc.genoprob(cross_object, step=20)
 } else {
   verbose_print('Calculating genotype probabilities\n')
   cross_object <- calc.genoprob(cross_object)
@@ -164,6 +167,7 @@ if (!is.null(opt$interval)) {
 # Pull covariates out of cross object, if they exist
 covars = vector(mode = "list", length = length(trait_names) - 1)
 if (!is.null(opt$addcovar)) {
+  verbose_print('Pulling covariates out of cross object\n')
   #If perm strata are being used, it'll be included as the final column in the phenotype file
   if (!is.null(opt$pstrata)) {
     covar_names = trait_names[3:length(trait_names) - 1]
@@ -176,16 +180,28 @@ if (!is.null(opt$addcovar)) {
 # Pull permutation strata out of cross object, if it is being used
 perm_strata = vector()
 if (!is.null(opt$pstrata)) {
+  verbose_print('Pulling permutation strata out of cross object\n')
   strata_col = trait_names[length(trait_names)]
   perm_strata <- pull.pheno(cross_object, strata_col)
 }
 
 # If a marker name is supplied as covariate, get its vector of values and add them as a covariate
 if (!is.null(opt$control)) {
+  verbose_print('Creating marker covariates and binding them to covariates vector\n')
   marker_covars = create_marker_covars(cross_object, opt$control)
   covars <- cbind(covars, marker_covars)
 }
 
+if (!is.null(opt$pairscan)) {
+  scan_func <- function(...){
+    scantwo(...)
+  }
+} else {
+  scan_func <- function(...){
+    scanone(...)
+  }
+}
+
 # Calculate permutations
 if (opt$nperm > 0) {
   if (!is.null(opt$filename)){
@@ -197,18 +213,18 @@ if (opt$nperm > 0) {
   if (!is.null(opt$addcovar) || !is.null(opt$control)){
     if (!is.null(opt$pstrata)) {
       verbose_print('Running permutations with cofactors and strata\n')
-      perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
+      perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
     } else {
       verbose_print('Running permutations with cofactors\n')
-      perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method)
+      perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method)
     }
   } else {
     if (!is.null(opt$pstrata)) {
       verbose_print('Running permutations with strata\n')
-      perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
+      perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
     } else {
       verbose_print('Running permutations\n')
-      perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method)
+      perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method)
     }
   }
   write.csv(perm_results, perm_out_file)
@@ -221,10 +237,11 @@ if (!is.null(opt$filename)){
 }
 
 if (!is.null(opt$addcovar) || !is.null(opt$control)){
-  verbose_print('Running scanone with cofactors\n')
-  qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method)
+  verbose_print('Running scan with cofactors\n')
+  qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method)
 } else {
-  verbose_print('Running scanone\n')
-  qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method)
+  verbose_print('Running scan\n')
+  qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method)
 }
+
 write.csv(qtl_results, out_file)
-- 
cgit 1.4.1


From b3e6105b5d8d2c2f30f2609290ef62fa17f32e62 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 23 Jul 2021 20:08:36 +0000
Subject: Added line priting pair-scan results to CSV and changed the default
 step-size to 10cM for pair-scan

---
 scripts/rqtl_wrapper.R | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index 77ed227..d8ecfaa 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -158,7 +158,7 @@ if (!is.null(opt$interval)) {
   cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
 } else if (!is.null(opt$pairscan)) {
   verbose_print('Calculating genotype probabilities with interval mapping\n')
-  cross_object <- calc.genoprob(cross_object, step=20)
+  cross_object <- calc.genoprob(cross_object, step=10)
 } else {
   verbose_print('Calculating genotype probabilities\n')
   cross_object <- calc.genoprob(cross_object)
@@ -244,4 +244,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){
   qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method)
 }
 
-write.csv(qtl_results, out_file)
+verbose_print('Writing results to CSV file\n')
+if (!is.null(opt$pairscan)) {
+  write.csv(qtl_results[1], out_file)
+} else {
+  write.csv(qtl_results, out_file)
+}
-- 
cgit 1.4.1


From a675e25d885f385c6ecd708f186b2ee448b97cd1 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 26 Jul 2021 20:47:37 +0000
Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for
 reading in pairscan results + renamed process_rqtl_output to
 process_rqtl_mapping to distinguish between that and pairscan

---
 gn3/api/rqtl.py          |  8 +++++---
 gn3/computations/rqtl.py | 17 ++++++++++++++++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index ebb746c..ff76356 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON
 
     # Split kwargs by those with values and boolean ones that just convert to True/False
     kwargs = ["model", "method", "nperm", "scale", "control_marker"]
-    boolean_kwargs = ["addcovar", "interval", "pstrata"]
+    boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"]
     all_kwargs = kwargs + boolean_kwargs
 
     rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
@@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON
                                        "output", rqtl_cmd.get('output_file'))):
         os.system(rqtl_cmd.get('rqtl_cmd'))
 
-    rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+    if "pairscan" in boolean_kwargs:
+        rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'))
+    else:
+        rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file'))
 
-    rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
     if int(rqtl_kwargs['nperm']) > 0:
         rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \
         process_perm_output(rqtl_cmd.get('output_file'))
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 0433b3f..405b743 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files
     }
 
 
-def process_rqtl_output(file_name: str) -> List:
+def process_rqtl_mapping(file_name: str) -> List:
     """Given an output file name, read in R/qtl results and return
     a List of marker objects
 
@@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List:
 
     return marker_obs
 
+def process_rqtl_pairscan(file_name: str) -> List:
+    """Given an output file name, read in R/qtl pair-scan results and return
+    a List of Lists representing the matrix of results
+
+    """
+    results = []
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", file_name), "r") as the_file:
+        for i, line in enumerate(the_file):
+            if i == 0: # Skip first line
+                continue
+            line_items = line.split(",")
+            results.append(line_items[1:]) # Append all but first item in line
+
+    return results
 
 def process_perm_output(file_name: str):
     """Given base filename, read in R/qtl permutation output and calculate
-- 
cgit 1.4.1


From 67f064606b9c8d4314701911dc4eedbaaa025cb0 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 29 Jul 2021 19:46:34 +0000
Subject: Fix imports to import both process_rqtl_mapping and
 process_rqtl_pairscan in api/rqtl.py

---
 gn3/api/rqtl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index ff76356..b0405f4 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -6,7 +6,7 @@ from flask import current_app
 from flask import jsonify
 from flask import request
 
-from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output
+from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output
 from gn3.computations.gemma import do_paths_exist
 
 rqtl = Blueprint("rqtl", __name__)
-- 
cgit 1.4.1


From 85762647a0e54888bcc7cbd8a11d64cf22e07960 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 2 Aug 2021 21:07:52 +0000
Subject: Updated rqtl_wrapper to also return a map file when doing a pair-scan
 (since we need the list of markers/pseudomarkers and their positions)

---
 scripts/rqtl_wrapper.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index d8ecfaa..f9c2c3b 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -157,7 +157,7 @@ if (!is.null(opt$interval)) {
   verbose_print('Calculating genotype probabilities with interval mapping\n')
   cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
 } else if (!is.null(opt$pairscan)) {
-  verbose_print('Calculating genotype probabilities with interval mapping\n')
+  verbose_print('Calculating genotype probabilities for pair-scan\n')
   cross_object <- calc.genoprob(cross_object, step=10)
 } else {
   verbose_print('Calculating genotype probabilities\n')
@@ -246,7 +246,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){
 
 verbose_print('Writing results to CSV file\n')
 if (!is.null(opt$pairscan)) {
+  map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" ))
   write.csv(qtl_results[1], out_file)
+  write.csv(qtl_results[2], map_out_file)
 } else {
   write.csv(qtl_results, out_file)
 }
-- 
cgit 1.4.1


From 58343f1005cb5a6aec47db22553313cf9fa17310 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 3 Aug 2021 20:34:06 +0000
Subject: Create pairscan_for_figure and pairscan_for_table functions that
 return the Dict and List respectively used for the pair scan figure and the
 table showing the results

---
 gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 82 insertions(+), 5 deletions(-)

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 405b743..00e1db9 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List:
 
 def process_rqtl_pairscan(file_name: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
-    a List of Lists representing the matrix of results
+    a list of both the JSON needed for the d3panels figure and a list of results
+    to be used when generating the results table (which will include marker names)
 
     """
-    results = []
+
+    figure_data = pairscan_results_for_figure(file_name)
+    table_data = pairscan_results_for_table(file_name)
+
+    return [figure_data, table_data]
+
+def pairscan_for_figure(file_name: str) -> Dict:
+    """Given an output file name, read in R/qtl pair-scan results and return
+    the JSON needed for the d3panels figure
+
+    """
+    figure_data = {}
+
+    # Open the file with the actual results, written as a list of lists
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
                            "output", file_name), "r") as the_file:
+        lod_results = []
         for i, line in enumerate(the_file):
             if i == 0: # Skip first line
                 continue
-            line_items = line.split(",")
-            results.append(line_items[1:]) # Append all but first item in line
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            lod_results.append(line_items[1:]) # Append all but first item in line
+        figure_data['lod'] = lod_results
+
+    # Open the map file with the list of markers/pseudomarkers and their positions
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", "MAP_" + file_name), "r") as the_file:
+        chr_list = []
+        pos_list = []
+        for i, line in enumerate(the_file):
+            if i == 0: # Skip first line
+                continue
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            chr_list.append(line_items[1])
+            pos_list.append(line_items[2])
+        figure_data['chr'] = chr_list
+        figure_data['pos'] = pos_list
+
+    return figure_data
+
+
+def pairscan_for_table(file_name: str) -> List:
+    """Given an output file name, read in R/qtl pair-scan results and return
+    a list of results to be used when generating the results table (which will include marker names)
+
+    """
+    table_data = []
+
+    # Open the map file with the list of markers/pseudomarkers and create list of marker obs
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", "MAP_" + file_name), "r") as the_file:
+        marker_list = []
+        for i, line in enumerate(the_file.readlines()[1:]):
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            this_marker = {
+                'name': line_items[0],
+                'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string
+                'pos': line_items[2]
+            }
+
+            marker_list.append(this_marker)
+
+    # Open the file with the actual results and write the results as
+    # they will be displayed in the results table
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", file_name), "r") as the_file:
+        for i, line in enumerate(the_file.readlines()[1:]):
+            marker_1 = marker_list[i]
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            for j, item in enumerate(line_items[1:]):
+                marker_2 = marker_list[j]
+                try:
+                    lod_score = f"{float(item):.3f}"
+                except:
+                    lod_score = f"{item}"
+                this_line = {
+                    'marker1': f"{marker_1['name']}",
+                    'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
+                    'lod': lod_score,
+                    'marker2': f"{marker_2['name']}",
+                    'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
+                }
+            table_data.append(this_line)
+
+    return table_data
 
-    return results
 
 def process_perm_output(file_name: str):
     """Given base filename, read in R/qtl permutation output and calculate
-- 
cgit 1.4.1


From e9110f840c8817092d0264990a4c7f6d16e966ba Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 22 Jul 2021 23:47:58 +0000
Subject: Added option for running pairscan to rqtl_wrapper.R

---
 scripts/rqtl_wrapper.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index 7518175..db0b83c 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -11,6 +11,7 @@ option_list = list(
   make_option(c("-c", "--addcovar"), action="store_true", default=NULL, help="Use covariates (included as extra columns in the phenotype input file)"),
   make_option(c("--model"), type="character", default="normal", help="Mapping Model - Normal or Non-Parametric"),
   make_option(c("--method"), type="character", default="hk", help="Mapping Method - hk (Haley Knott), ehk (Extended Haley Knott), mr (Marker Regression), em (Expectation-Maximization), imp (Imputation)"),
+  make_option(c("--pairscan"), action="store_true", default=NULL, help="Run Pair Scan - the R/qtl function scantwo"),
   make_option(c("-i", "--interval"), action="store_true", default=NULL, help="Use interval mapping"),
   make_option(c("--nperm"), type="integer", default=0, help="Number of permutations"),
   make_option(c("--pstrata"), action="store_true", default=NULL, help="Use permutation strata (stored as final column/vector in phenotype input file)"),
-- 
cgit 1.4.1


From 14d76308a779f6e4da4c9f4c17f971f5a0c938b0 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 23 Jul 2021 19:40:04 +0000
Subject: - Added scan_func function that determines whether scanone or scantwo
 (pairscan) is used - For pairscan default to using step 20 (subject to
 change, but some step is required during calc.genoprob to make it run fast
 enough) - Added some new verbose prints

---
 scripts/rqtl_wrapper.R | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index db0b83c..2641271 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -156,6 +156,9 @@ cross_object = geno_to_csvr(geno_file, trait_names, trait_vals, cross_file)
 if (!is.null(opt$interval)) {
   verbose_print('Calculating genotype probabilities with interval mapping\n')
   cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
+} else if (!is.null(opt$pairscan)) {
+  verbose_print('Calculating genotype probabilities with interval mapping\n')
+  cross_object <- calc.genoprob(cross_object, step=20)
 } else {
   verbose_print('Calculating genotype probabilities\n')
   cross_object <- calc.genoprob(cross_object)
@@ -164,6 +167,7 @@ if (!is.null(opt$interval)) {
 # Pull covariates out of cross object, if they exist
 covars = vector(mode = "list", length = length(trait_names) - 1)
 if (!is.null(opt$addcovar)) {
+  verbose_print('Pulling covariates out of cross object\n')
   #If perm strata are being used, it'll be included as the final column in the phenotype file
   if (!is.null(opt$pstrata)) {
     covar_names = trait_names[3:length(trait_names) - 1]
@@ -176,16 +180,28 @@ if (!is.null(opt$addcovar)) {
 # Pull permutation strata out of cross object, if it is being used
 perm_strata = vector()
 if (!is.null(opt$pstrata)) {
+  verbose_print('Pulling permutation strata out of cross object\n')
   strata_col = trait_names[length(trait_names)]
   perm_strata <- pull.pheno(cross_object, strata_col)
 }
 
 # If a marker name is supplied as covariate, get its vector of values and add them as a covariate
 if (!is.null(opt$control)) {
+  verbose_print('Creating marker covariates and binding them to covariates vector\n')
   marker_covars = create_marker_covars(cross_object, opt$control)
   covars <- cbind(covars, marker_covars)
 }
 
+if (!is.null(opt$pairscan)) {
+  scan_func <- function(...){
+    scantwo(...)
+  }
+} else {
+  scan_func <- function(...){
+    scanone(...)
+  }
+}
+
 # Calculate permutations
 if (opt$nperm > 0) {
   if (!is.null(opt$filename)){
@@ -196,19 +212,19 @@ if (opt$nperm > 0) {
 
   if (!is.null(opt$addcovar) || !is.null(opt$control)){
     if (!is.null(opt$pstrata)) {
-      verbose_print('Running ', opt$nperm, ' permutations with cofactors and strata\n')
-      perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
+      verbose_print('Running permutations with cofactors and strata\n')
+      perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
     } else {
-      verbose_print('Running ', opt$nperm, ' permutations with cofactors\n')
-      perm_results = scanone(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method)
+      verbose_print('Running permutations with cofactors\n')
+      perm_results = scan_func(cross_object, pheno.col=1, addcovar=covars, n.perm=opt$nperm, model=opt$model, method=opt$method)
     }
   } else {
     if (!is.null(opt$pstrata)) {
-      verbose_print('Running ', opt$nperm, ' permutations with strata\n')
-      perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
+      verbose_print('Running permutations with strata\n')
+      perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, perm.strata=perm_strata, model=opt$model, method=opt$method)
     } else {
-      verbose_print('Running ', opt$nperm, ' permutations\n')
-      perm_results = scanone(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method)
+      verbose_print('Running permutations\n')
+      perm_results = scan_func(cross_object, pheno.col=1, n.perm=opt$nperm, model=opt$model, method=opt$method)
     }
   }
   write.csv(perm_results, perm_out_file)
@@ -221,10 +237,11 @@ if (!is.null(opt$filename)){
 }
 
 if (!is.null(opt$addcovar) || !is.null(opt$control)){
-  verbose_print('Running scanone with cofactors\n')
-  qtl_results = scanone(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method)
+  verbose_print('Running scan with cofactors\n')
+  qtl_results = scan_func(cross_object, pheno.col=1, addcovar=covars, model=opt$model, method=opt$method)
 } else {
-  verbose_print('Running scanone\n')
-  qtl_results = scanone(cross_object, pheno.col=1, model=opt$model, method=opt$method)
+  verbose_print('Running scan\n')
+  qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method)
 }
+
 write.csv(qtl_results, out_file)
-- 
cgit 1.4.1


From 91804b4d4f5f82ddad30b68691456885b39309c9 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 23 Jul 2021 20:08:36 +0000
Subject: Added line priting pair-scan results to CSV and changed the default
 step-size to 10cM for pair-scan

---
 scripts/rqtl_wrapper.R | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index 2641271..ebd5a91 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -158,7 +158,7 @@ if (!is.null(opt$interval)) {
   cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
 } else if (!is.null(opt$pairscan)) {
   verbose_print('Calculating genotype probabilities with interval mapping\n')
-  cross_object <- calc.genoprob(cross_object, step=20)
+  cross_object <- calc.genoprob(cross_object, step=10)
 } else {
   verbose_print('Calculating genotype probabilities\n')
   cross_object <- calc.genoprob(cross_object)
@@ -244,4 +244,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){
   qtl_results = scan_func(cross_object, pheno.col=1, model=opt$model, method=opt$method)
 }
 
-write.csv(qtl_results, out_file)
+verbose_print('Writing results to CSV file\n')
+if (!is.null(opt$pairscan)) {
+  write.csv(qtl_results[1], out_file)
+} else {
+  write.csv(qtl_results, out_file)
+}
-- 
cgit 1.4.1


From 971c5f1342770b999c62e345a9ebf942a0410758 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 26 Jul 2021 20:47:37 +0000
Subject: Added pairscan boolean kwarg and process_rqtl_pairscan function for
 reading in pairscan results + renamed process_rqtl_output to
 process_rqtl_mapping to distinguish between that and pairscan

---
 gn3/api/rqtl.py          |  8 +++++---
 gn3/computations/rqtl.py | 17 ++++++++++++++++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index ebb746c..ff76356 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -25,7 +25,7 @@ run the rqtl_wrapper script and return the results as JSON
 
     # Split kwargs by those with values and boolean ones that just convert to True/False
     kwargs = ["model", "method", "nperm", "scale", "control_marker"]
-    boolean_kwargs = ["addcovar", "interval", "pstrata"]
+    boolean_kwargs = ["addcovar", "interval", "pstrata", "pairscan"]
     all_kwargs = kwargs + boolean_kwargs
 
     rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
@@ -48,9 +48,11 @@ run the rqtl_wrapper script and return the results as JSON
                                        "output", rqtl_cmd.get('output_file'))):
         os.system(rqtl_cmd.get('rqtl_cmd'))
 
-    rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+    if "pairscan" in boolean_kwargs:
+        rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'))
+    else:
+        rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file'))
 
-    rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
     if int(rqtl_kwargs['nperm']) > 0:
         rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \
         process_perm_output(rqtl_cmd.get('output_file'))
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 0433b3f..405b743 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -47,7 +47,7 @@ output filename generated from a hash of the genotype and phenotype files
     }
 
 
-def process_rqtl_output(file_name: str) -> List:
+def process_rqtl_mapping(file_name: str) -> List:
     """Given an output file name, read in R/qtl results and return
     a List of marker objects
 
@@ -80,6 +80,21 @@ def process_rqtl_output(file_name: str) -> List:
 
     return marker_obs
 
+def process_rqtl_pairscan(file_name: str) -> List:
+    """Given an output file name, read in R/qtl pair-scan results and return
+    a List of Lists representing the matrix of results
+
+    """
+    results = []
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", file_name), "r") as the_file:
+        for i, line in enumerate(the_file):
+            if i == 0: # Skip first line
+                continue
+            line_items = line.split(",")
+            results.append(line_items[1:]) # Append all but first item in line
+
+    return results
 
 def process_perm_output(file_name: str):
     """Given base filename, read in R/qtl permutation output and calculate
-- 
cgit 1.4.1


From 72ed7dfc45bb2a1303b900c8bc5b8ddd9e189a27 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 29 Jul 2021 19:46:34 +0000
Subject: Fix imports to import both process_rqtl_mapping and
 process_rqtl_pairscan in api/rqtl.py

---
 gn3/api/rqtl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index ff76356..b0405f4 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -6,7 +6,7 @@ from flask import current_app
 from flask import jsonify
 from flask import request
 
-from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output
+from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output
 from gn3.computations.gemma import do_paths_exist
 
 rqtl = Blueprint("rqtl", __name__)
-- 
cgit 1.4.1


From 2cb18bb7916e029d3aa7779df710c1d0cbc910f8 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 2 Aug 2021 21:07:52 +0000
Subject: Updated rqtl_wrapper to also return a map file when doing a pair-scan
 (since we need the list of markers/pseudomarkers and their positions)

---
 scripts/rqtl_wrapper.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/rqtl_wrapper.R b/scripts/rqtl_wrapper.R
index ebd5a91..ffff5b9 100644
--- a/scripts/rqtl_wrapper.R
+++ b/scripts/rqtl_wrapper.R
@@ -157,7 +157,7 @@ if (!is.null(opt$interval)) {
   verbose_print('Calculating genotype probabilities with interval mapping\n')
   cross_object <- calc.genoprob(cross_object, step=5, stepwidth="max")
 } else if (!is.null(opt$pairscan)) {
-  verbose_print('Calculating genotype probabilities with interval mapping\n')
+  verbose_print('Calculating genotype probabilities for pair-scan\n')
   cross_object <- calc.genoprob(cross_object, step=10)
 } else {
   verbose_print('Calculating genotype probabilities\n')
@@ -246,7 +246,9 @@ if (!is.null(opt$addcovar) || !is.null(opt$control)){
 
 verbose_print('Writing results to CSV file\n')
 if (!is.null(opt$pairscan)) {
+  map_out_file = file.path(opt$outdir, paste("MAP_", opt$filename, sep = "" ))
   write.csv(qtl_results[1], out_file)
+  write.csv(qtl_results[2], map_out_file)
 } else {
   write.csv(qtl_results, out_file)
 }
-- 
cgit 1.4.1


From df3ec570f22ebb4809c9accc530795757d5afaad Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 3 Aug 2021 20:34:06 +0000
Subject: Create pairscan_for_figure and pairscan_for_table functions that
 return the Dict and List respectively used for the pair scan figure and the
 table showing the results

---
 gn3/computations/rqtl.py | 87 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 82 insertions(+), 5 deletions(-)

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 405b743..00e1db9 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -82,19 +82,96 @@ def process_rqtl_mapping(file_name: str) -> List:
 
 def process_rqtl_pairscan(file_name: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
-    a List of Lists representing the matrix of results
+    a list of both the JSON needed for the d3panels figure and a list of results
+    to be used when generating the results table (which will include marker names)
 
     """
-    results = []
+
+    figure_data = pairscan_results_for_figure(file_name)
+    table_data = pairscan_results_for_table(file_name)
+
+    return [figure_data, table_data]
+
+def pairscan_for_figure(file_name: str) -> Dict:
+    """Given an output file name, read in R/qtl pair-scan results and return
+    the JSON needed for the d3panels figure
+
+    """
+    figure_data = {}
+
+    # Open the file with the actual results, written as a list of lists
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
                            "output", file_name), "r") as the_file:
+        lod_results = []
         for i, line in enumerate(the_file):
             if i == 0: # Skip first line
                 continue
-            line_items = line.split(",")
-            results.append(line_items[1:]) # Append all but first item in line
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            lod_results.append(line_items[1:]) # Append all but first item in line
+        figure_data['lod'] = lod_results
+
+    # Open the map file with the list of markers/pseudomarkers and their positions
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", "MAP_" + file_name), "r") as the_file:
+        chr_list = []
+        pos_list = []
+        for i, line in enumerate(the_file):
+            if i == 0: # Skip first line
+                continue
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            chr_list.append(line_items[1])
+            pos_list.append(line_items[2])
+        figure_data['chr'] = chr_list
+        figure_data['pos'] = pos_list
+
+    return figure_data
+
+
+def pairscan_for_table(file_name: str) -> List:
+    """Given an output file name, read in R/qtl pair-scan results and return
+    a list of results to be used when generating the results table (which will include marker names)
+
+    """
+    table_data = []
+
+    # Open the map file with the list of markers/pseudomarkers and create list of marker obs
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", "MAP_" + file_name), "r") as the_file:
+        marker_list = []
+        for i, line in enumerate(the_file.readlines()[1:]):
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            this_marker = {
+                'name': line_items[0],
+                'chr': line_items[1][1:-1], # Strip quotes from beginning and end of chr string
+                'pos': line_items[2]
+            }
+
+            marker_list.append(this_marker)
+
+    # Open the file with the actual results and write the results as
+    # they will be displayed in the results table
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", file_name), "r") as the_file:
+        for i, line in enumerate(the_file.readlines()[1:]):
+            marker_1 = marker_list[i]
+            line_items = [item.rstrip('\n') for item in line.split(",")]
+            for j, item in enumerate(line_items[1:]):
+                marker_2 = marker_list[j]
+                try:
+                    lod_score = f"{float(item):.3f}"
+                except:
+                    lod_score = f"{item}"
+                this_line = {
+                    'marker1': f"{marker_1['name']}",
+                    'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
+                    'lod': lod_score,
+                    'marker2': f"{marker_2['name']}",
+                    'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
+                }
+            table_data.append(this_line)
+
+    return table_data
 
-    return results
 
 def process_perm_output(file_name: str):
     """Given base filename, read in R/qtl permutation output and calculate
-- 
cgit 1.4.1


From 9f913b460cc7f72f1732195e6360c2d45b597d99 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Wed, 4 Aug 2021 19:41:47 +0000
Subject: Fixed a cople function calls to use the updated function names

---
 gn3/computations/rqtl.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 00e1db9..45cf430 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -87,8 +87,8 @@ def process_rqtl_pairscan(file_name: str) -> List:
 
     """
 
-    figure_data = pairscan_results_for_figure(file_name)
-    table_data = pairscan_results_for_table(file_name)
+    figure_data = pairscan_for_figure(file_name)
+    table_data = pairscan_for_table(file_name)
 
     return [figure_data, table_data]
 
-- 
cgit 1.4.1


From 1408b107beb4c799a1d7afaf571f53d0d56305bf Mon Sep 17 00:00:00 2001
From: zsloan
Date: Wed, 11 Aug 2021 17:36:18 +0000
Subject: Removed quotes from beginning and end of chromosome string

---
 gn3/computations/rqtl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 45cf430..3b50c85 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -119,7 +119,7 @@ def pairscan_for_figure(file_name: str) -> Dict:
             if i == 0: # Skip first line
                 continue
             line_items = [item.rstrip('\n') for item in line.split(",")]
-            chr_list.append(line_items[1])
+            chr_list.append(line_items[1][1:-1])
             pos_list.append(line_items[2])
         figure_data['chr'] = chr_list
         figure_data['pos'] = pos_list
-- 
cgit 1.4.1


From 9aeafe51e60989fb018d147163c4fb861ceeb256 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 26 Aug 2021 20:17:14 +0000
Subject: Added genofile name to inputs for processing R/qtl pair-scan results,
 since it's needed to store the proximal/distal markers for each position

---
 gn3/api/rqtl.py          | 2 +-
 gn3/computations/rqtl.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index b0405f4..2ff0f4e 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -49,7 +49,7 @@ run the rqtl_wrapper script and return the results as JSON
         os.system(rqtl_cmd.get('rqtl_cmd'))
 
     if "pairscan" in boolean_kwargs:
-        rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'))
+        rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile)
     else:
         rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file'))
 
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 3b50c85..ea88c29 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -80,7 +80,7 @@ def process_rqtl_mapping(file_name: str) -> List:
 
     return marker_obs
 
-def process_rqtl_pairscan(file_name: str) -> List:
+def process_rqtl_pairscan(file_name: str, geno_file: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
     a list of both the JSON needed for the d3panels figure and a list of results
     to be used when generating the results table (which will include marker names)
@@ -88,7 +88,7 @@ def process_rqtl_pairscan(file_name: str) -> List:
     """
 
     figure_data = pairscan_for_figure(file_name)
-    table_data = pairscan_for_table(file_name)
+    table_data = pairscan_for_table(file_name, geno_file)
 
     return [figure_data, table_data]
 
@@ -127,7 +127,7 @@ def pairscan_for_figure(file_name: str) -> Dict:
     return figure_data
 
 
-def pairscan_for_table(file_name: str) -> List:
+def pairscan_for_table(file_name: str, geno_file: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
     a list of results to be used when generating the results table (which will include marker names)
 
-- 
cgit 1.4.1


From 23be50a195e648fa567777c4ab679d4c30cf6933 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Sat, 28 Aug 2021 00:46:10 +0000
Subject: Fix issue that causes R/qtl to always run pair-scan even if pair-scan
 isn't selected

---
 gn3/api/rqtl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index 2ff0f4e..6548e1a 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -48,7 +48,7 @@ run the rqtl_wrapper script and return the results as JSON
                                        "output", rqtl_cmd.get('output_file'))):
         os.system(rqtl_cmd.get('rqtl_cmd'))
 
-    if "pairscan" in boolean_kwargs:
+    if "pairscan" in rqtl_bool_kwargs:
         rqtl_output['results'] = process_rqtl_pairscan(rqtl_cmd.get('output_file'), genofile)
     else:
         rqtl_output['results'] = process_rqtl_mapping(rqtl_cmd.get('output_file'))
-- 
cgit 1.4.1


From 49a9150f2dafee19852d0e51ce089fdd54be7b5f Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 23 Sep 2021 22:33:10 +0000
Subject: Add functions for getting proximal/distal markers for each
 pseudomarker position in pair-scan results + return only the sorted top 500
 results

---
 gn3/computations/rqtl.py | 62 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index ea88c29..d6fcd3f 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -1,5 +1,6 @@
 """Procedures related rqtl computations"""
 import os
+from bisect import bisect
 from typing import Dict, List, Union
 
 import numpy as np
@@ -148,30 +149,83 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List:
 
             marker_list.append(this_marker)
 
+    # Get the list of original markers from the .geno file
+    original_markers = build_marker_pos_list(geno_file)
+
     # Open the file with the actual results and write the results as
     # they will be displayed in the results table
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
                            "output", file_name), "r") as the_file:
         for i, line in enumerate(the_file.readlines()[1:]):
             marker_1 = marker_list[i]
+            proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers)
             line_items = [item.rstrip('\n') for item in line.split(",")]
             for j, item in enumerate(line_items[1:]):
                 marker_2 = marker_list[j]
+                proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers)
                 try:
                     lod_score = f"{float(item):.3f}"
                 except:
                     lod_score = f"{item}"
                 this_line = {
-                    'marker1': f"{marker_1['name']}",
+                    'proximal1': proximal1,
+                    'distal1': distal1,
                     'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
                     'lod': lod_score,
-                    'marker2': f"{marker_2['name']}",
+                    'proximal2': proximal2,
+                    'distal2': distal2,
                     'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
                 }
-            table_data.append(this_line)
 
-    return table_data
+                table_data.append(this_line)
+
+    return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500]
+
+def build_marker_pos_list(genotype_file):
+    """
+    Gets list of markers and their positions from .geno file
+
+    Basically a pared-down version of parse_genotype_file for R/qtl pair-scan
+    """
+
+    with open(genotype_file, "r") as infile:
+        contents = infile.readlines()
+
+    # Get all lines after the metadata
+    lines = tuple(line for line in contents if
+                  ((not line.strip().startswith("#")) and
+                   (not line.strip().startswith("@")) and
+                   (not line.strip() == "")))
+
+    header_items = lines[0].split("\t")
+    mb_exists = "Mb" in header_items
+    pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM")
+
+    the_markers = {}
+    for line in lines[1:]: # The lines with markers
+        line_items = line.split("\t")
+        this_chr = line_items[0]
+        if this_chr not in the_markers:
+            the_markers[this_chr] = {}
+        the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1]
+
+    return the_markers
+
+def find_nearest_marker(the_chr, the_pos, marker_list):
+    """
+    Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
+    return the nearest real marker
+    """
+
+    pos_list = [float(pos) for pos in marker_list[the_chr]]
+
+    # Get the position of the pseudomarker in the list of markers for the chr
+    the_pos_index = bisect(pos_list, float(the_pos))
+
+    proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])]
+    distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])]
 
+    return proximal_marker, distal_marker
 
 def process_perm_output(file_name: str):
     """Given base filename, read in R/qtl permutation output and calculate
-- 
cgit 1.4.1


From 20f26af7a1af24b823ba1cf29d0e4393dabf9168 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 23 Sep 2021 22:33:31 +0000
Subject: Add typing to some functions

---
 gn3/computations/rqtl.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index d6fcd3f..4f2709c 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -1,7 +1,7 @@
 """Procedures related rqtl computations"""
 import os
 from bisect import bisect
-from typing import Dict, List, Union
+from typing import Dict, List, Tuple, Union
 
 import numpy as np
 
@@ -150,7 +150,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List:
             marker_list.append(this_marker)
 
     # Get the list of original markers from the .geno file
-    original_markers = build_marker_pos_list(geno_file)
+    original_markers = build_marker_pos_dict(geno_file)
 
     # Open the file with the actual results and write the results as
     # they will be displayed in the results table
@@ -181,7 +181,7 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List:
 
     return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500]
 
-def build_marker_pos_list(genotype_file):
+def build_marker_pos_dict(genotype_file: str) -> Dict:
     """
     Gets list of markers and their positions from .geno file
 
@@ -211,7 +211,7 @@ def build_marker_pos_list(genotype_file):
 
     return the_markers
 
-def find_nearest_marker(the_chr, the_pos, marker_list):
+def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]:
     """
     Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
     return the nearest real marker
@@ -227,7 +227,7 @@ def find_nearest_marker(the_chr, the_pos, marker_list):
 
     return proximal_marker, distal_marker
 
-def process_perm_output(file_name: str):
+def process_perm_output(file_name: str) -> Tuple[List, float, float]:
     """Given base filename, read in R/qtl permutation output and calculate
     suggestive and significant thresholds
 
-- 
cgit 1.4.1


From 976660ce9ef915426c7ce5ff9077b439e4102a2c Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 23 Sep 2021 22:48:39 +0000
Subject: Tried to make the docstrings more consistent

---
 gn3/computations/rqtl.py | 38 +++++++++++---------------------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 38c5000..45232e3 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -16,9 +16,7 @@ def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
                       rqtl_wrapper_bool_kwargs: list) -> Dict:
     """Given the base rqtl_wrapper command and
 dict of keyword arguments, return the full rqtl_wrapper command and an
-output filename generated from a hash of the genotype and phenotype files
-
-    """
+output filename generated from a hash of the genotype and phenotype files"""
 
     # Generate a hash from contents of the genotype and phenotype files
     _hash = get_hash_of_files(
@@ -50,9 +48,7 @@ output filename generated from a hash of the genotype and phenotype files
 
 def process_rqtl_mapping(file_name: str) -> List:
     """Given an output file name, read in R/qtl results and return
-    a List of marker objects
-
-    """
+    a List of marker objects"""
     marker_obs = []
     # Later I should probably redo this using csv.read to avoid the
     # awkwardness with removing quotes with [1:-1]
@@ -83,11 +79,8 @@ def process_rqtl_mapping(file_name: str) -> List:
 
 def process_rqtl_pairscan(file_name: str, geno_file: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
-    a list of both the JSON needed for the d3panels figure and a list of results
-    to be used when generating the results table (which will include marker names)
-
-    """
-
+a list of both the JSON needed for the d3panels figure and a list of results
+to be used when generating the results table (which will include marker names)"""
     figure_data = pairscan_for_figure(file_name)
     table_data = pairscan_for_table(file_name, geno_file)
 
@@ -95,9 +88,7 @@ def process_rqtl_pairscan(file_name: str, geno_file: str) -> List:
 
 def pairscan_for_figure(file_name: str) -> Dict:
     """Given an output file name, read in R/qtl pair-scan results and return
-    the JSON needed for the d3panels figure
-
-    """
+    the JSON needed for the d3panels figure"""
     figure_data = {}
 
     # Open the file with the actual results, written as a list of lists
@@ -129,9 +120,7 @@ def pairscan_for_figure(file_name: str) -> Dict:
 
 def pairscan_for_table(file_name: str, geno_file: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
-    a list of results to be used when generating the results table (which will include marker names)
-
-    """
+    a list of results to be used when generating the results table (which will include marker names)"""
     table_data = []
 
     # Open the map file with the list of markers/pseudomarkers and create list of marker obs
@@ -181,11 +170,9 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List:
     return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500]
 
 def build_marker_pos_dict(genotype_file: str) -> Dict:
-    """
-    Gets list of markers and their positions from .geno file
+    """Gets list of markers and their positions from .geno file
 
-    Basically a pared-down version of parse_genotype_file for R/qtl pair-scan
-    """
+    Basically a pared-down version of parse_genotype_file for R/qtl pair-scan"""
 
     with open(genotype_file, "r") as infile:
         contents = infile.readlines()
@@ -211,10 +198,8 @@ def build_marker_pos_dict(genotype_file: str) -> Dict:
     return the_markers
 
 def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]:
-    """
-    Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
-    return the nearest real marker
-    """
+    """Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
+    return the nearest real marker"""
 
     pos_list = [float(pos) for pos in marker_list[the_chr]]
 
@@ -228,9 +213,8 @@ def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[
 
 def process_perm_output(file_name: str) -> Tuple[List, float, float]:
     """Given base filename, read in R/qtl permutation output and calculate
-    suggestive and significant thresholds
+    suggestive and significant thresholds"""
 
-    """
     perm_results = []
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
                            "output", "PERM_" + file_name), "r") as the_file:
-- 
cgit 1.4.1


From 30d00f4db20ea9dce827fc4f4ac3d50221ad3823 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 12 Oct 2021 19:15:55 +0000
Subject: Fixed mypy typing errors

---
 gn3/computations/rqtl.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 45232e3..bb28dc3 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -105,8 +105,8 @@ def pairscan_for_figure(file_name: str) -> Dict:
     # Open the map file with the list of markers/pseudomarkers and their positions
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
                            "output", "MAP_" + file_name), "r") as the_file:
-        chr_list = []
-        pos_list = []
+        chr_list = [] # type: List
+        pos_list = [] # type: List
         for i, line in enumerate(the_file):
             if i == 0: # Skip first line
                 continue
@@ -187,7 +187,7 @@ def build_marker_pos_dict(genotype_file: str) -> Dict:
     mb_exists = "Mb" in header_items
     pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM")
 
-    the_markers = {}
+    the_markers = {"1": {}} # type: Dict[str, Dict]
     for line in lines[1:]: # The lines with markers
         line_items = line.split("\t")
         this_chr = line_items[0]
-- 
cgit 1.4.1


From 607c6e627c23c1bce3b199b145855182ab51b211 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Tue, 12 Oct 2021 20:54:47 +0000
Subject: Fixes pylint errors

---
 gn3/api/rqtl.py          |  3 +-
 gn3/computations/rqtl.py | 86 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 64 insertions(+), 25 deletions(-)

diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
index 6548e1a..85b2460 100644
--- a/gn3/api/rqtl.py
+++ b/gn3/api/rqtl.py
@@ -6,7 +6,8 @@ from flask import current_app
 from flask import jsonify
 from flask import request
 
-from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, process_rqtl_pairscan, process_perm_output
+from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_mapping, \
+                                  process_rqtl_pairscan, process_perm_output
 from gn3.computations.gemma import do_paths_exist
 
 rqtl = Blueprint("rqtl", __name__)
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index bb28dc3..e81aba3 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -118,16 +118,19 @@ def pairscan_for_figure(file_name: str) -> Dict:
 
     return figure_data
 
-def pairscan_for_table(file_name: str, geno_file: str) -> List:
-    """Given an output file name, read in R/qtl pair-scan results and return
-    a list of results to be used when generating the results table (which will include marker names)"""
-    table_data = []
 
-    # Open the map file with the list of markers/pseudomarkers and create list of marker obs
+def get_marker_list(map_file: str) -> List:
+    """
+    Open the map file with the list of markers/pseudomarkers and create list of marker obs
+
+    PARAMETERS:
+    map_file: The map file output by R/qtl containing marker/pseudomarker positions
+    """
+
+    marker_list = []
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
-                           "output", "MAP_" + file_name), "r") as the_file:
-        marker_list = []
-        for i, line in enumerate(the_file.readlines()[1:]):
+                           "output", map_file), "r") as map_fh:
+        for line in map_fh.readlines()[1:]:
             line_items = [item.rstrip('\n') for item in line.split(",")]
             this_marker = {
                 'name': line_items[0],
@@ -137,37 +140,72 @@ def pairscan_for_table(file_name: str, geno_file: str) -> List:
 
             marker_list.append(this_marker)
 
-    # Get the list of original markers from the .geno file
-    original_markers = build_marker_pos_dict(geno_file)
+    return marker_list
 
-    # Open the file with the actual results and write the results as
-    # they will be displayed in the results table
+def generate_table_rows(results_file: str, marker_list: List, original_markers: Dict) -> List:
+    """
+    Open the file with the actual R/qtl pair-scan results and write them as
+    they will be displayed in the results table
+
+    PARAMETERS:
+    results_file: The filename containing R/qtl pair-scan results
+    marker_list: List of marker/pseudomarker names/positions from results
+    original_markers: Dict of markers from the .geno file, for finding proximal/distal
+                      markers to each pseudomarker
+    """
+
+    table_data = []
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
-                           "output", file_name), "r") as the_file:
+                           "output", results_file), "r") as the_file:
         for i, line in enumerate(the_file.readlines()[1:]):
             marker_1 = marker_list[i]
-            proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers)
+            marker_1['proximal'], marker_1['distal'] = find_nearest_marker(marker_1['chr'],
+                                                                           marker_1['pos'],
+                                                                           original_markers)
             line_items = [item.rstrip('\n') for item in line.split(",")]
             for j, item in enumerate(line_items[1:]):
                 marker_2 = marker_list[j]
-                proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers)
+                marker_2['proximal'], marker_2['distal'] = find_nearest_marker(marker_2['chr'],
+                                                                               marker_2['pos'],
+                                                                               original_markers)
                 try:
                     lod_score = f"{float(item):.3f}"
-                except:
+                except ValueError:
                     lod_score = f"{item}"
-                this_line = {
-                    'proximal1': proximal1,
-                    'distal1': distal1,
+
+                table_data.append({
+                    'proximal1': marker_1['proximal'],
+                    'distal1': marker_1['distal'],
                     'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
                     'lod': lod_score,
-                    'proximal2': proximal2,
-                    'distal2': distal2,
+                    'proximal2': marker_2['proximal'],
+                    'distal2': marker_2['distal'],
                     'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
-                }
+                })
+
+    return table_data
+
+def pairscan_for_table(file_name: str, geno_file: str) -> List:
+    """
+    Read in R/qtl pair-scan results and return as List representing
+    table row contents
+
+    PARAMETERS:
+    file_name: The filename containing R/qtl pair-scan results
+    geno_file: Filename of the genotype file (used to get marker positions)
+    """
 
-                table_data.append(this_line)
+    # Open the map file with the list of markers/pseudomarkers and create list of marker obs
+    marker_list = get_marker_list("MAP_" + file_name)
+
+    # Get the list of original markers from the .geno file
+    original_markers = build_marker_pos_dict(geno_file)
+
+    # Open the file with the actual results and write the results as
+    # they will be displayed in the results table
+    table_data = generate_table_rows(file_name, marker_list, original_markers)
 
-    return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500]
+    return sorted(table_data, key=lambda i: float(i['lod']), reverse=True)[:500]
 
 def build_marker_pos_dict(genotype_file: str) -> Dict:
     """Gets list of markers and their positions from .geno file
-- 
cgit 1.4.1