scripts/rqtl2_wrapper.R


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

# This script file contains  the an implementation of qtl mapping using r-qtl2
# For r-qtl1 implementation see: Scripts/rqtl_wrapper.R
# load the library

library(qtl2)
library(rjson)
library(stringi)
library(stringr)

options(stringsAsFactors = FALSE);
args = commandArgs(trailingOnly=TRUE)

# get the json file path with pre metadata required to create the cross


if (length(args)==0) {
  stop("Argument for the metadata file is Missing ", call.=FALSE)
} else {

  json_file_path = args[1]
}

# validation for the json file


if (!(file.exists(json_file_path))) {
   stop("The input file path does not exists")
} else {
str_glue("The input path for the metadata >>>>>>> {json_file_path}")
json_data  = fromJSON(file = json_file_path)
}


# generate random string file path here
genRandomFileName <- function(prefix,file_ext=".txt"){
	randStr = paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
	return(paste(randStr,file_ext,sep=""))
}

# this should be read from the json file assigned to variables
# TODO improve on this or use option


# should put constraints for items data required for this 
crosstype <- json_data$crosstype
geno_file <- json_data$geno_file
pheno_file <- json_data$pheno_file
geno_map_file <- json_data$geno_map_file 
pheno_covar_file <- json_data$phenocovar_file
alleles <- json_data$alleles
founder_geno_file = json_data$founder_geno_file
gmap_file = json_data$gmap_file


# work on the optional parameters

# better fit for reading the data
# make validations

# parsing the required data for example the geno_codes


# geno_codes  handle the geno codes here 

# make assertion for the geno_file and the geno_file exists
# make assertion for the physical map file or the geno map file exists

# create temp directory for this workspace 
control_file_path  <- file.path("/home/kabui", genRandomFileName(prefix="control_", file_ext=".json"))

str_glue(
  "Generated control file path is  {control_file_path}"
)


# create the cross file here from the arguments provided
# todo add more validation checks here 

# issue I can no define the different paths for files for example pheno_file 

# think about the issue about geno codes ~~~~
# function to generate a cross file from a json list

generate_cross_object  <- function(json_data){
return (write_control_file(control_file_path,
    crosstype= json_data$crosstype,
    geno_file= json_data$geno_file,
    pheno_file= json_data$pheno_file,    
    gmap_file= json_data$geno_map_file,
    phenocovar_file= json_data$phenocovar_file,
    geno_codes= json_data$geno_codes,
    alleles= json_data$alleles,    
    na.strings=json_data$na.strings,
    overwrite = TRUE))
}


# alternatively pass a  yaml file with
dataset <- write_control_file(control_file_path,
    crosstype= crosstype,
    geno_file= geno_file,
    pheno_file= pheno_file,    
    gmap_file= geno_map_file,
    phenocovar_file= pheno_covar_file,
    geno_codes=c(L=1L, C=2L),
    alleles= alleles,    
    na.strings=c("-", "NA"),
    overwrite = TRUE)

# make validation for the data
dataset  <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path

# check integrity of the cross
cat("Check the integrity of the cross object")
check_cross2(dataset)
if (check_cross2(dataset)){
  print("Dataset meets required specifications for a cross")
} else {
  print("Dataset does not meet required specifications")
}

# Dataset Summarys 
cat("A Summary about the Dataset You Provided\n")
summary(dataset)
n_ind(dataset)
n_chr(dataset)
cat("names of markers in the  object\n")
marker_names(dataset)
cat("names of phenotypes in a the object")
pheno_names(dataset)
cat("IDs for all individuals in the dataset cross object that have genotype data\n")
ind_ids_geno(dataset)
cat(" IDs for all individuals in the dataset object that have phenotype data")
ind_ids_pheno(dataset)
cat("Name of the founder Strains/n")
founders(dataset)

# Work on computing the genetic probabilities


analysis_type <- "single"


perform_genetic_pr <- function(cross, cores= 1, error_prob=0.002, analysis_type="single"){
# improve on this 
if (analysis_type == "single"){
    pr <- calc_genoprob(cross, error_prob= error_prob, quiet=FALSE, cores=cores)
    return (pr)
}
}

# get the genetic probability

Pr = perform_genetic_pr(dataset)
cat("Summaries  on the genetic probabilites \n")
print(Pr)
summary(Pr)


#calculate genotyping error LOD scores, to help identify potential genotyping errors (and problem markers and/or individuals
error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4)
print(error_lod)


#