# This script file contains  the an implementation of qtl mapping using r-qtl2
# For r-qtl1 implementation see: Scripts/rqtl_wrapper.R
# load the library

library(qtl2)
library(rjson)
library(stringi)
library(stringr)

options(stringsAsFactors = FALSE);
args = commandArgs(trailingOnly=TRUE)

# get the json file path with pre metadata required to create the cross


if (length(args)==0) {
  stop("Argument for the metadata file is Missing ", call.=FALSE)
} else {

  json_file_path = args[1]
}

# validation for the json file


if (!(file.exists(json_file_path))) {
   stop("The input file path does not exists")
} else {
str_glue("The input path for the metadata >>>>>>> {json_file_path}")
json_data  = fromJSON(file = json_file_path)
}


# generate random string file path here
genRandomFileName <- function(prefix,file_ext=".txt"){
	randStr = paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
	return(paste(randStr,file_ext,sep=""))
}

# this should be read from the json file assigned to variables
# TODO improve on this or use option


# should put constraints for items data required for this 
crosstype <- json_data$crosstype
geno_file <- json_data$geno_file
pheno_file <- json_data$pheno_file
geno_map_file <- json_data$geno_map_file 
pheno_covar_file <- json_data$phenocovar_file
alleles <- json_data$alleles
founder_geno_file = json_data$founder_geno_file
gmap_file = json_data$gmap_file


# work on the optional parameters

# better fit for reading the data
# make validations

# parsing the required data for example the geno_codes


# geno_codes  handle the geno codes here 

# make assertion for the geno_file and the geno_file exists
# make assertion for the physical map file or the geno map file exists

# create temp directory for this workspace 
control_file_path  <- file.path("/home/kabui", genRandomFileName(prefix="control_", file_ext=".json"))

str_glue(
  "Generated control file path is  {control_file_path}"
)


# create the cross file here from the arguments provided
# todo add more validation checks here 

# issue I can no define the different paths for files for example pheno_file 

# think about the issue about geno codes ~~~~
# function to generate a cross file from a json list

generate_cross_object  <- function(json_data){
return (write_control_file(control_file_path,
    crosstype= json_data$crosstype,
    geno_file= json_data$geno_file,
    pheno_file= json_data$pheno_file,    
    gmap_file= json_data$geno_map_file,
    phenocovar_file= json_data$phenocovar_file,
    geno_codes= json_data$geno_codes,
    alleles= json_data$alleles,    
    na.strings=json_data$na.strings,
    overwrite = TRUE))
}


# alternatively pass a  yaml file with
dataset <- write_control_file(control_file_path,
    crosstype= crosstype,
    geno_file= geno_file,
    pheno_file= pheno_file,    
    gmap_file= geno_map_file,
    phenocovar_file= pheno_covar_file,
    geno_codes=c(L=1L, C=2L),
    alleles= alleles,    
    na.strings=c("-", "NA"),
    overwrite = TRUE)

# make validation for the data
dataset  <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path

# check integrity of the cross
cat("Check the integrity of the cross object")
check_cross2(dataset)
if (check_cross2(dataset)){
  print("Dataset meets required specifications for a cross")
} else {
  print("Dataset does not meet required specifications")
}

# Dataset Summarys 
cat("A Summary about the Dataset You Provided\n")
summary(dataset)
n_ind(dataset)
n_chr(dataset)
cat("names of markers in the  object\n")
marker_names(dataset)
cat("names of phenotypes in a the object")
pheno_names(dataset)
cat("IDs for all individuals in the dataset cross object that have genotype data\n")
ind_ids_geno(dataset)
cat(" IDs for all individuals in the dataset object that have phenotype data")
ind_ids_pheno(dataset)
cat("Name of the founder Strains/n")
founders(dataset)

# Work on computing the genetic probabilities


analysis_type <- "single"


perform_genetic_pr <- function(cross, cores= 1, error_prob=0.002, analysis_type="single"){
# improve on this 
if (analysis_type == "single"){
    pr <- calc_genoprob(cross, error_prob= error_prob, quiet=FALSE, cores=cores)
    return (pr)
}
}

# get the genetic probability

Pr = perform_genetic_pr(dataset)
cat("Summaries  on the genetic probabilites \n")
print(Pr)
summary(Pr)


#calculate genotyping error LOD scores, to help identify potential genotyping errors (and problem markers and/or individuals
error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4)
print(error_lod)


#  Perform genome scane

# rework on this issue
## grab phenotypes and covariates; ensure that covariates have names attribute
pheno <- dataset$pheno
covar <- match(dataset$covar$sex, c("f", "m")) # make numeric
names(covar) <- rownames(dataset$covar)
Xcovar <- get_x_covar(dataset)

print(pheno)
print(covar)
print(Xcovar)

# rework on fetching th Xcovar and getting the covar data 

# perform kinship


perform_genome_scan <- function(cross, genome_prob, method, covar =NULL, xCovar=NULL)
# perform scan1 using haley-knott regression or linear model, or LOCO linear model
{
if (method == "LMM") {
   # provide parameters for this
   kinship = kinship(genome_prob)
   out  <- scan1(genome_prob, cross$pheno, kinship=kinship, addcovar=covar, Xcovar=Xcovar)    
}

if (method == "LOCO") {
# perform kinship inside better option
  kinship = kinship(genome_prob, "loco")
  out <- scan1(genome_prob, cross$pheno, kinship=kinship,addcovar=covar, Xcovar=Xcovar)
}
else {
# perform using Haley Knott 
out <- scan1(genome_prob, cross$pheno, addcovar=NULL, Xcovar=Xcovar)
}
return (out)
}

results <- perform_genome_scan(cross=dataset, genome_prob=Pr, method = "HMM")


results