# This script file contains  the an implementation of qtl mapping using r-qtl2
# For r-qtl1 implementation see: Scripts/rqtl_wrapper.R
# load the library

library(qtl2)
library(rjson)
library(stringi)
library(stringr)

options(stringsAsFactors = FALSE)

args = commandArgs(trailingOnly = TRUE)

# get the json file path with pre metadata required to create the cross


if (length(args) == 0) {
  stop("Argument for the metadata file is Missing ", call. = FALSE)
} else {
  json_file_path = args[1]
}

# TODO validation for the json file


if (!(file.exists(json_file_path))) {
  stop("The input file path does not exists")
} else {
  str_glue("The input path for the metadata >>>>>>> {json_file_path}")
  json_data  = fromJSON(file = json_file_path)
}


# generate random string file path here
genRandomFileName <- function(prefix, file_ext = ".txt") {
  randStr = paste(prefix, stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"), sep =
                    "_")
  return(paste(randStr, file_ext, sep = ""))
}


# TODO work on the optional parameters

# better fit for reading the data
# make validations

# parsing the required data for example the geno_codes


# geno_codes  handle the geno codes here

# make assertion for the geno_file and the geno_file exists
# make assertion for the physical map file or the geno map file exists

# create temp directory for this workspace
control_file_path  <- file.path("/home/kabui",
                                genRandomFileName(prefix = "control_", file_ext = ".json"))

str_glue("Generated control file path is  {control_file_path}")


# create the cross file here from the arguments provided
# todo add more validation checks here

# issue I can no define the different paths for files for example pheno_file

# think about the issue about geno codes ~~~~
# function to generate a cross file from a json list


# work on the default from the json_data validation here
# better defaults for the json file


json_data

json_data$na_strings


if (is.null(json_data$sep)){
cat("Using ',' as a default sep for cross file\n")
json_data$sep = ","
}

if (is.null(json_data$na.strings)){
cat("Using '-' and 'NA' as the default na.strings\n")
 json_data$na.strings = c("-" , "NA")
}

if (is.null(json_data$geno_transposed)) {
cat("Using FALSE as default parameter for geno_transposed\n")
json_data$geno_transposed = FALSE
}

if (is.null(json_data$founder_geno_transposed)){
cat("Using FALSE as default parameter for founder_geno_transposed\n")
json_data$founder_geno_transposed = FALSE
}


if (is.null(json_data$pheno_transposed)) {
cat("Using FALSE as default parameter for pheno_transposed\n")
json_data$geno_transposed = FALSE
}


if( is.null(json_data$covar_transposed) ){
cat("Using FALSE as default parameter for covar_transposed\n")
json_data$covar_transposed  = FALSE
}


if (is.null(json_data$phenocovar_transposed)){
cat("Using FALSE as default parameter for phenocovar_transposed\n")
json_data$phenocovar_transposed = FALSE
}


# probably check on using a dict for check this parameter
# more like default object parameter


cross_default1 <- c(
 "sep" = ",",
 "na_strings" = c("-", "NA"),
 "geno_transposed" = FALSE,
 "covar_transposed" = FALSE,
 "pheno_transposed" = FALSE,
 "founder_geno_transposed" = FALSE,
 "phenocovar_transposed" = FALSE
 
)


# mssing something here
generate_cross_object  <- function(json_data) {
 # function to write the cross object from a json data object
  return (
    write_control_file(
      control_file_path,
      crosstype = json_data$crosstype,
      geno_file = json_data$geno_file,
      pheno_file = json_data$pheno_file,
      gmap_file = json_data$geno_map_file,
      pmap_file = json_data$pheno_map_file,
      phenocovar_file = json_data$phenocovar_file,
      geno_codes = json_data$geno_codes,
      alleles = json_data$alleles,
      na.strings = json_data$na.strings,
      geno_transposed = json_data$geno_transposed,
      sex_file = json_data$sex_file,
      founder_geno_file = json_data$founder_geno_file,
      covar_file = json_data$covar_file,
      sex_covar = json_data$sex_covar,
      sex_codes = json_data$sex_codes,
      crossinfo_file = json_data$crossinfo_file,
      crossinfo_covar = json_data$crossinfo_covar,
      crossinfo_codes = json_data$crossinfo_codes,
      xchr = json_data$xchr,
      overwrite = TRUE
    )
  )
}


# alternatively pass a  yaml file with

generate_cross_object(json_data)

dataset  <- read_cross2(control_file_path, quiet = FALSE) # replace this with a dynamic path
# check integrity of the cross


cat("Check the integrity of the cross object")
check_cross2(dataset)
if (check_cross2(dataset)) {
  print("Dataset meets required specifications for a cross")
} else {
  print("Dataset does not meet required specifications")
}

# Dataset Summarys
cat("A Summary about the Dataset You Provided\n")
summary(dataset)
n_ind(dataset)
n_chr(dataset)
cat("names of markers in the  object\n")
marker_names(dataset)
cat("names of phenotypes in a the object")
pheno_names(dataset)
cat("IDs for all individuals in the dataset cross object that have genotype data\n")
ind_ids_geno(dataset)
cat(" IDs for all individuals in the dataset object that have phenotype data")
ind_ids_pheno(dataset)
cat("Name of the founder Strains/n")
founders(dataset)

# Work on computing the genetic probabilities


analysis_type <- "single"


perform_genetic_pr <- function(cross,
                               cores = 1,
                               error_prob = 0.002,
                               analysis_type = "single") {
  # improve on this
  if (analysis_type == "single") {
    pr <- calc_genoprob(cross,
                        error_prob = error_prob,
                        quiet = FALSE,
                        cores = cores)
    return (pr)
  }
}

# get the genetic probability

Pr = perform_genetic_pr(dataset)
cat("Summaries  on the genetic probabilites \n")
print(Pr)
summary(Pr)


#calculate genotyping error LOD scores, to help identify potential genotyping errors (and problem markers and/or individuals
error_lod <- calc_errorlod(dataset, Pr, quiet = FALSE, cores = 4)
print(error_lod)


#  Perform genome scan

# rework on this issue
## grab phenotypes and covariates; ensure that covariates have names attribute
pheno <- dataset$pheno
covar <- match(dataset$covar$sex, c("f", "m")) # make numeric
names(covar) <- rownames(dataset$covar)
Xcovar <- get_x_covar(dataset)

print(pheno)
print(covar)
print(Xcovar)

# TODO: rework on fetching th Xcovar and getting the covar data


perform_genome_scan <- function(cross,
                                genome_prob,
                                method,
                                covar = NULL,
                                xCovar = NULL)
  # perform scan1 using haley-knott regression or linear model, or LOCO linear model
{
  if (method == "LMM") {
    # provide parameters for this
    kinship = calc_kinship(genome_prob)
    out  <- scan1(
      genome_prob,
      cross$pheno,
      kinship = kinship,
      addcovar = covar,
      Xcovar = Xcovar
    )
  }
  
  if (method == "LOCO") {
    # perform kinship inside better option
    kinship = calc_kinship(genome_prob, "loco")
    out <- scan1(
      genome_prob,
      cross$pheno,
      kinship = kinship,
      addcovar = covar,
      Xcovar = Xcovar
    )
  }
  else {
    # perform using Haley Knott
    out <- scan1(genome_prob,
                 cross$pheno,
                 addcovar = NULL,
                 Xcovar = Xcovar)
  }
  return (out)
}

results <- perform_genome_scan(cross = dataset,
                               genome_prob = Pr,
                               method = "HMM")


results # this should probably return the method use here

# plot for the LOD scores from  performing the genome scan

generate_lod_plot <- function(cross, scan_result, method, base_dir = ".") {
  # Plot LOD curves for a genome scan
  color <- c("slateblue", "violetred", "green3")
  par(mar = c(4.1, 4.1, 1.6, 1.1))
  ymx <- maxlod(scan_result)
  file_name = genRandomFileName(prefix = "RQTL_LOD_SCORE_", file_ext = ".png")
  image_loc = file.path(base_dir , file_name)
  png(image_loc,
      width = 1000,
      height = 600,
      type = 'cairo-png')
  plot(
    scan_result,
    cross$gmap,
    lodcolumn = 1,
    col = color[1],
    main = colnames(cross$pheno)[1],
    ylim = c(0, ymx * 1.02)
  )
  legend(
    "topleft",
    lwd = 2,
    col = color[1],
    method,
    bg = "gray90",
    lty = c(1, 1, 2)
  )
  dev.off()
  return (image_loc)
}

lod_file_path <- generate_lod_plot(dataset, results, "HK")
lod_file_path

# work on 2 pair scan multiple pair scan # multiple pair scan

# Q  how do we perform geno scan with Genome scan with a single-QTL model ????

# perform  permutation tests for single-QTL method


perform_permutation_test <- function(cross,
                                     genome_prob,
                                     n_perm,
                                     method = "HKK",
                                     covar = NULL,
                                     Xcovar = NULL,
                                     perm_strata = NULL) {
				     
  # TODO! add chr_lengths and perm_Xsp
  
  if (method == "HKK") {
    perm <- scan1perm(
      genome_prob,
      cross$pheno,
      Xcovar = Xcovar,
      n_perm = n_perm,
      perm_strata = perm_strata
    )
  }
  else if (method == "LMM") {
    kinship = calc_kinship(genome_prob)
    perm <- scan1perm(
      genome_prob,
      cross$pheno,
      kinship = kinship,
      Xcovar = Xcovar,
      n_perm = n_perm
    )
  }
  else if (method == "LOCO") {
    kinship = calc_kinship(genome_prob, "loco")
    perm <- scan1perm(
      genome_prob,
      cross$pheno,
      kinship = kinship ,
      perm_strata = perm_strata,
      Xcovar = Xcovar,
      n_perm = n_perm
    )
  }
  return (perm)
}

# TODO ! get these parameters from argument from the user
perm <- perform_permutation_test(dataset, Pr, n_perm = 2, method = "LMM")
# get the permutation summary with a significance threshold
summary(perm, alpha = c(0.2, 0.05))

#  function to perform the LOD peaks

find_lod_peaks <-function(scan_results, cross, threshold=4,  drop=1.5){
# can this take pmap??? which map should we use???
# TODO add more ags
print("Finding the lod peaks with thresholds n and drop n\n")
return (find_peaks(scan_results, cross$gmap, threshold= threshold, drop=drop))
}
# TODO! add the number of cores
lod_peaks <- find_lod_peaks(results, dataset)
print(lod_peaks)

# TODO! format to return the data ???