diff options
author | Zachary Sloan | 2013-04-02 22:07:01 +0000 |
---|---|---|
committer | Zachary Sloan | 2013-04-02 22:07:01 +0000 |
commit | bf78deec93a6ef3296b4c8cf38a71d1a03480d21 (patch) | |
tree | e13051ed1ae680ee74f6bfd45bd2b96127cc148e /wqflask | |
parent | 51e120ae25a7955a895d5e79d5ee459764a331ea (diff) | |
download | genenetwork2-bf78deec93a6ef3296b4c8cf38a71d1a03480d21.tar.gz |
Committing before splitting code that runs pylmm with plink files
and code that runs it with json
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/base/data_set.py | 80 | ||||
-rwxr-xr-x | wqflask/wqflask/marker_regression/marker_regression.py | 41 | ||||
-rw-r--r-- | wqflask/wqflask/static/new/javascript/marker_regression.coffee | 1 | ||||
-rw-r--r-- | wqflask/wqflask/static/new/javascript/marker_regression.js | 1 | ||||
-rw-r--r-- | wqflask/wqflask/views.py | 2 |
5 files changed, 71 insertions, 54 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 17881e53..ab8554a0 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -35,6 +35,7 @@ from base import webqtlConfig from base import species from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil +from utility.benchmark import Bench from MySQLdb import escape_string as escape from pprint import pformat as pf @@ -73,14 +74,60 @@ class Markers(object): self.markers = json.load(json_data_fh) def add_pvalues(self, p_values): + print("length of self.markers:", len(self.markers)) + print("length of p_values:", len(p_values)) + + # THIS IS only needed for the case when we are limiting the number of p-values calculated + if len(self.markers) > len(p_values): + self.markers = self.markers[:len(p_values)] + for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value print("p_value is:", marker['p_value']) marker['lod_score'] = -math.log10(marker['p_value']) #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + + + +class HumanMarkers(Markers): + + def __init__(self, name): + marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim')) + self.markers = [] + for line in marker_data_fh: + splat = line.strip().split() + marker = {} + marker['chr'] = int(splat[0]) + marker['name'] = splat[1] + marker['Mb'] = float(splat[3]) / 1000000 + self.markers.append(marker) + + #print("markers is: ", pf(self.markers)) + def add_pvalues(self, p_values): + #for marker, p_value in itertools.izip(self.markers, p_values): + # if marker['Mb'] <= 0 and marker['chr'] == 0: + # continue + # marker['p_value'] = p_value + # print("p_value is:", marker['p_value']) + # marker['lod_score'] = -math.log10(marker['p_value']) + # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + + super(HumanMarkers, self).add_pvalues(p_values) + + with Bench("deleting markers"): + markers = [] + for marker in self.markers: + if not marker['Mb'] <= 0 and not marker['chr'] == 0: + markers.append(marker) + self.markers = markers + + + class DatasetGroup(object): """ Each group has multiple datasets; each species has multiple groups. @@ -104,21 +151,17 @@ class DatasetGroup(object): self.incparentsf1 = False self.allsamples = None - self.markers = Markers(self.name) - - - #def read_genotype(self): - # self.read_genotype_file() - # - # if not self.genotype: # Didn'd succeed, so we try method 2 - # self.read_genotype_data() + + + def get_markers(self): + print("self.species is:", self.species) + if self.species == "human": + marker_class = HumanMarkers + else: + marker_class = Markers - #def read_genotype_json(self): - # '''Read genotype from json file''' - # - # json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.name + '.json')) - # markers = json.load(json_data) - # + self.markers = marker_class(self.name) + def get_f1_parent_strains(self): try: @@ -321,12 +364,9 @@ class PhenotypeDataSet(DataSet): continue # for now if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): description = this_trait.pre_publication_description - this_trait.description_display = description - - try: - this_trait.description_display.decode('ascii') - except Exception: - this_trait.description_display = this_trait.description_display.decode('utf-8') + this_trait.description_display = description.decode('utf-8') + + if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index c3555e8f..a640d37f 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -57,7 +57,6 @@ class MarkerRegression(object): chromosomes = chromosome_mb_lengths, qtl_results = self.qtl_results, ) - def gen_data(self, tempdata): @@ -67,8 +66,8 @@ class MarkerRegression(object): file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) plink_input = input.plink(file_base, type='b') - - + + pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) pheno_vector = pheno_vector.reshape((len(pheno_vector), 1)) covariate_matrix = np.ones((pheno_vector.shape[0],1)) @@ -83,9 +82,6 @@ class MarkerRegression(object): eigen_values = [] eigen_vectors = [] - - print("pheno_vector shape is: ", pf(pheno_vector.shape)) - #print("pheno_vector is: ", pf(pheno_vector)) #print("kinship_matrix is: ", pf(kinship_matrix)) @@ -101,9 +97,6 @@ class MarkerRegression(object): # eigen_values = np.fromfile(file_base + ".kin.kva") # eigen_vectors = np.fromfile(file_base + ".kin.kve") - #print("eigen_values is: ", pf(eigen_values)) - #print("eigen_vectors is: ", pf(eigen_vectors)) - n = kinship_matrix.shape[0] lmm_ob = lmm.LMM(pheno_vector, kinship_matrix, @@ -121,8 +114,8 @@ class MarkerRegression(object): print("# snps is: ", pf(plink_input.numSNPs)) with Bench("snp iterator loop"): for snp, this_id in plink_input: - #if count > 10000: - # break + if count > 1000: + break count += 1 x = snp[keep].reshape((n,1)) @@ -138,13 +131,13 @@ class MarkerRegression(object): p_values.append(np.nan) t_statistics.append(np.nan) continue - + # Its ok to center the genotype - I used options.normalizeGenotype to # force the removal of missing genotypes as opposed to replacing them with MAF. - + #if not options.normalizeGenotype: # xs = (xs - xs.mean()) / np.sqrt(xs.var()) - + filtered_pheno = pheno_vector[keeps] filtered_covariate_matrix = covariate_matrix[keeps,:] filtered_kinship_matrix = kinship_matrix[keeps,:][:,keeps] @@ -167,7 +160,6 @@ class MarkerRegression(object): ts,ps,beta,betaVar = lmm_ob.association(x) p_values.append(ps) t_statistics.append(ts) - #genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] # @@ -187,28 +179,11 @@ class MarkerRegression(object): # temp_data=tempdata #) - print("p_values is: ", pf(p_values)) + self.dataset.group.get_markers() self.dataset.group.markers.add_pvalues(p_values) - #self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] - #lrs_values_sorted = sorted(self.lrs_values) - # - #lrs_values_length = len(lrs_values_sorted) - # - #def lrs_threshold(place): - # return lrs_values_sorted[int((lrs_values_length * place) -1)] - # - #self.lrs_thresholds = Bunch( - # suggestive = lrs_threshold(.37), - # significant = lrs_threshold(.95), - # highly_significant = lrs_threshold(.99), - # ) - self.qtl_results = self.dataset.group.markers.markers - for marker in self.qtl_results: - if marker['lrs_value'] > webqtlConfig.MAXLRS: - marker['lrs_value'] = webqtlConfig.MAXLRS def identify_empty_samples(self): no_val_samples = [] diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.coffee b/wqflask/wqflask/static/new/javascript/marker_regression.coffee index 6e605fa7..3e14ab6b 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.coffee +++ b/wqflask/wqflask/static/new/javascript/marker_regression.coffee @@ -2,6 +2,7 @@ $ -> class Manhattan_Plot constructor: (@plot_height, @plot_width) -> @qtl_results = js_data.qtl_results + console.log("qtl_results are:", @qtl_results) @chromosomes = js_data.chromosomes @total_length = 0 diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.js b/wqflask/wqflask/static/new/javascript/marker_regression.js index cb3c09cb..09470daf 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.js +++ b/wqflask/wqflask/static/new/javascript/marker_regression.js @@ -11,6 +11,7 @@ this.plot_height = plot_height; this.plot_width = plot_width; this.qtl_results = js_data.qtl_results; + console.log("qtl_results are:", this.qtl_results); this.chromosomes = js_data.chromosomes; this.total_length = 0; this.max_chr = this.get_max_chr(); diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 46433430..7f5f88e0 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -168,7 +168,7 @@ def marker_regression_page(): if key in wanted or key.startswith(('value:')): start_vars[key] = value - version = "v5" + version = "v13" key = "marker_regression:{}:".format(version) + json.dumps(start_vars, sort_keys=True) with Bench("Loading cache"): result = Redis.get(key) |