diff options
-rw-r--r-- | doc/elasticsearch.org | 206 | ||||
-rw-r--r-- | wqflask/wqflask/marker_regression/gemma_mapping.py | 22 |
2 files changed, 219 insertions, 9 deletions
diff --git a/doc/elasticsearch.org b/doc/elasticsearch.org index 18adfc8b..864a8363 100644 --- a/doc/elasticsearch.org +++ b/doc/elasticsearch.org @@ -1,5 +1,12 @@ * Elasticsearch +** Introduction + +GeneNetwork uses elasticsearch (ES) for all things considered +'state'. One example is user collections, another is user management. + +** Example + To get the right environment, first you can get a python REPL with something like : env GN2_PROFILE=~/opt/gn-latest ./bin/genenetwork2 ../etc/default_settings.py -cli python @@ -39,3 +46,202 @@ record['hits']['hits'][0][u'_source']['email_address'] u"myname@email.com" #+END_SRC + +** Health + +ES provides support for checking its health: + +: curl -XGET http://localhost:9200/_cluster/health?pretty=true + +#+BEGIN_SRC json + + + { + "cluster_name" : "asgard", + "status" : "yellow", + "timed_out" : false, + "number_of_nodes" : 1, + "number_of_data_nodes" : 1, + "active_primary_shards" : 5, + "active_shards" : 5, + "relocating_shards" : 0, + "initializing_shards" : 0, + "unassigned_shards" : 5 + } + +#+END_SRC + +Yellow means all primary shards are active but the replica shards are +unassigned, which is expected when just one instance is running (no worries). 
+ +To get full cluster info + +: curl -XGET "localhost:9200/_cluster/stats?human&pretty" + +#+BEGIN_SRC json +{ + "_nodes" : { + "total" : 1, + "successful" : 1, + "failed" : 0 + }, + "cluster_name" : "elasticsearch", + "timestamp" : 1529050366452, + "status" : "yellow", + "indices" : { + "count" : 3, + "shards" : { + "total" : 15, + "primaries" : 15, + "replication" : 0.0, + "index" : { + "shards" : { + "min" : 5, + "max" : 5, + "avg" : 5.0 + }, + "primaries" : { + "min" : 5, + "max" : 5, + "avg" : 5.0 + }, + "replication" : { + "min" : 0.0, + "max" : 0.0, + "avg" : 0.0 + } + } + }, + "docs" : { + "count" : 14579, + "deleted" : 0 + }, + "store" : { + "size" : "44.7mb", + "size_in_bytes" : 46892794 + }, + "fielddata" : { + "memory_size" : "0b", + "memory_size_in_bytes" : 0, + "evictions" : 0 + }, + "query_cache" : { + "memory_size" : "0b", + "memory_size_in_bytes" : 0, + "total_count" : 0, + "hit_count" : 0, + "miss_count" : 0, + "cache_size" : 0, + "cache_count" : 0, + "evictions" : 0 + }, + "completion" : { + "size" : "0b", + "size_in_bytes" : 0 + }, + "segments" : { + "count" : 24, + "memory" : "157.3kb", + "memory_in_bytes" : 161112, + "terms_memory" : "122.6kb", + "terms_memory_in_bytes" : 125569, + "stored_fields_memory" : "15.3kb", + "stored_fields_memory_in_bytes" : 15728, + "term_vectors_memory" : "0b", + "term_vectors_memory_in_bytes" : 0, + "norms_memory" : "10.8kb", + "norms_memory_in_bytes" : 11136, + "points_memory" : "111b", + "points_memory_in_bytes" : 111, + "doc_values_memory" : "8.3kb", + "doc_values_memory_in_bytes" : 8568, + "index_writer_memory" : "0b", + "index_writer_memory_in_bytes" : 0, + "version_map_memory" : "0b", + "version_map_memory_in_bytes" : 0, + "fixed_bit_set" : "0b", + "fixed_bit_set_memory_in_bytes" : 0, + "max_unsafe_auto_id_timestamp" : -1, + "file_sizes" : { } + } + }, + "nodes" : { + "count" : { + "total" : 1, + "data" : 1, + "coordinating_only" : 0, + "master" : 1, + "ingest" : 1 + }, + "versions" : [ + "6.2.1" + ], + 
"os" : { + "available_processors" : 16, + "allocated_processors" : 16, + "names" : [ + { + "name" : "Linux", + "count" : 1 + } + ], + "mem" : { + "total" : "125.9gb", + "total_in_bytes" : 135189286912, + "free" : "48.3gb", + "free_in_bytes" : 51922628608, + "used" : "77.5gb", + "used_in_bytes" : 83266658304, + "free_percent" : 38, + "used_percent" : 62 + } + }, + "process" : { + "cpu" : { + "percent" : 0 + }, + "open_file_descriptors" : { + "min" : 415, + "max" : 415, + "avg" : 415 + } + }, + "jvm" : { + "max_uptime" : "1.9d", + "max_uptime_in_millis" : 165800616, + "versions" : [ + { + "version" : "9.0.4", + "vm_name" : "OpenJDK 64-Bit Server VM", + "vm_version" : "9.0.4+11", + "vm_vendor" : "Oracle Corporation", + "count" : 1 + } + ], + "mem" : { + "heap_used" : "1.1gb", + "heap_used_in_bytes" : 1214872032, + "heap_max" : "23.8gb", + "heap_max_in_bytes" : 25656426496 + }, + "threads" : 110 + }, + "fs" : { + "total" : "786.4gb", + "total_in_bytes" : 844400918528, + "free" : "246.5gb", + "free_in_bytes" : 264688160768, + "available" : "206.5gb", + "available_in_bytes" : 221771468800 + }, + "plugins" : [ ], + "network_types" : { + "transport_types" : { + "netty4" : 1 + }, + "http_types" : { + "netty4" : 1 + } + } + } +} +#+BEGIN_SRC json diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 5ebab611..cbd0bae5 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -3,11 +3,15 @@ import os, math, string, random, json from base import webqtlConfig from base.trait import GeneralTrait from base.data_set import create_dataset -from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMPDIR +from utility.tools import flat_files, GEMMA_COMMAND, GEMMA_WRAPPER_COMMAND, TEMPDIR, WEBSERVER_MODE import utility.logger logger = utility.logger.getLogger(__name__ ) +GEMMAOPTS = "-debug" +if WEBSERVER_MODE == 'PROD': + GEMMAOPTS 
= "-no-check" + def run_gemma(this_dataset, samples, vals, covariates, use_loco): """Generates p-values for each marker using GEMMA""" @@ -34,7 +38,7 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco): if use_loco == "True": k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) - generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), + generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), genofile_name, flat_files('genotype/bimbam'), genofile_name, @@ -45,7 +49,7 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco): logger.debug("k_command:" + generate_k_command) os.system(generate_k_command) - gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (TEMPDIR, + gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- '+GEMMAOPTS+' -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (TEMPDIR, k_output_filename, flat_files('genotype/bimbam'), genofile_name, @@ -54,20 +58,20 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco): gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6)) if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('mapping'), + gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 2 -maf 0.1 > %s/gn2/%s.json' % (flat_files('mapping'), this_dataset.group.name, flat_files('genotype/bimbam'), genofile_name, TEMPDIR, gwa_output_filename) else: - gemma_command += 
' -a %s/%s_snps.txt -lmm 2 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), + gemma_command += ' -a %s/%s_snps.txt -lmm 2 -maf 0.1 > %s/gn2/%s.json' % (flat_files('genotype/bimbam'), genofile_name, TEMPDIR, gwa_output_filename) else: - gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('genotype/bimbam'), + gemma_command = GEMMA_COMMAND + ' ' + GEMMAOPTS + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 2 -maf 0.1' % (flat_files('genotype/bimbam'), genofile_name, flat_files('genotype/bimbam'), genofile_name, @@ -77,12 +81,12 @@ def run_gemma(this_dataset, samples, vals, covariates, use_loco): genofile_name) if covariates != "": - gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('mapping'), + gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'), this_dataset.group.name, webqtlConfig.GENERATED_IMAGE_DIR, genofile_name) else: - gemma_command += ' -outdir %s -debug -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, + gemma_command += ' -outdir %s -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR, genofile_name) logger.debug("gemma_command:" + gemma_command) @@ -218,4 +222,4 @@ def parse_loco_output(this_dataset, gwa_output_filename): included_markers.append(line.split("\t")[1]) p_values.append(float(line.split("\t")[9])) - return marker_obs
\ No newline at end of file + return marker_obs |