aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
author: Zachary Sloan, 2014-02-28 00:02:13 +0000
committer: Zachary Sloan, 2014-02-28 00:02:13 +0000
commit: f29bfd7bb55b542c864cbada92eebf2454fa9aac (patch)
tree: 3864cb83dd3981b64a234afb9049ba583c7a6c31 /wqflask
parent: dd6e828ca3b205fefcae833aec53139961f9575f (diff)
download: genenetwork2-f29bfd7bb55b542c864cbada92eebf2454fa9aac.tar.gz
Made some changes/comments to Lei's load_genotypes.py file
Diffstat (limited to 'wqflask')
-rw-r--r-- wqflask/base/trait_collection.py | 39
-rw-r--r-- wqflask/wqflask/my_pylmm/data/load_genotypes_lei.py | 181
2 files changed, 220 insertions, 0 deletions
diff --git a/wqflask/base/trait_collection.py b/wqflask/base/trait_collection.py
index 797a4d63..49b0eaea 100644
--- a/wqflask/base/trait_collection.py
+++ b/wqflask/base/trait_collection.py
@@ -22,6 +22,45 @@ from sqlalchemy import (Column, Integer, String, Table, ForeignKey, Unicode, Boo
Text, Index)
from sqlalchemy.orm import relationship, backref
+#from redis import StrictRedis
+import redis
+Redis = redis.StrictRedis()
+
from wqflask.database import Base, init_db
class TraitCollection(object):
    """Common base class for trait collections."""


class AnonCollection(TraitCollection):
    """Trait collection of an anonymous user, stored as a Redis set.

    The set is stored under the key ``anon_id``. Its members are read once
    when the object is created and cached in ``collection_members``.
    """

    def __init__(self, anon_id):
        """Load the members stored in Redis under ``anon_id``.

        Args:
            anon_id: Redis key of the set holding this collection.
        """
        self.anon_id = anon_id
        self.collection_members = Redis.smembers(self.anon_id)
        print("self.collection_members is:", self.collection_members)
        # num_members is a read-only property computed from
        # collection_members, so it must not be assigned here (doing so
        # raises AttributeError).

    # NOTE(review): this method used to carry
    # @app.route("/collections/remove", methods=('POST',)). A Flask view
    # cannot be an instance method (Flask calls it without ``self``), so the
    # route should be registered on a module-level view function that builds
    # an AnonCollection and delegates to this method.
    def remove_traits(self, traits_to_remove):
        """Remove traits from the Redis set and from the cached members.

        Args:
            traits_to_remove: Iterable of member values to remove.

        Returns:
            The number of members left, as a string.
        """
        print("traits_to_remove:", traits_to_remove)
        # Normalise to a set so the difference below works for any iterable.
        traits_to_remove = set(traits_to_remove)
        for trait in traits_to_remove:
            Redis.srem(self.anon_id, trait)
        members_now = set(self.collection_members) - traits_to_remove
        print("members_now:", members_now)
        print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now)))
        # Keep the cached view in step with what is now stored in Redis.
        self.collection_members = members_now

        # We need to return something so we'll return this...maybe in the future
        # we can use it to check the results
        return str(len(members_now))

    @property
    def num_members(self):
        """Number of members currently cached for this collection."""
        print("members are:", self.collection_members)
        return len(self.collection_members)

    def members_as_set(self):
        """Return the cached members as a new ``set``."""
        return set(self.collection_members)
diff --git a/wqflask/wqflask/my_pylmm/data/load_genotypes_lei.py b/wqflask/wqflask/my_pylmm/data/load_genotypes_lei.py
new file mode 100644
index 00000000..e3144733
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/data/load_genotypes_lei.py
@@ -0,0 +1,181 @@
+"""
+Do whatever else is needed with the Marker object
+Probably create Genofile object as well
+Make sure rest of code works with params object (though
+everything in the params object should probably just be the parameters of
+the Genofile object)
+
+Continue to rename variables in ways that make sense and to add underscores between words
+
+Look at genofile_parser.py that I (Zach) wrote a while back and how much of it can just be reused
+
+Get rid of/improve uninformative comments
+
+"""
+
+
+from __future__ import absolute_import, print_function, division
+
+import sys
+import re
+import argparse
+
+import utilities
+import datastructure
+
def main():
    """Command-line entry point.

    Reads the path given with ``-c/--config``, loads it through
    ``utilities.get_config``, echoes the ``config`` section and then loads
    the genofile described by that configuration.
    """
    arg_parser = argparse.ArgumentParser(description='Load Genotypes')
    arg_parser.add_argument('-c', '--config')
    config_path = arg_parser.parse_args().config

    config = utilities.get_config(config_path)

    # Echo every option of the [config] section so the run is self-describing.
    print("config:")
    for entry in config.items('config'):
        print("\t", str(entry))

    params = fetch_parameters(config)
    parse_genofile(params)
+
def fetch_parameters(config):
    """Collect the settings needed to load one genotype file.

    Args:
        config: Configuration object (as returned by
            ``utilities.get_config``) whose ``config`` section provides
            ``inbredsetid`` and ``genofile``, plus the
            ``genovalue_<symbol>`` options read by ``insert_genodata``.

    Returns:
        dict with the keys ``inbredsetid``, ``speciesid``, ``genofreezeid``,
        ``dataid``, ``genofile`` and ``config``.
    """
    params = {}
    params['inbredsetid'] = config.get('config', 'inbredsetid')
    species = datastructure.get_species(params['inbredsetid'])
    params['speciesid'] = species[0]
    genofreeze = datastructure.get_genofreeze_byinbredsetid(params['inbredsetid'])
    params['genofreezeid'] = genofreeze[0]
    params['dataid'] = datastructure.get_nextdataid_genotype()
    params['genofile'] = config.get('config', 'genofile')
    # Carried along so insert_genodata() can translate genotype symbols
    # through the "genovalue_<symbol>" options.
    params['config'] = config

    return params


def _parse_meta_line(line):
    """Parse one ``@key:value;key=value`` metadata line into a dict.

    Items without a ``:`` or ``=`` separator are ignored instead of raising
    IndexError. Only the first separator splits, so a value may itself
    contain ``:`` or ``=``.
    """
    meta = {}
    for item in line.strip('@').split(';'):
        key_value = re.split(':|=', item, maxsplit=1)
        if len(key_value) == 2:
            meta[key_value[0].strip()] = key_value[1].strip()
    return meta


def parse_genofile(params):
    """Read the genofile named in ``params`` and load its markers.

    Blank lines and ``#`` lines are skipped, ``@`` lines are collected as
    metadata, the line starting with "chr" (case-insensitive) is the header
    whose columns from the fifth on are strain names, and every other line is
    a marker row.

    For each marker row the Geno record is looked up or created. If the
    marker has no GenoXRef row for this geno freeze yet, its genotype values
    and cross-reference are inserted and ``params['dataid']`` is advanced.

    Args:
        params: dict built by ``fetch_parameters``. ``params['dataid']`` is
            updated in place.

    Raises:
        ValueError: if a marker row appears before the header line.
    """
    # One cursor/connection pair is shared by every query for this file.
    cursor, con = utilities.get_cursor()
    strains = None

    with open(params['genofile']) as genofile:
        meta_data = {}
        print()
        for line in genofile:
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            if line.startswith('@'):
                meta_data.update(_parse_meta_line(line))

            elif line.lower().startswith("chr"):
                print("geno file meta:")
                for key, value in meta_data.items():
                    print("\t{}: {}".format(key, value))
                print("geno file head:\n\t{}\n".format(line))
                strain_names = line.split()[4:]
                strains = datastructure.get_strains_bynames(
                    inbredsetid=params['inbredsetid'],
                    strain_names=strain_names,
                    updatestrainxref="yes")

            else:
                if strains is None:
                    raise ValueError(
                        "genotype data line found before the header line: "
                        "{!r}".format(line))
                marker = Marker(line)
                geno_id = check_or_insert_geno(params, marker,
                                               cursor=cursor, con=con)

                # TODO: check if this can go earlier
                if check_GenoXRef(params, geno_id, cursor=cursor):
                    continue

                insert_genodata(params, marker, strains, cursor=cursor)
                insert_genoxref(params, marker, geno_id, cursor=cursor)
                params['dataid'] += 1


class Marker(object):
    """One marker row of a genofile, split on whitespace.

    Attributes are the raw string cells: ``chromosome``, ``locus``, ``cm``,
    ``mb`` (first four columns) and ``values`` (all remaining columns).
    """

    def __init__(self, line):
        self.cells = line.split()
        self.chromosome = self.cells[0]
        self.locus = self.cells[1]
        self.cm = self.cells[2]
        self.mb = self.cells[3]
        self.values = self.cells[4:]


def check_or_insert_geno(params, marker, cursor=None, con=None):
    """Return the Geno.Id for ``marker``, inserting a Geno row if needed.

    Args:
        params: dict providing ``speciesid``.
        marker: ``Marker`` whose ``locus``, ``chromosome`` and ``mb`` are
            used.
        cursor, con: optional cursor and connection to reuse. When either is
            missing a new pair is requested from ``utilities.get_cursor``.

    Returns:
        The id of the existing or newly inserted Geno record.
    """
    if cursor is None or con is None:
        cursor, con = utilities.get_cursor()
    sql = """
        SELECT Geno.`Id`
        FROM Geno
        WHERE Geno.`SpeciesId`=%s
        AND Geno.`Name` like %s
        """
    cursor.execute(sql, (params['speciesid'], marker.locus))
    result = cursor.fetchone()
    if result:
        geno_id = result[0]
        print("get geno record: ", geno_id)
    else:
        sql = """
            INSERT INTO Geno
            SET
            Geno.`SpeciesId`=%s,
            Geno.`Name`=%s,
            Geno.`Marker_Name`=%s,
            Geno.`Chr`=%s,
            Geno.`Mb`=%s
            """
        cursor.execute(sql, (params['speciesid'], marker.locus, marker.locus,
                             marker.chromosome, marker.mb))
        row_count = cursor.rowcount
        geno_id = con.insert_id()
        print("INSERT INTO Geno: %d record: %d" % (row_count, geno_id))
    return geno_id


def check_GenoXRef(params, geno_id, cursor=None):
    """Count GenoXRef rows linking ``geno_id`` to the current geno freeze.

    Args:
        params: dict providing ``genofreezeid``.
        geno_id: id of the Geno record to look for.
        cursor: optional cursor to reuse. A new one is requested from
            ``utilities.get_cursor`` when omitted.

    Returns:
        ``cursor.rowcount`` after the SELECT. Zero (falsy) means the marker
        has not been cross-referenced yet.
    """
    if cursor is None:
        cursor, _ = utilities.get_cursor()
    sql = """
        select GenoXRef.*
        from GenoXRef
        where GenoXRef.`GenoFreezeId`=%s
        AND GenoXRef.`GenoId`=%s
        """
    cursor.execute(sql, (params['genofreezeid'], geno_id))
    row_count = cursor.rowcount
    return row_count


def insert_genodata(params, marker, strains, cursor=None):
    """Insert one GenoData row per strain that has a usable genotype value.

    Each raw cell of ``marker.values`` is passed through
    ``utilities.to_db_string`` and then translated with the
    ``genovalue_<symbol>`` option of the ``config`` section. Cells that are
    empty, do not translate to an integer, or translate to something other
    than -1, 0 or 1 are skipped.

    Args:
        params: dict providing ``dataid`` and ``config``.
        marker: ``Marker`` whose ``values`` line up with ``strains``.
        strains: sequence of strain records whose first element is the
            strain id.
        cursor: optional cursor to reuse.
    """
    if cursor is None:
        cursor, _ = utilities.get_cursor()
    config = params['config']
    sql = """
        INSERT INTO GenoData
        SET
        GenoData.`Id`=%s,
        GenoData.`StrainId`=%s,
        GenoData.`value`=%s
        """
    # zip() pairs each strain with its column and simply stops at the shorter
    # of the two if a row has fewer cells than the header has strains.
    for strain, raw_value in zip(strains, marker.values):
        strain_id = strain[0]
        value = utilities.to_db_string(raw_value, None)
        if not value:
            continue
        value = config.get('config', "genovalue_" + value)
        try:
            number = int(value)
        except ValueError:
            continue
        if number not in [-1, 0, 1]:
            continue
        cursor.execute(sql, (params['dataid'], strain_id, number))


def insert_genoxref(params, marker, geno_id, cursor=None):
    """Insert the GenoXRef row tying a marker's data to the geno freeze.

    Args:
        params: dict providing ``genofreezeid`` and ``dataid``.
        marker: ``Marker`` whose ``cm`` is stored in the ``cM`` column.
        geno_id: id of the marker's Geno record.
        cursor: optional cursor to reuse.
    """
    if cursor is None:
        cursor, _ = utilities.get_cursor()
    sql = """
        INSERT INTO GenoXRef
        SET
        GenoXRef.`GenoFreezeId`=%s,
        GenoXRef.`GenoId`=%s,
        GenoXRef.`DataId`=%s,
        GenoXRef.`cM`=%s,
        GenoXRef.`Used_for_mapping`=%s
        """
    cursor.execute(sql, (params['genofreezeid'], geno_id, params['dataid'],
                         marker.cm, 'N'))
    rowcount = cursor.rowcount
    print("INSERT INTO GenoXRef: %d record" % (rowcount))
+
if __name__ == "__main__":
    # Record the raw argument vector first so a failed run can be reproduced
    # from its log output.
    argv_report = "command line arguments:\n\t%s" % sys.argv
    print(argv_report)
    main()
    print("exit successfully")