From f29bfd7bb55b542c864cbada92eebf2454fa9aac Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Fri, 28 Feb 2014 00:02:13 +0000
Subject: Made some changes/comments to Lei's load_genotypes.py file

---
 wqflask/base/trait_collection.py                   |  39 +++++
 .../wqflask/my_pylmm/data/load_genotypes_lei.py    | 181 +++++++++++++++++++++
 2 files changed, 220 insertions(+)
 create mode 100644 wqflask/wqflask/my_pylmm/data/load_genotypes_lei.py

diff --git a/wqflask/base/trait_collection.py b/wqflask/base/trait_collection.py
index 797a4d63..49b0eaea 100644
--- a/wqflask/base/trait_collection.py
+++ b/wqflask/base/trait_collection.py
@@ -22,6 +22,45 @@ from sqlalchemy import (Column, Integer, String, Table, ForeignKey, Unicode, Boo
                         Text, Index)
 from sqlalchemy.orm import relationship, backref
 
+#from redis import StrictRedis
+import redis
+Redis = redis.StrictRedis()
+
 from wqflask.database import Base, init_db
 
 class TraitCollection(object):
+    """Common base class for the trait collection types in this module."""
+
+class AnonCollection(TraitCollection):
+    """Collection whose members live in the Redis set stored under ``anon_id``."""
+    def __init__(self, anon_id):
+        self.anon_id = anon_id
+        self.collection_members = Redis.smembers(self.anon_id)
+        print("self.collection_members is:", self.collection_members)
+        # num_members is a read-only property (below), so it is not assigned here.
+
+    # NOTE(review): @app.route("/collections/remove") was dropped: `app` is not in
+    # scope in this hunk and Flask cannot bind `self`; route a module-level view here.
+    def remove_traits(self, traits_to_remove):
+        """Remove the given traits from Redis; return the remaining count as a str."""
+        print("traits_to_remove:", traits_to_remove)
+        traits_to_remove = set(traits_to_remove)  # the set difference below needs a set
+        for trait in traits_to_remove:
+            Redis.srem(self.anon_id, trait)
+        members_now = self.collection_members - traits_to_remove
+        print("members_now:", members_now)
+        print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now)))
+        self.collection_members = members_now  # keep the cached copy in step with Redis
+        return str(len(members_now))  # returned so a caller can check the result
+
+    @property
+    def num_members(self):
+        """Number of members currently cached on this collection."""
+        return len(self.collection_members)
+
+    #@property
+    #def display_num_members(self):
+    #    return display_collapsible(self.num_members)
+
+    def members_as_set(self):
+        return set(self.collection_members)
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/data/load_genotypes_lei.py b/wqflask/wqflask/my_pylmm/data/load_genotypes_lei.py
new file mode 100644
index 00000000..e3144733
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/data/load_genotypes_lei.py
@@ -0,0 +1,181 @@
+"""
+Load genotypes from a geno file into the database (work in progress).
+
+TODO:
+- Do whatever else is needed with the Marker object.
+- Probably create a Genofile object as well.
+- Make sure the rest of the code works with the params object (though everything
+  in params should probably just become the parameters of the Genofile object).
+- Continue to rename variables in ways that make sense and to add underscores
+  between words.
+- Look at genofile_parser.py that I (Zach) wrote a while back and see how much of
+  it can just be reused.
+- Get rid of/improve uninformative comments.
+"""
+
+
+from __future__ import absolute_import, print_function, division
+
+import sys
+import re
+import argparse
+
+import utilities
+import datastructure
+
+def main():
+    """Parse -c/--config, echo the 'config' section, then load the genofile."""
+    arg_parser = argparse.ArgumentParser(description='Load Genotypes')
+    arg_parser.add_argument('-c', '--config')
+    config_path = arg_parser.parse_args().config
+    config = utilities.get_config(config_path)
+    print("config:")
+    for entry in config.items('config'):
+        print("\t", str(entry))
+    params = fetch_parameters(config)
+    parse_genofile(params)
+
+def fetch_parameters(config):
+    """Collect the ids and the genofile path the loader needs into one dict."""
+    inbredset_id = config.get('config', 'inbredsetid')
+    return {
+        'inbredsetid': inbredset_id,
+        # The first column of each lookup row is stored as the id.
+        "speciesid": datastructure.get_species(inbredset_id)[0],
+        'genofreezeid': datastructure.get_genofreeze_byinbredsetid(inbredset_id)[0],
+        'dataid': datastructure.get_nextdataid_genotype(),
+        'genofile': config.get('config', 'genofile'),
+    }
+
+    
+def parse_genofile(params):
+    """Read params['genofile'] line by line and load each marker row into the database."""
+    with open(params['genofile']) as genofile:
+        meta_data = {}
+        print()
+        for line in genofile:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue  # blank lines and '#' comment lines carry no data
+            if line.startswith('@'):
+                # '@' lines hold metadata as key:value or key=value items split by ';'
+                for item in line.strip('@').split(';'):
+                    kv = re.split(':|=', item)
+                    if len(kv) < 2:
+                        continue  # e.g. the empty item after a trailing ';'
+                    meta_data[kv[0].strip()] = kv[1].strip()
+            elif line.lower().startswith("chr"):
+                # Header row: the first four columns are marker fields, the rest strains
+                print("geno file meta:")
+                for key, value in meta_data.iteritems():
+                    print("\t{}: {}".format(key, value))
+                print("geno file head:\n\t{}\n".format(line))
+                strain_names = line.split()[4:]
+                # `inbredsetid` was an undefined bare name; the id lives in params
+                strains = datastructure.get_strains_bynames(inbredsetid=params['inbredsetid'],
+                                                            strain_names=strain_names,
+                                                            updatestrainxref="yes")
+            else:
+                # Any other line is a marker row
+                marker = Marker(line)
+                geno_id = check_or_insert_geno(params, marker)
+                # NOTE(review): this file defines check_GenoXRef(), insert_genodata() and
+                # insert_genoxref() with no parameters, so these three calls cannot work
+                # until the helpers accept params (and the strains/marker/geno_id they need).
+                if check_genoxref(params): #Check if this can go earlier
+                    continue
+                insert_genodata(params)
+                insert_genoxref(params)
+                # `data_id` was never bound; presumably the running id is params['dataid']
+                params['dataid'] += 1
+    
+    
+class Marker(object):
+    """One data row of a geno file: chromosome, locus, cM, Mb, then strain values."""
+    def __init__(self, line):
+        # Bind the split row locally too; the bare name `cells` was undefined before.
+        cells = self.cells = line.split()
+        self.chromosome, self.locus = cells[0], cells[1]
+        self.cm, self.mb = cells[2], cells[3]
+        self.values = cells[4:]
+        
+def check_or_insert_geno(params, marker):
+    """Return the Geno.Id for marker.locus in params['speciesid'], inserting a row if none exists."""
+    cursor, con = utilities.get_cursor()
+    sql = """
+        SELECT Geno.`Id`
+        FROM Geno
+        WHERE Geno.`SpeciesId`=%s
+        AND Geno.`Name` like %s
+        """
+    cursor.execute(sql, (params['speciesid'], marker.locus))  # were undefined bare names
+    result = cursor.fetchone()
+    if result:
+        geno_id = result[0]
+        print("get geno record: ", geno_id)
+    else:
+        sql = """
+            INSERT INTO Geno
+            SET
+            Geno.`SpeciesId`=%s,
+            Geno.`Name`=%s,
+            Geno.`Marker_Name`=%s,
+            Geno.`Chr`=%s,
+            Geno.`Mb`=%s
+            """
+        cursor.execute(sql, (params['speciesid'], marker.locus, marker.locus, marker.chromosome, marker.mb))  # bare `chr` was the builtin
+        geno_id = con.insert_id()
+        print("INSERT INTO Geno: %d record: %d" % (cursor.rowcount, geno_id))
+    return geno_id
+
+def check_GenoXRef():  # NOTE(review): parse_genofile calls check_genoxref(params) -- name and signature do not match
+    sql = """
+        select GenoXRef.*
+        from GenoXRef
+        where GenoXRef.`GenoFreezeId`=%s
+        AND GenoXRef.`GenoId`=%s
+        """
+    cursor.execute(sql, (geno_freeze_id, geno_id))  # NOTE(review): cursor, geno_freeze_id and geno_id are not defined in this scope
+    row_count = cursor.rowcount
+    return row_count  # presumably the number of matching GenoXRef rows; the caller treats non-zero as "already loaded"
+    
+def insert_genodata():  # NOTE(review): takes no parameters, but parse_genofile calls insert_genodata(params)
+    for index, strain in enumerate(strains):  # NOTE(review): `strains` is not defined in this scope
+        strain_id = strain[0]
+        value = utilities.to_db_string(values[index], None)  # NOTE(review): `values` is undefined; presumably marker.values
+        if not value:
+            continue
+        value = config.get('config', "genovalue_" + value)  # looks up the genovalue_<code> option; NOTE(review): `config` is not in scope
+        try:
+            number = int(value)
+        except ValueError:
+            continue
+        if number not in [-1, 0, 1]:  # only -1, 0 and 1 are inserted
+            continue
+        sql = """
+            INSERT INTO GenoData
+            SET
+            GenoData.`Id`=%s,
+            GenoData.`StrainId`=%s,
+            GenoData.`value`=%s
+            """
+        cursor.execute(sql, (dataid, strainid, number))  # NOTE(review): `strainid` is never bound (strain_id is); cursor and dataid are undefined here
+
+def insert_genoxref():  # NOTE(review): takes no parameters, but parse_genofile calls insert_genoxref(params)
+    sql = """
+        INSERT INTO GenoXRef
+        SET
+        GenoXRef.`GenoFreezeId`=%s,
+        GenoXRef.`GenoId`=%s,
+        GenoXRef.`DataId`=%s,
+        GenoXRef.`cM`=%s,
+        GenoXRef.`Used_for_mapping`=%s
+        """
+    cursor.execute(sql, (genofreezeid, genoid, dataid, cm, 'N'))  # NOTE(review): cursor, genofreezeid, genoid, dataid and cm are not defined in this scope
+    rowcount = cursor.rowcount
+    print("INSERT INTO GenoXRef: %d record" % (rowcount))
+
+if __name__ == "__main__":  # run the loader only when executed as a script
+    print("command line arguments:\n\t%s" % sys.argv)
+    main()
+    print("exit successfully")
-- 
cgit 1.4.1