about summary refs log tree commit diff
path: root/wqflask/base/data_set/datasetgroup.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base/data_set/datasetgroup.py')
-rw-r--r--wqflask/base/data_set/datasetgroup.py195
1 files changed, 0 insertions, 195 deletions
diff --git a/wqflask/base/data_set/datasetgroup.py b/wqflask/base/data_set/datasetgroup.py
deleted file mode 100644
index 95dc976f..00000000
--- a/wqflask/base/data_set/datasetgroup.py
+++ /dev/null
@@ -1,195 +0,0 @@
-"Dataset Group class ..."
-
-import os
-import json
-
-
-from base import webqtlConfig
-from .markers import Markers, HumanMarkers
-from utility import webqtlUtil
-from utility import gen_geno_ob
-from db import webqtlDatabaseFunction
-from maintenance import get_group_samplelists
-from wqflask.database import database_connection
-from utility.tools import (
-    locate,
-    USE_REDIS,
-    flat_files,
-    get_setting,
-    flat_file_exists,
-    locate_ignore_error)
-
-class DatasetGroup:
-    """
-    Each group has multiple datasets; each species has multiple groups.
-
-    For example, Mouse has multiple groups (BXD, BXA, etc), and each group
-    has multiple datasets associated with it.
-
-    """
-
-    def __init__(self, dataset, name=None):
-        """This sets self.group and self.group_id"""
-        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
-            if not name:
-                cursor.execute(dataset.query_for_group,
-                               (dataset.name,))
-            else:
-                cursor.execute(
-                    "SELECT InbredSet.Name, "
-                    "InbredSet.Id, "
-                    "InbredSet.GeneticType, "
-                    "InbredSet.InbredSetCode "
-                    "FROM InbredSet WHERE Name = %s",
-                    (name,))
-            results = cursor.fetchone()
-            if results:
-                (self.name, self.id, self.genetic_type, self.code) = results
-            else:
-                self.name = name or dataset.name
-        if self.name == 'BXD300':
-            self.name = "BXD"
-
-        self.f1list = None
-        self.parlist = None
-        self.get_f1_parent_strains()
-
-        self.mapping_id, self.mapping_names = self.get_mapping_methods()
-
-        self.species = webqtlDatabaseFunction.retrieve_species(self.name)
-
-        self.incparentsf1 = False
-        self.allsamples = None
-        self._datasets = None
-        self.genofile = None
-
-    def get_mapping_methods(self):
-        mapping_id = ()
-        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
-            cursor.execute(
-                "SELECT MappingMethodId FROM "
-                "InbredSet WHERE Name= %s",
-                (self.name,))
-            results = cursor.fetchone()
-            if results and results[0]:
-                mapping_id = results[0]
-        if mapping_id == "1":
-            mapping_names = ["GEMMA", "QTLReaper", "R/qtl"]
-        elif mapping_id == "2":
-            mapping_names = ["GEMMA"]
-        elif mapping_id == "3":
-            mapping_names = ["R/qtl"]
-        elif mapping_id == "4":
-            mapping_names = ["GEMMA", "PLINK"]
-        else:
-            mapping_names = []
-
-        return mapping_id, mapping_names
-
-    def get_markers(self):
-        def check_plink_gemma():
-            if flat_file_exists("mapping"):
-                MAPPING_PATH = flat_files("mapping") + "/"
-                if os.path.isfile(MAPPING_PATH + self.name + ".bed"):
-                    return True
-            return False
-
-        if check_plink_gemma():
-            marker_class = HumanMarkers
-        else:
-            marker_class = Markers
-
-        if self.genofile:
-            self.markers = marker_class(self.genofile[:-5])
-        else:
-            self.markers = marker_class(self.name)
-
-    def get_f1_parent_strains(self):
-        try:
-            # NL, 07/27/2010. ParInfo has been moved from webqtlForm.py to webqtlUtil.py;
-            f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name]
-        except KeyError:
-            f1 = f12 = maternal = paternal = None
-
-        if f1 and f12:
-            self.f1list = [f1, f12]
-        if maternal and paternal:
-            self.parlist = [maternal, paternal]
-
-    def get_study_samplelists(self):
-        study_sample_file = locate_ignore_error(
-            self.name + ".json", 'study_sample_lists')
-        try:
-            f = open(study_sample_file)
-        except:
-            return []
-        study_samples = json.load(f)
-        return study_samples
-
-    def get_genofiles(self):
-        jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name)
-        try:
-            f = open(jsonfile)
-        except:
-            return None
-        jsondata = json.load(f)
-        return jsondata['genofile']
-
-    def get_samplelist(self, redis_conn):
-        result = None
-        key = "samplelist:v3:" + self.name
-        if USE_REDIS:
-            result = redis_conn.get(key)
-
-        if result is not None:
-            self.samplelist = json.loads(result)
-        else:
-            genotype_fn = locate_ignore_error(self.name + ".geno", 'genotype')
-            if genotype_fn:
-                self.samplelist = get_group_samplelists.get_samplelist(
-                    "geno", genotype_fn)
-            else:
-                self.samplelist = None
-
-            if USE_REDIS:
-                redis_conn.set(key, json.dumps(self.samplelist))
-                redis_conn.expire(key, 60 * 5)
-
-    def all_samples_ordered(self):
-        result = []
-        lists = (self.parlist, self.f1list, self.samplelist)
-        [result.extend(l) for l in lists if l]
-        return result
-
-    def read_genotype_file(self, use_reaper=False):
-        '''Read genotype from .geno file instead of database'''
-        # genotype_1 is Dataset Object without parents and f1
-        # genotype_2 is Dataset Object with parents and f1 (not for intercross)
-
-        # reaper barfs on unicode filenames, so here we ensure it's a string
-        if self.genofile:
-            if "RData" in self.genofile:  # ZS: This is a temporary fix; I need to change the way the JSON files that point to multiple genotype files are structured to point to other file types like RData
-                full_filename = str(
-                    locate(self.genofile.split(".")[0] + ".geno", 'genotype'))
-            else:
-                full_filename = str(locate(self.genofile, 'genotype'))
-        else:
-            full_filename = str(locate(self.name + '.geno', 'genotype'))
-        genotype_1 = gen_geno_ob.genotype(full_filename)
-
-        if genotype_1.type == "group" and self.parlist:
-            genotype_2 = genotype_1.add(
-                Mat=self.parlist[0], Pat=self.parlist[1])  # , F1=_f1)
-        else:
-            genotype_2 = genotype_1
-
-        # determine default genotype object
-        if self.incparentsf1 and genotype_1.type != "intercross":
-            genotype = genotype_2
-        else:
-            self.incparentsf1 = 0
-            genotype = genotype_1
-
-        self.samplelist = list(genotype.prgy)
-
-        return genotype