import string
from gn2.wqflask.database import database_connection
from gn2.utility.tools import flat_files, get_setting
def load_homology(chr_name, start_mb, end_mb, source_file):
homology_list = []
with open(flat_files("homology/") + source_file) as h_file:
current_chr = 0
for line in h_file:
line_items = line.split()
this_dict = {
"ref_chr": line_items[2][3:],
"ref_strand": line_items[4],
"ref_start": float(line_items[5])/1000000,
"ref_end": float(line_items[6])/1000000,
"query_chr": line_items[7][3:],
"query_strand": line_items[9],
"query_start": float(line_items[10])/1000000,
"query_end": float(line_items[11])/1000000
}
if str(this_dict["ref_chr"]) == str(chr_name) and \
this_dict["ref_start"] < end_mb and \
this_dict["ref_end"] > start_mb:
homology_list.append(this_dict)
return homology_list
def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'):
assembly_map = {
"mouse": "mm10",
"rat": "rn7"
}
def append_assembly(fetch_fields, species):
query_fields = []
for field in fetch_fields:
if field in ['Chr', 'TxStart', 'TxEnd', 'Strand']:
query_fields.append(field + "_" + assembly_map[species])
else:
query_fields.append(field)
return query_fields
fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chr', 'TxStart', 'TxEnd',
'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID',
'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd']
# List All Species in the Gene Table
speciesDict = {}
results = []
with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
cursor.execute("SELECT Species.Name, GeneList081722.SpeciesId "
"FROM Species, GeneList081722 WHERE "
"GeneList081722.SpeciesId = Species.Id "
"GROUP BY GeneList081722.SpeciesId")
results = cursor.fetchall()
for item in results:
if item[0] == "rat":
speciesDict[item[0]] = (item[1], "rn7")
else:
speciesDict[item[0]] = (item[1], "mm10")
# List current Species and other Species
speciesId, assembly = speciesDict[species]
otherSpecies = [[X, speciesDict[X][0], speciesDict[X][1]] for X in list(speciesDict.keys())]
otherSpecies.remove([species, speciesId, assembly])
query_fields = append_assembly(fetchFields, species)
cursor.execute(f"SELECT {', '.join(query_fields)} FROM GeneList081722 "
"WHERE SpeciesId = %s AND "
f"Chr_{assembly}" + " = %s AND "
f"((TxStart_{assembly}" + " > %s and " + f"TxStart_{assembly}" + " <= %s) "
f"OR (TxEnd_{assembly}" + " > %s and " + f"TxEnd_{assembly}" + " <= %s)) "
f"ORDER BY TxStart_{assembly}",
(speciesId, chrName,
startMb, endMb,
startMb, endMb))
results = cursor.fetchall()
GeneList = []
if results:
for result in results:
newdict = {}
for j, item in enumerate(fetchFields):
newdict[item] = result[j]
# count SNPs if possible
if diffCol and species == 'mouse':
cursor.execute(
"SELECT count(*) FROM BXDSnpPosition "
"WHERE Chr = %s AND "
"Mb >= %s AND Mb < %s "
"AND StrainId1 = %s AND StrainId2 = %s",
(chrName, f"{newdict['TxStart']:2.6f}",
f"{newdict['TxEnd']:2.6f}",
diffCol[0], diffCol[1],))
newdict["snpCount"] = cursor.fetchone()[0]
newdict["snpDensity"] = (
newdict["snpCount"] /
(newdict["TxEnd"] - newdict["TxStart"]) / 1000.0)
else:
newdict["snpDensity"] = newdict["snpCount"] = 0
try:
newdict['GeneLength'] = 1000.0 * \
(newdict['TxEnd'] - newdict['TxStart'])
except:
pass
# load gene from other Species by the same name
for item in otherSpecies:
othSpec, othSpecId, othSpecAssembly = item
newdict2 = {}
query_fields = append_assembly(fetchFields, othSpec)
cursor.execute(
f"SELECT {', '.join(query_fields)} FROM GeneList081722 WHERE "
"SpeciesId = %s AND "
"geneSymbol= %s LIMIT 1",
(othSpecId,
newdict["GeneSymbol"]))
resultsOther = cursor.fetchone()
if resultsOther:
for j, item in enumerate(fetchFields):
newdict2[item] = resultsOther[j]
# count SNPs if possible, could be a separate function
if diffCol and othSpec == 'mouse':
cursor.execute(
"SELECT count(*) FROM BXDSnpPosition "
"WHERE Chr = %s AND Mb >= %s AND "
"Mb < %s AND StrainId1 = %s "
"AND StrainId2 = %s",
(chrName, f"{newdict['TxStart']:2.6f}",
f"{newdict['TxEnd']:2.6f}",
diffCol[0], diffCol[1]))
if snp_count := cursor.fetchone():
newdict2["snpCount"] = snp_count[0]
newdict2["snpDensity"] = (
newdict2["snpCount"]
/ (newdict2["TxEnd"] - newdict2["TxStart"])
/ 1000.0)
else:
newdict2["snpDensity"] = newdict2["snpCount"] = 0
try:
newdict2['GeneLength'] = (
1000.0 * (newdict2['TxEnd'] - newdict2['TxStart']))
except:
pass
newdict['%sGene' % othSpec] = newdict2
GeneList.append(newdict)
return GeneList