From 1ba32796008dec4cf548b4c83768064fe988f998 Mon Sep 17 00:00:00 2001 From: zsloan Date: Tue, 11 Oct 2022 20:21:33 +0000 Subject: Change Interval Analyst table to use new GeneList table The query changes in GeneUtil.py (to insert column names using string formatting) are definitely some sort of SQL injection issue, but I'm not sure how else to do it. Also, the table name will be changed later, once the new table has been tested for a while. --- wqflask/wqflask/interval_analyst/GeneUtil.py | 46 ++++++++++++++++------ .../marker_regression/display_mapping_results.py | 29 +++++++------- 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py index b1cfd0ee..1f9f329b 100644 --- a/wqflask/wqflask/interval_analyst/GeneUtil.py +++ b/wqflask/wqflask/interval_analyst/GeneUtil.py @@ -5,7 +5,23 @@ from wqflask.database import database_connection # Just return a list of dictionaries # each dictionary contains sub-dictionary def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): - fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'TxEnd', + assembly_map = { + "mouse": "mm10", + "rat": "rn7" + } + + def append_assembly(fetch_fields, species): + query_fields = [] + for field in fetch_fields: + if field in ['Chr', 'TxStart', 'TxEnd', 'Strand']: + query_fields.append(field + "_" + assembly_map[species]) + else: + query_fields.append(field) + + return query_fields + + + fetchFields = ['SpeciesId', 'Id', 'GeneSymbol', 'GeneDescription', 'Chr', 'TxStart', 'TxEnd', 'Strand', 'GeneID', 'NM_ID', 'kgID', 'GenBankID', 'UnigenID', 'ProteinID', 'AlignID', 'exonCount', 'exonStarts', 'exonEnds', 'cdsStart', 'cdsEnd'] @@ -19,18 +35,23 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): "GROUP BY GeneList081722.SpeciesId") results = cursor.fetchall() for item in results: - speciesDict[item[0]] = item[1] + if item[0] == "rat": + 
speciesDict[item[0]] = (item[1], "rn7") + else: + speciesDict[item[0]] = (item[1], "mm10") # List current Species and other Species - speciesId = speciesDict[species] - otherSpecies = [[X, speciesDict[X]] for X in list(speciesDict.keys())] - otherSpecies.remove([species, speciesId]) - cursor.execute(f"SELECT {', '.join(fetchFields)} FROM GeneList081722 " + speciesId, assembly = speciesDict[species] + otherSpecies = [[X, speciesDict[X][0], speciesDict[X][1]] for X in list(speciesDict.keys())] + otherSpecies.remove([species, speciesId, assembly]) + query_fields = append_assembly(fetchFields, species) + + cursor.execute(f"SELECT {', '.join(query_fields)} FROM GeneList081722 " "WHERE SpeciesId = %s AND " - "Chromosome = %s AND " - "((TxStart > %s and TxStart <= %s) " - "OR (TxEnd > %s and TxEnd <= %s)) " - "ORDER BY txStart", + f"Chr_{assembly}" + " = %s AND " + f"((TxStart_{assembly}" + " > %s and " + f"TxStart_{assembly}" + " <= %s) " + f"OR (TxEnd_{assembly}" + " > %s and " + f"TxEnd_{assembly}" + " <= %s)) " + f"ORDER BY TxStart_{assembly}", (speciesId, chrName, startMb, endMb, startMb, endMb)) @@ -65,10 +86,11 @@ def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'): pass # load gene from other Species by the same name for item in otherSpecies: - othSpec, othSpecId = item + othSpec, othSpecId, othSpecAssembly = item newdict2 = {} + query_fields = append_assembly(fetchFields, othSpec) cursor.execute( - f"SELECT {', '.join(fetchFields)} FROM GeneList081722 WHERE " + f"SELECT {', '.join(query_fields)} FROM GeneList081722 WHERE " "SpeciesId = %s AND " "geneSymbol= %s LIMIT 1", (othSpecId, diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 1d59c694..baae95e0 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -1341,9 +1341,10 @@ class DisplayMappingResults: tenPercentLength = 
geneLength * 0.0001 SNPdensity = theGO["snpCount"] / geneLength - exonStarts = list( - map(float, theGO['exonStarts'].split(",")[:-1])) - exonEnds = list(map(float, theGO['exonEnds'].split(",")[:-1])) + if theGO['exonStarts']: + exonStarts = list( + map(float, theGO['exonStarts'].split(",")[:-1])) + exonEnds = list(map(float, theGO['exonEnds'].split(",")[:-1])) cdsStart = theGO['cdsStart'] cdsEnd = theGO['cdsEnd'] accession = theGO['NM_ID'] @@ -2993,7 +2994,7 @@ class DisplayMappingResults: if theGO["snpCount"]: snpString = HT.Link( (f"http://genenetwork.org/webqtl/main.py?FormID=snpBrowser&" - f"chr={theGO['Chromosome']}&" + f"chr={theGO['Chr']}&" f"start={theGO['TxStart']}&" f"end={theGO['TxEnd']}&" f"geneName={theGO['GeneSymbol']}&" @@ -3006,17 +3007,17 @@ class DisplayMappingResults: snpString = 0 mouseStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Mouse&db=mm10&position=chr" + \ - theGO["Chromosome"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str( + theGO["Chr"] + "%3A" + str(int(theGO["TxStart"] * 1000000.0)) + "-" + str( int(theGO["TxEnd"] * 1000000.0)) + "&pix=620&Submit=submit" # the chromosomes for human 1 are 1qXX.XX - if theGO['humanGene']: + if 'humanGene' in theGO: if theGO['humanGene']["TxStart"] == '': humanStartDisplay = "" else: humanStartDisplay = "%0.6f" % theGO['humanGene']["TxStart"] - humanChr = theGO['humanGene']["Chromosome"] + humanChr = theGO['humanGene']["Chr"] humanTxStart = theGO['humanGene']["TxStart"] humanStartString = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=vertebrate&org=Human&db=hg17&position=chr%s:%d-%d" % ( @@ -3040,10 +3041,10 @@ class DisplayMappingResults: avgExpr = "%0.6f" % avgExpr # If we have a referenceGene then we will show the Literature Correlation - if theGO["Chromosome"] == "X": + if theGO["Chr"] == "X": chr_as_int = 19 else: - chr_as_int = int(theGO["Chromosome"]) - 1 + chr_as_int = int(theGO["Chr"]) - 1 if refGene: literatureCorrelationString = 
str(self.getLiteratureCorrelation( self.cursor, refGene, theGO['GeneID']) or "N/A") @@ -3122,13 +3123,13 @@ class DisplayMappingResults: else: geneSymbolNCBI = theGO["GeneSymbol"] - if theGO["Chromosome"] == "X": + if theGO["Chr"] == "X": chr_as_int = 20 else: - chr_as_int = int(theGO["Chromosome"]) - 1 + chr_as_int = int(theGO["Chr"]) - 1 geneLength = (float(theGO["TxEnd"]) - float(theGO["TxStart"])) - geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chromosome"], float( + geneLengthURL = "javascript:rangeView('%s', %f, %f)" % (theGO["Chr"], float( theGO["TxStart"]) - (geneLength * 0.1), float(theGO["TxEnd"]) + (geneLength * 0.1)) avgExprVal = [] @@ -3139,14 +3140,14 @@ class DisplayMappingResults: # Mouse Gene if theGO['mouseGene']: - mouseChr = theGO['mouseGene']["Chromosome"] + mouseChr = theGO['mouseGene']["Chr"] mouseTxStart = "%0.6f" % theGO['mouseGene']["TxStart"] else: mouseChr = mouseTxStart = "" # the chromosomes for human 1 are 1qXX.XX if theGO['humanGene']: - humanChr = theGO['humanGene']["Chromosome"] + humanChr = theGO['humanGene']["Chr"] humanTxStart = "%0.6f" % theGO['humanGene']["TxStart"] else: humanChr = humanTxStart = "" -- cgit v1.2.3