diff options
Diffstat (limited to 'wqflask/maintenance/generate_probesetfreeze_file.py')
-rw-r--r-- | wqflask/maintenance/generate_probesetfreeze_file.py | 122 |
1 files changed, 0 insertions, 122 deletions
diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py deleted file mode 100644 index 2f917c71..00000000 --- a/wqflask/maintenance/generate_probesetfreeze_file.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/python - -import sys - -# sys.path.insert(0, "..") - why? - -import os -import collections -import csv - -from base import webqtlConfig - -from pprint import pformat as pf - -from utility.tools import get_setting -from wqflask.database import database_connection - - -def show_progress(process, counter): - if counter % 1000 == 0: - print("{}: {}".format(process, counter)) - - -def get_strains(cursor): - cursor.execute("""select Strain.Name - from Strain, StrainXRef, InbredSet - where Strain.Id = StrainXRef.StrainId and - StrainXRef.InbredSetId = InbredSet.Id - and InbredSet.Name=%s; - """, "BXD") - - strains = [strain[0] for strain in cursor.fetchall()] - print("strains:", pf(strains)) - for strain in strains: - print(" -", strain) - - return strains - - -def get_probeset_vals(cursor, dataset_name): - cursor.execute(""" select ProbeSet.Id, ProbeSet.Name - from ProbeSetXRef, - ProbeSetFreeze, - ProbeSet - where ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and - ProbeSetFreeze.Name = %s and - ProbeSetXRef.ProbeSetId = ProbeSet.Id; - """, dataset_name) - - probesets = cursor.fetchall() - - print("Fetched probesets") - - probeset_vals = collections.OrderedDict() - - for counter, probeset in enumerate(probesets): - cursor.execute(""" select Strain.Name, ProbeSetData.value - from ProbeSetData, ProbeSetXRef, ProbeSetFreeze, Strain - where ProbeSetData.Id = ProbeSetXRef.DataId - and ProbeSetData.StrainId = Strain.Id - and ProbeSetXRef.ProbeSetId = %s - and ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId - and ProbeSetFreeze.Name = %s; - """, (probeset[0], dataset_name)) - val_dic = collections.OrderedDict() - vals = cursor.fetchall() - for val in vals: - val_dic[val[0]] = val[1] - - probeset_vals[probeset[1]] = val_dic - show_progress("Querying DB", counter) - - return probeset_vals - - -def trim_strains(strains, probeset_vals): - trimmed_strains = [] - #print("probeset_vals is:", pf(probeset_vals)) - first_probeset = list(probeset_vals.values())[0] - print("\n**** first_probeset is:", pf(first_probeset)) - for strain in strains: - print("\n**** strain is:", pf(strain)) - if strain in first_probeset: - trimmed_strains.append(strain) - print("trimmed_strains:", pf(trimmed_strains)) - return trimmed_strains - - -def write_data_matrix_file(strains, probeset_vals, filename): - with open(filename, "wb") as fh: - csv_writer = csv.writer(fh, delimiter=",", quoting=csv.QUOTE_ALL) - #print("strains is:", pf(strains)) - csv_writer.writerow(['ID'] + strains) - for counter, probeset in enumerate(probeset_vals): - row_data = [probeset] - for strain in strains: - #print("probeset is: ", pf(probeset_vals[probeset])) - row_data.append(probeset_vals[probeset][strain]) - #print("row_data is: ", pf(row_data)) - csv_writer.writerow(row_data) - show_progress("Writing", counter) - - -def main(): - filename = os.path.expanduser( - "~/gene/wqflask/maintenance/" - "ProbeSetFreezeId_210_FullName_Eye_AXBXA_Illumina_V6.2" - "(Oct08)_RankInv_Beta.txt") - dataset_name = "Eye_AXBXA_1008_RankInv" - - with database_connection(get_setting("SQL_URI")) as conn: - with conn.cursor() as cursor: - strains = get_strains(cursor) - print("Getting probset_vals") - probeset_vals = get_probeset_vals(cursor, dataset_name) - print("Finished getting probeset_vals") - trimmed_strains = trim_strains(strains, probeset_vals) - write_data_matrix_file(trimmed_strains, probeset_vals, filename) - - -if __name__ == '__main__': - main() |