about summary refs log tree commit diff
path: root/wqflask/maintenance/quantile_normalize.py
diff options
context:
space:
mode:
authorAlexander_Kabui2024-01-02 13:21:07 +0300
committerAlexander_Kabui2024-01-02 13:21:07 +0300
commit70c4201b332e0e2c0d958428086512f291469b87 (patch)
treeaea4fac8782c110fc233c589c3f0f7bd34bada6c /wqflask/maintenance/quantile_normalize.py
parent5092eb42f062b1695c4e39619f0bd74a876cfac2 (diff)
parent965ce5114d585624d5edb082c710b83d83a3be40 (diff)
downloadgenenetwork2-70c4201b332e0e2c0d958428086512f291469b87.tar.gz
merge changes
Diffstat (limited to 'wqflask/maintenance/quantile_normalize.py')
-rw-r--r--wqflask/maintenance/quantile_normalize.py98
1 files changed, 0 insertions, 98 deletions
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
deleted file mode 100644
index 36049a82..00000000
--- a/wqflask/maintenance/quantile_normalize.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import sys
-sys.path.insert(0, './')
-import urllib.parse
-
-import numpy as np
-import pandas as pd
-
-from flask import Flask, g, request
-
-from wqflask import app
-from wqflask.database import database_connection
-from utility.tools import get_setting
-
-
-def create_dataframe(input_file):
-    with open(input_file) as f:
-        ncols = len(f.readline().split("\t"))
-
-    input_array = np.loadtxt(open(
-        input_file, "rb"), delimiter="\t", skiprows=1, usecols=list(range(1, ncols)))
-    return pd.DataFrame(input_array)
-
-# This function taken from https://github.com/ShawnLYU/Quantile_Normalize
-
-
-def quantileNormalize(df_input):
-    df = df_input.copy()
-    # compute rank
-    dic = {}
-    for col in df:
-        dic.update({col: sorted(df[col])})
-    sorted_df = pd.DataFrame(dic)
-    rank = sorted_df.mean(axis=1).tolist()
-    # sort
-    for col in df:
-        t = np.searchsorted(np.sort(df[col]), df[col])
-        df[col] = [rank[i] for i in t]
-    return df
-
-
-def set_data(cursor, dataset_name):
-    orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt"
-
-    sample_list = []
-    with open(orig_file, 'r') as orig_fh, open('/home/zas1024/cfw_data/quant_norm.csv', 'r') as quant_fh:
-        for i, (line1, line2) in enumerate(zip(orig_fh, quant_fh)):
-            trait_dict = {}
-            sample_list = []
-            if i == 0:
-                sample_names = line1.split('\t')[1:]
-            else:
-                trait_name = line1.split('\t')[0]
-                for i, sample in enumerate(sample_names):
-                    this_sample = {
-                        "name": sample,
-                        "value": line1.split('\t')[i + 1],
-                        "qnorm": line2.split('\t')[i + 1]
-                    }
-                    sample_list.append(this_sample)
-                query = """SELECT Species.SpeciesName, InbredSet.InbredSetName, ProbeSetFreeze.FullName
-                           FROM Species, InbredSet, ProbeSetFreeze, ProbeFreeze, ProbeSetXRef, ProbeSet
-                           WHERE Species.Id = InbredSet.SpeciesId and
-                                 InbredSet.Id = ProbeFreeze.InbredSetId and
-                                 ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId and
-                                 ProbeSetFreeze.Name = '%s' and
-                                 ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
-                                 ProbeSetXRef.ProbeSetId = ProbeSet.Id and
-                                 ProbeSet.Name = '%s'""" % (dataset_name, line1.split('\t')[0])
-                cursor.execute(query)
-                result_info = cursor.fetchone()
-
-                yield {
-                    "_index": "traits",
-                    "_type": "trait",
-                    "_source": {
-                        "name": trait_name,
-                        "species": result_info[0],
-                        "group": result_info[1],
-                        "dataset": dataset_name,
-                        "dataset_fullname": result_info[2],
-                        "samples": sample_list,
-                        "transform_types": "qnorm"
-                    }
-                }
-
-
-if __name__ == '__main__':
-    with database_connection(get_setting("SQL_URI")) as conn:
-        with conn.cursor() as cursor:
-            success, _ = bulk(es, set_data(cursor, sys.argv[1]))
-
-            response = es.search(
-                index="traits", doc_type="trait", body={
-                    "query": {"match": {"name": "ENSMUSG00000028982"}}
-                }
-            )
-
-            print(response)