author    | Frederick Muriuki Muriithi | 2022-03-10 08:55:26 +0300
committer | Frederick Muriuki Muriithi | 2022-03-10 08:55:26 +0300
commit    | 4a7e2c1602ed82aabd7d04953067ba49cb1cebff (patch)
tree      | ea8c0cf11f1629d79844b2a4e9bb49a69e0bba33
parent    | 80b02d37f60d172be01bf8cd62bd84b406b1e0dd (diff)
download  | genenetwork2-4a7e2c1602ed82aabd7d04953067ba49cb1cebff.tar.gz
Use context manager with database connection
Use the `with` context manager with database connections and cursors
to ensure that they are closed once they are no longer needed.
Where it was not feasible to use the `with` context manager without a
huge refactor/rewrite, the cursors and connections are closed manually.
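
For illustration, the pattern now used throughout the tree looks like the
sketch below (a minimal example, assuming the `database_connection` helper
from `wqflask.database`; `fetch_species` is a hypothetical caller, not part
of this commit):

    from wqflask.database import database_connection

    def fetch_species():
        # Both the connection and the cursor are closed automatically
        # when their `with` blocks exit, even if an exception is raised.
        with database_connection() as conn:
            with conn.cursor() as cursor:
                cursor.execute(
                    "select Name, MenuName from Species order by OrderId")
                return list(cursor.fetchall())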
-rwxr-xr-x | scripts/maintenance/QTL_Reaper_v6.py                          |   3
-rw-r--r-- | scripts/maintenance/Update_Case_Attributes_MySQL_tab.py       |  23
-rwxr-xr-x | scripts/maintenance/readProbeSetMean_v7.py                    |   1
-rwxr-xr-x | scripts/maintenance/readProbeSetSE_v7.py                      |   1
-rw-r--r-- | scripts/maintenance/utilities.py                              |  10
-rw-r--r-- | wqflask/maintenance/gen_select_dataset.py                     |  36
-rw-r--r-- | wqflask/maintenance/generate_probesetfreeze_file.py           |  26
-rw-r--r-- | wqflask/maintenance/quantile_normalize.py                     |  27
-rw-r--r-- | wqflask/maintenance/set_resource_defaults.py                  |  35
-rw-r--r-- | wqflask/wqflask/api/router.py                                 |  10
-rw-r--r-- | wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py  |   2
-rw-r--r-- | wqflask/wqflask/do_search.py                                  |  18
-rw-r--r-- | wqflask/wqflask/metadata_edits.py                             | 591
-rw-r--r-- | wqflask/wqflask/views.py                                      |   3
14 files changed, 365 insertions, 421 deletions
diff --git a/scripts/maintenance/QTL_Reaper_v6.py b/scripts/maintenance/QTL_Reaper_v6.py
index 35f2d1a1..20fd8e3b 100755
--- a/scripts/maintenance/QTL_Reaper_v6.py
+++ b/scripts/maintenance/QTL_Reaper_v6.py
@@ -106,3 +106,6 @@ for ProbeSetFreezeId in ProbeSetFreezeIds:
 print(ProbeSetFreezeIds)
 
 
+
+cursor.close()
+con.close()
diff --git a/scripts/maintenance/Update_Case_Attributes_MySQL_tab.py b/scripts/maintenance/Update_Case_Attributes_MySQL_tab.py
index bf796df4..a3cd1c35 100644
--- a/scripts/maintenance/Update_Case_Attributes_MySQL_tab.py
+++ b/scripts/maintenance/Update_Case_Attributes_MySQL_tab.py
@@ -10,18 +10,13 @@ import time
 import csv
 ########################################################################
-mydb = MySQLdb.connect(host='localhost',
-                       user='username',
-                       passwd='',
-                       db='db_webqtl')
-cursor = mydb.cursor()
+with MySQLdb.connect(
+        host='localhost', user='username', passwd='', db='db_webqtl') as mydb:
+    with mydb.cursor() as cursor:
-csv_data = csv.reader(file('GN711_pvalues.txt'), delimiter ="\t")
-for row in csv_data:
-
-    cursor.execute("""UPDATE ProbeSetXRef SET pValue = %s WHERE ProbeSetFreezeId = %s AND ProbeSetId = %s """,
-                   (row))
-#close the connection to the database.
-mydb.commit()
-cursor.close()
-print("Done")
\ No newline at end of file
+        csv_data = csv.reader(open('GN711_pvalues.txt'), delimiter="\t")
+        for row in csv_data:
+            cursor.execute(
+                """UPDATE ProbeSetXRef SET pValue = %s WHERE ProbeSetFreezeId = %s AND ProbeSetId = %s """,
+                (row))
+print("Done")
diff --git a/scripts/maintenance/readProbeSetMean_v7.py b/scripts/maintenance/readProbeSetMean_v7.py
index 43f084f4..56adcdfd 100755
--- a/scripts/maintenance/readProbeSetMean_v7.py
+++ b/scripts/maintenance/readProbeSetMean_v7.py
@@ -269,4 +269,5 @@ if len(values1) > 0:
     cmd = 'insert into ProbeSetXRef(ProbeSetFreezeId, ProbeSetId, DataId) values %s' % cmd
     db.execute(cmd)
 
+db.close()
 con.close()
diff --git a/scripts/maintenance/readProbeSetSE_v7.py b/scripts/maintenance/readProbeSetSE_v7.py
index 2cfe2e07..88a347bf 100755
--- a/scripts/maintenance/readProbeSetSE_v7.py
+++ b/scripts/maintenance/readProbeSetSE_v7.py
@@ -251,4 +251,5 @@ if len(DataValues) > 0:
     cmd = 'insert ProbeSetSE values %s' % DataValues
     db.execute(cmd)
 
+db.close()
 con.close()
diff --git a/scripts/maintenance/utilities.py b/scripts/maintenance/utilities.py
index 886410c2..1fe14809 100644
--- a/scripts/maintenance/utilities.py
+++ b/scripts/maintenance/utilities.py
@@ -1,16 +1,6 @@
-import MySQLdb
 import re
 import configparser
 
-def get_cursor():
-    host = 'tux.uthsc.edu'
-    user = 'webqtlout'
-    passwd = 'webqtlout'
-    db = 'db_webqtl'
-    con = MySQLdb.Connect(db=db, host=host, user=user, passwd=passwd)
-    cursor = con.cursor()
-    return cursor, con
-
 def clearspaces(s, default=None):
     if s:
         s = re.sub('\s+', ' ', s)
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index db65a11f..9f4b670d 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -39,21 +39,13 @@ from wqflask import app
 
 from utility.tools import locate, locate_ignore_error, TEMPDIR, SQL_URI
 
-import MySQLdb
-
 import simplejson as json
 import urllib.parse
 
-#import sqlalchemy as sa
-
 from pprint import pformat as pf
 
-#Engine = sa.create_engine(zach_settings.SQL_URI)
-
-# build MySql database connection
-
-#conn = Engine.connect()
+from wqflask.database import database_connection
 
 
 def parse_db_uri():
@@ -71,19 +63,19 @@ def parse_db_uri():
     return db_conn_info
 
 
-def get_species():
+def get_species(cursor):
     """Build species list"""
-    #Cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId")
-    Cursor.execute("select Name, MenuName from Species order by OrderId")
-    species = list(Cursor.fetchall())
+    #cursor.execute("select Name, MenuName from Species where Species.Name != 'macaque monkey' order by OrderId")
+    cursor.execute("select Name, MenuName from Species order by OrderId")
+    species = list(cursor.fetchall())
     return species
 
 
-def get_groups(species):
+def get_groups(cursor, species):
     """Build groups list"""
     groups = {}
     for species_name, _species_full_name in species:
-        Cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet,
+        cursor.execute("""select InbredSet.Name, InbredSet.FullName from InbredSet,
                        Species, ProbeFreeze, GenoFreeze, PublishFreeze where Species.Name = '%s' and
                        InbredSet.SpeciesId = Species.Id and
                        (PublishFreeze.InbredSetId = InbredSet.Id
                         or GenoFreeze.InbredSetId = InbredSet.Id
                         or ProbeFreeze.InbredSetId = InbredSet.Id)
                        group by InbredSet.Name
                        order by InbredSet.FullName""" % species_name)
-        results = Cursor.fetchall()
+        results = cursor.fetchall()
         groups[species_name] = list(results)
     return groups
@@ -273,13 +265,13 @@ def build_datasets(species, group, type_name):
     return datasets
 
 
-def main():
+def main(cursor):
     """Generates and outputs (as json file) the data for the main dropdown menus on the home page"""
 
     parse_db_uri()
 
-    species = get_species()
-    groups = get_groups(species)
+    species = get_species(cursor)
+    groups = get_groups(cursor, species)
     types = get_types(groups)
     datasets = get_datasets(types)
@@ -316,6 +308,6 @@ def _test_it():
 
 if __name__ == '__main__':
-    Conn = MySQLdb.Connect(**parse_db_uri())
-    Cursor = Conn.cursor()
-    main()
+    with database_connection() as conn:
+        with conn.cursor() as cursor:
+            main(cursor)
diff --git a/wqflask/maintenance/generate_probesetfreeze_file.py b/wqflask/maintenance/generate_probesetfreeze_file.py
index e964c8ed..f43f952b 100644
--- a/wqflask/maintenance/generate_probesetfreeze_file.py
+++ b/wqflask/maintenance/generate_probesetfreeze_file.py
@@ -8,20 +8,11 @@ import os
 import collections
 import csv
 
-import MySQLdb
-
 from base import webqtlConfig
 
 from pprint import pformat as pf
 
-
-def get_cursor():
-    con = MySQLdb.Connect(db=webqtlConfig.DB_UPDNAME,
-                          host=webqtlConfig.MYSQL_UPDSERVER,
-                          user=webqtlConfig.DB_UPDUSER,
-                          passwd=webqtlConfig.DB_UPDPASSWD)
-    cursor = con.cursor()
-    return cursor
+from wqflask.database import database_connection
 
 
 def show_progress(process, counter):
@@ -116,13 +107,14 @@ def main():
         "(Oct08)_RankInv_Beta.txt")
     dataset_name = "Eye_AXBXA_1008_RankInv"
 
-    cursor = get_cursor()
-    strains = get_strains(cursor)
-    print("Getting probset_vals")
-    probeset_vals = get_probeset_vals(cursor, dataset_name)
-    print("Finished getting probeset_vals")
-    trimmed_strains = trim_strains(strains, probeset_vals)
-    write_data_matrix_file(trimmed_strains, probeset_vals, filename)
+    with database_connection() as conn:
+        with conn.cursor() as cursor:
+            strains = get_strains(cursor)
+            print("Getting probset_vals")
+            probeset_vals = get_probeset_vals(cursor, dataset_name)
+            print("Finished getting probeset_vals")
+            trimmed_strains = trim_strains(strains, probeset_vals)
+            write_data_matrix_file(trimmed_strains, probeset_vals, filename)
 
 
 if __name__ == '__main__':
diff --git a/wqflask/maintenance/quantile_normalize.py b/wqflask/maintenance/quantile_normalize.py
index 32780ca6..2e2b0ec3 100644
--- a/wqflask/maintenance/quantile_normalize.py
+++ b/wqflask/maintenance/quantile_normalize.py
@@ -1,6 +1,5 @@
 import sys
 sys.path.insert(0, './')
-import MySQLdb
 import urllib.parse
 
 import numpy as np
@@ -9,6 +8,7 @@ import pandas as pd
 from flask import Flask, g, request
 
 from wqflask import app
+from wqflask.database import database_connection
 
 
 def parse_db_uri():
@@ -52,7 +52,7 @@ def quantileNormalize(df_input):
     return df
 
 
-def set_data(dataset_name):
+def set_data(cursor, dataset_name):
     orig_file = "/home/zas1024/cfw_data/" + dataset_name + ".txt"
 
     sample_list = []
@@ -80,8 +80,8 @@ def set_data(dataset_name):
                           ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId and
                           ProbeSetXRef.ProbeSetId = ProbeSet.Id and
                           ProbeSet.Name = '%s'""" % (dataset_name, line1.split('\t')[0])
-            Cursor.execute(query)
-            result_info = Cursor.fetchone()
+            cursor.execute(query)
+            result_info = cursor.fetchone()
 
             yield {
                 "_index": "traits",
@@ -99,15 +99,14 @@ def set_data(dataset_name):
 
 if __name__ == '__main__':
-    Conn = MySQLdb.Connect(**parse_db_uri())
-    Cursor = Conn.cursor()
+    with database_connection() as conn:
+        with conn.cursor() as cursor:
+            success, _ = bulk(es, set_data(cursor, sys.argv[1]))
 
-    success, _ = bulk(es, set_data(sys.argv[1]))
-
-    response = es.search(
-        index="traits", doc_type="trait", body={
-            "query": {"match": {"name": "ENSMUSG00000028982"}}
"ENSMUSG00000028982"}} - } - ) + response = es.search( + index="traits", doc_type="trait", body={ + "query": {"match": {"name": "ENSMUSG00000028982"}} + } + ) - print(response) + print(response) diff --git a/wqflask/maintenance/set_resource_defaults.py b/wqflask/maintenance/set_resource_defaults.py index 0f472494..22d73ba3 100644 --- a/wqflask/maintenance/set_resource_defaults.py +++ b/wqflask/maintenance/set_resource_defaults.py @@ -30,10 +30,9 @@ from utility.tools import SQL_URI from utility.redis_tools import get_redis_conn, get_user_id, add_resource, get_resources, get_resource_info Redis = get_redis_conn() -import MySQLdb - import urllib.parse +from wqflask.database import database_connection from utility.logger import getLogger logger = getLogger(__name__) @@ -53,14 +52,14 @@ def parse_db_uri(): return db_conn_info -def insert_probeset_resources(default_owner_id): +def insert_probeset_resources(cursor, default_owner_id): current_resources = Redis.hgetall("resources") - Cursor.execute(""" SELECT + cursor.execute(""" SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.confidentiality, ProbeSetFreeze.public FROM ProbeSetFreeze""") - resource_results = Cursor.fetchall() + resource_results = cursor.fetchall() for i, resource in enumerate(resource_results): resource_ob = {} resource_ob['name'] = resource[1] @@ -80,9 +79,9 @@ def insert_probeset_resources(default_owner_id): add_resource(resource_ob, update=False) -def insert_publish_resources(default_owner_id): +def insert_publish_resources(cursor, default_owner_id): current_resources = Redis.hgetall("resources") - Cursor.execute(""" SELECT + cursor.execute(""" SELECT PublishXRef.Id, PublishFreeze.Id, InbredSet.InbredSetCode FROM PublishXRef, PublishFreeze, InbredSet, Publication @@ -91,7 +90,7 @@ def insert_publish_resources(default_owner_id): InbredSet.Id = PublishXRef.InbredSetId AND Publication.Id = PublishXRef.PublicationId""") - resource_results = Cursor.fetchall() + resource_results = cursor.fetchall() for resource in resource_results: if resource[2]: resource_ob = {} @@ -114,14 +113,14 @@ def insert_publish_resources(default_owner_id): continue -def insert_geno_resources(default_owner_id): +def insert_geno_resources(cursor, default_owner_id): current_resources = Redis.hgetall("resources") - Cursor.execute(""" SELECT + cursor.execute(""" SELECT GenoFreeze.Id, GenoFreeze.ShortName, GenoFreeze.confidentiality FROM GenoFreeze""") - resource_results = Cursor.fetchall() + resource_results = cursor.fetchall() for i, resource in enumerate(resource_results): resource_ob = {} resource_ob['name'] = resource[1] @@ -147,15 +146,15 @@ def insert_geno_resources(default_owner_id): def insert_resources(default_owner_id): current_resources = get_resources() print("START") - insert_publish_resources(default_owner_id) + insert_publish_resources(cursor, default_owner_id) print("AFTER PUBLISH") - insert_geno_resources(default_owner_id) + insert_geno_resources(cursor, default_owner_id) print("AFTER GENO") - insert_probeset_resources(default_owner_id) + insert_probeset_resources(cursor, default_owner_id) print("AFTER PROBESET") -def main(): +def main(cursor): """Generates and outputs (as json file) the data for the main dropdown menus on the home page""" Redis.delete("resources") @@ -166,6 +165,6 @@ def main(): if __name__ == '__main__': - Conn = MySQLdb.Connect(**parse_db_uri()) - Cursor = Conn.cursor() - main() + with database_connection() as conn: + with conn.cursor() as cursor: + main(cursor) diff --git a/wqflask/wqflask/api/router.py 
index a739e5a9..79dc5f1c 100644
--- a/wqflask/wqflask/api/router.py
+++ b/wqflask/wqflask/api/router.py
@@ -6,7 +6,6 @@ import csv
 import json
 import datetime
 import requests
-import MySQLdb
 
 from zipfile import ZipFile, ZIP_DEFLATED
 
@@ -24,6 +23,8 @@ from wqflask.api import correlation, mapping, gen_menu
 
 from utility.tools import flat_files
 
+from wqflask.database import database_connection
+
 import utility.logger
 logger = utility.logger.getLogger(__name__)
 
@@ -847,11 +848,8 @@ def get_genotypes(group_name, file_format="csv", dataset_name=None):
 
 @app.route("/api/v_{}/gen_dropdown".format(version), methods=("GET",))
 def gen_dropdown_menu():
-    conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"),
-                           user=current_app.config.get("DB_USER"),
-                           passwd=current_app.config.get("DB_PASS"),
-                           host=current_app.config.get("DB_HOST"))
-    results = gen_menu.gen_dropdown_json(conn)
+    with database_connection() as conn:
+        results = gen_menu.gen_dropdown_json(conn)
 
     if len(results) > 0:
         return flask.jsonify(results)
diff --git a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py
index cb88eb53..438d2276 100644
--- a/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py
+++ b/wqflask/wqflask/comparison_bar_chart/comparison_bar_chart.py
@@ -26,8 +26,6 @@ from utility import webqtlUtil, helper_functions, corr_result_helpers
 import utility.webqtlUtil  # this is for parallel computing only.
 from wqflask.correlation import correlation_functions
 
-from MySQLdb import escape_string as escape
-
 from flask import Flask, g
 
 
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index 2cc6aa61..1e245d6a 100644
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -989,8 +989,6 @@ def get_aliases(symbol, species):
 
 if __name__ == "__main__":
     # Usually this will be used as a library, but call it from the command line for testing
     # And it runs the code below
-
-    import MySQLdb
     import sys
 
     from base import webqtlConfig
@@ -998,15 +996,11 @@ if __name__ == "__main__":
     from utility import webqtlUtil
     from db import webqtlDatabaseFunction
 
-    db_conn = MySQLdb.Connect(db=webqtlConfig.DB_NAME,
-                              host=webqtlConfig.MYSQL_SERVER,
-                              user=webqtlConfig.DB_USER,
-                              passwd=webqtlConfig.DB_PASSWD)
-    cursor = db_conn.cursor()
-
-    dataset_name = "HC_M2_0606_P"
-    dataset = create_dataset(db_conn, dataset_name)
+    from wqflask.database import database_connection
 
-    results = PvalueSearch(['0.005'], '<', dataset, cursor, db_conn).run()
+    with database_connection() as db_conn:
+        with db_conn.cursor() as cursor:
+            dataset_name = "HC_M2_0606_P"
+            dataset = create_dataset(db_conn, dataset_name)
 
-    db_conn.close()
+            results = PvalueSearch(['0.005'], '<', dataset, cursor, db_conn).run()
diff --git a/wqflask/wqflask/metadata_edits.py b/wqflask/wqflask/metadata_edits.py
index bceb9f3b..cf684bba 100644
--- a/wqflask/wqflask/metadata_edits.py
+++ b/wqflask/wqflask/metadata_edits.py
@@ -7,7 +7,6 @@ from collections import namedtuple
 from itertools import groupby
 from typing import Dict
 
-import MySQLdb
 import difflib
 import redis
 
@@ -45,6 +44,7 @@ from gn3.db.traits import get_trait_csv_sample_data
 from gn3.db.traits import update_sample_data
 from gn3.db.traits import delete_sample_data
 from gn3.db.traits import insert_sample_data
+from wqflask.database import database_connection
 
 metadata_edit = Blueprint('metadata_edit', __name__)
 
@@ -178,261 +178,247 @@ def edit_probeset(conn, name):
 @edit_access_required
 @login_required
 def display_phenotype_metadata(dataset_id: str, name: str):
-    conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"),
-                           user=current_app.config.get("DB_USER"),
-                           passwd=current_app.config.get("DB_PASS"),
-                           host=current_app.config.get("DB_HOST"))
-    _d = edit_phenotype(conn=conn, name=name, dataset_id=dataset_id)
-    return render_template(
-        "edit_phenotype.html",
-        diff=_d.get("diff"),
-        publish_xref=_d.get("publish_xref"),
-        phenotype=_d.get("phenotype"),
-        publication=_d.get("publication"),
-        dataset_id=dataset_id,
-        resource_id=request.args.get("resource-id"),
-        version=os.environ.get("GN_VERSION"),
-    )
+    with database_connection() as conn:
+        _d = edit_phenotype(conn=conn, name=name, dataset_id=dataset_id)
+        return render_template(
+            "edit_phenotype.html",
+            diff=_d.get("diff"),
+            publish_xref=_d.get("publish_xref"),
+            phenotype=_d.get("phenotype"),
+            publication=_d.get("publication"),
+            dataset_id=dataset_id,
+            resource_id=request.args.get("resource-id"),
+            version=os.environ.get("GN_VERSION"),
+        )
 
 
 @metadata_edit.route("/traits/<name>")
 @edit_access_required
 @login_required
 def display_probeset_metadata(name: str):
-    conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"),
-                           user=current_app.config.get("DB_USER"),
-                           passwd=current_app.config.get("DB_PASS"),
-                           host=current_app.config.get("DB_HOST"))
-    _d = edit_probeset(conn=conn, name=name)
-    return render_template(
-        "edit_probeset.html",
-        diff=_d.get("diff"),
-        probeset=_d.get("probeset"),
-        name=name,
-        resource_id=request.args.get("resource-id"),
-        version=os.environ.get("GN_VERSION"),
-    )
+    with database_connection() as conn:
+        _d = edit_probeset(conn=conn, name=name)
+        return render_template(
+            "edit_probeset.html",
+            diff=_d.get("diff"),
+            probeset=_d.get("probeset"),
+            name=name,
+            resource_id=request.args.get("resource-id"),
+            version=os.environ.get("GN_VERSION"),
+        )
 
 
 @metadata_edit.route("/<dataset_id>/traits/<name>", methods=("POST",))
 @edit_access_required
 @login_required
 def update_phenotype(dataset_id: str, name: str):
-    conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"),
-                           user=current_app.config.get("DB_USER"),
-                           passwd=current_app.config.get("DB_PASS"),
-                           host=current_app.config.get("DB_HOST"))
-    data_ = request.form.to_dict()
-    TMPDIR = current_app.config.get("TMPDIR")
-    author = ((g.user_session.record.get(b"user_id") or b"").decode("utf-8")
-              or g.user_session.record.get("user_id") or "")
-    phenotype_id = str(data_.get('phenotype-id'))
-    if not (file_ := request.files.get("file")):
-        flash("No sample-data has been uploaded", "warning")
-    else:
-        if not os.path.exists(SAMPLE_DATADIR := os.path.join(TMPDIR, "sample-data")):
-            os.makedirs(SAMPLE_DATADIR)
-        if not os.path.exists(os.path.join(SAMPLE_DATADIR,
-                                           "diffs")):
-            os.makedirs(os.path.join(SAMPLE_DATADIR,
-                                     "diffs"))
-        if not os.path.exists(os.path.join(SAMPLE_DATADIR,
-                                           "updated")):
-            os.makedirs(os.path.join(SAMPLE_DATADIR,
-                                     "updated"))
-        current_time = str(datetime.datetime.now().isoformat())
-        _file_name = (f"{author}.{request.args.get('resource-id')}."
- f"{current_time}") - new_file_name = (os.path.join(TMPDIR, - f"sample-data/updated/{_file_name}.csv")) - uploaded_file_name = (os.path.join( - TMPDIR, "sample-data/updated/", - f"{_file_name}.csv.uploaded")) - file_.save(new_file_name) - with open(uploaded_file_name, "w") as f_: - f_.write(get_trait_csv_sample_data( - conn=conn, - trait_name=str(name), - phenotype_id=str(phenotype_id))) - r = run_cmd(cmd=("csvdiff " - f"'{uploaded_file_name}' '{new_file_name}' " - "--format json")) - json_data = json.loads(r.get("output")) - - # Only consider values where |ε| < 0.001; otherwise, use the - # old value in "Original". - _modifications = [] - for m in json_data.get("Modifications"): - _original = m.get("Original").split(",") - _current = m.get("Current").split(",") - for i, (x, y) in enumerate(zip(_original, _current)): - if (x.replace('.', '').isdigit() - and y.replace('.', '').isdigit() - and abs(float(x) - float(y)) < 0.001): - _current[i] = x - if not (__o:=",".join(_original)) == (__c:=",".join(_current)): - _modifications.append( - { - "Original": __o, - "Current": __c, - }) - json_data['Modifications'] = _modifications - - # Edge case where the csv file has not been edited! - if not any(json_data.values()): - flash(f"You have not modified the csv file you downloaded!", - "warning") - return redirect(f"/datasets/{dataset_id}/traits/{name}" - f"?resource-id={request.args.get('resource-id')}") - diff_output = (f"{TMPDIR}/sample-data/diffs/" - f"{_file_name}.json") - with open(diff_output, "w") as f: - dict_ = json_data - dict_.update({ - "trait_name": str(name), + with database_connection() as conn: + data_ = request.form.to_dict() + TMPDIR = current_app.config.get("TMPDIR") + author = ((g.user_session.record.get(b"user_id") or b"").decode("utf-8") + or g.user_session.record.get("user_id") or "") + phenotype_id = str(data_.get('phenotype-id')) + if not (file_ := request.files.get("file")): + flash("No sample-data has been uploaded", "warning") + else: + if not os.path.exists(SAMPLE_DATADIR := os.path.join(TMPDIR, "sample-data")): + os.makedirs(SAMPLE_DATADIR) + if not os.path.exists(os.path.join(SAMPLE_DATADIR, + "diffs")): + os.makedirs(os.path.join(SAMPLE_DATADIR, + "diffs")) + if not os.path.exists(os.path.join(SAMPLE_DATADIR, + "updated")): + os.makedirs(os.path.join(SAMPLE_DATADIR, + "updated")) + current_time = str(datetime.datetime.now().isoformat()) + _file_name = (f"{author}.{request.args.get('resource-id')}." + f"{current_time}") + new_file_name = (os.path.join(TMPDIR, + f"sample-data/updated/{_file_name}.csv")) + uploaded_file_name = (os.path.join( + TMPDIR, "sample-data/updated/", + f"{_file_name}.csv.uploaded")) + file_.save(new_file_name) + with open(uploaded_file_name, "w") as f_: + f_.write(get_trait_csv_sample_data( + conn=conn, + trait_name=str(name), + phenotype_id=str(phenotype_id))) + r = run_cmd(cmd=("csvdiff " + f"'{uploaded_file_name}' '{new_file_name}' " + "--format json")) + json_data = json.loads(r.get("output")) + + # Only consider values where |ε| < 0.001; otherwise, use the + # old value in "Original". 
+            _modifications = []
+            for m in json_data.get("Modifications"):
+                _original = m.get("Original").split(",")
+                _current = m.get("Current").split(",")
+                for i, (x, y) in enumerate(zip(_original, _current)):
+                    if (x.replace('.', '').isdigit()
+                            and y.replace('.', '').isdigit()
+                            and abs(float(x) - float(y)) < 0.001):
+                        _current[i] = x
+                if not (__o:=",".join(_original)) == (__c:=",".join(_current)):
+                    _modifications.append(
+                        {
+                            "Original": __o,
+                            "Current": __c,
+                        })
+            json_data['Modifications'] = _modifications
+
+            # Edge case where the csv file has not been edited!
+            if not any(json_data.values()):
+                flash(f"You have not modified the csv file you downloaded!",
+                      "warning")
+                return redirect(f"/datasets/{dataset_id}/traits/{name}"
+                                f"?resource-id={request.args.get('resource-id')}")
+            diff_output = (f"{TMPDIR}/sample-data/diffs/"
+                           f"{_file_name}.json")
+            with open(diff_output, "w") as f:
+                dict_ = json_data
+                dict_.update({
+                    "trait_name": str(name),
+                    "phenotype_id": str(phenotype_id),
+                    "author": author,
+                    "timestamp": datetime.datetime.now().strftime(
+                        "%Y-%m-%d %H:%M:%S")
+                })
+                f.write(json.dumps(dict_))
+            flash("Sample-data has been successfully uploaded", "success")
+        # Run updates:
+        phenotype_ = {
+            "pre_pub_description": data_.get("pre-pub-desc"),
+            "post_pub_description": data_.get("post-pub-desc"),
+            "original_description": data_.get("orig-desc"),
+            "units": data_.get("units"),
+            "pre_pub_abbreviation": data_.get("pre-pub-abbrev"),
+            "post_pub_abbreviation": data_.get("post-pub-abbrev"),
+            "lab_code": data_.get("labcode"),
+            "submitter": data_.get("submitter"),
+            "owner": data_.get("owner"),
+            "authorized_users": data_.get("authorized-users"),
+        }
+        updated_phenotypes = update(
+            conn, "Phenotype",
+            data=Phenotype(**phenotype_),
+            where=Phenotype(id_=data_.get("phenotype-id")))
+        diff_data = {}
+        if updated_phenotypes:
+            diff_data.update({"Phenotype": diff_from_dict(old={
+                k: data_.get(f"old_{k}") for k, v in phenotype_.items()
+                if v is not None}, new=phenotype_)})
+        publication_ = {
+            "abstract": data_.get("abstract"),
+            "authors": data_.get("authors"),
+            "title": data_.get("title"),
+            "journal": data_.get("journal"),
+            "volume": data_.get("volume"),
+            "pages": data_.get("pages"),
+            "month": data_.get("month"),
+            "year": data_.get("year")
+        }
+        updated_publications = update(
+            conn, "Publication",
+            data=Publication(**publication_),
+            where=Publication(id_=data_.get("pubmed-id",
+                                            data_.get("old_id_"))))
+        if updated_publications:
+            diff_data.update({"Publication": diff_from_dict(old={
+                k: data_.get(f"old_{k}") for k, v in publication_.items()
+                if v is not None}, new=publication_)})
+        if diff_data:
+            diff_data.update({
+                "phenotype_id": str(phenotype_id),
+                "dataset_id": name,
+                "resource_id": request.args.get('resource-id'),
+                "author": author,
+                "timestamp": (datetime
+                              .datetime
+                              .now()
+                              .strftime("%Y-%m-%d %H:%M:%S")),
+            })
-    }
-    updated_phenotypes = update(
-        conn, "Phenotype",
-        data=Phenotype(**phenotype_),
-        where=Phenotype(id_=data_.get("phenotype-id")))
-    diff_data = {}
-    if updated_phenotypes:
-        diff_data.update({"Phenotype": diff_from_dict(old={
-            k: data_.get(f"old_{k}") for k, v in phenotype_.items()
-            if v is not None}, new=phenotype_)})
-    publication_ = {
-        "abstract": data_.get("abstract"),
-        "authors": data_.get("authors"),
-        "title": data_.get("title"),
-        "journal": data_.get("journal"),
-        "volume": data_.get("volume"),
-        "pages": data_.get("pages"),
-        "month": data_.get("month"),
-        "year": data_.get("year")
-    }
-    updated_publications = update(
-        conn, "Publication",
-        data=Publication(**publication_),
-        where=Publication(id_=data_.get("pubmed-id",
-                                        data_.get("old_id_"))))
-    if updated_publications:
-        diff_data.update({"Publication": diff_from_dict(old={
-            k: data_.get(f"old_{k}") for k, v in publication_.items()
-            if v is not None}, new=publication_)})
-    if diff_data:
-        diff_data.update({
-            "phenotype_id": str(phenotype_id),
-            "dataset_id": name,
-            "resource_id": request.args.get('resource-id'),
-            "author": author,
-            "timestamp": (datetime
-                          .datetime
-                          .now()
-                          .strftime("%Y-%m-%d %H:%M:%S")),
-        })
-        insert(conn,
-               table="metadata_audit",
-               data=MetadataAudit(dataset_id=name,
-                                  editor=author,
-                                  json_data=json.dumps(diff_data)))
-        flash(f"Diff-data: \n{diff_data}\nhas been uploaded", "success")
-    return redirect(f"/datasets/{dataset_id}/traits/{name}"
-                    f"?resource-id={request.args.get('resource-id')}")
+            insert(conn,
+                   table="metadata_audit",
+                   data=MetadataAudit(dataset_id=name,
+                                      editor=author,
+                                      json_data=json.dumps(diff_data)))
+            flash(f"Diff-data: \n{diff_data}\nhas been uploaded", "success")
+        return redirect(f"/datasets/{dataset_id}/traits/{name}"
+                        f"?resource-id={request.args.get('resource-id')}")
 
 
 @metadata_edit.route("/traits/<name>", methods=("POST",))
 @edit_access_required
 @login_required
 def update_probeset(name: str):
-    conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"),
-                           user=current_app.config.get("DB_USER"),
-                           passwd=current_app.config.get("DB_PASS"),
-                           host=current_app.config.get("DB_HOST"))
-    data_ = request.form.to_dict()
-    probeset_ = {
-        "id_": data_.get("id"),
-        "symbol": data_.get("symbol"),
-        "description": data_.get("description"),
-        "probe_target_description": data_.get("probe_target_description"),
-        "chr_": data_.get("chr"),
-        "mb": data_.get("mb"),
-        "alias": data_.get("alias"),
-        "geneid": data_.get("geneid"),
-        "homologeneid": data_.get("homologeneid"),
-        "unigeneid": data_.get("unigeneid"),
-        "omim": data_.get("OMIM"),
-        "refseq_transcriptid": data_.get("refseq_transcriptid"),
-        "blatseq": data_.get("blatseq"),
-        "targetseq": data_.get("targetseq"),
-        "strand_probe": data_.get("Strand_Probe"),
-        "probe_set_target_region": data_.get("probe_set_target_region"),
-        "probe_set_specificity": data_.get("probe_set_specificity"),
-        "probe_set_blat_score": data_.get("probe_set_blat_score"),
-        "probe_set_blat_mb_start": data_.get("probe_set_blat_mb_start"),
-        "probe_set_blat_mb_end": data_.get("probe_set_blat_mb_end"),
-        "probe_set_strand": data_.get("probe_set_strand"),
-        "probe_set_note_by_rw": data_.get("probe_set_note_by_rw"),
-        "flag": data_.get("flag")
-    }
-    diff_data = {}
-    author = ((g.user_session.record.get(b"user_id") or b"").decode("utf-8")
-              or g.user_session.record.get("user_id") or "")
-    if (updated_probeset := update(
-            conn, "ProbeSet",
-            data=Probeset(**probeset_),
-            where=Probeset(id_=data_.get("id")))):
-        diff_data.update({"Probeset": diff_from_dict(old={
-            k: data_.get(f"old_{k}") for k, v in probeset_.items()
-            if v is not None}, new=probeset_)})
-    if diff_data:
-        diff_data.update({"probeset_name": data_.get("probeset_name")})
-        diff_data.update({"author": author})
-        diff_data.update({"resource_id": request.args.get('resource-id')})
-        diff_data.update({"timestamp": datetime.datetime.now().strftime(
-            "%Y-%m-%d %H:%M:%S")})
-        insert(conn,
-               table="metadata_audit",
-               data=MetadataAudit(dataset_id=data_.get("id"),
-                                  editor=author,
-                                  json_data=json.dumps(diff_data)))
-    return redirect(f"/datasets/traits/{name}"
-                    f"?resource-id={request.args.get('resource-id')}")
+    with database_connection() as conn:
+        data_ = request.form.to_dict()
+        probeset_ = {
+            "id_": data_.get("id"),
+            "symbol": data_.get("symbol"),
+            "description": data_.get("description"),
+            "probe_target_description": data_.get("probe_target_description"),
+            "chr_": data_.get("chr"),
+            "mb": data_.get("mb"),
+            "alias": data_.get("alias"),
+            "geneid": data_.get("geneid"),
+            "homologeneid": data_.get("homologeneid"),
+            "unigeneid": data_.get("unigeneid"),
+            "omim": data_.get("OMIM"),
+            "refseq_transcriptid": data_.get("refseq_transcriptid"),
+            "blatseq": data_.get("blatseq"),
+            "targetseq": data_.get("targetseq"),
+            "strand_probe": data_.get("Strand_Probe"),
+            "probe_set_target_region": data_.get("probe_set_target_region"),
+            "probe_set_specificity": data_.get("probe_set_specificity"),
+            "probe_set_blat_score": data_.get("probe_set_blat_score"),
+            "probe_set_blat_mb_start": data_.get("probe_set_blat_mb_start"),
+            "probe_set_blat_mb_end": data_.get("probe_set_blat_mb_end"),
+            "probe_set_strand": data_.get("probe_set_strand"),
+            "probe_set_note_by_rw": data_.get("probe_set_note_by_rw"),
+            "flag": data_.get("flag")
+        }
+        diff_data = {}
+        author = ((g.user_session.record.get(b"user_id") or b"").decode("utf-8")
+                  or g.user_session.record.get("user_id") or "")
+        if (updated_probeset := update(
+                conn, "ProbeSet",
+                data=Probeset(**probeset_),
+                where=Probeset(id_=data_.get("id")))):
+            diff_data.update({"Probeset": diff_from_dict(old={
+                k: data_.get(f"old_{k}") for k, v in probeset_.items()
+                if v is not None}, new=probeset_)})
+        if diff_data:
+            diff_data.update({"probeset_name": data_.get("probeset_name")})
+            diff_data.update({"author": author})
+            diff_data.update({"resource_id": request.args.get('resource-id')})
+            diff_data.update({"timestamp": datetime.datetime.now().strftime(
+                "%Y-%m-%d %H:%M:%S")})
+            insert(conn,
+                   table="metadata_audit",
+                   data=MetadataAudit(dataset_id=data_.get("id"),
+                                      editor=author,
+                                      json_data=json.dumps(diff_data)))
+        return redirect(f"/datasets/traits/{name}"
+                        f"?resource-id={request.args.get('resource-id')}")
 
 
 @metadata_edit.route("/<dataset_id>/traits/<phenotype_id>/csv")
 @login_required
 def get_sample_data_as_csv(dataset_id: str, phenotype_id: int):
-    return Response(
-        get_trait_csv_sample_data(
-            conn=MySQLdb.Connect(db=current_app.config.get("DB_NAME"),
-                                 user=current_app.config.get("DB_USER"),
-                                 passwd=current_app.config.get("DB_PASS"),
-                                 host=current_app.config.get("DB_HOST")),
-            trait_name=str(dataset_id),
-            phenotype_id=str(phenotype_id)),
-        mimetype="text/csv",
-        headers={"Content-disposition":
-                 f"attachment; filename=sample-data-{dataset_id}.csv"}
-    )
+    with database_connection() as conn:
+        return Response(
+            get_trait_csv_sample_data(
+                conn=conn,
+                trait_name=str(dataset_id),
+                phenotype_id=str(phenotype_id)),
+            mimetype="text/csv",
+            headers={"Content-disposition":
+                     f"attachment; filename=sample-data-{dataset_id}.csv"}
+        )
@metadata_edit.route("/diffs") @@ -492,20 +478,45 @@ def reject_data(resource_id: str, file_name: str): @login_required def approve_data(resource_id:str, file_name: str): sample_data = {file_name: str} - conn = MySQLdb.Connect(db=current_app.config.get("DB_NAME"), - user=current_app.config.get("DB_USER"), - passwd=current_app.config.get("DB_PASS"), - host=current_app.config.get("DB_HOST")) - TMPDIR = current_app.config.get("TMPDIR") - with open(os.path.join(f"{TMPDIR}/sample-data/diffs", - file_name), 'r') as myfile: - sample_data = json.load(myfile) - for modification in ( - modifications := [d for d in sample_data.get("Modifications")]): - if modification.get("Current"): + with database_connection() as conn: + TMPDIR = current_app.config.get("TMPDIR") + with open(os.path.join(f"{TMPDIR}/sample-data/diffs", + file_name), 'r') as myfile: + sample_data = json.load(myfile) + for modification in ( + modifications := [d for d in sample_data.get("Modifications")]): + if modification.get("Current"): + (strain_name, + value, se, count) = modification.get("Current").split(",") + update_sample_data( + conn=conn, + trait_name=sample_data.get("trait_name"), + strain_name=strain_name, + phenotype_id=int(sample_data.get("phenotype_id")), + value=value, + error=se, + count=count) + + n_deletions = 0 + for deletion in (deletions := [d for d in sample_data.get("Deletions")]): + strain_name, _, _, _ = deletion.split(",") + __deletions, _, _ = delete_sample_data( + conn=conn, + trait_name=sample_data.get("trait_name"), + strain_name=strain_name, + phenotype_id=int(sample_data.get("phenotype_id"))) + if __deletions: + n_deletions += 1 + # Remove any data that already exists from sample_data deletes + else: + sample_data.get("Deletions").remove(deletion) + + n_insertions = 0 + for insertion in ( + insertions := [d for d in sample_data.get("Additions")]): (strain_name, - value, se, count) = modification.get("Current").split(",") - update_sample_data( + value, se, count) = insertion.split(",") + __insertions, _, _ = insert_sample_data( conn=conn, trait_name=sample_data.get("trait_name"), strain_name=strain_name, @@ -513,65 +524,37 @@ def approve_data(resource_id:str, file_name: str): value=value, error=se, count=count) - - n_deletions = 0 - for deletion in (deletions := [d for d in sample_data.get("Deletions")]): - strain_name, _, _, _ = deletion.split(",") - __deletions, _, _ = delete_sample_data( - conn=conn, - trait_name=sample_data.get("trait_name"), - strain_name=strain_name, - phenotype_id=int(sample_data.get("phenotype_id"))) - if __deletions: - n_deletions += 1 - # Remove any data that already exists from sample_data deletes - else: - sample_data.get("Deletions").remove(deletion) - - n_insertions = 0 - for insertion in ( - insertions := [d for d in sample_data.get("Additions")]): - (strain_name, - value, se, count) = insertion.split(",") - __insertions, _, _ = insert_sample_data( - conn=conn, - trait_name=sample_data.get("trait_name"), - strain_name=strain_name, - phenotype_id=int(sample_data.get("phenotype_id")), - value=value, - error=se, - count=count) - if __insertions: - n_insertions += 1 - # Remove any data that already exists from sample_data inserts - else: - sample_data.get("Additions").remove(insertion) - if any([sample_data.get("Additions"), - sample_data.get("Modifications"), - sample_data.get("Deletions")]): - insert(conn, - table="metadata_audit", - data=MetadataAudit( - dataset_id=sample_data.get("trait_name"), - editor=sample_data.get("author"), - json_data=json.dumps(sample_data))) - # 
-        os.rename(os.path.join(f"{TMPDIR}/sample-data/diffs", file_name),
-                  os.path.join(f"{TMPDIR}/sample-data/diffs",
-                               f"{file_name}.approved"))
-        message = ""
-        if n_deletions:
-            flash(f"# Deletions: {n_deletions}", "success")
-        if n_insertions:
-            flash("# Additions: {len(modifications)", "success")
-        if len(modifications):
-            flash("# Modifications: {len(modifications)}", "success")
-    else:  # Edge case where you need to automatically reject the file
-        os.rename(os.path.join(f"{TMPDIR}/sample-data/diffs", file_name),
-                  os.path.join(f"{TMPDIR}/sample-data/diffs",
-                               f"{file_name}.rejected"))
-        flash(("Automatically rejecting this file since no "
-               "changes could be applied."), "warning")
-
-    return redirect(url_for('metadata_edit.list_diffs'))
+            if __insertions:
+                n_insertions += 1
+            # Remove any data that already exists from sample_data inserts
+            else:
+                sample_data.get("Additions").remove(insertion)
+        if any([sample_data.get("Additions"),
+                sample_data.get("Modifications"),
+                sample_data.get("Deletions")]):
+            insert(conn,
+                   table="metadata_audit",
+                   data=MetadataAudit(
+                       dataset_id=sample_data.get("trait_name"),
+                       editor=sample_data.get("author"),
+                       json_data=json.dumps(sample_data)))
+            # Once data is approved, rename it!
+            os.rename(os.path.join(f"{TMPDIR}/sample-data/diffs", file_name),
+                      os.path.join(f"{TMPDIR}/sample-data/diffs",
+                                   f"{file_name}.approved"))
+            message = ""
+            if n_deletions:
+                flash(f"# Deletions: {n_deletions}", "success")
+            if n_insertions:
+                flash(f"# Additions: {n_insertions}", "success")
+            if len(modifications):
+                flash(f"# Modifications: {len(modifications)}", "success")
+        else:  # Edge case where you need to automatically reject the file
+            os.rename(os.path.join(f"{TMPDIR}/sample-data/diffs", file_name),
+                      os.path.join(f"{TMPDIR}/sample-data/diffs",
+                                   f"{file_name}.rejected"))
+            flash(("Automatically rejecting this file since no "
+                   "changes could be applied."), "warning")
+
+    return redirect(url_for('metadata_edit.list_diffs'))
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 3bf17da6..59dba309 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -1,6 +1,4 @@
 """Main routing table for GN2"""
-
-import MySQLdb
 import array
 import base64
 import csv
@@ -123,6 +121,7 @@ def shutdown_session(exception=None):
     db = getattr(g, '_database', None)
     if db is not None:
         db_session.remove()
+        g.db.dispose()
         g.db = None