From e84861caf897d8f6f371d2797651f7c0a2787cea Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 26 Jul 2021 12:45:35 +0300 Subject: db: traits: Remove unused functions --- gn3/db/traits.py | 91 -------------------------------------------------------- 1 file changed, 91 deletions(-) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 4860a07..a773fb2 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,92 +1 @@ -"""This contains all the necessary functions that are required to add traits -to the published database""" -from dataclasses import dataclass -from typing import Any, Dict, Optional - - -@dataclass(frozen=True) -class Riset: - """Class for keeping track of riset. A riset is a group e.g. rat HSNIH-Palmer, -BXD - - """ - name: Optional[str] - r_id: Optional[int] - - -@dataclass(frozen=True) -class WebqtlCaseData: - """Class for keeping track of one case data in one trait""" - value: Optional[float] = None - variance: Optional[float] = None - count: Optional[int] = None # Number of Individuals - - def __str__(self): - _str = "" - if self.value: - _str += f"value={self.value:.3f}" - if self.variance: - _str += f" variance={self.variance:.3f}" - if self.count: - _str += " n_data={self.count}" - return _str - - -def lookup_webqtldataset_name(riset_name: str, conn: Any): - """Given a group name(riset), return it's name e.g. BXDPublish, -HLCPublish.""" with conn.cursor() as cursor: - cursor.execute( - "SELECT PublishFreeze.Name FROM " - "PublishFreeze, InbredSet WHERE " - "PublishFreeze.InbredSetId = InbredSet.Id " - "AND InbredSet.Name = '%s'" % riset_name) - _result, *_ = cursor.fetchone() - return _result - - -def get_riset(data_type: str, name: str, conn: Any): - """Get the groups given the data type and it's PublishFreeze or GenoFreeze -name - - """ - query, _name, _id = None, None, None - if data_type == "Publish": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, " - "PublishFreeze WHERE PublishFreeze.InbredSetId = " - "InbredSet.Id AND PublishFreeze.Name = '%s'" % name) - elif data_type == "Geno": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, " - "GenoFreeze WHERE GenoFreeze.InbredSetId = " - "InbredSet.Id AND GenoFreeze.Name = '%s'" % name) - elif data_type == "ProbeSet": - query = ("SELECT InbredSet.Name, InbredSet.Id FROM " - "InbredSet, ProbeSetFreeze, ProbeFreeze WHERE " - "ProbeFreeze.InbredSetId = InbredSet.Id AND " - "ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND " - "ProbeSetFreeze.Name = '%s'" % name) - if query: - with conn.cursor() as cursor: - _name, _id = cursor.fetchone() - if _name == "BXD300": - _name = "BXD" - return Riset(_name, _id) - - -def insert_publication(pubmed_id: int, publication: Optional[Dict], - conn: Any): - """Creates a new publication record if it's not available""" - sql = ("SELECT Id FROM Publication where " - "PubMed_ID = %d" % pubmed_id) - _id = None - with conn.cursor() as cursor: - cursor.execute(sql) - _id = cursor.fetchone() - if not _id and publication: - # The Publication contains the fields: 'authors', 'title', 'abstract', - # 'journal','volume','pages','month','year' - insert_query = ("INSERT into Publication (%s) Values (%s)" % - (", ".join(publication.keys()), - ", ".join(['%s'] * len(publication)))) - with conn.cursor() as cursor: - cursor.execute(insert_query, tuple(publication.values())) -- cgit v1.2.3 From 9fe0f711a6b9ba8149ad26ffe2ff8cc1f686d90b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 26 Jul 2021 12:45:47 +0300 Subject: db: traits: Fetch sample_data from a trait in csv form --- gn3/db/traits.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index a773fb2..ec4fa5a 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1 +1,27 @@ +"""This class contains functions relating to trait data manipulation""" +from typing import Any + + +def get_trait_csv_sample_data(conn: Any, + trait_name: int, phenotype_id: int): + """Fetch a trait and return it as a csv string""" + sql = ("SELECT Strain.Id, PublishData.Id, Strain.Name, " + "PublishData.value, " + "PublishSE.error, NStrain.count FROM " + "(PublishData, Strain, PublishXRef, PublishFreeze) " + "LEFT JOIN PublishSE ON " + "(PublishSE.DataId = PublishData.Id AND " + "PublishSE.StrainId = PublishData.StrainId) " + "LEFT JOIN NStrain ON (NStrain.DataId = PublishData.Id AND " + "NStrain.StrainId = PublishData.StrainId) WHERE " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishData.Id = PublishXRef.DataId AND " + "PublishXRef.Id = %s AND PublishXRef.PhenotypeId = %s " + "AND PublishData.StrainId = Strain.Id Order BY Strain.Name") + csv_data = ["Strain Id, Publish Data Id,Strain Name,Value,SE,Count"] with conn.cursor() as cursor: + cursor.execute(sql, (trait_name, phenotype_id,)) + for record in cursor.fetchall(): + csv_data.append(",".join([str(val) if val else "x" + for val in record])) + return "\n".join(csv_data) -- cgit v1.2.3 From ab108e2988ed15b3de4db506a36444d9c736348d Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 26 Jul 2021 22:00:25 +0300 Subject: gn3: db: Create a raw update query * gn3/db/__init__.py (update_raw): New function. --- gn3/db/__init__.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 5ab9f3c..24ae8f1 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -1,7 +1,7 @@ # pylint: disable=[R0902, R0903] """Module that exposes common db operations""" from dataclasses import asdict, astuple -from typing import Any, Dict, List, Optional, Generator, Union +from typing import Any, Dict, List, Optional, Generator, Tuple, Union from typing_extensions import Protocol from gn3.db.metadata_audit import MetadataAudit @@ -64,6 +64,20 @@ def update(conn: Any, return cursor.rowcount +def update_raw(conn: Any, table: str, + set_: List[Tuple[str, Any]], + where: Tuple[str, Tuple]): + """Run a generic raw statement""" + sql = f"UPDATE {table} SET " + sql += ", ".join([f"{k} = '%s'" for k, v in set_]) + sql += f" WHERE {where[0]}" + with conn.cursor() as cursor: + cursor.execute(sql, + tuple(v for _, v in set_) + where[1]) + conn.commit() + return cursor.rowcount + + def fetchone(conn: Any, table: str, where: Optional[Dataclass], -- cgit v1.2.3 From 0b8f5fef2398385113d8e480050b8cb1ed404b4d Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Mon, 26 Jul 2021 22:01:36 +0300 Subject: tests: test_db: Add test case for "update_raw" --- tests/unit/db/test_db.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/unit/db/test_db.py b/tests/unit/db/test_db.py index e47c9fd..1bcde35 100644 --- a/tests/unit/db/test_db.py +++ b/tests/unit/db/test_db.py @@ -5,6 +5,7 @@ from unittest import mock from gn3.db import fetchall from gn3.db import fetchone from gn3.db import update +from gn3.db import update_raw from gn3.db import diff_from_dict from gn3.db.phenotypes import Phenotype from gn3.db.phenotypes import Probeset @@ -118,3 +119,21 @@ class TestCrudMethods(TestCase): {"id": 2, "data": "b"}), {"id": {"old": 1, "new": 2}, "data": {"old": "a", "new": "b"}}) + + def test_update_raw(self): + """Test a raw update query""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + type(cursor).rowcount = 1 + self.assertEqual(update_raw( + conn=db_mock, + table="PublishData", + set_=[("value", 1)], + where=("StrainId = '%s' AND DataId = '%s'", + (2, 8967049))), + 1) + cursor.execute.assert_called_once_with( + "UPDATE PublishData SET " + "value = '%s' WHERE " + "StrainId = '%s' AND DataId = '%s'", + (1, 2, 8967049)) -- cgit v1.2.3 From f5d83ab0e6db9ed5fab2a97695fee698ed484f9a Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 27 Jul 2021 11:35:23 +0300 Subject: db: traits: Remove publishdata column --- gn3/db/traits.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index ec4fa5a..4baac67 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -18,10 +18,15 @@ def get_trait_csv_sample_data(conn: Any, "PublishData.Id = PublishXRef.DataId AND " "PublishXRef.Id = %s AND PublishXRef.PhenotypeId = %s " "AND PublishData.StrainId = Strain.Id Order BY Strain.Name") - csv_data = ["Strain Id, Publish Data Id,Strain Name,Value,SE,Count"] + csv_data = ["Strain Id,Strain Name,Value,SE,Count"] + publishdata_id = "" with conn.cursor() as cursor: cursor.execute(sql, (trait_name, phenotype_id,)) for record in cursor.fetchall(): - csv_data.append(",".join([str(val) if val else "x" - for val in record])) - return "\n".join(csv_data) + (strain_id, publishdata_id, + strain_name, value, error, count) = record + csv_data.append( + ",".join([str(val) if val else "x" + for val in (strain_id, strain_name, + value, error, count)])) + return f"# Publish Data Id: {publishdata_id}\n\n" + "\n".join(csv_data) -- cgit v1.2.3 From ac14e1167d866b8ab3a43583db8860ce99a3310b Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 29 Jul 2021 16:28:12 +0300 Subject: Add method for updating values from a sample dataset * gn3/db/traits.py (update_sample_data): New function. * tests/unit/db/test_traits.py: New test cases for ^^. --- HACKING.org | 8 ++++++++ gn3/db/traits.py | 47 +++++++++++++++++++++++++++++++++++++++++++- tests/unit/db/test_traits.py | 37 ++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 HACKING.org create mode 100644 tests/unit/db/test_traits.py diff --git a/HACKING.org b/HACKING.org new file mode 100644 index 0000000..8c26a29 --- /dev/null +++ b/HACKING.org @@ -0,0 +1,8 @@ +* Introduction + +We are getting away from GitHub CI, and hosting our own CI servers. From GitHub, once a person makes a push, we'll be using [[https://github.com/adnanh/webhook][webhook]] to initiate laminar. + + +First install laminar (use the appropriate binary): + +: wget https://github.com/adnanh/webhook/releases/download/2.8.0/webhook-linux-amd64.tar.gz && tar xf webhook-linux-amd64.tar.gz diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 4baac67..a77e6a1 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,5 +1,5 @@ """This class contains functions relating to trait data manipulation""" -from typing import Any +from typing import Any, Union def get_trait_csv_sample_data(conn: Any, @@ -30,3 +30,48 @@ def get_trait_csv_sample_data(conn: Any, for val in (strain_id, strain_name, value, error, count)])) return f"# Publish Data Id: {publishdata_id}\n\n" + "\n".join(csv_data) + + +def update_sample_data(conn: Any, + strain_name: str, + strain_id: int, + publish_data_id: int, + value: Union[int, float, str], + error: Union[int, float, str], + count: Union[int, str]): + """Given the right parameters, update sample-data from the relevant + table.""" + STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s" + PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " + "WHERE StrainId = %s AND Id = %s") + PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s " + "WHERE StrainId = %s AND DataId = %s") + N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s " + "WHERE StrainId = %s AND DataId = %s") + + updated_strains: int = 0 + updated_published_data: int = 0 + updated_se_data: int = 0 + updated_n_strains: int = 0 + + with conn.cursor() as cursor: + # Update the Strains table + cursor.execute(STRAIN_ID_SQL, (strain_name, strain_id)) + updated_strains: int = cursor.rowcount + # Update the PublishData table + cursor.execute(PUBLISH_DATA_SQL, + (None if value == "x" else value, + strain_id, publish_data_id)) + updated_published_data: int = cursor.rowcount + # Update the PublishSE table + cursor.execute(PUBLISH_SE_SQL, + (None if error == "x" else error, + strain_id, publish_data_id)) + updated_se_data: int = cursor.rowcount + # Update the NStrain table + cursor.execute(N_STRAIN_SQL, + (None if count == "x" else count, + strain_id, publish_data_id)) + updated_n_strains: int = cursor.rowcount + return (updated_strains, updated_published_data, + updated_se_data, updated_n_strains) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py new file mode 100644 index 0000000..0e69bbe --- /dev/null +++ b/tests/unit/db/test_traits.py @@ -0,0 +1,37 @@ +"""Tests for db/traits.py""" +from unittest import TestCase +from unittest import mock + +from gn3.db.traits import update_sample_data + + +class TestTraitsSqlMethods(TestCase): + """Test cases for sql operations that affect traits""" + def test_update_sample_data(self): + """Test that the SQL queries when calling update_sample_data are called with + the right calls. + + """ + db_mock = mock.MagicMock() + + STRAIN_ID_SQL: str = "UPDATE Strain SET Name = '%s' WHERE Id = %s" + PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " + "WHERE StrainId = %s AND DataId = %s") + PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s " + "WHERE StrainId = %s AND DataId = %s") + N_STRAIN_SQL: str = ("UPDATE NStrain SET count = '%s' " + "WHERE StrainId = %s AND DataId = %s") + + with db_mock.cursor() as cursor: + type(cursor).rowcount = 1 + self.assertEqual(update_sample_data( + conn=db_mock, strain_name="BXD11", + strain_id=10, publish_data_id=8967049, + value=18.7, error=2.3, count=2), + (1, 1, 1, 1)) + cursor.execute.assert_has_calls( + [mock.call(STRAIN_ID_SQL, ('BXD11', 10)), + mock.call(PUBLISH_DATA_SQL, (18.7, 10, 8967049)), + mock.call(PUBLISH_SE_SQL, (2.3, 10, 8967049)), + mock.call(N_STRAIN_SQL, (2, 10, 8967049))] + ) -- cgit v1.2.3 From 651515eb88cb5675434ff068ad19edd0c4c0e858 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Thu, 29 Jul 2021 16:30:31 +0300 Subject: Delete "update_raw" and it's test-cases --- gn3/db/__init__.py | 14 -------------- tests/unit/db/test_db.py | 19 ------------------- 2 files changed, 33 deletions(-) diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 24ae8f1..149a344 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -64,20 +64,6 @@ def update(conn: Any, return cursor.rowcount -def update_raw(conn: Any, table: str, - set_: List[Tuple[str, Any]], - where: Tuple[str, Tuple]): - """Run a generic raw statement""" - sql = f"UPDATE {table} SET " - sql += ", ".join([f"{k} = '%s'" for k, v in set_]) - sql += f" WHERE {where[0]}" - with conn.cursor() as cursor: - cursor.execute(sql, - tuple(v for _, v in set_) + where[1]) - conn.commit() - return cursor.rowcount - - def fetchone(conn: Any, table: str, where: Optional[Dataclass], diff --git a/tests/unit/db/test_db.py b/tests/unit/db/test_db.py index 1bcde35..e47c9fd 100644 --- a/tests/unit/db/test_db.py +++ b/tests/unit/db/test_db.py @@ -5,7 +5,6 @@ from unittest import mock from gn3.db import fetchall from gn3.db import fetchone from gn3.db import update -from gn3.db import update_raw from gn3.db import diff_from_dict from gn3.db.phenotypes import Phenotype from gn3.db.phenotypes import Probeset @@ -119,21 +118,3 @@ class TestCrudMethods(TestCase): {"id": 2, "data": "b"}), {"id": {"old": 1, "new": 2}, "data": {"old": "a", "new": "b"}}) - - def test_update_raw(self): - """Test a raw update query""" - db_mock = mock.MagicMock() - with db_mock.cursor() as cursor: - type(cursor).rowcount = 1 - self.assertEqual(update_raw( - conn=db_mock, - table="PublishData", - set_=[("value", 1)], - where=("StrainId = '%s' AND DataId = '%s'", - (2, 8967049))), - 1) - cursor.execute.assert_called_once_with( - "UPDATE PublishData SET " - "value = '%s' WHERE " - "StrainId = '%s' AND DataId = '%s'", - (1, 2, 8967049)) -- cgit v1.2.3