aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBonfaceKilz2021-07-29 22:15:36 +0300
committerGitHub2021-07-29 22:15:36 +0300
commit00278bc237c90b4ac276171a32bbe57e056644b6 (patch)
treef6ff2fd64cd288d75af4900cb7b2cd984f598ce0
parent2ccbf1844afe352e23af7ff958ec2b0694cd87ea (diff)
parent8d83ba7298c33e54ab204effe2a4241ce848add9 (diff)
downloadgenenetwork3-00278bc237c90b4ac276171a32bbe57e056644b6.tar.gz
Merge pull request #30 from genenetwork/Feature/Update-db-from-csv-data
Feature/update db from csv data
-rw-r--r--HACKING.org8
-rw-r--r--gn3/db/__init__.py2
-rw-r--r--gn3/db/traits.py165
-rw-r--r--tests/unit/db/test_traits.py33
4 files changed, 117 insertions, 91 deletions
diff --git a/HACKING.org b/HACKING.org
new file mode 100644
index 0000000..8c26a29
--- /dev/null
+++ b/HACKING.org
@@ -0,0 +1,8 @@
+* Introduction
+
+We are getting away from GitHub CI, and hosting our own CI servers. From GitHub, once a person makes a push, we'll be using [[https://github.com/adnanh/webhook][webhook]] to initiate laminar.
+
+
+First install laminar (use the appropriate binary):
+
+: wget https://github.com/adnanh/webhook/releases/download/2.8.0/webhook-linux-amd64.tar.gz && tar xf webhook-linux-amd64.tar.gz
diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py
index 5ab9f3c..149a344 100644
--- a/gn3/db/__init__.py
+++ b/gn3/db/__init__.py
@@ -1,7 +1,7 @@
# pylint: disable=[R0902, R0903]
"""Module that exposes common db operations"""
from dataclasses import asdict, astuple
-from typing import Any, Dict, List, Optional, Generator, Union
+from typing import Any, Dict, List, Optional, Generator, Tuple, Union
from typing_extensions import Protocol
from gn3.db.metadata_audit import MetadataAudit
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index f18e16a..ae1939a 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,95 +1,81 @@
-"""This contains all the necessary functions that are required to add traits
-to the published database"""
-from dataclasses import dataclass
-from typing import Any, Dict, Optional
-
-
-@dataclass(frozen=True)
-class Riset:
- """Class for keeping track of riset. A riset is a group e.g. rat HSNIH-Palmer,
-BXD
-
- """
- name: Optional[str]
- r_id: Optional[int]
-
-
-@dataclass(frozen=True)
-class WebqtlCaseData:
- """Class for keeping track of one case data in one trait"""
- value: Optional[float] = None
- variance: Optional[float] = None
- count: Optional[int] = None # Number of Individuals
-
- def __str__(self):
- _str = ""
- if self.value:
- _str += f"value={self.value:.3f}"
- if self.variance:
- _str += f" variance={self.variance:.3f}"
- if self.count:
- _str += " n_data={self.count}"
- return _str
-
-
-def lookup_webqtldataset_name(riset_name: str, conn: Any):
- """Given a group name(riset), return it's name e.g. BXDPublish,
-HLCPublish."""
+"""This class contains functions relating to trait data manipulation"""
+from typing import Any, Dict, Union
+
+
+def get_trait_csv_sample_data(conn: Any,
+ trait_name: int, phenotype_id: int):
+ """Fetch a trait and return it as a csv string"""
+ sql = ("SELECT Strain.Id, PublishData.Id, Strain.Name, "
+ "PublishData.value, "
+ "PublishSE.error, NStrain.count FROM "
+ "(PublishData, Strain, PublishXRef, PublishFreeze) "
+ "LEFT JOIN PublishSE ON "
+ "(PublishSE.DataId = PublishData.Id AND "
+ "PublishSE.StrainId = PublishData.StrainId) "
+ "LEFT JOIN NStrain ON (NStrain.DataId = PublishData.Id AND "
+ "NStrain.StrainId = PublishData.StrainId) WHERE "
+ "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND "
+ "PublishData.Id = PublishXRef.DataId AND "
+ "PublishXRef.Id = %s AND PublishXRef.PhenotypeId = %s "
+ "AND PublishData.StrainId = Strain.Id Order BY Strain.Name")
+ csv_data = ["Strain Id,Strain Name,Value,SE,Count"]
+ publishdata_id = ""
with conn.cursor() as cursor:
- cursor.execute(
- "SELECT PublishFreeze.Name FROM "
- "PublishFreeze, InbredSet WHERE "
- "PublishFreeze.InbredSetId = InbredSet.Id "
- "AND InbredSet.Name = '%s'" % riset_name)
- _result, *_ = cursor.fetchone()
- return _result
+ cursor.execute(sql, (trait_name, phenotype_id,))
+ for record in cursor.fetchall():
+ (strain_id, publishdata_id,
+ strain_name, value, error, count) = record
+ csv_data.append(
+ ",".join([str(val) if val else "x"
+ for val in (strain_id, strain_name,
+ value, error, count)]))
+ return f"# Publish Data Id: {publishdata_id}\n\n" + "\n".join(csv_data)
+
+
+def update_sample_data(conn: Any,
+ strain_name: str,
+ strain_id: int,
+ publish_data_id: int,
+ value: Union[int, float, str],
+ error: Union[int, float, str],
+ count: Union[int, str]):
+ """Given the right parameters, update sample-data from the relevant
+ table."""
+ STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
+ PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
+ "WHERE StrainId = %s AND Id = %s")
+ PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s "
+ "WHERE StrainId = %s AND DataId = %s")
+ N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s "
+ "WHERE StrainId = %s AND DataId = %s")
+
+ updated_strains: int = 0
+ updated_published_data: int = 0
+ updated_se_data: int = 0
+ updated_n_strains: int = 0
-
-def get_riset(data_type: str, name: str, conn: Any):
- """Get the groups given the data type and it's PublishFreeze or GenoFreeze
-name
-
- """
- query, _name, _id = None, None, None
- if data_type == "Publish":
- query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, "
- "PublishFreeze WHERE PublishFreeze.InbredSetId = "
- "InbredSet.Id AND PublishFreeze.Name = '%s'" % name)
- elif data_type == "Geno":
- query = ("SELECT InbredSet.Name, InbredSet.Id FROM InbredSet, "
- "GenoFreeze WHERE GenoFreeze.InbredSetId = "
- "InbredSet.Id AND GenoFreeze.Name = '%s'" % name)
- elif data_type == "ProbeSet":
- query = ("SELECT InbredSet.Name, InbredSet.Id FROM "
- "InbredSet, ProbeSetFreeze, ProbeFreeze WHERE "
- "ProbeFreeze.InbredSetId = InbredSet.Id AND "
- "ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND "
- "ProbeSetFreeze.Name = '%s'" % name)
- if query:
- with conn.cursor() as cursor:
- _name, _id = cursor.fetchone()
- if _name == "BXD300":
- _name = "BXD"
- return Riset(_name, _id)
-
-
-def insert_publication(pubmed_id: int, publication: Optional[Dict],
- conn: Any):
- """Creates a new publication record if it's not available"""
- sql = ("SELECT Id FROM Publication where "
- "PubMed_ID = %d" % pubmed_id)
- _id = None
with conn.cursor() as cursor:
- cursor.execute(sql)
- _id = cursor.fetchone()
- if not _id and publication:
- # The Publication contains the fields: 'authors', 'title', 'abstract',
- # 'journal','volume','pages','month','year'
- insert_query = ("INSERT into Publication (%s) Values (%s)" %
- (", ".join(publication.keys()),
- ", ".join(['%s'] * len(publication))))
- with conn.cursor() as cursor:
- cursor.execute(insert_query, tuple(publication.values()))
+ # Update the Strains table
+ cursor.execute(STRAIN_ID_SQL, (strain_name, strain_id))
+ updated_strains: int = cursor.rowcount
+ # Update the PublishData table
+ cursor.execute(PUBLISH_DATA_SQL,
+ (None if value == "x" else value,
+ strain_id, publish_data_id))
+ updated_published_data: int = cursor.rowcount
+ # Update the PublishSE table
+ cursor.execute(PUBLISH_SE_SQL,
+ (None if error == "x" else error,
+ strain_id, publish_data_id))
+ updated_se_data: int = cursor.rowcount
+ # Update the NStrain table
+ cursor.execute(N_STRAIN_SQL,
+ (None if count == "x" else count,
+ strain_id, publish_data_id))
+ updated_n_strains: int = cursor.rowcount
+ return (updated_strains, updated_published_data,
+ updated_se_data, updated_n_strains)
+
def retrieve_trait_dataset_name(
trait_type: str, threshold: int, name: str, connection: Any):
@@ -137,6 +123,7 @@ PUBLISH_TRAIT_INFO_QUERY = (
"PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND "
"PublishFreeze.Id =%(trait_dataset_id)s")
+
def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any):
"""Retrieve trait information for type `Publish` traits.
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index e3c5c28..c8f28b5 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -11,7 +11,9 @@ from gn3.db.traits import (
retrieve_temp_trait_info,
retrieve_trait_dataset_name,
retrieve_publish_trait_info,
- retrieve_probeset_trait_info)
+ retrieve_probeset_trait_info,
+ update_sample_data)
+
class TestTraitsDBFunctions(TestCase):
"Test cases for traits functions"
@@ -112,3 +114,32 @@ class TestTraitsDBFunctions(TestCase):
trait_type, trait_name, trait_dataset_id,
trait_dataset_name, db_mock),
tuple())
+
+ def test_update_sample_data(self):
+ """Test that the SQL queries when calling update_sample_data are called with
+ the right calls.
+
+ """
+ db_mock = mock.MagicMock()
+
+ STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
+ PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
+ "WHERE StrainId = %s AND Id = %s")
+ PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s "
+ "WHERE StrainId = %s AND DataId = %s")
+ N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s "
+ "WHERE StrainId = %s AND DataId = %s")
+
+ with db_mock.cursor() as cursor:
+ type(cursor).rowcount = 1
+ self.assertEqual(update_sample_data(
+ conn=db_mock, strain_name="BXD11",
+ strain_id=10, publish_data_id=8967049,
+ value=18.7, error=2.3, count=2),
+ (1, 1, 1, 1))
+ cursor.execute.assert_has_calls(
+ [mock.call(STRAIN_ID_SQL, ('BXD11', 10)),
+ mock.call(PUBLISH_DATA_SQL, (18.7, 10, 8967049)),
+ mock.call(PUBLISH_SE_SQL, (2.3, 10, 8967049)),
+ mock.call(N_STRAIN_SQL, (2, 10, 8967049))]
+ )