about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2023-07-19 09:42:18 +0300
committer Frederick Muriuki Muriithi 2023-07-19 09:48:26 +0300
commit 38478ff22686afcb74cfdd43a73c7a42710bfca4 (patch)
tree a87fd75aaa6dff6230dd4a2e20153b799b44f8d9
parent 82722fefd007edbddf08175686570e2ed307097e (diff)
download genenetwork3-38478ff22686afcb74cfdd43a73c7a42710bfca4.tar.gz
Extract code dealing with ProbeSets to their own module.
-rw-r--r-- gn3/db/__init__.py 4
-rw-r--r-- gn3/db/phenotypes.py 59
-rw-r--r-- gn3/db/probesets.py 77
-rw-r--r-- tests/unit/db/test_db.py 2
4 files changed, 80 insertions, 62 deletions
diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py
index 149a344..3b063f1 100644
--- a/gn3/db/__init__.py
+++ b/gn3/db/__init__.py
@@ -5,15 +5,15 @@ from typing import Any, Dict, List, Optional, Generator, Tuple, Union
 from typing_extensions import Protocol
 
 from gn3.db.metadata_audit import MetadataAudit
+from gn3.db.probesets import Probeset
 from gn3.db.phenotypes import Phenotype
-from gn3.db.phenotypes import Probeset
 from gn3.db.phenotypes import Publication
 from gn3.db.phenotypes import PublishXRef
 
 
 from gn3.db.metadata_audit import metadata_audit_mapping
+from gn3.db.probesets import probeset_mapping
 from gn3.db.phenotypes import phenotype_mapping
-from gn3.db.phenotypes import probeset_mapping
 from gn3.db.phenotypes import publication_mapping
 from gn3.db.phenotypes import publish_x_ref_mapping
 
diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py
index 605ca73..e8b0b0b 100644
--- a/gn3/db/phenotypes.py
+++ b/gn3/db/phenotypes.py
@@ -105,65 +105,6 @@ publication_mapping = {
     "year": "Year",
 }
 
-
-@dataclass(frozen=True)
-class Probeset:
-    """Data Type that represents a Probeset"""
-    id_: Optional[int] = None
-    name: Optional[str] = None
-    symbol: Optional[str] = None
-    description: Optional[str] = None
-    probe_target_description: Optional[str] = None
-    chr_: Optional[str] = None
-    mb: Optional[float] = None  # pylint: disable=C0103
-    alias: Optional[str] = None
-    geneid: Optional[str] = None
-    homologeneid: Optional[str] = None
-    unigeneid: Optional[str] = None
-    omim: Optional[str] = None
-    refseq_transcriptid: Optional[str] = None
-    blatseq: Optional[str] = None
-    targetseq: Optional[str] = None
-    strand_probe: Optional[str] = None
-    probe_set_target_region: Optional[str] = None
-    probe_set_specificity: Optional[float] = None
-    probe_set_blat_score: Optional[float] = None
-    probe_set_blat_mb_start: Optional[float] = None
-    probe_set_blat_mb_end: Optional[float] = None
-    probe_set_strand: Optional[str] = None
-    probe_set_note_by_rw: Optional[str] = None
-    flag: Optional[str] = None
-
-
-# Mapping from the Phenotype dataclass to the actual column names in the
-# database
-probeset_mapping = {
-    "id_": "Id",
-    "name": "Name",
-    "symbol": "symbol",
-    "description": "description",
-    "probe_target_description": "Probe_Target_Description",
-    "chr_": "Chr",
-    "mb": "Mb",
-    "alias": "alias",
-    "geneid": "GeneId",
-    "homologeneid": "HomoloGeneID",
-    "unigeneid": "UniGeneId",
-    "omim": "OMIM",
-    "refseq_transcriptid": "RefSeq_TranscriptId",
-    "blatseq": "BlatSeq",
-    "targetseq": "TargetSeq",
-    "strand_probe": "Strand_Probe",
-    "probe_set_target_region": "Probe_set_target_region",
-    "probe_set_specificity": "Probe_set_specificity",
-    "probe_set_blat_score": "Probe_set_BLAT_score",
-    "probe_set_blat_mb_start": "Probe_set_Blat_Mb_start",
-    "probe_set_blat_mb_end": "Probe_set_Blat_Mb_end",
-    "probe_set_strand": "Probe_set_strand",
-    "probe_set_note_by_rw": "Probe_set_Note_by_RW",
-    "flag": "flag"
-}
-
 def fetch_trait(conn: DBConnection, dataset_id: int, trait_name: str) -> dict:
     """Fetch phenotype 'traits' by `dataset_id` and `trait_name`."""
     query = (
diff --git a/gn3/db/probesets.py b/gn3/db/probesets.py
new file mode 100644
index 0000000..19c6896
--- /dev/null
+++ b/gn3/db/probesets.py
@@ -0,0 +1,77 @@
+"""Functions and utilities to handle ProbeSets from the database."""
+from typing import Optional
+from dataclasses import dataclass
+
+from MySQLdb.cursors import DictCursor
+
+from gn3.db_utils import Connection as DBConnection
+
+from .query_tools import mapping_to_query_columns
+
+@dataclass(frozen=True)
+class Probeset: # pylint: disable=[too-many-instance-attributes]
+    """Data Type that represents a Probeset"""
+    id_: Optional[int] = None
+    name: Optional[str] = None
+    symbol: Optional[str] = None
+    description: Optional[str] = None
+    probe_target_description: Optional[str] = None
+    chr_: Optional[str] = None
+    mb: Optional[float] = None  # pylint: disable=C0103
+    alias: Optional[str] = None
+    geneid: Optional[str] = None
+    homologeneid: Optional[str] = None
+    unigeneid: Optional[str] = None
+    omim: Optional[str] = None
+    refseq_transcriptid: Optional[str] = None
+    blatseq: Optional[str] = None
+    targetseq: Optional[str] = None
+    strand_probe: Optional[str] = None
+    probe_set_target_region: Optional[str] = None
+    probe_set_specificity: Optional[float] = None
+    probe_set_blat_score: Optional[float] = None
+    probe_set_blat_mb_start: Optional[float] = None
+    probe_set_blat_mb_end: Optional[float] = None
+    probe_set_strand: Optional[str] = None
+    probe_set_note_by_rw: Optional[str] = None
+    flag: Optional[str] = None
+
+
+# Mapping from the Phenotype dataclass to the actual column names in the
+# database
+probeset_mapping = {
+    "id_": "Id",
+    "name": "Name",
+    "symbol": "symbol",
+    "description": "description",
+    "probe_target_description": "Probe_Target_Description",
+    "chr_": "Chr",
+    "mb": "Mb",
+    "alias": "alias",
+    "geneid": "GeneId",
+    "homologeneid": "HomoloGeneID",
+    "unigeneid": "UniGeneId",
+    "omim": "OMIM",
+    "refseq_transcriptid": "RefSeq_TranscriptId",
+    "blatseq": "BlatSeq",
+    "targetseq": "TargetSeq",
+    "strand_probe": "Strand_Probe",
+    "probe_set_target_region": "Probe_set_target_region",
+    "probe_set_specificity": "Probe_set_specificity",
+    "probe_set_blat_score": "Probe_set_BLAT_score",
+    "probe_set_blat_mb_start": "Probe_set_Blat_Mb_start",
+    "probe_set_blat_mb_end": "Probe_set_Blat_Mb_end",
+    "probe_set_strand": "Probe_set_strand",
+    "probe_set_note_by_rw": "Probe_set_Note_by_RW",
+    "flag": "flag"
+}
+
+def fetch_probeset_metadata_by_name(conn: DBConnection, name: str) -> dict:
+    """Fetch a ProbeSet's metadata by its `name`."""
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cols = ", ".join(mapping_to_query_columns(probeset_mapping))
+        cursor.execute((f"SELECT Id as id, {cols} "
+                        "FROM ProbeSet "
+                        "WHERE Name = %(name)s"),
+                       {"name": name})
+        return cursor.fetchone()
diff --git a/tests/unit/db/test_db.py b/tests/unit/db/test_db.py
index 8ac468c..821a6b6 100644
--- a/tests/unit/db/test_db.py
+++ b/tests/unit/db/test_db.py
@@ -8,8 +8,8 @@ from gn3.db import fetchall
 from gn3.db import fetchone
 from gn3.db import update
 from gn3.db import diff_from_dict
+from gn3.db.probesets import Probeset
 from gn3.db.phenotypes import Phenotype
-from gn3.db.phenotypes import Probeset
 from gn3.db.metadata_audit import MetadataAudit