about | summary | refs | log | tree | commit | diff
path: root/gn3/db
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2023-07-19 09:42:18 +0300
committer: Frederick Muriuki Muriithi, 2023-07-19 09:48:26 +0300
commit: 38478ff22686afcb74cfdd43a73c7a42710bfca4 (patch)
tree: a87fd75aaa6dff6230dd4a2e20153b799b44f8d9 /gn3/db
parent: 82722fefd007edbddf08175686570e2ed307097e (diff)
download: genenetwork3-38478ff22686afcb74cfdd43a73c7a42710bfca4.tar.gz
Extract code dealing with ProbeSets to their own module.
Diffstat (limited to 'gn3/db')
-rw-r--r--  gn3/db/__init__.py    4
-rw-r--r--  gn3/db/phenotypes.py  59
-rw-r--r--  gn3/db/probesets.py   77
3 files changed, 79 insertions, 61 deletions
diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py
index 149a344..3b063f1 100644
--- a/gn3/db/__init__.py
+++ b/gn3/db/__init__.py
@@ -5,15 +5,15 @@ from typing import Any, Dict, List, Optional, Generator, Tuple, Union
from typing_extensions import Protocol
from gn3.db.metadata_audit import MetadataAudit
+from gn3.db.probesets import Probeset
from gn3.db.phenotypes import Phenotype
-from gn3.db.phenotypes import Probeset
from gn3.db.phenotypes import Publication
from gn3.db.phenotypes import PublishXRef
from gn3.db.metadata_audit import metadata_audit_mapping
+from gn3.db.probesets import probeset_mapping
from gn3.db.phenotypes import phenotype_mapping
-from gn3.db.phenotypes import probeset_mapping
from gn3.db.phenotypes import publication_mapping
from gn3.db.phenotypes import publish_x_ref_mapping
diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py
index 605ca73..e8b0b0b 100644
--- a/gn3/db/phenotypes.py
+++ b/gn3/db/phenotypes.py
@@ -105,65 +105,6 @@ publication_mapping = {
"year": "Year",
}
-
-@dataclass(frozen=True)
-class Probeset:
- """Data Type that represents a Probeset"""
- id_: Optional[int] = None
- name: Optional[str] = None
- symbol: Optional[str] = None
- description: Optional[str] = None
- probe_target_description: Optional[str] = None
- chr_: Optional[str] = None
- mb: Optional[float] = None # pylint: disable=C0103
- alias: Optional[str] = None
- geneid: Optional[str] = None
- homologeneid: Optional[str] = None
- unigeneid: Optional[str] = None
- omim: Optional[str] = None
- refseq_transcriptid: Optional[str] = None
- blatseq: Optional[str] = None
- targetseq: Optional[str] = None
- strand_probe: Optional[str] = None
- probe_set_target_region: Optional[str] = None
- probe_set_specificity: Optional[float] = None
- probe_set_blat_score: Optional[float] = None
- probe_set_blat_mb_start: Optional[float] = None
- probe_set_blat_mb_end: Optional[float] = None
- probe_set_strand: Optional[str] = None
- probe_set_note_by_rw: Optional[str] = None
- flag: Optional[str] = None
-
-
-# Mapping from the Phenotype dataclass to the actual column names in the
-# database
-probeset_mapping = {
- "id_": "Id",
- "name": "Name",
- "symbol": "symbol",
- "description": "description",
- "probe_target_description": "Probe_Target_Description",
- "chr_": "Chr",
- "mb": "Mb",
- "alias": "alias",
- "geneid": "GeneId",
- "homologeneid": "HomoloGeneID",
- "unigeneid": "UniGeneId",
- "omim": "OMIM",
- "refseq_transcriptid": "RefSeq_TranscriptId",
- "blatseq": "BlatSeq",
- "targetseq": "TargetSeq",
- "strand_probe": "Strand_Probe",
- "probe_set_target_region": "Probe_set_target_region",
- "probe_set_specificity": "Probe_set_specificity",
- "probe_set_blat_score": "Probe_set_BLAT_score",
- "probe_set_blat_mb_start": "Probe_set_Blat_Mb_start",
- "probe_set_blat_mb_end": "Probe_set_Blat_Mb_end",
- "probe_set_strand": "Probe_set_strand",
- "probe_set_note_by_rw": "Probe_set_Note_by_RW",
- "flag": "flag"
-}
-
def fetch_trait(conn: DBConnection, dataset_id: int, trait_name: str) -> dict:
"""Fetch phenotype 'traits' by `dataset_id` and `trait_name`."""
query = (
diff --git a/gn3/db/probesets.py b/gn3/db/probesets.py
new file mode 100644
index 0000000..19c6896
--- /dev/null
+++ b/gn3/db/probesets.py
@@ -0,0 +1,77 @@
+"""Functions and utilities to handle ProbeSets from the database."""
+from typing import Optional
+from dataclasses import dataclass
+
+from MySQLdb.cursors import DictCursor
+
+from gn3.db_utils import Connection as DBConnection
+
+from .query_tools import mapping_to_query_columns
+
+@dataclass(frozen=True)
+class Probeset: # pylint: disable=[too-many-instance-attributes]
+ """Data Type that represents a Probeset"""
+ id_: Optional[int] = None
+ name: Optional[str] = None
+ symbol: Optional[str] = None
+ description: Optional[str] = None
+ probe_target_description: Optional[str] = None
+ chr_: Optional[str] = None
+ mb: Optional[float] = None # pylint: disable=C0103
+ alias: Optional[str] = None
+ geneid: Optional[str] = None
+ homologeneid: Optional[str] = None
+ unigeneid: Optional[str] = None
+ omim: Optional[str] = None
+ refseq_transcriptid: Optional[str] = None
+ blatseq: Optional[str] = None
+ targetseq: Optional[str] = None
+ strand_probe: Optional[str] = None
+ probe_set_target_region: Optional[str] = None
+ probe_set_specificity: Optional[float] = None
+ probe_set_blat_score: Optional[float] = None
+ probe_set_blat_mb_start: Optional[float] = None
+ probe_set_blat_mb_end: Optional[float] = None
+ probe_set_strand: Optional[str] = None
+ probe_set_note_by_rw: Optional[str] = None
+ flag: Optional[str] = None
+
+
+# Mapping from the Probeset dataclass to the actual column names in the
+# database
+probeset_mapping = {
+ "id_": "Id",
+ "name": "Name",
+ "symbol": "symbol",
+ "description": "description",
+ "probe_target_description": "Probe_Target_Description",
+ "chr_": "Chr",
+ "mb": "Mb",
+ "alias": "alias",
+ "geneid": "GeneId",
+ "homologeneid": "HomoloGeneID",
+ "unigeneid": "UniGeneId",
+ "omim": "OMIM",
+ "refseq_transcriptid": "RefSeq_TranscriptId",
+ "blatseq": "BlatSeq",
+ "targetseq": "TargetSeq",
+ "strand_probe": "Strand_Probe",
+ "probe_set_target_region": "Probe_set_target_region",
+ "probe_set_specificity": "Probe_set_specificity",
+ "probe_set_blat_score": "Probe_set_BLAT_score",
+ "probe_set_blat_mb_start": "Probe_set_Blat_Mb_start",
+ "probe_set_blat_mb_end": "Probe_set_Blat_Mb_end",
+ "probe_set_strand": "Probe_set_strand",
+ "probe_set_note_by_rw": "Probe_set_Note_by_RW",
+ "flag": "flag"
+}
+
+def fetch_probeset_metadata_by_name(conn: DBConnection, name: str) -> dict:
+ """Fetch a ProbeSet's metadata by its `name`."""
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cols = ", ".join(mapping_to_query_columns(probeset_mapping))
+ cursor.execute((f"SELECT Id as id, {cols} "
+ "FROM ProbeSet "
+ "WHERE Name = %(name)s"),
+ {"name": name})
+ return cursor.fetchone()