From 38478ff22686afcb74cfdd43a73c7a42710bfca4 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 19 Jul 2023 09:42:18 +0300 Subject: Extract code dealing with ProbeSets to their own module. --- gn3/db/__init__.py | 4 +-- gn3/db/phenotypes.py | 59 ---------------------------------------- gn3/db/probesets.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 61 deletions(-) create mode 100644 gn3/db/probesets.py (limited to 'gn3') diff --git a/gn3/db/__init__.py b/gn3/db/__init__.py index 149a344..3b063f1 100644 --- a/gn3/db/__init__.py +++ b/gn3/db/__init__.py @@ -5,15 +5,15 @@ from typing import Any, Dict, List, Optional, Generator, Tuple, Union from typing_extensions import Protocol from gn3.db.metadata_audit import MetadataAudit +from gn3.db.probesets import Probeset from gn3.db.phenotypes import Phenotype -from gn3.db.phenotypes import Probeset from gn3.db.phenotypes import Publication from gn3.db.phenotypes import PublishXRef from gn3.db.metadata_audit import metadata_audit_mapping +from gn3.db.probesets import probeset_mapping from gn3.db.phenotypes import phenotype_mapping -from gn3.db.phenotypes import probeset_mapping from gn3.db.phenotypes import publication_mapping from gn3.db.phenotypes import publish_x_ref_mapping diff --git a/gn3/db/phenotypes.py b/gn3/db/phenotypes.py index 605ca73..e8b0b0b 100644 --- a/gn3/db/phenotypes.py +++ b/gn3/db/phenotypes.py @@ -105,65 +105,6 @@ publication_mapping = { "year": "Year", } - -@dataclass(frozen=True) -class Probeset: - """Data Type that represents a Probeset""" - id_: Optional[int] = None - name: Optional[str] = None - symbol: Optional[str] = None - description: Optional[str] = None - probe_target_description: Optional[str] = None - chr_: Optional[str] = None - mb: Optional[float] = None # pylint: disable=C0103 - alias: Optional[str] = None - geneid: Optional[str] = None - homologeneid: Optional[str] = None - unigeneid: Optional[str] = None - omim: 
Optional[str] = None - refseq_transcriptid: Optional[str] = None - blatseq: Optional[str] = None - targetseq: Optional[str] = None - strand_probe: Optional[str] = None - probe_set_target_region: Optional[str] = None - probe_set_specificity: Optional[float] = None - probe_set_blat_score: Optional[float] = None - probe_set_blat_mb_start: Optional[float] = None - probe_set_blat_mb_end: Optional[float] = None - probe_set_strand: Optional[str] = None - probe_set_note_by_rw: Optional[str] = None - flag: Optional[str] = None - - -# Mapping from the Phenotype dataclass to the actual column names in the -# database -probeset_mapping = { - "id_": "Id", - "name": "Name", - "symbol": "symbol", - "description": "description", - "probe_target_description": "Probe_Target_Description", - "chr_": "Chr", - "mb": "Mb", - "alias": "alias", - "geneid": "GeneId", - "homologeneid": "HomoloGeneID", - "unigeneid": "UniGeneId", - "omim": "OMIM", - "refseq_transcriptid": "RefSeq_TranscriptId", - "blatseq": "BlatSeq", - "targetseq": "TargetSeq", - "strand_probe": "Strand_Probe", - "probe_set_target_region": "Probe_set_target_region", - "probe_set_specificity": "Probe_set_specificity", - "probe_set_blat_score": "Probe_set_BLAT_score", - "probe_set_blat_mb_start": "Probe_set_Blat_Mb_start", - "probe_set_blat_mb_end": "Probe_set_Blat_Mb_end", - "probe_set_strand": "Probe_set_strand", - "probe_set_note_by_rw": "Probe_set_Note_by_RW", - "flag": "flag" -} - def fetch_trait(conn: DBConnection, dataset_id: int, trait_name: str) -> dict: """Fetch phenotype 'traits' by `dataset_id` and `trait_name`.""" query = ( diff --git a/gn3/db/probesets.py b/gn3/db/probesets.py new file mode 100644 index 0000000..19c6896 --- /dev/null +++ b/gn3/db/probesets.py @@ -0,0 +1,77 @@ +"""Functions and utilities to handle ProbeSets from the database.""" +from typing import Optional +from dataclasses import dataclass + +from MySQLdb.cursors import DictCursor + +from gn3.db_utils import Connection as DBConnection + +from 
.query_tools import mapping_to_query_columns + +@dataclass(frozen=True) +class Probeset: # pylint: disable=[too-many-instance-attributes] + """Data Type that represents a Probeset""" + id_: Optional[int] = None + name: Optional[str] = None + symbol: Optional[str] = None + description: Optional[str] = None + probe_target_description: Optional[str] = None + chr_: Optional[str] = None + mb: Optional[float] = None # pylint: disable=C0103 + alias: Optional[str] = None + geneid: Optional[str] = None + homologeneid: Optional[str] = None + unigeneid: Optional[str] = None + omim: Optional[str] = None + refseq_transcriptid: Optional[str] = None + blatseq: Optional[str] = None + targetseq: Optional[str] = None + strand_probe: Optional[str] = None + probe_set_target_region: Optional[str] = None + probe_set_specificity: Optional[float] = None + probe_set_blat_score: Optional[float] = None + probe_set_blat_mb_start: Optional[float] = None + probe_set_blat_mb_end: Optional[float] = None + probe_set_strand: Optional[str] = None + probe_set_note_by_rw: Optional[str] = None + flag: Optional[str] = None + + +# Mapping from the Probeset dataclass to the actual column names in the +# database +probeset_mapping = { + "id_": "Id", + "name": "Name", + "symbol": "symbol", + "description": "description", + "probe_target_description": "Probe_Target_Description", + "chr_": "Chr", + "mb": "Mb", + "alias": "alias", + "geneid": "GeneId", + "homologeneid": "HomoloGeneID", + "unigeneid": "UniGeneId", + "omim": "OMIM", + "refseq_transcriptid": "RefSeq_TranscriptId", + "blatseq": "BlatSeq", + "targetseq": "TargetSeq", + "strand_probe": "Strand_Probe", + "probe_set_target_region": "Probe_set_target_region", + "probe_set_specificity": "Probe_set_specificity", + "probe_set_blat_score": "Probe_set_BLAT_score", + "probe_set_blat_mb_start": "Probe_set_Blat_Mb_start", + "probe_set_blat_mb_end": "Probe_set_Blat_Mb_end", + "probe_set_strand": "Probe_set_strand", + "probe_set_note_by_rw": 
"Probe_set_Note_by_RW", + "flag": "flag" +} + +def fetch_probeset_metadata_by_name(conn: DBConnection, name: str) -> dict: + """Fetch a ProbeSet's metadata by its `name`.""" + with conn.cursor(cursorclass=DictCursor) as cursor: + cols = ", ".join(mapping_to_query_columns(probeset_mapping)) + cursor.execute((f"SELECT Id as id, {cols} " + "FROM ProbeSet " + "WHERE Name = %(name)s"), + {"name": name}) + return cursor.fetchone() -- cgit v1.2.3