From 667e67bae832ca5083f3319ada4fda67aca41f44 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 9 Aug 2021 11:44:47 +0300 Subject: Fix linting errors Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Add module, class and function docstrings * Deactivate some irrelevant pylint errors * Fix indentations and line-lengths --- gn3/db/datasets.py | 64 ++++++++++++++++++++++++++++++++++-------- gn3/db/traits.py | 29 +++++++++++++++++++ tests/unit/db/test_datasets.py | 5 +++- tests/unit/db/test_traits.py | 6 ++++ 4 files changed, 91 insertions(+), 13 deletions(-) diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py index 3ad50f6..53d6811 100644 --- a/gn3/db/datasets.py +++ b/gn3/db/datasets.py @@ -1,7 +1,13 @@ -from typing import Any, Dict, Union +""" +This module contains functions relating to specific trait dataset manipulation +""" +from typing import Any def retrieve_probeset_trait_dataset_name( threshold: int, name: str, connection: Any): + """ + Get the ID, DataScale and various name formats for a `ProbeSet` trait. + """ query = ( "SELECT Id, Name, FullName, ShortName, DataScale " "FROM ProbeSetFreeze " @@ -21,7 +27,11 @@ def retrieve_probeset_trait_dataset_name( "dataset_shortname", "dataset_datascale"], cursor.fetchone)) -def retrieve_publish_trait_dataset_name(threshold: int, name: str, connection: Any): +def retrieve_publish_trait_dataset_name( + threshold: int, name: str, connection: Any): + """ + Get the ID, DataScale and various name formats for a `Publish` trait. + """ query = ( "SELECT Id, Name, FullName, ShortName " "FROM PublishFreeze " @@ -41,7 +51,11 @@ def retrieve_publish_trait_dataset_name(threshold: int, name: str, connection: A "dataset_shortname"], cursor.fetchone)) -def retrieve_geno_trait_dataset_name(threshold: int, name: str, connection: Any): +def retrieve_geno_trait_dataset_name( + threshold: int, name: str, connection: Any): + """ + Get the ID, DataScale and various name formats for a `Geno` trait. + """ query = ( "SELECT Id, Name, FullName, ShortName " "FROM GenoFreeze " @@ -61,7 +75,11 @@ def retrieve_geno_trait_dataset_name(threshold: int, name: str, connection: Any) "dataset_shortname"], cursor.fetchone)) -def retrieve_temp_trait_dataset_name(threshold: int, name: str, connection: Any): +def retrieve_temp_trait_dataset_name( + threshold: int, name: str, connection: Any): + """ + Get the ID, DataScale and various name formats for a `Temp` trait. + """ query = ( "SELECT Id, Name, FullName, ShortName " "FROM TempFreeze " @@ -145,6 +163,9 @@ def retrieve_probeset_riset_fields(name, conn): return {} def retrieve_temp_riset_fields(name, conn): + """ + Retrieve the RISet, and RISetID values for `Temp` trait types. + """ query = ( "SELECT InbredSet.Name, InbredSet.Id " "FROM InbredSet, Temp " @@ -179,6 +200,10 @@ def retrieve_riset_fields(trait_type, trait_name, dataset_info, conn): } def retrieve_temp_trait_dataset(): + """ + Retrieve the dataset that relates to `Temp` traits + """ + # pylint: disable=[C0330] return { "searchfield": ["name", "description"], "disfield": ["name", "description"], @@ -189,28 +214,40 @@ def retrieve_temp_trait_dataset(): } def retrieve_geno_trait_dataset(): + """ + Retrieve the dataset that relates to `Geno` traits + """ + # pylint: disable=[C0330] return { - "searchfield": ["name","chr"], - "disfield": ["name","chr","mb", "source2", "sequence"], + "searchfield": ["name", "chr"], + "disfield": ["name", "chr", "mb", "source2", "sequence"], "type": "Geno" } def retrieve_publish_trait_dataset(): + """ + Retrieve the dataset that relates to `Publish` traits + """ + # pylint: disable=[C0330] return { "searchfield": [ "name", "post_publication_description", "abstract", "title", "authors"], "disfield": [ - "name","pubmed_id", "pre_publication_description", - "post_publication_description", "original_description", + "name", "pubmed_id", "pre_publication_description", + "post_publication_description", "original_description", "pre_publication_abbreviation", "post_publication_abbreviation", "lab_code", "submitter", "owner", "authorized_users", - "authors","title","abstract", "journal","volume","pages","month", - "year","sequence", "units", "comments"], + "authors", "title", "abstract", "journal", "volume", "pages", + "month", "year", "sequence", "units", "comments"], "type": "Publish" } def retrieve_probeset_trait_dataset(): + """ + Retrieve the dataset that relates to `ProbeSet` traits + """ + # pylint: disable=[C0330] return { "searchfield": [ "name", "description", "probe_target_description", "symbol", @@ -228,6 +265,9 @@ def retrieve_probeset_trait_dataset(): } def retrieve_trait_dataset(trait_type, trait, threshold, conn): + """ + Retrieve the dataset that relates to a specific trait. + """ dataset_fns = { "Temp": retrieve_temp_trait_dataset, "Geno": retrieve_geno_trait_dataset, @@ -238,8 +278,8 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn): "dataset_id": None, "dataset_name": trait["db"]["dataset_name"], **retrieve_dataset_name( - trait_type, threshold, trait["trait_name"], trait["db"]["dataset_name"], - conn) + trait_type, threshold, trait["trait_name"], + trait["db"]["dataset_name"], conn) } riset = retrieve_riset_fields( trait_type, trait["trait_name"], dataset_name_info, conn) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 6c31a4d..fb48fc3 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -43,6 +43,7 @@ def update_sample_data(conn: Any, count: Union[int, str]): """Given the right parameters, update sample-data from the relevant table.""" + # pylint: disable=[R0913, R0914] STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s" PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " "WHERE StrainId = %s AND Id = %s") @@ -252,6 +253,9 @@ def set_homologene_id_field(trait_type, trait_info, conn): return functions_table[trait_type](trait_info) def load_publish_qtl_info(trait_info, conn): + """ + Load extra QTL information for `Publish` traits + """ query = ( "SELECT PublishXRef.Locus, PublishXRef.LRS, PublishXRef.additive " "FROM PublishXRef, PublishFreeze " @@ -264,6 +268,9 @@ def load_publish_qtl_info(trait_info, conn): return {"locus": "", "lrs": "", "additive": ""} def load_probeset_qtl_info(trait_info, conn): + """ + Load extra QTL information for `ProbeSet` traits + """ query = ( "SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, " "ProbeSetXRef.mean, ProbeSetXRef.additive " @@ -278,6 +285,22 @@ def load_probeset_qtl_info(trait_info, conn): return {"locus": "", "lrs": "", "pvalue": "", "mean": "", "additive": ""} def load_qtl_info(qtl, trait_type, trait_info, conn): + """ + Load extra QTL information for traits + + DESCRIPTION: + Migrated from + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L500-L534 + + PARAMETERS: + qtl: boolean + trait_type: string + The type of the trait in consideration + trait_info: map/dictionary + A dictionary of the trait's key-value pairs + conn: + A database connection object + """ if not qtl: return trait_info qtl_info_functions = { @@ -290,6 +313,9 @@ def load_qtl_info(qtl, trait_type, trait_info, conn): return qtl_info_functions[trait_type](trait_info, conn) def build_trait_name(trait_fullname): + """ + Initialises the trait's name, and other values from the search data provided + """ name_parts = trait_fullname.split("::") assert len(name_parts) >= 2, "Name format error" return { @@ -300,6 +326,9 @@ def build_trait_name(trait_fullname): } def retrieve_probeset_sequence(trait, conn): + """ + Retrieve a 'ProbeSet' trait's sequence information + """ query = ( "SELECT ProbeSet.BlatSeq " "FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef " diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py index 4f405cb..38de0e2 100644 --- a/tests/unit/db/test_datasets.py +++ b/tests/unit/db/test_datasets.py @@ -1,3 +1,5 @@ +"""Tests for gn3/db/datasets.py""" + from unittest import mock, TestCase from gn3.db.datasets import ( retrieve_dataset_name, @@ -7,6 +9,7 @@ from gn3.db.datasets import ( retrieve_probeset_riset_fields) class TestDatasetsDBFunctions(TestCase): + """Test cases for datasets functions.""" def test_retrieve_dataset_name(self): """Test that the function is called correctly.""" @@ -34,7 +37,7 @@ class TestDatasetsDBFunctions(TestCase): "(Name = %(name)s " "OR FullName = %(name)s " "OR ShortName = %(name)s)".format( - table=table, cols=columns, ttype=trait_type), + table=table, cols=columns), {"threshold": thresh, "name": dataset_name}) def test_retrieve_probeset_riset_fields(self): diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 5f52c18..d9d7bbb 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -121,6 +121,9 @@ class TestTraitsDBFunctions(TestCase): trait_source) def test_build_trait_name_with_good_fullnames(self): + """ + Check that the name is built correctly. + """ for fullname, expected in [ ["testdb::testname", {"db": {"dataset_name": "testdb"}, "trait_name": "testname", @@ -133,6 +136,9 @@ class TestTraitsDBFunctions(TestCase): self.assertEqual(build_trait_name(fullname), expected) def test_build_trait_name_with_bad_fullnames(self): + """ + Check that an exception is raised if the full name format is wrong. + """ for fullname in ["", "test", "test:test"]: with self.subTest(fullname=fullname): with self.assertRaises(AssertionError, msg="Name format error"): -- cgit v1.2.3