diff options
author | BonfaceKilz | 2022-04-05 15:02:13 +0300 |
---|---|---|
committer | BonfaceKilz | 2022-04-07 11:54:28 +0300 |
commit | ac8299161bb1a4611d6e4a33b9dd9ace635d85e9 (patch) | |
tree | e40c5d07ac5c5fcd63fe385e6f6b20bca51efc3b | |
parent | 7d898c70c94d78a7dc81acf43ccfe2cdd7c5172c (diff) | |
download | genenetwork3-ac8299161bb1a4611d6e4a33b9dd9ace635d85e9.tar.gz |
Add method for parsing a csv header from uploaded sample-data file
* gn3/csvcmp.py (parse_csv_column): New function.
* tests/unit/test_csvcmp.py: Test case for the above.
-rw-r--r-- | gn3/csvcmp.py | 13 | ||||
-rw-r--r-- | tests/unit/test_csvcmp.py | 12 |
2 files changed, 25 insertions, 0 deletions
```diff
diff --git a/gn3/csvcmp.py b/gn3/csvcmp.py
index 4e8cc0f..dcfdc98 100644
--- a/gn3/csvcmp.py
+++ b/gn3/csvcmp.py
@@ -2,6 +2,7 @@
 texts"""
 from typing import Any, List
 
+import re
 import json
 import os
 import uuid
@@ -143,3 +144,15 @@ def extract_invalid_csv_headers(allowed_headers: List, csv_text: str) -> List:
         if header not in allowed_headers:
             invalid_headers.append(header)
     return invalid_headers
+
+
+def parse_csv_column(column: str) -> tuple:
+    """Give a column, for example: 'Header(1)' or 'Header', return the
+    column name i.e the column name outside the brackets, and the ID, the
+    number inside the brackets."""
+    id_ = re.search(r"\((\w+)\)", column)
+    name = column.strip()
+    if id_:
+        id_ = id_.groups()[0].strip()
+        name = column.split(f"({id_})")[0].strip()
+    return (id_, name)
diff --git a/tests/unit/test_csvcmp.py b/tests/unit/test_csvcmp.py
index 0843bef..c8b69c7 100644
--- a/tests/unit/test_csvcmp.py
+++ b/tests/unit/test_csvcmp.py
@@ -7,6 +7,7 @@ from gn3.csvcmp import extract_invalid_csv_headers
 from gn3.csvcmp import extract_strain_name
 from gn3.csvcmp import fill_csv
 from gn3.csvcmp import get_allowable_sampledata_headers
+from gn3.csvcmp import parse_csv_column
 from gn3.csvcmp import remove_insignificant_edits
 
 
@@ -198,3 +199,14 @@ BXD15,14,x,"""
 
     assert clean_csv_text(csv_text) == expected_csv
     assert clean_csv_text("a,b \n1,2\n") == "a,b\n1,2"
+
+
+@pytest.mark.unit_test
+def test_parse_column_string():
+    """Test that a column is parsed correctly"""
+    assert parse_csv_column("Header") == (None, "Header")
+    assert parse_csv_column("Header (1)") == ("1", "Header")
+    assert parse_csv_column("Some Other Header (1)") == (
+        "1",
+        "Some Other Header",
+    )
```