about | summary | refs | log | tree | commit | diff
path: root/gn3/csvcmp.py
diff options
context:
space:
mode:
author: BonfaceKilz, 2022-04-05 15:02:13 +0300
committer: BonfaceKilz, 2022-04-07 11:54:28 +0300
commit: ac8299161bb1a4611d6e4a33b9dd9ace635d85e9 (patch)
tree: e40c5d07ac5c5fcd63fe385e6f6b20bca51efc3b /gn3/csvcmp.py
parent: 7d898c70c94d78a7dc81acf43ccfe2cdd7c5172c (diff)
download: genenetwork3-ac8299161bb1a4611d6e4a33b9dd9ace635d85e9.tar.gz
Add method for parsing a csv header from uploaded sample-data file
* gn3/csvcmp.py (parse_csv_column): New function. * tests/unit/test_csvcmp.py: Test case for the above.
Diffstat (limited to 'gn3/csvcmp.py')
-rw-r--r-- gn3/csvcmp.py 13
1 files changed, 13 insertions, 0 deletions
diff --git a/gn3/csvcmp.py b/gn3/csvcmp.py
index 4e8cc0f..dcfdc98 100644
--- a/gn3/csvcmp.py
+++ b/gn3/csvcmp.py
@@ -2,6 +2,7 @@
texts"""
from typing import Any, List
+import re
import json
import os
import uuid
@@ -143,3 +144,15 @@ def extract_invalid_csv_headers(allowed_headers: List, csv_text: str) -> List:
if header not in allowed_headers:
invalid_headers.append(header)
return invalid_headers
+
+
+def parse_csv_column(column: str) -> tuple:
+ """Given a column, for example: 'Header(1)' or 'Header', return a tuple
+ (ID, name): the ID is the word inside the brackets, or None when there is no
+ '(word)' part; the name is the stripped text before it (else the whole column)."""
+ id_ = re.search(r"\((\w+)\)", column)
+ name = column.strip()
+ if id_:
+ id_ = id_.groups()[0].strip()  # rebind: match object -> captured ID text
+ name = column.split(f"({id_})")[0].strip()
+ return (id_, name)