diff options
-rw-r--r-- | gn3/csvcmp.py | 9 | ||||
-rw-r--r-- | tests/unit/test_csvcmp.py | 21 |
2 files changed, 30 insertions, 0 deletions
def clean_csv_text(csv_text: str) -> str:
    """Strip surrounding whitespace from every field of a CSV string.

    Leading and trailing whitespace of the whole text is removed first, so
    blank lines at either end are dropped.  Each remaining line is split on
    commas, every field is stripped, and the fields are re-joined with a
    bare ``,``; lines are re-joined with ``\\n``.

    Because fields are stripped individually, a ``\\r`` left at the end of a
    line by CRLF input is removed along with other trailing whitespace.

    NOTE: the split on ``,`` is purely textual -- quoted fields that contain
    commas are not recognised and are treated as several fields.

    Args:
        csv_text: Raw CSV content as a single string.

    Returns:
        The cleaned CSV text, without a trailing newline.
    """
    return "\n".join(
        ",".join(field.strip() for field in row.split(","))
        for row in csv_text.strip().split("\n")
    )