From 490a9c2306d9b05e132b1fcef6cd65a985a14b71 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Fri, 18 Mar 2022 15:39:19 +0300 Subject: Create new function for cleaning individual fields in csv text * gn3/csvcmp.py (clean_csv_text): New function. * tests/unit/test_csvcmp.py: Import "clean_csv_text". (test_clean_csv): Test case for the above. --- gn3/csvcmp.py | 9 +++++++++ tests/unit/test_csvcmp.py | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/gn3/csvcmp.py b/gn3/csvcmp.py index 975814a..10c5d3e 100644 --- a/gn3/csvcmp.py +++ b/gn3/csvcmp.py @@ -48,6 +48,15 @@ def remove_insignificant_edits(diff_data, epsilon=0.001): return diff_data +def clean_csv_text(csv_text: str) -> str: + """Remove extra white space elements in all elements of the CSV file""" + _csv_text = [] + for line in csv_text.strip().split("\n"): + _csv_text.append( + ",".join([el.strip() for el in line.split(",")])) + return "\n".join(_csv_text) + + def csv_diff(base_csv, delta_csv, tmp_dir="/tmp") -> dict: """Diff 2 csv strings""" base_csv_list = base_csv.strip().split("\n") diff --git a/tests/unit/test_csvcmp.py b/tests/unit/test_csvcmp.py index b64fe83..ec4ca71 100644 --- a/tests/unit/test_csvcmp.py +++ b/tests/unit/test_csvcmp.py @@ -1,6 +1,7 @@ """Tests for gn3.csvcmp""" import pytest +from gn3.csvcmp import clean_csv_text from gn3.csvcmp import csv_diff from gn3.csvcmp import extract_invalid_csv_headers from gn3.csvcmp import extract_strain_name @@ -147,3 +148,23 @@ string""" csv_text = "Strain Name, Value, SE, Colour" assert extract_invalid_csv_headers(allowed_headers, csv_text) == ["Colour"] + + +@pytest.mark.unit_test +def test_clean_csv(): + """Test that csv text input is cleaned properly""" + csv_text = """ +Strain Name,Value,SE,Count +BXD1,18,x ,0 +BXD12, 16,x,x +BXD14,15 ,x,x +BXD15,14,x, +""" + expected_csv = """Strain Name,Value,SE,Count +BXD1,18,x,0 +BXD12,16,x,x +BXD14,15,x,x +BXD15,14,x,""" + + assert clean_csv_text(csv_text) == expected_csv + assert 
clean_csv_text("a,b \n1,2\n") == "a,b\n1,2" -- cgit v1.2.3