From b77d028381819cb947342db4ff80590563822b4e Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Fri, 18 Mar 2022 15:41:12 +0300 Subject: Clean all csv fields before diffing There was a subtle bug where "csvdiff" generated an error related to "different column headings" caused by something akin to diffing: "a, b \n, ..." with "a, b\n, ...". * gn3/csvcmp.py (csv_diff): Clean csv texts before any diffing. * tests/unit/test_csvcmp.py (test_csv_diff_same_columns): Modify test case to capture the aforementioned bug. --- gn3/csvcmp.py | 6 ++++-- tests/unit/test_csvcmp.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gn3/csvcmp.py b/gn3/csvcmp.py index 10c5d3e..8db89ca 100644 --- a/gn3/csvcmp.py +++ b/gn3/csvcmp.py @@ -59,8 +59,10 @@ def clean_csv_text(csv_text: str) -> str: def csv_diff(base_csv, delta_csv, tmp_dir="/tmp") -> dict: """Diff 2 csv strings""" - base_csv_list = base_csv.strip().split("\n") - delta_csv_list = delta_csv.strip().split("\n") + base_csv = clean_csv_text(base_csv) + delta_csv = clean_csv_text(delta_csv) + base_csv_list = base_csv.split("\n") + delta_csv_list = delta_csv.split("\n") base_csv_header, delta_csv_header = "", "" for i, line in enumerate(base_csv_list): diff --git a/tests/unit/test_csvcmp.py b/tests/unit/test_csvcmp.py index ec4ca71..c2fda6b 100644 --- a/tests/unit/test_csvcmp.py +++ b/tests/unit/test_csvcmp.py @@ -55,7 +55,7 @@ def test_remove_insignificant_data(): @pytest.mark.unit_test def test_csv_diff_same_columns(): """Test csv diffing on data with the same number of columns""" - assert csv_diff(base_csv="a,b\n1,2\n", delta_csv="a,b\n1,3") == { + assert csv_diff(base_csv="a,b \n1,2\n", delta_csv="a,b\n1,3") == { "Additions": [], "Deletions": [], "Columns": "", -- cgit v1.2.3