aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBonfaceKilz2022-03-18 15:41:12 +0300
committerBonfaceKilz2022-03-18 15:50:00 +0300
commitb77d028381819cb947342db4ff80590563822b4e (patch)
tree0ed3965797d7a8da0962c029bf76750383d8ff05
parent490a9c2306d9b05e132b1fcef6cd65a985a14b71 (diff)
downloadgenenetwork3-b77d028381819cb947342db4ff80590563822b4e.tar.gz
Clean all csv fields before diffing
There was a subtle bug where "csvdiff" generated an error related to "different column headings", caused by something akin to diffing: "a, b \n, ..." with "a, b\n, ...". * gn3/csvcmp.py (csv_diff): Clean csv texts before any diffing. * tests/unit/test_csvcmp.py (test_csv_diff_same_columns): Modify test case to capture aforementioned bug.
-rw-r--r--gn3/csvcmp.py6
-rw-r--r--tests/unit/test_csvcmp.py2
2 files changed, 5 insertions, 3 deletions
diff --git a/gn3/csvcmp.py b/gn3/csvcmp.py
index 10c5d3e..8db89ca 100644
--- a/gn3/csvcmp.py
+++ b/gn3/csvcmp.py
@@ -59,8 +59,10 @@ def clean_csv_text(csv_text: str) -> str:
def csv_diff(base_csv, delta_csv, tmp_dir="/tmp") -> dict:
"""Diff 2 csv strings"""
- base_csv_list = base_csv.strip().split("\n")
- delta_csv_list = delta_csv.strip().split("\n")
+ base_csv = clean_csv_text(base_csv)
+ delta_csv = clean_csv_text(delta_csv)
+ base_csv_list = base_csv.split("\n")
+ delta_csv_list = delta_csv.split("\n")
base_csv_header, delta_csv_header = "", ""
for i, line in enumerate(base_csv_list):
diff --git a/tests/unit/test_csvcmp.py b/tests/unit/test_csvcmp.py
index ec4ca71..c2fda6b 100644
--- a/tests/unit/test_csvcmp.py
+++ b/tests/unit/test_csvcmp.py
@@ -55,7 +55,7 @@ def test_remove_insignificant_data():
@pytest.mark.unit_test
def test_csv_diff_same_columns():
"""Test csv diffing on data with the same number of columns"""
- assert csv_diff(base_csv="a,b\n1,2\n", delta_csv="a,b\n1,3") == {
+ assert csv_diff(base_csv="a,b \n1,2\n", delta_csv="a,b\n1,3") == {
"Additions": [],
"Deletions": [],
"Columns": "",