about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: BonfaceKilz 2022-03-18 15:39:19 +0300
committer: BonfaceKilz 2022-03-18 15:50:00 +0300
commit: 490a9c2306d9b05e132b1fcef6cd65a985a14b71 (patch)
tree: d39a582509e4e26c8481324f75d79a793b3620cd
parent: 16367dab9248d3aa2660e0b5cafdce25e8f7067c (diff)
download: genenetwork3-490a9c2306d9b05e132b1fcef6cd65a985a14b71.tar.gz
Create new function for cleaning individual fields in csv text
* gn3/csvcmp.py (clean_csv_text): New function.
* tests/unit/test_csvcmp.py: Import "clean_csv_text". (test_clean_csv): Test case for the above.
-rw-r--r-- gn3/csvcmp.py 9
-rw-r--r-- tests/unit/test_csvcmp.py 21
2 files changed, 30 insertions, 0 deletions
diff --git a/gn3/csvcmp.py b/gn3/csvcmp.py
index 975814a..10c5d3e 100644
--- a/gn3/csvcmp.py
+++ b/gn3/csvcmp.py
@@ -48,6 +48,15 @@ def remove_insignificant_edits(diff_data, epsilon=0.001):
return diff_data
def clean_csv_text(csv_text: str) -> str:
    """Strip surrounding whitespace from every field of a CSV string.

    The whole text is stripped first, so leading/trailing blank lines and
    the final newline are dropped.  Each remaining line is then split on
    commas and every field has its leading and trailing whitespace removed
    before the line is re-joined.

    Note: lines are split on every comma; CSV quoting is not interpreted,
    so a quoted field that contains a comma is handled as several fields.

    Returns the cleaned text with lines joined by a newline character and
    no trailing newline.
    """
    # Field-level strip() also removes a carriage return left at the end of
    # a line by CRLF input, since "\r" counts as whitespace.
    return "\n".join(
        ",".join(field.strip() for field in line.split(","))
        for line in csv_text.strip().split("\n")
    )
+
+
def csv_diff(base_csv, delta_csv, tmp_dir="/tmp") -> dict:
"""Diff 2 csv strings"""
base_csv_list = base_csv.strip().split("\n")
diff --git a/tests/unit/test_csvcmp.py b/tests/unit/test_csvcmp.py
index b64fe83..ec4ca71 100644
--- a/tests/unit/test_csvcmp.py
+++ b/tests/unit/test_csvcmp.py
@@ -1,6 +1,7 @@
"""Tests for gn3.csvcmp"""
import pytest
+from gn3.csvcmp import clean_csv_text
from gn3.csvcmp import csv_diff
from gn3.csvcmp import extract_invalid_csv_headers
from gn3.csvcmp import extract_strain_name
@@ -147,3 +148,23 @@ string"""
csv_text = "Strain Name, Value, SE, Colour"
assert extract_invalid_csv_headers(allowed_headers, csv_text) == ["Colour"]
+
+
@pytest.mark.unit_test
def test_clean_csv():
    """Check that clean_csv_text strips stray whitespace around CSV fields"""
    # Input has surrounding blank lines plus fields padded with spaces.
    raw_csv = """
Strain Name,Value,SE,Count
BXD1,18,x ,0
BXD12, 16,x,x
BXD14,15 ,x,x
BXD15,14,x,
"""
    # Same rows with padding removed and no leading/trailing newline.
    cleaned_csv = """Strain Name,Value,SE,Count
BXD1,18,x,0
BXD12,16,x,x
BXD14,15,x,x
BXD15,14,x,"""

    assert clean_csv_text(raw_csv) == cleaned_csv
    # Minimal case: trailing space in a header field and a final newline.
    assert clean_csv_text("a,b \n1,2\n") == "a,b\n1,2"