about summary refs log tree commit diff
diff options
context:
space:
mode:
author BonfaceKilz 2022-04-12 12:54:57 +0300
committer BonfaceKilz 2022-04-12 13:26:57 +0300
commit 789d483fe8877c08a07d0f94cb22e3e33a5888bc (patch)
tree d219466330d06719b06c57b87aa5c359dd096896
parent ca8a18f00b06a7c6ca4b022223f381ddaebbf930 (diff)
download genenetwork3-789d483fe8877c08a07d0f94cb22e3e33a5888bc.tar.gz
Strip any newline, tab or carriage-return chars from sample data
* gn3/db/sample_data.py (get_trait_csv_sample_data): Strip out "\n", "\t", or "\r" from the sample data. See: <https://issues.genenetwork.org/issues/csv-error-ITP_10001-longevity-data-set.html>
-rw-r--r-- gn3/db/sample_data.py 8
1 file changed, 6 insertions, 2 deletions
diff --git a/gn3/db/sample_data.py b/gn3/db/sample_data.py
index 9e9d527..3f7e2da 100644
--- a/gn3/db/sample_data.py
+++ b/gn3/db/sample_data.py
@@ -1,6 +1,7 @@
"""Module containing functions that work with sample data"""
from typing import Any, Tuple, Dict, Callable
+import re
import collections
import MySQLdb
@@ -90,10 +91,13 @@ def get_trait_csv_sample_data(
if data[1] == "x":
csv_data[data[0]] = None
else:
- sample, case_attr, value = data[0], data[1], data[2]
+ sample, case_attr, value = [
+ re.sub(r"(\\n|\\r|\\t|\\)", "", x).strip()
+ for x in [data[0], data[1], data[2]]
+ ]
if not csv_data.get(sample):
csv_data[sample] = {}
- csv_data[sample][case_attr] = None if value == "x" else value
+ csv_data[sample][case_attr] = value
case_attr_columns.add(case_attr)
if not case_attr_columns:
return "Strain Name,Value,SE,Count\n" + "\n".join(csv_data.keys())