about summary refs log tree commit diff
path: root/gn3/db
diff options
context:
space:
mode:
author BonfaceKilz 2022-04-12 12:54:57 +0300
committer BonfaceKilz 2022-04-12 13:26:57 +0300
commit 789d483fe8877c08a07d0f94cb22e3e33a5888bc (patch)
tree d219466330d06719b06c57b87aa5c359dd096896 /gn3/db
parent ca8a18f00b06a7c6ca4b022223f381ddaebbf930 (diff)
download genenetwork3-789d483fe8877c08a07d0f94cb22e3e33a5888bc.tar.gz
Strip any newline, tab or carriage-return chars from sample data
* gn3/db/sample_data.py (get_trait_csv_sample_data): Strip out "\n", "\t", or
"\r" from the sample data. See:
<https://issues.genenetwork.org/issues/csv-error-ITP_10001-longevity-data-set.html>
Diffstat (limited to 'gn3/db')
-rw-r--r-- gn3/db/sample_data.py 8
1 files changed, 6 insertions, 2 deletions
diff --git a/gn3/db/sample_data.py b/gn3/db/sample_data.py
index 9e9d527..3f7e2da 100644
--- a/gn3/db/sample_data.py
+++ b/gn3/db/sample_data.py
@@ -1,6 +1,7 @@
 """Module containing functions that work with sample data"""
 from typing import Any, Tuple, Dict, Callable
 
+import re
 import collections
 import MySQLdb
 
@@ -90,10 +91,13 @@ def get_trait_csv_sample_data(
             if data[1] == "x":
                 csv_data[data[0]] = None
             else:
-                sample, case_attr, value = data[0], data[1], data[2]
+                sample, case_attr, value = [
+                    re.sub(r"(\\n|\\r|\\t|\\)", "", x).strip()
+                    for x in [data[0], data[1], data[2]]
+                ]
                 if not csv_data.get(sample):
                     csv_data[sample] = {}
-                csv_data[sample][case_attr] = None if value == "x" else value
+                csv_data[sample][case_attr] = value
                 case_attr_columns.add(case_attr)
         if not case_attr_columns:
             return "Strain Name,Value,SE,Count\n" + "\n".join(csv_data.keys())