diff options
author | BonfaceKilz | 2022-04-12 12:54:57 +0300 |
---|---|---|
committer | BonfaceKilz | 2022-04-12 13:26:57 +0300 |
commit | 789d483fe8877c08a07d0f94cb22e3e33a5888bc (patch) | |
tree | d219466330d06719b06c57b87aa5c359dd096896 | |
parent | ca8a18f00b06a7c6ca4b022223f381ddaebbf930 (diff) | |
download | genenetwork3-789d483fe8877c08a07d0f94cb22e3e33a5888bc.tar.gz |
Strip any newline, tab or carriage-return chars from sample data
* gn3/db/sample_data.py (get_trait_csv_sample_data): Strip out "\n", "\t", or
"\r" from the sample data. See:
<https://issues.genenetwork.org/issues/csv-error-ITP_10001-longevity-data-set.html>
-rw-r--r-- | gn3/db/sample_data.py | 8 |
1 files changed, 6 insertions, 2 deletions
```diff
diff --git a/gn3/db/sample_data.py b/gn3/db/sample_data.py
index 9e9d527..3f7e2da 100644
--- a/gn3/db/sample_data.py
+++ b/gn3/db/sample_data.py
@@ -1,6 +1,7 @@
 """Module containing functions that work with sample data"""
 from typing import Any, Tuple, Dict, Callable
+import re
 import collections
 
 import MySQLdb
 
@@ -90,10 +91,13 @@ def get_trait_csv_sample_data(
             if data[1] == "x":
                 csv_data[data[0]] = None
             else:
-                sample, case_attr, value = data[0], data[1], data[2]
+                sample, case_attr, value = [
+                    re.sub(r"(\\n|\\r|\\t|\\)", "", x).strip()
+                    for x in [data[0], data[1], data[2]]
+                ]
                 if not csv_data.get(sample):
                     csv_data[sample] = {}
-                csv_data[sample][case_attr] = None if value == "x" else value
+                csv_data[sample][case_attr] = value
                 case_attr_columns.add(case_attr)
         if not case_attr_columns:
             return "Strain Name,Value,SE,Count\n" + "\n".join(csv_data.keys())
```