From 87a0616ab9ca1e2a32348e243e96f06a1bb6e204 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 2 Jun 2023 12:49:59 +0300 Subject: Cast longtext fields to fix broken utf-8 characters Signed-off-by: Munyoki Kilyungi --- examples/dump-dataset-metadata.scm | 39 +++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm index e59e2c4..aad2331 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dump-dataset-metadata.scm @@ -170,14 +170,29 @@ "")) (set gn:specifics (sanitize-rdf-string (field InfoFiles Specifics))) (set gn:datasetGroup (field Datasets DatasetName DatasetGroup)) - (set gn:aboutCases (sanitize-rdf-string (field Datasets AboutCases))) - (set gn:aboutPlatform (sanitize-rdf-string (field Datasets AboutPlatform))) - (set gn:aboutDataProcessing (sanitize-rdf-string - (field Datasets AboutDataProcessing))) - (set gn:notes (sanitize-rdf-string (field Datasets Notes))) - (set gn:experimentDesign (sanitize-rdf-string - (field Datasets ExperimentDesign))) - (set gn:contributors (sanitize-rdf-string (field Datasets Contributors))) + (set gn:aboutCases + (sanitize-rdf-string + (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutCases USING latin1) USING utf8) AS VARCHAR(10000))" AboutCases)))) + (set gn:aboutPlatform + (sanitize-rdf-string + (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutPlatform USING latin1) USING utf8) AS VARCHAR(1500))" + AboutPlatform)))) + (set gn:aboutDataProcessing + (sanitize-rdf-string + (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutProcessing USING latin1) USING utf8) AS VARCHAR(1500))" + AboutProcessing)))) + (set gn:notes + (sanitize-rdf-string + (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Notes USING latin1) USING utf8) AS VARCHAR(1500))" + Notes)))) + (set gn:experimentDesign + (sanitize-rdf-string + (field ("CAST(CONVERT(BINARY CONVERT(Datasets.ExperimentDesign USING latin1) USING utf8) AS 
VARCHAR(1500))" + ExperimentDesign)))) + (set gn:contributors + (sanitize-rdf-string + (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Contributors USING latin1) USING utf8) AS VARCHAR(1500))" + Contributors)))) (set gn:citation (sanitize-rdf-string (regexp-substitute/global @@ -189,9 +204,11 @@ (set gn:dataSourceAcknowledgment (sanitize-rdf-string (string-trim-both - (regexp-substitute/global #f "^[Nn]one$" - (field InfoFiles Data_Source_Acknowledge) - "")))) + (regexp-substitute/global + #f "^[Nn]one$" + (field ("CAST(CONVERT(BINARY CONVERT(InfoFiles.Data_Source_Acknowledge USING latin1) USING utf8) AS VARCHAR(1500))" + Data_Source_Acknowledge)) + "")))) (set gn:acknowledgment (sanitize-rdf-string (field Datasets Acknowledgment))))) -- cgit v1.2.3