From f3ede362e1d7d00022a6f9f74d7ca304014f07fe Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 28 Aug 2023 16:00:16 +0300 Subject: Set string->identifier to default to "" sep and string-capitalize-first Signed-off-by: Munyoki Kilyungi --- examples/classification.scm | 16 ++++------------ examples/dataset-metadata.scm | 36 +++++++++--------------------------- examples/strains.scm | 10 +++------- transform/triples.scm | 11 ++++++----- 4 files changed, 22 insertions(+), 51 deletions(-) diff --git a/examples/classification.scm b/examples/classification.scm index 5d6840b..3843c49 100755 --- a/examples/classification.scm +++ b/examples/classification.scm @@ -47,9 +47,7 @@ (gnc:Species xkos:depth "3") (gnc:Species xkos:specializes gnc:Set)) (triples "gnc:Species" - (set skos:member (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)))) + (set skos:member (string->identifier "" (remap-species-identifiers (field Species Fullname)))))) (define-transformer classification-scheme-set (tables (InbredSet)) @@ -61,9 +59,7 @@ (triples "gnc:Set" (set skos:member (string->identifier - "set" (field InbredSet Name) - #:separator "" - #:proc string-capitalize-first)))) + "set" (field InbredSet Name))))) (define-transformer species (tables (Species)) @@ -75,9 +71,7 @@ (gnt:shortName rdfs:domain gnc:Species) (gnt:shortName skos:definition "The short name of a given resource")) (triples - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first) + (string->identifier "" (remap-species-identifiers (field Species Fullname))) (set skos:inScheme 'gnc:ResourceClassificationScheme) (set rdfs:label (remap-species-identifiers (field Species Fullname))) (set skos:prefLabel (field Species MenuName)) @@ -103,9 +97,7 @@ (gnt:mappingMethod a owl:ObjectProperty) (gnt:mappingMethod rdfs:domain gnc:set)) (triples (string->identifier - "set" (field InbredSet 
Name) - #:separator "" - #:proc string-capitalize-first) + "set" (field InbredSet Name)) (set skos:inScheme 'gnc:ResourceClassificationScheme) (set rdfs:label (field InbredSet FullName)) (set skos:prefLabel (field InbredSet Name)) diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm index 56280a7..e6ef350 100755 --- a/examples/dataset-metadata.scm +++ b/examples/dataset-metadata.scm @@ -81,9 +81,7 @@ Title))) (set gnt:hasGOTreeValue (field GeneChip Go_tree_value)) (set xkos:classifiedUnder - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) + (string->identifier "" (remap-species-identifiers (field Species Fullname)))) (set gnt:hasGeoSeriesId (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -146,9 +144,7 @@ (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field InfoFiles InfoPageName) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) + 'pre "_" 'post)) (set rdf:type 'dcat:Dataset) (set xkos:classifiedUnder (let ([dataset-type @@ -190,9 +186,7 @@ (field DatasetStatus DatasetStatusName))) (set xkos:classifiedUnder (string->identifier - "set" (field InbredSet Name) - #:separator "" - #:proc string-capitalize-first)) + "set" (field InbredSet Name))) (set gnt:hasTissue (string->identifier "tissue" (field Tissue Short_Name))) (set gnt:usesNormalization @@ -253,9 +247,7 @@ "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field PublishFreeze Name) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) + 'pre "_" 'post)) (set xkos:classifiedUnder 'gnc:Phenotype) (set dct:title (field PublishFreeze FullName)) (set rdfs:label (field PublishFreeze Name)) @@ -265,9 +257,7 @@ '^^xsd:date)) (set xkos:classifiedUnder (string->identifier - "set" (field InbredSet Name) - #:separator "" - #:proc string-capitalize-first)))) + "set" (field InbredSet Name))))) (define-transformer genofreeze (tables 
(GenoFreeze @@ -283,9 +273,7 @@ #f "[^A-Za-z0-9:]" (field GenoFreeze Name) 'pre "_" 'post) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) + 'pre "_" 'post)) (set xkos:classifiedUnder 'gnc:Genotype) (set rdfs:label (field GenoFreeze Name)) (set dct:title (field GenoFreeze FullName)) @@ -295,9 +283,7 @@ '^^xsd:date)) (set xkos:classifiedUnder (string->identifier - "set" (field InbredSet Name) - #:separator "" - #:proc string-capitalize-first)))) + "set" (field InbredSet Name))))) ;; Molecular Traits are also referred to as ProbeSets (define-transformer probesetfreeze @@ -319,9 +305,7 @@ (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ProbeSetFreeze Name) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) + 'pre "_" 'post)) (set xkos:classifiedUnder 'gnc:Probeset) (set gnt:usesNormalization (string->identifier "avgMethod" @@ -342,9 +326,7 @@ (field Tissue Short_Name))) (set xkos:classifiedUnder (string->identifier - "set" (field InbredSet Name) - #:separator "" - #:proc string-capitalize-first)))) + "set" (field InbredSet Name))))) diff --git a/examples/strains.scm b/examples/strains.scm index 44a0e87..5ef6f03 100755 --- a/examples/strains.scm +++ b/examples/strains.scm @@ -76,14 +76,10 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. 
(regexp-substitute/global #f "[^A-Za-z0-9:]" (field Strain Name) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) + 'pre "_" 'post)) (set rdf:type 'gnc:strain) (set xkos:classifiedUnder - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) + (string->identifier "" (remap-species-identifiers (field Species Fullname)))) ;; Name, and maybe a second name (set rdfs:label (sanitize-rdf-string (field Strain Name))) (set skos:altLabel (sanitize-rdf-string (field ("IF ((Strain.Name2 != Strain.Name), Strain.Name2, '')" Name2)))) @@ -140,7 +136,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. ("rdfs:" "") ("taxon:" ""))) (inputs - (list inbred-set species strain mapping-method avg-method)) + (list strain mapping-method avg-method)) (outputs `(#:documentation ,documentation #:rdf ,output)))) diff --git a/transform/triples.scm b/transform/triples.scm index 926b8a2..9775d36 100644 --- a/transform/triples.scm +++ b/transform/triples.scm @@ -23,11 +23,12 @@ (string->symbol (format #f "~s~a" string-field schema))))) -(define* (string->identifier prefix str - #:optional #:key - (ontology "gn:") - (separator "_") - (proc string-downcase)) +(define* (string->identifier + prefix str + #:optional #:key + (ontology "gn:") + (separator "") + (proc string-capitalize-first)) "Convert STR to a turtle identifier after replacing illegal characters with an underscore and prefixing with gn:PREFIX." (if (or (and (string? str) (string-null? str)) -- cgit v1.2.3