aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-08-28 16:00:16 +0300
committer Munyoki Kilyungi 2023-08-28 16:00:16 +0300
commit f3ede362e1d7d00022a6f9f74d7ca304014f07fe (patch)
tree bfa89e642378090d3b1411f50c0aeabfa74d260a
parent 24226aeafadfeb81a01ae6105f078dd319965887 (diff)
download gn-transform-databases-f3ede362e1d7d00022a6f9f74d7ca304014f07fe.tar.gz
Set string->identifier to default to "" separator and string-capitalize-first
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/classification.scm 16
-rwxr-xr-x examples/dataset-metadata.scm 36
-rwxr-xr-x examples/strains.scm 10
-rw-r--r-- transform/triples.scm 11
4 files changed, 22 insertions, 51 deletions
diff --git a/examples/classification.scm b/examples/classification.scm
index 5d6840b..3843c49 100755
--- a/examples/classification.scm
+++ b/examples/classification.scm
@@ -47,9 +47,7 @@
(gnc:Species xkos:depth "3")
(gnc:Species xkos:specializes gnc:Set))
(triples "gnc:Species"
- (set skos:member (string->identifier "" (remap-species-identifiers (field Species Fullname))
- #:separator ""
- #:proc string-capitalize-first))))
+ (set skos:member (string->identifier "" (remap-species-identifiers (field Species Fullname))))))
(define-transformer classification-scheme-set
(tables (InbredSet))
@@ -61,9 +59,7 @@
(triples "gnc:Set"
(set skos:member
(string->identifier
- "set" (field InbredSet Name)
- #:separator ""
- #:proc string-capitalize-first))))
+ "set" (field InbredSet Name)))))
(define-transformer species
(tables (Species))
@@ -75,9 +71,7 @@
(gnt:shortName rdfs:domain gnc:Species)
(gnt:shortName skos:definition "The short name of a given resource"))
(triples
- (string->identifier "" (remap-species-identifiers (field Species Fullname))
- #:separator ""
- #:proc string-capitalize-first)
+ (string->identifier "" (remap-species-identifiers (field Species Fullname)))
(set skos:inScheme 'gnc:ResourceClassificationScheme)
(set rdfs:label (remap-species-identifiers (field Species Fullname)))
(set skos:prefLabel (field Species MenuName))
@@ -103,9 +97,7 @@
(gnt:mappingMethod a owl:ObjectProperty)
(gnt:mappingMethod rdfs:domain gnc:set))
(triples (string->identifier
- "set" (field InbredSet Name)
- #:separator ""
- #:proc string-capitalize-first)
+ "set" (field InbredSet Name))
(set skos:inScheme 'gnc:ResourceClassificationScheme)
(set rdfs:label (field InbredSet FullName))
(set skos:prefLabel (field InbredSet Name))
diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm
index 56280a7..e6ef350 100755
--- a/examples/dataset-metadata.scm
+++ b/examples/dataset-metadata.scm
@@ -81,9 +81,7 @@
Title)))
(set gnt:hasGOTreeValue (field GeneChip Go_tree_value))
(set xkos:classifiedUnder
- (string->identifier "" (remap-species-identifiers (field Species Fullname))
- #:separator ""
- #:proc string-capitalize-first))
+ (string->identifier "" (remap-species-identifiers (field Species Fullname))))
(set gnt:hasGeoSeriesId
(ontology 'geoSeries:
(string-trim-both (field GeneChip GeoPlatform))))))
@@ -146,9 +144,7 @@
(triples (string->identifier
"" (regexp-substitute/global #f "[^A-Za-z0-9:]"
(field InfoFiles InfoPageName)
- 'pre "_" 'post)
- #:separator ""
- #:proc string-capitalize-first)
+ 'pre "_" 'post))
(set rdf:type 'dcat:Dataset)
(set xkos:classifiedUnder
(let ([dataset-type
@@ -190,9 +186,7 @@
(field DatasetStatus DatasetStatusName)))
(set xkos:classifiedUnder
(string->identifier
- "set" (field InbredSet Name)
- #:separator ""
- #:proc string-capitalize-first))
+ "set" (field InbredSet Name)))
(set gnt:hasTissue (string->identifier "tissue"
(field Tissue Short_Name)))
(set gnt:usesNormalization
@@ -253,9 +247,7 @@
""
(regexp-substitute/global #f "[^A-Za-z0-9:]"
(field PublishFreeze Name)
- 'pre "_" 'post)
- #:separator ""
- #:proc string-capitalize-first)
+ 'pre "_" 'post))
(set xkos:classifiedUnder 'gnc:Phenotype)
(set dct:title (field PublishFreeze FullName))
(set rdfs:label (field PublishFreeze Name))
@@ -265,9 +257,7 @@
'^^xsd:date))
(set xkos:classifiedUnder
(string->identifier
- "set" (field InbredSet Name)
- #:separator ""
- #:proc string-capitalize-first))))
+ "set" (field InbredSet Name)))))
(define-transformer genofreeze
(tables (GenoFreeze
@@ -283,9 +273,7 @@
#f "[^A-Za-z0-9:]"
(field GenoFreeze Name)
'pre "_" 'post)
- 'pre "_" 'post)
- #:separator ""
- #:proc string-capitalize-first)
+ 'pre "_" 'post))
(set xkos:classifiedUnder 'gnc:Genotype)
(set rdfs:label (field GenoFreeze Name))
(set dct:title (field GenoFreeze FullName))
@@ -295,9 +283,7 @@
'^^xsd:date))
(set xkos:classifiedUnder
(string->identifier
- "set" (field InbredSet Name)
- #:separator ""
- #:proc string-capitalize-first))))
+ "set" (field InbredSet Name)))))
;; Molecular Traits are also referred to as ProbeSets
(define-transformer probesetfreeze
@@ -319,9 +305,7 @@
(regexp-substitute/global
#f "[^A-Za-z0-9:]"
(field ProbeSetFreeze Name)
- 'pre "_" 'post)
- #:separator ""
- #:proc string-capitalize-first)
+ 'pre "_" 'post))
(set xkos:classifiedUnder 'gnc:Probeset)
(set gnt:usesNormalization
(string->identifier "avgMethod"
@@ -342,9 +326,7 @@
(field Tissue Short_Name)))
(set xkos:classifiedUnder
(string->identifier
- "set" (field InbredSet Name)
- #:separator ""
- #:proc string-capitalize-first))))
+ "set" (field InbredSet Name)))))
diff --git a/examples/strains.scm b/examples/strains.scm
index 44a0e87..5ef6f03 100755
--- a/examples/strains.scm
+++ b/examples/strains.scm
@@ -76,14 +76,10 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
(regexp-substitute/global
#f "[^A-Za-z0-9:]"
(field Strain Name)
- 'pre "_" 'post)
- #:separator ""
- #:proc string-capitalize-first)
+ 'pre "_" 'post))
(set rdf:type 'gnc:strain)
(set xkos:classifiedUnder
- (string->identifier "" (remap-species-identifiers (field Species Fullname))
- #:separator ""
- #:proc string-capitalize-first))
+ (string->identifier "" (remap-species-identifiers (field Species Fullname))))
;; Name, and maybe a second name
(set rdfs:label (sanitize-rdf-string (field Strain Name)))
(set skos:altLabel (sanitize-rdf-string (field ("IF ((Strain.Name2 != Strain.Name), Strain.Name2, '')" Name2))))
@@ -140,7 +136,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
(inputs
- (list inbred-set species strain mapping-method avg-method))
+ (list strain mapping-method avg-method))
(outputs
`(#:documentation ,documentation
#:rdf ,output))))
diff --git a/transform/triples.scm b/transform/triples.scm
index 926b8a2..9775d36 100644
--- a/transform/triples.scm
+++ b/transform/triples.scm
@@ -23,11 +23,12 @@
(string->symbol
(format #f "~s~a" string-field schema)))))
-(define* (string->identifier prefix str
- #:optional #:key
- (ontology "gn:")
- (separator "_")
- (proc string-downcase))
+(define* (string->identifier
+ prefix str
+ #:optional #:key
+ (ontology "gn:")
+ (separator "")
+ (proc string-capitalize-first))
"Convert STR to a turtle identifier after replacing illegal
characters with an underscore and prefixing with gn:PREFIX."
(if (or (and (string? str) (string-null? str))