about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-08-28 13:59:16 +0300
committer: Munyoki Kilyungi, 2023-08-28 14:29:25 +0300
commit: d5afeeca70445c4e57bb8dd942ee3f9165fbe104 (patch)
tree: a0bc5bd8439f96e6f3ab46b0ef959e20e76a028a
parent: 79bc1d3701210a13eaa9d939c3f730898d504b26 (diff)
download: gn-transform-databases-d5afeeca70445c4e57bb8dd942ee3f9165fbe104.tar.gz
Update how datasets are dumped
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/dataset-metadata.scm 86
1 files changed, 39 insertions, 47 deletions
diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm
index 0b869b9..56280a7 100755
--- a/examples/dataset-metadata.scm
+++ b/examples/dataset-metadata.scm
@@ -104,52 +104,42 @@
(left-join GeneChip "USING (GeneChipId)"))
"WHERE GN_AccesionId IS NOT NULL")
(schema-triples
- (gnc:dataset rdf:type dcat:Dataset)
- (gn:datasetTitle a rdfs:Datatype)
- (gn:datasetTitle rdfs:comment "The Dataset's Title")
- (gn:datasetTitle owl:onDatatype xsd:string)
- (gn:publicationTitle a rdfs:Datatype)
- (gn:publicationTitle rdfs:comment "The Dataset's Publication Title")
- (gn:publicationTitle owl:onDatatype xsd:string)
- (gnc:genotypeDataset rdfs:subClassOf gnc:dataset)
- (gnc:phenotypeDataset rdfs:subClassOf gnc:dataset)
- (gnc:probesetDataset rdfs:subClassOf gnc:dataset)
- (gnt:hasTissue rdfs:domain gnc:dataset)
+ (gnt:hasTissue rdfs:domain dcat:Dataset)
(gnt:hasTissue a owl:ObjectProperty)
(gnt:hasTissue skos:definition "Tissues this resource has")
- (gnt:hasTissueInfo rdfs:domain gnc:dataset)
+ (gnt:hasTissueInfo rdfs:domain dcat:Dataset)
(gnt:hasTissueInfo a owl:ObjectProperty)
(gnt:hasTissueInfo skos:definition "Metadata about Tissue for this resource")
- (gnt:usesNormalization rdfs:domain gnc:dataset)
+ (gnt:usesNormalization rdfs:domain dcat:Dataset)
(gnt:usesNormalization a owl:ObjectProperty)
(gnt:usesNormalization skos:definition "Normalization techniques this resource has")
- (gnt:usesPlatform rdfs:domain gnc:dataset)
+ (gnt:usesPlatform rdfs:domain dcat:Dataset)
(gnt:usesPlatform a owl:ObjectProperty)
(gnt:usesPlatform skos:definition "The Platform this resource uses")
- (gnt:hasGeoSeriesId rdfs:domain gnc:dataset)
+ (gnt:hasGeoSeriesId rdfs:domain dcat:Dataset)
(gnt:hasGeoSeriesId a owl:ObjectProperty)
(gnt:hasGeoSeriesId skos:definition "id of record in NCBI database")
- (gnt:hasExperimentDesignInfo rdfs:domain gnc:dataset)
+ (gnt:hasExperimentDesignInfo rdfs:domain dcat:Dataset)
(gnt:hasExperimentDesignInfo rdfs:label "Experiment Design")
(gnt:hasExperimentDesignInfo a owl:ObjectProperty)
(gnt:hasExperimentDesignInfo skos:definition "Information about how the experiment was designed")
- (gnt:hasNotes rdfs:domain gnc:dataset)
+ (gnt:hasNotes rdfs:domain dcat:Dataset)
(gnt:hasNotes a owl:ObjectProperty)
(gnt:hasNotes rdfs:label "Notes")
(gnt:hasNotes skos:definition "Extra Notes about this dataset")
- (gnt:hasDataProcessingInfo rdfs:domain gnc:dataset)
+ (gnt:hasDataProcessingInfo rdfs:domain dcat:Dataset)
(gnt:hasDataProcessingInfo rdfs:label "About Data Processing")
(gnt:hasDataProcessingInfo a owl:ObjectProperty)
(gnt:hasDataProcessingInfo skos:definition "Information about how this dataset was processed")
- (gnt:hasPlatformInfo rdfs:domain gnc:dataset)
+ (gnt:hasPlatformInfo rdfs:domain dcat:Dataset)
(gnt:hasPlatformInfo a owl:ObjectProperty)
(gnt:hasPlatformInfo rdfs:label "About Platfoorm")
(gnt:hasPlatformInfo skos:definition "Information about the platform that was used with this dataset")
- (gnt:hasCaseInfo rdfs:domain gnc:dataset)
+ (gnt:hasCaseInfo rdfs:domain dcat:Dataset)
(gnt:hasCaseInfo rdfs:label "About Case")
(gnt:hasCaseInfo a owl:ObjectProperty)
(gnt:hasCaseInfo skos:definition "Information about the cases used in this platform")
- (gnt:hasAcknowledgement rdfs:domain gnc:dataset)
+ (gnt:hasAcknowledgement rdfs:domain dcat:Dataset)
(gnt:hasAcknowledgement rdfs:label "Acknowledgement")
(gnt:hasAcknowledgement a owl:ObjectProperty)
(gnt:hasAcknowledgement skos:definition "People to acknowledge"))
@@ -159,9 +149,16 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type (string->symbol
- (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gnc:phenotypeDataset', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:probesetDataset', 'gnc:dataset')))"
- rdfType))))
+ (set rdf:type 'dcat:Dataset)
+ (set xkos:classifiedUnder
+ (let ([dataset-type
+ (string-trim-both
+ (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', '')))"
+ DatasetType)))])
+ (if (not (string-null? dataset-type))
+ (string->symbol
+ dataset-type)
+ "")))
(set rdfs:label (regexp-substitute/global
#f "^[Nn]one$"
(field InfoFiles InfoPageName)
@@ -169,26 +166,20 @@
(set skos:prefLabel
(field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))"
DatasetFullName)))
- (set skos:prefLabel (field Datasets DatasetName DatasetGroup))
+ (set skos:altLabel (field Datasets DatasetName DatasetGroup))
(set dct:title
- (annotate-field
- (regexp-substitute/global
- #f "^[Nn]one$"
+ (regexp-substitute/global
+ #f "^[Nn]one$"
+ (or
+ (regexp-substitute/global
+ #f "^Unpublished$" (field Datasets PublicationTitle) "")
(field InfoFiles InfoFileTitle)
- ""))
- '^^gn:datasetTitle)
- ;; This is the published title
- (set dct:title
- (annotate-field
- (regexp-substitute/global
- #f "^[Nn]one$"
- (field Datasets PublicationTitle)
- ""))
- '^^gn:publicationTitle)
+ "")
+ ""))
(set dct:created
(field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))"
createTimeGenoFreeze)))
- (set dcat:contacttPoint
+ (set dcat:contactPoint
(investigator-attributes->id (field Investigators FirstName)
(field Investigators LastName)
(field Investigators Email)))
@@ -196,7 +187,7 @@
(field Organizations OrganizationName))
(set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
(set dct:accessRights (string-downcase
- (field DatasetStatus DatasetStatusName)))
+ (field DatasetStatus DatasetStatusName)))
(set xkos:classifiedUnder
(string->identifier
"set" (field InbredSet Name)
@@ -205,7 +196,7 @@
(set gnt:hasTissue (string->identifier "tissue"
(field Tissue Short_Name)))
(set gnt:usesNormalization
- (string->identifier "avgmethod"
+ (string->identifier "avgMethod"
;; If AvgMethodName is NULL, assume N/A.
(if (string-blank? (field AvgMethod Name AvgMethodName))
"N/A" (field AvgMethod Name AvgMethodName))))
@@ -265,9 +256,9 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gnc:phenotypeDataset)
+ (set xkos:classifiedUnder 'gnc:Phenotype)
+ (set dct:title (field PublishFreeze FullName))
(set rdfs:label (field PublishFreeze Name))
- (set skos:prefLabel (field PublishFreeze FullName))
(set skos:altLabel (field PublishFreeze ShortName))
(set dct:created (annotate-field
(field PublishFreeze CreateTime)
@@ -295,9 +286,9 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gnc:genotypeDataset)
+ (set xkos:classifiedUnder 'gnc:Genotype)
(set rdfs:label (field GenoFreeze Name))
- (set skos:prefLabel (field GenoFreeze FullName))
+ (set dct:title (field GenoFreeze FullName))
(set skos:altLabel (field GenoFreeze ShortName))
(set dct:created (annotate-field
(field GenoFreeze CreateTime)
@@ -331,9 +322,9 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gnc:probesetDataset)
+ (set xkos:classifiedUnder 'gnc:Probeset)
(set gnt:usesNormalization
- (string->identifier "avgmethod"
+ (string->identifier "avgMethod"
;; If AvgMethodName is NULL, assume N/A.
(if (string-blank? (field AvgMethod Name AvgMethodName))
"N/A" (field AvgMethod Name AvgMethodName))))
@@ -375,6 +366,7 @@
(prefixes
'(("v:" "<http://www.w3.org/2006/vcard/ns#>")
("foaf:" "<http://xmlns.com/foaf/0.1/>")
+ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
("dcat:" "<http://www.w3.org/ns/dcat#>")
("skos:" "<http://www.w3.org/2004/02/skos/core#>")
("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")