From d5afeeca70445c4e57bb8dd942ee3f9165fbe104 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 28 Aug 2023 13:59:16 +0300 Subject: Update how datasets are dumped Signed-off-by: Munyoki Kilyungi --- examples/dataset-metadata.scm | 86 ++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm index 0b869b9..56280a7 100755 --- a/examples/dataset-metadata.scm +++ b/examples/dataset-metadata.scm @@ -104,52 +104,42 @@ (left-join GeneChip "USING (GeneChipId)")) "WHERE GN_AccesionId IS NOT NULL") (schema-triples - (gnc:dataset rdf:type dcat:Dataset) - (gn:datasetTitle a rdfs:Datatype) - (gn:datasetTitle rdfs:comment "The Dataset's Title") - (gn:datasetTitle owl:onDatatype xsd:string) - (gn:publicationTitle a rdfs:Datatype) - (gn:publicationTitle rdfs:comment "The Dataset's Publication Title") - (gn:publicationTitle owl:onDatatype xsd:string) - (gnc:genotypeDataset rdfs:subClassOf gnc:dataset) - (gnc:phenotypeDataset rdfs:subClassOf gnc:dataset) - (gnc:probesetDataset rdfs:subClassOf gnc:dataset) - (gnt:hasTissue rdfs:domain gnc:dataset) + (gnt:hasTissue rdfs:domain dcat:Dataset) (gnt:hasTissue a owl:ObjectProperty) (gnt:hasTissue skos:definition "Tissues this resource has") - (gnt:hasTissueInfo rdfs:domain gnc:dataset) + (gnt:hasTissueInfo rdfs:domain dcat:Dataset) (gnt:hasTissueInfo a owl:ObjectProperty) (gnt:hasTissueInfo skos:definition "Metadata about Tissue for this resource") - (gnt:usesNormalization rdfs:domain gnc:dataset) + (gnt:usesNormalization rdfs:domain dcat:Dataset) (gnt:usesNormalization a owl:ObjectProperty) (gnt:usesNormalization skos:definition "Normalization techniques this resource has") - (gnt:usesPlatform rdfs:domain gnc:dataset) + (gnt:usesPlatform rdfs:domain dcat:Dataset) (gnt:usesPlatform a owl:ObjectProperty) (gnt:usesPlatform skos:definition "The Platform this resource uses") - (gnt:hasGeoSeriesId rdfs:domain gnc:dataset) + (gnt:hasGeoSeriesId rdfs:domain dcat:Dataset) (gnt:hasGeoSeriesId a owl:ObjectProperty) (gnt:hasGeoSeriesId skos:definition "id of record in NCBI database") - (gnt:hasExperimentDesignInfo rdfs:domain gnc:dataset) + (gnt:hasExperimentDesignInfo rdfs:domain dcat:Dataset) (gnt:hasExperimentDesignInfo rdfs:label "Experiment Design") (gnt:hasExperimentDesignInfo a owl:ObjectProperty) (gnt:hasExperimentDesignInfo skos:definition "Information about how the experiment was designed") - (gnt:hasNotes rdfs:domain gnc:dataset) + (gnt:hasNotes rdfs:domain dcat:Dataset) (gnt:hasNotes a owl:ObjectProperty) (gnt:hasNotes rdfs:label "Notes") (gnt:hasNotes skos:definition "Extra Notes about this dataset") - (gnt:hasDataProcessingInfo rdfs:domain gnc:dataset) + (gnt:hasDataProcessingInfo rdfs:domain dcat:Dataset) (gnt:hasDataProcessingInfo rdfs:label "About Data Processing") (gnt:hasDataProcessingInfo a owl:ObjectProperty) (gnt:hasDataProcessingInfo skos:definition "Information about how this dataset was processed") - (gnt:hasPlatformInfo rdfs:domain gnc:dataset) + (gnt:hasPlatformInfo rdfs:domain dcat:Dataset) (gnt:hasPlatformInfo a owl:ObjectProperty) (gnt:hasPlatformInfo rdfs:label "About Platfoorm") (gnt:hasPlatformInfo skos:definition "Information about the platform that was used with this dataset") - (gnt:hasCaseInfo rdfs:domain gnc:dataset) + (gnt:hasCaseInfo rdfs:domain dcat:Dataset) (gnt:hasCaseInfo rdfs:label "About Case") (gnt:hasCaseInfo a owl:ObjectProperty) (gnt:hasCaseInfo skos:definition "Information about the cases used in this platform") - (gnt:hasAcknowledgement rdfs:domain gnc:dataset) + (gnt:hasAcknowledgement rdfs:domain dcat:Dataset) (gnt:hasAcknowledgement rdfs:label "Acknowledgement") (gnt:hasAcknowledgement a owl:ObjectProperty) (gnt:hasAcknowledgement skos:definition "People to acknowledge")) @@ -159,9 +149,16 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type (string->symbol - (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gnc:phenotypeDataset', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:probesetDataset', 'gnc:dataset')))" - rdfType)))) + (set rdf:type 'dcat:Dataset) + (set xkos:classifiedUnder + (let ([dataset-type + (string-trim-both + (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', '')))" + DatasetType)))]) + (if (not (string-null? dataset-type)) + (string->symbol + dataset-type) + ""))) (set rdfs:label (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles InfoPageName) @@ -169,26 +166,20 @@ (set skos:prefLabel (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))" DatasetFullName))) - (set skos:prefLabel (field Datasets DatasetName DatasetGroup)) + (set skos:altLabel (field Datasets DatasetName DatasetGroup)) (set dct:title - (annotate-field - (regexp-substitute/global - #f "^[Nn]one$" + (regexp-substitute/global + #f "^[Nn]one$" + (or + (regexp-substitute/global + #f "^Unpublished$" (field Datasets PublicationTitle) "") (field InfoFiles InfoFileTitle) - "")) - '^^gn:datasetTitle) - ;; This is the published title - (set dct:title - (annotate-field - (regexp-substitute/global - #f "^[Nn]one$" - (field Datasets PublicationTitle) - "")) - '^^gn:publicationTitle) + "") + "")) (set dct:created (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))" createTimeGenoFreeze))) - (set dcat:contacttPoint + (set dcat:contactPoint (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email))) @@ -196,7 +187,7 @@ (field Organizations OrganizationName)) (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId))) (set dct:accessRights (string-downcase - (field DatasetStatus DatasetStatusName))) + (field DatasetStatus DatasetStatusName))) (set xkos:classifiedUnder (string->identifier "set" (field InbredSet Name) @@ -205,7 +196,7 @@ (set gnt:hasTissue (string->identifier "tissue" (field Tissue Short_Name))) (set gnt:usesNormalization - (string->identifier "avgmethod" + (string->identifier "avgMethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) @@ -265,9 +256,9 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gnc:phenotypeDataset) + (set xkos:classifiedUnder 'gnc:Phenotype) + (set dct:title (field PublishFreeze FullName)) (set rdfs:label (field PublishFreeze Name)) - (set skos:prefLabel (field PublishFreeze FullName)) (set skos:altLabel (field PublishFreeze ShortName)) (set dct:created (annotate-field (field PublishFreeze CreateTime) @@ -295,9 +286,9 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gnc:genotypeDataset) + (set xkos:classifiedUnder 'gnc:Genotype) (set rdfs:label (field GenoFreeze Name)) - (set skos:prefLabel (field GenoFreeze FullName)) + (set dct:title (field GenoFreeze FullName)) (set skos:altLabel (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) @@ -331,9 +322,9 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gnc:probesetDataset) + (set xkos:classifiedUnder 'gnc:Probeset) (set gnt:usesNormalization - (string->identifier "avgmethod" + (string->identifier "avgMethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) @@ -375,6 +366,7 @@ (prefixes '(("v:" "") ("foaf:" "") + ("xsd:" "") ("dcat:" "") ("skos:" "") ("xkos:" "") -- cgit v1.2.3