about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x examples/dump-dataset-metadata.scm 182
1 files changed, 92 insertions, 90 deletions
diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm
index 53c381c..c51364a 100755
--- a/examples/dump-dataset-metadata.scm
+++ b/examples/dump-dataset-metadata.scm
@@ -52,11 +52,11 @@
(foaf:givenName rdfs:range rdfs:Literal)
(foaf:familyName rdfs:range rdfs:Literal)
(foaf:homepage rdfs:range rdfs:Literal)
- (gn:address rdfs:range rdfs:Literal)
- (gn:city rdfs:range rdfs:Literal)
- (gn:state rdfs:range rdfs:Literal)
- (gn:zipCode rdfs:range rdfs:Literal)
- (gn:country rdfs:range rdfs:Literal))
+ (gn-term:address rdfs:range rdfs:Literal)
+ (gn-term:city rdfs:range rdfs:Literal)
+ (gn-term:state rdfs:range rdfs:Literal)
+ (gn-term:zipCode rdfs:range rdfs:Literal)
+ (gn-term:country rdfs:range rdfs:Literal))
(triples (investigator-attributes->id (field Investigators FirstName)
(field Investigators LastName)
(field Investigators Email))
@@ -70,11 +70,11 @@
(set foaf:familyName
(field ("CAST(CONVERT(BINARY CONVERT(LastName USING latin1) USING utf8) AS VARCHAR(100))" LastName)))
(set foaf:homepage (field Investigators Url))
- (set gn:address (field Investigators Address))
- (set gn:city (field Investigators City))
- (set gn:state (field Investigators State))
- (set gn:zipCode (field Investigators ZipCode))
- (set gn:country (field Investigators Country))))
+ (set gn-term:address (field Investigators Address))
+ (set gn-term:city (field Investigators City))
+ (set gn-term:state (field Investigators State))
+ (set gn-term:zipCode (field Investigators ZipCode))
+ (set gn-term:country (field Investigators Country))))
(define-dump dump-info-files
(tables (InfoFiles
@@ -92,123 +92,125 @@
(left-join GeneChip "USING (GeneChipId)"))
"WHERE GN_AccesionId IS NOT NULL")
(schema-triples
- (gn:dataset rdfs:range rdfs:Literal)
- (gn:datasetOfInvestigator rdfs:domain gn:dataset)
- (gn:datasetOfOrganization rdfs:domain gn:dataset)
- (gn:datasetOfInvestigator rdfs:range foaf:Person)
- (gn:datasetOfInbredSet rdfs:domain gn:dataset)
- (gn:datasetOfInbredSet rdfs:range gn:inbredSet)
- (gn:datasetOfSpecies rdfs:domain gn:dataset)
- (gn:datasetOfSpecies rdfs:range gn:inbredSet)
- (gn:datasetOfTissue rdfs:domain gn:dataset)
- (gn:datasetOfTissue rdfs:range gn:tissue)
- (gn:normalization rdfs:domain gn:dataset)
- (gn:normalization rdfs:range gn:avgMethod)
- (gn:datasetOfPlatform rdfs:domain gn:dataset)
- (gn:datasetOfPlatform rdfs:range gn:geneChip)
- (gn:accessionId rdfs:range rdfs:Literal)
- (gn:datasetStatusName rdfs:range rdfs:Literal)
- (gn:summary rdfs:range rdfs:Literal)
- (gn:aboutTissue rdfs:range rdfs:Literal)
- (gn:geoSeries rdfs:range rdfs:Literal)
- (gn:name rdfs:range rdfs:Literal)
- (gn:title rdfs:range rdfs:Literal)
- (gn:publicationTitle rdfs:range rdfs:Literal)
- (gn:specifics rdfs:range rdfs:Literal)
- (gn:datasetGroup rdfs:range rdfs:Literal)
- (gn:aboutCases rdfs:range rdfs:Literal)
- (gn:aboutPlatform rdfs:range rdfs:Literal)
- (gn:aboutDataProcessing rdfs:range rdfs:Literal)
- (gn:notes rdfs:range rdfs:Literal)
- (gn:experimentDesign rdfs:range rdfs:Literal)
- (gn:contributors rdfs:range rdfs:Literal)
- (gn:citation rdfs:range rdfs:Literal)
- (gn:acknowledgment rdfs:range rdfs:Literal))
- (triples (ontology 'dataset:
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field InfoFiles InfoPageName)
- 'pre "_" 'post))
+ (gn-term:dataset rdfs:range rdfs:Literal)
+ (gn-term:datasetOfInvestigator rdfs:domain gn:dataset)
+ (gn-term:datasetOfOrganization rdfs:domain gn:dataset)
+ (gn-term:datasetOfInvestigator rdfs:range foaf:Person)
+ (gn-term:datasetOfInbredSet rdfs:domain gn:dataset)
+ (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet)
+ (gn-term:datasetOfSpecies rdfs:domain gn:dataset)
+ (gn-term:datasetOfSpecies rdfs:range gn:inbredSet)
+ (gn-term:datasetOfTissue rdfs:domain gn:dataset)
+ (gn-term:datasetOfTissue rdfs:range gn:tissue)
+ (gn-term:normalization rdfs:domain gn:dataset)
+ (gn-term:normalization rdfs:range gn:avgMethod)
+ (gn-term:datasetOfPlatform rdfs:domain gn:dataset)
+ (gn-term:datasetOfPlatform rdfs:range gn:geneChip)
+ (gn-term:accessionId rdfs:range rdfs:Literal)
+ (gn-term:datasetStatusName rdfs:range rdfs:Literal)
+ (gn-term:summary rdfs:range rdfs:Literal)
+ (gn-term:aboutTissue rdfs:range rdfs:Literal)
+ (gn-term:geoSeries rdfs:range rdfs:Literal)
+ (gn-term:name rdfs:range rdfs:Literal)
+ (gn-term:title rdfs:range rdfs:Literal)
+ (gn-term:publicationTitle rdfs:range rdfs:Literal)
+ (gn-term:specifics rdfs:range rdfs:Literal)
+ (gn-term:datasetGroup rdfs:range rdfs:Literal)
+ (gn-term:aboutCases rdfs:range rdfs:Literal)
+ (gn-term:aboutPlatform rdfs:range rdfs:Literal)
+ (gn-term:aboutDataProcessing rdfs:range rdfs:Literal)
+ (gn-term:notes rdfs:range rdfs:Literal)
+ (gn-term:experimentDesign rdfs:range rdfs:Literal)
+ (gn-term:contributors rdfs:range rdfs:Literal)
+ (gn-term:citation rdfs:range rdfs:Literal)
+ (gn-term:acknowledgment rdfs:range rdfs:Literal))
+ (triples (string->identifier
+ "" (regexp-substitute/global #f "[^A-Za-z0-9:]"
+ (field InfoFiles InfoPageName)
+ 'pre "_" 'post)
+ #:separator ""
+ #:proc string-capitalize-first)
(set rdf:type (string->symbol
(field ("IF(GenoFreeze.Id IS NOT NULL, 'gn:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gn:phenotypeDataset', 'gn:dataset'))"
rdfType))))
- (set gn:name (regexp-substitute/global
- #f "^[Nn]one$"
- (field InfoFiles InfoPageName)
- ""))
- (set gn:fullName
+ (set gn-term:name (regexp-substitute/global
+ #f "^[Nn]one$"
+ (field InfoFiles InfoPageName)
+ ""))
+ (set gn-term:fullName
(field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))"
DatasetFullName)))
(set dct:created
(field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))"
createTimeGenoFreeze)))
- (set gn:datasetOfInvestigator
+ (set gn-term:datasetOfInvestigator
(investigator-attributes->id (field Investigators FirstName)
(field Investigators LastName)
(field Investigators Email)))
- (set gn:datasetOfOrganization
+ (set gn-term:datasetOfOrganization
(field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations)))
- (set gn:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId)))
- (set gn:datasetStatusName (string-downcase
- (field DatasetStatus DatasetStatusName)))
- (set gn:datasetOfInbredSet
+ (set gn-term:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId)))
+ (set gn-term:datasetStatusName (string-downcase
+ (field DatasetStatus DatasetStatusName)))
+ (set gn-term:datasetOfInbredSet
(string->identifier "inbredSet" (field InbredSet Name InbredSetName)))
- (set gn:datasetOfTissue (string->identifier "tissue"
- (field Tissue Short_Name)))
- (set gn:normalization
+ (set gn-term:datasetOfTissue (string->identifier "tissue"
+ (field Tissue Short_Name)))
+ (set gn-term:normalization
(string->identifier "avgmethod"
;; If AvgMethodName is NULL, assume N/A.
(if (string-blank? (field AvgMethod Name AvgMethodName))
"N/A" (field AvgMethod Name AvgMethodName))))
- (set gn:datasetOfPlatform
+ (set gn-term:datasetOfPlatform
(string->identifier "platform"
(field GeneChip Name GeneChip)))
- (set gn:summary
+ (set gn-term:summary
(sanitize-rdf-string (field Datasets Summary)))
- (set gn:aboutTissue
+ (set gn-term:aboutTissue
(sanitize-rdf-string (field Datasets AboutTissue)))
- (set gn:geoSeries
+ (set gn-term:geoSeries
(let ((s
(string-match "GSE[0-9]*"
(field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries)))))
(if s (ontology
'geoSeries: (match:substring s))
"")))
- (set gn:title
+ (set gn-term:title
(regexp-substitute/global
#f "^[Nn]one$"
(field InfoFiles InfoFileTitle)
""))
- (set gn:publicationTitle
+ (set gn-term:publicationTitle
(regexp-substitute/global
#f "^[Nn]one$"
(field Datasets PublicationTitle)
""))
- (set gn:specifics (sanitize-rdf-string (field InfoFiles Specifics)))
- (set gn:datasetGroup (field Datasets DatasetName DatasetGroup))
- (set gn:aboutCases
+ (set gn-term:specifics (sanitize-rdf-string (field InfoFiles Specifics)))
+ (set gn-term:datasetGroup (field Datasets DatasetName DatasetGroup))
+ (set gn-term:aboutCases
(sanitize-rdf-string
(field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutCases USING latin1) USING utf8) AS VARCHAR(10000))" AboutCases))))
- (set gn:aboutPlatform
+ (set gn-term:aboutPlatform
(sanitize-rdf-string
(field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutPlatform USING latin1) USING utf8) AS VARCHAR(1500))"
AboutPlatform))))
- (set gn:aboutDataProcessing
+ (set gn-term:aboutDataProcessing
(sanitize-rdf-string
(field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutDataProcessing USING latin1) USING utf8) AS VARCHAR(1500))"
AboutDataProcessing))))
- (set gn:notes
+ (set gn-term:notes
(sanitize-rdf-string
(field ("CAST(CONVERT(BINARY CONVERT(Datasets.Notes USING latin1) USING utf8) AS VARCHAR(1500))"
GNNotes))))
- (set gn:experimentDesign
+ (set gn-term:experimentDesign
(sanitize-rdf-string
(field ("CAST(CONVERT(BINARY CONVERT(Datasets.ExperimentDesign USING latin1) USING utf8) AS VARCHAR(1500))"
ExperimentDesign))))
- (set gn:contributors
+ (set gn-term:contributors
(sanitize-rdf-string
(field ("CAST(CONVERT(BINARY CONVERT(Datasets.Contributors USING latin1) USING utf8) AS VARCHAR(1500))"
Contributors))))
- (set gn:citation
+ (set gn-term:citation
(sanitize-rdf-string
(regexp-substitute/global
#f "^[Nn]one$"
@@ -216,7 +218,7 @@
("CAST(CONVERT(BINARY CONVERT(Datasets.Citation USING latin1) USING utf8) AS VARCHAR(1500))"
Citation))
"")))
- (set gn:dataSourceAcknowledgment
+ (set gn-term:dataSourceAcknowledgment
(sanitize-rdf-string
(string-trim-both
(regexp-substitute/global
@@ -224,8 +226,8 @@
(field ("CAST(CONVERT(BINARY CONVERT(InfoFiles.Data_Source_Acknowledge USING latin1) USING utf8) AS VARCHAR(1500))"
Data_Source_Acknowledge))
""))))
- (set gn:acknowledgment (sanitize-rdf-string
- (field Datasets Acknowledgment)))))
+ (set gn-term:acknowledgment (sanitize-rdf-string
+ (field Datasets Acknowledgment)))))
@@ -235,18 +237,18 @@
(connection %connection-settings)
(table-metadata? #f)
(prefixes
- (("dct:" "<http://purl.org/dc/terms/>")
- ("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
- ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
- ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
- ("gn:" "<http://genenetwork.org/terms/>")
- ("foaf:" "<http://xmlns.com/foaf/0.1/>")
- ("taxon:" "<http://purl.uniprot.org/taxonomy/>")
- ("dataset:" "<http://genenetwork.org/dataset/>")))
+ '(("foaf:" "<http://xmlns.com/foaf/0.1/>")
+ ("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
+ ("gn-term:" "<http://genenetwork.org/term/>")
+ ("gn:" "<http://genenetwork.org/id/>")
+ ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+ ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+ ("taxon:" "<http://purl.uniprot.org/taxonomy/>")
+ ("dct:" "<http://purl.org/dc/terms/>")))
(inputs
- (dump-info-files
- dump-investigators))
+ (list dump-info-files
+ dump-investigators))
(outputs
- (#:documentation "./docs/dump-info-pages.md"
- #:rdf "./verified-data/dump-info-pages.ttl")))
+ '(#:documentation "./docs/dump-info-pages.md"
+ #:rdf "./verified-data/dump-info-pages.ttl")))