diff options
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/dump-dataset-metadata.scm | 138 | ||||
-rwxr-xr-x | examples/dump-genotype.scm | 36 | ||||
-rwxr-xr-x | examples/dump-phenotype.scm | 76 | ||||
-rwxr-xr-x | examples/dump-probeset-metadata.scm | 20 | ||||
-rwxr-xr-x | examples/dump-probeset.scm | 22 | ||||
-rwxr-xr-x | examples/dump-probesetfreeze.scm | 25 | ||||
-rwxr-xr-x | examples/dump-publication.scm | 34 | ||||
-rwxr-xr-x | examples/dump-species-metadata.scm | 35 | ||||
-rwxr-xr-x | examples/dump-tissue.scm | 6 |
9 files changed, 197 insertions, 195 deletions
diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm index c51364a..33e72fe 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dump-dataset-metadata.scm @@ -52,11 +52,11 @@ (foaf:givenName rdfs:range rdfs:Literal) (foaf:familyName rdfs:range rdfs:Literal) (foaf:homepage rdfs:range rdfs:Literal) - (gn-term:address rdfs:range rdfs:Literal) - (gn-term:city rdfs:range rdfs:Literal) - (gn-term:state rdfs:range rdfs:Literal) - (gn-term:zipCode rdfs:range rdfs:Literal) - (gn-term:country rdfs:range rdfs:Literal)) + (gnt:address rdfs:range rdfs:Literal) + (gnt:city rdfs:range rdfs:Literal) + (gnt:state rdfs:range rdfs:Literal) + (gnt:zipCode rdfs:range rdfs:Literal) + (gnt:country rdfs:range rdfs:Literal)) (triples (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email)) @@ -70,11 +70,11 @@ (set foaf:familyName (field ("CAST(CONVERT(BINARY CONVERT(LastName USING latin1) USING utf8) AS VARCHAR(100))" LastName))) (set foaf:homepage (field Investigators Url)) - (set gn-term:address (field Investigators Address)) - (set gn-term:city (field Investigators City)) - (set gn-term:state (field Investigators State)) - (set gn-term:zipCode (field Investigators ZipCode)) - (set gn-term:country (field Investigators Country)))) + (set gnt:address (field Investigators Address)) + (set gnt:city (field Investigators City)) + (set gnt:state (field Investigators State)) + (set gnt:zipCode (field Investigators ZipCode)) + (set gnt:country (field Investigators Country)))) (define-dump dump-info-files (tables (InfoFiles @@ -92,38 +92,38 @@ (left-join GeneChip "USING (GeneChipId)")) "WHERE GN_AccesionId IS NOT NULL") (schema-triples - (gn-term:dataset rdfs:range rdfs:Literal) - (gn-term:datasetOfInvestigator rdfs:domain gn:dataset) - (gn-term:datasetOfOrganization rdfs:domain gn:dataset) - (gn-term:datasetOfInvestigator rdfs:range foaf:Person) - (gn-term:datasetOfInbredSet rdfs:domain gn:dataset) - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn-term:datasetOfSpecies rdfs:domain gn:dataset) - (gn-term:datasetOfSpecies rdfs:range gn:inbredSet) - (gn-term:datasetOfTissue rdfs:domain gn:dataset) - (gn-term:datasetOfTissue rdfs:range gn:tissue) - (gn-term:normalization rdfs:domain gn:dataset) - (gn-term:normalization rdfs:range gn:avgMethod) - (gn-term:datasetOfPlatform rdfs:domain gn:dataset) - (gn-term:datasetOfPlatform rdfs:range gn:geneChip) - (gn-term:accessionId rdfs:range rdfs:Literal) - (gn-term:datasetStatusName rdfs:range rdfs:Literal) - (gn-term:summary rdfs:range rdfs:Literal) - (gn-term:aboutTissue rdfs:range rdfs:Literal) - (gn-term:geoSeries rdfs:range rdfs:Literal) - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:title rdfs:range rdfs:Literal) - (gn-term:publicationTitle rdfs:range rdfs:Literal) - (gn-term:specifics rdfs:range rdfs:Literal) - (gn-term:datasetGroup rdfs:range rdfs:Literal) - (gn-term:aboutCases rdfs:range rdfs:Literal) - (gn-term:aboutPlatform rdfs:range rdfs:Literal) - (gn-term:aboutDataProcessing rdfs:range rdfs:Literal) - (gn-term:notes rdfs:range rdfs:Literal) - (gn-term:experimentDesign rdfs:range rdfs:Literal) - (gn-term:contributors rdfs:range rdfs:Literal) - (gn-term:citation rdfs:range rdfs:Literal) - (gn-term:acknowledgment rdfs:range rdfs:Literal)) + (gnt:dataset rdfs:range rdfs:Literal) + (gnt:datasetOfInvestigator rdfs:domain gn:dataset) + (gnt:datasetOfOrganization rdfs:domain gn:dataset) + (gnt:datasetOfInvestigator rdfs:range foaf:Person) + (gnt:datasetOfInbredSet rdfs:domain gn:dataset) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:datasetOfSpecies rdfs:domain gn:dataset) + (gnt:datasetOfSpecies rdfs:range gn:inbredSet) + (gnt:datasetOfTissue rdfs:domain gn:dataset) + (gnt:datasetOfTissue rdfs:range gn:tissue) + (gnt:normalization rdfs:domain gn:dataset) + (gnt:normalization rdfs:range gn:avgMethod) + (gnt:datasetOfPlatform rdfs:domain gn:dataset) + (gnt:datasetOfPlatform rdfs:range gn:geneChip) + (gnt:accessionId rdfs:range rdfs:Literal) + (gnt:datasetStatusName rdfs:range rdfs:Literal) + (gnt:summary rdfs:range rdfs:Literal) + (gnt:aboutTissue rdfs:range rdfs:Literal) + (gnt:geoSeries rdfs:range rdfs:Literal) + (gnt:name rdfs:range rdfs:Literal) + (gnt:title rdfs:range rdfs:Literal) + (gnt:publicationTitle rdfs:range rdfs:Literal) + (gnt:specifics rdfs:range rdfs:Literal) + (gnt:datasetGroup rdfs:range rdfs:Literal) + (gnt:aboutCases rdfs:range rdfs:Literal) + (gnt:aboutPlatform rdfs:range rdfs:Literal) + (gnt:aboutDataProcessing rdfs:range rdfs:Literal) + (gnt:notes rdfs:range rdfs:Literal) + (gnt:experimentDesign rdfs:range rdfs:Literal) + (gnt:contributors rdfs:range rdfs:Literal) + (gnt:citation rdfs:range rdfs:Literal) + (gnt:acknowledgment rdfs:range rdfs:Literal)) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field InfoFiles InfoPageName) @@ -133,84 +133,84 @@ (set rdf:type (string->symbol (field ("IF(GenoFreeze.Id IS NOT NULL, 'gn:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gn:phenotypeDataset', 'gn:dataset'))" rdfType)))) - (set gn-term:name (regexp-substitute/global + (set gnt:name (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles InfoPageName) "")) - (set gn-term:fullName + (set gnt:fullName (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))" DatasetFullName))) (set dct:created (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))" createTimeGenoFreeze))) - (set gn-term:datasetOfInvestigator + (set gnt:datasetOfInvestigator (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email))) - (set gn-term:datasetOfOrganization + (set gnt:datasetOfOrganization (field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations))) - (set gn-term:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) - (set gn-term:datasetStatusName (string-downcase + (set gnt:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) + (set gnt:datasetStatusName (string-downcase (field DatasetStatus DatasetStatusName))) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))) - (set gn-term:datasetOfTissue (string->identifier "tissue" + (set gnt:datasetOfTissue (string->identifier "tissue" (field Tissue Short_Name))) - (set gn-term:normalization + (set gnt:normalization (string->identifier "avgmethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) - (set gn-term:datasetOfPlatform + (set gnt:datasetOfPlatform (string->identifier "platform" (field GeneChip Name GeneChip))) - (set gn-term:summary + (set gnt:summary (sanitize-rdf-string (field Datasets Summary))) - (set gn-term:aboutTissue + (set gnt:aboutTissue (sanitize-rdf-string (field Datasets AboutTissue))) - (set gn-term:geoSeries + (set gnt:geoSeries (let ((s (string-match "GSE[0-9]*" (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries))))) (if s (ontology 'geoSeries: (match:substring s)) ""))) - (set gn-term:title + (set gnt:title (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles InfoFileTitle) "")) - (set gn-term:publicationTitle + (set gnt:publicationTitle (regexp-substitute/global #f "^[Nn]one$" (field Datasets PublicationTitle) "")) - (set gn-term:specifics (sanitize-rdf-string (field InfoFiles Specifics))) - (set gn-term:datasetGroup (field Datasets DatasetName DatasetGroup)) - (set gn-term:aboutCases + (set gnt:specifics (sanitize-rdf-string (field InfoFiles Specifics))) + (set gnt:datasetGroup (field Datasets DatasetName DatasetGroup)) + (set gnt:aboutCases (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutCases USING latin1) USING utf8) AS VARCHAR(10000))" AboutCases)))) - (set gn-term:aboutPlatform + (set gnt:aboutPlatform (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutPlatform USING latin1) USING utf8) AS VARCHAR(1500))" AboutPlatform)))) - (set gn-term:aboutDataProcessing + (set gnt:aboutDataProcessing (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutDataProcessing USING latin1) USING utf8) AS VARCHAR(1500))" AboutDataProcessing)))) - (set gn-term:notes + (set gnt:notes (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Notes USING latin1) USING utf8) AS VARCHAR(1500))" GNNotes)))) - (set gn-term:experimentDesign + (set gnt:experimentDesign (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.ExperimentDesign USING latin1) USING utf8) AS VARCHAR(1500))" ExperimentDesign)))) - (set gn-term:contributors + (set gnt:contributors (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Contributors USING latin1) USING utf8) AS VARCHAR(1500))" Contributors)))) - (set gn-term:citation + (set gnt:citation (sanitize-rdf-string (regexp-substitute/global #f "^[Nn]one$" @@ -218,7 +218,7 @@ ("CAST(CONVERT(BINARY CONVERT(Datasets.Citation USING latin1) USING utf8) AS VARCHAR(1500))" Citation)) ""))) - (set gn-term:dataSourceAcknowledgment + (set gnt:dataSourceAcknowledgment (sanitize-rdf-string (string-trim-both (regexp-substitute/global @@ -226,7 +226,7 @@ (field ("CAST(CONVERT(BINARY CONVERT(InfoFiles.Data_Source_Acknowledge USING latin1) USING utf8) AS VARCHAR(1500))" Data_Source_Acknowledge)) "")))) - (set gn-term:acknowledgment (sanitize-rdf-string + (set gnt:acknowledgment (sanitize-rdf-string (field Datasets Acknowledgment))))) @@ -239,7 +239,7 @@ (prefixes '(("foaf:" "<http://xmlns.com/foaf/0.1/>") ("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>") - ("gn-term:" "<http://genenetwork.org/term/>") + ("gnt:" "<http://genenetwork.org/term/>") ("gn:" "<http://genenetwork.org/id/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index d97b7e5..88125fa 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -26,9 +26,9 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gn-term:shortName rdfs:range rdfs:Literal)) + (gnt:shortName rdfs:range rdfs:Literal)) (triples (string->identifier "" @@ -42,13 +42,13 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:genotypeDataset) - (set gn-term:name (field GenoFreeze Name)) - (set gn-term:fullName (field GenoFreeze FullName)) - (set gn-term:shortName (field GenoFreeze ShortName)) + (set gnt:name (field GenoFreeze Name)) + (set gnt:fullName (field GenoFreeze FullName)) + (set gnt:shortName (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:date)) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "" (field InbredSet Name InbredSetName))))) (define-dump dump-genotypes @@ -58,7 +58,7 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples (gn:genotype rdfs:range rdfs:Literal) - (gn-term:genotypeDataset rdfs:subPropertyOf gn:dataset)) + (gnt:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples (string->identifier "" @@ -69,14 +69,14 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:genotype) - (set gn-term:name (sanitize-rdf-string (field Geno Name))) - (set gn-term:markerName (sanitize-rdf-string (field Geno Marker_Name))) - (set gn-term:chr (field Geno Chr)) - (set gn-term:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) - (set gn-term:sequence (field Geno Sequence)) - (set gn-term:source (field Geno Source)) - (set gn-term:source2 (field Geno Source2)) - (set gn-term:genotypeOfDataset + (set gnt:name (sanitize-rdf-string (field Geno Name))) + (set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name))) + (set gnt:chr (field Geno Chr)) + (set gnt:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) + (set gnt:sequence (field Geno Sequence)) + (set gnt:source (field Geno Source)) + (set gnt:source2 (field Geno Source2)) + (set gnt:genotypeOfDataset (string->identifier "" (regexp-substitute/global @@ -86,12 +86,12 @@ #:separator "" #:proc string-capitalize-first) ) - (set gn-term:chrNum + (set gnt:chrNum (annotate-field (field ("IFNULL(Geno.chr_num, '')" chr_num)) '^^xsd:int)) (set gn:comments (field ("CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255))" Comments))) - (set gn-term:cM + (set gnt:cM (annotate-field (field ("IFNULL(GenoXRef.cM, '')" Chr_mm8)) '^^xsd:int)))) @@ -105,7 +105,7 @@ (prefixes '(("dct:" "<http://purl.org/dc/terms/>") ("gn:" "<http://genenetwork.org/id/>") - ("gn-term:" "<http://genenetwork.org/term/>") + ("gnt:" "<http://genenetwork.org/term/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 1ef498d..00f99d2 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -27,10 +27,10 @@ (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:fullName rdfs:range rdfs:Literal) - (gn-term:shortName rdfs:range rdfs:Literal) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:name rdfs:range rdfs:Literal) + (gnt:fullName rdfs:range rdfs:Literal) + (gnt:shortName rdfs:range rdfs:Literal) (gn:phenotypeDataset rdf:subClassOf gn:dataset)) (triples (string->identifier @@ -41,13 +41,13 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:phenotypeDataset) - (set gn-term:name (field PublishFreeze Name)) - (set gn-term:fullName (field PublishFreeze FullName)) - (set gn-term:shortName (field PublishFreeze ShortName)) + (set gnt:name (field PublishFreeze Name)) + (set gnt:fullName (field PublishFreeze FullName)) + (set gnt:shortName (field PublishFreeze ShortName)) (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) (define-dump dump-phenotypes @@ -58,19 +58,19 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))) (schema-triples (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset) - (gn-term:publicationDescription rdfs:range rdfs:Literal) - (gn-term:originalDescription rdfs:range rdfs:Literal) - (gn-term:prePublicationDescription rdfs:range rdfs:Literal) - (gn-term:postPublicationAbbreviation rdfs:range rdfs:Literal) - (gn-term:labCode rdfs:range rdfs:Literal) - (gn-term:submitter rdfs:range rdfs:Literal) - (gn-term:owner rdfs:range rdfs:Literal) - (gn-term:mean rdfs:range xsd:double) - (gn-term:LRS rdfs:range xsd:float) - (gn-term:locus rdfs:range rdfs:Literal) - (gn-term:additive rdfs:range xsd:decimal) - (gn-term:sequence rdfs:range rdfs:Literal) - (gn-term:phenotypeOfPublication rdfs:range gn-term:pubMedId)) + (gnt:publicationDescription rdfs:range rdfs:Literal) + (gnt:originalDescription rdfs:range rdfs:Literal) + (gnt:prePublicationDescription rdfs:range rdfs:Literal) + (gnt:postPublicationAbbreviation rdfs:range rdfs:Literal) + (gnt:labCode rdfs:range rdfs:Literal) + (gnt:submitter rdfs:range rdfs:Literal) + (gnt:owner rdfs:range rdfs:Literal) + (gnt:mean rdfs:range xsd:double) + (gnt:LRS rdfs:range xsd:float) + (gnt:locus rdfs:range rdfs:Literal) + (gnt:additive rdfs:range xsd:decimal) + (gnt:sequence rdfs:range rdfs:Literal) + (gnt:phenotypeOfPublication rdfs:range gn-term:pubMedId)) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" @@ -79,44 +79,44 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:phenotype) - (set gn-term:name (sanitize-rdf-string + (set gnt:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" PhenotypeName)))) ;; There is no row with an empty post-publication description so ;; use this field as the main publication description - (set gn-term:publicationDescription + (set gnt:publicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Post_publication_description USING latin1) USING utf8) AS CHAR(10000))" postPubDescr)))) - (set gn-term:originalDescription (sanitize-rdf-string + (set gnt:originalDescription (sanitize-rdf-string (delete-substrings (field Phenotype Original_description) "Original post publication description: "))) - (set gn-term:prePublicationDescription + (set gnt:prePublicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Pre_publication_description USING latin1) USING utf8) AS VARCHAR(15000))" prePubDesc)))) - (set gn-term:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) - (set gn-term:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) - (set gn-term:labCode (field Phenotype Lab_code)) - (set gn-term:submitter (sanitize-rdf-string (field Phenotype Submitter))) - (set gn-term:owner (sanitize-rdf-string (field Phenotype Owner))) - (set gn-term:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) + (set gnt:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) + (set gnt:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) + (set gnt:labCode (field Phenotype Lab_code)) + (set gnt:submitter (sanitize-rdf-string (field Phenotype Submitter))) + (set gnt:owner (sanitize-rdf-string (field Phenotype Owner))) + (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) '^^xsd:double)) - (set gn-term:locus (field PublishXRef Locus)) - (set gn-term:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) - (set gn-term:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) - (set gn-term:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) - (set gn-term:phenotypeOfDataset + (set gnt:locus (field PublishXRef Locus)) + (set gnt:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) + (set gnt:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) + (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) + (set gnt:phenotypeOfDataset (string->identifier "" (field ("IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, ''))" DatasetName)) #:separator "" #:proc string-capitalize-first)) - (set gn-term:phenotypeOfPublication + (set gnt:phenotypeOfPublication (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" pmid))) @@ -134,7 +134,7 @@ (prefixes '(("dct:" "<http://purl.org/dc/terms/>") ("gn:" "<http://genenetwork.org/id/>") - ("gn-term:" "<http://genenetwork.org/terms/>") + ("gnt:" "<http://genenetwork.org/terms/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index 6da1eb0..ddbea5e 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -24,14 +24,14 @@ "WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1") (schema-triples (gn:probesetData rdfs:range gn:probeset) - (gn-term:hasProbeset rdfs:range rdfs:Literal)) + (gnt:hasProbeset rdfs:range rdfs:Literal)) (triples (string->identifier "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) (set rdf:type 'gn:probesetData) - (set gn-term:hasProbeset + (set gnt:hasProbeset (ontology 'probeset: (regexp-substitute/global @@ -39,34 +39,34 @@ (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" name)) 'pre "_" 'post))) - (set gn-term:probesetOfDataset + (set gnt:probesetOfDataset (ontology 'probeset: (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ProbeSetFreeze Name) 'pre "_" 'post))) - (set gn-term:mean + (set gnt:mean (annotate-field (field ("IFNULL(ProbeSetXRef.mean, '')" mean)) '^^xsd:double)) - (set gn-term:se + (set gnt:se (annotate-field (field ("IFNULL(ProbeSetXRef.se, '')" se)) '^^xsd:double)) - (set gn-term:locus (field ProbeSetXRef Locus)) + (set gnt:locus (field ProbeSetXRef Locus)) (set gn:LRS (annotate-field (field ("IFNULL(ProbeSetXRef.LRS, '')" LRS)) '^^xsd:double)) - (set gn-term:pValue + (set gnt:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) '^^xsd:double)) - (set gn-term:additive + (set gnt:additive (annotate-field (field ("IFNULL(ProbeSetXRef.additive, '')" additive)) '^^xsd:double)) - (set gn-term:h2 + (set gnt:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) '^^xsd:float)))) @@ -79,7 +79,7 @@ (table-metadata? #f) (prefixes '(("gn:" "<http://genenetwork.org/id/>") - ("gn-term:" "<http://genenetwork.org/id/>") + ("gnt:" "<http://genenetwork.org/id/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm index be09b48..4d5f9a5 100755 --- a/examples/dump-probeset.scm +++ b/examples/dump-probeset.scm @@ -21,8 +21,8 @@ (tables (ProbeSet (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:probeset rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal) + (gnt:probeset rdfs:range rdfs:Literal)) (triples (ontology 'probeset: (string-trim-both @@ -32,17 +32,17 @@ name)) 'pre "_" 'post))) (set rdf:type 'gn-id:probeset) - (set gn-term:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn-term:name (field ProbeSet Name)) - (set gn-term:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn-term:description (sanitize-rdf-string + (set gnt:chipOf (string->identifier "platform" (field GeneChip Name))) + (set gnt:name (field ProbeSet Name)) + (set gnt:symbol (delete-substrings (field ProbeSet Symbol) "\"")) + (set gnt:description (sanitize-rdf-string (field ProbeSet description))) - (set gn-term:chr (field ProbeSet Chr)) - (set gn-term:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gn-term:blatSeq (sanitize-rdf-string + (set gnt:chr (field ProbeSet Chr)) + (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) + (set gnt:blatSeq (sanitize-rdf-string (string-trim-both (field ProbeSet BlatSeq)))) - (set gn-term:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gn-term:uniProtReference (ontology 'uniprot: + (set gnt:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) + (set gnt:uniProtReference (ontology 'uniprot: (field ProbeSet UniProtID))))) diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index a45fd0a..828ab00 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -20,11 +20,11 @@ (define-dump dump-gene-chip (tables (GeneChip)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) (set rdf:type 'gn:platform) - (set gn-term:name (field GeneChip GeneChipName)) - (set gn-term:geoPlatform + (set gnt:name (field GeneChip GeneChipName)) + (set gnt:geoPlatform (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -38,8 +38,8 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gn-term:avgMethod rdfs:range rdfs:Literal) - (gn-term:dataScale rdfs:range rdfs:Literal) + (gnt:avgMethod rdfs:range rdfs:Literal) + (gnt:dataScale rdfs:range rdfs:Literal) (gn:probesetDataset rdf:subClassOf gn:dataset)) (triples (string->identifier @@ -51,15 +51,15 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:probesetDataset) - (set gn-term:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) - (set gn-term:fullName (field ProbeSetFreeze FullName)) - (set gn-term:shortName (field ProbeSetFreeze ShortName)) + (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) + (set gnt:fullName (field ProbeSetFreeze FullName)) + (set gnt:shortName (field ProbeSetFreeze ShortName)) (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gn-term:dataScale (field ProbeSetFreeze DataScale)) - (set gn-term:tissueName (string->identifier "tissue" (field Tissue Short_Name))) - (set gn-term:datasetOfInbredSet + (set gnt:dataScale (field ProbeSetFreeze DataScale)) + (set gnt:tissueName (string->identifier "tissue" (field Tissue Short_Name))) + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) @@ -71,7 +71,8 @@ (prefixes '(("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>") ("gn:" "<http://genenetwork.org/id/>") - ("gn-term:" "<http://genenetwork.org/term/>") + ("dct:" "<>") + ("gnt:" "<http://genenetwork.org/term/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index f79696e..1384261 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -21,15 +21,15 @@ (define-dump dump-publication (tables (Publication)) (schema-triples - (gn-term:pubMedId rdfs:range rdfs:Literal) - (gn-term:title rdfs:range rdfs:Literal) - (gn-term:journal rdfs:range rdfs:Literal) - (gn-term:volume rdfs:range rdfs:Literal) - (gn-term:pages rdfs:range rdfs:Literal) - (gn-term:month rdfs:range rdfs:Literal) - (gn-term:year rdfs:range rdfs:Literal) - (gn-term:author rdfs:range rdfs:Literal) - (gn-term:abstract rdfs:range rdfs:Literal)) + (gnt:pubMedId rdfs:range rdfs:Literal) + (gnt:title rdfs:range rdfs:Literal) + (gnt:journal rdfs:range rdfs:Literal) + (gnt:volume rdfs:range rdfs:Literal) + (gnt:pages rdfs:range rdfs:Literal) + (gnt:month rdfs:range rdfs:Literal) + (gnt:year rdfs:range rdfs:Literal) + (gnt:author rdfs:range rdfs:Literal) + (gnt:abstract rdfs:range rdfs:Literal)) (triples (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" @@ -40,19 +40,19 @@ (number->string publication-id)) (ontology 'pubmed: pmid))) (set rdf:type 'gn:publication) - (set gn-term:pubMedId + (set gnt:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) - (set gn-term:title (delete-substrings (field Publication Title) + (set gnt:title (delete-substrings (field Publication Title) "Unknown")) - (set gn-term:journal (delete-substrings (field Publication Journal) + (set gnt:journal (delete-substrings (field Publication Journal) "Unknown")) - (set gn-term:volume (delete-substrings (field Publication Volume) + (set gnt:volume (delete-substrings (field Publication Volume) "Unknown")) - (set gn-term:pages (delete-substrings (field Publication Pages) + (set gnt:pages (delete-substrings (field Publication Pages) "Unknown")) - (set gn-term:month (delete-substrings (field Publication Month) + (set gnt:month (delete-substrings (field Publication Month) "Unknown")) - (set gn-term:year (field Publication Year)) + (set gnt:year (field Publication Year)) (multiset gn:author ;; The authors field is a comma ;; separated list. Split it. @@ -68,7 +68,7 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn-term:" "<http://genenetwork.org/terms/>") + '(("gnt:" "<http://genenetwork.org/terms/>") ("gn:" "<http://genenetwork.org/id/>") ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 77db764..39f7147 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -21,25 +21,25 @@ (define-dump dump-species (tables (Species)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:displayName rdfs:range rdfs:Literal) - (gn-term:binomialName rdfs:range rdfs:Literal) - (gn-term:family rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal) + (gnt:displayName rdfs:range rdfs:Literal) + (gnt:binomialName rdfs:range rdfs:Literal) + (gnt:family rdfs:range rdfs:Literal)) (triples (string->binomial-name (field Species FullName)) - (set rdf:type 'gn:species) - (set gn-term:name (field Species SpeciesName)) - (set gn-term:displayName (field Species MenuName)) - (set gn-term:binomialName (field Species FullName)) - (set gn-term:family (field Species Family)) - (set gn-term:organism (ontology 'taxon: (field Species TaxonomyId))))) + (set rdf:type 'gnc:species) + (set gnt:name (field Species SpeciesName)) + (set gnt:displayName (field Species MenuName)) + (set gnt:binomialName (field Species FullName)) + (set gnt:family (field Species Family)) + (set gnt:organism (ontology 'taxon: (field Species TaxonomyId))))) (define-dump dump-strain (tables (Strain (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples - (gn-term:strainOfSpecies rdfs:domain gn-term:strain) - (gn-term:strainOfSpecies rdfs:range gn-term:species) + (gnt:strainOfSpecies rdfs:domain gnt:strain) + (gnt:strainOfSpecies rdfs:range gn-term:species) (gn-term:name rdfs:range rdfs:Literal) (gn-term:alias rdfs:range rdfs:Literal) (gn-term:symbol rdfs:range rdfs:Literal)) @@ -51,7 +51,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:strain) + (set rdf:type 'gnc:strain) (set gn-term:strainOfSpecies (string->binomial-name (field Species FullName))) ;; Name, and maybe a second name @@ -64,7 +64,7 @@ (tables (MappingMethod)) (triples (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gn:mappingMethod))) + (set rdf:type 'gnc:mappingMethod))) (define-dump dump-inbred-set (tables (InbredSet @@ -85,7 +85,7 @@ "" (field InbredSet Name) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:inbredSet) + (set rdf:type 'gnc:inbredSet) (set gn-term:binomialName (field InbredSet FullName)) (set gn-term:geneticType (field InbredSet GeneticType)) (set gn-term:inbredFamily (field InbredSet Family)) @@ -106,7 +106,7 @@ (schema-triples (gn-term:normalization rdfs:range rdfs:Literal)) (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gn:avgMethod) + (set rdf:type 'gnc:avgMethod) (set gn-term:normalization (field AvgMethod Normalization)))) @@ -117,7 +117,8 @@ (table-metadata? #f) (prefixes '(("gn:" "<http://genenetwork.org/id/>") - ("gn-term:" "<http://genenetwork.org/term/>") + ("gnc:" "<http://genenetwork.org/category/>") + ("gnt:" "<http://genenetwork.org/term/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("taxon:" "<http://purl.uniprot.org/taxonomy/>"))) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index ff6792e..a9a50f3 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -23,12 +23,12 @@ ;; and BIRN_lex_Name are mostly NULL. (tables (Tissue)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal)) ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) (set rdf:type 'gn:tissue) - (set gn-term:name (field Tissue Name)))) + (set gnt:name (field Tissue Name)))) @@ -38,7 +38,7 @@ (table-metadata? #f) (prefixes '(("gn:" "<http://genenetwork.org/id/>") - ("gn-term:" "<http://genenetwork.org/terms/>") + ("gnt:" "<http://genenetwork.org/terms/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>"))) (inputs |