diff options
author | Munyoki Kilyungi | 2023-07-30 12:27:50 +0300 |
---|---|---|
committer | BonfaceKilz | 2023-07-30 12:29:56 +0300 |
commit | 6f5cca6b5511ce5c80639e477ae10b4e70e2b178 (patch) | |
tree | 4fe411bad317a78ba41f863e07a4a10426210945 /examples | |
parent | 1d81a238403c29bb46fb2352505b05cf3c150787 (diff) | |
download | gn-transform-databases-6f5cca6b5511ce5c80639e477ae10b4e70e2b178.tar.gz |
Add gnc: prefix
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/dump-generif.scm | 102 | ||||
-rwxr-xr-x | examples/dump-genotype.scm | 13 | ||||
-rwxr-xr-x | examples/dump-phenotype.scm | 5 | ||||
-rwxr-xr-x | examples/dump-probeset-metadata.scm | 3 | ||||
-rwxr-xr-x | examples/dump-probesetfreeze.scm | 5 | ||||
-rwxr-xr-x | examples/dump-publication.scm | 3 | ||||
-rwxr-xr-x | examples/dump-tissue.scm | 3 |
7 files changed, 65 insertions, 69 deletions
diff --git a/examples/dump-generif.scm b/examples/dump-generif.scm index b546f42..0689f57 100755 --- a/examples/dump-generif.scm +++ b/examples/dump-generif.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-genewiki-symbols @@ -26,17 +23,17 @@ (left-join Species "USING (SpeciesId)")) "GROUP BY GeneId ORDER BY BINARY symbol") (schema-triples - (gn:symbol rdfs:domain gn:geneWikiEntry) - (gn:wikiEntryOfSpecies rdfs:range gn:species) - (gn:taxid rdfs:domain gn:geneWikiEntry)) + (gnt:symbol rdfs:domain gn-term:geneWikiEntry) + (gnt:wikiEntryOfSpecies rdfs:range gn:species) + (gnt:taxid rdfs:domain gn-term:geneWikiEntry)) (triples (ontology 'generif: (field GeneRIF_BASIC GeneId)) - (multiset gn:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol)) + (multiset gnt:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol)) #\,)) - (multiset gn:wikiEntryOfSpecies + (multiset gnt:wikiEntryOfSpecies (string-split (field ("GROUP_CONCAT(DISTINCT Species.SpeciesName)" species)) #\,)) - (multiset gn:taxId (map (cut ontology 'ncbiTaxon: <>) + (multiset gnt:taxId (map (cut ontology 'ncbiTaxon: <>) (string-split (field ("GROUP_CONCAT(DISTINCT TaxID)" taxId)) #\,))))) @@ -48,16 +45,16 @@ (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 GROUP BY GeneRIF.symbol") (schema-triples - (gn:geneWikiEntry a rdfs:Class) - (gn:geneWikiEntry a owl:Class) - (gn:geneWikiEntry rdfs:comment "Represents GeneRIF Entries") - (gn:geneCategory rdfs:domain gn:geneWikiEntry) - (gn:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry) - (gn:geneWikiEntry rdfs:domain gn:geneWikiEntry)) + (gnt:geneWikiEntry a rdfs:Class) + (gnt:geneWikiEntry a owl:Class) + (gnt:geneWikiEntry rdfs:comment "Represents GeneRIF Entries") + (gnt:geneCategory rdfs:domain gn:geneWikiEntry) + (gnt:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry) + (gnt:geneWikiEntry rdfs:domain gn:geneWikiEntry)) (triples (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) - (ontology 'gn:anonSymbol_ + (ontology 'gnt:anonSymbol_ (field GeneRIF symbol)) (ontology 'generif: geneid))) @@ -65,14 +62,14 @@ (if (string-null? (field ("IFNULL(GeneRIF_BASIC.GeneId, '')" geneWikiEntryP))) "" 'gn:geneWikiEntry)) - (set gn:wikiEntryOfSpecies - (field Species SpeciesName)) + (set gnt:wikiEntryOfSpecies + (string->binomial-name (field Species FullName))) ;; This only dumps symbols not present in the GeneRIF_BASIC table - (set gn:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) + (set gnt:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) (field GeneRIF symbol) ""))) - (multiset gn:geneWikiEntryOfGn + (multiset gnt:geneWikiEntryOfGn (let* ([entries (sanitize-rdf-string (field @@ -83,7 +80,7 @@ (match-lambda ((genecategory pmid email text createtime weburl) (blank-node - (set gn:geneCategory genecategory) + (set gnt:geneCategory genecategory) (multiset dct:source (map (lambda (el) (if (string-null? el) "" @@ -94,7 +91,7 @@ 'pre "" 'post)) - (set gn:geneWikiEntry + (set gnt:geneWikiEntry (annotate-field text '^^xsd:string)) (set dct:created (annotate-field createtime @@ -108,12 +105,12 @@ (tables (GeneRIF_BASIC) "GROUP BY GeneId, comment, createtime") (schema-triples - (gn:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry)) + (gnt:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry)) (triples (ontology 'generif: (field GeneRIF_BASIC GeneId)) - (set gn:geneWikiEntryOfNCBI + (set gnt:geneWikiEntryOfNCBI (blank-node - (set gn:geneWikiEntry + (set gnt:geneWikiEntry (annotate-field (field GeneRIF_BASIC comment) '^^xsd:string)) (multiset dct:source (map (lambda (el) (if (string-null? el) @@ -127,34 +124,27 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-generif.ttl") - (lambda () - (prefix "rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") - (prefix "rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") - (prefix "foaf:" "<http://xmlns.com/foaf/0.1/>") - (prefix "gn:" "<http://genenetwork.org/>") - (prefix "dct:" "<http://purl.org/dc/terms/>") - (prefix "pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>") - (prefix "up:" "<http://purl.uniprot.org/core/>") - (prefix "ncbiTaxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>") - (prefix "generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>") - (prefix "xsd:" "<http://www.w3.org/2001/XMLSchema#>") - (prefix "owl:" "<http://www.w3.org/2002/07/owl#>") - (prefix "phenotype:" "<http://genenetwork.org/phenotype/>") - (prefix "molecularTrait:" "<http://genenetwork.org/molecular-trait/>") - (prefix "nuccore:" "<https://www.ncbi.nlm.nih.gov/nuccore/>") - (prefix "omim:" "<https://www.omim.org/entry/>") - (prefix "pubchem:" "<https://pubchem.ncbi.nlm.nih.gov/>") - (prefix "uniprot:" "<http://purl.uniprot.org/uniprot/>") - (prefix "hgnc:" "<http://bio2rdf.org/hgnc:>") - (prefix "homologene:" "<https://bio2rdf.org/homologene:>") - (prefix "chebi:" "<http://purl.obolibrary.org/obo/CHEBI_>") - (prefix "kegg:" "<http://bio2rdf.org/ns/kegg#>") - (newline) - (dump-genewiki-symbols db) - (dump-gn-genewiki-entries db) - (dump-ncbi-genewiki-entries db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "GeneRIF Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") + ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") + ("gn:" "<http://genenetwork.org/id/>") + ("gnc:" "<http://genenetwork.org/category/>") + ("gnt:" "<http://genenetwork.org/term/>") + ("dct:" "<http://purl.org/dc/terms/>") + ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>") + ("ncbiTaxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>") + ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>") + ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") + ("owl:" "<http://www.w3.org/2002/07/owl#>"))) + (inputs + (list ;; dump-genewiki-symbols + dump-gn-genewiki-entries + ;; dump-ncbi-genewiki-entries + )) + (outputs + '(#:documentation "./docs/dump-generif.md" + #:rdf "./verified-data/dump-generif.ttl"))) diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 88125fa..50cafb6 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -26,9 +26,9 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gnt:shortName rdfs:range rdfs:Literal)) + (gnt:datasetOfInbredSet rdfs:subPropertyOf gnc:inbredSet) + (gnc:genotypeDataset rdfs:subPropertyOf gnc:dataset) + (gnt:shortName rdfs:subPropertyOf gnc:genotypeDataset)) (triples (string->identifier "" @@ -41,7 +41,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:genotypeDataset) + (set rdf:type 'gnc:genotypeDataset) (set gnt:name (field GenoFreeze Name)) (set gnt:fullName (field GenoFreeze FullName)) (set gnt:shortName (field GenoFreeze ShortName)) @@ -57,7 +57,7 @@ (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId") (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples - (gn:genotype rdfs:range rdfs:Literal) + (gnc:genotype rdfs:range rdfs:Literal) (gnt:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples (string->identifier @@ -68,7 +68,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:genotype) + (set rdf:type 'gnc:genotype) (set gnt:name (sanitize-rdf-string (field Geno Name))) (set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name))) (set gnt:chr (field Geno Chr)) @@ -105,6 +105,7 @@ (prefixes '(("dct:" "<http://purl.org/dc/terms/>") ("gn:" "<http://genenetwork.org/id/>") + ("gnc:" "<http://genenetwork.org/category/>") ("gnt:" "<http://genenetwork.org/term/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 00f99d2..983756b 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -40,7 +40,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:phenotypeDataset) + (set rdf:type 'gnc:phenotypeDataset) (set gnt:name (field PublishFreeze Name)) (set gnt:fullName (field PublishFreeze FullName)) (set gnt:shortName (field PublishFreeze ShortName)) @@ -78,7 +78,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:phenotype) + (set rdf:type 'gnc:phenotype) (set gnt:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" @@ -134,6 +134,7 @@ (prefixes '(("dct:" "<http://purl.org/dc/terms/>") ("gn:" "<http://genenetwork.org/id/>") + ("gnc:" "<http://genenetwork.org/category/>") ("gnt:" "<http://genenetwork.org/terms/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index ddbea5e..37fef70 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -30,7 +30,7 @@ "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) - (set rdf:type 'gn:probesetData) + (set rdf:type 'gnc:probesetData) (set gnt:hasProbeset (ontology 'probeset: @@ -79,6 +79,7 @@ (table-metadata? #f) (prefixes '(("gn:" "<http://genenetwork.org/id/>") + ("gnc:" "<http://genenetwork.org/category/>") ("gnt:" "<http://genenetwork.org/id/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index 828ab00..30ea9f4 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -22,7 +22,7 @@ (schema-triples (gnt:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) - (set rdf:type 'gn:platform) + (set rdf:type 'gnc:platform) (set gnt:name (field GeneChip GeneChipName)) (set gnt:geoPlatform (ontology 'geoSeries: @@ -50,7 +50,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:probesetDataset) + (set rdf:type 'gnc:probesetDataset) (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) (set gnt:fullName (field ProbeSetFreeze FullName)) (set gnt:shortName (field ProbeSetFreeze ShortName)) @@ -71,6 +71,7 @@ (prefixes '(("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>") ("gn:" "<http://genenetwork.org/id/>") + ("gnc:" "<http://genenetwork.org/category/>") ("dct:" "<>") ("gnt:" "<http://genenetwork.org/term/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 1384261..50e4358 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -39,7 +39,7 @@ (string->identifier "unpublished" (number->string publication-id)) (ontology 'pubmed: pmid))) - (set rdf:type 'gn:publication) + (set rdf:type 'gnc:publication) (set gnt:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) (set gnt:title (delete-substrings (field Publication Title) @@ -70,6 +70,7 @@ (prefixes '(("gnt:" "<http://genenetwork.org/terms/>") ("gn:" "<http://genenetwork.org/id/>") + ("gnc:" "<http://genenetwork.org/category/>") ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>"))) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index a9a50f3..dc76600 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -27,7 +27,7 @@ ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn:tissue) + (set rdf:type 'gnc:tissue) (set gnt:name (field Tissue Name)))) @@ -39,6 +39,7 @@ (prefixes '(("gn:" "<http://genenetwork.org/id/>") ("gnt:" "<http://genenetwork.org/terms/>") + ("gnc:" "<http://genenetwork.org/category/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>"))) (inputs |