From 67aa8fc2eb5e7267d1799df1be94fbcec71475c8 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Sun, 30 Jul 2023 12:27:50 +0300 Subject: Add gnc: prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-generif.scm | 102 ++++++++++++++++-------------------- examples/dump-genotype.scm | 13 ++--- examples/dump-phenotype.scm | 5 +- examples/dump-probeset-metadata.scm | 3 +- examples/dump-probesetfreeze.scm | 5 +- examples/dump-publication.scm | 3 +- examples/dump-tissue.scm | 3 +- 7 files changed, 65 insertions(+), 69 deletions(-) diff --git a/examples/dump-generif.scm b/examples/dump-generif.scm index b546f42..0689f57 100755 --- a/examples/dump-generif.scm +++ b/examples/dump-generif.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-genewiki-symbols @@ -26,17 +23,17 @@ (left-join Species "USING (SpeciesId)")) "GROUP BY GeneId ORDER BY BINARY symbol") (schema-triples - (gn:symbol rdfs:domain gn:geneWikiEntry) - (gn:wikiEntryOfSpecies rdfs:range gn:species) - (gn:taxid rdfs:domain gn:geneWikiEntry)) + (gnt:symbol rdfs:domain gn-term:geneWikiEntry) + (gnt:wikiEntryOfSpecies rdfs:range gn:species) + (gnt:taxid rdfs:domain gn-term:geneWikiEntry)) (triples (ontology 'generif: (field GeneRIF_BASIC GeneId)) - (multiset gn:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol)) + (multiset gnt:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol)) #\,)) - (multiset gn:wikiEntryOfSpecies + (multiset gnt:wikiEntryOfSpecies (string-split (field ("GROUP_CONCAT(DISTINCT Species.SpeciesName)" species)) #\,)) - (multiset gn:taxId (map (cut ontology 'ncbiTaxon: <>) + (multiset gnt:taxId (map (cut ontology 'ncbiTaxon: <>) (string-split (field ("GROUP_CONCAT(DISTINCT TaxID)" taxId)) #\,))))) @@ -48,16 +45,16 @@ (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 GROUP BY GeneRIF.symbol") (schema-triples - (gn:geneWikiEntry a rdfs:Class) - (gn:geneWikiEntry a owl:Class) - (gn:geneWikiEntry rdfs:comment "Represents GeneRIF Entries") - (gn:geneCategory rdfs:domain gn:geneWikiEntry) - (gn:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry) - (gn:geneWikiEntry rdfs:domain gn:geneWikiEntry)) + (gnt:geneWikiEntry a rdfs:Class) + (gnt:geneWikiEntry a owl:Class) + (gnt:geneWikiEntry rdfs:comment "Represents GeneRIF Entries") + (gnt:geneCategory rdfs:domain gn:geneWikiEntry) + (gnt:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry) + (gnt:geneWikiEntry rdfs:domain gn:geneWikiEntry)) (triples (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) - (ontology 'gn:anonSymbol_ + (ontology 'gnt:anonSymbol_ (field GeneRIF symbol)) (ontology 'generif: geneid))) @@ -65,14 +62,14 @@ (if (string-null? (field ("IFNULL(GeneRIF_BASIC.GeneId, '')" geneWikiEntryP))) "" 'gn:geneWikiEntry)) - (set gn:wikiEntryOfSpecies - (field Species SpeciesName)) + (set gnt:wikiEntryOfSpecies + (string->binomial-name (field Species FullName))) ;; This only dumps symbols not present in the GeneRIF_BASIC table - (set gn:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) + (set gnt:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) (field GeneRIF symbol) ""))) - (multiset gn:geneWikiEntryOfGn + (multiset gnt:geneWikiEntryOfGn (let* ([entries (sanitize-rdf-string (field @@ -83,7 +80,7 @@ (match-lambda ((genecategory pmid email text createtime weburl) (blank-node - (set gn:geneCategory genecategory) + (set gnt:geneCategory genecategory) (multiset dct:source (map (lambda (el) (if (string-null? el) "" @@ -94,7 +91,7 @@ 'pre "" 'post)) - (set gn:geneWikiEntry + (set gnt:geneWikiEntry (annotate-field text '^^xsd:string)) (set dct:created (annotate-field createtime @@ -108,12 +105,12 @@ (tables (GeneRIF_BASIC) "GROUP BY GeneId, comment, createtime") (schema-triples - (gn:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry)) + (gnt:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry)) (triples (ontology 'generif: (field GeneRIF_BASIC GeneId)) - (set gn:geneWikiEntryOfNCBI + (set gnt:geneWikiEntryOfNCBI (blank-node - (set gn:geneWikiEntry + (set gnt:geneWikiEntry (annotate-field (field GeneRIF_BASIC comment) '^^xsd:string)) (multiset dct:source (map (lambda (el) (if (string-null? el) @@ -127,34 +124,27 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-generif.ttl") - (lambda () - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "foaf:" "") - (prefix "gn:" "") - (prefix "dct:" "") - (prefix "pubmed:" "") - (prefix "up:" "") - (prefix "ncbiTaxon:" "") - (prefix "generif:" "") - (prefix "xsd:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "pubchem:" "") - (prefix "uniprot:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "chebi:" "") - (prefix "kegg:" "") - (newline) - (dump-genewiki-symbols db) - (dump-gn-genewiki-entries db) - (dump-ncbi-genewiki-entries db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "GeneRIF Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("gnc:" "") + ("gnt:" "") + ("dct:" "") + ("pubmed:" "") + ("ncbiTaxon:" "") + ("generif:" "") + ("xsd:" "") + ("owl:" ""))) + (inputs + (list ;; dump-genewiki-symbols + dump-gn-genewiki-entries + ;; dump-ncbi-genewiki-entries + )) + (outputs + '(#:documentation "./docs/dump-generif.md" + #:rdf "./verified-data/dump-generif.ttl"))) diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 88125fa..50cafb6 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -26,9 +26,9 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gnt:shortName rdfs:range rdfs:Literal)) + (gnt:datasetOfInbredSet rdfs:subPropertyOf gnc:inbredSet) + (gnc:genotypeDataset rdfs:subPropertyOf gnc:dataset) + (gnt:shortName rdfs:subPropertyOf gnc:genotypeDataset)) (triples (string->identifier "" @@ -41,7 +41,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:genotypeDataset) + (set rdf:type 'gnc:genotypeDataset) (set gnt:name (field GenoFreeze Name)) (set gnt:fullName (field GenoFreeze FullName)) (set gnt:shortName (field GenoFreeze ShortName)) @@ -57,7 +57,7 @@ (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId") (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples - (gn:genotype rdfs:range rdfs:Literal) + (gnc:genotype rdfs:range rdfs:Literal) (gnt:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples (string->identifier @@ -68,7 +68,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:genotype) + (set rdf:type 'gnc:genotype) (set gnt:name (sanitize-rdf-string (field Geno Name))) (set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name))) (set gnt:chr (field Geno Chr)) @@ -105,6 +105,7 @@ (prefixes '(("dct:" "") ("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 00f99d2..983756b 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -40,7 +40,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:phenotypeDataset) + (set rdf:type 'gnc:phenotypeDataset) (set gnt:name (field PublishFreeze Name)) (set gnt:fullName (field PublishFreeze FullName)) (set gnt:shortName (field PublishFreeze ShortName)) @@ -78,7 +78,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:phenotype) + (set rdf:type 'gnc:phenotype) (set gnt:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" @@ -134,6 +134,7 @@ (prefixes '(("dct:" "") ("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index ddbea5e..37fef70 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -30,7 +30,7 @@ "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) - (set rdf:type 'gn:probesetData) + (set rdf:type 'gnc:probesetData) (set gnt:hasProbeset (ontology 'probeset: @@ -79,6 +79,7 @@ (table-metadata? #f) (prefixes '(("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index 828ab00..30ea9f4 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -22,7 +22,7 @@ (schema-triples (gnt:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) - (set rdf:type 'gn:platform) + (set rdf:type 'gnc:platform) (set gnt:name (field GeneChip GeneChipName)) (set gnt:geoPlatform (ontology 'geoSeries: @@ -50,7 +50,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:probesetDataset) + (set rdf:type 'gnc:probesetDataset) (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) (set gnt:fullName (field ProbeSetFreeze FullName)) (set gnt:shortName (field ProbeSetFreeze ShortName)) @@ -71,6 +71,7 @@ (prefixes '(("geoSeries:" "") ("gn:" "") + ("gnc:" "") ("dct:" "<>") ("gnt:" "") ("rdf:" "") diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 1384261..50e4358 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -39,7 +39,7 @@ (string->identifier "unpublished" (number->string publication-id)) (ontology 'pubmed: pmid))) - (set rdf:type 'gn:publication) + (set rdf:type 'gnc:publication) (set gnt:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) (set gnt:title (delete-substrings (field Publication Title) @@ -70,6 +70,7 @@ (prefixes '(("gnt:" "") ("gn:" "") + ("gnc:" "") ("pubmed:" "") ("rdfs:" "") ("rdf:" ""))) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index a9a50f3..dc76600 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -27,7 +27,7 @@ ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn:tissue) + (set rdf:type 'gnc:tissue) (set gnt:name (field Tissue Name)))) @@ -39,6 +39,7 @@ (prefixes '(("gn:" "") ("gnt:" "") + ("gnc:" "") ("rdf:" "") ("rdfs:" ""))) (inputs -- cgit v1.2.3