about summary refs log tree commit diff
path: root/examples
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-07-30 12:27:50 +0300
committer BonfaceKilz 2023-07-30 12:29:56 +0300
commit 6f5cca6b5511ce5c80639e477ae10b4e70e2b178 (patch)
tree 4fe411bad317a78ba41f863e07a4a10426210945 /examples
parent 1d81a238403c29bb46fb2352505b05cf3c150787 (diff)
download gn-transform-databases-6f5cca6b5511ce5c80639e477ae10b4e70e2b178.tar.gz
Add gnc: prefix
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x examples/dump-generif.scm 102
-rwxr-xr-x examples/dump-genotype.scm 13
-rwxr-xr-x examples/dump-phenotype.scm 5
-rwxr-xr-x examples/dump-probeset-metadata.scm 3
-rwxr-xr-x examples/dump-probesetfreeze.scm 5
-rwxr-xr-x examples/dump-publication.scm 3
-rwxr-xr-x examples/dump-tissue.scm 3
7 files changed, 65 insertions, 69 deletions
diff --git a/examples/dump-generif.scm b/examples/dump-generif.scm
index b546f42..0689f57 100755
--- a/examples/dump-generif.scm
+++ b/examples/dump-generif.scm
@@ -16,9 +16,6 @@
(call-with-input-file (list-ref (command-line) 1)
read))
-(define %dump-directory
- (list-ref (command-line) 2))
-
(define-dump dump-genewiki-symbols
@@ -26,17 +23,17 @@
(left-join Species "USING (SpeciesId)"))
"GROUP BY GeneId ORDER BY BINARY symbol")
(schema-triples
- (gn:symbol rdfs:domain gn:geneWikiEntry)
- (gn:wikiEntryOfSpecies rdfs:range gn:species)
- (gn:taxid rdfs:domain gn:geneWikiEntry))
+ (gnt:symbol rdfs:domain gn-term:geneWikiEntry)
+ (gnt:wikiEntryOfSpecies rdfs:range gn:species)
+ (gnt:taxid rdfs:domain gn-term:geneWikiEntry))
(triples (ontology 'generif: (field GeneRIF_BASIC GeneId))
- (multiset gn:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol))
+ (multiset gnt:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol))
#\,))
- (multiset gn:wikiEntryOfSpecies
+ (multiset gnt:wikiEntryOfSpecies
(string-split
(field ("GROUP_CONCAT(DISTINCT Species.SpeciesName)" species))
#\,))
- (multiset gn:taxId (map (cut ontology 'ncbiTaxon: <>)
+ (multiset gnt:taxId (map (cut ontology 'ncbiTaxon: <>)
(string-split (field ("GROUP_CONCAT(DISTINCT TaxID)" taxId))
#\,)))))
@@ -48,16 +45,16 @@
(left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
"WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 GROUP BY GeneRIF.symbol")
(schema-triples
- (gn:geneWikiEntry a rdfs:Class)
- (gn:geneWikiEntry a owl:Class)
- (gn:geneWikiEntry rdfs:comment "Represents GeneRIF Entries")
- (gn:geneCategory rdfs:domain gn:geneWikiEntry)
- (gn:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry)
- (gn:geneWikiEntry rdfs:domain gn:geneWikiEntry))
+ (gnt:geneWikiEntry a rdfs:Class)
+ (gnt:geneWikiEntry a owl:Class)
+ (gnt:geneWikiEntry rdfs:comment "Represents GeneRIF Entries")
+ (gnt:geneCategory rdfs:domain gn:geneWikiEntry)
+ (gnt:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry)
+ (gnt:geneWikiEntry rdfs:domain gn:geneWikiEntry))
(triples
(let ([geneid (field GeneRIF_BASIC GeneId)])
(if (eq? geneid 0)
- (ontology 'gn:anonSymbol_
+ (ontology 'gnt:anonSymbol_
(field GeneRIF symbol))
(ontology 'generif:
geneid)))
@@ -65,14 +62,14 @@
(if (string-null? (field ("IFNULL(GeneRIF_BASIC.GeneId, '')" geneWikiEntryP)))
""
'gn:geneWikiEntry))
- (set gn:wikiEntryOfSpecies
- (field Species SpeciesName))
+ (set gnt:wikiEntryOfSpecies
+ (string->binomial-name (field Species FullName)))
;; This only dumps symbols not present in the GeneRIF_BASIC table
- (set gn:symbol (let ([geneid (field GeneRIF_BASIC GeneId)])
+ (set gnt:symbol (let ([geneid (field GeneRIF_BASIC GeneId)])
(if (eq? geneid 0)
(field GeneRIF symbol)
"")))
- (multiset gn:geneWikiEntryOfGn
+ (multiset gnt:geneWikiEntryOfGn
(let* ([entries
(sanitize-rdf-string
(field
@@ -83,7 +80,7 @@
(match-lambda
((genecategory pmid email text createtime weburl)
(blank-node
- (set gn:geneCategory genecategory)
+ (set gnt:geneCategory genecategory)
(multiset dct:source
(map (lambda (el) (if (string-null? el)
""
@@ -94,7 +91,7 @@
'pre
""
'post))
- (set gn:geneWikiEntry
+ (set gnt:geneWikiEntry
(annotate-field text '^^xsd:string))
(set dct:created (annotate-field
createtime
@@ -108,12 +105,12 @@
(tables (GeneRIF_BASIC)
"GROUP BY GeneId, comment, createtime")
(schema-triples
- (gn:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry))
+ (gnt:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry))
(triples (ontology 'generif:
(field GeneRIF_BASIC GeneId))
- (set gn:geneWikiEntryOfNCBI
+ (set gnt:geneWikiEntryOfNCBI
(blank-node
- (set gn:geneWikiEntry
+ (set gnt:geneWikiEntry
(annotate-field (field GeneRIF_BASIC comment)
'^^xsd:string))
(multiset dct:source (map (lambda (el) (if (string-null? el)
@@ -127,34 +124,27 @@
-(call-with-target-database
- %connection-settings
- (lambda (db)
- (with-output-to-file (string-append %dump-directory "dump-generif.ttl")
- (lambda ()
- (prefix "rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
- (prefix "rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
- (prefix "foaf:" "<http://xmlns.com/foaf/0.1/>")
- (prefix "gn:" "<http://genenetwork.org/>")
- (prefix "dct:" "<http://purl.org/dc/terms/>")
- (prefix "pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
- (prefix "up:" "<http://purl.uniprot.org/core/>")
- (prefix "ncbiTaxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
- (prefix "generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
- (prefix "xsd:" "<http://www.w3.org/2001/XMLSchema#>")
- (prefix "owl:" "<http://www.w3.org/2002/07/owl#>")
- (prefix "phenotype:" "<http://genenetwork.org/phenotype/>")
- (prefix "molecularTrait:" "<http://genenetwork.org/molecular-trait/>")
- (prefix "nuccore:" "<https://www.ncbi.nlm.nih.gov/nuccore/>")
- (prefix "omim:" "<https://www.omim.org/entry/>")
- (prefix "pubchem:" "<https://pubchem.ncbi.nlm.nih.gov/>")
- (prefix "uniprot:" "<http://purl.uniprot.org/uniprot/>")
- (prefix "hgnc:" "<http://bio2rdf.org/hgnc:>")
- (prefix "homologene:" "<https://bio2rdf.org/homologene:>")
- (prefix "chebi:" "<http://purl.obolibrary.org/obo/CHEBI_>")
- (prefix "kegg:" "<http://bio2rdf.org/ns/kegg#>")
- (newline)
- (dump-genewiki-symbols db)
- (dump-gn-genewiki-entries db)
- (dump-ncbi-genewiki-entries db))
- #:encoding "utf8")))
+(dump-with-documentation
+ (name "GeneRIF Metadata")
+ (connection %connection-settings)
+ (table-metadata? #f)
+ (prefixes
+ '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+ ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+ ("gn:" "<http://genenetwork.org/id/>")
+ ("gnc:" "<http://genenetwork.org/category/>")
+ ("gnt:" "<http://genenetwork.org/term/>")
+ ("dct:" "<http://purl.org/dc/terms/>")
+ ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
+ ("ncbiTaxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
+ ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
+ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+ ("owl:" "<http://www.w3.org/2002/07/owl#>")))
+ (inputs
+ (list ;; dump-genewiki-symbols
+ dump-gn-genewiki-entries
+ ;; dump-ncbi-genewiki-entries
+ ))
+ (outputs
+ '(#:documentation "./docs/dump-generif.md"
+ #:rdf "./verified-data/dump-generif.ttl")))
diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm
index 88125fa..50cafb6 100755
--- a/examples/dump-genotype.scm
+++ b/examples/dump-genotype.scm
@@ -26,9 +26,9 @@
(left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId"))
"WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL")
(schema-triples
- (gnt:datasetOfInbredSet rdfs:range gn:inbredSet)
- (gn:genotypeDataset rdfs:subPropertyOf gn:dataset)
- (gnt:shortName rdfs:range rdfs:Literal))
+ (gnt:datasetOfInbredSet rdfs:subPropertyOf gnc:inbredSet)
+ (gnc:genotypeDataset rdfs:subPropertyOf gnc:dataset)
+ (gnt:shortName rdfs:subPropertyOf gnc:genotypeDataset))
(triples
(string->identifier
""
@@ -41,7 +41,7 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gn:genotypeDataset)
+ (set rdf:type 'gnc:genotypeDataset)
(set gnt:name (field GenoFreeze Name))
(set gnt:fullName (field GenoFreeze FullName))
(set gnt:shortName (field GenoFreeze ShortName))
@@ -57,7 +57,7 @@
(left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId")
(left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")))
(schema-triples
- (gn:genotype rdfs:range rdfs:Literal)
+ (gnc:genotype rdfs:range rdfs:Literal)
(gnt:genotypeDataset rdfs:subPropertyOf gn:dataset))
(triples
(string->identifier
@@ -68,7 +68,7 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gn:genotype)
+ (set rdf:type 'gnc:genotype)
(set gnt:name (sanitize-rdf-string (field Geno Name)))
(set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name)))
(set gnt:chr (field Geno Chr))
@@ -105,6 +105,7 @@
(prefixes
'(("dct:" "<http://purl.org/dc/terms/>")
("gn:" "<http://genenetwork.org/id/>")
+ ("gnc:" "<http://genenetwork.org/category/>")
("gnt:" "<http://genenetwork.org/term/>")
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm
index 00f99d2..983756b 100755
--- a/examples/dump-phenotype.scm
+++ b/examples/dump-phenotype.scm
@@ -40,7 +40,7 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gn:phenotypeDataset)
+ (set rdf:type 'gnc:phenotypeDataset)
(set gnt:name (field PublishFreeze Name))
(set gnt:fullName (field PublishFreeze FullName))
(set gnt:shortName (field PublishFreeze ShortName))
@@ -78,7 +78,7 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gn:phenotype)
+ (set rdf:type 'gnc:phenotype)
(set gnt:name (sanitize-rdf-string
(field
("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))"
@@ -134,6 +134,7 @@
(prefixes
'(("dct:" "<http://purl.org/dc/terms/>")
("gn:" "<http://genenetwork.org/id/>")
+ ("gnc:" "<http://genenetwork.org/category/>")
("gnt:" "<http://genenetwork.org/terms/>")
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm
index ddbea5e..37fef70 100755
--- a/examples/dump-probeset-metadata.scm
+++ b/examples/dump-probeset-metadata.scm
@@ -30,7 +30,7 @@
"probesetData"
(field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))"
ProbeSetName)))
- (set rdf:type 'gn:probesetData)
+ (set rdf:type 'gnc:probesetData)
(set gnt:hasProbeset
(ontology
'probeset:
@@ -79,6 +79,7 @@
(table-metadata? #f)
(prefixes
'(("gn:" "<http://genenetwork.org/id/>")
+ ("gnc:" "<http://genenetwork.org/category/>")
("gnt:" "<http://genenetwork.org/id/>")
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm
index 828ab00..30ea9f4 100755
--- a/examples/dump-probesetfreeze.scm
+++ b/examples/dump-probesetfreeze.scm
@@ -22,7 +22,7 @@
(schema-triples
(gnt:name rdfs:range rdfs:Literal))
(triples (string->identifier "platform" (field GeneChip Name))
- (set rdf:type 'gn:platform)
+ (set rdf:type 'gnc:platform)
(set gnt:name (field GeneChip GeneChipName))
(set gnt:geoPlatform
(ontology 'geoSeries:
@@ -50,7 +50,7 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gn:probesetDataset)
+ (set rdf:type 'gnc:probesetDataset)
(set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name)))
(set gnt:fullName (field ProbeSetFreeze FullName))
(set gnt:shortName (field ProbeSetFreeze ShortName))
@@ -71,6 +71,7 @@
(prefixes
'(("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
("gn:" "<http://genenetwork.org/id/>")
+ ("gnc:" "<http://genenetwork.org/category/>")
("dct:" "<>")
("gnt:" "<http://genenetwork.org/term/>")
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm
index 1384261..50e4358 100755
--- a/examples/dump-publication.scm
+++ b/examples/dump-publication.scm
@@ -39,7 +39,7 @@
(string->identifier "unpublished"
(number->string publication-id))
(ontology 'pubmed: pmid)))
- (set rdf:type 'gn:publication)
+ (set rdf:type 'gnc:publication)
(set gnt:pubMedId
(ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId))))
(set gnt:title (delete-substrings (field Publication Title)
@@ -70,6 +70,7 @@
(prefixes
'(("gnt:" "<http://genenetwork.org/terms/>")
("gn:" "<http://genenetwork.org/id/>")
+ ("gnc:" "<http://genenetwork.org/category/>")
("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")))
diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm
index a9a50f3..dc76600 100755
--- a/examples/dump-tissue.scm
+++ b/examples/dump-tissue.scm
@@ -27,7 +27,7 @@
;; Hopefully the Short_Name field is distinct and can be used as an
;; identifier.
(triples (string->identifier "tissue" (field Tissue Short_Name))
- (set rdf:type 'gn:tissue)
+ (set rdf:type 'gnc:tissue)
(set gnt:name (field Tissue Name))))
@@ -39,6 +39,7 @@
(prefixes
'(("gn:" "<http://genenetwork.org/id/>")
("gnt:" "<http://genenetwork.org/terms/>")
+ ("gnc:" "<http://genenetwork.org/category/>")
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")))
(inputs