From bcfd39682bdf3a0a8ea5c1efaf0c33a9367991ad Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 21:27:09 +0300 Subject: Dump species metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 57 +++++++++++++------------------------- 1 file changed, 19 insertions(+), 38 deletions(-) (limited to 'examples') diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 2937c80..4ea9ce7 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-species @@ -34,11 +31,11 @@ (set gn:displayName (field Species MenuName)) (set gn:binomialName (field Species FullName)) (set gn:family (field Species Family)) - (set gn:organism (ontology 'ncbiTaxon: (field Species TaxonomyId))))) + (set gn:organism (ontology 'taxon: (field Species TaxonomyId))))) (define-dump dump-strain (tables (Strain - (join Species "ON Strain.SpeciesId = Species.SpeciesId"))) + (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples (gn:strainOfSpecies rdfs:domain gn:strain) (gn:strainOfSpecies rdfs:range gn:species) @@ -106,36 +103,20 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-species-metadata.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "ncbiTaxon:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (newline) - (dump-species db) - (dump-strain db) - (dump-mapping-method db) - (dump-inbred-set db) - (dump-avg-method db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Species Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + (("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("taxon:" ""))) + (inputs + (dump-species + dump-strain + dump-mapping-method + dump-avg-method)) + (outputs + (#:documentation "docs/dump-species-metadata.md" + #:rdf "./verified-data/dump-species-metadata.ttl"))) -- cgit v1.2.3 From 3494fa35f332e0da6e4d2c76eac0a286fa4f5646 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 23:03:07 +0300 Subject: Dump tissue metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-tissue.scm | 57 ++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 38 deletions(-) (limited to 'examples') diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index b1104ab..4998cff 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -13,53 +13,34 @@ (define %connection-settings - (call-with-input-file (list-ref (command-line) 1) - read)) - -(define %dump-directory - (list-ref (command-line) 2)) + (call-with-input-file (list-ref (command-line) 1) + read)) (define-dump dump-tissue - ;; The Name and TissueName fields seem to be identical. BIRN_lex_ID - ;; and BIRN_lex_Name are mostly NULL. - (tables (Tissue)) + ;; The Name and TissueName fields seem to be identical. BIRN_lex_ID + ;; and BIRN_lex_Name are mostly NULL. + (tables (Tissue)) (schema-triples (gn:name rdfs:range rdfs:Literal)) ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn:tissue) - (set gn:name (field Tissue Name)))) + (set rdf:type 'gn:tissue) + (set gn:name (field Tissue Name)))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-tissue.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (newline) - (dump-tissue db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Tissue Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + (("rdf:" "") + ("rdfs:" "") + ("gn:" ""))) + (inputs + (dump-tissue)) + (outputs + (#:documentation "./docs/dump-tissue.md" #:rdf "./verified-data/dump-tissue.ttl"))) -- cgit v1.2.3 From 47a4af4abad4f872db2832adb61f0bc60ac0ee5a Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 23:03:21 +0300 Subject: Dump publication metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-publication.scm | 51 +++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) (limited to 'examples') diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 784d815..ff46d3d 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -13,11 +13,8 @@ (define %connection-settings - (call-with-input-file (list-ref (command-line) 1) - read)) - -(define %dump-directory - (list-ref (command-line) 2)) + (call-with-input-file (list-ref (command-line) 1) + read)) @@ -65,32 +62,18 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-publication.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "publication:" "") - (newline) - (dump-publication db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Publications Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + (("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("publication:" "") + ("pubmed:" ""))) + (inputs + (dump-publication)) + (outputs + (#:documentation "./docs/dump-publication.md" + #:rdf "./verified-data/dump-publication.md"))) -- cgit v1.2.3 From 3453fed05b3222fab7a153ba378e833f79f02924 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 23:03:39 +0300 Subject: Dump dataset metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-dataset-metadata.scm | 50 ++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 29 deletions(-) (limited to 'examples') diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm index 789e298..53c381c 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dump-dataset-metadata.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - ;; One email ID in the Investigators table has spaces in it. This @@ -150,8 +147,7 @@ (field Investigators Email))) (set gn:datasetOfOrganization (field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations))) - (set gn:accessionId (string-append "GN" (number->string - (field InfoFiles GN_AccesionId)))) + (set gn:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) (set gn:datasetStatusName (string-downcase (field DatasetStatus DatasetStatusName))) (set gn:datasetOfInbredSet @@ -234,27 +230,23 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-info-pages.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "geoSeries:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (prefix "dataset:" "") - (newline) - (dump-info-files db) - (dump-investigators db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Info files / Investigators Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + (("dct:" "") + ("geoSeries:" "") + ("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("foaf:" "") + ("taxon:" "") + ("dataset:" ""))) + (inputs + (dump-info-files + dump-investigators)) + (outputs + (#:documentation "./docs/dump-info-pages.md" + #:rdf "./verified-data/dump-info-pages.ttl"))) + -- cgit v1.2.3 From dfa1e64260d08cae88beb210569a5d0e231dc040 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:18:54 +0300 Subject: Capitalize species identifier Replace gn:species_mus_musculus with gn:species:Mus_musculus. --- examples/dump-species-metadata.scm | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'examples') diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 4ea9ce7..898aa5a 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -25,7 +25,11 @@ (gn:displayName rdfs:range rdfs:Literal) (gn:binomialName rdfs:range rdfs:Literal) (gn:family rdfs:range rdfs:Literal)) - (triples (string->identifier "species" (field Species FullName)) + (triples + (string->identifier "" (field Species FullName) + #:ontology "gn:species:" + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:species) (set gn:name (field Species SpeciesName)) (set gn:displayName (field Species MenuName)) @@ -50,7 +54,10 @@ 'pre "_" 'post)) (set rdf:type 'gn:strain) (set gn:strainOfSpecies - (string->identifier "species" (field Species FullName))) + (string->identifier "" (field Species FullName) + #:ontology "gn:species:" + #:separator "" + #:proc string-capitalize-first)) ;; Name, and maybe a second name (set gn:name (sanitize-rdf-string (field Strain Name))) (set gn:name (sanitize-rdf-string (field Strain Name2))) @@ -85,7 +92,10 @@ (set gn:inbredSetOfMappingMethod (field MappingMethod Name)) (set gn:inbredSetCode (field InbredSet InbredSetCode)) (set gn:inbredSetOfSpecies - (string->identifier "species" (field Species FullName BinomialName))) + (string->identifier "" (field Species FullName BinomialName) + #:ontology "gn:species:" + #:separator "" + #:proc string-capitalize-first)) (set gn:genotype (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" genotypeP))) (set gn:phenotype -- cgit v1.2.3 From 4485d3e9c043bf6b3952f83175b358edf0ef63b3 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:27:58 +0300 Subject: Add "gn:species:" prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'examples') diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 898aa5a..653fb11 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -118,10 +118,11 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - (("rdf:" "") - ("rdfs:" "") - ("gn:" "") - ("taxon:" ""))) + '(("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("gn:species:" "") + ("taxon:" ""))) (inputs (dump-species dump-strain -- cgit v1.2.3 From 5ed67ee1ce654a545ec11481ec0e3a78cda89a75 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:29:07 +0300 Subject: Update dump-species to have lists for inputs and output Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'examples') diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 653fb11..7c6bfc3 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -124,10 +124,10 @@ ("gn:species:" "") ("taxon:" ""))) (inputs - (dump-species - dump-strain - dump-mapping-method - dump-avg-method)) + (list dump-species + dump-strain + dump-mapping-method + dump-avg-method)) (outputs - (#:documentation "docs/dump-species-metadata.md" - #:rdf "./verified-data/dump-species-metadata.ttl"))) + '(#:documentation "./docs/dump-species-metadata.md" + #:rdf "./verified-data/dump-species-metadata.ttl"))) -- cgit v1.2.3 From 90225bfaef8227ae47a592a2e3319633ff367adb Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:38:42 +0300 Subject: Replace gn:species with gn-species Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'examples') diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 7c6bfc3..e83893f 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -27,7 +27,7 @@ (gn:family rdfs:range rdfs:Literal)) (triples (string->identifier "" (field Species FullName) - #:ontology "gn:species:" + #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:species) @@ -55,7 +55,7 @@ (set rdf:type 'gn:strain) (set gn:strainOfSpecies (string->identifier "" (field Species FullName) - #:ontology "gn:species:" + #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first)) ;; Name, and maybe a second name @@ -93,7 +93,7 @@ (set gn:inbredSetCode (field InbredSet InbredSetCode)) (set gn:inbredSetOfSpecies (string->identifier "" (field Species FullName BinomialName) - #:ontology "gn:species:" + #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first)) (set gn:genotype @@ -121,7 +121,7 @@ '(("rdf:" "") ("rdfs:" "") ("gn:" "") - ("gn:species:" "") + ("gn-species:" "") ("taxon:" ""))) (inputs (list dump-species -- cgit v1.2.3 From a525d36edb35587d2f95142d4e6e7e9a32c0136d Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:16:56 +0300 Subject: Replace gn with gn-term and gn-id where suitable Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 110 +++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 53 deletions(-) (limited to 'examples') diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index e83893f..48fd425 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -21,53 +21,54 @@ (define-dump dump-species (tables (Species)) (schema-triples - (gn:name rdfs:range rdfs:Literal) - (gn:displayName rdfs:range rdfs:Literal) - (gn:binomialName rdfs:range rdfs:Literal) - (gn:family rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:displayName rdfs:range rdfs:Literal) + (gn-term:binomialName rdfs:range rdfs:Literal) + (gn-term:family rdfs:range rdfs:Literal)) (triples (string->identifier "" (field Species FullName) - #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:species) - (set gn:name (field Species SpeciesName)) - (set gn:displayName (field Species MenuName)) - (set gn:binomialName (field Species FullName)) - (set gn:family (field Species Family)) - (set gn:organism (ontology 'taxon: (field Species TaxonomyId))))) + (set rdf:type 'gn-id:species) + (set gn-term:name (field Species SpeciesName)) + (set gn-term:displayName (field Species MenuName)) + (set gn-term:binomialName (field Species FullName)) + (set gn-term:family (field Species Family)) + (set gn-term:organism (ontology 'taxon: (field Species TaxonomyId))))) (define-dump dump-strain (tables (Strain (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples - (gn:strainOfSpecies rdfs:domain gn:strain) - (gn:strainOfSpecies rdfs:range gn:species) - (gn:name rdfs:range rdfs:Literal) - (gn:alias rdfs:range rdfs:Literal) - (gn:symbol rdfs:range rdfs:Literal)) + (gn-term:strainOfSpecies rdfs:domain gn-term:strain) + (gn-term:strainOfSpecies rdfs:range gn-term:species) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:alias rdfs:range rdfs:Literal) + (gn-term:symbol rdfs:range rdfs:Literal)) (triples (string->identifier - "strain" + "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ("CAST(CONVERT(BINARY CONVERT(Strain.Name USING latin1) USING utf8) AS VARCHAR(15000))" StrainName)) - 'pre "_" 'post)) - (set rdf:type 'gn:strain) - (set gn:strainOfSpecies + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) + (set rdf:type 'gn-id:strain) + (set gn-term:strainOfSpecies (string->identifier "" (field Species FullName) - #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first)) ;; Name, and maybe a second name - (set gn:name (sanitize-rdf-string (field Strain Name))) - (set gn:name (sanitize-rdf-string (field Strain Name2))) - (set gn:alias (sanitize-rdf-string (field Strain Alias))) - (set gn:symbol (field Strain Symbol)))) + (set gn-term:name (sanitize-rdf-string (field Strain Name))) + (set gn-term:name2 (sanitize-rdf-string (field Strain Name2))) + (set gn-term:alias (sanitize-rdf-string (field Strain Alias))) + (set gn-term:symbol (field Strain Symbol)))) (define-dump dump-mapping-method (tables (MappingMethod)) - (triples (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gn:mappingMethod))) + (triples + (string->identifier "mappingMethod" (field MappingMethod Name)) + (set rdf:type 'gn-id:mappingMethod))) (define-dump dump-inbred-set (tables (InbredSet @@ -75,30 +76,33 @@ (left-join MappingMethod "ON InbredSet.MappingMethodId=MappingMethod.Id"))) (schema-triples - (gn:fullName rdfs:range rdfs:Literal) - (gn:geneticType rdfs:range rdfs:Literal) - (gn:inbredSetCode rdfs:range rdfs:Literal) - (gn:inbredFamily rdfs:range rdfs:Literal) - (gn:inbredSetOfSpecies rdfs:range gn:species) - (gn:inbredSetType rdfs:range rdfs:Literal) - (gn:phenotype rdfs:range gn:inbredSetType) - (gn:genotype rdfs:range gn:inbredSetType) - (gn:inbredSetOfMappingMethod rdfs:range gn:mappingMethod)) - (triples (string->identifier "inbredSet" (field InbredSet Name)) - (set rdf:type 'gn:inbredSet) - (set gn:binomialName (field InbredSet FullName)) - (set gn:geneticType (field InbredSet GeneticType)) - (set gn:inbredFamily (field InbredSet Family)) - (set gn:inbredSetOfMappingMethod (field MappingMethod Name)) - (set gn:inbredSetCode (field InbredSet InbredSetCode)) - (set gn:inbredSetOfSpecies + (gn-term:fullName rdfs:range rdfs:Literal) + (gn-term:geneticType rdfs:range rdfs:Literal) + (gn-term:inbredSetCode rdfs:range rdfs:Literal) + (gn-term:inbredFamily rdfs:range rdfs:Literal) + (gn-term:inbredSetOfSpecies rdfs:range gn:species) + (gn-term:inbredSetType rdfs:range rdfs:Literal) + (gn-term:phenotype rdfs:range gn-term:inbredSetType) + (gn-term:genotype rdfs:range gn-term:inbredSetType) + (gn-term:inbredSetOfMappingMethod rdfs:range gn-term:mappingMethod)) + (triples (string->identifier + "" (field InbredSet Name) + #:separator "" + #:proc string-capitalize-first) + (set rdf:type 'gn-id:inbredSet) + (set gn-term:binomialName (field InbredSet FullName)) + (set gn-term:geneticType (field InbredSet GeneticType)) + (set gn-term:inbredFamily (field InbredSet Family)) + (set gn-term:inbredSetOfMappingMethod (field MappingMethod Name)) + (set gn-term:inbredSetCode (field InbredSet InbredSetCode)) + (set gn-term:inbredSetOfSpecies (string->identifier "" (field Species FullName BinomialName) - #:ontology "gn-species:" + #:ontology "gn-id:" #:separator "" #:proc string-capitalize-first)) - (set gn:genotype + (set gn-term:genotype (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" genotypeP))) - (set gn:phenotype + (set gn-term:phenotype (field ("IF ((SELECT GenoFreeze.Name FROM GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'DNA Markers and SNPs', '')" phenotypeP))))) (define-dump dump-avg-method @@ -106,10 +110,10 @@ ;; the Name field. (tables (AvgMethod)) (schema-triples - (gn:name rdfs:range rdfs:Literal)) + (gn-term:normalization rdfs:range rdfs:Literal)) (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gn:avgMethod) - (set gn:name (field AvgMethod Name)))) + (set rdf:type 'gn-id:avgMethod) + (set gn-term:normalization (field AvgMethod Normalization)))) @@ -118,10 +122,10 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("rdf:" "") + '(("gn-id:" "") + ("gn-term:" "") + ("rdf:" "") ("rdfs:" "") - ("gn:" "") - ("gn-species:" "") ("taxon:" ""))) (inputs (list dump-species -- cgit v1.2.3 From 22de1613596c2a429d52a65702e72018d4011b46 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:19:20 +0300 Subject: Update tissue dump to use gn-term/gn-id Signed-off-by: Munyoki Kilyungi --- examples/dump-tissue.scm | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'examples') diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index 4998cff..376129c 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -23,12 +23,12 @@ ;; and BIRN_lex_Name are mostly NULL. (tables (Tissue)) (schema-triples - (gn:name rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal)) ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn:tissue) - (set gn:name (field Tissue Name)))) + (set rdf:type 'gn-id:tissue) + (set gn-term:name (field Tissue Name)))) @@ -37,10 +37,12 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - (("rdf:" "") - ("rdfs:" "") - ("gn:" ""))) + '(("gn-id:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" ""))) (inputs - (dump-tissue)) + (list dump-tissue)) (outputs - (#:documentation "./docs/dump-tissue.md" #:rdf "./verified-data/dump-tissue.ttl"))) + '(#:documentation "./docs/dump-tissue.md" + #:rdf "./verified-data/dump-tissue.ttl"))) -- cgit v1.2.3 From 30525673f58ace73f9ccc84de570d6967e79958e Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:26:50 +0300 Subject: Dump probeset metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-probeset.scm | 174 ++++++++------------------------------------- 1 file changed, 31 insertions(+), 143 deletions(-) (limited to 'examples') diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm index 0a6e07b..be09b48 100755 --- a/examples/dump-probeset.scm +++ b/examples/dump-probeset.scm @@ -16,17 +16,13 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - -(define-dump dump-probeset-0 +(define-dump dump-probeset (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")) - "LIMIT 2000000 OFFSET 0") + (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples - (gn:name rdfs:range rdfs:Literal) - (gn:probeset rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:probeset rdfs:range rdfs:Literal)) (triples (ontology 'probeset: (string-trim-both @@ -35,142 +31,34 @@ (field ("IFNULL(NULLIF(TRIM(ProbeSet.Name), ''), ProbeSet.Id)" name)) 'pre "_" 'post))) - (set rdf:type 'gn:probeset) - (set gn:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn:name (field ProbeSet Name)) - (set gn:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn:description (sanitize-rdf-string - (field ProbeSet description))) - (set gn:chr (field ProbeSet Chr)) - (set gn:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - ;; For now have the tissue, and alias as one line without - ;; splitting to make the dump faster - ;; (set gn:tissue (field ("IFNULL(ProbeSet.Tissue, '')" Tissue))) - ;; (set gn:alias (field ProbeSet alias)) - ;; (set gn:generif (ontology 'generif: (field ProbeSet GeneId))) - (set gn:blatSeq (sanitize-rdf-string - (string-trim-both (field ProbeSet BlatSeq)))) - (set gn:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - ;; (set gn:unigene (field ProbeSet UniGeneId)) - ;; (set gn:genbank (field ProbeSet GenbankId)) - ;; (set gn:omim (sanitize-rdf-string (string-trim-both (field ProbeSet OMIM)))) - ;; (set gn:RefSeq_TranscriptId (field ProbeSet RefSeq_TranscriptId)) - (set gn:uniProtReference (ontology 'uniprot: - (field ProbeSet UniProtID))))) + (set rdf:type 'gn-id:probeset) + (set gn-term:chipOf (string->identifier "platform" (field GeneChip Name))) + (set gn-term:name (field ProbeSet Name)) + (set gn-term:symbol (delete-substrings (field ProbeSet Symbol) "\"")) + (set gn-term:description (sanitize-rdf-string + (field ProbeSet description))) + (set gn-term:chr (field ProbeSet Chr)) + (set gn-term:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) + (set gn-term:blatSeq (sanitize-rdf-string + (string-trim-both (field ProbeSet BlatSeq)))) + (set gn-term:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) + (set gn-term:uniProtReference (ontology 'uniprot: + (field ProbeSet UniProtID))))) -(define-dump dump-probeset-1 - (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")) - "LIMIT 2000000 OFFSET 2000000") - (schema-triples - (gn:name rdfs:range rdfs:Literal) - (gn:probeset rdfs:range rdfs:Literal)) - (triples (ontology - 'probeset: - (string-trim-both - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(NULLIF(TRIM(ProbeSet.Name), ''), ProbeSet.Id)" - name)) - 'pre "_" 'post))) - (set rdf:type 'gn:probeset) - (set gn:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn:name (field ProbeSet Name)) - (set gn:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn:description (sanitize-rdf-string - (field ProbeSet description))) - (set gn:chr (field ProbeSet Chr)) - (set gn:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gn:blatSeq (sanitize-rdf-string - (string-trim-both (field ProbeSet BlatSeq)))) - (set gn:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gn:uniProtReference (ontology 'uniprot: - (field ProbeSet UniProtID))))) -(define-dump dump-probeset-2 - (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")) - "WHERE ProbeSet.Name IS NOT NULL LIMIT 2000000 OFFSET 4000000") - (schema-triples - (gn:name rdfs:range rdfs:Literal) - (gn:probeset rdfs:range rdfs:Literal)) - (triples (ontology - 'probeset: - (string-trim-both - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" - name)) - 'pre "_" 'post))) - (set rdf:type 'gn:probeset) - (set gn:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn:name (field ProbeSet Name)) - (set gn:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn:description (sanitize-rdf-string - (field ProbeSet description))) - (set gn:chr (field ProbeSet Chr)) - (set gn:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gn:blatSeq (sanitize-rdf-string - (string-trim-both (field ProbeSet BlatSeq)))) - (set gn:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gn:uniProtReference (ontology 'uniprot: - (field ProbeSet UniProtID))))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-probeset-0.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-probeset-0 db)) - #:encoding "utf8") - (with-output-to-file (string-append %dump-directory "dump-probeset-1.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-probeset-1 db)) - #:encoding "utf8") - (with-output-to-file (string-append %dump-directory "dump-probeset-2.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-probeset-2 db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "ProbeSet Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("probeset:" "") + ("rdf:" "") + ("rdfs:" ""))) + (inputs + (list dump-probeset)) + (outputs + '(#:documentation "./docs/dump-probeset.md" + #:rdf "./verified-data/dump-probeset.ttl"))) -- cgit v1.2.3 From f385a1286aa2b53eaa85a3ca9ef3c5b82eabb3cc Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:32:19 +0300 Subject: Dump phenotypes with documentation. --- examples/dump-phenotype.scm | 50 +++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) (limited to 'examples') diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index e4d20c9..33577ce 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -111,34 +111,22 @@ (ontology 'publication: pmid)))))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-phenotype.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "dataset:" "") - (prefix "publication:" "") - (newline) - (dump-publishfreeze db) - (dump-phenotypes db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Phenotypes Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("gn-id:" "") + ("gn-term:" "") + ("phenotype:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" "") + ("dataset:" "") + ("publication:" ""))) + (inputs + (list dump-publishfreeze + dump-phenotype)) + (outputs + '(#:documentation "./docs/dump-phenotype.md" + #:rdf "./verified-data/dump-phenotype.ttl"))) -- cgit v1.2.3 From f7542c86fd6f63c0315776953d734eaef502d667 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 14:28:15 +0300 Subject: Rename 'gn-id' prefix to 'gn' Signed-off-by: Munyoki Kilyungi --- dump/triples.scm | 2 +- examples/dump-publication.scm | 43 +++++++++++++++++++------------------- examples/dump-species-metadata.scm | 18 ++++++++-------- examples/dump-tissue.scm | 4 ++-- 4 files changed, 34 insertions(+), 33 deletions(-) (limited to 'examples') diff --git a/dump/triples.scm b/dump/triples.scm index c168f3e..2b43d68 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -23,7 +23,7 @@ (define* (string->identifier prefix str #:optional #:key - (ontology "gn-id:") + (ontology "gn:") (separator "_") (proc string-downcase)) "Convert STR to a turtle identifier after replacing illegal diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index ff46d3d..6f349d6 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -21,15 +21,15 @@ (define-dump dump-publication (tables (Publication)) (schema-triples - (gn:pubMedId rdfs:range rdfs:Literal) - (gn:title rdfs:range rdfs:Literal) - (gn:journal rdfs:range rdfs:Literal) - (gn:volume rdfs:range rdfs:Literal) - (gn:pages rdfs:range rdfs:Literal) - (gn:month rdfs:range rdfs:Literal) - (gn:year rdfs:range rdfs:Literal) - (gn:author rdfs:range rdfs:Literal) - (gn:abstract rdfs:range rdfs:Literal)) + (gn-term:pubMedId rdfs:range rdfs:Literal) + (gn-term:title rdfs:range rdfs:Literal) + (gn-term:journal rdfs:range rdfs:Literal) + (gn-term:volume rdfs:range rdfs:Literal) + (gn-term:pages rdfs:range rdfs:Literal) + (gn-term:month rdfs:range rdfs:Literal) + (gn-term:year rdfs:range rdfs:Literal) + (gn-term:author rdfs:range rdfs:Literal) + (gn-term:abstract rdfs:range rdfs:Literal)) (triples (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" @@ -40,18 +40,19 @@ (number->string publication-id)) (ontology 'publication: pmid))) (set rdf:type 'gn:publication) - (set gn:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) - (set gn:title (delete-substrings (field Publication Title) - "Unknown")) - (set gn:journal (delete-substrings (field Publication Journal) - "Unknown")) - (set gn:volume (delete-substrings (field Publication Volume) - "Unknown")) - (set gn:pages (delete-substrings (field Publication Pages) - "Unknown")) - (set gn:month (delete-substrings (field Publication Month) - "Unknown")) - (set gn:year (field Publication Year)) + (set gn-term:pubMedId + (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) + (set gn-term:title (delete-substrings (field Publication Title) + "Unknown")) + (set gn-term:journal (delete-substrings (field Publication Journal) + "Unknown")) + (set gn-term:volume (delete-substrings (field Publication Volume) + "Unknown")) + (set gn-term:pages (delete-substrings (field Publication Pages) + "Unknown")) + (set gn-term:month (delete-substrings (field Publication Month) + "Unknown")) + (set gn-term:year (field Publication Year)) (multiset gn:author ;; The authors field is a comma ;; separated list. Split it. diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 48fd425..41d5847 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -29,7 +29,7 @@ (string->identifier "" (field Species FullName) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn-id:species) + (set rdf:type 'gn:species) (set gn-term:name (field Species SpeciesName)) (set gn-term:displayName (field Species MenuName)) (set gn-term:binomialName (field Species FullName)) @@ -53,7 +53,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn-id:strain) + (set rdf:type 'gn:strain) (set gn-term:strainOfSpecies (string->identifier "" (field Species FullName) #:separator "" @@ -68,7 +68,7 @@ (tables (MappingMethod)) (triples (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gn-id:mappingMethod))) + (set rdf:type 'gn:mappingMethod))) (define-dump dump-inbred-set (tables (InbredSet @@ -89,7 +89,7 @@ "" (field InbredSet Name) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn-id:inbredSet) + (set rdf:type 'gn:inbredSet) (set gn-term:binomialName (field InbredSet FullName)) (set gn-term:geneticType (field InbredSet GeneticType)) (set gn-term:inbredFamily (field InbredSet Family)) @@ -97,7 +97,7 @@ (set gn-term:inbredSetCode (field InbredSet InbredSetCode)) (set gn-term:inbredSetOfSpecies (string->identifier "" (field Species FullName BinomialName) - #:ontology "gn-id:" + #:ontology "gn:" #:separator "" #:proc string-capitalize-first)) (set gn-term:genotype @@ -112,7 +112,7 @@ (schema-triples (gn-term:normalization rdfs:range rdfs:Literal)) (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gn-id:avgMethod) + (set rdf:type 'gn:avgMethod) (set gn-term:normalization (field AvgMethod Normalization)))) @@ -122,9 +122,9 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn-id:" "") - ("gn-term:" "") - ("rdf:" "") + '(("gn:" "") + ("gn-term:" "") + ("rdf:" "") ("rdfs:" "") ("taxon:" ""))) (inputs diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index 376129c..ff6792e 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -27,7 +27,7 @@ ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn-id:tissue) + (set rdf:type 'gn:tissue) (set gn-term:name (field Tissue Name)))) @@ -37,7 +37,7 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn-id:" "") + '(("gn:" "") ("gn-term:" "") ("rdf:" "") ("rdfs:" ""))) -- cgit v1.2.3 From 4e99bd1b99a239b43ba9aaa12e563cf774d7e68b Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 14:57:59 +0300 Subject: Remove unnecessary prefixes Signed-off-by: Munyoki Kilyungi --- examples/dump-publication.scm | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'examples') diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 6f349d6..5d2cc9e 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -68,11 +68,12 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - (("rdf:" "") - ("rdfs:" "") - ("gn:" "") - ("publication:" "") - ("pubmed:" ""))) + '(("gn-term:" "") + ("gn:" "") + ("publication:" "") + ("pubmed:" "") + ("rdfs:" "") + ("rdf:" ""))) (inputs (dump-publication)) (outputs -- cgit v1.2.3 From 47a2707a4d5fa33f3d9339c43ef28e96b116ea37 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 14:58:27 +0300 Subject: Make 'inputs' and 'outputs' fields lists Signed-off-by: Munyoki Kilyungi --- examples/dump-publication.scm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'examples') diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 5d2cc9e..fc2e6d0 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -75,7 +75,7 @@ ("rdfs:" "") ("rdf:" ""))) (inputs - (dump-publication)) + (list dump-publication)) (outputs - (#:documentation "./docs/dump-publication.md" - #:rdf "./verified-data/dump-publication.md"))) + '(#:documentation "./docs/dump-publication.md" + #:rdf "./verified-data/dump-publication.md"))) -- cgit v1.2.3 From 381acf546900c74a907bc56e236de4fece953869 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 16:41:36 +0300 Subject: Use "gn:" and "gn-terms" prefixes Signed-off-by: Munyoki Kilyungi --- examples/dump-dataset-metadata.scm | 182 +++++++++++++++++++------------------ 1 file changed, 92 insertions(+), 90 deletions(-) (limited to 'examples') diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm index 53c381c..c51364a 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dump-dataset-metadata.scm @@ -52,11 +52,11 @@ (foaf:givenName rdfs:range rdfs:Literal) (foaf:familyName rdfs:range rdfs:Literal) (foaf:homepage rdfs:range rdfs:Literal) - (gn:address rdfs:range rdfs:Literal) - (gn:city rdfs:range rdfs:Literal) - (gn:state rdfs:range rdfs:Literal) - (gn:zipCode rdfs:range rdfs:Literal) - (gn:country rdfs:range rdfs:Literal)) + (gn-term:address rdfs:range rdfs:Literal) + (gn-term:city rdfs:range rdfs:Literal) + (gn-term:state rdfs:range rdfs:Literal) + (gn-term:zipCode rdfs:range rdfs:Literal) + (gn-term:country rdfs:range rdfs:Literal)) (triples (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email)) @@ -70,11 +70,11 @@ (set foaf:familyName (field ("CAST(CONVERT(BINARY CONVERT(LastName USING latin1) USING utf8) AS VARCHAR(100))" LastName))) (set foaf:homepage (field Investigators Url)) - (set gn:address (field Investigators Address)) - (set gn:city (field Investigators City)) - (set gn:state (field Investigators State)) - (set gn:zipCode (field Investigators ZipCode)) - (set gn:country (field Investigators Country)))) + (set gn-term:address (field Investigators Address)) + (set gn-term:city (field Investigators City)) + (set gn-term:state (field Investigators State)) + (set gn-term:zipCode (field Investigators ZipCode)) + (set gn-term:country (field Investigators Country)))) (define-dump dump-info-files (tables (InfoFiles @@ -92,123 +92,125 @@ (left-join GeneChip "USING (GeneChipId)")) "WHERE GN_AccesionId IS NOT NULL") (schema-triples - (gn:dataset rdfs:range rdfs:Literal) - (gn:datasetOfInvestigator rdfs:domain gn:dataset) - (gn:datasetOfOrganization rdfs:domain gn:dataset) - (gn:datasetOfInvestigator rdfs:range foaf:Person) - (gn:datasetOfInbredSet rdfs:domain gn:dataset) - (gn:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn:datasetOfSpecies rdfs:domain gn:dataset) - (gn:datasetOfSpecies rdfs:range gn:inbredSet) - (gn:datasetOfTissue rdfs:domain gn:dataset) - (gn:datasetOfTissue rdfs:range gn:tissue) - (gn:normalization rdfs:domain gn:dataset) - (gn:normalization rdfs:range gn:avgMethod) - (gn:datasetOfPlatform rdfs:domain gn:dataset) - (gn:datasetOfPlatform rdfs:range gn:geneChip) - (gn:accessionId rdfs:range rdfs:Literal) - (gn:datasetStatusName rdfs:range rdfs:Literal) - (gn:summary rdfs:range rdfs:Literal) - (gn:aboutTissue rdfs:range rdfs:Literal) - (gn:geoSeries rdfs:range rdfs:Literal) - (gn:name rdfs:range rdfs:Literal) - (gn:title rdfs:range rdfs:Literal) - (gn:publicationTitle rdfs:range rdfs:Literal) - (gn:specifics rdfs:range rdfs:Literal) - (gn:datasetGroup rdfs:range rdfs:Literal) - (gn:aboutCases rdfs:range rdfs:Literal) - (gn:aboutPlatform rdfs:range rdfs:Literal) - (gn:aboutDataProcessing rdfs:range rdfs:Literal) - (gn:notes rdfs:range rdfs:Literal) - (gn:experimentDesign rdfs:range rdfs:Literal) - (gn:contributors rdfs:range rdfs:Literal) - (gn:citation rdfs:range rdfs:Literal) - (gn:acknowledgment rdfs:range rdfs:Literal)) - (triples (ontology 'dataset: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field InfoFiles InfoPageName) - 'pre "_" 'post)) + (gn-term:dataset rdfs:range rdfs:Literal) + (gn-term:datasetOfInvestigator rdfs:domain gn:dataset) + (gn-term:datasetOfOrganization rdfs:domain gn:dataset) + (gn-term:datasetOfInvestigator rdfs:range foaf:Person) + (gn-term:datasetOfInbredSet rdfs:domain gn:dataset) + (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) + (gn-term:datasetOfSpecies rdfs:domain gn:dataset) + (gn-term:datasetOfSpecies rdfs:range gn:inbredSet) + (gn-term:datasetOfTissue rdfs:domain gn:dataset) + (gn-term:datasetOfTissue rdfs:range gn:tissue) + (gn-term:normalization rdfs:domain gn:dataset) + (gn-term:normalization rdfs:range gn:avgMethod) + (gn-term:datasetOfPlatform rdfs:domain gn:dataset) + (gn-term:datasetOfPlatform rdfs:range gn:geneChip) + (gn-term:accessionId rdfs:range rdfs:Literal) + (gn-term:datasetStatusName rdfs:range rdfs:Literal) + (gn-term:summary rdfs:range rdfs:Literal) + (gn-term:aboutTissue rdfs:range rdfs:Literal) + (gn-term:geoSeries rdfs:range rdfs:Literal) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:title rdfs:range rdfs:Literal) + (gn-term:publicationTitle rdfs:range rdfs:Literal) + (gn-term:specifics rdfs:range rdfs:Literal) + (gn-term:datasetGroup rdfs:range rdfs:Literal) + (gn-term:aboutCases rdfs:range rdfs:Literal) + (gn-term:aboutPlatform rdfs:range rdfs:Literal) + (gn-term:aboutDataProcessing rdfs:range rdfs:Literal) + (gn-term:notes rdfs:range rdfs:Literal) + (gn-term:experimentDesign rdfs:range rdfs:Literal) + (gn-term:contributors rdfs:range rdfs:Literal) + (gn-term:citation rdfs:range rdfs:Literal) + (gn-term:acknowledgment rdfs:range rdfs:Literal)) + (triples (string->identifier + "" (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field InfoFiles InfoPageName) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type (string->symbol (field ("IF(GenoFreeze.Id IS NOT NULL, 'gn:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gn:phenotypeDataset', 'gn:dataset'))" rdfType)))) - (set gn:name (regexp-substitute/global - #f "^[Nn]one$" - (field InfoFiles InfoPageName) - "")) - (set gn:fullName + (set gn-term:name (regexp-substitute/global + #f "^[Nn]one$" + (field InfoFiles InfoPageName) + "")) + (set gn-term:fullName (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))" DatasetFullName))) (set dct:created (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))" createTimeGenoFreeze))) - (set gn:datasetOfInvestigator + (set gn-term:datasetOfInvestigator (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email))) - (set gn:datasetOfOrganization + (set gn-term:datasetOfOrganization (field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations))) - (set gn:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) - (set gn:datasetStatusName (string-downcase - (field DatasetStatus DatasetStatusName))) - (set gn:datasetOfInbredSet + (set gn-term:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) + (set gn-term:datasetStatusName (string-downcase + (field DatasetStatus DatasetStatusName))) + (set gn-term:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))) - (set gn:datasetOfTissue (string->identifier "tissue" - (field Tissue Short_Name))) - (set gn:normalization + (set gn-term:datasetOfTissue (string->identifier "tissue" + (field Tissue Short_Name))) + (set gn-term:normalization (string->identifier "avgmethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) - (set gn:datasetOfPlatform + (set gn-term:datasetOfPlatform (string->identifier "platform" (field GeneChip Name GeneChip))) - (set gn:summary + (set gn-term:summary (sanitize-rdf-string (field Datasets Summary))) - (set gn:aboutTissue + (set gn-term:aboutTissue (sanitize-rdf-string (field Datasets AboutTissue))) - (set gn:geoSeries + (set gn-term:geoSeries (let ((s (string-match "GSE[0-9]*" (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries))))) (if s (ontology 'geoSeries: (match:substring s)) ""))) - (set gn:title + (set gn-term:title (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles InfoFileTitle) "")) - (set gn:publicationTitle + (set gn-term:publicationTitle (regexp-substitute/global #f "^[Nn]one$" (field Datasets PublicationTitle) "")) - (set gn:specifics (sanitize-rdf-string (field InfoFiles Specifics))) - (set gn:datasetGroup (field Datasets DatasetName DatasetGroup)) - (set gn:aboutCases + (set gn-term:specifics (sanitize-rdf-string (field InfoFiles Specifics))) + (set gn-term:datasetGroup (field Datasets DatasetName DatasetGroup)) + (set gn-term:aboutCases (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutCases USING latin1) USING utf8) AS VARCHAR(10000))" AboutCases)))) - (set gn:aboutPlatform + (set gn-term:aboutPlatform (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutPlatform USING latin1) USING utf8) AS VARCHAR(1500))" AboutPlatform)))) - (set gn:aboutDataProcessing + (set gn-term:aboutDataProcessing (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutDataProcessing USING latin1) USING utf8) AS VARCHAR(1500))" AboutDataProcessing)))) - (set gn:notes + (set gn-term:notes (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Notes USING latin1) USING utf8) AS VARCHAR(1500))" GNNotes)))) - (set gn:experimentDesign + (set gn-term:experimentDesign (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.ExperimentDesign USING latin1) USING utf8) AS VARCHAR(1500))" ExperimentDesign)))) - (set gn:contributors + (set gn-term:contributors (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Contributors USING latin1) USING utf8) AS VARCHAR(1500))" Contributors)))) - (set gn:citation + (set gn-term:citation (sanitize-rdf-string (regexp-substitute/global #f "^[Nn]one$" @@ -216,7 +218,7 @@ ("CAST(CONVERT(BINARY CONVERT(Datasets.Citation USING latin1) USING utf8) AS VARCHAR(1500))" Citation)) ""))) - (set gn:dataSourceAcknowledgment + (set gn-term:dataSourceAcknowledgment (sanitize-rdf-string (string-trim-both (regexp-substitute/global @@ -224,8 +226,8 @@ (field ("CAST(CONVERT(BINARY CONVERT(InfoFiles.Data_Source_Acknowledge USING latin1) USING utf8) AS VARCHAR(1500))" Data_Source_Acknowledge)) "")))) - (set gn:acknowledgment (sanitize-rdf-string - (field Datasets Acknowledgment))))) + (set gn-term:acknowledgment (sanitize-rdf-string + (field Datasets Acknowledgment))))) @@ -235,18 +237,18 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - (("dct:" "") - ("geoSeries:" "") - ("rdf:" "") - ("rdfs:" "") - ("gn:" "") - ("foaf:" "") - ("taxon:" "") - ("dataset:" ""))) + '(("foaf:" "") + ("geoSeries:" "") + ("gn-term:" "") + ("gn:" "") + ("rdf:" "") + ("rdfs:" "") + ("taxon:" "") + ("dct:" ""))) (inputs - (dump-info-files - dump-investigators)) + (list dump-info-files + dump-investigators)) (outputs - (#:documentation "./docs/dump-info-pages.md" - #:rdf "./verified-data/dump-info-pages.ttl"))) + '(#:documentation "./docs/dump-info-pages.md" + #:rdf "./verified-data/dump-info-pages.ttl"))) -- cgit v1.2.3 From bfeeefcd6b6383a5df317441f7e885a4631e5458 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 16:56:27 +0300 Subject: Replace "publication:" with "pubmed:" Signed-off-by: Munyoki Kilyungi --- examples/dump-publication.scm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'examples') diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index fc2e6d0..f79696e 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -38,7 +38,7 @@ (if (string-null? pmid) (string->identifier "unpublished" (number->string publication-id)) - (ontology 'publication: pmid))) + (ontology 'pubmed: pmid))) (set rdf:type 'gn:publication) (set gn-term:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) @@ -70,7 +70,6 @@ (prefixes '(("gn-term:" "") ("gn:" "") - ("publication:" "") ("pubmed:" "") ("rdfs:" "") ("rdf:" ""))) -- cgit v1.2.3 From d9e8b0ee01d4cdef99d5e23f53bcb34b8cd63d88 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 16:57:20 +0300 Subject: Use "gn:" and "gn-term:" when dumping phenotypes Signed-off-by: Munyoki Kilyungi --- examples/dump-phenotype.scm | 98 +++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 48 deletions(-) (limited to 'examples') diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 33577ce..924ec9a 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -18,9 +18,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - ;; Only dump publish freeze entries that were not dumped from the InfoFiles page @@ -30,25 +27,28 @@ (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn:name rdfs:range rdfs:Literal) - (gn:fullName rdfs:range rdfs:Literal) - (gn:shortName rdfs:range rdfs:Literal) - (gn:createTime rdfs:range rdfs:Literal) + (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:fullName rdfs:range rdfs:Literal) + (gn-term:shortName rdfs:range rdfs:Literal) + (gn-term:createTime rdfs:range rdfs:Literal) (gn:phenotypeDataset rdf:subClassOf gn:dataset)) (triples - (ontology 'dataset: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field PublishFreeze Name) - 'pre "_" 'post)) + (string->identifier + "" + (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field PublishFreeze Name) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:phenotypeDataset) - (set gn:name (field PublishFreeze Name)) - (set gn:fullName (field PublishFreeze FullName)) - (set gn:shortName (field PublishFreeze ShortName)) - (set dct:created (annotate-field + (set gn-term:name (field PublishFreeze Name)) + (set gn-term:fullName (field PublishFreeze FullName)) + (set gn-term:shortName (field PublishFreeze ShortName)) + (set dc-termt:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) - (set gn:datasetOfInbredSet + (set gn-term:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) (define-dump dump-phenotypes @@ -59,48 +59,52 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))) (schema-triples (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset)) - (triples (ontology 'phenotype: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ("CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, ':')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation))" abbrev)) - 'pre "_" 'post)) + (triples (string->identifier + "" + (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field ("CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, '_')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation))" abbrev)) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:phenotype) - (set gn:name (sanitize-rdf-string + (set gn-term:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" PhenotypeName)))) ;; There is no row with an empty post-publication description so ;; use this field as the main publication description - (set gn:publicationDescription + (set gn-term:publicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Post_publication_description USING latin1) USING utf8) AS CHAR(10000))" postPubDescr)))) - (set gn:originalDescription (sanitize-rdf-string + (set gn-term:originalDescription (sanitize-rdf-string (delete-substrings (field Phenotype Original_description) "Original post publication description: "))) - (set gn:prePublicationDescription + (set gn-term:prePublicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Pre_publication_description USING latin1) USING utf8) AS VARCHAR(15000))" prePubDesc)))) - (set gn:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) - (set gn:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) - (set gn:labCode (field Phenotype Lab_code)) - (set gn:submitter (sanitize-rdf-string (field Phenotype Submitter))) - (set gn:owner (sanitize-rdf-string (field Phenotype Owner))) - (set gn:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) + (set gn-term:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) + (set gn-term:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) + (set gn-term:labCode (field Phenotype Lab_code)) + (set gn-term:submitter (sanitize-rdf-string (field Phenotype Submitter))) + (set gn-term:owner (sanitize-rdf-string (field Phenotype Owner))) + (set gn-term:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) '^^xsd:double)) - (set gn:locus (field PublishXRef Locus)) - (set gn:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) - (set gn:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) - (set gn:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) - (set gn:phenotypeOfDataset - (ontology 'dataset: - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, ''))" DatasetName)) - 'pre "_" 'post))) - (set gn:phenotypeOfPublication + (set gn-term:locus (field PublishXRef Locus)) + (set gn-term:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) + (set gn-term:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) + (set gn-term:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) + (set gn-term:phenotypeOfDataset + (string->identifier + "" + (field + ("IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, ''))" DatasetName)) + #:separator "" + #:proc string-capitalize-first)) + (set gn-term:phenotypeOfPublication (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" pmid))) @@ -108,7 +112,7 @@ (if (string-null? pmid) (string->identifier "unpublished" (number->string publication-id)) - (ontology 'publication: pmid)))))) + (ontology 'pubmed: pmid)))))) (dump-with-documentation @@ -116,17 +120,15 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn-id:" "") + '(("gn:" "") ("gn-term:" "") - ("phenotype:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" "") - ("dataset:" "") - ("publication:" ""))) + ("pubmed:" ""))) (inputs (list dump-publishfreeze - dump-phenotype)) + dump-phenotypes)) (outputs '(#:documentation "./docs/dump-phenotype.md" #:rdf "./verified-data/dump-phenotype.ttl"))) -- cgit v1.2.3 From d7d1bef8c6dd18e2dfe8e48b7a23efdb640b1eaf Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 17:43:13 +0300 Subject: Dump genotypes with the new syntax Signed-off-by: Munyoki Kilyungi --- examples/dump-genotype.scm | 119 +++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 58 deletions(-) (limited to 'examples') diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 1be1d34..0fbbbfe 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -18,9 +18,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-genofreeze @@ -29,24 +26,30 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn:datasetOfInbredSet rdfs:range gn:inbredSet) + (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gn:shortName rdfs:range rdfs:Literal)) - (triples (ontology - 'dataset: - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field GenoFreeze Name) - 'pre "_" 'post)) + (gn-term:shortName rdfs:range rdfs:Literal)) + (triples + (string->identifier + "" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field GenoFreeze Name) + 'pre "_" 'post) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:genotypeDataset) - (set gn:name (field GenoFreeze Name)) - (set gn:fullName (field GenoFreeze FullName)) - (set gn:shortName (field GenoFreeze ShortName)) + (set gn-term:name (field GenoFreeze Name)) + (set gn-term:fullName (field GenoFreeze FullName)) + (set gn-term:shortName (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:date)) - (set gn:datasetOfInbredSet - (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) + (set gn-term:datasetOfInbredSet + (string->identifier "" (field InbredSet Name InbredSetName))))) (define-dump dump-genotypes (tables (Geno @@ -54,60 +57,60 @@ (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId") (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples - (gn:genotypeDataset rdfs:subPropertyOf gn:dataset)) + (gn:genotype rdfs:range rdfs:Literal) + (gn-term:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples - (ontology - 'genotype: + (string->identifier + "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, ':')), Geno.Name)" abbrev)) - 'pre "_" 'post)) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:genotype) - (set gn:name (sanitize-rdf-string (field Geno Name))) - (set gn:markerName (sanitize-rdf-string (field Geno Marker_Name))) - (set gn:chr (field Geno Chr)) - (set gn:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) - (set gn:sequence (annotate-field (field Geno Sequence) '^^xsd:int)) - (set gn:source (field Geno Source)) - (set gn:source2 (field Geno Source2)) - (set gn:genotypeOfDataset - (ontology 'dataset: - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(GenoFreeze.Name, '')" DatasetName)) - 'pre "_" 'post))) - (set gn:chrNum + (set gn-term:name (sanitize-rdf-string (field Geno Name))) + (set gn-term:markerName (sanitize-rdf-string (field Geno Marker_Name))) + (set gn-term:chr (field Geno Chr)) + (set gn-term:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) + (set gn-term:sequence (field Geno Sequence)) + (set gn-term:source (field Geno Source)) + (set gn-term:source2 (field Geno Source2)) + (set gn-term:genotypeOfDataset + (string->identifier + "" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field ("IFNULL(GenoFreeze.Name, '')" DatasetName)) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) + ) + (set gn-term:chrNum (annotate-field (field ("IFNULL(Geno.chr_num, '')" chr_num)) '^^xsd:int)) (set gn:comments (field ("CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255))" Comments))) - (set gn:cM + (set gn-term:cM (annotate-field (field ("IFNULL(GenoXRef.cM, '')" Chr_mm8)) '^^xsd:int)))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-genotype.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "genotype:" "") - (prefix "dataset:" "") - (newline) - (dump-genofreeze db) - (dump-genotypes db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Genotype Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("gn:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" ""))) + (inputs + (list dump-genofreeze + dump-genotypes)) + (outputs + '(#:documentation "./docs/dump-genotype.md" + #:rdf "./verified-data/dump-genotype.ttl"))) -- cgit v1.2.3 From 0036a4f63fa3bef6eea95fe635eb23d4dc070727 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 20:14:02 +0300 Subject: Dump probeset-metadata using the new syntax Signed-off-by: Munyoki Kilyungi --- examples/dump-probeset-metadata.scm | 65 ++++++++++++++----------------------- 1 file changed, 24 insertions(+), 41 deletions(-) (limited to 'examples') diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index b0c4853..6da1eb0 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-probeset-metadata (tables (ProbeSetXRef @@ -27,14 +24,14 @@ "WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1") (schema-triples (gn:probesetData rdfs:range gn:probeset) - (gn:hasProbeset rdfs:range rdfs:Literal)) + (gn-term:hasProbeset rdfs:range rdfs:Literal)) (triples (string->identifier "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) (set rdf:type 'gn:probesetData) - (set gn:hasProbeset + (set gn-term:hasProbeset (ontology 'probeset: (regexp-substitute/global @@ -42,66 +39,52 @@ (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" name)) 'pre "_" 'post))) - (set gn:probesetOfDataset + (set gn-term:probesetOfDataset (ontology 'probeset: (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ProbeSetFreeze Name) 'pre "_" 'post))) - (set gn:mean + (set gn-term:mean (annotate-field (field ("IFNULL(ProbeSetXRef.mean, '')" mean)) '^^xsd:double)) - (set gn:se + (set gn-term:se (annotate-field (field ("IFNULL(ProbeSetXRef.se, '')" se)) '^^xsd:double)) - (set gn:locus (field ProbeSetXRef Locus)) + (set gn-term:locus (field ProbeSetXRef Locus)) (set gn:LRS (annotate-field (field ("IFNULL(ProbeSetXRef.LRS, '')" LRS)) '^^xsd:double)) - (set gn:pValue + (set gn-term:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) '^^xsd:double)) - (set gn:additive + (set gn-term:additive (annotate-field (field ("IFNULL(ProbeSetXRef.additive, '')" additive)) '^^xsd:double)) - (set gn:h2 + (set gn-term:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) '^^xsd:float)))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-probeset-metadata.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-probeset-metadata db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Probeset Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("gn:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" ""))) + (inputs + (list dump-probeset-metadata)) + (outputs + '(#:documentation "./docs/dump-probeset-metadata.md" + #:rdf "./verified-data/dump-probeset-metadata.ttl"))) -- cgit v1.2.3 From 16ebe166618b7e36d92bcc6c3e497dcfa188ce90 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 20:23:58 +0300 Subject: Dump probesetfreeze metadata using new metadata --- examples/dump-probesetfreeze.scm | 77 +++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 41 deletions(-) (limited to 'examples') diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index 0be81ac..a45fd0a 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -16,18 +16,15 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-gene-chip (tables (GeneChip)) (schema-triples - (gn:name rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) (set rdf:type 'gn:platform) - (set gn:name (field GeneChip GeneChipName)) - (set gn:geoPlatform + (set gn-term:name (field GeneChip GeneChipName)) + (set gn-term:geoPlatform (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -41,48 +38,46 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gn:avgMethod rdfs:range rdfs:Literal) - (gn:dataScale rdfs:range rdfs:Literal) + (gn-term:avgMethod rdfs:range rdfs:Literal) + (gn-term:dataScale rdfs:range rdfs:Literal) (gn:probesetDataset rdf:subClassOf gn:dataset)) (triples - (ontology 'probeset: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ProbeSetFreeze Name) - 'pre "_" 'post)) + (string->identifier + "" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field ProbeSetFreeze Name) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:probesetDataset) - (set gn:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) - (set gn:fullName (field ProbeSetFreeze FullName)) - (set gn:shortName (field ProbeSetFreeze ShortName)) + (set gn-term:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) + (set gn-term:fullName (field ProbeSetFreeze FullName)) + (set gn-term:shortName (field ProbeSetFreeze ShortName)) (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gn:dataScale (field ProbeSetFreeze DataScale)) - (set gn:tissueName (string->identifier "tissue" (field Tissue Short_Name))) - (set gn:datasetOfInbredSet + (set gn-term:dataScale (field ProbeSetFreeze DataScale)) + (set gn-term:tissueName (string->identifier "tissue" (field Tissue Short_Name))) + (set gn-term:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-probesetfreeze.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "geoSeries:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-gene-chip db) - (dump-probesetfreeze db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Probeset freeze metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("geoSeries:" "") + ("gn:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" ""))) + (inputs + (list dump-gene-chip + dump-probesetfreeze)) + (outputs + '(#:documentation "./docs/dump-gene-chip.md" + #:rdf "./verified-data/dump-probesetfreeze.ttl"))) -- cgit v1.2.3 From 0c0e80d850ae4ff72e02778afca64778421ba72c Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 14:30:06 +0300 Subject: Update phenotype dump --- examples/dump-phenotype.scm | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'examples') diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 924ec9a..1ef498d 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -31,7 +31,6 @@ (gn-term:name rdfs:range rdfs:Literal) (gn-term:fullName rdfs:range rdfs:Literal) (gn-term:shortName rdfs:range rdfs:Literal) - (gn-term:createTime rdfs:range rdfs:Literal) (gn:phenotypeDataset rdf:subClassOf gn:dataset)) (triples (string->identifier @@ -45,7 +44,7 @@ (set gn-term:name (field PublishFreeze Name)) (set gn-term:fullName (field PublishFreeze FullName)) (set gn-term:shortName (field PublishFreeze ShortName)) - (set dc-termt:created (annotate-field + (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) (set gn-term:datasetOfInbredSet @@ -58,7 +57,20 @@ (left-join PublishFreeze "ON PublishFreeze.InbredSetId = PublishXRef.InbredSetId") (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))) (schema-triples - (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset)) + (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset) + (gn-term:publicationDescription rdfs:range rdfs:Literal) + (gn-term:originalDescription rdfs:range rdfs:Literal) + (gn-term:prePublicationDescription rdfs:range rdfs:Literal) + (gn-term:postPublicationAbbreviation rdfs:range rdfs:Literal) + (gn-term:labCode rdfs:range rdfs:Literal) + (gn-term:submitter rdfs:range rdfs:Literal) + (gn-term:owner rdfs:range rdfs:Literal) + (gn-term:mean rdfs:range xsd:double) + (gn-term:LRS rdfs:range xsd:float) + (gn-term:locus rdfs:range rdfs:Literal) + (gn-term:additive rdfs:range xsd:decimal) + (gn-term:sequence rdfs:range rdfs:Literal) + (gn-term:phenotypeOfPublication rdfs:range gn-term:pubMedId)) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" @@ -120,7 +132,8 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn:" "") + '(("dct:" "") + ("gn:" "") ("gn-term:" "") ("rdf:" "") ("rdfs:" "") -- cgit v1.2.3 From 1dea579f74cad817b5dcb92de73e3136a7058549 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 14:30:36 +0300 Subject: Add dct: prefix to genotype dump Signed-off-by: Munyoki Kilyungi --- examples/dump-genotype.scm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'examples') diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 0fbbbfe..d97b7e5 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -103,7 +103,8 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn:" "") + '(("dct:" "") + ("gn:" "") ("gn-term:" "") ("rdf:" "") ("rdfs:" "") -- cgit v1.2.3 From 5a2a7dc79c08997868c6644c20443263ac6c7fec Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 24 Jul 2023 16:30:29 +0300 Subject: Use correct URL for rdf prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'examples') diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 41d5847..008c3a0 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -124,7 +124,7 @@ (prefixes '(("gn:" "") ("gn-term:" "") - ("rdf:" "") + ("rdf:" "") ("rdfs:" "") ("taxon:" ""))) (inputs -- cgit v1.2.3 From e662d9054a1f753044cfd13bf8f6965062879e86 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 26 Jul 2023 13:36:03 +0300 Subject: Use "string->binomial-name" to id species Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'examples') diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 008c3a0..77db764 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -26,9 +26,7 @@ (gn-term:binomialName rdfs:range rdfs:Literal) (gn-term:family rdfs:range rdfs:Literal)) (triples - (string->identifier "" (field Species FullName) - #:separator "" - #:proc string-capitalize-first) + (string->binomial-name (field Species FullName)) (set rdf:type 'gn:species) (set gn-term:name (field Species SpeciesName)) (set gn-term:displayName (field Species MenuName)) @@ -55,9 +53,7 @@ #:proc string-capitalize-first) (set rdf:type 'gn:strain) (set gn-term:strainOfSpecies - (string->identifier "" (field Species FullName) - #:separator "" - #:proc string-capitalize-first)) + (string->binomial-name (field Species FullName))) ;; Name, and maybe a second name (set gn-term:name (sanitize-rdf-string (field Strain Name))) (set gn-term:name2 (sanitize-rdf-string (field Strain Name2))) @@ -96,10 +92,8 @@ (set gn-term:inbredSetOfMappingMethod (field MappingMethod Name)) (set gn-term:inbredSetCode (field InbredSet InbredSetCode)) (set gn-term:inbredSetOfSpecies - (string->identifier "" (field Species FullName BinomialName) - #:ontology "gn:" - #:separator "" - #:proc string-capitalize-first)) + (string->binomial-name + (field Species FullName BinomialName))) (set gn-term:genotype (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" genotypeP))) (set gn-term:phenotype -- cgit v1.2.3 From 1d81a238403c29bb46fb2352505b05cf3c150787 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 26 Jul 2023 13:43:33 +0300 Subject: Replace "gn-term" with "gnt" prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-dataset-metadata.scm | 138 ++++++++++++++++++------------------ examples/dump-genotype.scm | 36 +++++----- examples/dump-phenotype.scm | 76 ++++++++++---------- examples/dump-probeset-metadata.scm | 20 +++--- examples/dump-probeset.scm | 22 +++--- examples/dump-probesetfreeze.scm | 25 +++---- examples/dump-publication.scm | 34 ++++----- examples/dump-species-metadata.scm | 35 ++++----- examples/dump-tissue.scm | 6 +- 9 files changed, 197 insertions(+), 195 deletions(-) (limited to 'examples') diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm index c51364a..33e72fe 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dump-dataset-metadata.scm @@ -52,11 +52,11 @@ (foaf:givenName rdfs:range rdfs:Literal) (foaf:familyName rdfs:range rdfs:Literal) (foaf:homepage rdfs:range rdfs:Literal) - (gn-term:address rdfs:range rdfs:Literal) - (gn-term:city rdfs:range rdfs:Literal) - (gn-term:state rdfs:range rdfs:Literal) - (gn-term:zipCode rdfs:range rdfs:Literal) - (gn-term:country rdfs:range rdfs:Literal)) + (gnt:address rdfs:range rdfs:Literal) + (gnt:city rdfs:range rdfs:Literal) + (gnt:state rdfs:range rdfs:Literal) + (gnt:zipCode rdfs:range rdfs:Literal) + (gnt:country rdfs:range rdfs:Literal)) (triples (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email)) @@ -70,11 +70,11 @@ (set foaf:familyName (field ("CAST(CONVERT(BINARY CONVERT(LastName USING latin1) USING utf8) AS VARCHAR(100))" LastName))) (set foaf:homepage (field Investigators Url)) - (set gn-term:address (field Investigators Address)) - (set gn-term:city (field Investigators City)) - (set gn-term:state (field Investigators State)) - (set gn-term:zipCode (field Investigators ZipCode)) - (set gn-term:country (field Investigators Country)))) + (set gnt:address (field Investigators Address)) + (set gnt:city (field Investigators City)) + (set gnt:state (field Investigators State)) + (set gnt:zipCode (field Investigators ZipCode)) + (set gnt:country (field Investigators Country)))) (define-dump dump-info-files (tables (InfoFiles @@ -92,38 +92,38 @@ (left-join GeneChip "USING (GeneChipId)")) "WHERE GN_AccesionId IS NOT NULL") (schema-triples - (gn-term:dataset rdfs:range rdfs:Literal) - (gn-term:datasetOfInvestigator rdfs:domain gn:dataset) - (gn-term:datasetOfOrganization rdfs:domain gn:dataset) - (gn-term:datasetOfInvestigator rdfs:range foaf:Person) - (gn-term:datasetOfInbredSet rdfs:domain gn:dataset) - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn-term:datasetOfSpecies rdfs:domain gn:dataset) - (gn-term:datasetOfSpecies rdfs:range gn:inbredSet) - (gn-term:datasetOfTissue rdfs:domain gn:dataset) - (gn-term:datasetOfTissue rdfs:range gn:tissue) - (gn-term:normalization rdfs:domain gn:dataset) - (gn-term:normalization rdfs:range gn:avgMethod) - (gn-term:datasetOfPlatform rdfs:domain gn:dataset) - (gn-term:datasetOfPlatform rdfs:range gn:geneChip) - (gn-term:accessionId rdfs:range rdfs:Literal) - (gn-term:datasetStatusName rdfs:range rdfs:Literal) - (gn-term:summary rdfs:range rdfs:Literal) - (gn-term:aboutTissue rdfs:range rdfs:Literal) - (gn-term:geoSeries rdfs:range rdfs:Literal) - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:title rdfs:range rdfs:Literal) - (gn-term:publicationTitle rdfs:range rdfs:Literal) - (gn-term:specifics rdfs:range rdfs:Literal) - (gn-term:datasetGroup rdfs:range rdfs:Literal) - (gn-term:aboutCases rdfs:range rdfs:Literal) - (gn-term:aboutPlatform rdfs:range rdfs:Literal) - (gn-term:aboutDataProcessing rdfs:range rdfs:Literal) - (gn-term:notes rdfs:range rdfs:Literal) - (gn-term:experimentDesign rdfs:range rdfs:Literal) - (gn-term:contributors rdfs:range rdfs:Literal) - (gn-term:citation rdfs:range rdfs:Literal) - (gn-term:acknowledgment rdfs:range rdfs:Literal)) + (gnt:dataset rdfs:range rdfs:Literal) + (gnt:datasetOfInvestigator rdfs:domain gn:dataset) + (gnt:datasetOfOrganization rdfs:domain gn:dataset) + (gnt:datasetOfInvestigator rdfs:range foaf:Person) + (gnt:datasetOfInbredSet rdfs:domain gn:dataset) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:datasetOfSpecies rdfs:domain gn:dataset) + (gnt:datasetOfSpecies rdfs:range gn:inbredSet) + (gnt:datasetOfTissue rdfs:domain gn:dataset) + (gnt:datasetOfTissue rdfs:range gn:tissue) + (gnt:normalization rdfs:domain gn:dataset) + (gnt:normalization rdfs:range gn:avgMethod) + (gnt:datasetOfPlatform rdfs:domain gn:dataset) + (gnt:datasetOfPlatform rdfs:range gn:geneChip) + (gnt:accessionId rdfs:range rdfs:Literal) + (gnt:datasetStatusName rdfs:range rdfs:Literal) + (gnt:summary rdfs:range rdfs:Literal) + (gnt:aboutTissue rdfs:range rdfs:Literal) + (gnt:geoSeries rdfs:range rdfs:Literal) + (gnt:name rdfs:range rdfs:Literal) + (gnt:title rdfs:range rdfs:Literal) + (gnt:publicationTitle rdfs:range rdfs:Literal) + (gnt:specifics rdfs:range rdfs:Literal) + (gnt:datasetGroup rdfs:range rdfs:Literal) + (gnt:aboutCases rdfs:range rdfs:Literal) + (gnt:aboutPlatform rdfs:range rdfs:Literal) + (gnt:aboutDataProcessing rdfs:range rdfs:Literal) + (gnt:notes rdfs:range rdfs:Literal) + (gnt:experimentDesign rdfs:range rdfs:Literal) + (gnt:contributors rdfs:range rdfs:Literal) + (gnt:citation rdfs:range rdfs:Literal) + (gnt:acknowledgment rdfs:range rdfs:Literal)) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field InfoFiles InfoPageName) @@ -133,84 +133,84 @@ (set rdf:type (string->symbol (field ("IF(GenoFreeze.Id IS NOT NULL, 'gn:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gn:phenotypeDataset', 'gn:dataset'))" rdfType)))) - (set gn-term:name (regexp-substitute/global + (set gnt:name (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles InfoPageName) "")) - (set gn-term:fullName + (set gnt:fullName (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))" DatasetFullName))) (set dct:created (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))" createTimeGenoFreeze))) - (set gn-term:datasetOfInvestigator + (set gnt:datasetOfInvestigator (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email))) - (set gn-term:datasetOfOrganization + (set gnt:datasetOfOrganization (field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations))) - (set gn-term:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) - (set gn-term:datasetStatusName (string-downcase + (set gnt:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) + (set gnt:datasetStatusName (string-downcase (field DatasetStatus DatasetStatusName))) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))) - (set gn-term:datasetOfTissue (string->identifier "tissue" + (set gnt:datasetOfTissue (string->identifier "tissue" (field Tissue Short_Name))) - (set gn-term:normalization + (set gnt:normalization (string->identifier "avgmethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) - (set gn-term:datasetOfPlatform + (set gnt:datasetOfPlatform (string->identifier "platform" (field GeneChip Name GeneChip))) - (set gn-term:summary + (set gnt:summary (sanitize-rdf-string (field Datasets Summary))) - (set gn-term:aboutTissue + (set gnt:aboutTissue (sanitize-rdf-string (field Datasets AboutTissue))) - (set gn-term:geoSeries + (set gnt:geoSeries (let ((s (string-match "GSE[0-9]*" (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries))))) (if s (ontology 'geoSeries: (match:substring s)) ""))) - (set gn-term:title + (set gnt:title (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles InfoFileTitle) "")) - (set gn-term:publicationTitle + (set gnt:publicationTitle (regexp-substitute/global #f "^[Nn]one$" (field Datasets PublicationTitle) "")) - (set gn-term:specifics (sanitize-rdf-string (field InfoFiles Specifics))) - (set gn-term:datasetGroup (field Datasets DatasetName DatasetGroup)) - (set gn-term:aboutCases + (set gnt:specifics (sanitize-rdf-string (field InfoFiles Specifics))) + (set gnt:datasetGroup (field Datasets DatasetName DatasetGroup)) + (set gnt:aboutCases (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutCases USING latin1) USING utf8) AS VARCHAR(10000))" AboutCases)))) - (set gn-term:aboutPlatform + (set gnt:aboutPlatform (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutPlatform USING latin1) USING utf8) AS VARCHAR(1500))" AboutPlatform)))) - (set gn-term:aboutDataProcessing + (set gnt:aboutDataProcessing (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutDataProcessing USING latin1) USING utf8) AS VARCHAR(1500))" AboutDataProcessing)))) - (set gn-term:notes + (set gnt:notes (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Notes USING latin1) USING utf8) AS VARCHAR(1500))" GNNotes)))) - (set gn-term:experimentDesign + (set gnt:experimentDesign (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.ExperimentDesign USING latin1) USING utf8) AS VARCHAR(1500))" ExperimentDesign)))) - (set gn-term:contributors + (set gnt:contributors (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Contributors USING latin1) USING utf8) AS VARCHAR(1500))" Contributors)))) - (set gn-term:citation + (set gnt:citation (sanitize-rdf-string (regexp-substitute/global #f "^[Nn]one$" @@ -218,7 +218,7 @@ ("CAST(CONVERT(BINARY CONVERT(Datasets.Citation USING latin1) USING utf8) AS VARCHAR(1500))" Citation)) ""))) - (set gn-term:dataSourceAcknowledgment + (set gnt:dataSourceAcknowledgment (sanitize-rdf-string (string-trim-both (regexp-substitute/global @@ -226,7 +226,7 @@ (field ("CAST(CONVERT(BINARY CONVERT(InfoFiles.Data_Source_Acknowledge USING latin1) USING utf8) AS VARCHAR(1500))" Data_Source_Acknowledge)) "")))) - (set gn-term:acknowledgment (sanitize-rdf-string + (set gnt:acknowledgment (sanitize-rdf-string (field Datasets Acknowledgment))))) @@ -239,7 +239,7 @@ (prefixes '(("foaf:" "") ("geoSeries:" "") - ("gn-term:" "") + ("gnt:" "") ("gn:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index d97b7e5..88125fa 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -26,9 +26,9 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gn-term:shortName rdfs:range rdfs:Literal)) + (gnt:shortName rdfs:range rdfs:Literal)) (triples (string->identifier "" @@ -42,13 +42,13 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:genotypeDataset) - (set gn-term:name (field GenoFreeze Name)) - (set gn-term:fullName (field GenoFreeze FullName)) - (set gn-term:shortName (field GenoFreeze ShortName)) + (set gnt:name (field GenoFreeze Name)) + (set gnt:fullName (field GenoFreeze FullName)) + (set gnt:shortName (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:date)) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "" (field InbredSet Name InbredSetName))))) (define-dump dump-genotypes @@ -58,7 +58,7 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples (gn:genotype rdfs:range rdfs:Literal) - (gn-term:genotypeDataset rdfs:subPropertyOf gn:dataset)) + (gnt:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples (string->identifier "" @@ -69,14 +69,14 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:genotype) - (set gn-term:name (sanitize-rdf-string (field Geno Name))) - (set gn-term:markerName (sanitize-rdf-string (field Geno Marker_Name))) - (set gn-term:chr (field Geno Chr)) - (set gn-term:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) - (set gn-term:sequence (field Geno Sequence)) - (set gn-term:source (field Geno Source)) - (set gn-term:source2 (field Geno Source2)) - (set gn-term:genotypeOfDataset + (set gnt:name (sanitize-rdf-string (field Geno Name))) + (set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name))) + (set gnt:chr (field Geno Chr)) + (set gnt:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) + (set gnt:sequence (field Geno Sequence)) + (set gnt:source (field Geno Source)) + (set gnt:source2 (field Geno Source2)) + (set gnt:genotypeOfDataset (string->identifier "" (regexp-substitute/global @@ -86,12 +86,12 @@ #:separator "" #:proc string-capitalize-first) ) - (set gn-term:chrNum + (set gnt:chrNum (annotate-field (field ("IFNULL(Geno.chr_num, '')" chr_num)) '^^xsd:int)) (set gn:comments (field ("CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255))" Comments))) - (set gn-term:cM + (set gnt:cM (annotate-field (field ("IFNULL(GenoXRef.cM, '')" Chr_mm8)) '^^xsd:int)))) @@ -105,7 +105,7 @@ (prefixes '(("dct:" "") ("gn:" "") - ("gn-term:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" ""))) diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 1ef498d..00f99d2 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -27,10 +27,10 @@ (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:fullName rdfs:range rdfs:Literal) - (gn-term:shortName rdfs:range rdfs:Literal) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:name rdfs:range rdfs:Literal) + (gnt:fullName rdfs:range rdfs:Literal) + (gnt:shortName rdfs:range rdfs:Literal) (gn:phenotypeDataset rdf:subClassOf gn:dataset)) (triples (string->identifier @@ -41,13 +41,13 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:phenotypeDataset) - (set gn-term:name (field PublishFreeze Name)) - (set gn-term:fullName (field PublishFreeze FullName)) - (set gn-term:shortName (field PublishFreeze ShortName)) + (set gnt:name (field PublishFreeze Name)) + (set gnt:fullName (field PublishFreeze FullName)) + (set gnt:shortName (field PublishFreeze ShortName)) (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) (define-dump dump-phenotypes @@ -58,19 +58,19 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))) (schema-triples (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset) - (gn-term:publicationDescription rdfs:range rdfs:Literal) - (gn-term:originalDescription rdfs:range rdfs:Literal) - (gn-term:prePublicationDescription rdfs:range rdfs:Literal) - (gn-term:postPublicationAbbreviation rdfs:range rdfs:Literal) - (gn-term:labCode rdfs:range rdfs:Literal) - (gn-term:submitter rdfs:range rdfs:Literal) - (gn-term:owner rdfs:range rdfs:Literal) - (gn-term:mean rdfs:range xsd:double) - (gn-term:LRS rdfs:range xsd:float) - (gn-term:locus rdfs:range rdfs:Literal) - (gn-term:additive rdfs:range xsd:decimal) - (gn-term:sequence rdfs:range rdfs:Literal) - (gn-term:phenotypeOfPublication rdfs:range gn-term:pubMedId)) + (gnt:publicationDescription rdfs:range rdfs:Literal) + (gnt:originalDescription rdfs:range rdfs:Literal) + (gnt:prePublicationDescription rdfs:range rdfs:Literal) + (gnt:postPublicationAbbreviation rdfs:range rdfs:Literal) + (gnt:labCode rdfs:range rdfs:Literal) + (gnt:submitter rdfs:range rdfs:Literal) + (gnt:owner rdfs:range rdfs:Literal) + (gnt:mean rdfs:range xsd:double) + (gnt:LRS rdfs:range xsd:float) + (gnt:locus rdfs:range rdfs:Literal) + (gnt:additive rdfs:range xsd:decimal) + (gnt:sequence rdfs:range rdfs:Literal) + (gnt:phenotypeOfPublication rdfs:range gn-term:pubMedId)) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" @@ -79,44 +79,44 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:phenotype) - (set gn-term:name (sanitize-rdf-string + (set gnt:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" PhenotypeName)))) ;; There is no row with an empty post-publication description so ;; use this field as the main publication description - (set gn-term:publicationDescription + (set gnt:publicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Post_publication_description USING latin1) USING utf8) AS CHAR(10000))" postPubDescr)))) - (set gn-term:originalDescription (sanitize-rdf-string + (set gnt:originalDescription (sanitize-rdf-string (delete-substrings (field Phenotype Original_description) "Original post publication description: "))) - (set gn-term:prePublicationDescription + (set gnt:prePublicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Pre_publication_description USING latin1) USING utf8) AS VARCHAR(15000))" prePubDesc)))) - (set gn-term:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) - (set gn-term:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) - (set gn-term:labCode (field Phenotype Lab_code)) - (set gn-term:submitter (sanitize-rdf-string (field Phenotype Submitter))) - (set gn-term:owner (sanitize-rdf-string (field Phenotype Owner))) - (set gn-term:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) + (set gnt:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) + (set gnt:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) + (set gnt:labCode (field Phenotype Lab_code)) + (set gnt:submitter (sanitize-rdf-string (field Phenotype Submitter))) + (set gnt:owner (sanitize-rdf-string (field Phenotype Owner))) + (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) '^^xsd:double)) - (set gn-term:locus (field PublishXRef Locus)) - (set gn-term:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) - (set gn-term:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) - (set gn-term:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) - (set gn-term:phenotypeOfDataset + (set gnt:locus (field PublishXRef Locus)) + (set gnt:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) + (set gnt:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) + (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) + (set gnt:phenotypeOfDataset (string->identifier "" (field ("IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, ''))" DatasetName)) #:separator "" #:proc string-capitalize-first)) - (set gn-term:phenotypeOfPublication + (set gnt:phenotypeOfPublication (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" pmid))) @@ -134,7 +134,7 @@ (prefixes '(("dct:" "") ("gn:" "") - ("gn-term:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" "") diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index 6da1eb0..ddbea5e 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -24,14 +24,14 @@ "WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1") (schema-triples (gn:probesetData rdfs:range gn:probeset) - (gn-term:hasProbeset rdfs:range rdfs:Literal)) + (gnt:hasProbeset rdfs:range rdfs:Literal)) (triples (string->identifier "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) (set rdf:type 'gn:probesetData) - (set gn-term:hasProbeset + (set gnt:hasProbeset (ontology 'probeset: (regexp-substitute/global @@ -39,34 +39,34 @@ (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" name)) 'pre "_" 'post))) - (set gn-term:probesetOfDataset + (set gnt:probesetOfDataset (ontology 'probeset: (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ProbeSetFreeze Name) 'pre "_" 'post))) - (set gn-term:mean + (set gnt:mean (annotate-field (field ("IFNULL(ProbeSetXRef.mean, '')" mean)) '^^xsd:double)) - (set gn-term:se + (set gnt:se (annotate-field (field ("IFNULL(ProbeSetXRef.se, '')" se)) '^^xsd:double)) - (set gn-term:locus (field ProbeSetXRef Locus)) + (set gnt:locus (field ProbeSetXRef Locus)) (set gn:LRS (annotate-field (field ("IFNULL(ProbeSetXRef.LRS, '')" LRS)) '^^xsd:double)) - (set gn-term:pValue + (set gnt:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) '^^xsd:double)) - (set gn-term:additive + (set gnt:additive (annotate-field (field ("IFNULL(ProbeSetXRef.additive, '')" additive)) '^^xsd:double)) - (set gn-term:h2 + (set gnt:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) '^^xsd:float)))) @@ -79,7 +79,7 @@ (table-metadata? #f) (prefixes '(("gn:" "") - ("gn-term:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" ""))) diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm index be09b48..4d5f9a5 100755 --- a/examples/dump-probeset.scm +++ b/examples/dump-probeset.scm @@ -21,8 +21,8 @@ (tables (ProbeSet (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:probeset rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal) + (gnt:probeset rdfs:range rdfs:Literal)) (triples (ontology 'probeset: (string-trim-both @@ -32,17 +32,17 @@ name)) 'pre "_" 'post))) (set rdf:type 'gn-id:probeset) - (set gn-term:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn-term:name (field ProbeSet Name)) - (set gn-term:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn-term:description (sanitize-rdf-string + (set gnt:chipOf (string->identifier "platform" (field GeneChip Name))) + (set gnt:name (field ProbeSet Name)) + (set gnt:symbol (delete-substrings (field ProbeSet Symbol) "\"")) + (set gnt:description (sanitize-rdf-string (field ProbeSet description))) - (set gn-term:chr (field ProbeSet Chr)) - (set gn-term:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gn-term:blatSeq (sanitize-rdf-string + (set gnt:chr (field ProbeSet Chr)) + (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) + (set gnt:blatSeq (sanitize-rdf-string (string-trim-both (field ProbeSet BlatSeq)))) - (set gn-term:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gn-term:uniProtReference (ontology 'uniprot: + (set gnt:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) + (set gnt:uniProtReference (ontology 'uniprot: (field ProbeSet UniProtID))))) diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index a45fd0a..828ab00 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -20,11 +20,11 @@ (define-dump dump-gene-chip (tables (GeneChip)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) (set rdf:type 'gn:platform) - (set gn-term:name (field GeneChip GeneChipName)) - (set gn-term:geoPlatform + (set gnt:name (field GeneChip GeneChipName)) + (set gnt:geoPlatform (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -38,8 +38,8 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gn-term:avgMethod rdfs:range rdfs:Literal) - (gn-term:dataScale rdfs:range rdfs:Literal) + (gnt:avgMethod rdfs:range rdfs:Literal) + (gnt:dataScale rdfs:range rdfs:Literal) (gn:probesetDataset rdf:subClassOf gn:dataset)) (triples (string->identifier @@ -51,15 +51,15 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:probesetDataset) - (set gn-term:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) - (set gn-term:fullName (field ProbeSetFreeze FullName)) - (set gn-term:shortName (field ProbeSetFreeze ShortName)) + (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) + (set gnt:fullName (field ProbeSetFreeze FullName)) + (set gnt:shortName (field ProbeSetFreeze ShortName)) (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gn-term:dataScale (field ProbeSetFreeze DataScale)) - (set gn-term:tissueName (string->identifier "tissue" (field Tissue Short_Name))) - (set gn-term:datasetOfInbredSet + (set gnt:dataScale (field ProbeSetFreeze DataScale)) + (set gnt:tissueName (string->identifier "tissue" (field Tissue Short_Name))) + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) @@ -71,7 +71,8 @@ (prefixes '(("geoSeries:" "") ("gn:" "") - ("gn-term:" "") + ("dct:" "<>") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" ""))) diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index f79696e..1384261 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -21,15 +21,15 @@ (define-dump dump-publication (tables (Publication)) (schema-triples - (gn-term:pubMedId rdfs:range rdfs:Literal) - (gn-term:title rdfs:range rdfs:Literal) - (gn-term:journal rdfs:range rdfs:Literal) - (gn-term:volume rdfs:range rdfs:Literal) - (gn-term:pages rdfs:range rdfs:Literal) - (gn-term:month rdfs:range rdfs:Literal) - (gn-term:year rdfs:range rdfs:Literal) - (gn-term:author rdfs:range rdfs:Literal) - (gn-term:abstract rdfs:range rdfs:Literal)) + (gnt:pubMedId rdfs:range rdfs:Literal) + (gnt:title rdfs:range rdfs:Literal) + (gnt:journal rdfs:range rdfs:Literal) + (gnt:volume rdfs:range rdfs:Literal) + (gnt:pages rdfs:range rdfs:Literal) + (gnt:month rdfs:range rdfs:Literal) + (gnt:year rdfs:range rdfs:Literal) + (gnt:author rdfs:range rdfs:Literal) + (gnt:abstract rdfs:range rdfs:Literal)) (triples (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" @@ -40,19 +40,19 @@ (number->string publication-id)) (ontology 'pubmed: pmid))) (set rdf:type 'gn:publication) - (set gn-term:pubMedId + (set gnt:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) - (set gn-term:title (delete-substrings (field Publication Title) + (set gnt:title (delete-substrings (field Publication Title) "Unknown")) - (set gn-term:journal (delete-substrings (field Publication Journal) + (set gnt:journal (delete-substrings (field Publication Journal) "Unknown")) - (set gn-term:volume (delete-substrings (field Publication Volume) + (set gnt:volume (delete-substrings (field Publication Volume) "Unknown")) - (set gn-term:pages (delete-substrings (field Publication Pages) + (set gnt:pages (delete-substrings (field Publication Pages) "Unknown")) - (set gn-term:month (delete-substrings (field Publication Month) + (set gnt:month (delete-substrings (field Publication Month) "Unknown")) - (set gn-term:year (field Publication Year)) + (set gnt:year (field Publication Year)) (multiset gn:author ;; The authors field is a comma ;; separated list. Split it. @@ -68,7 +68,7 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn-term:" "") + '(("gnt:" "") ("gn:" "") ("pubmed:" "") ("rdfs:" "") diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 77db764..39f7147 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -21,25 +21,25 @@ (define-dump dump-species (tables (Species)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:displayName rdfs:range rdfs:Literal) - (gn-term:binomialName rdfs:range rdfs:Literal) - (gn-term:family rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal) + (gnt:displayName rdfs:range rdfs:Literal) + (gnt:binomialName rdfs:range rdfs:Literal) + (gnt:family rdfs:range rdfs:Literal)) (triples (string->binomial-name (field Species FullName)) - (set rdf:type 'gn:species) - (set gn-term:name (field Species SpeciesName)) - (set gn-term:displayName (field Species MenuName)) - (set gn-term:binomialName (field Species FullName)) - (set gn-term:family (field Species Family)) - (set gn-term:organism (ontology 'taxon: (field Species TaxonomyId))))) + (set rdf:type 'gnc:species) + (set gnt:name (field Species SpeciesName)) + (set gnt:displayName (field Species MenuName)) + (set gnt:binomialName (field Species FullName)) + (set gnt:family (field Species Family)) + (set gnt:organism (ontology 'taxon: (field Species TaxonomyId))))) (define-dump dump-strain (tables (Strain (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples - (gn-term:strainOfSpecies rdfs:domain gn-term:strain) - (gn-term:strainOfSpecies rdfs:range gn-term:species) + (gnt:strainOfSpecies rdfs:domain gnt:strain) + (gnt:strainOfSpecies rdfs:range gn-term:species) (gn-term:name rdfs:range rdfs:Literal) (gn-term:alias rdfs:range rdfs:Literal) (gn-term:symbol rdfs:range rdfs:Literal)) @@ -51,7 +51,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:strain) + (set rdf:type 'gnc:strain) (set gn-term:strainOfSpecies (string->binomial-name (field Species FullName))) ;; Name, and maybe a second name @@ -64,7 +64,7 @@ (tables (MappingMethod)) (triples (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gn:mappingMethod))) + (set rdf:type 'gnc:mappingMethod))) (define-dump dump-inbred-set (tables (InbredSet @@ -85,7 +85,7 @@ "" (field InbredSet Name) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:inbredSet) + (set rdf:type 'gnc:inbredSet) (set gn-term:binomialName (field InbredSet FullName)) (set gn-term:geneticType (field InbredSet GeneticType)) (set gn-term:inbredFamily (field InbredSet Family)) @@ -106,7 +106,7 @@ (schema-triples (gn-term:normalization rdfs:range rdfs:Literal)) (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gn:avgMethod) + (set rdf:type 'gnc:avgMethod) (set gn-term:normalization (field AvgMethod Normalization)))) @@ -117,7 +117,8 @@ (table-metadata? #f) (prefixes '(("gn:" "") - ("gn-term:" "") + ("gnc:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("taxon:" ""))) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index ff6792e..a9a50f3 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -23,12 +23,12 @@ ;; and BIRN_lex_Name are mostly NULL. (tables (Tissue)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal)) ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) (set rdf:type 'gn:tissue) - (set gn-term:name (field Tissue Name)))) + (set gnt:name (field Tissue Name)))) @@ -38,7 +38,7 @@ (table-metadata? #f) (prefixes '(("gn:" "") - ("gn-term:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" ""))) (inputs -- cgit v1.2.3 From 6f5cca6b5511ce5c80639e477ae10b4e70e2b178 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Sun, 30 Jul 2023 12:27:50 +0300 Subject: Add gnc: prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-generif.scm | 102 ++++++++++++++++-------------------- examples/dump-genotype.scm | 13 ++--- examples/dump-phenotype.scm | 5 +- examples/dump-probeset-metadata.scm | 3 +- examples/dump-probesetfreeze.scm | 5 +- examples/dump-publication.scm | 3 +- examples/dump-tissue.scm | 3 +- 7 files changed, 65 insertions(+), 69 deletions(-) (limited to 'examples') diff --git a/examples/dump-generif.scm b/examples/dump-generif.scm index b546f42..0689f57 100755 --- a/examples/dump-generif.scm +++ b/examples/dump-generif.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-genewiki-symbols @@ -26,17 +23,17 @@ (left-join Species "USING (SpeciesId)")) "GROUP BY GeneId ORDER BY BINARY symbol") (schema-triples - (gn:symbol rdfs:domain gn:geneWikiEntry) - (gn:wikiEntryOfSpecies rdfs:range gn:species) - (gn:taxid rdfs:domain gn:geneWikiEntry)) + (gnt:symbol rdfs:domain gn-term:geneWikiEntry) + (gnt:wikiEntryOfSpecies rdfs:range gn:species) + (gnt:taxid rdfs:domain gn-term:geneWikiEntry)) (triples (ontology 'generif: (field GeneRIF_BASIC GeneId)) - (multiset gn:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol)) + (multiset gnt:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol)) #\,)) - (multiset gn:wikiEntryOfSpecies + (multiset gnt:wikiEntryOfSpecies (string-split (field ("GROUP_CONCAT(DISTINCT Species.SpeciesName)" species)) #\,)) - (multiset gn:taxId (map (cut ontology 'ncbiTaxon: <>) + (multiset gnt:taxId (map (cut ontology 'ncbiTaxon: <>) (string-split (field ("GROUP_CONCAT(DISTINCT TaxID)" taxId)) #\,))))) @@ -48,16 +45,16 @@ (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 GROUP BY GeneRIF.symbol") (schema-triples - (gn:geneWikiEntry a rdfs:Class) - (gn:geneWikiEntry a owl:Class) - (gn:geneWikiEntry rdfs:comment "Represents GeneRIF Entries") - (gn:geneCategory rdfs:domain gn:geneWikiEntry) - (gn:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry) - (gn:geneWikiEntry rdfs:domain gn:geneWikiEntry)) + (gnt:geneWikiEntry a rdfs:Class) + (gnt:geneWikiEntry a owl:Class) + (gnt:geneWikiEntry rdfs:comment "Represents GeneRIF Entries") + (gnt:geneCategory rdfs:domain gn:geneWikiEntry) + (gnt:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry) + (gnt:geneWikiEntry rdfs:domain gn:geneWikiEntry)) (triples (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) - (ontology 'gn:anonSymbol_ + (ontology 'gnt:anonSymbol_ (field GeneRIF symbol)) (ontology 'generif: geneid))) @@ -65,14 +62,14 @@ (if (string-null? (field ("IFNULL(GeneRIF_BASIC.GeneId, '')" geneWikiEntryP))) "" 'gn:geneWikiEntry)) - (set gn:wikiEntryOfSpecies - (field Species SpeciesName)) + (set gnt:wikiEntryOfSpecies + (string->binomial-name (field Species FullName))) ;; This only dumps symbols not present in the GeneRIF_BASIC table - (set gn:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) + (set gnt:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) (field GeneRIF symbol) ""))) - (multiset gn:geneWikiEntryOfGn + (multiset gnt:geneWikiEntryOfGn (let* ([entries (sanitize-rdf-string (field @@ -83,7 +80,7 @@ (match-lambda ((genecategory pmid email text createtime weburl) (blank-node - (set gn:geneCategory genecategory) + (set gnt:geneCategory genecategory) (multiset dct:source (map (lambda (el) (if (string-null? el) "" @@ -94,7 +91,7 @@ 'pre "" 'post)) - (set gn:geneWikiEntry + (set gnt:geneWikiEntry (annotate-field text '^^xsd:string)) (set dct:created (annotate-field createtime @@ -108,12 +105,12 @@ (tables (GeneRIF_BASIC) "GROUP BY GeneId, comment, createtime") (schema-triples - (gn:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry)) + (gnt:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry)) (triples (ontology 'generif: (field GeneRIF_BASIC GeneId)) - (set gn:geneWikiEntryOfNCBI + (set gnt:geneWikiEntryOfNCBI (blank-node - (set gn:geneWikiEntry + (set gnt:geneWikiEntry (annotate-field (field GeneRIF_BASIC comment) '^^xsd:string)) (multiset dct:source (map (lambda (el) (if (string-null? el) @@ -127,34 +124,27 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-generif.ttl") - (lambda () - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "foaf:" "") - (prefix "gn:" "") - (prefix "dct:" "") - (prefix "pubmed:" "") - (prefix "up:" "") - (prefix "ncbiTaxon:" "") - (prefix "generif:" "") - (prefix "xsd:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "pubchem:" "") - (prefix "uniprot:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "chebi:" "") - (prefix "kegg:" "") - (newline) - (dump-genewiki-symbols db) - (dump-gn-genewiki-entries db) - (dump-ncbi-genewiki-entries db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "GeneRIF Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("gnc:" "") + ("gnt:" "") + ("dct:" "") + ("pubmed:" "") + ("ncbiTaxon:" "") + ("generif:" "") + ("xsd:" "") + ("owl:" ""))) + (inputs + (list ;; dump-genewiki-symbols + dump-gn-genewiki-entries + ;; dump-ncbi-genewiki-entries + )) + (outputs + '(#:documentation "./docs/dump-generif.md" + #:rdf "./verified-data/dump-generif.ttl"))) diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 88125fa..50cafb6 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -26,9 +26,9 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gnt:shortName rdfs:range rdfs:Literal)) + (gnt:datasetOfInbredSet rdfs:subPropertyOf gnc:inbredSet) + (gnc:genotypeDataset rdfs:subPropertyOf gnc:dataset) + (gnt:shortName rdfs:subPropertyOf gnc:genotypeDataset)) (triples (string->identifier "" @@ -41,7 +41,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:genotypeDataset) + (set rdf:type 'gnc:genotypeDataset) (set gnt:name (field GenoFreeze Name)) (set gnt:fullName (field GenoFreeze FullName)) (set gnt:shortName (field GenoFreeze ShortName)) @@ -57,7 +57,7 @@ (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId") (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples - (gn:genotype rdfs:range rdfs:Literal) + (gnc:genotype rdfs:range rdfs:Literal) (gnt:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples (string->identifier @@ -68,7 +68,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:genotype) + (set rdf:type 'gnc:genotype) (set gnt:name (sanitize-rdf-string (field Geno Name))) (set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name))) (set gnt:chr (field Geno Chr)) @@ -105,6 +105,7 @@ (prefixes '(("dct:" "") ("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 00f99d2..983756b 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -40,7 +40,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:phenotypeDataset) + (set rdf:type 'gnc:phenotypeDataset) (set gnt:name (field PublishFreeze Name)) (set gnt:fullName (field PublishFreeze FullName)) (set gnt:shortName (field PublishFreeze ShortName)) @@ -78,7 +78,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:phenotype) + (set rdf:type 'gnc:phenotype) (set gnt:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" @@ -134,6 +134,7 @@ (prefixes '(("dct:" "") ("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index ddbea5e..37fef70 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -30,7 +30,7 @@ "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) - (set rdf:type 'gn:probesetData) + (set rdf:type 'gnc:probesetData) (set gnt:hasProbeset (ontology 'probeset: @@ -79,6 +79,7 @@ (table-metadata? #f) (prefixes '(("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index 828ab00..30ea9f4 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -22,7 +22,7 @@ (schema-triples (gnt:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) - (set rdf:type 'gn:platform) + (set rdf:type 'gnc:platform) (set gnt:name (field GeneChip GeneChipName)) (set gnt:geoPlatform (ontology 'geoSeries: @@ -50,7 +50,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:probesetDataset) + (set rdf:type 'gnc:probesetDataset) (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) (set gnt:fullName (field ProbeSetFreeze FullName)) (set gnt:shortName (field ProbeSetFreeze ShortName)) @@ -71,6 +71,7 @@ (prefixes '(("geoSeries:" "") ("gn:" "") + ("gnc:" "") ("dct:" "<>") ("gnt:" "") ("rdf:" "") diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 1384261..50e4358 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -39,7 +39,7 @@ (string->identifier "unpublished" (number->string publication-id)) (ontology 'pubmed: pmid))) - (set rdf:type 'gn:publication) + (set rdf:type 'gnc:publication) (set gnt:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) (set gnt:title (delete-substrings (field Publication Title) @@ -70,6 +70,7 @@ (prefixes '(("gnt:" "") ("gn:" "") + ("gnc:" "") ("pubmed:" "") ("rdfs:" "") ("rdf:" ""))) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index a9a50f3..dc76600 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -27,7 +27,7 @@ ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn:tissue) + (set rdf:type 'gnc:tissue) (set gnt:name (field Tissue Name)))) @@ -39,6 +39,7 @@ (prefixes '(("gn:" "") ("gnt:" "") + ("gnc:" "") ("rdf:" "") ("rdfs:" ""))) (inputs -- cgit v1.2.3