diff options
Diffstat (limited to 'examples')
| -rwxr-xr-x | examples/datasets.scm | 2 | ||||
| -rwxr-xr-x | examples/genotype-datasets.scm | 53 | ||||
| -rwxr-xr-x | examples/genotype.scm | 70 | ||||
| -rwxr-xr-x | examples/ontology.scm | 39 | ||||
| -rwxr-xr-x | examples/phenotype-datasets.scm | 54 | ||||
| -rwxr-xr-x | examples/phenotype.scm | 53 |
6 files changed, 194 insertions, 77 deletions
diff --git a/examples/datasets.scm b/examples/datasets.scm index 55d8c35..e4f35da 100755 --- a/examples/datasets.scm +++ b/examples/datasets.scm @@ -25,6 +25,7 @@ (set rdf:type 'dcat:Dataset) (set dct:title (normalize-string-field (field InfoFiles InfoPageName))) (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId))) + (set gnt:has_genotype_files (string->symbol (format #f "gn-files:GN~a" (field InfoFiles GN_AccesionId)))) (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")) (set gnt:has_experiment_type @@ -105,6 +106,7 @@ ("gn:" "<http://rdf.genenetwork.org/v1/id/>") ("gnc:" "<http://rdf.genenetwork.org/v1/category/>") ("gnt:" "<http://rdf.genenetwork.org/v1/term/>") + ("gn-files:" "<http://files.genenetwork.org/current/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("owl:" "<http://www.w3.org/2002/07/owl#>") diff --git a/examples/genotype-datasets.scm b/examples/genotype-datasets.scm index f140600..ebe2349 100755 --- a/examples/genotype-datasets.scm +++ b/examples/genotype-datasets.scm @@ -15,29 +15,39 @@ (define-transformer gn:set->gn:dataset - (tables (Datasets - (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId") - (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId") + (tables (Species + (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id") (inner-join GenoFreeze "ON GenoFreeze.InbredSetId = InbredSet.Id")) - "WHERE GenoFreeze.public > 0 GROUP BY Datasets.DatasetId") - (schema-triples - (gnt:has_genotype_data rdf:type owl:ObjectProperty) - (gnt:has_genotype_data rdfs:label "this resources has genotype data.") - (gnt:has_genotype_data rdfs:comment "Associates a resource with its genotype data.") - (gnt:has_genotype_data rdfs:domain gnc:set) - (gnt:has_genotype_data rdfs:range dcat:Dataset) - (gnt:has_genotype_data rdfs:subPropertyOf dct:relation)) + "WHERE GenoFreeze.public > 0 AND Species.Name != 
'monkey' GROUP BY Species.Name, GenoFreeze.ShortName") (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_") - (set gnt:has_genotype_data (string->identifier "dataset" (field GenoFreeze Name) #:separator "_")))) + (multiset gnt:has_genotype_data + (map (cut string->identifier "dataset" <> #:separator "_") + (string-split + (field ("GROUP_CONCAT(GenoFreeze.Name SEPARATOR ',')" + dataset_name)) + #\,))))) -(define-transformer gn:dataset->gn:set - (tables (Datasets - (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId") - (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId") - (inner-join GenoFreeze "ON GenoFreeze.InbredSetId = InbredSet.Id")) - "WHERE GenoFreeze.public > 0 GROUP BY Datasets.DatasetId") +(define-transformer gn:dataset->metadata + (tables (GenoFreeze + (inner-join InbredSet "ON InbredSet.Id = GenoFreeze.InbredSetId") + (inner-join Species "ON InbredSet.SpeciesId = Species.Id")) + "WHERE GenoFreeze.public > 0 AND Species.Name != 'monkey'") + (triples (string->identifier "dataset" (field GenoFreeze Name) #:separator "_") + (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")) + (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:dateTime)))) + +(define-transformer gn:dataset->marker/snp-count + (tables (GenoFreeze + (inner-join InbredSet "ON InbredSet.Id = GenoFreeze.InbredSetId") + (inner-join Species "ON InbredSet.SpeciesId = Species.Id") + (inner-join Geno "ON Geno.SpeciesId = Species.Id")) + "WHERE GenoFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY GenoFreeze.Name") (triples (string->identifier "dataset" (field GenoFreeze Name) #:separator "_") - (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")))) + (set gnt:has_marker_count + (string->symbol + (format #f "'~s'^^xsd:integer" + (field + ("COUNT(DISTINCT Geno.Marker_Name)" MarkerCount))))))) (let* ((option-spec @@ -68,7 
+78,10 @@ ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) (inputs - (list gn:set->gn:dataset gn:dataset->gn:set)) + (list + gn:set->gn:dataset + gn:dataset->metadata + gn:dataset->marker/snp-count)) (outputs `(#:documentation ,documentation #:rdf ,output)))) diff --git a/examples/genotype.scm b/examples/genotype.scm new file mode 100755 index 0000000..4a91b63 --- /dev/null +++ b/examples/genotype.scm @@ -0,0 +1,70 @@ +#! /usr/bin/env guile +!# + +(use-modules (rnrs programs) + (rnrs io ports) + (srfi srfi-1) + (srfi srfi-26) + (ice-9 getopt-long) + (ice-9 match) + (ice-9 regex) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) + +(define-transformer gn:markers/snps->metadata + (tables (Geno + (inner-join Species "ON Geno.SpeciesId = Species.Id")) + "WHERE Species.Name != 'monkey'") + (triples (string->identifier "marker" (field Geno Name) #:separator "_") + (set gnt:has_species + (string->identifier "" (remap-species-identifiers (field Species Fullname)))) + (set rdf:type 'gnc:dna_marker) + (set skos:prefLabel (field Geno Name)) + (set skos:altLabel (field Geno Marker_Name)) + (set gnt:chr (field Geno Chr)) + (set gnt:mb (annotate-field (field Geno Mb) '^^xsd:double)) + (set gnt:sequence (field Geno Sequence)) + (set gnt:source (field Geno Source)) + (set rdfs:comment (field Geno Comments)))) + + + +(let* ((option-spec + '((settings (single-char #\s) (value #t)) + (output (single-char #\o) (value #t)) + (documentation (single-char #\d) (value #t)))) + (options (getopt-long (command-line) option-spec)) + (settings (option-ref options 'settings #f)) + (output (option-ref options 'output #f)) + (documentation (option-ref options 'documentation #f)) + (%connection-settings + (call-with-input-file settings + read))) + (with-documentation + (name "Genotype Metadata") + (connection %connection-settings) + (table-metadata? 
#f) + (prefixes + '(("dcat:" "<http://www.w3.org/ns/dcat#>") + ("dct:" "<http://purl.org/dc/terms/>") + ("gn:" "<http://rdf.genenetwork.org/v1/id/>") + ("owl:" "<http://www.w3.org/2002/07/owl#>") + ("gnc:" "<http://rdf.genenetwork.org/v1/category/>") + ("gnd:" "<https://cd.genenetwork.org/lmdb/v1/data/traits/>") + ("gnt:" "<http://rdf.genenetwork.org/v1/term/>") + ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>") + ("skos:" "<http://www.w3.org/2004/02/skos/core#>") + ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") + ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") + ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") + ("qb:" "<http://purl.org/linked-data/cube#>") + ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") + ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>"))) + (inputs + (list gn:markers/snps->metadata)) + (outputs + `(#:documentation ,documentation + #:rdf ,output)))) + diff --git a/examples/ontology.scm b/examples/ontology.scm index 2e03c53..724a75a 100755 --- a/examples/ontology.scm +++ b/examples/ontology.scm @@ -38,7 +38,7 @@ (triple 'gnc:population_category 'a 'xkos:ClassificationLevel) (triple 'gnc:population_category 'rdfs:label "Population Category") (triple 'gnc:population_category 'skos:inScheme 'gnc:resource_classification_scheme) - (triple 'gnc:population_category 'skos:prefLabel "Species") + (triple 'gnc:population_category 'skos:prefLabel "Population Category") (triple 'gnc:population_category 'xkos:depth "3") (triple 'gnc:population_category 'xkos:nextLevel 'gnc:set) (triple 'gnc:population_category 'xkos:previousLevel 'gnc:species) @@ -234,4 +234,39 @@ (triple 'gnt:submitter 'skos:definition "A person who submitted this resource to GN") (triple 'gnt:has_phenotype_data 'a 'owl:ObjectProperty) (triple 'gnt:has_phenotype_data 'rdfs:domain 'gnc:set) - (triple 'gnt:has_phenotype_data 'skos:definition "This resource has phenotype data.")))) + (triple 'gnt:has_phenotype_data 'skos:definition "This resource has 
phenotype data.") + + ;; Genotypes + (triple 'gnc:dna_marker 'a 'owl:Class) + (triple 'gnc:dna_marker 'a 'skos:Concept) + (triple 'gnc:dna_marker 'rdfs:label "A DNA Marker or SNP") + (triple 'gnt:has_genotype_files 'rdfs:label "This resource has these genotype files") + (triple 'gnt:has_genotype_files 'rdfs:domain 'dcat:Dataset) + (triple 'gnt:has_genotype_data 'rdf:type 'owl:ObjectProperty) + (triple 'gnt:has_genotype_data 'rdfs:label "this resources has genotype data.") + (triple 'gnt:has_genotype_data 'rdfs:comment "Associates a resource with its genotype data.") + (triple 'gnt:has_genotype_data 'rdfs:domain 'gnc:set) + (triple 'gnt:has_genotype_data 'rdfs:range 'dcat:Dataset) + (triple 'gnt:has_genotype_data 'rdfs:subPropertyOf 'dct:relation) + (triple 'gnt:has_marker_count 'rdf:type 'owl:DatatypeProperty) + (triple 'gnt:has_marker_count 'rdfs:label "this resources has N number of dna markers/SNPs.") + (triple 'gnt:has_marker_count 'rdfs:domain 'dcat:Dataset) + (triple 'gnt:has_marker_count 'rdfs:range 'xsd:integer) + (triple 'gnt:chr 'a 'qb:MeasureProperty) + (triple 'gnt:chr 'a 'rdf:Property) + (triple 'gnt:chr 'rdfs:label "Chromosome") + (triple 'gnt:chr 'rdfs:domain 'gnc:marker) + (triple 'gnt:chr 'rdfs:range 'rdfs:Literal) + (triple 'gnt:chr 'rdfs:subPropertyOf 'sdmx-measure:obsValue) + (triple 'gnt:mb 'rdfs:label "Megabase") + (triple 'gnt:mb 'rdfs:domain 'gnc:marker) + (triple 'gnt:mb 'rdfs:range 'rdfs:Literal) + (triple 'gnt:mb 'rdfs:subPropertyOf 'sdmx-measure:obsValue) + (triple 'gnt:sequence 'rdfs:label "Sequence") + (triple 'gnt:sequence 'rdfs:domain 'gnc:marker) + (triple 'gnt:sequence 'rdfs:range 'rdfs:Literal) + (triple 'gnt:sequence 'rdfs:subPropertyOf 'sdmx-measure:obsValue) + (triple 'gnt:source 'rdfs:label "Source") + (triple 'gnt:source 'rdfs:domain 'gnc:marker) + (triple 'gnt:source 'rdfs:range 'rdfs:Literal) + (triple 'gnt:source 'rdfs:subPropertyOf 'sdmx-measure:obsValue)))) diff --git a/examples/phenotype-datasets.scm 
b/examples/phenotype-datasets.scm index 410178a..4819627 100755 --- a/examples/phenotype-datasets.scm +++ b/examples/phenotype-datasets.scm @@ -15,13 +15,17 @@ (define-transformer gn:set->gn:dataset - (tables (Datasets - (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId") - (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId") + (tables (Species + (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id") (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")) - "WHERE PublishFreeze.public > 0 GROUP BY Datasets.DatasetId") + "WHERE PublishFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, PublishFreeze.ShortName") (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_") - (set gnt:has_phenotype_data (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")))) + (multiset gnt:has_phenotype_data + (map (cut string->identifier "dataset" <> #:separator "_") + (string-split + (field ("GROUP_CONCAT(PublishFreeze.Name SEPARATOR ',')" + dataset_name)) + #\,))))) (define-transformer gn:dataset->gn:set (tables (Datasets @@ -32,6 +36,40 @@ (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_") (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")))) +(define-transformer gn:dataset->metadata + (tables (PublishXRef + (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") + (inner-join Species "ON InbredSet.SpeciesId = Species.Id") + (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id") + (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId") + (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")) + "WHERE InbredSet.public > 0 GROUP BY Species.Name, PublishFreeze.Name") + (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_") + (set dct:created (annotate-field (field PublishFreeze CreateTime) 
'^^xsd:dateTime)) + (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")))) + +(define-transformer gn:dataset->gn:trait + (tables (PublishXRef + (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") + (inner-join Species "ON InbredSet.SpeciesId = Species.Id") + (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id") + (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId") + (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")) + "WHERE InbredSet.public > 0") + (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_") + (set gnt:has_phenotype_trait + (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation))) + (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation))) + (post-desc (blank-p (field Phenotype Post_publication_description))) + (pre-desc (blank-p (field Phenotype Pre_publication_description)))) + (string->identifier + "trait" + (format #f "~a_~a" (field PublishFreeze Name) + (or post-abbrev pre-abbrev post-desc pre-desc)) + #:separator "_"))) + (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:dateTime)) + (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")))) + (let* ((option-spec '((settings (single-char #\s) (value #t)) @@ -61,7 +99,11 @@ ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) (inputs - (list gn:set->gn:dataset gn:dataset->gn:set)) + (list + gn:set->gn:dataset + gn:dataset->gn:set + gn:dataset->metadata + gn:dataset->gn:trait)) (outputs `(#:documentation ,documentation #:rdf ,output)))) diff --git a/examples/phenotype.scm b/examples/phenotype.scm index 72d3ed3..834ff5a 100755 --- a/examples/phenotype.scm +++ b/examples/phenotype.scm @@ -12,57 +12,14 @@ (transform sql) (transform triples) (transform special-forms)) + -(define (blank-p str) - (if 
(string-blank? str) #f str)) + -(define-transformer gn:set->gn:dataset - (tables (Species - (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id") - (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")) - "WHERE PublishFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, PublishFreeze.ShortName") - (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_") - (multiset gnt:has_phenotype_data - (map (cut string->identifier "dataset" <> #:separator "_") - (string-split - (field ("GROUP_CONCAT(PublishFreeze.Name SEPARATOR ',')" - dataset_name)) - #\,))))) -(define-transformer gn:dataset->metadata - (tables (PublishXRef - (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") - (inner-join Species "ON InbredSet.SpeciesId = Species.Id") - (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id") - (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId") - (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")) - "WHERE InbredSet.public > 0 GROUP BY Species.Name, PublishFreeze.Name") - (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_") - (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:datetime)) - (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")))) -(define-transformer gn:dataset->gn:trait - (tables (PublishXRef - (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") - (inner-join Species "ON InbredSet.SpeciesId = Species.Id") - (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id") - (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId") - (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")) - "WHERE InbredSet.public > 0") - (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_") - (set gnt:has_phenotype_trait - (let ((post-abbrev 
(blank-p (field Phenotype Post_publication_abbreviation))) - (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation))) - (post-desc (blank-p (field Phenotype Post_publication_description))) - (pre-desc (blank-p (field Phenotype Post_publication_description)))) - (string->identifier - "trait" - (format #f "~a_~a" (field PublishFreeze Name) - (or post-abbrev pre-abbrev post-desc pre-desc)) - #:separator "_"))) - (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:datetime)) - (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")))) + (define-transformer gnc:phenotype->gn:phenotype (tables (Phenotype)) @@ -198,9 +155,7 @@ ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>"))) (inputs - (list gn:dataset->metadata - gn:dataset->gn:trait - gnc:phenotype->gn:phenotype + (list gnc:phenotype->gn:phenotype gn:phenotype->metadata gn:trait->gn:phenotype)) (outputs |
