diff options
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/dataset-metadata.scm (renamed from examples/dump-dataset-metadata.scm) | 30 | ||||
-rwxr-xr-x | examples/generif.scm (renamed from examples/dump-generif.scm) | 18 | ||||
-rwxr-xr-x | examples/genotype.scm (renamed from examples/dump-genotype.scm) | 10 | ||||
-rwxr-xr-x | examples/phenotype.scm (renamed from examples/dump-phenotype.scm) | 10 | ||||
-rwxr-xr-x | examples/probeset-data.scm (renamed from examples/dump-probeset-data.scm) | 20 | ||||
-rwxr-xr-x | examples/probeset.scm (renamed from examples/dump-probeset.scm) | 10 | ||||
-rwxr-xr-x | examples/publication.scm (renamed from examples/dump-publication.scm) | 10 | ||||
-rwxr-xr-x | examples/species-metadata.scm (renamed from examples/dump-species-metadata.scm) | 28 | ||||
-rwxr-xr-x | examples/tissue.scm (renamed from examples/dump-tissue.scm) | 10 |
9 files changed, 69 insertions, 77 deletions
diff --git a/examples/dump-dataset-metadata.scm b/examples/dataset-metadata.scm index 6173201..5680a2b 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dataset-metadata.scm @@ -34,7 +34,7 @@ (list first-name last-name (fix-email-id email)) "_"))) -(define-transformer dump-investigators +(define-transformer investigators ;; There are a few duplicate entries. We group by email to ;; deduplicate. (tables (Investigators) @@ -56,7 +56,7 @@ (set v:postal-code (field Investigators ZipCode)) (set v:country-name (field Investigators Country)))) -(define-transformer dump-gene-chip +(define-transformer gene-chip (tables (GeneChip (left-join Species "USING (SpeciesId)"))) (schema-triples @@ -87,7 +87,7 @@ (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) -(define-transformer dump-info-files +(define-transformer info-files (tables (InfoFiles (left-join PublishFreeze "ON InfoFiles.InfoPageName = PublishFreeze.Name") (left-join GenoFreeze "ON InfoFiles.InfoPageName = GenoFreeze.Name") @@ -251,7 +251,7 @@ (field Datasets Acknowledgment))))) ;; These are phenotype datasets that don't have Infofile metadata -(define-transformer dump-publishfreeze +(define-transformer publishfreeze (tables (PublishFreeze (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name") (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId")) @@ -277,7 +277,7 @@ #:separator "" #:proc string-capitalize-first)))) -(define-transformer dump-genofreeze +(define-transformer genofreeze (tables (GenoFreeze (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name") (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) @@ -308,7 +308,7 @@ #:proc string-capitalize-first)))) ;; Molecular Traits are also referred to as ProbeSets -(define-transformer dump-probesetfreeze +(define-transformer probesetfreeze (tables (ProbeSetFreeze (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name") (left-join ProbeFreeze "USING (ProbeFreezeId)") @@ -356,7 +356,7 @@ -(dump-with-documentation +(with-documentation (name "Info files / Investigators Metadata") (connection %connection-settings) (table-metadata? #f) @@ -375,13 +375,13 @@ ("taxon:" "<http://purl.uniprot.org/taxonomy/>") ("dct:" "<http://purl.org/dc/terms/>"))) (inputs - (list dump-info-files - dump-publishfreeze - dump-genofreeze - dump-probesetfreeze - dump-investigators - dump-gene-chip)) + (list info-files + publishfreeze + genofreeze + probesetfreeze + investigators + gene-chip)) (outputs - '(#:documentation "./docs/dump-info-pages.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-info-pages.ttl"))) + '(#:documentation "./docs/info-pages.md" + #:rdf "/export/data/genenetwork-virtuoso/info-pages.ttl"))) diff --git a/examples/dump-generif.scm b/examples/generif.scm index f754274..0b3c8e4 100755 --- a/examples/dump-generif.scm +++ b/examples/generif.scm @@ -18,7 +18,7 @@ -(define-transformer dump-genewiki-symbols +(define-transformer genewiki-symbols (tables (GeneRIF_BASIC (left-join Species "USING (SpeciesId)")) "GROUP BY GeneId ORDER BY BINARY symbol") @@ -37,7 +37,7 @@ (string-split (field ("GROUP_CONCAT(DISTINCT TaxID)" taxId)) #\,))))) -(define-transformer dump-gn-genewiki-entries +(define-transformer gn-genewiki-entries (tables (GeneRIF (left-join GeneRIF_BASIC "USING (symbol)") (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") @@ -101,7 +101,7 @@ (cut string-split-substring <> "::::") comments)))))) -(define-transformer dump-ncbi-genewiki-entries +(define-transformer ncbi-genewiki-entries (tables (GeneRIF_BASIC) "GROUP BY GeneId, comment, createtime") (schema-triples @@ -124,7 +124,7 @@ -(dump-with-documentation +(with-documentation (name "GeneRIF Metadata") (connection %connection-settings) (table-metadata? #f) @@ -141,10 +141,10 @@ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") ("owl:" "<http://www.w3.org/2002/07/owl#>"))) (inputs - (list ;; dump-genewiki-symbols - dump-gn-genewiki-entries - ;; dump-ncbi-genewiki-entries + (list ;; genewiki-symbols + gn-genewiki-entries + ;; ncbi-genewiki-entries )) (outputs - '(#:documentation "./docs/dump-generif.md" - #:rdf "./verified-data/dump-generif.ttl"))) + '(#:documentation "./docs/generif.md" + #:rdf "./verified-data/generif.ttl"))) diff --git a/examples/dump-genotype.scm b/examples/genotype.scm index a055039..63b85a7 100755 --- a/examples/dump-genotype.scm +++ b/examples/genotype.scm @@ -30,7 +30,7 @@ ["Bat (Glossophaga soricina)" "Glossophaga soricina"] [str str])) -(define-transformer dump-genotypes +(define-transformer genotypes (tables (Geno (left-join Species "USING (SpeciesId)"))) (schema-triples @@ -103,7 +103,7 @@ -(dump-with-documentation +(with-documentation (name "Genotype Metadata") (connection %connection-settings) (table-metadata? #f) @@ -118,7 +118,7 @@ ("skos:" "<http://www.w3.org/2004/02/skos/core#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) (inputs - (list dump-genotypes)) + (list genotypes)) (outputs - '(#:documentation "./docs/dump-genotype.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-genotype.ttl"))) + '(#:documentation "./docs/genotype.md" + #:rdf "/export/data/genenetwork-virtuoso/genotype.ttl"))) diff --git a/examples/dump-phenotype.scm b/examples/phenotype.scm index b7ae003..1c68159 100755 --- a/examples/dump-phenotype.scm +++ b/examples/phenotype.scm @@ -19,7 +19,7 @@ read)) -(define-transformer dump-phenotypes +(define-transformer phenotypes (tables (PublishXRef (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") (left-join Publication "ON Publication.Id = PublishXRef.PublicationId") @@ -102,7 +102,7 @@ -(dump-with-documentation +(with-documentation (name "Phenotypes Metadata") (connection %connection-settings) (table-metadata? #f) @@ -119,7 +119,7 @@ ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>"))) (inputs (list - dump-phenotypes)) + phenotypes)) (outputs - '(#:documentation "./docs/dump-phenotype.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-phenotype.ttl"))) + '(#:documentation "./docs/phenotype.md" + #:rdf "/export/data/genenetwork-virtuoso/phenotype.ttl"))) diff --git a/examples/dump-probeset-data.scm b/examples/probeset-data.scm index 55f3f4b..d46bcda 100755 --- a/examples/dump-probeset-data.scm +++ b/examples/probeset-data.scm @@ -18,7 +18,7 @@ -(define-transformer dump-probeset-data +(define-transformer probeset-data (tables (ProbeSetXRef (left-join ProbeSet "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id") (left-join ProbeSetFreeze "ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id")) @@ -74,19 +74,11 @@ (set gnt:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) '^^xsd:double)) (set gnt:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) - '^^xsd:double)) - (set gnt:belongsToDataset - (string->identifier - "" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ProbeSetFreeze Name) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first)))) + '^^xsd:double)))) -(dump-with-documentation +(with-documentation (name "Probeset Summary Statistics") (connection %connection-settings) (table-metadata? #f) @@ -100,7 +92,7 @@ ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) (inputs - (list dump-probeset-data)) + (list probeset-data)) (outputs - '(#:documentation "./docs/dump-probeset-summary-stats.md" - #:rdf "./verified-data/dump-probeset-summary-stats.ttl"))) + '(#:documentation "./docs/probeset-summary-stats.md" + #:rdf "./verified-data/probeset-summary-stats.ttl"))) diff --git a/examples/dump-probeset.scm b/examples/probeset.scm index 3a55506..68ddb59 100755 --- a/examples/dump-probeset.scm +++ b/examples/probeset.scm @@ -17,7 +17,7 @@ read)) -(define-transformer dump-probeset +(define-transformer probeset (tables (ProbeSet (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples @@ -156,7 +156,7 @@ -(dump-with-documentation +(with-documentation (name "ProbeSet Metadata") (connection %connection-settings) (table-metadata? #f) @@ -178,7 +178,7 @@ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") ("skos:" "<http://www.w3.org/2004/02/skos/core#>"))) (inputs - (list dump-probeset)) + (list probeset)) (outputs - '(#:documentation "./docs/dump-probeset.md" - #:rdf "./verified-data/dump-probeset.ttl"))) + '(#:documentation "./docs/probeset.md" + #:rdf "./verified-data/probeset.ttl"))) diff --git a/examples/dump-publication.scm b/examples/publication.scm index 1881872..313ee96 100755 --- a/examples/dump-publication.scm +++ b/examples/publication.scm @@ -18,7 +18,7 @@ -(define-transformer dump-publication +(define-transformer publication (tables (Publication)) (triples (let ((pmid (field @@ -59,7 +59,7 @@ -(dump-with-documentation +(with-documentation (name "Publications Metadata") (connection %connection-settings) (table-metadata? #f) @@ -75,7 +75,7 @@ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>"))) (inputs - (list dump-publication)) + (list publication)) (outputs - '(#:documentation "./docs/dump-publication.md" - #:rdf "./verified-data/dump-publication.ttl"))) + '(#:documentation "./docs/publication.md" + #:rdf "./verified-data/publication.ttl"))) diff --git a/examples/dump-species-metadata.scm b/examples/species-metadata.scm index b0ac6f8..f3794b8 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/species-metadata.scm @@ -28,7 +28,7 @@ ["Bat (Glossophaga soricina)" "Glossophaga soricina"] [str str])) -(define-transformer dump-species +(define-transformer species (tables (Species)) (schema-triples (gnc:species a skos:Concept) @@ -97,7 +97,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. !# -(define-transformer dump-strain +(define-transformer strain (tables (Strain (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples @@ -129,7 +129,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. (set gnt:alias (sanitize-rdf-string (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias)))) (set gnt:symbol (field ("IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '')" Symbol))))) -(define-transformer dump-mapping-method +(define-transformer mapping-method (tables (MappingMethod)) (schema-triples (gnc:mappingMethod a skos:Concept) @@ -140,7 +140,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. (set rdfs:label (field MappingMethod Name)))) -(define-transformer dump-inbred-set +(define-transformer inbred-set (tables (InbredSet (left-join Species "ON InbredSet.SpeciesId=Species.Id") (left-join MappingMethod @@ -152,7 +152,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. (gnt:geneticType rdfs:domain gnc:set) (gnt:code a owl:ObjectProperty) (gnt:code rdfs:domain gnc:set) - ;; Already defined as an owl prop in dump-species + ;; Already defined as an owl prop in species (gnt:family rdfs:domain gnc:set) (gnt:phenotype a owl:ObjectProperty) (gnt:phenotype rdfs:domain gnc:set) @@ -189,7 +189,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. molecularTrait)) "||"))))) -(define-transformer dump-avg-method +(define-transformer avg-method ;; The Name and Normalization fields seem to be the same. Dump only ;; the Name field. (tables (AvgMethod)) @@ -201,7 +201,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. -(dump-with-documentation +(with-documentation (name "Species Metadata") (connection %connection-settings) (table-metadata? #f) @@ -216,11 +216,11 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. ("taxon:" "<http://purl.uniprot.org/taxonomy/>"))) (inputs (list - dump-inbred-set - dump-species - dump-strain - dump-mapping-method - dump-avg-method)) + inbred-set + species + strain + mapping-method + avg-method)) (outputs - '(#:documentation "./docs/dump-species-metadata.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-species-metadata.ttl"))) + '(#:documentation "./docs/species-metadata.md" + #:rdf "/export/data/genenetwork-virtuoso/species-metadata.ttl"))) diff --git a/examples/dump-tissue.scm b/examples/tissue.scm index 3658a26..8ce96c8 100755 --- a/examples/dump-tissue.scm +++ b/examples/tissue.scm @@ -18,7 +18,7 @@ -(define-transformer dump-tissue +(define-transformer tissue ;; The Name and TissueName fields seem to be identical. BIRN_lex_ID ;; and BIRN_lex_Name are mostly NULL. (tables (Tissue)) @@ -32,7 +32,7 @@ -(dump-with-documentation +(with-documentation (name "Tissue Metadata") (connection %connection-settings) (table-metadata? #f) @@ -44,7 +44,7 @@ ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>"))) (inputs - (list dump-tissue)) + (list tissue)) (outputs - '(#:documentation "./docs/dump-tissue.md" - #:rdf "./verified-data/dump-tissue.ttl"))) + '(#:documentation "./docs/tissue.md" + #:rdf "./verified-data/tissue.ttl"))) |