diff options
| author | Munyoki Kilyungi | 2023-08-21 14:54:21 +0300 | 
|---|---|---|
| committer | Munyoki Kilyungi | 2023-08-21 14:56:57 +0300 | 
| commit | 51b3c0548c98e0bc05e11a89cbf6b75d31b9f8d5 (patch) | |
| tree | ab3d7c6f589ed8480f0a9d451566681bcfd8eaaf /examples/dump-species-metadata.scm | |
| parent | 849874fdfe11003f05abe5f82efde974a8c8a388 (diff) | |
| download | gn-transform-databases-51b3c0548c98e0bc05e11a89cbf6b75d31b9f8d5.tar.gz | |
Remove "dump-" prefix
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples/dump-species-metadata.scm')
| -rwxr-xr-x | examples/dump-species-metadata.scm | 226 | 
1 files changed, 0 insertions, 226 deletions
| diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm deleted file mode 100755 index b0ac6f8..0000000 --- a/examples/dump-species-metadata.scm +++ /dev/null @@ -1,226 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (ice-9 match) - (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) - - - -(define %connection-settings - (call-with-input-file (list-ref (command-line) 1) - read)) - - - -(define (remap-species-identifiers str) - "This procedure remaps identifiers to standard binominal. Obviously this should - be sorted by correcting the database!" - (match str - ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] - ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] - ["Macaca mulatta" "Macaca nemestrina"] - ["Bat (Glossophaga soricina)" "Glossophaga soricina"] - [str str])) - -(define-transformer dump-species - (tables (Species)) - (schema-triples - (gnc:species a skos:Concept) - (gnc:species skos:description "This is a set of controlled terms that are used to describe a given species") - (gnc:species skos:broader gnc:family) - (gnt:binomialName a owl:ObjectProperty) - (gnt:binomialName rdfs:domain gnc:species) - (gnt:family a owl:ObjectProperty) - (gnt:family rdfs:domain gnc:species) - (gnt:family skos:definition "This resource belongs to this family") - (gnt:organism a owl:ObjectProperty) - (gnt:organism rdfs:domain gnc:species) - (gnt:shortName a owl:ObjectProperty) - (gnt:shortName rdfs:domain gnc:species)) - (triples - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first) - (set rdf:type 'gnc:species) - (set skos:label (field Species SpeciesName)) - (set skos:altLabel (field Species Name)) - (set rdfs:label (field Species MenuName)) - (set gnt:binomialName (field Species FullName)) - (set gnt:family (field Species Family)) - (set gnt:organism (ontology 'taxon: (field Species 
TaxonomyId))))) - -#! - -The ProbeData table contains StrainId. - -MariaDB [db_webqtl]> select * from ProbeData limit 2; -+--------+----------+---------+ -| Id | StrainId | value | -+--------+----------+---------+ -| 503636 | 42 | 11.6906 | -| 503636 | 43 | 11.4205 | -+--------+----------+---------+ - -Likewise - -MariaDB [db_webqtl]> select * from ProbeSetData limit 2; -+----+----------+-------+ -| Id | StrainId | value | -+----+----------+-------+ -| 1 | 1 | 5.742 | -| 1 | 2 | 5.006 | -+----+----------+-------+ - -To get at the strain use - -MariaDB [db_webqtl]> select * from Strain where Id=1 limit 15; -+----+--------+--------+-----------+--------+-------+ -| Id | Name | Name2 | SpeciesId | Symbol | Alias | -+----+--------+--------+-----------+--------+-------+ -| 1 | B6D2F1 | B6D2F1 | 1 | NULL | NULL | -+----+--------+--------+-----------+--------+-------+ - -A typical query may look like - -SELECT Strain.Name, Strain.Id FROM Strain, Species -WHERE Strain.Name IN f{create_in_clause(self.samplelist)} -AND Strain.SpeciesId=Species.Id -AND Species.name = %s, (self.group.species,) - -At this point it is not very clear how Name, Name2, Symbol and Alias are used. 
- -!# - -(define-transformer dump-strain - (tables (Strain - (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) - (schema-triples - (gnc:strain skos:broader gnc:species) - (gnt:belongsToSpecies rdfs:domain gnc:strain) - (gnt:belongsToSpecies skos:definition "This resource belongs to this species") - (gnt:belongsToSpecies a owl:ObjectProperty) - (gnt:belongsToSpecies skos:definition "This resource belongs to this species") - (gnt:alias rdfs:domain gnc:strain) - (gnt:alias a owl:ObjectProperty) - (gnt:symbol rdfs:domain gnc:strain) - (gnt:symbol a owl:ObjectProperty)) - (triples (string->identifier - "" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field Strain Name) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) - (set rdf:type 'gnc:strain) - (set gnt:belongsToSpecies - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - ;; Name, and maybe a second name - (set rdfs:label (sanitize-rdf-string (field Strain Name))) - (set rdfs:label (sanitize-rdf-string (field ("IF ((Strain.Name2 != Strain.Name), Strain.Name2, '')" Name2)))) - (set gnt:alias (sanitize-rdf-string (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias)))) - (set gnt:symbol (field ("IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '')" Symbol))))) - -(define-transformer dump-mapping-method - (tables (MappingMethod)) - (schema-triples - (gnc:mappingMethod a skos:Concept) - (gnc:mappingMethod skos:definition "Terms that decribe mapping/normalization methods used in GeneNetwork")) - (triples - (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gnc:mappingMethod) - (set rdfs:label (field MappingMethod Name)))) - - -(define-transformer dump-inbred-set - (tables (InbredSet - (left-join Species "ON InbredSet.SpeciesId=Species.Id") - (left-join MappingMethod - "ON InbredSet.MappingMethodId=MappingMethod.Id"))) - (schema-triples - (gnc:set 
skos:broader gnc:species) - (gnc:set skos:definition "A set of terms used to describe an set, which can be inbredSet, outbredSet etc etc.") - (gnt:geneticType a owl:ObjectProperty) - (gnt:geneticType rdfs:domain gnc:set) - (gnt:code a owl:ObjectProperty) - (gnt:code rdfs:domain gnc:set) - ;; Already defined as an owl prop in dump-species - (gnt:family rdfs:domain gnc:set) - (gnt:phenotype a owl:ObjectProperty) - (gnt:phenotype rdfs:domain gnc:set) - (gnt:genotype a owl:ObjectProperty) - (gnt:genotype rdfs:domain gnt:inbredSet) - (gnt:mappingMethod a owl:ObjectProperty) - (gnt:mappingMethod rdfs:domain gnc:set)) - (triples (string->identifier - "set" (field InbredSet Name) - #:separator "" - #:proc string-capitalize-first) - (set rdf:type 'gnc:set) - (set rdfs:label (field InbredSet FullName)) - (set skos:altLabel (field InbredSet Name)) - (set gnt:geneticType (field InbredSet GeneticType)) - (set gnt:family (field InbredSet Family)) - (set gnt:mappingMethod (field MappingMethod Name)) - (set gnt:code (field InbredSet InbredSetCode)) - (set gnt:belongsToSpecies - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - (set gnt:genotype - (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" genotypeP))) - (set gnt:phenotype - (field ("IF ((SELECT GenoFreeze.Name FROM GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'DNA Markers and SNPs', '')" phenotypeP))) - (multiset gnt:hasTissue - (map - (lambda (x) - (string->identifier "tissue" - x)) - (string-split-substring - (field ("(SELECT GROUP_CONCAT(DISTINCT Tissue.Short_Name SEPARATOR'||') AS MolecularTraits FROM ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species WHERE ProbeFreeze.TissueId = Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id ORDER BY 
Tissue.Name)" - molecularTrait)) - "||"))))) - -(define-transformer dump-avg-method - ;; The Name and Normalization fields seem to be the same. Dump only - ;; the Name field. - (tables (AvgMethod)) - (schema-triples - (gnc:avgMethod rdf:type owl:Class)) - (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gnc:avgMethod) - (set rdfs:label (field AvgMethod Normalization)))) - - - -(dump-with-documentation - (name "Species Metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("gn:" "<http://genenetwork.org/id/>") - ("gnc:" "<http://genenetwork.org/category/>") - ("owl:" "<http://www.w3.org/2002/07/owl#>") - ("gnt:" "<http://genenetwork.org/term/>") - ("skos:" "<http://www.w3.org/2004/02/skos/core#>") - ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") - ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") - ("taxon:" "<http://purl.uniprot.org/taxonomy/>"))) - (inputs - (list - dump-inbred-set - dump-species - dump-strain - dump-mapping-method - dump-avg-method)) - (outputs - '(#:documentation "./docs/dump-species-metadata.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-species-metadata.ttl"))) | 
