From d3e1a1191ced13d00a2e24b286998630f14abae6 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 25 Aug 2023 18:17:56 +0300 Subject: Rename file Signed-off-by: Munyoki Kilyungi --- examples/species-metadata.scm | 150 ------------------------------------------ examples/strains.scm | 150 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 150 deletions(-) delete mode 100755 examples/species-metadata.scm create mode 100755 examples/strains.scm diff --git a/examples/species-metadata.scm b/examples/species-metadata.scm deleted file mode 100755 index 89f2e27..0000000 --- a/examples/species-metadata.scm +++ /dev/null @@ -1,150 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (ice-9 getopt-long) - (ice-9 match) - (ice-9 regex) - (transform strings) - (transform sql) - (transform triples) - (transform special-forms)) - -(define (remap-species-identifiers str) - "This procedure remaps identifiers to standard binominal. Obviously this should - be sorted by correcting the database!" - (match str - ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] - ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] - ["Macaca mulatta" "Macaca nemestrina"] - ["Bat (Glossophaga soricina)" "Glossophaga soricina"] - [str str])) - -#! - -The ProbeData table contains StrainID. 
- -MariaDB [db_webqtl]> select * from ProbeData limit 2; -+--------+----------+---------+ -| Id | StrainId | value | -+--------+----------+---------+ -| 503636 | 42 | 11.6906 | -| 503636 | 43 | 11.4205 | -+--------+----------+---------+ - -Likewise - -MariaDB [db_webqtl]> select * from ProbeSetData wher limit 2; -+----+----------+-------+ -| Id | StrainId | value | -+----+----------+-------+ -| 1 | 1 | 5.742 | -| 1 | 2 | 5.006 | -+----+----------+-------+ - -To get at the strain use - -MariaDB [db_webqtl]> select * from Strain where Id=1 limit 15; -+----+--------+--------+-----------+--------+-------+ -| Id | Name | Name2 | SpeciesId | Symbol | Alias | -+----+--------+--------+-----------+--------+-------+ -| 1 | B6D2F1 | B6D2F1 | 1 | NULL | NULL | -+----+--------+--------+-----------+--------+-------+ - -A typical query may look like - -SELECT Strain.Name, Strain.Id FROM Strain, Species -WHERE Strain.Name IN f{create_in_clause(self.samplelist)} -AND Strain.SpeciesId=Species.Id -AND Species.name = %s, (self.group.species,) - -At this point it is not very clear how Name, Name2, Symbol and Alias are used. 
- -!# - -(define-transformer strain - (tables (Strain - (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) - (schema-triples - (gnc:strain skos:broader gnc:species) - (gnt:belongsToSpecies rdfs:domain gnc:strain) - (gnt:belongsToSpecies skos:definition "This resource belongs to this species") - (gnt:belongsToSpecies a owl:ObjectProperty) - (gnt:belongsToSpecies skos:definition "This resource belongs to this species") - (gnt:alias rdfs:domain gnc:strain) - (gnt:alias a owl:ObjectProperty) - (gnt:symbol rdfs:domain gnc:strain) - (gnt:symbol a owl:ObjectProperty)) - (triples (string->identifier - "" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field Strain Name) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) - (set rdf:type 'gnc:strain) - (set gnt:belongsToSpecies - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - ;; Name, and maybe a second name - (set rdfs:label (sanitize-rdf-string (field Strain Name))) - (set rdfs:label (sanitize-rdf-string (field ("IF ((Strain.Name2 != Strain.Name), Strain.Name2, '')" Name2)))) - (set gnt:alias (sanitize-rdf-string (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias)))) - (set gnt:symbol (field ("IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '')" Symbol))))) - -(define-transformer mapping-method - (tables (MappingMethod)) - (schema-triples - (gnc:mappingMethod a skos:Concept) - (gnc:mappingMethod skos:definition "Terms that decribe mapping/normalization methods used in GeneNetwork")) - (triples - (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gnc:mappingMethod) - (set rdfs:label (field MappingMethod Name)))) - -(define-transformer avg-method - ;; The Name and Normalization fields seem to be the same. Dump only - ;; the Name field. 
- (tables (AvgMethod)) - (schema-triples - (gnc:avgMethod rdf:type owl:Class)) - (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gnc:avgMethod) - (set rdfs:label (field AvgMethod Normalization)))) - - - -(let* ((option-spec - '((settings (single-char #\s) (value #t)) - (output (single-char #\o) (value #t)) - (documentation (single-char #\d) (value #t)))) - (options (getopt-long (command-line) option-spec)) - (settings (option-ref options 'settings #f)) - (output (option-ref options 'output #f)) - (documentation (option-ref options 'documentation #f)) - (%connection-settings - (call-with-input-file settings - read))) - - (with-documentation - (name "Species Metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("gn:" "") - ("gnc:" "") - ("owl:" "") - ("gnt:" "") - ("skos:" "") - ("rdf:" "") - ("rdfs:" "") - ("taxon:" ""))) - (inputs - (list inbred-set species strain mapping-method avg-method)) - (outputs - `(#:documentation ,documentation - #:rdf ,output)))) - diff --git a/examples/strains.scm b/examples/strains.scm new file mode 100755 index 0000000..89f2e27 --- /dev/null +++ b/examples/strains.scm @@ -0,0 +1,150 @@ +#! /usr/bin/env guile +!# + +(use-modules (srfi srfi-1) + (srfi srfi-26) + (ice-9 getopt-long) + (ice-9 match) + (ice-9 regex) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) + +(define (remap-species-identifiers str) + "This procedure remaps identifiers to standard binominal. Obviously this should + be sorted by correcting the database!" + (match str + ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] + ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] + ["Macaca mulatta" "Macaca nemestrina"] + ["Bat (Glossophaga soricina)" "Glossophaga soricina"] + [str str])) + +#! + +The ProbeData table contains StrainID. 
+ +MariaDB [db_webqtl]> select * from ProbeData limit 2; ++--------+----------+---------+ +| Id | StrainId | value | ++--------+----------+---------+ +| 503636 | 42 | 11.6906 | +| 503636 | 43 | 11.4205 | ++--------+----------+---------+ + +Likewise + +MariaDB [db_webqtl]> select * from ProbeSetData limit 2; ++----+----------+-------+ +| Id | StrainId | value | ++----+----------+-------+ +| 1 | 1 | 5.742 | +| 1 | 2 | 5.006 | ++----+----------+-------+ + +To get at the strain use + +MariaDB [db_webqtl]> select * from Strain where Id=1 limit 15; ++----+--------+--------+-----------+--------+-------+ +| Id | Name | Name2 | SpeciesId | Symbol | Alias | ++----+--------+--------+-----------+--------+-------+ +| 1 | B6D2F1 | B6D2F1 | 1 | NULL | NULL | ++----+--------+--------+-----------+--------+-------+ + +A typical query may look like + +SELECT Strain.Name, Strain.Id FROM Strain, Species +WHERE Strain.Name IN f{create_in_clause(self.samplelist)} +AND Strain.SpeciesId=Species.Id +AND Species.name = %s, (self.group.species,) + +At this point it is not very clear how Name, Name2, Symbol and Alias are used.
+ +!# + +(define-transformer strain + (tables (Strain + (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) + (schema-triples + (gnc:strain skos:broader gnc:species) + (gnt:belongsToSpecies rdfs:domain gnc:strain) + (gnt:belongsToSpecies skos:definition "This resource belongs to this species") + (gnt:belongsToSpecies a owl:ObjectProperty) + (gnt:belongsToSpecies skos:definition "This resource belongs to this species") + (gnt:alias rdfs:domain gnc:strain) + (gnt:alias a owl:ObjectProperty) + (gnt:symbol rdfs:domain gnc:strain) + (gnt:symbol a owl:ObjectProperty)) + (triples (string->identifier + "" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field Strain Name) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) + (set rdf:type 'gnc:strain) + (set gnt:belongsToSpecies + (string->identifier "" (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + ;; Name, and maybe a second name + (set rdfs:label (sanitize-rdf-string (field Strain Name))) + (set rdfs:label (sanitize-rdf-string (field ("IF ((Strain.Name2 != Strain.Name), Strain.Name2, '')" Name2)))) + (set gnt:alias (sanitize-rdf-string (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias)))) + (set gnt:symbol (field ("IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '')" Symbol))))) + +(define-transformer mapping-method + (tables (MappingMethod)) + (schema-triples + (gnc:mappingMethod a skos:Concept) + (gnc:mappingMethod skos:definition "Terms that decribe mapping/normalization methods used in GeneNetwork")) + (triples + (string->identifier "mappingMethod" (field MappingMethod Name)) + (set rdf:type 'gnc:mappingMethod) + (set rdfs:label (field MappingMethod Name)))) + +(define-transformer avg-method + ;; The Name and Normalization fields seem to be the same. Dump only + ;; the Name field. 
+ (tables (AvgMethod)) + (schema-triples + (gnc:avgMethod rdf:type owl:Class)) + (triples (string->identifier "avgmethod" (field AvgMethod Name)) + (set rdf:type 'gnc:avgMethod) + (set rdfs:label (field AvgMethod Normalization)))) + + + +(let* ((option-spec + '((settings (single-char #\s) (value #t)) + (output (single-char #\o) (value #t)) + (documentation (single-char #\d) (value #t)))) + (options (getopt-long (command-line) option-spec)) + (settings (option-ref options 'settings #f)) + (output (option-ref options 'output #f)) + (documentation (option-ref options 'documentation #f)) + (%connection-settings + (call-with-input-file settings + read))) + + (with-documentation + (name "Species Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("gn:" "") + ("gnc:" "") + ("owl:" "") + ("gnt:" "") + ("skos:" "") + ("rdf:" "") + ("rdfs:" "") + ("taxon:" ""))) + (inputs + (list inbred-set species strain mapping-method avg-method)) + (outputs + `(#:documentation ,documentation + #:rdf ,output)))) + -- cgit v1.2.3