#! /usr/bin/env guile
!#

;;; Transform strain, mapping-method and averaging-method metadata from
;;; the SQL tables Strain/Species, MappingMethod and AvgMethod into RDF
;;; triples, using the project's `define-transformer' and
;;; `with-documentation' forms.
;;;
;;; Command-line options (parsed with getopt-long):
;;;   -s, --settings FILE       file whose first datum is read as the
;;;                             connection settings (required)
;;;   -o, --output FILE         passed on as the #:rdf output
;;;   -d, --documentation FILE  passed on as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


(define (remap-species-identifiers str)
  "Return the standard binomial name for the species label STR.
Labels that have no entry in the table below are returned unchanged.
This is a workaround: the proper fix is to correct the names in the
database itself."
  (match str
    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
    ;; NOTE(review): this maps one species name onto a different species
    ;; (mulatta -> nemestrina) -- confirm that this is intentional.
    ["Macaca mulatta" "Macaca nemestrina"]
    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
    ;; Anything else passes through untouched.
    [_ str]))

#!

The ProbeData table contains StrainID.

MariaDB [db_webqtl]> select * from ProbeData limit 2;
+--------+----------+---------+
| Id     | StrainId | value   |
+--------+----------+---------+
| 503636 |       42 | 11.6906 |
| 503636 |       43 | 11.4205 |
+--------+----------+---------+

Likewise

MariaDB [db_webqtl]> select * from ProbeSetData limit 2;
+----+----------+-------+
| Id | StrainId | value |
+----+----------+-------+
|  1 |        1 | 5.742 |
|  1 |        2 | 5.006 |
+----+----------+-------+

To get at the strain use

MariaDB [db_webqtl]> select * from Strain where Id=1 limit 15;
+----+--------+--------+-----------+--------+-------+
| Id | Name   | Name2  | SpeciesId | Symbol | Alias |
+----+--------+--------+-----------+--------+-------+
|  1 | B6D2F1 | B6D2F1 |         1 | NULL   | NULL  |
+----+--------+--------+-----------+--------+-------+

A typical query may look like

SELECT Strain.Name, Strain.Id FROM Strain, Species
WHERE Strain.Name IN f{create_in_clause(self.samplelist)}
AND Strain.SpeciesId=Species.Id
AND Species.name = %s, (self.group.species,)

At this point it is not very clear how Name, Name2, Symbol and Alias
are used.

!#


;; One subject per row of Strain (left-joined with Species).
(define-transformer strain
  (tables (Strain
           (left-join Species "ON Strain.SpeciesId = Species.SpeciesId")))
  (schema-triples
   (gnt:alias rdfs:domain gnc:strain)
   (gnt:alias a owl:ObjectProperty)
   (gnt:geneSymbol rdfs:domain gnc:strain)
   (gnt:geneSymbol a owl:ObjectProperty))
  ;; The subject identifier is the strain name with every character
  ;; outside [A-Za-z0-9:] replaced by an underscore.
  (triples (string->identifier
            ""
            (regexp-substitute/global
             #f "[^A-Za-z0-9:]"
             (field Strain Name)
             'pre "_" 'post))
    (set rdf:type 'gnc:strain)
    ;; The species full name is normalised through
    ;; `remap-species-identifiers' before it is turned into an identifier.
    (set gnt:belongsToSpecies
         (string->identifier "" (remap-species-identifiers (field Species Fullname))
                             #:separator ""
                             #:proc string-capitalize-first))
    ;; Name, and maybe a second name
    (set rdfs:label (sanitize-rdf-string (field Strain Name)))
    ;; Name2 and Alias are selected only when they differ from Name;
    ;; otherwise the SQL expression yields the empty string.
    (set skos:altLabel (sanitize-rdf-string
                        (field ("IF ((Strain.Name2 != Strain.Name), Strain.Name2, '')" Name2))))
    (set gnt:alias (sanitize-rdf-string
                    (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias))))
    (set gnt:geneSymbol (field Strain Symbol))))

;; One gnc:mappingMethod subject per row of MappingMethod, labelled with
;; its Name.
(define-transformer mapping-method
  (tables (MappingMethod))
  (schema-triples
   (gnc:mappingMethod a skos:Concept)
   (gnc:mappingMethod skos:definition "Terms that describe mapping methods used on this resource"))
  (triples (string->identifier "mappingMethod" (field MappingMethod Name))
    (set rdf:type 'gnc:mappingMethod)
    (set rdfs:label (field MappingMethod Name))))

(define-transformer avg-method
  ;; The Name and Normalization fields seem to be the same.  The subject
  ;; identifier is built from Name, while the label is read from
  ;; Normalization.
  ;; NOTE(review): an earlier comment here said that only the Name field
  ;; is dumped -- confirm which field the label is meant to come from.
  (tables (AvgMethod))
  (schema-triples
   (gnc:avgMethod a skos:Concept)
   (gnc:avgMethod skos:definition "Terms that describe normalization methods used on this resource"))
  (triples (string->identifier "avgMethod" (field AvgMethod Name AvgMethodName))
    (set rdf:type 'gnc:avgMethod)
    (set rdfs:label (field AvgMethod Normalization))))


;; Script entry point: parse the command line, read the connection
;; settings and run the three transformers defined above.
(let* ((option-spec
        ;; --settings is marked as required because its value is handed
        ;; straight to `call-with-input-file' below; without it the
        ;; script would fail later with an opaque error on #f.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       ;; NOTE(review): `output' and `documentation' default to #f when
       ;; omitted; how `with-documentation' treats #f is not visible in
       ;; this file -- confirm.
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is read as a single datum.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix maps to an empty string here -- confirm
   ;; that this is intended.
   (prefixes
    '(("gn:" "")
      ("gnc:" "")
      ("owl:" "")
      ("gnt:" "")
      ("skos:" "")
      ("xkos:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("taxon:" "")))
   (inputs
    (list strain
          mapping-method
          avg-method))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))