#! /usr/bin/env guile
!#

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

(define (remap-species-identifiers str)
  "Return the name to use in place of the species full name STR.

A few full names are replaced by the literal given in the clauses
below; any other value is returned unchanged.  Obviously this should
be sorted by correcting the database!"
  (match str
    ("Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster")
    ("Oryzias latipes (Japanese medaka)" "Oryzias latipes")
    ;; NOTE(review): Macaca mulatta and Macaca nemestrina are distinct
    ;; species names -- confirm this remapping is intended.
    ("Macaca mulatta" "Macaca nemestrina")
    ("Bat (Glossophaga soricina)" "Glossophaga soricina")
    ;; Anything not listed above passes through untouched.
    (_ str)))

;; Transformer for the Species table: emits the schema triples that
;; describe gnc:species and its properties, then one resource per row
;; whose identifier is derived from the remapped full name.
(define-transformer species
  (tables (Species))
  (schema-triples
   (gnc:species a skos:Concept)
   (gnc:species skos:description "This is a set of controlled terms that are used to describe a given species")
   (gnc:species skos:broader gnc:family)
   (gnt:binomialName a owl:ObjectProperty)
   (gnt:binomialName rdfs:domain gnc:species)
   (gnt:family a owl:ObjectProperty)
   (gnt:family rdfs:domain gnc:species)
   (gnt:family skos:definition "This resource belongs to this family")
   (gnt:organism a owl:ObjectProperty)
   (gnt:organism rdfs:domain gnc:species)
   (gnt:shortName a owl:ObjectProperty)
   (gnt:shortName rdfs:domain gnc:species))
  ;; The column is spelled FullName everywhere in this transformer so
  ;; the identifier and gnt:binomialName read the same field.
  (triples (string->identifier
            ""
            (remap-species-identifiers (field Species FullName))
            #:separator ""
            #:proc string-capitalize-first)
    (set rdf:type 'gnc:species)
    (set skos:label (field Species SpeciesName))
    (set skos:altLabel (field Species Name))
    (set rdfs:label (field Species MenuName))
    (set gnt:binomialName (field Species FullName))
    (set gnt:family (field Species Family))
    (set gnt:organism (ontology 'taxon: (field Species TaxonomyId)))))

#!
The ProbeData table contains StrainID.

MariaDB [db_webqtl]> select * from ProbeData limit 2;
+--------+----------+---------+
| Id     | StrainId | value   |
+--------+----------+---------+
| 503636 |       42 | 11.6906 |
| 503636 |       43 | 11.4205 |
+--------+----------+---------+

Likewise

MariaDB [db_webqtl]> select * from ProbeSetData limit 2;
+----+----------+-------+
| Id | StrainId | value |
+----+----------+-------+
|  1 |        1 | 5.742 |
|  1 |        2 | 5.006 |
+----+----------+-------+

To get at the strain use

MariaDB [db_webqtl]> select * from Strain where Id=1 limit 15;
+----+--------+--------+-----------+--------+-------+
| Id | Name   | Name2  | SpeciesId | Symbol | Alias |
+----+--------+--------+-----------+--------+-------+
|  1 | B6D2F1 | B6D2F1 |         1 | NULL   | NULL  |
+----+--------+--------+-----------+--------+-------+

A typical query may look like

SELECT Strain.Name, Strain.Id FROM Strain, Species
WHERE Strain.Name IN f{create_in_clause(self.samplelist)}
AND Strain.SpeciesId=Species.Id
AND Species.name = %s, (self.group.species,)

At this point it is not very clear how Name, Name2, Symbol and Alias
are used.
!#

;; Transformer for the Strain table joined to its species.  Each row
;; becomes a gnc:strain resource whose identifier is the strain name
;; with every character outside [A-Za-z0-9:] replaced by "_".
(define-transformer strain
  ;; Join on Species.Id, the same key the inbred-set transformer below
  ;; uses for its Species join.
  ;; NOTE(review): this previously joined on Species.SpeciesId --
  ;; confirm both columns carry the same values.
  (tables (Strain
           (left-join Species "ON Strain.SpeciesId = Species.Id")))
  (schema-triples
   (gnc:strain skos:broader gnc:species)
   (gnt:belongsToSpecies rdfs:domain gnc:strain)
   (gnt:belongsToSpecies skos:definition "This resource belongs to this species")
   (gnt:belongsToSpecies a owl:ObjectProperty)
   (gnt:alias rdfs:domain gnc:strain)
   (gnt:alias a owl:ObjectProperty)
   (gnt:symbol rdfs:domain gnc:strain)
   (gnt:symbol a owl:ObjectProperty))
  (triples (string->identifier
            ""
            (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                      (field Strain Name)
                                      'pre "_" 'post)
            #:separator ""
            #:proc string-capitalize-first)
    (set rdf:type 'gnc:strain)
    ;; Same identifier expression as the subject of the species
    ;; transformer, so the link resolves to that resource.
    (set gnt:belongsToSpecies
         (string->identifier
          ""
          (remap-species-identifiers (field Species FullName))
          #:separator ""
          #:proc string-capitalize-first))
    ;; Name, and maybe a second name
    (set rdfs:label (sanitize-rdf-string (field Strain Name)))
    ;; The IF expressions below yield '' whenever the column does not
    ;; differ from Strain.Name (a NULL comparison also takes the ''
    ;; branch).
    (set rdfs:label
         (sanitize-rdf-string
          (field ("IF ((Strain.Name2 != Strain.Name), Strain.Name2, '')" Name2))))
    (set gnt:alias
         (sanitize-rdf-string
          (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias))))
    (set gnt:symbol
         (field ("IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '')" Symbol)))))

;; Transformer for the MappingMethod table: one gnc:mappingMethod
;; resource per row, labelled with the method name.
(define-transformer mapping-method
  (tables (MappingMethod))
  (schema-triples
   (gnc:mappingMethod a skos:Concept)
   (gnc:mappingMethod skos:definition "Terms that decribe mapping/normalization methods used in GeneNetwork"))
  (triples (string->identifier "mappingMethod" (field MappingMethod Name))
    (set rdf:type 'gnc:mappingMethod)
    (set rdfs:label (field MappingMethod Name))))

;; Transformer for the InbredSet table joined to Species and
;; MappingMethod.  Each row becomes a gnc:set resource.
(define-transformer inbred-set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod "ON InbredSet.MappingMethodId=MappingMethod.Id")))
  (schema-triples
   (gnc:set skos:broader gnc:species)
   (gnc:set skos:definition "A set of terms used to describe an set, which can be inbredSet, outbredSet 
etc etc.")
   (gnt:geneticType a owl:ObjectProperty)
   (gnt:geneticType rdfs:domain gnc:set)
   (gnt:code a owl:ObjectProperty)
   (gnt:code rdfs:domain gnc:set)
   ;; Already defined as an owl prop in species
   (gnt:family rdfs:domain gnc:set)
   (gnt:phenotype a owl:ObjectProperty)
   (gnt:phenotype rdfs:domain gnc:set)
   (gnt:genotype a owl:ObjectProperty)
   ;; Domain is gnc:set like every other property declared here (it
   ;; used to name gnt:inbredSet, which nothing else in this file uses).
   (gnt:genotype rdfs:domain gnc:set)
   (gnt:mappingMethod a owl:ObjectProperty)
   (gnt:mappingMethod rdfs:domain gnc:set))
  (triples (string->identifier "set" (field InbredSet Name)
                               #:separator ""
                               #:proc string-capitalize-first)
    (set rdf:type 'gnc:set)
    (set rdfs:label (field InbredSet FullName))
    (set skos:altLabel (field InbredSet Name))
    (set gnt:geneticType (field InbredSet GeneticType))
    (set gnt:family (field InbredSet Family))
    (set gnt:mappingMethod (field MappingMethod Name))
    (set gnt:code (field InbredSet InbredSetCode))
    (set gnt:belongsToSpecies
         (string->identifier
          ""
          (remap-species-identifiers (field Species FullName))
          #:separator ""
          #:proc string-capitalize-first))
    ;; The GenoFreeze lookup (labelled 'DNA Markers and SNPs') feeds
    ;; gnt:genotype and the PublishFreeze lookup (labelled 'Traits and
    ;; Cofactors') feeds gnt:phenotype.  The two predicates were
    ;; previously attached to each other's query.
    (set gnt:genotype
         (field ("IF ((SELECT GenoFreeze.Name FROM GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'DNA Markers and SNPs', '')" genotypeP)))
    (set gnt:phenotype
         (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" phenotypeP)))
    ;; One gnt:hasTissue object per "||"-separated tissue short name.
    ;; NOTE(review): the subquery lists InbredSet (and Species) in its
    ;; own FROM clause, so InbredSet.Id inside it refers to that inner
    ;; table rather than to the row being transformed; the tissue list
    ;; therefore looks identical for every set.  Left unchanged here --
    ;; confirm the intended correlation, and how an empty (NULL) result
    ;; should be handled, before tightening the query.
    (multiset gnt:hasTissue
              (map (lambda (tissue-name)
                     (string->identifier "tissue" tissue-name))
                   (string-split-substring
                    (field ("(SELECT GROUP_CONCAT(DISTINCT Tissue.Short_Name SEPARATOR'||') AS MolecularTraits FROM ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species WHERE ProbeFreeze.TissueId = Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id ORDER BY Tissue.Name)" molecularTrait))
                    "||")))))

(define-transformer avg-method
  ;; The Name and Normalization fields seem to be the same. Dump only
  ;; the Name field.
  (tables (AvgMethod))
  (schema-triples
   (gnc:avgMethod rdf:type owl:Class))
  (triples (string->identifier "avgmethod" (field AvgMethod Name))
    (set rdf:type 'gnc:avgMethod)
    ;; Label from Name, as the note above states; this previously read
    ;; the Normalization column.
    (set rdfs:label (field AvgMethod Name))))

;; Script entry point.  Command-line options:
;;   -s, --settings FILE        file whose first datum is `read' and
;;                              used as the connection settings (required)
;;   -o, --output FILE          passed on as the #:rdf output
;;   -d, --documentation FILE   passed on as the #:documentation output
(let* ((option-spec
        ;; --settings is required: without it the script used to fail
        ;; inside call-with-input-file with #f as the file name.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Every namespace IRI here was an empty string, which cannot form
   ;; a usable prefix declaration.
   ;; NOTE(review): the owl/skos/rdf/rdfs IRIs are the standard W3C
   ;; ones; confirm the gn:/gnc:/gnt:/taxon: IRIs and the bracketed
   ;; form against the project's other transform scripts.
   (prefixes
    '(("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
   (inputs
    (list inbred-set
          species
          strain
          mapping-method
          avg-method))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))