#! /usr/bin/env guile
!#

;; Dump GeneChip and ProbeSetFreeze metadata as RDF triples.
;;
;; Usage: the first command-line argument is the path of a file whose
;; contents are `read' as a single datum and used as the connection
;; settings passed to `dump-with-documentation' below.

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 match)
             (ice-9 regex)
             (dump strings)
             (dump sql)
             (dump triples)
             (dump special-forms))

;; Connection settings, read once at load time from the file named by the
;; first command-line argument.
(define %connection-settings
  (call-with-input-file (list-ref (command-line) 1)
    read))

(define (remap-species-identifiers str)
  "This procedure remaps identifiers to standard binominal. Obviously this should be sorted by correcting the database!"
  ;; Known non-standard names are rewritten; any other string is returned
  ;; unchanged by the catch-all clause.
  (match str
    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
    ;; NOTE(review): this maps one Macaca species name onto a different
    ;; one -- confirm this is intentional and not a data-entry slip.
    ["Macaca mulatta" "Macaca nemestrina"]
    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
    [other other]))

;; One subject per GeneChip row (left-joined with Species), identified by
;; the "platform" prefix plus GeneChip.Name.
(define-dump dump-gene-chip
  (tables (GeneChip
           (left-join Species "USING (SpeciesId)")))
  (schema-triples
   (gnc:geneChip a skos:Concept)
   (gnc:geneChip skos:description "This is a set of controlled terms that are used to describe a given gene chip/platform")
   ;; NOTE(review): gnt:hasGeoSeriesId is given two domains (gnc:platform
   ;; here and gnc:geneChip further down) -- confirm both are wanted.
   (gnt:hasGeoSeriesId rdfs:domain gnc:platform)
   (gnt:belongsToSpecies a owl:ObjectProperty)
   (gnt:belongsToSpecies skos:definition "This resource belongs to this given species")
   (gnt:belongsToSpecies rdfs:domain gnc:geneChip)
   (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip)
   (gnt:hasGOTreeValue a owl:ObjectProperty)
   (gnt:hasGOTreeValue skos:definition "This resource has the following GO tree value")
   (gnt:hasGOTreeValue rdfs:domain gnc:geneChip))
  (triples (string->identifier "platform" (field GeneChip Name))
    (set rdf:type 'gnc:geneChip)
    (set rdfs:label (field GeneChip GeneChipName))
    (set skos:prefLabel (field GeneChip Name))
    ;; The SQL expression yields Title only when it differs from
    ;; GeneChipName, and NULL otherwise.
    (set skos:altLabel
         (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)"
                 Title)))
    (set gnt:hasGOTreeValue (field GeneChip Go_tree_value))
    ;; The species name is normalised first so the identifier is built
    ;; from the remapped binomial name.
    (set gnt:belongsToSpecies
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))
          #:separator ""
          #:proc string-capitalize-first))
    (set gnt:hasGeoSeriesId
         (ontology 'geoSeries:
                   (string-trim-both (field GeneChip GeoPlatform))))))

;; Molecular Traits are also referred to as ProbeSets
(define-dump dump-probesetfreeze
  (tables (ProbeSetFreeze
           (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (left-join ProbeFreeze "USING (ProbeFreezeId)")
           (left-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
           (left-join InbredSet "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId"))
          ;; Only public rows with no matching InfoFiles row are selected.
          "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id")
  (schema-triples
   (gnc:probeset rdfs:subClassOf gnc:dataset)
   (gnt:usesNormalization rdfs:domain gnc:probeset)
   (gnt:usesDataScale rdfs:domain gnc:probeset)
   (gnt:usesDataScale a owl:ObjectProperty)
   (gnt:usesDataScale skos:definition "The data scale this resource uses"))
  ;; Subject identifier: ProbeSetFreeze.Name with every character outside
  ;; [A-Za-z0-9:] replaced by an underscore.
  (triples (string->identifier
            ""
            (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                      (field ProbeSetFreeze Name)
                                      'pre "_" 'post)
            #:separator ""
            #:proc string-capitalize-first)
    (set rdf:type 'gnc:probeset)
    (set gnt:usesNormalization
         (string->identifier "avgmethod"
                             ;; If AvgMethodName is NULL, assume N/A.
                             (if (string-blank? (field AvgMethod Name AvgMethodName))
                                 "N/A"
                                 (field AvgMethod Name AvgMethodName))))
    (set dct:title (field ProbeSetFreeze FullName))
    (set rdfs:label (field ProbeSetFreeze Name))
    (set skos:altLabel (field ProbeSetFreeze Name2))
    (set skos:prefLabel (field ProbeSetFreeze ShortName))
    ;; XML Schema datatype names are case-sensitive: the built-in type is
    ;; xsd:dateTime, not xsd:datetime.
    (set dct:created
         (annotate-field (field ProbeSetFreeze CreateTime)
                         '^^xsd:dateTime))
    (set gnt:usesDataScale (field ProbeSetFreeze DataScale))
    (set gnt:hasTissue
         (string->identifier "tissue" (field Tissue Short_Name)))
    (set gnt:belongsToInbredSet
         (string->identifier "inbredSet"
                             (field InbredSet Name InbredSetName)))))

;; Run both dumps with the connection settings read above, producing a
;; Markdown documentation file and a Turtle file.
(dump-with-documentation
 (name "Probeset freeze metadata")
 (connection %connection-settings)
 (table-metadata? #f)
 ;; NOTE(review): every prefix IRI below is the empty string -- confirm the
 ;; namespace IRIs were not lost before relying on the generated output.
 (prefixes
  '(("geoSeries:" "")
    ("gn:" "")
    ("gnc:" "")
    ("dct:" "")
    ("owl:" "")
    ("skos:" "")
    ("gnt:" "")
    ("rdf:" "")
    ("rdfs:" "")
    ("xsd:" "")))
 (inputs
  (list dump-gene-chip
        dump-probesetfreeze))
 ;; NOTE(review): the documentation path is named after gene-chip while the
 ;; RDF path is named after probesetfreeze -- confirm this is intended.
 (outputs
  '(#:documentation "./docs/dump-gene-chip.md"
    #:rdf "./verified-data/dump-probesetfreeze.ttl")))