#! /usr/bin/env guile
!#

;;; Command-line script that declares a `genbank' transformer over the
;;; Genbank and Species tables and runs it through `with-documentation'.
;;;
;;; Options (parsed with getopt-long):
;;;   -s, --settings FILE        file containing the connection settings as a
;;;                              single readable datum (required)
;;;   -o, --output VALUE         passed as the #:rdf output
;;;   -d, --documentation VALUE  passed as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (rnrs bytevectors)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


(define (remap-species-identifiers str)
  "Return the standard binominal name for the species identifier STR.

A small fixed set of non-standard names is rewritten; any other STR is
returned unchanged.  Obviously this should be sorted by correcting the
database!"
  (match str
    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
    ;; NOTE(review): this maps one species name onto a *different* species
    ;; rather than stripping decoration like the other clauses -- confirm
    ;; that this is the intended correction.
    ["Macaca mulatta" "Macaca nemestrina"]
    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
    ;; Fallback: anything not listed above passes through untouched.
    [other other]))


;; Transformer over Genbank left-joined with Species on SpeciesId.
;; Each subject is built from Genbank.Id under the `genbank:' prefix and is
;; given its sequence and a species identifier derived from the remapped
;; Species.Fullname.
(define-transformer genbank
  (tables (Genbank
           (left-join Species "USING (SpeciesId)")))
  (schema-triples
   (gnc:nucleotide a skos:Concept)
   (gnt:hasSequence rdfs:domain gnc:nucleotide))
  (triples (ontology 'genbank: (field Genbank Id))
    (set gnt:hasSequence (field Genbank Sequence))
    (set gnt:belongsToSpecies
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))
          #:separator ""
          #:proc string-capitalize-first))))


(let* ((option-spec
        ;; `settings' is marked required: its value is passed unconditionally
        ;; to `call-with-input-file' below, so without this a missing
        ;; --settings would surface as an opaque wrong-type error on #f
        ;; instead of a getopt-long usage error.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is read as one S-expression.
       (%connection-settings
        (call-with-input-file settings
          read)))

  (with-documentation
   ;; NOTE(review): "Genebank" may be a misspelling of "Genbank"; left as-is
   ;; because the string is passed on to `with-documentation'.
   (name "Genebank Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
      ("ncbiTaxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
      ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("genbank:" "<https://bioregistry.io/reference/genbank:>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")))
   (inputs
    (list genbank))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))