#! /usr/bin/env guile
!#

;;; Dump GeneRIF ("gene wiki") metadata as RDF.
;;;
;;; Two transformers are defined below, one over the GeneRIF table and
;;; one over the GeneRIF_BASIC table.  The driver at the bottom of the
;;; file parses the command line, reads the connection settings and
;;; hands both transformers to `with-documentation'.
;;;
;;; Command-line options (see `option-spec' in the driver):
;;;   -s, --settings FILE        file holding the connection settings (required)
;;;   -o, --output FILE          passed on as the #:rdf output
;;;   -d, --documentation FILE   passed on as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (rnrs bytevectors)
             (ice-9 format)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;;; Transformer over GeneRIF, left-joined with Species, GeneRIFXRef and
;;; GeneCategory.  Only rows with `display > 0' and a non-NULL comment
;;; are selected, grouped by (Id, versionId, symbol) so that the
;;; category names can be aggregated with GROUP_CONCAT.
(define-transformer gn-genewiki-entries
  (tables (GeneRIF
           (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
           (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
  (triples
      ;; Subject: built from the entry id, version id and creation time.
      ;; NOTE(review): the seed string ends in "wikii" whereas
      ;; dct:identifier below uses "wiki".  This looks like a typo, but
      ;; changing it would presumably change every generated subject
      ;; identifier, so it is deliberately left untouched -- confirm.
      (string->identifier
       ""
       (gn-uuid (format #f "~a.~a.~a?type=wikii"
                        (field GeneRIF Id)
                        (field GeneRIF versionId)
                        (field GeneRIF createtime)))
       #:url-char #\-)
    (set dct:identifier
         (gn-uuid (format #f "~a?type=wiki" (field GeneRIF Id))))
    ;; The comment becomes a single-quoted, English-tagged literal, so
    ;; single quotes inside the text are backslash-escaped.
    (set rdfs:label
         (string->symbol
          (format #f "'~a'@en"
                  (replace-substrings
                   (sanitize-rdf-string (field GeneRIF comment))
                   '(("'" . "\\'"))))))
    (set rdf:type 'gnc:gn_wiki_entry)
    (set gnt:symbol (field GeneRIF symbol))
    (set gnt:has_species
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))))
    ;; NOTE(review): this format string carries a trailing space that
    ;; the equivalent one in `ncbi-genewiki-entries' does not have.
    ;; Kept byte-for-byte; confirm whether it is intentional.
    (set dct:created
         (string->symbol
          (format #f "~s^^xsd:datetime "
                  (field ("CAST(createtime AS CHAR)" EntryCreateTime)))))
    ;; PubMed_ID is split on spaces; blank pieces map to the empty
    ;; string and every other piece to a `pubmed:' symbol.
    (multiset dct:references
              (map (lambda (pmid)
                     (match pmid
                       ((? string-blank? p) "")
                       (p (string->symbol
                           (format #f "pubmed:~a" (string-trim-both pmid))))))
                   (string-split (field GeneRIF PubMed_ID PMID) #\space)))
    ;; Hide e-mail for now.
    ;; (set foaf:mbox
    ;;      (match (sanitize-rdf-string (field GeneRIF email))
    ;;        ((? string-blank? mbox) "")
    ;;        (mbox (string->symbol
    ;;               (format #f "<~a>" mbox)))))
    ;; A blank web URL maps to the empty string; anything else is
    ;; wrapped in angle brackets.
    (set foaf:homepage
         (match (sanitize-rdf-string (field GeneRIF weburl))
           ((? string-blank? homepage) "")
           (homepage (string->symbol (format #f "<~a>" homepage)))))
    (set dct:hasVersion
         (annotate-field (format #f "~s" (field GeneRIF versionId))
                         '^^xsd:integer))
    (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
    (set gnt:reason (field GeneRIF reason))
    ;; Category names are concatenated with ';' in SQL and split again
    ;; here, giving one triple per category.
    (multiset gnt:belongs_to_category
              (string-split
               (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
                       GeneCategory))
               #\;))))


;;; Transformer over GeneRIF_BASIC, left-joined with Species on
;;; SpeciesId.
(define-transformer ncbi-genewiki-entries
  (tables (GeneRIF_BASIC
           (left-join Species "USING (SpeciesId)")))
  (triples
      ;; Subject: built from the gene id, PubMed id, formatted creation
      ;; time and version id.
      (string->identifier
       ""
       (gn-uuid (format #f "~a_~a_~a_~a"
                        (field GeneRIF_BASIC GeneId)
                        (field GeneRIF_BASIC PubMed_ID)
                        (field ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
                        (field GeneRIF_BASIC VersionId)))
       #:url-char #\-)
    (set rdf:type 'gnc:ncbi_wiki_entry)
    ;; Backslashes, newlines, carriage returns and single quotes in the
    ;; comment are escaped before it is wrapped as '...'@en.
    ;; NOTE(review): unlike `gn-genewiki-entries', the label here is
    ;; left as a string rather than converted with `string->symbol' --
    ;; confirm that the difference is intended.
    (set rdfs:label
         (format #f "'~a'@en"
                 (replace-substrings
                  (sanitize-rdf-string (field GeneRIF_BASIC comment))
                  '(("\\" . "\\\\")
                    ("\n" . "\\n")
                    ("\r" . "\\r")
                    ("'" . "\\'")))))
    (set gnt:symbol (field GeneRIF_BASIC symbol))
    (set gnt:has_species
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))))
    (set skos:notation
         (ontology 'taxon: (field GeneRIF_BASIC TaxID TaxonomicId)))
    (set dct:hasVersion
         (annotate-field (field GeneRIF_BASIC versionId) '^^xsd:integer))
    (set gnt:has_gene_id
         (ontology 'generif: (field GeneRIF_BASIC GeneId)))
    (set dct:references
         (ontology 'pubmed: (field GeneRIF_BASIC PubMed_ID)))
    (set dct:created
         (string->symbol
          (format #f "~s^^xsd:datetime"
                  (field ("CAST(createtime AS CHAR)" EntryCreateTime)))))))


;;; Driver: parse the command line, load the connection settings and
;;; run both transformers.
(let* ((option-spec
        ;; --settings is marked required: its value is handed straight
        ;; to `call-with-input-file' below, so a missing option would
        ;; otherwise surface as a wrong-type error on #f instead of a
        ;; readable command-line diagnostic from getopt-long.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is consumed with `read', i.e. the first
       ;; Scheme datum in the file is used as the connection settings.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "GeneRIF Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix is paired with an empty string here;
   ;; presumably the namespace IRIs were lost somewhere -- confirm
   ;; against the intended vocabulary before relying on the output.
   (prefixes
    '(("rdf:" "")
      ("rdfs:" "")
      ("skos:" "")
      ("xkos:" "")
      ("gn:" "")
      ("gnc:" "")
      ("gnt:" "")
      ("dct:" "")
      ("foaf:" "")
      ("pubmed:" "")
      ("taxon:" "")
      ("generif:" "")
      ("xsd:" "")
      ("owl:" "")))
   (inputs
    (list gn-genewiki-entries
          ncbi-genewiki-entries))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))