#! /usr/bin/env guile
!#

;;; Transforms GeneRIF wiki entries held in the SQL tables `GeneRIF' and
;;; `GeneRIF_BASIC' into RDF, using the project's `define-transformer' and
;;; `with-documentation' forms.
;;;
;;; Command-line options:
;;;   -s, --settings FILE        file whose first datum is `read' and passed
;;;                              as the connection settings (required)
;;;   -o, --output FILE          passed to `with-documentation' as #:rdf
;;;   -d, --documentation FILE   passed to `with-documentation' as
;;;                              #:documentation

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (rnrs bytevectors)
             (ice-9 format)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms)
             (transform uuid))

;; Entries from the `GeneRIF' table.  Rows are left-joined with Species,
;; GeneRIFXRef and GeneCategory, limited to displayed entries that have a
;; comment, and grouped per (Id, versionId, symbol) so that the category
;; names of one entry can be aggregated with GROUP_CONCAT below.
(define-transformer gn-genewiki-entries
  (tables (GeneRIF
           (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
           (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
  (schema-triples
   (gnc:GeneWikiEntry a rdfs:Class)
   (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
   (gnt:initial a owl:ObjectProperty)
   (gnt:initial rdfs:domain gnc:GeneWikiEntry)
   (gnt:initial skos:definition "Optional user or project code or your initials")
   (gnt:reason a owl:ObjectProperty)
   (gnt:reason rdfs:domain gnc:GeneWikiEntry)
   (gnt:reason skos:definition "The reason why this resource was modified")
   (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
   ;; NOTE(review): the schema declares gnt:geneSymbol, while the triples
   ;; below emit gnt:symbol -- confirm which predicate is intended.
   (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
  (triples
      ;; Subject identifier: gn:wiki-<Id>-<versionId>.
      (format #f "gn:wiki-~a-~a"
              (field GeneRIF Id)
              (field GeneRIF versionId))
    ;; The comment is emitted as a single-quoted, English-tagged literal,
    ;; so single quotes inside it are backslash-escaped.
    (set rdfs:label
         (string->symbol
          (format #f "'~a'@en"
                  (replace-substrings
                   (sanitize-rdf-string
                    (field GeneRIF comment))
                   '(("'" . "\\'"))))))
    (set rdf:type 'gnc:GNWikiEntry)
    (set gnt:symbol (field GeneRIF symbol))
    (set gnt:belongsToSpecies
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))
          #:separator ""
          #:proc string-capitalize-first))
    ;; NOTE(review): the datatype is spelled `xsd:datetime' (XML Schema
    ;; names it `dateTime') and the format string ends with a space; both
    ;; are kept as-is because consumers may rely on the current output.
    (set dct:created
         (string->symbol
          (format #f "~s^^xsd:datetime "
                  (field ("CAST(createtime AS CHAR)" EntryCreateTime)))))
    ;; PubMed_ID is split on spaces; every non-blank piece becomes a
    ;; pubmed:<id> symbol, blank pieces become the empty string.
    (multiset dct:references
              (map (lambda (pmid)
                     (match pmid
                       ((? string-blank? p) "")
                       (p (string->symbol
                           (format #f "pubmed:~a" (string-trim-both pmid))))))
                   (string-split (field GeneRIF PubMed_ID PMID) #\space)))
    ;; Non-blank e-mail and web URL values are wrapped in angle brackets.
    (set foaf:mbox
         (match (sanitize-rdf-string (field GeneRIF email))
           ((? string-blank? mbox) "")
           (mbox (string->symbol (format #f "<~a>" mbox)))))
    (set dct:identifier
         (annotate-field (format #f "~s" (field GeneRIF Id)) '^^xsd:integer))
    (set foaf:homepage
         (match (sanitize-rdf-string (field GeneRIF weburl))
           ((? string-blank? homepage) "")
           (homepage (string->symbol (format #f "<~a>" homepage)))))
    (set dct:hasVersion
         (annotate-field (format #f "~s" (field GeneRIF versionId)) '^^xsd:integer))
    (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
    (set gnt:reason (field GeneRIF reason))
    ;; Category names arrive as one ';'-separated string per entry and are
    ;; split back into individual values here.
    (multiset gnt:belongsToCategory
              (string-split
               (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')" GeneCategory))
               #\;))))

;; Entries from the `GeneRIF_BASIC' table, left-joined with Species.
(define-transformer ncbi-genewiki-entries
  (tables (GeneRIF_BASIC
           (left-join Species "USING (SpeciesId)")))
  (schema-triples
   (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
   (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI"))
  (triples
      ;; Subject identifier:
      ;; gn:rif-<GeneId>-<PubMed_ID>-<createtime>-<VersionId>.
      ;; NOTE(review): the column is written `VersionId' here and
      ;; `versionId' further down -- confirm both resolve to the same field.
      (format #f "gn:rif-~a-~a-~a-~a"
              (field GeneRIF_BASIC GeneId)
              (field GeneRIF_BASIC PubMed_ID)
              (field ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
              (field GeneRIF_BASIC VersionId))
    ;; Every predicate of the entry is rendered by hand into one string
    ;; ("gnc:NCBIWikiEntry ;\n\t<pred> <obj> ;\n ...") that is turned into a
    ;; symbol and used as the object of rdf:type.
    ;; NOTE(review): presumably the writer emits symbols verbatim, which
    ;; makes this a single-statement shortcut -- confirm in the triples
    ;; module.
    (set rdf:type
         (let* ((comment
                 (format #f "'~a'@en"
                         (replace-substrings
                          (sanitize-rdf-string
                           (field GeneRIF_BASIC comment))
                          '(("\\" . "\\\\")
                            ("\n" . "\\n")
                            ("\r" . "\\r")
                            ("'" . "\\'")))))
                (create-time
                 (format #f "~s^^xsd:datetime"
                         (field ("CAST(createtime AS CHAR)" EntryCreateTime))))
                (symbol (field GeneRIF_BASIC symbol))
                (species
                 (string->identifier
                  ""
                  (remap-species-identifiers (field Species Fullname))
                  #:separator ""
                  #:proc string-capitalize-first))
                (gene-id (field GeneRIF_BASIC GeneId))
                (taxon-id (field GeneRIF_BASIC TaxID TaxonomicId))
                (pmid (field GeneRIF_BASIC PubMed_ID))
                (version-id (field GeneRIF_BASIC versionId)))
           (string->symbol
            (string-append
             (format #f "gnc:NCBIWikiEntry ;\n")
             (format #f "\trdfs:label ~a ;\n" comment)
             (format #f "\tgnt:belongsToSpecies ~a ;\n" species)
             (format #f "\tgnt:symbol ~s ;\n" symbol)
             (format #f "\tgnt:hasGeneId generif:~a ;\n" gene-id)
             ;; skos:notation is only written when the taxon id is a number.
             (match taxon-id
               ((? number?)
                (format #f "\tskos:notation taxon:~a ;\n" taxon-id))
               (_ ""))
             (format #f "\tdct:hasVersion \"~a\"^^xsd:integer ;\n" version-id)
             (format #f "\tdct:references pubmed:~a ;\n" pmid)
             (format #f "\tdct:created ~a" create-time)))))))

;; Entry point: parse the command line, read the connection settings and
;; run both transformers through `with-documentation'.
(let* ((option-spec
        ;; --settings is mandatory: its value is opened as a file below, so
        ;; let getopt-long report a missing option instead of failing later
        ;; inside `call-with-input-file' with #f as the file name.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is consumed with `read': only its first datum
       ;; is used.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "GeneRIF Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix maps to an empty string here -- confirm
   ;; whether the namespace IRIs are expected to be filled in.
   (prefixes
    '(("rdf:" "")
      ("rdfs:" "")
      ("skos:" "")
      ("xkos:" "")
      ("gn:" "")
      ("gnc:" "")
      ("gnt:" "")
      ("dct:" "")
      ("foaf:" "")
      ("pubmed:" "")
      ("taxon:" "")
      ("generif:" "")
      ("xsd:" "")
      ("owl:" "")))
   (inputs
    (list gn-genewiki-entries
          ncbi-genewiki-entries))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))