#! /usr/bin/env guile
!#
(use-modules (srfi srfi-1)
(srfi srfi-26)
(rnrs bytevectors)
(ice-9 format)
(ice-9 getopt-long)
(ice-9 match)
(ice-9 regex)
(transform strings)
(transform sql)
(transform triples)
(transform special-forms))
;; Transformer for GeneRIF entries that were entered through GeneNetwork.
;;
;; Rows are read from GeneRIF left-joined with Species, GeneRIFXRef and
;; GeneCategory, keeping only rows with display > 0 and a non-NULL comment,
;; grouped by (Id, versionId, symbol).  Each row yields one subject named
;; gn:wiki-<Id>-<versionId> of type gnc:GNWikiEntry.
;;
;; NOTE(review): the schema section declares a domain for gnt:geneSymbol
;; while the triples section emits gnt:symbol -- confirm which is intended.
;; NOTE(review): "xsd:datetime" differs from the XSD spelling
;; ("xsd:dateTime"); left as-is because consumers may match on the current
;; spelling -- confirm before changing.
(define-transformer gn-genewiki-entries
  (tables (GeneRIF
           (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
           (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
          ;; The continuation line of this SQL string is kept flush left so
          ;; the string contents stay exactly as they were.
          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL
GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
  (schema-triples
   (gnc:GeneWikiEntry a rdfs:Class)
   (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
   (gnt:initial a owl:ObjectProperty)
   (gnt:initial rdfs:domain gnc:GeneWikiEntry)
   (gnt:initial skos:definition "Optional user or project code or your initials")
   (gnt:reason a owl:ObjectProperty)
   (gnt:reason rdfs:domain gnc:GeneWikiEntry)
   (gnt:reason skos:definition "The reason why this resource was modified")
   (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
   (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
  (triples
      ;; Subject: one node per (entry id, version id) pair.
      (format #f "gn:wiki-~a-~a"
              (field GeneRIF Id)
              (field GeneRIF versionId))
    ;; The label is a hand-built single-quoted, English-tagged literal, so
    ;; single quotes inside the sanitized comment are backslash-escaped.
    (set rdfs:label
         (let ((escaped-comment
                (replace-substrings
                 (sanitize-rdf-string (field GeneRIF comment))
                 '(("'" . "\\'")))))
           (string->symbol (format #f "'~a'@en" escaped-comment))))
    (set rdf:type 'gnc:GNWikiEntry)
    (set gnt:symbol (field GeneRIF symbol))
    (set gnt:belongsToSpecies
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))
          #:separator ""
          #:proc string-capitalize-first))
    ;; Creation time: the CAST result is written with ~s and given a
    ;; datatype suffix.
    (set dct:created
         (let ((created-at
                (field ("CAST(createtime AS CHAR)" EntryCreateTime))))
           (string->symbol (format #f "~s^^xsd:datetime " created-at))))
    ;; PubMed_ID is split on spaces; blank pieces map to "" and every other
    ;; piece becomes a trimmed pubmed:<id> symbol.
    (multiset dct:references
              (map (lambda (token)
                     (if (string-blank? token)
                         ""
                         (string->symbol
                          (format #f "pubmed:~a" (string-trim-both token)))))
                   (string-split (field GeneRIF PubMed_ID PMID)
                                 #\space)))
    ;; A blank e-mail yields ""; anything else is wrapped in <...>.
    (set foaf:mbox
         (let ((address (sanitize-rdf-string (field GeneRIF email))))
           (if (string-blank? address)
               ""
               (string->symbol (format #f "<~a>" address)))))
    (set dct:identifier
         (annotate-field (format #f "~s" (field GeneRIF Id))
                         '^^xsd:integer))
    ;; Same treatment as foaf:mbox, applied to the web URL.
    (set foaf:homepage
         (let ((url (sanitize-rdf-string (field GeneRIF weburl))))
           (if (string-blank? url)
               ""
               (string->symbol (format #f "<~a>" url)))))
    (set dct:hasVersion
         (annotate-field (format #f "~s" (field GeneRIF versionId))
                         '^^xsd:integer))
    (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
    (set gnt:reason (field GeneRIF reason))
    ;; Category names are aggregated in SQL with ';' as separator and split
    ;; back into individual values here.
    (multiset gnt:belongsToCategory
              (string-split
               (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
                       GeneCategory))
               #\;))))
;; Transformer for GeneRIF entries obtained from NCBI.
;;
;; Rows are read from GeneRIF_BASIC left-joined with Species on SpeciesId.
;; Each row yields one subject named
;; gn:rif-<GeneId>-<PubMed_ID>-<CreateTime>-<VersionId>.
;;
;; All predicate/object pairs of an entry are packed into the single symbol
;; handed to `set rdf:type': it starts with gnc:NCBIWikiEntry and continues
;; with " ;\n\t"-separated pairs, the last of which has no terminator.
;;
;; NOTE(review): the subject uses `VersionId' while dct:hasVersion uses
;; `versionId' -- confirm both spellings resolve to the same column.
;; NOTE(review): "xsd:datetime" differs from the XSD spelling
;; ("xsd:dateTime"); left as-is because consumers may match on the current
;; spelling -- confirm before changing.
(define-transformer ncbi-genewiki-entries
  (tables (GeneRIF_BASIC
           (left-join Species "USING (SpeciesId)")))
  (schema-triples
   (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
   (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI"))
  (triples
      (format #f "gn:rif-~a-~a-~a-~a"
              (field GeneRIF_BASIC GeneId)
              (field GeneRIF_BASIC PubMed_ID)
              (field ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
              (field GeneRIF_BASIC VersionId))
    (set rdf:type
         ;; The bindings do not refer to one another, so a plain `let' is
         ;; sufficient.
         (let ((label-literal
                ;; Single-quoted, English-tagged literal; backslashes, line
                ;; breaks and single quotes in the sanitized comment are
                ;; escaped in that order.
                (format #f "'~a'@en"
                        (replace-substrings
                         (sanitize-rdf-string
                          (field GeneRIF_BASIC comment))
                         '(("\\" . "\\\\")
                           ("\n" . "\\n")
                           ("\r" . "\\r")
                           ("'" . "\\'")))))
               (created-literal
                (format #f "~s^^xsd:datetime"
                        (field
                         ("CAST(createtime AS CHAR)" EntryCreateTime))))
               (gene-symbol (field GeneRIF_BASIC symbol))
               (species-id
                (string->identifier
                 ""
                 (remap-species-identifiers (field Species Fullname))
                 #:separator ""
                 #:proc string-capitalize-first))
               (ncbi-gene-id (field GeneRIF_BASIC GeneId))
               (taxon-id (field GeneRIF_BASIC TaxID TaxonomicId))
               (pubmed-id (field GeneRIF_BASIC PubMed_ID))
               (version (field GeneRIF_BASIC versionId)))
           (string->symbol
            (string-append
             "gnc:NCBIWikiEntry ;\n"
             (format #f "\trdfs:label ~a ;\n" label-literal)
             (format #f "\tgnt:belongsToSpecies ~a ;\n" species-id)
             (format #f "\tgnt:symbol ~s ;\n" gene-symbol)
             (format #f "\tgnt:hasGeneId generif:~a ;\n" ncbi-gene-id)
             ;; The taxon notation is only emitted for numeric taxon ids.
             (if (number? taxon-id)
                 (format #f "\tskos:notation taxon:~a ;\n" taxon-id)
                 "")
             (format #f "\tdct:hasVersion \"~a\"^^xsd:integer ;\n" version)
             (format #f "\tdct:references pubmed:~a ;\n" pubmed-id)
             (format #f "\tdct:created ~a" created-literal)))))))
;; Script entry point.
;;
;; Command-line options:
;;   -s, --settings FILE        file holding one readable Scheme datum that is
;;                              passed on as the connection settings (required)
;;   -o, --output FILE          value handed to the #:rdf output
;;   -d, --documentation FILE   value handed to the #:documentation output
;;
;; --settings is declared `required?' so that getopt-long reports a missing
;; option itself.  Without that, `option-ref' returns #f and
;; `call-with-input-file' fails with an unrelated wrong-type error.
(let* ((option-spec
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is parsed with `read', so only its first datum
       ;; is used.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "GeneRIF Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Prefix/namespace pairs for the prefixed names used by the transformers.
   (prefixes
    '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
      ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
      ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")))
   (inputs
    (list
     gn-genewiki-entries
     ncbi-genewiki-entries))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))