about summary refs log tree commit diff
path: root/examples/generif.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/generif.scm')
-rwxr-xr-x examples/generif.scm 246
1 files changed, 97 insertions, 149 deletions
diff --git a/examples/generif.scm b/examples/generif.scm
index 11235e0..628e34e 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -11,45 +11,17 @@
(transform strings)
(transform sql)
(transform triples)
- (transform special-forms)
- (transform uuid))
+ (transform special-forms))
-(define-transformer genewiki-symbols
- (tables (GeneRIF_BASIC)
- "GROUP BY BINARY symbol")
- (triples
- (string->identifier
- "symbol"
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field GeneRIF_BASIC symbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x))
- (set rdfs:label
- (field GeneRIF_BASIC symbol))))
-
-;; Some symbols exist in the RIF table that don't exist in the GeneRIF
-;; table.
-(define-transformer generif-symbols
- (tables (GeneRIF)
- "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol")
- (triples
- (string->identifier
- "symbol"
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field GeneRIF symbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x))
- (set rdfs:label
- (field GeneRIF symbol))))
-
(define-transformer gn-genewiki-entries
(tables (GeneRIF
(left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
(left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
(left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
- "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason")
+ "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL
+GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
(schema-triples
(gnc:GeneWikiEntry a rdfs:Class)
(gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
@@ -62,130 +34,108 @@
(gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
(gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
(triples
- (string->identifier
- "symbol"
- (regexp-substitute/global
- #f "[^A-Za-z0-9:]"
- (field GeneRIF symbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x))
- (set rdfs:comment
- (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment)))
- (create-time (field GeneRIF createtime EntryCreateTime))
- (pmid (field GeneRIF PubMed_ID PMID))
- (web-url (field GeneRIF weburl))
- (species (string->identifier
- ""
- (remap-species-identifiers (field Species Fullname))
- #:separator ""
- #:proc string-capitalize-first))
- (version-id (field GeneRIF versionId))
- (identifier (field GeneRIF Id))
- (initial (sanitize-rdf-string (field GeneRIF initial)))
- (reason (field GeneRIF reason))
- (email (sanitize-rdf-string (field GeneRIF email)))
- (category
- (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')"
- GeneCategory))))
- (string->symbol
- (string-append
- "[ "
- (format #f "rdf:type gnc:GNWikiEntry ; ")
- (if (string? species)
- ""
- (format #f "gnt:belongsToSpecies ~a ; "
- species))
- (format #f "rdfs:comment ~s^^xsd:string ; "
- generif-comment)
- (if (string? create-time)
- ""
- (format #f "dct:created ~s^^xsd:datetime ; "
- (time-unix->string
- create-time "~5")))
- (if (and (string? pmid) (not (string-null? pmid)))
- (format #f
- "~{dct:references pubmed:~a ; ~}"
- (string-split pmid #\space))
- "")
- (if (string-blank? email)
- ""
- (format #f "foaf:mbox ~s ; " email))
- (format #f "dct:identifier ~s ; " identifier)
- (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id)
- (if (string-blank? reason)
- ""
- (format #f "gnt:reason ~s ; " reason))
- (if (or (null? initial)
- (string-blank? initial))
- "" (format #f "gnt:initial ~s ; " initial))
- (if (string-blank? category)
- ""
- (format #f
- "gnt:belongsToCategory ~s ; "
- category))
- (if (and (string? web-url) (not (string-null? web-url)))
- (format #f "foaf:homepage ~s ; "
- web-url)
- "")
- " ] "))))))
+ (format
+ #f "gn:wiki-~a-~a"
+ (field GeneRIF Id)
+ (field GeneRIF versionId))
+ (set rdfs:label (string->symbol
+ (format #f "'~a'@en"
+ (replace-substrings
+ (sanitize-rdf-string
+ (field GeneRIF comment))
+ '(("'" . "\\'"))))))
+ (set rdf:type 'gnc:GNWikiEntry)
+ (set gnt:symbol (field GeneRIF symbol))
+ (set gnt:belongsToSpecies (string->identifier
+ ""
+ (remap-species-identifiers (field Species Fullname))
+ #:separator ""
+ #:proc string-capitalize-first))
+ (set dct:created
+ (string->symbol
+ (format #f "~s^^xsd:datetime "
+ (field
+ ("CAST(createtime AS CHAR)" EntryCreateTime)))))
+ (multiset dct:references
+ (map (lambda (pmid)
+ (match pmid
+ ((? string-blank? p) "")
+ (p (string->symbol
+ (format #f "pubmed:~a" (string-trim-both pmid))))))
+ (string-split (field GeneRIF PubMed_ID PMID)
+ #\space)))
+ (set foaf:mbox
+ (match (sanitize-rdf-string (field GeneRIF email))
+ ((? string-blank? mbox) "")
+ (mbox (string->symbol
+ (format #f "<~a>" mbox)))))
+ (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id))
+ '^^xsd:integer))
+ (set foaf:homepage
+ (match (sanitize-rdf-string (field GeneRIF weburl))
+ ((? string-blank? homepage) "")
+ (homepage (string->symbol
+ (format #f "<~a>" homepage)))))
+ (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId))
+ '^^xsd:integer))
+ (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
+ (set gnt:reason (field GeneRIF reason))
+ (multiset gnt:belongsToCategory
+ (string-split
+ (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
+ GeneCategory))
+ #\;))))
(define-transformer ncbi-genewiki-entries
(tables (GeneRIF_BASIC
- (left-join Species "USING (SpeciesId)"))
- "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID")
+ (left-join Species "USING (SpeciesId)")))
(schema-triples
(gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
- (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")
- (gnt:hasVersionId a owl:ObjectProperty)
- (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry)
- (gnt:hasVersionId skos:definition "The VersionId of this this resource"))
+ (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI"))
(triples
- (string->identifier
- "symbol"
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field GeneRIF_BASIC symbol GeneRIFSymbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x))
- (set rdfs:comment
- (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))]
- [species-name
- (string->identifier
- ""
- (remap-species-identifiers (field Species Fullname SpeciesFullName))
- #:separator ""
- #:proc string-capitalize-first)]
- [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)]
- [create-time (field GeneRIF_BASIC createtime EntryCreateTime)]
- [pmid (field GeneRIF_BASIC PubMed_ID PMID)]
- [gene-id (field GeneRIF_BASIC GeneId)]
- [version-id (field GeneRIF_BASIC VersionId)])
+ (format
+ #f "gn:rif-~a-~a-~a-~a"
+ (field GeneRIF_BASIC GeneId)
+ (field GeneRIF_BASIC PubMed_ID)
+ (field
+ ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
+ (field GeneRIF_BASIC VersionId))
+ (set rdf:type
+ (let* ((comment (format #f "'~a'@en"
+ (replace-substrings
+ (sanitize-rdf-string
+ (field GeneRIF_BASIC comment))
+ '(("\\" . "\\\\")
+ ("\n" . "\\n")
+ ("\r" . "\\r")
+ ("'" . "\\'")))))
+ (create-time (format #f "~s^^xsd:datetime"
+ (field
+ ("CAST(createtime AS CHAR)" EntryCreateTime))))
+ (symbol (field GeneRIF_BASIC symbol))
+ (species (string->identifier
+ ""
+ (remap-species-identifiers (field Species Fullname))
+ #:separator ""
+ #:proc string-capitalize-first))
+ (gene-id (field GeneRIF_BASIC GeneId))
+ (taxon-id (field GeneRIF_BASIC TaxID TaxonomicId))
+ (pmid (field GeneRIF_BASIC PubMed_ID))
+ (version-id (field GeneRIF_BASIC versionId)))
(string->symbol
(string-append
- "[ "
- (format #f "rdf:type gnc:NCBIWikiEntry ; ")
- (format #f "rdfs:comment ~s^^xsd:string ; "
- ncbi-comment)
- (format #f "gnt:belongsToSpecies ~a ; "
- species-name)
- (if (eq? #f taxonomic-id)
- ""
- (format #f "skos:notation taxon:~a ; "
- taxonomic-id))
- (format #f "gnt:hasGeneId generif:~a ; "
- gene-id)
- (format #f "dct:hasVersion '~a'^^xsd:int ; "
- version-id)
- (if (and (string? pmid) (not (string-null? pmid)))
- (format #f
- "~{dct:references pubmed:~a ; ~}"
- (string-split pmid #\space))
- "")
- (if (string? create-time)
- ""
- (format #f "dct:created ~s^^xsd:datetime ; "
- (time-unix->string
- create-time "~5")))
- " ]"))))))
+ (format #f "gnc:NCBIWikiEntry ;\n")
+ (format #f "\trdfs:label ~a ;\n" comment)
+ (format #f "\tgnt:belongsToSpecies ~a ;\n" species)
+ (format #f "\tgnt:symbol ~s ;\n" symbol)
+ (format #f "\tgnt:hasGeneId generif:~a ;\n" gene-id)
+ (match taxon-id
+ ((? number? x)
+ (format #f "\tskos:notation taxon:~a ;\n" taxon-id))
+ (else ""))
+ (format #f "\tdct:hasVersion \"~a\"^^xsd:integer ;\n" version-id)
+ (format #f "\tdct:references pubmed:~a ;\n" pmid)
+ (format #f "\tdct:created ~a" create-time)))))))
@@ -222,8 +172,6 @@
("owl:" "<http://www.w3.org/2002/07/owl#>")))
(inputs
(list
- genewiki-symbols
- generif-symbols
gn-genewiki-entries
ncbi-genewiki-entries))
(outputs