about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-xexamples/generif-old.scm230
1 files changed, 230 insertions, 0 deletions
diff --git a/examples/generif-old.scm b/examples/generif-old.scm
new file mode 100755
index 0000000..ba6768d
--- /dev/null
+++ b/examples/generif-old.scm
@@ -0,0 +1,230 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (srfi srfi-1)
+             (srfi srfi-26)
+             (rnrs bytevectors)
+             (ice-9 format)
+             (ice-9 getopt-long)
+             (ice-9 match)
+             (ice-9 regex)
+             (transform strings)
+             (transform sql)
+             (transform triples)
+             (transform special-forms))
+
+
+
+;; One labelled node per gene symbol in GeneRIF_BASIC (rows are grouped
+;; with "GROUP BY BINARY symbol").  The subject is built by
+;; string->identifier from the symbol with every character outside
+;; [A-Za-z0-9:] replaced by "_"; the untouched symbol becomes the
+;; rdfs:label.
+(define-transformer genewiki-symbols
+  (tables (GeneRIF_BASIC)
+          "GROUP BY BINARY symbol")
+  (triples
+      (string->identifier
+       "symbol"
+       (regexp-substitute/global
+        #f "[^A-Za-z0-9:]" (field GeneRIF_BASIC symbol)
+        'pre "_" 'post)
+       ;; No extra transformation is requested from string->identifier.
+       #:proc identity)
+    (set rdfs:label (field GeneRIF_BASIC symbol))))
+
+;; Some symbols exist in the GeneRIF table that don't exist in the
+;; GeneRIF_BASIC table.
+(define-transformer generif-symbols
+  ;; Only GeneRIF rows whose symbol is absent from GeneRIF_BASIC are
+  ;; selected, grouped with "GROUP BY BINARY symbol".
+  (tables (GeneRIF)
+          "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol")
+  (triples
+      (string->identifier
+       "symbol"
+       ;; Every character outside [A-Za-z0-9:] becomes "_".
+       (regexp-substitute/global
+        #f "[^A-Za-z0-9:]" (field GeneRIF symbol)
+        'pre "_" 'post)
+       ;; No extra transformation is requested from string->identifier.
+       #:proc identity)
+    (set rdfs:label (field GeneRIF symbol))))
+
+;; GeneRIF entries entered from GeneNetwork.  Selects GeneRIF rows with
+;; display > 0 and a non-NULL comment, left-joined to Species,
+;; GeneRIFXRef and GeneCategory.  For every row the subject is the
+;; sanitised gene symbol and the rdfs:comment object is a Turtle blank
+;; node ("[ ... ]") assembled as text and wrapped in a symbol.
+;; Optional predicates are left out when their source value is missing
+;; or blank.
+(define-transformer gn-genewiki-entries
+  (tables (GeneRIF
+           (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
+           (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
+           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
+          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason")
+  (schema-triples
+   (gnc:GeneWikiEntry a rdfs:Class)
+   (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
+   (gnt:initial a owl:ObjectProperty)
+   (gnt:initial rdfs:domain gnc:GeneWikiEntry)
+   (gnt:initial skos:definition "Optional user or project code or your initials")
+   (gnt:reason a owl:ObjectProperty)
+   (gnt:reason rdfs:domain gnc:GeneWikiEntry)
+   (gnt:reason skos:definition "The reason why this resource was modified")
+   (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
+   (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
+  (triples
+      (string->identifier
+       "symbol"
+       ;; Every character outside [A-Za-z0-9:] becomes "_".
+       (regexp-substitute/global
+        #f "[^A-Za-z0-9:]"
+        (field GeneRIF symbol)
+        'pre "_" 'post)
+       #:proc (lambda (x) x))
+    (set rdfs:comment
+         (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment)))
+                (create-time (field GeneRIF createtime EntryCreateTime))
+                (pmid (field GeneRIF PubMed_ID PMID))
+                (web-url (field GeneRIF weburl))
+                (species (string->identifier
+                          ""
+                          (remap-species-identifiers (field Species Fullname))
+                          #:separator ""
+                          #:proc string-capitalize-first))
+                (version-id (field GeneRIF versionId))
+                (identifier (field GeneRIF Id))
+                (initial (sanitize-rdf-string (field GeneRIF initial)))
+                (reason (field GeneRIF reason))
+                (email (sanitize-rdf-string (field GeneRIF email)))
+                (category
+                 (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')"
+                         GeneCategory))))
+           (string->symbol
+            (string-append
+             "[ "
+             "rdf:type gnc:GNWikiEntry ; "
+             ;; NOTE(review): a string here presumably means that
+             ;; string->identifier had no species name to work with;
+             ;; confirm against its definition.
+             (if (string? species)
+                 ""
+                 (format #f "gnt:belongsToSpecies ~a ; "
+                         species))
+             (format #f "rdfs:comment ~s^^xsd:string ; "
+                     generif-comment)
+             ;; A string create-time is skipped; any other value is
+             ;; formatted by time-unix->string.  The XSD datatype is
+             ;; spelled "dateTime" (capital T).
+             (if (string? create-time)
+                 ""
+                 (format #f "dct:created ~s^^xsd:dateTime ; "
+                         (time-unix->string
+                          create-time "~5")))
+             ;; string-tokenize never yields empty tokens, so doubled,
+             ;; leading or trailing blanks in the PMID column cannot
+             ;; produce an empty "pubmed:" reference.
+             (if (and (string? pmid) (not (string-null? pmid)))
+                 (format #f
+                         "~{dct:references pubmed:~a ; ~}"
+                         (string-tokenize pmid))
+                 "")
+             (if (string-blank? email)
+                 ""
+                 (format #f "foaf:mbox ~s ; " email))
+             (format #f "dct:identifier ~s ; " identifier)
+             (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id)
+             (if (string-blank? reason)
+                 ""
+                 (format #f "gnt:reason ~s ; " reason))
+             (if (or (null? initial)
+                      (string-blank? initial))
+                 "" (format #f "gnt:initial ~s ; " initial))
+             (if (string-blank? category)
+                 ""
+                 (format #f
+                         "gnt:belongsToCategory ~s ; "
+                         category))
+             (if (and (string? web-url) (not (string-null? web-url)))
+                 (format #f "foaf:homepage ~s ; "
+                         web-url)
+                 "")
+             " ] "))))))
+
+;; GeneRIF entries obtained from NCBI.  Selects GeneRIF_BASIC rows whose
+;; comment and symbol are non-empty after TRIM, left-joined to Species.
+;; For every row the subject is the sanitised gene symbol and the
+;; rdfs:comment object is a Turtle blank node ("[ ... ]") assembled as
+;; text and wrapped in a symbol.
+(define-transformer ncbi-genewiki-entries
+  (tables (GeneRIF_BASIC
+           (left-join Species "USING (SpeciesId)"))
+          "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID")
+  (schema-triples
+   (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
+   (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")
+   (gnt:hasVersionId a owl:ObjectProperty)
+   (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry)
+   (gnt:hasVersionId skos:definition "The VersionId of this resource"))
+  (triples
+      (string->identifier
+       "symbol"
+       ;; Every character outside [A-Za-z0-9:] becomes "_".
+       (regexp-substitute/global #f "[^A-Za-z0-9:]"
+                                 (field GeneRIF_BASIC symbol GeneRIFSymbol)
+                                 'pre "_" 'post)
+       #:proc (lambda (x) x))
+    (set rdfs:comment
+         (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))]
+               [species-name
+                (string->identifier
+                 ""
+                 (remap-species-identifiers (field Species Fullname SpeciesFullName))
+                 #:separator ""
+                 #:proc string-capitalize-first)]
+               [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)]
+               [create-time (field GeneRIF_BASIC createtime EntryCreateTime)]
+               [pmid (field GeneRIF_BASIC PubMed_ID PMID)]
+               [gene-id (field GeneRIF_BASIC GeneId)]
+               [version-id (field GeneRIF_BASIC VersionId)])
+           (string->symbol
+            (string-append
+             "[ "
+             "rdf:type gnc:NCBIWikiEntry ; "
+             (format #f "rdfs:comment ~s^^xsd:string ; "
+                     ncbi-comment)
+             ;; Same guard as gn-genewiki-entries: Species is
+             ;; left-joined, so the name may be missing.
+             ;; NOTE(review): a string here presumably means that
+             ;; string->identifier had no species name to work with;
+             ;; confirm against its definition.
+             (if (string? species-name)
+                 ""
+                 (format #f "gnt:belongsToSpecies ~a ; "
+                         species-name))
+             (if (not taxonomic-id)
+                 ""
+                 (format #f "skos:notation taxon:~a ; "
+                         taxonomic-id))
+             (format #f "gnt:hasGeneId generif:~a ; "
+                     gene-id)
+             (format #f "dct:hasVersion '~a'^^xsd:int ; "
+                     version-id)
+             ;; string-tokenize never yields empty tokens, so doubled,
+             ;; leading or trailing blanks in the PMID column cannot
+             ;; produce an empty "pubmed:" reference.
+             (if (and (string? pmid) (not (string-null? pmid)))
+                 (format #f
+                         "~{dct:references pubmed:~a ; ~}"
+                         (string-tokenize pmid))
+                 "")
+             ;; A string create-time is skipped; any other value is
+             ;; formatted by time-unix->string.  The XSD datatype is
+             ;; spelled "dateTime" (capital T).
+             (if (string? create-time)
+                 ""
+                 (format #f "dct:created ~s^^xsd:dateTime ; "
+                         (time-unix->string
+                          create-time "~5")))
+             " ]"))))))
+
+
+
+;; Script entry point.  Parses -s/--settings, -o/--output and
+;; -d/--documentation, reads one datum from the settings file to use as
+;; the connection settings, and hands the four transformers to
+;; with-documentation.
+(let* ((option-spec
+        ;; --settings is mandatory: its value is opened as a file below,
+        ;; so let getopt-long report its absence instead of failing
+        ;; later inside call-with-input-file with a wrong-type error.
+        '((settings (single-char #\s) (value #t) (required? #t))
+          (output (single-char #\o) (value #t))
+          (documentation (single-char #\d) (value #t))))
+       (options (getopt-long (command-line) option-spec))
+       (settings (option-ref options 'settings #f))
+       (output (option-ref options 'output #f))
+       (documentation (option-ref options 'documentation #f))
+       (%connection-settings
+        (call-with-input-file settings
+          read)))
+
+  (with-documentation
+   (name "GeneRIF Metadata")
+   (connection %connection-settings)
+   (table-metadata? #f)
+   (prefixes
+    '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
+      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
+      ("gn:" "<http://genenetwork.org/id/>")
+      ("gnc:" "<http://genenetwork.org/category/>")
+      ("gnt:" "<http://genenetwork.org/term/>")
+      ("dct:" "<http://purl.org/dc/terms/>")
+      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
+      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
+      ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
+      ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
+      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+      ("owl:" "<http://www.w3.org/2002/07/owl#>")))
+   (inputs
+    (list
+     genewiki-symbols
+     generif-symbols
+     gn-genewiki-entries
+     ncbi-genewiki-entries))
+   (outputs
+    `(#:documentation ,documentation
+      #:rdf ,output))))