about summary refs log tree commit diff
path: root/examples/generif-old.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/generif-old.scm')
-rwxr-xr-xexamples/generif-old.scm241
1 files changed, 0 insertions, 241 deletions
diff --git a/examples/generif-old.scm b/examples/generif-old.scm
deleted file mode 100755
index ede5a28..0000000
--- a/examples/generif-old.scm
+++ /dev/null
@@ -1,241 +0,0 @@
-#! /usr/bin/env guile
-!#
-
-(use-modules (srfi srfi-1)
-             (srfi srfi-26)
-             (rnrs bytevectors)
-             (ice-9 format)
-             (ice-9 getopt-long)
-             (ice-9 match)
-             (ice-9 regex)
-             (transform strings)
-             (transform sql)
-             (transform triples)
-             (transform special-forms))
-
-
-
-(define (fix-email-id email)
-  (string-delete #\space email))
-
-(define (investigator-attributes->id first-name last-name email)
-  ;; There is just one record corresponding to "Evan Williams" which
-  ;; does not have an email ID. To accommodate that record, we
-  ;; construct the investigator ID from not just the email ID, but
-  ;; also the first and the last names. It would be preferable to just
-  ;; find Evan Williams' email ID and insert it into the database.
-  (string->identifier "investigator"
-                      (string-join
-                       (list first-name last-name (fix-email-id email))
-                       "_")))
-
-
-
-(define-transformer genewiki-symbols
-  (tables (GeneRIF_BASIC)
-          "GROUP BY BINARY symbol")
-  (triples
-      (string->identifier
-       "symbol"
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field GeneRIF_BASIC symbol)
-                                 'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:label
-         (field GeneRIF_BASIC symbol))))
-
-;; Some symbols exist in the RIF table that don't exist in the GeneRIF
-;; table.
-(define-transformer generif-symbols
-  (tables (GeneRIF)
-          "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol")
-  (triples
-      (string->identifier
-       "symbol"
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field GeneRIF symbol)
-                                 'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:label
-         (field GeneRIF symbol))))
-
-(define-transformer gn-genewiki-entries
-  (tables (GeneRIF
-           (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
-           (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
-           (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")
-           (left-join Investigators "ON Investigators.Email = GeneRIF.email"))
-          "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol")
-  (schema-triples
-   (gnc:GeneWikiEntry a rdfs:Class)
-   (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
-   (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
-   (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
-  (triples
-      (string->identifier
-       "symbol"
-       (regexp-substitute/global
-        #f "[^A-Za-z0-9:]"
-        (field GeneRIF symbol)
-        'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:comment
-         (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))]
-                [create-time (field GeneRIF createtime EntryCreateTime)]
-                [pmid (field GeneRIF PubMed_ID PMID)]
-                [web-url (field GeneRIF weburl)]
-                [species (string->identifier
-                          ""
-                          (remap-species-identifiers (field Species Fullname))
-                          #:separator ""
-                          #:proc string-capitalize-first)]
-                [categories
-                 (remove (lambda (x)
-                           (or (eq? x #f)
-                               (and (string? x)
-                                    (string-null? x))))
-                         (remove-duplicates
-                          (string-split-substring
-                           (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')"
-                                   GeneCategory))
-                           "$$")))])
-           (string->symbol
-            (string-append
-             "[ "
-             (format #f "rdf:type gnc:GNWikiEntry ; ")
-             (if (string? species)
-                 ""
-                 (format #f "gnt:belongsToSpecies ~a ; "
-                         species))
-             (format #f "rdfs:comment ~s^^xsd:string ; "
-                     generif-comment)
-             (if (string? create-time)
-                 ""
-                 (format #f "dct:created ~s^^xsd:datetime ; "
-                         (time-unix->string
-                          create-time "~5")))
-             (if (and (string? pmid) (not (string-null? pmid)))
-                 (format #f
-                         "~{dct:references pubmed:~a ; ~}"
-                         (string-split pmid #\space))
-                 "")
-             (if (and (not (string-null?
-                            (string-trim-both (field GeneRIF email))))
-                      (not (string-null? (field Investigators Email))))
-                 (format #f "dct:creator ~a ; "
-                         (investigator-attributes->id
-                          (field Investigators FirstName)
-                          (field Investigators LastName)
-                          (field Investigators Email)))
-                 "")
-             (if (not (null? categories))
-                 (format #f
-                         "~{gnt:belongsToCategory ~s ; ~}"
-                         categories)
-                 "")
-             (if (and (string? web-url) (not (string-null? web-url)))
-                 (format #f "foaf:homepage ~s ; "
-                         web-url)
-                 "")
-             " ] "))))))
-
-(define-transformer ncbi-genewiki-entries
-  (tables (GeneRIF_BASIC
-           (left-join Species "USING (SpeciesId)"))
-          "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID")
-  (schema-triples
-   (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
-   (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")
-   (gnt:hasVersionId a owl:ObjectProperty)
-   (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry)
-   (gnt:hasVersionId skos:definition "The VersionId of this this resource"))
-  (triples
-      (string->identifier
-       "symbol"
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field GeneRIF_BASIC symbol GeneRIFSymbol)
-                                 'pre "_" 'post)
-       #:proc (lambda (x) x))
-    (set rdfs:comment
-         (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))]
-               [species-name
-                (string->identifier
-                 ""
-                 (remap-species-identifiers (field Species Fullname SpeciesFullName))
-                 #:separator ""
-                 #:proc string-capitalize-first)]
-               [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)]
-               [create-time (field GeneRIF_BASIC createtime EntryCreateTime)]
-               [pmid (field GeneRIF_BASIC PubMed_ID PMID)]
-               [gene-id (field GeneRIF_BASIC GeneId)]
-               [version-id (field GeneRIF_BASIC VersionId)])
-           (string->symbol
-            (string-append
-             "[ "
-             (format #f "rdf:type gnc:NCBIWikiEntry ; ")
-             (format #f "rdfs:comment ~s^^xsd:string ; "
-                     ncbi-comment)
-             (format #f "gnt:belongsToSpecies ~a ; "
-                     species-name)
-             (if (eq? #f taxonomic-id)
-                 ""
-                 (format #f "skos:notation taxon:~a ; "
-                         taxonomic-id))
-             (format #f "gnt:hasGeneId generif:~a ; "
-                     gene-id)
-             (format #f "gnt:hasVersionId '~a'^^xsd:integer ; "
-                     version-id)
-             (if (and (string? pmid) (not (string-null? pmid)))
-                 (format #f
-                         "~{dct:references pubmed:~a ; ~}"
-                         (string-split pmid #\space))
-                 "")
-             (if (string? create-time)
-                 ""
-                 (format #f "dct:created ~s^^xsd:datetime ; "
-                         (time-unix->string
-                          create-time "~5")))
-             " ]"))))))
-
-
-
-(let* ((option-spec
-        '((settings (single-char #\s) (value #t))
-          (output (single-char #\o) (value #t))
-          (documentation (single-char #\d) (value #t))))
-       (options (getopt-long (command-line) option-spec))
-       (settings (option-ref options 'settings #f))
-       (output (option-ref options 'output #f))
-       (documentation (option-ref options 'documentation #f))
-       (%connection-settings
-        (call-with-input-file settings
-          read)))
-
-  (with-documentation
-   (name "GeneRIF Metadata")
-   (connection %connection-settings)
-   (table-metadata? #f)
-   (prefixes
-    '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
-      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
-      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
-      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
-      ("gn:" "<http://genenetwork.org/id/>")
-      ("gnc:" "<http://genenetwork.org/category/>")
-      ("gnt:" "<http://genenetwork.org/term/>")
-      ("dct:" "<http://purl.org/dc/terms/>")
-      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
-      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
-      ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
-      ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
-      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
-      ("owl:" "<http://www.w3.org/2002/07/owl#>")))
-   (inputs
-    (list
-     genewiki-symbols
-     generif-symbols
-     gn-genewiki-entries
-     ncbi-genewiki-entries))
-   (outputs
-    `(#:documentation ,documentation
-      #:rdf ,output))))