From 1ca7e679b834ccaf53a3243d0e1c2f3f9e8d56d8 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 23 Dec 2025 12:06:06 +0300 Subject: Snake case gn/gnt/gnc identifiers. Signed-off-by: Munyoki Kilyungi --- examples/generif-old.scm | 241 ----------------------------------------------- 1 file changed, 241 deletions(-) delete mode 100755 examples/generif-old.scm (limited to 'examples/generif-old.scm') diff --git a/examples/generif-old.scm b/examples/generif-old.scm deleted file mode 100755 index ede5a28..0000000 --- a/examples/generif-old.scm +++ /dev/null @@ -1,241 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (rnrs bytevectors) - (ice-9 format) - (ice-9 getopt-long) - (ice-9 match) - (ice-9 regex) - (transform strings) - (transform sql) - (transform triples) - (transform special-forms)) - - - -(define (fix-email-id email) - (string-delete #\space email)) - -(define (investigator-attributes->id first-name last-name email) - ;; There is just one record corresponding to "Evan Williams" which - ;; does not have an email ID. To accommodate that record, we - ;; construct the investigator ID from not just the email ID, but - ;; also the first and the last names. It would be preferable to just - ;; find Evan Williams' email ID and insert it into the database. - (string->identifier "investigator" - (string-join - (list first-name last-name (fix-email-id email)) - "_"))) - - - -(define-transformer genewiki-symbols - (tables (GeneRIF_BASIC) - "GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF_BASIC symbol)))) - -;; Some symbols exist in the RIF table that don't exist in the GeneRIF -;; table. -(define-transformer generif-symbols - (tables (GeneRIF) - "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF symbol)))) - -(define-transformer gn-genewiki-entries - (tables (GeneRIF - (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") - (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") - (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id") - (left-join Investigators "ON Investigators.Email = GeneRIF.email")) - "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol") - (schema-triples - (gnc:GeneWikiEntry a rdfs:Class) - (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") - (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) - (triples - (string->identifier - "symbol" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))] - [create-time (field GeneRIF createtime EntryCreateTime)] - [pmid (field GeneRIF PubMed_ID PMID)] - [web-url (field GeneRIF weburl)] - [species (string->identifier - "" - (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)] - [categories - (remove (lambda (x) - (or (eq? x #f) - (and (string? x) - (string-null? x)))) - (remove-duplicates - (string-split-substring - (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')" - GeneCategory)) - "$$")))]) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:GNWikiEntry ; ") - (if (string? species) - "" - (format #f "gnt:belongsToSpecies ~a ; " - species)) - (format #f "rdfs:comment ~s^^xsd:string ; " - generif-comment) - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (and (not (string-null? - (string-trim-both (field GeneRIF email)))) - (not (string-null? (field Investigators Email)))) - (format #f "dct:creator ~a ; " - (investigator-attributes->id - (field Investigators FirstName) - (field Investigators LastName) - (field Investigators Email))) - "") - (if (not (null? categories)) - (format #f - "~{gnt:belongsToCategory ~s ; ~}" - categories) - "") - (if (and (string? web-url) (not (string-null? web-url))) - (format #f "foaf:homepage ~s ; " - web-url) - "") - " ] ")))))) - -(define-transformer ncbi-genewiki-entries - (tables (GeneRIF_BASIC - (left-join Species "USING (SpeciesId)")) - "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID") - (schema-triples - (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI") - (gnt:hasVersionId a owl:ObjectProperty) - (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry) - (gnt:hasVersionId skos:definition "The VersionId of this this resource")) - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol GeneRIFSymbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))] - [species-name - (string->identifier - "" - (remap-species-identifiers (field Species Fullname SpeciesFullName)) - #:separator "" - #:proc string-capitalize-first)] - [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)] - [create-time (field GeneRIF_BASIC createtime EntryCreateTime)] - [pmid (field GeneRIF_BASIC PubMed_ID PMID)] - [gene-id (field GeneRIF_BASIC GeneId)] - [version-id (field GeneRIF_BASIC VersionId)]) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:NCBIWikiEntry ; ") - (format #f "rdfs:comment ~s^^xsd:string ; " - ncbi-comment) - (format #f "gnt:belongsToSpecies ~a ; " - species-name) - (if (eq? #f taxonomic-id) - "" - (format #f "skos:notation taxon:~a ; " - taxonomic-id)) - (format #f "gnt:hasGeneId generif:~a ; " - gene-id) - (format #f "gnt:hasVersionId '~a'^^xsd:integer ; " - version-id) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - " ]")))))) - - - -(let* ((option-spec - '((settings (single-char #\s) (value #t)) - (output (single-char #\o) (value #t)) - (documentation (single-char #\d) (value #t)))) - (options (getopt-long (command-line) option-spec)) - (settings (option-ref options 'settings #f)) - (output (option-ref options 'output #f)) - (documentation (option-ref options 'documentation #f)) - (%connection-settings - (call-with-input-file settings - read))) - - (with-documentation - (name "GeneRIF Metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("rdf:" "") - ("rdfs:" "") - ("skos:" "") - ("xkos:" "") - ("gn:" "") - ("gnc:" "") - ("gnt:" "") - ("dct:" "") - ("foaf:" "") - ("pubmed:" "") - ("taxon:" "") - ("generif:" "") - ("xsd:" "") - ("owl:" ""))) - (inputs - (list - genewiki-symbols - generif-symbols - gn-genewiki-entries - ncbi-genewiki-entries)) - (outputs - `(#:documentation ,documentation - #:rdf ,output)))) -- cgit 1.4.1