diff options
| author | Munyoki Kilyungi | 2025-12-23 12:06:06 +0300 |
|---|---|---|
| committer | Munyoki Kilyungi | 2026-01-13 12:02:49 +0300 |
| commit | 1ca7e679b834ccaf53a3243d0e1c2f3f9e8d56d8 (patch) | |
| tree | 514c544706986f3edd0b3f53a89113e334a0b9a3 /examples/generif-old.scm | |
| parent | c42933e8f474d8d14eac387d5a94da6f52210629 (diff) | |
| download | gn-transform-databases-1ca7e679b834ccaf53a3243d0e1c2f3f9e8d56d8.tar.gz | |
Snake case gn/gnt/gnc identifiers.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples/generif-old.scm')
| -rwxr-xr-x | examples/generif-old.scm | 241 |
1 files changed, 0 insertions, 241 deletions
diff --git a/examples/generif-old.scm b/examples/generif-old.scm deleted file mode 100755 index ede5a28..0000000 --- a/examples/generif-old.scm +++ /dev/null @@ -1,241 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (rnrs bytevectors) - (ice-9 format) - (ice-9 getopt-long) - (ice-9 match) - (ice-9 regex) - (transform strings) - (transform sql) - (transform triples) - (transform special-forms)) - - - -(define (fix-email-id email) - (string-delete #\space email)) - -(define (investigator-attributes->id first-name last-name email) - ;; There is just one record corresponding to "Evan Williams" which - ;; does not have an email ID. To accommodate that record, we - ;; construct the investigator ID from not just the email ID, but - ;; also the first and the last names. It would be preferable to just - ;; find Evan Williams' email ID and insert it into the database. - (string->identifier "investigator" - (string-join - (list first-name last-name (fix-email-id email)) - "_"))) - - - -(define-transformer genewiki-symbols - (tables (GeneRIF_BASIC) - "GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF_BASIC symbol)))) - -;; Some symbols exist in the RIF table that don't exist in the GeneRIF -;; table. -(define-transformer generif-symbols - (tables (GeneRIF) - "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF symbol)))) - -(define-transformer gn-genewiki-entries - (tables (GeneRIF - (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") - (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") - (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id") - (left-join Investigators "ON Investigators.Email = GeneRIF.email")) - "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol") - (schema-triples - (gnc:GeneWikiEntry a rdfs:Class) - (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") - (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) - (triples - (string->identifier - "symbol" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))] - [create-time (field GeneRIF createtime EntryCreateTime)] - [pmid (field GeneRIF PubMed_ID PMID)] - [web-url (field GeneRIF weburl)] - [species (string->identifier - "" - (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)] - [categories - (remove (lambda (x) - (or (eq? x #f) - (and (string? x) - (string-null? x)))) - (remove-duplicates - (string-split-substring - (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')" - GeneCategory)) - "$$")))]) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:GNWikiEntry ; ") - (if (string? species) - "" - (format #f "gnt:belongsToSpecies ~a ; " - species)) - (format #f "rdfs:comment ~s^^xsd:string ; " - generif-comment) - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (and (not (string-null? - (string-trim-both (field GeneRIF email)))) - (not (string-null? (field Investigators Email)))) - (format #f "dct:creator ~a ; " - (investigator-attributes->id - (field Investigators FirstName) - (field Investigators LastName) - (field Investigators Email))) - "") - (if (not (null? categories)) - (format #f - "~{gnt:belongsToCategory ~s ; ~}" - categories) - "") - (if (and (string? web-url) (not (string-null? web-url))) - (format #f "foaf:homepage ~s ; " - web-url) - "") - " ] ")))))) - -(define-transformer ncbi-genewiki-entries - (tables (GeneRIF_BASIC - (left-join Species "USING (SpeciesId)")) - "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID") - (schema-triples - (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI") - (gnt:hasVersionId a owl:ObjectProperty) - (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry) - (gnt:hasVersionId skos:definition "The VersionId of this this resource")) - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol GeneRIFSymbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))] - [species-name - (string->identifier - "" - (remap-species-identifiers (field Species Fullname SpeciesFullName)) - #:separator "" - #:proc string-capitalize-first)] - [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)] - [create-time (field GeneRIF_BASIC createtime EntryCreateTime)] - [pmid (field GeneRIF_BASIC PubMed_ID PMID)] - [gene-id (field GeneRIF_BASIC GeneId)] - [version-id (field GeneRIF_BASIC VersionId)]) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:NCBIWikiEntry ; ") - (format #f "rdfs:comment ~s^^xsd:string ; " - ncbi-comment) - (format #f "gnt:belongsToSpecies ~a ; " - species-name) - (if (eq? #f taxonomic-id) - "" - (format #f "skos:notation taxon:~a ; " - taxonomic-id)) - (format #f "gnt:hasGeneId generif:~a ; " - gene-id) - (format #f "gnt:hasVersionId '~a'^^xsd:integer ; " - version-id) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - " ]")))))) - - - -(let* ((option-spec - '((settings (single-char #\s) (value #t)) - (output (single-char #\o) (value #t)) - (documentation (single-char #\d) (value #t)))) - (options (getopt-long (command-line) option-spec)) - (settings (option-ref options 'settings #f)) - (output (option-ref options 'output #f)) - (documentation (option-ref options 'documentation #f)) - (%connection-settings - (call-with-input-file settings - read))) - - (with-documentation - (name "GeneRIF Metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") - ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") - ("skos:" "<http://www.w3.org/2004/02/skos/core#>") - ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") - ("gn:" "<http://genenetwork.org/id/>") - ("gnc:" "<http://genenetwork.org/category/>") - ("gnt:" "<http://genenetwork.org/term/>") - ("dct:" "<http://purl.org/dc/terms/>") - ("foaf:" "<http://xmlns.com/foaf/0.1/>") - ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>") - ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>") - ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>") - ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") - ("owl:" "<http://www.w3.org/2002/07/owl#>"))) - (inputs - (list - genewiki-symbols - generif-symbols - gn-genewiki-entries - ncbi-genewiki-entries)) - (outputs - `(#:documentation ,documentation - #:rdf ,output)))) |
