about summary refs log tree commit diff
path: root/examples/genelist.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/genelist.scm')
-rwxr-xr-x examples/genelist.scm 36
1 file changed, 14 insertions, 22 deletions
diff --git a/examples/genelist.scm b/examples/genelist.scm
index 18fd30b..dbca921 100755
--- a/examples/genelist.scm
+++ b/examples/genelist.scm
@@ -77,12 +77,9 @@
    (gnt:has_target_seq rdfs:domain gnc:probeset))
   (triples
       (string->identifier
-       "gene" (regexp-substitute/global
-               #f "[^A-Za-z0-9:]"
-               (string-trim-both
-                (field ("CONCAT_WS('_', GeneSymbol, GeneID, AlignID)" GENE_UID)))
-               'pre "_" 'post)
-       #:proc (lambda (x) x))
+       "gene" (normalize-string-field (string-trim-both
+                                       (field ("CONCAT_WS('_', GeneSymbol, GeneID, AlignID)" GENE_UID))))
+       #:separator "_")
     (set rdf:type 'gnc:gene)
     (set gnt:gene_symbol (field GeneList GeneSymbol))
     (set dct:description (sanitize-rdf-string (field GeneList GeneDescription)))
@@ -225,20 +222,14 @@
                "")))
     (set gnt:chromosome (field GeneList Chromosome))
     (set gnt:tx_start (annotate-field
-                      (field GeneList tx_start)
+                      (field GeneList TxStart)
                       '^^xsd:double))
     (set gnt:tx_end (annotate-field
-                    (field GeneList tx_end)
+                    (field GeneList TxEnd)
                     '^^xsd:double))
     (set gnt:strand (string-trim-both (field GeneList Strand)))
     (set
-     gnt:belongs_to_species
-     (string->identifier
-      ""
-      (remap-species-identifiers
-       (string-trim-both (field Species Name)))
-      #:separator ""
-      #:proc string-capitalize-first))
+     gnt:belongs_to_species (string->identifier "" (remap-species-identifiers (field Species Fullname))))
     (set
      gnt:transcript
      (ontology 'transcript:
@@ -259,10 +250,11 @@
          (if (number? gene-uid)
              (number->string
               gene-uid)
-             gene-uid)))
+             gene-uid)
+         #:separator "_"))
     (set rdf:type 'gnc:gene)
     (set gnt:belongs_to_species 'gn:Rattus_norvegicus)
-    (set gnt:gene_symbol (string-trim-both (field GeneList_rn33 gene_symbol)))
+    (set gnt:gene_symbol (string-trim-both (field GeneList_rn33 geneSymbol)))
     (set gnt:chromosome (field GeneList_rn33 chromosome))
     (set gnt:tx_start (annotate-field
                       (field GeneList_rn33 txStart)
@@ -280,7 +272,7 @@
      gnc:has_kg_id
      (string-trim-both (field GeneList_rn33 kgID)))
     (set dct:references
-         (let ((symbol (field GeneList_rn33 gene_symbol)))
+         (let ((symbol (field GeneList_rn33 geneSymbol)))
            (if (not (string-blank? symbol))
                (string->symbol
                 (format #f
@@ -290,7 +282,7 @@
                         "a gnc:PantherLink"))
                "")))
     (set dct:references
-         (let ((symbol (string-trim-both (field GeneList_rn33 gene_symbol))))
+         (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol))))
            (if (not (string-blank? symbol))
                (string->symbol
                 (format #f
@@ -300,7 +292,7 @@
                         "a gnc:ebi_gwas_link"))
                "")))
     (set dct:references
-         (let ((symbol (string-trim-both (field GeneList_rn33 gene_symbol))))
+         (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol))))
            (if (not (string-blank? symbol))
                (string->symbol
                 (format #f
@@ -311,7 +303,7 @@
                         "a gnc:panther_link"))
                "")))
     (set dct:references
-         (let ((symbol (string-trim-both (field GeneList_rn33 gene_symbol))))
+         (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol))))
            (if (not (string-blank? symbol))
                (string->symbol
                 (format #f
@@ -322,7 +314,7 @@
                         "a gnc:gtex_link"))
                "")))
     (set dct:references
-         (let ((symbol (string-trim-both (field GeneList_rn33 gene_symbol))))
+         (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol))))
            (if (not (string-blank? symbol))
                (string->symbol
                 (format #f