diff options
Diffstat (limited to 'examples/probesets.scm')
| -rwxr-xr-x | examples/probesets.scm | 134 |
1 files changed, 134 insertions, 0 deletions
#! /usr/bin/env guile
!#

;;; examples/probesets.scm
;;;
;;; Command-line script that declares one transformer over the ProbeSet
;;; table (left-joined to GeneChip) and hands it to `with-documentation',
;;; together with an RDF output path and a documentation output path.
;;;
;;; Options:
;;;   -s, --settings FILE        file holding a single readable datum with
;;;                              the connection settings (required)
;;;   -o, --output FILE          passed through as the #:rdf output
;;;   -d, --documentation FILE   passed through as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms)
             (web uri))

;; Transformer describing how each named ProbeSet row is turned into
;; triples.  Rows whose Name is NULL or blank are excluded by the WHERE
;; clause.  The numeric and cross-reference columns are wrapped in
;; IFNULL(..., '') so that a NULL column is read as the empty string.
(define-transformer probeset->metadata
  (tables (ProbeSet
           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))
          "WHERE ProbeSet.Name IS NOT NULL AND TRIM(ProbeSet.Name) != ''")
  (triples
      ;; Subject of every triple below: identifier built from the name.
      (string->identifier "probeset" (field ProbeSet Name))
    (set rdf:type 'gnc:probeset)
    (set skos:prefLabel (field ProbeSet Name))
    ;; The alias column is split on ';' and each piece is emitted as a
    ;; separate trimmed altLabel.
    (multiset skos:altLabel
              (map string-trim-both
                   (string-split (sanitize-rdf-string (field ProbeSet alias))
                                 #\;)))
    (set gnt:uses_genechip
         (string->identifier "platform"
                             (field GeneChip Name)
                             #:separator "_"))
    (set gnt:has_target_id
         (string-trim-both (sanitize-rdf-string (field ProbeSet TargetId))))
    ;; NOTE(review): unlike the neighbouring text columns, Symbol is not
    ;; passed through sanitize-rdf-string -- confirm this is intentional.
    (set gnt:symbol (string-trim-both (field ProbeSet Symbol)))
    (set dct:description (sanitize-rdf-string (field ProbeSet description)))
    (set gnt:targets_region
         (string-trim-both
          (sanitize-rdf-string (field ProbeSet Probe_set_target_region))))
    (set gnt:chr (field ProbeSet Chr))
    ;; Positions are annotated with the ^^xsd:double datatype suffix.
    (set gnt:mb
         (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb))
                         '^^xsd:double))
    (set gnt:mb_mm8
         (annotate-field (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8))
                         '^^xsd:double))
    (set gnt:has_specificity
         (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
                 Probe_set_specificity)))
    (set gnt:has_blat_score
         (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
                 Probe_set_BLAT_score)))
    (set gnt:has_blat_mb_start
         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
                                 Probe_set_Blat_Mb_start))
                         '^^xsd:double))
    (set gnt:has_blat_mb_end
         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
                                 Probe_set_Blat_Mb_end))
                         '^^xsd:double))
    (set gnt:has_blat_seq (sanitize-rdf-string (field ProbeSet BlatSeq)))
    (set gnt:has_target_seq (sanitize-rdf-string (field ProbeSet TargetSeq)))
    ;; Cross-references: each id is URI-encoded and passed to `ontology'
    ;; with the matching prefix symbol.
    (set gnt:has_homologene_id
         (ontology 'homologene:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.HomoloGeneID, '')"
                            HomoloGeneID)))))
    (set gnt:has_uniprot_id
         (ontology 'uniprot:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.UniProtID, '')"
                            UniProtID)))))
    (set gnt:has_pub_chem_id
         (ontology 'pubchem:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.PubChem_ID, '')"
                            PubChem_ID)))))
    (set gnt:has_kegg_id
         (ontology 'kegg:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.KEGG_ID, '')"
                            KEGG_ID)))))
    (set gnt:has_omim_id
         (ontology 'omim:
                   (uri-encode
                    (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')"
                                        OMIM))))
                      (if (number? omim)
                          ;; uri-encode only accepts strings, so a numeric
                          ;; value has to be converted before encoding.
                          (number->string omim)
                          ;; Otherwise drop every non-digit character.
                          (regexp-substitute/global
                           #f "[^0-9]"
                           omim
                           'pre "" 'post))))))
    (set gnt:has_chebi_id
         (ontology 'chebi:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.ChEBI_ID, '')"
                            ChEBI_ID)))))))




;; Entry point: parse the command line, read the connection settings and
;; run the transformer inside `with-documentation'.
(let* ((option-spec
        ;; --settings is marked required so that a missing option is
        ;; reported by getopt-long itself instead of surfacing later as a
        ;; wrong-type error when #f is handed to call-with-input-file.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is read as one Scheme datum.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (call-with-target-database
   %connection-settings
   (lambda (db)
     (with-documentation
      (name "ProbeSet Metadata")
      (connection %connection-settings)
      (table-metadata? #f)
      ;; NOTE(review): this counts every ProbeSet row, whereas the
      ;; transformer skips rows with a NULL/blank Name -- confirm that
      ;; the unfiltered count is what total-rows expects.
      (total-rows (assoc-ref
                   (sql-find db "SELECT count(*) AS count from ProbeSet")
                   "count"))
      (rows-per-chunk 1000000)
      (prefixes
       '(("gn:" "<http://genenetwork.org/id/>")
         ("probeset:" "<http://genenetwork.org/probeset/>")
         ("gnc:" "<http://genenetwork.org/category/>")
         ("gnt:" "<http://genenetwork.org/term/>")
         ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
         ("kegg:" "<http://bio2rdf.org/ns/kegg#>")
         ("pubchem:" "<https://pubchem.ncbi.nlm.nih.gov/>")
         ("omim:" "<https://www.omim.org/entry/>")
         ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
         ("uniprot:" "<http://purl.uniprot.org/uniprot/>")
         ("chebi:" "<http://purl.obolibrary.org/obo/CHEBI_>")
         ("dct:" "<http://purl.org/dc/terms/>")
         ("owl:" "<http://www.w3.org/2002/07/owl#>")
         ("homologene:" "<https://bio2rdf.org/homologene:>")
         ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
         ("qb:" "<http://purl.org/linked-data/cube#>")
         ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
         ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
      (inputs
       (list probeset->metadata))
      (outputs
       `(#:documentation ,documentation
         #:rdf ,output))))))
