diff options
Diffstat (limited to 'examples/datasets.scm')
| -rwxr-xr-x | examples/datasets.scm | 120 |
1 files changed, 120 insertions, 0 deletions
#! /usr/bin/env guile
!#

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Convert the free-text column VALUE into the string emitted as an
;; RDF object.
;;
;; Returns "" when VALUE satisfies `null?' or `string-blank?';
;; otherwise returns the result of `sanitize-rdf-string' on VALUE.
;; NOTE(review): `null?' is presumably how a missing column value
;; arrives from the SQL layer -- confirm against (transform sql).
(define (text-field->rdf-string value)
  (if (or (null? value) (string-blank? value))
      ""
      (sanitize-rdf-string value)))


;; Transformer describing one dcat:Dataset per row of Datasets joined
;; with InfoFiles and InbredSet.  The subject identifier is built from
;; InfoFiles.InfoPageName; every free-text column goes through
;; `text-field->rdf-string' so that empty values are emitted as "".
(define-transformer gn:dataset->metadata
  (tables (Datasets
           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId"))
          ;; Skip monkey datasets
          "WHERE InfoFiles.InfoPageName NOT LIKE 'INIA_MacFas_%'"
          "GROUP BY Datasets.DatasetId")
  (triples (string->identifier "dataset" (field InfoFiles InfoPageName) #:separator "_")
    (set rdf:type 'dcat:Dataset)
    (set dct:title (normalize-string-field (field InfoFiles InfoPageName)))
    (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
    ;; The object is a symbol in the gn-files: namespace; "%2F" is kept
    ;; verbatim from the original format string.
    (set gnt:has_genotype_files
         (string->symbol
          (format #f "gn-files:GN~a%2F" (field InfoFiles GN_AccesionId))))
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
    ;; Free-text metadata columns: blank or missing values become "".
    (set gnt:has_experiment_type
         (text-field->rdf-string (field InfoFiles Experiment_Type)))
    (set gnt:has_tissue_info
         (text-field->rdf-string (field Datasets AboutTissue)))
    (set gnt:has_summary
         (text-field->rdf-string (field Datasets Summary)))
    (set gnt:has_citation
         (text-field->rdf-string (field Datasets Citation)))
    (set gnt:has_samples
         (text-field->rdf-string (field InfoFiles samples)))
    (set gnt:has_specifics
         (text-field->rdf-string (field InfoFiles Specifics)))
    (set gnt:has_case_info
         (text-field->rdf-string (field Datasets AboutCases)))
    (set gnt:has_platform_info
         (text-field->rdf-string (field Datasets AboutPlatform)))
    (set gnt:has_data_processing_info
         (text-field->rdf-string (field Datasets AboutDataProcessing)))
    (set gnt:has_experiment_design
         (text-field->rdf-string (field Datasets ExperimentDesign)))
    (set gnt:has_contributors
         (text-field->rdf-string (field Datasets Contributors)))))


;; Script entry point.
;;
;; Command-line options (each takes a value):
;;   -s / --settings       file whose first datum is `read' and used as
;;                         the connection settings (required)
;;   -o / --output         passed to `with-documentation' as #:rdf
;;   -d / --documentation  passed to `with-documentation' as
;;                         #:documentation
(let* ((option-spec
        ;; `settings' is required: it is handed directly to
        ;; `call-with-input-file' below, which cannot accept the #f
        ;; default.  Letting getopt-long reject the command line gives
        ;; a clear "option must be specified" style error instead.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Datasets Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("dct:" "<http://purl.org/dc/terms/>")
      ("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("gn-files:" "<http://files.genenetwork.org/current/>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
   (inputs
    (list gn:dataset->metadata))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))
