#! /usr/bin/env guile
!#

;;; Transform dataset metadata held in the Datasets, InfoFiles and
;;; InbredSet tables into RDF triples.
;;;
;;; Command-line options:
;;;   -s, --settings FILE       (required) file whose first datum is `read'
;;;                             and used as the connection settings
;;;   -o, --output FILE         handed to with-documentation as #:rdf
;;;   -d, --documentation FILE  handed to with-documentation as #:documentation

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

(define (sanitize-or-blank value)
  "Return VALUE run through sanitize-rdf-string, or the empty string
when VALUE satisfies null? or string-blank?."
  ;; The null? check must come first: string-blank? is only reached for
  ;; non-empty-list values.  Presumably '() is how a SQL NULL column
  ;; arrives here -- TODO confirm against the transform library.
  (if (or (null? value) (string-blank? value))
      ""
      (sanitize-rdf-string value)))

;; One set of triples per Datasets row (joined with its InfoFiles and
;; InbredSet rows).  The subject identifier is built from
;; InfoFiles.InfoPageName.
(define-transformer gn:dataset->metadata
  (tables (Datasets
           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId"))
          ;; Skip monkey datasets
          "WHERE InfoFiles.InfoPageName NOT LIKE 'INIA_MacFas_%'"
          "GROUP BY Datasets.DatasetId")
  (triples (string->identifier "dataset"
                               (field InfoFiles InfoPageName)
                               #:separator "_")
    (set rdf:type 'dcat:Dataset)
    (set dct:title (normalize-string-field (field InfoFiles InfoPageName)))
    ;; "GN_AccesionId" is spelled exactly as the column is referenced in
    ;; the original script; do not "correct" it here.
    (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
    (set gnt:has_strain
         (string->identifier "set"
                             (field InbredSet Name InbredSetName)
                             #:separator "_"))
    ;; Free-text columns: blank or null values become "", everything else
    ;; is sanitized before being emitted.
    (set gnt:has_experiment_type
         (sanitize-or-blank (field InfoFiles Experiment_Type)))
    (set gnt:has_tissue_info
         (sanitize-or-blank (field Datasets AboutTissue)))
    (set gnt:has_summary
         (sanitize-or-blank (field Datasets Summary)))
    (set gnt:has_citation
         (sanitize-or-blank (field Datasets Citation)))
    (set gnt:has_samples
         (sanitize-or-blank (field InfoFiles samples)))
    (set gnt:has_specifics
         (sanitize-or-blank (field InfoFiles Specifics)))
    (set gnt:has_case_info
         (sanitize-or-blank (field Datasets AboutCases)))
    (set gnt:has_platform_info
         (sanitize-or-blank (field Datasets AboutPlatform)))
    (set gnt:has_data_processing_info
         (sanitize-or-blank (field Datasets AboutDataProcessing)))
    (set gnt:has_experiment_design
         (sanitize-or-blank (field Datasets ExperimentDesign)))
    (set gnt:has_contributors
         (sanitize-or-blank (field Datasets Contributors)))))

;; Script entry point: parse the command line, read the connection
;; settings and run the transformer.
(let* ((option-spec
        ;; --settings is marked required so that getopt-long reports a
        ;; missing option itself, instead of #f reaching
        ;; call-with-input-file below.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Datasets Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix below maps to an empty string.  That
   ;; looks like the namespace IRIs were lost somewhere -- confirm
   ;; against the canonical copy of this script before relying on the
   ;; generated prefix declarations.
   (prefixes
    '(("dct:" "")
      ("dcat:" "")
      ("gn:" "")
      ("gnc:" "")
      ("gnt:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("owl:" "")
      ("skos:" "")
      ("xkos:" "")
      ("xsd:" "")))
   (inputs (list gn:dataset->metadata))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))