#! /usr/bin/env guile
!#

;;; Transform dataset metadata (the Datasets, InfoFiles and InbredSet
;;; tables) into RDF triples, and optionally emit documentation for the
;;; transform.
;;;
;;; Command-line options (each takes a value):
;;;   -s, --settings       file whose first datum is read as the
;;;                        connection settings (required)
;;;   -o, --output         passed to `with-documentation' as #:rdf
;;;   -d, --documentation  passed to `with-documentation' as
;;;                        #:documentation

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

(define (blank-or-sanitized value)
  "Return the empty string when VALUE is null or a blank string;
otherwise return VALUE passed through `sanitize-rdf-string'."
  (if (or (null? value) (string-blank? value))
      ""
      (sanitize-rdf-string value)))

(define-transformer gn:dataset->metadata
  (tables (Datasets
           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId"))
          ;; Skip monkey datasets
          "WHERE InfoFiles.InfoPageName NOT LIKE 'INIA_MacFas_%'"
          "GROUP BY Datasets.DatasetId")
  ;; NOTE(review): every predicate below is declared an
  ;; owl:ObjectProperty, yet the `triples' section assigns them string
  ;; values -- confirm whether owl:DatatypeProperty was intended.
  (schema-triples
   (gnt:has_case_info a owl:ObjectProperty)
   (gnt:has_case_info rdfs:comment "Information about the cases used in this platform")
   (gnt:has_case_info rdfs:domain dcat:Dataset)
   (gnt:has_case_info rdfs:label "About Case")
   (gnt:has_citation a owl:ObjectProperty)
   (gnt:has_citation rdfs:comment "Citation for this dataset")
   (gnt:has_citation rdfs:domain dcat:Dataset)
   (gnt:has_citation rdfs:label "Citation")
   (gnt:has_contributors a owl:ObjectProperty)
   (gnt:has_contributors rdfs:comment "Contributors of this resource")
   (gnt:has_contributors rdfs:domain dcat:Dataset)
   (gnt:has_contributors rdfs:label "Contributors")
   (gnt:has_data_processing_info a owl:ObjectProperty)
   (gnt:has_data_processing_info rdfs:comment "Information about how this dataset was processed")
   (gnt:has_data_processing_info rdfs:domain dcat:Dataset)
   (gnt:has_data_processing_info rdfs:label "About Data Processing")
   (gnt:has_experiment_design a owl:ObjectProperty)
   (gnt:has_experiment_design rdfs:comment "Experiment Design for this resource")
   (gnt:has_experiment_design rdfs:domain dcat:Dataset)
   (gnt:has_experiment_design rdfs:label "Experiment Design")
   (gnt:has_experiment_design_info a owl:ObjectProperty)
   (gnt:has_experiment_design_info rdfs:comment "Information about how the experiment was designed")
   (gnt:has_experiment_design_info rdfs:domain dcat:Dataset)
   (gnt:has_experiment_design_info rdfs:label "Experiment Design")
   (gnt:has_experiment_type a owl:ObjectProperty)
   (gnt:has_experiment_type rdfs:comment "Information about the experiment type")
   (gnt:has_experiment_type rdfs:domain dcat:Dataset)
   (gnt:has_experiment_type rdfs:label "Experiment Type Metadata")
   (gnt:has_platform_info a owl:ObjectProperty)
   (gnt:has_platform_info rdfs:comment "Information about the platform that was used with this dataset")
   (gnt:has_platform_info rdfs:domain dcat:Dataset)
   (gnt:has_platform_info rdfs:label "About Platform")
   (gnt:has_samples a owl:ObjectProperty)
   (gnt:has_samples rdfs:domain dcat:Dataset)
   (gnt:has_samples rdfs:label "Samples")
   (gnt:has_specifics a owl:ObjectProperty)
   (gnt:has_specifics rdfs:comment "Has specifics")
   (gnt:has_specifics rdfs:domain dcat:Dataset)
   (gnt:has_specifics rdfs:label "Specifics")
   (gnt:has_summary a owl:ObjectProperty)
   (gnt:has_summary rdfs:comment "Summary information about dataset")
   (gnt:has_summary rdfs:domain dcat:Dataset)
   (gnt:has_summary rdfs:label "Summary")
   (gnt:has_tissue_info a owl:ObjectProperty)
   (gnt:has_tissue_info rdfs:domain dcat:Dataset)
   (gnt:has_tissue_info rdfs:label "Metadata about Tissue for this resource"))
  ;; The subject identifier is built from InfoFiles.InfoPageName.
  (triples (string->identifier "dataset"
                               (field InfoFiles InfoPageName)
                               #:separator "_")
    (set rdf:type 'dcat:Dataset)
    (set dct:title
         (normalize-string-field (field InfoFiles InfoPageName)))
    (set dct:identifier
         (format #f "GN~a" (field InfoFiles GN_AccesionId)))
    (set gnt:has_strain
         (string->identifier "set"
                             (field InbredSet Name InbredSetName)
                             #:separator "_"))
    ;; Free-text fields: null or blank values become "", everything
    ;; else goes through `sanitize-rdf-string' (see `blank-or-sanitized').
    (set gnt:has_experiment_type
         (blank-or-sanitized (field InfoFiles Experiment_Type)))
    (set gnt:has_tissue_info
         (blank-or-sanitized (field Datasets AboutTissue)))
    (set gnt:has_summary
         (blank-or-sanitized (field Datasets Summary)))
    (set gnt:has_citation
         (blank-or-sanitized (field Datasets Citation)))
    (set gnt:has_samples
         (blank-or-sanitized (field InfoFiles samples)))
    (set gnt:has_specifics
         (blank-or-sanitized (field InfoFiles Specifics)))
    (set gnt:has_case_info
         (blank-or-sanitized (field Datasets AboutCases)))
    (set gnt:has_platform_info
         (blank-or-sanitized (field Datasets AboutPlatform)))
    (set gnt:has_data_processing_info
         (blank-or-sanitized (field Datasets AboutDataProcessing)))
    (set gnt:has_experiment_design
         (blank-or-sanitized (field Datasets ExperimentDesign)))
    (set gnt:has_contributors
         (blank-or-sanitized (field Datasets Contributors)))))

;; Script entry point: parse the command line, load the connection
;; settings and run the transformer under `with-documentation'.
(let* ((option-spec
        '((settings (single-char #\s) (value #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (begin
          ;; Without this guard a missing --settings reaches
          ;; `call-with-input-file' as #f and fails with an opaque
          ;; wrong-type error.
          (unless settings
            (format (current-error-port)
                    "error: the --settings (-s) option is required~%")
            (exit 1))
          (call-with-input-file settings read))))
  (with-documentation
   (name "Datasets Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): the IRI paired with every prefix is the empty
   ;; string -- confirm this is intended and not lost content.
   (prefixes
    '(("dct:" "")
      ("dcat:" "")
      ("gn:" "")
      ("gnc:" "")
      ("gnt:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("owl:" "")
      ("skos:" "")
      ("xkos:" "")
      ("xsd:" "")))
   (inputs
    (list gn:dataset->metadata))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))