#! /usr/bin/env guile
!#

;;; Transformers that describe GeneNetwork molecular traits (tissues) and
;;; the probeset datasets attached to them as RDF triples.
;;;
;;; Command-line options:
;;;   -s, --settings FILE        file whose first datum is read and used as
;;;                              the connection settings (required)
;;;   -o, --output FILE          passed to with-documentation as #:rdf
;;;   -d, --documentation FILE   passed to with-documentation as
;;;                              #:documentation

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Shared by every free-text InfoFiles column in gn:dataset->metadata so
;; that the "blank means empty string" rule lives in exactly one place.
(define (sanitized-or-empty text)
  "Return the empty string when TEXT is the empty list or a blank
string; otherwise return TEXT passed through sanitize-rdf-string."
  (if (or (null? text)
          (string-blank? text))
      ""
      (sanitize-rdf-string text)))


;; One gnc:molecular_trait node per Tissue row, identified by the tissue
;; short name, plus the schema triples declaring the class itself.
(define-transformer gn:molecular-traits->gn:datasets
  (tables (Tissue))
  (schema-triples
   (gnc:molecular_trait a owl:Class)
   (gnc:molecular_trait a skos:Concept)
   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479)
   (gnc:molecular_trait rdfs:label "Molecular Trait. This describes a molecular trait of a given species. We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups."))
  (triples (string->identifier "trait" (field Tissue Short_Name)
                               #:separator "_")
    (set rdf:type 'gnc:molecular_trait)
    (set skos:prefLabel (field Tissue Name))
    (set skos:altLabel (field Tissue Short_Name))))

;; Lists every trait node as a skos:member of gnc:molecular_trait.
(define-transformer gnc:molecular_trait->gn:molecular_trait
  (tables (Tissue))
  (triples "gnc:molecular_trait"
    (set skos:member
         (string->identifier "trait" (field Tissue Short_Name)
                             #:separator "_"))))

;; Links a set (InbredSet) node to the public probeset datasets collected
;; by GROUP_CONCAT for each result row.
;;
;; NOTE(review): the query groups by Species.Name and Tissue.Short_Name
;; while the subject is built from InbredSet.Name, which is not a grouped
;; column.  Confirm that this grouping is intended; grouping by
;; InbredSet.Name may be what was meant.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, Tissue.Short_Name")
  (triples (string->identifier "set" (field InbredSet Name InbredSetName)
                               #:separator "_")
    (multiset gnt:has_probeset_data
              ;; The concatenated names are split on "," and each one is
              ;; turned into a dataset identifier.
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    (field ("GROUP_CONCAT(ProbeSetFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))))

;; Links each public probeset dataset to its set, species and molecular
;; trait, and declares the gnt:has_molecular_trait property.
(define-transformer gn:dataset->set/species/molecular_trait
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
   (gnt:has_molecular_trait rdfs:domain gnc:set)
   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait)
   (gnt:has_molecular_trait rdfs:label "has molecular trait"))
  (triples (string->identifier "dataset" (field ProbeSetFreeze Name)
                               #:separator "_")
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName)
                             #:separator "_"))
    (set gnt:has_species
         (string->identifier ""
                             (remap-species-identifiers
                              (field Species Fullname))))
    (set gnt:has_molecular_trait
         (string->identifier "trait" (field Tissue Short_Name)
                             #:separator "_"))))

;; Descriptive metadata for each public probeset dataset, taken mostly
;; from the matching InfoFiles row (joined on InfoPageName =
;; ProbeSetFreeze.Name).  GeneChip is left-joined, unlike the other tables.
(define-transformer gn:dataset->metadata
  (tables (ProbeSetFreeze
           (inner-join ProbeFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join InbredSet "ON InbredSet.Id = ProbeFreeze.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id")
           (inner-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
           (inner-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (left-join GeneChip "ON GeneChip.Id = InfoFiles.GeneChipId"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_case_info a owl:ObjectProperty)
   (gnt:has_case_info rdfs:comment "Information about the cases used in this platform")
   (gnt:has_case_info rdfs:domain dcat:Dataset)
   (gnt:has_case_info rdfs:label "About Case")
   (gnt:has_citation a owl:ObjectProperty)
   (gnt:has_citation rdfs:comment "Citation for this dataset")
   (gnt:has_citation rdfs:domain dcat:Dataset)
   (gnt:has_citation rdfs:label "Citation")
   (gnt:has_contributors a owl:ObjectProperty)
   (gnt:has_contributors rdfs:comment "Contributors of this resource")
   (gnt:has_contributors rdfs:domain dcat:Dataset)
   (gnt:has_contributors rdfs:label "Contributors")
   (gnt:has_data_processing_info a owl:ObjectProperty)
   (gnt:has_data_processing_info rdfs:comment "Information about how this dataset was processed")
   (gnt:has_data_processing_info rdfs:domain dcat:Dataset)
   (gnt:has_data_processing_info rdfs:label "About Data Processing")
   (gnt:has_experiment_design a owl:ObjectProperty)
   (gnt:has_experiment_design rdfs:comment "Experiment Design for this resource")
   (gnt:has_experiment_design rdfs:domain dcat:Dataset)
   (gnt:has_experiment_design rdfs:label "Experiment Design")
   (gnt:has_experiment_design_info a owl:ObjectProperty)
   (gnt:has_experiment_design_info rdfs:comment "Information about how the experiment was designed")
   (gnt:has_experiment_design_info rdfs:domain dcat:Dataset)
   (gnt:has_experiment_design_info rdfs:label "Experiment Design")
   (gnt:has_experiment_type a owl:ObjectProperty)
   (gnt:has_experiment_type rdfs:comment "Information about the experiment type")
   (gnt:has_experiment_type rdfs:domain dcat:Dataset)
   (gnt:has_experiment_type rdfs:label "Experiment Type Metadata")
   (gnt:has_platform_info a owl:ObjectProperty)
   (gnt:has_platform_info rdfs:comment "Information about the platform that was used with this dataset")
   (gnt:has_platform_info rdfs:domain dcat:Dataset)
   (gnt:has_platform_info rdfs:label "About Platform")
   (gnt:has_samples a owl:ObjectProperty)
   (gnt:has_samples rdfs:domain dcat:Dataset)
   (gnt:has_samples rdfs:label "Samples")
   (gnt:has_specifics a owl:ObjectProperty)
   (gnt:has_specifics rdfs:comment "Has specifics")
   (gnt:has_specifics rdfs:domain dcat:Dataset)
   (gnt:has_specifics rdfs:label "Specifics")
   (gnt:has_summary a owl:ObjectProperty)
   (gnt:has_summary rdfs:comment "Summary information about dataset")
   (gnt:has_summary rdfs:domain dcat:Dataset)
   (gnt:has_summary rdfs:label "Summary")
   (gnt:has_tissue_info a owl:ObjectProperty)
   (gnt:has_tissue_info rdfs:domain dcat:Dataset)
   (gnt:has_tissue_info rdfs:label "Metadata about Tissue for this resource")
   (gnt:uses_genechip a owl:ObjectProperty)
   (gnt:uses_genechip rdfs:domain dcat:Dataset)
   (gnt:uses_genechip skos:definition "The Platform this resource uses")
   ;; This type declaration used to sit after the closing parenthesis of
   ;; schema-triples, i.e. outside the schema; it belongs here with the
   ;; property's other schema triples.
   (gnt:uses_normalization_method a owl:ObjectProperty)
   (gnt:uses_normalization_method rdfs:comment "The method used to map genetic or experimental data for this resource.")
   (gnt:uses_normalization_method rdfs:domain dcat:Dataset)
   (gnt:uses_normalization_method rdfs:label "Averaging method")
   (gnt:uses_normalization_method rdfs:range gnc:avg_method))
  (triples (string->identifier "dataset" (field ProbeSetFreeze Name)
                               #:separator "_")
    (set rdf:type 'dcat:Dataset)
    (set skos:prefLabel (field ProbeSetFreeze Name))
    (set dct:title
         (normalize-string-field (field InfoFiles InfoPageName)))
    (set rdfs:label
         (normalize-string-field (field InfoFiles InfoPageName)))
    ;; NOTE(review): the XSD datatype is spelled xsd:dateTime; confirm
    ;; whether the lower-case spelling below is intentional.
    (set dct:created
         (annotate-field (field ProbeSetFreeze CreateTime)
                         '^^xsd:datetime))
    (set gnt:uses_normalization_method
         (string->identifier "avg_method"
                             (field AvgMethod Name AvgMethodName)
                             #:separator "_"))
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName)
                             #:separator "_"))
    (set gnt:has_species
         (string->identifier ""
                             (remap-species-identifiers
                              (field Species Fullname))))
    (set gnt:has_molecular_trait
         (string->identifier "trait" (field Tissue Short_Name)
                             #:separator "_"))
    (set gnt:uses_genechip
         (string->identifier "platform" (field GeneChip Name)
                             #:separator "_"))
    (set dct:identifier
         (format #f "GN~a" (field InfoFiles GN_AccesionId)))
    ;; Free-text InfoFiles columns: empty or blank values become "".
    ;; gnt:has_experiment_type was previously set twice with the same
    ;; expression; it is now set once.
    (set gnt:has_experiment_type
         (sanitized-or-empty (field InfoFiles Experiment_Type)))
    (set gnt:has_tissue_info
         (sanitized-or-empty (field InfoFiles About_Tissue)))
    (set gnt:has_summary
         (sanitized-or-empty (field InfoFiles Summary)))
    (set gnt:has_citation
         (sanitized-or-empty (field InfoFiles Citation)))
    (set gnt:has_samples
         (sanitized-or-empty (field InfoFiles samples)))
    (set gnt:has_specifics
         (sanitized-or-empty (field InfoFiles Specifics)))
    (set gnt:has_case_info
         (sanitized-or-empty (field InfoFiles About_Cases)))
    (set gnt:has_platform_info
         (sanitized-or-empty (field InfoFiles About_Array_Platform)))
    (set gnt:has_data_processing_info
         (sanitized-or-empty (field InfoFiles About_Data_Values_Processing)))
    (set gnt:has_experiment_design
         (sanitized-or-empty (field InfoFiles Overall_Design)))
    (set gnt:has_contributors
         (sanitized-or-empty (field InfoFiles Contributor)))))


;; Entry point: parse the command line, read the connection settings and
;; hand the transformers to with-documentation.
(let* ((option-spec
        ;; --settings is marked required so that getopt-long reports a
        ;; missing option itself, instead of the script failing later in
        ;; call-with-input-file with #f as the file name.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is consumed with a single `read'.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Molecular Traits")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix below maps to an empty string; confirm
   ;; whether the namespace IRIs are meant to be filled in.
   (prefixes
    '(("dcat:" "")
      ("gn:" "")
      ("obo:" "")
      ("owl:" "")
      ("xsd:" "")
      ("dct:" "")
      ("xkos:" "")
      ("gnt:" "")
      ("skos:" "")
      ("gnc:" "")
      ("rdf:" "")
      ("rdfs:" "")))
   (inputs
    (list gn:dataset->metadata
          gn:dataset->set/species/molecular_trait
          gn:molecular-traits->gn:datasets
          gn:set->gn:dataset
          gnc:molecular_trait->gn:molecular_trait))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))