diff options
Diffstat (limited to 'examples/molecular-traits.scm')
| -rwxr-xr-x | examples/molecular-traits.scm | 162 |
1 files changed, 155 insertions, 7 deletions
diff --git a/examples/molecular-traits.scm b/examples/molecular-traits.scm index 9e826f6..0393a0d 100755 --- a/examples/molecular-traits.scm +++ b/examples/molecular-traits.scm @@ -12,7 +12,7 @@ (transform special-forms)) -(define-transformer tissues->gn:molecular-traits +(define-transformer gn:molecular-traits->gn:datasets (tables (Tissue)) (schema-triples (gnc:molecular_trait a owl:Class) @@ -64,9 +64,152 @@ (set gnt:has_molecular_trait (string->identifier "trait" (field Tissue Short_Name) #:separator "_")))) +(define-transformer gn:dataset->metadata + (tables (ProbeSetFreeze + (inner-join ProbeFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id") + (inner-join InbredSet "ON InbredSet.Id = ProbeFreeze.InbredSetId") + (inner-join Species "ON InbredSet.SpeciesId = Species.Id") + (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id") + (inner-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID") + (inner-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name") + (left-join GeneChip "ON GeneChip.Id = InfoFiles.GeneChipId")) + "WHERE ProbeSetFreeze.public > 0") + (schema-triples + (gnt:has_case_info a owl:ObjectProperty) + (gnt:has_case_info rdfs:comment "Information about the cases used in this platform") + (gnt:has_case_info rdfs:domain dcat:Dataset) + (gnt:has_case_info rdfs:label "About Case") + (gnt:has_citation a owl:ObjectProperty) + (gnt:has_citation rdfs:comment "Citation for this dataset") + (gnt:has_citation rdfs:domain dcat:Dataset) + (gnt:has_citation rdfs:label "Citation") + (gnt:has_contributors a owl:ObjectProperty) + (gnt:has_contributors rdfs:comment "Contributors of this resource") + (gnt:has_contributors rdfs:comment "Contributors of this resource") + (gnt:has_contributors rdfs:domain dcat:Dataset) + (gnt:has_contributors rdfs:label "Contributors") + (gnt:has_data_processing_info a owl:ObjectProperty) + (gnt:has_data_processing_info rdfs:comment "Information about how this dataset was processed") + 
(gnt:has_data_processing_info rdfs:domain dcat:Dataset) + (gnt:has_data_processing_info rdfs:label "About Data Processing") + (gnt:has_experiment_design a owl:ObjectProperty) + (gnt:has_experiment_design rdfs:comment "Experiment Design for this resource") + (gnt:has_experiment_design rdfs:domain dcat:Dataset) + (gnt:has_experiment_design rdfs:label "Experiment Design") + (gnt:has_experiment_design_info a owl:ObjectProperty) + (gnt:has_experiment_design_info rdfs:comment "Information about how the experiment was designed") + (gnt:has_experiment_design_info rdfs:domain dcat:Dataset) + (gnt:has_experiment_design_info rdfs:label "Experiment Design") + (gnt:has_experiment_type a owl:ObjectProperty) + (gnt:has_experiment_type rdfs:comment "Information about the experiment type") + (gnt:has_experiment_type rdfs:comment "Information about the experiment type") + (gnt:has_experiment_type rdfs:domain dcat:Dataset) + (gnt:has_experiment_type rdfs:label "Experiment Type Metadata") + (gnt:has_platform_info a owl:ObjectProperty) + (gnt:has_platform_info rdfs:comment "Information about the platform that was used with this dataset") + (gnt:has_platform_info rdfs:domain dcat:Dataset) + (gnt:has_platform_info rdfs:label "About Platform") + (gnt:has_samples a owl:ObjectProperty) + (gnt:has_samples rdfs:domain dcat:Dataset) + (gnt:has_samples rdfs:label "Samples") + (gnt:has_specifics a owl:ObjectProperty) + (gnt:has_specifics rdfs:comment "Has specifics") + (gnt:has_specifics rdfs:domain dcat:Dataset) + (gnt:has_specifics rdfs:label "Specifics") + (gnt:has_summary a owl:ObjectProperty) + (gnt:has_summary rdfs:comment "Summary information about dataset") + (gnt:has_summary rdfs:domain dcat:Dataset) + (gnt:has_summary rdfs:label "Summary") + (gnt:has_tissue_info a owl:ObjectProperty) + (gnt:has_tissue_info rdfs:domain dcat:Dataset) + (gnt:has_tissue_info rdfs:label "Metadata about Tissue for this resource") + (gnt:uses_genechip a owl:ObjectProperty) + (gnt:uses_genechip rdfs:domain 
dcat:Dataset) + (gnt:uses_genechip skos:definition "The Platform this resource uses") + (gnt:uses_normalization_method rdfs:comment "The method used to map genetic or experimental data for this resource.") + (gnt:uses_normalization_method rdfs:domain dcat:Dataset) + (gnt:uses_normalization_method rdfs:label "Averaging method") + (gnt:uses_normalization_method rdfs:range gnc:avg_method) + (gnt:uses_normalization_method a owl:ObjectProperty)) + (triples (string->identifier "dataset" (field ProbeSetFreeze Name) #:separator "_") + (set rdf:type 'dcat:Dataset) + (set skos:prefLabel (field ProbeSetFreeze Name)) + (set dct:title (normalize-string-field (field InfoFiles InfoPageName))) + (set rdfs:label (normalize-string-field (field InfoFiles InfoPageName))) + (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) + (set gnt:uses_normalization_method + (string->identifier "avg_method" (field AvgMethod Name AvgMethodName) #:separator "_")) + (set gnt:has_strain + (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")) + (set gnt:has_species + (string->identifier "" (remap-species-identifiers (field Species Fullname)))) + (set gnt:has_molecular_trait + (string->identifier "trait" (field Tissue Short_Name) #:separator "_")) + (set gnt:uses_genechip + (string->identifier "platform" (field GeneChip Name) #:separator "_")) + (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId))) + (set gnt:has_experiment_type + (let ((experiment-type + (field InfoFiles Experiment_Type))) + (if (or (null? experiment-type) (string-blank? experiment-type)) + "" (sanitize-rdf-string experiment-type)))) + (set gnt:has_tissue_info + (let ((tissue-info + (field InfoFiles About_Tissue))) + (if (or (null? tissue-info) (string-blank? tissue-info)) + "" (sanitize-rdf-string tissue-info)))) + (set gnt:has_summary + (let* ((summary + (field InfoFiles Summary))) + (if (or (null? summary) (string-blank? 
summary)) + "" (sanitize-rdf-string summary)))) + (set gnt:has_citation + (let ((citation + (field InfoFiles Citation))) + (if (or (null? citation) (string-blank? citation)) + "" (sanitize-rdf-string citation)))) + (set gnt:has_samples + (let ((samples + (field InfoFiles samples))) + (if (or (null? samples) (string-blank? samples)) + "" (sanitize-rdf-string samples)))) + (set gnt:has_specifics + (let* ((specifics + (field InfoFiles Specifics))) + (if (or (null? specifics) (string-blank? specifics)) + "" (sanitize-rdf-string specifics)))) + (set gnt:has_case_info + (let ((cases + (field InfoFiles About_Cases))) + (if (or (null? cases) (string-blank? cases)) + "" (sanitize-rdf-string cases)))) + (set gnt:has_platform_info + (let* ((platform + (field InfoFiles About_Array_Platform))) + (if (or (null? platform) (string-blank? platform)) + "" (sanitize-rdf-string platform)))) + (set gnt:has_data_processing_info + (let* ((processing + (field InfoFiles About_Data_Values_Processing))) + (if (or (null? processing) (string-blank? processing)) + "" (sanitize-rdf-string processing)))) + (set gnt:has_experiment_type + (let ((experiment-type + (field InfoFiles Experiment_Type))) + (if (or (null? experiment-type) (string-blank? experiment-type)) + "" (sanitize-rdf-string experiment-type)))) + (set gnt:has_experiment_design + (let ((experiment-design + (field InfoFiles Overall_Design))) + (if (or (null? experiment-design) (string-blank? experiment-design)) + "" (sanitize-rdf-string experiment-design)))) + (set gnt:has_contributors + (let ((contributors + (field InfoFiles Contributor))) + (if (or (null? contributors) (string-blank? contributors)) + "" (sanitize-rdf-string contributors)))))) - (let* ((option-spec '((settings (single-char #\s) (value #t)) (output (single-char #\o) (value #t)) @@ -79,13 +222,17 @@ (call-with-input-file settings read))) (with-documentation - (name "Tissue Metadata") + (name "Molecular Traits") (connection %connection-settings) (table-metadata? 
#f) (prefixes - '(("gn:" "<http://rdf.genenetwork.org/v1/id/>") + '(("dcat:" "<http://www.w3.org/ns/dcat#>") + ("gn:" "<http://rdf.genenetwork.org/v1/id/>") ("obo:" "<http://purl.obolibrary.org/obo/>") ("owl:" "<http://www.w3.org/2002/07/owl#>") + ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") + ("dct:" "<http://purl.org/dc/terms/>") + ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") ("gnt:" "<http://rdf.genenetwork.org/v1/term/>") ("skos:" "<http://www.w3.org/2004/02/skos/core#>") ("gnc:" "<http://rdf.genenetwork.org/v1/category/>") @@ -93,10 +240,11 @@ ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>"))) (inputs (list - tissues->gn:molecular-traits - gnc:molecular_trait->gn:molecular_trait + gn:dataset->metadata + gn:dataset->set/species/molecular_trait + gn:molecular-traits->gn:datasets gn:set->gn:dataset - gn:dataset->set/species/molecular_trait)) + gnc:molecular_trait->gn:molecular_trait)) (outputs `(#:documentation ,documentation #:rdf ,output)))) |
