From add95e737f61fdf3e8f244dd7ebedca963514bb7 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 21 Aug 2023 14:41:23 +0300 Subject: Move dumps related to datasets to one place * examples/dump-dataset-metadata.scm: Add dump-gene-chip, dump-publishfreeze, dump-genofreeze, dump-probesetfreeze * examples/dump-genotype.scm: Delete dump-genofreeze. * examples/dump-phenotype.scm: Delete dump-publishfreeze. * examples/dump-probesetfreeze.scm: Delete file --- examples/dump-probesetfreeze.scm | 131 --------------------------------------- 1 file changed, 131 deletions(-) delete mode 100755 examples/dump-probesetfreeze.scm (limited to 'examples/dump-probesetfreeze.scm') diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm deleted file mode 100755 index 50307bf..0000000 --- a/examples/dump-probesetfreeze.scm +++ /dev/null @@ -1,131 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (ice-9 match) - (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) - - - -(define %connection-settings - (call-with-input-file (list-ref (command-line) 1) - read)) - - -(define (remap-species-identifiers str) - "This procedure remaps identifiers to standard binominal. Obviously this should - be sorted by correcting the database!" - (match str - ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] - ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] - ["Macaca mulatta" "Macaca nemestrina"] - ["Bat (Glossophaga soricina)" "Glossophaga soricina"] - [str str])) - - -(define-dump dump-gene-chip - (tables (GeneChip - (left-join Species "USING (SpeciesId)"))) - (schema-triples - (gnc:geneChip a skos:Concept) - (gnc:geneChip - skos:description - "This is a set of controlled terms that are used to describe a given gene chip/platform") - (gnt:hasGeoSeriesId rdfs:domain gnc:platform) - (gnt:belongsToSpecies a owl:ObjectProperty) - (gnt:belongsToSpecies skos:definition "This resource belongs to this given species") - (gnt:belongsToSpecies rdfs:domain gnc:geneChip) - (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip) - (gnt:hasGOTreeValue a owl:ObjectProperty) - (gnt:hasGOTreeValue skos:definition "This resource the following GO tree value") - (gnt:hasGOTreeValue rdfs:domain gnc:geneChip)) - (triples (string->identifier "platform" (field GeneChip Name)) - (set rdf:type 'gnc:geneChip) - (set rdfs:label (field GeneChip GeneChipName)) - (set skos:prefLabel (field GeneChip Name)) - (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)" - Title))) - (set gnt:hasGOTreeValue (field GeneChip Go_tree_value)) - (set gnt:belongsToSpecies - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - (set gnt:hasGeoSeriesId - (ontology 'geoSeries: - (string-trim-both (field GeneChip GeoPlatform)))))) - -;; Molecular Traits are also referred to as ProbeSets -(define-dump dump-probesetfreeze - (tables (ProbeSetFreeze - (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name") - (left-join ProbeFreeze "USING (ProbeFreezeId)") - (left-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID") - (left-join InbredSet "ON ProbeFreeze.InbredSetId = InbredSet.Id") - (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) - "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") - (schema-triples - (gnt:usesNormalization rdfs:domain gnc:probeset) - (gnt:usesDataScale rdfs:domain gnc:probeset) - (gnt:usesDataScale a owl:ObjectProperty) - (gnt:usesDataScale skos:definition "Thi data scale this resource uses")) - (triples - (string->identifier - "" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ProbeSetFreeze Name) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) - (set rdf:type 'gnc:probesetDataset) - (set gnt:usesNormalization - (string->identifier "avgmethod" - ;; If AvgMethodName is NULL, assume N/A. - (if (string-blank? (field AvgMethod Name AvgMethodName)) - "N/A" (field AvgMethod Name AvgMethodName)))) - (set dct:title (field ProbeSetFreeze FullName)) - (set rdfs:label (field ProbeSetFreeze ShortName)) - (set skos:prefLabel (field ProbeSetFreeze Name)) - (set skos:altLabel (field ProbeSetFreeze Name2)) - (set dct:created (annotate-field - (field ProbeSetFreeze CreateTime) - '^^xsd:datetime)) - (set gnt:usesDataScale (field ProbeSetFreeze DataScale)) - (set gnt:hasTissue - (string->identifier - "tissue" - (field Tissue Short_Name))) - (set gnt:belongsToSet - (string->identifier - "inbredSet" (field InbredSet Name) - #:separator "" - #:proc string-capitalize-first)))) - - - -(dump-with-documentation - (name "Probeset freeze metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("geoSeries:" "") - ("gn:" "") - ("gnc:" "") - ("dct:" "") - ("owl:" "") - ("skos:" "") - ("gnt:" "") - ("rdf:" "") - ("rdfs:" "") - ("xsd:" ""))) - (inputs - (list dump-gene-chip - dump-probesetfreeze)) - (outputs - '(#:documentation "./docs/dump-gene-chip.md" - #:rdf "./verified-data/dump-probesetfreeze.ttl"))) -- cgit v1.2.3