From 16ebe166618b7e36d92bcc6c3e497dcfa188ce90 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 20:23:58 +0300 Subject: Dump probesetfreeze metadata using new metadata --- examples/dump-probesetfreeze.scm | 77 +++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index 0be81ac..a45fd0a 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -16,18 +16,15 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-gene-chip (tables (GeneChip)) (schema-triples - (gn:name rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) (set rdf:type 'gn:platform) - (set gn:name (field GeneChip GeneChipName)) - (set gn:geoPlatform + (set gn-term:name (field GeneChip GeneChipName)) + (set gn-term:geoPlatform (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -41,48 +38,46 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gn:avgMethod rdfs:range rdfs:Literal) - (gn:dataScale rdfs:range rdfs:Literal) + (gn-term:avgMethod rdfs:range rdfs:Literal) + (gn-term:dataScale rdfs:range rdfs:Literal) (gn:probesetDataset rdf:subClassOf gn:dataset)) (triples - (ontology 'probeset: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ProbeSetFreeze Name) - 'pre "_" 'post)) + (string->identifier + "" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field ProbeSetFreeze Name) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:probesetDataset) - (set gn:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) - (set gn:fullName (field ProbeSetFreeze FullName)) - (set gn:shortName (field ProbeSetFreeze ShortName)) + (set gn-term:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) + (set gn-term:fullName (field ProbeSetFreeze FullName)) + (set gn-term:shortName (field ProbeSetFreeze ShortName)) (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gn:dataScale (field ProbeSetFreeze DataScale)) - (set gn:tissueName (string->identifier "tissue" (field Tissue Short_Name))) - (set gn:datasetOfInbredSet + (set gn-term:dataScale (field ProbeSetFreeze DataScale)) + (set gn-term:tissueName (string->identifier "tissue" (field Tissue Short_Name))) + (set gn-term:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-probesetfreeze.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "geoSeries:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-gene-chip db) - (dump-probesetfreeze db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Probeset freeze metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("geoSeries:" "") + ("gn:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" ""))) + (inputs + (list dump-gene-chip + dump-probesetfreeze)) + (outputs + '(#:documentation "./docs/dump-gene-chip.md" + #:rdf "./verified-data/dump-probesetfreeze.ttl"))) -- cgit v1.2.3