From 53c519c6d07992a0249e8f365f8b1a1af7a13886 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Fri, 26 May 2023 08:35:54 +0300
Subject: Create a new probesetfreeze dump

This way, the probeset dump will become smaller.

Signed-off-by: Munyoki Kilyungi
---
 examples/dump-probeset.scm       | 39 -----------------
 examples/dump-probesetfreeze.scm | 92 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 39 deletions(-)
 create mode 100755 examples/dump-probesetfreeze.scm

diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm
index a71f708..8e809a5 100755
--- a/examples/dump-probeset.scm
+++ b/examples/dump-probeset.scm
@@ -20,14 +20,6 @@
   (list-ref (command-line) 2))
 
 
-(define-dump dump-gene-chip
-  (tables (GeneChip))
-  (schema-triples
-   (gn:name rdfs:range rdfs:Literal))
-  (triples (string->identifier "platform" (field GeneChip Name))
-    (set rdf:type 'gn:platform)
-    (set gn:name (field GeneChip GeneChipName))))
-
 (define-dump dump-probeset
   (tables (ProbeSet
            (left-join ProbeSetXRef "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id")
@@ -144,35 +136,6 @@
     (set gn:secondaryNames (field ProbeSet SecondaryNames))
     (set gn:peptideSequence (field ProbeSet PeptideSequence))))
 
-;; Molecular Traits are also referred to as ProbeSets
-(define-dump dump-probesetfreeze
-  (tables (ProbeSetFreeze
-           (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
-           (left-join ProbeFreeze "USING (ProbeFreezeId)")
-           (left-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
-           (left-join InbredSet "ON ProbeFreeze.InbredSetId=InbredSet.Id")
-           (left-join Tissue "USING (TissueId)"))
-          "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id")
-  (schema-triples
-   (gn:avgMethod rdfs:range rdfs:Literal)
-   (gn:dataScale rdfs:range rdfs:Literal))
-  (triples
-      (ontology 'probeset:
-                (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                          (field ProbeSetFreeze Name)
-                                          'pre "_" 'post))
-    (set rdf:type 'gn:probesetDataset)
-    (set gn:avgMethod (string->identifier "avgmethod" (field AvgMethod Name)))
-    (set gn:fullName (field ProbeSetFreeze FullName))
-    (set gn:shortName (field ProbeSetFreeze ShortName))
-    (set dct:created (annotate-field
-                      (field ProbeSetFreeze CreateTime)
-                      '^^xsd:datetime))
-    (set gn:dataScale (field ProbeSetFreeze DataScale))
-    (set gn:tissueName (string->identifier "tissue" (field Tissue Short_Name)))
-    (set gn:datasetOfInbredSet
-         (string->identifier "inbredSet" (field InbredSet Name InbredSetName)))))
-
 
 
 (call-with-target-database
@@ -203,7 +166,5 @@
        (prefix "xsd:" "")
        (prefix "probeset:" "")
        (newline)
-       (dump-gene-chip db)
-       (dump-probesetfreeze db)
        (dump-probeset db))
      #:encoding "utf8")))
diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm
new file mode 100755
index 0000000..b238851
--- /dev/null
+++ b/examples/dump-probesetfreeze.scm
@@ -0,0 +1,92 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 match)
+             (ice-9 regex)
+             (dump strings)
+             (dump sql)
+             (dump triples)
+             (dump special-forms))
+
+
+
+(define %connection-settings
+  (call-with-input-file (list-ref (command-line) 1)
+    read))
+
+(define %dump-directory
+  (list-ref (command-line) 2))
+
+
+(define-dump dump-gene-chip
+  (tables (GeneChip))
+  (schema-triples
+   (gn:name rdfs:range rdfs:Literal))
+  (triples (string->identifier "platform" (field GeneChip Name))
+    (set rdf:type 'gn:platform)
+    (set gn:name (field GeneChip GeneChipName))))
+
+;; Molecular Traits are also referred to as ProbeSets
+(define-dump dump-probesetfreeze
+  (tables (ProbeSetFreeze
+           (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
+           (left-join ProbeFreeze "USING (ProbeFreezeId)")
+           (left-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
+           (left-join InbredSet "ON ProbeFreeze.InbredSetId = InbredSet.Id")
+           (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId"))
+          "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id")
+  (schema-triples
+   (gn:avgMethod rdfs:range rdfs:Literal)
+   (gn:dataScale rdfs:range rdfs:Literal))
+  (triples
+      (ontology 'probeset:
+                (regexp-substitute/global #f "[^A-Za-z0-9:]"
+                                          (field ProbeSetFreeze Name)
+                                          'pre "_" 'post))
+    (set rdf:type 'gn:probesetDataset)
+    (set gn:avgMethod (string->identifier "avgmethod" (field AvgMethod Name)))
+    (set gn:fullName (field ProbeSetFreeze FullName))
+    (set gn:shortName (field ProbeSetFreeze ShortName))
+    (set dct:created (annotate-field
+                      (field ProbeSetFreeze CreateTime)
+                      '^^xsd:datetime))
+    (set gn:dataScale (field ProbeSetFreeze DataScale))
+    (set gn:tissueName (string->identifier "tissue" (field Tissue Short_Name)))
+    (set gn:datasetOfInbredSet
+         (string->identifier "inbredSet" (field InbredSet Name InbredSetName)))))
+
+
+
+(call-with-target-database
+ %connection-settings
+ (lambda (db)
+   (with-output-to-file (string-append %dump-directory "dump-probesetfreeze.ttl")
+     (lambda ()
+       (prefix "chebi:" "")
+       (prefix "dct:" "")
+       (prefix "foaf:" "")
+       (prefix "generif:" "")
+       (prefix "gn:" "")
+       (prefix "hgnc:" "")
+       (prefix "homologene:" "")
+       (prefix "kegg:" "")
+       (prefix "molecularTrait:" "")
+       (prefix "nuccore:" "")
+       (prefix "omim:" "")
+       (prefix "owl:" "")
+       (prefix "phenotype:" "")
+       (prefix "pubchem:" "")
+       (prefix "pubmed:" "")
+       (prefix "rdf:" "")
+       (prefix "rdfs:" "")
+       (prefix "taxon:" "")
+       (prefix "uniprot:" "")
+       (prefix "up:" "")
+       (prefix "xsd:" "")
+       (prefix "probeset:" "")
+       (newline)
+       (dump-gene-chip db)
+       (dump-probesetfreeze db))
+     #:encoding "utf8")))
-- 
cgit v1.2.3