From c1cb2b5e5dfc6647f8c0c35fe6ad392cdd4fdfd7 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 15 Aug 2023 18:25:18 +0300 Subject: Update probeset dump Signed-off-by: Munyoki Kilyungi --- examples/dump-probeset.scm | 171 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 149 insertions(+), 22 deletions(-) (limited to 'examples') diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm index 4d5f9a5..a9b6c7c 100755 --- a/examples/dump-probeset.scm +++ b/examples/dump-probeset.scm @@ -21,29 +21,144 @@ (tables (ProbeSet (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples - (gnt:name rdfs:range rdfs:Literal) - (gnt:probeset rdfs:range rdfs:Literal)) - (triples (ontology - 'probeset: - (string-trim-both + (gnc:probeset a skos:Concept) + (gnc:probeset + skos:description + "This is a set of controlled terms that are used to describe a given probeset") + (gnt:hasChip a owl:ObjectProperty) + (gnt:hasChip rdfs:domain gnc:probeset) + (gnt:hasTargetId a owl:ObjectProperty) + (gnt:hasTargetId rdfs:domain gnc:probeset) + (gnt:symbol rdfs:domain gnc:probeset) + (gnt:targetsRegion a owl:ObjectProperty) + (gnt:targetsRegion rdfs:domain gnc:probeset) + (gnt:chr rdfs:domain gnc:probeset) + (gnt:mb rdfs:domain gnc:probeset) + (gnt:mbMm8 rdfs:domain gnc:probeset) + (gnt:mb2016 rdfs:domain gnc:probeset) + (gnt:hasSpecificity a owl:ObjectProperty) + (gnt:hasSpecificity rdfs:domain gnc:probeset) + (gnt:hasBlatScore a owl:ObjectProperty) + (gnt:hasBlatScore rdfs:domain gnc:probeset) + (gnt:hasBlatMbStart a owl:ObjectProperty) + (gnt:hasBlatMbStart rdfs:domain gnc:probeset) + (gnt:hasBlatMbStart2016 a owl:ObjectProperty) + (gnt:hasBlatMbStart2016 rdfs:domain gnc:probeset) + (gnt:hasBlatMbEnd a owl:ObjectProperty) + (gnt:hasBlatMbEnd rdfs:domain gnc:probeset) + (gnt:hasBlatMbEnd2016 a owl:ObjectProperty) + (gnt:hasBlatMbEnd2016 rdfs:domain gnc:probeset) + (gnt:hasBlatSeq a owl:ObjectProperty) + (gnt:hasBlatSeq rdfs:domain gnc:probeset) + 
(gnt:hasTargetSeq a owl:ObjectProperty) + (gnt:hasTargetSeq rdfs:domain gnc:probeset) + (gnt:hasHomologeneId a owl:ObjectProperty) + (gnt:hasHomologeneId rdfs:domain gnc:probeset) + (gnt:hasPubChemId a owl:ObjectProperty) + (gnt:hasPubChemId rdfs:domain gnc:probeset) + (gnt:hasKeggId a owl:ObjectProperty) + (gnt:hasKeggId rdfs:domain gnc:probeset) + (gnt:hasOmimId a owl:ObjectProperty) + (gnt:hasOmimId rdfs:domain gnc:probeset) + (gnt:hasChebiId a owl:ObjectProperty) + (gnt:hasChebiId rdfs:domain gnc:probeset)) + (triples + (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))" + ProbeSetIdName))) + (probeset-id (field ProbeSet Id))) + (if (string-null? id) + (string->identifier + "probeset" + (number->string + probeset-id)) + (string->identifier + "" (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ("IFNULL(NULLIF(TRIM(ProbeSet.Name), ''), ProbeSet.Id)" - name)) - 'pre "_" 'post))) - (set rdf:type 'gn-id:probeset) - (set gnt:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gnt:name (field ProbeSet Name)) - (set gnt:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gnt:description (sanitize-rdf-string - (field ProbeSet description))) + id + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first))) + (set rdf:type 'gnc:probeset) + (set rdfs:label (field ProbeSet Name)) + (set skos:altLabel + (replace-substrings + (field ProbeSet alias) + '(("\r\n" . 
"; ")))) + (set gnt:hasChip + (string->identifier + "platform" + (field ("IFNULL(GeneChip.Name, '')" GeneChipName)))) + (set gnt:hasTargetId + (field ("NULLIF(TRIM(ProbeSet.TargetId), '')" + TargetId))) + (set gnt:symbol (field ProbeSet Symbol)) + (set dct:description (sanitize-rdf-string (field ProbeSet description))) + (set gnt:targetsRegion + (sanitize-rdf-string + (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')" + Probe_set_target_region)))) (set gnt:chr (field ProbeSet Chr)) (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gnt:blatSeq (sanitize-rdf-string - (string-trim-both (field ProbeSet BlatSeq)))) - (set gnt:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gnt:uniProtReference (ontology 'uniprot: - (field ProbeSet UniProtID))))) + (set gnt:mbMm8 (annotate-field (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8)) + '^^xsd:double)) + (set gnt:mb2016 + (annotate-field (field ("IFNULL(ProbeSet.Mb_2016, '')" Mb_2016)) + '^^xsd:double)) + (set gnt:hasSpecificity + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_specificity, '')" + Probe_set_specificity)) + '^^xsd:double)) + (set gnt:hasBlatScore + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')" + Probe_set_BLAT_score)) + '^^xsd:double)) + (set gnt:hasBlatMbStart + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')" + Probe_set_Blat_Mb_start)) + '^^xsd:double)) + (set gnt:hasBlatMbStart2016 + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '')" + Probe_set_Blat_Mb_start_2016)) + '^^xsd:double)) + (set gnt:hasBlatMbEnd + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')" + Probe_set_Blat_Mb_end)) + '^^xsd:double)) + (set gnt:hasBlatMbEnd2016 + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end_2016, '')" + Probe_set_Blat_Mb_end_2016)) + '^^xsd:double)) + (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq))) + (set gnt:hasTargetSeq 
(sanitize-rdf-string (field ProbeSet TargetSeq))) + (set gnt:hasHomologeneId (ontology 'homologene: + (field ("IFNULL(ProbeSet.HomoloGeneID, '')" + HomoloGeneID)))) + (set gnt:hasUniprotId (ontology 'uniprot: + (field ("IFNULL(ProbeSet.UniProtID, '')" + UniProtID)))) + (set gnt:hasPubChemId (ontology + 'pubchem: + (field ("IFNULL(ProbeSet.PubChem_ID, '')" + PubChem_ID)))) + (set gnt:hasKeggId (ontology + 'kegg: + (field ("IFNULL(ProbeSet.KEGG_ID, '')" + KEGG_ID)))) + (set gnt:hasOmimId (ontology + 'omim: + (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')" + OMIM)))) + (if (number? omim) + omim + (regexp-substitute/global + #f "[^0-9]" + omim + 'pre "" 'post))))) + (set gnt:hasChebiId (ontology + 'chebi: + (field ("IFNULL(ProbeSet.ChEBI_ID, '')" + ChEBI_ID)))))) @@ -53,10 +168,22 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("probeset:" "") + '(("gn:" "") + ("probeset:" "") + ("gnc:" "") + ("gnt:" "") ("rdf:" "") - ("rdfs:" ""))) + ("kegg:" "") + ("pubchem:" "") + ("omim:" "") + ("rdfs:" "") + ("uniprot:" "") + ("chebi:" "") + ("dct:" "") + ("owl:" "") + ("homologene:" "") + ("xsd:" "") + ("skos:" ""))) (inputs (list dump-probeset)) (outputs -- cgit v1.2.3