diff options
author | Munyoki Kilyungi | 2023-08-15 18:25:18 +0300 |
---|---|---|
committer | Munyoki Kilyungi | 2023-08-15 19:32:48 +0300 |
commit | c1cb2b5e5dfc6647f8c0c35fe6ad392cdd4fdfd7 (patch) | |
tree | 7be9c90c8cec73c97d472e73eed0dedb994da7a3 | |
parent | 8764d0d964e9ef89a41bbc8d1b7ea96646733c83 (diff) | |
download | gn-transform-databases-c1cb2b5e5dfc6647f8c0c35fe6ad392cdd4fdfd7.tar.gz |
Update probeset dump
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x | examples/dump-probeset.scm | 171 |
1 files changed, 149 insertions, 22 deletions
diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm index 4d5f9a5..a9b6c7c 100755 --- a/examples/dump-probeset.scm +++ b/examples/dump-probeset.scm @@ -21,29 +21,144 @@ (tables (ProbeSet (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples - (gnt:name rdfs:range rdfs:Literal) - (gnt:probeset rdfs:range rdfs:Literal)) - (triples (ontology - 'probeset: - (string-trim-both + (gnc:probeset a skos:Concept) + (gnc:probeset + skos:description + "This is a set of controlled terms that are used to describe a given probeset") + (gnt:hasChip a owl:ObjectProperty) + (gnt:hasChip rdfs:domain gnc:probeset) + (gnt:hasTargetId a owl:ObjectProperty) + (gnt:hasTargetId rdfs:domain gnc:probeset) + (gnt:symbol rdfs:domain gnc:probeset) + (gnt:targetsRegion a owl:ObjectProperty) + (gnt:targetsRegion rdfs:domain gnc:probeset) + (gnt:chr rdfs:domain gnc:probeset) + (gnt:mb rdfs:domain gnc:probeset) + (gnt:mbMm8 rdfs:domain gnc:probeset) + (gnt:mb2016 rdfs:domain gnc:probeset) + (gnt:hasSpecificity a owl:ObjectProperty) + (gnt:hasSpecificity rdfs:domain gnc:probeset) + (gnt:hasBlatScore a owl:ObjectProperty) + (gnt:hasBlatScore rdfs:domain gnc:probeset) + (gnt:hasBlatMbStart a owl:ObjectProperty) + (gnt:hasBlatMbStart rdfs:domain gnc:probeset) + (gnt:hasBlatMbStart2016 a owl:ObjectProperty) + (gnt:hasBlatMbStart2016 rdfs:domain gnc:probeset) + (gnt:hasBlatMbEnd a owl:ObjectProperty) + (gnt:hasBlatMbEnd rdfs:domain gnc:probeset) + (gnt:hasBlatMbEnd2016 a owl:ObjectProperty) + (gnt:hasBlatMbEnd2016 rdfs:domain gnc:probeset) + (gnt:hasBlatSeq a owl:ObjectProperty) + (gnt:hasBlatSeq rdfs:domain gnc:probeset) + (gnt:hasTargetSeq a owl:ObjectProperty) + (gnt:hasTargetSeq rdfs:domain gnc:probeset) + (gnt:hasHomologeneId a owl:ObjectProperty) + (gnt:hasHomologeneId rdfs:domain gnc:probeset) + (gnt:hasPubChemId a owl:ObjectProperty) + (gnt:hasPubChemId rdfs:domain gnc:probeset) + (gnt:hasKeggId a owl:ObjectProperty) + (gnt:hasKeggId rdfs:domain gnc:probeset) + (gnt:hasOmimId a owl:ObjectProperty) + (gnt:hasOmimId rdfs:domain gnc:probeset) + (gnt:hasChebiId a owl:ObjectProperty) + (gnt:hasChebiId rdfs:domain gnc:probeset)) + (triples + (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))" + ProbeSetIdName))) + (probeset-id (field ProbeSet Id))) + (if (string-null? id) + (string->identifier + "probeset" + (number->string + probeset-id)) + (string->identifier + "" (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ("IFNULL(NULLIF(TRIM(ProbeSet.Name), ''), ProbeSet.Id)" - name)) - 'pre "_" 'post))) - (set rdf:type 'gn-id:probeset) - (set gnt:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gnt:name (field ProbeSet Name)) - (set gnt:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gnt:description (sanitize-rdf-string - (field ProbeSet description))) + id + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first))) + (set rdf:type 'gnc:probeset) + (set rdfs:label (field ProbeSet Name)) + (set skos:altLabel + (replace-substrings + (field ProbeSet alias) + '(("\r\n" . "; ")))) + (set gnt:hasChip + (string->identifier + "platform" + (field ("IFNULL(GeneChip.Name, '')" GeneChipName)))) + (set gnt:hasTargetId + (field ("NULLIF(TRIM(ProbeSet.TargetId), '')" + TargetId))) + (set gnt:symbol (field ProbeSet Symbol)) + (set dct:description (sanitize-rdf-string (field ProbeSet description))) + (set gnt:targetsRegion + (sanitize-rdf-string + (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')" + Probe_set_target_region)))) (set gnt:chr (field ProbeSet Chr)) (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gnt:blatSeq (sanitize-rdf-string - (string-trim-both (field ProbeSet BlatSeq)))) - (set gnt:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gnt:uniProtReference (ontology 'uniprot: - (field ProbeSet UniProtID))))) + (set gnt:mbMm8 (annotate-field (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8)) + '^^xsd:double)) + (set gnt:mb2016 + (annotate-field (field ("IFNULL(ProbeSet.Mb_2016, '')" Mb_2016)) + '^^xsd:double)) + (set gnt:hasSpecificity + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_specificity, '')" + Probe_set_specificity)) + '^^xsd:double)) + (set gnt:hasBlatScore + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')" + Probe_set_BLAT_score)) + '^^xsd:double)) + (set gnt:hasBlatMbStart + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')" + Probe_set_Blat_Mb_start)) + '^^xsd:double)) + (set gnt:hasBlatMbStart2016 + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '')" + Probe_set_Blat_Mb_start_2016)) + '^^xsd:double)) + (set gnt:hasBlatMbEnd + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')" + Probe_set_Blat_Mb_end)) + '^^xsd:double)) + (set gnt:hasBlatMbEnd2016 + (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '')" + Probe_set_Blat_Mb_start_2016)) + '^^xsd:double)) + (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq))) + (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) + (set gnt:hasHomologeneId (ontology 'homologene: + (field ("IFNULL(ProbeSet.HomoloGeneID, '')" + HomoloGeneID)))) + (set gnt:hasUniprotId (ontology 'uniprot: + (field ("IFNULL(ProbeSet.UniProtID, '')" + UniProtID)))) + (set gnt:hasPubChemId (ontology + 'pubchem: + (field ("IFNULL(ProbeSet.PubChem_ID, '')" + PubChem_ID)))) + (set gnt:hasKeggId (ontology + 'kegg: + (field ("IFNULL(ProbeSet.KEGG_ID, '')" + KEGG_ID)))) + (set gnt:hasOmimId (ontology + 'omim: + (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')" + OMIM)))) + (if (number? omim) + omim + (regexp-substitute/global + #f "[^0-9]" + omim + 'pre "" 'post))))) + (set gnt:hasChebiId (ontology + 'chebi: + (field ("IFNULL(ProbeSet.ChEBI_ID, '')" + ChEBI_ID)))))) @@ -53,10 +168,22 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("probeset:" "<http://genenetwork.org/probeset/>") + '(("gn:" "<http://genenetwork.org/id/>") + ("probeset:" "<http://genenetwork.org/probeset/>") + ("gnc:" "<http://genenetwork.org/category/>") + ("gnt:" "<http://genenetwork.org/term/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") - ("rdfs:" "<http://www.w3.org/2000/01/rdf-r") - ("uniprot:" "<http://purl.uniprot.org/uniprot/>"))) + ("kegg:" "<http://bio2rdf.org/ns/kegg#>") + ("pubchem:" "<https://pubchem.ncbi.nlm.nih.gov/>") + ("omim:" "<https://www.omim.org/entry/>") + ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") + ("uniprot:" "<http://purl.uniprot.org/uniprot/>") + ("chebi:" "<http://purl.obolibrary.org/obo/CHEBI_>") + ("dct:" "<http://purl.org/dc/terms/>") + ("owl:" "<http://www.w3.org/2002/07/owl#>") + ("homologene:" "<https://bio2rdf.org/homologene:>") + ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") + ("skos:" "<http://www.w3.org/2004/02/skos/core#>"))) (inputs (list dump-probeset)) (outputs |