#! /usr/bin/env guile
!#
(use-modules (srfi srfi-1)
(srfi srfi-26)
(ice-9 match)
(ice-9 regex)
(dump strings)
(dump sql)
(dump triples)
(dump special-forms))
;; Database connection settings, obtained by reading a single datum from
;; the file whose path is the first command-line argument (index 1 of
;; `command-line'; index 0 is the program name).
(define %connection-settings
  (let ((settings-file (list-ref (command-line) 1)))
    (call-with-input-file settings-file
      (lambda (port)
        (read port)))))
;; Dump definition for the ProbeSet table, LEFT JOINed with GeneChip on
;; GeneChip.Id = ProbeSet.ChipId.
;;
;; `schema-triples' lists the vocabulary declarations (class, property
;; types and domains); `triples' describes, for each row, the subject
;; identifier followed by one `set' clause per predicate.
(define-dump dump-probeset
  (tables (ProbeSet
           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")))
  (schema-triples
   (gnc:probeset a skos:Concept)
   (gnc:probeset
    skos:description
    "This is a set of controlled terms that are used to describe a given probeset")
   (gnt:hasChip a owl:ObjectProperty)
   (gnt:hasChip rdfs:domain gnc:probeset)
   (gnt:hasTargetId a owl:ObjectProperty)
   (gnt:hasTargetId rdfs:domain gnc:probeset)
   (gnt:symbol rdfs:domain gnc:probeset)
   (gnt:targetsRegion a owl:ObjectProperty)
   (gnt:targetsRegion rdfs:domain gnc:probeset)
   (gnt:chr rdfs:domain gnc:probeset)
   (gnt:mb rdfs:domain gnc:probeset)
   (gnt:mbMm8 rdfs:domain gnc:probeset)
   (gnt:mb2016 rdfs:domain gnc:probeset)
   (gnt:hasSpecificity a owl:ObjectProperty)
   (gnt:hasSpecificity rdfs:domain gnc:probeset)
   (gnt:hasBlatScore a owl:ObjectProperty)
   (gnt:hasBlatScore rdfs:domain gnc:probeset)
   (gnt:hasBlatMbStart a owl:ObjectProperty)
   (gnt:hasBlatMbStart rdfs:domain gnc:probeset)
   (gnt:hasBlatMbStart2016 a owl:ObjectProperty)
   (gnt:hasBlatMbStart2016 rdfs:domain gnc:probeset)
   (gnt:hasBlatMbEnd a owl:ObjectProperty)
   (gnt:hasBlatMbEnd rdfs:domain gnc:probeset)
   (gnt:hasBlatMbEnd2016 a owl:ObjectProperty)
   (gnt:hasBlatMbEnd2016 rdfs:domain gnc:probeset)
   (gnt:hasBlatSeq a owl:ObjectProperty)
   (gnt:hasBlatSeq rdfs:domain gnc:probeset)
   (gnt:hasTargetSeq a owl:ObjectProperty)
   (gnt:hasTargetSeq rdfs:domain gnc:probeset)
   (gnt:hasHomologeneId a owl:ObjectProperty)
   (gnt:hasHomologeneId rdfs:domain gnc:probeset)
   ;; gnt:hasUniprotId is emitted in `triples' below, so it is declared
   ;; here like every other gnt:has* predicate.
   (gnt:hasUniprotId a owl:ObjectProperty)
   (gnt:hasUniprotId rdfs:domain gnc:probeset)
   (gnt:hasPubChemId a owl:ObjectProperty)
   (gnt:hasPubChemId rdfs:domain gnc:probeset)
   (gnt:hasKeggId a owl:ObjectProperty)
   (gnt:hasKeggId rdfs:domain gnc:probeset)
   (gnt:hasOmimId a owl:ObjectProperty)
   (gnt:hasOmimId rdfs:domain gnc:probeset)
   (gnt:hasChebiId a owl:ObjectProperty)
   (gnt:hasChebiId rdfs:domain gnc:probeset))
  (triples
      ;; Subject identifier.  The SQL expression yields the trimmed
      ;; ProbeSet.Name, or '' when the name is NULL or blank.
      (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))"
                        ProbeSetIdName)))
            (probeset-id (field ProbeSet Id)))
        (if (string-null? id)
            ;; No usable name: fall back to "probeset" plus the numeric Id.
            (string->identifier
             "probeset"
             (number->string
              probeset-id))
            ;; Otherwise derive the identifier from the name, replacing
            ;; every character outside [A-Za-z0-9:] with "_".
            (string->identifier
             ""
             (regexp-substitute/global
              #f "[^A-Za-z0-9:]"
              id
              'pre "_" 'post)
             #:separator ""
             #:proc string-capitalize-first)))
    (set rdf:type 'gnc:probeset)
    (set rdfs:label (field ProbeSet Name))
    ;; CRLF sequences in the alias column are rewritten to "; ".
    (set skos:altLabel
         (replace-substrings
          (field ProbeSet alias)
          '(("\r\n" . "; "))))
    (set gnt:hasChip
         (string->identifier
          "platform"
          (field ("IFNULL(GeneChip.Name, '')" GeneChipName))))
    (set gnt:hasTargetId
         (field ("NULLIF(TRIM(ProbeSet.TargetId), '')"
                 TargetId)))
    (set gnt:symbol (field ProbeSet Symbol))
    (set dct:description (sanitize-rdf-string (field ProbeSet description)))
    (set gnt:targetsRegion
         (sanitize-rdf-string
          (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')"
                  Probe_set_target_region))))
    (set gnt:chr (field ProbeSet Chr))
    ;; Numeric columns: NULL is mapped to '' in SQL and the value is
    ;; annotated with ^^xsd:double.
    (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double))
    (set gnt:mbMm8 (annotate-field (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8))
                                   '^^xsd:double))
    (set gnt:mb2016
         (annotate-field (field ("IFNULL(ProbeSet.Mb_2016, '')" Mb_2016))
                         '^^xsd:double))
    (set gnt:hasSpecificity
         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
                                 Probe_set_specificity))
                         '^^xsd:double))
    (set gnt:hasBlatScore
         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
                                 Probe_set_BLAT_score))
                         '^^xsd:double))
    (set gnt:hasBlatMbStart
         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
                                 Probe_set_Blat_Mb_start))
                         '^^xsd:double))
    (set gnt:hasBlatMbStart2016
         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '')"
                                 Probe_set_Blat_Mb_start_2016))
                         '^^xsd:double))
    (set gnt:hasBlatMbEnd
         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
                                 Probe_set_Blat_Mb_end))
                         '^^xsd:double))
    ;; Reads the *end* column; this clause previously selected
    ;; Probe_set_Blat_Mb_start_2016, duplicating gnt:hasBlatMbStart2016.
    (set gnt:hasBlatMbEnd2016
         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end_2016, '')"
                                 Probe_set_Blat_Mb_end_2016))
                         '^^xsd:double))
    (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq)))
    (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq)))
    ;; External database cross-references, each passed to `ontology'
    ;; together with its prefix symbol.
    (set gnt:hasHomologeneId (ontology 'homologene:
                                       (field ("IFNULL(ProbeSet.HomoloGeneID, '')"
                                               HomoloGeneID))))
    (set gnt:hasUniprotId (ontology 'uniprot:
                                    (field ("IFNULL(ProbeSet.UniProtID, '')"
                                            UniProtID))))
    (set gnt:hasPubChemId (ontology
                           'pubchem:
                           (field ("IFNULL(ProbeSet.PubChem_ID, '')"
                                   PubChem_ID))))
    (set gnt:hasKeggId (ontology
                        'kegg:
                        (field ("IFNULL(ProbeSet.KEGG_ID, '')"
                                KEGG_ID))))
    ;; OMIM values that are not already numbers have every non-digit
    ;; character stripped before being handed to `ontology'.
    (set gnt:hasOmimId (ontology
                        'omim:
                        (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')"
                                            OMIM))))
                          (if (number? omim)
                              omim
                              (regexp-substitute/global
                               #f "[^0-9]"
                               omim
                               'pre "" 'post)))))
    (set gnt:hasChebiId (ontology
                         'chebi:
                         (field ("IFNULL(ProbeSet.ChEBI_ID, '')"
                                 ChEBI_ID))))))
;; Top-level driver form: runs the `dump-probeset' dump using the
;; connection settings read from the command line, and names the
;; documentation (Markdown) and RDF (Turtle) output paths.
(dump-with-documentation
(name "ProbeSet Metadata")
(connection %connection-settings)
(table-metadata? #f)
;; Prefix table: each entry pairs a prefix label with its namespace string.
;; NOTE(review): every namespace string below is empty -- confirm whether
;; the namespace IRIs were lost and need to be restored.
(prefixes
'(("gn:" "")
("probeset:" "")
("gnc:" "")
("gnt:" "")
("rdf:" "")
("kegg:" "")
("pubchem:" "")
("omim:" "")
("rdfs:" "")
("uniprot:" "")
("chebi:" "")
("dct:" "")
("owl:" "")
("homologene:" "")
("xsd:" "")
("skos:" "")))
;; The dump definitions to run.
(inputs
(list dump-probeset))
;; Output locations, given as paths relative to the working directory.
(outputs
'(#:documentation "./docs/dump-probeset.md"
#:rdf "./verified-data/dump-probeset.ttl")))