aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-12-11 15:00:16 +0300
committer: Munyoki Kilyungi, 2023-12-14 00:55:53 +0300
commit: c3127599d06270acf85a5738c22913d079c38355 (patch)
tree: da3f4987b249f32586caf54c4991b258388f78fc /examples
parent: 718fe5c900a5cb6390f96e0e2911bd9d7d2b7347 (diff)
download: gn-transform-databases-c3127599d06270acf85a5738c22913d079c38355.tar.gz
Remove some resource links from ProbeSet RDF transform.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples')
-rwxr-xr-x examples/probeset.scm 209
1 file changed, 2 insertions, 207 deletions
diff --git a/examples/probeset.scm b/examples/probeset.scm
index eeb2dcc..d214117 100755
--- a/examples/probeset.scm
+++ b/examples/probeset.scm
@@ -14,16 +14,9 @@
(define-transformer probeset
(tables (ProbeSet
(left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")
- (left-join GeneList "ON GeneList.GeneID = ProbeSet.GeneId")
- (left-join GeneList_rn33 "ON GeneList.geneSymbol = ProbeSet.Symbol")
- (left-join Species "ON GeneChip.SpeciesId = Species.Id")))
+ (left-join Species "ON GeneChip.SpeciesId = Species.Id"))
+ "WHERE ProbeSet.Name IS NOT NULL")
(schema-triples
- (gnc:pantherLink rdf:type gnc:ResourceLink)
- (gnc:pantherLink rdfs:label "PANTHER")
- (gnc:pantherLink rdfs:comments "Gene and protein data resources from Celera-ABI")
- (gnc:gnt:NCBIGeneLink rdfs:Class gnc:ResourceLink)
- (gnc:gnt:NCBIGeneLink rdfs:label "Gene")
- (gnc:gnt:NCBIGeneLink rdfs:comments "Info from NCBI Entrez Gene")
(gnc:omimLink rdfs:Class gnc:ResourceLink)
(gnc:omimLink rdfs:label "OMIM")
(gnc:omimLink rdfs:comments "Summary from On Mendelion Inheritance in Man")
@@ -33,33 +26,6 @@
(gnc:uniprotLink rdfs:Class gnc:ResourceLink)
(gnc:uniprotLink rdfs:label "UniProt")
(gnc:uniprotLink rdfs:comments "UniProt")
- (gnc:stringLink rdfs:Class gnc:ResourceLink)
- (gnc:stringLink rdfs:label "STRING")
- (gnc:stringLink rdfs:comments "Protein interactions: known and inferred")
- (gnc:gtexLink rdfs:Class gnc:ResourceLink)
- (gnc:gtexLink rdfs:label "GTEx Portal")
- (gnc:gtexLink rdfs:comments "GTEx Portal")
- (gnc:ebiGwasLink rdfs:Class gnc:ResourceLink)
- (gnc:ebiGwasLink rdfs:label "EBI GWAS")
- (gnc:ebiGwasLink rdfs:comments "EBI GWAS")
- (gnc:genemaniaLink rdfs:Class gnc:ResourceLink)
- (gnc:genemaniaLink rdfs:label "GeneMANIA")
- (gnc:genemaniaLink rdfs:comments "GeneMANIA")
- (gnc:gemmaLink rdfs:Class gnc:ResourceLink)
- (gnc:gemmaLink rdfs:label "Gemma")
- (gnc:gemmaLink rdfs:comments "Meta-analysis of gene expression data")
- (gnc:biogpsLink rdfs:Class gnc:ResourceLink)
- (gnc:biogpsLink rdfs:label "BioGPS")
- (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types")
- (gnc:abaLink rdfs:Class gnc:ResourceLink)
- (gnc:abaLink rdfs:label "ABA")
- (gnc:abaLink rdfs:comments "Allen Brain Atlas")
- (gnc:ucsRefSeqLink rdfs:Class gnc:ResourceLink)
- (gnc:ucsRefSeqLink rdfs:label "Info from UCSC Genome Browser")
- (gnc:ucsRefSeqLink rdfs:comments "UCSC")
- (gnc:proteinAtlasLink rdfs:Class gnc:ResourceLink)
- (gnc:proteinAtlasLink rdfs:label "Protein Atlas")
- (gnc:proteinAtlasLink rdfs:comments "Human Protein Atlas")
(gnt:hasChip a owl:ObjectProperty)
(gnt:hasChip rdfs:domain gnc:Probeset)
(gnt:hasTargetId a owl:ObjectProperty)
@@ -148,17 +114,6 @@
((string=? "-" strand-probe)
" on the minus strand")
(else "")))))))
- ;; NCBI Gene Link
- (set dct:references
- (let ((geneId (field ProbeSet GeneId)))
- (if (not (string-blank? geneId))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids="
- geneId
- "a gnc:NCBIGeneLink"))
- "")))
;; OMIM Link
(set dct:references
(let ((omim (field ProbeSet OMIM)))
@@ -192,166 +147,6 @@
uniprot
"a gnc:uniprotLink"))
"")))
- ;; STRING Link
- (set dct:references
- (let ((symbol (field ProbeSet Symbol)))
- (if (not (string-blank? symbol))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://string-db.org/newstring_cgi/show_network_section.pl?identifier="
- symbol
- "a gnc:stringLink"))
- "")))
- ;; GTEX link
- (set dct:references
- (let ((symbol (field ProbeSet Symbol)))
- (if (not (string-blank? symbol))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "https://www.gtexportal.org/home/gene/"
- (string-trim-both symbol)
- "a gnc:gtexLink"))
- "")))
- ;; EBI GWAS Link
- (set dct:references
- (let ((symbol (field ProbeSet Symbol)))
- (if (not (string-blank? symbol))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "https://www.ebi.ac.uk/gwas/search?query="
- (string-trim-both symbol)
- "a gnc:ebiGwasLink"))
- "")))
- ;; Protein Atlas Link
- (set dct:references
- (let ((symbol (field ProbeSet Symbol)))
- (if (not (string-blank? symbol))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://www.proteinatlas.org/search/"
- (string-trim-both symbol)
- "a gnc:proteinAtlasLink"))
- "")))
- ;; UCS Link
- (set dct:references
- (let* ((symbol (field ProbeSet Symbol))
- (species (field Species Name))
- (db (if (string=? species "mouse")
- "mm10" "rn7"))
- (transcriptId (field ProbeSet RefSeq_TranscriptId))
- (kgId (field GeneList_rn33 kgId))
- (transcriptStart
- (if (string=? species "mouse")
- (field ("(GeneList.txStart * 1000000)" TranscriptStartMm10))
- (field ("(GeneList_rn33.txStart * 1000000)" TranscriptStartRn7))))
- (chromosome
- (if (string=? species "mouse")
- (field GeneList Chromosome)
- (field GeneList_rn33 Chromosome)))
- (transcriptEnd
- (if (string=? species "mouse")
- (field ("(GeneList.txEnd * 1000000)" TranscriptEndMm10))
- (field ("(GeneList_rn33.txEnd * 1000000)" TranscriptEndRn7))))
- (url (format
- #f
- "http://genome.cse.ucsc.edu/cgi-bin/hgTracks?db=~a&hgg_gene="
- db)))
- (if (and (not (string-blank? symbol))
- (not (string-blank? transcriptId))
- (number? transcriptStart)
- (number? transcriptEnd)
- (not (string-blank? chromosome))
- (or (string=? species "mouse")
- (string=? species "rat")))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> .~%<~0@*~a~1@*~a&hgg_chrom=chr~2@*~a&hgg_start=~3@*~a&hgg_end=~4@*~a> ~2@*~a"
- url
- transcriptId
- chromosome
- transcriptStart
- transcriptEnd
- "a gnc:ucsRefSeqLink"
- ))
- "")))
- ;; PANTHER link
- (set dct:references
- (let ((symbol (field ProbeSet Symbol)))
- (if (not (string-blank? symbol))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
- (string-trim-both symbol)
- "a gnc:PantherLink"))
- "")))
- ;; A GeneManiaLink
- (set dct:references
- (let ((symbol (field ProbeSet GeneId))
- (species (lower-case-and-replace-spaces
- (field Species FullName))))
- (if (and (not (string-blank? symbol))
- (not (string-blank? species))
- (or
- (string=? species "mus-musculus")
- (string=? species "rattus-norvegicus")
- (string=? species "homo-sapiens")))
- (string->symbol
- (format #f "<~0@*~a/~1@*~a/~2@*~a> .~%<~0@*~a/~1@*~a/~2@*~a> ~3@*~a"
- "https://genemania.org/search" species (string-trim-both symbol)
- "a gnc:genemaniaLink"))
- "")))
- ;; ABA Link
- (set dct:references
- (let ((symbol (field ProbeSet Symbol))
- (geneId (field ProbeSet GeneId))
- (species (field Species name)))
- (if (and (not (string-blank? symbol))
- (not (string-blank? species))
- (or (string=? species "human")
- (string=? species "mouse")))
- (string->symbol
- (format #f "<~0@*~a> .~%<~0@*~a> ~2@*~a"
- "http://mouse.brain-map.org/search/show?search_type=gene&search_term="
- "a "
- (if (string=? species "mouse")
- (string-trim-both symbol)
- geneId)))
- "")))
- ;; Gemma Link
- (set dct:references
- (let ((geneId (field ProbeSet GeneId)))
- (if (not (string-blank? geneId))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid="
- geneId
- "a gnc:gemmaLink"))
- "")))
- ;; BioGPS Link
- (set dct:references
- (let ((geneId (field ProbeSet GeneId))
- (species (field Species Name)))
- (if (and (not (string-blank? geneId))
- (not (string-blank? species))
- (or
- (string=? species "mouse")
- (string=? species "rat")
- (string=? species "humans")))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a~2@*~a~3@*~a> .~%<~0@*~a~1@*~a~2@*~a~3@*~a> ~4@*~a ; ~5@*~a"
- "http://biogps.org/?org="
- species
- "#goto=genereport&id="
- geneId
- "a gnc:ResourceLink"))
- "")))
(set gnt:strandProbe
(field ProbeSet Strand_Probe))
(set gnt:hasSpecificity