about | summary | refs | log | tree | commit | diff
path: root/examples/dump-genotype.scm
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-07-19 17:43:13 +0300
committer Munyoki Kilyungi 2023-07-21 14:36:43 +0300
commit 0b7f3cd96c1db6c535f35e73fc8126542a0301cd (patch)
tree e96345b256edd89cd892b0881c6ef5932763395f /examples/dump-genotype.scm
parent 50fd5b4a9f2b4c687a59ac94260ab31789aceb00 (diff)
download gn-transform-databases-0b7f3cd96c1db6c535f35e73fc8126542a0301cd.tar.gz
Dump genotypes with the new syntax
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples/dump-genotype.scm')
-rwxr-xr-x examples/dump-genotype.scm 119
1 files changed, 61 insertions, 58 deletions
diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm
index 1be1d34..0fbbbfe 100755
--- a/examples/dump-genotype.scm
+++ b/examples/dump-genotype.scm
@@ -18,9 +18,6 @@
(call-with-input-file (list-ref (command-line) 1)
read))
-(define %dump-directory
- (list-ref (command-line) 2))
-
(define-dump dump-genofreeze
@@ -29,24 +26,30 @@
(left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId"))
"WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL")
(schema-triples
- (gn:datasetOfInbredSet rdfs:range gn:inbredSet)
+ (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet)
(gn:genotypeDataset rdfs:subPropertyOf gn:dataset)
- (gn:shortName rdfs:range rdfs:Literal))
- (triples (ontology
- 'dataset:
- (regexp-substitute/global
- #f "[^A-Za-z0-9:]"
- (field GenoFreeze Name)
- 'pre "_" 'post))
+ (gn-term:shortName rdfs:range rdfs:Literal))
+ (triples
+ (string->identifier
+ ""
+ (regexp-substitute/global
+ #f "[^A-Za-z0-9:]"
+ (regexp-substitute/global
+ #f "[^A-Za-z0-9:]"
+ (field GenoFreeze Name)
+ 'pre "_" 'post)
+ 'pre "_" 'post)
+ #:separator ""
+ #:proc string-capitalize-first)
(set rdf:type 'gn:genotypeDataset)
- (set gn:name (field GenoFreeze Name))
- (set gn:fullName (field GenoFreeze FullName))
- (set gn:shortName (field GenoFreeze ShortName))
+ (set gn-term:name (field GenoFreeze Name))
+ (set gn-term:fullName (field GenoFreeze FullName))
+ (set gn-term:shortName (field GenoFreeze ShortName))
(set dct:created (annotate-field
(field GenoFreeze CreateTime)
'^^xsd:date))
- (set gn:datasetOfInbredSet
- (string->identifier "inbredSet" (field InbredSet Name InbredSetName)))))
+ (set gn-term:datasetOfInbredSet
+ (string->identifier "" (field InbredSet Name InbredSetName)))))
(define-dump dump-genotypes
(tables (Geno
@@ -54,60 +57,60 @@
(left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId")
(left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")))
(schema-triples
- (gn:genotypeDataset rdfs:subPropertyOf gn:dataset))
+ (gn:genotype rdfs:range rdfs:Literal)
+ (gn-term:genotypeDataset rdfs:subPropertyOf gn:dataset))
(triples
- (ontology
- 'genotype:
+ (string->identifier
+ ""
(regexp-substitute/global
#f "[^A-Za-z0-9:]"
(field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, ':')), Geno.Name)" abbrev))
- 'pre "_" 'post))
+ 'pre "_" 'post)
+ #:separator ""
+ #:proc string-capitalize-first)
(set rdf:type 'gn:genotype)
- (set gn:name (sanitize-rdf-string (field Geno Name)))
- (set gn:markerName (sanitize-rdf-string (field Geno Marker_Name)))
- (set gn:chr (field Geno Chr))
- (set gn:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double))
- (set gn:sequence (annotate-field (field Geno Sequence) '^^xsd:int))
- (set gn:source (field Geno Source))
- (set gn:source2 (field Geno Source2))
- (set gn:genotypeOfDataset
- (ontology 'dataset:
- (regexp-substitute/global
- #f "[^A-Za-z0-9:]"
- (field ("IFNULL(GenoFreeze.Name, '')" DatasetName))
- 'pre "_" 'post)))
- (set gn:chrNum
+ (set gn-term:name (sanitize-rdf-string (field Geno Name)))
+ (set gn-term:markerName (sanitize-rdf-string (field Geno Marker_Name)))
+ (set gn-term:chr (field Geno Chr))
+ (set gn-term:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double))
+ (set gn-term:sequence (field Geno Sequence))
+ (set gn-term:source (field Geno Source))
+ (set gn-term:source2 (field Geno Source2))
+ (set gn-term:genotypeOfDataset
+ (string->identifier
+ ""
+ (regexp-substitute/global
+ #f "[^A-Za-z0-9:]"
+ (field ("IFNULL(GenoFreeze.Name, '')" DatasetName))
+ 'pre "_" 'post)
+ #:separator ""
+ #:proc string-capitalize-first)
+ )
+ (set gn-term:chrNum
(annotate-field
(field ("IFNULL(Geno.chr_num, '')" chr_num))
'^^xsd:int))
(set gn:comments (field ("CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255))" Comments)))
- (set gn:cM
+ (set gn-term:cM
(annotate-field
(field ("IFNULL(GenoXRef.cM, '')" Chr_mm8))
'^^xsd:int))))
-(call-with-target-database
- %connection-settings
- (lambda (db)
- (with-output-to-file (string-append %dump-directory "dump-genotype.ttl")
- (lambda ()
- (prefix "dct:" "<http://purl.org/dc/terms/>")
- (prefix "foaf:" "<http://xmlns.com/foaf/0.1/>")
- (prefix "generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
- (prefix "gn:" "<http://genenetwork.org/>")
- (prefix "owl:" "<http://www.w3.org/2002/07/owl#>")
- (prefix "phenotype:" "<http://genenetwork.org/phenotype/>")
- (prefix "pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
- (prefix "rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
- (prefix "rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
- (prefix "uniprot:" "<http://purl.uniprot.org/uniprot/>")
- (prefix "up:" "<http://purl.uniprot.org/core/>")
- (prefix "xsd:" "<http://www.w3.org/2001/XMLSchema#>")
- (prefix "genotype:" "<http://genenetwork.org/genotype/>")
- (prefix "dataset:" "<http://genenetwork.org/dataset/>")
- (newline)
- (dump-genofreeze db)
- (dump-genotypes db))
- #:encoding "utf8")))
+(dump-with-documentation
+ (name "Genotype Metadata")
+ (connection %connection-settings)
+ (table-metadata? #f)
+ (prefixes
+ '(("gn:" "<http://genenetwork.org/id/>")
+ ("gn-term:" "<http://genenetwork.org/term/>")
+ ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+ ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
+ (inputs
+ (list dump-genofreeze
+ dump-genotypes))
+ (outputs
+ '(#:documentation "./docs/dump-genotype.md"
+ #:rdf "./verified-data/dump-genotype.ttl")))