aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-08-17 18:23:29 +0300
committerMunyoki Kilyungi2023-08-17 18:23:29 +0300
commit2180d25701a2ab56f7fb0a20f6e113c23c6fcb25 (patch)
tree30b5dec29664d50a090eefa8e0dfc788cde18816
parent2684d58401c694e30551f424eb2404f3a5674769 (diff)
downloadgn-transform-databases-2180d25701a2ab56f7fb0a20f6e113c23c6fcb25.tar.gz
Update genotype rdf transformation
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-xexamples/dump-genotype.scm40
1 files changed, 17 insertions, 23 deletions
diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm
index ed23e80..1024b90 100755
--- a/examples/dump-genotype.scm
+++ b/examples/dump-genotype.scm
@@ -20,6 +20,16 @@
+(define (remap-species-identifiers str) ; STR is matched against the literal names listed below
+ "This procedure remaps identifiers to standard binominal. Obviously this should
+ be sorted by correcting the database!"
+ (match str ; each clause reads [name-to-recognise replacement]
+ ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
+ ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
+ ["Macaca mulatta" "Macaca nemestrina"] ; NOTE(review): unlike the other clauses this yields a different species, not a trimmed label -- confirm it is intended
+ ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
+ [str str])) ; catch-all: any value not listed above is returned unchanged
+
(define-dump dump-genofreeze
(tables (GenoFreeze
(left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")
@@ -47,12 +57,10 @@
(set gnt:belongsToInbredSet
(string->identifier "" (field InbredSet Name InbredSetName)))))
+
(define-dump dump-genotypes
(tables (Geno
- (left-join GenoXRef "ON Geno.Id = GenoXRef.GenoId")
- (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId")
- (left-join InbredSet "ON InbredSet.InbredSetId = GenoFreeze.InbredSetId")
- (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")))
+ (left-join Species "USING (SpeciesId)")))
(schema-triples
(gnc:genotype a skos:Concept)
(gnc:genotype
@@ -84,19 +92,13 @@
(gnt:chrNum a owl:ObjectProperty)
(gnt:chrNum rdfs:domain gnc:genotype)
(gnt:chrNum skos:definition "The chromosome number for this resource")
- (gnt:cM a owl:ObjectProperty)
- (gnt:cM rdfs:domain gnc:genotype)
- (gnt:cM skos:definition "The centimorgan for this resource")
- (gnt:usedForMapping a owl:ObjectProperty)
- (gnt:usedForMapping rdfs:domain gnc:genotype)
- (gnt:usedForMapping
- skos:definition "This indicates whether this resource is used for mapping"))
+ (gnt:chrNum skos:definition "The chromosome number for this resource"))
(triples
(string->identifier
""
(regexp-substitute/global
#f "[^A-Za-z0-9:]"
- (field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, '_')), Geno.Name)" abbrev))
+ (field Geno Name)
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
@@ -116,24 +118,16 @@
(set gnt:hasAltSourceName
(field ("IF((Source2 = Source), NULL, Source2)"
Source2)))
- (set gnt:belongsToDataset
+ (set gnt:belongsToSpecies
(string->identifier
- ""
- (regexp-substitute/global
- #f "[^A-Za-z0-9:]"
- (field ("IFNULL(GenoFreeze.Name, '')" DatasetName))
- 'pre "_" 'post)
+ "" (remap-species-identifiers (field Species Fullname))
#:separator ""
#:proc string-capitalize-first))
(set gnt:chrNum
(annotate-field
(field Geno chr_num)
'^^xsd:int))
- (set rdfs:comments (field Geno Comments))
- (set gnt:cM
- (annotate-field
- (field GenoXRef cM)
- '^^xsd:int))))
+ (set rdfs:comments (field Geno Comments))))