From 2180d25701a2ab56f7fb0a20f6e113c23c6fcb25 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Thu, 17 Aug 2023 18:23:29 +0300
Subject: Update genotype rdf transformation

Signed-off-by: Munyoki Kilyungi
---
 examples/dump-genotype.scm | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm
index ed23e80..1024b90 100755
--- a/examples/dump-genotype.scm
+++ b/examples/dump-genotype.scm
@@ -20,6 +20,16 @@
+(define (remap-species-identifiers str)
+  "This procedure remaps identifiers to standard binominal. Obviously this should
+  be sorted by correcting the database!"
+  (match str
+    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
+    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
+    ["Macaca mulatta" "Macaca nemestrina"]
+    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
+    [str str]))
+
 (define-dump dump-genofreeze
   (tables (GenoFreeze
            (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")
@@ -47,12 +57,10 @@
     (set gnt:belongsToInbredSet
          (string->identifier
           "" (field InbredSet Name InbredSetName)))))
+
 (define-dump dump-genotypes
   (tables (Geno
-           (left-join GenoXRef "ON Geno.Id = GenoXRef.GenoId")
-           (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId")
-           (left-join InbredSet "ON InbredSet.InbredSetId = GenoFreeze.InbredSetId")
-           (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")))
+           (left-join Species "USING (SpeciesId)")))
   (schema-triples
    (gnc:genotype a skos:Concept)
    (gnc:genotype
@@ -84,19 +92,13 @@
    (gnt:chrNum a owl:ObjectProperty)
    (gnt:chrNum rdfs:domain gnc:genotype)
    (gnt:chrNum skos:definition "The chromosome number for this resource")
-   (gnt:cM a owl:ObjectProperty)
-   (gnt:cM rdfs:domain gnc:genotype)
-   (gnt:cM skos:definition "The centimorgan for this resource")
-   (gnt:usedForMapping a owl:ObjectProperty)
-   (gnt:usedForMapping rdfs:domain gnc:genotype)
-   (gnt:usedForMapping
-    skos:definition "This indicates whether this resource is used for mapping"))
+   (gnt:chrNum skos:definition "The chromosome number for this resource"))
   (triples
       (string->identifier
        ""
        (regexp-substitute/global
         #f "[^A-Za-z0-9:]"
-        (field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, '_')), Geno.Name)" abbrev))
+        (field Geno Name)
         'pre "_" 'post)
        #:separator ""
        #:proc string-capitalize-first)
@@ -116,24 +118,16 @@
     (set gnt:hasAltSourceName
          (field ("IF((Source2 = Source), NULL, Source2)" Source2)))
-    (set gnt:belongsToDataset
+    (set gnt:belongsToSpecies
          (string->identifier
-          ""
-          (regexp-substitute/global
-           #f "[^A-Za-z0-9:]"
-           (field ("IFNULL(GenoFreeze.Name, '')" DatasetName))
-           'pre "_" 'post)
+          "" (remap-species-identifiers (field Species Fullname))
           #:separator ""
           #:proc string-capitalize-first))
     (set gnt:chrNum
          (annotate-field
           (field Geno chr_num)
           '^^xsd:int))
-    (set rdfs:comments (field Geno Comments))
-    (set gnt:cM
-         (annotate-field
-          (field GenoXRef cM)
-          '^^xsd:int))))
+    (set rdfs:comments (field Geno Comments))))
-- 
cgit v1.2.3