about summary refs log tree commit diff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-08-17 18:23:29 +0300
committer Munyoki Kilyungi 2023-08-17 18:23:29 +0300
commit 2180d25701a2ab56f7fb0a20f6e113c23c6fcb25 (patch)
tree 30b5dec29664d50a090eefa8e0dfc788cde18816
parent 2684d58401c694e30551f424eb2404f3a5674769 (diff)
download gn-transform-databases-2180d25701a2ab56f7fb0a20f6e113c23c6fcb25.tar.gz
Update genotype rdf transformation
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/dump-genotype.scm 40
1 files changed, 17 insertions, 23 deletions
diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm
index ed23e80..1024b90 100755
--- a/examples/dump-genotype.scm
+++ b/examples/dump-genotype.scm
@@ -20,6 +20,16 @@
 
 
 
+(define (remap-species-identifiers str)
+  "This procedure remaps identifiers to standard binominal. Obviously this should
+   be sorted by correcting the database!"
+  (match str                                    ; each clause below pairs a species-name string with its replacement
+    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]   ; drops the common name and the "dm6" suffix
+    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]           ; drops the parenthesised common name
+    ["Macaca mulatta" "Macaca nemestrina"]      ; NOTE(review): unlike the other clauses this changes the species epithet instead of stripping extra text -- confirm intended
+    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]             ; keeps only the name inside the parentheses
+    [str str]))                                 ; catch-all: any other name is returned unchanged
+
 (define-dump dump-genofreeze
   (tables (GenoFreeze
            (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")
@@ -47,12 +57,10 @@
     (set gnt:belongsToInbredSet
          (string->identifier "" (field InbredSet Name InbredSetName)))))
 
+
 (define-dump dump-genotypes
   (tables (Geno
-           (left-join GenoXRef "ON Geno.Id = GenoXRef.GenoId")
-           (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId")
-           (left-join InbredSet "ON InbredSet.InbredSetId = GenoFreeze.InbredSetId")
-           (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")))
+           (left-join Species "USING (SpeciesId)")))
   (schema-triples
    (gnc:genotype a skos:Concept)
    (gnc:genotype
@@ -84,19 +92,13 @@
    (gnt:chrNum a owl:ObjectProperty)
    (gnt:chrNum rdfs:domain gnc:genotype)
    (gnt:chrNum skos:definition "The chromosome number for this resource")
-   (gnt:cM a owl:ObjectProperty)
-   (gnt:cM rdfs:domain gnc:genotype)
-   (gnt:cM skos:definition "The centimorgan for this resource")
-   (gnt:usedForMapping a owl:ObjectProperty)
-   (gnt:usedForMapping rdfs:domain gnc:genotype)
-   (gnt:usedForMapping
-    skos:definition "This indicates whether this resource is used for mapping"))
+   (gnt:chrNum skos:definition "The chromosome number for this resource"))
   (triples
       (string->identifier
        ""
        (regexp-substitute/global
         #f "[^A-Za-z0-9:]"
-        (field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, '_')), Geno.Name)" abbrev))
+        (field Geno Name)
         'pre "_" 'post)
        #:separator ""
        #:proc string-capitalize-first)
@@ -116,24 +118,16 @@
     (set gnt:hasAltSourceName
          (field ("IF((Source2 = Source), NULL, Source2)"
                  Source2)))
-    (set gnt:belongsToDataset
+    (set gnt:belongsToSpecies
          (string->identifier
-          ""
-          (regexp-substitute/global
-           #f "[^A-Za-z0-9:]"
-           (field ("IFNULL(GenoFreeze.Name, '')" DatasetName))
-           'pre "_" 'post)
+          "" (remap-species-identifiers (field Species Fullname))
           #:separator ""
           #:proc string-capitalize-first))
     (set gnt:chrNum
          (annotate-field
           (field Geno chr_num)
           '^^xsd:int))
-    (set rdfs:comments (field Geno Comments))
-    (set gnt:cM
-         (annotate-field
-          (field GenoXRef cM)
-          '^^xsd:int))))
+    (set rdfs:comments (field Geno Comments))))