about summary refs log tree commit diff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-07-19 17:43:13 +0300
committer Munyoki Kilyungi 2023-07-21 14:36:43 +0300
commit 0b7f3cd96c1db6c535f35e73fc8126542a0301cd (patch)
tree e96345b256edd89cd892b0881c6ef5932763395f
parent 50fd5b4a9f2b4c687a59ac94260ab31789aceb00 (diff)
download gn-transform-databases-0b7f3cd96c1db6c535f35e73fc8126542a0301cd.tar.gz
Dump genotypes with the new syntax
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/dump-genotype.scm 119
1 files changed, 61 insertions, 58 deletions
diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm
index 1be1d34..0fbbbfe 100755
--- a/examples/dump-genotype.scm
+++ b/examples/dump-genotype.scm
@@ -18,9 +18,6 @@
   (call-with-input-file (list-ref (command-line) 1)
     read))
 
-(define %dump-directory
-  (list-ref (command-line) 2))
-
 
 
 (define-dump dump-genofreeze
@@ -29,24 +26,30 @@
            (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId"))
           "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL")
   (schema-triples
-   (gn:datasetOfInbredSet rdfs:range gn:inbredSet)
+   (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet)
    (gn:genotypeDataset rdfs:subPropertyOf gn:dataset)
-   (gn:shortName rdfs:range rdfs:Literal))
-  (triples (ontology
-            'dataset:
-            (regexp-substitute/global
-             #f "[^A-Za-z0-9:]"
-             (field GenoFreeze Name)
-             'pre "_" 'post))
+   (gn-term:shortName rdfs:range rdfs:Literal))
+  (triples
+      (string->identifier
+       ""
+       (regexp-substitute/global
+        #f "[^A-Za-z0-9:]"
+        (regexp-substitute/global
+         #f "[^A-Za-z0-9:]"
+         (field GenoFreeze Name)
+         'pre "_" 'post)
+        'pre "_" 'post)
+       #:separator ""
+       #:proc string-capitalize-first)
     (set rdf:type 'gn:genotypeDataset)
-    (set gn:name (field GenoFreeze Name))
-    (set gn:fullName (field GenoFreeze FullName))
-    (set gn:shortName (field GenoFreeze ShortName))
+    (set gn-term:name (field GenoFreeze Name))
+    (set gn-term:fullName (field GenoFreeze FullName))
+    (set gn-term:shortName (field GenoFreeze ShortName))
     (set dct:created (annotate-field
                       (field GenoFreeze CreateTime)
                       '^^xsd:date))
-    (set gn:datasetOfInbredSet
-         (string->identifier "inbredSet" (field InbredSet Name InbredSetName)))))
+    (set gn-term:datasetOfInbredSet
+         (string->identifier "" (field InbredSet Name InbredSetName)))))
 
 (define-dump dump-genotypes
   (tables (Geno
@@ -54,60 +57,60 @@
            (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId")
            (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")))
   (schema-triples
-   (gn:genotypeDataset rdfs:subPropertyOf gn:dataset))
+   (gn:genotype rdfs:range rdfs:Literal)
+   (gn-term:genotypeDataset rdfs:subPropertyOf gn:dataset))
   (triples
-      (ontology
-       'genotype:
+      (string->identifier
+       ""
        (regexp-substitute/global
         #f "[^A-Za-z0-9:]"
         (field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, ':')), Geno.Name)" abbrev))
-        'pre "_" 'post))
+        'pre "_" 'post)
+       #:separator ""
+       #:proc string-capitalize-first)
     (set rdf:type 'gn:genotype)
-    (set gn:name (sanitize-rdf-string (field Geno Name)))
-    (set gn:markerName (sanitize-rdf-string (field Geno Marker_Name)))
-    (set gn:chr (field Geno Chr))
-    (set gn:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double))
-    (set gn:sequence (annotate-field (field Geno Sequence) '^^xsd:int))
-    (set gn:source (field Geno Source))
-    (set gn:source2 (field Geno Source2))
-    (set gn:genotypeOfDataset
-         (ontology 'dataset:
-                   (regexp-substitute/global
-                    #f "[^A-Za-z0-9:]"
-                    (field ("IFNULL(GenoFreeze.Name, '')" DatasetName))
-                    'pre "_" 'post)))
-    (set gn:chrNum
+    (set gn-term:name (sanitize-rdf-string (field Geno Name)))
+    (set gn-term:markerName (sanitize-rdf-string (field Geno Marker_Name)))
+    (set gn-term:chr (field Geno Chr))
+    (set gn-term:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double))
+    (set gn-term:sequence (field Geno Sequence))
+    (set gn-term:source (field Geno Source))
+    (set gn-term:source2 (field Geno Source2))
+    (set gn-term:genotypeOfDataset
+         (string->identifier
+          ""
+          (regexp-substitute/global
+                     #f "[^A-Za-z0-9:]"
+                     (field ("IFNULL(GenoFreeze.Name, '')" DatasetName))
+                     'pre "_" 'post)
+          #:separator ""
+          #:proc string-capitalize-first)
+         )
+    (set gn-term:chrNum
          (annotate-field
           (field ("IFNULL(Geno.chr_num, '')" chr_num))
           '^^xsd:int))
     (set gn:comments (field ("CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255))" Comments)))
-    (set gn:cM
+    (set gn-term:cM
          (annotate-field
           (field ("IFNULL(GenoXRef.cM, '')" Chr_mm8))
           '^^xsd:int))))
 
 
 
-(call-with-target-database
- %connection-settings
- (lambda (db)
-   (with-output-to-file (string-append %dump-directory "dump-genotype.ttl")
-     (lambda ()
-       (prefix "dct:" "<http://purl.org/dc/terms/>")
-       (prefix "foaf:" "<http://xmlns.com/foaf/0.1/>")
-       (prefix "generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
-       (prefix "gn:" "<http://genenetwork.org/>")
-       (prefix "owl:" "<http://www.w3.org/2002/07/owl#>")
-       (prefix "phenotype:" "<http://genenetwork.org/phenotype/>")
-       (prefix "pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
-       (prefix "rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
-       (prefix "rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
-       (prefix "uniprot:" "<http://purl.uniprot.org/uniprot/>")
-       (prefix "up:" "<http://purl.uniprot.org/core/>")
-       (prefix "xsd:" "<http://www.w3.org/2001/XMLSchema#>")
-       (prefix "genotype:" "<http://genenetwork.org/genotype/>")
-       (prefix "dataset:" "<http://genenetwork.org/dataset/>")
-       (newline)
-       (dump-genofreeze db)
-       (dump-genotypes db))
-     #:encoding "utf8")))
+(dump-with-documentation
+ (name "Genotype Metadata")
+ (connection %connection-settings)
+ (table-metadata? #f)
+ (prefixes
+  '(("gn:" "<http://genenetwork.org/id/>")
+    ("gn-term:" "<http://genenetwork.org/term/>")
+    ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+    ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+    ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
+ (inputs
+  (list dump-genofreeze
+        dump-genotypes))
+ (outputs
+  '(#:documentation "./docs/dump-genotype.md"
+    #:rdf "./verified-data/dump-genotype.ttl")))