about summary refs log tree commit diff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-07-19 16:41:36 +0300
committer BonfaceKilz 2023-07-30 12:29:56 +0300
commit 381acf546900c74a907bc56e236de4fece953869 (patch)
tree 10840b253384587cf2cf982743b440aa75ac063e
parent 47a2707a4d5fa33f3d9339c43ef28e96b116ea37 (diff)
download gn-transform-databases-381acf546900c74a907bc56e236de4fece953869.tar.gz
Use "gn:" and "gn-term:" prefixes
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/dump-dataset-metadata.scm 182
1 file changed, 92 insertions, 90 deletions
diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm
index 53c381c..c51364a 100755
--- a/examples/dump-dataset-metadata.scm
+++ b/examples/dump-dataset-metadata.scm
@@ -52,11 +52,11 @@
    (foaf:givenName rdfs:range rdfs:Literal)
    (foaf:familyName rdfs:range rdfs:Literal)
    (foaf:homepage rdfs:range rdfs:Literal)
-   (gn:address rdfs:range rdfs:Literal)
-   (gn:city rdfs:range rdfs:Literal)
-   (gn:state rdfs:range rdfs:Literal)
-   (gn:zipCode rdfs:range rdfs:Literal)
-   (gn:country rdfs:range rdfs:Literal))
+   (gn-term:address rdfs:range rdfs:Literal)
+   (gn-term:city rdfs:range rdfs:Literal)
+   (gn-term:state rdfs:range rdfs:Literal)
+   (gn-term:zipCode rdfs:range rdfs:Literal)
+   (gn-term:country rdfs:range rdfs:Literal))
   (triples (investigator-attributes->id (field Investigators FirstName)
                                         (field Investigators LastName)
                                         (field Investigators Email))
@@ -70,11 +70,11 @@
     (set foaf:familyName
          (field ("CAST(CONVERT(BINARY CONVERT(LastName USING latin1) USING utf8) AS VARCHAR(100))" LastName)))
     (set foaf:homepage (field Investigators Url))
-    (set gn:address (field Investigators Address))
-    (set gn:city (field Investigators City))
-    (set gn:state (field Investigators State))
-    (set gn:zipCode (field Investigators ZipCode))
-    (set gn:country (field Investigators Country))))
+    (set gn-term:address (field Investigators Address))
+    (set gn-term:city (field Investigators City))
+    (set gn-term:state (field Investigators State))
+    (set gn-term:zipCode (field Investigators ZipCode))
+    (set gn-term:country (field Investigators Country))))
 
 (define-dump dump-info-files
   (tables (InfoFiles
@@ -92,123 +92,125 @@
            (left-join GeneChip "USING (GeneChipId)"))
           "WHERE GN_AccesionId IS NOT NULL")
   (schema-triples
-   (gn:dataset rdfs:range rdfs:Literal)
-   (gn:datasetOfInvestigator rdfs:domain gn:dataset)
-   (gn:datasetOfOrganization rdfs:domain gn:dataset)
-   (gn:datasetOfInvestigator rdfs:range foaf:Person)
-   (gn:datasetOfInbredSet rdfs:domain gn:dataset)
-   (gn:datasetOfInbredSet rdfs:range gn:inbredSet)
-   (gn:datasetOfSpecies rdfs:domain gn:dataset)
-   (gn:datasetOfSpecies rdfs:range gn:inbredSet)
-   (gn:datasetOfTissue rdfs:domain gn:dataset)
-   (gn:datasetOfTissue rdfs:range gn:tissue)
-   (gn:normalization rdfs:domain gn:dataset)
-   (gn:normalization rdfs:range gn:avgMethod)
-   (gn:datasetOfPlatform rdfs:domain gn:dataset)
-   (gn:datasetOfPlatform rdfs:range gn:geneChip)
-   (gn:accessionId rdfs:range rdfs:Literal)
-   (gn:datasetStatusName rdfs:range rdfs:Literal)
-   (gn:summary rdfs:range rdfs:Literal)
-   (gn:aboutTissue rdfs:range rdfs:Literal)
-   (gn:geoSeries rdfs:range rdfs:Literal)
-   (gn:name rdfs:range rdfs:Literal)
-   (gn:title rdfs:range rdfs:Literal)
-   (gn:publicationTitle rdfs:range rdfs:Literal)
-   (gn:specifics rdfs:range rdfs:Literal)
-   (gn:datasetGroup rdfs:range rdfs:Literal)
-   (gn:aboutCases rdfs:range rdfs:Literal)
-   (gn:aboutPlatform rdfs:range rdfs:Literal)
-   (gn:aboutDataProcessing rdfs:range rdfs:Literal)
-   (gn:notes rdfs:range rdfs:Literal)
-   (gn:experimentDesign rdfs:range rdfs:Literal)
-   (gn:contributors rdfs:range rdfs:Literal)
-   (gn:citation rdfs:range rdfs:Literal)
-   (gn:acknowledgment rdfs:range rdfs:Literal))
-  (triples (ontology 'dataset:
-                     (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                               (field InfoFiles InfoPageName)
-                                               'pre "_" 'post))
+   (gn-term:dataset rdfs:range rdfs:Literal)
+   (gn-term:datasetOfInvestigator rdfs:domain gn:dataset)
+   (gn-term:datasetOfOrganization rdfs:domain gn:dataset)
+   (gn-term:datasetOfInvestigator rdfs:range foaf:Person)
+   (gn-term:datasetOfInbredSet rdfs:domain gn:dataset)
+   (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet)
+   (gn-term:datasetOfSpecies rdfs:domain gn:dataset)
+   (gn-term:datasetOfSpecies rdfs:range gn:inbredSet)
+   (gn-term:datasetOfTissue rdfs:domain gn:dataset)
+   (gn-term:datasetOfTissue rdfs:range gn:tissue)
+   (gn-term:normalization rdfs:domain gn:dataset)
+   (gn-term:normalization rdfs:range gn:avgMethod)
+   (gn-term:datasetOfPlatform rdfs:domain gn:dataset)
+   (gn-term:datasetOfPlatform rdfs:range gn:geneChip)
+   (gn-term:accessionId rdfs:range rdfs:Literal)
+   (gn-term:datasetStatusName rdfs:range rdfs:Literal)
+   (gn-term:summary rdfs:range rdfs:Literal)
+   (gn-term:aboutTissue rdfs:range rdfs:Literal)
+   (gn-term:geoSeries rdfs:range rdfs:Literal)
+   (gn-term:name rdfs:range rdfs:Literal)
+   (gn-term:title rdfs:range rdfs:Literal)
+   (gn-term:publicationTitle rdfs:range rdfs:Literal)
+   (gn-term:specifics rdfs:range rdfs:Literal)
+   (gn-term:datasetGroup rdfs:range rdfs:Literal)
+   (gn-term:aboutCases rdfs:range rdfs:Literal)
+   (gn-term:aboutPlatform rdfs:range rdfs:Literal)
+   (gn-term:aboutDataProcessing rdfs:range rdfs:Literal)
+   (gn-term:notes rdfs:range rdfs:Literal)
+   (gn-term:experimentDesign rdfs:range rdfs:Literal)
+   (gn-term:contributors rdfs:range rdfs:Literal)
+   (gn-term:citation rdfs:range rdfs:Literal)
+   (gn-term:acknowledgment rdfs:range rdfs:Literal))
+  (triples (string->identifier
+            "" (regexp-substitute/global #f "[^A-Za-z0-9:]"
+                                        (field InfoFiles InfoPageName)
+                                        'pre "_" 'post)
+            #:separator ""
+            #:proc string-capitalize-first)
     (set rdf:type (string->symbol
                    (field ("IF(GenoFreeze.Id IS NOT NULL, 'gn:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gn:phenotypeDataset', 'gn:dataset'))"
                            rdfType))))
-    (set gn:name (regexp-substitute/global
-                  #f "^[Nn]one$"
-                  (field InfoFiles InfoPageName)
-                  ""))
-    (set gn:fullName
+    (set gn-term:name (regexp-substitute/global
+                       #f "^[Nn]one$"
+                       (field InfoFiles InfoPageName)
+                       ""))
+    (set gn-term:fullName
          (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))"
                  DatasetFullName)))
     (set dct:created
          (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))"
                  createTimeGenoFreeze)))
-    (set gn:datasetOfInvestigator
+    (set gn-term:datasetOfInvestigator
          (investigator-attributes->id (field Investigators FirstName)
                                       (field Investigators LastName)
                                       (field Investigators Email)))
-    (set gn:datasetOfOrganization
+    (set gn-term:datasetOfOrganization
          (field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations)))
-    (set gn:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId)))
-    (set gn:datasetStatusName (string-downcase
-                               (field DatasetStatus DatasetStatusName)))
-    (set gn:datasetOfInbredSet
+    (set gn-term:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId)))
+    (set gn-term:datasetStatusName (string-downcase
+                                    (field DatasetStatus DatasetStatusName)))
+    (set gn-term:datasetOfInbredSet
          (string->identifier "inbredSet" (field InbredSet Name InbredSetName)))
-    (set gn:datasetOfTissue (string->identifier "tissue"
-                                                (field Tissue Short_Name)))
-    (set gn:normalization
+    (set gn-term:datasetOfTissue (string->identifier "tissue"
+                                                     (field Tissue Short_Name)))
+    (set gn-term:normalization
          (string->identifier "avgmethod"
                              ;; If AvgMethodName is NULL, assume N/A.
                              (if (string-blank? (field AvgMethod Name AvgMethodName))
                                  "N/A" (field AvgMethod Name AvgMethodName))))
-    (set gn:datasetOfPlatform
+    (set gn-term:datasetOfPlatform
          (string->identifier "platform"
                              (field GeneChip Name GeneChip)))
-    (set gn:summary
+    (set gn-term:summary
          (sanitize-rdf-string (field Datasets Summary)))
-    (set gn:aboutTissue
+    (set gn-term:aboutTissue
          (sanitize-rdf-string (field Datasets AboutTissue)))
-    (set gn:geoSeries
+    (set gn-term:geoSeries
          (let ((s
                 (string-match "GSE[0-9]*"
                               (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries)))))
            (if s (ontology
                   'geoSeries: (match:substring s))
                "")))
-    (set gn:title
+    (set gn-term:title
          (regexp-substitute/global
           #f "^[Nn]one$"
           (field InfoFiles InfoFileTitle)
           ""))
-    (set gn:publicationTitle
+    (set gn-term:publicationTitle
          (regexp-substitute/global
           #f "^[Nn]one$"
           (field Datasets PublicationTitle)
           ""))
-    (set gn:specifics (sanitize-rdf-string (field InfoFiles Specifics)))
-    (set gn:datasetGroup (field Datasets DatasetName DatasetGroup))
-    (set gn:aboutCases
+    (set gn-term:specifics (sanitize-rdf-string (field InfoFiles Specifics)))
+    (set gn-term:datasetGroup (field Datasets DatasetName DatasetGroup))
+    (set gn-term:aboutCases
          (sanitize-rdf-string
           (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutCases USING latin1) USING utf8) AS VARCHAR(10000))" AboutCases))))
-    (set gn:aboutPlatform
+    (set gn-term:aboutPlatform
          (sanitize-rdf-string
           (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutPlatform USING latin1) USING utf8) AS VARCHAR(1500))"
                   AboutPlatform))))
-    (set gn:aboutDataProcessing
+    (set gn-term:aboutDataProcessing
          (sanitize-rdf-string
           (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutDataProcessing USING latin1) USING utf8) AS VARCHAR(1500))"
                   AboutDataProcessing))))
-    (set gn:notes
+    (set gn-term:notes
          (sanitize-rdf-string
           (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Notes USING latin1) USING utf8) AS VARCHAR(1500))"
                   GNNotes))))
-    (set gn:experimentDesign
+    (set gn-term:experimentDesign
          (sanitize-rdf-string
           (field ("CAST(CONVERT(BINARY CONVERT(Datasets.ExperimentDesign USING latin1) USING utf8) AS VARCHAR(1500))"
                   ExperimentDesign))))
-    (set gn:contributors
+    (set gn-term:contributors
          (sanitize-rdf-string
           (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Contributors USING latin1) USING utf8) AS VARCHAR(1500))"
                   Contributors))))
-    (set gn:citation
+    (set gn-term:citation
          (sanitize-rdf-string
           (regexp-substitute/global
            #f "^[Nn]one$"
@@ -216,7 +218,7 @@
             ("CAST(CONVERT(BINARY CONVERT(Datasets.Citation USING latin1) USING utf8) AS VARCHAR(1500))"
              Citation))
            "")))
-    (set gn:dataSourceAcknowledgment
+    (set gn-term:dataSourceAcknowledgment
          (sanitize-rdf-string
           (string-trim-both
            (regexp-substitute/global
@@ -224,8 +226,8 @@
             (field ("CAST(CONVERT(BINARY CONVERT(InfoFiles.Data_Source_Acknowledge USING latin1) USING utf8) AS VARCHAR(1500))"
                     Data_Source_Acknowledge))
             ""))))
-    (set gn:acknowledgment (sanitize-rdf-string
-                            (field Datasets Acknowledgment)))))
+    (set gn-term:acknowledgment (sanitize-rdf-string
+                                 (field Datasets Acknowledgment)))))
 
 
 
@@ -235,18 +237,18 @@
  (connection %connection-settings)
  (table-metadata? #f)
  (prefixes
-  (("dct:" "<http://purl.org/dc/terms/>")
-   ("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
-   ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
-   ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
-   ("gn:" "<http://genenetwork.org/terms/>")
-   ("foaf:" "<http://xmlns.com/foaf/0.1/>")
-   ("taxon:" "<http://purl.uniprot.org/taxonomy/>")
-   ("dataset:" "<http://genenetwork.org/dataset/>")))
+  '(("foaf:" "<http://xmlns.com/foaf/0.1/>")
+    ("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
+    ("gn-term:" "<http://genenetwork.org/term/>")
+    ("gn:" "<http://genenetwork.org/id/>")
+    ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+    ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+    ("taxon:" "<http://purl.uniprot.org/taxonomy/>")
+    ("dct:" "<http://purl.org/dc/terms/>")))
  (inputs
-  (dump-info-files
-   dump-investigators))
+  (list dump-info-files
+        dump-investigators))
  (outputs
-  (#:documentation "./docs/dump-info-pages.md"
-   #:rdf "./verified-data/dump-info-pages.ttl")))
+  '(#:documentation "./docs/dump-info-pages.md"
+    #:rdf "./verified-data/dump-info-pages.ttl")))