about summary refs log tree commit diff
path: root/examples/dataset-metadata.scm
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2025-12-23 12:06:06 +0300
committer: Munyoki Kilyungi, 2026-01-13 12:02:49 +0300
commit: 1ca7e679b834ccaf53a3243d0e1c2f3f9e8d56d8 (patch)
tree: 514c544706986f3edd0b3f53a89113e334a0b9a3 /examples/dataset-metadata.scm
parent: c42933e8f474d8d14eac387d5a94da6f52210629 (diff)
download: gn-transform-databases-1ca7e679b834ccaf53a3243d0e1c2f3f9e8d56d8.tar.gz
Snake case gn/gnt/gnc identifiers.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples/dataset-metadata.scm')
-rwxr-xr-x examples/dataset-metadata.scm 216
1 files changed, 108 insertions, 108 deletions
diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm
index 9c30180..cd91dc4 100755
--- a/examples/dataset-metadata.scm
+++ b/examples/dataset-metadata.scm
@@ -64,27 +64,27 @@
   (tables (GeneChip
            (left-join Species "USING (SpeciesId)")))
   (schema-triples
-   (gnc:geneChip a skos:Concept)
-   (gnc:geneChip
+   (gnc:gene_chip a skos:Concept)
+   (gnc:gene_chip
     skos:description
     "This is a set of controlled terms that are used to describe a given gene chip/platform")
-   (gnt:hasGeoSeriesId rdfs:domain gnc:platform)
-   (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip)
-   (gnt:hasGOTreeValue a owl:ObjectProperty)
-   (gnt:hasGOTreeValue skos:definition "This resource the following GO tree value")
-   (gnt:hasGOTreeValue rdfs:domain gnc:geneChip))
+   (gnt:has_geo_series_id rdfs:domain gnc:platform)
+   (gnt:has_geo_series_id rdfs:domain gnc:gene_chip)
+   (gnt:has_go_tree_value a owl:ObjectProperty)
+   (gnt:has_go_tree_value skos:definition "This resource the following GO tree value")
+   (gnt:has_go_tree_value rdfs:domain gnc:gene_chip))
   (triples (string->identifier "platform" (field GeneChip Name))
-    (set rdf:type 'gnc:geneChip)
+    (set rdf:type 'gnc:gene_chip)
     (set rdfs:label (field GeneChip GeneChipName))
     (set skos:prefLabel (field GeneChip Name))
     (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)"
                                Title)))
-    (set gnt:hasGOTreeValue (field GeneChip Go_tree_value))
+    (set gnt:has_go_tree_value (field GeneChip Go_tree_value))
     (set xkos:classifiedUnder
          (string->identifier "" (remap-species-identifiers (field Species Fullname))
                              #:separator ""
                              #:proc string-capitalize-first))
-    (set gnt:hasGeoSeriesId
+    (set gnt:has_geo_series_id
          (ontology 'geoSeries:
                    (string-trim-both (field GeneChip GeoPlatform))))))
 
@@ -107,70 +107,70 @@
           ;; if they exist in the (Publish/Geno)Freeze tables.
           "LEFT JOIN InbredSet PublishInbredSet ON PublishFreeze.InbredSetId = PublishInbredSet.InbredSetId LEFT JOIN InbredSet GenoInbredSet ON GenoFreeze.InbredSetId = GenoInbredSet.InbredSetId  WHERE GN_AccesionId IS NOT NULL")
   (schema-triples
-   (gnt:hasTissue rdfs:domain dcat:Dataset)
-   (gnt:hasTissue a owl:ObjectProperty)
-   (gnt:hasTissue skos:definition "Tissues this resource has")
-   (gnt:usesNormalization rdfs:domain dcat:Dataset)
-   (gnt:usesNormalization a owl:ObjectProperty)
-   (gnt:usesNormalization skos:definition "Normalization techniques this resource has")
-   (gnt:usesPlatform rdfs:domain dcat:Dataset)
-   (gnt:usesPlatform a owl:ObjectProperty)
-   (gnt:usesPlatform skos:definition "The Platform this resource uses")
-   (gnt:hasGeoSeriesId rdfs:domain dcat:Dataset)
-   (gnt:hasGeoSeriesId a owl:ObjectProperty)
-   (gnt:hasGeoSeriesId skos:definition "id of record in NCBI database")
-   (gnt:hasExperimentType rdfs:domain dcat:Dataset)
-   (gnt:hasExperimentType a owl:ObjectProperty)
-   (gnt:hasExperimentType rdfs:label "Experiment Type Metadata")
-   (gnt:hasExperimentType skos:definition "Information about the experiment type")
-   (gnt:hasTissueInfo rdfs:domain dcat:Dataset)
-   (gnt:hasTissueInfo a owl:ObjectProperty)
-   (gnt:hasTissueInfo skos:definition "Metadata about Tissue for this resource")
-   (gnt:hasExperimentDesignInfo rdfs:domain dcat:Dataset)
-   (gnt:hasExperimentDesignInfo rdfs:label "Experiment Design")
-   (gnt:hasExperimentDesignInfo a owl:ObjectProperty)
-   (gnt:hasExperimentDesignInfo skos:definition "Information about how the experiment was designed")
-   (gnt:hasNotes rdfs:domain dcat:Dataset)
-   (gnt:hasNotes a owl:ObjectProperty)
-   (gnt:hasNotes rdfs:label "Notes")
-   (gnt:hasNotes skos:definition "Extra Notes about this dataset")
-   (gnt:hasDataProcessingInfo rdfs:domain dcat:Dataset)
-   (gnt:hasDataProcessingInfo rdfs:label "About Data Processing")
-   (gnt:hasDataProcessingInfo a owl:ObjectProperty)
-   (gnt:hasDataProcessingInfo skos:definition "Information about how this dataset was processed")
-   (gnt:hasPlatformInfo rdfs:domain dcat:Dataset)
-   (gnt:hasPlatformInfo a owl:ObjectProperty)
-   (gnt:hasPlatformInfo rdfs:label "About Platform")
-   (gnt:hasPlatformInfo skos:definition "Information about the platform that was used with this dataset")
-   (gnt:hasCaseInfo rdfs:domain dcat:Dataset)
-   (gnt:hasCaseInfo rdfs:label "About Case")
-   (gnt:hasCaseInfo a owl:ObjectProperty)
-   (gnt:hasCaseInfo skos:definition "Information about the cases used in this platform")
-   (gnt:hasSummary rdfs:domain dcat:Dataset)
-   (gnt:hasSummary rdfs:label "Summary")
-   (gnt:hasSummary a owl:ObjectProperty)
-   (gnt:hasSummary skos:definition "Summary information about dataset")
-   (gnt:hasCitation rdfs:domain dcat:Dataset)
-   (gnt:hasCitation rdfs:label "Citation")
-   (gnt:hasCitation a owl:ObjectProperty)
-   (gnt:hasCitation skos:definition "Citation for this dataset")
-   (gnt:hasContributors rdfs:domain dcat:Dataset)
-   (gnt:hasContributors rdfs:label "Contributors")
-   (gnt:hasContributors a owl:ObjectProperty)
-   (gnt:hasContributors skos:definition "Contributors of this resource")
-   (gnt:hashasExperimentDesign rdfs:domain dcat:Dataset)
-   (gnt:hashasExperimentDesign rdfs:label "Experiment Design")
-   (gnt:hashasExperimentDesign a owl:ObjectProperty)
-   (gnt:hashasExperimentDesign skos:definition "Experiment Design for this resource")
-   (gnt:hasTissueInfo rdfs:domain dcat:Dataset)
-   (gnt:hasTissueInfo rdfs:label "Tissue Information")
-   (gnt:hasTissueInfo a owl:ObjectProperty)
-   (gnt:hasTissueInfo skos:definition "Tissue information about dataset")
-   (gnt:hasExperimentType skos:definition "Information about the experiment type")
-   (gnt:hasAcknowledgement rdfs:domain dcat:Dataset)
-   (gnt:hasAcknowledgement rdfs:label "Acknowledgement")
-   (gnt:hasAcknowledgement a owl:ObjectProperty)
-   (gnt:hasAcknowledgement skos:definition "People to acknowledge"))
+   (gnt:has_tissue rdfs:domain dcat:Dataset)
+   (gnt:has_tissue a owl:ObjectProperty)
+   (gnt:has_tissue skos:definition "Tissues this resource has")
+   (gnt:uses_normalization rdfs:domain dcat:Dataset)
+   (gnt:uses_normalization a owl:ObjectProperty)
+   (gnt:uses_normalization skos:definition "Normalization techniques this resource has")
+   (gnt:uses_platform rdfs:domain dcat:Dataset)
+   (gnt:uses_platform a owl:ObjectProperty)
+   (gnt:uses_platform skos:definition "The Platform this resource uses")
+   (gnt:has_geo_series_id rdfs:domain dcat:Dataset)
+   (gnt:has_geo_series_id a owl:ObjectProperty)
+   (gnt:has_geo_series_id skos:definition "id of record in NCBI database")
+   (gnt:has_experiment_type rdfs:domain dcat:Dataset)
+   (gnt:has_experiment_type a owl:ObjectProperty)
+   (gnt:has_experiment_type rdfs:label "Experiment Type Metadata")
+   (gnt:has_experiment_type skos:definition "Information about the experiment type")
+   (gnt:has_tissue_info rdfs:domain dcat:Dataset)
+   (gnt:has_tissue_info a owl:ObjectProperty)
+   (gnt:has_tissue_info skos:definition "Metadata about Tissue for this resource")
+   (gnt:has_experiment_design_info rdfs:domain dcat:Dataset)
+   (gnt:has_experiment_design_info rdfs:label "Experiment Design")
+   (gnt:has_experiment_design_info a owl:ObjectProperty)
+   (gnt:has_experiment_design_info skos:definition "Information about how the experiment was designed")
+   (gnt:has_notes rdfs:domain dcat:Dataset)
+   (gnt:has_notes a owl:ObjectProperty)
+   (gnt:has_notes rdfs:label "Notes")
+   (gnt:has_notes skos:definition "Extra Notes about this dataset")
+   (gnt:has_data_processing_info rdfs:domain dcat:Dataset)
+   (gnt:has_data_processing_info rdfs:label "About Data Processing")
+   (gnt:has_data_processing_info a owl:ObjectProperty)
+   (gnt:has_data_processing_info skos:definition "Information about how this dataset was processed")
+   (gnt:has_platform_info rdfs:domain dcat:Dataset)
+   (gnt:has_platform_info a owl:ObjectProperty)
+   (gnt:has_platform_info rdfs:label "About Platform")
+   (gnt:has_platform_info skos:definition "Information about the platform that was used with this dataset")
+   (gnt:has_case_info rdfs:domain dcat:Dataset)
+   (gnt:has_case_info rdfs:label "About Case")
+   (gnt:has_case_info a owl:ObjectProperty)
+   (gnt:has_case_info skos:definition "Information about the cases used in this platform")
+   (gnt:has_summary rdfs:domain dcat:Dataset)
+   (gnt:has_summary rdfs:label "Summary")
+   (gnt:has_summary a owl:ObjectProperty)
+   (gnt:has_summary skos:definition "Summary information about dataset")
+   (gnt:has_citation rdfs:domain dcat:Dataset)
+   (gnt:has_citation rdfs:label "Citation")
+   (gnt:has_citation a owl:ObjectProperty)
+   (gnt:has_citation skos:definition "Citation for this dataset")
+   (gnt:has_contributors rdfs:domain dcat:Dataset)
+   (gnt:has_contributors rdfs:label "Contributors")
+   (gnt:has_contributors a owl:ObjectProperty)
+   (gnt:has_contributors skos:definition "Contributors of this resource")
+   (gnt:has_experiment_design rdfs:domain dcat:Dataset)
+   (gnt:has_experiment_design rdfs:label "Experiment Design")
+   (gnt:has_experiment_design a owl:ObjectProperty)
+   (gnt:has_experiment_design skos:definition "Experiment Design for this resource")
+   (gnt:has_tissue_info rdfs:domain dcat:Dataset)
+   (gnt:has_tissue_info rdfs:label "Tissue Information")
+   (gnt:has_tissue_info a owl:ObjectProperty)
+   (gnt:has_tissue_info skos:definition "Tissue information about dataset")
+   (gnt:has_experiment_type skos:definition "Information about the experiment type")
+   (gnt:has_acknowledgement rdfs:domain dcat:Dataset)
+   (gnt:has_acknowledgement rdfs:label "Acknowledgement")
+   (gnt:has_acknowledgement a owl:ObjectProperty)
+   (gnt:has_acknowledgement skos:definition "People to acknowledge"))
   (triples (string->identifier
             "" (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                          (field InfoFiles InfoPageName)
@@ -179,7 +179,7 @@
     (set xkos:classifiedUnder
          (let ([dataset-type
                 (string-trim-both
-                 (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', '')))"
+                 (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:probeset', '')))"
                          DatasetType)))])
            (if (not (string-null? dataset-type))
                (string->symbol
@@ -214,19 +214,19 @@
     (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
     (set dct:accessRights (string-downcase
                            (field DatasetStatus DatasetStatusName)))
-    (set gnt:belongsToGroup
+    (set gnt:belongs_to_group
          (string->identifier
           "set"
           (field ("IFNULL(InbredSet.Name, IFNULL(PublishInbredSet.Name, GenoInbredSet.Name))"
                   InbredSetName))))
-    (set gnt:hasTissue (string->identifier "tissue"
+    (set gnt:has_tissue (string->identifier "tissue"
                                            (field Tissue Short_Name)))
-    (set gnt:usesNormalization
+    (set gnt:uses_normalization
          (string->identifier "avgMethod"
                              ;; If AvgMethodName is NULL, assume N/A.
                              (if (string-blank? (field AvgMethod Name AvgMethodName))
                                  "N/A" (field AvgMethod Name AvgMethodName))))
-    (set gnt:hasSummary
+    (set gnt:has_summary
          (let* ((summary-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/summary.rtf>"
@@ -239,7 +239,7 @@
                  (field InfoFiles Summary)))
            (if (or (null? summary) (string-blank? summary))
                "" (string->symbol summary-link))))
-    (set gnt:hasTissueInfo
+    (set gnt:has_tissue_info
          (let* ((tissue-info-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/tissue.rtf>"
@@ -252,7 +252,7 @@
                  (field Datasets AboutTissue)))
            (if (or (null? tissue-info) (string-blank? tissue-info))
                "" (string->symbol tissue-info-link))))
-    (set gnt:hasCitation
+    (set gnt:has_citation
          (let* ((citation-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/citation.rtf>"
@@ -278,7 +278,7 @@
                  (field InfoFiles Specifics)))
            (if (or (null? specifics) (string-blank? specifics))
                "" (string->symbol specifics-link))))
-    (set gnt:hasCaseInfo
+    (set gnt:has_case_info
          (let* ((cases-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/cases.rtf>"
@@ -291,7 +291,7 @@
                  (field Datasets AboutCases)))
            (if (or (null? cases) (string-blank? cases))
                "" (string->symbol cases-link))))
-    (set gnt:hasPlatformInfo
+    (set gnt:has_platform_info
          (let* ((platform-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/platform.rtf>"
@@ -304,7 +304,7 @@
                  (field Datasets AboutPlatform)))
            (if (or (null? platform) (string-blank? platform))
                "" (string->symbol platform-link))))
-    (set gnt:hasDataProcessingInfo
+    (set gnt:has_data_processing_info
          (let* ((processing-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/processing.rtf>"
@@ -317,7 +317,7 @@
                  (field Datasets AboutDataProcessing)))
            (if (or (null? processing) (string-blank? processing))
                "" (string->symbol processing-link))))
-    (set gnt:hasNotes
+    (set gnt:has_notes
          (let* ((notes-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/notes.rtf>"
@@ -330,7 +330,7 @@
                  (field Datasets Notes)))
            (if (or (null? notes) (string-blank? notes))
                "" (string->symbol notes-link))))
-    (set gnt:hasExperimentType
+    (set gnt:has_experiment_type
          (let* ((experiment-type-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/experiment-type.rtf>"
@@ -343,7 +343,7 @@
                  (field InfoFiles Experiment_Type)))
            (if (or (null? experiment-type) (string-blank? experiment-type))
                "" (string->symbol experiment-type-link))))
-    (set gnt:hasExperimentDesign
+    (set gnt:has_experiment_design
          (let* ((experiment-design-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/experiment-design.rtf>"
@@ -356,7 +356,7 @@
                  (field Datasets ExperimentDesign)))
            (if (or (null? experiment-design) (string-blank? experiment-design))
                "" (string->symbol experiment-design-link))))
-    (set gnt:hasContributors
+    (set gnt:has_contributors
          (let* ((contributors-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/contributors.rtf>"
@@ -369,7 +369,7 @@
                  (field Datasets Contributors)))
            (if (or (null? contributors) (string-blank? contributors))
                "" (string->symbol contributors-link))))
-    (set gnt:hasAcknowledgement
+    (set gnt:has_acknowledgement
          (let* ((acknowledgment-link
                  (format
                   #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/acknowledgment.rtf>"
@@ -382,10 +382,10 @@
                  (field Datasets Acknowledgment)))
            (if (or (null? acknowledgment) (string-blank? acknowledgment))
                "" (string->symbol acknowledgment-link))))
-    (set gnt:usesPlatform
+    (set gnt:uses_platform
          (string->identifier "platform"
                              (field GeneChip Name GeneChip)))
-    (set gnt:hasGeoSeriesId
+    (set gnt:has_geo_series_id
          (let ((s
                 (string-match "GSE[0-9]*"
                               (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries)))))
@@ -406,18 +406,18 @@
                                  (field PublishFreeze Name)
                                  'pre "_" 'post))
     (set rdf:type 'dcat:Dataset)
-    (set xkos:classifiedUnder 'gnc:Phenotype)
+    (set xkos:classifiedUnder 'gnc:phenotype)
     (set dct:title (field PublishFreeze FullName))
     (set rdfs:label (field PublishFreeze Name))
     (set skos:altLabel (field PublishFreeze ShortName))
     (set dct:created (annotate-field
                       (field PublishFreeze CreateTime)
                       '^^xsd:date))
-    (set gnt:belongsToGroup
+    (set gnt:belongs_to_group
          (string->identifier
           "set" (field InbredSet Name InbredSetName)
-          #:separator ""
-          #:proc string-capitalize-first))))
+          #:separator "_"
+          #:proc (lambda (x) x)))))
 
 (define-transformer genofreeze
   (tables (GenoFreeze
@@ -435,18 +435,18 @@
          'pre "_" 'post)
         'pre "_" 'post))
     (set rdf:type 'dcat:Dataset)
-    (set xkos:classifiedUnder 'gnc:Genotype)
+    (set xkos:classifiedUnder 'gnc:genotype)
     (set rdfs:label (field GenoFreeze Name))
     (set dct:title (field GenoFreeze FullName))
     (set skos:altLabel (field GenoFreeze ShortName))
     (set dct:created (annotate-field
                       (field GenoFreeze CreateTime)
                       '^^xsd:date))
-    (set gnt:belongsToGroup
+    (set gnt:belongs_to_group
          (string->identifier
           "set" (field InbredSet Name InbredSetName)
-          #:separator ""
-          #:proc string-capitalize-first))))
+          #:separator "_"
+          #:proc (lambda (x) x)))))
 
 ;; Molecular Traits are also referred to as ProbeSets
 (define-transformer probesetfreeze
@@ -458,10 +458,10 @@
            (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId"))
           "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id")
   (schema-triples
-   (gnt:usesNormalization rdfs:domain gnc:probeset)
-   (gnt:usesDataScale rdfs:domain gnc:probeset)
-   (gnt:usesDataScale a owl:ObjectProperty)
-   (gnt:usesDataScale skos:definition "Thi data scale this resource uses"))
+   (gnt:uses_normalization rdfs:domain gnc:probeset)
+   (gnt:uses_data_scale rdfs:domain gnc:probeset)
+   (gnt:uses_data_scale a owl:ObjectProperty)
+   (gnt:uses_data_scale skos:definition "Thi data scale this resource uses"))
   (triples
       (string->identifier
        ""
@@ -470,8 +470,8 @@
         (field ProbeSetFreeze Name)
         'pre "_" 'post))
     (set rdf:type 'dcat:Dataset)
-    (set xkos:classifiedUnder 'gnc:Probeset)
-    (set gnt:usesNormalization
+    (set xkos:classifiedUnder 'gnc:probeset)
+    (set gnt:uses_normalization
          (string->identifier "avgMethod"
                              ;; If AvgMethodName is NULL, assume N/A.
                              (if (string-blank? (field AvgMethod Name AvgMethodName))
@@ -483,12 +483,12 @@
     (set dct:created (annotate-field
                       (field ProbeSetFreeze CreateTime)
                       '^^xsd:datetime))
-    (set gnt:usesDataScale (field ProbeSetFreeze DataScale))
-    (set gnt:hasTissue
+    (set gnt:uses_data_scale (field ProbeSetFreeze DataScale))
+    (set gnt:has_tissue
          (string->identifier
           "tissue"
           (field Tissue Short_Name)))
-    (set gnt:belongsToGroup
+    (set gnt:belongs_to_group
          (string->identifier
           "set" (field InbredSet Name InbredSetName)
           #:separator ""