From 1ca7e679b834ccaf53a3243d0e1c2f3f9e8d56d8 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 23 Dec 2025 12:06:06 +0300 Subject: Snake case gn/gnt/gnc identifiers. Signed-off-by: Munyoki Kilyungi --- examples/dataset-metadata.scm | 216 +++++++++++++++++++++--------------------- 1 file changed, 108 insertions(+), 108 deletions(-) (limited to 'examples/dataset-metadata.scm') diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm index 9c30180..cd91dc4 100755 --- a/examples/dataset-metadata.scm +++ b/examples/dataset-metadata.scm @@ -64,27 +64,27 @@ (tables (GeneChip (left-join Species "USING (SpeciesId)"))) (schema-triples - (gnc:geneChip a skos:Concept) - (gnc:geneChip + (gnc:gene_chip a skos:Concept) + (gnc:gene_chip skos:description "This is a set of controlled terms that are used to describe a given gene chip/platform") - (gnt:hasGeoSeriesId rdfs:domain gnc:platform) - (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip) - (gnt:hasGOTreeValue a owl:ObjectProperty) - (gnt:hasGOTreeValue skos:definition "This resource the following GO tree value") - (gnt:hasGOTreeValue rdfs:domain gnc:geneChip)) + (gnt:has_geo_series_id rdfs:domain gnc:platform) + (gnt:has_geo_series_id rdfs:domain gnc:gene_chip) + (gnt:has_go_tree_value a owl:ObjectProperty) + (gnt:has_go_tree_value skos:definition "This resource the following GO tree value") + (gnt:has_go_tree_value rdfs:domain gnc:gene_chip)) (triples (string->identifier "platform" (field GeneChip Name)) - (set rdf:type 'gnc:geneChip) + (set rdf:type 'gnc:gene_chip) (set rdfs:label (field GeneChip GeneChipName)) (set skos:prefLabel (field GeneChip Name)) (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)" Title))) - (set gnt:hasGOTreeValue (field GeneChip Go_tree_value)) + (set gnt:has_go_tree_value (field GeneChip Go_tree_value)) (set xkos:classifiedUnder (string->identifier "" (remap-species-identifiers (field Species Fullname)) #:separator "" #:proc string-capitalize-first)) - (set gnt:hasGeoSeriesId + (set gnt:has_geo_series_id (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -107,70 +107,70 @@ ;; if they exist in the (Publish/Geno)Freeze tables. "LEFT JOIN InbredSet PublishInbredSet ON PublishFreeze.InbredSetId = PublishInbredSet.InbredSetId LEFT JOIN InbredSet GenoInbredSet ON GenoFreeze.InbredSetId = GenoInbredSet.InbredSetId WHERE GN_AccesionId IS NOT NULL") (schema-triples - (gnt:hasTissue rdfs:domain dcat:Dataset) - (gnt:hasTissue a owl:ObjectProperty) - (gnt:hasTissue skos:definition "Tissues this resource has") - (gnt:usesNormalization rdfs:domain dcat:Dataset) - (gnt:usesNormalization a owl:ObjectProperty) - (gnt:usesNormalization skos:definition "Normalization techniques this resource has") - (gnt:usesPlatform rdfs:domain dcat:Dataset) - (gnt:usesPlatform a owl:ObjectProperty) - (gnt:usesPlatform skos:definition "The Platform this resource uses") - (gnt:hasGeoSeriesId rdfs:domain dcat:Dataset) - (gnt:hasGeoSeriesId a owl:ObjectProperty) - (gnt:hasGeoSeriesId skos:definition "id of record in NCBI database") - (gnt:hasExperimentType rdfs:domain dcat:Dataset) - (gnt:hasExperimentType a owl:ObjectProperty) - (gnt:hasExperimentType rdfs:label "Experiment Type Metadata") - (gnt:hasExperimentType skos:definition "Information about the experiment type") - (gnt:hasTissueInfo rdfs:domain dcat:Dataset) - (gnt:hasTissueInfo a owl:ObjectProperty) - (gnt:hasTissueInfo skos:definition "Metadata about Tissue for this resource") - (gnt:hasExperimentDesignInfo rdfs:domain dcat:Dataset) - (gnt:hasExperimentDesignInfo rdfs:label "Experiment Design") - (gnt:hasExperimentDesignInfo a owl:ObjectProperty) - (gnt:hasExperimentDesignInfo skos:definition "Information about how the experiment was designed") - (gnt:hasNotes rdfs:domain dcat:Dataset) - (gnt:hasNotes a owl:ObjectProperty) - (gnt:hasNotes rdfs:label "Notes") - (gnt:hasNotes skos:definition "Extra Notes about this dataset") - (gnt:hasDataProcessingInfo rdfs:domain dcat:Dataset) - (gnt:hasDataProcessingInfo rdfs:label "About Data Processing") - (gnt:hasDataProcessingInfo a owl:ObjectProperty) - (gnt:hasDataProcessingInfo skos:definition "Information about how this dataset was processed") - (gnt:hasPlatformInfo rdfs:domain dcat:Dataset) - (gnt:hasPlatformInfo a owl:ObjectProperty) - (gnt:hasPlatformInfo rdfs:label "About Platform") - (gnt:hasPlatformInfo skos:definition "Information about the platform that was used with this dataset") - (gnt:hasCaseInfo rdfs:domain dcat:Dataset) - (gnt:hasCaseInfo rdfs:label "About Case") - (gnt:hasCaseInfo a owl:ObjectProperty) - (gnt:hasCaseInfo skos:definition "Information about the cases used in this platform") - (gnt:hasSummary rdfs:domain dcat:Dataset) - (gnt:hasSummary rdfs:label "Summary") - (gnt:hasSummary a owl:ObjectProperty) - (gnt:hasSummary skos:definition "Summary information about dataset") - (gnt:hasCitation rdfs:domain dcat:Dataset) - (gnt:hasCitation rdfs:label "Citation") - (gnt:hasCitation a owl:ObjectProperty) - (gnt:hasCitation skos:definition "Citation for this dataset") - (gnt:hasContributors rdfs:domain dcat:Dataset) - (gnt:hasContributors rdfs:label "Contributors") - (gnt:hasContributors a owl:ObjectProperty) - (gnt:hasContributors skos:definition "Contributors of this resource") - (gnt:hashasExperimentDesign rdfs:domain dcat:Dataset) - (gnt:hashasExperimentDesign rdfs:label "Experiment Design") - (gnt:hashasExperimentDesign a owl:ObjectProperty) - (gnt:hashasExperimentDesign skos:definition "Experiment Design for this resource") - (gnt:hasTissueInfo rdfs:domain dcat:Dataset) - (gnt:hasTissueInfo rdfs:label "Tissue Information") - (gnt:hasTissueInfo a owl:ObjectProperty) - (gnt:hasTissueInfo skos:definition "Tissue information about dataset") - (gnt:hasExperimentType skos:definition "Information about the experiment type") - (gnt:hasAcknowledgement rdfs:domain dcat:Dataset) - (gnt:hasAcknowledgement rdfs:label "Acknowledgement") - (gnt:hasAcknowledgement a owl:ObjectProperty) - (gnt:hasAcknowledgement skos:definition "People to acknowledge")) + (gnt:has_tissue rdfs:domain dcat:Dataset) + (gnt:has_tissue a owl:ObjectProperty) + (gnt:has_tissue skos:definition "Tissues this resource has") + (gnt:uses_normalization rdfs:domain dcat:Dataset) + (gnt:uses_normalization a owl:ObjectProperty) + (gnt:uses_normalization skos:definition "Normalization techniques this resource has") + (gnt:uses_platform rdfs:domain dcat:Dataset) + (gnt:uses_platform a owl:ObjectProperty) + (gnt:uses_platform skos:definition "The Platform this resource uses") + (gnt:has_geo_series_id rdfs:domain dcat:Dataset) + (gnt:has_geo_series_id a owl:ObjectProperty) + (gnt:has_geo_series_id skos:definition "id of record in NCBI database") + (gnt:has_experiment_type rdfs:domain dcat:Dataset) + (gnt:has_experiment_type a owl:ObjectProperty) + (gnt:has_experiment_type rdfs:label "Experiment Type Metadata") + (gnt:has_experiment_type skos:definition "Information about the experiment type") + (gnt:has_tissue_info rdfs:domain dcat:Dataset) + (gnt:has_tissue_info a owl:ObjectProperty) + (gnt:has_tissue_info skos:definition "Metadata about Tissue for this resource") + (gnt:has_experiment_design_info rdfs:domain dcat:Dataset) + (gnt:has_experiment_design_info rdfs:label "Experiment Design") + (gnt:has_experiment_design_info a owl:ObjectProperty) + (gnt:has_experiment_design_info skos:definition "Information about how the experiment was designed") + (gnt:has_notes rdfs:domain dcat:Dataset) + (gnt:has_notes a owl:ObjectProperty) + (gnt:has_notes rdfs:label "Notes") + (gnt:has_notes skos:definition "Extra Notes about this dataset") + (gnt:has_data_processing_info rdfs:domain dcat:Dataset) + (gnt:has_data_processing_info rdfs:label "About Data Processing") + (gnt:has_data_processing_info a owl:ObjectProperty) + (gnt:has_data_processing_info skos:definition "Information about how this dataset was processed") + (gnt:has_platform_info rdfs:domain dcat:Dataset) + (gnt:has_platform_info a owl:ObjectProperty) + (gnt:has_platform_info rdfs:label "About Platform") + (gnt:has_platform_info skos:definition "Information about the platform that was used with this dataset") + (gnt:has_case_info rdfs:domain dcat:Dataset) + (gnt:has_case_info rdfs:label "About Case") + (gnt:has_case_info a owl:ObjectProperty) + (gnt:has_case_info skos:definition "Information about the cases used in this platform") + (gnt:has_summary rdfs:domain dcat:Dataset) + (gnt:has_summary rdfs:label "Summary") + (gnt:has_summary a owl:ObjectProperty) + (gnt:has_summary skos:definition "Summary information about dataset") + (gnt:has_citation rdfs:domain dcat:Dataset) + (gnt:has_citation rdfs:label "Citation") + (gnt:has_citation a owl:ObjectProperty) + (gnt:has_citation skos:definition "Citation for this dataset") + (gnt:has_contributors rdfs:domain dcat:Dataset) + (gnt:has_contributors rdfs:label "Contributors") + (gnt:has_contributors a owl:ObjectProperty) + (gnt:has_contributors skos:definition "Contributors of this resource") + (gnt:has_experiment_design rdfs:domain dcat:Dataset) + (gnt:has_experiment_design rdfs:label "Experiment Design") + (gnt:has_experiment_design a owl:ObjectProperty) + (gnt:has_experiment_design skos:definition "Experiment Design for this resource") + (gnt:has_tissue_info rdfs:domain dcat:Dataset) + (gnt:has_tissue_info rdfs:label "Tissue Information") + (gnt:has_tissue_info a owl:ObjectProperty) + (gnt:has_tissue_info skos:definition "Tissue information about dataset") + (gnt:has_experiment_type skos:definition "Information about the experiment type") + (gnt:has_acknowledgement rdfs:domain dcat:Dataset) + (gnt:has_acknowledgement rdfs:label "Acknowledgement") + (gnt:has_acknowledgement a owl:ObjectProperty) + (gnt:has_acknowledgement skos:definition "People to acknowledge")) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field InfoFiles InfoPageName) @@ -179,7 +179,7 @@ (set xkos:classifiedUnder (let ([dataset-type (string-trim-both - (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', '')))" + (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:probeset', '')))" DatasetType)))]) (if (not (string-null? dataset-type)) (string->symbol @@ -214,19 +214,19 @@ (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId))) (set dct:accessRights (string-downcase (field DatasetStatus DatasetStatusName))) - (set gnt:belongsToGroup + (set gnt:belongs_to_group (string->identifier "set" (field ("IFNULL(InbredSet.Name, IFNULL(PublishInbredSet.Name, GenoInbredSet.Name))" InbredSetName)))) - (set gnt:hasTissue (string->identifier "tissue" + (set gnt:has_tissue (string->identifier "tissue" (field Tissue Short_Name))) - (set gnt:usesNormalization + (set gnt:uses_normalization (string->identifier "avgMethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) - (set gnt:hasSummary + (set gnt:has_summary (let* ((summary-link (format #f "" @@ -239,7 +239,7 @@ (field InfoFiles Summary))) (if (or (null? summary) (string-blank? summary)) "" (string->symbol summary-link)))) - (set gnt:hasTissueInfo + (set gnt:has_tissue_info (let* ((tissue-info-link (format #f "" @@ -252,7 +252,7 @@ (field Datasets AboutTissue))) (if (or (null? tissue-info) (string-blank? tissue-info)) "" (string->symbol tissue-info-link)))) - (set gnt:hasCitation + (set gnt:has_citation (let* ((citation-link (format #f "" @@ -278,7 +278,7 @@ (field InfoFiles Specifics))) (if (or (null? specifics) (string-blank? specifics)) "" (string->symbol specifics-link)))) - (set gnt:hasCaseInfo + (set gnt:has_case_info (let* ((cases-link (format #f "" @@ -291,7 +291,7 @@ (field Datasets AboutCases))) (if (or (null? cases) (string-blank? cases)) "" (string->symbol cases-link)))) - (set gnt:hasPlatformInfo + (set gnt:has_platform_info (let* ((platform-link (format #f "" @@ -304,7 +304,7 @@ (field Datasets AboutPlatform))) (if (or (null? platform) (string-blank? platform)) "" (string->symbol platform-link)))) - (set gnt:hasDataProcessingInfo + (set gnt:has_data_processing_info (let* ((processing-link (format #f "" @@ -317,7 +317,7 @@ (field Datasets AboutDataProcessing))) (if (or (null? processing) (string-blank? processing)) "" (string->symbol processing-link)))) - (set gnt:hasNotes + (set gnt:has_notes (let* ((notes-link (format #f "" @@ -330,7 +330,7 @@ (field Datasets Notes))) (if (or (null? notes) (string-blank? notes)) "" (string->symbol notes-link)))) - (set gnt:hasExperimentType + (set gnt:has_experiment_type (let* ((experiment-type-link (format #f "" @@ -343,7 +343,7 @@ (field InfoFiles Experiment_Type))) (if (or (null? experiment-type) (string-blank? experiment-type)) "" (string->symbol experiment-type-link)))) - (set gnt:hasExperimentDesign + (set gnt:has_experiment_design (let* ((experiment-design-link (format #f "" @@ -356,7 +356,7 @@ (field Datasets ExperimentDesign))) (if (or (null? experiment-design) (string-blank? experiment-design)) "" (string->symbol experiment-design-link)))) - (set gnt:hasContributors + (set gnt:has_contributors (let* ((contributors-link (format #f "" @@ -369,7 +369,7 @@ (field Datasets Contributors))) (if (or (null? contributors) (string-blank? contributors)) "" (string->symbol contributors-link)))) - (set gnt:hasAcknowledgement + (set gnt:has_acknowledgement (let* ((acknowledgment-link (format #f "" @@ -382,10 +382,10 @@ (field Datasets Acknowledgment))) (if (or (null? acknowledgment) (string-blank? acknowledgment)) "" (string->symbol acknowledgment-link)))) - (set gnt:usesPlatform + (set gnt:uses_platform (string->identifier "platform" (field GeneChip Name GeneChip))) - (set gnt:hasGeoSeriesId + (set gnt:has_geo_series_id (let ((s (string-match "GSE[0-9]*" (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries))))) @@ -406,18 +406,18 @@ (field PublishFreeze Name) 'pre "_" 'post)) (set rdf:type 'dcat:Dataset) - (set xkos:classifiedUnder 'gnc:Phenotype) + (set xkos:classifiedUnder 'gnc:phenotype) (set dct:title (field PublishFreeze FullName)) (set rdfs:label (field PublishFreeze Name)) (set skos:altLabel (field PublishFreeze ShortName)) (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) - (set gnt:belongsToGroup + (set gnt:belongs_to_group (string->identifier "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first)))) + #:separator "_" + #:proc (lambda (x) x))))) (define-transformer genofreeze (tables (GenoFreeze @@ -435,18 +435,18 @@ 'pre "_" 'post) 'pre "_" 'post)) (set rdf:type 'dcat:Dataset) - (set xkos:classifiedUnder 'gnc:Genotype) + (set xkos:classifiedUnder 'gnc:genotype) (set rdfs:label (field GenoFreeze Name)) (set dct:title (field GenoFreeze FullName)) (set skos:altLabel (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:date)) - (set gnt:belongsToGroup + (set gnt:belongs_to_group (string->identifier "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first)))) + #:separator "_" + #:proc (lambda (x) x))))) ;; Molecular Traits are also referred to as ProbeSets (define-transformer probesetfreeze @@ -458,10 +458,10 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gnt:usesNormalization rdfs:domain gnc:probeset) - (gnt:usesDataScale rdfs:domain gnc:probeset) - (gnt:usesDataScale a owl:ObjectProperty) - (gnt:usesDataScale skos:definition "Thi data scale this resource uses")) + (gnt:uses_normalization rdfs:domain gnc:probeset) + (gnt:uses_data_scale rdfs:domain gnc:probeset) + (gnt:uses_data_scale a owl:ObjectProperty) + (gnt:uses_data_scale skos:definition "Thi data scale this resource uses")) (triples (string->identifier "" @@ -470,8 +470,8 @@ (field ProbeSetFreeze Name) 'pre "_" 'post)) (set rdf:type 'dcat:Dataset) - (set xkos:classifiedUnder 'gnc:Probeset) - (set gnt:usesNormalization + (set xkos:classifiedUnder 'gnc:probeset) + (set gnt:uses_normalization (string->identifier "avgMethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) @@ -483,12 +483,12 @@ (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gnt:usesDataScale (field ProbeSetFreeze DataScale)) - (set gnt:hasTissue + (set gnt:uses_data_scale (field ProbeSetFreeze DataScale)) + (set gnt:has_tissue (string->identifier "tissue" (field Tissue Short_Name))) - (set gnt:belongsToGroup + (set gnt:belongs_to_group (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "" -- cgit 1.4.1