diff options
| author | Munyoki Kilyungi | 2025-12-23 12:06:06 +0300 |
|---|---|---|
| committer | Munyoki Kilyungi | 2026-01-13 12:02:49 +0300 |
| commit | 1ca7e679b834ccaf53a3243d0e1c2f3f9e8d56d8 (patch) | |
| tree | 514c544706986f3edd0b3f53a89113e334a0b9a3 | |
| parent | c42933e8f474d8d14eac387d5a94da6f52210629 (diff) | |
| download | gn-transform-databases-1ca7e679b834ccaf53a3243d0e1c2f3f9e8d56d8.tar.gz | |
Snake case gn/gnt/gnc identifiers.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
| -rwxr-xr-x | examples/classification.scm | 114 | ||||
| -rwxr-xr-x | examples/dataset-metadata.scm | 216 | ||||
| -rwxr-xr-x | examples/genbank.scm | 20 | ||||
| -rwxr-xr-x | examples/genelist.scm | 190 | ||||
| -rwxr-xr-x | examples/generif-old.scm | 241 | ||||
| -rwxr-xr-x | examples/generif.scm | 22 | ||||
| -rwxr-xr-x | examples/genotype.scm | 61 | ||||
| -rwxr-xr-x | examples/phenotype.scm | 48 | ||||
| -rwxr-xr-x | examples/strains.scm | 26 | ||||
| -rwxr-xr-x | examples/tissue.scm | 3 |
10 files changed, 346 insertions, 595 deletions
diff --git a/examples/classification.scm b/examples/classification.scm index 3024af6..8951c85 100755 --- a/examples/classification.scm +++ b/examples/classification.scm @@ -27,67 +27,67 @@ (define-transformer classification-scheme-species (tables (Species)) (schema-triples - (gnc:ResourceClassificationScheme a skos:ConceptScheme) - (gnc:ResourceClassificationScheme skos:prefLabel "GeneNetwork Classification Scheme For Resources") - (gnc:ResourceClassificationScheme xkos:numberOfLevels "3") - (gnc:ResourceClassificationScheme xkos:levels "( gnc:DatasetType gnc:Set gnc:Species )") - (gnc:DatasetType a xkos:ClassificationLevel) - (gnc:DatasetType skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype") - (gnc:DatasetType xkos:depth "1") - (gnc:DatasetType skos:member gnc:Probeset) - (gnc:DatasetType skos:member gnc:Genotype) - (gnc:DatasetType skos:member gnc:Phenotype) - (gnc:Probeset skos:prefLabel "mRNA Assay Datasets") - (gnc:Probeset skos:altLabel "ProbeSet") - (gnc:Genotype skos:prefLabel "Genotype") - (gnc:Genotype skos:altLabel "DNA Markers and SNPs") - (gnc:Phenotype skos:prefLabel "Phenotype") - (gnc:Phenotype skos:altLabel "Traits and Cofactors") - (gnc:Species a xkos:ClassificationLevel) - (gnc:Species skos:prefLabel "The species in which this resource belongs") - (gnc:Species xkos:depth "3") - (gnc:Species xkos:specializes gnc:Set)) - (triples "gnc:Species" + (gnc:resource_classification_scheme a skos:ConceptScheme) + (gnc:resource_classification_scheme skos:prefLabel "GeneNetwork Classification Scheme For Resources which are either defines as a dataset, an inbred group, or a species.") + (gnc:resource_classification_scheme xkos:numberOfLevels "3") + (gnc:resource_classification_scheme xkos:levels "( gnc:dataset_type gnc:set gnc:species )") + (gnc:dataset_type a xkos:ClassificationLevel) + (gnc:dataset_type skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype") + (gnc:dataset_type xkos:depth "1") + (gnc:dataset_type skos:member gnc:probeset) + (gnc:dataset_type skos:member gnc:genotype) + (gnc:dataset_type skos:member gnc:phenotype) + (gnc:probeset skos:prefLabel "mRNA Assay Datasets") + (gnc:probeset skos:altLabel "ProbeSet") + (gnc:genotype skos:prefLabel "Genotype") + (gnc:genotype skos:altLabel "DNA Markers and SNPs") + (gnc:phenotype skos:prefLabel "Phenotype") + (gnc:phenotype skos:altLabel "Traits and Cofactors") + (gnc:species a xkos:ClassificationLevel) + (gnc:species skos:prefLabel "The species in which this resource belongs") + (gnc:species xkos:depth "3") + (gnc:species xkos:specializes gnc:set)) + (triples "gnc:species" (set skos:member (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)))) + #:separator "_" + #:proc string-downcase)))) (define-transformer classification-scheme-set (tables (InbredSet)) (schema-triples - (gnc:Set a xkos:ClassificationLevel) - (gnc:Set skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to") - (gnc:Set xkos:depth "2") - (gnc:Set xkos:generalizes gnc:Species)) - (triples "gnc:Set" + (gnc:set a xkos:ClassificationLevel) + (gnc:set skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to") + (gnc:set xkos:depth "2") + (gnc:set xkos:generalizes gnc:species)) + (triples "gnc:set" (set skos:member (string->identifier "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first)))) + #:separator "_" + #:proc (lambda (x) x))))) (define-transformer species (tables (Species)) (schema-triples (gnt:family a owl:ObjectProperty) - (gnt:family rdfs:domain gnc:Species) + (gnt:family rdfs:domain gnc:species) (gnt:family skos:definition "This resource belongs to this family") - (gnt:shortName a owl:ObjectProperty) - (gnt:shortName rdfs:domain gnc:Species) - (gnt:shortName skos:definition "The short name of a given resource") - (gnt:belongsToSpecies a rdf:property) - (gnt:belongsToSpecies rdf:comment "This resource given to this species") - (gnt:belongsToSpecies rdf:label "belongsToSpecies")) + (gnt:short_name a owl:ObjectProperty) + (gnt:short_name rdfs:domain gnc:species) + (gnt:short_name skos:definition "The short name of a given resource") + (gnt:belongs_to_species a rdf:property) + (gnt:belongs_to_species rdf:comment "This resource given to this species") + (gnt:belongs_to_species rdf:label "belongsToSpecies")) (triples (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first) - (set skos:inScheme 'gnc:ResourceClassificationScheme) + #:separator "_" + #:proc string-downcase) + (set skos:inScheme 'gnc:resource_classification_scheme) (set rdfs:label (remap-species-identifiers (field Species Fullname))) (set skos:prefLabel (field Species MenuName)) (set skos:altLabel (field Species SpeciesName)) - (set gnt:shortName (field Species Name)) + (set gnt:short_name (field Species Name)) (set gnt:family (field Species Family)) (set skos:notation (ontology 'taxon: @@ -99,32 +99,32 @@ (left-join MappingMethod "ON InbredSet.MappingMethodId=MappingMethod.Id"))) (schema-triples - (gnt:geneticType a owl:ObjectProperty) - (gnt:geneticType rdfs:domain gnc:set) + (gnt:genetic_type a owl:ObjectProperty) + (gnt:genetic_type rdfs:domain gnc:set) (gnt:code a owl:ObjectProperty) (gnt:code rdfs:domain gnc:set) ;; Already defined as an owl prop in species - (gnt:family rdfs:domain gnc:Set) - (gnt:mappingMethod a owl:ObjectProperty) - (gnt:mappingMethod rdfs:domain gnc:set) - (gnt:belongsToGroup a rdf:property) - (gnt:belongsToGroup rdf:comment "This resource given to this group") - (gnt:belongsToGroup rdf:label "belongsToGroup")) + (gnt:family rdfs:domain gnc:set) + (gnt:mapping_method a owl:ObjectProperty) + (gnt:mapping_method rdfs:domain gnc:set) + (gnt:belongs_to_group a rdf:property) + (gnt:belongs_to_group rdf:comment "This resource given to this group") + (gnt:belongs_to_group rdf:label "belongs_to_group")) (triples (string->identifier - "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first) - (set skos:inScheme 'gnc:ResourceClassificationScheme) + "set" (field InbredSet Name InbredSetName) + #:separator "_" + #:proc (lambda (x) x)) + (set skos:inScheme 'gnc:resource_classification_scheme) (set rdfs:label (field InbredSet FullName)) (set skos:prefLabel (field InbredSet Name InbredSetName)) - (set gnt:geneticType (field InbredSet GeneticType)) + (set gnt:genetic_type (field InbredSet GeneticType)) (set gnt:family (field InbredSet Family)) - (set gnt:mappingMethod (field MappingMethod Name)) + (set gnt:mapping_method (field MappingMethod Name)) (set gnt:code (field InbredSet InbredSetCode)) (set xkos:generalizes (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)))) + #:separator "_" + #:proc string-downcase)))) diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm index 9c30180..cd91dc4 100755 --- a/examples/dataset-metadata.scm +++ b/examples/dataset-metadata.scm @@ -64,27 +64,27 @@ (tables (GeneChip (left-join Species "USING (SpeciesId)"))) (schema-triples - (gnc:geneChip a skos:Concept) - (gnc:geneChip + (gnc:gene_chip a skos:Concept) + (gnc:gene_chip skos:description "This is a set of controlled terms that are used to describe a given gene chip/platform") - (gnt:hasGeoSeriesId rdfs:domain gnc:platform) - (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip) - (gnt:hasGOTreeValue a owl:ObjectProperty) - (gnt:hasGOTreeValue skos:definition "This resource the following GO tree value") - (gnt:hasGOTreeValue rdfs:domain gnc:geneChip)) + (gnt:has_geo_series_id rdfs:domain gnc:platform) + (gnt:has_geo_series_id rdfs:domain gnc:gene_chip) + (gnt:has_go_tree_value a owl:ObjectProperty) + (gnt:has_go_tree_value skos:definition "This resource the following GO tree value") + (gnt:has_go_tree_value rdfs:domain gnc:gene_chip)) (triples (string->identifier "platform" (field GeneChip Name)) - (set rdf:type 'gnc:geneChip) + (set rdf:type 'gnc:gene_chip) (set rdfs:label (field GeneChip GeneChipName)) (set skos:prefLabel (field GeneChip Name)) (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)" Title))) - (set gnt:hasGOTreeValue (field GeneChip Go_tree_value)) + (set gnt:has_go_tree_value (field GeneChip Go_tree_value)) (set xkos:classifiedUnder (string->identifier "" (remap-species-identifiers (field Species Fullname)) #:separator "" #:proc string-capitalize-first)) - (set gnt:hasGeoSeriesId + (set gnt:has_geo_series_id (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -107,70 +107,70 @@ ;; if they exist in the (Publish/Geno)Freeze tables. "LEFT JOIN InbredSet PublishInbredSet ON PublishFreeze.InbredSetId = PublishInbredSet.InbredSetId LEFT JOIN InbredSet GenoInbredSet ON GenoFreeze.InbredSetId = GenoInbredSet.InbredSetId WHERE GN_AccesionId IS NOT NULL") (schema-triples - (gnt:hasTissue rdfs:domain dcat:Dataset) - (gnt:hasTissue a owl:ObjectProperty) - (gnt:hasTissue skos:definition "Tissues this resource has") - (gnt:usesNormalization rdfs:domain dcat:Dataset) - (gnt:usesNormalization a owl:ObjectProperty) - (gnt:usesNormalization skos:definition "Normalization techniques this resource has") - (gnt:usesPlatform rdfs:domain dcat:Dataset) - (gnt:usesPlatform a owl:ObjectProperty) - (gnt:usesPlatform skos:definition "The Platform this resource uses") - (gnt:hasGeoSeriesId rdfs:domain dcat:Dataset) - (gnt:hasGeoSeriesId a owl:ObjectProperty) - (gnt:hasGeoSeriesId skos:definition "id of record in NCBI database") - (gnt:hasExperimentType rdfs:domain dcat:Dataset) - (gnt:hasExperimentType a owl:ObjectProperty) - (gnt:hasExperimentType rdfs:label "Experiment Type Metadata") - (gnt:hasExperimentType skos:definition "Information about the experiment type") - (gnt:hasTissueInfo rdfs:domain dcat:Dataset) - (gnt:hasTissueInfo a owl:ObjectProperty) - (gnt:hasTissueInfo skos:definition "Metadata about Tissue for this resource") - (gnt:hasExperimentDesignInfo rdfs:domain dcat:Dataset) - (gnt:hasExperimentDesignInfo rdfs:label "Experiment Design") - (gnt:hasExperimentDesignInfo a owl:ObjectProperty) - (gnt:hasExperimentDesignInfo skos:definition "Information about how the experiment was designed") - (gnt:hasNotes rdfs:domain dcat:Dataset) - (gnt:hasNotes a owl:ObjectProperty) - (gnt:hasNotes rdfs:label "Notes") - (gnt:hasNotes skos:definition "Extra Notes about this dataset") - (gnt:hasDataProcessingInfo rdfs:domain dcat:Dataset) - (gnt:hasDataProcessingInfo rdfs:label "About Data Processing") - (gnt:hasDataProcessingInfo a owl:ObjectProperty) - (gnt:hasDataProcessingInfo skos:definition "Information about how this dataset was processed") - (gnt:hasPlatformInfo rdfs:domain dcat:Dataset) - (gnt:hasPlatformInfo a owl:ObjectProperty) - (gnt:hasPlatformInfo rdfs:label "About Platform") - (gnt:hasPlatformInfo skos:definition "Information about the platform that was used with this dataset") - (gnt:hasCaseInfo rdfs:domain dcat:Dataset) - (gnt:hasCaseInfo rdfs:label "About Case") - (gnt:hasCaseInfo a owl:ObjectProperty) - (gnt:hasCaseInfo skos:definition "Information about the cases used in this platform") - (gnt:hasSummary rdfs:domain dcat:Dataset) - (gnt:hasSummary rdfs:label "Summary") - (gnt:hasSummary a owl:ObjectProperty) - (gnt:hasSummary skos:definition "Summary information about dataset") - (gnt:hasCitation rdfs:domain dcat:Dataset) - (gnt:hasCitation rdfs:label "Citation") - (gnt:hasCitation a owl:ObjectProperty) - (gnt:hasCitation skos:definition "Citation for this dataset") - (gnt:hasContributors rdfs:domain dcat:Dataset) - (gnt:hasContributors rdfs:label "Contributors") - (gnt:hasContributors a owl:ObjectProperty) - (gnt:hasContributors skos:definition "Contributors of this resource") - (gnt:hashasExperimentDesign rdfs:domain dcat:Dataset) - (gnt:hashasExperimentDesign rdfs:label "Experiment Design") - (gnt:hashasExperimentDesign a owl:ObjectProperty) - (gnt:hashasExperimentDesign skos:definition "Experiment Design for this resource") - (gnt:hasTissueInfo rdfs:domain dcat:Dataset) - (gnt:hasTissueInfo rdfs:label "Tissue Information") - (gnt:hasTissueInfo a owl:ObjectProperty) - (gnt:hasTissueInfo skos:definition "Tissue information about dataset") - (gnt:hasExperimentType skos:definition "Information about the experiment type") - (gnt:hasAcknowledgement rdfs:domain dcat:Dataset) - (gnt:hasAcknowledgement rdfs:label "Acknowledgement") - (gnt:hasAcknowledgement a owl:ObjectProperty) - (gnt:hasAcknowledgement skos:definition "People to acknowledge")) + (gnt:has_tissue rdfs:domain dcat:Dataset) + (gnt:has_tissue a owl:ObjectProperty) + (gnt:has_tissue skos:definition "Tissues this resource has") + (gnt:uses_normalization rdfs:domain dcat:Dataset) + (gnt:uses_normalization a owl:ObjectProperty) + (gnt:uses_normalization skos:definition "Normalization techniques this resource has") + (gnt:uses_platform rdfs:domain dcat:Dataset) + (gnt:uses_platform a owl:ObjectProperty) + (gnt:uses_platform skos:definition "The Platform this resource uses") + (gnt:has_geo_series_id rdfs:domain dcat:Dataset) + (gnt:has_geo_series_id a owl:ObjectProperty) + (gnt:has_geo_series_id skos:definition "id of record in NCBI database") + (gnt:has_experiment_type rdfs:domain dcat:Dataset) + (gnt:has_experiment_type a owl:ObjectProperty) + (gnt:has_experiment_type rdfs:label "Experiment Type Metadata") + (gnt:has_experiment_type skos:definition "Information about the experiment type") + (gnt:has_tissue_info rdfs:domain dcat:Dataset) + (gnt:has_tissue_info a owl:ObjectProperty) + (gnt:has_tissue_info skos:definition "Metadata about Tissue for this resource") + (gnt:has_experiment_design_info rdfs:domain dcat:Dataset) + (gnt:has_experiment_design_info rdfs:label "Experiment Design") + (gnt:has_experiment_design_info a owl:ObjectProperty) + (gnt:has_experiment_design_info skos:definition "Information about how the experiment was designed") + (gnt:has_notes rdfs:domain dcat:Dataset) + (gnt:has_notes a owl:ObjectProperty) + (gnt:has_notes rdfs:label "Notes") + (gnt:has_notes skos:definition "Extra Notes about this dataset") + (gnt:has_data_processing_info rdfs:domain dcat:Dataset) + (gnt:has_data_processing_info rdfs:label "About Data Processing") + (gnt:has_data_processing_info a owl:ObjectProperty) + (gnt:has_data_processing_info skos:definition "Information about how this dataset was processed") + (gnt:has_platform_info rdfs:domain dcat:Dataset) + (gnt:has_platform_info a owl:ObjectProperty) + (gnt:has_platform_info rdfs:label "About Platform") + (gnt:has_platform_info skos:definition "Information about the platform that was used with this dataset") + (gnt:has_case_info rdfs:domain dcat:Dataset) + (gnt:has_case_info rdfs:label "About Case") + (gnt:has_case_info a owl:ObjectProperty) + (gnt:has_case_info skos:definition "Information about the cases used in this platform") + (gnt:has_summary rdfs:domain dcat:Dataset) + (gnt:has_summary rdfs:label "Summary") + (gnt:has_summary a owl:ObjectProperty) + (gnt:has_summary skos:definition "Summary information about dataset") + (gnt:has_citation rdfs:domain dcat:Dataset) + (gnt:has_citation rdfs:label "Citation") + (gnt:has_citation a owl:ObjectProperty) + (gnt:has_citation skos:definition "Citation for this dataset") + (gnt:has_contributors rdfs:domain dcat:Dataset) + (gnt:has_contributors rdfs:label "Contributors") + (gnt:has_contributors a owl:ObjectProperty) + (gnt:has_contributors skos:definition "Contributors of this resource") + (gnt:has_experiment_design rdfs:domain dcat:Dataset) + (gnt:has_experiment_design rdfs:label "Experiment Design") + (gnt:has_experiment_design a owl:ObjectProperty) + (gnt:has_experiment_design skos:definition "Experiment Design for this resource") + (gnt:has_tissue_info rdfs:domain dcat:Dataset) + (gnt:has_tissue_info rdfs:label "Tissue Information") + (gnt:has_tissue_info a owl:ObjectProperty) + (gnt:has_tissue_info skos:definition "Tissue information about dataset") + (gnt:has_experiment_type skos:definition "Information about the experiment type") + (gnt:has_acknowledgement rdfs:domain dcat:Dataset) + (gnt:has_acknowledgement rdfs:label "Acknowledgement") + (gnt:has_acknowledgement a owl:ObjectProperty) + (gnt:has_acknowledgement skos:definition "People to acknowledge")) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field InfoFiles InfoPageName) @@ -179,7 +179,7 @@ (set xkos:classifiedUnder (let ([dataset-type (string-trim-both - (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', '')))" + (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:probeset', '')))" DatasetType)))]) (if (not (string-null? dataset-type)) (string->symbol @@ -214,19 +214,19 @@ (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId))) (set dct:accessRights (string-downcase (field DatasetStatus DatasetStatusName))) - (set gnt:belongsToGroup + (set gnt:belongs_to_group (string->identifier "set" (field ("IFNULL(InbredSet.Name, IFNULL(PublishInbredSet.Name, GenoInbredSet.Name))" InbredSetName)))) - (set gnt:hasTissue (string->identifier "tissue" + (set gnt:has_tissue (string->identifier "tissue" (field Tissue Short_Name))) - (set gnt:usesNormalization + (set gnt:uses_normalization (string->identifier "avgMethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) - (set gnt:hasSummary + (set gnt:has_summary (let* ((summary-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/summary.rtf>" @@ -239,7 +239,7 @@ (field InfoFiles Summary))) (if (or (null? summary) (string-blank? summary)) "" (string->symbol summary-link)))) - (set gnt:hasTissueInfo + (set gnt:has_tissue_info (let* ((tissue-info-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/tissue.rtf>" @@ -252,7 +252,7 @@ (field Datasets AboutTissue))) (if (or (null? tissue-info) (string-blank? tissue-info)) "" (string->symbol tissue-info-link)))) - (set gnt:hasCitation + (set gnt:has_citation (let* ((citation-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/citation.rtf>" @@ -278,7 +278,7 @@ (field InfoFiles Specifics))) (if (or (null? specifics) (string-blank? specifics)) "" (string->symbol specifics-link)))) - (set gnt:hasCaseInfo + (set gnt:has_case_info (let* ((cases-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/cases.rtf>" @@ -291,7 +291,7 @@ (field Datasets AboutCases))) (if (or (null? cases) (string-blank? cases)) "" (string->symbol cases-link)))) - (set gnt:hasPlatformInfo + (set gnt:has_platform_info (let* ((platform-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/platform.rtf>" @@ -304,7 +304,7 @@ (field Datasets AboutPlatform))) (if (or (null? platform) (string-blank? platform)) "" (string->symbol platform-link)))) - (set gnt:hasDataProcessingInfo + (set gnt:has_data_processing_info (let* ((processing-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/processing.rtf>" @@ -317,7 +317,7 @@ (field Datasets AboutDataProcessing))) (if (or (null? processing) (string-blank? processing)) "" (string->symbol processing-link)))) - (set gnt:hasNotes + (set gnt:has_notes (let* ((notes-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/notes.rtf>" @@ -330,7 +330,7 @@ (field Datasets Notes))) (if (or (null? notes) (string-blank? notes)) "" (string->symbol notes-link)))) - (set gnt:hasExperimentType + (set gnt:has_experiment_type (let* ((experiment-type-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/experiment-type.rtf>" @@ -343,7 +343,7 @@ (field InfoFiles Experiment_Type))) (if (or (null? experiment-type) (string-blank? experiment-type)) "" (string->symbol experiment-type-link)))) - (set gnt:hasExperimentDesign + (set gnt:has_experiment_design (let* ((experiment-design-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/experiment-design.rtf>" @@ -356,7 +356,7 @@ (field Datasets ExperimentDesign))) (if (or (null? experiment-design) (string-blank? experiment-design)) "" (string->symbol experiment-design-link)))) - (set gnt:hasContributors + (set gnt:has_contributors (let* ((contributors-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/contributors.rtf>" @@ -369,7 +369,7 @@ (field Datasets Contributors))) (if (or (null? contributors) (string-blank? contributors)) "" (string->symbol contributors-link)))) - (set gnt:hasAcknowledgement + (set gnt:has_acknowledgement (let* ((acknowledgment-link (format #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/acknowledgment.rtf>" @@ -382,10 +382,10 @@ (field Datasets Acknowledgment))) (if (or (null? acknowledgment) (string-blank? acknowledgment)) "" (string->symbol acknowledgment-link)))) - (set gnt:usesPlatform + (set gnt:uses_platform (string->identifier "platform" (field GeneChip Name GeneChip))) - (set gnt:hasGeoSeriesId + (set gnt:has_geo_series_id (let ((s (string-match "GSE[0-9]*" (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries))))) @@ -406,18 +406,18 @@ (field PublishFreeze Name) 'pre "_" 'post)) (set rdf:type 'dcat:Dataset) - (set xkos:classifiedUnder 'gnc:Phenotype) + (set xkos:classifiedUnder 'gnc:phenotype) (set dct:title (field PublishFreeze FullName)) (set rdfs:label (field PublishFreeze Name)) (set skos:altLabel (field PublishFreeze ShortName)) (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) - (set gnt:belongsToGroup + (set gnt:belongs_to_group (string->identifier "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first)))) + #:separator "_" + #:proc (lambda (x) x))))) (define-transformer genofreeze (tables (GenoFreeze @@ -435,18 +435,18 @@ 'pre "_" 'post) 'pre "_" 'post)) (set rdf:type 'dcat:Dataset) - (set xkos:classifiedUnder 'gnc:Genotype) + (set xkos:classifiedUnder 'gnc:genotype) (set rdfs:label (field GenoFreeze Name)) (set dct:title (field GenoFreeze FullName)) (set skos:altLabel (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:date)) - (set gnt:belongsToGroup + (set gnt:belongs_to_group (string->identifier "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first)))) + #:separator "_" + #:proc (lambda (x) x))))) ;; Molecular Traits are also referred to as ProbeSets (define-transformer probesetfreeze @@ -458,10 +458,10 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gnt:usesNormalization rdfs:domain gnc:probeset) - (gnt:usesDataScale rdfs:domain gnc:probeset) - (gnt:usesDataScale a owl:ObjectProperty) - (gnt:usesDataScale skos:definition "Thi data scale this resource uses")) + (gnt:uses_normalization rdfs:domain gnc:probeset) + (gnt:uses_data_scale rdfs:domain gnc:probeset) + (gnt:uses_data_scale a owl:ObjectProperty) + (gnt:uses_data_scale skos:definition "Thi data scale this resource uses")) (triples (string->identifier "" @@ -470,8 +470,8 @@ (field ProbeSetFreeze Name) 'pre "_" 'post)) (set rdf:type 'dcat:Dataset) - (set xkos:classifiedUnder 'gnc:Probeset) - (set gnt:usesNormalization + (set xkos:classifiedUnder 'gnc:probeset) + (set gnt:uses_normalization (string->identifier "avgMethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) @@ -483,12 +483,12 @@ (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gnt:usesDataScale (field ProbeSetFreeze DataScale)) - (set gnt:hasTissue + (set gnt:uses_data_scale (field ProbeSetFreeze DataScale)) + (set gnt:has_tissue (string->identifier "tissue" (field Tissue Short_Name))) - (set gnt:belongsToGroup + (set gnt:belongs_to_group (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "" diff --git a/examples/genbank.scm b/examples/genbank.scm index c83643c..7aae5ba 100755 --- a/examples/genbank.scm +++ b/examples/genbank.scm @@ -14,30 +14,20 @@ -(define (remap-species-identifiers str) - "This procedure remaps identifiers to standard binominal. Obviously this should - be sorted by correcting the database!" - (match str - ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] - ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] - ["Macaca mulatta" "Macaca nemestrina"] - ["Bat (Glossophaga soricina)" "Glossophaga soricina"] - [str str])) - (define-transformer genbank (tables (Genbank (left-join Species "USING (SpeciesId)"))) (schema-triples (gnc:nucleotide a skos:Concept) - (gnt:hasSequence rdfs:domain gnc:nucleotide)) + (gnt:has_sequence rdfs:domain gnc:nucleotide)) (triples (ontology 'genbank: (field Genbank Id)) - (set gnt:hasSequence (field Genbank Sequence)) - (set gnt:belongsToSpecies + (set gnt:has_sequence (field Genbank Sequence)) + (set gnt:belongs_to_species (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)))) + #:separator "_" + #:proc string-downcase)))) diff --git a/examples/genelist.scm b/examples/genelist.scm index 8729626..18fd30b 100755 --- a/examples/genelist.scm +++ b/examples/genelist.scm @@ -18,63 +18,63 @@ (tables (GeneList (left-join Species "USING (SpeciesId)"))) (schema-triples - (gnc:GeneSymbol a rdfs:Class) - (gnc:GeneSymbol rdfs:label "A gene symbol") - (gnt:gene rdfs:domain gnc:GeneSymbol) - (gnt:belongsToSpecies rdfs:domain gnc:GeneSymbol) - (gnc:Gene a rdfs:Class) - (gnc:Gene rdfs:label "Gene") - (gnt:hasGeneId a owl:ObjectProperty) - (gnt:hasGeneId rdfs:domain gnc:NCBIWikiEntry) - (gnt:hasGeneId skos:definition "The GeneId of this this resource") - (gnc:transcript rdfs:domain gnc:GeneSymbol) + (gnc:gene_symbol a rdfs:Class) + (gnc:gene_symbol rdfs:label "A gene symbol") + (gnt:gene rdfs:domain gnc:gene_symbol) + (gnt:belongs_to_species rdfs:domain gnc:gene_symbol) + (gnc:gene a rdfs:Class) + (gnc:gene rdfs:label "Gene") + (gnt:has_gene_id a owl:ObjectProperty) + (gnt:has_gene_id rdfs:domain gnc:ncbi_wiki_entry) + (gnt:has_gene_id skos:definition "The GeneId of this this resource") + (gnc:transcript rdfs:domain gnc:gene_symbol) (gnt:transcript a owl:ObjectProperty) (gnc:transcript rdfs:comments "The gene transcript of this resource") - (gnc:ebiGwasLink rdfs:Class gnc:ResourceLink) - (gnc:ebiGwasLink rdfs:label "EBI GWAS") - (gnc:ebiGwasLink rdfs:comments "EBI GWAS") - (gnc:proteinAtlasLink rdfs:Class gnc:ResourceLink) - (gnc:proteinAtlasLink rdfs:label "Protein Atlas") - (gnc:proteinAtlasLink rdfs:comments "Human Protein Atlas") - (gnc:genemaniaLink rdfs:Class gnc:ResourceLink) - (gnc:genemaniaLink rdfs:label "GeneMANIA") - (gnc:genemaniaLink rdfs:comments "GeneMANIA") - (gnc:gemmaLink rdfs:Class gnc:ResourceLink) - (gnc:gemmaLink rdfs:label "Gemma") - (gnc:gemmaLink rdfs:comments "Meta-analysis of gene expression data") - (gnc:biogpsLink rdfs:Class gnc:ResourceLink) - (gnc:biogpsLink rdfs:label "BioGPS") - (gnc:biogpsLink rdfs:comments "Expression across many tissues and cell types") - (gnc:abaLink rdfs:Class gnc:ResourceLink) - (gnc:abaLink rdfs:label "ABA") - (gnc:abaLink rdfs:comments "Allen Brain Atlas") - (gnc:pantherLink rdfs:Class gnc:ResourceLink) - (gnc:pantherLink rdfs:label "PANTHER") - (gnc:pantherLink rdfs:comments "Gene and protein data resources from Celera-ABI") - (gnc:stringLink rdfs:Class gnc:ResourceLink) - (gnc:stringLink rdfs:label "STRING") - (gnc:stringLink rdfs:comments "Protein interactions: known and inferred") - (gnc:gtexLink rdfs:Class gnc:ResourceLink) - (gnc:gtexLink rdfs:label "GTEx Portal") - (gnc:gtexLink rdfs:comments "GTEx Portal") - (gnc:rgdLink rdfs:Class gnc:ResourceLink) - (gnc:rgdLink rdfs:label "Rat Genome DB") - (gnc:rgdLink rdfs:comments "Rat Genome DB") - (gnc:hasKgID rdfs:domain gnc:GeneSymbol) - (gnt:hasKgID a owl:ObjectProperty) - (gnc:hasKgID rdfs:comments "The kgID of this resource") - (gnc:hasUnigenID rdfs:domain gnc:GeneSymbol) - (gnt:hasUnigenID a owl:ObjectProperty) - (gnc:hasUnigenID rdfs:comments "The UnigenID of this resource") - (gnc:hasProteinID rdfs:domain gnc:GeneSymbol) - (gnt:hasProteinID a owl:ObjectProperty) - (gnc:hasProteinID rdfs:comments "The ProteinID of this resource") - (gnc:hasAlignID rdfs:domain gnc:GeneSymbol) - (gnt:hasAlignID a owl:ObjectProperty) - (gnc:hasAlignID rdfs:comments "The AlignID of this resource") - (gnt:TxEnd rdfs:range xsd:double) - (gnt:TxStart rdfs:range xsd:double) - (gnt:hasTargetSeq rdfs:domain gnc:Probeset)) + (gnc:ebi_gwas_link rdfs:Class gnc:ResourceLink) + (gnc:ebi_gwas_link rdfs:label "EBI GWAS") + (gnc:ebi_gwas_link rdfs:comments "EBI GWAS") + (gnc:protein_atlas_link rdfs:Class gnc:ResourceLink) + (gnc:protein_atlas_link rdfs:label "Protein Atlas") + (gnc:protein_atlas_link rdfs:comments "Human Protein Atlas") + (gnc:genemania_link rdfs:Class gnc:ResourceLink) + (gnc:genemania_link rdfs:label "GeneMANIA") + (gnc:genemania_link rdfs:comments "GeneMANIA") + (gnc:gemma_link rdfs:Class gnc:ResourceLink) + (gnc:gemma_link rdfs:label "Gemma") + (gnc:gemma_link rdfs:comments "Meta-analysis of gene expression data") + (gnc:biogps_link rdfs:Class gnc:ResourceLink) + (gnc:biogps_link rdfs:label "BioGPS") + (gnc:biogps_link rdfs:comments "Expression across many tissues and cell types") + (gnc:aba_link rdfs:Class gnc:ResourceLink) + (gnc:aba_link rdfs:label "ABA") + (gnc:aba_link rdfs:comments "Allen Brain Atlas") + (gnc:panther_link rdfs:Class gnc:ResourceLink) + (gnc:panther_link rdfs:label "PANTHER") + (gnc:panther_link rdfs:comments "Gene and protein data resources from Celera-ABI") + (gnc:panther_link rdfs:Class gnc:ResourceLink) + (gnc:panther_link rdfs:label "STRING") + (gnc:panther_link rdfs:comments "Protein interactions: known and inferred") + (gnc:gtex_link rdfs:Class gnc:ResourceLink) + (gnc:gtex_link rdfs:label "GTEx Portal") + (gnc:gtex_link rdfs:comments "GTEx Portal") + (gnc:rgd_link rdfs:Class gnc:ResourceLink) + (gnc:rgd_link rdfs:label "Rat Genome DB") + (gnc:rgd_link rdfs:comments "Rat Genome DB") + (gnc:has_kg_id rdfs:domain gnc:gene_symbol) + (gnc:has_kg_id a owl:ObjectProperty) + (gnc:has_kg_id rdfs:comments "The kgID of this resource") + (gnc:has_unigen_id rdfs:domain gnc:gene_symbol) + (gnc:has_unigen_id a owl:ObjectProperty) + (gnc:has_unigen_id rdfs:comments "The UnigenID of this resource") + (gnc:has_protein_id rdfs:domain gnc:gene_symbol) + (gnt:has_protein_id a owl:ObjectProperty) + (gnc:has_protein_id rdfs:comments "The ProteinID of this resource") + (gnc:has_align_id rdfs:domain gnc:gene_symbol) + (gnt:has_align_id a owl:ObjectProperty) + (gnc:has_align_id rdfs:comments "The AlignID of this resource") + (gnt:tx_end rdfs:range xsd:double) + (gnt:tx_start rdfs:range xsd:double) + (gnt:has_target_seq rdfs:domain gnc:probeset)) (triples (string->identifier "gene" (regexp-substitute/global @@ -83,10 +83,10 @@ (field ("CONCAT_WS('_', GeneSymbol, GeneID, AlignID)" GENE_UID))) 'pre "_" 'post) #:proc (lambda (x) x)) - (set rdf:type 'gnc:Gene) - (set gnt:geneSymbol (field GeneList GeneSymbol)) + (set rdf:type 'gnc:gene) + (set gnt:gene_symbol (field GeneList GeneSymbol)) (set dct:description (sanitize-rdf-string (field GeneList GeneDescription))) - (set gnt:hasGeneId (ontology 'gene: (field GeneList GeneId))) + (set gnt:has_gene_id (ontology 'gene: (field GeneList GeneId))) (set dct:references (let ((symbol (field GeneList GeneSymbol))) (if (not (string-blank? symbol)) @@ -96,7 +96,7 @@ "https://www.ebi.ac.uk/gwas/search?query=" (uri-encode (string-trim-both symbol)) - "a gnc:ebiGwasLink")) + "a gnc:ebi_gwas_link")) ""))) (set dct:references (let ((symbol (field GeneList GeneSymbol)) @@ -109,7 +109,7 @@ (string->symbol (format #f "<~0@*~a> .~%<~0@*~a> ~1@*~a" "http://mouse.brain-map.org/search/show?search_type=gene&search_term=" - "a gnc:abaLink" + "a gnc:aba_link" (if (string=? species "mouse") (uri-encode (string-trim-both symbol)) @@ -131,7 +131,7 @@ (string-trim-both symbol)) "&category=Gene&species=" (string-capitalize species) - "a gnc:rgdLink")) + "a gnc:rgd_link")) ""))) (set dct:references (let ((geneId (field GeneList GeneID)) @@ -149,7 +149,7 @@ species "#goto=genereport&id=" geneId - "a gnc:biogpsLink")) + "a gnc:biogps_link")) ""))) (set dct:references (let ((geneId (field GeneList GeneID))) @@ -159,7 +159,7 @@ "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=" geneId - "a gnc:gemmaLink")) + "a gnc:gemma_link")) ""))) (set dct:references (let ((symbol (field GeneList GeneSymbol)) @@ -177,7 +177,7 @@ species (uri-encode (string-trim-both symbol)) - "a gnc:genemaniaLink")) + "a gnc:genemania_link")) ""))) (set dct:references (let ((symbol (field GeneList GeneSymbol))) @@ -188,7 +188,7 @@ "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue=" (uri-encode (string-trim-both symbol)) - "a gnc:pantherLink")) + "a gnc:panther_link")) ""))) (set dct:references (let ((symbol (field GeneList GeneSymbol))) @@ -199,7 +199,7 @@ "http://string-db.org/newstring_cgi/show_network_section.pl?identifier=" (uri-encode (string-trim-both symbol)) - "a gnc:stringLink")) + "a gnc:panther_link")) ""))) (set dct:references (let ((symbol (field GeneList GeneSymbol))) @@ -210,7 +210,7 @@ "https://www.gtexportal.org/home/gene/" (uri-encode (string-trim-both symbol)) - "a gnc:gtexLink")) + "a gnc:gtex_link")) ""))) (set dct:references (let ((symbol (field GeneList GeneSymbol))) @@ -221,18 +221,18 @@ "http://www.proteinatlas.org/search/" (uri-encode (string-trim-both symbol)) - "a gnc:proteinAtlasLink")) + "a gnc:protein_atlas_link")) ""))) (set gnt:chromosome (field GeneList Chromosome)) - (set gnt:TxStart (annotate-field - (field GeneList TxStart) + (set gnt:tx_start (annotate-field + (field GeneList tx_start) '^^xsd:double)) - (set gnt:TxEnd (annotate-field - (field GeneList TxEnd) + (set gnt:tx_end (annotate-field + (field GeneList tx_end) '^^xsd:double)) - (set gnt:Strand (string-trim-both (field GeneList Strand))) + (set gnt:strand (string-trim-both (field GeneList Strand))) (set - gnt:belongsToSpecies + gnt:belongs_to_species (string->identifier "" (remap-species-identifiers @@ -243,11 +243,11 @@ gnt:transcript (ontology 'transcript: (string-trim-both (field GeneList NM_ID)))) - (set gnt:hasKgID (string-trim-both (field GeneList kgID))) - (set gnt:hasUnigenID (string-trim-both (field GeneList UnigenID))) - (set gnt:hasProteinID (string-trim-both (field GeneList ProteinID))) - (set gnt:hasAlignID (string-trim-both (field GeneList AlignID))) - (set gnt:hasRgdID + (set gnc:has_kg_id (string-trim-both (field GeneList kgID))) + (set gnc:has_unigen_id (string-trim-both (field GeneList UnigenID))) + (set gnt:has_protein_id (string-trim-both (field GeneList ProteinID))) + (set gnt:has_align_id (string-trim-both (field GeneList AlignID))) + (set gnt:has_rgd_id (field ("IFNULL(RGD_ID, '')" RGD_ID))))) (define-transformer genelist-rn33 @@ -260,27 +260,27 @@ (number->string gene-uid) gene-uid))) - (set rdf:type 'gnc:Gene) - (set gnt:belongsToSpecies 'gn:Rattus_norvegicus) - (set gnt:geneSymbol (string-trim-both (field GeneList_rn33 geneSymbol))) + (set rdf:type 'gnc:gene) + (set gnt:belongs_to_species 'gn:Rattus_norvegicus) + (set gnt:gene_symbol (string-trim-both (field GeneList_rn33 gene_symbol))) (set gnt:chromosome (field GeneList_rn33 chromosome)) - (set gnt:TxStart (annotate-field + (set gnt:tx_start (annotate-field (field GeneList_rn33 txStart) '^^xsd:double)) - (set gnt:TxEnd (annotate-field + (set gnt:tx_end (annotate-field (field GeneList_rn33 txEnd) '^^xsd:double)) - (set gnt:Strand (string-trim-both (field GeneList_rn33 strand))) + (set gnt:strand (string-trim-both (field GeneList_rn33 strand))) (set gnt:transcript (ontology 'transcript: (string-trim-both (field GeneList_rn33 NM_ID)))) (set - gnt:hasKgID + gnc:has_kg_id (string-trim-both (field GeneList_rn33 kgID))) (set dct:references - (let ((symbol (field GeneList_rn33 geneSymbol))) + (let ((symbol (field GeneList_rn33 gene_symbol))) (if (not (string-blank? symbol)) (string->symbol (format #f @@ -290,17 +290,17 @@ "a gnc:PantherLink")) ""))) (set dct:references - (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol)))) + (let ((symbol (string-trim-both (field GeneList_rn33 gene_symbol)))) (if (not (string-blank? symbol)) (string->symbol (format #f "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" "https://www.ebi.ac.uk/gwas/search?query=" (string-trim-both symbol) - "a gnc:ebiGwasLink")) + "a gnc:ebi_gwas_link")) ""))) (set dct:references - (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol)))) + (let ((symbol (string-trim-both (field GeneList_rn33 gene_symbol)))) (if (not (string-blank? symbol)) (string->symbol (format #f @@ -308,10 +308,10 @@ "http://string-db.org/newstring_cgi/show_network_section.pl?identifier=" (uri-encode (string-trim-both symbol)) - "a gnc:stringLink")) + "a gnc:panther_link")) ""))) (set dct:references - (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol)))) + (let ((symbol (string-trim-both (field GeneList_rn33 gene_symbol)))) (if (not (string-blank? symbol)) (string->symbol (format #f @@ -319,10 +319,10 @@ "https://www.gtexportal.org/home/gene/" (uri-encode (string-trim-both symbol)) - "a gnc:gtexLink")) + "a gnc:gtex_link")) ""))) (set dct:references - (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol)))) + (let ((symbol (string-trim-both (field GeneList_rn33 gene_symbol)))) (if (not (string-blank? symbol)) (string->symbol (format #f @@ -330,7 +330,7 @@ "http://www.proteinatlas.org/search/" (uri-encode (string-trim-both symbol)) - "a gnc:proteinAtlasLink")) + "a gnc:protein_atlas_link")) ""))))) diff --git a/examples/generif-old.scm b/examples/generif-old.scm deleted file mode 100755 index ede5a28..0000000 --- a/examples/generif-old.scm +++ /dev/null @@ -1,241 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (rnrs bytevectors) - (ice-9 format) - (ice-9 getopt-long) - (ice-9 match) - (ice-9 regex) - (transform strings) - (transform sql) - (transform triples) - (transform special-forms)) - - - -(define (fix-email-id email) - (string-delete #\space email)) - -(define (investigator-attributes->id first-name last-name email) - ;; There is just one record corresponding to "Evan Williams" which - ;; does not have an email ID. To accommodate that record, we - ;; construct the investigator ID from not just the email ID, but - ;; also the first and the last names. It would be preferable to just - ;; find Evan Williams' email ID and insert it into the database. - (string->identifier "investigator" - (string-join - (list first-name last-name (fix-email-id email)) - "_"))) - - - -(define-transformer genewiki-symbols - (tables (GeneRIF_BASIC) - "GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF_BASIC symbol)))) - -;; Some symbols exist in the RIF table that don't exist in the GeneRIF -;; table. -(define-transformer generif-symbols - (tables (GeneRIF) - "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF symbol)))) - -(define-transformer gn-genewiki-entries - (tables (GeneRIF - (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") - (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") - (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id") - (left-join Investigators "ON Investigators.Email = GeneRIF.email")) - "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol") - (schema-triples - (gnc:GeneWikiEntry a rdfs:Class) - (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") - (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) - (triples - (string->identifier - "symbol" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))] - [create-time (field GeneRIF createtime EntryCreateTime)] - [pmid (field GeneRIF PubMed_ID PMID)] - [web-url (field GeneRIF weburl)] - [species (string->identifier - "" - (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)] - [categories - (remove (lambda (x) - (or (eq? x #f) - (and (string? x) - (string-null? x)))) - (remove-duplicates - (string-split-substring - (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')" - GeneCategory)) - "$$")))]) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:GNWikiEntry ; ") - (if (string? species) - "" - (format #f "gnt:belongsToSpecies ~a ; " - species)) - (format #f "rdfs:comment ~s^^xsd:string ; " - generif-comment) - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (and (not (string-null? - (string-trim-both (field GeneRIF email)))) - (not (string-null? (field Investigators Email)))) - (format #f "dct:creator ~a ; " - (investigator-attributes->id - (field Investigators FirstName) - (field Investigators LastName) - (field Investigators Email))) - "") - (if (not (null? categories)) - (format #f - "~{gnt:belongsToCategory ~s ; ~}" - categories) - "") - (if (and (string? web-url) (not (string-null? web-url))) - (format #f "foaf:homepage ~s ; " - web-url) - "") - " ] ")))))) - -(define-transformer ncbi-genewiki-entries - (tables (GeneRIF_BASIC - (left-join Species "USING (SpeciesId)")) - "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID") - (schema-triples - (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI") - (gnt:hasVersionId a owl:ObjectProperty) - (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry) - (gnt:hasVersionId skos:definition "The VersionId of this this resource")) - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol GeneRIFSymbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))] - [species-name - (string->identifier - "" - (remap-species-identifiers (field Species Fullname SpeciesFullName)) - #:separator "" - #:proc string-capitalize-first)] - [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)] - [create-time (field GeneRIF_BASIC createtime EntryCreateTime)] - [pmid (field GeneRIF_BASIC PubMed_ID PMID)] - [gene-id (field GeneRIF_BASIC GeneId)] - [version-id (field GeneRIF_BASIC VersionId)]) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:NCBIWikiEntry ; ") - (format #f "rdfs:comment ~s^^xsd:string ; " - ncbi-comment) - (format #f "gnt:belongsToSpecies ~a ; " - species-name) - (if (eq? #f taxonomic-id) - "" - (format #f "skos:notation taxon:~a ; " - taxonomic-id)) - (format #f "gnt:hasGeneId generif:~a ; " - gene-id) - (format #f "gnt:hasVersionId '~a'^^xsd:integer ; " - version-id) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - " ]")))))) - - - -(let* ((option-spec - '((settings (single-char #\s) (value #t)) - (output (single-char #\o) (value #t)) - (documentation (single-char #\d) (value #t)))) - (options (getopt-long (command-line) option-spec)) - (settings (option-ref options 'settings #f)) - (output (option-ref options 'output #f)) - (documentation (option-ref options 'documentation #f)) - (%connection-settings - (call-with-input-file settings - read))) - - (with-documentation - (name "GeneRIF Metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") - ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") - ("skos:" "<http://www.w3.org/2004/02/skos/core#>") - ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") - ("gn:" "<http://genenetwork.org/id/>") - ("gnc:" "<http://genenetwork.org/category/>") - ("gnt:" "<http://genenetwork.org/term/>") - ("dct:" "<http://purl.org/dc/terms/>") - ("foaf:" "<http://xmlns.com/foaf/0.1/>") - ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>") - ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>") - ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>") - ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") - ("owl:" "<http://www.w3.org/2002/07/owl#>"))) - (inputs - (list - genewiki-symbols - generif-symbols - gn-genewiki-entries - ncbi-genewiki-entries)) - (outputs - `(#:documentation ,documentation - #:rdf ,output)))) diff --git a/examples/generif.scm b/examples/generif.scm index 628e34e..5fb95f7 100755 --- a/examples/generif.scm +++ b/examples/generif.scm @@ -23,16 +23,16 @@ "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol") (schema-triples - (gnc:GeneWikiEntry a rdfs:Class) - (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) + (gnc:gene_wiki_entry a rdfs:Class) + (gnc:GNWikiEntry rdfs:subClassOf gnc:gene_wiki_entry) (gnt:initial a owl:ObjectProperty) - (gnt:initial rdfs:domain gnc:GeneWikiEntry) + (gnt:initial rdfs:domain gnc:gene_wiki_entry) (gnt:initial skos:definition "Optional user or project code or your initials") (gnt:reason a owl:ObjectProperty) - (gnt:reason rdfs:domain gnc:GeneWikiEntry) + (gnt:reason rdfs:domain gnc:gene_wiki_entry) (gnt:reason skos:definition "The reason why this resource was modified") (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") - (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) + (gnt:gene_symbol rdfs:domain gnc:GNWikiEntry)) (triples (format #f "gn:wiki-~a-~a" @@ -46,7 +46,7 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol") '(("'" . "\\'")))))) (set rdf:type 'gnc:GNWikiEntry) (set gnt:symbol (field GeneRIF symbol)) - (set gnt:belongsToSpecies (string->identifier + (set gnt:belongs_to_species (string->identifier "" (remap-species-identifiers (field Species Fullname)) #:separator "" @@ -90,8 +90,8 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol") (tables (GeneRIF_BASIC (left-join Species "USING (SpeciesId)"))) (schema-triples - (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")) + (gnc:ncbi_wiki_entry rdfs:subClassOf gnc:gene_wiki_entry) + (gnc:ncbi_wiki_entry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")) (triples (format #f "gn:rif-~a-~a-~a-~a" @@ -124,11 +124,11 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol") (version-id (field GeneRIF_BASIC versionId))) (string->symbol (string-append - (format #f "gnc:NCBIWikiEntry ;\n") + (format #f "gnc:ncbi_wiki_entry ;\n") (format #f "\trdfs:label ~a ;\n" comment) - (format #f "\tgnt:belongsToSpecies ~a ;\n" species) + (format #f "\tgnt:belongs_to_species ~a ;\n" species) (format #f "\tgnt:symbol ~s ;\n" symbol) - (format #f "\tgnt:hasGeneId generif:~a ;\n" gene-id) + (format #f "\tgnt:has_gene_id generif:~a ;\n" gene-id) (match taxon-id ((? number? x) (format #f "\tskos:notation taxon:~a ;\n" taxon-id)) diff --git a/examples/genotype.scm b/examples/genotype.scm index 7e72cf8..257a3fa 100755 --- a/examples/genotype.scm +++ b/examples/genotype.scm @@ -21,30 +21,30 @@ (schema-triples (gnt:chr a owl:ObjectProperty) (gnt:chr skos:description "This resource is located on a given chromosome") - (gnt:chr rdfs:domain gnc:Genotype) + (gnt:chr rdfs:domain gnc:genotype) (gnt:mb a owl:ObjectProperty) (gnt:mb skos:definition "The size of this resource in Mb") - (gnt:mb rdfs:domain gnc:Genotype) - (gnt:mbMm8 a owl:ObjectProperty) - (gnt:mbMm8 skos:definition "TODO") - (gnt:mbMm8 rdfs:domain gnc:Genotype) + (gnt:mb rdfs:domain gnc:genotype) + (gnt:mb_mm8 a owl:ObjectProperty) + (gnt:mb_mm8 skos:definition "TODO") + (gnt:mb_mm8 rdfs:domain gnc:genotype) (gnt:mb2016 a owl:ObjectProperty) (gnt:mb2016 skos:definition "TODO") - (gnt:mb2016 rdfs:domain gnc:Genotype) - (gnt:hasSequence a owl:ObjectProperty) - (gnt:hasSequence skos:definition "This resource has a given sequence") - (gnt:hasSequence rdfs:domain gnc:Genotype) - (gnt:hasSource a owl:ObjectProperty) - (gnt:hasSource rdfs:domain gnc:Genotype) - (gnt:hasSource skos:definition "This resource was obtained from this given source") - (gnt:hasAltSourceName a owl:ObjectProperty) - (gnt:hasAltSourceName rdfs:domain gnc:Genotype) - (gnt:hasAltSourceName + (gnt:mb2016 rdfs:domain gnc:genotype) + (gnt:has_sequence a owl:ObjectProperty) + (gnt:has_sequence skos:definition "This resource has a given sequence") + (gnt:has_sequence rdfs:domain gnc:genotype) + (gnt:has_source a owl:ObjectProperty) + (gnt:has_source rdfs:domain gnc:genotype) + (gnt:has_source skos:definition "This resource was obtained from this given source") + (gnt:has_alt_source_name a owl:ObjectProperty) + (gnt:has_alt_source_name rdfs:domain gnc:genotype) + (gnt:has_alt_source_name skos:definition "The alternative name this resource was obtained from") - (gnt:chrNum a owl:ObjectProperty) - (gnt:chrNum rdfs:domain gnc:Genotype) - (gnt:chrNum skos:definition "The chromosome number for this resource")) + (gnt:chr_num a owl:ObjectProperty) + (gnt:chr_num rdfs:domain gnc:genotype) + (gnt:chr_num skos:definition "The chromosome number for this resource")) (triples (string->identifier "" @@ -52,30 +52,29 @@ #f "[^A-Za-z0-9:]" (field Geno Name) 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) - (set rdf:type 'gnc:Genotype) + #:separator "_" + #:proc (lambda (x) x)) + (set rdf:type 'gnc:genotype) (set rdfs:label (sanitize-rdf-string (field Geno Name))) (set gnt:chr (field Geno Chr)) (set gnt:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) - (set gnt:mbMm8 (annotate-field (field ("IFNULL(Geno.Mb_mm8, '')" Mb_mm8)) + (set gnt:mb_mm8 (annotate-field (field ("IFNULL(Geno.Mb_mm8, '')" Mb_mm8)) '^^xsd:double)) (set gnt:mb2016 (annotate-field (field ("IFNULL(Geno.Mb_2016, '')" Mb_2016)) '^^xsd:double)) - (set gnt:hasSequence (field Geno Sequence)) - (set gnt:hasSource (field Geno Source)) + (set gnt:has_sequence (field Geno Sequence)) + (set gnt:has_source (field Geno Source)) ;; Only transform Source2 if it differs from Source - (set gnt:hasAltSourceName + (set gnt:has_alt_source_name (field ("IF((Source2 = Source), NULL, Source2)" Source2))) - (set gnt:belongsToSpecies - (string->identifier - "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - (set gnt:chrNum + (set gnt:belongs_to_species + (string->identifier "" (remap-species-identifiers (field Species Fullname)) + #:separator "_" + #:proc string-downcase)) + (set gnt:chr_num (annotate-field (field Geno chr_num) '^^xsd:int)) diff --git a/examples/phenotype.scm b/examples/phenotype.scm index aa1e9c5..1bec264 100755 --- a/examples/phenotype.scm +++ b/examples/phenotype.scm @@ -20,50 +20,52 @@ (left-join Publication "ON Publication.Id = PublishXRef.PublicationId") (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))) (schema-triples - (gnt:traitId a owl:ObjectProperty) - (gnt:traitId rdfs:domain gnc:Phenotype) - (gnt:traitId skos:definition "This is the unique trait id assigned from GeneNetwork") + (gnt:trait_id a owl:ObjectProperty) + (gnt:trait_id rdfs:domain gnc:phenotype) + (gnt:trait_id skos:definition "This is the unique trait id assigned from GeneNetwork") (gnt:abbreviation a owl:ObjectProperty) - (gnt:abbreviation rdfs:domain gnc:Phenotype) + (gnt:abbreviation rdfs:domain gnc:phenotype) (gnt:abbreviation skos:definition "The abbreviation used for this resource") (gnt:labCode a owl:ObjectProperty) - (gnt:labCode rdfs:domain gnc:Phenotype) + (gnt:labCode rdfs:domain gnc:phenotype) (gnt:submitter a owl:ObjectProperty) - (gnt:submitter rdfs:domain gnc:Phenotype) + (gnt:submitter rdfs:domain gnc:phenotype) (gnt:submitter skos:definition "A person who submitted this resource to GN") (gnt:mean a rdf:Property) (gnt:mean a qb:MeasureProperty) (gnt:mean rdfs:subPropertyOf sdmx-measure:obsValue) - (gnt:mean rdfs:domain gnc:Phenotype) + (gnt:mean rdfs:domain gnc:phenotype) (gnt:mean rdfs:range xsd:double) - (gnt:lodScore a rdf:Property) - (gnt:lodScore a qb:MeasureProperty) - (gnt:lodScore rdfs:subPropertyOf sdmx-measure:obsValue) - (gnt:lodScore rdfs:domain gnc:Phenotype) - (gnt:lodScore rdfs:range xsd:double) - (gnt:lodScore rdfs:label "Peak -logP") - (gnt:lodScore skos:definition "Statistical measurement assessing the likelihood of genetic linkage between traits or genetic markers.") + (gnt:lod_score a rdf:Property) + (gnt:lod_score a qb:MeasureProperty) + (gnt:lod_score rdfs:subPropertyOf sdmx-measure:obsValue) + (gnt:lod_score rdfs:domain gnc:phenotype) + (gnt:lod_score rdfs:range xsd:double) + (gnt:lod_score rdfs:label "Peak -logP") + (gnt:lod_score skos:definition "Statistical measurement assessing the likelihood of genetic linkage between traits or genetic markers.") (gnt:locus a rdf:Property) (gnt:locus a qb:MeasureProperty) (gnt:locus rdfs:subPropertyOf sdmx-measure:obsValue) - (gnt:locus rdfs:domain gnc:Phenotype) + (gnt:locus rdfs:domain gnc:phenotype) (gnt:locus rdfs:range rdfs:Literal) - (gnt:additive rdfs:domain gnc:Phenotype) + (gnt:additive rdfs:domain gnc:phenotype) (gnt:additive rdfs:range xsd:double) - (gnt:sequence rdfs:domain gnc:Phenotype) + (gnt:sequence rdfs:domain gnc:phenotype) (gnt:sequence rdfs:range xsd:integer)) (triples (string->identifier "trait" (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)" - Phenotype))) - (set rdf:type 'gnc:Phenotype) - (set gnt:belongsToGroup + Phenotype)) + #:separator "_" + #:proc (lambda (x) x)) + (set rdf:type 'gnc:phenotype) + (set gnt:belongs_to_group (string->identifier "set" (field InbredSet Name InbredSetName) - #:separator "" + #:separator "_" #:proc string-capitalize-first)) ;; This is the trait's name - (set gnt:traitId + (set gnt:trait_id (let ((trait-id (field PublishXRef Id))) (if (number? trait-id) (number->string trait-id) @@ -92,7 +94,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first)) - (set gnt:lodScore (annotate-field + (set gnt:lod_score (annotate-field (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs)) '^^xsd:double)) (set gnt:additive diff --git a/examples/strains.scm b/examples/strains.scm index 2e1e24f..ae45a93 100755 --- a/examples/strains.scm +++ b/examples/strains.scm @@ -69,8 +69,8 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. (schema-triples (gnt:alias rdfs:domain gnc:strain) (gnt:alias a owl:ObjectProperty) - (gnt:geneSymbol rdfs:domain gnc:strain) - (gnt:geneSymbol a owl:ObjectProperty)) + (gnt:gene_symbol rdfs:domain gnc:strain) + (gnt:gene_symbol a owl:ObjectProperty)) (triples (string->identifier "" (regexp-substitute/global @@ -78,24 +78,24 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. (field Strain Name) 'pre "_" 'post)) (set rdf:type 'gnc:strain) - (set gnt:belongsToSpecies + (set gnt:belongs_to_species (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) + #:separator "_" + #:proc string-downcase)) ;; Name, and maybe a second name (set rdfs:label (sanitize-rdf-string (field Strain Name))) (set skos:altLabel (sanitize-rdf-string (field ("IF ((Strain.Name2 != Strain.Name), Strain.Name2, '')" Name2)))) (set gnt:alias (sanitize-rdf-string (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias)))) - (set gnt:geneSymbol (field Strain Symbol)))) + (set gnt:gene_symbol (field Strain Symbol)))) (define-transformer mapping-method (tables (MappingMethod)) (schema-triples - (gnc:mappingMethod a skos:Concept) - (gnc:mappingMethod skos:definition "Terms that decribe mapping methods used on this resource")) + (gnc:mapping_method a skos:Concept) + (gnc:mapping_method skos:definition "Terms that decribe mapping methods used on this resource")) (triples - (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gnc:mappingMethod) + (string->identifier "mapping_method" (field MappingMethod Name)) + (set rdf:type 'gnc:mapping_method) (set rdfs:label (field MappingMethod Name)))) (define-transformer avg-method @@ -103,10 +103,10 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. ;; the Name field. (tables (AvgMethod)) (schema-triples - (gnc:avgMethod a skos:Concept) - (gnc:avgMethod skos:definition "Terms that decribe normalization methods used on this resource")) + (gnc:avg_method a skos:Concept) + (gnc:avg_method skos:definition "Terms that decribe normalization methods used on this resource")) (triples (string->identifier "avgMethod" (field AvgMethod Name AvgMethodName)) - (set rdf:type 'gnc:avgMethod) + (set rdf:type 'gnc:avg_method) (set rdfs:label (field AvgMethod Normalization)))) diff --git a/examples/tissue.scm b/examples/tissue.scm index 2659b66..6bd30ff 100755 --- a/examples/tissue.scm +++ b/examples/tissue.scm @@ -20,7 +20,8 @@ (gnc:tissue a skos:Concept)) ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. - (triples (string->identifier "tissue" (field Tissue Short_Name)) + (triples (string->identifier "tissue" (field Tissue Short_Name) + #:separator "_") (set rdf:type 'gnc:tissue) (set rdfs:label (field Tissue Name)))) |
