about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x examples/classification.scm 99
-rwxr-xr-x examples/datasets.scm 50
-rwxr-xr-x examples/generif.scm 25
-rwxr-xr-x examples/genotype-datasets.scm 53
-rwxr-xr-x examples/genotype.scm 70
-rwxr-xr-x examples/molecular-traits-datasets.scm 25
-rwxr-xr-x examples/ontology.scm 324
-rwxr-xr-x examples/phenotype-datasets.scm 86
-rwxr-xr-x examples/phenotype.scm 167
-rwxr-xr-x examples/probesets.scm 133
-rwxr-xr-x examples/publication.scm 4
-rwxr-xr-x examples/schema.scm 17
-rwxr-xr-x load-rdf.scm 2
-rw-r--r-- manifest.scm 3
-rw-r--r-- transform/special-forms.scm 341
-rw-r--r-- transform/strings.scm 17
16 files changed, 981 insertions, 435 deletions
diff --git a/examples/classification.scm b/examples/classification.scm
index 0a9631d..d44fe5d 100755
--- a/examples/classification.scm
+++ b/examples/classification.scm
@@ -17,30 +17,6 @@
 (define-transformer gnc:species->gn:species
   (tables (Species)
           "WHERE Name != 'monkey'")
-  (schema-triples
-   (gnc:resource_classification_scheme a skos:ConceptScheme)
-   (gnc:resource_classification_scheme skos:prefLabel "GeneNetwork Resource Classification Scheme")
-   (gnc:resource_classification_scheme skos:definition "A hierarchical classification scheme for organizing GeneNetwork resources by dataset type, resource set (inbredset group), or species.")
-   (gnc:resource_classification_scheme xkos:numberOfLevels "4")
-   (gnc:resource_classification_scheme xkos:levels gnc:taxonomic_family)
-   (gnc:resource_classification_scheme xkos:levels gnc:species)
-   (gnc:resource_classification_scheme xkos:levels gnc:population_category)
-   (gnc:resource_classification_scheme xkos:levels gnc:set)
-   (gnc:population_category a xkos:ClassificationLevel)
-   (gnc:population_category skos:inScheme gnc:resource_classification_scheme)
-   (gnc:population_category xkos:previousLevel gnc:species)
-   (gnc:population_category xkos:nextLevel gnc:set)
-   (gnc:population_category skos:prefLabel "Species")
-   (gnc:population_category rdfs:label "Population Category")
-   (gnc:population_category xkos:depth "3")
-   (gnt:population_category skos:definition "Classification of genetic populations by breeding design and data aggregation.")
-   (gnc:species a xkos:ClassificationLevel)
-   (gnc:species skos:inScheme gnc:resource_classification_scheme)
-   (gnc:species xkos:previousLevel gnc:taxonomic_family)
-   (gnc:species xkos:nextLevel gnc:population_category)
-   (gnc:species skos:prefLabel "Species")
-   (gnc:species skos:definition "A classification level that that associates a given resource to a species in GeneNetwork.")
-   (gnc:species xkos:depth "2"))
   (triples "gnc:species"
     (set skos:member
          (string->identifier "" (remap-species-identifiers (field Species Fullname))))))
@@ -48,13 +24,6 @@
 (define-transformer gnc:set->gn:set
   (tables (InbredSet)
           "WHERE public > 0 AND FullName NOT LIKE '%monkey%'")
-  (schema-triples
-   (gnc:set a xkos:ClassificationLevel)
-   (gnc:set skos:inScheme gnc:resource_classification_scheme)
-   (gnc:set xkos:previousLevel gnc:population_category)
-   (gnc:set skos:prefLabel "InbredSet Group")
-   (gnc:set skos:definition "A category representing groups of genetically related strains or individuals (inbred sets, recombinant inbred lines, etc.).")
-   (gnc:set xkos:depth "4"))
   (triples "gnc:set"
     (set skos:member
          (string->identifier
@@ -63,22 +32,6 @@
 (define-transformer gnc:species->metadata
   (tables (Species)
           "WHERE Name != 'monkey'")
-  (schema-triples
-   (gnt:has_uniprot_taxon_id a owl:ObjectProperty)
-   (gnt:has_uniprot_taxon_id rdfs:label "has uniprot taxonomic id")
-   (gnt:has_taxonomic_family a owl:ObjectProperty)
-   (gnt:has_taxonomic_family rdfs:label "has family")
-   (gnt:has_taxonomic_family skos:definition "Links a species to its taxonomic family")
-   (gnt:has_taxonomic_family schema:domainIncludes gnc:species)
-   (gnt:has_taxonomic_family schema:domainIncludes gnc:set)
-   (gnt:short_name a owl:DatatypeProperty)
-   (gnt:short_name rdfs:label "has short name")
-   (gnt:short_name rdfs:domain gnc:species)
-   (gnt:short_name skos:definition "The short name of a given resource")
-   (gnt:has_species a owl:ObjectProperty)
-   (gnt:has_species rdf:comment "This resource belongs to this species")
-   (gnt:has_species rdfs:label "belongs to species")
-   (gnt:has_species rdfs:range gnc:species))
   (triples
       (string->identifier "" (remap-species-identifiers (field Species Fullname)))
     (set rdf:type 'gnc:species)
@@ -95,12 +48,6 @@
   (tables (InbredSet
            (left-join Species "ON InbredSet.SpeciesId=Species.Id"))
           "WHERE public > 0 AND Species.Name != 'monkey'")
-  (schema-triples
-   (gnt:has_strain a owl:ObjectProperty)
-   (gnt:has_strain rdfs:range gnc:set)
-   (gnt:has_strain rdfs:domain gnc:species)
-   (gnt:has_strain rdfs:label "this resource belongs to this strain.")
-   (gnt:has_strain skos:definition "Lists all strains that belong to this resource."))
   (triples (string->identifier "" (remap-species-identifiers (field Species Fullname)))
     (set gnt:has_strain
          (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
@@ -108,15 +55,6 @@
 (define-transformer gn:family->gn:species/metadata
   (tables (Species)
           "WHERE Name != 'monkey' GROUP BY FAMILY")
-  (schema-triples
-   (gnc:taxonomic_family a xkos:ClassificationLevel)
-   (gnc:taxonomic_family skos:inScheme gnc:resource_classification_scheme)
-   (gnc:taxonomic_family skos:prefLabel "Family")
-   (gnc:taxonomic_family skos:definition "An organizational classification level used in GeneNetwork to group resources into families.")
-   (gnc:taxonomic_family xkos:depth "1")
-   (gnc:taxonomic_family xkos:nextLevel gnc:species)
-   (gnt:has_family_order_id a owl:DatatypeProperty)
-   (gnt:has_family_order_id rdfs:range xsd:integer))
   (triples (string->identifier "family" (field Species Family) #:separator "_")
     (set gnt:has_species
          (string->identifier "" (remap-species-identifiers (field Species Fullname))))
@@ -128,8 +66,6 @@
 (define-transformer gn:family->gn:species
   (tables (Species)
           "WHERE Name != 'monkey'")
-  (schema-triples
-   (gnt:has_family_order_id a owl:DatatypeProperty))
   (triples (string->identifier "family" (field Species Family) #:separator "_")
     (set gnt:has_species
          (string->identifier "" (remap-species-identifiers (field Species Fullname))))))
@@ -141,34 +77,12 @@
            (left-join MappingMethod
                       "ON InbredSet.MappingMethodId=MappingMethod.Id"))
           "WHERE public > 0 AND Species.Name != 'monkey'")
-  (schema-triples
-   (gnt:genetic_type a owl:DatatypeProperty)
-   (gnt:genetic_type rdfs:label "has genetic type")
-   (gnt:genetic_type skos:definition "Describes the genetic architecture of a resource set (e.g., intercross, riset).")
-   (gnt:genetic_type rdfs:domain gnc:set)
-   (gnt:genetic_type rdfs:range xsd:string)
-   (gnt:has_set_code a owl:DatatypeProperty)
-   (gnt:has_set_code rdfs:label "has set code")
-   (gnt:has_set_code skos:definition "Provides a unique identifier code for a resource set.")
-   (gnt:has_set_code rdfs:domain gnc:set)
-   (gnt:has_set_code rdfs:range xsd:string)
-   (gnt:uses_mapping_method a owl:ObjectProperty)
-   (gnt:uses_mapping_method rdfs:label "mapping method")
-   (gnt:uses_mapping_method rdfs:domain gnc:set)
-   (gnt:uses_mapping_method rdfs:range gnc:mapping_method)
-   (gnt:uses_mapping_method rdfs:comment "The method used to map genetic or experimental data for this resource.")
-   (gnt:has_strain a owl:ObjectProperty)
-   (gnt:has_strain rdf:comment "Indicates the group the resources belongs to")
-   (gnt:has_strain schema:domainIncludes dcat:Dataset)
-   (gnt:has_strain schema:domainIncludes gnc:species)
-   (gnt:has_strain rdfs:range gnc:set)
-   (gnt:has_strain rdfs:label "belongs-to-group"))
   (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
     (set rdf:type 'gnc:set)
     (set rdfs:label (field InbredSet FullName))
     (set skos:prefLabel (field InbredSet Name InbredSetName))
     (set gnt:genetic_type (field InbredSet GeneticType))
-    (set dct:description (annotate-field (sanitize-rdf-string (field InbredSet description))
+    (set dct:description (annotate-field (sanitize-rdf-string (field InbredSet Description))
                                          '^^rdf:HTML))
     (set gnt:uses_mapping_method
          (string->identifier "mapping_method" (field MappingMethod Name) #:separator "_"))
@@ -191,11 +105,6 @@
 (define-transformer gn:population->metadata
   (tables (InbredSet)
           "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%' GROUP BY Family")
-  (schema-triples
-   (gnc:reference_population a skos:Concept)
-   (gnc:reference_population skos:inScheme gnc:population_category)
-   (gnc:reference_population skos:prefLabel "Reference population")
-   (gnc:reference_population skos:definition "A genetic population"))
   (triples (string->identifier "population" (field InbredSet Family) #:separator "_")
     (set rdf:type 'gnc:reference_population)
     (set rdfs:label (field InbredSet Family))
@@ -221,10 +130,6 @@
 (define-transformer gnc:taxonomic_family->gn:family
   (tables (Species)
           "WHERE Name != 'monkey' GROUP BY Family")
-  (schema-triples
-   (gnt:assigned_species rdfs:domain gnc:set)
-   (gnt:assigned_species a owl:ObjectProperty)
-   (gnt:assigned_species rdfs:label "These families have been assigned to these species"))
   (triples "gnc:taxonomic_family"
     (set gnt:has_taxonomic_family
          (string->identifier "family" (field Species Family) #:separator "_"))))
@@ -244,7 +149,7 @@
           read)))
 
   (with-documentation
-   (name "Species Metadata")
+   (name "GN Classification Hierarchy")
    (connection %connection-settings)
    (table-metadata? #f)
    (prefixes
diff --git a/examples/datasets.scm b/examples/datasets.scm
index 8abb84f..85a5aee 100755
--- a/examples/datasets.scm
+++ b/examples/datasets.scm
@@ -21,58 +21,11 @@
           ;; Skip monkey datasets
           "WHERE InfoFiles.InfoPageName NOT LIKE 'INIA_MacFas_%'"
           "GROUP BY Datasets.DatasetId")
-  (schema-triples
-    (gnt:has_case_info a owl:ObjectProperty)
-    (gnt:has_case_info rdfs:comment "Information about the cases used in this platform")
-    (gnt:has_case_info rdfs:domain dcat:Dataset)
-    (gnt:has_case_info rdfs:label "About Case")
-    (gnt:has_citation a owl:ObjectProperty)
-    (gnt:has_citation rdfs:comment "Citation for this dataset")
-    (gnt:has_citation rdfs:domain dcat:Dataset)
-    (gnt:has_citation rdfs:label "Citation")
-    (gnt:has_contributors a owl:ObjectProperty)
-    (gnt:has_contributors rdfs:comment "Contributors of this resource")
-    (gnt:has_contributors rdfs:comment "Contributors of this resource")
-    (gnt:has_contributors rdfs:domain dcat:Dataset)
-    (gnt:has_contributors rdfs:label "Contributors")
-    (gnt:has_data_processing_info a owl:ObjectProperty)
-    (gnt:has_data_processing_info rdfs:comment "Information about how this dataset was processed")
-    (gnt:has_data_processing_info rdfs:domain dcat:Dataset)
-    (gnt:has_data_processing_info rdfs:label "About Data Processing")
-    (gnt:has_experiment_design a owl:ObjectProperty)
-    (gnt:has_experiment_design rdfs:comment "Experiment Design for this resource")
-    (gnt:has_experiment_design rdfs:domain dcat:Dataset)
-    (gnt:has_experiment_design rdfs:label "Experiment Design")
-    (gnt:has_experiment_design_info a owl:ObjectProperty)
-    (gnt:has_experiment_design_info rdfs:comment "Information about how the experiment was designed")
-    (gnt:has_experiment_design_info rdfs:domain dcat:Dataset)
-    (gnt:has_experiment_design_info rdfs:label "Experiment Design")
-    (gnt:has_experiment_type a owl:ObjectProperty)
-    (gnt:has_experiment_type rdfs:comment "Information about the experiment type")
-    (gnt:has_experiment_type rdfs:domain dcat:Dataset)
-    (gnt:has_experiment_type rdfs:label "Experiment Type Metadata")
-    (gnt:has_platform_info a owl:ObjectProperty)
-    (gnt:has_platform_info rdfs:comment "Information about the platform that was used with this dataset")
-    (gnt:has_platform_info rdfs:domain dcat:Dataset)
-    (gnt:has_platform_info rdfs:label "About Platform")
-    (gnt:has_samples a owl:ObjectProperty)
-    (gnt:has_samples rdfs:domain dcat:Dataset)
-    (gnt:has_samples rdfs:label "Samples")
-    (gnt:has_specifics a owl:ObjectProperty)
-    (gnt:has_specifics rdfs:comment "Has specifics")
-    (gnt:has_specifics rdfs:domain dcat:Dataset)
-    (gnt:has_specifics rdfs:label "Specifics")
-    (gnt:has_summary a owl:ObjectProperty)
-    (gnt:has_summary rdfs:comment "Summary information about dataset")
-    (gnt:has_summary rdfs:domain dcat:Dataset)
-    (gnt:has_summary rdfs:label "Summary")
-    (gnt:has_tissue_info a owl:ObjectProperty)
-    (gnt:has_tissue_info rdfs:domain dcat:Dataset)
-    (gnt:has_tissue_info rdfs:label "Metadata about Tissue for this resource"))
   (triples (string->identifier "dataset" (field InfoFiles InfoPageName) #:separator "_")
     (set rdf:type 'dcat:Dataset)
     (set dct:title (normalize-string-field (field InfoFiles InfoPageName)))
     (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
+    (set gnt:has_genotype_files (string->symbol (format #f "gn-files:GN~a%2F" (field InfoFiles GN_AccesionId))))
     (set gnt:has_strain
          (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
     (set gnt:has_experiment_type
@@ -153,6 +106,7 @@
       ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
       ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
       ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
+      ("gn-files:" "<http://files.genenetwork.org/current/>")
       ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
       ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
       ("owl:" "<http://www.w3.org/2002/07/owl#>")
diff --git a/examples/generif.scm b/examples/generif.scm
index 3b794fa..a4a2e4b 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -22,17 +22,6 @@
            (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
           "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL
 GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
-  (schema-triples
-   (gnc:gene_wiki_entry a rdfs:Class)
-   (gnc:gn_wiki_entry rdfs:subClassOf gnc:gene_wiki_entry)
-   (gnt:initial a owl:ObjectProperty)
-   (gnt:initial rdfs:domain gnc:gene_wiki_entry)
-   (gnt:initial skos:definition "Optional user or project code or your initials")
-   (gnt:reason a owl:ObjectProperty)
-   (gnt:reason rdfs:domain gnc:gene_wiki_entry)
-   (gnt:reason skos:definition "The reason why this resource was modified")
-   (gnc:gn_wiki_entry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
-   (gnt:gene_symbol rdfs:domain gnc:gn_wiki_entry))
   (triples
       (string->identifier
        "wiki" (format #f "~a_~a"
@@ -61,11 +50,12 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
                            (format #f "pubmed:~a" (string-trim-both pmid))))))
                    (string-split (field GeneRIF PubMed_ID PMID)
                                  #\space)))
-    (set foaf:mbox
-         (match (sanitize-rdf-string (field GeneRIF email))
-           ((? string-blank? mbox) "")
-           (mbox (string->symbol
-                  (format #f "<~a>" mbox)))))
+    ;; Hide e-mail for now.
+    ;; (set foaf:mbox
+    ;;      (match (sanitize-rdf-string (field GeneRIF email))
+    ;;        ((? string-blank? mbox) "")
+    ;;        (mbox (string->symbol
+    ;;               (format #f "<~a>" mbox)))))
     (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id))
                                         '^^xsd:integer))
     (set foaf:homepage
@@ -86,9 +76,6 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
 (define-transformer ncbi-genewiki-entries
   (tables (GeneRIF_BASIC
            (left-join Species "USING (SpeciesId)")))
-  (schema-triples
-   (gnc:ncbi_wiki_entry rdfs:subClassOf gnc:gene_wiki_entry)
-   (gnc:ncbi_wiki_entry rdfs:comment "Represents GeneRIF Entries obtained from NCBI"))
   (triples
       (string->identifier
        "rif" (format #f "~a_~a_~a_~a"
diff --git a/examples/genotype-datasets.scm b/examples/genotype-datasets.scm
index f140600..ebe2349 100755
--- a/examples/genotype-datasets.scm
+++ b/examples/genotype-datasets.scm
@@ -15,29 +15,39 @@
 
 
 (define-transformer gn:set->gn:dataset
-  (tables (Datasets
-           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
-           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId")
+  (tables (Species
+           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
            (inner-join GenoFreeze "ON GenoFreeze.InbredSetId = InbredSet.Id"))
-          "WHERE GenoFreeze.public > 0 GROUP BY Datasets.DatasetId")
-  (schema-triples
-   (gnt:has_genotype_data rdf:type owl:ObjectProperty)
-   (gnt:has_genotype_data rdfs:label "this resources has genotype data.")
-   (gnt:has_genotype_data rdfs:comment "Associates a resource with its genotype data.")
-   (gnt:has_genotype_data rdfs:domain gnc:set)
-   (gnt:has_genotype_data rdfs:range dcat:Dataset)
-   (gnt:has_genotype_data rdfs:subPropertyOf dct:relation))
+          "WHERE GenoFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, GenoFreeze.ShortName")
   (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
-    (set gnt:has_genotype_data (string->identifier "dataset" (field GenoFreeze Name) #:separator "_"))))
+    (multiset gnt:has_genotype_data
+              (map (cut string->identifier "dataset" <> #:separator "_")
+                   (string-split
+                    (field ("GROUP_CONCAT(GenoFreeze.Name SEPARATOR ',')"
+                            dataset_name))
+                    #\,)))))
 
-(define-transformer gn:dataset->gn:set
-  (tables (Datasets
-           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
-           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId")
-           (inner-join GenoFreeze "ON GenoFreeze.InbredSetId = InbredSet.Id"))
-          "WHERE GenoFreeze.public > 0 GROUP BY Datasets.DatasetId")
+(define-transformer gn:dataset->metadata ; per-dataset metadata for genotype datasets (GenoFreeze rows)
+  (tables (GenoFreeze
+           (inner-join InbredSet "ON InbredSet.Id = GenoFreeze.InbredSetId")
+           (inner-join Species "ON InbredSet.SpeciesId = Species.Id"))
+          "WHERE GenoFreeze.public > 0 AND Species.Name != 'monkey'") ; keep rows with public > 0 and drop the species named 'monkey'
+  (triples (string->identifier "dataset" (field GenoFreeze Name) #:separator "_") ; subject: identifier built from "dataset" and GenoFreeze.Name
+    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")) ; object: "set" identifier built from the joined InbredSet.Name
+    (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:datetime)))) ; NOTE(review): XML Schema spells this type xsd:dateTime; confirm the lowercase tag is intended
+
+(define-transformer gn:dataset->marker/snp-count
+  (tables (GenoFreeze
+           (inner-join InbredSet "ON InbredSet.Id = GenoFreeze.InbredSetId")
+           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
+           (inner-join Geno "ON Geno.SpeciesId = Species.Id"))
+          "WHERE GenoFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY GenoFreeze.Name")
   (triples (string->identifier "dataset" (field GenoFreeze Name) #:separator "_")
-    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
+    (set gnt:has_marker_count
+         (string->symbol
+          (format #f "'~s'^^xsd:integer"
+                  (field
+                   ("COUNT(DISTINCT Geno.Marker_Name)" MarkerCount)))))))
 
 
 (let* ((option-spec
@@ -68,7 +78,10 @@
       ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
       ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
    (inputs
-    (list gn:set->gn:dataset gn:dataset->gn:set))
+    (list
+     gn:set->gn:dataset
+     gn:dataset->metadata
+     gn:dataset->marker/snp-count))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))
diff --git a/examples/genotype.scm b/examples/genotype.scm
new file mode 100755
index 0000000..f2ba75f
--- /dev/null
+++ b/examples/genotype.scm
@@ -0,0 +1,70 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (rnrs programs)
+             (rnrs io ports)
+             (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 getopt-long)
+             (ice-9 match)
+             (ice-9 regex)
+             (transform strings)
+             (transform sql)
+             (transform triples)
+             (transform special-forms))
+
+(define-transformer gn:markers/snps->metadata ; metadata triples for markers stored in the Geno table
+  (tables (Geno
+           (inner-join Species "ON Geno.SpeciesId = Species.Id"))
+          "WHERE Species.Name != 'monkey'") ; drop markers whose species is named 'monkey'
+  (triples (string->identifier "marker" (field Geno Name) #:separator "_") ; subject: identifier built from "marker" and Geno.Name
+    (set gnt:has_species
+         (string->identifier "" (remap-species-identifiers (field Species Fullname)))) ; species identifier derived from Species.Fullname via remap-species-identifiers
+    (set rdf:type 'gnc:dna_marker) ; every subject is typed with the quoted symbol gnc:dna_marker
+    (set skos:prefLabel (field Geno Name)) ; same column that is used to build the subject identifier
+    (set skos:altLabel (field Geno Marker_Name))
+    (set gnt:chr (field Geno Chr)) ; NOTE(review): presumably the chromosome name; confirm against the Geno schema
+    (set gnt:mb (annotate-field (field Geno Mb) '^^xsd:double)) ; Geno.Mb is emitted with an ^^xsd:double annotation
+    (set gnt:sequence (field Geno Sequence))
+    (set gnt:source (field Geno Source))
+    (set rdfs:comment (field Geno Comments)))) ; Geno.Comments is exposed as rdfs:comment
+
+
+
+(let* ((option-spec ; script entry point: parse the command line, then run the marker transformer
+        '((settings (single-char #\s) (value #t)) ; -s / --settings: file holding the connection settings
+          (output (single-char #\o) (value #t)) ; -o / --output: passed on as the #:rdf output
+          (documentation (single-char #\d) (value #t)))) ; -d / --documentation: passed on as the #:documentation output
+       (options (getopt-long (command-line) option-spec))
+       (settings (option-ref options 'settings #f)) ; #f when the option is absent
+       (output (option-ref options 'output #f))
+       (documentation (option-ref options 'documentation #f))
+       (%connection-settings
+        (call-with-input-file settings ; reads one datum from the settings file, so -s has to be supplied
+          read)))
+  (with-documentation
+   (name "Genotype Metadata") ; title matches this file's content; it previously said "Phenotypes Metadata"
+   (connection %connection-settings)
+   (table-metadata? #f)
+   (prefixes
+    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
+      ("dct:" "<http://purl.org/dc/terms/>")
+      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+      ("owl:" "<http://www.w3.org/2002/07/owl#>")
+      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+      ("gnd:" "<https://cd.genenetwork.org/lmdb/v1/data/traits/>")
+      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
+      ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
+      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
+      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+      ("qb:" "<http://purl.org/linked-data/cube#>")
+      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
+      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")))
+   (inputs
+    (list gn:markers/snps->metadata)) ; the only transformer this script runs
+   (outputs
+    `(#:documentation ,documentation
+      #:rdf ,output))))
+
diff --git a/examples/molecular-traits-datasets.scm b/examples/molecular-traits-datasets.scm
index 77bba08..34ddf3a 100755
--- a/examples/molecular-traits-datasets.scm
+++ b/examples/molecular-traits-datasets.scm
@@ -14,11 +14,6 @@
 
 (define-transformer gn:molecular-trait->gn:dataset
   (tables (Tissue))
-  (schema-triples
-   (gnc:molecular_trait a owl:Class)
-   (gnc:molecular_trait a skos:Concept)
-   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479)
-   (gnc:molecular_trait rdfs:label "Molecular Trait.   This describes a melecular trait of a given species.  We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups."))
   (triples (string->identifier "trait" (field Tissue Short_Name) #:separator "_")
     (set rdf:type 'gnc:molecular_trait)
     (set skos:prefLabel (field Tissue Name))
@@ -36,13 +31,6 @@
            (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
            (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
           "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, Tissue.Short_Name")
-  (schema-triples
-   (gnt:has_probeset_data rdf:type owl:ObjectProperty)
-   (gnt:has_probeset_data rdfs:label "this resources has this probeset data.")
-   (gnt:has_probeset_data rdfs:comment "Associates a resource with this probeset data.")
-   (gnt:has_probeset_data rdfs:domain gnc:set)
-   (gnt:has_probeset_data rdfs:range gnc:molecular_trait)
-   (gnt:has_probeset_data rdfs:subPropertyOf dct:relation))
   (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
     (multiset gnt:has_probeset_data
               (map (cut string->identifier "dataset" <> #:separator "_")
@@ -62,20 +50,9 @@
            (inner-join Datasets "ON InfoFiles.DatasetId = Datasets.DatasetId")
            (left-join GeneChip "ON GeneChip.Id =  InfoFiles.GeneChipId"))
           "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
-  (schema-triples
-   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
-   (gnt:has_molecular_trait rdfs:domain gnc:set)
-   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait)
-   (gnt:has_molecular_trait rdfs:label "has molecular trait")
-   (gnt:uses_genechip a owl:ObjectProperty)
-   (gnt:uses_genechip rdfs:domain dcat:Dataset)
-   (gnt:uses_genechip skos:definition "The Platform this resource uses..")
-   (gnt:uses_normalization_method rdfs:comment "The normalization method used for the molecular traits in this dataset")
-   (gnt:uses_normalization_method rdfs:domain dcat:Dataset)
-   (gnt:uses_normalization_method rdfs:label "Averaging method used for the molecular traits in this dataset.")
-   (gnt:uses_normalization_method rdfs:range gnc:avg_method))
   (triples (string->identifier "dataset" (field ProbeSetFreeze Name) #:separator "_")
     (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime))
+    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
     (set gnt:uses_normalization_method
          (string->identifier "avg_method" (field AvgMethod Name AvgMethodName) #:separator "_"))
     (set gnt:has_molecular_trait
diff --git a/examples/ontology.scm b/examples/ontology.scm
new file mode 100755
index 0000000..f2b54cc
--- /dev/null
+++ b/examples/ontology.scm
@@ -0,0 +1,324 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (ice-9 getopt-long)
+             (transform triples)
+             (transform schema)
+             (transform special-forms))
+
+(let* ((option-spec
+        '((settings (single-char #\s) (value #t))
+          (output (single-char #\o) (value #t))
+          (documentation (single-char #\d) (value #t))))
+       (options (getopt-long (command-line) option-spec))
+       (settings (option-ref options 'settings #f))
+       (output (option-ref options 'output #f))
+       (documentation (option-ref options 'documentation #f))
+       (%connection-settings (call-with-input-file settings read)))
+  (with-output-to-file output
+    (lambda ()
+      ;; Define all GN ontology in one place.
+      (prefix "dcat:" "<http://www.w3.org/ns/dcat#>")
+      (prefix "dct:" "<http://purl.org/dc/terms/>")
+      (prefix "gn:" "<http://rdf.genenetwork.org/v1/id/>")
+      (prefix "owl:" "<http://www.w3.org/2002/07/owl#>")
+      (prefix "gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+      (prefix "gnt:" "<http://rdf.genenetwork.org/v1/term/>")
+      (prefix "obo:" "<http://purl.obolibrary.org/obo/>")
+      (prefix "sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
+      (prefix "skos:" "<http://www.w3.org/2004/02/skos/core#>")
+      (prefix "rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+      (prefix "rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+      (prefix "xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+      (prefix "qb:" "<http://purl.org/linked-data/cube#>")
+      (prefix "xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
+      (prefix "pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
+      (prefix "schema:" "<https://schema.org/>")
+      (newline)
+      (triple 'gnc:population_category 'a 'xkos:ClassificationLevel)
+      (triple 'gnc:population_category 'rdfs:label "Population Category")
+      (triple 'gnc:population_category 'skos:inScheme 'gnc:resource_classification_scheme)
+      (triple 'gnc:population_category 'skos:prefLabel "Population Category")
+      (triple 'gnc:population_category 'xkos:depth "3")
+      (triple 'gnc:population_category 'xkos:nextLevel 'gnc:set)
+      (triple 'gnc:population_category 'xkos:previousLevel 'gnc:species)
+      (triple 'gnc:reference_population 'a 'skos:Concept)
+      (triple 'gnc:reference_population 'skos:definition "A genetic population")
+      (triple 'gnc:reference_population 'skos:inScheme 'gnc:population_category)
+      (triple 'gnc:reference_population 'skos:prefLabel "Reference population")
+      (triple 'gnc:resource_classification_scheme 'a 'skos:ConceptScheme)
+      (triple 'gnc:resource_classification_scheme 'skos:definition "A hierarchical classification scheme for organizing GeneNetwork resources by dataset type, resource set (inbredset group), or species.")
+      (triple 'gnc:resource_classification_scheme 'skos:prefLabel "GeneNetwork Resource Classification Scheme")
+      (triple 'gnc:resource_classification_scheme 'xkos:levels 'gnc:population_category)
+      (triple 'gnc:resource_classification_scheme 'xkos:levels 'gnc:set)
+      (triple 'gnc:resource_classification_scheme 'xkos:levels 'gnc:species)
+      (triple 'gnc:resource_classification_scheme 'xkos:levels 'gnc:taxonomic_family)
+      (triple 'gnc:resource_classification_scheme 'xkos:numberOfLevels "4")
+      (triple 'gnc:set 'a 'xkos:ClassificationLevel)
+      (triple 'gnc:set 'skos:definition "A category representing groups of genetically related strains or individuals (inbred sets, recombinant inbred lines, etc.).")
+      (triple 'gnc:set 'skos:inScheme 'gnc:resource_classification_scheme)
+      (triple 'gnc:set 'skos:prefLabel "InbredSet Group")
+      (triple 'gnc:set 'xkos:depth "4")
+      (triple 'gnc:set 'xkos:previousLevel 'gnc:population_category)
+      ;; gnc:species: depth-2 classification level, sitting between
+      ;; gnc:taxonomic_family and gnc:population_category.
+      (triple 'gnc:species 'a 'xkos:ClassificationLevel)
+      (triple 'gnc:species 'skos:definition "A classification level that associates a given resource to a species in GeneNetwork.")
+      (triple 'gnc:species 'skos:inScheme 'gnc:resource_classification_scheme)
+      (triple 'gnc:species 'skos:prefLabel "Species")
+      (triple 'gnc:species 'xkos:depth "2")
+      (triple 'gnc:species 'xkos:nextLevel 'gnc:population_category)
+      (triple 'gnc:species 'xkos:previousLevel 'gnc:taxonomic_family)
+      (triple 'gnc:taxonomic_family 'a 'xkos:ClassificationLevel)
+      (triple 'gnc:taxonomic_family 'skos:definition "An organizational classification level used in GeneNetwork to group resources into families.")
+      (triple 'gnc:taxonomic_family 'skos:inScheme 'gnc:resource_classification_scheme)
+      (triple 'gnc:taxonomic_family 'skos:prefLabel "Family")
+      (triple 'gnc:taxonomic_family 'xkos:depth "1")
+      (triple 'gnc:taxonomic_family 'xkos:nextLevel 'gnc:species)
+      (triple 'gnt:assigned_species 'a 'owl:ObjectProperty)
+      (triple 'gnt:assigned_species 'rdfs:domain 'gnc:set)
+      (triple 'gnt:assigned_species 'rdfs:label "These families have been assigned to these species")
+      (triple 'gnt:genetic_type 'a 'owl:DatatypeProperty)
+      (triple 'gnt:genetic_type 'rdfs:domain 'gnc:set)
+      (triple 'gnt:genetic_type 'rdfs:label "has genetic type")
+      (triple 'gnt:genetic_type 'rdfs:range 'xsd:string)
+      (triple 'gnt:genetic_type 'skos:definition "Describes the genetic architecture of a resource set (e.g., intercross, riset).")
+      ;; gnt:has_family_order_id: integer-valued datatype property.
+      ;; The type triple is emitted once; the repeated copy added nothing.
+      (triple 'gnt:has_family_order_id 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_family_order_id 'rdfs:range 'xsd:integer)
+      (triple 'gnt:has_set_code 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_set_code 'rdfs:domain 'gnc:set)
+      (triple 'gnt:has_set_code 'rdfs:label "has set code")
+      (triple 'gnt:has_set_code 'rdfs:range 'xsd:string)
+      (triple 'gnt:has_set_code 'skos:definition "Provides a unique identifier code for a resource set.")
+      ;; gnt:has_species: object property whose values are gnc:species.
+      ;; The comment predicate is rdfs:comment; the rdf: namespace defines
+      ;; no "comment" term.
+      (triple 'gnt:has_species 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_species 'rdfs:comment "This resource belongs to this species")
+      (triple 'gnt:has_species 'rdfs:label "belongs to species")
+      (triple 'gnt:has_species 'rdfs:range 'gnc:species)
+      ;; gnt:has_strain: object property whose values are gnc:set.
+      ;; The comment predicate is rdfs:comment; the rdf: namespace defines
+      ;; no "comment" term.
+      ;; NOTE(review): rdfs:domain is gnc:species while schema:domainIncludes
+      ;; also lists dcat:Dataset -- confirm the strict domain is intended.
+      (triple 'gnt:has_strain 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_strain 'rdfs:comment "Indicates the group the resources belongs to")
+      (triple 'gnt:has_strain 'rdfs:domain 'gnc:species)
+      (triple 'gnt:has_strain 'rdfs:label "this resource belongs to this strain.")
+      (triple 'gnt:has_strain 'rdfs:range 'gnc:set)
+      (triple 'gnt:has_strain 'schema:domainIncludes 'dcat:Dataset)
+      (triple 'gnt:has_strain 'schema:domainIncludes 'gnc:species)
+      (triple 'gnt:has_strain 'skos:definition "Lists all strains that belong to this resource.")
+      (triple 'gnt:has_taxonomic_family 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_taxonomic_family 'rdfs:label "has family")
+      (triple 'gnt:has_taxonomic_family 'schema:domainIncludes 'gnc:set)
+      (triple 'gnt:has_taxonomic_family 'schema:domainIncludes 'gnc:species)
+      (triple 'gnt:has_taxonomic_family 'skos:definition "Links a species to its taxonomic family")
+      (triple 'gnt:has_uniprot_taxon_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_uniprot_taxon_id 'rdfs:label "has uniprot taxonomic id")
+      ;; Definition of the gnc:population_category classification level
+      ;; (the level itself is declared with the gnc: prefix).
+      (triple 'gnc:population_category 'skos:definition "Classification of genetic populations by breeding design and data aggregation.")
+      (triple 'gnt:short_name 'a 'owl:DatatypeProperty)
+      (triple 'gnt:short_name 'rdfs:domain 'gnc:species)
+      (triple 'gnt:short_name 'rdfs:label "has short name")
+      (triple 'gnt:short_name 'skos:definition "The short name of a given resource")
+      (triple 'gnt:uses_mapping_method 'a 'owl:ObjectProperty)
+      (triple 'gnt:uses_mapping_method 'rdfs:comment "The method used to map genetic or experimental data for this resource.")
+      (triple 'gnt:uses_mapping_method 'rdfs:domain 'gnc:set)
+      (triple 'gnt:uses_mapping_method 'rdfs:label "mapping method")
+      (triple 'gnt:uses_mapping_method 'rdfs:range 'gnc:mapping_method)
+
+      ;; Describing Datasets
+      ;; gnc:molecular_trait: declared both as an OWL class and a SKOS
+      ;; concept, and as a subclass of obo:UBERON_0000479.
+      (triple 'gnc:molecular_trait 'a 'owl:Class)
+      (triple 'gnc:molecular_trait 'a 'skos:Concept)
+      (triple 'gnc:molecular_trait 'rdfs:label "Molecular Trait.   This describes a molecular trait of a given species.  We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups.")
+      (triple 'gnc:molecular_trait 'rdfs:subClassOf 'obo:UBERON_0000479)
+      (triple 'gnt:has_case_info 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_case_info 'rdfs:comment "Information about the cases used in this platform")
+      (triple 'gnt:has_case_info 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_case_info 'rdfs:label "About Case")
+      (triple 'gnt:has_citation 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_citation 'rdfs:comment "Citation for this dataset")
+      (triple 'gnt:has_citation 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_citation 'rdfs:label "Citation")
+      ;; gnt:has_contributors: object property on dcat:Dataset.
+      ;; The rdfs:comment triple is emitted once (it was duplicated verbatim).
+      (triple 'gnt:has_contributors 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_contributors 'rdfs:comment "Contributors of this resource")
+      (triple 'gnt:has_contributors 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_contributors 'rdfs:label "Contributors")
+      (triple 'gnt:has_data_processing_info 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_data_processing_info 'rdfs:comment "Information about how this dataset was processed")
+      (triple 'gnt:has_data_processing_info 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_data_processing_info 'rdfs:label "About Data Processing")
+      (triple 'gnt:has_experiment_design 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_experiment_design 'rdfs:comment "Experiment Design for this resource")
+      (triple 'gnt:has_experiment_design 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_experiment_design 'rdfs:label "Experiment Design")
+      (triple 'gnt:has_experiment_design_info 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_experiment_design_info 'rdfs:comment "Information about how the experiment was designed")
+      (triple 'gnt:has_experiment_design_info 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_experiment_design_info 'rdfs:label "Experiment Design")
+      (triple 'gnt:has_experiment_type 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_experiment_type 'rdfs:comment "Information about the experiment type")
+      (triple 'gnt:has_experiment_type 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_experiment_type 'rdfs:label "Experiment Type Metadata")
+      (triple 'gnt:has_molecular_trait 'rdf:type 'owl:ObjectProperty)
+      (triple 'gnt:has_molecular_trait 'rdfs:domain 'gnc:set)
+      (triple 'gnt:has_molecular_trait 'rdfs:label "has molecular trait")
+      (triple 'gnt:has_molecular_trait 'rdfs:range 'gnc:molecular_trait)
+      (triple 'gnt:has_phenotype_data 'rdf:type 'owl:ObjectProperty)
+      (triple 'gnt:has_phenotype_data 'rdfs:comment "Associates a resource with its phenotype data.")
+      (triple 'gnt:has_phenotype_data 'rdfs:domain 'gnc:set)
+      (triple 'gnt:has_phenotype_data 'rdfs:label "this resources has this phenotype data.")
+      (triple 'gnt:has_phenotype_data 'rdfs:range 'dcat:Dataset)
+      (triple 'gnt:has_phenotype_data 'rdfs:subPropertyOf 'dct:relation)
+      (triple 'gnt:has_platform_info 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_platform_info 'rdfs:comment "Information about the platform that was used with this dataset")
+      (triple 'gnt:has_platform_info 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_platform_info 'rdfs:label "About Platform")
+      (triple 'gnt:has_probeset_data 'rdf:type 'owl:ObjectProperty)
+      (triple 'gnt:has_probeset_data 'rdfs:comment "Associates a resource with this probeset data.")
+      (triple 'gnt:has_probeset_data 'rdfs:domain 'gnc:set)
+      (triple 'gnt:has_probeset_data 'rdfs:label "this resources has this probeset data.")
+      (triple 'gnt:has_probeset_data 'rdfs:range 'gnc:molecular_trait)
+      (triple 'gnt:has_probeset_data 'rdfs:subPropertyOf 'dct:relation)
+      (triple 'gnt:has_samples 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_samples 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_samples 'rdfs:label "Samples")
+      (triple 'gnt:has_specifics 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_specifics 'rdfs:comment "Has specifics")
+      (triple 'gnt:has_specifics 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_specifics 'rdfs:label "Specifics")
+      (triple 'gnt:has_summary 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_summary 'rdfs:comment "Summary information about dataset")
+      (triple 'gnt:has_summary 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_summary 'rdfs:label "Summary")
+      (triple 'gnt:has_tissue_info 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_tissue_info 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_tissue_info 'rdfs:label "Metadata about Tissue for this resource")
+      (triple 'gnt:uses_genechip 'a 'owl:ObjectProperty)
+      (triple 'gnt:uses_genechip 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:uses_genechip 'skos:definition "The Platform this resource uses..")
+      (triple 'gnt:uses_normalization_method 'rdfs:comment "The normalization method used for the molecular traits in this dataset")
+      (triple 'gnt:uses_normalization_method 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:uses_normalization_method 'rdfs:label "Averaging method used for the molecular traits in this dataset.")
+      (triple 'gnt:uses_normalization_method 'rdfs:range 'gnc:avg_method)
+
+      ;; Describing phenotypes
+      (triple 'gnc:phenotype 'a 'owl:Class)
+      (triple 'gnc:phenotype 'a 'skos:Concept)
+      (triple 'gnc:phenotype 'rdfs:label "A phenotype.")
+      (triple 'gnc:phenotype_trait 'a 'owl:Class)
+      (triple 'gnc:phenotype_trait 'a 'skos:Concept)
+      (triple 'gnc:phenotype_trait 'rdfs:label "A phenotype trait.")
+      (triple 'gnt:abbreviation 'a 'owl:ObjectProperty)
+      (triple 'gnt:abbreviation 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:abbreviation 'skos:definition "The abbreviation used for this resource")
+      (triple 'gnt:additive 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:additive 'rdfs:range 'xsd:double)
+      ;; Declared under gnt:has_lab_code, the predicate that
+      ;; examples/phenotype.scm emits for Phenotype.Lab_code.
+      (triple 'gnt:has_lab_code 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_lab_code 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:locus 'a 'qb:MeasureProperty)
+      (triple 'gnt:locus 'a 'rdf:Property)
+      (triple 'gnt:locus 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:locus 'rdfs:range 'rdfs:Literal)
+      (triple 'gnt:locus 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:lod_score 'a 'qb:MeasureProperty)
+      (triple 'gnt:lod_score 'a 'rdf:Property)
+      (triple 'gnt:lod_score 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:lod_score 'rdfs:label "Peak -logP")
+      (triple 'gnt:lod_score 'rdfs:range 'xsd:double)
+      (triple 'gnt:lod_score 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:lod_score 'skos:definition "Statistical measurement assessing the likelihood of genetic linkage between traits or genetic markers.")
+      (triple 'gnt:mean 'a 'qb:MeasureProperty)
+      (triple 'gnt:mean 'a 'rdf:Property)
+      (triple 'gnt:mean 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:mean 'rdfs:range 'xsd:double)
+      (triple 'gnt:mean 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:sequence 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:sequence 'rdfs:range 'xsd:integer)
+      ;; gnt:submitter: property on gnc:phenotype.
+      ;; The skos:definition triple is emitted once (it was duplicated verbatim).
+      (triple 'gnt:submitter 'a 'owl:ObjectProperty)
+      (triple 'gnt:submitter 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:submitter 'skos:definition "A person who submitted this resource to GN")
+      (triple 'gnt:has_phenotype_data 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_phenotype_data 'rdfs:domain 'gnc:set)
+      (triple 'gnt:has_phenotype_data 'skos:definition "This resource has phenotype data.")
+
+      ;; Genotypes
+      (triple 'gnc:dna_marker 'a 'owl:Class)
+      (triple 'gnc:dna_marker 'a 'skos:Concept)
+      (triple 'gnc:dna_marker 'rdfs:label "A DNA Marker or SNP")
+      (triple 'gnt:has_genotype_files 'rdfs:label "This resource has these genotype files")
+      (triple 'gnt:has_genotype_files 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_genotype_data 'rdf:type 'owl:ObjectProperty)
+      (triple 'gnt:has_genotype_data 'rdfs:label "this resources has genotype data.")
+      (triple 'gnt:has_genotype_data 'rdfs:comment "Associates a resource with its genotype data.")
+      (triple 'gnt:has_genotype_data 'rdfs:domain 'gnc:set)
+      (triple 'gnt:has_genotype_data 'rdfs:range 'dcat:Dataset)
+      (triple 'gnt:has_genotype_data 'rdfs:subPropertyOf 'dct:relation)
+      ;; gnt:has_marker_count: integer-valued property of a dcat:Dataset.
+      ;; The subject class goes in rdfs:domain and the value type in
+      ;; rdfs:range; a literal-valued property is an owl:DatatypeProperty.
+      (triple 'gnt:has_marker_count 'rdf:type 'owl:DatatypeProperty)
+      (triple 'gnt:has_marker_count 'rdfs:label "this resources has N number of dna markers/SNPs.")
+      (triple 'gnt:has_marker_count 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_marker_count 'rdfs:range 'xsd:integer)
+      (triple 'gnt:chr 'a 'qb:MeasureProperty)
+      (triple 'gnt:chr 'a 'rdf:Property)
+      (triple 'gnt:chr 'rdfs:label "Chromosome")
+      (triple 'gnt:chr 'rdfs:domain 'gnc:marker)
+      (triple 'gnt:chr 'rdfs:range 'rdfs:Literal)
+      (triple 'gnt:chr 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:mb 'rdfs:label "Megabase")
+      (triple 'gnt:mb 'rdfs:domain 'gnc:marker)
+      (triple 'gnt:mb 'rdfs:range 'rdfs:Literal)
+      (triple 'gnt:mb 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:sequence 'rdfs:label "Sequence")
+      (triple 'gnt:sequence 'rdfs:domain 'gnc:marker)
+      (triple 'gnt:sequence 'rdfs:range 'rdfs:Literal)
+      (triple 'gnt:sequence 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:source 'rdfs:label "Source")
+      (triple 'gnt:source 'rdfs:domain 'gnc:marker)
+      (triple 'gnt:source 'rdfs:range 'rdfs:Literal)
+      (triple 'gnt:source 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+
+      ;; Probesets
+      (triple 'gnc:probeset 'a 'owl:Class)
+      (triple 'gnc:probeset 'a 'skos:Concept)
+      (triple 'gnc:probeset 'rdfs:label "A probeset")
+      (triple 'gnt:has_target_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_target_id 'rdfs:label "The target id for this probeset")
+      (triple 'gnt:has_target_id 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:symbol 'a 'owl:ObjectProperty)
+      (triple 'gnt:symbol 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:targets_region 'a 'owl:ObjectProperty)
+      (triple 'gnt:targets_region 'rdfs:label "The target region")
+      (triple 'gnt:targets_region 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:mb_mm8 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_specificity 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_specificity 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_blat_score 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_blat_score 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_blat_mb_start 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_blat_mb_start 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_blat_mb_end 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_blat_mb_end 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_blat_seq 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_blat_seq 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_target_seq 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_target_seq 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_homologene_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_homologene_id 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_uniprot_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_uniprot_id 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_pub_chem_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_pub_chem_id 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_kegg_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_kegg_id 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_omim_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_omim_id 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:has_chebi_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_chebi_id 'rdfs:domain 'gnc:probeset)
+
+      ;; RIF
+      (triple 'gnc:gene_wiki_entry 'a 'rdfs:Class)
+      (triple 'gnc:gn_wiki_entry 'rdfs:subClassOf 'gnc:gene_wiki_entry)
+      (triple 'gnt:initial 'a 'owl:ObjectProperty)
+      (triple 'gnt:initial 'rdfs:domain 'gnc:gene_wiki_entry)
+      (triple 'gnt:initial 'skos:definition "Optional user or project code or your initials")
+      (triple 'gnt:reason 'a 'owl:ObjectProperty)
+      (triple 'gnt:reason 'rdfs:domain 'gnc:gene_wiki_entry)
+      (triple 'gnt:reason 'skos:definition "The reason why this resource was modified")
+      (triple 'gnc:gn_wiki_entry 'rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
+      (triple 'gnt:gene_symbol 'rdfs:domain 'gnc:gn_wiki_entry)
+      (triple 'gnc:ncbi_wiki_entry 'rdfs:subClassOf 'gnc:gene_wiki_entry)
+      (triple 'gnc:ncbi_wiki_entry 'rdfs:comment "Represents GeneRIF Entries obtained from NCBI"))))
diff --git a/examples/phenotype-datasets.scm b/examples/phenotype-datasets.scm
new file mode 100755
index 0000000..c8657fc
--- /dev/null
+++ b/examples/phenotype-datasets.scm
@@ -0,0 +1,86 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (rnrs programs)
+             (rnrs io ports)
+             (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 getopt-long)
+             (ice-9 match)
+             (ice-9 regex)
+             (transform strings)
+             (transform sql)
+             (transform triples)
+             (transform special-forms))
+
+
+(define-transformer gn:set->gn:dataset
+  (tables (Species
+           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
+           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id"))
+          "WHERE PublishFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, PublishFreeze.ShortName")
+  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
+    (multiset gnt:has_phenotype_data
+              (map (cut string->identifier "dataset" <> #:separator "_")
+                   (string-split
+                    (field ("GROUP_CONCAT(PublishFreeze.Name SEPARATOR ',')"
+                            dataset_name))
+                    #\,)))))
+
+;; For every PublishXRef row whose InbredSet has public > 0, describe the
+;; PublishFreeze dataset: the trait it holds, its creation time and the
+;; InbredSet ("set") it belongs to.
+(define-transformer gn:dataset->gn:trait
+  (tables (PublishXRef
+           (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
+           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
+           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
+           (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId")
+           (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
+          "WHERE InbredSet.public > 0")
+  (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")
+    (set gnt:has_phenotype_trait
+         ;; Trait id = dataset name + first usable value among the post/pre
+         ;; abbreviations and the post/pre descriptions.  The last fallback
+         ;; reads the *pre*-publication description; it used to re-read the
+         ;; post-publication one, which made it a dead alternative.
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
+           (string->identifier
+            "trait"
+            (format #f "~a_~a" (field PublishFreeze Name)
+                    (or post-abbrev pre-abbrev post-desc pre-desc))
+            #:separator "_")))
+    (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:datetime))
+    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
+
+
+(let* ((option-spec
+        '((settings (single-char #\s) (value #t))
+          (output (single-char #\o) (value #t))
+          (documentation (single-char #\d) (value #t))))
+       (options (getopt-long (command-line) option-spec))
+       (settings (option-ref options 'settings #f))
+       (output (option-ref options 'output #f))
+       (documentation (option-ref options 'documentation #f))
+       (%connection-settings
+        (call-with-input-file settings
+          read)))
+  (with-documentation
+   (name "Phenotype Datasets")
+   (connection %connection-settings)
+   (table-metadata? #f)
+   (prefixes
+    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
+      ("dct:" "<http://purl.org/dc/terms/>")
+      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
+      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+      ("owl:" "<http://www.w3.org/2002/07/owl#>")
+      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
+      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
+      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
+   (inputs
+    (list
+     gn:set->gn:dataset
+     gn:dataset->gn:trait))
+   (outputs
+    `(#:documentation ,documentation
+      #:rdf ,output))))
diff --git a/examples/phenotype.scm b/examples/phenotype.scm
index 03eec45..c2564b6 100755
--- a/examples/phenotype.scm
+++ b/examples/phenotype.scm
@@ -14,92 +14,112 @@
              (transform special-forms))
 
 
-(define-transformer phenotypes
-  (tables (PublishXRef
-           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
-           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
-           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")))
-  (schema-triples
-   (gnt:trait_id a owl:ObjectProperty)
-   (gnt:trait_id rdfs:domain gnc:phenotype)
-   (gnt:trait_id skos:definition "This is the unique trait id assigned from GeneNetwork")
-   (gnt:abbreviation a owl:ObjectProperty)
-   (gnt:abbreviation rdfs:domain gnc:phenotype)
-   (gnt:abbreviation skos:definition "The abbreviation used for this resource")
-   (gnt:labCode a owl:ObjectProperty)
-   (gnt:labCode rdfs:domain gnc:phenotype)
-   (gnt:submitter a owl:ObjectProperty)
-   (gnt:submitter rdfs:domain gnc:phenotype)
-   (gnt:submitter skos:definition "A person who submitted this resource to GN")
-   (gnt:mean a rdf:Property)
-   (gnt:mean a qb:MeasureProperty)
-   (gnt:mean rdfs:subPropertyOf sdmx-measure:obsValue)
-   (gnt:mean rdfs:domain gnc:phenotype)
-   (gnt:mean rdfs:range xsd:double)
-   (gnt:lod_score a rdf:Property)
-   (gnt:lod_score a qb:MeasureProperty)
-   (gnt:lod_score rdfs:subPropertyOf sdmx-measure:obsValue)
-   (gnt:lod_score rdfs:domain gnc:phenotype)
-   (gnt:lod_score rdfs:range xsd:double)
-   (gnt:lod_score rdfs:label "Peak -logP")
-   (gnt:lod_score skos:definition "Statistical measurement assessing the likelihood of genetic linkage between traits or genetic markers.")
-   (gnt:locus a rdf:Property)
-   (gnt:locus a qb:MeasureProperty)
-   (gnt:locus rdfs:subPropertyOf sdmx-measure:obsValue)
-   (gnt:locus rdfs:domain gnc:phenotype)
-   (gnt:locus rdfs:range rdfs:Literal)
-   (gnt:additive rdfs:domain gnc:phenotype)
-   (gnt:additive rdfs:range xsd:double)
-   (gnt:sequence rdfs:domain gnc:phenotype)
-   (gnt:sequence rdfs:range xsd:integer))
-  (triples (string->identifier
-            "trait"
-            (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
-                    Phenotype))
-            #:separator "_")
+
+
+
+
+
+
+
+;; Attach every Phenotype row to the gnc:phenotype collection through
+;; skos:member.
+(define-transformer gnc:phenotype->gn:phenotype
+  (tables (Phenotype))
+  (triples "gnc:phenotype"
+    (set skos:member
+         ;; Phenotype id = first usable value among the post/pre
+         ;; abbreviations and the post/pre descriptions.  The last fallback
+         ;; reads the *pre*-publication description; it used to re-read the
+         ;; post-publication one, which made it a dead alternative.
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
+           (string->identifier
+            "phenotype"
+            (or post-abbrev pre-abbrev post-desc pre-desc)
+            #:separator "_")))))
+
+;; Emit the descriptive metadata (type, description, abbreviation, lab
+;; code, submitter, contributor) of every Phenotype row.
+;; In both identifier computations below the last fallback reads the
+;; *pre*-publication description; it used to re-read the post-publication
+;; one, which made it a dead alternative.
+(define-transformer gn:phenotype->metadata
+  (tables (Phenotype))
+  (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+                 (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+                 (post-desc (blank-p (field Phenotype Post_publication_description)))
+                 (pre-desc (blank-p (field Phenotype Pre_publication_description))))
+             (string->identifier
+              "phenotype"
+              (or post-abbrev pre-abbrev post-desc pre-desc)
+              #:separator "_"))
     (set rdf:type 'gnc:phenotype)
-    (set gnt:has_strain
-         (string->identifier
-          "set" (field InbredSet Name InbredSetName)
-          #:separator "_"))
-    ;; This is the trait's name
-    (set gnt:trait_id
-         (let ((trait-id (field PublishXRef Id)))
-           (if (number? trait-id)
-               (number->string trait-id)
-               trait-id)))
-    (set skos:altLabel
-         (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
-                 Phenotype)))
     ;; All phenotypes have a post-publication description
     (set dct:description
          (sanitize-rdf-string
           (field Phenotype Post_publication_description)))
     ;; All phenotypes have a post-publication abbreviation
-    (set gnt:abbreviation (field Phenotype Post_publication_abbreviation))
-    (set gnt:labCode (field Phenotype Lab_code))
+    (set gnt:abbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation)))
+    (set gnt:has_lab_code (field Phenotype Lab_code))
     (set gnt:submitter
          (sanitize-rdf-string (field Phenotype Submitter)))
     (set dct:contributor (sanitize-rdf-string (field Phenotype Owner)))
+    (set skos:member
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
+           (string->identifier
+            "phenotype"
+            (or post-abbrev pre-abbrev post-desc pre-desc)
+            #:separator "_")))))
+
+;; Describe every trait (PublishXRef row of an InbredSet with public > 0):
+;; its set, data distribution, publication reference, the phenotype it
+;; measures and its mapping statistics.
+;; In both identifier computations below the last fallback reads the
+;; *pre*-publication description; it used to re-read the post-publication
+;; one, which made it a dead alternative.
+(define-transformer gn:trait->gn:phenotype
+  (tables (PublishXRef
+           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
+           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
+           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
+           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
+          "WHERE InbredSet.public > 0")
+  (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+                 (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+                 (post-desc (blank-p (field Phenotype Post_publication_description)))
+                 (pre-desc (blank-p (field Phenotype Pre_publication_description))))
+             (string->identifier
+              "trait"
+              (format #f "~a_~a" (field PublishFreeze Name)
+                      (or post-abbrev pre-abbrev post-desc pre-desc))
+              #:separator "_"))
+    (set rdf:type 'gnc:phenotype_trait)
+    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
+    (set owl:equivalentClass
+         (field ("CONCAT(PublishFreeze.Name, '_', PublishXRef.Id)"
+                 PublishFreeze)))
+    (set dcat:distribution
+         (string->symbol
+          (format #f "gnd:~a.json"
+                  (field ("CONCAT(PublishFreeze.Name, '_', PublishXRef.Id)"
+                          PublishFreeze)))))
+    ;; Traits with no PubMed id point at an "unpublished" identifier built
+    ;; from the Publication row id instead of a pubmed: resource.
+    (set dct:references
+         (let ((pmid (field
+                      ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
+                       pmid)))
+               (publication-id (field Publication Id)))
+           (if (string-null? pmid)
+               (string->identifier "unpublished"
+                                   (number->string publication-id))
+               (ontology 'pubmed: pmid))))
+    (set gnt:has_phenotype
+         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
+               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
+               (post-desc (blank-p (field Phenotype Post_publication_description)))
+               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
+           (string->identifier
+            "phenotype"
+            (or post-abbrev pre-abbrev post-desc pre-desc)
+            #:separator "_")))
     (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean))
                                   '^^xsd:double))
     (set gnt:locus (sanitize-rdf-string (field PublishXRef Locus)))
     (set gnt:lod_score (annotate-field
-                  (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs))
-                  '^^xsd:double))
+                        (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs))
+                        '^^xsd:double))
     (set gnt:additive
          (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive))
                          '^^xsd:double))
     (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer))
-    (set dct:isReferencedBy
-         (let ((pmid (field
-                      ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
-                       pmid)))
-               (publication-id (field Publication Id PublicationId)))
-           (if (string-null? pmid)
-               (string->identifier "unpublished"
-                                   (number->string publication-id))
-               (ontology 'pubmed: pmid))))))
+    (set rdfs:comment (sanitize-rdf-string (field PublishXRef comments)))))
 
 
 
@@ -119,10 +139,12 @@
    (connection %connection-settings)
    (table-metadata? #f)
    (prefixes
-    '(("dct:" "<http://purl.org/dc/terms/>")
+    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
+      ("dct:" "<http://purl.org/dc/terms/>")
       ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
       ("owl:" "<http://www.w3.org/2002/07/owl#>")
       ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+      ("gnd:" "<https://cd.genenetwork.org/api3/lmdb/v1/data/traits/>")
       ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
       ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
       ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
@@ -133,8 +155,9 @@
       ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
       ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")))
    (inputs
-    (list
-     phenotypes))
+    (list gnc:phenotype->gn:phenotype
+          gn:phenotype->metadata
+          gn:trait->gn:phenotype))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))
diff --git a/examples/probesets.scm b/examples/probesets.scm
new file mode 100755
index 0000000..97e5753
--- /dev/null
+++ b/examples/probesets.scm
@@ -0,0 +1,133 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 getopt-long)
+             (ice-9 match)
+             (ice-9 regex)
+             (transform strings)
+             (transform sql)
+             (transform triples)
+             (transform special-forms)
+             (web uri))
+
+(define-transformer probeset->metadata  ; describes ProbeSet rows as gnc:probeset resources
+  (tables (ProbeSet
+           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))  ; chip looked up through ProbeSet.ChipId
+          "WHERE ProbeSet.Name IS NOT NULL AND TRIM(ProbeSet.Name) != ''")  ; rows with a NULL or blank Name are excluded
+  (triples
+      (string->identifier "probeset" (field ProbeSet Name))  ; subject identifier is built from the probeset name
+    (set rdf:type 'gnc:probeset)
+    (set skos:prefLabel (field ProbeSet Name))
+    (multiset skos:altLabel
+              (map string-trim-both
+                   (string-split (sanitize-rdf-string (field ProbeSet alias)) #\;)))  ; alias is split on ';' and each piece trimmed
+    (set gnt:uses_genechip (string->identifier "platform" (field GeneChip Name) #:separator "_"))
+    (set gnt:has_target_id (string-trim-both (sanitize-rdf-string (field ProbeSet TargetId))))
+    (set gnt:symbol (string-trim-both (field ProbeSet Symbol)))
+    (set dct:description (sanitize-rdf-string (field ProbeSet description)))
+    (set gnt:targets_region (string-trim-both (sanitize-rdf-string (field ProbeSet Probe_set_target_region))))
+    (set gnt:chr (field ProbeSet Chr))
+    (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double))  ; IFNULL turns SQL NULL into ''
+    (set gnt:mb_mm8 (annotate-field (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8))
+                                    '^^xsd:double))
+    (set gnt:has_specificity
+         (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
+                 Probe_set_specificity)))
+    (set gnt:has_blat_score
+         (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
+                 Probe_set_BLAT_score)))
+    (set gnt:has_blat_mb_start
+         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
+                                 Probe_set_Blat_Mb_start))
+                         '^^xsd:double))
+    (set gnt:has_blat_mb_end
+         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
+                                 Probe_set_Blat_Mb_end))
+                         '^^xsd:double))
+    (set gnt:has_blat_seq (sanitize-rdf-string (field ProbeSet BlatSeq)))
+    (set gnt:has_target_seq (sanitize-rdf-string (field ProbeSet TargetSeq)))
+    (set gnt:has_homologene_id (ontology 'homologene:  ; external ids below are URI-encoded, then passed to `ontology' with a prefix symbol
+                                         (uri-encode
+                                          (field ("IFNULL(ProbeSet.HomoloGeneID, '')"
+                                                  HomoloGeneID)))))
+    (set gnt:has_uniprot_id (ontology 'uniprot:
+                                      (uri-encode
+                                       (field ("IFNULL(ProbeSet.UniProtID, '')"
+                                               UniProtID)))))
+    (set gnt:has_pub_chem_id (ontology
+                              'pubchem:
+                              (uri-encode
+                               (field ("IFNULL(ProbeSet.PubChem_ID, '')"
+                                       PubChem_ID)))))
+    (set gnt:has_kegg_id (ontology
+                          'kegg:
+                          (uri-encode
+                           (field ("IFNULL(ProbeSet.KEGG_ID, '')"
+                                   KEGG_ID)))))
+    (set gnt:has_omim_id (ontology
+                          'omim:
+                          (uri-encode
+                           (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')"
+                                               OMIM))))
+                             (if (number? omim)  ; NOTE(review): a numeric value reaches uri-encode unchanged -- confirm that is accepted
+                                 omim
+                                 (regexp-substitute/global
+                                  #f "[^0-9]"  ; otherwise every non-digit character is removed
+                                  omim
+                                  'pre "" 'post))))))
+    (set gnt:has_chebi_id (ontology
+                           'chebi:
+                           (uri-encode
+                            (field ("IFNULL(ProbeSet.ChEBI_ID, '')"
+                                    ChEBI_ID)))))))
+
+
+
+
+(let* ((option-spec  ; script entry point: parse the command line, then run probeset->metadata
+        '((settings (single-char #\s) (value #t))  ; -s FILE: connection settings, read below as one Scheme datum
+          (output (single-char #\o) (value #t))  ; -o FILE: passed on as the #:rdf output
+          (documentation (single-char #\d) (value #t))))  ; -d FILE: passed on as the #:documentation output
+       (options (getopt-long (command-line) option-spec))
+       (settings (option-ref options 'settings #f))
+       (output (option-ref options 'output #f))
+       (documentation (option-ref options 'documentation #f))
+       (%connection-settings
+        (call-with-input-file settings
+          read)))
+  (call-with-target-database  ; this connection is only used for the row count below
+   %connection-settings
+   (lambda (db)
+     (with-documentation
+      (name "ProbeSet Metadata")
+      (connection %connection-settings)
+      (table-metadata? #f)
+      (total-rows (assoc-ref  ; NOTE(review): counts every ProbeSet row, ignoring the transformer's WHERE filter
+                   (sql-find db "SELECT count(*) AS count from ProbeSet")
+                   "count"))
+      (rows-per-chunk 1000000)  ; rows requested per chunk (given together with total-rows)
+      (prefixes
+       '(("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+         ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+         ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
+         ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+         ("kegg:" "<http://bio2rdf.org/ns/kegg#>")
+         ("pubchem:" "<https://pubchem.ncbi.nlm.nih.gov/>")
+         ("omim:" "<https://www.omim.org/entry/>")
+         ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+         ("uniprot:" "<http://purl.uniprot.org/uniprot/>")
+         ("chebi:" "<http://purl.obolibrary.org/obo/CHEBI_>")
+         ("dct:" "<http://purl.org/dc/terms/>")
+         ("owl:" "<http://www.w3.org/2002/07/owl#>")
+         ("homologene:" "<https://bio2rdf.org/homologene:>")
+         ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+         ("qb:" "<http://purl.org/linked-data/cube#>")
+         ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
+         ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
+      (inputs
+       (list probeset->metadata))
+      (outputs
+       `(#:documentation ,documentation
+         #:rdf ,output))))))
diff --git a/examples/publication.scm b/examples/publication.scm
index 6b57856..c411af6 100755
--- a/examples/publication.scm
+++ b/examples/publication.scm
@@ -13,7 +13,7 @@
 
 
 
-(define-transformer publication
+(define-transformer publication->metadata
   (tables (Publication))
   (triples
       (let ((pmid (field
@@ -81,7 +81,7 @@
       ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
       ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")))
    (inputs
-    (list publication))
+    (list publication->metadata))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))
diff --git a/examples/schema.scm b/examples/schema.scm
index 4bde895..c4ff082 100755
--- a/examples/schema.scm
+++ b/examples/schema.scm
@@ -8,18 +8,7 @@
              (transform sql)
              (transform table))
 
-(define (call-with-genenetwork-database connection-settings proc)
-  (call-with-database "mysql" (string-join
-                               (list (assq-ref connection-settings 'sql-username)
-                                     (assq-ref connection-settings 'sql-password)
-                                     (assq-ref connection-settings 'sql-database)
-                                     "tcp"
-                                     (assq-ref connection-settings 'sql-host)
-                                     (number->string
-                                      (assq-ref connection-settings 'sql-port)))
-                               ":")
-                      proc))
-
+
 (define (transform-table-schema connection-settings db)
   (let ((tables (tables connection-settings db)))
     (for-each (lambda (table)
@@ -44,7 +33,7 @@
                             (table-columns table))))
               tables)))
 
-
+
 (let* ((option-spec
         '((settings (single-char #\s) (value #t))
           (output (single-char #\o) (value #t))
@@ -54,7 +43,7 @@
        (output (option-ref options 'output #f))
        (documentation (option-ref options 'documentation #f))
        (%connection-settings (call-with-input-file settings read)))
-  (call-with-genenetwork-database
+  (call-with-target-database
    %connection-settings
    (lambda (db)
      (with-output-to-file output
diff --git a/load-rdf.scm b/load-rdf.scm
index 590fbc0..2ef79ac 100755
--- a/load-rdf.scm
+++ b/load-rdf.scm
@@ -111,6 +111,8 @@ DB.DBA.XML_SET_NS_DECL ('v', 'http://www.w3.org/2006/vcard/ns#', 2);
 DB.DBA.XML_SET_NS_DECL ('xkos', 'http://rdf-vocabulary.ddialliance.org/xkos#', 2);
 DB.DBA.XML_SET_NS_DECL ('schema', 'https://schema.org/', 2);
 DB.DBA.XML_SET_NS_DECL ('foaf', 'http://xmlns.com/foaf/0.1/#term_', 2);
+DB.DBA.XML_SET_NS_DECL ('gnd', 'https://cd.genenetwork.org/api3/lmdb/v1/data/traits/', 2);
+DB.DBA.XML_SET_NS_DECL ('gn-files', 'http://files.genenetwork.org/current/', 2);
 "
              port
              password))
diff --git a/manifest.scm b/manifest.scm
index d736e51..2905b6f 100644
--- a/manifest.scm
+++ b/manifest.scm
@@ -15,6 +15,7 @@
                         guile-dsv
                         guile-hashing
                         guile-libyaml
+                        guile-uuid
                         guile-dbd-mysql))
              ((gnu packages rdf) #:select (raptor2))
              (guix build-system gnu)
@@ -55,7 +56,7 @@
     (license license:gpl3+)))
 
 (packages->manifest
- (list gnu-make guile-3.0 guile-dbi guile-dbd-mysql guile-zlib
+ (list gnu-make guile-3.0 guile-dbi guile-dbd-mysql guile-zlib guile-uuid
        guile-json-4 guile-dsv
        ;; We abuse (ccwl graphviz) as a library to visualize the database
        ;; schema. Hence we need ccwl and guile-libyaml.
diff --git a/transform/special-forms.scm b/transform/special-forms.scm
index ddb3180..0c07a0a 100644
--- a/transform/special-forms.scm
+++ b/transform/special-forms.scm
@@ -1,10 +1,12 @@
 (define-module (transform special-forms)
   #:use-module (srfi srfi-1)
+  #:use-module (ice-9 regex)
   #:use-module (ice-9 match)
   #:use-module (srfi srfi-26)
   #:use-module (transform sql)
   #:use-module (transform table)
   #:use-module (transform triples)
+  #:use-module (transform strings)
   #:export (translate-forms
             collect-forms
             collect-keys
@@ -18,8 +20,50 @@
             syntax-let
             map-alist
 	    with-documentation
+            emit-short-turtle
             define-transformer))
 
+(define (emittable-object? o)
+  "Return #t when O may be written as a Turtle object, i.e. when it is
+neither the empty list, nor #f, nor a string that `string-blank?' accepts."
+  (not (or (null? o)
+           (not o)
+           (and (string? o) (string-blank? o)))))
+
+(define (emit-short-turtle subject po-alist)
+  "Write SUBJECT followed by its predicate-object pairs to the current
+output port as a single Turtle statement.  PO-ALIST is a list of
+(PREDICATE . OBJECT) pairs; pairs whose object fails `emittable-object?'
+are dropped.  The first pair shares SUBJECT's line, later pairs start
+with a tab, pairs are joined by \" ;\" and the last one ends in \" .\".
+Nothing is written when no pair survives.  Always returns #f."
+  (define (emit-pair p o)
+    ;; Symbols are printed bare, strings inside double quotes, and any
+    ;; other datum in `write' notation (~s).
+    ;; NOTE(review): strings are not escaped here -- presumably callers
+    ;; sanitize them first; confirm.
+    (match o
+      ((? symbol?) (format #t "~a ~a" p (symbol->string o)))
+      ((? string?) (format #t "~a \"~a\"" p o))
+      (_ (format #t "~a ~s" p o))))
+  ;; Filter once up front, so choosing the separator is a plain check on
+  ;; whether anything is left instead of a rescan of the remaining pairs.
+  (let loop ((pairs (filter (match-lambda
+                              ((_ . o) (emittable-object? o)))
+                            po-alist))
+             (first? #t))
+    (match pairs
+      (() #f)
+      (((p . o) rest ...)
+       (if first?
+           (format #t "~a " subject)   ; subject only once
+           (format #t "\t"))
+       (emit-pair p o)
+       (format #t (if (null? rest) " .~%" " ;~%"))
+       (loop rest #f)))))
+
+
+
 (define (key->assoc-ref alist x)
   "Recursively translate (key k) forms in source X to (assoc-ref ALIST
 k) forms."
@@ -375,57 +419,68 @@ must be remedied."
          #`(define* (name db #:key
                           (metadata? #f)
                           (data? #t)
-                          (documentation? #f))
-             (when metadata?
-               #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
-                        (subject-type (any (lambda (predicate)
-                                             (syntax-case predicate (rdf:type)
-                                               ((_ rdf:type type) #'type)
-                                               (_ #f)))
-                                           #'(predicate-clauses ...))))
-                    (map (lambda (predicate-clause)
-                           (syntax-case predicate-clause ()
-                             ((_ predicate _)
-                              ;; Dump metadata about the transform itself.
-                              #`(begin
-                                  (scm->triples
-                                   (map-alist '()
-	        		     (set rdf:type 'gn-id:transform)
-	        		     (set gn-term:createsPredicate 'predicate)
-	        		     (filter-set gn-term:forSubjectType #,subject-type)
-	        		     (multiset gn-term:dependsOn
-	        			       '#,(map (lambda (field)
-	        					 (match (syntax->datum field)
-	        					   ((table-name column-name _ ...)
-	        					    (datum->syntax
-	        					     x (column-id (symbol->string table-name)
-	        							  (symbol->string column-name))))
-	        					   (((query alias))
-	        					    (datum->syntax
-	        					     x (column-id query (symbol->string alias))))))
-	        				       (collect-fields predicate-clause))))
-                                   #,(id table (syntax->datum #'predicate)))
-                                  ;; Automatically create domain triples
-                                  ;; for predicates.
-                                  (when #,subject-type
-                                    (triple 'predicate 'rdfs:domain #,subject-type))))
-                             (_ (error "Invalid predicate clause:" predicate-clause))))
-                         #'(predicate-clauses ...))))
-             (when documentation?
-               (format #t "~%## '~a'~%~%" (syntax->datum #'name))
-               #,(syntax-case #'schema-triples-clause (schema-triples)
-                   ((schema-triples (triple-subject triple-predicate triple-object) ...)
-                    #`(begin
-                        (when (not (list 'triple-subject ...))
-                          (format #t "## Schema Triples:~%~%```text~%")
-                          (for-each (lambda (s p o)
-                                      (format #t "~a -> ~a -> ~a~%" s p o))
-                                    (list 'triple-subject ...)
-                                    (list 'triple-predicate ...)
-                                    (list 'triple-object ...))
-                          (format #t "```"))))
-                   (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
-               (format #t "## Generated Triples:
+                          (documentation? #f)
+                          (limit #f)
+                          (offset #f))
+             (let* ((base-sql
+                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                   (primary-table other-tables ...)
+                                   tables-raw ...))
+                    (sql
+		      (if (and limit offset)
+			  (format #f "~a LIMIT ~a OFFSET ~a"
+				  base-sql limit offset)
+			  base-sql)))
+               (when metadata?
+                 #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
+                          (subject-type (any (lambda (predicate)
+                                               (syntax-case predicate (rdf:type)
+                                                 ((_ rdf:type type) #'type)
+                                                 (_ #f)))
+                                             #'(predicate-clauses ...))))
+                      (map (lambda (predicate-clause)
+                             (syntax-case predicate-clause ()
+                               ((_ predicate _)
+                                ;; Dump metadata about the transform itself.
+                                #`(begin
+                                    (scm->triples
+                                     (map-alist '()
+	        		       (set rdf:type 'gn-id:transform)
+	        		       (set gn-term:createsPredicate 'predicate)
+	        		       (filter-set gn-term:forSubjectType #,subject-type)
+	        		       (multiset gn-term:dependsOn
+	        			         '#,(map (lambda (field)
+	        					   (match (syntax->datum field)
+	        					     ((table-name column-name _ ...)
+	        					      (datum->syntax
+	        					       x (column-id (symbol->string table-name)
+	        							    (symbol->string column-name))))
+	        					     (((query alias))
+	        					      (datum->syntax
+	        					       x (column-id query (symbol->string alias))))))
+	        				         (collect-fields predicate-clause))))
+                                     #,(id table (syntax->datum #'predicate)))
+                                    ;; Automatically create domain triples
+                                    ;; for predicates.
+                                    (when #,subject-type
+                                      (triple 'predicate 'rdfs:domain #,subject-type))))
+                               (_ (error "Invalid predicate clause:" predicate-clause))))
+                           #'(predicate-clauses ...))))
+               (when documentation?
+                 (format #t "~%## '~a'~%~%" (syntax->datum #'name))
+                 #,(syntax-case #'schema-triples-clause (schema-triples)
+                     ((schema-triples (triple-subject triple-predicate triple-object) ...)
+                      #`(begin
+                          (when (not (list 'triple-subject ...))
+                            (format #t "## Schema Triples:~%~%```text~%")
+                            (for-each (lambda (s p o)
+                                        (format #t "~a -> ~a -> ~a~%" s p o))
+                                      (list 'triple-subject ...)
+                                      (list 'triple-predicate ...)
+                                      (list 'triple-object ...))
+                            (format #t "```"))))
+                     (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
+                 (format #t "## Generated Triples:
 
 The following SQL query was executed:
 
@@ -437,67 +492,64 @@ The above query results to triples that have the form:
 
 ```text
 "
-                       (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                     (primary-table other-tables ...)
-                                     tables-raw ...))
-               (for-each (match-lambda
-                           ((predicate . object)
-                            (format #t "~a -> ~a -> ~a ~%"
-                                    (if (symbol? #,(field->datum #'subject))
-                                        (symbol->string #,(field->datum #'subject))
-                                        #,(field->datum #'subject))
-                                    predicate
-                                    (if (symbol? object)
-                                        (symbol->string object)
-                                        object))))
-                         (map-alist
-                             '()
-                           #,@(field->datum #'(predicate-clauses ...))))
-               (format #t "```~%Here's an example query:~%~%```sparql~%")
-               (documentation?)
-               (newline)
-               (let* ((result
-                       (map-alist (sql-find
-                                   db
-                                   (format #f "~a LIMIT 1"
-                                           (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                                         (primary-table other-tables ...)
-                                                         tables-raw ...)))
-        		 #,@(field->key #'(predicate-clauses ...))))
-                      (first-n (list-head result
-                                          (let ((n
-                                                 (min 4 (truncate
-                                                         (+ (exact-integer-sqrt (length result)) 1)))))
-                                            (if (< n 3)
-                                                (length result)
-                                                n)))))
-                 (format #t "SELECT * WHERE { ~%")
+                         (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                       (primary-table other-tables ...)
+                                       tables-raw ...))
                  (for-each (match-lambda
                              ((predicate . object)
-                              (match object
-                                        ((or (?  symbol? object)
-                                             (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
-                                         (format #t "    ?s ~a ~a .~%" predicate object))
-                                        ((and (? string? object)
-                                              (? (lambda (el) (not (string-null? el))) object))
-                                         (format #t "    ?s ~a \"~a\" .~%" predicate object))
-                                        (_ ""))))
-                           first-n)
-                 (format #t "    ?s ?p ?o .~%}~%```~%"))
-               (format #t "~%Expected Result:~%~%```rdf~%")
-               (sql-for-each (lambda (row)
-                               (scm->triples
-                                (map-alist row #,@(field->key #'(predicate-clauses ...)))
-                                #,(field->assoc-ref #'row #'subject)
-                                (lambda (s p o)
-                                  (triple s p o))))
-                             db
-                             (format #f "~a LIMIT 1"
-                                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                                   (primary-table other-tables ...)
-                                                   tables-raw ...)))
-               (format #t "```~%~%"))
-             (when data?
+                              (format #t "~a -> ~a -> ~a ~%"
+                                      (if (symbol? #,(field->datum #'subject))
+                                          (symbol->string #,(field->datum #'subject))
+                                          #,(field->datum #'subject))
+                                      predicate
+                                      (if (symbol? object)
+                                          (symbol->string object)
+                                          object))))
+                           (map-alist
+                               '()
+                             #,@(field->datum #'(predicate-clauses ...))))
+                 (format #t "```~%Here's an example query:~%~%```sparql~%")
+                 (documentation?)
+                 (newline)
+                 (let* ((result
+                         (map-alist (sql-find
+                                     db
+                                     (format #f "~a LIMIT 1"
+                                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                                           (primary-table other-tables ...)
+                                                           tables-raw ...)))
+        		   #,@(field->key #'(predicate-clauses ...))))
+                        (first-n (list-head result
+                                            (let ((n
+                                                   (min 4 (truncate
+                                                           (+ (exact-integer-sqrt (length result)) 1)))))
+                                              (if (< n 3)
+                                                  (length result)
+                                                  n)))))
+                   (format #t "SELECT * WHERE { ~%")
+                   (for-each (match-lambda
+                               ((predicate . object)
+                                (match object
+                                  ((or (?  symbol? object)
+                                       (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
+                                   (format #t "    ?s ~a ~a .~%" predicate object))
+                                  ((and (? string? object)
+                                        (? (lambda (el) (not (string-null? el))) object))
+                                   (format #t "    ?s ~a \"~a\" .~%" predicate object))
+                                  (_ ""))))
+                             first-n)
+                   (format #t "    ?s ?p ?o .~%}~%```~%"))
+                 (format #t "~%Expected Result:~%~%```rdf~%")
+                 (sql-for-each (lambda (row)
+                                 (scm->triples
+                                  (map-alist row #,@(field->key #'(predicate-clauses ...)))
+                                  #,(field->assoc-ref #'row #'subject)
+                                  (lambda (s p o)
+                                    (triple s p o))))
+                               db
+                               (format #f "~a LIMIT 1" base-sql))
+                 (format #t "```~%~%"))
+               (when data?
                #,(syntax-case #'schema-triples-clause (schema-triples)
                    ((schema-triples (triple-subject triple-predicate triple-object) ...)
                     #`(for-each triple
@@ -505,15 +557,14 @@ The above query results to triples that have the form:
                                 (list 'triple-predicate ...)
                                 (list 'triple-object ...)))
                    (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
-	       (sql-for-each (lambda (row)
-                               (scm->triples
-                                (map-alist row #,@(field->key #'(predicate-clauses ...)))
-                                #,(field->assoc-ref #'row #'subject)))
-                             db
-                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                           (primary-table other-tables ...)
-                                           tables-raw ...)))
-             )))
+	       (sql-for-each
+		  (lambda (row)
+                    (let* ((subject-val #,(field->assoc-ref #'row #'subject))
+                           (po-alist
+                            (map-alist row #,@(field->key #'(predicate-clauses ...)))))
+                      (emit-short-turtle subject-val po-alist)))
+                  db
+                  sql))))))
       (_ (error "Invalid define-transformer syntax:" (syntax->datum x))))))
 
 (define (get-keyword-value args keyword default)
@@ -532,8 +583,14 @@ The above query results to triples that have the form:
             (prefixes (assoc-ref alist 'prefixes))
             (inputs (assoc-ref alist 'inputs))
             (outputs (assoc-ref alist 'outputs))
-            (rdf-path (get-keyword-value outputs #:rdf ""))
-            (doc-path (get-keyword-value outputs #:documentation "")))
+            (total-rows (assoc-ref alist 'total-rows))
+            (rows-per-chunk (assoc-ref alist 'rows-per-chunk))
+            (chunking? (and total-rows rows-per-chunk))
+            (chunks (if chunking?
+                        (ceiling (/ total-rows rows-per-chunk))
+                        1))
+            (rdf-path (get-keyword-value outputs #:rdf #f))
+            (doc-path (get-keyword-value outputs #:documentation #f)))
        (call-with-target-database
         connection
         (lambda (db)
@@ -559,20 +616,30 @@ The above query results to triples that have the form:
 
           ;; Dumping the actual data
           (when rdf-path
-            (with-output-to-file
-                rdf-path
-              (lambda ()
-                ;; Add the prefixes
-                (for-each
-                 (match-lambda
-                   ((k v)
-                    (begin
-                      (prefix k v))))
-                 prefixes)
-                (newline)
-                (for-each
-                 (lambda (proc)
-                   (proc db #:metadata? table-metadata?))
-                 inputs))
-              #:encoding "UTF-8"))))))))
+            (do ((i 0 (+ i 1)))
+                ((>= i chunks))
+              (let* ((offset (* i (or rows-per-chunk 0)))
+                     (out-file
+                      (if (= chunks 1)
+                          rdf-path
+                          (string-append (path-without-extension rdf-path)
+                                         "." (number->string (+ i 1)) ".ttl"))))
+                (with-output-to-file
+                    out-file
+                  (lambda ()
+                    ;; Add the prefixes
+                    (for-each
+                     (match-lambda
+                       ((k v)
+                        (begin
+                          (prefix k v))))
+                     prefixes)
+                    (newline)
+                    (for-each
+                     (lambda (proc)
+                       (proc db #:metadata? table-metadata?
+                             #:limit rows-per-chunk
+                             #:offset offset))
+                     inputs))
+                  #:encoding "UTF-8"))))))))))
 
diff --git a/transform/strings.scm b/transform/strings.scm
index 51c5ed1..8b4ee45 100644
--- a/transform/strings.scm
+++ b/transform/strings.scm
@@ -2,6 +2,7 @@
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-19)
   #:use-module (ice-9 match)
+  #:use-module (ice-9 rdelim)
   #:use-module (ice-9 string-fun)
   #:use-module (ice-9 textual-ports)
   #:export (string-blank?
@@ -17,7 +18,21 @@
             string-capitalize-first
             normalize-string-field
             fix-email-id
-            investigator-attributes->id))
+            blank-p
+            investigator-attributes->id
+            path-without-extension))
+
+(define (blank-p str)  ; STR itself, or #f when `string-blank?' holds for it
+  (and (not (string-blank? str)) str))
+
+(define (path-without-extension path)
+  "Return PATH without the extension of its last component.  The result
+always starts with (dirname PATH), so \"a.ttl\" yields \"./a\"."
+  (let* ((dir (dirname path)) (base (basename path))
+         (dot (string-rindex base #\.))  ; index of the last dot, or #f
+         (stem (if (and dot (> dot 0)) (substring base 0 dot) base)))  ; a leading dot is not an extension
+    ;; `dirname' gives "/" for root-level files; do not emit "//" then.
+    (string-append dir (if (string-suffix? "/" dir) "" "/") stem)))
 
 (define (lower-case-and-replace-spaces str)
   (string-map