about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x examples/genelist.scm 22
-rwxr-xr-x examples/generif.scm 87
-rwxr-xr-x examples/genotype-datasets.scm 6
-rwxr-xr-x examples/ontology.scm 173
-rwxr-xr-x examples/phenotype-datasets.scm 27
-rwxr-xr-x examples/phenotype.scm 7
-rwxr-xr-x examples/probesets-experiment-metadata.scm 110
-rwxr-xr-x examples/probesets.scm 133
-rwxr-xr-x load-rdf.scm 10
-rw-r--r-- manifest.scm 3
-rw-r--r-- transform/special-forms.scm 356
-rw-r--r-- transform/strings.scm 24
-rw-r--r-- transform/triples.scm 3
13 files changed, 687 insertions, 274 deletions
diff --git a/examples/genelist.scm b/examples/genelist.scm
index 5048bf2..ecd5cad 100755
--- a/examples/genelist.scm
+++ b/examples/genelist.scm
@@ -30,34 +30,34 @@
    (gnc:transcript rdfs:domain gnc:gene_symbol)
    (gnt:transcript a owl:ObjectProperty)
    (gnc:transcript rdfs:comments "The gene transcript of this resource")
-   (gnc:ebi_gwas_link rdfs:Class gnc:ResourceLink)
+   (gnc:ebi_gwas_link rdfs:Class gnc:resource_link)
    (gnc:ebi_gwas_link rdfs:label "EBI GWAS")
    (gnc:ebi_gwas_link rdfs:comments "EBI GWAS")
-   (gnc:protein_atlas_link rdfs:Class gnc:ResourceLink)
+   (gnc:protein_atlas_link rdfs:Class gnc:resource_link)
    (gnc:protein_atlas_link rdfs:label "Protein Atlas")
    (gnc:protein_atlas_link rdfs:comments "Human Protein Atlas")
-   (gnc:genemania_link rdfs:Class gnc:ResourceLink)
+   (gnc:genemania_link rdfs:Class gnc:resource_link)
    (gnc:genemania_link rdfs:label "GeneMANIA")
    (gnc:genemania_link rdfs:comments "GeneMANIA")
-   (gnc:gemma_link rdfs:Class gnc:ResourceLink)
+   (gnc:gemma_link rdfs:Class gnc:resource_link)
    (gnc:gemma_link rdfs:label "Gemma")
    (gnc:gemma_link rdfs:comments "Meta-analysis of gene expression data")
-   (gnc:biogps_link rdfs:Class gnc:ResourceLink)
+   (gnc:biogps_link rdfs:Class gnc:resource_link)
    (gnc:biogps_link rdfs:label "BioGPS")
    (gnc:biogps_link rdfs:comments "Expression across many tissues and cell types")
-   (gnc:aba_link rdfs:Class gnc:ResourceLink)
+   (gnc:aba_link rdfs:Class gnc:resource_link)
    (gnc:aba_link rdfs:label "ABA")
    (gnc:aba_link rdfs:comments "Allen Brain Atlas")
-   (gnc:panther_link rdfs:Class gnc:ResourceLink)
+   (gnc:panther_link rdfs:Class gnc:resource_link)
    (gnc:panther_link rdfs:label "PANTHER")
    (gnc:panther_link rdfs:comments "Gene and protein data resources from Celera-ABI")
-   (gnc:panther_link rdfs:Class gnc:ResourceLink)
+   (gnc:panther_link rdfs:Class gnc:resource_link)
    (gnc:panther_link rdfs:label "STRING")
    (gnc:panther_link rdfs:comments "Protein interactions: known and inferred")
-   (gnc:gtex_link rdfs:Class gnc:ResourceLink)
+   (gnc:gtex_link rdfs:Class gnc:resource_link)
    (gnc:gtex_link rdfs:label "GTEx Portal")
    (gnc:gtex_link rdfs:comments "GTEx Portal")
-   (gnc:rgd_link rdfs:Class gnc:ResourceLink)
+   (gnc:rgd_link rdfs:Class gnc:resource_link)
    (gnc:rgd_link rdfs:label "Rat Genome DB")
    (gnc:rgd_link rdfs:comments "Rat Genome DB")
    (gnc:has_kg_id rdfs:domain gnc:gene_symbol)
@@ -279,7 +279,7 @@
                         "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
                         "http://www.pantherdb.org/genes/geneList.do?searchType=basic&fieldName=all&organism=all&listType=1&fieldValue="
                         (string-trim-both symbol)
-                        "a gnc:PantherLink"))
+                        "a gnc:panther_link"))
                "")))
     (set dct:references
          (let ((symbol (string-trim-both (field GeneList_rn33 geneSymbol))))
diff --git a/examples/generif.scm b/examples/generif.scm
index a4a2e4b..a8a8460 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -20,14 +20,16 @@
            (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
            (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
            (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
-          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL
-GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
+          "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
   (triples
-      (string->identifier
-       "wiki" (format #f "~a_~a"
-                      (field GeneRIF Id)
-                      (field GeneRIF versionId))
-       #:separator "_")
+      (string->identifier ""
+                          (gn-uuid (format #f "~a.~a.~a?type=wikii"
+                                           (field GeneRIF Id)
+                                           (field GeneRIF versionId)
+                                           (field GeneRIF createtime)))
+                          #:url-char #\-)
+    (set dct:identifier (gn-uuid (format #f "~a?type=wiki"
+                                         (field GeneRIF Id))))
     (set rdfs:label (string->symbol
                      (format #f "'~a'@en"
                              (replace-substrings
@@ -56,8 +58,6 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
     ;;        ((? string-blank? mbox) "")
     ;;        (mbox (string->symbol
     ;;               (format #f "<~a>" mbox)))))
-    (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id))
-                                        '^^xsd:integer))
     (set foaf:homepage
          (match (sanitize-rdf-string (field GeneRIF weburl))
            ((? string-blank? homepage) "")
@@ -78,44 +78,32 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
            (left-join Species "USING (SpeciesId)")))
   (triples
       (string->identifier
-       "rif" (format #f "~a_~a_~a_~a"
-                     (field GeneRIF_BASIC GeneId)
-                     (field GeneRIF_BASIC PubMed_ID)
-                     (field ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
-                     (field GeneRIF_BASIC VersionId))
-       #:separator "_")
-    (set rdf:type
-         (let* ((comment (format #f "'~a'@en"
-                                 (replace-substrings
-                                  (sanitize-rdf-string
-                                   (field GeneRIF_BASIC comment))
-                                  '(("\\" . "\\\\")
-                                    ("\n" . "\\n")
-                                    ("\r" . "\\r")
-                                    ("'" . "\\'")))))
-                (create-time (format #f "~s^^xsd:datetime"
-                                     (field
-                                      ("CAST(createtime AS CHAR)" EntryCreateTime))))
-                (symbol (field GeneRIF_BASIC symbol))
-                (species (string->identifier "" (remap-species-identifiers (field Species Fullname))))
-                (gene-id (field GeneRIF_BASIC GeneId))
-                (taxon-id (field GeneRIF_BASIC TaxID TaxonomicId))
-                (pmid (field GeneRIF_BASIC PubMed_ID))
-                (version-id (field GeneRIF_BASIC versionId)))
-           (string->symbol
-            (string-append
-             (format #f "gnc:ncbi_wiki_entry ;\n")
-             (format #f "\trdfs:label ~a ;\n" comment)
-             (format #f "\tgnt:has_species ~a ;\n" species)
-             (format #f "\tgnt:symbol ~s ;\n" symbol)
-             (format #f "\tgnt:has_gene_id generif:~a ;\n" gene-id)
-             (match taxon-id
-               ((? number? x)
-                (format #f "\tskos:notation taxon:~a ;\n" taxon-id))
-               (else ""))
-             (format #f "\tdct:hasVersion \"~a\"^^xsd:integer ;\n" version-id)
-             (format #f "\tdct:references pubmed:~a ;\n" pmid)
-             (format #f "\tdct:created ~a" create-time)))))))
+       "" (gn-uuid (format #f "~a_~a_~a_~a"
+                           (field GeneRIF_BASIC GeneId)
+                           (field GeneRIF_BASIC PubMed_ID)
+                           (field ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
+                           (field GeneRIF_BASIC VersionId)))
+       #:url-char #\-)
+    (set rdf:type 'gnc:ncbi_wiki_entry)
+    (set rdfs:label (format #f "'~a'@en"
+                            (replace-substrings
+                             (sanitize-rdf-string
+                              (field GeneRIF_BASIC comment))
+                             '(("\\" . "\\\\")
+                               ("\n" . "\\n")
+                               ("\r" . "\\r")
+                               ("'" . "\\'")))))
+    (set gnt:symbol (field GeneRIF_BASIC symbol))
+    (set gnt:has_species (string->identifier "" (remap-species-identifiers (field Species Fullname))))
+    (set skos:notation (ontology 'taxon: (field GeneRIF_BASIC TaxID TaxonomicId)))
+    (set dct:hasVersion (annotate-field (field GeneRIF_BASIC versionId) '^^xsd:integer))
+    (set gnt:has_gene_id (ontology 'generif: (field GeneRIF_BASIC GeneId)))
+    (set dct:references (ontology 'pubmed: (field GeneRIF_BASIC PubMed_ID)))
+    (set dct:created
+         (string->symbol
+          (format #f "~s^^xsd:datetime"
+                  (field
+                   ("CAST(createtime AS CHAR)" EntryCreateTime)))))))
 
 
 
@@ -152,9 +140,8 @@ GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
       ("owl:" "<http://www.w3.org/2002/07/owl#>")))
    (inputs
     (list
-     ;; gn-genewiki-entries
-     ncbi-genewiki-entries
-     ))
+     gn-genewiki-entries
+     ncbi-genewiki-entries))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))
diff --git a/examples/genotype-datasets.scm b/examples/genotype-datasets.scm
index ebe2349..38d524b 100755
--- a/examples/genotype-datasets.scm
+++ b/examples/genotype-datasets.scm
@@ -18,7 +18,7 @@
   (tables (Species
            (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
            (inner-join GenoFreeze "ON GenoFreeze.InbredSetId = InbredSet.Id"))
-          "WHERE GenoFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, GenoFreeze.ShortName")
+          "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND Species.Name != 'monkey' GROUP BY Species.Name, GenoFreeze.ShortName")
   (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
     (multiset gnt:has_genotype_data
               (map (cut string->identifier "dataset" <> #:separator "_")
@@ -31,7 +31,7 @@
   (tables (GenoFreeze
            (inner-join InbredSet "ON InbredSet.Id = GenoFreeze.InbredSetId")
            (inner-join Species "ON InbredSet.SpeciesId = Species.Id"))
-          "WHERE GenoFreeze.public > 0 AND Species.Name != 'monkey'")
+          "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND Species.Name != 'monkey'")
   (triples (string->identifier "dataset" (field GenoFreeze Name) #:separator "_")
     (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
     (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:datetime))))
@@ -41,7 +41,7 @@
            (inner-join InbredSet "ON InbredSet.Id = GenoFreeze.InbredSetId")
            (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
            (inner-join Geno "ON Geno.SpeciesId = Species.Id"))
-          "WHERE GenoFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY GenoFreeze.Name")
+          "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND Species.Name != 'monkey' GROUP BY GenoFreeze.Name")
   (triples (string->identifier "dataset" (field GenoFreeze Name) #:separator "_")
     (set gnt:has_marker_count
          (string->symbol
diff --git a/examples/ontology.scm b/examples/ontology.scm
index f2b54cc..7ea9c4f 100755
--- a/examples/ontology.scm
+++ b/examples/ontology.scm
@@ -25,6 +25,7 @@
       (prefix "gnc:" "<http://rdf.genenetwork.org/v1/category/>")
       (prefix "gnt:" "<http://rdf.genenetwork.org/v1/term/>")
       (prefix "obo:" "<http://purl.obolibrary.org/obo/>")
+      (prefix "bfo:" "<http://purl.obolibrary.org/obo/BFO_>")
       (prefix "sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
       (prefix "skos:" "<http://www.w3.org/2004/02/skos/core#>")
       (prefix "rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
@@ -35,6 +36,28 @@
       (prefix "pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
       (prefix "schema:" "<https://schema.org/>")
       (newline)
+      (triple 'gnt:has_trait_page 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_trait_page 'rdfs:label "has traits page")
+      (triple 'gnt:has_trait_page 'rdfs:comment "Links a trait resource to its GeneNetwork web interface page for interactive exploration.")
+      (triple 'gnt:has_trait_page 'skos:definition "Provides a resolvable HTTP link to the GeneNetwork trait interface for a given phenotype trait or dataset entry.")
+      (triple 'gnt:has_trait_page 'rdfs:domain 'gnc:phenotype_trait)
+      (triple 'gnt:has_trait_page 'rdfs:range 'gnc:resource_link)
+      (triple 'gnt:has_trait_page 'schema:domainIncludes 'gnc:phenotype)
+      (triple 'gnt:has_trait_page 'schema:domainIncludes 'dcat:Dataset)
+      (triple 'gnt:has_trait_page 'rdfs:subPropertyOf 'schema:url)
+      ;; Minimal BFO bridge for GN terms.
+      (triple 'gnc:resource_entity 'a 'owl:Class)
+      (triple 'gnc:resource_entity 'rdfs:label "GeneNetwork resource entity")
+      (triple 'gnc:resource_entity 'rdfs:subClassOf 'bfo:0000001)
+      (triple 'gnc:material_resource 'a 'owl:Class)
+      (triple 'gnc:material_resource 'rdfs:label "GeneNetwork material resource")
+      (triple 'gnc:material_resource 'rdfs:subClassOf 'bfo:0000040)
+      (triple 'gnc:material_resource 'rdfs:subClassOf 'gnc:resource_entity)
+      (triple 'gnc:information_resource 'a 'owl:Class)
+      (triple 'gnc:information_resource 'rdfs:label "GeneNetwork information resource")
+      (triple 'gnc:information_resource 'rdfs:subClassOf 'bfo:0000031)
+      (triple 'gnc:information_resource 'rdfs:subClassOf 'gnc:resource_entity)
+
       (triple 'gnc:population_category 'a 'xkos:ClassificationLevel)
       (triple 'gnc:population_category 'rdfs:label "Population Category")
       (triple 'gnc:population_category 'skos:inScheme 'gnc:resource_classification_scheme)
@@ -73,6 +96,10 @@
       (triple 'gnc:taxonomic_family 'skos:prefLabel "Family")
       (triple 'gnc:taxonomic_family 'xkos:depth "1")
       (triple 'gnc:taxonomic_family 'xkos:nextLevel 'gnc:species)
+      (triple 'gnc:strain 'a 'owl:Class)
+      (triple 'gnc:strain 'rdfs:subClassOf 'gnc:material_resource)
+      (triple 'gnc:mapping_method 'a 'skos:ConceptScheme)
+      (triple 'gnc:avg_method 'a 'skos:ConceptScheme)
       (triple 'gnt:assigned_species 'a 'owl:ObjectProperty)
       (triple 'gnt:assigned_species 'rdfs:domain 'gnc:set)
       (triple 'gnt:assigned_species 'rdfs:label "These families have been assigned to these species")
@@ -82,7 +109,6 @@
       (triple 'gnt:genetic_type 'rdfs:range 'xsd:string)
       (triple 'gnt:genetic_type 'skos:definition "Describes the genetic architecture of a resource set (e.g., intercross, riset).")
       (triple 'gnt:has_family_order_id 'a 'owl:DatatypeProperty)
-      (triple 'gnt:has_family_order_id 'a 'owl:DatatypeProperty)
       (triple 'gnt:has_family_order_id 'rdfs:range 'xsd:integer)
       (triple 'gnt:has_set_code 'a 'owl:DatatypeProperty)
       (triple 'gnt:has_set_code 'rdfs:domain 'gnc:set)
@@ -118,12 +144,27 @@
       (triple 'gnt:uses_mapping_method 'rdfs:domain 'gnc:set)
       (triple 'gnt:uses_mapping_method 'rdfs:label "mapping method")
       (triple 'gnt:uses_mapping_method 'rdfs:range 'gnc:mapping_method)
+      (triple 'gnt:has_reference_population 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_reference_population 'schema:domainIncludes 'gnc:set)
+      (triple 'gnt:has_reference_population 'schema:domainIncludes 'gnc:population_category)
+      (triple 'gnt:has_reference_population 'rdfs:range 'gnc:reference_population)
+      (triple 'gnt:has_population_order_id 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_population_order_id 'rdfs:domain 'gnc:reference_population)
+      (triple 'gnt:has_population_order_id 'rdfs:range 'xsd:integer)
+      (triple 'gnt:alias 'a 'owl:DatatypeProperty)
+      (triple 'gnt:alias 'rdfs:domain 'gnc:strain)
+      (triple 'gnt:gene 'a 'owl:DatatypeProperty)
+      (triple 'gnt:gene 'rdfs:domain 'gnc:gene_symbol)
 
       ;; Describing Datasets
       (triple 'gnc:molecular_trait 'a 'owl:Class)
       (triple 'gnc:molecular_trait 'a 'skos:Concept)
       (triple 'gnc:molecular_trait 'rdfs:label "Molecular Trait.   This describes a melecular trait of a given species.  We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups.")
       (triple 'gnc:molecular_trait 'rdfs:subClassOf 'obo:UBERON_0000479)
+      (triple 'gnc:molecular_trait 'rdfs:subClassOf 'gnc:information_resource)
+      (triple 'gnc:molecular_trait_metadata 'a 'owl:Class)
+      (triple 'gnc:molecular_trait_metadata 'rdfs:subClassOf 'gnc:information_resource)
+      (triple 'gnc:gene_chip 'a 'skos:ConceptScheme)
       (triple 'gnt:has_case_info 'a 'owl:ObjectProperty)
       (triple 'gnt:has_case_info 'rdfs:comment "Information about the cases used in this platform")
       (triple 'gnt:has_case_info 'rdfs:domain 'dcat:Dataset)
@@ -163,6 +204,9 @@
       (triple 'gnt:has_phenotype_data 'rdfs:label "this resources has this phenotype data.")
       (triple 'gnt:has_phenotype_data 'rdfs:range 'dcat:Dataset)
       (triple 'gnt:has_phenotype_data 'rdfs:subPropertyOf 'dct:relation)
+      (triple 'gnt:has_phenotype_trait 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_phenotype_trait 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_phenotype_trait 'rdfs:range 'gnc:phenotype_trait)
       (triple 'gnt:has_platform_info 'a 'owl:ObjectProperty)
       (triple 'gnt:has_platform_info 'rdfs:comment "Information about the platform that was used with this dataset")
       (triple 'gnt:has_platform_info 'rdfs:domain 'dcat:Dataset)
@@ -194,21 +238,32 @@
       (triple 'gnt:uses_normalization_method 'rdfs:domain 'dcat:Dataset)
       (triple 'gnt:uses_normalization_method 'rdfs:label "Averaging method used for the molecular traits in this dataset.")
       (triple 'gnt:uses_normalization_method 'rdfs:range 'gnc:avg_method)
+      (triple 'gnt:has_probeset 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_probeset 'rdfs:domain 'gnc:molecular_trait_metadata)
+      (triple 'gnt:has_probeset 'rdfs:range 'gnc:probeset)
 
       ;; Describing phenotypes
       (triple 'gnc:phenotype 'a 'owl:Class)
       (triple 'gnc:phenotype 'a 'skos:Concept)
       (triple 'gnc:phenotype 'rdfs:label "A phenotype.")
+      (triple 'gnc:phenotype 'rdfs:subClassOf 'gnc:information_resource)
       (triple 'gnc:phenotype_trait 'a 'owl:Class)
       (triple 'gnc:phenotype_trait 'a 'skos:Concept)
       (triple 'gnc:phenotype_trait 'rdfs:label "A phenotype trait.")
-      (triple 'gnt:abbreviation 'a 'owl:ObjectProperty)
+      (triple 'gnc:phenotype_trait 'rdfs:subClassOf 'gnc:information_resource)
+      (triple 'gnt:abbreviation 'a 'owl:DatatypeProperty)
       (triple 'gnt:abbreviation 'rdfs:domain 'gnc:phenotype)
       (triple 'gnt:abbreviation 'skos:definition "The abbreviation used for this resource")
+      (triple 'gnt:has_phenotype 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_phenotype 'rdfs:domain 'gnc:phenotype_trait)
+      (triple 'gnt:has_phenotype 'rdfs:range 'gnc:phenotype)
       (triple 'gnt:additive 'rdfs:domain 'gnc:phenotype)
       (triple 'gnt:additive 'rdfs:range 'xsd:double)
-      (triple 'gnt:lab_code 'a 'owl:ObjectProperty)
+      (triple 'gnt:lab_code 'a 'owl:DatatypeProperty)
       (triple 'gnt:lab_code 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:has_lab_code 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_lab_code 'rdfs:domain 'gnc:phenotype)
+      (triple 'gnt:has_lab_code 'rdfs:subPropertyOf 'gnt:lab_code)
       (triple 'gnt:locus 'a 'qb:MeasureProperty)
       (triple 'gnt:locus 'a 'rdf:Property)
       (triple 'gnt:locus 'rdfs:domain 'gnc:phenotype)
@@ -226,9 +281,24 @@
       (triple 'gnt:mean 'rdfs:domain 'gnc:phenotype)
       (triple 'gnt:mean 'rdfs:range 'xsd:double)
       (triple 'gnt:mean 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:se 'a 'qb:MeasureProperty)
+      (triple 'gnt:se 'a 'rdf:Property)
+      (triple 'gnt:se 'rdfs:domain 'gnc:molecular_trait_metadata)
+      (triple 'gnt:se 'rdfs:range 'xsd:double)
+      (triple 'gnt:se 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:pvalue 'a 'qb:MeasureProperty)
+      (triple 'gnt:pvalue 'a 'rdf:Property)
+      (triple 'gnt:pvalue 'rdfs:domain 'gnc:molecular_trait_metadata)
+      (triple 'gnt:pvalue 'rdfs:range 'xsd:double)
+      (triple 'gnt:pvalue 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:h2 'a 'qb:MeasureProperty)
+      (triple 'gnt:h2 'a 'rdf:Property)
+      (triple 'gnt:h2 'rdfs:domain 'gnc:molecular_trait_metadata)
+      (triple 'gnt:h2 'rdfs:range 'xsd:double)
+      (triple 'gnt:h2 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
       (triple 'gnt:sequence 'rdfs:domain 'gnc:phenotype)
       (triple 'gnt:sequence 'rdfs:range 'xsd:integer)
-      (triple 'gnt:submitter 'a 'owl:ObjectProperty)
+      (triple 'gnt:submitter 'a 'owl:DatatypeProperty)
       (triple 'gnt:submitter 'rdfs:domain 'gnc:phenotype)
       (triple 'gnt:submitter 'skos:definition "A person who submitted this resource to GN")
       (triple 'gnt:submitter 'skos:definition "A person who submitted this resource to GN")
@@ -240,6 +310,9 @@
       (triple 'gnc:dna_marker 'a 'owl:Class)
       (triple 'gnc:dna_marker 'a 'skos:Concept)
       (triple 'gnc:dna_marker 'rdfs:label "A DNA Marker or SNP")
+      (triple 'gnc:dna_marker 'rdfs:subClassOf 'gnc:material_resource)
+      (triple 'gnc:marker 'a 'owl:Class)
+      (triple 'gnc:marker 'rdfs:subClassOf 'gnc:dna_marker)
       (triple 'gnt:has_genotype_files 'rdfs:label "This resource has these genotype files")
       (triple 'gnt:has_genotype_files 'rdfs:domain 'dcat:Dataset)
       (triple 'gnt:has_genotype_data 'rdf:type 'owl:ObjectProperty)
@@ -248,16 +321,19 @@
       (triple 'gnt:has_genotype_data 'rdfs:domain 'gnc:set)
       (triple 'gnt:has_genotype_data 'rdfs:range 'dcat:Dataset)
       (triple 'gnt:has_genotype_data 'rdfs:subPropertyOf 'dct:relation)
-      (triple 'gnt:has_marker_count 'rdf:type 'owl:ObjectProperty)
+      (triple 'gnt:has_marker_count 'rdf:type 'owl:DatatypeProperty)
       (triple 'gnt:has_marker_count 'rdfs:label "this resources has N number of dna markers/SNPs.")
-      (triple 'gnt:has_marker_count 'rdfs:domain 'xsd:integer)
-      (triple 'gnt:has_marker_count 'rdfs:range 'dcat:Dataset)
+      (triple 'gnt:has_marker_count 'rdfs:domain 'dcat:Dataset)
+      (triple 'gnt:has_marker_count 'rdfs:range 'xsd:integer)
       (triple 'gnt:chr 'a 'qb:MeasureProperty)
       (triple 'gnt:chr 'a 'rdf:Property)
       (triple 'gnt:chr 'rdfs:label "Chromosome")
       (triple 'gnt:chr 'rdfs:domain 'gnc:marker)
       (triple 'gnt:chr 'rdfs:range 'rdfs:Literal)
       (triple 'gnt:chr 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnt:chromosome 'a 'owl:DatatypeProperty)
+      (triple 'gnt:chromosome 'rdfs:subPropertyOf 'gnt:chr)
+      (triple 'gnt:chromosome 'rdfs:range 'rdfs:Literal)
       (triple 'gnt:mb 'rdfs:label "Megabase")
       (triple 'gnt:mb 'rdfs:domain 'gnc:marker)
       (triple 'gnt:mb 'rdfs:range 'rdfs:Literal)
@@ -270,31 +346,39 @@
       (triple 'gnt:source 'rdfs:domain 'gnc:marker)
       (triple 'gnt:source 'rdfs:range 'rdfs:Literal)
       (triple 'gnt:source 'rdfs:subPropertyOf 'sdmx-measure:obsValue)
+      (triple 'gnc:nucleotide 'a 'owl:Class)
+      (triple 'gnc:nucleotide 'rdfs:subClassOf 'gnc:material_resource)
+      (triple 'gnt:has_sequence 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_sequence 'rdfs:domain 'gnc:nucleotide)
+      (triple 'gnt:has_sequence 'rdfs:range 'xsd:string)
 
       ;; Probesets
       (triple 'gnc:probeset 'a 'owl:Class)
       (triple 'gnc:probeset 'a 'skos:Concept)
       (triple 'gnc:probeset 'rdfs:label "A probeset")
+      (triple 'gnc:probeset 'rdfs:subClassOf 'gnc:material_resource)
       (triple 'gnt:has_target_id 'a 'owl:ObjectProperty)
       (triple 'gnt:has_target_id 'rdfs:label "The target id for this probeset")
       (triple 'gnt:has_target_id 'rdfs:domain 'gnc:probeset)
-      (triple 'gnt:symbol 'a 'owl:ObjectProperty)
+      (triple 'gnt:symbol 'a 'owl:DatatypeProperty)
       (triple 'gnt:symbol 'rdfs:domain 'gnc:probeset)
-      (triple 'gnt:targets_region 'a 'owl:ObjectProperty)
+      (triple 'gnt:targets_region 'a 'owl:DatatypeProperty)
       (triple 'gnt:targets_region 'rdfs:label "The target region")
       (triple 'gnt:targets_region 'rdfs:domain 'gnc:probeset)
+      (triple 'gnt:mb_mm8 'a 'owl:DatatypeProperty)
       (triple 'gnt:mb_mm8 'rdfs:domain 'gnc:probeset)
-      (triple 'gnt:has_specificity 'a 'owl:ObjectProperty)
+      (triple 'gnt:mb_mm8 'rdfs:range 'xsd:double)
+      (triple 'gnt:has_specificity 'a 'owl:DatatypeProperty)
       (triple 'gnt:has_specificity 'rdfs:domain 'gnc:probeset)
-      (triple 'gnt:has_blat_score 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_blat_score 'a 'owl:DatatypeProperty)
       (triple 'gnt:has_blat_score 'rdfs:domain 'gnc:probeset)
-      (triple 'gnt:has_blat_mb_start 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_blat_mb_start 'a 'owl:DatatypeProperty)
       (triple 'gnt:has_blat_mb_start 'rdfs:domain 'gnc:probeset)
-      (triple 'gnt:has_blat_mb_end 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_blat_mb_end 'a 'owl:DatatypeProperty)
       (triple 'gnt:has_blat_mb_end 'rdfs:domain 'gnc:probeset)
-      (triple 'gnt:has_blat_seq 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_blat_seq 'a 'owl:DatatypeProperty)
       (triple 'gnt:has_blat_seq 'rdfs:domain 'gnc:probeset)
-      (triple 'gnt:has_target_seq 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_target_seq 'a 'owl:DatatypeProperty)
       (triple 'gnt:has_target_seq 'rdfs:domain 'gnc:probeset)
       (triple 'gnt:has_homologene_id 'a 'owl:ObjectProperty)
       (triple 'gnt:has_homologene_id 'rdfs:domain 'gnc:probeset)
@@ -310,15 +394,70 @@
       (triple 'gnt:has_chebi_id 'rdfs:domain 'gnc:probeset)
 
       ;; RIF
+      (triple 'gnc:gene 'a 'rdfs:Class)
+      (triple 'gnc:gene 'rdfs:subClassOf 'gnc:material_resource)
+      (triple 'gnc:gene_symbol 'a 'rdfs:Class)
+      (triple 'gnc:gene_symbol 'rdfs:subClassOf 'gnc:information_resource)
+      (triple 'gnc:transcript 'a 'rdfs:Class)
+      (triple 'gnc:transcript 'rdfs:subClassOf 'gnc:information_resource)
+      (triple 'gnc:resource_link 'a 'rdfs:Class)
+      (triple 'gnc:aba_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:biogps_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:ebi_gwas_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:gemma_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:genemania_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:gtex_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:panther_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:protein_atlas_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:rgd_link 'rdfs:subClassOf 'gnc:resource_link)
+      (triple 'gnc:has_kg_id 'a 'owl:DatatypeProperty)
+      (triple 'gnc:has_kg_id 'rdfs:domain 'gnc:gene)
+      (triple 'gnc:has_unigen_id 'a 'owl:DatatypeProperty)
+      (triple 'gnc:has_unigen_id 'rdfs:domain 'gnc:gene)
+      (triple 'gnc:has_protein_id 'a 'owl:DatatypeProperty)
+      (triple 'gnc:has_protein_id 'rdfs:domain 'gnc:gene)
+      (triple 'gnc:has_align_id 'a 'owl:DatatypeProperty)
+      (triple 'gnc:has_align_id 'rdfs:domain 'gnc:gene)
       (triple 'gnc:gene_wiki_entry 'a 'rdfs:Class)
+      (triple 'gnc:gene_wiki_entry 'rdfs:subClassOf 'gnc:information_resource)
       (triple 'gnc:gn_wiki_entry 'rdfs:subClassOf 'gnc:gene_wiki_entry)
-      (triple 'gnt:initial 'a 'owl:ObjectProperty)
+      (triple 'gnt:initial 'a 'owl:DatatypeProperty)
       (triple 'gnt:initial 'rdfs:domain 'gnc:gene_wiki_entry)
       (triple 'gnt:initial 'skos:definition "Optional user or project code or your initials")
-      (triple 'gnt:reason 'a 'owl:ObjectProperty)
+      (triple 'gnt:reason 'a 'owl:DatatypeProperty)
       (triple 'gnt:reason 'rdfs:domain 'gnc:gene_wiki_entry)
       (triple 'gnt:reason 'skos:definition "The reason why this resource was modified")
+      (triple 'gnt:belongs_to_category 'a 'owl:DatatypeProperty)
+      (triple 'gnt:belongs_to_category 'rdfs:domain 'gnc:gene_wiki_entry)
+      (triple 'gnt:has_gene_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_gene_id 'schema:domainIncludes 'gnc:gene)
+      (triple 'gnt:has_gene_id 'schema:domainIncludes 'gnc:ncbi_wiki_entry)
+      (triple 'gnt:gene_symbol 'a 'owl:DatatypeProperty)
+      (triple 'gnt:gene_symbol 'rdfs:domain 'gnc:gene)
       (triple 'gnc:gn_wiki_entry 'rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
       (triple 'gnt:gene_symbol 'rdfs:domain 'gnc:gn_wiki_entry)
+      (triple 'gnt:transcript 'a 'owl:ObjectProperty)
+      (triple 'gnt:transcript 'rdfs:domain 'gnc:gene)
+      (triple 'gnt:transcript 'rdfs:range 'gnc:transcript)
+      (triple 'gnt:strand 'a 'owl:DatatypeProperty)
+      (triple 'gnt:strand 'rdfs:domain 'gnc:gene)
+      (triple 'gnt:strand 'rdfs:range 'xsd:string)
+      (triple 'gnt:tx_start 'a 'owl:DatatypeProperty)
+      (triple 'gnt:tx_start 'rdfs:domain 'gnc:gene)
+      (triple 'gnt:tx_start 'rdfs:range 'xsd:double)
+      (triple 'gnt:tx_end 'a 'owl:DatatypeProperty)
+      (triple 'gnt:tx_end 'rdfs:domain 'gnc:gene)
+      (triple 'gnt:tx_end 'rdfs:range 'xsd:double)
+      (triple 'gnt:has_align_id 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_align_id 'rdfs:domain 'gnc:gene)
+      (triple 'gnt:has_protein_id 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_protein_id 'rdfs:domain 'gnc:gene)
+      (triple 'gnt:has_rgd_id 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_rgd_id 'rdfs:domain 'gnc:gene)
+      (triple 'gnt:has_geo_series_id 'a 'owl:ObjectProperty)
+      (triple 'gnt:has_geo_series_id 'rdfs:domain 'skos:Concept)
+      (triple 'gnt:has_go_tree_value 'a 'owl:DatatypeProperty)
+      (triple 'gnt:has_go_tree_value 'rdfs:domain 'skos:Concept)
+      (triple 'gnt:has_go_tree_value 'rdfs:range 'xsd:string)
       (triple 'gnc:ncbi_wiki_entry 'rdfs:subClassOf 'gnc:gene_wiki_entry)
       (triple 'gnc:ncbi_wiki_entry 'rdfs:comment "Represents GeneRIF Entries obtained from NCBI"))))
diff --git a/examples/phenotype-datasets.scm b/examples/phenotype-datasets.scm
index 4819627..c005621 100755
--- a/examples/phenotype-datasets.scm
+++ b/examples/phenotype-datasets.scm
@@ -18,7 +18,7 @@
   (tables (Species
            (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
            (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id"))
-          "WHERE PublishFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, PublishFreeze.ShortName")
+          "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND Species.Name != 'monkey' GROUP BY Species.Name, PublishFreeze.ShortName")
   (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
     (multiset gnt:has_phenotype_data
               (map (cut string->identifier "dataset" <> #:separator "_")
@@ -27,27 +27,6 @@
                             dataset_name))
                     #\,)))))
 
-(define-transformer gn:dataset->gn:set
-  (tables (Datasets
-           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
-           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId")
-           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id"))
-          "WHERE PublishFreeze.public > 0 GROUP BY Datasets.DatasetId")
-  (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")
-    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
-
-(define-transformer gn:dataset->metadata
-  (tables (PublishXRef
-           (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
-           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
-           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
-           (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId")
-           (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
-          "WHERE InbredSet.public > 0 GROUP BY Species.Name, PublishFreeze.Name")
-  (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")
-    (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:datetime))
-    (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
-
 (define-transformer gn:dataset->gn:trait
   (tables (PublishXRef
            (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
@@ -55,7 +34,7 @@
            (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
            (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId")
            (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
-          "WHERE InbredSet.public > 0")
+          "WHERE InbredSet.public > 0 AND PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1")
   (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")
     (set gnt:has_phenotype_trait
          (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
@@ -101,8 +80,6 @@
    (inputs
     (list
      gn:set->gn:dataset
-     gn:dataset->gn:set
-     gn:dataset->metadata
      gn:dataset->gn:trait))
    (outputs
     `(#:documentation ,documentation
diff --git a/examples/phenotype.scm b/examples/phenotype.scm
index c2564b6..70deed7 100755
--- a/examples/phenotype.scm
+++ b/examples/phenotype.scm
@@ -71,7 +71,7 @@
            (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
            (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
            (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
-          "WHERE InbredSet.public > 0")
+          "WHERE InbredSet.public > 0 AND PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1")
   (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
                  (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
                  (post-desc (blank-p (field Phenotype Post_publication_description)))
@@ -86,6 +86,11 @@
     (set owl:equivalentClass
          (field ("CONCAT(PublishFreeze.Name, '_', PublishXRef.Id)"
                  PublishFreeze)))
+    (set gnt:has_trait_page
+         (string->symbol
+          (format #f "<https://genenetwork.org/show_trait?trait_id=~a&dataset=~a>"
+                  (field PublishXRef Id)
+                  (field PublishFreeze Name))))
     (set dcat:distribution
          (string->symbol
           (format #f "gnd:~a.json"
diff --git a/examples/probesets-experiment-metadata.scm b/examples/probesets-experiment-metadata.scm
new file mode 100755
index 0000000..4bab425
--- /dev/null
+++ b/examples/probesets-experiment-metadata.scm
@@ -0,0 +1,110 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 getopt-long)
+             (ice-9 match)
+             (ice-9 regex)
+             (transform strings)
+             (transform sql)
+             (transform triples)
+             (transform special-forms)
+             (web uri))
+
+
+(define-transformer probesetxref->metadata   ; ProbeSetXRef rows of public, non-confidential freezes
+  (tables (ProbeSetXRef
+           (inner-join ProbeSetFreeze "ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id")
+           (inner-join ProbeSet "ON ProbeSet.Id = ProbeSetXRef.ProbeSetId"))
+          "WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1")
+  (triples (string->identifier
+            "probeset_data"
+            (uri-encode
+             (format #f "~a_~a" (field ProbeSetFreeze Name ProbeSetFreezeName) (field ProbeSet Name ProbeSetName))) #:separator "_")
+    (set rdf:type 'gnc:molecular_trait_metadata)
+    ;; KLUDGE: Agree with Alex on how we want to name this.
+    ;; (set dcat:distribution
+    ;;      (string->symbol
+    ;;       (sanitize-rdf-string
+    ;;        (format #f "gnd:~a.json"
+    ;;                (field ("CONCAT(ProbeSetFreeze.Name, '_', ProbeSet.Name)"
+    ;;                        PublishFreeze))))) )
+    (set gnt:has_trait_page
+         (string->symbol
+          (format #f "<https://genenetwork.org/show_trait?trait_id=~a&dataset=~a>"
+                  (uri-encode (format #f "~a" (field ProbeSet Name)))   ; coerce to a string, then percent-encode
+                  ;; Both query parameters are percent-encoded, e.g. for a name like "GTEx_Lung _0414".
+                  (uri-encode
+                   (field ProbeSetFreeze Name ProbeSetFreezeName)))))
+    (set gnt:has_probeset (string->identifier "probeset" (field ProbeSet Name ProbeSetName)))
+    (set dcat:isPartOf (string->identifier "dataset" (field ProbeSetFreeze Name ProbeSetFreezeName)
+                                           #:separator "_"))
+    (set gnt:mean (annotate-field (field ("IFNULL(ProbeSetXRef.mean, '')" mean))   ; SQL NULL is read as ''
+                                  '^^xsd:double))
+    (set gnt:se (annotate-field (field ("IFNULL(ProbeSetXRef.se, '')" se))
+                                '^^xsd:double))
+    (set gnt:locus (sanitize-rdf-string (field ProbeSetXRef Locus)))
+    (set gnt:lod_score (annotate-field
+                        (field ("IFNULL((ProbeSetXRef.LRS/4.604), '')" lrs))   ; LRS is divided by 4.604 in SQL
+                        '^^xsd:double))
+    (set gnt:pvalue (annotate-field
+                     (field ("IFNULL((ProbeSetXRef.pValue), '')" pValue))
+                     '^^xsd:double))
+    (set gnt:additive (annotate-field
+                       (field ("IFNULL((ProbeSetXRef.additive), '')" additive))
+                       '^^xsd:double))
+    (set gnt:h2 (annotate-field
+                 (field ("IFNULL((ProbeSetXRef.h2), '')" h2))
+                 '^^xsd:double))))
+
+
+
+(let* ((option-spec   ; command line: -s SETTINGS -o OUTPUT -d DOCUMENTATION
+        '((settings (single-char #\s) (value #t) (required? #t))
+          (output (single-char #\o) (value #t))
+          (documentation (single-char #\d) (value #t))))
+       (options (getopt-long (command-line) option-spec))
+       (settings (option-ref options 'settings #f))
+       (output (option-ref options 'output #f))
+       (documentation (option-ref options 'documentation #f))
+       (%connection-settings
+        (call-with-input-file settings   ; opened unconditionally, hence the required option above
+          read)))
+  (call-with-target-database
+   %connection-settings
+   (lambda (db)
+     (with-documentation
+      (name "ProbeSet Experiments Metadata")
+      (connection %connection-settings)
+      (table-metadata? #f)
+      (total-rows (assoc-ref   ; counts every ProbeSetXRef row, without the transformer's joins and WHERE
+                   (sql-find db "SELECT count(*) AS count from ProbeSetXRef")
+                   "count"))
+      (rows-per-chunk 1000000)
+      (prefixes
+       '(("dcat:" "<http://www.w3.org/ns/dcat#>")
+         ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+         ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+         ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
+         ("gnd:" "<https://cd.genenetwork.org/api3/lmdb/v1/data/traits/>")
+         ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+         ("kegg:" "<http://bio2rdf.org/ns/kegg#>")
+         ("pubchem:" "<https://pubchem.ncbi.nlm.nih.gov/>")
+         ("omim:" "<https://www.omim.org/entry/>")
+         ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+         ("uniprot:" "<http://purl.uniprot.org/uniprot/>")
+         ("chebi:" "<http://purl.obolibrary.org/obo/CHEBI_>")
+         ("dcat:" "<http://www.w3.org/ns/dcat#>")
+         ("dct:" "<http://purl.org/dc/terms/>")
+         ("owl:" "<http://www.w3.org/2002/07/owl#>")
+         ("homologene:" "<https://bio2rdf.org/homologene:>")
+         ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+         ("qb:" "<http://purl.org/linked-data/cube#>")
+         ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
+         ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
+      (inputs
+       (list probesetxref->metadata))
+      (outputs
+       `(#:documentation ,documentation
+         #:rdf ,output))))))
diff --git a/examples/probesets.scm b/examples/probesets.scm
new file mode 100755
index 0000000..97e5753
--- /dev/null
+++ b/examples/probesets.scm
@@ -0,0 +1,133 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 getopt-long)
+             (ice-9 match)
+             (ice-9 regex)
+             (transform strings)
+             (transform sql)
+             (transform triples)
+             (transform special-forms)
+             (web uri))
+
+(define-transformer probeset->metadata   ; one gnc:probeset resource per ProbeSet row with a non-blank Name
+  (tables (ProbeSet
+           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))
+          "WHERE ProbeSet.Name IS NOT NULL AND TRIM(ProbeSet.Name) != ''")
+  (triples
+      (string->identifier "probeset" (field ProbeSet Name))
+    (set rdf:type 'gnc:probeset)
+    (set skos:prefLabel (field ProbeSet Name))
+    (multiset skos:altLabel   ; the alias column is split on ";" into one label per entry
+              (map string-trim-both
+                   (string-split (sanitize-rdf-string (field ProbeSet alias)) #\;)))
+    (set gnt:uses_genechip (string->identifier "platform" (field GeneChip Name) #:separator "_"))
+    (set gnt:has_target_id (string-trim-both (sanitize-rdf-string (field ProbeSet TargetId))))
+    (set gnt:symbol (string-trim-both (field ProbeSet Symbol)))
+    (set dct:description (sanitize-rdf-string (field ProbeSet description)))
+    (set gnt:targets_region (string-trim-both (sanitize-rdf-string (field ProbeSet Probe_set_target_region))))
+    (set gnt:chr (field ProbeSet Chr))
+    (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double))
+    (set gnt:mb_mm8 (annotate-field (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8))
+                                    '^^xsd:double))
+    (set gnt:has_specificity
+         (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
+                 Probe_set_specificity)))
+    (set gnt:has_blat_score
+         (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
+                 Probe_set_BLAT_score)))
+    (set gnt:has_blat_mb_start
+         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
+                                 Probe_set_Blat_Mb_start))
+                         '^^xsd:double))
+    (set gnt:has_blat_mb_end
+         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
+                                 Probe_set_Blat_Mb_end))
+                         '^^xsd:double))
+    (set gnt:has_blat_seq (sanitize-rdf-string (field ProbeSet BlatSeq)))
+    (set gnt:has_target_seq (sanitize-rdf-string (field ProbeSet TargetSeq)))
+    (set gnt:has_homologene_id (ontology 'homologene:
+                                         (uri-encode
+                                          (field ("IFNULL(ProbeSet.HomoloGeneID, '')"
+                                                  HomoloGeneID)))))
+    (set gnt:has_uniprot_id (ontology 'uniprot:
+                                      (uri-encode
+                                       (field ("IFNULL(ProbeSet.UniProtID, '')"
+                                               UniProtID)))))
+    (set gnt:has_pub_chem_id (ontology
+                              'pubchem:
+                              (uri-encode
+                               (field ("IFNULL(ProbeSet.PubChem_ID, '')"
+                                       PubChem_ID)))))
+    (set gnt:has_kegg_id (ontology
+                          'kegg:
+                          (uri-encode
+                           (field ("IFNULL(ProbeSet.KEGG_ID, '')"
+                                   KEGG_ID)))))
+    (set gnt:has_omim_id (ontology
+                          'omim:
+                          (uri-encode
+                           (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')"
+                                               OMIM))))
+                             (if (number? omim)
+                                 (number->string omim)   ; uri-encode only accepts strings
+                                 (regexp-substitute/global
+                                  #f "[^0-9]"   ; every non-digit character is removed
+                                  omim
+                                  'pre "" 'post))))))
+    (set gnt:has_chebi_id (ontology
+                           'chebi:
+                           (uri-encode
+                            (field ("IFNULL(ProbeSet.ChEBI_ID, '')"
+                                    ChEBI_ID)))))))
+
+
+
+
+(let* ((option-spec   ; command line: -s SETTINGS -o OUTPUT -d DOCUMENTATION
+        '((settings (single-char #\s) (value #t) (required? #t))
+          (output (single-char #\o) (value #t))
+          (documentation (single-char #\d) (value #t))))
+       (options (getopt-long (command-line) option-spec))
+       (settings (option-ref options 'settings #f))
+       (output (option-ref options 'output #f))
+       (documentation (option-ref options 'documentation #f))
+       (%connection-settings
+        (call-with-input-file settings   ; opened unconditionally, hence the required option above
+          read)))
+  (call-with-target-database
+   %connection-settings
+   (lambda (db)
+     (with-documentation
+      (name "ProbeSet Metadata")
+      (connection %connection-settings)
+      (table-metadata? #f)
+      (total-rows (assoc-ref   ; counts every ProbeSet row, without the transformer's WHERE filter
+                   (sql-find db "SELECT count(*) AS count from ProbeSet")
+                   "count"))
+      (rows-per-chunk 1000000)
+      (prefixes
+       '(("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+         ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
+         ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
+         ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+         ("kegg:" "<http://bio2rdf.org/ns/kegg#>")
+         ("pubchem:" "<https://pubchem.ncbi.nlm.nih.gov/>")
+         ("omim:" "<https://www.omim.org/entry/>")
+         ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+         ("uniprot:" "<http://purl.uniprot.org/uniprot/>")
+         ("chebi:" "<http://purl.obolibrary.org/obo/CHEBI_>")
+         ("dct:" "<http://purl.org/dc/terms/>")
+         ("owl:" "<http://www.w3.org/2002/07/owl#>")
+         ("homologene:" "<https://bio2rdf.org/homologene:>")
+         ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+         ("qb:" "<http://purl.org/linked-data/cube#>")
+         ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
+         ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
+      (inputs
+       (list probeset->metadata))
+      (outputs
+       `(#:documentation ,documentation
+         #:rdf ,output))))))
diff --git a/load-rdf.scm b/load-rdf.scm
index 2ef79ac..4acce8a 100755
--- a/load-rdf.scm
+++ b/load-rdf.scm
@@ -45,7 +45,8 @@ authenticating as the dba user with PASSWORD."
      (format out
              "SET DSN=localhost:~a;
 SET PWD=~s;
-DELETE FROM rdf_quad WHERE g = iri_to_id ('~a');"
+DELETE FROM rdf_quad WHERE g = iri_to_id ('~a');
+CHECKPOINT;"
              port
              password
              graph))
@@ -59,7 +60,8 @@ DELETE FROM rdf_quad WHERE g = iri_to_id ('~a');"
      (format out
              "SET DSN=localhost:~a;
 SET PWD=~s;
-DELETE FROM DB.DBA.load_list;"
+DELETE FROM DB.DBA.load_list;
+CHECKPOINT;"
              port
              password))
    OPEN_WRITE
@@ -101,7 +103,6 @@ DB.DBA.XML_SET_NS_DECL ('gnc', 'http://rdf.genenetwork.org/v1/category/', 2);
 DB.DBA.XML_SET_NS_DECL ('gnt', 'http://rdf.genenetwork.org/v1/term/', 2);
 DB.DBA.XML_SET_NS_DECL ('ncbiTaxon', 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=', 2);
 DB.DBA.XML_SET_NS_DECL ('prism', 'http://prismstandard.org/namespaces/basic/2.0/', 2);
-DB.DBA.XML_SET_NS_DECL ('probeset', 'http://rdf.genenetwork.org/v1/probeset/', 2);
 DB.DBA.XML_SET_NS_DECL ('pubmed', 'http://rdf.ncbi.nlm.nih.gov/pubmed/', 2);
 DB.DBA.XML_SET_NS_DECL ('qb', 'http://purl.org/linked-data/cube#', 2);
 DB.DBA.XML_SET_NS_DECL ('sdmx-measure', 'http://purl.org/linked-data/sdmx/2009/measure#', 2);
@@ -111,8 +112,10 @@ DB.DBA.XML_SET_NS_DECL ('v', 'http://www.w3.org/2006/vcard/ns#', 2);
 DB.DBA.XML_SET_NS_DECL ('xkos', 'http://rdf-vocabulary.ddialliance.org/xkos#', 2);
 DB.DBA.XML_SET_NS_DECL ('schema', 'https://schema.org/', 2);
 DB.DBA.XML_SET_NS_DECL ('foaf', 'http://xmlns.com/foaf/0.1/#term_', 2);
+DB.DBA.XML_SET_NS_DECL ('wd', 'http://www.wikidata.org/entity/', 2);
 DB.DBA.XML_SET_NS_DECL ('gnd', 'https://cd.genenetwork.org/api3/lmdb/v1/data/traits/', 2);
 DB.DBA.XML_SET_NS_DECL ('gn-files', 'http://files.genenetwork.org/current/', 2);
+CHECKPOINT;
 "
              port
              password))
@@ -128,6 +131,7 @@ DB.DBA.XML_SET_NS_DECL ('gn-files', 'http://files.genenetwork.org/current/', 2);
 SET PWD=~s;
 DB.DBA.RDF_OBJ_FT_RULE_ADD (null, null, 'All');
 DB.DBA.VT_INC_INDEX_DB_DBA_RDF_OBJ();
+CHECKPOINT;
 quit;
 "
              port
diff --git a/manifest.scm b/manifest.scm
index d736e51..2905b6f 100644
--- a/manifest.scm
+++ b/manifest.scm
@@ -15,6 +15,7 @@
                         guile-dsv
                         guile-hashing
                         guile-libyaml
+                        guile-uuid
                         guile-dbd-mysql))
              ((gnu packages rdf) #:select (raptor2))
              (guix build-system gnu)
@@ -55,7 +56,7 @@
     (license license:gpl3+)))
 
 (packages->manifest
- (list gnu-make guile-3.0 guile-dbi guile-dbd-mysql guile-zlib
+ (list gnu-make guile-3.0 guile-dbi guile-dbd-mysql guile-zlib guile-uuid
        guile-json-4 guile-dsv
        ;; We abuse (ccwl graphviz) as a library to visualize the database
        ;; schema. Hence we need ccwl and guile-libyaml.
diff --git a/transform/special-forms.scm b/transform/special-forms.scm
index 8de4966..0c07a0a 100644
--- a/transform/special-forms.scm
+++ b/transform/special-forms.scm
@@ -6,6 +6,7 @@
   #:use-module (transform sql)
   #:use-module (transform table)
   #:use-module (transform triples)
+  #:use-module (transform strings)
   #:export (translate-forms
             collect-forms
             collect-keys
@@ -22,36 +23,47 @@
             emit-short-turtle
             define-transformer))
 
+(define (emittable-object? o)
+  "Return #f when O is '(), #f or a string satisfying string-blank?; else #t."
+  (not (or (null? o)
+           (not o)
+           (and (string? o)
+                (string-blank? o)))))
+
 (define (emit-short-turtle subject po-alist)
   (let loop ((pairs po-alist) (first? #t))
     (match pairs
       (((p . o) rest ...)
-       ;; subject only on first line
-       (when first?
-         (format #t "~a " subject))
-       (when (not first?)
-         (format #t "\t"))   ; indent following lines
-
-       (match o
-         ((? symbol?)
-          (format #t "~a ~a" p (symbol->string o)))
-         ((or (? (lambda (el) (and (string? el)
-                                   (string-match "^\\(.*\\)$" el))))
-              (? (lambda (el) (and (string? el)
-                                   (string-match "^\\[.*\\]$" el)))))
-          (format #t "~a ~s" p o))
-         (_
-          (format #t "~a \"~a\"" p o)))
-
-       (if (null? rest)
-           (format #t " .~%")   ; last triple
-           (format #t " ;~%"))  ; continuation
-
-       (loop rest #f))
-
+       (if (not (emittable-object? o))
+           (loop rest first?)   ; skip this pair; first? is passed on unchanged
+           (begin
+             ;; Print the subject before the first emitted pair only;
+             (when first?
+               (format #t "~a " subject))
+             (when (not first?)
+               (format #t "\t"))
+
+             ;; Symbols print bare, strings inside double quotes, anything else with ~s.
+             (match o
+               ((? symbol?)
+                (format #t "~a ~a" p (symbol->string o)))
+               ((? string?)
+                (format #t "~a \"~a\"" p o))
+               (_
+                (format #t "~a ~s" p o)))
+
+             ;; Continue with " ;" while an emittable pair remains in rest, else close with " .".
+             (if (any (match-lambda
+                        ((p . o) (emittable-object? o)))
+                      rest)
+                 (format #t " ;~%")
+                 (format #t " .~%"))
+
+             (loop rest #f))))
       (() #f))))
 
 
+
 (define (key->assoc-ref alist x)
   "Recursively translate (key k) forms in source X to (assoc-ref ALIST
 k) forms."
@@ -407,57 +419,68 @@ must be remedied."
          #`(define* (name db #:key
                           (metadata? #f)
                           (data? #t)
-                          (documentation? #f))
-             (when metadata?
-               #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
-                        (subject-type (any (lambda (predicate)
-                                             (syntax-case predicate (rdf:type)
-                                               ((_ rdf:type type) #'type)
-                                               (_ #f)))
-                                           #'(predicate-clauses ...))))
-                    (map (lambda (predicate-clause)
-                           (syntax-case predicate-clause ()
-                             ((_ predicate _)
-                              ;; Dump metadata about the transform itself.
-                              #`(begin
-                                  (scm->triples
-                                   (map-alist '()
-	        		     (set rdf:type 'gn-id:transform)
-	        		     (set gn-term:createsPredicate 'predicate)
-	        		     (filter-set gn-term:forSubjectType #,subject-type)
-	        		     (multiset gn-term:dependsOn
-	        			       '#,(map (lambda (field)
-	        					 (match (syntax->datum field)
-	        					   ((table-name column-name _ ...)
-	        					    (datum->syntax
-	        					     x (column-id (symbol->string table-name)
-	        							  (symbol->string column-name))))
-	        					   (((query alias))
-	        					    (datum->syntax
-	        					     x (column-id query (symbol->string alias))))))
-	        				       (collect-fields predicate-clause))))
-                                   #,(id table (syntax->datum #'predicate)))
-                                  ;; Automatically create domain triples
-                                  ;; for predicates.
-                                  (when #,subject-type
-                                    (triple 'predicate 'rdfs:domain #,subject-type))))
-                             (_ (error "Invalid predicate clause:" predicate-clause))))
-                         #'(predicate-clauses ...))))
-             (when documentation?
-               (format #t "~%## '~a'~%~%" (syntax->datum #'name))
-               #,(syntax-case #'schema-triples-clause (schema-triples)
-                   ((schema-triples (triple-subject triple-predicate triple-object) ...)
-                    #`(begin
-                        (when (not (list 'triple-subject ...))
-                          (format #t "## Schema Triples:~%~%```text~%")
-                          (for-each (lambda (s p o)
-                                      (format #t "~a -> ~a -> ~a~%" s p o))
-                                    (list 'triple-subject ...)
-                                    (list 'triple-predicate ...)
-                                    (list 'triple-object ...))
-                          (format #t "```"))))
-                   (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
-               (format #t "## Generated Triples:
+                          (documentation? #f)
+                          (limit #f)
+                          (offset #f))
+             (let* ((base-sql
+                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                   (primary-table other-tables ...)
+                                   tables-raw ...))
+                    (sql
+		      (if (and limit offset)
+			  (format #f "~a LIMIT ~a OFFSET ~a"
+				  base-sql limit offset)
+			  base-sql)))
+               (when metadata?
+                 #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
+                          (subject-type (any (lambda (predicate)
+                                               (syntax-case predicate (rdf:type)
+                                                 ((_ rdf:type type) #'type)
+                                                 (_ #f)))
+                                             #'(predicate-clauses ...))))
+                      (map (lambda (predicate-clause)
+                             (syntax-case predicate-clause ()
+                               ((_ predicate _)
+                                ;; Dump metadata about the transform itself.
+                                #`(begin
+                                    (scm->triples
+                                     (map-alist '()
+	        		       (set rdf:type 'gn-id:transform)
+	        		       (set gn-term:createsPredicate 'predicate)
+	        		       (filter-set gn-term:forSubjectType #,subject-type)
+	        		       (multiset gn-term:dependsOn
+	        			         '#,(map (lambda (field)
+	        					   (match (syntax->datum field)
+	        					     ((table-name column-name _ ...)
+	        					      (datum->syntax
+	        					       x (column-id (symbol->string table-name)
+	        							    (symbol->string column-name))))
+	        					     (((query alias))
+	        					      (datum->syntax
+	        					       x (column-id query (symbol->string alias))))))
+	        				         (collect-fields predicate-clause))))
+                                     #,(id table (syntax->datum #'predicate)))
+                                    ;; Automatically create domain triples
+                                    ;; for predicates.
+                                    (when #,subject-type
+                                      (triple 'predicate 'rdfs:domain #,subject-type))))
+                               (_ (error "Invalid predicate clause:" predicate-clause))))
+                           #'(predicate-clauses ...))))
+               (when documentation?
+                 (format #t "~%## '~a'~%~%" (syntax->datum #'name))
+                 #,(syntax-case #'schema-triples-clause (schema-triples)
+                     ((schema-triples (triple-subject triple-predicate triple-object) ...)
+                      #`(begin
+                          (when (not (list 'triple-subject ...))
+                            (format #t "## Schema Triples:~%~%```text~%")
+                            (for-each (lambda (s p o)
+                                        (format #t "~a -> ~a -> ~a~%" s p o))
+                                      (list 'triple-subject ...)
+                                      (list 'triple-predicate ...)
+                                      (list 'triple-object ...))
+                            (format #t "```"))))
+                     (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
+                 (format #t "## Generated Triples:
 
 The following SQL query was executed:
 
@@ -469,67 +492,64 @@ The above query results to triples that have the form:
 
 ```text
 "
-                       (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                     (primary-table other-tables ...)
-                                     tables-raw ...))
-               (for-each (match-lambda
-                           ((predicate . object)
-                            (format #t "~a -> ~a -> ~a ~%"
-                                    (if (symbol? #,(field->datum #'subject))
-                                        (symbol->string #,(field->datum #'subject))
-                                        #,(field->datum #'subject))
-                                    predicate
-                                    (if (symbol? object)
-                                        (symbol->string object)
-                                        object))))
-                         (map-alist
-                             '()
-                           #,@(field->datum #'(predicate-clauses ...))))
-               (format #t "```~%Here's an example query:~%~%```sparql~%")
-               (documentation?)
-               (newline)
-               (let* ((result
-                       (map-alist (sql-find
-                                   db
-                                   (format #f "~a LIMIT 1"
-                                           (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                                         (primary-table other-tables ...)
-                                                         tables-raw ...)))
-        		 #,@(field->key #'(predicate-clauses ...))))
-                      (first-n (list-head result
-                                          (let ((n
-                                                 (min 4 (truncate
-                                                         (+ (exact-integer-sqrt (length result)) 1)))))
-                                            (if (< n 3)
-                                                (length result)
-                                                n)))))
-                 (format #t "SELECT * WHERE { ~%")
+                         (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                       (primary-table other-tables ...)
+                                       tables-raw ...))
                  (for-each (match-lambda
                              ((predicate . object)
-                              (match object
-                                        ((or (?  symbol? object)
-                                             (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
-                                         (format #t "    ?s ~a ~a .~%" predicate object))
-                                        ((and (? string? object)
-                                              (? (lambda (el) (not (string-null? el))) object))
-                                         (format #t "    ?s ~a \"~a\" .~%" predicate object))
-                                        (_ ""))))
-                           first-n)
-                 (format #t "    ?s ?p ?o .~%}~%```~%"))
-               (format #t "~%Expected Result:~%~%```rdf~%")
-               (sql-for-each (lambda (row)
-                               (scm->triples
-                                (map-alist row #,@(field->key #'(predicate-clauses ...)))
-                                #,(field->assoc-ref #'row #'subject)
-                                (lambda (s p o)
-                                  (triple s p o))))
-                             db
-                             (format #f "~a LIMIT 1"
-                                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                                   (primary-table other-tables ...)
-                                                   tables-raw ...)))
-               (format #t "```~%~%"))
-             (when data?
+                              (format #t "~a -> ~a -> ~a ~%"
+                                      (if (symbol? #,(field->datum #'subject))
+                                          (symbol->string #,(field->datum #'subject))
+                                          #,(field->datum #'subject))
+                                      predicate
+                                      (if (symbol? object)
+                                          (symbol->string object)
+                                          object))))
+                           (map-alist
+                               '()
+                             #,@(field->datum #'(predicate-clauses ...))))
+                 (format #t "```~%Here's an example query:~%~%```sparql~%")
+                 (documentation?)
+                 (newline)
+                 (let* ((result
+                         (map-alist (sql-find
+                                     db
+                                     (format #f "~a LIMIT 1"
+                                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                                           (primary-table other-tables ...)
+                                                           tables-raw ...)))
+        		   #,@(field->key #'(predicate-clauses ...))))
+                        (first-n (list-head result
+                                            (let ((n
+                                                   (min 4 (truncate
+                                                           (+ (exact-integer-sqrt (length result)) 1)))))
+                                              (if (< n 3)
+                                                  (length result)
+                                                  n)))))
+                   (format #t "SELECT * WHERE { ~%")
+                   (for-each (match-lambda
+                               ((predicate . object)
+                                (match object
+                                  ((or (?  symbol? object)
+                                       (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
+                                   (format #t "    ?s ~a ~a .~%" predicate object))
+                                  ((and (? string? object)
+                                        (? (lambda (el) (not (string-null? el))) object))
+                                   (format #t "    ?s ~a \"~a\" .~%" predicate object))
+                                  (_ ""))))
+                             first-n)
+                   (format #t "    ?s ?p ?o .~%}~%```~%"))
+                 (format #t "~%Expected Result:~%~%```rdf~%")
+                 (sql-for-each (lambda (row)
+                                 (scm->triples
+                                  (map-alist row #,@(field->key #'(predicate-clauses ...)))
+                                  #,(field->assoc-ref #'row #'subject)
+                                  (lambda (s p o)
+                                    (triple s p o))))
+                               db
+                               (format #f "~a LIMIT 1" base-sql))
+                 (format #t "```~%~%"))
+               (when data?
                #,(syntax-case #'schema-triples-clause (schema-triples)
                    ((schema-triples (triple-subject triple-predicate triple-object) ...)
                     #`(for-each triple
@@ -537,16 +557,14 @@ The above query results to triples that have the form:
                                 (list 'triple-predicate ...)
                                 (list 'triple-object ...)))
                    (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
-	       (sql-for-each  (lambda (row)
-                                (let* ((subject-val #,(field->assoc-ref #'row #'subject))
-                                       (po-alist
-                                        (map-alist row #,@(field->key #'(predicate-clauses ...)))))
-                                  (emit-short-turtle subject-val po-alist)))
-                             db
-                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                           (primary-table other-tables ...)
-                                           tables-raw ...)))
-             )))
+	       (sql-for-each
+		  (lambda (row)
+                    (let* ((subject-val #,(field->assoc-ref #'row #'subject))
+                           (po-alist
+                            (map-alist row #,@(field->key #'(predicate-clauses ...)))))
+                      (emit-short-turtle subject-val po-alist)))
+                  db
+                  sql))))))
       (_ (error "Invalid define-transformer syntax:" (syntax->datum x))))))
 
 (define (get-keyword-value args keyword default)
@@ -565,8 +583,14 @@ The above query results to triples that have the form:
             (prefixes (assoc-ref alist 'prefixes))
             (inputs (assoc-ref alist 'inputs))
             (outputs (assoc-ref alist 'outputs))
-            (rdf-path (get-keyword-value outputs #:rdf ""))
-            (doc-path (get-keyword-value outputs #:documentation "")))
+            (total-rows (assoc-ref alist 'total-rows))
+            (rows-per-chunk (assoc-ref alist 'rows-per-chunk))
+            (chunking? (and total-rows rows-per-chunk))
+            (chunks (if chunking?
+                        (ceiling (/ total-rows rows-per-chunk))
+                        1))
+            (rdf-path (get-keyword-value outputs #:rdf #f))
+            (doc-path (get-keyword-value outputs #:documentation #f)))
        (call-with-target-database
         connection
         (lambda (db)
@@ -592,20 +616,30 @@ The above query results to triples that have the form:
 
           ;; Dumping the actual data
           (when rdf-path
-            (with-output-to-file
-                rdf-path
-              (lambda ()
-                ;; Add the prefixes
-                (for-each
-                 (match-lambda
-                   ((k v)
-                    (begin
-                      (prefix k v))))
-                 prefixes)
-                (newline)
-                (for-each
-                 (lambda (proc)
-                   (proc db #:metadata? table-metadata?))
-                 inputs))
-              #:encoding "UTF-8"))))))))
+            (do ((i 0 (+ i 1)))
+                ((>= i chunks))
+              (let* ((offset (* i (or rows-per-chunk 0)))
+                     (out-file
+                      (if (= chunks 1)
+                          rdf-path
+                          (string-append (path-without-extension rdf-path)
+                                         "." (number->string (+ i 1)) ".ttl"))))
+                (with-output-to-file
+                    out-file
+                  (lambda ()
+                    ;; Add the prefixes
+                    (for-each
+                     (match-lambda
+                       ((k v)
+                        (begin
+                          (prefix k v))))
+                     prefixes)
+                    (newline)
+                    (for-each
+                     (lambda (proc)
+                       (proc db #:metadata? table-metadata?
+                             #:limit rows-per-chunk
+                             #:offset offset))
+                     inputs))
+                  #:encoding "UTF-8"))))))))))
 
diff --git a/transform/strings.scm b/transform/strings.scm
index 7b62349..c0f02e5 100644
--- a/transform/strings.scm
+++ b/transform/strings.scm
@@ -1,7 +1,13 @@
 (define-module (transform strings)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-19)
+  #:use-module (rnrs bytevectors)
+  #:use-module (uuid generate)
+  #:use-module (uuid utils)
+  #:use-module (uuid well-known)
+  #:use-module (ice-9 iconv)
   #:use-module (ice-9 match)
+  #:use-module (ice-9 rdelim)
   #:use-module (ice-9 string-fun)
   #:use-module (ice-9 textual-ports)
   #:export (string-blank?
@@ -18,11 +24,27 @@
             normalize-string-field
             fix-email-id
             blank-p
-            investigator-attributes->id))
+            investigator-attributes->id
+            path-without-extension
+            gn-uuid))
+
+(define (gn-uuid string)
+  "Encode STRING as UTF-8 bytes and hand them to `generate-string-uuid'
+together with the 'uuidv5 tag; the value of that call is returned as is."
+  (let ((utf8-bytes (string->bytevector string "UTF-8")))
+    (generate-string-uuid 'uuidv5 utf8-bytes)))
 
 (define (blank-p str)
   "Return #f when `string-blank?' holds for STR; otherwise return STR itself."
   (and (not (string-blank? str))
        str))
 
+(define (path-without-extension path)
+  "Return PATH with the final extension of its last component removed.
+The result is rebuilt from (dirname PATH) and the stripped
+(basename PATH).  A dot in first position of the base name (as in
+\".hidden\") is not treated as an extension, and no extra \"/\" is
+inserted when the directory part already ends in one (the root)."
+  (let* ((dir (dirname path))                 ; directory part
+         (base (basename path))               ; filename part
+         (dot-pos (string-rindex base #\.))   ; last dot position, or #f
+         ;; A dot at index 0 marks a dotfile, not an extension; stripping
+         ;; there would leave an empty file name.
+         (stem (if (and dot-pos (> dot-pos 0))
+                   (substring base 0 dot-pos) ; strip extension
+                   base)))
+    ;; Avoid producing "//name" when the directory is "/".
+    (if (string-suffix? "/" dir)
+        (string-append dir stem)
+        (string-append dir "/" stem))))
+
 (define (lower-case-and-replace-spaces str)
   (string-map
    (lambda (c)
diff --git a/transform/triples.scm b/transform/triples.scm
index 13758e5..7f96eea 100644
--- a/transform/triples.scm
+++ b/transform/triples.scm
@@ -39,6 +39,7 @@
           #:optional #:key
           (ontology "gn:")
           (separator "")
+          (url-char #\_)
           (proc (lambda (x) x)))
   "Convert STR to a turtle identifier after replacing illegal
 characters with an underscore and prefixing with gn:PREFIX."
@@ -55,7 +56,7 @@ characters with an underscore and prefixing with gn:PREFIX."
                                              (char-numeric? c)
                                              (char=? c #\_))
                                          c
-                                         #\_))
+                                         url-char))
                                    (proc str)))))))