about summary refs log tree commit diff
path: root/examples/dataset-metadata.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/dataset-metadata.scm')
-rwxr-xr-xexamples/dataset-metadata.scm541
1 files changed, 0 insertions, 541 deletions
diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm
deleted file mode 100755
index 9c30180..0000000
--- a/examples/dataset-metadata.scm
+++ /dev/null
@@ -1,541 +0,0 @@
-#! /usr/bin/env guile
-!#
-
-(use-modules (srfi srfi-1)
-             (srfi srfi-26)
-             (ice-9 getopt-long)
-             (ice-9 match)
-             (ice-9 regex)
-             (transform strings)
-             (transform sql)
-             (transform triples)
-             (transform special-forms))
-
-
-(define (remap-species-identifiers str)
-  "Translate the species label STR into its standard binomial name.
-   Labels without an entry in the lookup table are returned unchanged.
-   This is a stop-gap: the labels should be corrected in the database."
-  (let ([binomial-names
-         '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
-           ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
-           ("Macaca mulatta" . "Macaca nemestrina")
-           ("Bat (Glossophaga soricina)" . "Glossophaga soricina"))])
-    ;; assoc-ref compares with equal?, and every mapped value is a
-    ;; (truthy) string, so `or' falls back to STR only for unknown labels.
-    (or (assoc-ref binomial-names str)
-        str)))
-
-(define (fix-email-id email)
-  "Return EMAIL with every space character removed.  One email ID in the
-   Investigators table has spaces in it; this procedure fixes that."
-  ;; Splitting on spaces and gluing the pieces back together with nothing
-  ;; in between drops exactly the space characters.
-  (string-join (string-split email #\space) ""))
-
-(define (investigator-attributes->id first-name last-name email)
-  "Build the identifier of an investigator from FIRST-NAME, LAST-NAME and
-   EMAIL.  The names take part in the identifier because the single record
-   corresponding to \"Evan Williams\" has no email ID.  It would be
-   preferable to find that email ID and insert it into the database."
-  (let* ((cleaned-email (fix-email-id email))
-         (id-parts (list first-name last-name cleaned-email)))
-    (string->identifier "investigator"
-                        (string-join id-parts "_"))))
-
-;; Transformer describing the rows of the Investigators table as
-;; foaf:Person resources carrying name, homepage and postal-address
-;; properties.
-(define-transformer investigators
-  ;; There are a few duplicate entries. We group by email to
-  ;; deduplicate.
-  (tables (Investigators)
-          "GROUP BY Email")
-  ;; The subject is the identifier that investigator-attributes->id builds
-  ;; from the first name, the last name and the email.
-  (triples (investigator-attributes->id (field Investigators FirstName)
-                                        (field Investigators LastName)
-                                        (field Investigators Email))
-    (set rdf:type 'foaf:Person)
-    ;; Full name: first and last name separated by a single space.
-    (set foaf:name (string-append (field Investigators FirstName) " "
-                                  (field Investigators LastName)))
-    (set foaf:givenName
-         (field Investigators FirstName))
-    (set foaf:familyName
-         (field Investigators LastName))
-    (set foaf:homepage (field Investigators Url))
-    ;; Postal address parts, expressed with the "v:" prefix.
-    (set v:adr (field Investigators Address))
-    (set v:locality (field Investigators City))
-    (set v:region (field Investigators State))
-    (set v:postal-code (field Investigators ZipCode))
-    (set v:country-name (field Investigators Country))))
-
-;; Transformer describing the rows of the GeneChip table (left-joined with
-;; Species) as gnc:geneChip resources.
-(define-transformer gene-chip
-  (tables (GeneChip
-           (left-join Species "USING (SpeciesId)")))
-  (schema-triples
-   (gnc:geneChip a skos:Concept)
-   (gnc:geneChip
-    skos:description
-    "This is a set of controlled terms that are used to describe a given gene chip/platform")
-   (gnt:hasGeoSeriesId rdfs:domain gnc:platform)
-   (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip)
-   (gnt:hasGOTreeValue a owl:ObjectProperty)
-   (gnt:hasGOTreeValue skos:definition "This resource has the following GO tree value")
-   (gnt:hasGOTreeValue rdfs:domain gnc:geneChip))
-  ;; The subject identifier is derived from GeneChip.Name with the
-  ;; "platform" prefix.
-  (triples (string->identifier "platform" (field GeneChip Name))
-    (set rdf:type 'gnc:geneChip)
-    (set rdfs:label (field GeneChip GeneChipName))
-    (set skos:prefLabel (field GeneChip Name))
-    ;; The SQL expression yields Title only when it differs from
-    ;; GeneChipName, and NULL otherwise.
-    (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)"
-                               Title)))
-    (set gnt:hasGOTreeValue (field GeneChip Go_tree_value))
-    ;; The species label is first remapped to its binomial name, then
-    ;; turned into an identifier with an empty prefix and separator.
-    (set xkos:classifiedUnder
-         (string->identifier "" (remap-species-identifiers (field Species Fullname))
-                             #:separator ""
-                             #:proc string-capitalize-first))
-    ;; GeoPlatform is trimmed of surrounding white space before use.
-    (set gnt:hasGeoSeriesId
-         (ontology 'geoSeries:
-                   (string-trim-both (field GeneChip GeoPlatform))))))
-
-(define (first-non-blank-string . candidates)
-  "Return the first of CANDIDATES that is a string containing something
-   other than white space, or the empty string when there is none."
-  (or (find (lambda (candidate)
-              (and (string? candidate)
-                   (not (string-blank? candidate))))
-            candidates)
-      ""))
-
-(define (dataset-document-link info-page-name document content)
-  "Return, as a symbol, the gn-docs link to DOCUMENT (a file name without
-   its .rtf extension) for the dataset named INFO-PAGE-NAME.  Return the
-   empty string instead when CONTENT is null or blank."
-  ;; The link is built before CONTENT is inspected, so a malformed
-  ;; INFO-PAGE-NAME is reported whether or not there is content.
-  (let ((link
-         (format
-          #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/~a.rtf>"
-          (string-capitalize-first
-           (regexp-substitute/global
-            #f "[^A-Za-z0-9:]"
-            info-page-name
-            'pre "_" 'post))
-          document)))
-    (if (or (null? content) (string-blank? content))
-        ""
-        (string->symbol link))))
-
-;; Transformer describing the rows of the InfoFiles table that have a
-;; GN_AccesionId as dcat:Dataset resources.
-(define-transformer info-files
-  (tables (InfoFiles
-           (left-join PublishFreeze "ON InfoFiles.InfoPageName = PublishFreeze.Name")
-           (left-join GenoFreeze "ON InfoFiles.InfoPageName = GenoFreeze.Name")
-           (left-join ProbeSetFreeze "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
-           (left-join InbredSet "ON InfoFiles.InbredSetId = InbredSet.InbredSetId")
-           (left-join Species "ON InfoFiles.SpeciesId = Species.SpeciesId")
-           (left-join Datasets "USING (DatasetId)")
-           (left-join DatasetStatus "USING (DatasetStatusId)")
-           (left-join Tissue "USING (TissueId)")
-           (left-join Investigators "USING (InvestigatorId)")
-           (left-join AvgMethod "USING (AvgMethodId)")
-           (left-join Organizations "USING (OrganizationId)")
-           (left-join GeneChip "USING (GeneChipId)"))
-          ;; XXXX: There are datasets that don't have the InbredSetId
-          ;; in the Infofiles table.  This clause allows us to check
-          ;; if they exist in the (Publish/Geno)Freeze tables.
-          "LEFT JOIN InbredSet PublishInbredSet ON PublishFreeze.InbredSetId = PublishInbredSet.InbredSetId LEFT JOIN InbredSet GenoInbredSet ON GenoFreeze.InbredSetId = GenoInbredSet.InbredSetId  WHERE GN_AccesionId IS NOT NULL")
-  (schema-triples
-   (gnt:hasTissue rdfs:domain dcat:Dataset)
-   (gnt:hasTissue a owl:ObjectProperty)
-   (gnt:hasTissue skos:definition "Tissues this resource has")
-   (gnt:usesNormalization rdfs:domain dcat:Dataset)
-   (gnt:usesNormalization a owl:ObjectProperty)
-   (gnt:usesNormalization skos:definition "Normalization techniques this resource has")
-   (gnt:usesPlatform rdfs:domain dcat:Dataset)
-   (gnt:usesPlatform a owl:ObjectProperty)
-   (gnt:usesPlatform skos:definition "The Platform this resource uses")
-   (gnt:hasGeoSeriesId rdfs:domain dcat:Dataset)
-   (gnt:hasGeoSeriesId a owl:ObjectProperty)
-   (gnt:hasGeoSeriesId skos:definition "id of record in NCBI database")
-   (gnt:hasExperimentType rdfs:domain dcat:Dataset)
-   (gnt:hasExperimentType a owl:ObjectProperty)
-   (gnt:hasExperimentType rdfs:label "Experiment Type Metadata")
-   (gnt:hasExperimentType skos:definition "Information about the experiment type")
-   (gnt:hasTissueInfo rdfs:domain dcat:Dataset)
-   (gnt:hasTissueInfo a owl:ObjectProperty)
-   (gnt:hasTissueInfo skos:definition "Metadata about Tissue for this resource")
-   (gnt:hasExperimentDesignInfo rdfs:domain dcat:Dataset)
-   (gnt:hasExperimentDesignInfo rdfs:label "Experiment Design")
-   (gnt:hasExperimentDesignInfo a owl:ObjectProperty)
-   (gnt:hasExperimentDesignInfo skos:definition "Information about how the experiment was designed")
-   (gnt:hasNotes rdfs:domain dcat:Dataset)
-   (gnt:hasNotes a owl:ObjectProperty)
-   (gnt:hasNotes rdfs:label "Notes")
-   (gnt:hasNotes skos:definition "Extra Notes about this dataset")
-   (gnt:hasDataProcessingInfo rdfs:domain dcat:Dataset)
-   (gnt:hasDataProcessingInfo rdfs:label "About Data Processing")
-   (gnt:hasDataProcessingInfo a owl:ObjectProperty)
-   (gnt:hasDataProcessingInfo skos:definition "Information about how this dataset was processed")
-   (gnt:hasPlatformInfo rdfs:domain dcat:Dataset)
-   (gnt:hasPlatformInfo a owl:ObjectProperty)
-   (gnt:hasPlatformInfo rdfs:label "About Platform")
-   (gnt:hasPlatformInfo skos:definition "Information about the platform that was used with this dataset")
-   (gnt:hasCaseInfo rdfs:domain dcat:Dataset)
-   (gnt:hasCaseInfo rdfs:label "About Case")
-   (gnt:hasCaseInfo a owl:ObjectProperty)
-   (gnt:hasCaseInfo skos:definition "Information about the cases used in this platform")
-   (gnt:hasSummary rdfs:domain dcat:Dataset)
-   (gnt:hasSummary rdfs:label "Summary")
-   (gnt:hasSummary a owl:ObjectProperty)
-   (gnt:hasSummary skos:definition "Summary information about dataset")
-   (gnt:hasCitation rdfs:domain dcat:Dataset)
-   (gnt:hasCitation rdfs:label "Citation")
-   (gnt:hasCitation a owl:ObjectProperty)
-   (gnt:hasCitation skos:definition "Citation for this dataset")
-   (gnt:hasContributors rdfs:domain dcat:Dataset)
-   (gnt:hasContributors rdfs:label "Contributors")
-   (gnt:hasContributors a owl:ObjectProperty)
-   (gnt:hasContributors skos:definition "Contributors of this resource")
-   ;; The predicate name matches the one set in the triples below; it was
-   ;; previously misspelt "hashasExperimentDesign" and so described a
-   ;; predicate that no dataset ever used.
-   (gnt:hasExperimentDesign rdfs:domain dcat:Dataset)
-   (gnt:hasExperimentDesign rdfs:label "Experiment Design")
-   (gnt:hasExperimentDesign a owl:ObjectProperty)
-   (gnt:hasExperimentDesign skos:definition "Experiment Design for this resource")
-   (gnt:hasTissueInfo rdfs:domain dcat:Dataset)
-   (gnt:hasTissueInfo rdfs:label "Tissue Information")
-   (gnt:hasTissueInfo a owl:ObjectProperty)
-   (gnt:hasTissueInfo skos:definition "Tissue information about dataset")
-   (gnt:hasExperimentType skos:definition "Information about the experiment type")
-   (gnt:hasAcknowledgement rdfs:domain dcat:Dataset)
-   (gnt:hasAcknowledgement rdfs:label "Acknowledgement")
-   (gnt:hasAcknowledgement a owl:ObjectProperty)
-   (gnt:hasAcknowledgement skos:definition "People to acknowledge"))
-  ;; The subject is InfoPageName with every character other than ASCII
-  ;; letters, digits and colons replaced by an underscore.
-  (triples (string->identifier
-            "" (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                         (field InfoFiles InfoPageName)
-                                         'pre "_" 'post))
-    (set rdf:type 'dcat:Dataset)
-    ;; The SQL expression picks the category from whichever *Freeze table
-    ;; matched, or '' when none did.
-    (set xkos:classifiedUnder
-         (let ([dataset-type
-                (string-trim-both
-                 (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', '')))"
-                         DatasetType)))])
-           (if (not (string-null? dataset-type))
-               (string->symbol
-                dataset-type)
-               "")))
-    ;; A label that is exactly "None"/"none" is replaced by "".
-    (set rdfs:label (regexp-substitute/global
-                     #f "^[Nn]one$"
-                     (field InfoFiles InfoPageName)
-                     ""))
-    (set skos:prefLabel
-         (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))"
-                 DatasetFullName)))
-    (set skos:altLabel (field Datasets DatasetName DatasetGroup))
-    ;; Prefer the publication title (with "Unpublished" blanked out) and
-    ;; fall back to the info file title.  An `or' cannot express this:
-    ;; every string, even the empty one, is true in Scheme, so the fallback
-    ;; would never be reached.
-    (set dct:title
-         (regexp-substitute/global
-          #f "^[Nn]one$"
-          (first-non-blank-string
-           (regexp-substitute/global
-            #f "^Unpublished$" (field Datasets PublicationTitle) "")
-           (field InfoFiles InfoFileTitle))
-          ""))
-    (set dct:created
-         (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))"
-                 createTimeGenoFreeze)))
-    (set dcat:contactPoint
-         (investigator-attributes->id (field Investigators FirstName)
-                                      (field Investigators LastName)
-                                      (field Investigators Email)))
-    (set foaf:Organization
-         (field Organizations OrganizationName))
-    (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
-    (set dct:accessRights (string-downcase
-                           (field DatasetStatus DatasetStatusName)))
-    ;; NOTE(review): the publishfreeze, genofreeze and probesetfreeze
-    ;; transformers pass #:separator "" and #:proc string-capitalize-first
-    ;; when building this identifier; confirm both forms are meant to name
-    ;; the same group.
-    (set gnt:belongsToGroup
-         (string->identifier
-          "set"
-          (field ("IFNULL(InbredSet.Name, IFNULL(PublishInbredSet.Name, GenoInbredSet.Name))"
-                  InbredSetName))))
-    (set gnt:hasTissue (string->identifier "tissue"
-                                           (field Tissue Short_Name)))
-    (set gnt:usesNormalization
-         (string->identifier "avgMethod"
-                             ;; If AvgMethodName is NULL, assume N/A.
-                             (if (string-blank? (field AvgMethod Name AvgMethodName))
-                                 "N/A" (field AvgMethod Name AvgMethodName))))
-    ;; Each of the following predicates links to a document in gn-docs,
-    ;; but only when the matching database column has some content.
-    (set gnt:hasSummary
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "summary"
-                                (field InfoFiles Summary)))
-    (set gnt:hasTissueInfo
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "tissue"
-                                (field Datasets AboutTissue)))
-    (set gnt:hasCitation
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "citation"
-                                (field Datasets Citation)))
-    (set gnt:hasSpecifics
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "specifics"
-                                (field InfoFiles Specifics)))
-    (set gnt:hasCaseInfo
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "cases"
-                                (field Datasets AboutCases)))
-    (set gnt:hasPlatformInfo
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "platform"
-                                (field Datasets AboutPlatform)))
-    (set gnt:hasDataProcessingInfo
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "processing"
-                                (field Datasets AboutDataProcessing)))
-    (set gnt:hasNotes
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "notes"
-                                (field Datasets Notes)))
-    (set gnt:hasExperimentType
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "experiment-type"
-                                (field InfoFiles Experiment_Type)))
-    (set gnt:hasExperimentDesign
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "experiment-design"
-                                (field Datasets ExperimentDesign)))
-    (set gnt:hasContributors
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "contributors"
-                                (field Datasets Contributors)))
-    (set gnt:hasAcknowledgement
-         (dataset-document-link (field InfoFiles InfoPageName)
-                                "acknowledgment"
-                                (field Datasets Acknowledgment)))
-    (set gnt:usesPlatform
-         (string->identifier "platform"
-                             (field GeneChip Name GeneChip)))
-    ;; Only the first "GSE<digits>" run found in GeoSeries is kept; when
-    ;; there is none the value is "".
-    (set gnt:hasGeoSeriesId
-         (let ((s
-                (string-match "GSE[0-9]*"
-                              (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries)))))
-           (if s (ontology
-                  'geoSeries: (match:substring s))
-               "")))))
-
-;; These are phenotype datasets that don't have Infofile metadata.
-;; Each selected PublishFreeze row is described as a dcat:Dataset
-;; classified under gnc:Phenotype.
-(define-transformer publishfreeze
-  ;; Only rows with public > 0 and confidentiality < 1 that found no
-  ;; matching InfoFiles row in the left join are selected.
-  (tables (PublishFreeze
-           (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name")
-           (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId"))
-          "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoFileId IS NULL")
-  ;; The subject is PublishFreeze.Name with every character other than
-  ;; ASCII letters, digits and colons replaced by an underscore.
-  (triples
-      (string->identifier
-       ""
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field PublishFreeze Name)
-                                 'pre "_" 'post))
-    (set rdf:type 'dcat:Dataset)
-    (set xkos:classifiedUnder 'gnc:Phenotype)
-    (set dct:title (field PublishFreeze FullName))
-    (set rdfs:label (field PublishFreeze Name))
-    (set skos:altLabel (field PublishFreeze ShortName))
-    ;; The creation time is annotated with the ^^xsd:date datatype.
-    (set dct:created (annotate-field
-                      (field PublishFreeze CreateTime)
-                      '^^xsd:date))
-    ;; Group identifier: "set" prefix, no separator, name passed through
-    ;; string-capitalize-first.
-    (set gnt:belongsToGroup
-         (string->identifier
-          "set" (field InbredSet Name InbredSetName)
-          #:separator ""
-          #:proc string-capitalize-first))))
-
-;; Transformer describing genotype datasets that have no InfoFiles
-;; metadata: each selected GenoFreeze row becomes a dcat:Dataset
-;; classified under gnc:Genotype.
-(define-transformer genofreeze
-  ;; Only rows with public > 0 and confidentiality < 1 that found no
-  ;; matching InfoFiles row in the left join are selected.
-  (tables (GenoFreeze
-           (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")
-           (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId"))
-          "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL")
-  ;; The subject is GenoFreeze.Name with every character other than ASCII
-  ;; letters, digits and colons replaced by an underscore.  One pass is
-  ;; enough: a second pass could only turn underscores into underscores.
-  (triples
-      (string->identifier
-       ""
-       (regexp-substitute/global #f "[^A-Za-z0-9:]"
-                                 (field GenoFreeze Name)
-                                 'pre "_" 'post))
-    (set rdf:type 'dcat:Dataset)
-    (set xkos:classifiedUnder 'gnc:Genotype)
-    (set rdfs:label (field GenoFreeze Name))
-    (set dct:title (field GenoFreeze FullName))
-    (set skos:altLabel (field GenoFreeze ShortName))
-    ;; The creation time is annotated with the ^^xsd:date datatype.
-    (set dct:created (annotate-field
-                      (field GenoFreeze CreateTime)
-                      '^^xsd:date))
-    ;; Group identifier: "set" prefix, no separator, name passed through
-    ;; string-capitalize-first.
-    (set gnt:belongsToGroup
-         (string->identifier
-          "set" (field InbredSet Name InbredSetName)
-          #:separator ""
-          #:proc string-capitalize-first))))
-
-;; Molecular Traits are also referred to as ProbeSets.  This transformer
-;; describes the ProbeSetFreeze rows that have no InfoFiles metadata as
-;; dcat:Dataset resources classified under gnc:Probeset.
-(define-transformer probesetfreeze
-  ;; Only rows with public > 0 that found no matching InfoFiles row in the
-  ;; left join are selected, grouped by ProbeFreeze.Id.
-  (tables (ProbeSetFreeze
-           (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
-           (left-join ProbeFreeze "USING (ProbeFreezeId)")
-           (left-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
-           (left-join InbredSet "ON ProbeFreeze.InbredSetId = InbredSet.Id")
-           (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId"))
-          "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id")
-  (schema-triples
-   (gnt:usesNormalization rdfs:domain gnc:probeset)
-   (gnt:usesDataScale rdfs:domain gnc:probeset)
-   (gnt:usesDataScale a owl:ObjectProperty)
-   (gnt:usesDataScale skos:definition "The data scale this resource uses"))
-  ;; The subject is ProbeSetFreeze.Name with every character other than
-  ;; ASCII letters, digits and colons replaced by an underscore.
-  (triples
-      (string->identifier
-       ""
-       (regexp-substitute/global
-        #f "[^A-Za-z0-9:]"
-        (field ProbeSetFreeze Name)
-        'pre "_" 'post))
-    (set rdf:type 'dcat:Dataset)
-    (set xkos:classifiedUnder 'gnc:Probeset)
-    (set gnt:usesNormalization
-         (string->identifier "avgMethod"
-                             ;; If AvgMethodName is NULL, assume N/A.
-                             (if (string-blank? (field AvgMethod Name AvgMethodName))
-                                 "N/A" (field AvgMethod Name AvgMethodName))))
-    (set dct:title (field ProbeSetFreeze FullName))
-    (set rdfs:label (field ProbeSetFreeze ShortName))
-    (set skos:prefLabel (field ProbeSetFreeze Name))
-    (set skos:altLabel (field ProbeSetFreeze Name2))
-    ;; XSD datatype names are case-sensitive: the datatype is
-    ;; xsd:dateTime; "xsd:datetime" does not exist.
-    ;; NOTE(review): the publishfreeze and genofreeze transformers annotate
-    ;; their CreateTime with ^^xsd:date; confirm which datatype matches
-    ;; the values stored in ProbeSetFreeze.CreateTime.
-    (set dct:created (annotate-field
-                      (field ProbeSetFreeze CreateTime)
-                      '^^xsd:dateTime))
-    (set gnt:usesDataScale (field ProbeSetFreeze DataScale))
-    (set gnt:hasTissue
-         (string->identifier
-          "tissue"
-          (field Tissue Short_Name)))
-    ;; Group identifier: "set" prefix, no separator, name passed through
-    ;; string-capitalize-first.
-    (set gnt:belongsToGroup
-         (string->identifier
-          "set" (field InbredSet Name InbredSetName)
-          #:separator ""
-          #:proc string-capitalize-first))))
-
-
-
-;; Script entry point: parse the command line, read the connection
-;; settings and run every transformer defined in this file.
-;;
-;; Options:
-;;   -s, --settings FILE        file whose first datum is the connection
-;;                              settings (required)
-;;   -o, --output VALUE         passed on as the #:rdf output
-;;   -d, --documentation VALUE  passed on as the #:documentation output
-(let* ((option-spec
-        ;; --settings is required: its value is handed straight to
-        ;; call-with-input-file, which cannot work with the #f default.
-        ;; Marking it required makes getopt-long report the missing option
-        ;; by name instead.
-        '((settings (single-char #\s) (value #t) (required? #t))
-          (output (single-char #\o) (value #t))
-          (documentation (single-char #\d) (value #t))))
-       (options (getopt-long (command-line) option-spec))
-       (settings (option-ref options 'settings #f))
-       (output (option-ref options 'output #f))
-       (documentation (option-ref options 'documentation #f))
-       ;; The settings file is read as a single Scheme datum.
-       (%connection-settings
-        (call-with-input-file settings
-          read)))
-  (with-documentation
-   (name "Info files / Investigators Metadata")
-   (connection %connection-settings)
-   (table-metadata? #f)
-   (prefixes
-    '(("v:" "<http://www.w3.org/2006/vcard/ns#>")
-      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
-      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
-      ("dcat:" "<http://www.w3.org/ns/dcat#>")
-      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
-      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
-      ("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
-      ("gnt:" "<http://genenetwork.org/term/>")
-      ("gn:" "<http://genenetwork.org/id/>")
-      ("gnc:" "<http://genenetwork.org/category/>")
-      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
-      ("owl:" "<http://www.w3.org/2002/07/owl#>")
-      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
-      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")
-      ("dct:" "<http://purl.org/dc/terms/>")))
-   (inputs
-    (list info-files
-          publishfreeze
-          genofreeze
-          probesetfreeze
-          investigators
-          gene-chip))
-   (outputs
-    `(#:documentation ,documentation
-      #:rdf ,output))))
-
-