#! /usr/bin/env guile
!#

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


(define (remap-species-identifiers str)
  "Return the binomial species name to use in place of STR.  A small
   fixed table lists the database spellings that need rewriting; any
   value not in that table is returned unchanged.  Ideally the database
   itself would be corrected and this table removed."
  (let ((known-remappings
         ;; NOTE(review): the \"Macaca mulatta\" entry maps to a different
         ;; species name -- kept as found, confirm against the database.
         '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
           ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
           ("Macaca mulatta" . "Macaca nemestrina")
           ("Bat (Glossophaga soricina)" . "Glossophaga soricina"))))
    ;; assoc-ref compares keys with equal?, like the literal patterns
    ;; of a match form would.
    (or (assoc-ref known-remappings str)
        str)))

;; Strip every space character from EMAIL.  One email ID in the
;; Investigators table contains spaces; this cleans it up.
(define (fix-email-id email)
  (let ((space-free-parts (string-split email #\space)))
    (string-join space-free-parts "")))

(define (investigator-attributes->id first-name last-name email)
  ;; The identifier is built from the first name, the last name and
  ;; the space-stripped email ID rather than from the email ID alone,
  ;; because exactly one record ("Evan Williams") has no email ID.
  ;; Finding that email ID and inserting it into the database would be
  ;; the better fix.
  (let* ((clean-email (fix-email-id email))
         (id-parts (list first-name last-name clean-email))
         (joined (string-join id-parts "_")))
    (string->identifier "investigator" joined)))

;; Transformer describing how rows of the Investigators table are
;; turned into triples.  Each subject is typed foaf:Person and gets
;; FOAF name/homepage properties plus vCard (v:) address properties.
;; The subject identifier is computed by investigator-attributes->id,
;; the same procedure used wherever an investigator is referenced, so
;; the identifiers line up.
(define-transformer investigators
  ;; There are a few duplicate entries. We group by email to
  ;; deduplicate.
  (tables (Investigators)
          "GROUP BY Email")
  (triples (investigator-attributes->id (field Investigators FirstName)
                                        (field Investigators LastName)
                                        (field Investigators Email))
    (set rdf:type 'foaf:Person)
    ;; Full name is "FirstName LastName" separated by a single space.
    (set foaf:name (string-append (field Investigators FirstName) " "
                                  (field Investigators LastName)))
    (set foaf:givenName
         (field Investigators FirstName))
    (set foaf:familyName
         (field Investigators LastName))
    (set foaf:homepage (field Investigators Url))
    ;; Postal address components, taken verbatim from the table.
    (set v:adr (field Investigators Address))
    (set v:locality (field Investigators City))
    (set v:region (field Investigators State))
    (set v:postal-code (field Investigators ZipCode))
    (set v:country-name (field Investigators Country))))

;; Transformer describing how rows of the GeneChip table (left-joined
;; with Species) are turned into gnc:geneChip platform resources.  The
;; subject identifier is built from GeneChip.Name with the "platform"
;; prefix.
(define-transformer gene-chip
  (tables (GeneChip
           (left-join Species "USING (SpeciesId)")))
  (schema-triples
   (gnc:geneChip a skos:Concept)
   (gnc:geneChip
    skos:description
    "This is a set of controlled terms that are used to describe a given gene chip/platform")
   (gnt:hasGeoSeriesId rdfs:domain gnc:platform)
   (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip)
   (gnt:hasGOTreeValue a owl:ObjectProperty)
   ;; The definition text previously lacked its verb ("This resource
   ;; the following ...").
   (gnt:hasGOTreeValue skos:definition "This resource has the following GO tree value")
   (gnt:hasGOTreeValue rdfs:domain gnc:geneChip))
  (triples (string->identifier "platform" (field GeneChip Name))
    (set rdf:type 'gnc:geneChip)
    (set rdfs:label (field GeneChip GeneChipName))
    (set skos:prefLabel (field GeneChip Name))
    ;; The title is only emitted as an alternative label when it
    ;; differs from GeneChipName; otherwise the SQL yields NULL.
    (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)"
                               Title)))
    (set gnt:hasGOTreeValue (field GeneChip Go_tree_value))
    ;; The species name is first normalised by
    ;; remap-species-identifiers, then turned into an identifier with
    ;; an empty prefix and an empty separator.
    (set xkos:classifiedUnder
         (string->identifier "" (remap-species-identifiers (field Species Fullname))
                             #:separator ""
                             #:proc string-capitalize-first))
    ;; GeoPlatform is trimmed of surrounding whitespace before being
    ;; placed in the geoSeries: namespace.
    (set gnt:hasGeoSeriesId
         (ontology 'geoSeries:
                   (string-trim-both (field GeneChip GeoPlatform))))))

;; Return the object to emit for one of the "dataset documentation"
;; predicates (summary, citation, notes, ...).
;;
;; PAGE-NAME is the dataset's InfoPageName, FILE-STEM is the base name
;; of the .rtf file in the gn-docs repository (e.g. "summary"), and
;; CONTENT is the corresponding free-text column from the database.
;;
;; When CONTENT is the empty list or blank, the empty string is
;; returned.  Otherwise the result is a symbol holding the bracketed
;; URL of the document; the dataset directory in that URL is PAGE-NAME
;; with every character outside [A-Za-z0-9:] replaced by "_" and then
;; passed through string-capitalize-first.
(define (dataset-info-link page-name file-stem content)
  (if (or (null? content) (string-blank? content))
      ""
      (string->symbol
       (format
        #f "<https://git.genenetwork.org/gn-docs/tree/general/datasets/~a/~a.rtf>"
        (string-capitalize-first
         (regexp-substitute/global
          #f "[^A-Za-z0-9:]"
          page-name
          'pre "_" 'post))
        file-stem))))

;; Transformer describing how rows of InfoFiles, joined with the
;; freeze tables and the Datasets metadata tables, are turned into
;; dcat:Dataset resources.  Only rows with a GN accession id are
;; selected (see the WHERE clause below).
(define-transformer info-files
  (tables (InfoFiles
           (left-join PublishFreeze "ON InfoFiles.InfoPageName = PublishFreeze.Name")
           (left-join GenoFreeze "ON InfoFiles.InfoPageName = GenoFreeze.Name")
           (left-join ProbeSetFreeze "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (left-join InbredSet "ON InfoFiles.InbredSetId = InbredSet.InbredSetId")
           (left-join Species "ON InfoFiles.SpeciesId = Species.SpeciesId")
           (left-join Datasets "USING (DatasetId)")
           (left-join DatasetStatus "USING (DatasetStatusId)")
           (left-join Tissue "USING (TissueId)")
           (left-join Investigators "USING (InvestigatorId)")
           (left-join AvgMethod "USING (AvgMethodId)")
           (left-join Organizations "USING (OrganizationId)")
           (left-join GeneChip "USING (GeneChipId)"))
          ;; XXXX: There are datasets that don't have the InbredSetId
          ;; in the Infofiles table.  This clause allows us to check
          ;; if they exist in the (Publish/Geno)Freeze tables.
          "LEFT JOIN InbredSet PublishInbredSet ON PublishFreeze.InbredSetId = PublishInbredSet.InbredSetId LEFT JOIN InbredSet GenoInbredSet ON GenoFreeze.InbredSetId = GenoInbredSet.InbredSetId  WHERE GN_AccesionId IS NOT NULL")
  (schema-triples
   (gnt:hasTissue rdfs:domain dcat:Dataset)
   (gnt:hasTissue a owl:ObjectProperty)
   (gnt:hasTissue skos:definition "Tissues this resource has")
   (gnt:usesNormalization rdfs:domain dcat:Dataset)
   (gnt:usesNormalization a owl:ObjectProperty)
   (gnt:usesNormalization skos:definition "Normalization techniques this resource has")
   (gnt:usesPlatform rdfs:domain dcat:Dataset)
   (gnt:usesPlatform a owl:ObjectProperty)
   (gnt:usesPlatform skos:definition "The Platform this resource uses")
   (gnt:hasGeoSeriesId rdfs:domain dcat:Dataset)
   (gnt:hasGeoSeriesId a owl:ObjectProperty)
   (gnt:hasGeoSeriesId skos:definition "id of record in NCBI database")
   (gnt:hasExperimentType rdfs:domain dcat:Dataset)
   (gnt:hasExperimentType a owl:ObjectProperty)
   (gnt:hasExperimentType rdfs:label "Experiment Type Metadata")
   (gnt:hasExperimentType skos:definition "Information about the experiment type")
   ;; gnt:hasTissueInfo is declared once, keeping the label and a
   ;; single definition.
   (gnt:hasTissueInfo rdfs:domain dcat:Dataset)
   (gnt:hasTissueInfo rdfs:label "Tissue Information")
   (gnt:hasTissueInfo a owl:ObjectProperty)
   (gnt:hasTissueInfo skos:definition "Metadata about Tissue for this resource")
   (gnt:hasExperimentDesignInfo rdfs:domain dcat:Dataset)
   (gnt:hasExperimentDesignInfo rdfs:label "Experiment Design")
   (gnt:hasExperimentDesignInfo a owl:ObjectProperty)
   (gnt:hasExperimentDesignInfo skos:definition "Information about how the experiment was designed")
   (gnt:hasNotes rdfs:domain dcat:Dataset)
   (gnt:hasNotes a owl:ObjectProperty)
   (gnt:hasNotes rdfs:label "Notes")
   (gnt:hasNotes skos:definition "Extra Notes about this dataset")
   (gnt:hasDataProcessingInfo rdfs:domain dcat:Dataset)
   (gnt:hasDataProcessingInfo rdfs:label "About Data Processing")
   (gnt:hasDataProcessingInfo a owl:ObjectProperty)
   (gnt:hasDataProcessingInfo skos:definition "Information about how this dataset was processed")
   (gnt:hasPlatformInfo rdfs:domain dcat:Dataset)
   (gnt:hasPlatformInfo a owl:ObjectProperty)
   (gnt:hasPlatformInfo rdfs:label "About Platform")
   (gnt:hasPlatformInfo skos:definition "Information about the platform that was used with this dataset")
   (gnt:hasCaseInfo rdfs:domain dcat:Dataset)
   (gnt:hasCaseInfo rdfs:label "About Case")
   (gnt:hasCaseInfo a owl:ObjectProperty)
   (gnt:hasCaseInfo skos:definition "Information about the cases used in this platform")
   (gnt:hasSummary rdfs:domain dcat:Dataset)
   (gnt:hasSummary rdfs:label "Summary")
   (gnt:hasSummary a owl:ObjectProperty)
   (gnt:hasSummary skos:definition "Summary information about dataset")
   (gnt:hasCitation rdfs:domain dcat:Dataset)
   (gnt:hasCitation rdfs:label "Citation")
   (gnt:hasCitation a owl:ObjectProperty)
   (gnt:hasCitation skos:definition "Citation for this dataset")
   (gnt:hasContributors rdfs:domain dcat:Dataset)
   (gnt:hasContributors rdfs:label "Contributors")
   (gnt:hasContributors a owl:ObjectProperty)
   (gnt:hasContributors skos:definition "Contributors of this resource")
   ;; This must be the predicate that the triples below actually emit
   ;; (gnt:hasExperimentDesign).
   (gnt:hasExperimentDesign rdfs:domain dcat:Dataset)
   (gnt:hasExperimentDesign rdfs:label "Experiment Design")
   (gnt:hasExperimentDesign a owl:ObjectProperty)
   (gnt:hasExperimentDesign skos:definition "Experiment Design for this resource")
   (gnt:hasAcknowledgement rdfs:domain dcat:Dataset)
   (gnt:hasAcknowledgement rdfs:label "Acknowledgement")
   (gnt:hasAcknowledgement a owl:ObjectProperty)
   (gnt:hasAcknowledgement skos:definition "People to acknowledge"))
  ;; The subject is the InfoPageName with every character outside
  ;; [A-Za-z0-9:] replaced by an underscore.
  (triples (string->identifier
            "" (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                         (field InfoFiles InfoPageName)
                                         'pre "_" 'post))
    (set rdf:type 'dcat:Dataset)
    ;; The dataset category is decided in SQL from whichever freeze
    ;; table matched; an empty string means no category.
    (set xkos:classifiedUnder
         (let ([dataset-type
                (string-trim-both
                 (field ("IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', '')))"
                         DatasetType)))])
           (if (not (string-null? dataset-type))
               (string->symbol
                dataset-type)
               "")))
    ;; A page name of exactly "None"/"none" is replaced by "".
    (set rdfs:label (regexp-substitute/global
                     #f "^[Nn]one$"
                     (field InfoFiles InfoPageName)
                     ""))
    ;; NOTE(review): unlike dct:created below, this does not fall back
    ;; to ProbeSetFreeze -- confirm whether that is intended.
    (set skos:prefLabel
         (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))"
                 DatasetFullName)))
    (set skos:altLabel (field Datasets DatasetName DatasetGroup))
    ;; Title: the publication title unless it is blank or exactly
    ;; "Unpublished", in which case the info-file title is used.
    ;; regexp-substitute/global with a #f port always returns a string,
    ;; so the fallback has to be chosen with an explicit blank test
    ;; rather than with `or'.  A result of exactly "None"/"none" is
    ;; replaced by "".
    (set dct:title
         (let* ((publication-title
                 (regexp-substitute/global
                  #f "^Unpublished$" (field Datasets PublicationTitle) ""))
                (info-file-title (field InfoFiles InfoFileTitle))
                (title
                 (cond
                  ((not (string-blank? publication-title)) publication-title)
                  ;; Guard against a non-string (e.g. NULL) fallback.
                  ((string? info-file-title) info-file-title)
                  (else ""))))
           (regexp-substitute/global #f "^[Nn]one$" title "")))
    (set dct:created
         (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))"
                 createTimeGenoFreeze)))
    (set dcat:contactPoint
         (investigator-attributes->id (field Investigators FirstName)
                                      (field Investigators LastName)
                                      (field Investigators Email)))
    ;; NOTE(review): foaf:Organization is used here in predicate
    ;; position -- kept as found.
    (set foaf:Organization
         (field Organizations OrganizationName))
    (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
    (set dct:accessRights (string-downcase
                           (field DatasetStatus DatasetStatusName)))
    ;; The group identifier is built the same way as in the
    ;; publishfreeze, genofreeze and probesetfreeze transformers so
    ;; that all datasets refer to a group by the same identifier.
    (set gnt:belongsToGroup
         (string->identifier
          "set"
          (field ("IFNULL(InbredSet.Name, IFNULL(PublishInbredSet.Name, GenoInbredSet.Name))"
                  InbredSetName))
          #:separator ""
          #:proc string-capitalize-first))
    (set gnt:hasTissue (string->identifier "tissue"
                                           (field Tissue Short_Name)))
    (set gnt:usesNormalization
         (string->identifier "avgMethod"
                             ;; If AvgMethodName is NULL, assume N/A.
                             (if (string-blank? (field AvgMethod Name AvgMethodName))
                                 "N/A" (field AvgMethod Name AvgMethodName))))
    ;; Each predicate below links to the matching .rtf document in
    ;; gn-docs, but only when the corresponding database column has
    ;; content; see dataset-info-link.
    (set gnt:hasSummary
         (dataset-info-link (field InfoFiles InfoPageName)
                            "summary"
                            (field InfoFiles Summary)))
    (set gnt:hasTissueInfo
         (dataset-info-link (field InfoFiles InfoPageName)
                            "tissue"
                            (field Datasets AboutTissue)))
    (set gnt:hasCitation
         (dataset-info-link (field InfoFiles InfoPageName)
                            "citation"
                            (field Datasets Citation)))
    (set gnt:hasSpecifics
         (dataset-info-link (field InfoFiles InfoPageName)
                            "specifics"
                            (field InfoFiles Specifics)))
    (set gnt:hasCaseInfo
         (dataset-info-link (field InfoFiles InfoPageName)
                            "cases"
                            (field Datasets AboutCases)))
    (set gnt:hasPlatformInfo
         (dataset-info-link (field InfoFiles InfoPageName)
                            "platform"
                            (field Datasets AboutPlatform)))
    (set gnt:hasDataProcessingInfo
         (dataset-info-link (field InfoFiles InfoPageName)
                            "processing"
                            (field Datasets AboutDataProcessing)))
    (set gnt:hasNotes
         (dataset-info-link (field InfoFiles InfoPageName)
                            "notes"
                            (field Datasets Notes)))
    (set gnt:hasExperimentType
         (dataset-info-link (field InfoFiles InfoPageName)
                            "experiment-type"
                            (field InfoFiles Experiment_Type)))
    (set gnt:hasExperimentDesign
         (dataset-info-link (field InfoFiles InfoPageName)
                            "experiment-design"
                            (field Datasets ExperimentDesign)))
    (set gnt:hasContributors
         (dataset-info-link (field InfoFiles InfoPageName)
                            "contributors"
                            (field Datasets Contributors)))
    (set gnt:hasAcknowledgement
         (dataset-info-link (field InfoFiles InfoPageName)
                            "acknowledgment"
                            (field Datasets Acknowledgment)))
    (set gnt:usesPlatform
         (string->identifier "platform"
                             (field GeneChip Name GeneChip)))
    ;; Only the first "GSE<digits>" run found in Datasets.GeoSeries is
    ;; used; when there is none the object is the empty string.
    (set gnt:hasGeoSeriesId
         (let ((s
                (string-match "GSE[0-9]*"
                              (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries)))))
           (if s (ontology
                  'geoSeries: (match:substring s))
               "")))))

;; These are phenotype datasets that don't have Infofile metadata.
;; Rows are taken from PublishFreeze; the left join against InfoFiles
;; combined with "InfoFiles.InfoFileId IS NULL" keeps only freezes
;; with no matching info file, and the WHERE clause further restricts
;; to public, non-confidential entries.
(define-transformer publishfreeze
  (tables (PublishFreeze
           (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name")
           (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId"))
          "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoFileId IS NULL")
  ;; The subject is the freeze name with every character outside
  ;; [A-Za-z0-9:] replaced by an underscore.
  (triples
      (string->identifier
       ""
       (regexp-substitute/global #f "[^A-Za-z0-9:]"
                                 (field PublishFreeze Name)
                                 'pre "_" 'post))
    (set rdf:type 'dcat:Dataset)
    (set xkos:classifiedUnder 'gnc:Phenotype)
    (set dct:title (field PublishFreeze FullName))
    (set rdfs:label (field PublishFreeze Name))
    (set skos:altLabel (field PublishFreeze ShortName))
    ;; The creation time is annotated with the xsd:date datatype.
    (set dct:created (annotate-field
                      (field PublishFreeze CreateTime)
                      '^^xsd:date))
    ;; Group identifier: "set" prefix, empty separator, name passed
    ;; through string-capitalize-first.
    (set gnt:belongsToGroup
         (string->identifier
          "set" (field InbredSet Name InbredSetName)
          #:separator ""
          #:proc string-capitalize-first))))

;; Genotype datasets that have no matching InfoFiles row.  Rows are
;; taken from GenoFreeze; the left join against InfoFiles combined
;; with "InfoFiles.InfoPageName IS NULL" keeps only freezes without an
;; info file, and the WHERE clause further restricts to public,
;; non-confidential entries.
(define-transformer genofreeze
  (tables (GenoFreeze
           (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")
           (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId"))
          "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL")
  ;; The subject is the freeze name with every character outside
  ;; [A-Za-z0-9:] replaced by an underscore.  One substitution pass is
  ;; enough: a second pass over the result could only replace "_" by
  ;; "_" again.
  (triples
      (string->identifier
       ""
       (regexp-substitute/global
        #f "[^A-Za-z0-9:]"
        (field GenoFreeze Name)
        'pre "_" 'post))
    (set rdf:type 'dcat:Dataset)
    (set xkos:classifiedUnder 'gnc:Genotype)
    (set rdfs:label (field GenoFreeze Name))
    (set dct:title (field GenoFreeze FullName))
    (set skos:altLabel (field GenoFreeze ShortName))
    ;; The creation time is annotated with the xsd:date datatype.
    (set dct:created (annotate-field
                      (field GenoFreeze CreateTime)
                      '^^xsd:date))
    ;; Group identifier: "set" prefix, empty separator, name passed
    ;; through string-capitalize-first.
    (set gnt:belongsToGroup
         (string->identifier
          "set" (field InbredSet Name InbredSetName)
          #:separator ""
          #:proc string-capitalize-first))))

;; Molecular Traits are also referred to as ProbeSets.
;; This transformer covers public ProbeSetFreeze rows that have no
;; matching InfoFiles row (left join plus "InfoFiles.InfoPageName IS
;; NULL").
(define-transformer probesetfreeze
  (tables (ProbeSetFreeze
           (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (left-join ProbeFreeze "USING (ProbeFreezeId)")
           (left-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
           (left-join InbredSet "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId"))
          ;; NOTE(review): grouping is by ProbeFreeze.Id, not
          ;; ProbeSetFreeze.Id -- confirm that collapsing several
          ;; ProbeSetFreeze rows per ProbeFreeze is intended.
          "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id")
  (schema-triples
   ;; NOTE(review): the domain is spelled gnc:probeset while the
   ;; triples below classify datasets under gnc:Probeset -- confirm
   ;; which spelling is the intended term.
   (gnt:usesNormalization rdfs:domain gnc:probeset)
   (gnt:usesDataScale rdfs:domain gnc:probeset)
   (gnt:usesDataScale a owl:ObjectProperty)
   (gnt:usesDataScale skos:definition "The data scale this resource uses"))
  ;; The subject is the freeze name with every character outside
  ;; [A-Za-z0-9:] replaced by an underscore.
  (triples
      (string->identifier
       ""
       (regexp-substitute/global
        #f "[^A-Za-z0-9:]"
        (field ProbeSetFreeze Name)
        'pre "_" 'post))
    (set rdf:type 'dcat:Dataset)
    (set xkos:classifiedUnder 'gnc:Probeset)
    (set gnt:usesNormalization
         (string->identifier "avgMethod"
                             ;; If AvgMethodName is NULL, assume N/A.
                             (if (string-blank? (field AvgMethod Name AvgMethodName))
                                 "N/A" (field AvgMethod Name AvgMethodName))))
    (set dct:title (field ProbeSetFreeze FullName))
    (set rdfs:label (field ProbeSetFreeze ShortName))
    (set skos:prefLabel (field ProbeSetFreeze Name))
    (set skos:altLabel (field ProbeSetFreeze Name2))
    ;; XSD datatype names are case-sensitive: the datatype is
    ;; xsd:dateTime, there is no xsd:datetime.
    (set dct:created (annotate-field
                      (field ProbeSetFreeze CreateTime)
                      '^^xsd:dateTime))
    (set gnt:usesDataScale (field ProbeSetFreeze DataScale))
    (set gnt:hasTissue
         (string->identifier
          "tissue"
          (field Tissue Short_Name)))
    ;; Group identifier: "set" prefix, empty separator, name passed
    ;; through string-capitalize-first.
    (set gnt:belongsToGroup
         (string->identifier
          "set" (field InbredSet Name InbredSetName)
          #:separator ""
          #:proc string-capitalize-first))))



;; Script entry point.
;;
;; Command-line options:
;;   -s, --settings FILE        file whose first datum is `read' and
;;                              used as the connection settings
;;                              (required)
;;   -o, --output FILE          passed on as the #:rdf output
;;   -d, --documentation FILE   passed on as the #:documentation output
;;
;; --settings is marked required so that getopt-long reports a missing
;; option itself, instead of #f reaching call-with-input-file and
;; failing with an unrelated type error.
(let* ((option-spec
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Info files / Investigators Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Namespace prefixes used by the transformers in this file.
   (prefixes
    '(("v:" "<http://www.w3.org/2006/vcard/ns#>")
      ("foaf:" "<http://xmlns.com/foaf/0.1/>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")
      ("dct:" "<http://purl.org/dc/terms/>")))
   ;; Transformers to run, in this order.
   (inputs
    (list info-files
          publishfreeze
          genofreeze
          probesetfreeze
          investigators
          gene-chip))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))