diff options
-rw-r--r-- | dump/schema-dump.scm | 20 | ||||
-rw-r--r-- | dump/special-forms.scm | 36 | ||||
-rwxr-xr-x | examples/dataset-metadata.scm (renamed from examples/dump-dataset-metadata.scm) | 30 | ||||
-rwxr-xr-x | examples/generif.scm (renamed from examples/dump-generif.scm) | 18 | ||||
-rwxr-xr-x | examples/genotype.scm (renamed from examples/dump-genotype.scm) | 10 | ||||
-rwxr-xr-x | examples/phenotype.scm (renamed from examples/dump-phenotype.scm) | 10 | ||||
-rwxr-xr-x | examples/probeset-data.scm (renamed from examples/dump-probeset-data.scm) | 20 | ||||
-rwxr-xr-x | examples/probeset.scm (renamed from examples/dump-probeset.scm) | 10 | ||||
-rwxr-xr-x | examples/publication.scm (renamed from examples/dump-publication.scm) | 10 | ||||
-rwxr-xr-x | examples/species-metadata.scm (renamed from examples/dump-species-metadata.scm) | 28 | ||||
-rwxr-xr-x | examples/tissue.scm (renamed from examples/dump-tissue.scm) | 10 | ||||
-rwxr-xr-x | json-dump.scm | 8 |
12 files changed, 101 insertions, 109 deletions
diff --git a/dump/schema-dump.scm b/dump/schema-dump.scm index 86626f4..525bf65 100644 --- a/dump/schema-dump.scm +++ b/dump/schema-dump.scm @@ -7,7 +7,7 @@ #:use-module (dump table)) -(define (dump-table-fields db table) +(define (table-fields db table) (format #t "* ~a~%" table) (match (sql-find db (select-query ((TableComments Comment)) @@ -41,10 +41,10 @@ (select-query ((TableComments TableName)) (TableComments)))) -(define (dump-schema-annotations db) +(define (schema-annotations db) (call-with-target-database (lambda (db) - (for-each (cut dump-table-fields db <>) + (for-each (cut table-fields db <>) (get-tables-from-comments db))))) (define (tables db) @@ -70,7 +70,7 @@ is a <table> object." (format #f "WHERE table_schema = '~a'" (assq-ref %connection-settings 'sql-database)))))) -(define (dump-schema db) +(define (schema db) (let ((tables (tables db))) (for-each (lambda (table) (let ((table-id (string->identifier @@ -94,13 +94,13 @@ is a <table> object." (table-columns table)))) tables))) -(define* (dump-data-table db table-name data-field - #:optional (default-dump-directory "")) - (let ((dump-directory (string-append default-dump-directory "/" table-name)) +(define* (data-table db table-name data-field + #:optional (default-directory "")) + (let ((directory (string-append default-directory "/" table-name)) (port #f) (current-strain-id #f)) - (unless (file-exists? dump-directory) - (mkdir dump-directory)) + (unless (file-exists? directory) + (mkdir directory)) (sql-for-each (match-lambda (((_ . strain-id) (_ . value)) @@ -112,7 +112,7 @@ is a <table> object." ;; If no file is open, open new file. (unless port (set! current-strain-id strain-id) - (let ((filename (string-append dump-directory + (let ((filename (string-append directory "/" (number->string strain-id)))) (display filename (current-error-port)) (newline (current-error-port)) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index f771cc1..2650580 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -14,11 +14,11 @@ find-clause remove-namespace column-id - dump-id + id syntax-let blank-node map-alist - dump-with-documentation + with-documentation define-transformer)) (define (key->assoc-ref alist x) @@ -276,12 +276,12 @@ ALIST field-name) forms." "user2" table-name) "__" column-name))) - (define (dump-id dump-table predicate) + (define (id table predicate) (symbol->string (string->identifier "dump" (string-append - dump-table "_" (remove-namespace (symbol->string predicate))))))) + table "_" (remove-namespace (symbol->string predicate))))))) (define-syntax blank-node (syntax-rules () @@ -396,11 +396,11 @@ must be remedied." ((triples subject predicate-clauses ...) (triples) (find-clause #'(clauses ...) 'triples))) #`(define* (name db #:key - (dump-metadata? #f) - (dump-data? #t) - (dump-documentation? #f)) - (when dump-metadata? - #,@(let ((dump-table (symbol->string (syntax->datum #'primary-table))) + (metadata? #f) + (data? #t) + (documentation? #f)) + (when metadata? + #,@(let ((table (symbol->string (syntax->datum #'primary-table))) (subject-type (any (lambda (predicate) (syntax-case predicate (rdf:type) ((_ rdf:type type) #'type) @@ -427,14 +427,14 @@ must be remedied." (datum->syntax x (column-id query (symbol->string alias)))))) (collect-fields predicate-clause)))) - #,(dump-id dump-table (syntax->datum #'predicate))) + #,(id table (syntax->datum #'predicate))) ;; Automatically create domain triples ;; for predicates. (when #,subject-type (triple 'predicate 'rdfs:domain #,subject-type)))) (_ (error "Invalid predicate clause:" predicate-clause)))) #'(predicate-clauses ...)))) - (when dump-documentation? + (when documentation? (format #t "~%## '~a'~%~%" (syntax->datum #'name)) #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) @@ -477,7 +477,7 @@ The above query results to triples that have the form: '() #,@(field->datum #'(predicate-clauses ...)))) (format #t "```~%Here's an example query:~%~%```sparql~%") - (dump-documentation?) + (documentation?) (newline) (let* ((result (map-alist (sql-find @@ -520,7 +520,7 @@ The above query results to triples that have the form: (primary-table other-tables ...) tables-raw ...))) (format #t "```~%~%")) - (when dump-data? + (when data? #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) #`(for-each triple @@ -545,7 +545,7 @@ The above query results to triples that have the form: (cadr kv) default))) -(define-syntax dump-with-documentation +(define-syntax with-documentation (syntax-rules () ((_ (key value) ...) (let* ((alist `((key . ,value) ...)) @@ -567,9 +567,9 @@ The above query results to triples that have the form: (for-each (lambda (proc) (proc db - #:dump-metadata? #f - #:dump-data? #f - #:dump-documentation? + #:metadata? #f + #:data? #f + #:documentation? (lambda () (for-each (match-lambda ((k v) @@ -593,7 +593,7 @@ The above query results to triples that have the form: (newline) (for-each (lambda (proc) - (proc db #:dump-metadata? table-metadata?)) + (proc db #:metadata? table-metadata?)) inputs)) #:encoding "UTF-8"))))))) diff --git a/examples/dump-dataset-metadata.scm b/examples/dataset-metadata.scm index 6173201..5680a2b 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dataset-metadata.scm @@ -34,7 +34,7 @@ (list first-name last-name (fix-email-id email)) "_"))) -(define-transformer dump-investigators +(define-transformer investigators ;; There are a few duplicate entries. We group by email to ;; deduplicate. (tables (Investigators) @@ -56,7 +56,7 @@ (set v:postal-code (field Investigators ZipCode)) (set v:country-name (field Investigators Country)))) -(define-transformer dump-gene-chip +(define-transformer gene-chip (tables (GeneChip (left-join Species "USING (SpeciesId)"))) (schema-triples @@ -87,7 +87,7 @@ (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) -(define-transformer dump-info-files +(define-transformer info-files (tables (InfoFiles (left-join PublishFreeze "ON InfoFiles.InfoPageName = PublishFreeze.Name") (left-join GenoFreeze "ON InfoFiles.InfoPageName = GenoFreeze.Name") @@ -251,7 +251,7 @@ (field Datasets Acknowledgment))))) ;; These are phenotype datasets that don't have Infofile metadata -(define-transformer dump-publishfreeze +(define-transformer publishfreeze (tables (PublishFreeze (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name") (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId")) @@ -277,7 +277,7 @@ #:separator "" #:proc string-capitalize-first)))) -(define-transformer dump-genofreeze +(define-transformer genofreeze (tables (GenoFreeze (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name") (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) @@ -308,7 +308,7 @@ #:proc string-capitalize-first)))) ;; Molecular Traits are also referred to as ProbeSets -(define-transformer dump-probesetfreeze +(define-transformer probesetfreeze (tables (ProbeSetFreeze (left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name") (left-join ProbeFreeze "USING (ProbeFreezeId)") @@ -356,7 +356,7 @@ -(dump-with-documentation +(with-documentation (name "Info files / Investigators Metadata") (connection %connection-settings) (table-metadata? #f) @@ -375,13 +375,13 @@ ("taxon:" "<http://purl.uniprot.org/taxonomy/>") ("dct:" "<http://purl.org/dc/terms/>"))) (inputs - (list dump-info-files - dump-publishfreeze - dump-genofreeze - dump-probesetfreeze - dump-investigators - dump-gene-chip)) + (list info-files + publishfreeze + genofreeze + probesetfreeze + investigators + gene-chip)) (outputs - '(#:documentation "./docs/dump-info-pages.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-info-pages.ttl"))) + '(#:documentation "./docs/info-pages.md" + #:rdf "/export/data/genenetwork-virtuoso/info-pages.ttl"))) diff --git a/examples/dump-generif.scm b/examples/generif.scm index f754274..0b3c8e4 100755 --- a/examples/dump-generif.scm +++ b/examples/generif.scm @@ -18,7 +18,7 @@ -(define-transformer dump-genewiki-symbols +(define-transformer genewiki-symbols (tables (GeneRIF_BASIC (left-join Species "USING (SpeciesId)")) "GROUP BY GeneId ORDER BY BINARY symbol") @@ -37,7 +37,7 @@ (string-split (field ("GROUP_CONCAT(DISTINCT TaxID)" taxId)) #\,))))) -(define-transformer dump-gn-genewiki-entries +(define-transformer gn-genewiki-entries (tables (GeneRIF (left-join GeneRIF_BASIC "USING (symbol)") (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") @@ -101,7 +101,7 @@ (cut string-split-substring <> "::::") comments)))))) -(define-transformer dump-ncbi-genewiki-entries +(define-transformer ncbi-genewiki-entries (tables (GeneRIF_BASIC) "GROUP BY GeneId, comment, createtime") (schema-triples @@ -124,7 +124,7 @@ -(dump-with-documentation +(with-documentation (name "GeneRIF Metadata") (connection %connection-settings) (table-metadata? #f) @@ -141,10 +141,10 @@ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") ("owl:" "<http://www.w3.org/2002/07/owl#>"))) (inputs - (list ;; dump-genewiki-symbols - dump-gn-genewiki-entries - ;; dump-ncbi-genewiki-entries + (list ;; genewiki-symbols + gn-genewiki-entries + ;; ncbi-genewiki-entries )) (outputs - '(#:documentation "./docs/dump-generif.md" - #:rdf "./verified-data/dump-generif.ttl"))) + '(#:documentation "./docs/generif.md" + #:rdf "./verified-data/generif.ttl"))) diff --git a/examples/dump-genotype.scm b/examples/genotype.scm index a055039..63b85a7 100755 --- a/examples/dump-genotype.scm +++ b/examples/genotype.scm @@ -30,7 +30,7 @@ ["Bat (Glossophaga soricina)" "Glossophaga soricina"] [str str])) -(define-transformer dump-genotypes +(define-transformer genotypes (tables (Geno (left-join Species "USING (SpeciesId)"))) (schema-triples @@ -103,7 +103,7 @@ -(dump-with-documentation +(with-documentation (name "Genotype Metadata") (connection %connection-settings) (table-metadata? #f) @@ -118,7 +118,7 @@ ("skos:" "<http://www.w3.org/2004/02/skos/core#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) (inputs - (list dump-genotypes)) + (list genotypes)) (outputs - '(#:documentation "./docs/dump-genotype.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-genotype.ttl"))) + '(#:documentation "./docs/genotype.md" + #:rdf "/export/data/genenetwork-virtuoso/genotype.ttl"))) diff --git a/examples/dump-phenotype.scm b/examples/phenotype.scm index b7ae003..1c68159 100755 --- a/examples/dump-phenotype.scm +++ b/examples/phenotype.scm @@ -19,7 +19,7 @@ read)) -(define-transformer dump-phenotypes +(define-transformer phenotypes (tables (PublishXRef (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") (left-join Publication "ON Publication.Id = PublishXRef.PublicationId") @@ -102,7 +102,7 @@ -(dump-with-documentation +(with-documentation (name "Phenotypes Metadata") (connection %connection-settings) (table-metadata? #f) @@ -119,7 +119,7 @@ ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>"))) (inputs (list - dump-phenotypes)) + phenotypes)) (outputs - '(#:documentation "./docs/dump-phenotype.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-phenotype.ttl"))) + '(#:documentation "./docs/phenotype.md" + #:rdf "/export/data/genenetwork-virtuoso/phenotype.ttl"))) diff --git a/examples/dump-probeset-data.scm b/examples/probeset-data.scm index 55f3f4b..d46bcda 100755 --- a/examples/dump-probeset-data.scm +++ b/examples/probeset-data.scm @@ -18,7 +18,7 @@ -(define-transformer dump-probeset-data +(define-transformer probeset-data (tables (ProbeSetXRef (left-join ProbeSet "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id") (left-join ProbeSetFreeze "ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id")) @@ -74,19 +74,11 @@ (set gnt:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) '^^xsd:double)) (set gnt:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) - '^^xsd:double)) - (set gnt:belongsToDataset - (string->identifier - "" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ProbeSetFreeze Name) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first)))) + '^^xsd:double)))) -(dump-with-documentation +(with-documentation (name "Probeset Summary Statistics") (connection %connection-settings) (table-metadata? #f) @@ -100,7 +92,7 @@ ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) (inputs - (list dump-probeset-data)) + (list probeset-data)) (outputs - '(#:documentation "./docs/dump-probeset-summary-stats.md" - #:rdf "./verified-data/dump-probeset-summary-stats.ttl"))) + '(#:documentation "./docs/probeset-summary-stats.md" + #:rdf "./verified-data/probeset-summary-stats.ttl"))) diff --git a/examples/dump-probeset.scm b/examples/probeset.scm index 3a55506..68ddb59 100755 --- a/examples/dump-probeset.scm +++ b/examples/probeset.scm @@ -17,7 +17,7 @@ read)) -(define-transformer dump-probeset +(define-transformer probeset (tables (ProbeSet (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples @@ -156,7 +156,7 @@ -(dump-with-documentation +(with-documentation (name "ProbeSet Metadata") (connection %connection-settings) (table-metadata? #f) @@ -178,7 +178,7 @@ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") ("skos:" "<http://www.w3.org/2004/02/skos/core#>"))) (inputs - (list dump-probeset)) + (list probeset)) (outputs - '(#:documentation "./docs/dump-probeset.md" - #:rdf "./verified-data/dump-probeset.ttl"))) + '(#:documentation "./docs/probeset.md" + #:rdf "./verified-data/probeset.ttl"))) diff --git a/examples/dump-publication.scm b/examples/publication.scm index 1881872..313ee96 100755 --- a/examples/dump-publication.scm +++ b/examples/publication.scm @@ -18,7 +18,7 @@ -(define-transformer dump-publication +(define-transformer publication (tables (Publication)) (triples (let ((pmid (field @@ -59,7 +59,7 @@ -(dump-with-documentation +(with-documentation (name "Publications Metadata") (connection %connection-settings) (table-metadata? #f) @@ -75,7 +75,7 @@ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>"))) (inputs - (list dump-publication)) + (list publication)) (outputs - '(#:documentation "./docs/dump-publication.md" - #:rdf "./verified-data/dump-publication.ttl"))) + '(#:documentation "./docs/publication.md" + #:rdf "./verified-data/publication.ttl"))) diff --git a/examples/dump-species-metadata.scm b/examples/species-metadata.scm index b0ac6f8..f3794b8 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/species-metadata.scm @@ -28,7 +28,7 @@ ["Bat (Glossophaga soricina)" "Glossophaga soricina"] [str str])) -(define-transformer dump-species +(define-transformer species (tables (Species)) (schema-triples (gnc:species a skos:Concept) @@ -97,7 +97,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. !# -(define-transformer dump-strain +(define-transformer strain (tables (Strain (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples @@ -129,7 +129,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. (set gnt:alias (sanitize-rdf-string (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias)))) (set gnt:symbol (field ("IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '')" Symbol))))) -(define-transformer dump-mapping-method +(define-transformer mapping-method (tables (MappingMethod)) (schema-triples (gnc:mappingMethod a skos:Concept) @@ -140,7 +140,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. (set rdfs:label (field MappingMethod Name)))) -(define-transformer dump-inbred-set +(define-transformer inbred-set (tables (InbredSet (left-join Species "ON InbredSet.SpeciesId=Species.Id") (left-join MappingMethod @@ -152,7 +152,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. (gnt:geneticType rdfs:domain gnc:set) (gnt:code a owl:ObjectProperty) (gnt:code rdfs:domain gnc:set) - ;; Already defined as an owl prop in dump-species + ;; Already defined as an owl prop in species (gnt:family rdfs:domain gnc:set) (gnt:phenotype a owl:ObjectProperty) (gnt:phenotype rdfs:domain gnc:set) @@ -189,7 +189,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. molecularTrait)) "||"))))) -(define-transformer dump-avg-method +(define-transformer avg-method ;; The Name and Normalization fields seem to be the same. Dump only ;; the Name field. (tables (AvgMethod)) @@ -201,7 +201,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. -(dump-with-documentation +(with-documentation (name "Species Metadata") (connection %connection-settings) (table-metadata? #f) @@ -216,11 +216,11 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used. ("taxon:" "<http://purl.uniprot.org/taxonomy/>"))) (inputs (list - dump-inbred-set - dump-species - dump-strain - dump-mapping-method - dump-avg-method)) + inbred-set + species + strain + mapping-method + avg-method)) (outputs - '(#:documentation "./docs/dump-species-metadata.md" - #:rdf "/export/data/genenetwork-virtuoso/dump-species-metadata.ttl"))) + '(#:documentation "./docs/species-metadata.md" + #:rdf "/export/data/genenetwork-virtuoso/species-metadata.ttl"))) diff --git a/examples/dump-tissue.scm b/examples/tissue.scm index 3658a26..8ce96c8 100755 --- a/examples/dump-tissue.scm +++ b/examples/tissue.scm @@ -18,7 +18,7 @@ -(define-transformer dump-tissue +(define-transformer tissue ;; The Name and TissueName fields seem to be identical. BIRN_lex_ID ;; and BIRN_lex_Name are mostly NULL. (tables (Tissue)) @@ -32,7 +32,7 @@ -(dump-with-documentation +(with-documentation (name "Tissue Metadata") (connection %connection-settings) (table-metadata? #f) @@ -44,7 +44,7 @@ ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>"))) (inputs - (list dump-tissue)) + (list tissue)) (outputs - '(#:documentation "./docs/dump-tissue.md" - #:rdf "./verified-data/dump-tissue.ttl"))) + '(#:documentation "./docs/tissue.md" + #:rdf "./verified-data/tissue.ttl"))) diff --git a/json-dump.scm b/json-dump.scm index ccb64bc..8625139 100755 --- a/json-dump.scm +++ b/json-dump.scm @@ -8,7 +8,7 @@ -(define %dump-directory +(define %directory (list-ref (command-line) 2)) (define %data-directory @@ -55,9 +55,9 @@ inside it." result) (file-system-fold enter? leaf down up skip error 0 path)) -(define (dump-rdf path) +(define (rdf path) (with-output-to-file - (string-append %dump-directory "/sampledata.ttl") + (string-append %directory "/sampledata.ttl") (lambda () (prefix "gn:" "<http://genenetwork.org/>") (newline) @@ -70,4 +70,4 @@ inside it." (newline)) (json-metadata->rdf file)))))))) -(dump-rdf %data-directory) +(rdf %data-directory) |