aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-08-21 14:54:21 +0300
committerMunyoki Kilyungi2023-08-21 14:56:57 +0300
commit51b3c0548c98e0bc05e11a89cbf6b75d31b9f8d5 (patch)
treeab3d7c6f589ed8480f0a9d451566681bcfd8eaaf
parent849874fdfe11003f05abe5f82efde974a8c8a388 (diff)
downloadgn-transform-databases-51b3c0548c98e0bc05e11a89cbf6b75d31b9f8d5.tar.gz
Remove "dump-" prefix
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r--dump/schema-dump.scm20
-rw-r--r--dump/special-forms.scm36
-rwxr-xr-xexamples/dataset-metadata.scm (renamed from examples/dump-dataset-metadata.scm)30
-rwxr-xr-xexamples/generif.scm (renamed from examples/dump-generif.scm)18
-rwxr-xr-xexamples/genotype.scm (renamed from examples/dump-genotype.scm)10
-rwxr-xr-xexamples/phenotype.scm (renamed from examples/dump-phenotype.scm)10
-rwxr-xr-xexamples/probeset-data.scm (renamed from examples/dump-probeset-data.scm)20
-rwxr-xr-xexamples/probeset.scm (renamed from examples/dump-probeset.scm)10
-rwxr-xr-xexamples/publication.scm (renamed from examples/dump-publication.scm)10
-rwxr-xr-xexamples/species-metadata.scm (renamed from examples/dump-species-metadata.scm)28
-rwxr-xr-xexamples/tissue.scm (renamed from examples/dump-tissue.scm)10
-rwxr-xr-xjson-dump.scm8
12 files changed, 101 insertions, 109 deletions
diff --git a/dump/schema-dump.scm b/dump/schema-dump.scm
index 86626f4..525bf65 100644
--- a/dump/schema-dump.scm
+++ b/dump/schema-dump.scm
@@ -7,7 +7,7 @@
#:use-module (dump table))
-(define (dump-table-fields db table)
+(define (table-fields db table)
(format #t "* ~a~%" table)
(match (sql-find db
(select-query ((TableComments Comment))
@@ -41,10 +41,10 @@
(select-query ((TableComments TableName))
(TableComments))))
-(define (dump-schema-annotations db)
+(define (schema-annotations db)
(call-with-target-database
(lambda (db)
- (for-each (cut dump-table-fields db <>)
+ (for-each (cut table-fields db <>)
(get-tables-from-comments db)))))
(define (tables db)
@@ -70,7 +70,7 @@ is a <table> object."
(format #f "WHERE table_schema = '~a'"
(assq-ref %connection-settings 'sql-database))))))
-(define (dump-schema db)
+(define (schema db)
(let ((tables (tables db)))
(for-each (lambda (table)
(let ((table-id (string->identifier
@@ -94,13 +94,13 @@ is a <table> object."
(table-columns table))))
tables)))
-(define* (dump-data-table db table-name data-field
- #:optional (default-dump-directory ""))
- (let ((dump-directory (string-append default-dump-directory "/" table-name))
+(define* (data-table db table-name data-field
+ #:optional (default-directory ""))
+ (let ((directory (string-append default-directory "/" table-name))
(port #f)
(current-strain-id #f))
- (unless (file-exists? dump-directory)
- (mkdir dump-directory))
+ (unless (file-exists? directory)
+ (mkdir directory))
(sql-for-each (match-lambda
(((_ . strain-id)
(_ . value))
@@ -112,7 +112,7 @@ is a <table> object."
;; If no file is open, open new file.
(unless port
(set! current-strain-id strain-id)
- (let ((filename (string-append dump-directory
+ (let ((filename (string-append directory
"/" (number->string strain-id))))
(display filename (current-error-port))
(newline (current-error-port))
diff --git a/dump/special-forms.scm b/dump/special-forms.scm
index f771cc1..2650580 100644
--- a/dump/special-forms.scm
+++ b/dump/special-forms.scm
@@ -14,11 +14,11 @@
find-clause
remove-namespace
column-id
- dump-id
+ id
syntax-let
blank-node
map-alist
- dump-with-documentation
+ with-documentation
define-transformer))
(define (key->assoc-ref alist x)
@@ -276,12 +276,12 @@ ALIST field-name) forms."
"user2" table-name)
"__" column-name)))
- (define (dump-id dump-table predicate)
+ (define (id table predicate)
(symbol->string
(string->identifier
"dump"
(string-append
- dump-table "_" (remove-namespace (symbol->string predicate)))))))
+ table "_" (remove-namespace (symbol->string predicate)))))))
(define-syntax blank-node
(syntax-rules ()
@@ -396,11 +396,11 @@ must be remedied."
((triples subject predicate-clauses ...) (triples)
(find-clause #'(clauses ...) 'triples)))
#`(define* (name db #:key
- (dump-metadata? #f)
- (dump-data? #t)
- (dump-documentation? #f))
- (when dump-metadata?
- #,@(let ((dump-table (symbol->string (syntax->datum #'primary-table)))
+ (metadata? #f)
+ (data? #t)
+ (documentation? #f))
+ (when metadata?
+ #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
(subject-type (any (lambda (predicate)
(syntax-case predicate (rdf:type)
((_ rdf:type type) #'type)
@@ -427,14 +427,14 @@ must be remedied."
(datum->syntax
x (column-id query (symbol->string alias))))))
(collect-fields predicate-clause))))
- #,(dump-id dump-table (syntax->datum #'predicate)))
+ #,(id table (syntax->datum #'predicate)))
;; Automatically create domain triples
;; for predicates.
(when #,subject-type
(triple 'predicate 'rdfs:domain #,subject-type))))
(_ (error "Invalid predicate clause:" predicate-clause))))
#'(predicate-clauses ...))))
- (when dump-documentation?
+ (when documentation?
(format #t "~%## '~a'~%~%" (syntax->datum #'name))
#,(syntax-case #'schema-triples-clause (schema-triples)
((schema-triples (triple-subject triple-predicate triple-object) ...)
@@ -477,7 +477,7 @@ The above query results to triples that have the form:
'()
#,@(field->datum #'(predicate-clauses ...))))
(format #t "```~%Here's an example query:~%~%```sparql~%")
- (dump-documentation?)
+ (documentation?)
(newline)
(let* ((result
(map-alist (sql-find
@@ -520,7 +520,7 @@ The above query results to triples that have the form:
(primary-table other-tables ...)
tables-raw ...)))
(format #t "```~%~%"))
- (when dump-data?
+ (when data?
#,(syntax-case #'schema-triples-clause (schema-triples)
((schema-triples (triple-subject triple-predicate triple-object) ...)
#`(for-each triple
@@ -545,7 +545,7 @@ The above query results to triples that have the form:
(cadr kv)
default)))
-(define-syntax dump-with-documentation
+(define-syntax with-documentation
(syntax-rules ()
((_ (key value) ...)
(let* ((alist `((key . ,value) ...))
@@ -567,9 +567,9 @@ The above query results to triples that have the form:
(for-each
(lambda (proc)
(proc db
- #:dump-metadata? #f
- #:dump-data? #f
- #:dump-documentation?
+ #:metadata? #f
+ #:data? #f
+ #:documentation?
(lambda () (for-each
(match-lambda
((k v)
@@ -593,7 +593,7 @@ The above query results to triples that have the form:
(newline)
(for-each
(lambda (proc)
- (proc db #:dump-metadata? table-metadata?))
+ (proc db #:metadata? table-metadata?))
inputs))
#:encoding "UTF-8")))))))
diff --git a/examples/dump-dataset-metadata.scm b/examples/dataset-metadata.scm
index 6173201..5680a2b 100755
--- a/examples/dump-dataset-metadata.scm
+++ b/examples/dataset-metadata.scm
@@ -34,7 +34,7 @@
(list first-name last-name (fix-email-id email))
"_")))
-(define-transformer dump-investigators
+(define-transformer investigators
;; There are a few duplicate entries. We group by email to
;; deduplicate.
(tables (Investigators)
@@ -56,7 +56,7 @@
(set v:postal-code (field Investigators ZipCode))
(set v:country-name (field Investigators Country))))
-(define-transformer dump-gene-chip
+(define-transformer gene-chip
(tables (GeneChip
(left-join Species "USING (SpeciesId)")))
(schema-triples
@@ -87,7 +87,7 @@
(ontology 'geoSeries:
(string-trim-both (field GeneChip GeoPlatform))))))
-(define-transformer dump-info-files
+(define-transformer info-files
(tables (InfoFiles
(left-join PublishFreeze "ON InfoFiles.InfoPageName = PublishFreeze.Name")
(left-join GenoFreeze "ON InfoFiles.InfoPageName = GenoFreeze.Name")
@@ -251,7 +251,7 @@
(field Datasets Acknowledgment)))))
;; These are phenotype datasets that don't have Infofile metadata
-(define-transformer dump-publishfreeze
+(define-transformer publishfreeze
(tables (PublishFreeze
(left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name")
(left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId"))
@@ -277,7 +277,7 @@
#:separator ""
#:proc string-capitalize-first))))
-(define-transformer dump-genofreeze
+(define-transformer genofreeze
(tables (GenoFreeze
(left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name")
(left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId"))
@@ -308,7 +308,7 @@
#:proc string-capitalize-first))))
;; Molecular Traits are also referred to as ProbeSets
-(define-transformer dump-probesetfreeze
+(define-transformer probesetfreeze
(tables (ProbeSetFreeze
(left-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
(left-join ProbeFreeze "USING (ProbeFreezeId)")
@@ -356,7 +356,7 @@
-(dump-with-documentation
+(with-documentation
(name "Info files / Investigators Metadata")
(connection %connection-settings)
(table-metadata? #f)
@@ -375,13 +375,13 @@
("taxon:" "<http://purl.uniprot.org/taxonomy/>")
("dct:" "<http://purl.org/dc/terms/>")))
(inputs
- (list dump-info-files
- dump-publishfreeze
- dump-genofreeze
- dump-probesetfreeze
- dump-investigators
- dump-gene-chip))
+ (list info-files
+ publishfreeze
+ genofreeze
+ probesetfreeze
+ investigators
+ gene-chip))
(outputs
- '(#:documentation "./docs/dump-info-pages.md"
- #:rdf "/export/data/genenetwork-virtuoso/dump-info-pages.ttl")))
+ '(#:documentation "./docs/info-pages.md"
+ #:rdf "/export/data/genenetwork-virtuoso/info-pages.ttl")))
diff --git a/examples/dump-generif.scm b/examples/generif.scm
index f754274..0b3c8e4 100755
--- a/examples/dump-generif.scm
+++ b/examples/generif.scm
@@ -18,7 +18,7 @@
-(define-transformer dump-genewiki-symbols
+(define-transformer genewiki-symbols
(tables (GeneRIF_BASIC
(left-join Species "USING (SpeciesId)"))
"GROUP BY GeneId ORDER BY BINARY symbol")
@@ -37,7 +37,7 @@
(string-split (field ("GROUP_CONCAT(DISTINCT TaxID)" taxId))
#\,)))))
-(define-transformer dump-gn-genewiki-entries
+(define-transformer gn-genewiki-entries
(tables (GeneRIF
(left-join GeneRIF_BASIC "USING (symbol)")
(left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
@@ -101,7 +101,7 @@
(cut string-split-substring <> "::::")
comments))))))
-(define-transformer dump-ncbi-genewiki-entries
+(define-transformer ncbi-genewiki-entries
(tables (GeneRIF_BASIC)
"GROUP BY GeneId, comment, createtime")
(schema-triples
@@ -124,7 +124,7 @@
-(dump-with-documentation
+(with-documentation
(name "GeneRIF Metadata")
(connection %connection-settings)
(table-metadata? #f)
@@ -141,10 +141,10 @@
("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
("owl:" "<http://www.w3.org/2002/07/owl#>")))
(inputs
- (list ;; dump-genewiki-symbols
- dump-gn-genewiki-entries
- ;; dump-ncbi-genewiki-entries
+ (list ;; genewiki-symbols
+ gn-genewiki-entries
+ ;; ncbi-genewiki-entries
))
(outputs
- '(#:documentation "./docs/dump-generif.md"
- #:rdf "./verified-data/dump-generif.ttl")))
+ '(#:documentation "./docs/generif.md"
+ #:rdf "./verified-data/generif.ttl")))
diff --git a/examples/dump-genotype.scm b/examples/genotype.scm
index a055039..63b85a7 100755
--- a/examples/dump-genotype.scm
+++ b/examples/genotype.scm
@@ -30,7 +30,7 @@
["Bat (Glossophaga soricina)" "Glossophaga soricina"]
[str str]))
-(define-transformer dump-genotypes
+(define-transformer genotypes
(tables (Geno
(left-join Species "USING (SpeciesId)")))
(schema-triples
@@ -103,7 +103,7 @@
-(dump-with-documentation
+(with-documentation
(name "Genotype Metadata")
(connection %connection-settings)
(table-metadata? #f)
@@ -118,7 +118,7 @@
("skos:" "<http://www.w3.org/2004/02/skos/core#>")
("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
(inputs
- (list dump-genotypes))
+ (list genotypes))
(outputs
- '(#:documentation "./docs/dump-genotype.md"
- #:rdf "/export/data/genenetwork-virtuoso/dump-genotype.ttl")))
+ '(#:documentation "./docs/genotype.md"
+ #:rdf "/export/data/genenetwork-virtuoso/genotype.ttl")))
diff --git a/examples/dump-phenotype.scm b/examples/phenotype.scm
index b7ae003..1c68159 100755
--- a/examples/dump-phenotype.scm
+++ b/examples/phenotype.scm
@@ -19,7 +19,7 @@
read))
-(define-transformer dump-phenotypes
+(define-transformer phenotypes
(tables (PublishXRef
(left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
(left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
@@ -102,7 +102,7 @@
-(dump-with-documentation
+(with-documentation
(name "Phenotypes Metadata")
(connection %connection-settings)
(table-metadata? #f)
@@ -119,7 +119,7 @@
("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")))
(inputs
(list
- dump-phenotypes))
+ phenotypes))
(outputs
- '(#:documentation "./docs/dump-phenotype.md"
- #:rdf "/export/data/genenetwork-virtuoso/dump-phenotype.ttl")))
+ '(#:documentation "./docs/phenotype.md"
+ #:rdf "/export/data/genenetwork-virtuoso/phenotype.ttl")))
diff --git a/examples/dump-probeset-data.scm b/examples/probeset-data.scm
index 55f3f4b..d46bcda 100755
--- a/examples/dump-probeset-data.scm
+++ b/examples/probeset-data.scm
@@ -18,7 +18,7 @@
-(define-transformer dump-probeset-data
+(define-transformer probeset-data
(tables (ProbeSetXRef
(left-join ProbeSet "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id")
(left-join ProbeSetFreeze "ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id"))
@@ -74,19 +74,11 @@
(set gnt:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue))
'^^xsd:double))
(set gnt:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2))
- '^^xsd:double))
- (set gnt:belongsToDataset
- (string->identifier
- ""
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field ProbeSetFreeze Name)
- 'pre "_" 'post)
- #:separator ""
- #:proc string-capitalize-first))))
+ '^^xsd:double))))
-(dump-with-documentation
+(with-documentation
(name "Probeset Summary Statistics")
(connection %connection-settings)
(table-metadata? #f)
@@ -100,7 +92,7 @@
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
(inputs
- (list dump-probeset-data))
+ (list probeset-data))
(outputs
- '(#:documentation "./docs/dump-probeset-summary-stats.md"
- #:rdf "./verified-data/dump-probeset-summary-stats.ttl")))
+ '(#:documentation "./docs/probeset-summary-stats.md"
+ #:rdf "./verified-data/probeset-summary-stats.ttl")))
diff --git a/examples/dump-probeset.scm b/examples/probeset.scm
index 3a55506..68ddb59 100755
--- a/examples/dump-probeset.scm
+++ b/examples/probeset.scm
@@ -17,7 +17,7 @@
read))
-(define-transformer dump-probeset
+(define-transformer probeset
(tables (ProbeSet
(left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")))
(schema-triples
@@ -156,7 +156,7 @@
-(dump-with-documentation
+(with-documentation
(name "ProbeSet Metadata")
(connection %connection-settings)
(table-metadata? #f)
@@ -178,7 +178,7 @@
("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
(inputs
- (list dump-probeset))
+ (list probeset))
(outputs
- '(#:documentation "./docs/dump-probeset.md"
- #:rdf "./verified-data/dump-probeset.ttl")))
+ '(#:documentation "./docs/probeset.md"
+ #:rdf "./verified-data/probeset.ttl")))
diff --git a/examples/dump-publication.scm b/examples/publication.scm
index 1881872..313ee96 100755
--- a/examples/dump-publication.scm
+++ b/examples/publication.scm
@@ -18,7 +18,7 @@
-(define-transformer dump-publication
+(define-transformer publication
(tables (Publication))
(triples
(let ((pmid (field
@@ -59,7 +59,7 @@
-(dump-with-documentation
+(with-documentation
(name "Publications Metadata")
(connection %connection-settings)
(table-metadata? #f)
@@ -75,7 +75,7 @@
("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")))
(inputs
- (list dump-publication))
+ (list publication))
(outputs
- '(#:documentation "./docs/dump-publication.md"
- #:rdf "./verified-data/dump-publication.ttl")))
+ '(#:documentation "./docs/publication.md"
+ #:rdf "./verified-data/publication.ttl")))
diff --git a/examples/dump-species-metadata.scm b/examples/species-metadata.scm
index b0ac6f8..f3794b8 100755
--- a/examples/dump-species-metadata.scm
+++ b/examples/species-metadata.scm
@@ -28,7 +28,7 @@
["Bat (Glossophaga soricina)" "Glossophaga soricina"]
[str str]))
-(define-transformer dump-species
+(define-transformer species
(tables (Species))
(schema-triples
(gnc:species a skos:Concept)
@@ -97,7 +97,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
!#
-(define-transformer dump-strain
+(define-transformer strain
(tables (Strain
(left-join Species "ON Strain.SpeciesId = Species.SpeciesId")))
(schema-triples
@@ -129,7 +129,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
(set gnt:alias (sanitize-rdf-string (field ("IF ((Strain.Alias != Strain.Name), Strain.Alias, '')" Alias))))
(set gnt:symbol (field ("IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '')" Symbol)))))
-(define-transformer dump-mapping-method
+(define-transformer mapping-method
(tables (MappingMethod))
(schema-triples
(gnc:mappingMethod a skos:Concept)
@@ -140,7 +140,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
(set rdfs:label (field MappingMethod Name))))
-(define-transformer dump-inbred-set
+(define-transformer inbred-set
(tables (InbredSet
(left-join Species "ON InbredSet.SpeciesId=Species.Id")
(left-join MappingMethod
@@ -152,7 +152,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
(gnt:geneticType rdfs:domain gnc:set)
(gnt:code a owl:ObjectProperty)
(gnt:code rdfs:domain gnc:set)
- ;; Already defined as an owl prop in dump-species
+ ;; Already defined as an owl prop in species
(gnt:family rdfs:domain gnc:set)
(gnt:phenotype a owl:ObjectProperty)
(gnt:phenotype rdfs:domain gnc:set)
@@ -189,7 +189,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
molecularTrait))
"||")))))
-(define-transformer dump-avg-method
+(define-transformer avg-method
;; The Name and Normalization fields seem to be the same. Dump only
;; the Name field.
(tables (AvgMethod))
@@ -201,7 +201,7 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
-(dump-with-documentation
+(with-documentation
(name "Species Metadata")
(connection %connection-settings)
(table-metadata? #f)
@@ -216,11 +216,11 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
(inputs
(list
- dump-inbred-set
- dump-species
- dump-strain
- dump-mapping-method
- dump-avg-method))
+ inbred-set
+ species
+ strain
+ mapping-method
+ avg-method))
(outputs
- '(#:documentation "./docs/dump-species-metadata.md"
- #:rdf "/export/data/genenetwork-virtuoso/dump-species-metadata.ttl")))
+ '(#:documentation "./docs/species-metadata.md"
+ #:rdf "/export/data/genenetwork-virtuoso/species-metadata.ttl")))
diff --git a/examples/dump-tissue.scm b/examples/tissue.scm
index 3658a26..8ce96c8 100755
--- a/examples/dump-tissue.scm
+++ b/examples/tissue.scm
@@ -18,7 +18,7 @@
-(define-transformer dump-tissue
+(define-transformer tissue
;; The Name and TissueName fields seem to be identical. BIRN_lex_ID
;; and BIRN_lex_Name are mostly NULL.
(tables (Tissue))
@@ -32,7 +32,7 @@
-(dump-with-documentation
+(with-documentation
(name "Tissue Metadata")
(connection %connection-settings)
(table-metadata? #f)
@@ -44,7 +44,7 @@
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")))
(inputs
- (list dump-tissue))
+ (list tissue))
(outputs
- '(#:documentation "./docs/dump-tissue.md"
- #:rdf "./verified-data/dump-tissue.ttl")))
+ '(#:documentation "./docs/tissue.md"
+ #:rdf "./verified-data/tissue.ttl")))
diff --git a/json-dump.scm b/json-dump.scm
index ccb64bc..8625139 100755
--- a/json-dump.scm
+++ b/json-dump.scm
@@ -8,7 +8,7 @@
-(define %dump-directory
+(define %directory
(list-ref (command-line) 2))
(define %data-directory
@@ -55,9 +55,9 @@ inside it."
result)
(file-system-fold enter? leaf down up skip error 0 path))
-(define (dump-rdf path)
+(define (rdf path)
(with-output-to-file
- (string-append %dump-directory "/sampledata.ttl")
+ (string-append %directory "/sampledata.ttl")
(lambda ()
(prefix "gn:" "<http://genenetwork.org/>")
(newline)
@@ -70,4 +70,4 @@ inside it."
(newline))
(json-metadata->rdf file))))))))
-(dump-rdf %data-directory)
+(rdf %data-directory)