aboutsummaryrefslogtreecommitdiff
path: root/dump
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-08-21 15:03:20 +0300
committerMunyoki Kilyungi2023-08-21 15:06:06 +0300
commit8e1e4cceab516afab46ccced63ca9edab663ca11 (patch)
treecad625c3ecf0a555d7b56b777cdade535cb30d07 /dump
parent51b3c0548c98e0bc05e11a89cbf6b75d31b9f8d5 (diff)
downloadgn-transform-databases-8e1e4cceab516afab46ccced63ca9edab663ca11.tar.gz
Rename dump -> transform
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'dump')
-rw-r--r--dump/schema-dump.scm125
-rw-r--r--dump/special-forms.scm599
-rw-r--r--dump/sql.scm114
-rw-r--r--dump/string-similarity.scm37
-rw-r--r--dump/strings.scm101
-rw-r--r--dump/table.scm28
-rw-r--r--dump/triples.scm110
7 files changed, 0 insertions, 1114 deletions
diff --git a/dump/schema-dump.scm b/dump/schema-dump.scm
deleted file mode 100644
index 525bf65..0000000
--- a/dump/schema-dump.scm
+++ /dev/null
@@ -1,125 +0,0 @@
-(define-module (dump schema)
- #:use-module (ice-9 match)
- #:use-module (ice-9 srfi-26)
- #:use-module (dump sql)
- #:use-module (dump triples)
- #:use-module (dump strings)
- #:use-module (dump table))
-
-
-(define (table-fields db table)
- (format #t "* ~a~%" table)
- (match (sql-find db
- (select-query ((TableComments Comment))
- (TableComments)
- (format #f "WHERE TableName = '~a'" table)))
- ((("Comment" . comment))
- (format #t "~a~%" comment)))
- (sql-for-each (lambda (row)
- (match row
- ((("TableField" . table-field)
- ("Foreign_Key" . foreign-key)
- ("Annotation" . annotation))
- (format #t "** ~a~%" (substring table-field (1+ (string-length table))))
- (unless (string-null? foreign-key)
- (format #t "Foreign key to ~a~%" foreign-key))
- (unless (string-null? annotation)
- (display annotation)
- (newline)))))
- db
- (select-query ((TableFieldAnnotation TableField)
- (TableFieldAnnotation Foreign_Key)
- (TableFieldAnnotation Annotation))
- (TableFieldAnnotation)
- (format #f "WHERE TableField LIKE '~a.%'" table)))
- (newline))
-
-(define (get-tables-from-comments db)
- (sql-map (match-lambda
- ((("TableName" . table)) table))
- db
- (select-query ((TableComments TableName))
- (TableComments))))
-
-(define (schema-annotations db)
- (call-with-target-database
- (lambda (db)
- (for-each (cut table-fields db <>)
- (get-tables-from-comments db)))))
-
-(define (tables db)
- "Return list of all tables in DB. Each element of the returned list
-is a <table> object."
- (map (lambda (table)
- (set-table-columns table
- (sql-map (lambda (row)
- (make-column (assoc-ref row "Field")
- (assoc-ref row "Type")))
- db
- (format #f "SHOW COLUMNS FROM ~a" (table-name table)))))
- (sql-map (lambda (row)
- (make-table (assoc-ref row "table_name")
- ;; FIXME: This is probably correct only for
- ;; MyISAM tables.
- (assoc-ref row "data_length")
- #f))
- db
- (select-query ((information_schema.tables table_name)
- (information_schema.tables data_length))
- (information_schema.tables)
- (format #f "WHERE table_schema = '~a'"
- (assq-ref %connection-settings 'sql-database))))))
-
-(define (schema db)
- (let ((tables (tables db)))
- (for-each (lambda (table)
- (let ((table-id (string->identifier
- "table"
- ;; We downcase table names in
- ;; identifiers. So, we distinguish
- ;; between the user and User tables.
- (if (string=? (table-name table) "User")
- "user2"
- (table-name table)))))
- (triple table-id 'rdf:type 'gn:sqlTable)
- (triple table-id 'gn:name (table-name table))
- (triple table-id 'gn:hasSize (table-size table))
- (for-each (lambda (column)
- (let ((column-id (column-id (table-name table)
- (column-name column))))
- (triple column-id 'rdf:type 'gn:sqlTableField)
- (triple column-id 'gn:name (column-name column))
- (triple column-id 'gn:sqlFieldType (column-type column))
- (triple table-id 'gn:hasField column-id)))
- (table-columns table))))
- tables)))
-
-(define* (data-table db table-name data-field
- #:optional (default-directory ""))
- (let ((directory (string-append default-directory "/" table-name))
- (port #f)
- (current-strain-id #f))
- (unless (file-exists? directory)
- (mkdir directory))
- (sql-for-each (match-lambda
- (((_ . strain-id)
- (_ . value))
- ;; Close file if new strain.
- (when (and port
- (not (= current-strain-id strain-id)))
- (close-port port)
- (set! port #f))
- ;; If no file is open, open new file.
- (unless port
- (set! current-strain-id strain-id)
- (let ((filename (string-append directory
- "/" (number->string strain-id))))
- (display filename (current-error-port))
- (newline (current-error-port))
- (set! port (open-output-file filename))))
- (display value port)
- (newline port)))
- db
- (format #f "SELECT StrainId, ~a FROM ~a ORDER BY StrainId"
- data-field table-name))
- (close-port port)))
diff --git a/dump/special-forms.scm b/dump/special-forms.scm
deleted file mode 100644
index 2650580..0000000
--- a/dump/special-forms.scm
+++ /dev/null
@@ -1,599 +0,0 @@
-(define-module (dump special-forms)
- #:use-module (srfi srfi-1)
- #:use-module (ice-9 match)
- #:use-module (srfi srfi-26)
- #:use-module (dump sql)
- #:use-module (dump table)
- #:use-module (dump triples)
- #:export (translate-forms
- collect-forms
- collect-keys
- field->key
- field->assoc-ref
- collect-fields
- find-clause
- remove-namespace
- column-id
- id
- syntax-let
- blank-node
- map-alist
- with-documentation
- define-transformer))
-
-(define (key->assoc-ref alist x)
- "Recursively translate (key k) forms in source X to (assoc-ref ALIST
-k) forms."
- (translate-forms 'key
- ;; (syntax-rules (key)
- ;; ((key k) (assoc-ref alist k)))
- (lambda (x)
- (syntax-case x (key)
- ((key k) #`(assoc-ref #,alist 'k))))
- x))
-
-(define (alist-delete* alist keys)
- "Delete entries from ALIST whose key is equal (in the sense of
-equal?) to any in KEYS, a list."
- (remove (match-lambda
- ((key . value)
- (member key keys))
- (x (error "malformed alist element" x)))
- alist))
-
-(define (translate-forms from translator x)
- "Recursively pass (FROM ...) forms in source X to TRANSLATOR, and
-replace them with the return value."
- (syntax-case x ()
- ;; Handle translation base case.
- ((head tail ...) (eq? (syntax->datum #'head)
- from)
- (translator x))
- ;; Recurse.
- ((head tail ...)
- (cons (translate-forms from translator #'head)
- (map (cut translate-forms from translator <>)
- #'(tail ...))))
- ;; Handle leaf base case.
- (leaf #'leaf)))
-
-(define (collect-forms target x)
- "Recursively collect (TARGET ...) forms in source X and return them
-as a list."
- (syntax-case x ()
- ;; Handle collection base case.
- ((head tail ...) (eq? (syntax->datum #'head)
- target)
- (list x))
- ;; Recurse.
- ((head tail ...)
- (append (collect-forms target #'head)
- (append-map (cut collect-forms target <>) #'(tail ...))))
- ;; Handle leaf base case.
- (leaf (list))))
-
-(define (collect-keys x)
- "Recursively collect (key k) forms from source X and return as a
-list of all K."
- (map (syntax-rules (key)
- ((key k) 'k))
- (collect-forms 'key x)))
-
-(define-syntax map-alist
- (lambda (x)
- "Transform (aka map) ALIST, an association list, into another
-association list. The returned association list may contain multiple
-associations for the same key. equal? is used in all association list
-key comparisons.
-
-Syntax:
-
-(map-alist alist
- (verb key expression) ...
- (else=> proc))
-
-VERB must be one of set, filter-set, multiset and remove.
-
-For the set VERB, KEY is set to the result of evaluating
-EXPRESSION. Multiple set verbs on the same key will result in multiple
-associations for that key.
-
-For the filter-set VERB, KEY is set to the result of evaluating
-EXPRESSION only if that result is not #f.
-
-For the multiset VERB, EXPRESSION must return a list and KEY is
-associated multiple times once with each element of the returned list.
-
-For the remove VERB, KEY is discarded.
-
-EXPRESSIONs must reference elements of ALIST using (key k) forms where
-K is the key to be referenced from ALIST. K must not be quoted. That
-is, if K is the symbol 'bar, it must be referenced as (key bar), not
-as (key 'bar).
-
-The else=> clause is optional.
-
-If the else=> clause is present, PROC is passed all pairs of ALIST
-that are not set by an earlier (verb key expression) action. PROC must
-return #f or a pair to replace its input pair. If PROC returns #f,
-that pair is discarded.
-
-If the else=> clause is absent, all unset pairs are discarded.
-
-Example:
-
-(map-alist '((\"foo\" . 1)
- (bar . 2)
- (foobar . 5)
- (fubar . 3))
- (set spam (1+ (key \"foo\")))
- (set ham (* 2 (key bar)))
- (set eggs (* 3 (key bar)))
- (set aal (+ (key \"foo\")
- (key bar)))
- (multiset vel (iota (* 2 (key bar))))
- (remove foobar)
- (else=> (match-lambda
- ((key . value)
- (cons key (expt 2 value))))))
-=> ((spam . 2) (ham . 4) (eggs . 6) (aal . 3)
- (vel . 0) (vel . 1) (vel . 2) (vel . 3) (fubar . 8))"
- (syntax-case x ()
- ((_ alist actions ...)
- ;; TODO: Check that all actions are valid.
- #`(let ((evaluated-alist alist))
- (append (remove (match-lambda
- ;; Filter out results of filter-set actions.
- ((key . #f)
- (member key '#,(filter-map (lambda (action)
- (syntax-case action (filter-set)
- ((filter-set key expression) #'key)
- (_ #f)))
- #'(actions ...))))
- (_ #f))
- ;; Do set and filter-set.
- `#,(filter-map (lambda (action)
- (syntax-case action (set filter-set)
- ((set key expression)
- #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression)))
- ((filter-set key expression)
- #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression)))
- (_ #f)))
- #'(actions ...)))
- ;; Do multiset.
- #,@(filter-map (lambda (action)
- (syntax-case action (multiset)
- ((multiset key expression)
- #`(map (cut cons 'key <>)
- #,(key->assoc-ref #'evaluated-alist #'expression)))
- (_ #f)))
- #'(actions ...))
- ;; Apply else=> procedure on unspecified keys. If
- ;; no else=> procedure is specified, delete
- ;; unspecified keys.
- (filter-map #,(or (any (lambda (action)
- (syntax-case action (else=>)
- ((else=> proc) #'proc)
- (_ #f)))
- #'(actions ...))
- #'(const #f))
- ;; The unspecified part of the input
- ;; alist
- (alist-delete* evaluated-alist
- (list
- ;; Keys that were referenced
- #,@(append-map (lambda (action)
- (syntax-case action ()
- ((_ key expression)
- (collect-keys #'expression))
- (_ '())))
- #'(actions ...))
- ;; Keys that were deleted
- #,@(filter-map (lambda (action)
- (syntax-case action (remove)
- ((remove key) #''key)
- (_ #f)))
- #'(actions ...))
- ;; Keys that were set
- #,@(filter-map (lambda (action)
- (syntax-case action ()
- ((_ key expression) #''key)
- (_ #f)))
- #'(actions ...)))))))))))
-
-
-
-(eval-when (expand load )
- (define (field->datum x)
- (translate-forms
- 'field
- (lambda (x)
- (syntax-case x (field)
- ((field (query alias))
- #`(format #f "~a" (syntax->datum #'alias)))
- ((field table column)
- #`(format #f "~a(~a)"
- (syntax->datum #'table)
- (syntax->datum #'column)))
- ((field table column alias)
- #`(format #f "~a(~a)"
- (syntax->datum #'table)
- (syntax->datum #'alias)))))
- x))
-
- (define (field->key x)
- (translate-forms 'field
- (lambda (x)
- #`(key #,(symbol->string
- (syntax->datum
- ((syntax-rules (field)
- ((field (query alias)) alias)
- ((field table column) column)
- ((field table column alias) alias))
- x)))))
- x))
-
- (define (field->assoc-ref alist x)
- "Recursively translate field references in source X to (assoc-ref
-ALIST field-name) forms."
- (translate-forms 'field
- (lambda (x)
- #`(assoc-ref #,alist
- #,(symbol->string
- (syntax->datum
- ((syntax-rules (field)
- ((field (query alias)) alias)
- ((field table column) column)
- ((field table column alias) alias))
- x)))))
- x))
-
- (define (collect-fields x)
- (map (syntax-rules (field)
- ((field reference ...)
- (reference ...)))
- (collect-forms 'field x)))
-
- (define (find-clause clauses key)
- "Find (KEY ...) clause among CLAUSES, a list of syntax forms."
- (find (lambda (x)
- (syntax-case x ()
- ((clause-key _ ...)
- (eq? (syntax->datum #'clause-key)
- key))))
- clauses))
-
- (define (remove-namespace str)
- "Remove RDF namespace prefix from STR."
- (substring str (1+ (string-index str #\:))))
-
- (define (column-id table-name column-name)
- (string->identifier
- "field" (string-append
- ;; We downcase table names in identifiers. So, we
- ;; distinguish between the user and User tables.
- (if (string=? table-name "User")
- "user2" table-name)
- "__" column-name)))
-
- (define (id table predicate)
- (symbol->string
- (string->identifier
- "dump"
- (string-append
- table "_" (remove-namespace (symbol->string predicate)))))))
-
-(define-syntax blank-node
- (syntax-rules ()
- "Allow having set and multiset within the context of a blank-node"
- [(_ (op predicate object) ...)
- (let [(node (string-join
- (filter-map (match-lambda
- ((pred . obj)
- (match obj
- ((and (? string? obj)
- (? string-null? obj))
- #f)
- ((? symbol? obj)
- (format #f "~a ~a" pred (symbol->string obj)))
- (_
- (format #f "~a ~s" pred obj)))))
- (map-alist '()
- (op predicate object) ...))
- " ; "))]
- (if (string-null? node)
- ""
- (format #f "[ ~a ]" node)))]))
-
-(define-syntax syntax-let
- (syntax-rules ()
- "Like match-let, but for syntax.
-
-(syntax-let ((pattern literals expression))
- body ...)
-≡
-(syntax-case expression ()
- (pattern literals
- body ...))
-
-literals is optional. So,
-
-(syntax-let ((pattern expression))
- body ...)
-≡
-(syntax-case expression ()
- (pattern
- body ...))
-"
- ((_ () body ...)
- (begin body ...))
- ((_ ((pattern expression)
- bindings ...)
- body ...)
- (syntax-case expression ()
- (pattern
- (syntax-let (bindings ...)
- body ...))))
- ((_ ((pattern literals expression)
- bindings ...)
- body ...)
- (syntax-case expression literals
- (pattern
- (syntax-let (bindings ...)
- body ...))))))
-
-(define-syntax define-transformer
- (lambda (x)
- "Define FUNCTION-NAME, a function that dumps a view of the database.
-
-define-transformer consists of three order-agnostic clauses---tables,
-schema-triples and triples---in the form shown below.
-
-(define-transformer function-name
- (tables (table ...) raw-forms ...)
- (schema-triples
- (subject predicate object) ...)
- (triples subject
- (verb predicate object) ...))
-
-The tables clause specifies the database tables to be joined to
-construct the view to be dumped. TABLE must be either of the form
-TABLE-NAME or of the form (JOIN-OPERATOR TABLE-NAME
-RAW-CONDITION). TABLE-NAME must, obviously, be the name of the
-table. JOIN-OPERATOR must be one of join, left-join and
-inner-join. RAW-CONDITION should be the join condition as a raw
-string. This is usually something like
-\"USING (SpeciesId)\". RAW-FORMS are expressions that must evaluate to
-strings to be appended to the SQL query.
-
-The schema-triples clause specifies the list of triples to be written
-once when the dump starts.
-
-The triples clause specifies the triples to be dumped once for each
-row in the view. All triples have a common SUBJECT. The (verb
-predicate object) clauses are described below.
-
-VERB must either be set or multiset. For the set VERB, a single triple
-(SUBJECT PREDICATE OBJECT-VALUE) is written where OBJECT-VALUE is the
-result of evaluating OBJECT. For the multiset VERB, OBJECT must
-evaluate to a list, and a triple (SUBJECT PREDICATE
-OBJECT-VALUE-ELEMENT) is created for each element OBJECT-VALUE-ELEMENT
-of that list.
-
-The SUBJECT and OBJECT expressions in the triples clause must
-reference database fields using a (field TABLE COLUMN) clause where
-TABLE and COLUMN refer to the table and column of the field being
-referenced. Database fields can also be referenced using (field TABLE
-COLUMN ALIAS) where ALIAS is an alias for that column in the SQL
-query. Specification of ALIAS is indeed a leak in the abstraction, and
-must be remedied."
- (syntax-case x (tables schema-triples triples)
- ((_ name clauses ...)
- (syntax-let (((tables (primary-table other-tables ...) tables-raw ...) (tables)
- (find-clause #'(clauses ...) 'tables))
- (schema-triples-clause (or (find-clause #'(clauses ...) 'schema-triples)
- #'(schema-triples)))
- ((triples subject predicate-clauses ...) (triples)
- (find-clause #'(clauses ...) 'triples)))
- #`(define* (name db #:key
- (metadata? #f)
- (data? #t)
- (documentation? #f))
- (when metadata?
- #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
- (subject-type (any (lambda (predicate)
- (syntax-case predicate (rdf:type)
- ((_ rdf:type type) #'type)
- (_ #f)))
- #'(predicate-clauses ...))))
- (map (lambda (predicate-clause)
- (syntax-case predicate-clause ()
- ((_ predicate _)
- ;; Dump metadata about the dump itself.
- #`(begin
- (scm->triples
- (map-alist '()
- (set rdf:type 'gn-id:dump)
- (set gn-term:createsPredicate 'predicate)
- (filter-set gn-term:forSubjectType #,subject-type)
- (multiset gn-term:dependsOn
- '#,(map (lambda (field)
- (match (syntax->datum field)
- ((table-name column-name _ ...)
- (datum->syntax
- x (column-id (symbol->string table-name)
- (symbol->string column-name))))
- (((query alias))
- (datum->syntax
- x (column-id query (symbol->string alias))))))
- (collect-fields predicate-clause))))
- #,(id table (syntax->datum #'predicate)))
- ;; Automatically create domain triples
- ;; for predicates.
- (when #,subject-type
- (triple 'predicate 'rdfs:domain #,subject-type))))
- (_ (error "Invalid predicate clause:" predicate-clause))))
- #'(predicate-clauses ...))))
- (when documentation?
- (format #t "~%## '~a'~%~%" (syntax->datum #'name))
- #,(syntax-case #'schema-triples-clause (schema-triples)
- ((schema-triples (triple-subject triple-predicate triple-object) ...)
- #`(begin
- (when (not (list 'triple-subject ...))
- (format #t "## Schema Triples:~%~%```text~%")
- (for-each (lambda (s p o)
- (format #t "~a -> ~a -> ~a~%" s p o))
- (list 'triple-subject ...)
- (list 'triple-predicate ...)
- (list 'triple-object ...))
- (format #t "```"))))
- (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
- (format #t "## Generated Triples:
-
-The following SQL query was executed:
-
-```sql
-~a
-```
-
-The above query results to triples that have the form:
-
-```text
-"
- (select-query #,(collect-fields #'(subject predicate-clauses ...))
- (primary-table other-tables ...)
- tables-raw ...))
- (for-each (match-lambda
- ((predicate . object)
- (format #t "~a -> ~a -> ~a ~%"
- (if (symbol? #,(field->datum #'subject))
- (symbol->string #,(field->datum #'subject))
- #,(field->datum #'subject))
- predicate
- (if (symbol? object)
- (symbol->string object)
- object))))
- (map-alist
- '()
- #,@(field->datum #'(predicate-clauses ...))))
- (format #t "```~%Here's an example query:~%~%```sparql~%")
- (documentation?)
- (newline)
- (let* ((result
- (map-alist (sql-find
- db
- (format #f "~a LIMIT 1"
- (select-query #,(collect-fields #'(subject predicate-clauses ...))
- (primary-table other-tables ...)
- tables-raw ...)))
- #,@(field->key #'(predicate-clauses ...))))
- (first-n (list-head result
- (let ((n
- (min 4 (truncate
- (+ (exact-integer-sqrt (length result)) 1)))))
- (if (< n 3)
- (length result)
- n)))))
- (format #t "SELECT * WHERE { ~%")
- (for-each (match-lambda
- ((predicate . object)
- (match object
- ((or (? symbol? object)
- (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
- (format #t " ?s ~a ~a .~%" predicate object))
- ((and (? string? object)
- (? (lambda (el) (not (string-null? el))) object))
- (format #t " ?s ~a \"~a\" .~%" predicate object))
- (_ ""))))
- first-n)
- (format #t " ?s ?p ?o .~%}~%```~%"))
- (format #t "~%Expected Result:~%~%```rdf~%")
- (sql-for-each (lambda (row)
- (scm->triples
- (map-alist row #,@(field->key #'(predicate-clauses ...)))
- #,(field->assoc-ref #'row #'subject)
- (lambda (s p o)
- (triple s p o))))
- db
- (format #f "~a LIMIT 1"
- (select-query #,(collect-fields #'(subject predicate-clauses ...))
- (primary-table other-tables ...)
- tables-raw ...)))
- (format #t "```~%~%"))
- (when data?
- #,(syntax-case #'schema-triples-clause (schema-triples)
- ((schema-triples (triple-subject triple-predicate triple-object) ...)
- #`(for-each triple
- (list 'triple-subject ...)
- (list 'triple-predicate ...)
- (list 'triple-object ...)))
- (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
- (sql-for-each (lambda (row)
- (scm->triples
- (map-alist row #,@(field->key #'(predicate-clauses ...)))
- #,(field->assoc-ref #'row #'subject)))
- db
- (select-query #,(collect-fields #'(subject predicate-clauses ...))
- (primary-table other-tables ...)
- tables-raw ...)))
- )))
- (_ (error "Invalid define-transformer syntax:" (syntax->datum x))))))
-
-(define (get-keyword-value args keyword default)
- (let ((kv (memq keyword args)))
- (if (and kv (>= (length kv) 2))
- (cadr kv)
- default)))
-
-(define-syntax with-documentation
- (syntax-rules ()
- ((_ (key value) ...)
- (let* ((alist `((key . ,value) ...))
- (name (assoc-ref alist 'name))
- (connection (assoc-ref alist 'connection))
- (table-metadata? (assoc-ref alist 'table-metadata?))
- (prefixes (assoc-ref alist 'prefixes))
- (inputs (assoc-ref alist 'inputs))
- (outputs (assoc-ref alist 'outputs))
- (rdf-path (get-keyword-value outputs #:rdf ""))
- (doc-path (get-keyword-value outputs #:documentation "")))
- (call-with-target-database
- connection
- (lambda (db)
- (with-output-to-file ;
- doc-path
- (lambda ()
- (format #t "# ~a" name)
- (for-each
- (lambda (proc)
- (proc db
- #:metadata? #f
- #:data? #f
- #:documentation?
- (lambda () (for-each
- (match-lambda
- ((k v)
- (begin
- (prefix k v #f))))
- prefixes))))
- inputs))
- #:encoding "UTF-8")
-
- ;; Dumping the actual data
- (with-output-to-file
- rdf-path
- (lambda ()
- ;; Add the prefixes
- (for-each
- (match-lambda
- ((k v)
- (begin
- (prefix k v))))
- prefixes)
- (newline)
- (for-each
- (lambda (proc)
- (proc db #:metadata? table-metadata?))
- inputs))
- #:encoding "UTF-8")))))))
-
diff --git a/dump/sql.scm b/dump/sql.scm
deleted file mode 100644
index 9e6b21a..0000000
--- a/dump/sql.scm
+++ /dev/null
@@ -1,114 +0,0 @@
-;;; Database helpers
-;;;
-;;; Most of these functions should have been a part of
-;;; guile-dbi. Never too late to contribute upstream!
-
-(define-module (dump sql)
- #:use-module (srfi srfi-1)
- #:use-module (srfi srfi-26)
- #:use-module (ice-9 match)
- #:use-module (dbi dbi)
- #:export (select-query
- call-with-database
- call-with-target-database
- sql-exec
- sql-fold
- sql-map
- sql-for-each
- sql-find))
-
-;; A half-baked macro to make SQL SELECT queries a bit more
-;; S-expression friendly
-(define-syntax select-query
- (lambda (x)
- (syntax-case x ()
- ((_ fields tables raw-forms ...)
- #`(string-append "SELECT "
- #,(syntax-case #'fields (distinct)
- ((distinct _ ...)
- "DISTINCT ")
- (_ ""))
- #,(string-join (filter-map (match-lambda
- ('distinct #f)
- (((query alias))
- (format #f "~a AS ~a" query alias))
- ((table column)
- (format #f "~a.~a" table column))
- ((table column alias)
- (format #f "~a.~a AS ~a" table column alias))
- (field-spec
- (error "Invalid field specification" field-spec)))
- (syntax->datum #'fields))
- ", ")
- " FROM "
- #,(string-join (map (match-lambda
- ((join table condition)
- (format #f "~a ~a ~a"
- (case join
- ((join) "JOIN")
- ((left-join) "LEFT JOIN")
- ((inner-join) "INNER JOIN")
- (else (error "Invalid join operator" join)))
- table condition))
- ((? symbol? table)
- (symbol->string table))
- (table-spec
- (error "Invalid table specification" table-spec)))
- (syntax->datum #'tables))
- " ")
- #,(syntax-case #'(raw-forms ...) ()
- (() "")
- (_ " "))
- raw-forms ...))
- (_ (error "Invalid SQL select query" (syntax->datum x))))))
-
-(define (call-with-database backend connection-string proc)
- (let ((db #f))
- (dynamic-wind (lambda ()
- (set! db (dbi-open backend connection-string)))
- (cut proc db)
- (cut dbi-close db))))
-
-(define (database-check-status db)
- (match (dbi-get_status db)
- ((code . str)
- (unless (zero? code)
- (error str)))))
-
-(define (sql-exec db statement)
- (dbi-query db statement)
- (database-check-status db))
-
-(define (sql-fold proc init db statement)
- (sql-exec db statement)
- (let loop ((result init))
- (let ((row (dbi-get_row db)))
- (if row
- (loop (proc row result))
- result))))
-
-(define (sql-map proc db statement)
- (sql-fold (lambda (row result)
- (cons (proc row) result))
- (list) db statement))
-
-(define (sql-for-each proc db statement)
- (sql-fold (lambda (row _)
- (proc row))
- #f db statement))
-
-(define (sql-find db statement)
- (sql-exec db statement)
- (dbi-get_row db))
-
-(define (call-with-target-database connection-settings proc)
- (call-with-database "mysql" (string-join
- (list (assq-ref connection-settings 'sql-username)
- (assq-ref connection-settings 'sql-password)
- (assq-ref connection-settings 'sql-database)
- "tcp"
- (assq-ref connection-settings 'sql-host)
- (number->string
- (assq-ref connection-settings 'sql-port)))
- ":")
- proc))
diff --git a/dump/string-similarity.scm b/dump/string-similarity.scm
deleted file mode 100644
index 4bcdf7c..0000000
--- a/dump/string-similarity.scm
+++ /dev/null
@@ -1,37 +0,0 @@
-(define-module (dump string-similarity)
- #:use-module (srfi srfi-1)
- #:export (jaccard-string-similar?))
-
-(define (trigrams str)
- "Return all trigrams in STR."
- (if (< (string-length str) 3)
- '()
- (map (lambda (start)
- (substring str start (+ start 3)))
- (iota (- (string-length str) 2)))))
-
-(define (jaccard-index set1 set2)
- "Return the Jaccard similarity coefficient between lists SET1 and
-SET2. Similarity between null sets is defined to be 0."
- (if (and (null? set1)
- (null? set2))
- 0
- (let ((length-of-intersection (length (lset-intersection equal? set1 set2))))
- (exact->inexact
- (/ length-of-intersection
- (- (+ (length set1) (length set2))
- length-of-intersection))))))
-
-(define (jaccard-string-similarity str1 str2)
- "Return the trigram similarity between strings STR1 and STR2 as
-defined by the Jaccard index."
- (jaccard-index (trigrams (string-downcase str1))
- (trigrams (string-downcase str2))))
-
-(define (jaccard-string-similar? str1 str2)
- "Return #t if STR1 and STR2 have a trigram similarity greater than
-0.8. Else, return #f. The Jaccard index is used as the similarity
-metric."
- (let ((similarity-threshold 0.8))
- (> (jaccard-string-similarity str1 str2)
- similarity-threshold)))
diff --git a/dump/strings.scm b/dump/strings.scm
deleted file mode 100644
index e965f03..0000000
--- a/dump/strings.scm
+++ /dev/null
@@ -1,101 +0,0 @@
-(define-module (dump strings)
- #:use-module (srfi srfi-1)
- #:use-module (srfi srfi-19)
- #:use-module (ice-9 match)
- #:use-module (ice-9 string-fun)
- #:use-module (ice-9 textual-ports)
- #:export (string-blank?
- time-unix->string
- string-blank?
- string-split-substring
- delete-substrings
- replace-substrings
- sanitize-rdf-string
- snake->lower-camel
- string-capitalize-first))
-
-(define (time-unix->string seconds . maybe-format)
- "Given an integer saying the number of seconds since the Unix
-epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable
-date and time-string, possible using the MAYBE-FORMAT."
- (letrec ([time-unix->time-utc
- (lambda (seconds)
- (add-duration
- (date->time-utc (make-date 0 0 0 0 1 1 1970 0))
- (make-time time-duration 0 seconds)))])
- (apply date->string
- (time-utc->date (time-unix->time-utc seconds))
- maybe-format)))
-
-(define (string-blank? str)
- "Return non-#f if STR consists only of whitespace characters."
- (string-every char-set:whitespace str))
-
-(define (string-split-substring str substr)
- "Split the string @var{str} into a list of substrings delimited by the
-substring @var{substr}."
-
- (define substrlen (string-length substr))
- (define strlen (string-length str))
-
- (define (loop index start)
- (cond
- ((>= start strlen) (list ""))
- ((not index) (list (substring str start)))
- (else
- (cons (substring str start index)
- (let ((new-start (+ index substrlen)))
- (loop (string-contains str substr new-start)
- new-start))))))
-
- (cond
- ((string-contains str substr) => (lambda (idx) (loop idx 0)))
- (else (list str))))
-
-(define (delete-substrings str . substrings)
- "Delete SUBSTRINGS, a list of strings, from STR."
- (fold (lambda (substring result)
- (string-replace-substring result substring ""))
- str
- substrings))
-
-(define (replace-substrings str replacement-alist)
- "Replace substrings in STR according to REPLACEMENT-ALIST, an
-association list mapping substrings to their replacements."
- (fold (match-lambda*
- (((substring . replacement) str)
- (string-replace-substring str substring replacement)))
- str
- replacement-alist))
-
-(define (sanitize-rdf-string str)
- (replace-substrings
- (string-trim-both str)
- '(("\r" . "\\r")
- ("\n" . "\\n")
- ("\"" . "'")
- ("\v" . "")
- ("\xad" . "")
- ("\x28" . "")
- ("\x29" . "")
- ("\xa0" . " ")
- ("\x02" . "")
- ("\x01" . ""))))
-
-(define (snake->lower-camel str)
- (let ((char-list (string->list str)))
- (call-with-output-string
- (lambda (port)
- (put-char port (char-downcase (string-ref str 0)))
- (map (lambda (char previous-char)
- (unless (char=? char #\_)
- (put-char port (if (char=? previous-char #\_)
- (char-upcase char)
- char))))
- (drop char-list 1)
- char-list)))))
-
-(define (string-capitalize-first string)
- (string-titlecase
- (string-downcase string) 0 1))
-
diff --git a/dump/table.scm b/dump/table.scm
deleted file mode 100644
index 834c4fd..0000000
--- a/dump/table.scm
+++ /dev/null
@@ -1,28 +0,0 @@
-(define-module (dump table)
- #:use-module (srfi srfi-9 gnu)
- #:export (make-table
- table-name
- table-size
- table-columns
- set-table-columns
- make-column
- column-name
- column-type
- column-dumped?))
-
-(define-immutable-record-type <table>
- (make-table name size columns)
- table?
- (name table-name)
- (size table-size)
- (columns table-columns set-table-columns))
-
-(define-immutable-record-type <column>
- (column-constructor name type dumped?)
- column?
- (name column-name)
- (type column-type)
- (dumped? column-dumped?))
-
-(define* (make-column name type #:optional dumped?)
- (column-constructor name type dumped?))
diff --git a/dump/triples.scm b/dump/triples.scm
deleted file mode 100644
index b0eb1e4..0000000
--- a/dump/triples.scm
+++ /dev/null
@@ -1,110 +0,0 @@
-(define-module (dump triples)
- #:use-module (ice-9 regex)
- #:use-module (ice-9 match)
- #:use-module (dump strings)
- #:export (ontology
- string->identifier
- prefix
- triple
- scm->triples
- annotate-field
- string->binomial-name))
-
-(define (annotate-field field schema)
- (let ([schema (cond ((symbol? schema)
- (symbol->string schema))
- ((string? schema) schema)
- (else
- (error "Use a string/symbol")))]
- [string-field (if (number? field) (number->string field) field)])
- (if (or (and (string? string-field) (string-null? string-field))
- (eq? string-field #f))
- ""
- (string->symbol
- (format #f "~s~a" string-field schema)))))
-
-(define* (string->identifier prefix str
- #:optional #:key
- (ontology "gn:")
- (separator "_")
- (proc string-downcase))
- "Convert STR to a turtle identifier after replacing illegal
-characters with an underscore and prefixing with gn:PREFIX."
- (if (or (and (string? str) (string-null? str))
- (eq? str #f))
- ""
- (string->symbol
- (string-append ontology prefix separator
- (string-delete
- (lambda (c)
- (eq? c #\)))
- (string-map (lambda (c)
- (case c
- ((#\/ #\< #\> #\+ #\( #\space #\@) #\_)
- (else c)))
- (proc
- (string-trim-right str #\.))))))))
-
-
-(define* (prefix prefix iri #:optional (ttl? #t))
- (format #t
- (if ttl?
- "@prefix ~a ~a .~%"
- "PREFIX ~a ~a ~%")
- prefix iri))
-
-(define (ontology prefix value)
- (if (and (string? value) (string-null? value))
- ""
- (string->symbol
- `,(format #f "~a~a" prefix value))))
-
-(define (triple subject predicate object)
- (unless (or (string? subject)
- (symbol? subject))
- (error "Triple subject not a string or symbol:"
- (list subject predicate object)))
- (unless (or (string? predicate)
- (symbol? predicate))
- (error "Triple predicate not a string or symbol:"
- (list subject predicate object)))
- (unless (or (string? object)
- (symbol? object)
- (number? object))
- (error "Triple object not a string, symbol or number:"
- (list subject predicate object)))
- (let ([pattern (match object
- ((or (? symbol? object)
- (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
- "~a ~a ~a .~%")
- (_ "~a ~a \"~a\" .~%"))])
- (format #t pattern subject predicate
- (if (symbol? object) (symbol->string object) object))))
-
-(define* (scm->triples alist id
- #:optional
- (fn triple))
- (for-each (match-lambda
- ((predicate . object)
- (when (cond
- ((string? object)
- (not (string-blank? object)))
- (else object))
- (fn id predicate object))))
- alist))
-
-(define (string->binomial-name name)
- (let ((binomial?
- (string-match
- "\\\(.+\\)"
- name)))
- (string->identifier
- ""
- (if binomial?
- (regexp-substitute/global
- #f "[^[:space:]A-Za-z0-9:]"
- (match:substring binomial?)
- 'pre "" 'post)
- name)
- #:separator ""
- #:proc string-capitalize-first)))