From b44b81f070b22c315d381431e39fc80705f11566 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 29 Jun 2023 19:44:42 +0300 Subject: Add a configuration record-type for the define macro * dump/special-forms.scm: Import (srfi srfi-9 gnu). (<dump-configuration>): New record-type. (define-dump): Use the above record-type. --- dump/special-forms.scm | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index f9dca91..bd1760b 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -1,5 +1,6 @@ (define-module (dump special-forms) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9 gnu) #:use-module (ice-9 match) #:use-module (srfi srfi-26) #:use-module (dump sql) @@ -18,8 +19,23 @@ syntax-let blank-node map-alist + dump-configuration + dump-configuration-table-metadata? + dump-configuration-auto-document-path define-dump)) +(define-immutable-record-type + (%dump-configuration table-metadata? auto-document-path) + dump-configuration? + (table-metadata? dump-configuration-table-metadata?) + (auto-document-path dump-configuration-auto-document-path)) + +(define* (dump-configuration + #:optional (table-metadata? #f) + (auto-document-path #f)) + "Return a new configuration." + (%dump-configuration table-metadata? auto-document-path)) + (define (key->assoc-ref alist x) "Recursively translate (key k) forms in source X to (assoc-ref ALIST k) forms." @@ -377,7 +393,9 @@ must be remedied." #'(schema-triples))) ((triples subject predicate-clauses ...) (triples) (find-clause #'(clauses ...) 'triples))) - #`(define* (name db #:optional (table-metadata? #f)) + #`(define* (name db + #:optional (dump-configuration + (dump-configuration))) #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) #`(for-each triple @@ -385,7 +403,8 @@ must be remedied." (list 'triple-predicate ...)
(list 'triple-object ...))) (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (when table-metadata? + (when (dump-configuration-table-metadata? + dump-configuration) #,@(let ((dump-table (symbol->string (syntax->datum #'primary-table))) (subject-type (any (lambda (predicate) (syntax-case predicate (rdf:type) -- cgit v1.2.3 From 1b775aaefc9e2eaf5bfda62ec94856d7c10fce45 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 29 Jun 2023 22:08:15 +0300 Subject: Optionally self-document while dumping --- dump/special-forms.scm | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index bd1760b..0099f73 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -439,6 +439,26 @@ must be remedied." (triple 'predicate 'rdfs:domain #,subject-type)))) (_ (error "Invalid predicate clause:" predicate-clause)))) #'(predicate-clauses ...)))) + (when (dump-configuration-auto-document-path configuration) + (for-each (match-lambda + ((predicate . object) + (format #f "Subject:~a Predicate:~a Object:~a.~%" + #,(car (collect-keys + (field->key #'subject))) + predicate object))) + (map-alist + '() + #,@(translate-forms 'field + (lambda (x) + (symbol->string + (syntax->datum + ((syntax-rules (field) + ((field (query alias)) alias) + ((field table column) column) + ((field table column alias) alias)) + x)))) + #'(predicate-clauses ...)) + ))) (sql-for-each (lambda (row) (scm->triples (map-alist row #,@(field->key #'(predicate-clauses ...))) -- cgit v1.2.3 From ed440ac1c18058da9a0b7fa2cb62834f20f5d1ee Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 3 Jul 2023 13:49:39 +0300 Subject: Add table-dump? option to dump-configuration record * dump/special-forms.scm: Export dump-configuration-table-dump?. (): Add dump-configuration-table-dump. (dump-configuration): Make table-dump? default to #t. 
Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 0099f73..ded0db5 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -21,6 +21,7 @@ map-alist dump-configuration dump-configuration-table-metadata? + dump-configuration-table-dump? dump-configuration-auto-document-path define-dump)) @@ -28,10 +29,13 @@ (%dump-configuration table-metadata? auto-document-path) dump-configuration? (table-metadata? dump-configuration-table-metadata?) + (table-dump? dump-configuration-table-dump?) (auto-document-path dump-configuration-auto-document-path)) (define* (dump-configuration - #:optional (table-metadata? #f) + #:optional + (table-dump? #t) + (table-metadata? #f) (auto-document-path #f)) "Return a new configuration." (%dump-configuration table-metadata? auto-document-path)) @@ -441,30 +445,21 @@ must be remedied." #'(predicate-clauses ...)))) (when (dump-configuration-auto-document-path configuration) (for-each (match-lambda - ((predicate . object) - (format #f "Subject:~a Predicate:~a Object:~a.~%" - #,(car (collect-keys - (field->key #'subject))) - predicate object))) - (map-alist - '() - #,@(translate-forms 'field - (lambda (x) - (symbol->string - (syntax->datum - ((syntax-rules (field) - ((field (query alias)) alias) - ((field table column) column) - ((field table column alias) alias)) - x)))) - #'(predicate-clauses ...)) - ))) - (sql-for-each (lambda (row) - (scm->triples - (map-alist row #,@(field->key #'(predicate-clauses ...))) - #,(field->assoc-ref #'row #'subject))) - db - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))))) + ((predicate . 
object) + (format #f "Subject:~a Predicate:~a Object:~a.~%" + #,(car (collect-keys + (field->key #'subject))) + predicate object))) + (map-alist + '() + #,@(translate-forms 'field + (lambda (x) + (symbol->string + (syntax->datum + ((syntax-rules (field) + ((field (query alias)) alias) + ((field table column) column) + ((field table column alias) alias)) + x)))) + #'(predicate-clauses ...))))) (_ (error "Invalid define-dump syntax:" (syntax->datum x)))))) -- cgit v1.2.3 From 909eb42f9cf08cc7c88bca2fc25f3281f9787190 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 3 Jul 2023 18:48:46 +0300 Subject: Make dumping optional, defaulting to #t There may be a case where one just wants to dump metadata about the s-exps only. --- dump/special-forms.scm | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index ded0db5..2b6a0bb 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -462,4 +462,13 @@ must be remedied." ((field table column alias) alias)) x)))) #'(predicate-clauses ...))))) + (when (dump-configuration-table-dump? configuration) + (sql-for-each (lambda (row) + (scm->triples + (map-alist row #,@(field->key #'(predicate-clauses ...))) + #,(field->assoc-ref #'row #'subject))) + db + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...)))))) (_ (error "Invalid define-dump syntax:" (syntax->datum x)))))) -- cgit v1.2.3 From d81fae54dff081fd53c397989b75cdb4e3701313 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 4 Jul 2023 16:44:55 +0300 Subject: Rename dump-configuration slots * dump/special-forms.scm: Export dump-configuration? Rename dump-configuration-table-metadata -> dump-configuration-triples?; dump-configuration-table-dump -> dump-configuration-table-metadata?; and dump-configuration-auto-document-path -> dump-configuration-auto-documentation-port (): Ditto renames. (dump-configuration): Ditto. 
Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 2b6a0bb..5a47b8f 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -20,25 +20,26 @@ blank-node map-alist dump-configuration + dump-configuration? + dump-configuration-triples? dump-configuration-table-metadata? - dump-configuration-table-dump? - dump-configuration-auto-document-path + dump-configuration-auto-documentation-port define-dump)) (define-immutable-record-type - (%dump-configuration table-metadata? auto-document-path) + (%dump-configuration triples? table-metadata? auto-documentation-port) dump-configuration? + (triples? dump-configuration-triples?) (table-metadata? dump-configuration-table-metadata?) - (table-dump? dump-configuration-table-dump?) - (auto-document-path dump-configuration-auto-document-path)) + (auto-documentation-port dump-configuration-auto-documentation-port)) (define* (dump-configuration #:optional - (table-dump? #t) + (triples? #t) (table-metadata? #f) - (auto-document-path #f)) + (auto-documentation-port #f)) "Return a new configuration." - (%dump-configuration table-metadata? auto-document-path)) + (%dump-configuration triples? table-metadata? auto-documentation-port)) (define (key->assoc-ref alist x) "Recursively translate (key k) forms in source X to (assoc-ref ALIST -- cgit v1.2.3 From 7cf57c30129b6c41205386f4e2c3afaf9c3a80e0 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 4 Jul 2023 23:25:17 +0300 Subject: Conditionally autogenerate schema triples during dump --- dump/special-forms.scm | 68 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 5a47b8f..d9e9299 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -444,26 +444,54 @@ must be remedied." 
(triple 'predicate 'rdfs:domain #,subject-type)))) (_ (error "Invalid predicate clause:" predicate-clause)))) #'(predicate-clauses ...)))) - (when (dump-configuration-auto-document-path configuration) - (for-each (match-lambda - ((predicate . object) - (format #f "Subject:~a Predicate:~a Object:~a.~%" - #,(car (collect-keys - (field->key #'subject))) - predicate object))) - (map-alist - '() - #,@(translate-forms 'field - (lambda (x) - (symbol->string - (syntax->datum - ((syntax-rules (field) - ((field (query alias)) alias) - ((field table column) column) - ((field table column alias) alias)) - x)))) - #'(predicate-clauses ...))))) - (when (dump-configuration-table-dump? configuration) + (when (dump-configuration-auto-documentation-port dump-configuration) + (let ((out + (dump-configuration-auto-documentation-port + dump-configuration))) + (format out "# ~a Metadata~%" (syntax->datum #`name)) + #,(syntax-case #'schema-triples-clause (schema-triples) + ((schema-triples (triple-subject triple-predicate triple-object) ...) + #`(for-each triple + (list 'triple-subject ...) + (list 'triple-predicate ...) + (list 'triple-object ...))) + (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) + (format out "## Generated Triples: + +The following SQL query was executed: + +```sql +~a +``` + +Here are the generated triples: + +" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...)) + (for-each (match-lambda + ((predicate . object) + (format out "~a -> ~a -> ~a ~%" + #,(car (collect-keys + (field->key #'subject))) + predicate object))) + (map-alist + '() + #,@(translate-forms 'field + (lambda (x) + (symbol->string + (syntax->datum + ((syntax-rules (field) + ((field (query alias)) alias) + ((field table column) column) + ((field table column alias) alias)) + x)))) + #'(predicate-clauses ...)))) + ;; To clear the buffer + (force-output out) + )) + (when (dump-configuration-triples? 
dump-configuration) (sql-for-each (lambda (row) (scm->triples (map-alist row #,@(field->key #'(predicate-clauses ...))) -- cgit v1.2.3 From 2f94863679665ed41b371fdeb499e53c4e429eec Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 4 Jul 2023 23:26:21 +0300 Subject: Use call-with-target-database when dumping schema annotations * dump/schema-dump.scm (dump-schema-annotations): Replace call-with-genenetwork-database with call-with-target-database. --- dump/schema-dump.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/schema-dump.scm b/dump/schema-dump.scm index cda3fd0..876eafb 100644 --- a/dump/schema-dump.scm +++ b/dump/schema-dump.scm @@ -38,7 +38,7 @@ (TableComments)))) (define (dump-schema-annotations db) - (call-with-genenetwork-database + (call-with-target-database (lambda (db) (for-each (cut dump-table-fields db <>) (get-tables-from-comments db))))) -- cgit v1.2.3 From b0966168c8904a46d6dbdaf1c364c7a28b65d1af Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 5 Jul 2023 14:01:11 +0300 Subject: Replace #` with #' when fetching the name of the defined dump --- dump/special-forms.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index d9e9299..c6d43cc 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -448,7 +448,7 @@ must be remedied." (let ((out (dump-configuration-auto-documentation-port dump-configuration))) - (format out "# ~a Metadata~%" (syntax->datum #`name)) + (format out "# ~a Metadata~%" (syntax->datum #'name)) #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...)
#`(for-each triple -- cgit v1.2.3 From 76000978a40f34d135f9545d75fd72bd5f928733 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 5 Jul 2023 23:04:40 +0300 Subject: Wrap the dump title in quotes --- dump/special-forms.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index c6d43cc..cd7a98c 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -448,7 +448,7 @@ must be remedied." (let ((out (dump-configuration-auto-documentation-port dump-configuration))) - (format out "# ~a Metadata~%" (syntax->datum #'name)) + (format out "# '~a' Metadata~%~%" (syntax->datum #'name)) #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) #`(for-each triple -- cgit v1.2.3 From 9dc944bac064cd591580df4743063530f07d427b Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 5 Jul 2023 23:14:17 +0300 Subject: Add a section sub-heading for schema triples --- dump/special-forms.scm | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index cd7a98c..d94733e 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -451,10 +451,13 @@ must be remedied." (format out "# '~a' Metadata~%~%" (syntax->datum #'name)) #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) - #`(for-each triple - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...))) + #`(begin + (format out "## Schema Triples for '~a'~%~%" (syntax->datum #'name)) + (for-each (lambda (s p o) + (format out "~a -> ~a -> ~a~%" s p o)) + (list 'triple-subject ...) + (list 'triple-predicate ...) 
+ (list 'triple-object ...)))) (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) (format out "## Generated Triples: -- cgit v1.2.3 From fe564abecd58bcbc3fbe6893d53ca7bf58251f2c Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 5 Jul 2023 23:20:25 +0300 Subject: Add table metadata to triple dump --- dump/special-forms.scm | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index d94733e..727b356 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -459,7 +459,8 @@ must be remedied." (list 'triple-predicate ...) (list 'triple-object ...)))) (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (format out "## Generated Triples: + (format out " +## Generated Triples: The following SQL query was executed: @@ -467,7 +468,7 @@ The following SQL query was executed: ~a ``` -Here are the generated triples: +Triples take the form: " (select-query #,(collect-fields #'(subject predicate-clauses ...)) @@ -483,14 +484,20 @@ Here are the generated triples: '() #,@(translate-forms 'field (lambda (x) - (symbol->string - (syntax->datum - ((syntax-rules (field) - ((field (query alias)) alias) - ((field table column) column) - ((field table column alias) alias)) - x)))) - #'(predicate-clauses ...)))) + (syntax-case x (field) + ((field (query alias)) + #`(format #f "~a" (syntax->datum #'alias))) + ((field table column) + #`(format #f "~a.~a" + (syntax->datum #'table) + (syntax->datum #'column))) + ((field table column alias) + #`(format #f "~a.~a" + (syntax->datum table) + (syntax->datum alias))))) + #'(predicate-clauses ...))) + ) + (format out "~%") ;; To clear the buffer (force-output out) )) -- cgit v1.2.3 From 7da7e15ac9fa0422caa4fc560f3c6957c2dbcc24 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 6 Jul 2023 15:21:10 +0300 Subject: Print out triples in the form Table.Name or Table.Alias * dump/special-forms.scm
(field->datum): New function. (define-dump): Use the field->datum to convert triples into the form Table.Name or Table.Alias. Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 727b356..2281f23 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -223,7 +223,24 @@ Example: -(eval-when (expand load eval) +(eval-when (expand load ) + (define (field->datum x) + (translate-forms + 'field + (lambda (x) + (syntax-case x (field) + ((field (query alias)) + #`(format #f "~a" (syntax->datum #'alias))) + ((field table column) + #`(format #f "~a.~a" + (syntax->datum #'table) + (syntax->datum #'column))) + ((field table column alias) + #`(format #f "~a.~a" + (syntax->datum table) + (syntax->datum alias))))) + x)) + (define (field->key x) (translate-forms 'field (lambda (x) @@ -477,30 +494,14 @@ Triples take the form: (for-each (match-lambda ((predicate . object) (format out "~a -> ~a -> ~a ~%" - #,(car (collect-keys - (field->key #'subject))) + #,(field->datum #'subject) predicate object))) (map-alist '() - #,@(translate-forms 'field - (lambda (x) - (syntax-case x (field) - ((field (query alias)) - #`(format #f "~a" (syntax->datum #'alias))) - ((field table column) - #`(format #f "~a.~a" - (syntax->datum #'table) - (syntax->datum #'column))) - ((field table column alias) - #`(format #f "~a.~a" - (syntax->datum table) - (syntax->datum alias))))) - #'(predicate-clauses ...))) - ) + #,@(field->datum #'subject))) (format out "~%") ;; To clear the buffer - (force-output out) - )) + (force-output out))) (when (dump-configuration-triples? 
dump-configuration) (sql-for-each (lambda (row) (scm->triples -- cgit v1.2.3 From 9fd4ef6dc5512aacdc60c1bf0dc378d72a8a3dfe Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 10 Jul 2023 12:22:29 +0300 Subject: Pass port as an optional argument * dump/triples.scm (triple): Add port as an optional argument. Defaults to #t. This enables us to use triple to print to other ports E.g in the case of dumping documentation to a different file. Signed-off-by: Munyoki Kilyungi --- dump/triples.scm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dump/triples.scm b/dump/triples.scm index a0f8213..19817bd 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -44,7 +44,9 @@ characters with an underscore and prefixing with gn:PREFIX." (string->symbol `,(format #f "~a~a" prefix value)))) -(define (triple subject predicate object) +(define* (triple subject predicate object + #:optional + (port #t)) (unless (or (string? subject) (symbol? subject)) (error "Triple subject not a string or symbol:" @@ -63,7 +65,7 @@ characters with an underscore and prefixing with gn:PREFIX." (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) "~a ~a ~a .~%") (_ "~a ~a \"~a\" .~%"))]) - (format #t pattern subject predicate + (format port pattern subject predicate (if (symbol? object) (symbol->string object) object)))) (define (scm->triples alist id) -- cgit v1.2.3 From d9955860338facec2d36a80ef8959254ac4ef928 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 11 Jul 2023 10:26:21 +0300 Subject: Pass port to scm->triples * dump/triples.scm (scm->triples): Make define have an optional arg fn that defaults to "triple". --- dump/triples.scm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dump/triples.scm b/dump/triples.scm index 19817bd..710e00f 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -68,12 +68,14 @@ characters with an underscore and prefixing with gn:PREFIX." (format port pattern subject predicate (if (symbol? 
object) (symbol->string object) object)))) -(define (scm->triples alist id) +(define* (scm->triples alist id + #:optional + (fn triple)) (for-each (match-lambda ((predicate . object) (when (cond ((string? object) (not (string-blank? object))) (else object)) - (triple id predicate object)))) + (fn id predicate object)))) alist)) -- cgit v1.2.3 From 75126b1b7889769fa11dc928cf481796cf9d24a5 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 11 Jul 2023 11:03:45 +0300 Subject: Construct example SQL query and expected results --- dump/special-forms.scm | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 2281f23..78ee879 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -485,7 +485,7 @@ The following SQL query was executed: ~a ``` -Triples take the form: +The above query results to triples that have the form: " (select-query #,(collect-fields #'(subject predicate-clauses ...)) @@ -498,8 +498,41 @@ Triples take the form: predicate object))) (map-alist '() - #,@(field->datum #'subject))) - (format out "~%") + #,@(field->datum #'(predicate-clauses ...)))) + (format out "~%Here's an example query:~%~%") + (let* ((result + (map-alist (sql-find + db + (format #f "~a LIMIT 1" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) + #,@(field->key #'(predicate-clauses ...)))) + (first-n (list-head result (truncate (/ (length result) 2))))) + (format out "SELECT ?s ?p ?o WHERE { ~%") + (for-each (match-lambda + ((predicate . object) + (format out + (match object + ((or (? symbol? object) + (? 
(lambda (el) (string-match "^\\[ .* \\]$" el)) object)) + " ?s ~a ~a .~%") + (_ " ?s ~a \"~a\" .~%")) + predicate object))) + first-n) + (format out " ?s ?p ?o .~%}~%")) + (format out "~%Expected Result:~%~%") + (sql-for-each (lambda (row) + (scm->triples + (map-alist row #,@(field->key #'(predicate-clauses ...))) + #,(field->assoc-ref #'row #'subject) + (lambda (s p o) + (triple s p o out)))) + db + (format #f "~a LIMIT 1" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) ;; To clear the buffer (force-output out))) (when (dump-configuration-triples? dump-configuration) -- cgit v1.2.3 From 3316c92904d4a9faf8cfa399a487ad47dff5740a Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 11 Jul 2023 15:21:51 +0300 Subject: Move dump-configuration record type to (dump documentation) Signed-off-by: Munyoki Kilyungi --- dump/documentation.scm | 24 ++++++++++++++++++++++++ dump/special-forms.scm | 22 +--------------------- 2 files changed, 25 insertions(+), 21 deletions(-) create mode 100644 dump/documentation.scm diff --git a/dump/documentation.scm b/dump/documentation.scm new file mode 100644 index 0000000..4c0fd88 --- /dev/null +++ b/dump/documentation.scm @@ -0,0 +1,24 @@ +(define-module (dump documentation) + #:use-module (srfi srfi-9 gnu) + #:export (dump-configuration + dump-configuration? + dump-configuration-triples? + dump-configuration-table-metadata? + dump-configuration-path)) + +(define-immutable-record-type + (%dump-configuration triples? table-metadata? path) + dump-configuration? + (triples? dump-configuration-triples?) + (table-metadata? dump-configuration-table-metadata?) + (path dump-configuration-path)) + +(define* (dump-configuration + #:optional + (triples? #t) + (table-metadata? #f) + (path #f)) + "Return a new configuration." + (%dump-configuration triples? table-metadata? 
path)) + + diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 78ee879..b62eb09 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -1,11 +1,11 @@ (define-module (dump special-forms) #:use-module (srfi srfi-1) - #:use-module (srfi srfi-9 gnu) #:use-module (ice-9 match) #:use-module (srfi srfi-26) #:use-module (dump sql) #:use-module (dump table) #:use-module (dump triples) + #:use-module (dump documentation) #:export (translate-forms collect-forms collect-keys @@ -19,28 +19,8 @@ syntax-let blank-node map-alist - dump-configuration - dump-configuration? - dump-configuration-triples? - dump-configuration-table-metadata? - dump-configuration-auto-documentation-port define-dump)) -(define-immutable-record-type - (%dump-configuration triples? table-metadata? auto-documentation-port) - dump-configuration? - (triples? dump-configuration-triples?) - (table-metadata? dump-configuration-table-metadata?) - (auto-documentation-port dump-configuration-auto-documentation-port)) - -(define* (dump-configuration - #:optional - (triples? #t) - (table-metadata? #f) - (auto-documentation-port #f)) - "Return a new configuration." - (%dump-configuration triples? table-metadata? auto-documentation-port)) - (define (key->assoc-ref alist x) "Recursively translate (key k) forms in source X to (assoc-ref ALIST k) forms." -- cgit v1.2.3 From 64d5a6f645891eb9f3d7e6a9cd6f08d6b18bd384 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 11 Jul 2023 15:22:18 +0300 Subject: Rename auto-documentation-port -> path We don't need to pass around ports as part of a configuration when we can instead pass in a path that can be used to open a port. Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index b62eb09..a4d1c12 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -441,9 +441,9 @@ must be remedied." 
(triple 'predicate 'rdfs:domain #,subject-type)))) (_ (error "Invalid predicate clause:" predicate-clause)))) #'(predicate-clauses ...)))) - (when (dump-configuration-auto-documentation-port dump-configuration) + (when (dump-configuration-path dump-configuration) (let ((out - (dump-configuration-auto-documentation-port + (dump-configuration-path dump-configuration))) (format out "# '~a' Metadata~%~%" (syntax->datum #'name)) #,(syntax-case #'schema-triples-clause (schema-triples) -- cgit v1.2.3 From bc3e2fe8bfbe0d9223318c72f35d8f9b2f6b10f5 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 11 Jul 2023 15:24:39 +0300 Subject: Create a new procedure for shadowing a file port * dump/documentation.scm (dump): Export call-with-documentation. (call-with-documentation): New function. Signed-off-by: Munyoki Kilyungi --- dump/documentation.scm | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dump/documentation.scm b/dump/documentation.scm index 4c0fd88..81eec43 100644 --- a/dump/documentation.scm +++ b/dump/documentation.scm @@ -4,7 +4,9 @@ dump-configuration? dump-configuration-triples? dump-configuration-table-metadata? - dump-configuration-path)) + dump-configuration-path + call-with-documentation)) + (define-immutable-record-type (%dump-configuration triples? table-metadata? path) @@ -22,3 +24,12 @@ (%dump-configuration triples? table-metadata? path)) +(define (call-with-documentation conf proc) + (let ((port #f) + (path (dump-configuration-path conf))) + (when path + (dynamic-wind + (lambda () + (set! 
port (open-file path "w"))) + (cut proc port) + (cut close port))))) -- cgit v1.2.3 From 9a086f86ab72212c6ff767c32887b83d2b8cd9e2 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 11 Jul 2023 19:17:36 +0300 Subject: Add missing srfi-26 import Signed-off-by: Munyoki Kilyungi --- dump/documentation.scm | 1 + 1 file changed, 1 insertion(+) diff --git a/dump/documentation.scm b/dump/documentation.scm index 81eec43..5228559 100644 --- a/dump/documentation.scm +++ b/dump/documentation.scm @@ -1,5 +1,6 @@ (define-module (dump documentation) #:use-module (srfi srfi-9 gnu) + #:use-module (srfi srfi-26) #:export (dump-configuration dump-configuration? dump-configuration-triples? -- cgit v1.2.3 From 0743ac3e7727bd15a925345547c94376b949be18 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 17:57:52 +0300 Subject: Revert "Pass port as an optional argument" This reverts commit 195ca590be5533935cf708d49fcb3736b3c78643. --- dump/triples.scm | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dump/triples.scm b/dump/triples.scm index 710e00f..700437a 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -44,9 +44,7 @@ characters with an underscore and prefixing with gn:PREFIX." (string->symbol `,(format #f "~a~a" prefix value)))) -(define* (triple subject predicate object - #:optional - (port #t)) +(define (triple subject predicate object) (unless (or (string? subject) (symbol? subject)) (error "Triple subject not a string or symbol:" @@ -65,7 +63,7 @@ characters with an underscore and prefixing with gn:PREFIX." (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) "~a ~a ~a .~%") (_ "~a ~a \"~a\" .~%"))]) - (format port pattern subject predicate + (format #t pattern subject predicate (if (symbol? 
object) (symbol->string object) object)))) (define* (scm->triples alist id -- cgit v1.2.3 From 591c09003bc0ccf4e8739ac3e39d60d36132a9be Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 21:23:29 +0300 Subject: Make prefix function have an optional ttl? Signed-off-by: Munyoki Kilyungi --- dump/triples.scm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dump/triples.scm b/dump/triples.scm index 700437a..33b4ae3 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -35,8 +35,12 @@ characters with an underscore and prefixing with gn:PREFIX." (string-downcase (string-trim-right str #\.))))))) -(define (prefix prefix iri) - (format #t "@prefix ~a ~a .~%" prefix iri)) +(define* (prefix prefix iri #:optional (ttl? #t)) + (format #t + (if ttl? + "@prefix ~a ~a .~%" + "PREFIX ~a ~a ~%") + prefix iri)) (define (ontology prefix value) (if (and (string? value) (string-null? value)) -- cgit v1.2.3 From 86c10b7c591b621b4a9922541f79b3e958c57645 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 21:26:52 +0300 Subject: Conditionally dump documentation --- dump/documentation.scm | 36 -------- dump/special-forms.scm | 235 ++++++++++++++++++++++++++++++------------------- 2 files changed, 142 insertions(+), 129 deletions(-) delete mode 100644 dump/documentation.scm diff --git a/dump/documentation.scm b/dump/documentation.scm deleted file mode 100644 index 5228559..0000000 --- a/dump/documentation.scm +++ /dev/null @@ -1,36 +0,0 @@ -(define-module (dump documentation) - #:use-module (srfi srfi-9 gnu) - #:use-module (srfi srfi-26) - #:export (dump-configuration - dump-configuration? - dump-configuration-triples? - dump-configuration-table-metadata? - dump-configuration-path - call-with-documentation)) - - -(define-immutable-record-type - (%dump-configuration triples? table-metadata? path) - dump-configuration? - (triples? dump-configuration-triples?) - (table-metadata? dump-configuration-table-metadata?) 
- (path dump-configuration-path)) - -(define* (dump-configuration - #:optional - (triples? #t) - (table-metadata? #f) - (path #f)) - "Return a new configuration." - (%dump-configuration triples? table-metadata? path)) - - -(define (call-with-documentation conf proc) - (let ((port #f) - (path (dump-configuration-path conf))) - (when path - (dynamic-wind - (lambda () - (set! port (open-file path "w"))) - (cut proc port) - (cut close port))))) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index a4d1c12..cc58919 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -5,7 +5,6 @@ #:use-module (dump sql) #:use-module (dump table) #:use-module (dump triples) - #:use-module (dump documentation) #:export (translate-forms collect-forms collect-keys @@ -19,6 +18,7 @@ syntax-let blank-node map-alist + dump-with-documentation define-dump)) (define (key->assoc-ref alist x) @@ -212,13 +212,13 @@ Example: ((field (query alias)) #`(format #f "~a" (syntax->datum #'alias))) ((field table column) - #`(format #f "~a.~a" + #`(format #f "~a(~a)" (syntax->datum #'table) (syntax->datum #'column))) ((field table column alias) - #`(format #f "~a.~a" - (syntax->datum table) - (syntax->datum alias))))) + #`(format #f "~a(~a)" + (syntax->datum #'table) + (syntax->datum #'alias))))) x)) (define (field->key x) @@ -396,17 +396,19 @@ must be remedied." ((triples subject predicate-clauses ...) (triples) (find-clause #'(clauses ...) 'triples))) #`(define* (name db - #:optional (dump-configuration - (dump-configuration))) - #,(syntax-case #'schema-triples-clause (schema-triples) - ((schema-triples (triple-subject triple-predicate triple-object) ...) - #`(for-each triple - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...))) - (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (when (dump-configuration-table-metadata? - dump-configuration) + #:optional + (dump-metadata? #f) + (dump-data? #f) + (dump-documentation? 
#f)) + (when dump-data? + #,(syntax-case #'schema-triples-clause (schema-triples) + ((schema-triples (triple-subject triple-predicate triple-object) ...) + #`(for-each triple + (list 'triple-subject ...) + (list 'triple-predicate ...) + (list 'triple-object ...))) + (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))) + (when dump-metadata? #,@(let ((dump-table (symbol->string (syntax->datum #'primary-table))) (subject-type (any (lambda (predicate) (syntax-case predicate (rdf:type) @@ -420,20 +422,20 @@ must be remedied." #`(begin (scm->triples (map-alist '() - (set rdf:type 'gn:dump) - (set gn:createsPredicate 'predicate) - (filter-set gn:forSubjectType #,subject-type) - (multiset gn:dependsOn - '#,(map (lambda (field) - (match (syntax->datum field) - ((table-name column-name _ ...) - (datum->syntax - x (column-id (symbol->string table-name) - (symbol->string column-name)))) - (((query alias)) - (datum->syntax - x (column-id query (symbol->string alias)))))) - (collect-fields predicate-clause)))) + (set rdf:type 'gn:dump) + (set gn:createsPredicate 'predicate) + (filter-set gn:forSubjectType #,subject-type) + (multiset gn:dependsOn + '#,(map (lambda (field) + (match (syntax->datum field) + ((table-name column-name _ ...) + (datum->syntax + x (column-id (symbol->string table-name) + (symbol->string column-name)))) + (((query alias)) + (datum->syntax + x (column-id query (symbol->string alias)))))) + (collect-fields predicate-clause)))) #,(dump-id dump-table (syntax->datum #'predicate))) ;; Automatically create domain triples ;; for predicates. @@ -441,22 +443,21 @@ must be remedied." 
(triple 'predicate 'rdfs:domain #,subject-type)))) (_ (error "Invalid predicate clause:" predicate-clause)))) #'(predicate-clauses ...)))) - (when (dump-configuration-path dump-configuration) - (let ((out - (dump-configuration-path - dump-configuration))) - (format out "# '~a' Metadata~%~%" (syntax->datum #'name)) - #,(syntax-case #'schema-triples-clause (schema-triples) - ((schema-triples (triple-subject triple-predicate triple-object) ...) - #`(begin - (format out "## Schema Triples for '~a'~%~%" (syntax->datum #'name)) - (for-each (lambda (s p o) - (format out "~a -> ~a -> ~a~%" s p o)) - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...)))) - (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (format out " + + (when dump-documentation? + (format #t "~%## '~a'~%~%" (syntax->datum #'name)) + #,(syntax-case #'schema-triples-clause (schema-triples) + ((schema-triples (triple-subject triple-predicate triple-object) ...) + #`(begin + (format #t "## Schema Triples:~%~%```text~%") + (for-each (lambda (s p o) + (format #t "~a -> ~a -> ~a~%" s p o)) + (list 'triple-subject ...) + (list 'triple-predicate ...) + (list 'triple-object ...)) + (format #t "```"))) + (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) + (format #t " ## Generated Triples: The following SQL query was executed: @@ -467,61 +468,109 @@ The following SQL query was executed: The above query results to triples that have the form: +```text " - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...)) + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...)) + (for-each (match-lambda + ((predicate . 
object) + (format #t "~a -> ~a -> ~a ~%" + #,(field->datum #'subject) + predicate object))) + (map-alist + '() + #,@(field->datum #'(predicate-clauses ...)))) + (format #t "```~%Here's an example query:~%~%```sparql~%") + (dump-documentation?) + (newline) + (let* ((result + (map-alist (sql-find + db + (format #f "~a LIMIT 1" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) + #,@(field->key #'(predicate-clauses ...)))) + (first-n (list-head result + (let ((n (truncate + (+ (max (exact-integer-sqrt (length result))) 1)))) + (if (< n 3) + (truncate (/ (length result) 2)) + n))))) + (format #t "SELECT ?s ?p ?o WHERE { ~%") (for-each (match-lambda ((predicate . object) - (format out "~a -> ~a -> ~a ~%" - #,(field->datum #'subject) - predicate object))) - (map-alist - '() - #,@(field->datum #'(predicate-clauses ...)))) - (format out "~%Here's an example query:~%~%") - (let* ((result - (map-alist (sql-find - db - (format #f "~a LIMIT 1" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - #,@(field->key #'(predicate-clauses ...)))) - (first-n (list-head result (truncate (/ (length result) 2))))) - (format out "SELECT ?s ?p ?o WHERE { ~%") - (for-each (match-lambda - ((predicate . object) - (format out - (match object - ((or (? symbol? object) - (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) - " ?s ~a ~a .~%") - (_ " ?s ~a \"~a\" .~%")) - predicate object))) - first-n) - (format out " ?s ?p ?o .~%}~%")) - (format out "~%Expected Result:~%~%") - (sql-for-each (lambda (row) - (scm->triples - (map-alist row #,@(field->key #'(predicate-clauses ...))) - #,(field->assoc-ref #'row #'subject) - (lambda (s p o) - (triple s p o out)))) - db - (format #f "~a LIMIT 1" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) 
- tables-raw ...))) - ;; To clear the buffer - (force-output out))) - (when (dump-configuration-triples? dump-configuration) + (match object + ((or (? symbol? object) + (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) + (format #t " ?s ~a ~a .~%" predicate object)) + ((and (? string? object) + (? (lambda (el) (not (string-null? el))) object)) + (format #t " ?s ~a \"~a\" .~%" predicate object)) + (_ "")))) + first-n) + (format #t " ?s ?p ?o .~%}~%```~%")) + (format #t "~%Expected Result:~%~%```rdf~%") (sql-for-each (lambda (row) + (scm->triples + (map-alist row #,@(field->key #'(predicate-clauses ...))) + #,(field->assoc-ref #'row #'subject) + (lambda (s p o) + (triple s p o)))) + db + (format #f "~a LIMIT 1" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) + (format #t "```~%~%")) + (when dump-data? + (sql-for-each (lambda (row) (scm->triples (map-alist row #,@(field->key #'(predicate-clauses ...))) #,(field->assoc-ref #'row #'subject))) db (select-query #,(collect-fields #'(subject predicate-clauses ...)) (primary-table other-tables ...) - tables-raw ...)))))) + tables-raw ...))) + ))) (_ (error "Invalid define-dump syntax:" (syntax->datum x)))))) + +(define (get-keyword-value args keyword default) + (let ((kv (memq keyword args))) + (if (and kv (>= (length kv) 2)) + (cadr kv) + default))) + +(define-syntax dump-with-documentation + (syntax-rules () + ((_ (name n) + (connection conn) + (table-metadata? t?) + (prefixes ((pref uri) ...)) + (inputs (in ...)) + (outputs out)) + (let ((rdf-path + (get-keyword-value `out #:rdf "")) + (doc-path + (get-keyword-value `out #:documentation ""))) + ;; Dumping documentation + (call-with-target-database + conn + (lambda (db) + (with-output-to-file ; + doc-path + (lambda () + (format #t "# ~a" n) + (in db #f #f + (lambda () (prefix pref uri #f) ...)) ...) 
+ #:encoding "utf8") + ;; Dumping the actual data + (with-output-to-file + rdf-path + (lambda () + ;; Add the prefixes + (prefix pref uri) ... + (newline) + (in db #f #t #f) ...) + #:encoding "utf8"))))))) -- cgit v1.2.3 From bcfd39682bdf3a0a8ea5c1efaf0c33a9367991ad Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 21:27:09 +0300 Subject: Dump species metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 57 +++++++++++++------------------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 2937c80..4ea9ce7 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-species @@ -34,11 +31,11 @@ (set gn:displayName (field Species MenuName)) (set gn:binomialName (field Species FullName)) (set gn:family (field Species Family)) - (set gn:organism (ontology 'ncbiTaxon: (field Species TaxonomyId))))) + (set gn:organism (ontology 'taxon: (field Species TaxonomyId))))) (define-dump dump-strain (tables (Strain - (join Species "ON Strain.SpeciesId = Species.SpeciesId"))) + (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples (gn:strainOfSpecies rdfs:domain gn:strain) (gn:strainOfSpecies rdfs:range gn:species) @@ -106,36 +103,20 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-species-metadata.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix 
"pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "ncbiTaxon:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (newline) - (dump-species db) - (dump-strain db) - (dump-mapping-method db) - (dump-inbred-set db) - (dump-avg-method db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Species Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + (("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("taxon:" ""))) + (inputs + (dump-species + dump-strain + dump-mapping-method + dump-avg-method)) + (outputs + (#:documentation "docs/dump-species-metadata.md" + #:rdf "./verified-data/dump-species-metadata.ttl"))) -- cgit v1.2.3 From 3494fa35f332e0da6e4d2c76eac0a286fa4f5646 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 23:03:07 +0300 Subject: Dump tissue metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-tissue.scm | 57 ++++++++++++++++-------------------------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index b1104ab..4998cff 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -13,53 +13,34 @@ (define %connection-settings - (call-with-input-file (list-ref (command-line) 1) - read)) - -(define %dump-directory - (list-ref (command-line) 2)) + (call-with-input-file (list-ref (command-line) 1) + read)) (define-dump dump-tissue - ;; The Name and TissueName fields seem to be identical. BIRN_lex_ID - ;; and BIRN_lex_Name are mostly NULL. - (tables (Tissue)) + ;; The Name and TissueName fields seem to be identical. BIRN_lex_ID + ;; and BIRN_lex_Name are mostly NULL. + (tables (Tissue)) (schema-triples (gn:name rdfs:range rdfs:Literal)) ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. 
(triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn:tissue) - (set gn:name (field Tissue Name)))) + (set rdf:type 'gn:tissue) + (set gn:name (field Tissue Name)))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-tissue.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (newline) - (dump-tissue db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Tissue Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + (("rdf:" "") + ("rdfs:" "") + ("gn:" ""))) + (inputs + (dump-tissue)) + (outputs + (#:documentation "./docs/dump-tissue.md" #:rdf "./verified-data/dump-tissue.ttl"))) -- cgit v1.2.3 From 47a4af4abad4f872db2832adb61f0bc60ac0ee5a Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 23:03:21 +0300 Subject: Dump publication metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-publication.scm | 51 +++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 784d815..ff46d3d 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -13,11 +13,8 @@ (define %connection-settings - (call-with-input-file (list-ref (command-line) 1) - read)) - -(define %dump-directory - (list-ref (command-line) 2)) + (call-with-input-file (list-ref (command-line) 1) + read)) @@ -65,32 +62,18 @@ -(call-with-target-database - 
%connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-publication.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "publication:" "") - (newline) - (dump-publication db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Publications Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + (("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("publication:" "") + ("pubmed:" ""))) + (inputs + (dump-publication)) + (outputs + (#:documentation "./docs/dump-publication.md" + #:rdf "./verified-data/dump-publication.md"))) -- cgit v1.2.3 From 3453fed05b3222fab7a153ba378e833f79f02924 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 17 Jul 2023 23:03:39 +0300 Subject: Dump dataset metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-dataset-metadata.scm | 50 ++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm index 789e298..53c381c 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dump-dataset-metadata.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - ;; One email ID in the Investigators table has spaces in it. 
This @@ -150,8 +147,7 @@ (field Investigators Email))) (set gn:datasetOfOrganization (field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations))) - (set gn:accessionId (string-append "GN" (number->string - (field InfoFiles GN_AccesionId)))) + (set gn:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) (set gn:datasetStatusName (string-downcase (field DatasetStatus DatasetStatusName))) (set gn:datasetOfInbredSet @@ -234,27 +230,23 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-info-pages.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "geoSeries:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (prefix "dataset:" "") - (newline) - (dump-info-files db) - (dump-investigators db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Info files / Investigators Metadata") + (connection %connection-settings) + (table-metadata? 
#f) + (prefixes + (("dct:" "") + ("geoSeries:" "") + ("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("foaf:" "") + ("taxon:" "") + ("dataset:" ""))) + (inputs + (dump-info-files + dump-investigators)) + (outputs + (#:documentation "./docs/dump-info-pages.md" + #:rdf "./verified-data/dump-info-pages.ttl"))) + -- cgit v1.2.3 From 3b8d4f106d0da0cc7400da29d0bc8d5bede2f016 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 13:29:40 +0300 Subject: Rewrite dump-with-documentation to be order-agnostic Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 87 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 30 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index cc58919..b37cbd3 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -544,33 +544,60 @@ The above query results to triples that have the form: (define-syntax dump-with-documentation (syntax-rules () - ((_ (name n) - (connection conn) - (table-metadata? t?) - (prefixes ((pref uri) ...)) - (inputs (in ...)) - (outputs out)) - (let ((rdf-path - (get-keyword-value `out #:rdf "")) - (doc-path - (get-keyword-value `out #:documentation ""))) - ;; Dumping documentation - (call-with-target-database - conn - (lambda (db) - (with-output-to-file ; - doc-path - (lambda () - (format #t "# ~a" n) - (in db #f #f - (lambda () (prefix pref uri #f) ...)) ...) - #:encoding "utf8") - ;; Dumping the actual data - (with-output-to-file - rdf-path - (lambda () - ;; Add the prefixes - (prefix pref uri) ... - (newline) - (in db #f #t #f) ...) - #:encoding "utf8"))))))) + ((_ (key value) ...) + (let ((name "") + (connection "") + (table-metadata? "") + (prefixes "") + (inputs "") + (outputs "")) + (for-each + (match-lambda + (('name n) + (set! name n)) + (('connection conn) + (set! connection conn)) + (('table-metadata? t-metadata?) + (set! table-metadata? t-metadata?)) + (('prefixes p) + (set! prefixes p)) + (('inputs i) + (set! 
inputs i)) + (('outputs o) + (set! outputs o))) + (list (list 'key value) ...)) + (let ((rdf-path (get-keyword-value outputs #:rdf "")) + (doc-path (get-keyword-value outputs #:documentation "")) + (prefix-thunk (lambda () (for-each + (match-lambda + ((k v) + (begin + (prefix k v)))) + prefixes)))) + ;; Dumping the documentation first + (call-with-target-database + connection + (lambda (db) + (with-output-to-file ; + doc-path + (lambda () + (format #t "# ~a" name) + (for-each + (lambda (proc) + (proc db #f #f prefix-thunk)) + inputs)) + #:encoding "utf8") + + ;; Dumping the actual data + (with-output-to-file + rdf-path + (lambda () + ;; Add the prefixes + (prefix-thunk) + (newline) + (for-each + (lambda (proc) + (proc db #f #t #f)) + inputs)) + #:encoding "utf8")))))))) + -- cgit v1.2.3 From b59dfba16fcae3f608b6e000f9193bdfad5d6b52 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 13:30:09 +0300 Subject: Make dump-data? default to #f when dumping data Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index b37cbd3..cf591e9 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -398,7 +398,7 @@ must be remedied." #`(define* (name db #:optional (dump-metadata? #f) - (dump-data? #f) + (dump-data? #t) (dump-documentation? #f)) (when dump-data? #,(syntax-case #'schema-triples-clause (schema-triples) @@ -443,7 +443,6 @@ must be remedied." (triple 'predicate 'rdfs:domain #,subject-type)))) (_ (error "Invalid predicate clause:" predicate-clause)))) #'(predicate-clauses ...)))) - (when dump-documentation? 
(format #t "~%## '~a'~%~%" (syntax->datum #'name)) #,(syntax-case #'schema-triples-clause (schema-triples) -- cgit v1.2.3 From 4c6ff9d1199ec9ba43a826dad3df2e766190d327 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:09:17 +0300 Subject: Make string->identifier take optional keyword args Signed-off-by: Munyoki Kilyungi --- dump/triples.scm | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/dump/triples.scm b/dump/triples.scm index 33b4ae3..2b43d68 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -21,19 +21,27 @@ (string->symbol (format #f "~s~a" string-field schema))))) -(define (string->identifier prefix str) +(define* (string->identifier prefix str + #:optional #:key + (ontology "gn:") + (separator "_") + (proc string-downcase)) "Convert STR to a turtle identifier after replacing illegal characters with an underscore and prefixing with gn:PREFIX." (if (string-null? str) "" (string->symbol - (string-append "gn:" prefix "_" - (string-map (lambda (c) - (case c - ((#\/ #\< #\> #\+ #\( #\) #\space #\@) #\_) - (else c))) - (string-downcase - (string-trim-right str #\.))))))) + (string-append ontology prefix separator + (string-delete + (lambda (c) + (eq? c #\))) + (string-map (lambda (c) + (case c + ((#\/ #\< #\> #\+ #\( #\space #\@) #\_) + (else c))) + (proc + (string-trim-right str #\.)))))))) + (define* (prefix prefix iri #:optional (ttl? #t)) (format #t -- cgit v1.2.3 From d27851ca714d2b0c2256d295df38e1b841547aed Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:09:45 +0300 Subject: Add string-capitalize-first * dump/strings.scm: Export string-capitalize-first (string-capitalize-first): New function. 
Signed-off-by: Munyoki Kilyungi --- dump/strings.scm | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dump/strings.scm b/dump/strings.scm index 849b3c3..282b4e1 100644 --- a/dump/strings.scm +++ b/dump/strings.scm @@ -10,7 +10,8 @@ delete-substrings replace-substrings sanitize-rdf-string - snake->lower-camel)) + snake->lower-camel + string-capitalize-first)) (define (time-unix->string seconds . maybe-format) "Given an integer saying the number of seconds since the Unix @@ -92,3 +93,8 @@ association list mapping substrings to their replacements." char)))) (drop char-list 1) char-list))))) + +(define (string-capitalize-first string) + (string-titlecase + (string-downcase string) 0 1)) + -- cgit v1.2.3 From dfa1e64260d08cae88beb210569a5d0e231dc040 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:18:54 +0300 Subject: Capitalize species identifier Replace gn:species_mus_musculus with gn:species:Mus_musculus. --- examples/dump-species-metadata.scm | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 4ea9ce7..898aa5a 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -25,7 +25,11 @@ (gn:displayName rdfs:range rdfs:Literal) (gn:binomialName rdfs:range rdfs:Literal) (gn:family rdfs:range rdfs:Literal)) - (triples (string->identifier "species" (field Species FullName)) + (triples + (string->identifier "" (field Species FullName) + #:ontology "gn:species:" + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:species) (set gn:name (field Species SpeciesName)) (set gn:displayName (field Species MenuName)) @@ -50,7 +54,10 @@ 'pre "_" 'post)) (set rdf:type 'gn:strain) (set gn:strainOfSpecies - (string->identifier "species" (field Species FullName))) + (string->identifier "" (field Species FullName) + #:ontology "gn:species:" + #:separator "" + #:proc 
string-capitalize-first)) ;; Name, and maybe a second name (set gn:name (sanitize-rdf-string (field Strain Name))) (set gn:name (sanitize-rdf-string (field Strain Name2))) @@ -85,7 +92,10 @@ (set gn:inbredSetOfMappingMethod (field MappingMethod Name)) (set gn:inbredSetCode (field InbredSet InbredSetCode)) (set gn:inbredSetOfSpecies - (string->identifier "species" (field Species FullName BinomialName))) + (string->identifier "" (field Species FullName BinomialName) + #:ontology "gn:species:" + #:separator "" + #:proc string-capitalize-first)) (set gn:genotype (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" genotypeP))) (set gn:phenotype -- cgit v1.2.3 From 5e4b70b286a155296e8efa6e4a3c01e5306e61ed Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:26:59 +0300 Subject: Convert symbol to a string when show-casing triples in doc Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index cf591e9..738c48c 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -476,7 +476,10 @@ The above query results to triples that have the form: ((predicate . object) (format #t "~a -> ~a -> ~a ~%" #,(field->datum #'subject) - predicate object))) + predicate + (if (symbol? 
object) + (symbol->string object) + object)))) (map-alist '() #,@(field->datum #'(predicate-clauses ...)))) -- cgit v1.2.3 From 4485d3e9c043bf6b3952f83175b358edf0ef63b3 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:27:58 +0300 Subject: Add "gn:species:" prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 898aa5a..653fb11 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -118,10 +118,11 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - (("rdf:" "") - ("rdfs:" "") - ("gn:" "") - ("taxon:" ""))) + '(("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("gn:species:" "") + ("taxon:" ""))) (inputs (dump-species dump-strain -- cgit v1.2.3 From 5ed67ee1ce654a545ec11481ec0e3a78cda89a75 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 16:29:07 +0300 Subject: Update dump-species to have lists for inputs and output Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 653fb11..7c6bfc3 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -124,10 +124,10 @@ ("gn:species:" "") ("taxon:" ""))) (inputs - (dump-species - dump-strain - dump-mapping-method - dump-avg-method)) + (list dump-species + dump-strain + dump-mapping-method + dump-avg-method)) (outputs - (#:documentation "docs/dump-species-metadata.md" - #:rdf "./verified-data/dump-species-metadata.ttl"))) + '(#:documentation "./docs/dump-species-metadata.md" + #:rdf "./verified-data/dump-species-metadata.ttl"))) -- cgit v1.2.3 From 90225bfaef8227ae47a592a2e3319633ff367adb Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 
16:38:42 +0300 Subject: Replace gn:species with gn-species Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 7c6bfc3..e83893f 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -27,7 +27,7 @@ (gn:family rdfs:range rdfs:Literal)) (triples (string->identifier "" (field Species FullName) - #:ontology "gn:species:" + #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:species) @@ -55,7 +55,7 @@ (set rdf:type 'gn:strain) (set gn:strainOfSpecies (string->identifier "" (field Species FullName) - #:ontology "gn:species:" + #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first)) ;; Name, and maybe a second name @@ -93,7 +93,7 @@ (set gn:inbredSetCode (field InbredSet InbredSetCode)) (set gn:inbredSetOfSpecies (string->identifier "" (field Species FullName BinomialName) - #:ontology "gn:species:" + #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first)) (set gn:genotype @@ -121,7 +121,7 @@ '(("rdf:" "") ("rdfs:" "") ("gn:" "") - ("gn:species:" "") + ("gn-species:" "") ("taxon:" ""))) (inputs (list dump-species -- cgit v1.2.3 From 5968603917672915f46d0715a47a0d0f9da34bec Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:16:03 +0300 Subject: Use size of the results set to come up with sparql example Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 738c48c..88c2e82 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -498,8 +498,8 @@ The above query results to triples that have the form: (let ((n (truncate (+ (max (exact-integer-sqrt (length result))) 1)))) (if (< n 3) - (truncate (/ (length result) 2)) - n))))) + (length 
result) + n))))) (format #t "SELECT ?s ?p ?o WHERE { ~%") (for-each (match-lambda ((predicate . object) -- cgit v1.2.3 From a525d36edb35587d2f95142d4e6e7e9a32c0136d Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:16:56 +0300 Subject: Replace gn with gn-term and gn-id where suitable Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 110 +++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 53 deletions(-) diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index e83893f..48fd425 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -21,53 +21,54 @@ (define-dump dump-species (tables (Species)) (schema-triples - (gn:name rdfs:range rdfs:Literal) - (gn:displayName rdfs:range rdfs:Literal) - (gn:binomialName rdfs:range rdfs:Literal) - (gn:family rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:displayName rdfs:range rdfs:Literal) + (gn-term:binomialName rdfs:range rdfs:Literal) + (gn-term:family rdfs:range rdfs:Literal)) (triples (string->identifier "" (field Species FullName) - #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:species) - (set gn:name (field Species SpeciesName)) - (set gn:displayName (field Species MenuName)) - (set gn:binomialName (field Species FullName)) - (set gn:family (field Species Family)) - (set gn:organism (ontology 'taxon: (field Species TaxonomyId))))) + (set rdf:type 'gn-id:species) + (set gn-term:name (field Species SpeciesName)) + (set gn-term:displayName (field Species MenuName)) + (set gn-term:binomialName (field Species FullName)) + (set gn-term:family (field Species Family)) + (set gn-term:organism (ontology 'taxon: (field Species TaxonomyId))))) (define-dump dump-strain (tables (Strain (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples - (gn:strainOfSpecies rdfs:domain gn:strain) - 
(gn:strainOfSpecies rdfs:range gn:species) - (gn:name rdfs:range rdfs:Literal) - (gn:alias rdfs:range rdfs:Literal) - (gn:symbol rdfs:range rdfs:Literal)) + (gn-term:strainOfSpecies rdfs:domain gn-term:strain) + (gn-term:strainOfSpecies rdfs:range gn-term:species) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:alias rdfs:range rdfs:Literal) + (gn-term:symbol rdfs:range rdfs:Literal)) (triples (string->identifier - "strain" + "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ("CAST(CONVERT(BINARY CONVERT(Strain.Name USING latin1) USING utf8) AS VARCHAR(15000))" StrainName)) - 'pre "_" 'post)) - (set rdf:type 'gn:strain) - (set gn:strainOfSpecies + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) + (set rdf:type 'gn-id:strain) + (set gn-term:strainOfSpecies (string->identifier "" (field Species FullName) - #:ontology "gn-species:" #:separator "" #:proc string-capitalize-first)) ;; Name, and maybe a second name - (set gn:name (sanitize-rdf-string (field Strain Name))) - (set gn:name (sanitize-rdf-string (field Strain Name2))) - (set gn:alias (sanitize-rdf-string (field Strain Alias))) - (set gn:symbol (field Strain Symbol)))) + (set gn-term:name (sanitize-rdf-string (field Strain Name))) + (set gn-term:name2 (sanitize-rdf-string (field Strain Name2))) + (set gn-term:alias (sanitize-rdf-string (field Strain Alias))) + (set gn-term:symbol (field Strain Symbol)))) (define-dump dump-mapping-method (tables (MappingMethod)) - (triples (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gn:mappingMethod))) + (triples + (string->identifier "mappingMethod" (field MappingMethod Name)) + (set rdf:type 'gn-id:mappingMethod))) (define-dump dump-inbred-set (tables (InbredSet @@ -75,30 +76,33 @@ (left-join MappingMethod "ON InbredSet.MappingMethodId=MappingMethod.Id"))) (schema-triples - (gn:fullName rdfs:range rdfs:Literal) - (gn:geneticType rdfs:range rdfs:Literal) - (gn:inbredSetCode rdfs:range rdfs:Literal) - 
(gn:inbredFamily rdfs:range rdfs:Literal) - (gn:inbredSetOfSpecies rdfs:range gn:species) - (gn:inbredSetType rdfs:range rdfs:Literal) - (gn:phenotype rdfs:range gn:inbredSetType) - (gn:genotype rdfs:range gn:inbredSetType) - (gn:inbredSetOfMappingMethod rdfs:range gn:mappingMethod)) - (triples (string->identifier "inbredSet" (field InbredSet Name)) - (set rdf:type 'gn:inbredSet) - (set gn:binomialName (field InbredSet FullName)) - (set gn:geneticType (field InbredSet GeneticType)) - (set gn:inbredFamily (field InbredSet Family)) - (set gn:inbredSetOfMappingMethod (field MappingMethod Name)) - (set gn:inbredSetCode (field InbredSet InbredSetCode)) - (set gn:inbredSetOfSpecies + (gn-term:fullName rdfs:range rdfs:Literal) + (gn-term:geneticType rdfs:range rdfs:Literal) + (gn-term:inbredSetCode rdfs:range rdfs:Literal) + (gn-term:inbredFamily rdfs:range rdfs:Literal) + (gn-term:inbredSetOfSpecies rdfs:range gn:species) + (gn-term:inbredSetType rdfs:range rdfs:Literal) + (gn-term:phenotype rdfs:range gn-term:inbredSetType) + (gn-term:genotype rdfs:range gn-term:inbredSetType) + (gn-term:inbredSetOfMappingMethod rdfs:range gn-term:mappingMethod)) + (triples (string->identifier + "" (field InbredSet Name) + #:separator "" + #:proc string-capitalize-first) + (set rdf:type 'gn-id:inbredSet) + (set gn-term:binomialName (field InbredSet FullName)) + (set gn-term:geneticType (field InbredSet GeneticType)) + (set gn-term:inbredFamily (field InbredSet Family)) + (set gn-term:inbredSetOfMappingMethod (field MappingMethod Name)) + (set gn-term:inbredSetCode (field InbredSet InbredSetCode)) + (set gn-term:inbredSetOfSpecies (string->identifier "" (field Species FullName BinomialName) - #:ontology "gn-species:" + #:ontology "gn-id:" #:separator "" #:proc string-capitalize-first)) - (set gn:genotype + (set gn-term:genotype (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" 
genotypeP))) - (set gn:phenotype + (set gn-term:phenotype (field ("IF ((SELECT GenoFreeze.Name FROM GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'DNA Markers and SNPs', '')" phenotypeP))))) (define-dump dump-avg-method @@ -106,10 +110,10 @@ ;; the Name field. (tables (AvgMethod)) (schema-triples - (gn:name rdfs:range rdfs:Literal)) + (gn-term:normalization rdfs:range rdfs:Literal)) (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gn:avgMethod) - (set gn:name (field AvgMethod Name)))) + (set rdf:type 'gn-id:avgMethod) + (set gn-term:normalization (field AvgMethod Normalization)))) @@ -118,10 +122,10 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("rdf:" "") + '(("gn-id:" "") + ("gn-term:" "") + ("rdf:" "") ("rdfs:" "") - ("gn:" "") - ("gn-species:" "") ("taxon:" ""))) (inputs (list dump-species -- cgit v1.2.3 From 22de1613596c2a429d52a65702e72018d4011b46 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:19:20 +0300 Subject: Update tissue dump to use gn-term/gn-id Signed-off-by: Munyoki Kilyungi --- examples/dump-tissue.scm | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index 4998cff..376129c 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -23,12 +23,12 @@ ;; and BIRN_lex_Name are mostly NULL. (tables (Tissue)) (schema-triples - (gn:name rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal)) ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn:tissue) - (set gn:name (field Tissue Name)))) + (set rdf:type 'gn-id:tissue) + (set gn-term:name (field Tissue Name)))) @@ -37,10 +37,12 @@ (connection %connection-settings) (table-metadata? 
#f) (prefixes - (("rdf:" "") - ("rdfs:" "") - ("gn:" ""))) + '(("gn-id:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" ""))) (inputs - (dump-tissue)) + (list dump-tissue)) (outputs - (#:documentation "./docs/dump-tissue.md" #:rdf "./verified-data/dump-tissue.ttl"))) + '(#:documentation "./docs/dump-tissue.md" + #:rdf "./verified-data/dump-tissue.ttl"))) -- cgit v1.2.3 From 30525673f58ace73f9ccc84de570d6967e79958e Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:26:50 +0300 Subject: Dump probeset metadata with documentation Signed-off-by: Munyoki Kilyungi --- examples/dump-probeset.scm | 174 ++++++++------------------------------------- 1 file changed, 31 insertions(+), 143 deletions(-) diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm index 0a6e07b..be09b48 100755 --- a/examples/dump-probeset.scm +++ b/examples/dump-probeset.scm @@ -16,17 +16,13 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - -(define-dump dump-probeset-0 +(define-dump dump-probeset (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")) - "LIMIT 2000000 OFFSET 0") + (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples - (gn:name rdfs:range rdfs:Literal) - (gn:probeset rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:probeset rdfs:range rdfs:Literal)) (triples (ontology 'probeset: (string-trim-both @@ -35,142 +31,34 @@ (field ("IFNULL(NULLIF(TRIM(ProbeSet.Name), ''), ProbeSet.Id)" name)) 'pre "_" 'post))) - (set rdf:type 'gn:probeset) - (set gn:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn:name (field ProbeSet Name)) - (set gn:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn:description (sanitize-rdf-string - (field ProbeSet description))) - (set gn:chr (field ProbeSet Chr)) - (set gn:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - ;; For now 
have the tissue, and alias as one line without - ;; splitting to make the dump faster - ;; (set gn:tissue (field ("IFNULL(ProbeSet.Tissue, '')" Tissue))) - ;; (set gn:alias (field ProbeSet alias)) - ;; (set gn:generif (ontology 'generif: (field ProbeSet GeneId))) - (set gn:blatSeq (sanitize-rdf-string - (string-trim-both (field ProbeSet BlatSeq)))) - (set gn:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - ;; (set gn:unigene (field ProbeSet UniGeneId)) - ;; (set gn:genbank (field ProbeSet GenbankId)) - ;; (set gn:omim (sanitize-rdf-string (string-trim-both (field ProbeSet OMIM)))) - ;; (set gn:RefSeq_TranscriptId (field ProbeSet RefSeq_TranscriptId)) - (set gn:uniProtReference (ontology 'uniprot: - (field ProbeSet UniProtID))))) + (set rdf:type 'gn-id:probeset) + (set gn-term:chipOf (string->identifier "platform" (field GeneChip Name))) + (set gn-term:name (field ProbeSet Name)) + (set gn-term:symbol (delete-substrings (field ProbeSet Symbol) "\"")) + (set gn-term:description (sanitize-rdf-string + (field ProbeSet description))) + (set gn-term:chr (field ProbeSet Chr)) + (set gn-term:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) + (set gn-term:blatSeq (sanitize-rdf-string + (string-trim-both (field ProbeSet BlatSeq)))) + (set gn-term:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) + (set gn-term:uniProtReference (ontology 'uniprot: + (field ProbeSet UniProtID))))) -(define-dump dump-probeset-1 - (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")) - "LIMIT 2000000 OFFSET 2000000") - (schema-triples - (gn:name rdfs:range rdfs:Literal) - (gn:probeset rdfs:range rdfs:Literal)) - (triples (ontology - 'probeset: - (string-trim-both - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(NULLIF(TRIM(ProbeSet.Name), ''), ProbeSet.Id)" - name)) - 'pre "_" 'post))) - (set rdf:type 'gn:probeset) - (set gn:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn:name (field 
ProbeSet Name)) - (set gn:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn:description (sanitize-rdf-string - (field ProbeSet description))) - (set gn:chr (field ProbeSet Chr)) - (set gn:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gn:blatSeq (sanitize-rdf-string - (string-trim-both (field ProbeSet BlatSeq)))) - (set gn:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gn:uniProtReference (ontology 'uniprot: - (field ProbeSet UniProtID))))) -(define-dump dump-probeset-2 - (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")) - "WHERE ProbeSet.Name IS NOT NULL LIMIT 2000000 OFFSET 4000000") - (schema-triples - (gn:name rdfs:range rdfs:Literal) - (gn:probeset rdfs:range rdfs:Literal)) - (triples (ontology - 'probeset: - (string-trim-both - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" - name)) - 'pre "_" 'post))) - (set rdf:type 'gn:probeset) - (set gn:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn:name (field ProbeSet Name)) - (set gn:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn:description (sanitize-rdf-string - (field ProbeSet description))) - (set gn:chr (field ProbeSet Chr)) - (set gn:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gn:blatSeq (sanitize-rdf-string - (string-trim-both (field ProbeSet BlatSeq)))) - (set gn:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gn:uniProtReference (ontology 'uniprot: - (field ProbeSet UniProtID))))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-probeset-0.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix 
"up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-probeset-0 db)) - #:encoding "utf8") - (with-output-to-file (string-append %dump-directory "dump-probeset-1.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-probeset-1 db)) - #:encoding "utf8") - (with-output-to-file (string-append %dump-directory "dump-probeset-2.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-probeset-2 db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "ProbeSet Metadata") + (connection %connection-settings) + (table-metadata? 
#f) + (prefixes + '(("probeset:" "") + ("rdf:" "") + ("rdfs:" ""))) + (inputs + (list dump-probeset)) + (outputs + '(#:documentation "./docs/dump-probeset.md" + #:rdf "./verified-data/dump-probeset.ttl"))) -- cgit v1.2.3 From c4fcfa27ce508a77bae346449302bedb2f415ed2 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:27:06 +0300 Subject: Make the default ontology "gn-id:" Signed-off-by: Munyoki Kilyungi --- dump/triples.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/triples.scm b/dump/triples.scm index 2b43d68..c168f3e 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -23,7 +23,7 @@ (define* (string->identifier prefix str #:optional #:key - (ontology "gn:") + (ontology "gn-id:") (separator "_") (proc string-downcase)) "Convert STR to a turtle identifier after replacing illegal -- cgit v1.2.3 From f385a1286aa2b53eaa85a3ca9ef3c5b82eabb3cc Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:32:19 +0300 Subject: Dump phenotypes with documentation. 
--- examples/dump-phenotype.scm | 50 +++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index e4d20c9..33577ce 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -111,34 +111,22 @@ (ontology 'publication: pmid)))))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-phenotype.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "dataset:" "") - (prefix "publication:" "") - (newline) - (dump-publishfreeze db) - (dump-phenotypes db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Phenotypes Metadata") + (connection %connection-settings) + (table-metadata? 
#f) + (prefixes + '(("gn-id:" "") + ("gn-term:" "") + ("phenotype:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" "") + ("dataset:" "") + ("publication:" ""))) + (inputs + (list dump-publishfreeze + dump-phenotype)) + (outputs + '(#:documentation "./docs/dump-phenotype.md" + #:rdf "./verified-data/dump-phenotype.ttl"))) -- cgit v1.2.3 From 1c106042a130f0ec59e89110e673dfbfdf5943d7 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 18 Jul 2023 17:34:43 +0300 Subject: Replace :gn with :gn-id and :gn-term Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 88c2e82..283fcc2 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -422,10 +422,10 @@ must be remedied." #`(begin (scm->triples (map-alist '() - (set rdf:type 'gn:dump) - (set gn:createsPredicate 'predicate) - (filter-set gn:forSubjectType #,subject-type) - (multiset gn:dependsOn + (set rdf:type 'gn-id:dump) + (set gn-term:createsPredicate 'predicate) + (filter-set gn-term:forSubjectType #,subject-type) + (multiset gn-term:dependsOn '#,(map (lambda (field) (match (syntax->datum field) ((table-name column-name _ ...) 
-- cgit v1.2.3 From f7542c86fd6f63c0315776953d734eaef502d667 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 14:28:15 +0300 Subject: Rename 'gn-id' prefix to 'gn' Signed-off-by: Munyoki Kilyungi --- dump/triples.scm | 2 +- examples/dump-publication.scm | 43 +++++++++++++++++++------------------- examples/dump-species-metadata.scm | 18 ++++++++-------- examples/dump-tissue.scm | 4 ++-- 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/dump/triples.scm b/dump/triples.scm index c168f3e..2b43d68 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -23,7 +23,7 @@ (define* (string->identifier prefix str #:optional #:key - (ontology "gn-id:") + (ontology "gn:") (separator "_") (proc string-downcase)) "Convert STR to a turtle identifier after replacing illegal diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index ff46d3d..6f349d6 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -21,15 +21,15 @@ (define-dump dump-publication (tables (Publication)) (schema-triples - (gn:pubMedId rdfs:range rdfs:Literal) - (gn:title rdfs:range rdfs:Literal) - (gn:journal rdfs:range rdfs:Literal) - (gn:volume rdfs:range rdfs:Literal) - (gn:pages rdfs:range rdfs:Literal) - (gn:month rdfs:range rdfs:Literal) - (gn:year rdfs:range rdfs:Literal) - (gn:author rdfs:range rdfs:Literal) - (gn:abstract rdfs:range rdfs:Literal)) + (gn-term:pubMedId rdfs:range rdfs:Literal) + (gn-term:title rdfs:range rdfs:Literal) + (gn-term:journal rdfs:range rdfs:Literal) + (gn-term:volume rdfs:range rdfs:Literal) + (gn-term:pages rdfs:range rdfs:Literal) + (gn-term:month rdfs:range rdfs:Literal) + (gn-term:year rdfs:range rdfs:Literal) + (gn-term:author rdfs:range rdfs:Literal) + (gn-term:abstract rdfs:range rdfs:Literal)) (triples (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" @@ -40,18 +40,19 @@ (number->string publication-id)) (ontology 'publication: pmid))) (set 
rdf:type 'gn:publication) - (set gn:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) - (set gn:title (delete-substrings (field Publication Title) - "Unknown")) - (set gn:journal (delete-substrings (field Publication Journal) - "Unknown")) - (set gn:volume (delete-substrings (field Publication Volume) - "Unknown")) - (set gn:pages (delete-substrings (field Publication Pages) - "Unknown")) - (set gn:month (delete-substrings (field Publication Month) - "Unknown")) - (set gn:year (field Publication Year)) + (set gn-term:pubMedId + (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) + (set gn-term:title (delete-substrings (field Publication Title) + "Unknown")) + (set gn-term:journal (delete-substrings (field Publication Journal) + "Unknown")) + (set gn-term:volume (delete-substrings (field Publication Volume) + "Unknown")) + (set gn-term:pages (delete-substrings (field Publication Pages) + "Unknown")) + (set gn-term:month (delete-substrings (field Publication Month) + "Unknown")) + (set gn-term:year (field Publication Year)) (multiset gn:author ;; The authors field is a comma ;; separated list. Split it. 
diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 48fd425..41d5847 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -29,7 +29,7 @@ (string->identifier "" (field Species FullName) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn-id:species) + (set rdf:type 'gn:species) (set gn-term:name (field Species SpeciesName)) (set gn-term:displayName (field Species MenuName)) (set gn-term:binomialName (field Species FullName)) @@ -53,7 +53,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn-id:strain) + (set rdf:type 'gn:strain) (set gn-term:strainOfSpecies (string->identifier "" (field Species FullName) #:separator "" @@ -68,7 +68,7 @@ (tables (MappingMethod)) (triples (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gn-id:mappingMethod))) + (set rdf:type 'gn:mappingMethod))) (define-dump dump-inbred-set (tables (InbredSet @@ -89,7 +89,7 @@ "" (field InbredSet Name) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn-id:inbredSet) + (set rdf:type 'gn:inbredSet) (set gn-term:binomialName (field InbredSet FullName)) (set gn-term:geneticType (field InbredSet GeneticType)) (set gn-term:inbredFamily (field InbredSet Family)) @@ -97,7 +97,7 @@ (set gn-term:inbredSetCode (field InbredSet InbredSetCode)) (set gn-term:inbredSetOfSpecies (string->identifier "" (field Species FullName BinomialName) - #:ontology "gn-id:" + #:ontology "gn:" #:separator "" #:proc string-capitalize-first)) (set gn-term:genotype @@ -112,7 +112,7 @@ (schema-triples (gn-term:normalization rdfs:range rdfs:Literal)) (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gn-id:avgMethod) + (set rdf:type 'gn:avgMethod) (set gn-term:normalization (field AvgMethod Normalization)))) @@ -122,9 +122,9 @@ (connection %connection-settings) (table-metadata? 
#f) (prefixes - '(("gn-id:" "") - ("gn-term:" "") - ("rdf:" "") + '(("gn:" "") + ("gn-term:" "") + ("rdf:" "") ("rdfs:" "") ("taxon:" ""))) (inputs diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index 376129c..ff6792e 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -27,7 +27,7 @@ ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn-id:tissue) + (set rdf:type 'gn:tissue) (set gn-term:name (field Tissue Name)))) @@ -37,7 +37,7 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn-id:" "") + '(("gn:" "") ("gn-term:" "") ("rdf:" "") ("rdfs:" ""))) -- cgit v1.2.3 From 54b54584215289dd394c1935aaa2775afd6c2863 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 14:42:36 +0300 Subject: Only document a triple if it exists Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 283fcc2..a356783 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -448,13 +448,14 @@ must be remedied." #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) #`(begin - (format #t "## Schema Triples:~%~%```text~%") - (for-each (lambda (s p o) - (format #t "~a -> ~a -> ~a~%" s p o)) - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...)) - (format #t "```"))) + (when (not (list 'triple-subject ...)) + (format #t "## Schema Triples:~%~%```text~%") + (for-each (lambda (s p o) + (format #t "~a -> ~a -> ~a~%" s p o)) + (list 'triple-subject ...) + (list 'triple-predicate ...) 
+ (list 'triple-object ...)) + (format #t "```")))) (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) (format #t " ## Generated Triples: -- cgit v1.2.3 From 1ed71a5bc2ec25d036aa0693afbb252328ca0421 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 14:43:42 +0300 Subject: Use a min of 4 should the schema triples be many Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index a356783..39c8c80 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -496,8 +496,9 @@ The above query results to triples that have the form: tables-raw ...))) #,@(field->key #'(predicate-clauses ...)))) (first-n (list-head result - (let ((n (truncate - (+ (max (exact-integer-sqrt (length result))) 1)))) + (let ((n + (min 4 (truncate + (+ (exact-integer-sqrt (length result)) 1))))) (if (< n 3) (length result) n))))) -- cgit v1.2.3 From 4e99bd1b99a239b43ba9aaa12e563cf774d7e68b Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 14:57:59 +0300 Subject: Remove unnecessary prefixes Signed-off-by: Munyoki Kilyungi --- examples/dump-publication.scm | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 6f349d6..5d2cc9e 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -68,11 +68,12 @@ (connection %connection-settings) (table-metadata? 
#f) (prefixes - (("rdf:" "") - ("rdfs:" "") - ("gn:" "") - ("publication:" "") - ("pubmed:" ""))) + '(("gn-term:" "") + ("gn:" "") + ("publication:" "") + ("pubmed:" "") + ("rdfs:" "") + ("rdf:" ""))) (inputs (dump-publication)) (outputs -- cgit v1.2.3 From 47a2707a4d5fa33f3d9339c43ef28e96b116ea37 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 14:58:27 +0300 Subject: Make 'inputs' and 'outputs' fields lists Signed-off-by: Munyoki Kilyungi --- examples/dump-publication.scm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 5d2cc9e..fc2e6d0 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -75,7 +75,7 @@ ("rdfs:" "") ("rdf:" ""))) (inputs - (dump-publication)) + (list dump-publication)) (outputs - (#:documentation "./docs/dump-publication.md" - #:rdf "./verified-data/dump-publication.md"))) + '(#:documentation "./docs/dump-publication.md" + #:rdf "./verified-data/dump-publication.md"))) -- cgit v1.2.3 From 381acf546900c74a907bc56e236de4fece953869 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 16:41:36 +0300 Subject: Use "gn:" and "gn-terms" prefixes Signed-off-by: Munyoki Kilyungi --- examples/dump-dataset-metadata.scm | 182 +++++++++++++++++++------------------ 1 file changed, 92 insertions(+), 90 deletions(-) diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm index 53c381c..c51364a 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dump-dataset-metadata.scm @@ -52,11 +52,11 @@ (foaf:givenName rdfs:range rdfs:Literal) (foaf:familyName rdfs:range rdfs:Literal) (foaf:homepage rdfs:range rdfs:Literal) - (gn:address rdfs:range rdfs:Literal) - (gn:city rdfs:range rdfs:Literal) - (gn:state rdfs:range rdfs:Literal) - (gn:zipCode rdfs:range rdfs:Literal) - (gn:country rdfs:range rdfs:Literal)) + (gn-term:address rdfs:range rdfs:Literal) + (gn-term:city rdfs:range 
rdfs:Literal) + (gn-term:state rdfs:range rdfs:Literal) + (gn-term:zipCode rdfs:range rdfs:Literal) + (gn-term:country rdfs:range rdfs:Literal)) (triples (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email)) @@ -70,11 +70,11 @@ (set foaf:familyName (field ("CAST(CONVERT(BINARY CONVERT(LastName USING latin1) USING utf8) AS VARCHAR(100))" LastName))) (set foaf:homepage (field Investigators Url)) - (set gn:address (field Investigators Address)) - (set gn:city (field Investigators City)) - (set gn:state (field Investigators State)) - (set gn:zipCode (field Investigators ZipCode)) - (set gn:country (field Investigators Country)))) + (set gn-term:address (field Investigators Address)) + (set gn-term:city (field Investigators City)) + (set gn-term:state (field Investigators State)) + (set gn-term:zipCode (field Investigators ZipCode)) + (set gn-term:country (field Investigators Country)))) (define-dump dump-info-files (tables (InfoFiles @@ -92,123 +92,125 @@ (left-join GeneChip "USING (GeneChipId)")) "WHERE GN_AccesionId IS NOT NULL") (schema-triples - (gn:dataset rdfs:range rdfs:Literal) - (gn:datasetOfInvestigator rdfs:domain gn:dataset) - (gn:datasetOfOrganization rdfs:domain gn:dataset) - (gn:datasetOfInvestigator rdfs:range foaf:Person) - (gn:datasetOfInbredSet rdfs:domain gn:dataset) - (gn:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn:datasetOfSpecies rdfs:domain gn:dataset) - (gn:datasetOfSpecies rdfs:range gn:inbredSet) - (gn:datasetOfTissue rdfs:domain gn:dataset) - (gn:datasetOfTissue rdfs:range gn:tissue) - (gn:normalization rdfs:domain gn:dataset) - (gn:normalization rdfs:range gn:avgMethod) - (gn:datasetOfPlatform rdfs:domain gn:dataset) - (gn:datasetOfPlatform rdfs:range gn:geneChip) - (gn:accessionId rdfs:range rdfs:Literal) - (gn:datasetStatusName rdfs:range rdfs:Literal) - (gn:summary rdfs:range rdfs:Literal) - (gn:aboutTissue rdfs:range rdfs:Literal) - (gn:geoSeries rdfs:range 
rdfs:Literal) - (gn:name rdfs:range rdfs:Literal) - (gn:title rdfs:range rdfs:Literal) - (gn:publicationTitle rdfs:range rdfs:Literal) - (gn:specifics rdfs:range rdfs:Literal) - (gn:datasetGroup rdfs:range rdfs:Literal) - (gn:aboutCases rdfs:range rdfs:Literal) - (gn:aboutPlatform rdfs:range rdfs:Literal) - (gn:aboutDataProcessing rdfs:range rdfs:Literal) - (gn:notes rdfs:range rdfs:Literal) - (gn:experimentDesign rdfs:range rdfs:Literal) - (gn:contributors rdfs:range rdfs:Literal) - (gn:citation rdfs:range rdfs:Literal) - (gn:acknowledgment rdfs:range rdfs:Literal)) - (triples (ontology 'dataset: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field InfoFiles InfoPageName) - 'pre "_" 'post)) + (gn-term:dataset rdfs:range rdfs:Literal) + (gn-term:datasetOfInvestigator rdfs:domain gn:dataset) + (gn-term:datasetOfOrganization rdfs:domain gn:dataset) + (gn-term:datasetOfInvestigator rdfs:range foaf:Person) + (gn-term:datasetOfInbredSet rdfs:domain gn:dataset) + (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) + (gn-term:datasetOfSpecies rdfs:domain gn:dataset) + (gn-term:datasetOfSpecies rdfs:range gn:inbredSet) + (gn-term:datasetOfTissue rdfs:domain gn:dataset) + (gn-term:datasetOfTissue rdfs:range gn:tissue) + (gn-term:normalization rdfs:domain gn:dataset) + (gn-term:normalization rdfs:range gn:avgMethod) + (gn-term:datasetOfPlatform rdfs:domain gn:dataset) + (gn-term:datasetOfPlatform rdfs:range gn:geneChip) + (gn-term:accessionId rdfs:range rdfs:Literal) + (gn-term:datasetStatusName rdfs:range rdfs:Literal) + (gn-term:summary rdfs:range rdfs:Literal) + (gn-term:aboutTissue rdfs:range rdfs:Literal) + (gn-term:geoSeries rdfs:range rdfs:Literal) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:title rdfs:range rdfs:Literal) + (gn-term:publicationTitle rdfs:range rdfs:Literal) + (gn-term:specifics rdfs:range rdfs:Literal) + (gn-term:datasetGroup rdfs:range rdfs:Literal) + (gn-term:aboutCases rdfs:range rdfs:Literal) + (gn-term:aboutPlatform rdfs:range 
rdfs:Literal) + (gn-term:aboutDataProcessing rdfs:range rdfs:Literal) + (gn-term:notes rdfs:range rdfs:Literal) + (gn-term:experimentDesign rdfs:range rdfs:Literal) + (gn-term:contributors rdfs:range rdfs:Literal) + (gn-term:citation rdfs:range rdfs:Literal) + (gn-term:acknowledgment rdfs:range rdfs:Literal)) + (triples (string->identifier + "" (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field InfoFiles InfoPageName) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type (string->symbol (field ("IF(GenoFreeze.Id IS NOT NULL, 'gn:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gn:phenotypeDataset', 'gn:dataset'))" rdfType)))) - (set gn:name (regexp-substitute/global - #f "^[Nn]one$" - (field InfoFiles InfoPageName) - "")) - (set gn:fullName + (set gn-term:name (regexp-substitute/global + #f "^[Nn]one$" + (field InfoFiles InfoPageName) + "")) + (set gn-term:fullName (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))" DatasetFullName))) (set dct:created (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))" createTimeGenoFreeze))) - (set gn:datasetOfInvestigator + (set gn-term:datasetOfInvestigator (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email))) - (set gn:datasetOfOrganization + (set gn-term:datasetOfOrganization (field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations))) - (set gn:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) - (set gn:datasetStatusName (string-downcase - (field DatasetStatus DatasetStatusName))) - (set gn:datasetOfInbredSet + (set gn-term:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) + (set gn-term:datasetStatusName (string-downcase + (field DatasetStatus DatasetStatusName))) + (set gn-term:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet 
Name InbredSetName))) - (set gn:datasetOfTissue (string->identifier "tissue" - (field Tissue Short_Name))) - (set gn:normalization + (set gn-term:datasetOfTissue (string->identifier "tissue" + (field Tissue Short_Name))) + (set gn-term:normalization (string->identifier "avgmethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? (field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) - (set gn:datasetOfPlatform + (set gn-term:datasetOfPlatform (string->identifier "platform" (field GeneChip Name GeneChip))) - (set gn:summary + (set gn-term:summary (sanitize-rdf-string (field Datasets Summary))) - (set gn:aboutTissue + (set gn-term:aboutTissue (sanitize-rdf-string (field Datasets AboutTissue))) - (set gn:geoSeries + (set gn-term:geoSeries (let ((s (string-match "GSE[0-9]*" (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries))))) (if s (ontology 'geoSeries: (match:substring s)) ""))) - (set gn:title + (set gn-term:title (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles InfoFileTitle) "")) - (set gn:publicationTitle + (set gn-term:publicationTitle (regexp-substitute/global #f "^[Nn]one$" (field Datasets PublicationTitle) "")) - (set gn:specifics (sanitize-rdf-string (field InfoFiles Specifics))) - (set gn:datasetGroup (field Datasets DatasetName DatasetGroup)) - (set gn:aboutCases + (set gn-term:specifics (sanitize-rdf-string (field InfoFiles Specifics))) + (set gn-term:datasetGroup (field Datasets DatasetName DatasetGroup)) + (set gn-term:aboutCases (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutCases USING latin1) USING utf8) AS VARCHAR(10000))" AboutCases)))) - (set gn:aboutPlatform + (set gn-term:aboutPlatform (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutPlatform USING latin1) USING utf8) AS VARCHAR(1500))" AboutPlatform)))) - (set gn:aboutDataProcessing + (set gn-term:aboutDataProcessing (sanitize-rdf-string (field ("CAST(CONVERT(BINARY 
CONVERT(Datasets.AboutDataProcessing USING latin1) USING utf8) AS VARCHAR(1500))" AboutDataProcessing)))) - (set gn:notes + (set gn-term:notes (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Notes USING latin1) USING utf8) AS VARCHAR(1500))" GNNotes)))) - (set gn:experimentDesign + (set gn-term:experimentDesign (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.ExperimentDesign USING latin1) USING utf8) AS VARCHAR(1500))" ExperimentDesign)))) - (set gn:contributors + (set gn-term:contributors (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Contributors USING latin1) USING utf8) AS VARCHAR(1500))" Contributors)))) - (set gn:citation + (set gn-term:citation (sanitize-rdf-string (regexp-substitute/global #f "^[Nn]one$" @@ -216,7 +218,7 @@ ("CAST(CONVERT(BINARY CONVERT(Datasets.Citation USING latin1) USING utf8) AS VARCHAR(1500))" Citation)) ""))) - (set gn:dataSourceAcknowledgment + (set gn-term:dataSourceAcknowledgment (sanitize-rdf-string (string-trim-both (regexp-substitute/global @@ -224,8 +226,8 @@ (field ("CAST(CONVERT(BINARY CONVERT(InfoFiles.Data_Source_Acknowledge USING latin1) USING utf8) AS VARCHAR(1500))" Data_Source_Acknowledge)) "")))) - (set gn:acknowledgment (sanitize-rdf-string - (field Datasets Acknowledgment))))) + (set gn-term:acknowledgment (sanitize-rdf-string + (field Datasets Acknowledgment))))) @@ -235,18 +237,18 @@ (connection %connection-settings) (table-metadata? 
#f) (prefixes - (("dct:" "") - ("geoSeries:" "") - ("rdf:" "") - ("rdfs:" "") - ("gn:" "") - ("foaf:" "") - ("taxon:" "") - ("dataset:" ""))) + '(("foaf:" "") + ("geoSeries:" "") + ("gn-term:" "") + ("gn:" "") + ("rdf:" "") + ("rdfs:" "") + ("taxon:" "") + ("dct:" ""))) (inputs - (dump-info-files - dump-investigators)) + (list dump-info-files + dump-investigators)) (outputs - (#:documentation "./docs/dump-info-pages.md" - #:rdf "./verified-data/dump-info-pages.ttl"))) + '(#:documentation "./docs/dump-info-pages.md" + #:rdf "./verified-data/dump-info-pages.ttl"))) -- cgit v1.2.3 From bfeeefcd6b6383a5df317441f7e885a4631e5458 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 16:56:27 +0300 Subject: Replace "publication:" with "pubmed:" Signed-off-by: Munyoki Kilyungi --- examples/dump-publication.scm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index fc2e6d0..f79696e 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -38,7 +38,7 @@ (if (string-null? 
pmid) (string->identifier "unpublished" (number->string publication-id)) - (ontology 'publication: pmid))) + (ontology 'pubmed: pmid))) (set rdf:type 'gn:publication) (set gn-term:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) @@ -70,7 +70,6 @@ (prefixes '(("gn-term:" "") ("gn:" "") - ("publication:" "") ("pubmed:" "") ("rdfs:" "") ("rdf:" ""))) -- cgit v1.2.3 From d9e8b0ee01d4cdef99d5e23f53bcb34b8cd63d88 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 16:57:20 +0300 Subject: Use "gn:" and "gn-term:" when dumping phenotypes Signed-off-by: Munyoki Kilyungi --- examples/dump-phenotype.scm | 98 +++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 33577ce..924ec9a 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -18,9 +18,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - ;; Only dump publish freeze entries that were not dumped from the InfoFiles page @@ -30,25 +27,28 @@ (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn:name rdfs:range rdfs:Literal) - (gn:fullName rdfs:range rdfs:Literal) - (gn:shortName rdfs:range rdfs:Literal) - (gn:createTime rdfs:range rdfs:Literal) + (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) + (gn-term:name rdfs:range rdfs:Literal) + (gn-term:fullName rdfs:range rdfs:Literal) + (gn-term:shortName rdfs:range rdfs:Literal) + (gn-term:createTime rdfs:range rdfs:Literal) (gn:phenotypeDataset rdf:subClassOf gn:dataset)) (triples - (ontology 'dataset: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field PublishFreeze Name) - 'pre "_" 'post)) + (string->identifier + "" + 
(regexp-substitute/global #f "[^A-Za-z0-9:]" + (field PublishFreeze Name) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:phenotypeDataset) - (set gn:name (field PublishFreeze Name)) - (set gn:fullName (field PublishFreeze FullName)) - (set gn:shortName (field PublishFreeze ShortName)) - (set dct:created (annotate-field + (set gn-term:name (field PublishFreeze Name)) + (set gn-term:fullName (field PublishFreeze FullName)) + (set gn-term:shortName (field PublishFreeze ShortName)) + (set dc-termt:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) - (set gn:datasetOfInbredSet + (set gn-term:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) (define-dump dump-phenotypes @@ -59,48 +59,52 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))) (schema-triples (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset)) - (triples (ontology 'phenotype: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ("CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, ':')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation))" abbrev)) - 'pre "_" 'post)) + (triples (string->identifier + "" + (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field ("CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, '_')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation))" abbrev)) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:phenotype) - (set gn:name (sanitize-rdf-string + (set gn-term:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation 
IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" PhenotypeName)))) ;; There is no row with an empty post-publication description so ;; use this field as the main publication description - (set gn:publicationDescription + (set gn-term:publicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Post_publication_description USING latin1) USING utf8) AS CHAR(10000))" postPubDescr)))) - (set gn:originalDescription (sanitize-rdf-string + (set gn-term:originalDescription (sanitize-rdf-string (delete-substrings (field Phenotype Original_description) "Original post publication description: "))) - (set gn:prePublicationDescription + (set gn-term:prePublicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Pre_publication_description USING latin1) USING utf8) AS VARCHAR(15000))" prePubDesc)))) - (set gn:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) - (set gn:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) - (set gn:labCode (field Phenotype Lab_code)) - (set gn:submitter (sanitize-rdf-string (field Phenotype Submitter))) - (set gn:owner (sanitize-rdf-string (field Phenotype Owner))) - (set gn:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) + (set gn-term:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) + (set gn-term:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) + (set gn-term:labCode (field Phenotype Lab_code)) + (set gn-term:submitter (sanitize-rdf-string (field Phenotype Submitter))) + (set gn-term:owner (sanitize-rdf-string (field Phenotype Owner))) + (set gn-term:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) '^^xsd:double)) - (set gn:locus (field PublishXRef Locus)) - (set 
gn:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) - (set gn:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) - (set gn:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) - (set gn:phenotypeOfDataset - (ontology 'dataset: - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, ''))" DatasetName)) - 'pre "_" 'post))) - (set gn:phenotypeOfPublication + (set gn-term:locus (field PublishXRef Locus)) + (set gn-term:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) + (set gn-term:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) + (set gn-term:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) + (set gn-term:phenotypeOfDataset + (string->identifier + "" + (field + ("IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, ''))" DatasetName)) + #:separator "" + #:proc string-capitalize-first)) + (set gn-term:phenotypeOfPublication (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" pmid))) @@ -108,7 +112,7 @@ (if (string-null? pmid) (string->identifier "unpublished" (number->string publication-id)) - (ontology 'publication: pmid)))))) + (ontology 'pubmed: pmid)))))) (dump-with-documentation @@ -116,17 +120,15 @@ (connection %connection-settings) (table-metadata? 
#f) (prefixes - '(("gn-id:" "") + '(("gn:" "") ("gn-term:" "") - ("phenotype:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" "") - ("dataset:" "") - ("publication:" ""))) + ("pubmed:" ""))) (inputs (list dump-publishfreeze - dump-phenotype)) + dump-phenotypes)) (outputs '(#:documentation "./docs/dump-phenotype.md" #:rdf "./verified-data/dump-phenotype.ttl"))) -- cgit v1.2.3 From d7d1bef8c6dd18e2dfe8e48b7a23efdb640b1eaf Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 17:43:13 +0300 Subject: Dump genotypes with the new syntax Signed-off-by: Munyoki Kilyungi --- examples/dump-genotype.scm | 119 +++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 58 deletions(-) diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 1be1d34..0fbbbfe 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -18,9 +18,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-genofreeze @@ -29,24 +26,30 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn:datasetOfInbredSet rdfs:range gn:inbredSet) + (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gn:shortName rdfs:range rdfs:Literal)) - (triples (ontology - 'dataset: - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field GenoFreeze Name) - 'pre "_" 'post)) + (gn-term:shortName rdfs:range rdfs:Literal)) + (triples + (string->identifier + "" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field GenoFreeze Name) + 'pre "_" 'post) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:genotypeDataset) - (set gn:name (field GenoFreeze Name)) - (set gn:fullName (field GenoFreeze FullName)) - (set 
gn:shortName (field GenoFreeze ShortName)) + (set gn-term:name (field GenoFreeze Name)) + (set gn-term:fullName (field GenoFreeze FullName)) + (set gn-term:shortName (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:date)) - (set gn:datasetOfInbredSet - (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) + (set gn-term:datasetOfInbredSet + (string->identifier "" (field InbredSet Name InbredSetName))))) (define-dump dump-genotypes (tables (Geno @@ -54,60 +57,60 @@ (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId") (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples - (gn:genotypeDataset rdfs:subPropertyOf gn:dataset)) + (gn:genotype rdfs:range rdfs:Literal) + (gn-term:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples - (ontology - 'genotype: + (string->identifier + "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, ':')), Geno.Name)" abbrev)) - 'pre "_" 'post)) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:genotype) - (set gn:name (sanitize-rdf-string (field Geno Name))) - (set gn:markerName (sanitize-rdf-string (field Geno Marker_Name))) - (set gn:chr (field Geno Chr)) - (set gn:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) - (set gn:sequence (annotate-field (field Geno Sequence) '^^xsd:int)) - (set gn:source (field Geno Source)) - (set gn:source2 (field Geno Source2)) - (set gn:genotypeOfDataset - (ontology 'dataset: - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(GenoFreeze.Name, '')" DatasetName)) - 'pre "_" 'post))) - (set gn:chrNum + (set gn-term:name (sanitize-rdf-string (field Geno Name))) + (set gn-term:markerName (sanitize-rdf-string (field Geno Marker_Name))) + (set gn-term:chr (field Geno Chr)) + (set gn-term:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) + 
(set gn-term:sequence (field Geno Sequence)) + (set gn-term:source (field Geno Source)) + (set gn-term:source2 (field Geno Source2)) + (set gn-term:genotypeOfDataset + (string->identifier + "" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field ("IFNULL(GenoFreeze.Name, '')" DatasetName)) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) + ) + (set gn-term:chrNum (annotate-field (field ("IFNULL(Geno.chr_num, '')" chr_num)) '^^xsd:int)) (set gn:comments (field ("CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255))" Comments))) - (set gn:cM + (set gn-term:cM (annotate-field (field ("IFNULL(GenoXRef.cM, '')" Chr_mm8)) '^^xsd:int)))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-genotype.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "genotype:" "") - (prefix "dataset:" "") - (newline) - (dump-genofreeze db) - (dump-genotypes db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Genotype Metadata") + (connection %connection-settings) + (table-metadata? 
#f) + (prefixes + '(("gn:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" ""))) + (inputs + (list dump-genofreeze + dump-genotypes)) + (outputs + '(#:documentation "./docs/dump-genotype.md" + #:rdf "./verified-data/dump-genotype.ttl"))) -- cgit v1.2.3 From 0036a4f63fa3bef6eea95fe635eb23d4dc070727 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 20:14:02 +0300 Subject: Dump probeset-metadata using the new syntax Signed-off-by: Munyoki Kilyungi --- examples/dump-probeset-metadata.scm | 65 ++++++++++++++----------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index b0c4853..6da1eb0 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-probeset-metadata (tables (ProbeSetXRef @@ -27,14 +24,14 @@ "WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1") (schema-triples (gn:probesetData rdfs:range gn:probeset) - (gn:hasProbeset rdfs:range rdfs:Literal)) + (gn-term:hasProbeset rdfs:range rdfs:Literal)) (triples (string->identifier "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) (set rdf:type 'gn:probesetData) - (set gn:hasProbeset + (set gn-term:hasProbeset (ontology 'probeset: (regexp-substitute/global @@ -42,66 +39,52 @@ (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" name)) 'pre "_" 'post))) - (set gn:probesetOfDataset + (set gn-term:probesetOfDataset (ontology 'probeset: (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ProbeSetFreeze Name) 'pre "_" 'post))) - (set gn:mean + (set gn-term:mean (annotate-field (field ("IFNULL(ProbeSetXRef.mean, '')" mean)) '^^xsd:double)) - (set gn:se + (set gn-term:se (annotate-field (field ("IFNULL(ProbeSetXRef.se, '')" se)) '^^xsd:double)) 
- (set gn:locus (field ProbeSetXRef Locus)) + (set gn-term:locus (field ProbeSetXRef Locus)) (set gn:LRS (annotate-field (field ("IFNULL(ProbeSetXRef.LRS, '')" LRS)) '^^xsd:double)) - (set gn:pValue + (set gn-term:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) '^^xsd:double)) - (set gn:additive + (set gn-term:additive (annotate-field (field ("IFNULL(ProbeSetXRef.additive, '')" additive)) '^^xsd:double)) - (set gn:h2 + (set gn-term:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) '^^xsd:float)))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-probeset-metadata.ttl") - (lambda () - (prefix "chebi:" "") - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "gn:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "kegg:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubchem:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-probeset-metadata db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Probeset Metadata") + (connection %connection-settings) + (table-metadata? 
#f) + (prefixes + '(("gn:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" ""))) + (inputs + (list dump-probeset-metadata)) + (outputs + '(#:documentation "./docs/dump-probeset-metadata.md" + #:rdf "./verified-data/dump-probeset-metadata.ttl"))) -- cgit v1.2.3 From 16ebe166618b7e36d92bcc6c3e497dcfa188ce90 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 19 Jul 2023 20:23:58 +0300 Subject: Dump probesetfreeze metadata using new metadata --- examples/dump-probesetfreeze.scm | 77 +++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index 0be81ac..a45fd0a 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -16,18 +16,15 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-gene-chip (tables (GeneChip)) (schema-triples - (gn:name rdfs:range rdfs:Literal)) + (gn-term:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) (set rdf:type 'gn:platform) - (set gn:name (field GeneChip GeneChipName)) - (set gn:geoPlatform + (set gn-term:name (field GeneChip GeneChipName)) + (set gn-term:geoPlatform (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -41,48 +38,46 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gn:avgMethod rdfs:range rdfs:Literal) - (gn:dataScale rdfs:range rdfs:Literal) + (gn-term:avgMethod rdfs:range rdfs:Literal) + (gn-term:dataScale rdfs:range rdfs:Literal) (gn:probesetDataset rdf:subClassOf gn:dataset)) (triples - (ontology 'probeset: - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ProbeSetFreeze Name) - 'pre "_" 'post)) + (string->identifier + "" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + 
(field ProbeSetFreeze Name) + 'pre "_" 'post) + #:separator "" + #:proc string-capitalize-first) (set rdf:type 'gn:probesetDataset) - (set gn:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) - (set gn:fullName (field ProbeSetFreeze FullName)) - (set gn:shortName (field ProbeSetFreeze ShortName)) + (set gn-term:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) + (set gn-term:fullName (field ProbeSetFreeze FullName)) + (set gn-term:shortName (field ProbeSetFreeze ShortName)) (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gn:dataScale (field ProbeSetFreeze DataScale)) - (set gn:tissueName (string->identifier "tissue" (field Tissue Short_Name))) - (set gn:datasetOfInbredSet + (set gn-term:dataScale (field ProbeSetFreeze DataScale)) + (set gn-term:tissueName (string->identifier "tissue" (field Tissue Short_Name))) + (set gn-term:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-probesetfreeze.ttl") - (lambda () - (prefix "dct:" "") - (prefix "foaf:" "") - (prefix "generif:" "") - (prefix "geoSeries:" "") - (prefix "gn:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "pubmed:" "") - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "uniprot:" "") - (prefix "up:" "") - (prefix "xsd:" "") - (prefix "probeset:" "") - (newline) - (dump-gene-chip db) - (dump-probesetfreeze db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "Probeset freeze metadata") + (connection %connection-settings) + (table-metadata? 
#f) + (prefixes + '(("geoSeries:" "") + ("gn:" "") + ("gn-term:" "") + ("rdf:" "") + ("rdfs:" "") + ("xsd:" ""))) + (inputs + (list dump-gene-chip + dump-probesetfreeze)) + (outputs + '(#:documentation "./docs/dump-gene-chip.md" + #:rdf "./verified-data/dump-probesetfreeze.ttl"))) -- cgit v1.2.3 From 1b72a21848524806411ea55ba5e7be2657ddc8cc Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 12:54:47 +0300 Subject: Dump prefixes correctly when auto-generating docs Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 39c8c80..1c0b15c 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -588,7 +588,13 @@ The above query results to triples that have the form: (format #t "# ~a" name) (for-each (lambda (proc) - (proc db #f #f prefix-thunk)) + (proc db #f #f + (lambda () (for-each + (match-lambda + ((k v) + (begin + (prefix k v #f)))) + prefixes)))) inputs)) #:encoding "utf8") -- cgit v1.2.3 From 76f532d492e37f51646fea3f893e3cecc32f2121 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 14:29:33 +0300 Subject: Make define-dump take extra args as key Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 99 +++++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 1c0b15c..fe9dbe6 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -395,19 +395,10 @@ must be remedied." #'(schema-triples))) ((triples subject predicate-clauses ...) (triples) (find-clause #'(clauses ...) 'triples))) - #`(define* (name db - #:optional + #`(define* (name db #:key (dump-metadata? #f) (dump-data? #t) (dump-documentation? #f)) - (when dump-data? - #,(syntax-case #'schema-triples-clause (schema-triples) - ((schema-triples (triple-subject triple-predicate triple-object) ...) 
- #`(for-each triple - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...))) - (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))) (when dump-metadata? #,@(let ((dump-table (symbol->string (syntax->datum #'primary-table))) (subject-type (any (lambda (predicate) @@ -529,6 +520,13 @@ The above query results to triples that have the form: tables-raw ...))) (format #t "```~%~%")) (when dump-data? + #,(syntax-case #'schema-triples-clause (schema-triples) + ((schema-triples (triple-subject triple-predicate triple-object) ...) + #`(for-each triple + (list 'triple-subject ...) + (list 'triple-predicate ...) + (list 'triple-object ...))) + (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) (sql-for-each (lambda (row) (scm->triples (map-alist row #,@(field->key #'(predicate-clauses ...))) @@ -571,43 +569,44 @@ The above query results to triples that have the form: (set! outputs o))) (list (list 'key value) ...)) (let ((rdf-path (get-keyword-value outputs #:rdf "")) - (doc-path (get-keyword-value outputs #:documentation "")) - (prefix-thunk (lambda () (for-each - (match-lambda - ((k v) - (begin - (prefix k v)))) - prefixes)))) - ;; Dumping the documentation first - (call-with-target-database - connection - (lambda (db) - (with-output-to-file ; - doc-path - (lambda () - (format #t "# ~a" name) - (for-each - (lambda (proc) - (proc db #f #f - (lambda () (for-each - (match-lambda - ((k v) - (begin - (prefix k v #f)))) - prefixes)))) - inputs)) - #:encoding "utf8") - - ;; Dumping the actual data - (with-output-to-file - rdf-path - (lambda () - ;; Add the prefixes - (prefix-thunk) - (newline) - (for-each - (lambda (proc) - (proc db #f #t #f)) - inputs)) - #:encoding "utf8")))))))) - + (doc-path (get-keyword-value outputs #:documentation ""))) + ;; Dumping the documentation first + (call-with-target-database + connection + (lambda (db) + (with-output-to-file ; + doc-path + (lambda () + (format #t "# ~a" name) + 
(for-each + (lambda (proc) + (proc db + #:dump-metadata? #f + #:dump-data? #f + #:dump-documentation? + (lambda () (for-each + (match-lambda + ((k v) + (begin + (prefix k v #f)))) + prefixes)))) + inputs)) + #:encoding "utf8") + + ;; Dumping the actual data + (with-output-to-file + rdf-path + (lambda () + ;; Add the prefixes + (for-each + (match-lambda + ((k v) + (begin + (prefix k v)))) + prefixes) + (newline) + (for-each + (lambda (proc) + (proc db #:dump-metadata? table-metadata?)) + inputs)) + #:encoding "utf8")))))))) -- cgit v1.2.3 From 0c0e80d850ae4ff72e02778afca64778421ba72c Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 14:30:06 +0300 Subject: Update phenotype dump --- examples/dump-phenotype.scm | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 924ec9a..1ef498d 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -31,7 +31,6 @@ (gn-term:name rdfs:range rdfs:Literal) (gn-term:fullName rdfs:range rdfs:Literal) (gn-term:shortName rdfs:range rdfs:Literal) - (gn-term:createTime rdfs:range rdfs:Literal) (gn:phenotypeDataset rdf:subClassOf gn:dataset)) (triples (string->identifier @@ -45,7 +44,7 @@ (set gn-term:name (field PublishFreeze Name)) (set gn-term:fullName (field PublishFreeze FullName)) (set gn-term:shortName (field PublishFreeze ShortName)) - (set dc-termt:created (annotate-field + (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) (set gn-term:datasetOfInbredSet @@ -58,7 +57,20 @@ (left-join PublishFreeze "ON PublishFreeze.InbredSetId = PublishXRef.InbredSetId") (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))) (schema-triples - (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset)) + (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset) + (gn-term:publicationDescription rdfs:range rdfs:Literal) + (gn-term:originalDescription rdfs:range rdfs:Literal) + 
(gn-term:prePublicationDescription rdfs:range rdfs:Literal) + (gn-term:postPublicationAbbreviation rdfs:range rdfs:Literal) + (gn-term:labCode rdfs:range rdfs:Literal) + (gn-term:submitter rdfs:range rdfs:Literal) + (gn-term:owner rdfs:range rdfs:Literal) + (gn-term:mean rdfs:range xsd:double) + (gn-term:LRS rdfs:range xsd:float) + (gn-term:locus rdfs:range rdfs:Literal) + (gn-term:additive rdfs:range xsd:decimal) + (gn-term:sequence rdfs:range rdfs:Literal) + (gn-term:phenotypeOfPublication rdfs:range gn-term:pubMedId)) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" @@ -120,7 +132,8 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn:" "") + '(("dct:" "") + ("gn:" "") ("gn-term:" "") ("rdf:" "") ("rdfs:" "") -- cgit v1.2.3 From 1dea579f74cad817b5dcb92de73e3136a7058549 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 14:30:36 +0300 Subject: Add dct: prefix to genotype dump Signed-off-by: Munyoki Kilyungi --- examples/dump-genotype.scm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 0fbbbfe..d97b7e5 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -103,7 +103,8 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn:" "") + '(("dct:" "") + ("gn:" "") ("gn-term:" "") ("rdf:" "") ("rdfs:" "") -- cgit v1.2.3 From e4e251ea2ac3afe2a3333b950b738fe0f68981bb Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 14:38:03 +0300 Subject: Update wikidata metadata Signed-off-by: Munyoki Kilyungi --- schema/species.ttl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/schema/species.ttl b/schema/species.ttl index 142a831..a7f33a4 100644 --- a/schema/species.ttl +++ b/schema/species.ttl @@ -3,17 +3,17 @@ @prefix rdf: . @prefix wd: . -@prefix gn: . +@prefix gn: . 
-gn:species_arabidopsis_thaliana rdf:isDefinedBy wd:Q158695 . -gn:species_bat__glossophaga_soricina_ rdf:isDefinedBy wd:Q304929 . -gn:species_fly__drosophila_melanogaster_dm6_ rdf:isDefinedBy wd:Q130888 . -gn:species_glycine_max rdf:isDefinedBy wd:Q11006 . -gn:species_homo_sapiens rdf:isDefinedBy wd:Q15978631 . -gn:species_hordeum_vulgare rdf:isDefinedBy wd:Q11577 . -gn:species_macaca_mulatta rdf:isDefinedBy wd:Q177601 . -gn:species_mus_musculus rdf:isDefinedBy wd:Q83310 . -gn:species_oryzias_latipes__japanese_medaka_ rdf:isDefinedBy wd:Q1142975 . -gn:species_populus_trichocarpa rdf:isDefinedBy wd:Q149382 . -gn:species_rattus_norvegicus rdf:isDefinedBy wd:Q184224 . -gn:species_solanum_lycopersicum rdf:isDefinedBy wd:Q23501 . +gn:Arabidopsis_thaliana rdf:isDefinedBy wd:Q158695 . +gn:Bat__glossophaga_soricina_ rdf:isDefinedBy wd:Q304929 . +gn:Fly__drosophila_melanogaster_dm6_ rdf:isDefinedBy wd:Q130888 . +gn:Glycine_max rdf:isDefinedBy wd:Q11006 . +gn:Homo_sapiens rdf:isDefinedBy wd:Q15978631 . +gn:Hordeum_vulgare rdf:isDefinedBy wd:Q11577 . +gn:Macaca_mulatta rdf:isDefinedBy wd:Q177601 . +gn:Mus_musculus rdf:isDefinedBy wd:Q83310 . +gn:Oryzias_latipes__japanese_medaka_ rdf:isDefinedBy wd:Q1142975 . +gn:Populus_trichocarpa rdf:isDefinedBy wd:Q149382 . +gn:Rattus_norvegicus rdf:isDefinedBy wd:Q184224 . +gn:Solanum_lycopersicum rdf:isDefinedBy wd:Q23501 . -- cgit v1.2.3 From e3ee3ebd94b7d704d33321fa8adc65423db808d4 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 17:41:30 +0300 Subject: Convert subject into a string if it's a symbol Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index fe9dbe6..948ebba 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -467,7 +467,9 @@ The above query results to triples that have the form: (for-each (match-lambda ((predicate . 
object) (format #t "~a -> ~a -> ~a ~%" - #,(field->datum #'subject) + (if (symbol? #,(field->datum #'subject)) + (symbol->string #,(field->datum #'subject)) + #,(field->datum #'subject)) predicate (if (symbol? object) (symbol->string object) -- cgit v1.2.3 From 36e4e31556e6e09396393f3cef3c802bc7b460bb Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 17:43:52 +0300 Subject: Update some minor formatting Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 948ebba..7fde470 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -435,7 +435,7 @@ must be remedied." (_ (error "Invalid predicate clause:" predicate-clause)))) #'(predicate-clauses ...)))) (when dump-documentation? - (format #t "~%## '~a'~%~%" (syntax->datum #'name)) + (format #t "## '~a'~%~%" (syntax->datum #'name)) #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) #`(begin -- cgit v1.2.3 From c0873959703e31c4728ad74910a672751502dbf5 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 21 Jul 2023 17:47:54 +0300 Subject: Replace "?s ?p ?o" with "*" Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index 7fde470..fce3a11 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -495,7 +495,7 @@ The above query results to triples that have the form: (if (< n 3) (length result) n))))) - (format #t "SELECT ?s ?p ?o WHERE { ~%") + (format #t "SELECT * WHERE { ~%") (for-each (match-lambda ((predicate . 
object) (match object -- cgit v1.2.3 From 5a2a7dc79c08997868c6644c20443263ac6c7fec Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 24 Jul 2023 16:30:29 +0300 Subject: Use correct URL for rdf prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 41d5847..008c3a0 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -124,7 +124,7 @@ (prefixes '(("gn:" "") ("gn-term:" "") - ("rdf:" "") + ("rdf:" "") ("rdfs:" "") ("taxon:" ""))) (inputs -- cgit v1.2.3 From 052bd28d234961e56a41412209bc7e990d8f8be7 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 24 Jul 2023 17:27:31 +0300 Subject: Replace set! mutations with an alist Signed-off-by: Munyoki Kilyungi --- dump/special-forms.scm | 112 ++++++++++++++++++++++--------------------------- 1 file changed, 49 insertions(+), 63 deletions(-) diff --git a/dump/special-forms.scm b/dump/special-forms.scm index fce3a11..ad3296e 100644 --- a/dump/special-forms.scm +++ b/dump/special-forms.scm @@ -549,66 +549,52 @@ The above query results to triples that have the form: (define-syntax dump-with-documentation (syntax-rules () ((_ (key value) ...) - (let ((name "") - (connection "") - (table-metadata? "") - (prefixes "") - (inputs "") - (outputs "")) - (for-each - (match-lambda - (('name n) - (set! name n)) - (('connection conn) - (set! connection conn)) - (('table-metadata? t-metadata?) - (set! table-metadata? t-metadata?)) - (('prefixes p) - (set! prefixes p)) - (('inputs i) - (set! inputs i)) - (('outputs o) - (set! 
outputs o))) - (list (list 'key value) ...)) - (let ((rdf-path (get-keyword-value outputs #:rdf "")) - (doc-path (get-keyword-value outputs #:documentation ""))) - ;; Dumping the documentation first - (call-with-target-database - connection - (lambda (db) - (with-output-to-file ; - doc-path - (lambda () - (format #t "# ~a" name) - (for-each - (lambda (proc) - (proc db - #:dump-metadata? #f - #:dump-data? #f - #:dump-documentation? - (lambda () (for-each - (match-lambda - ((k v) - (begin - (prefix k v #f)))) - prefixes)))) - inputs)) - #:encoding "utf8") - - ;; Dumping the actual data - (with-output-to-file - rdf-path - (lambda () - ;; Add the prefixes - (for-each - (match-lambda - ((k v) - (begin - (prefix k v)))) - prefixes) - (newline) - (for-each - (lambda (proc) - (proc db #:dump-metadata? table-metadata?)) - inputs)) - #:encoding "utf8")))))))) + (let* ((alist `((key . ,value) ...)) + (name (assoc-ref alist 'name)) + (connection (assoc-ref alist 'connection)) + (table-metadata? (assoc-ref alist 'table-metadata?)) + (prefixes (assoc-ref alist 'prefixes)) + (inputs (assoc-ref alist 'inputs)) + (outputs (assoc-ref alist 'outputs)) + (rdf-path (get-keyword-value outputs #:rdf "")) + (doc-path (get-keyword-value outputs #:documentation ""))) + (call-with-target-database + connection + (lambda (db) + (with-output-to-file ; + doc-path + (lambda () + (format #t "# ~a" name) + (for-each + (lambda (proc) + (proc db + #:dump-metadata? #f + #:dump-data? #f + #:dump-documentation? + (lambda () (for-each + (match-lambda + ((k v) + (begin + (prefix k v #f)))) + prefixes)))) + inputs)) + #:encoding "utf8") + + ;; Dumping the actual data + (with-output-to-file + rdf-path + (lambda () + ;; Add the prefixes + (for-each + (match-lambda + ((k v) + (begin + (prefix k v)))) + prefixes) + (newline) + (for-each + (lambda (proc) + (proc db #:dump-metadata? 
table-metadata?)) + inputs)) + #:encoding "utf8"))))))) + -- cgit v1.2.3 From 1981e6085b3c22755c90082cef997e9d95a5bc9b Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 26 Jul 2023 13:15:19 +0300 Subject: Add missing import * dump/strings.scm: Import (ice-9 textual-ports) Signed-off-by: Munyoki Kilyungi --- dump/strings.scm | 1 + 1 file changed, 1 insertion(+) diff --git a/dump/strings.scm b/dump/strings.scm index 282b4e1..e965f03 100644 --- a/dump/strings.scm +++ b/dump/strings.scm @@ -3,6 +3,7 @@ #:use-module (srfi srfi-19) #:use-module (ice-9 match) #:use-module (ice-9 string-fun) + #:use-module (ice-9 textual-ports) #:export (string-blank? time-unix->string string-blank? -- cgit v1.2.3 From b9a322a1e37c3e535b6e7bbe718d06ba0da85952 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 26 Jul 2023 13:25:32 +0300 Subject: Update wikidata species schema Signed-off-by: Munyoki Kilyungi --- schema/species.ttl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schema/species.ttl b/schema/species.ttl index a7f33a4..6b3acaf 100644 --- a/schema/species.ttl +++ b/schema/species.ttl @@ -6,14 +6,14 @@ @prefix gn: . gn:Arabidopsis_thaliana rdf:isDefinedBy wd:Q158695 . -gn:Bat__glossophaga_soricina_ rdf:isDefinedBy wd:Q304929 . -gn:Fly__drosophila_melanogaster_dm6_ rdf:isDefinedBy wd:Q130888 . +gn:Glossophaga_soricina rdf:isDefinedBy wd:Q304929 . +gn:Drosophila_melanogaster_dm6 rdf:isDefinedBy wd:Q130888 . gn:Glycine_max rdf:isDefinedBy wd:Q11006 . gn:Homo_sapiens rdf:isDefinedBy wd:Q15978631 . gn:Hordeum_vulgare rdf:isDefinedBy wd:Q11577 . gn:Macaca_mulatta rdf:isDefinedBy wd:Q177601 . gn:Mus_musculus rdf:isDefinedBy wd:Q83310 . -gn:Oryzias_latipes__japanese_medaka_ rdf:isDefinedBy wd:Q1142975 . +gn:Japanese_medaka rdf:isDefinedBy wd:Q1142975 . gn:Populus_trichocarpa rdf:isDefinedBy wd:Q149382 . gn:Rattus_norvegicus rdf:isDefinedBy wd:Q184224 . gn:Solanum_lycopersicum rdf:isDefinedBy wd:Q23501 . 
-- cgit v1.2.3 From ddf6bd5b1016df4a4d8d4727852a7ed80cc90407 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 26 Jul 2023 13:32:16 +0300 Subject: Add new method "string->binomial-name" * dump/triples.scm: Export string->binomial-name. (string->binomial-name): New procedure. Signed-off-by: Munyoki Kilyungi --- dump/triples.scm | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dump/triples.scm b/dump/triples.scm index 2b43d68..0206ba3 100644 --- a/dump/triples.scm +++ b/dump/triples.scm @@ -7,7 +7,8 @@ prefix triple scm->triples - annotate-field)) + annotate-field + string->binomial-name)) (define (annotate-field field schema) (let ([schema (cond ((symbol? schema) @@ -89,3 +90,19 @@ characters with an underscore and prefixing with gn:PREFIX." (else object)) (fn id predicate object)))) alist)) + +(define (string->binomial-name name) + (let ((binomial? + (string-match + "\\\(.+\\)" + name))) + (string->identifier + "" + (if binomial? + (regexp-substitute/global + #f "[^[:space:]A-Za-z0-9:]" + (match:substring binomial?) 
+ 'pre "" 'post) + name) + #:separator "" + #:proc string-capitalize-first))) -- cgit v1.2.3 From e662d9054a1f753044cfd13bf8f6965062879e86 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 26 Jul 2023 13:36:03 +0300 Subject: Use "string->binomial-name" to id species Signed-off-by: Munyoki Kilyungi --- examples/dump-species-metadata.scm | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 008c3a0..77db764 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -26,9 +26,7 @@ (gn-term:binomialName rdfs:range rdfs:Literal) (gn-term:family rdfs:range rdfs:Literal)) (triples - (string->identifier "" (field Species FullName) - #:separator "" - #:proc string-capitalize-first) + (string->binomial-name (field Species FullName)) (set rdf:type 'gn:species) (set gn-term:name (field Species SpeciesName)) (set gn-term:displayName (field Species MenuName)) @@ -55,9 +53,7 @@ #:proc string-capitalize-first) (set rdf:type 'gn:strain) (set gn-term:strainOfSpecies - (string->identifier "" (field Species FullName) - #:separator "" - #:proc string-capitalize-first)) + (string->binomial-name (field Species FullName))) ;; Name, and maybe a second name (set gn-term:name (sanitize-rdf-string (field Strain Name))) (set gn-term:name2 (sanitize-rdf-string (field Strain Name2))) @@ -96,10 +92,8 @@ (set gn-term:inbredSetOfMappingMethod (field MappingMethod Name)) (set gn-term:inbredSetCode (field InbredSet InbredSetCode)) (set gn-term:inbredSetOfSpecies - (string->identifier "" (field Species FullName BinomialName) - #:ontology "gn:" - #:separator "" - #:proc string-capitalize-first)) + (string->binomial-name + (field Species FullName BinomialName))) (set gn-term:genotype (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" genotypeP))) (set 
gn-term:phenotype -- cgit v1.2.3 From 1d81a238403c29bb46fb2352505b05cf3c150787 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 26 Jul 2023 13:43:33 +0300 Subject: Replace "gn-term" with "gnt" prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-dataset-metadata.scm | 138 ++++++++++++++++++------------------ examples/dump-genotype.scm | 36 +++++----- examples/dump-phenotype.scm | 76 ++++++++++---------- examples/dump-probeset-metadata.scm | 20 +++--- examples/dump-probeset.scm | 22 +++--- examples/dump-probesetfreeze.scm | 25 +++---- examples/dump-publication.scm | 34 ++++----- examples/dump-species-metadata.scm | 35 ++++----- examples/dump-tissue.scm | 6 +- 9 files changed, 197 insertions(+), 195 deletions(-) diff --git a/examples/dump-dataset-metadata.scm b/examples/dump-dataset-metadata.scm index c51364a..33e72fe 100755 --- a/examples/dump-dataset-metadata.scm +++ b/examples/dump-dataset-metadata.scm @@ -52,11 +52,11 @@ (foaf:givenName rdfs:range rdfs:Literal) (foaf:familyName rdfs:range rdfs:Literal) (foaf:homepage rdfs:range rdfs:Literal) - (gn-term:address rdfs:range rdfs:Literal) - (gn-term:city rdfs:range rdfs:Literal) - (gn-term:state rdfs:range rdfs:Literal) - (gn-term:zipCode rdfs:range rdfs:Literal) - (gn-term:country rdfs:range rdfs:Literal)) + (gnt:address rdfs:range rdfs:Literal) + (gnt:city rdfs:range rdfs:Literal) + (gnt:state rdfs:range rdfs:Literal) + (gnt:zipCode rdfs:range rdfs:Literal) + (gnt:country rdfs:range rdfs:Literal)) (triples (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email)) @@ -70,11 +70,11 @@ (set foaf:familyName (field ("CAST(CONVERT(BINARY CONVERT(LastName USING latin1) USING utf8) AS VARCHAR(100))" LastName))) (set foaf:homepage (field Investigators Url)) - (set gn-term:address (field Investigators Address)) - (set gn-term:city (field Investigators City)) - (set gn-term:state (field Investigators State)) - (set gn-term:zipCode (field 
Investigators ZipCode)) - (set gn-term:country (field Investigators Country)))) + (set gnt:address (field Investigators Address)) + (set gnt:city (field Investigators City)) + (set gnt:state (field Investigators State)) + (set gnt:zipCode (field Investigators ZipCode)) + (set gnt:country (field Investigators Country)))) (define-dump dump-info-files (tables (InfoFiles @@ -92,38 +92,38 @@ (left-join GeneChip "USING (GeneChipId)")) "WHERE GN_AccesionId IS NOT NULL") (schema-triples - (gn-term:dataset rdfs:range rdfs:Literal) - (gn-term:datasetOfInvestigator rdfs:domain gn:dataset) - (gn-term:datasetOfOrganization rdfs:domain gn:dataset) - (gn-term:datasetOfInvestigator rdfs:range foaf:Person) - (gn-term:datasetOfInbredSet rdfs:domain gn:dataset) - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn-term:datasetOfSpecies rdfs:domain gn:dataset) - (gn-term:datasetOfSpecies rdfs:range gn:inbredSet) - (gn-term:datasetOfTissue rdfs:domain gn:dataset) - (gn-term:datasetOfTissue rdfs:range gn:tissue) - (gn-term:normalization rdfs:domain gn:dataset) - (gn-term:normalization rdfs:range gn:avgMethod) - (gn-term:datasetOfPlatform rdfs:domain gn:dataset) - (gn-term:datasetOfPlatform rdfs:range gn:geneChip) - (gn-term:accessionId rdfs:range rdfs:Literal) - (gn-term:datasetStatusName rdfs:range rdfs:Literal) - (gn-term:summary rdfs:range rdfs:Literal) - (gn-term:aboutTissue rdfs:range rdfs:Literal) - (gn-term:geoSeries rdfs:range rdfs:Literal) - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:title rdfs:range rdfs:Literal) - (gn-term:publicationTitle rdfs:range rdfs:Literal) - (gn-term:specifics rdfs:range rdfs:Literal) - (gn-term:datasetGroup rdfs:range rdfs:Literal) - (gn-term:aboutCases rdfs:range rdfs:Literal) - (gn-term:aboutPlatform rdfs:range rdfs:Literal) - (gn-term:aboutDataProcessing rdfs:range rdfs:Literal) - (gn-term:notes rdfs:range rdfs:Literal) - (gn-term:experimentDesign rdfs:range rdfs:Literal) - (gn-term:contributors rdfs:range rdfs:Literal) - 
(gn-term:citation rdfs:range rdfs:Literal) - (gn-term:acknowledgment rdfs:range rdfs:Literal)) + (gnt:dataset rdfs:range rdfs:Literal) + (gnt:datasetOfInvestigator rdfs:domain gn:dataset) + (gnt:datasetOfOrganization rdfs:domain gn:dataset) + (gnt:datasetOfInvestigator rdfs:range foaf:Person) + (gnt:datasetOfInbredSet rdfs:domain gn:dataset) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:datasetOfSpecies rdfs:domain gn:dataset) + (gnt:datasetOfSpecies rdfs:range gn:inbredSet) + (gnt:datasetOfTissue rdfs:domain gn:dataset) + (gnt:datasetOfTissue rdfs:range gn:tissue) + (gnt:normalization rdfs:domain gn:dataset) + (gnt:normalization rdfs:range gn:avgMethod) + (gnt:datasetOfPlatform rdfs:domain gn:dataset) + (gnt:datasetOfPlatform rdfs:range gn:geneChip) + (gnt:accessionId rdfs:range rdfs:Literal) + (gnt:datasetStatusName rdfs:range rdfs:Literal) + (gnt:summary rdfs:range rdfs:Literal) + (gnt:aboutTissue rdfs:range rdfs:Literal) + (gnt:geoSeries rdfs:range rdfs:Literal) + (gnt:name rdfs:range rdfs:Literal) + (gnt:title rdfs:range rdfs:Literal) + (gnt:publicationTitle rdfs:range rdfs:Literal) + (gnt:specifics rdfs:range rdfs:Literal) + (gnt:datasetGroup rdfs:range rdfs:Literal) + (gnt:aboutCases rdfs:range rdfs:Literal) + (gnt:aboutPlatform rdfs:range rdfs:Literal) + (gnt:aboutDataProcessing rdfs:range rdfs:Literal) + (gnt:notes rdfs:range rdfs:Literal) + (gnt:experimentDesign rdfs:range rdfs:Literal) + (gnt:contributors rdfs:range rdfs:Literal) + (gnt:citation rdfs:range rdfs:Literal) + (gnt:acknowledgment rdfs:range rdfs:Literal)) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" (field InfoFiles InfoPageName) @@ -133,84 +133,84 @@ (set rdf:type (string->symbol (field ("IF(GenoFreeze.Id IS NOT NULL, 'gn:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gn:phenotypeDataset', 'gn:dataset'))" rdfType)))) - (set gn-term:name (regexp-substitute/global + (set gnt:name (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles 
InfoPageName) "")) - (set gn-term:fullName + (set gnt:fullName (field ("IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, ''))" DatasetFullName))) (set dct:created (field ("IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, '')))" createTimeGenoFreeze))) - (set gn-term:datasetOfInvestigator + (set gnt:datasetOfInvestigator (investigator-attributes->id (field Investigators FirstName) (field Investigators LastName) (field Investigators Email))) - (set gn-term:datasetOfOrganization + (set gnt:datasetOfOrganization (field ("CAST(CONVERT(BINARY CONVERT(Organizations.OrganizationName USING latin1) USING utf8) AS VARCHAR(1500))" Organizations))) - (set gn-term:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) - (set gn-term:datasetStatusName (string-downcase + (set gnt:accessionId (format #f "GN~a" (field InfoFiles GN_AccesionId))) + (set gnt:datasetStatusName (string-downcase (field DatasetStatus DatasetStatusName))) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))) - (set gn-term:datasetOfTissue (string->identifier "tissue" + (set gnt:datasetOfTissue (string->identifier "tissue" (field Tissue Short_Name))) - (set gn-term:normalization + (set gnt:normalization (string->identifier "avgmethod" ;; If AvgMethodName is NULL, assume N/A. (if (string-blank? 
(field AvgMethod Name AvgMethodName)) "N/A" (field AvgMethod Name AvgMethodName)))) - (set gn-term:datasetOfPlatform + (set gnt:datasetOfPlatform (string->identifier "platform" (field GeneChip Name GeneChip))) - (set gn-term:summary + (set gnt:summary (sanitize-rdf-string (field Datasets Summary))) - (set gn-term:aboutTissue + (set gnt:aboutTissue (sanitize-rdf-string (field Datasets AboutTissue))) - (set gn-term:geoSeries + (set gnt:geoSeries (let ((s (string-match "GSE[0-9]*" (field ("IFNULL(Datasets.GeoSeries, '')" GeoSeries))))) (if s (ontology 'geoSeries: (match:substring s)) ""))) - (set gn-term:title + (set gnt:title (regexp-substitute/global #f "^[Nn]one$" (field InfoFiles InfoFileTitle) "")) - (set gn-term:publicationTitle + (set gnt:publicationTitle (regexp-substitute/global #f "^[Nn]one$" (field Datasets PublicationTitle) "")) - (set gn-term:specifics (sanitize-rdf-string (field InfoFiles Specifics))) - (set gn-term:datasetGroup (field Datasets DatasetName DatasetGroup)) - (set gn-term:aboutCases + (set gnt:specifics (sanitize-rdf-string (field InfoFiles Specifics))) + (set gnt:datasetGroup (field Datasets DatasetName DatasetGroup)) + (set gnt:aboutCases (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutCases USING latin1) USING utf8) AS VARCHAR(10000))" AboutCases)))) - (set gn-term:aboutPlatform + (set gnt:aboutPlatform (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutPlatform USING latin1) USING utf8) AS VARCHAR(1500))" AboutPlatform)))) - (set gn-term:aboutDataProcessing + (set gnt:aboutDataProcessing (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.AboutDataProcessing USING latin1) USING utf8) AS VARCHAR(1500))" AboutDataProcessing)))) - (set gn-term:notes + (set gnt:notes (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Notes USING latin1) USING utf8) AS VARCHAR(1500))" GNNotes)))) - (set gn-term:experimentDesign + (set gnt:experimentDesign (sanitize-rdf-string (field 
("CAST(CONVERT(BINARY CONVERT(Datasets.ExperimentDesign USING latin1) USING utf8) AS VARCHAR(1500))" ExperimentDesign)))) - (set gn-term:contributors + (set gnt:contributors (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Datasets.Contributors USING latin1) USING utf8) AS VARCHAR(1500))" Contributors)))) - (set gn-term:citation + (set gnt:citation (sanitize-rdf-string (regexp-substitute/global #f "^[Nn]one$" @@ -218,7 +218,7 @@ ("CAST(CONVERT(BINARY CONVERT(Datasets.Citation USING latin1) USING utf8) AS VARCHAR(1500))" Citation)) ""))) - (set gn-term:dataSourceAcknowledgment + (set gnt:dataSourceAcknowledgment (sanitize-rdf-string (string-trim-both (regexp-substitute/global @@ -226,7 +226,7 @@ (field ("CAST(CONVERT(BINARY CONVERT(InfoFiles.Data_Source_Acknowledge USING latin1) USING utf8) AS VARCHAR(1500))" Data_Source_Acknowledge)) "")))) - (set gn-term:acknowledgment (sanitize-rdf-string + (set gnt:acknowledgment (sanitize-rdf-string (field Datasets Acknowledgment))))) @@ -239,7 +239,7 @@ (prefixes '(("foaf:" "") ("geoSeries:" "") - ("gn-term:" "") + ("gnt:" "") ("gn:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index d97b7e5..88125fa 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -26,9 +26,9 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gn-term:shortName rdfs:range rdfs:Literal)) + (gnt:shortName rdfs:range rdfs:Literal)) (triples (string->identifier "" @@ -42,13 +42,13 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:genotypeDataset) - (set gn-term:name (field GenoFreeze Name)) - (set gn-term:fullName (field GenoFreeze FullName)) - (set 
gn-term:shortName (field GenoFreeze ShortName)) + (set gnt:name (field GenoFreeze Name)) + (set gnt:fullName (field GenoFreeze FullName)) + (set gnt:shortName (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:date)) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "" (field InbredSet Name InbredSetName))))) (define-dump dump-genotypes @@ -58,7 +58,7 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples (gn:genotype rdfs:range rdfs:Literal) - (gn-term:genotypeDataset rdfs:subPropertyOf gn:dataset)) + (gnt:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples (string->identifier "" @@ -69,14 +69,14 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:genotype) - (set gn-term:name (sanitize-rdf-string (field Geno Name))) - (set gn-term:markerName (sanitize-rdf-string (field Geno Marker_Name))) - (set gn-term:chr (field Geno Chr)) - (set gn-term:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) - (set gn-term:sequence (field Geno Sequence)) - (set gn-term:source (field Geno Source)) - (set gn-term:source2 (field Geno Source2)) - (set gn-term:genotypeOfDataset + (set gnt:name (sanitize-rdf-string (field Geno Name))) + (set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name))) + (set gnt:chr (field Geno Chr)) + (set gnt:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) + (set gnt:sequence (field Geno Sequence)) + (set gnt:source (field Geno Source)) + (set gnt:source2 (field Geno Source2)) + (set gnt:genotypeOfDataset (string->identifier "" (regexp-substitute/global @@ -86,12 +86,12 @@ #:separator "" #:proc string-capitalize-first) ) - (set gn-term:chrNum + (set gnt:chrNum (annotate-field (field ("IFNULL(Geno.chr_num, '')" chr_num)) '^^xsd:int)) (set gn:comments (field ("CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255))" Comments))) - (set gn-term:cM + 
(set gnt:cM (annotate-field (field ("IFNULL(GenoXRef.cM, '')" Chr_mm8)) '^^xsd:int)))) @@ -105,7 +105,7 @@ (prefixes '(("dct:" "") ("gn:" "") - ("gn-term:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" ""))) diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 1ef498d..00f99d2 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -27,10 +27,10 @@ (left-join InbredSet "ON PublishFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gn-term:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:fullName rdfs:range rdfs:Literal) - (gn-term:shortName rdfs:range rdfs:Literal) + (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) + (gnt:name rdfs:range rdfs:Literal) + (gnt:fullName rdfs:range rdfs:Literal) + (gnt:shortName rdfs:range rdfs:Literal) (gn:phenotypeDataset rdf:subClassOf gn:dataset)) (triples (string->identifier @@ -41,13 +41,13 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:phenotypeDataset) - (set gn-term:name (field PublishFreeze Name)) - (set gn-term:fullName (field PublishFreeze FullName)) - (set gn-term:shortName (field PublishFreeze ShortName)) + (set gnt:name (field PublishFreeze Name)) + (set gnt:fullName (field PublishFreeze FullName)) + (set gnt:shortName (field PublishFreeze ShortName)) (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:date)) - (set gn-term:datasetOfInbredSet + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) (define-dump dump-phenotypes @@ -58,19 +58,19 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))) (schema-triples (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset) - (gn-term:publicationDescription rdfs:range rdfs:Literal) - (gn-term:originalDescription rdfs:range rdfs:Literal) - 
(gn-term:prePublicationDescription rdfs:range rdfs:Literal) - (gn-term:postPublicationAbbreviation rdfs:range rdfs:Literal) - (gn-term:labCode rdfs:range rdfs:Literal) - (gn-term:submitter rdfs:range rdfs:Literal) - (gn-term:owner rdfs:range rdfs:Literal) - (gn-term:mean rdfs:range xsd:double) - (gn-term:LRS rdfs:range xsd:float) - (gn-term:locus rdfs:range rdfs:Literal) - (gn-term:additive rdfs:range xsd:decimal) - (gn-term:sequence rdfs:range rdfs:Literal) - (gn-term:phenotypeOfPublication rdfs:range gn-term:pubMedId)) + (gnt:publicationDescription rdfs:range rdfs:Literal) + (gnt:originalDescription rdfs:range rdfs:Literal) + (gnt:prePublicationDescription rdfs:range rdfs:Literal) + (gnt:postPublicationAbbreviation rdfs:range rdfs:Literal) + (gnt:labCode rdfs:range rdfs:Literal) + (gnt:submitter rdfs:range rdfs:Literal) + (gnt:owner rdfs:range rdfs:Literal) + (gnt:mean rdfs:range xsd:double) + (gnt:LRS rdfs:range xsd:float) + (gnt:locus rdfs:range rdfs:Literal) + (gnt:additive rdfs:range xsd:decimal) + (gnt:sequence rdfs:range rdfs:Literal) + (gnt:phenotypeOfPublication rdfs:range gn-term:pubMedId)) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" @@ -79,44 +79,44 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:phenotype) - (set gn-term:name (sanitize-rdf-string + (set gnt:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" PhenotypeName)))) ;; There is no row with an empty post-publication description so ;; use this field as the main publication description - (set gn-term:publicationDescription + (set gnt:publicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Post_publication_description USING latin1) USING utf8) AS CHAR(10000))" 
postPubDescr)))) - (set gn-term:originalDescription (sanitize-rdf-string + (set gnt:originalDescription (sanitize-rdf-string (delete-substrings (field Phenotype Original_description) "Original post publication description: "))) - (set gn-term:prePublicationDescription + (set gnt:prePublicationDescription (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Pre_publication_description USING latin1) USING utf8) AS VARCHAR(15000))" prePubDesc)))) - (set gn-term:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) - (set gn-term:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) - (set gn-term:labCode (field Phenotype Lab_code)) - (set gn-term:submitter (sanitize-rdf-string (field Phenotype Submitter))) - (set gn-term:owner (sanitize-rdf-string (field Phenotype Owner))) - (set gn-term:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) + (set gnt:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) + (set gnt:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) + (set gnt:labCode (field Phenotype Lab_code)) + (set gnt:submitter (sanitize-rdf-string (field Phenotype Submitter))) + (set gnt:owner (sanitize-rdf-string (field Phenotype Owner))) + (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) '^^xsd:double)) - (set gn-term:locus (field PublishXRef Locus)) - (set gn-term:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) - (set gn-term:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) - (set gn-term:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) - (set gn-term:phenotypeOfDataset + (set gnt:locus (field PublishXRef Locus)) + (set gnt:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) + (set gnt:additive (annotate-field 
(field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) + (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) + (set gnt:phenotypeOfDataset (string->identifier "" (field ("IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, ''))" DatasetName)) #:separator "" #:proc string-capitalize-first)) - (set gn-term:phenotypeOfPublication + (set gnt:phenotypeOfPublication (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" pmid))) @@ -134,7 +134,7 @@ (prefixes '(("dct:" "") ("gn:" "") - ("gn-term:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" "") diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index 6da1eb0..ddbea5e 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -24,14 +24,14 @@ "WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1") (schema-triples (gn:probesetData rdfs:range gn:probeset) - (gn-term:hasProbeset rdfs:range rdfs:Literal)) + (gnt:hasProbeset rdfs:range rdfs:Literal)) (triples (string->identifier "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) (set rdf:type 'gn:probesetData) - (set gn-term:hasProbeset + (set gnt:hasProbeset (ontology 'probeset: (regexp-substitute/global @@ -39,34 +39,34 @@ (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" name)) 'pre "_" 'post))) - (set gn-term:probesetOfDataset + (set gnt:probesetOfDataset (ontology 'probeset: (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ProbeSetFreeze Name) 'pre "_" 'post))) - (set gn-term:mean + (set gnt:mean (annotate-field (field ("IFNULL(ProbeSetXRef.mean, '')" mean)) '^^xsd:double)) - (set gn-term:se + (set gnt:se (annotate-field (field ("IFNULL(ProbeSetXRef.se, '')" se)) '^^xsd:double)) - (set gn-term:locus (field ProbeSetXRef Locus)) + (set gnt:locus (field ProbeSetXRef Locus)) (set gn:LRS (annotate-field (field ("IFNULL(ProbeSetXRef.LRS, '')" 
LRS)) '^^xsd:double)) - (set gn-term:pValue + (set gnt:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) '^^xsd:double)) - (set gn-term:additive + (set gnt:additive (annotate-field (field ("IFNULL(ProbeSetXRef.additive, '')" additive)) '^^xsd:double)) - (set gn-term:h2 + (set gnt:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) '^^xsd:float)))) @@ -79,7 +79,7 @@ (table-metadata? #f) (prefixes '(("gn:" "") - ("gn-term:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" ""))) diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm index be09b48..4d5f9a5 100755 --- a/examples/dump-probeset.scm +++ b/examples/dump-probeset.scm @@ -21,8 +21,8 @@ (tables (ProbeSet (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:probeset rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal) + (gnt:probeset rdfs:range rdfs:Literal)) (triples (ontology 'probeset: (string-trim-both @@ -32,17 +32,17 @@ name)) 'pre "_" 'post))) (set rdf:type 'gn-id:probeset) - (set gn-term:chipOf (string->identifier "platform" (field GeneChip Name))) - (set gn-term:name (field ProbeSet Name)) - (set gn-term:symbol (delete-substrings (field ProbeSet Symbol) "\"")) - (set gn-term:description (sanitize-rdf-string + (set gnt:chipOf (string->identifier "platform" (field GeneChip Name))) + (set gnt:name (field ProbeSet Name)) + (set gnt:symbol (delete-substrings (field ProbeSet Symbol) "\"")) + (set gnt:description (sanitize-rdf-string (field ProbeSet description))) - (set gn-term:chr (field ProbeSet Chr)) - (set gn-term:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gn-term:blatSeq (sanitize-rdf-string + (set gnt:chr (field ProbeSet Chr)) + (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) + (set gnt:blatSeq (sanitize-rdf-string (string-trim-both (field ProbeSet BlatSeq)))) - (set gn-term:targetSeq 
(sanitize-rdf-string (field ProbeSet TargetSeq))) - (set gn-term:uniProtReference (ontology 'uniprot: + (set gnt:targetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))) + (set gnt:uniProtReference (ontology 'uniprot: (field ProbeSet UniProtID))))) diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index a45fd0a..828ab00 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -20,11 +20,11 @@ (define-dump dump-gene-chip (tables (GeneChip)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) (set rdf:type 'gn:platform) - (set gn-term:name (field GeneChip GeneChipName)) - (set gn-term:geoPlatform + (set gnt:name (field GeneChip GeneChipName)) + (set gnt:geoPlatform (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -38,8 +38,8 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gn-term:avgMethod rdfs:range rdfs:Literal) - (gn-term:dataScale rdfs:range rdfs:Literal) + (gnt:avgMethod rdfs:range rdfs:Literal) + (gnt:dataScale rdfs:range rdfs:Literal) (gn:probesetDataset rdf:subClassOf gn:dataset)) (triples (string->identifier @@ -51,15 +51,15 @@ #:separator "" #:proc string-capitalize-first) (set rdf:type 'gn:probesetDataset) - (set gn-term:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) - (set gn-term:fullName (field ProbeSetFreeze FullName)) - (set gn-term:shortName (field ProbeSetFreeze ShortName)) + (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) + (set gnt:fullName (field ProbeSetFreeze FullName)) + (set gnt:shortName (field ProbeSetFreeze ShortName)) (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gn-term:dataScale (field ProbeSetFreeze DataScale)) - (set 
gn-term:tissueName (string->identifier "tissue" (field Tissue Short_Name))) - (set gn-term:datasetOfInbredSet + (set gnt:dataScale (field ProbeSetFreeze DataScale)) + (set gnt:tissueName (string->identifier "tissue" (field Tissue Short_Name))) + (set gnt:datasetOfInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) @@ -71,7 +71,8 @@ (prefixes '(("geoSeries:" "") ("gn:" "") - ("gn-term:" "") + ("dct:" "<>") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("xsd:" ""))) diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index f79696e..1384261 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -21,15 +21,15 @@ (define-dump dump-publication (tables (Publication)) (schema-triples - (gn-term:pubMedId rdfs:range rdfs:Literal) - (gn-term:title rdfs:range rdfs:Literal) - (gn-term:journal rdfs:range rdfs:Literal) - (gn-term:volume rdfs:range rdfs:Literal) - (gn-term:pages rdfs:range rdfs:Literal) - (gn-term:month rdfs:range rdfs:Literal) - (gn-term:year rdfs:range rdfs:Literal) - (gn-term:author rdfs:range rdfs:Literal) - (gn-term:abstract rdfs:range rdfs:Literal)) + (gnt:pubMedId rdfs:range rdfs:Literal) + (gnt:title rdfs:range rdfs:Literal) + (gnt:journal rdfs:range rdfs:Literal) + (gnt:volume rdfs:range rdfs:Literal) + (gnt:pages rdfs:range rdfs:Literal) + (gnt:month rdfs:range rdfs:Literal) + (gnt:year rdfs:range rdfs:Literal) + (gnt:author rdfs:range rdfs:Literal) + (gnt:abstract rdfs:range rdfs:Literal)) (triples (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" @@ -40,19 +40,19 @@ (number->string publication-id)) (ontology 'pubmed: pmid))) (set rdf:type 'gn:publication) - (set gn-term:pubMedId + (set gnt:pubMedId (ontology 'pubmed: (field ("IFNULL(PubMed_ID, '')" pubmedId)))) - (set gn-term:title (delete-substrings (field Publication Title) + (set gnt:title (delete-substrings (field Publication Title) "Unknown")) - (set gn-term:journal 
(delete-substrings (field Publication Journal) + (set gnt:journal (delete-substrings (field Publication Journal) "Unknown")) - (set gn-term:volume (delete-substrings (field Publication Volume) + (set gnt:volume (delete-substrings (field Publication Volume) "Unknown")) - (set gn-term:pages (delete-substrings (field Publication Pages) + (set gnt:pages (delete-substrings (field Publication Pages) "Unknown")) - (set gn-term:month (delete-substrings (field Publication Month) + (set gnt:month (delete-substrings (field Publication Month) "Unknown")) - (set gn-term:year (field Publication Year)) + (set gnt:year (field Publication Year)) (multiset gn:author ;; The authors field is a comma ;; separated list. Split it. @@ -68,7 +68,7 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn-term:" "") + '(("gnt:" "") ("gn:" "") ("pubmed:" "") ("rdfs:" "") diff --git a/examples/dump-species-metadata.scm b/examples/dump-species-metadata.scm index 77db764..39f7147 100755 --- a/examples/dump-species-metadata.scm +++ b/examples/dump-species-metadata.scm @@ -21,25 +21,25 @@ (define-dump dump-species (tables (Species)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal) - (gn-term:displayName rdfs:range rdfs:Literal) - (gn-term:binomialName rdfs:range rdfs:Literal) - (gn-term:family rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal) + (gnt:displayName rdfs:range rdfs:Literal) + (gnt:binomialName rdfs:range rdfs:Literal) + (gnt:family rdfs:range rdfs:Literal)) (triples (string->binomial-name (field Species FullName)) - (set rdf:type 'gn:species) - (set gn-term:name (field Species SpeciesName)) - (set gn-term:displayName (field Species MenuName)) - (set gn-term:binomialName (field Species FullName)) - (set gn-term:family (field Species Family)) - (set gn-term:organism (ontology 'taxon: (field Species TaxonomyId))))) + (set rdf:type 'gnc:species) + (set gnt:name (field Species SpeciesName)) + (set gnt:displayName (field Species MenuName)) + (set 
gnt:binomialName (field Species FullName)) + (set gnt:family (field Species Family)) + (set gnt:organism (ontology 'taxon: (field Species TaxonomyId))))) (define-dump dump-strain (tables (Strain (left-join Species "ON Strain.SpeciesId = Species.SpeciesId"))) (schema-triples - (gn-term:strainOfSpecies rdfs:domain gn-term:strain) - (gn-term:strainOfSpecies rdfs:range gn-term:species) + (gnt:strainOfSpecies rdfs:domain gnt:strain) + (gnt:strainOfSpecies rdfs:range gn-term:species) (gn-term:name rdfs:range rdfs:Literal) (gn-term:alias rdfs:range rdfs:Literal) (gn-term:symbol rdfs:range rdfs:Literal)) @@ -51,7 +51,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:strain) + (set rdf:type 'gnc:strain) (set gn-term:strainOfSpecies (string->binomial-name (field Species FullName))) ;; Name, and maybe a second name @@ -64,7 +64,7 @@ (tables (MappingMethod)) (triples (string->identifier "mappingMethod" (field MappingMethod Name)) - (set rdf:type 'gn:mappingMethod))) + (set rdf:type 'gnc:mappingMethod))) (define-dump dump-inbred-set (tables (InbredSet @@ -85,7 +85,7 @@ "" (field InbredSet Name) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:inbredSet) + (set rdf:type 'gnc:inbredSet) (set gn-term:binomialName (field InbredSet FullName)) (set gn-term:geneticType (field InbredSet GeneticType)) (set gn-term:inbredFamily (field InbredSet Family)) @@ -106,7 +106,7 @@ (schema-triples (gn-term:normalization rdfs:range rdfs:Literal)) (triples (string->identifier "avgmethod" (field AvgMethod Name)) - (set rdf:type 'gn:avgMethod) + (set rdf:type 'gnc:avgMethod) (set gn-term:normalization (field AvgMethod Normalization)))) @@ -117,7 +117,8 @@ (table-metadata? 
#f) (prefixes '(("gn:" "") - ("gn-term:" "") + ("gnc:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" "") ("taxon:" ""))) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index ff6792e..a9a50f3 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -23,12 +23,12 @@ ;; and BIRN_lex_Name are mostly NULL. (tables (Tissue)) (schema-triples - (gn-term:name rdfs:range rdfs:Literal)) + (gnt:name rdfs:range rdfs:Literal)) ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) (set rdf:type 'gn:tissue) - (set gn-term:name (field Tissue Name)))) + (set gnt:name (field Tissue Name)))) @@ -38,7 +38,7 @@ (table-metadata? #f) (prefixes '(("gn:" "") - ("gn-term:" "") + ("gnt:" "") ("rdf:" "") ("rdfs:" ""))) (inputs -- cgit v1.2.3 From 6f5cca6b5511ce5c80639e477ae10b4e70e2b178 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Sun, 30 Jul 2023 12:27:50 +0300 Subject: Add gnc: prefix Signed-off-by: Munyoki Kilyungi --- examples/dump-generif.scm | 102 ++++++++++++++++-------------------- examples/dump-genotype.scm | 13 ++--- examples/dump-phenotype.scm | 5 +- examples/dump-probeset-metadata.scm | 3 +- examples/dump-probesetfreeze.scm | 5 +- examples/dump-publication.scm | 3 +- examples/dump-tissue.scm | 3 +- 7 files changed, 65 insertions(+), 69 deletions(-) diff --git a/examples/dump-generif.scm b/examples/dump-generif.scm index b546f42..0689f57 100755 --- a/examples/dump-generif.scm +++ b/examples/dump-generif.scm @@ -16,9 +16,6 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define %dump-directory - (list-ref (command-line) 2)) - (define-dump dump-genewiki-symbols @@ -26,17 +23,17 @@ (left-join Species "USING (SpeciesId)")) "GROUP BY GeneId ORDER BY BINARY symbol") (schema-triples - (gn:symbol rdfs:domain gn:geneWikiEntry) - (gn:wikiEntryOfSpecies rdfs:range gn:species) - (gn:taxid rdfs:domain gn:geneWikiEntry)) + (gnt:symbol rdfs:domain 
gn-term:geneWikiEntry) + (gnt:wikiEntryOfSpecies rdfs:range gn:species) + (gnt:taxid rdfs:domain gn-term:geneWikiEntry)) (triples (ontology 'generif: (field GeneRIF_BASIC GeneId)) - (multiset gn:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol)) + (multiset gnt:symbol (string-split (field ("GROUP_CONCAT(DISTINCT symbol)" symbol)) #\,)) - (multiset gn:wikiEntryOfSpecies + (multiset gnt:wikiEntryOfSpecies (string-split (field ("GROUP_CONCAT(DISTINCT Species.SpeciesName)" species)) #\,)) - (multiset gn:taxId (map (cut ontology 'ncbiTaxon: <>) + (multiset gnt:taxId (map (cut ontology 'ncbiTaxon: <>) (string-split (field ("GROUP_CONCAT(DISTINCT TaxID)" taxId)) #\,))))) @@ -48,16 +45,16 @@ (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 GROUP BY GeneRIF.symbol") (schema-triples - (gn:geneWikiEntry a rdfs:Class) - (gn:geneWikiEntry a owl:Class) - (gn:geneWikiEntry rdfs:comment "Represents GeneRIF Entries") - (gn:geneCategory rdfs:domain gn:geneWikiEntry) - (gn:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry) - (gn:geneWikiEntry rdfs:domain gn:geneWikiEntry)) + (gnt:geneWikiEntry a rdfs:Class) + (gnt:geneWikiEntry a owl:Class) + (gnt:geneWikiEntry rdfs:comment "Represents GeneRIF Entries") + (gnt:geneCategory rdfs:domain gn:geneWikiEntry) + (gnt:geneWikiEntryOfGn rdfs:domain gn:geneWikiEntry) + (gnt:geneWikiEntry rdfs:domain gn:geneWikiEntry)) (triples (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) - (ontology 'gn:anonSymbol_ + (ontology 'gnt:anonSymbol_ (field GeneRIF symbol)) (ontology 'generif: geneid))) @@ -65,14 +62,14 @@ (if (string-null? 
(field ("IFNULL(GeneRIF_BASIC.GeneId, '')" geneWikiEntryP))) "" 'gn:geneWikiEntry)) - (set gn:wikiEntryOfSpecies - (field Species SpeciesName)) + (set gnt:wikiEntryOfSpecies + (string->binomial-name (field Species FullName))) ;; This only dumps symbols not present in the GeneRIF_BASIC table - (set gn:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) + (set gnt:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) (field GeneRIF symbol) ""))) - (multiset gn:geneWikiEntryOfGn + (multiset gnt:geneWikiEntryOfGn (let* ([entries (sanitize-rdf-string (field @@ -83,7 +80,7 @@ (match-lambda ((genecategory pmid email text createtime weburl) (blank-node - (set gn:geneCategory genecategory) + (set gnt:geneCategory genecategory) (multiset dct:source (map (lambda (el) (if (string-null? el) "" @@ -94,7 +91,7 @@ 'pre "" 'post)) - (set gn:geneWikiEntry + (set gnt:geneWikiEntry (annotate-field text '^^xsd:string)) (set dct:created (annotate-field createtime @@ -108,12 +105,12 @@ (tables (GeneRIF_BASIC) "GROUP BY GeneId, comment, createtime") (schema-triples - (gn:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry)) + (gnt:geneWikiEntryofNCBI rdfs:domain gn:geneWikiEntry)) (triples (ontology 'generif: (field GeneRIF_BASIC GeneId)) - (set gn:geneWikiEntryOfNCBI + (set gnt:geneWikiEntryOfNCBI (blank-node - (set gn:geneWikiEntry + (set gnt:geneWikiEntry (annotate-field (field GeneRIF_BASIC comment) '^^xsd:string)) (multiset dct:source (map (lambda (el) (if (string-null? 
el) @@ -127,34 +124,27 @@ -(call-with-target-database - %connection-settings - (lambda (db) - (with-output-to-file (string-append %dump-directory "dump-generif.ttl") - (lambda () - (prefix "rdf:" "") - (prefix "rdfs:" "") - (prefix "foaf:" "") - (prefix "gn:" "") - (prefix "dct:" "") - (prefix "pubmed:" "") - (prefix "up:" "") - (prefix "ncbiTaxon:" "") - (prefix "generif:" "") - (prefix "xsd:" "") - (prefix "owl:" "") - (prefix "phenotype:" "") - (prefix "molecularTrait:" "") - (prefix "nuccore:" "") - (prefix "omim:" "") - (prefix "pubchem:" "") - (prefix "uniprot:" "") - (prefix "hgnc:" "") - (prefix "homologene:" "") - (prefix "chebi:" "") - (prefix "kegg:" "") - (newline) - (dump-genewiki-symbols db) - (dump-gn-genewiki-entries db) - (dump-ncbi-genewiki-entries db)) - #:encoding "utf8"))) +(dump-with-documentation + (name "GeneRIF Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("rdf:" "") + ("rdfs:" "") + ("gn:" "") + ("gnc:" "") + ("gnt:" "") + ("dct:" "") + ("pubmed:" "") + ("ncbiTaxon:" "") + ("generif:" "") + ("xsd:" "") + ("owl:" ""))) + (inputs + (list ;; dump-genewiki-symbols + dump-gn-genewiki-entries + ;; dump-ncbi-genewiki-entries + )) + (outputs + '(#:documentation "./docs/dump-generif.md" + #:rdf "./verified-data/dump-generif.ttl"))) diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 88125fa..50cafb6 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -26,9 +26,9 @@ (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") (schema-triples - (gnt:datasetOfInbredSet rdfs:range gn:inbredSet) - (gn:genotypeDataset rdfs:subPropertyOf gn:dataset) - (gnt:shortName rdfs:range rdfs:Literal)) + (gnt:datasetOfInbredSet rdfs:subPropertyOf gnc:inbredSet) + (gnc:genotypeDataset rdfs:subPropertyOf gnc:dataset) + (gnt:shortName rdfs:subPropertyOf 
gnc:genotypeDataset)) (triples (string->identifier "" @@ -41,7 +41,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:genotypeDataset) + (set rdf:type 'gnc:genotypeDataset) (set gnt:name (field GenoFreeze Name)) (set gnt:fullName (field GenoFreeze FullName)) (set gnt:shortName (field GenoFreeze ShortName)) @@ -57,7 +57,7 @@ (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId") (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples - (gn:genotype rdfs:range rdfs:Literal) + (gnc:genotype rdfs:range rdfs:Literal) (gnt:genotypeDataset rdfs:subPropertyOf gn:dataset)) (triples (string->identifier @@ -68,7 +68,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:genotype) + (set rdf:type 'gnc:genotype) (set gnt:name (sanitize-rdf-string (field Geno Name))) (set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name))) (set gnt:chr (field Geno Chr)) @@ -105,6 +105,7 @@ (prefixes '(("dct:" "") ("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 00f99d2..983756b 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -40,7 +40,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:phenotypeDataset) + (set rdf:type 'gnc:phenotypeDataset) (set gnt:name (field PublishFreeze Name)) (set gnt:fullName (field PublishFreeze FullName)) (set gnt:shortName (field PublishFreeze ShortName)) @@ -78,7 +78,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:phenotype) + (set rdf:type 'gnc:phenotype) (set gnt:name (sanitize-rdf-string (field ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS 
VARCHAR(100))" @@ -134,6 +134,7 @@ (prefixes '(("dct:" "") ("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm index ddbea5e..37fef70 100755 --- a/examples/dump-probeset-metadata.scm +++ b/examples/dump-probeset-metadata.scm @@ -30,7 +30,7 @@ "probesetData" (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" ProbeSetName))) - (set rdf:type 'gn:probesetData) + (set rdf:type 'gnc:probesetData) (set gnt:hasProbeset (ontology 'probeset: @@ -79,6 +79,7 @@ (table-metadata? #f) (prefixes '(("gn:" "") + ("gnc:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index 828ab00..30ea9f4 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -22,7 +22,7 @@ (schema-triples (gnt:name rdfs:range rdfs:Literal)) (triples (string->identifier "platform" (field GeneChip Name)) - (set rdf:type 'gn:platform) + (set rdf:type 'gnc:platform) (set gnt:name (field GeneChip GeneChipName)) (set gnt:geoPlatform (ontology 'geoSeries: @@ -50,7 +50,7 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gn:probesetDataset) + (set rdf:type 'gnc:probesetDataset) (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) (set gnt:fullName (field ProbeSetFreeze FullName)) (set gnt:shortName (field ProbeSetFreeze ShortName)) @@ -71,6 +71,7 @@ (prefixes '(("geoSeries:" "") ("gn:" "") + ("gnc:" "") ("dct:" "<>") ("gnt:" "") ("rdf:" "") diff --git a/examples/dump-publication.scm b/examples/dump-publication.scm index 1384261..50e4358 100755 --- a/examples/dump-publication.scm +++ b/examples/dump-publication.scm @@ -39,7 +39,7 @@ (string->identifier "unpublished" (number->string publication-id)) (ontology 'pubmed: pmid))) - (set rdf:type 'gn:publication) + (set rdf:type 'gnc:publication) (set gnt:pubMedId (ontology 'pubmed: (field 
("IFNULL(PubMed_ID, '')" pubmedId)))) (set gnt:title (delete-substrings (field Publication Title) @@ -70,6 +70,7 @@ (prefixes '(("gnt:" "") ("gn:" "") + ("gnc:" "") ("pubmed:" "") ("rdfs:" "") ("rdf:" ""))) diff --git a/examples/dump-tissue.scm b/examples/dump-tissue.scm index a9a50f3..dc76600 100755 --- a/examples/dump-tissue.scm +++ b/examples/dump-tissue.scm @@ -27,7 +27,7 @@ ;; Hopefully the Short_Name field is distinct and can be used as an ;; identifier. (triples (string->identifier "tissue" (field Tissue Short_Name)) - (set rdf:type 'gn:tissue) + (set rdf:type 'gnc:tissue) (set gnt:name (field Tissue Name)))) @@ -39,6 +39,7 @@ (prefixes '(("gn:" "") ("gnt:" "") + ("gnc:" "") ("rdf:" "") ("rdfs:" ""))) (inputs -- cgit v1.2.3