diff options
-rwxr-xr-x | examples/genbank.scm | 3 | ||||
-rwxr-xr-x | examples/genelist.scm | 2 | ||||
-rwxr-xr-x | examples/generif-old.scm | 241 | ||||
-rwxr-xr-x | examples/generif.scm | 246 | ||||
-rwxr-xr-x | examples/probeset.scm | 205 | ||||
-rwxr-xr-x | examples/schema.scm | 70 | ||||
-rwxr-xr-x | generate-ttl-files.scm | 169 | ||||
-rw-r--r-- | transform/schema.scm (renamed from transform/schema-dump.scm) | 12 | ||||
-rw-r--r-- | transform/special-forms.scm | 70 | ||||
-rw-r--r-- | transform/uuid.scm | 234 |
10 files changed, 574 insertions, 678 deletions
diff --git a/examples/genbank.scm b/examples/genbank.scm index 391cff0..c83643c 100755 --- a/examples/genbank.scm +++ b/examples/genbank.scm @@ -10,8 +10,7 @@ (transform strings) (transform sql) (transform triples) - (transform special-forms) - (transform uuid)) + (transform special-forms)) diff --git a/examples/genelist.scm b/examples/genelist.scm index 9c1ced0..8729626 100755 --- a/examples/genelist.scm +++ b/examples/genelist.scm @@ -18,6 +18,8 @@ (tables (GeneList (left-join Species "USING (SpeciesId)"))) (schema-triples + (gnc:GeneSymbol a rdfs:Class) + (gnc:GeneSymbol rdfs:label "A gene symbol") (gnt:gene rdfs:domain gnc:GeneSymbol) (gnt:belongsToSpecies rdfs:domain gnc:GeneSymbol) (gnc:Gene a rdfs:Class) diff --git a/examples/generif-old.scm b/examples/generif-old.scm new file mode 100755 index 0000000..ede5a28 --- /dev/null +++ b/examples/generif-old.scm @@ -0,0 +1,241 @@ +#! /usr/bin/env guile +!# + +(use-modules (srfi srfi-1) + (srfi srfi-26) + (rnrs bytevectors) + (ice-9 format) + (ice-9 getopt-long) + (ice-9 match) + (ice-9 regex) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) + + + +(define (fix-email-id email) + (string-delete #\space email)) + +(define (investigator-attributes->id first-name last-name email) + ;; There is just one record corresponding to "Evan Williams" which + ;; does not have an email ID. To accommodate that record, we + ;; construct the investigator ID from not just the email ID, but + ;; also the first and the last names. It would be preferable to just + ;; find Evan Williams' email ID and insert it into the database. + (string->identifier "investigator" + (string-join + (list first-name last-name (fix-email-id email)) + "_"))) + + + +(define-transformer genewiki-symbols + (tables (GeneRIF_BASIC) + "GROUP BY BINARY symbol") + (triples + (string->identifier + "symbol" + (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field GeneRIF_BASIC symbol) + 'pre "_" 'post) + #:proc (lambda (x) x)) + (set rdfs:label + (field GeneRIF_BASIC symbol)))) + +;; Some symbols exist in the RIF table that don't exist in the GeneRIF +;; table. +(define-transformer generif-symbols + (tables (GeneRIF) + "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol") + (triples + (string->identifier + "symbol" + (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field GeneRIF symbol) + 'pre "_" 'post) + #:proc (lambda (x) x)) + (set rdfs:label + (field GeneRIF symbol)))) + +(define-transformer gn-genewiki-entries + (tables (GeneRIF + (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") + (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") + (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id") + (left-join Investigators "ON Investigators.Email = GeneRIF.email")) + "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol") + (schema-triples + (gnc:GeneWikiEntry a rdfs:Class) + (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) + (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") + (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) + (triples + (string->identifier + "symbol" + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field GeneRIF symbol) + 'pre "_" 'post) + #:proc (lambda (x) x)) + (set rdfs:comment + (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))] + [create-time (field GeneRIF createtime EntryCreateTime)] + [pmid (field GeneRIF PubMed_ID PMID)] + [web-url (field GeneRIF weburl)] + [species (string->identifier + "" + (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)] + [categories + (remove (lambda (x) + (or (eq? x #f) + (and (string? x) + (string-null? x)))) + (remove-duplicates + (string-split-substring + (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')" + GeneCategory)) + "$$")))]) + (string->symbol + (string-append + "[ " + (format #f "rdf:type gnc:GNWikiEntry ; ") + (if (string? species) + "" + (format #f "gnt:belongsToSpecies ~a ; " + species)) + (format #f "rdfs:comment ~s^^xsd:string ; " + generif-comment) + (if (string? create-time) + "" + (format #f "dct:created ~s^^xsd:datetime ; " + (time-unix->string + create-time "~5"))) + (if (and (string? pmid) (not (string-null? pmid))) + (format #f + "~{dct:references pubmed:~a ; ~}" + (string-split pmid #\space)) + "") + (if (and (not (string-null? + (string-trim-both (field GeneRIF email)))) + (not (string-null? (field Investigators Email)))) + (format #f "dct:creator ~a ; " + (investigator-attributes->id + (field Investigators FirstName) + (field Investigators LastName) + (field Investigators Email))) + "") + (if (not (null? categories)) + (format #f + "~{gnt:belongsToCategory ~s ; ~}" + categories) + "") + (if (and (string? web-url) (not (string-null? web-url))) + (format #f "foaf:homepage ~s ; " + web-url) + "") + " ] ")))))) + +(define-transformer ncbi-genewiki-entries + (tables (GeneRIF_BASIC + (left-join Species "USING (SpeciesId)")) + "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID") + (schema-triples + (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) + (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI") + (gnt:hasVersionId a owl:ObjectProperty) + (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry) + (gnt:hasVersionId skos:definition "The VersionId of this this resource")) + (triples + (string->identifier + "symbol" + (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field GeneRIF_BASIC symbol GeneRIFSymbol) + 'pre "_" 'post) + #:proc (lambda (x) x)) + (set rdfs:comment + (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))] + [species-name + (string->identifier + "" + (remap-species-identifiers (field Species Fullname SpeciesFullName)) + #:separator "" + #:proc string-capitalize-first)] + [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)] + [create-time (field GeneRIF_BASIC createtime EntryCreateTime)] + [pmid (field GeneRIF_BASIC PubMed_ID PMID)] + [gene-id (field GeneRIF_BASIC GeneId)] + [version-id (field GeneRIF_BASIC VersionId)]) + (string->symbol + (string-append + "[ " + (format #f "rdf:type gnc:NCBIWikiEntry ; ") + (format #f "rdfs:comment ~s^^xsd:string ; " + ncbi-comment) + (format #f "gnt:belongsToSpecies ~a ; " + species-name) + (if (eq? #f taxonomic-id) + "" + (format #f "skos:notation taxon:~a ; " + taxonomic-id)) + (format #f "gnt:hasGeneId generif:~a ; " + gene-id) + (format #f "gnt:hasVersionId '~a'^^xsd:integer ; " + version-id) + (if (and (string? pmid) (not (string-null? pmid))) + (format #f + "~{dct:references pubmed:~a ; ~}" + (string-split pmid #\space)) + "") + (if (string? create-time) + "" + (format #f "dct:created ~s^^xsd:datetime ; " + (time-unix->string + create-time "~5"))) + " ]")))))) + + + +(let* ((option-spec + '((settings (single-char #\s) (value #t)) + (output (single-char #\o) (value #t)) + (documentation (single-char #\d) (value #t)))) + (options (getopt-long (command-line) option-spec)) + (settings (option-ref options 'settings #f)) + (output (option-ref options 'output #f)) + (documentation (option-ref options 'documentation #f)) + (%connection-settings + (call-with-input-file settings + read))) + + (with-documentation + (name "GeneRIF Metadata") + (connection %connection-settings) + (table-metadata? #f) + (prefixes + '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") + ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") + ("skos:" "<http://www.w3.org/2004/02/skos/core#>") + ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") + ("gn:" "<http://genenetwork.org/id/>") + ("gnc:" "<http://genenetwork.org/category/>") + ("gnt:" "<http://genenetwork.org/term/>") + ("dct:" "<http://purl.org/dc/terms/>") + ("foaf:" "<http://xmlns.com/foaf/0.1/>") + ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>") + ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>") + ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>") + ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") + ("owl:" "<http://www.w3.org/2002/07/owl#>"))) + (inputs + (list + genewiki-symbols + generif-symbols + gn-genewiki-entries + ncbi-genewiki-entries)) + (outputs + `(#:documentation ,documentation + #:rdf ,output)))) diff --git a/examples/generif.scm b/examples/generif.scm index 11235e0..628e34e 100755 --- a/examples/generif.scm +++ b/examples/generif.scm @@ -11,45 +11,17 @@ (transform strings) (transform sql) (transform triples) - (transform special-forms) - (transform uuid)) + (transform special-forms)) -(define-transformer genewiki-symbols - (tables (GeneRIF_BASIC) - "GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF_BASIC symbol)))) - -;; Some symbols exist in the RIF table that don't exist in the GeneRIF -;; table. -(define-transformer generif-symbols - (tables (GeneRIF) - "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol") - (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:label - (field GeneRIF symbol)))) - (define-transformer gn-genewiki-entries (tables (GeneRIF (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId") (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id") (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")) - "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason") + "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL +GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol") (schema-triples (gnc:GeneWikiEntry a rdfs:Class) (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) @@ -62,130 +34,108 @@ (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork") (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry)) (triples - (string->identifier - "symbol" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field GeneRIF symbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment))) - (create-time (field GeneRIF createtime EntryCreateTime)) - (pmid (field GeneRIF PubMed_ID PMID)) - (web-url (field GeneRIF weburl)) - (species (string->identifier - "" - (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)) - (version-id (field GeneRIF versionId)) - (identifier (field GeneRIF Id)) - (initial (sanitize-rdf-string (field GeneRIF initial))) - (reason (field GeneRIF reason)) - (email (sanitize-rdf-string (field GeneRIF email))) - (category - (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')" - GeneCategory)))) - (string->symbol - (string-append - "[ " - (format #f "rdf:type gnc:GNWikiEntry ; ") - (if (string? species) - "" - (format #f "gnt:belongsToSpecies ~a ; " - species)) - (format #f "rdfs:comment ~s^^xsd:string ; " - generif-comment) - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string-blank? email) - "" - (format #f "foaf:mbox ~s ; " email)) - (format #f "dct:identifier ~s ; " identifier) - (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id) - (if (string-blank? reason) - "" - (format #f "gnt:reason ~s ; " reason)) - (if (or (null? initial) - (string-blank? initial)) - "" (format #f "gnt:initial ~s ; " initial)) - (if (string-blank? category) - "" - (format #f - "gnt:belongsToCategory ~s ; " - category)) - (if (and (string? web-url) (not (string-null? web-url))) - (format #f "foaf:homepage ~s ; " - web-url) - "") - " ] ")))))) + (format + #f "gn:wiki-~a-~a" + (field GeneRIF Id) + (field GeneRIF versionId)) + (set rdfs:label (string->symbol + (format #f "'~a'@en" + (replace-substrings + (sanitize-rdf-string + (field GeneRIF comment)) + '(("'" . "\\'")))))) + (set rdf:type 'gnc:GNWikiEntry) + (set gnt:symbol (field GeneRIF symbol)) + (set gnt:belongsToSpecies (string->identifier + "" + (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + (set dct:created + (string->symbol + (format #f "~s^^xsd:datetime " + (field + ("CAST(createtime AS CHAR)" EntryCreateTime))))) + (multiset dct:references + (map (lambda (pmid) + (match pmid + ((? string-blank? p) "") + (p (string->symbol + (format #f "pubmed:~a" (string-trim-both pmid)))))) + (string-split (field GeneRIF PubMed_ID PMID) + #\space))) + (set foaf:mbox + (match (sanitize-rdf-string (field GeneRIF email)) + ((? string-blank? mbox) "") + (mbox (string->symbol + (format #f "<~a>" mbox))))) + (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id)) + '^^xsd:integer)) + (set foaf:homepage + (match (sanitize-rdf-string (field GeneRIF weburl)) + ((? string-blank? homepage) "") + (homepage (string->symbol + (format #f "<~a>" homepage))))) + (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId)) + '^^xsd:integer)) + (set gnt:initial (sanitize-rdf-string (field GeneRIF initial))) + (set gnt:reason (field GeneRIF reason)) + (multiset gnt:belongsToCategory + (string-split + (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')" + GeneCategory)) + #\;)))) (define-transformer ncbi-genewiki-entries (tables (GeneRIF_BASIC - (left-join Species "USING (SpeciesId)")) - "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID") + (left-join Species "USING (SpeciesId)"))) (schema-triples (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry) - (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI") - (gnt:hasVersionId a owl:ObjectProperty) - (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry) - (gnt:hasVersionId skos:definition "The VersionId of this this resource")) + (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")) (triples - (string->identifier - "symbol" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field GeneRIF_BASIC symbol GeneRIFSymbol) - 'pre "_" 'post) - #:proc (lambda (x) x)) - (set rdfs:comment - (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))] - [species-name - (string->identifier - "" - (remap-species-identifiers (field Species Fullname SpeciesFullName)) - #:separator "" - #:proc string-capitalize-first)] - [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)] - [create-time (field GeneRIF_BASIC createtime EntryCreateTime)] - [pmid (field GeneRIF_BASIC PubMed_ID PMID)] - [gene-id (field GeneRIF_BASIC GeneId)] - [version-id (field GeneRIF_BASIC VersionId)]) + (format + #f "gn:rif-~a-~a-~a-~a" + (field GeneRIF_BASIC GeneId) + (field GeneRIF_BASIC PubMed_ID) + (field + ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime)) + (field GeneRIF_BASIC VersionId)) + (set rdf:type + (let* ((comment (format #f "'~a'@en" + (replace-substrings + (sanitize-rdf-string + (field GeneRIF_BASIC comment)) + '(("\\" . "\\\\") + ("\n" . "\\n") + ("\r" . "\\r") + ("'" . "\\'"))))) + (create-time (format #f "~s^^xsd:datetime" + (field + ("CAST(createtime AS CHAR)" EntryCreateTime)))) + (symbol (field GeneRIF_BASIC symbol)) + (species (string->identifier + "" + (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + (gene-id (field GeneRIF_BASIC GeneId)) + (taxon-id (field GeneRIF_BASIC TaxID TaxonomicId)) + (pmid (field GeneRIF_BASIC PubMed_ID)) + (version-id (field GeneRIF_BASIC versionId))) (string->symbol (string-append - "[ " - (format #f "rdf:type gnc:NCBIWikiEntry ; ") - (format #f "rdfs:comment ~s^^xsd:string ; " - ncbi-comment) - (format #f "gnt:belongsToSpecies ~a ; " - species-name) - (if (eq? #f taxonomic-id) - "" - (format #f "skos:notation taxon:~a ; " - taxonomic-id)) - (format #f "gnt:hasGeneId generif:~a ; " - gene-id) - (format #f "dct:hasVersion '~a'^^xsd:int ; " - version-id) - (if (and (string? pmid) (not (string-null? pmid))) - (format #f - "~{dct:references pubmed:~a ; ~}" - (string-split pmid #\space)) - "") - (if (string? create-time) - "" - (format #f "dct:created ~s^^xsd:datetime ; " - (time-unix->string - create-time "~5"))) - " ]")))))) + (format #f "gnc:NCBIWikiEntry ;\n") + (format #f "\trdfs:label ~a ;\n" comment) + (format #f "\tgnt:belongsToSpecies ~a ;\n" species) + (format #f "\tgnt:symbol ~s ;\n" symbol) + (format #f "\tgnt:hasGeneId generif:~a ;\n" gene-id) + (match taxon-id + ((? number? x) + (format #f "\tskos:notation taxon:~a ;\n" taxon-id)) + (else "")) + (format #f "\tdct:hasVersion \"~a\"^^xsd:integer ;\n" version-id) + (format #f "\tdct:references pubmed:~a ;\n" pmid) + (format #f "\tdct:created ~a" create-time))))))) @@ -222,8 +172,6 @@ ("owl:" "<http://www.w3.org/2002/07/owl#>"))) (inputs (list - genewiki-symbols - generif-symbols gn-genewiki-entries ncbi-genewiki-entries)) (outputs diff --git a/examples/probeset.scm b/examples/probeset.scm deleted file mode 100755 index caf81aa..0000000 --- a/examples/probeset.scm +++ /dev/null @@ -1,205 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (ice-9 format) - (ice-9 getopt-long) - (ice-9 match) - (ice-9 regex) - (transform strings) - (transform sql) - (transform triples) - (transform special-forms) - (web uri)) - - -(define-transformer probeset - (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId") - (left-join Species "ON GeneChip.SpeciesId = Species.Id")) - "WHERE ProbeSet.Name IS NOT NULL") - (schema-triples - (gnc:omimLink rdfs:Class gnc:ResourceLink) - (gnc:omimLink rdfs:label "OMIM") - (gnc:omimLink rdfs:comments "Summary from On Mendelion Inheritance in Man") - (gnc:homologeneLink rdfs:Class gnc:ResourceLink) - (gnc:homologeneLink rdfs:label "HomoloGene") - (gnc:homologeneLink rdfs:comments "Find similar genes in other species") - (gnc:uniprot a owl:ObjectProperty) - (gnc:uniprot rdfs:label "UniProt") - (gnc:uniprot rdfs:comments "UniProt resource") - (gnt:hasChip a owl:ObjectProperty) - (gnt:hasChip rdfs:domain gnc:Probeset) - (gnt:hasTargetId a owl:ObjectProperty) - (gnt:hasTargetId rdfs:domain gnc:Probeset) - (gnt:geneSymbol rdfs:domain gnc:Probeset) - (gnt:location rdfs:domain gnc:ProbeSet) - (gnt:location a owl:ObjectProperty) - (gnt:strandPosition rdfs:domain gnc:ProbeSet) - (gnt:strandPosition a owl:ObjectProperty) - (gnt:targetsRegion a owl:ObjectProperty) - (gnt:targetsRegion rdfs:domain gnc:Probeset) - (gnt:chr rdfs:domain gnc:Probeset) - (gnt:mb rdfs:domain gnc:Probeset) - (gnt:hasSpecificity a owl:ObjectProperty) - (gnt:hasSpecificity rdfs:domain gnc:Probeset) - (gnt:hasBlatScore a owl:ObjectProperty) - (gnt:hasBlatScore rdfs:domain gnc:Probeset) - (gnt:hasBlatMbStart a owl:ObjectProperty) - (gnt:hasBlatMbStart rdfs:domain gnc:Probeset) - (gnt:hasBlatMbEnd a owl:ObjectProperty) - (gnt:hasBlatMbEnd rdfs:domain gnc:Probeset) - (gnt:hasBlatSeq a owl:ObjectProperty) - (gnt:hasBlatSeq rdfs:domain gnc:Probeset) - (gnt:hasTargetSeq a owl:ObjectProperty) - (gnt:hasTargetSeq rdfs:domain gnc:Probeset)) - (triples - (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))" - ProbeSetIdName))) - (probeset-id (field ProbeSet Id))) - (string->identifier - "probeset" - (if (string-null? id) - (number->string probeset-id) - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - id - 'pre "_" 'post)))) - (set rdf:type 'gnc:Probeset) - (set rdfs:label (field ProbeSet Name)) - (set skos:altLabel - (replace-substrings - (field ProbeSet alias) - '(("\r\n" . "; ")))) - (set gnt:hasChip - (string->identifier - "platform" - (field ("IFNULL(GeneChip.Name, '')" GeneChipName)))) - (set gnt:hasTargetId - (field ("NULLIF(TRIM(ProbeSet.TargetId), '')" - TargetId))) - (multiset gnt:geneSymbol - (map string-trim (string-split - (field ProbeSet Symbol) - #\,))) - (set dct:description (sanitize-rdf-string (field ProbeSet description))) - (set gnt:targetsRegion - (sanitize-rdf-string - (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')" - Probe_set_target_region)))) - (set gnt:chr (field ProbeSet Chr)) - (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gnt:location - (let* ((mb (field ProbeSet Mb)) - (chr (field ProbeSet Chr)) - (strand-probe (field ProbeSet Strand_Probe)) - (location (list chr mb))) - (match location - (("Un" mb) - (format #f "Not available")) - ((chr "") - (if (string-blank? chr) - (format #f "Not available") - (format #f "Chr ~a @ Unknown position ~a~:[~;~a~]" - chr mb - (and (string? strand-probe) (or (string=? "+" strand-probe) - (string=? "-" strand-probe))) - (cond ((string=? "+" strand-probe) - "on the plus strand") - ((string=? "-" strand-probe) - "on the minus strand") - (else ""))))) - (_ - (format #f "Chr ~a @ ~a Mb ~:[~;~a~]" - chr mb - (and (string? strand-probe) (or (string=? "+" strand-probe) - (string=? "-" strand-probe))) - (cond ((string=? "+" strand-probe) - "on the plus strand") - ((string=? "-" strand-probe) - "on the minus strand") - (else ""))))))) - (set gnt:hasGeneId - (field ProbeSet GeneId)) - ;; OMIM Link - (set dct:references - (let ((omim (field ProbeSet OMIM))) - (if (not (string-blank? omim)) - (string->symbol - (format #f - "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" - "http://www.ncbi.nlm.nih.gov/omim/" - (uri-encode omim) - "a gnc:omimLink")) - ""))) - ;; Homologene Link - (set dct:references - (let ((homologene (field ProbeSet HomoloGeneID))) - (if (not (string-blank? homologene)) - (string->symbol - (format #f - "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" - "http://www.ncbi.nlm.nih.gov/homologene/?term=" - (uri-encode homologene) - "a gnc:homologeneLink")) - ""))) - (set gnt:uniprot - (ontology 'uniprot: (field ProbeSet UniProtID))) - (set gnt:strandProbe - (field ProbeSet Strand_Probe)) - (set gnt:hasSpecificity - (field ("IFNULL(ProbeSet.Probe_set_specificity, '')" - Probe_set_specificity))) - (set gnt:hasBlatScore - (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')" - Probe_set_BLAT_score))) - (set gnt:hasBlatMbStart - (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')" - Probe_set_Blat_Mb_start)) - '^^xsd:double)) - (set gnt:hasBlatMbEnd - (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')" - Probe_set_Blat_Mb_end)) - '^^xsd:double)) - (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq))) - (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))))) - - - - -(let* ((option-spec - '((settings (single-char #\s) (value #t)) - (output (single-char #\o) (value #t)) - (documentation (single-char #\d) (value #t)))) - (options (getopt-long (command-line) option-spec)) - (settings (option-ref options 'settings #f)) - (output (option-ref options 'output #f)) - (documentation (option-ref options 'documentation #f)) - (%connection-settings - (call-with-input-file settings - read))) - (with-documentation - (name "ProbeSet Metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("gn:" "<http://genenetwork.org/id/>") - ("probeset:" "<http://genenetwork.org/probeset/>") - ("gnc:" "<http://genenetwork.org/category/>") - ("gene:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>") - ("gnt:" "<http://genenetwork.org/term/>") - ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") - ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") - ("dct:" "<http://purl.org/dc/terms/>") - ("uniprot:" "<http://purl.uniprot.org/uniprot/>") - ("owl:" "<http://www.w3.org/2002/07/owl#>") - ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") - ("qb:" "<http://purl.org/linked-data/cube#>") - ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>") - ("skos:" "<http://www.w3.org/2004/02/skos/core#>"))) - (inputs - (list probeset)) - (outputs - `(#:documentation ,documentation - #:rdf ,output)))) diff --git a/examples/schema.scm b/examples/schema.scm new file mode 100755 index 0000000..50cfd6a --- /dev/null +++ b/examples/schema.scm @@ -0,0 +1,70 @@ +#! /usr/bin/env guile +!# + +(use-modules (ice-9 getopt-long) + (transform triples) + (transform schema) + (transform special-forms) + (transform sql) + (transform table)) + +(define (call-with-genenetwork-database connection-settings proc) + (call-with-database "mysql" (string-join + (list (assq-ref connection-settings 'sql-username) + (assq-ref connection-settings 'sql-password) + (assq-ref connection-settings 'sql-database) + "tcp" + (assq-ref connection-settings 'sql-host) + (number->string + (assq-ref connection-settings 'sql-port))) + ":") + proc)) + +(define (transform-table-schema connection-settings db) + (let ((tables (tables connection-settings db))) + (for-each (lambda (table) + (let ((table-id (string->identifier + "table" + ;; We downcase table names in + ;; identifiers. So, we distinguish + ;; between the user and User tables. + (if (string=? (table-name table) "User") + "user2" + (table-name table))))) + (triple table-id 'rdf:type 'gn:sqlTable) + (triple table-id 'gn:name (table-name table)) + (triple table-id 'gn:hasSize (string->symbol (format #f "~a" (table-size table)))) + (for-each (lambda (column) + (let ((column-id (column-id (table-name table) + (column-name column)))) + (triple column-id 'rdf:type 'gn:sqlTableField) + (triple column-id 'gn:name (column-name column)) + (triple column-id 'gn:sqlFieldType (column-type column)) + (triple table-id 'gn:hasField column-id))) + (table-columns table)))) + tables))) + + +(let* ((option-spec + '((settings (single-char #\s) (value #t)) + (output (single-char #\o) (value #t)) + (documentation (single-char #\d) (value #t)))) + (options (getopt-long (command-line) option-spec)) + (settings (option-ref options 'settings #f)) + (output (option-ref options 'output #f)) + (documentation (option-ref options 'documentation #f)) + (%connection-settings (call-with-input-file settings read))) + (call-with-genenetwork-database + %connection-settings + (lambda (db) + (with-output-to-file output + (lambda () + (prefix "rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") + (prefix "rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") + (prefix "gn:" "<http://genenetwork.org/id/>") + (prefix "gnc:" "<http://genenetwork.org/category/>") + (prefix "gnt:" "<http://genenetwork.org/term/>") + (prefix "xsd:" "<http://www.w3.org/2001/XMLSchema#>") + (prefix "owl:" "<http://www.w3.org/2002/07/owl#>") + (newline) + (transform-table-schema %connection-settings db)))))) diff --git a/generate-ttl-files.scm b/generate-ttl-files.scm index 65db03f..7afbe7b 100755 --- a/generate-ttl-files.scm +++ b/generate-ttl-files.scm @@ -1,60 +1,127 @@ -#! ./pre-inst-env +#! /usr/bin/env guile !# + (use-modules (ice-9 format) - (ice-9 futures) (ice-9 getopt-long) - (ice-9 ftw)) + (ice-9 ftw) + (ice-9 regex) + (srfi srfi-26) + (srfi srfi-34) + (srfi srfi-35)) + + +;; Copied over from GNU/Guix source tree. +(define (file-name-predicate regexp) + "Return a predicate that returns true when passed a file name whose base +name matches REGEXP." + (let ((file-rx (if (regexp? regexp) + regexp + (make-regexp regexp)))) + (lambda (file stat) + (regexp-exec file-rx (basename file))))) + +(define* (find-files dir #:optional (pred (const #t)) + #:key (stat lstat) + directories? + fail-on-error?) + "Return the lexicographically sorted list of files under DIR for which PRED +returns true. PRED is passed two arguments: the absolute file name, and its +stat buffer; the default predicate always returns true. PRED can also be a +regular expression, in which case it is equivalent to (file-name-predicate +PRED). STAT is used to obtain file information; using 'lstat' means that +symlinks are not followed. If DIRECTORIES? is true, then directories will +also be included. If FAIL-ON-ERROR? is true, raise an exception upon error." + (let ((pred (if (procedure? pred) + pred + (file-name-predicate pred)))) + ;; Sort the result to get deterministic results. + (sort (file-system-fold (const #t) + (lambda (file stat result) ; leaf + (if (pred file stat) + (cons file result) + result)) + (lambda (dir stat result) ; down + (if (and directories? + (pred dir stat)) + (cons dir result) + result)) + (lambda (dir stat result) ; up + result) + (lambda (file stat result) ; skip + result) + (lambda (file stat errno result) + (format (current-error-port) "find-files: ~a: ~a~%" + file (strerror errno)) + (when fail-on-error? + (error "find-files failed")) + result) + '() + dir + stat) + string<?))) + +(define-syntax-rule (warn-on-error expr file) + (catch 'system-error + (lambda () + expr) + (lambda args + (format (current-error-port) + "warning: failed to delete ~a: ~a~%" + file (strerror + (system-error-errno args)))))) + +(define* (delete-file-recursively dir + #:key follow-mounts?) + "Delete DIR recursively, like `rm -rf', without following symlinks. Don't +follow mount points either, unless FOLLOW-MOUNTS? is true. Report but ignore +errors." + (let ((dev (stat:dev (lstat dir)))) + (file-system-fold (lambda (dir stat result) ; enter? + (or follow-mounts? + (= dev (stat:dev stat)))) + (lambda (file stat result) ; leaf + (warn-on-error (delete-file file) file)) + (const #t) ; down + (lambda (dir stat result) ; up + (warn-on-error (rmdir dir) dir)) + (const #t) ; skip + (lambda (file stat errno result) + (format (current-error-port) + "warning: failed to delete ~a: ~a~%" + file (strerror errno))) + #t + dir + + ;; Don't follow symlinks. + lstat))) (let* ((option-spec '((settings (single-char #\s) (value #t)) - (output (single-char #\o) (value #t)) - (documentation (single-char #\d) (value #t)))) + (documentation (single-char #\d) (value #t)) + (output (single-char #\o) (value #t)))) (options (getopt-long (command-line) option-spec)) (settings (option-ref options 'settings #f)) (output (option-ref options 'output #f)) - (documentation (option-ref options 'documentation #f))) - (define (enter? name stat result) - stat result ;ignore - ;; Skip version control directories if any. - (not (member (basename name) '(".git" ".svn" "CVS")))) - - (define (leaf name stat result) - stat result ;ignore - (when (string-suffix? ".scm" name) - (let* ((base-file-name (basename name ".scm")) - (cmd (format #f " ~a --settings ~a --output ~a --documentation ~a" - name - settings - (string-append output "/" base-file-name ".ttl") - (string-append documentation "/" base-file-name ".md")))) - (touch - (future - (begin - (display (format #f "Running ~a" cmd)) - (display "\n") - (system cmd))))))) - - (define (down name stat result) - name stat ;ignore - result) - - (define (up name stat result) - name stat ;ignore - result) - - (define (skip name stat result) - name stat ;ignore - result) - - ;; Ignore unreadable files/directories but warn the user. - (define (error name stat errno result) - stat ;ignore - (format (current-error-port) "warning: ~a: ~a~%" - name (strerror errno)) - result) - - (file-system-fold enter? leaf down up skip error - 0 ;initial counter is zero bytes - "./examples")) - - + (documentation (option-ref options 'documentation #f)) + (%source-dir (dirname (current-filename)))) + (unless (file-exists? output) + (mkdir output)) + ;; Transform data to RDF + (for-each (lambda (file) + (let* ((base-file-name (basename file ".scm")) + (ttl-file (string-append output "/" base-file-name ".ttl"))) + ;; Ignore dataset-metadata-git.scm because TODO + (unless (string=? base-file-name "dataset-metadata-git") + (system* "./pre-inst-env" file "--settings" settings + "--output" ttl-file)))) + (find-files "./examples" ".scm")) + ;; Copy hand-woven ttl files. + (for-each (lambda (file) + (copy-file + file (format #f "~a/~a" output (basename file)))) + (find-files "./schema" ".ttl")) + ;; Validate transformed turtle files + (for-each (lambda (file) + (system* "rapper" "--input" "turtle" "--count" file)) + (append (find-files output ".ttl") + (find-files "./schema" ".ttl")))) diff --git a/transform/schema-dump.scm b/transform/schema.scm index 18df5da..cdfc834 100644 --- a/transform/schema-dump.scm +++ b/transform/schema.scm @@ -4,7 +4,13 @@ #:use-module (transform sql) #:use-module (transform triples) #:use-module (transform strings) - #:use-module (transform table)) + #:use-module (transform table) + #:export (table-fields + get-tables-from-comments + schema-annotations + tables + schema + data-table)) (define (table-fields db table) @@ -47,7 +53,7 @@ (for-each (cut table-fields db <>) (get-tables-from-comments db))))) -(define (tables db) +(define (tables connection-settings db) "Return list of all tables in DB. Each element of the returned list is a <table> object." (map (lambda (table) @@ -68,7 +74,7 @@ is a <table> object." (information_schema.tables data_length)) (information_schema.tables) (format #f "WHERE table_schema = '~a'" - (assq-ref %connection-settings 'sql-database)))))) + (assq-ref connection-settings 'sql-database)))))) (define (schema db) (let ((tables (tables db))) diff --git a/transform/special-forms.scm b/transform/special-forms.scm index 99b30df..ddb3180 100644 --- a/transform/special-forms.scm +++ b/transform/special-forms.scm @@ -537,40 +537,42 @@ The above query results to triples that have the form: (call-with-target-database connection (lambda (db) - (with-output-to-file ; - doc-path - (lambda () - (format #t "# ~a" name) - (for-each - (lambda (proc) - (proc db - #:metadata? #f - #:data? #f - #:documentation? - (lambda () (for-each - (match-lambda - ((k v) - (begin - (prefix k v #f)))) - prefixes)))) - inputs)) - #:encoding "UTF-8") + (when doc-path + (with-output-to-file ; + doc-path + (lambda () + (format #t "# ~a" name) + (for-each + (lambda (proc) + (proc db + #:metadata? #f + #:data? #f + #:documentation? + (lambda () (for-each + (match-lambda + ((k v) + (begin + (prefix k v #f)))) + prefixes)))) + inputs)) + #:encoding "UTF-8")) ;; Dumping the actual data - (with-output-to-file - rdf-path - (lambda () - ;; Add the prefixes - (for-each - (match-lambda - ((k v) - (begin - (prefix k v)))) - prefixes) - (newline) - (for-each - (lambda (proc) - (proc db #:metadata? table-metadata?)) - inputs)) - #:encoding "UTF-8"))))))) + (when rdf-path + (with-output-to-file + rdf-path + (lambda () + ;; Add the prefixes + (for-each + (match-lambda + ((k v) + (begin + (prefix k v)))) + prefixes) + (newline) + (for-each + (lambda (proc) + (proc db #:metadata? table-metadata?)) + inputs)) + #:encoding "UTF-8")))))))) diff --git a/transform/uuid.scm b/transform/uuid.scm deleted file mode 100644 index be0e592..0000000 --- a/transform/uuid.scm +++ /dev/null @@ -1,234 +0,0 @@ -;; CREDIT: https://lists.gnu.org/archive/html/guile-user/2018-01/msg00019.html -(define-module (transform uuid) - #:use-module (srfi srfi-1) - #:use-module (srfi srfi-11) - #:use-module (rnrs bytevectors) - #:use-module (ice-9 iconv) - #:export (bytevector->md5 - make-version-3-uuid)) - -(define (bytevector->md5 bytevector) - "Convert BYTEVECTOR to a bytevector containing the MD5 hash of -BYTEVECTOR." - ;; Implemented along RFC 1321. It should be easy to verify that - ;; this procedure performs the operations specified therein. - (define (append-padding-bits bytevector) - "Makes a list from BYTEVECTOR with padding as per RFC 1321 3.1." - (let* ((length-in-bits (* 8 (bytevector-length bytevector))) - (padding-bits (- 512 (modulo (- length-in-bits 448) 512)))) - (append (bytevector->u8-list bytevector) - '(128) ; #*10000000 - (iota - (- (/ padding-bits 8) 1) - 0 0)))) - (define (append-length msg-list message-length) - "Append MESSAGE-LENGTH as 8 byte values from a uint64 to MSG-LIST." - (append msg-list - ;; For numbers too large for an uint64, only the low-order - ;; bytes are returned. - (bytevector->u8-list (u64vector - (modulo - (* message-length 8) ; bits - (1+ #xffffffffffffffff)))))) - (let hash ((AA #x67452301) - (BB #xefcdab89) - (CC #x98badcfe) - (DD #x10325476) - (to-digest - (append-length - (append-padding-bits - bytevector) - (bytevector-length bytevector)))) - (define (F X Y Z) - (logior (logand X Y) (logand (lognot X) Z))) - (define (G X Y Z) - (logior (logand X Z) (logand Y (lognot Z)))) - (define (H X Y Z) - (logxor X Y Z)) - (define (I X Y Z) - (logxor Y (logior X (lognot Z)))) - (define (T i) - (inexact->exact (floor (* 4294967296 (abs (sin i)))))) - (define (number->u32 n) - "Cut off all bits that do not fit in a uint32." - (bit-extract n 0 32)) - (define (lsh32 n count) - (number->u32 (logior (ash n count) - (bit-extract n (- 32 count) 32)))) - (if (not (null? to-digest)) - (let* ((block (u8-list->bytevector - (list-head to-digest (/ 512 8)))) - (X (lambda (j) (bytevector-u32-ref - block (* 4 j) (endianness little)))) - (do-round1 - (lambda (A B C D) - (define (operation a b c d k s i) - (number->u32 - (+ b (lsh32 (+ a (F b c d) (X k) (T i)) s)))) - (let* ((A (operation A B C D 0 7 1)) - (D (operation D A B C 1 12 2)) - (C (operation C D A B 2 17 3)) - (B (operation B C D A 3 22 4)) - (A (operation A B C D 4 7 5)) - (D (operation D A B C 5 12 6)) - (C (operation C D A B 6 17 7)) - (B (operation B C D A 7 22 8)) - (A (operation A B C D 8 7 9)) - (D (operation D A B C 9 12 10)) - (C (operation C D A B 10 17 11)) - (B (operation B C D A 11 22 12)) - (A (operation A B C D 12 7 13)) - (D (operation D A B C 13 12 14)) - (C (operation C D A B 14 17 15)) - (B (operation B C D A 15 22 16))) - (values A B C D)))) - (do-round2 - (lambda (A B C D) - (define (operation a b c d k s i) - (number->u32 - (+ b (lsh32 (+ a (G b c d) (X k) (T i)) s)))) - (let* ((A (operation A B C D 1 5 17)) - (D (operation D A B C 6 9 18)) - (C (operation C D A B 11 14 19)) - (B (operation B C D A 0 20 20)) - (A (operation A B C D 5 5 21)) - (D (operation D A B C 10 9 22)) - (C (operation C D A B 15 14 23)) - (B (operation B C D A 4 20 24)) - (A (operation A B C D 9 5 25)) - (D (operation D A B C 14 9 26)) - (C (operation C D A B 3 14 27)) - (B (operation B C D A 8 20 28)) - (A (operation A B C D 13 5 29)) - (D (operation D A B C 2 9 30)) - (C (operation C D A B 7 14 31)) - (B (operation B C D A 12 20 32))) - (values A B C D)))) - (do-round3 - (lambda (A B C D) - (define (operation a b c d k s i) - (number->u32 - (+ b (lsh32 (+ a (H b c d) (X k) (T i)) s)))) - (let* ((A (operation A B C D 5 4 33)) - (D (operation D A B C 8 11 34)) - (C (operation C D A B 11 16 35)) - (B (operation B C D A 14 23 36)) - (A (operation A B C D 1 4 37)) - (D (operation D A B C 4 11 38)) - (C (operation C D A B 7 16 39)) - (B (operation B C D A 10 23 40)) - (A (operation A B C D 13 4 41)) - (D (operation D A B C 0 11 42)) - (C (operation C D A B 3 16 43)) - (B (operation B C D A 6 23 44)) - (A (operation A B C D 9 4 45)) - (D (operation D A B C 12 11 46)) - (C (operation C D A B 15 16 47)) - (B (operation B C D A 2 23 48))) - (values A B C D)))) - (do-round4 - (lambda (A B C D) - (define (operation a b c d k s i) - (number->u32 - (+ b (lsh32 (+ a (I b c d) (X k) (T i)) s)))) - (let* ((A (operation A B C D 0 6 49)) - (D (operation D A B C 7 10 50)) - (C (operation C D A B 14 15 51)) - (B (operation B C D A 5 21 52)) - (A (operation A B C D 12 6 53)) - (D (operation D A B C 3 10 54)) - (C (operation C D A B 10 15 55)) - (B (operation B C D A 1 21 56)) - (A (operation A B C D 8 6 57)) - (D (operation D A B C 15 10 58)) - (C (operation C D A B 6 15 59)) - (B (operation B C D A 13 21 60)) - (A (operation A B C D 4 6 61)) - (D (operation D A B C 11 10 62)) - (C (operation C D A B 2 15 63)) - (B (operation B C D A 9 21 64))) - (values A B C D))))) - (let*-values (((A B C D) (values AA BB CC DD)) - ((A B C D) (do-round1 A B C D)) - ((A B C D) (do-round2 A B C D)) - ((A B C D) (do-round3 A B C D)) - ((A B C D) (do-round4 A B C D))) - (hash (number->u32 (+ A AA)) - (number->u32 (+ B BB)) - (number->u32 (+ C CC)) - (number->u32 (+ D DD)) - (list-tail to-digest (/ 512 8))))) - ;; we’re done: - (u8-list->bytevector - (append - (bytevector->u8-list (u32vector AA)) - (bytevector->u8-list (u32vector BB)) - (bytevector->u8-list (u32vector CC)) - (bytevector->u8-list (u32vector DD))))))) - -(define* (make-version-3-uuid namespace-uuid str #:optional (prefix "urn:uuid:")) - "Generates a UUID string by computing the MD5 hash of NAMESPACE-UUID -and STR. NAMESPACE-UUID must be a bytevector consisting of the UUID’s -bytes, *not* the UUID’s string representation." - (define (half-byte->hex-char number) - "Returns the corresponding hexadecimal digit for a number NUMBER -between 0 and 15." - (case number - ((0) #\0) - ((1) #\1) - ((2) #\2) - ((3) #\3) - ((4) #\4) - ((5) #\5) - ((6) #\6) - ((7) #\7) - ((8) #\8) - ((9) #\9) - ((10) #\a) - ((11) #\b) - ((12) #\c) - ((13) #\d) - ((14) #\e) - ((15) #\f))) - (define (byte->hex-string bv index) - "Convert the byte at INDEX of bytevector BV to a hex string." - (let ((byte (bytevector-u8-ref bv index))) - (string (half-byte->hex-char (quotient byte 16)) - (half-byte->hex-char (modulo byte 16))))) - (let ((md5 (bytevector->md5 - (u8-list->bytevector - (append (bytevector->u8-list namespace-uuid) - (bytevector->u8-list (string->utf8 str))))))) - (string-append prefix - ;; time_low field: - (byte->hex-string md5 0) - (byte->hex-string md5 1) - (byte->hex-string md5 2) - (byte->hex-string md5 3) - "-" - ;; time_mid field: - (byte->hex-string md5 4) - (byte->hex-string md5 5) - "-" - ;; time_hi_and_version field: - (let ((byte (bytevector-u8-ref md5 6))) - (string (half-byte->hex-char 3) ; UUID version 3 - (half-byte->hex-char (modulo byte 16)))) - (byte->hex-string md5 7) - "-" - ;; clock_seq_hi_and_reserved field: - (let ((byte (bytevector-u8-ref md5 8))) - (string (half-byte->hex-char - (logior #b1000 ; most significant bits are 10 - (bit-extract (quotient byte 16) 0 2))) - (half-byte->hex-char (modulo byte 16)))) - ;; clock_seq_low field: - (byte->hex-string md5 9) - "-" - ;; node field: - (byte->hex-string md5 10) - (byte->hex-string md5 11) - (byte->hex-string md5 12) - (byte->hex-string md5 13) - (byte->hex-string md5 14) - (byte->hex-string md5 15)))) |