aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xexamples/genbank.scm3
-rwxr-xr-xexamples/genelist.scm2
-rwxr-xr-xexamples/generif-old.scm241
-rwxr-xr-xexamples/generif.scm246
-rwxr-xr-xexamples/probeset.scm205
-rwxr-xr-xexamples/schema.scm70
-rwxr-xr-xgenerate-ttl-files.scm169
-rw-r--r--transform/schema.scm (renamed from transform/schema-dump.scm)12
-rw-r--r--transform/special-forms.scm70
-rw-r--r--transform/uuid.scm234
10 files changed, 574 insertions, 678 deletions
diff --git a/examples/genbank.scm b/examples/genbank.scm
index 391cff0..c83643c 100755
--- a/examples/genbank.scm
+++ b/examples/genbank.scm
@@ -10,8 +10,7 @@
(transform strings)
(transform sql)
(transform triples)
- (transform special-forms)
- (transform uuid))
+ (transform special-forms))
diff --git a/examples/genelist.scm b/examples/genelist.scm
index 9c1ced0..8729626 100755
--- a/examples/genelist.scm
+++ b/examples/genelist.scm
@@ -18,6 +18,8 @@
(tables (GeneList
(left-join Species "USING (SpeciesId)")))
(schema-triples
+ (gnc:GeneSymbol a rdfs:Class)
+ (gnc:GeneSymbol rdfs:label "A gene symbol")
(gnt:gene rdfs:domain gnc:GeneSymbol)
(gnt:belongsToSpecies rdfs:domain gnc:GeneSymbol)
(gnc:Gene a rdfs:Class)
diff --git a/examples/generif-old.scm b/examples/generif-old.scm
new file mode 100755
index 0000000..ede5a28
--- /dev/null
+++ b/examples/generif-old.scm
@@ -0,0 +1,241 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (srfi srfi-1)
+ (srfi srfi-26)
+ (rnrs bytevectors)
+ (ice-9 format)
+ (ice-9 getopt-long)
+ (ice-9 match)
+ (ice-9 regex)
+ (transform strings)
+ (transform sql)
+ (transform triples)
+ (transform special-forms))
+
+
+
+(define (fix-email-id email)
+ (string-delete #\space email))
+
+(define (investigator-attributes->id first-name last-name email)
+ ;; There is just one record corresponding to "Evan Williams" which
+ ;; does not have an email ID. To accommodate that record, we
+ ;; construct the investigator ID from not just the email ID, but
+ ;; also the first and the last names. It would be preferable to just
+ ;; find Evan Williams' email ID and insert it into the database.
+ (string->identifier "investigator"
+ (string-join
+ (list first-name last-name (fix-email-id email))
+ "_")))
+
+
+
+(define-transformer genewiki-symbols
+ (tables (GeneRIF_BASIC)
+ "GROUP BY BINARY symbol")
+ (triples
+ (string->identifier
+ "symbol"
+ (regexp-substitute/global #f "[^A-Za-z0-9:]"
+ (field GeneRIF_BASIC symbol)
+ 'pre "_" 'post)
+ #:proc (lambda (x) x))
+ (set rdfs:label
+ (field GeneRIF_BASIC symbol))))
+
+;; Some symbols exist in the RIF table that don't exist in the GeneRIF
+;; table.
+(define-transformer generif-symbols
+ (tables (GeneRIF)
+ "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol")
+ (triples
+ (string->identifier
+ "symbol"
+ (regexp-substitute/global #f "[^A-Za-z0-9:]"
+ (field GeneRIF symbol)
+ 'pre "_" 'post)
+ #:proc (lambda (x) x))
+ (set rdfs:label
+ (field GeneRIF symbol))))
+
+(define-transformer gn-genewiki-entries
+ (tables (GeneRIF
+ (left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
+ (left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
+ (left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id")
+ (left-join Investigators "ON Investigators.Email = GeneRIF.email"))
+ "WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, BINARY GeneRIF.symbol")
+ (schema-triples
+ (gnc:GeneWikiEntry a rdfs:Class)
+ (gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
+ (gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
+ (gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
+ (triples
+ (string->identifier
+ "symbol"
+ (regexp-substitute/global
+ #f "[^A-Za-z0-9:]"
+ (field GeneRIF symbol)
+ 'pre "_" 'post)
+ #:proc (lambda (x) x))
+ (set rdfs:comment
+ (let* ([generif-comment (sanitize-rdf-string (field GeneRIF comment))]
+ [create-time (field GeneRIF createtime EntryCreateTime)]
+ [pmid (field GeneRIF PubMed_ID PMID)]
+ [web-url (field GeneRIF weburl)]
+ [species (string->identifier
+ ""
+ (remap-species-identifiers (field Species Fullname))
+ #:separator ""
+ #:proc string-capitalize-first)]
+ [categories
+ (remove (lambda (x)
+ (or (eq? x #f)
+ (and (string? x)
+ (string-null? x))))
+ (remove-duplicates
+ (string-split-substring
+ (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$')"
+ GeneCategory))
+ "$$")))])
+ (string->symbol
+ (string-append
+ "[ "
+ (format #f "rdf:type gnc:GNWikiEntry ; ")
+ (if (string? species)
+ ""
+ (format #f "gnt:belongsToSpecies ~a ; "
+ species))
+ (format #f "rdfs:comment ~s^^xsd:string ; "
+ generif-comment)
+ (if (string? create-time)
+ ""
+ (format #f "dct:created ~s^^xsd:datetime ; "
+ (time-unix->string
+ create-time "~5")))
+ (if (and (string? pmid) (not (string-null? pmid)))
+ (format #f
+ "~{dct:references pubmed:~a ; ~}"
+ (string-split pmid #\space))
+ "")
+ (if (and (not (string-null?
+ (string-trim-both (field GeneRIF email))))
+ (not (string-null? (field Investigators Email))))
+ (format #f "dct:creator ~a ; "
+ (investigator-attributes->id
+ (field Investigators FirstName)
+ (field Investigators LastName)
+ (field Investigators Email)))
+ "")
+ (if (not (null? categories))
+ (format #f
+ "~{gnt:belongsToCategory ~s ; ~}"
+ categories)
+ "")
+ (if (and (string? web-url) (not (string-null? web-url)))
+ (format #f "foaf:homepage ~s ; "
+ web-url)
+ "")
+ " ] "))))))
+
+(define-transformer ncbi-genewiki-entries
+ (tables (GeneRIF_BASIC
+ (left-join Species "USING (SpeciesId)"))
+ "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID")
+ (schema-triples
+ (gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
+ (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")
+ (gnt:hasVersionId a owl:ObjectProperty)
+ (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry)
+ (gnt:hasVersionId skos:definition "The VersionId of this this resource"))
+ (triples
+ (string->identifier
+ "symbol"
+ (regexp-substitute/global #f "[^A-Za-z0-9:]"
+ (field GeneRIF_BASIC symbol GeneRIFSymbol)
+ 'pre "_" 'post)
+ #:proc (lambda (x) x))
+ (set rdfs:comment
+ (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))]
+ [species-name
+ (string->identifier
+ ""
+ (remap-species-identifiers (field Species Fullname SpeciesFullName))
+ #:separator ""
+ #:proc string-capitalize-first)]
+ [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)]
+ [create-time (field GeneRIF_BASIC createtime EntryCreateTime)]
+ [pmid (field GeneRIF_BASIC PubMed_ID PMID)]
+ [gene-id (field GeneRIF_BASIC GeneId)]
+ [version-id (field GeneRIF_BASIC VersionId)])
+ (string->symbol
+ (string-append
+ "[ "
+ (format #f "rdf:type gnc:NCBIWikiEntry ; ")
+ (format #f "rdfs:comment ~s^^xsd:string ; "
+ ncbi-comment)
+ (format #f "gnt:belongsToSpecies ~a ; "
+ species-name)
+ (if (eq? #f taxonomic-id)
+ ""
+ (format #f "skos:notation taxon:~a ; "
+ taxonomic-id))
+ (format #f "gnt:hasGeneId generif:~a ; "
+ gene-id)
+ (format #f "gnt:hasVersionId '~a'^^xsd:integer ; "
+ version-id)
+ (if (and (string? pmid) (not (string-null? pmid)))
+ (format #f
+ "~{dct:references pubmed:~a ; ~}"
+ (string-split pmid #\space))
+ "")
+ (if (string? create-time)
+ ""
+ (format #f "dct:created ~s^^xsd:datetime ; "
+ (time-unix->string
+ create-time "~5")))
+ " ]"))))))
+
+
+
+(let* ((option-spec
+ '((settings (single-char #\s) (value #t))
+ (output (single-char #\o) (value #t))
+ (documentation (single-char #\d) (value #t))))
+ (options (getopt-long (command-line) option-spec))
+ (settings (option-ref options 'settings #f))
+ (output (option-ref options 'output #f))
+ (documentation (option-ref options 'documentation #f))
+ (%connection-settings
+ (call-with-input-file settings
+ read)))
+
+ (with-documentation
+ (name "GeneRIF Metadata")
+ (connection %connection-settings)
+ (table-metadata? #f)
+ (prefixes
+ '(("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+ ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+ ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
+ ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
+ ("gn:" "<http://genenetwork.org/id/>")
+ ("gnc:" "<http://genenetwork.org/category/>")
+ ("gnt:" "<http://genenetwork.org/term/>")
+ ("dct:" "<http://purl.org/dc/terms/>")
+ ("foaf:" "<http://xmlns.com/foaf/0.1/>")
+ ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")
+ ("taxon:" "<https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=>")
+ ("generif:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
+ ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+ ("owl:" "<http://www.w3.org/2002/07/owl#>")))
+ (inputs
+ (list
+ genewiki-symbols
+ generif-symbols
+ gn-genewiki-entries
+ ncbi-genewiki-entries))
+ (outputs
+ `(#:documentation ,documentation
+ #:rdf ,output))))
diff --git a/examples/generif.scm b/examples/generif.scm
index 11235e0..628e34e 100755
--- a/examples/generif.scm
+++ b/examples/generif.scm
@@ -11,45 +11,17 @@
(transform strings)
(transform sql)
(transform triples)
- (transform special-forms)
- (transform uuid))
+ (transform special-forms))
-(define-transformer genewiki-symbols
- (tables (GeneRIF_BASIC)
- "GROUP BY BINARY symbol")
- (triples
- (string->identifier
- "symbol"
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field GeneRIF_BASIC symbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x))
- (set rdfs:label
- (field GeneRIF_BASIC symbol))))
-
-;; Some symbols exist in the RIF table that don't exist in the GeneRIF
-;; table.
-(define-transformer generif-symbols
- (tables (GeneRIF)
- "WHERE symbol NOT IN (SELECT symbol from GeneRIF_BASIC) GROUP BY BINARY symbol")
- (triples
- (string->identifier
- "symbol"
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field GeneRIF symbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x))
- (set rdfs:label
- (field GeneRIF symbol))))
-
(define-transformer gn-genewiki-entries
(tables (GeneRIF
(left-join Species "ON Species.SpeciesId = GeneRIF.SpeciesId")
(left-join GeneRIFXRef "ON GeneRIFXRef.GeneRIFId = GeneRIF.Id")
(left-join GeneCategory "ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id"))
- "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol, GeneRIF.SpeciesId, GeneRIF.createtime, GeneRIF.reason")
+ "WHERE GeneRIF.display > 0 AND GeneRIF.comment IS NOT NULL
+GROUP BY GeneRIF.Id, GeneRIF.versionId, GeneRIF.symbol")
(schema-triples
(gnc:GeneWikiEntry a rdfs:Class)
(gnc:GNWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
@@ -62,130 +34,108 @@
(gnc:GNWikiEntry rdfs:comment "Represents GeneRIF Entries entered from GeneNetwork")
(gnt:geneSymbol rdfs:domain gnc:GNWikiEntry))
(triples
- (string->identifier
- "symbol"
- (regexp-substitute/global
- #f "[^A-Za-z0-9:]"
- (field GeneRIF symbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x))
- (set rdfs:comment
- (let* ((generif-comment (sanitize-rdf-string (field GeneRIF comment)))
- (create-time (field GeneRIF createtime EntryCreateTime))
- (pmid (field GeneRIF PubMed_ID PMID))
- (web-url (field GeneRIF weburl))
- (species (string->identifier
- ""
- (remap-species-identifiers (field Species Fullname))
- #:separator ""
- #:proc string-capitalize-first))
- (version-id (field GeneRIF versionId))
- (identifier (field GeneRIF Id))
- (initial (sanitize-rdf-string (field GeneRIF initial)))
- (reason (field GeneRIF reason))
- (email (sanitize-rdf-string (field GeneRIF email)))
- (category
- (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '; ')"
- GeneCategory))))
- (string->symbol
- (string-append
- "[ "
- (format #f "rdf:type gnc:GNWikiEntry ; ")
- (if (string? species)
- ""
- (format #f "gnt:belongsToSpecies ~a ; "
- species))
- (format #f "rdfs:comment ~s^^xsd:string ; "
- generif-comment)
- (if (string? create-time)
- ""
- (format #f "dct:created ~s^^xsd:datetime ; "
- (time-unix->string
- create-time "~5")))
- (if (and (string? pmid) (not (string-null? pmid)))
- (format #f
- "~{dct:references pubmed:~a ; ~}"
- (string-split pmid #\space))
- "")
- (if (string-blank? email)
- ""
- (format #f "foaf:mbox ~s ; " email))
- (format #f "dct:identifier ~s ; " identifier)
- (format #f "dct:hasVersion \"~s\"^^xsd:int ; " version-id)
- (if (string-blank? reason)
- ""
- (format #f "gnt:reason ~s ; " reason))
- (if (or (null? initial)
- (string-blank? initial))
- "" (format #f "gnt:initial ~s ; " initial))
- (if (string-blank? category)
- ""
- (format #f
- "gnt:belongsToCategory ~s ; "
- category))
- (if (and (string? web-url) (not (string-null? web-url)))
- (format #f "foaf:homepage ~s ; "
- web-url)
- "")
- " ] "))))))
+ (format
+ #f "gn:wiki-~a-~a"
+ (field GeneRIF Id)
+ (field GeneRIF versionId))
+ (set rdfs:label (string->symbol
+ (format #f "'~a'@en"
+ (replace-substrings
+ (sanitize-rdf-string
+ (field GeneRIF comment))
+ '(("'" . "\\'"))))))
+ (set rdf:type 'gnc:GNWikiEntry)
+ (set gnt:symbol (field GeneRIF symbol))
+ (set gnt:belongsToSpecies (string->identifier
+ ""
+ (remap-species-identifiers (field Species Fullname))
+ #:separator ""
+ #:proc string-capitalize-first))
+ (set dct:created
+ (string->symbol
+ (format #f "~s^^xsd:datetime "
+ (field
+ ("CAST(createtime AS CHAR)" EntryCreateTime)))))
+ (multiset dct:references
+ (map (lambda (pmid)
+ (match pmid
+ ((? string-blank? p) "")
+ (p (string->symbol
+ (format #f "pubmed:~a" (string-trim-both pmid))))))
+ (string-split (field GeneRIF PubMed_ID PMID)
+ #\space)))
+ (set foaf:mbox
+ (match (sanitize-rdf-string (field GeneRIF email))
+ ((? string-blank? mbox) "")
+ (mbox (string->symbol
+ (format #f "<~a>" mbox)))))
+ (set dct:identifier (annotate-field (format #f "~s" (field GeneRIF Id))
+ '^^xsd:integer))
+ (set foaf:homepage
+ (match (sanitize-rdf-string (field GeneRIF weburl))
+ ((? string-blank? homepage) "")
+ (homepage (string->symbol
+ (format #f "<~a>" homepage)))))
+ (set dct:hasVersion (annotate-field (format #f "~s" (field GeneRIF versionId))
+ '^^xsd:integer))
+ (set gnt:initial (sanitize-rdf-string (field GeneRIF initial)))
+ (set gnt:reason (field GeneRIF reason))
+ (multiset gnt:belongsToCategory
+ (string-split
+ (field ("GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR ';')"
+ GeneCategory))
+ #\;))))
(define-transformer ncbi-genewiki-entries
(tables (GeneRIF_BASIC
- (left-join Species "USING (SpeciesId)"))
- "WHERE GeneRIF_BASIC.comment IS NOT NULL AND TRIM(GeneRIF_BASIC.comment) != '' AND TRIM(GeneRIF_BASIC.symbol) != '' GROUP BY GeneRIF_BASIC.comment, GeneRIF_BASIC.createtime, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.SpeciesId, GeneRIF_BASIC.TaxID")
+ (left-join Species "USING (SpeciesId)")))
(schema-triples
(gnc:NCBIWikiEntry rdfs:subClassOf gnc:GeneWikiEntry)
- (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI")
- (gnt:hasVersionId a owl:ObjectProperty)
- (gnt:hasVersionId rdfs:domain gnc:NCBIWikiEntry)
- (gnt:hasVersionId skos:definition "The VersionId of this this resource"))
+ (gnc:NCBIWikiEntry rdfs:comment "Represents GeneRIF Entries obtained from NCBI"))
(triples
- (string->identifier
- "symbol"
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field GeneRIF_BASIC symbol GeneRIFSymbol)
- 'pre "_" 'post)
- #:proc (lambda (x) x))
- (set rdfs:comment
- (let ([ncbi-comment (sanitize-rdf-string (field GeneRIF_BASIC comment))]
- [species-name
- (string->identifier
- ""
- (remap-species-identifiers (field Species Fullname SpeciesFullName))
- #:separator ""
- #:proc string-capitalize-first)]
- [taxonomic-id (field GeneRIF_BASIC TaxID TaxonomicId)]
- [create-time (field GeneRIF_BASIC createtime EntryCreateTime)]
- [pmid (field GeneRIF_BASIC PubMed_ID PMID)]
- [gene-id (field GeneRIF_BASIC GeneId)]
- [version-id (field GeneRIF_BASIC VersionId)])
+ (format
+ #f "gn:rif-~a-~a-~a-~a"
+ (field GeneRIF_BASIC GeneId)
+ (field GeneRIF_BASIC PubMed_ID)
+ (field
+ ("DATE_FORMAT(createtime, '%Y-%m-%dT%T')" CreateTime))
+ (field GeneRIF_BASIC VersionId))
+ (set rdf:type
+ (let* ((comment (format #f "'~a'@en"
+ (replace-substrings
+ (sanitize-rdf-string
+ (field GeneRIF_BASIC comment))
+ '(("\\" . "\\\\")
+ ("\n" . "\\n")
+ ("\r" . "\\r")
+ ("'" . "\\'")))))
+ (create-time (format #f "~s^^xsd:datetime"
+ (field
+ ("CAST(createtime AS CHAR)" EntryCreateTime))))
+ (symbol (field GeneRIF_BASIC symbol))
+ (species (string->identifier
+ ""
+ (remap-species-identifiers (field Species Fullname))
+ #:separator ""
+ #:proc string-capitalize-first))
+ (gene-id (field GeneRIF_BASIC GeneId))
+ (taxon-id (field GeneRIF_BASIC TaxID TaxonomicId))
+ (pmid (field GeneRIF_BASIC PubMed_ID))
+ (version-id (field GeneRIF_BASIC versionId)))
(string->symbol
(string-append
- "[ "
- (format #f "rdf:type gnc:NCBIWikiEntry ; ")
- (format #f "rdfs:comment ~s^^xsd:string ; "
- ncbi-comment)
- (format #f "gnt:belongsToSpecies ~a ; "
- species-name)
- (if (eq? #f taxonomic-id)
- ""
- (format #f "skos:notation taxon:~a ; "
- taxonomic-id))
- (format #f "gnt:hasGeneId generif:~a ; "
- gene-id)
- (format #f "dct:hasVersion '~a'^^xsd:int ; "
- version-id)
- (if (and (string? pmid) (not (string-null? pmid)))
- (format #f
- "~{dct:references pubmed:~a ; ~}"
- (string-split pmid #\space))
- "")
- (if (string? create-time)
- ""
- (format #f "dct:created ~s^^xsd:datetime ; "
- (time-unix->string
- create-time "~5")))
- " ]"))))))
+ (format #f "gnc:NCBIWikiEntry ;\n")
+ (format #f "\trdfs:label ~a ;\n" comment)
+ (format #f "\tgnt:belongsToSpecies ~a ;\n" species)
+ (format #f "\tgnt:symbol ~s ;\n" symbol)
+ (format #f "\tgnt:hasGeneId generif:~a ;\n" gene-id)
+ (match taxon-id
+ ((? number? x)
+ (format #f "\tskos:notation taxon:~a ;\n" taxon-id))
+ (else ""))
+ (format #f "\tdct:hasVersion \"~a\"^^xsd:integer ;\n" version-id)
+ (format #f "\tdct:references pubmed:~a ;\n" pmid)
+ (format #f "\tdct:created ~a" create-time)))))))
@@ -222,8 +172,6 @@
("owl:" "<http://www.w3.org/2002/07/owl#>")))
(inputs
(list
- genewiki-symbols
- generif-symbols
gn-genewiki-entries
ncbi-genewiki-entries))
(outputs
diff --git a/examples/probeset.scm b/examples/probeset.scm
deleted file mode 100755
index caf81aa..0000000
--- a/examples/probeset.scm
+++ /dev/null
@@ -1,205 +0,0 @@
-#! /usr/bin/env guile
-!#
-
-(use-modules (srfi srfi-1)
- (srfi srfi-26)
- (ice-9 format)
- (ice-9 getopt-long)
- (ice-9 match)
- (ice-9 regex)
- (transform strings)
- (transform sql)
- (transform triples)
- (transform special-forms)
- (web uri))
-
-
-(define-transformer probeset
- (tables (ProbeSet
- (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")
- (left-join Species "ON GeneChip.SpeciesId = Species.Id"))
- "WHERE ProbeSet.Name IS NOT NULL")
- (schema-triples
- (gnc:omimLink rdfs:Class gnc:ResourceLink)
- (gnc:omimLink rdfs:label "OMIM")
- (gnc:omimLink rdfs:comments "Summary from On Mendelion Inheritance in Man")
- (gnc:homologeneLink rdfs:Class gnc:ResourceLink)
- (gnc:homologeneLink rdfs:label "HomoloGene")
- (gnc:homologeneLink rdfs:comments "Find similar genes in other species")
- (gnc:uniprot a owl:ObjectProperty)
- (gnc:uniprot rdfs:label "UniProt")
- (gnc:uniprot rdfs:comments "UniProt resource")
- (gnt:hasChip a owl:ObjectProperty)
- (gnt:hasChip rdfs:domain gnc:Probeset)
- (gnt:hasTargetId a owl:ObjectProperty)
- (gnt:hasTargetId rdfs:domain gnc:Probeset)
- (gnt:geneSymbol rdfs:domain gnc:Probeset)
- (gnt:location rdfs:domain gnc:ProbeSet)
- (gnt:location a owl:ObjectProperty)
- (gnt:strandPosition rdfs:domain gnc:ProbeSet)
- (gnt:strandPosition a owl:ObjectProperty)
- (gnt:targetsRegion a owl:ObjectProperty)
- (gnt:targetsRegion rdfs:domain gnc:Probeset)
- (gnt:chr rdfs:domain gnc:Probeset)
- (gnt:mb rdfs:domain gnc:Probeset)
- (gnt:hasSpecificity a owl:ObjectProperty)
- (gnt:hasSpecificity rdfs:domain gnc:Probeset)
- (gnt:hasBlatScore a owl:ObjectProperty)
- (gnt:hasBlatScore rdfs:domain gnc:Probeset)
- (gnt:hasBlatMbStart a owl:ObjectProperty)
- (gnt:hasBlatMbStart rdfs:domain gnc:Probeset)
- (gnt:hasBlatMbEnd a owl:ObjectProperty)
- (gnt:hasBlatMbEnd rdfs:domain gnc:Probeset)
- (gnt:hasBlatSeq a owl:ObjectProperty)
- (gnt:hasBlatSeq rdfs:domain gnc:Probeset)
- (gnt:hasTargetSeq a owl:ObjectProperty)
- (gnt:hasTargetSeq rdfs:domain gnc:Probeset))
- (triples
- (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))"
- ProbeSetIdName)))
- (probeset-id (field ProbeSet Id)))
- (string->identifier
- "probeset"
- (if (string-null? id)
- (number->string probeset-id)
- (regexp-substitute/global
- #f "[^A-Za-z0-9:]"
- id
- 'pre "_" 'post))))
- (set rdf:type 'gnc:Probeset)
- (set rdfs:label (field ProbeSet Name))
- (set skos:altLabel
- (replace-substrings
- (field ProbeSet alias)
- '(("\r\n" . "; "))))
- (set gnt:hasChip
- (string->identifier
- "platform"
- (field ("IFNULL(GeneChip.Name, '')" GeneChipName))))
- (set gnt:hasTargetId
- (field ("NULLIF(TRIM(ProbeSet.TargetId), '')"
- TargetId)))
- (multiset gnt:geneSymbol
- (map string-trim (string-split
- (field ProbeSet Symbol)
- #\,)))
- (set dct:description (sanitize-rdf-string (field ProbeSet description)))
- (set gnt:targetsRegion
- (sanitize-rdf-string
- (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')"
- Probe_set_target_region))))
- (set gnt:chr (field ProbeSet Chr))
- (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double))
- (set gnt:location
- (let* ((mb (field ProbeSet Mb))
- (chr (field ProbeSet Chr))
- (strand-probe (field ProbeSet Strand_Probe))
- (location (list chr mb)))
- (match location
- (("Un" mb)
- (format #f "Not available"))
- ((chr "")
- (if (string-blank? chr)
- (format #f "Not available")
- (format #f "Chr ~a @ Unknown position ~a~:[~;~a~]"
- chr mb
- (and (string? strand-probe) (or (string=? "+" strand-probe)
- (string=? "-" strand-probe)))
- (cond ((string=? "+" strand-probe)
- "on the plus strand")
- ((string=? "-" strand-probe)
- "on the minus strand")
- (else "")))))
- (_
- (format #f "Chr ~a @ ~a Mb ~:[~;~a~]"
- chr mb
- (and (string? strand-probe) (or (string=? "+" strand-probe)
- (string=? "-" strand-probe)))
- (cond ((string=? "+" strand-probe)
- "on the plus strand")
- ((string=? "-" strand-probe)
- "on the minus strand")
- (else "")))))))
- (set gnt:hasGeneId
- (field ProbeSet GeneId))
- ;; OMIM Link
- (set dct:references
- (let ((omim (field ProbeSet OMIM)))
- (if (not (string-blank? omim))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://www.ncbi.nlm.nih.gov/omim/"
- (uri-encode omim)
- "a gnc:omimLink"))
- "")))
- ;; Homologene Link
- (set dct:references
- (let ((homologene (field ProbeSet HomoloGeneID)))
- (if (not (string-blank? homologene))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://www.ncbi.nlm.nih.gov/homologene/?term="
- (uri-encode homologene)
- "a gnc:homologeneLink"))
- "")))
- (set gnt:uniprot
- (ontology 'uniprot: (field ProbeSet UniProtID)))
- (set gnt:strandProbe
- (field ProbeSet Strand_Probe))
- (set gnt:hasSpecificity
- (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
- Probe_set_specificity)))
- (set gnt:hasBlatScore
- (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
- Probe_set_BLAT_score)))
- (set gnt:hasBlatMbStart
- (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
- Probe_set_Blat_Mb_start))
- '^^xsd:double))
- (set gnt:hasBlatMbEnd
- (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
- Probe_set_Blat_Mb_end))
- '^^xsd:double))
- (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq)))
- (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq)))))
-
-
-
-
-(let* ((option-spec
- '((settings (single-char #\s) (value #t))
- (output (single-char #\o) (value #t))
- (documentation (single-char #\d) (value #t))))
- (options (getopt-long (command-line) option-spec))
- (settings (option-ref options 'settings #f))
- (output (option-ref options 'output #f))
- (documentation (option-ref options 'documentation #f))
- (%connection-settings
- (call-with-input-file settings
- read)))
- (with-documentation
- (name "ProbeSet Metadata")
- (connection %connection-settings)
- (table-metadata? #f)
- (prefixes
- '(("gn:" "<http://genenetwork.org/id/>")
- ("probeset:" "<http://genenetwork.org/probeset/>")
- ("gnc:" "<http://genenetwork.org/category/>")
- ("gene:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
- ("gnt:" "<http://genenetwork.org/term/>")
- ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
- ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
- ("dct:" "<http://purl.org/dc/terms/>")
- ("uniprot:" "<http://purl.uniprot.org/uniprot/>")
- ("owl:" "<http://www.w3.org/2002/07/owl#>")
- ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
- ("qb:" "<http://purl.org/linked-data/cube#>")
- ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
- ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
- (inputs
- (list probeset))
- (outputs
- `(#:documentation ,documentation
- #:rdf ,output))))
diff --git a/examples/schema.scm b/examples/schema.scm
new file mode 100755
index 0000000..50cfd6a
--- /dev/null
+++ b/examples/schema.scm
@@ -0,0 +1,70 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (ice-9 getopt-long)
+ (transform triples)
+ (transform schema)
+ (transform special-forms)
+ (transform sql)
+ (transform table))
+
+(define (call-with-genenetwork-database connection-settings proc)
+ (call-with-database "mysql" (string-join
+ (list (assq-ref connection-settings 'sql-username)
+ (assq-ref connection-settings 'sql-password)
+ (assq-ref connection-settings 'sql-database)
+ "tcp"
+ (assq-ref connection-settings 'sql-host)
+ (number->string
+ (assq-ref connection-settings 'sql-port)))
+ ":")
+ proc))
+
+(define (transform-table-schema connection-settings db)
+ (let ((tables (tables connection-settings db)))
+ (for-each (lambda (table)
+ (let ((table-id (string->identifier
+ "table"
+ ;; We downcase table names in
+ ;; identifiers. So, we distinguish
+ ;; between the user and User tables.
+ (if (string=? (table-name table) "User")
+ "user2"
+ (table-name table)))))
+ (triple table-id 'rdf:type 'gn:sqlTable)
+ (triple table-id 'gn:name (table-name table))
+ (triple table-id 'gn:hasSize (string->symbol (format #f "~a" (table-size table))))
+ (for-each (lambda (column)
+ (let ((column-id (column-id (table-name table)
+ (column-name column))))
+ (triple column-id 'rdf:type 'gn:sqlTableField)
+ (triple column-id 'gn:name (column-name column))
+ (triple column-id 'gn:sqlFieldType (column-type column))
+ (triple table-id 'gn:hasField column-id)))
+ (table-columns table))))
+ tables)))
+
+
+(let* ((option-spec
+ '((settings (single-char #\s) (value #t))
+ (output (single-char #\o) (value #t))
+ (documentation (single-char #\d) (value #t))))
+ (options (getopt-long (command-line) option-spec))
+ (settings (option-ref options 'settings #f))
+ (output (option-ref options 'output #f))
+ (documentation (option-ref options 'documentation #f))
+ (%connection-settings (call-with-input-file settings read)))
+ (call-with-genenetwork-database
+ %connection-settings
+ (lambda (db)
+ (with-output-to-file output
+ (lambda ()
+ (prefix "rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+ (prefix "rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+ (prefix "gn:" "<http://genenetwork.org/id/>")
+ (prefix "gnc:" "<http://genenetwork.org/category/>")
+ (prefix "gnt:" "<http://genenetwork.org/term/>")
+ (prefix "xsd:" "<http://www.w3.org/2001/XMLSchema#>")
+ (prefix "owl:" "<http://www.w3.org/2002/07/owl#>")
+ (newline)
+ (transform-table-schema %connection-settings db))))))
diff --git a/generate-ttl-files.scm b/generate-ttl-files.scm
index 65db03f..7afbe7b 100755
--- a/generate-ttl-files.scm
+++ b/generate-ttl-files.scm
@@ -1,60 +1,127 @@
-#! ./pre-inst-env
+#! /usr/bin/env guile
!#
+
(use-modules (ice-9 format)
- (ice-9 futures)
(ice-9 getopt-long)
- (ice-9 ftw))
+ (ice-9 ftw)
+ (ice-9 regex)
+ (srfi srfi-26)
+ (srfi srfi-34)
+ (srfi srfi-35))
+
+
+;; Copied over from GNU/Guix source tree.
+(define (file-name-predicate regexp)
+ "Return a predicate that returns true when passed a file name whose base
+name matches REGEXP."
+ (let ((file-rx (if (regexp? regexp)
+ regexp
+ (make-regexp regexp))))
+ (lambda (file stat)
+ (regexp-exec file-rx (basename file)))))
+
+(define* (find-files dir #:optional (pred (const #t))
+ #:key (stat lstat)
+ directories?
+ fail-on-error?)
+ "Return the lexicographically sorted list of files under DIR for which PRED
+returns true. PRED is passed two arguments: the absolute file name, and its
+stat buffer; the default predicate always returns true. PRED can also be a
+regular expression, in which case it is equivalent to (file-name-predicate
+PRED). STAT is used to obtain file information; using 'lstat' means that
+symlinks are not followed. If DIRECTORIES? is true, then directories will
+also be included. If FAIL-ON-ERROR? is true, raise an exception upon error."
+ (let ((pred (if (procedure? pred)
+ pred
+ (file-name-predicate pred))))
+ ;; Sort the result to get deterministic results.
+ (sort (file-system-fold (const #t)
+ (lambda (file stat result) ; leaf
+ (if (pred file stat)
+ (cons file result)
+ result))
+ (lambda (dir stat result) ; down
+ (if (and directories?
+ (pred dir stat))
+ (cons dir result)
+ result))
+ (lambda (dir stat result) ; up
+ result)
+ (lambda (file stat result) ; skip
+ result)
+ (lambda (file stat errno result)
+ (format (current-error-port) "find-files: ~a: ~a~%"
+ file (strerror errno))
+ (when fail-on-error?
+ (error "find-files failed"))
+ result)
+ '()
+ dir
+ stat)
+ string<?)))
+
+(define-syntax-rule (warn-on-error expr file)
+ (catch 'system-error
+ (lambda ()
+ expr)
+ (lambda args
+ (format (current-error-port)
+ "warning: failed to delete ~a: ~a~%"
+ file (strerror
+ (system-error-errno args))))))
+
+(define* (delete-file-recursively dir
+ #:key follow-mounts?)
+ "Delete DIR recursively, like `rm -rf', without following symlinks. Don't
+follow mount points either, unless FOLLOW-MOUNTS? is true. Report but ignore
+errors."
+ (let ((dev (stat:dev (lstat dir))))
+ (file-system-fold (lambda (dir stat result) ; enter?
+ (or follow-mounts?
+ (= dev (stat:dev stat))))
+ (lambda (file stat result) ; leaf
+ (warn-on-error (delete-file file) file))
+ (const #t) ; down
+ (lambda (dir stat result) ; up
+ (warn-on-error (rmdir dir) dir))
+ (const #t) ; skip
+ (lambda (file stat errno result)
+ (format (current-error-port)
+ "warning: failed to delete ~a: ~a~%"
+ file (strerror errno)))
+ #t
+ dir
+
+ ;; Don't follow symlinks.
+ lstat)))
(let* ((option-spec
'((settings (single-char #\s) (value #t))
- (output (single-char #\o) (value #t))
- (documentation (single-char #\d) (value #t))))
+ (documentation (single-char #\d) (value #t))
+ (output (single-char #\o) (value #t))))
(options (getopt-long (command-line) option-spec))
(settings (option-ref options 'settings #f))
(output (option-ref options 'output #f))
- (documentation (option-ref options 'documentation #f)))
- (define (enter? name stat result)
- stat result ;ignore
- ;; Skip version control directories if any.
- (not (member (basename name) '(".git" ".svn" "CVS"))))
-
- (define (leaf name stat result)
- stat result ;ignore
- (when (string-suffix? ".scm" name)
- (let* ((base-file-name (basename name ".scm"))
- (cmd (format #f " ~a --settings ~a --output ~a --documentation ~a"
- name
- settings
- (string-append output "/" base-file-name ".ttl")
- (string-append documentation "/" base-file-name ".md"))))
- (touch
- (future
- (begin
- (display (format #f "Running ~a" cmd))
- (display "\n")
- (system cmd)))))))
-
- (define (down name stat result)
- name stat ;ignore
- result)
-
- (define (up name stat result)
- name stat ;ignore
- result)
-
- (define (skip name stat result)
- name stat ;ignore
- result)
-
- ;; Ignore unreadable files/directories but warn the user.
- (define (error name stat errno result)
- stat ;ignore
- (format (current-error-port) "warning: ~a: ~a~%"
- name (strerror errno))
- result)
-
- (file-system-fold enter? leaf down up skip error
- 0 ;initial counter is zero bytes
- "./examples"))
-
-
+ (documentation (option-ref options 'documentation #f))
+ (%source-dir (dirname (current-filename))))
+ (unless (file-exists? output)
+ (mkdir output))
+ ;; Transform data to RDF
+ (for-each (lambda (file)
+ (let* ((base-file-name (basename file ".scm"))
+ (ttl-file (string-append output "/" base-file-name ".ttl")))
+ ;; Ignore dataset-metadata-git.scm because TODO
+ (unless (string=? base-file-name "dataset-metadata-git")
+ (system* "./pre-inst-env" file "--settings" settings
+ "--output" ttl-file))))
+ (find-files "./examples" ".scm"))
+ ;; Copy hand-woven ttl files.
+ (for-each (lambda (file)
+ (copy-file
+ file (format #f "~a/~a" output (basename file))))
+ (find-files "./schema" ".ttl"))
+ ;; Validate transformed turtle files
+ (for-each (lambda (file)
+ (system* "rapper" "--input" "turtle" "--count" file))
+ (append (find-files output ".ttl")
+ (find-files "./schema" ".ttl"))))
diff --git a/transform/schema-dump.scm b/transform/schema.scm
index 18df5da..cdfc834 100644
--- a/transform/schema-dump.scm
+++ b/transform/schema.scm
@@ -4,7 +4,13 @@
#:use-module (transform sql)
#:use-module (transform triples)
#:use-module (transform strings)
- #:use-module (transform table))
+ #:use-module (transform table)
+ #:export (table-fields
+ get-tables-from-comments
+ schema-annotations
+ tables
+ schema
+ data-table))
(define (table-fields db table)
@@ -47,7 +53,7 @@
(for-each (cut table-fields db <>)
(get-tables-from-comments db)))))
-(define (tables db)
+(define (tables connection-settings db)
"Return list of all tables in DB. Each element of the returned list
is a <table> object."
(map (lambda (table)
@@ -68,7 +74,7 @@ is a <table> object."
(information_schema.tables data_length))
(information_schema.tables)
(format #f "WHERE table_schema = '~a'"
- (assq-ref %connection-settings 'sql-database))))))
+ (assq-ref connection-settings 'sql-database))))))
(define (schema db)
(let ((tables (tables db)))
diff --git a/transform/special-forms.scm b/transform/special-forms.scm
index 99b30df..ddb3180 100644
--- a/transform/special-forms.scm
+++ b/transform/special-forms.scm
@@ -537,40 +537,42 @@ The above query results to triples that have the form:
(call-with-target-database
connection
(lambda (db)
- (with-output-to-file ;
- doc-path
- (lambda ()
- (format #t "# ~a" name)
- (for-each
- (lambda (proc)
- (proc db
- #:metadata? #f
- #:data? #f
- #:documentation?
- (lambda () (for-each
- (match-lambda
- ((k v)
- (begin
- (prefix k v #f))))
- prefixes))))
- inputs))
- #:encoding "UTF-8")
+ (when doc-path
+ (with-output-to-file ;
+ doc-path
+ (lambda ()
+ (format #t "# ~a" name)
+ (for-each
+ (lambda (proc)
+ (proc db
+ #:metadata? #f
+ #:data? #f
+ #:documentation?
+ (lambda () (for-each
+ (match-lambda
+ ((k v)
+ (begin
+ (prefix k v #f))))
+ prefixes))))
+ inputs))
+ #:encoding "UTF-8"))
;; Dumping the actual data
- (with-output-to-file
- rdf-path
- (lambda ()
- ;; Add the prefixes
- (for-each
- (match-lambda
- ((k v)
- (begin
- (prefix k v))))
- prefixes)
- (newline)
- (for-each
- (lambda (proc)
- (proc db #:metadata? table-metadata?))
- inputs))
- #:encoding "UTF-8")))))))
+ (when rdf-path
+ (with-output-to-file
+ rdf-path
+ (lambda ()
+ ;; Add the prefixes
+ (for-each
+ (match-lambda
+ ((k v)
+ (begin
+ (prefix k v))))
+ prefixes)
+ (newline)
+ (for-each
+ (lambda (proc)
+ (proc db #:metadata? table-metadata?))
+ inputs))
+ #:encoding "UTF-8"))))))))
diff --git a/transform/uuid.scm b/transform/uuid.scm
deleted file mode 100644
index be0e592..0000000
--- a/transform/uuid.scm
+++ /dev/null
@@ -1,234 +0,0 @@
-;; CREDIT: https://lists.gnu.org/archive/html/guile-user/2018-01/msg00019.html
-(define-module (transform uuid)
- #:use-module (srfi srfi-1)
- #:use-module (srfi srfi-11)
- #:use-module (rnrs bytevectors)
- #:use-module (ice-9 iconv)
- #:export (bytevector->md5
- make-version-3-uuid))
-
-(define (bytevector->md5 bytevector)
- "Convert BYTEVECTOR to a bytevector containing the MD5 hash of
-BYTEVECTOR."
- ;; Implemented along RFC 1321. It should be easy to verify that
- ;; this procedure performs the operations specified therein.
- (define (append-padding-bits bytevector)
- "Makes a list from BYTEVECTOR with padding as per RFC 1321 3.1."
- (let* ((length-in-bits (* 8 (bytevector-length bytevector)))
- (padding-bits (- 512 (modulo (- length-in-bits 448) 512))))
- (append (bytevector->u8-list bytevector)
- '(128) ; #*10000000
- (iota
- (- (/ padding-bits 8) 1)
- 0 0))))
- (define (append-length msg-list message-length)
- "Append MESSAGE-LENGTH as 8 byte values from a uint64 to MSG-LIST."
- (append msg-list
- ;; For numbers too large for an uint64, only the low-order
- ;; bytes are returned.
- (bytevector->u8-list (u64vector
- (modulo
- (* message-length 8) ; bits
- (1+ #xffffffffffffffff))))))
- (let hash ((AA #x67452301)
- (BB #xefcdab89)
- (CC #x98badcfe)
- (DD #x10325476)
- (to-digest
- (append-length
- (append-padding-bits
- bytevector)
- (bytevector-length bytevector))))
- (define (F X Y Z)
- (logior (logand X Y) (logand (lognot X) Z)))
- (define (G X Y Z)
- (logior (logand X Z) (logand Y (lognot Z))))
- (define (H X Y Z)
- (logxor X Y Z))
- (define (I X Y Z)
- (logxor Y (logior X (lognot Z))))
- (define (T i)
- (inexact->exact (floor (* 4294967296 (abs (sin i))))))
- (define (number->u32 n)
- "Cut off all bits that do not fit in a uint32."
- (bit-extract n 0 32))
- (define (lsh32 n count)
- (number->u32 (logior (ash n count)
- (bit-extract n (- 32 count) 32))))
- (if (not (null? to-digest))
- (let* ((block (u8-list->bytevector
- (list-head to-digest (/ 512 8))))
- (X (lambda (j) (bytevector-u32-ref
- block (* 4 j) (endianness little))))
- (do-round1
- (lambda (A B C D)
- (define (operation a b c d k s i)
- (number->u32
- (+ b (lsh32 (+ a (F b c d) (X k) (T i)) s))))
- (let* ((A (operation A B C D 0 7 1))
- (D (operation D A B C 1 12 2))
- (C (operation C D A B 2 17 3))
- (B (operation B C D A 3 22 4))
- (A (operation A B C D 4 7 5))
- (D (operation D A B C 5 12 6))
- (C (operation C D A B 6 17 7))
- (B (operation B C D A 7 22 8))
- (A (operation A B C D 8 7 9))
- (D (operation D A B C 9 12 10))
- (C (operation C D A B 10 17 11))
- (B (operation B C D A 11 22 12))
- (A (operation A B C D 12 7 13))
- (D (operation D A B C 13 12 14))
- (C (operation C D A B 14 17 15))
- (B (operation B C D A 15 22 16)))
- (values A B C D))))
- (do-round2
- (lambda (A B C D)
- (define (operation a b c d k s i)
- (number->u32
- (+ b (lsh32 (+ a (G b c d) (X k) (T i)) s))))
- (let* ((A (operation A B C D 1 5 17))
- (D (operation D A B C 6 9 18))
- (C (operation C D A B 11 14 19))
- (B (operation B C D A 0 20 20))
- (A (operation A B C D 5 5 21))
- (D (operation D A B C 10 9 22))
- (C (operation C D A B 15 14 23))
- (B (operation B C D A 4 20 24))
- (A (operation A B C D 9 5 25))
- (D (operation D A B C 14 9 26))
- (C (operation C D A B 3 14 27))
- (B (operation B C D A 8 20 28))
- (A (operation A B C D 13 5 29))
- (D (operation D A B C 2 9 30))
- (C (operation C D A B 7 14 31))
- (B (operation B C D A 12 20 32)))
- (values A B C D))))
- (do-round3
- (lambda (A B C D)
- (define (operation a b c d k s i)
- (number->u32
- (+ b (lsh32 (+ a (H b c d) (X k) (T i)) s))))
- (let* ((A (operation A B C D 5 4 33))
- (D (operation D A B C 8 11 34))
- (C (operation C D A B 11 16 35))
- (B (operation B C D A 14 23 36))
- (A (operation A B C D 1 4 37))
- (D (operation D A B C 4 11 38))
- (C (operation C D A B 7 16 39))
- (B (operation B C D A 10 23 40))
- (A (operation A B C D 13 4 41))
- (D (operation D A B C 0 11 42))
- (C (operation C D A B 3 16 43))
- (B (operation B C D A 6 23 44))
- (A (operation A B C D 9 4 45))
- (D (operation D A B C 12 11 46))
- (C (operation C D A B 15 16 47))
- (B (operation B C D A 2 23 48)))
- (values A B C D))))
- (do-round4
- (lambda (A B C D)
- (define (operation a b c d k s i)
- (number->u32
- (+ b (lsh32 (+ a (I b c d) (X k) (T i)) s))))
- (let* ((A (operation A B C D 0 6 49))
- (D (operation D A B C 7 10 50))
- (C (operation C D A B 14 15 51))
- (B (operation B C D A 5 21 52))
- (A (operation A B C D 12 6 53))
- (D (operation D A B C 3 10 54))
- (C (operation C D A B 10 15 55))
- (B (operation B C D A 1 21 56))
- (A (operation A B C D 8 6 57))
- (D (operation D A B C 15 10 58))
- (C (operation C D A B 6 15 59))
- (B (operation B C D A 13 21 60))
- (A (operation A B C D 4 6 61))
- (D (operation D A B C 11 10 62))
- (C (operation C D A B 2 15 63))
- (B (operation B C D A 9 21 64)))
- (values A B C D)))))
- (let*-values (((A B C D) (values AA BB CC DD))
- ((A B C D) (do-round1 A B C D))
- ((A B C D) (do-round2 A B C D))
- ((A B C D) (do-round3 A B C D))
- ((A B C D) (do-round4 A B C D)))
- (hash (number->u32 (+ A AA))
- (number->u32 (+ B BB))
- (number->u32 (+ C CC))
- (number->u32 (+ D DD))
- (list-tail to-digest (/ 512 8)))))
- ;; we’re done:
- (u8-list->bytevector
- (append
- (bytevector->u8-list (u32vector AA))
- (bytevector->u8-list (u32vector BB))
- (bytevector->u8-list (u32vector CC))
- (bytevector->u8-list (u32vector DD)))))))
-
-(define* (make-version-3-uuid namespace-uuid str #:optional (prefix "urn:uuid:"))
- "Generates a UUID string by computing the MD5 hash of NAMESPACE-UUID
-and STR. NAMESPACE-UUID must be a bytevector consisting of the UUID’s
-bytes, *not* the UUID’s string representation."
- (define (half-byte->hex-char number)
- "Returns the corresponding hexadecimal digit for a number NUMBER
-between 0 and 15."
- (case number
- ((0) #\0)
- ((1) #\1)
- ((2) #\2)
- ((3) #\3)
- ((4) #\4)
- ((5) #\5)
- ((6) #\6)
- ((7) #\7)
- ((8) #\8)
- ((9) #\9)
- ((10) #\a)
- ((11) #\b)
- ((12) #\c)
- ((13) #\d)
- ((14) #\e)
- ((15) #\f)))
- (define (byte->hex-string bv index)
- "Convert the byte at INDEX of bytevector BV to a hex string."
- (let ((byte (bytevector-u8-ref bv index)))
- (string (half-byte->hex-char (quotient byte 16))
- (half-byte->hex-char (modulo byte 16)))))
- (let ((md5 (bytevector->md5
- (u8-list->bytevector
- (append (bytevector->u8-list namespace-uuid)
- (bytevector->u8-list (string->utf8 str)))))))
- (string-append prefix
- ;; time_low field:
- (byte->hex-string md5 0)
- (byte->hex-string md5 1)
- (byte->hex-string md5 2)
- (byte->hex-string md5 3)
- "-"
- ;; time_mid field:
- (byte->hex-string md5 4)
- (byte->hex-string md5 5)
- "-"
- ;; time_hi_and_version field:
- (let ((byte (bytevector-u8-ref md5 6)))
- (string (half-byte->hex-char 3) ; UUID version 3
- (half-byte->hex-char (modulo byte 16))))
- (byte->hex-string md5 7)
- "-"
- ;; clock_seq_hi_and_reserved field:
- (let ((byte (bytevector-u8-ref md5 8)))
- (string (half-byte->hex-char
- (logior #b1000 ; most significant bits are 10
- (bit-extract (quotient byte 16) 0 2)))
- (half-byte->hex-char (modulo byte 16))))
- ;; clock_seq_low field:
- (byte->hex-string md5 9)
- "-"
- ;; node field:
- (byte->hex-string md5 10)
- (byte->hex-string md5 11)
- (byte->hex-string md5 12)
- (byte->hex-string md5 13)
- (byte->hex-string md5 14)
- (byte->hex-string md5 15))))