diff options
Diffstat (limited to 'examples/phenotype.scm')
-rwxr-xr-x | examples/phenotype.scm | 125 |
1 files changed, 125 insertions, 0 deletions
#! /usr/bin/env guile
!#

;;; examples/phenotype.scm  (added as a new file, mode 100755)
;;;
;;; Declares how phenotype records are described as RDF.  The
;;; `phenotypes' transformer below names the SQL tables and fields to
;;; read and the triples to build from them; the `with-documentation'
;;; form at the end names the database connection, the prefix table,
;;; and the output files.  Both forms come from (dump special-forms).
;;;
;;; Usage: phenotype.scm CONNECTION-SETTINGS-FILE

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 match)
             (ice-9 regex)
             (dump strings)
             (dump sql)
             (dump triples)
             (dump special-forms))



;; Connection settings: the first datum `read' from the file named by
;; the first command-line argument.  Any further arguments are ignored.
;; When no argument is given, print a usage line and exit with status 1
;; instead of failing inside `list-ref' with an out-of-range error.
(define %connection-settings
  (match (command-line)
    ((_ settings-file . _)
     (call-with-input-file settings-file
       read))
    (_
     (format (current-error-port)
             "Usage: phenotype.scm CONNECTION-SETTINGS-FILE~%")
     (exit 1))))


(define-transformer phenotypes
  ;; Rows come from PublishXRef, left-joined to InbredSet, Publication
  ;; and Phenotype, and are limited to inbred sets that also appear in
  ;; PublishFreeze.
  (tables (PublishXRef
           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
          "WHERE PublishXRef.InbredSetId IN (SELECT PublishFreeze.InbredSetId FROM PublishFreeze)")
  ;; Vocabulary statements for the gnc:phenotype class and the gnt:
  ;; properties used in the `triples' section.
  ;; NOTE(review): gnt:abbreviation, gnt:traitName, gnt:labCode and
  ;; gnt:submitter are typed owl:ObjectProperty although the values set
  ;; below look like plain strings -- confirm whether
  ;; owl:DatatypeProperty was intended.
  (schema-triples
   (gnc:phenotype a skos:Concept)
   (gnc:phenotype skos:description "This is a set of controlled terms that are used to describe a given phenotype")
   (gnt:abbreviation a owl:ObjectProperty)
   (gnt:abbreviation rdfs:domain gnc:phenotype)
   (gnt:abbreviation skos:definition "The abbreviation used for this resource")
   (gnt:traitName a owl:ObjectProperty)
   (gnt:traitName rdfs:domain gnc:phenotype)
   (gnt:traitName skos:definition "The trait Name of this resource")
   (gnt:labCode a owl:ObjectProperty)
   (gnt:labCode rdfs:domain gnc:phenotype)
   (gnt:submitter a owl:ObjectProperty)
   (gnt:submitter rdfs:domain gnc:phenotype)
   (gnt:submitter skos:definition "A person who submitted this resource to GN")
   (gnt:mean rdfs:domain gnc:phenotype)
   (gnt:mean rdfs:range xsd:double)
   (gnt:LRS rdfs:domain gnc:phenotype)
   (gnt:LRS rdfs:range xsd:double)
   (gnt:locus rdfs:domain gnc:phenotype)
   (gnt:locus rdfs:range rdfs:Literal)
   (gnt:additive rdfs:domain gnc:phenotype)
   (gnt:additive rdfs:range xsd:double)
   (gnt:sequence rdfs:domain gnc:phenotype)
   (gnt:sequence rdfs:range xsd:integer))
  ;; Subject: an identifier built from "trait" and the SQL expression
  ;; "<InbredSet.Name, or InbredSetId when the name is NULL>_<PublishXRef.Id>".
  (triples (string->identifier
            "trait"
            (field ("CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
                    Phenotype)))
    (set rdf:type 'gnc:phenotype)
    (set gnt:belongsToSet
         (string->identifier
          "set" (field InbredSet Name)
          #:separator ""
          #:proc string-capitalize-first))
    ;; PublishXRef.Id is converted to a string when it arrives as a
    ;; number, and passed through unchanged otherwise.
    (set gnt:traitName
         (let ((trait-id (field PublishXRef Id)))
           (if (number? trait-id)
               (number->string trait-id)
               trait-id)))
    ;; The label uses the same SQL expression as the subject identifier.
    (set rdfs:label
         (field ("CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
                 Phenotype)))
    ;; All phenotypes have a post-publication description
    (set dct:description
         (sanitize-rdf-string
          (field Phenotype Post_publication_description)))
    ;; All phenotypes have a post-publication abbreviation
    (set gnt:abbreviation (field Phenotype Post_publication_abbreviation))
    (set gnt:labCode (field Phenotype Lab_code))
    (set gnt:submitter
         (sanitize-rdf-string (field Phenotype Submitter)))
    ;; Phenotype.Owner is split on commas; one dct:contributor value is
    ;; given per resulting piece.
    (multiset dct:contributor
              (string-split
               (sanitize-rdf-string (field Phenotype Owner))
               #\,))
    ;; For mean, LRS and additive the SQL replaces NULL with '' before
    ;; the value is handed to `annotate-field'.
    (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean))
                                  '^^xsd:double))
    (set gnt:locus (field PublishXRef Locus))
    (set gnt:LRS (annotate-field
                  (field ("IFNULL(PublishXRef.LRS, '')" lrs))
                  '^^xsd:double))
    (set gnt:additive
         (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive))
                         '^^xsd:double))
    (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer))
    ;; The SQL yields '' when PubMed_ID is NULL.  In that case the
    ;; reference is an "unpublished" identifier built from
    ;; Publication.Id; otherwise it is a pubmed: term for the PubMed id.
    ;; NOTE(review): Publication is LEFT JOINed, so Publication.Id may
    ;; presumably be missing for some rows; `number->string' would then
    ;; be handed a non-number -- confirm this cannot happen.
    (set dct:isReferencedBy
         (let ((pmid (field
                      ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
                       pmid)))
               (publication-id (field Publication Id)))
           (if (string-null? pmid)
               (string->identifier "unpublished"
                                   (number->string publication-id))
               (ontology 'pubmed: pmid))))))



;; Top-level run description: the connection read above, the prefix
;; table, the transformers to use as inputs, and the documentation and
;; RDF output paths.
(with-documentation
 (name "Phenotypes Metadata")
 (connection %connection-settings)
 (table-metadata? #f)
 (prefixes
  '(("dct:" "<http://purl.org/dc/terms/>")
    ("gn:" "<http://genenetwork.org/id/>")
    ("owl:" "<http://www.w3.org/2002/07/owl#>")
    ("gnc:" "<http://genenetwork.org/category/>")
    ("gnt:" "<http://genenetwork.org/terms/>")
    ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
    ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
    ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
    ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
    ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")))
 (inputs
  (list
   phenotypes))
 (outputs
  '(#:documentation "./docs/phenotype.md"
    #:rdf "/export/data/genenetwork-virtuoso/phenotype.ttl")))