diff options
Diffstat (limited to 'examples/phenotype.scm')
| -rwxr-xr-x | examples/phenotype.scm | 187 |
1 files changed, 101 insertions, 86 deletions
diff --git a/examples/phenotype.scm b/examples/phenotype.scm index aa1e9c5..37bbd59 100755 --- a/examples/phenotype.scm +++ b/examples/phenotype.scm @@ -14,100 +14,112 @@ (transform special-forms)) -(define-transformer phenotypes - (tables (PublishXRef - (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") - (left-join Publication "ON Publication.Id = PublishXRef.PublicationId") - (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))) - (schema-triples - (gnt:traitId a owl:ObjectProperty) - (gnt:traitId rdfs:domain gnc:Phenotype) - (gnt:traitId skos:definition "This is the unique trait id assigned from GeneNetwork") - (gnt:abbreviation a owl:ObjectProperty) - (gnt:abbreviation rdfs:domain gnc:Phenotype) - (gnt:abbreviation skos:definition "The abbreviation used for this resource") - (gnt:labCode a owl:ObjectProperty) - (gnt:labCode rdfs:domain gnc:Phenotype) - (gnt:submitter a owl:ObjectProperty) - (gnt:submitter rdfs:domain gnc:Phenotype) - (gnt:submitter skos:definition "A person who submitted this resource to GN") - (gnt:mean a rdf:Property) - (gnt:mean a qb:MeasureProperty) - (gnt:mean rdfs:subPropertyOf sdmx-measure:obsValue) - (gnt:mean rdfs:domain gnc:Phenotype) - (gnt:mean rdfs:range xsd:double) - (gnt:lodScore a rdf:Property) - (gnt:lodScore a qb:MeasureProperty) - (gnt:lodScore rdfs:subPropertyOf sdmx-measure:obsValue) - (gnt:lodScore rdfs:domain gnc:Phenotype) - (gnt:lodScore rdfs:range xsd:double) - (gnt:lodScore rdfs:label "Peak -logP") - (gnt:lodScore skos:definition "Statistical measurement assessing the likelihood of genetic linkage between traits or genetic markers.") - (gnt:locus a rdf:Property) - (gnt:locus a qb:MeasureProperty) - (gnt:locus rdfs:subPropertyOf sdmx-measure:obsValue) - (gnt:locus rdfs:domain gnc:Phenotype) - (gnt:locus rdfs:range rdfs:Literal) - (gnt:additive rdfs:domain gnc:Phenotype) - (gnt:additive rdfs:range xsd:double) - (gnt:sequence rdfs:domain gnc:Phenotype) - (gnt:sequence rdfs:range xsd:integer)) - (triples (string->identifier - "trait" - (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)" - Phenotype))) - (set rdf:type 'gnc:Phenotype) - (set gnt:belongsToGroup - (string->identifier - "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first)) - ;; This is the trait's name - (set gnt:traitId - (let ((trait-id (field PublishXRef Id))) - (if (number? trait-id) - (number->string trait-id) - trait-id))) - (set skos:altLabel - (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)" - Phenotype))) + + + + + + + +(define-transformer gnc:phenotype->gn:phenotype + (tables (Phenotype)) + (triples "gnc:phenotype" + (set skos:member + (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation))) + (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation))) + (post-desc (blank-p (field Phenotype Post_publication_description))) + (pre-desc (blank-p (field Phenotype Post_publication_description)))) + (string->identifier + "phenotype" + (or post-abbrev pre-abbrev post-desc pre-desc) + #:separator "_"))))) + +(define-transformer gn:phenotype->metadata + (tables (Phenotype)) + (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation))) + (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation))) + (post-desc (blank-p (field Phenotype Post_publication_description))) + (pre-desc (blank-p (field Phenotype Post_publication_description)))) + (string->identifier + "phenotype" + (or post-abbrev pre-abbrev post-desc pre-desc) + #:separator "_")) + (set rdf:type 'gnc:phenotype) ;; All phenotypes have a post-publication description (set dct:description (sanitize-rdf-string (field Phenotype Post_publication_description))) ;; All phenotypes have a post-publication abbreviation - (set gnt:abbreviation (field Phenotype Post_publication_abbreviation)) - (set gnt:labCode (field Phenotype Lab_code)) + (set gnt:abbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) + (set gnt:has_lab_code (field Phenotype Lab_code)) (set gnt:submitter (sanitize-rdf-string (field Phenotype Submitter))) (set dct:contributor (sanitize-rdf-string (field Phenotype Owner))) - (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) - '^^xsd:double)) - (set gnt:locus - (string->identifier - "" - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (sanitize-rdf-string (field PublishXRef Locus)) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first)) - (set gnt:lodScore (annotate-field - (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs)) - '^^xsd:double)) - (set gnt:additive - (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) - '^^xsd:double)) - (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer)) - (set dct:isReferencedBy + (set skos:member + (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation))) + (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation))) + (post-desc (blank-p (field Phenotype Post_publication_description))) + (pre-desc (blank-p (field Phenotype Post_publication_description)))) + (string->identifier + "phenotype" + (or post-abbrev pre-abbrev post-desc pre-desc) + #:separator "_"))))) + +(define-transformer gn:trait->gn:phenotype + (tables (PublishXRef + (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") + (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id") + (left-join Publication "ON Publication.Id = PublishXRef.PublicationId") + (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")) + "WHERE InbredSet.public > 0") + (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation))) + (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation))) + (post-desc (blank-p (field Phenotype Post_publication_description))) + (pre-desc (blank-p (field Phenotype Post_publication_description)))) + (string->identifier + "trait" + (format #f "~a_~a" (field PublishFreeze Name) + (or post-abbrev pre-abbrev post-desc pre-desc)) + #:separator "_")) + (set rdf:type 'gnc:phenotype_trait) + (set gnt:has_strain (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")) + (set owl:equivalentClass + (field ("CONCAT(PublishFreeze.Name, '_', PublishXRef.Id)" + PublishFreeze))) + (set dcat:distribution + (string->symbol + (format #f "gnd:~a" + (field ("CONCAT(PublishFreeze.Name, '_', PublishXRef.Id)" + PublishFreeze)))) ) + (set dct:references (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" pmid))) - (publication-id (field Publication Id PublicationId))) + (publication-id (field Publication Id))) (if (string-null? pmid) (string->identifier "unpublished" (number->string publication-id)) - (ontology 'pubmed: pmid)))))) + (ontology 'pubmed: pmid)))) + (set gnt:has_phenotype + (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation))) + (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation))) + (post-desc (blank-p (field Phenotype Post_publication_description))) + (pre-desc (blank-p (field Phenotype Post_publication_description)))) + (string->identifier + "phenotype" + (or post-abbrev pre-abbrev post-desc pre-desc) + #:separator "_"))) + (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) + '^^xsd:double)) + (set gnt:locus (sanitize-rdf-string (field PublishXRef Locus))) + (set gnt:lod_score (annotate-field + (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs)) + '^^xsd:double)) + (set gnt:additive + (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) + '^^xsd:double)) + (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer)) + (set rdfs:comment (sanitize-rdf-string (field PublishXRef comments))))) @@ -127,11 +139,13 @@ (connection %connection-settings) (table-metadata? #f) (prefixes - '(("dct:" "<http://purl.org/dc/terms/>") - ("gn:" "<http://genenetwork.org/id/>") + '(("dcat:" "<http://www.w3.org/ns/dcat#>") + ("dct:" "<http://purl.org/dc/terms/>") + ("gn:" "<http://rdf.genenetwork.org/v1/id/>") ("owl:" "<http://www.w3.org/2002/07/owl#>") - ("gnc:" "<http://genenetwork.org/category/>") - ("gnt:" "<http://genenetwork.org/term/>") + ("gnc:" "<http://rdf.genenetwork.org/v1/category/>") + ("gnd:" "<https://cd.genenetwork.org/api3/lmdb/v1/data/traits/>") + ("gnt:" "<http://rdf.genenetwork.org/v1/term/>") ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>") ("skos:" "<http://www.w3.org/2004/02/skos/core#>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") @@ -141,8 +155,9 @@ ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>"))) (inputs - (list - phenotypes)) + (list gnc:phenotype->gn:phenotype + gn:phenotype->metadata + gn:trait->gn:phenotype)) (outputs `(#:documentation ,documentation #:rdf ,output)))) |
