From 2684d58401c694e30551f424eb2404f3a5674769 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 17 Aug 2023 16:54:18 +0300 Subject: Update how phenotypes are transformed Signed-off-by: Munyoki Kilyungi --- examples/dump-phenotype.scm | 51 +++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 36 deletions(-) (limited to 'examples') diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index ed10b24..8d04a80 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -48,16 +48,11 @@ #:proc string-capitalize-first)))) (define-dump dump-phenotypes - (tables (Phenotype - (left-join PublishXRef "ON Phenotype.Id = PublishXRef.PhenotypeId") - (left-join Publication "ON Publication.Id = PublishXRef.PublicationId") - ;; We need this join so as to construct the trait's skos:altLabel + (tables (PublishXRef (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId") - (left-join PublishFreeze "ON PublishFreeze.InbredSetId = PublishXRef.InbredSetId") - (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name")) - ;; Only dump public traits; Ignore "hanging" traits - ;; I.e. traits that have no associated vectors - "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND PublishFreeze.Id IS NOT NULL") + (left-join Publication "ON Publication.Id = PublishXRef.PublicationId") + (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")) + "WHERE PublishXRef.InbredSetId IN (SELECT PublishFreeze.InbredSetId FROM PublishFreeze)") (schema-triples (gnc:phenotype a skos:Concept) (gnc:phenotype skos:description "This is a set of controlled terms that are used to describe a given phenotype") @@ -80,22 +75,13 @@ (gnt:sequence rdfs:domain gnc:phenotype) (gnt:sequence rdfs:range xsd:integer)) (triples (string->identifier - "" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ("CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, '_')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation))" abbrev)) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first) + "trait" + (field ("CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id)" + Phenotype))) (set rdf:type 'gnc:phenotype) - (set skos:prefLabel (sanitize-rdf-string - (field - ("IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation)" - PhenotypeName)))) - ;; Add an alternative name for this resources. This is how GN - ;; currently labels phenotypes - (set skos:altLabel (field - ("CONCAT(InbredSet.Name, '_', PublishXRef.Id)" - phenotypeAltName))) + (set rdfs:label + (field ("CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id)" + Phenotype))) ;; All phenotypes have a post-publication description (set dct:description (sanitize-rdf-string @@ -105,10 +91,10 @@ (set gnt:labCode (field Phenotype Lab_code)) (set gnt:submitter (sanitize-rdf-string (field Phenotype Submitter))) - (set dct:contributor (sanitize-rdf-string (field Phenotype Owner))) - (multiset dct:contributor (string-split - (sanitize-rdf-string (field Phenotype Owner)) - #\,)) + (multiset dct:contributor + (string-split + (sanitize-rdf-string (field Phenotype Owner)) + #\,)) (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) '^^xsd:double)) (set gnt:locus (field PublishXRef Locus)) @@ -119,14 +105,6 @@ (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:double)) (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer)) - (set gnt:belongsToDataset - (string->identifier - "" - (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field InfoFiles InfoPageName) - 'pre "_" 'post) - #:separator "" - #:proc string-capitalize-first)) (set dct:isReferencedBy (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" @@ -136,6 +114,7 @@ (string->identifier "unpublished" (number->string publication-id)) (ontology 'pubmed: pmid)))))) + (dump-with-documentation -- cgit v1.2.3