From e4ef2dc1c3bc0169b2dcad37d3a56373b66f8902 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 7 Aug 2023 20:47:57 +0300 Subject: Update phenotype and publishfreeze dump Signed-off-by: Munyoki Kilyungi --- examples/dump-phenotype.scm | 53 ++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) (limited to 'examples') diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm index 9a1651a..cd6ca95 100755 --- a/examples/dump-phenotype.scm +++ b/examples/dump-phenotype.scm @@ -54,11 +54,15 @@ (left-join PublishFreeze "ON PublishFreeze.InbredSetId = PublishXRef.InbredSetId") (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))) (schema-triples - (gn:phenotypeDataset rdfs:subPropertyOf gn:dataset) - (gnt:publicationDescription rdfs:range rdfs:Literal) - (gnt:originalDescription rdfs:range rdfs:Literal) - (gnt:prePublicationDescription rdfs:range rdfs:Literal) - (gnt:postPublicationAbbreviation rdfs:range rdfs:Literal) + (gnt:originalDescription a owl:ObjectProperty) + (gnt:originalDescription rdfs:domain gnc:phenotype) + (gnt:originalDescription skos:definition "The original description of this resource") + (gnt:prePublicationDescription a owl:ObjectProperty) + (gnt:prePublicationDescription rdfs:domain gnc:phenotype) + (gnt:prePublicationDescription skos:definition "The pre publication details of this resource") + (gnt:abbreviation a owl:ObjectProperty) + (gnt:abbreviation rdfs:domain gnc:phenotype) + (gnt:abbreviation skos:definition "The abbreviation used for this resource") (gnt:labCode rdfs:range rdfs:Literal) (gnt:submitter rdfs:range rdfs:Literal) (gnt:owner rdfs:range rdfs:Literal) @@ -71,49 +75,40 @@ (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ("CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, '_')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation))" abbrev)) - 'pre "_" 'post) + (field ("CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, '_')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation))" abbrev)) + 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) (set rdf:type 'gnc:phenotype) - (set gnt:name (sanitize-rdf-string - (field - ("CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100))" - PhenotypeName)))) + (set rdfs:label (sanitize-rdf-string + (field + ("IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation)" + PhenotypeName)))) ;; There is no row with an empty post-publication description so ;; use this field as the main publication description - (set gnt:publicationDescription + (set dct:description (sanitize-rdf-string - (field ("CAST(CONVERT(BINARY CONVERT(Phenotype.Post_publication_description USING latin1) USING utf8) AS CHAR(10000))" - postPubDescr)))) - (set gnt:originalDescription (sanitize-rdf-string - (delete-substrings - (field Phenotype Original_description) - "Original post publication description: "))) - (set gnt:prePublicationDescription - (sanitize-rdf-string - (field - ("CAST(CONVERT(BINARY CONVERT(Phenotype.Pre_publication_description USING latin1) USING utf8) AS VARCHAR(15000))" - prePubDesc)))) + (field Phenotype Post_publication_description))) (set gnt:prePublicationAbbreviation (sanitize-rdf-string (field Phenotype Pre_publication_abbreviation))) (set gnt:postPublicationAbbreviation (sanitize-rdf-string (field Phenotype Post_publication_abbreviation))) (set gnt:labCode (field Phenotype Lab_code)) - (set gnt:submitter (sanitize-rdf-string (field Phenotype Submitter))) + (set gdmt:hasDistributorInfo + (sanitize-rdf-string (field Phenotype Submitter))) (set gnt:owner (sanitize-rdf-string (field Phenotype Owner))) (set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean)) - '^^xsd:double)) + '^^xsd:double)) (set gnt:locus (field PublishXRef Locus)) - (set gnt:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:float)) - (set gnt:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:decimal)) + (set gnt:LRS (annotate-field (field ("IFNULL(PublishXRef.LRS, '')" lrs)) '^^xsd:double)) + (set gnt:additive (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive)) '^^xsd:double)) (set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:int)) - (set gnt:phenotypeOfDataset + (set gnt:belongsToDataset (string->identifier "" (field ("IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, ''))" DatasetName)) #:separator "" #:proc string-capitalize-first)) - (set gnt:phenotypeOfPublication + (set dct:isReferencedBy (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" pmid))) -- cgit v1.2.3