aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-08-17 16:54:18 +0300
committer Munyoki Kilyungi 2023-08-17 17:02:25 +0300
commit 2684d58401c694e30551f424eb2404f3a5674769 (patch)
tree b9a09e01f5fab24c675a154c8935a4047ed7747c
parent dfbc9e0f93a9446108ac59f74aa42071f422f950 (diff)
download gn-transform-databases-2684d58401c694e30551f424eb2404f3a5674769.tar.gz
Update how phenotypes are transformed
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x examples/dump-phenotype.scm 51
1 files changed, 15 insertions, 36 deletions
diff --git a/examples/dump-phenotype.scm b/examples/dump-phenotype.scm
index ed10b24..8d04a80 100755
--- a/examples/dump-phenotype.scm
+++ b/examples/dump-phenotype.scm
@@ -48,16 +48,11 @@
#:proc string-capitalize-first))))
(define-dump dump-phenotypes
- (tables (Phenotype
- (left-join PublishXRef "ON Phenotype.Id = PublishXRef.PhenotypeId")
- (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
- ;; We need this join so as to construct the trait's skos:altLabel
+ (tables (PublishXRef
(left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
- (left-join PublishFreeze "ON PublishFreeze.InbredSetId = PublishXRef.InbredSetId")
- (left-join InfoFiles "ON InfoFiles.InfoPageName = PublishFreeze.Name"))
- ;; Only dump public traits; Ignore "hanging" traits
- ;; I.e. traits that have no associated vectors
- "WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND PublishFreeze.Id IS NOT NULL")
+ (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
+ (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
+ "WHERE PublishXRef.InbredSetId IN (SELECT PublishFreeze.InbredSetId FROM PublishFreeze)")
(schema-triples
(gnc:phenotype a skos:Concept)
(gnc:phenotype skos:description "This is a set of controlled terms that are used to describe a given phenotype")
@@ -80,22 +75,13 @@
(gnt:sequence rdfs:domain gnc:phenotype)
(gnt:sequence rdfs:range xsd:integer))
(triples (string->identifier
- ""
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field ("CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, '_')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation))" abbrev))
- 'pre "_" 'post)
- #:separator ""
- #:proc string-capitalize-first)
+ "trait"
+ (field ("CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
+ Phenotype)))
(set rdf:type 'gnc:phenotype)
- (set skos:prefLabel (sanitize-rdf-string
- (field
- ("IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation)"
- PhenotypeName))))
- ;; Add an alternative name for this resources. This is how GN
- ;; currently labels phenotypes
- (set skos:altLabel (field
- ("CONCAT(InbredSet.Name, '_', PublishXRef.Id)"
- phenotypeAltName)))
+ (set rdfs:label
+ (field ("CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
+ Phenotype)))
;; All phenotypes have a post-publication description
(set dct:description
(sanitize-rdf-string
@@ -105,10 +91,10 @@
(set gnt:labCode (field Phenotype Lab_code))
(set gnt:submitter
(sanitize-rdf-string (field Phenotype Submitter)))
- (set dct:contributor (sanitize-rdf-string (field Phenotype Owner)))
- (multiset dct:contributor (string-split
- (sanitize-rdf-string (field Phenotype Owner))
- #\,))
+ (multiset dct:contributor
+ (string-split
+ (sanitize-rdf-string (field Phenotype Owner))
+ #\,))
(set gnt:mean (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean))
'^^xsd:double))
(set gnt:locus (field PublishXRef Locus))
@@ -119,14 +105,6 @@
(annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive))
'^^xsd:double))
(set gnt:sequence (annotate-field (field PublishXRef Sequence) '^^xsd:integer))
- (set gnt:belongsToDataset
- (string->identifier
- ""
- (regexp-substitute/global #f "[^A-Za-z0-9:]"
- (field InfoFiles InfoPageName)
- 'pre "_" 'post)
- #:separator ""
- #:proc string-capitalize-first))
(set dct:isReferencedBy
(let ((pmid (field
("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
@@ -136,6 +114,7 @@
(string->identifier "unpublished"
(number->string publication-id))
(ontology 'pubmed: pmid))))))
+
(dump-with-documentation