diff options
author | Munyoki Kilyungi | 2023-08-14 18:05:44 +0300 |
---|---|---|
committer | Munyoki Kilyungi | 2023-08-15 19:32:48 +0300 |
commit | 877906dc164add37bfff07ebc1d0684ab6a3a333 (patch) | |
tree | 21329eb8a8ff92c5f0ef1b803b024e875c8b4d59 | |
parent | 6e4359319695a6d93de55339b758b6ec6926c5ca (diff) | |
download | gn-transform-databases-877906dc164add37bfff07ebc1d0684ab6a3a333.tar.gz |
Update how genotypes are dumped
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x | examples/dump-genotype.scm | 91 |
1 file changed, 64 insertions, 27 deletions
diff --git a/examples/dump-genotype.scm b/examples/dump-genotype.scm index 50cafb6..4ac836d 100755 --- a/examples/dump-genotype.scm +++ b/examples/dump-genotype.scm @@ -25,10 +25,6 @@ (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name") (left-join InbredSet "ON GenoFreeze.InbredSetId = InbredSet.InbredSetId")) "WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL") - (schema-triples - (gnt:datasetOfInbredSet rdfs:subPropertyOf gnc:inbredSet) - (gnc:genotypeDataset rdfs:subPropertyOf gnc:dataset) - (gnt:shortName rdfs:subPropertyOf gnc:genotypeDataset)) (triples (string->identifier "" @@ -41,59 +37,98 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gnc:genotypeDataset) - (set gnt:name (field GenoFreeze Name)) - (set gnt:fullName (field GenoFreeze FullName)) - (set gnt:shortName (field GenoFreeze ShortName)) + (set rdf:type 'gnc:genotype) + (set rdfs:label (field GenoFreeze Name)) + (set skos:prefLabel (field GenoFreeze FullName)) + (set skos:altLabel (field GenoFreeze ShortName)) (set dct:created (annotate-field (field GenoFreeze CreateTime) '^^xsd:date)) - (set gnt:datasetOfInbredSet + (set gnt:belongsToInbredSet (string->identifier "" (field InbredSet Name InbredSetName))))) (define-dump dump-genotypes (tables (Geno (left-join GenoXRef "ON Geno.Id = GenoXRef.GenoId") (left-join GenoFreeze "ON GenoFreeze.Id = GenoXRef.GenoFreezeId") + (left-join InbredSet "ON InbredSet.InbredSetId = GenoFreeze.InbredSetId") (left-join InfoFiles "ON InfoFiles.InfoPageName = GenoFreeze.Name"))) (schema-triples - (gnc:genotype rdfs:range rdfs:Literal) - (gnt:genotypeDataset rdfs:subPropertyOf gn:dataset)) + (gnt:chr a owl:ObjectProperty) + (gnt:chr skos:description "This resource is located on a given chromosome") + (gnt:chr rdfs:domain gnc:genotype) + (gnt:mb a owl:ObjectProperty) + (gnt:mb skos:definition "The size of this resource in Mb") + (gnt:mb rdfs:domain gnc:genotype) + (gnt:mbMm8 a 
owl:ObjectProperty) + (gnt:mbMm8 skos:definition "TODO") + (gnt:mbMm8 rdfs:domain gnc:genotype) + (gnt:mb2016 a owl:ObjectProperty) + (gnt:mb2016 skos:definition "TODO") + (gnt:mb2016 rdfs:domain gnc:genotype) + (gnt:hasSequence a owl:ObjectProperty) + (gnt:hasSequence skos:definition "This resource has a given sequence") + (gnt:hasSequence rdfs:domain gnc:genotype) + (gnt:hasSource a owl:ObjectProperty) + (gnt:hasSource rdfs:domain gnc:genotype) + (gnt:hasSource skos:definition "This resource was obtained from this given source") + (gnt:hasAltSourceName a owl:ObjectProperty) + (gnt:hasAltSourceName rdfs:domain gnc:genotype) + (gnt:hasAltSourceName + skos:definition + "The alternative name this resource was obtained from") + (gnt:chrNum a owl:ObjectProperty) + (gnt:chrNum rdfs:domain gnc:genotype) + (gnt:chrNum skos:definition "The chromosome number for this resource") + (gnt:cM a owl:ObjectProperty) + (gnt:cM rdfs:domain gnc:genotype) + (gnt:cM skos:definition "The centimorgan for this resource") + (gnt:usedForMapping a owl:ObjectProperty) + (gnt:usedForMapping rdfs:domain gnc:genotype) + (gnt:usedForMapping + skos:definition "This indicates whether this resource is used for mapping")) (triples (string->identifier "" (regexp-substitute/global #f "[^A-Za-z0-9:]" - (field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, ':')), Geno.Name)" abbrev)) + (field ("CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, '_')), Geno.Name)" abbrev)) 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) (set rdf:type 'gnc:genotype) - (set gnt:name (sanitize-rdf-string (field Geno Name))) - (set gnt:markerName (sanitize-rdf-string (field Geno Marker_Name))) + (set skos:prefLabel (sanitize-rdf-string (field Geno Name))) (set gnt:chr (field Geno Chr)) - (set gnt:mb (annotate-field (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) - (set gnt:sequence (field Geno Sequence)) - (set gnt:source (field Geno Source)) - (set gnt:source2 (field Geno 
Source2)) - (set gnt:genotypeOfDataset + (set gnt:mb (annotate-field + (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double)) + (set gnt:mbMm8 (annotate-field (field ("IFNULL(Geno.Mb_mm8, '')" Mb_mm8)) + '^^xsd:double)) + (set gnt:mb2016 + (annotate-field (field ("IFNULL(Geno.Mb_2016, '')" Mb_2016)) + '^^xsd:double)) + (set gnt:hasSequence (field Geno Sequence)) + (set gnt:hasSource (field Geno Source)) + ;; Only dump Source2 if it differs from Source + (set gnt:hasAltSourceName + (field ("IF((Source2 = Source), NULL, Source2)" + Source2))) + (set gnt:belongsToDataset (string->identifier "" (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(GenoFreeze.Name, '')" DatasetName)) - 'pre "_" 'post) + #f "[^A-Za-z0-9:]" + (field ("IFNULL(GenoFreeze.Name, '')" DatasetName)) + 'pre "_" 'post) #:separator "" - #:proc string-capitalize-first) - ) + #:proc string-capitalize-first)) (set gnt:chrNum (annotate-field - (field ("IFNULL(Geno.chr_num, '')" chr_num)) + (field Geno chr_num) '^^xsd:int)) - (set gn:comments (field ("CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255))" Comments))) + (set rdfs:comments (field Geno Comments)) (set gnt:cM (annotate-field - (field ("IFNULL(GenoXRef.cM, '')" Chr_mm8)) + (field GenoXRef cM) '^^xsd:int)))) @@ -109,6 +144,8 @@ ("gnt:" "<http://genenetwork.org/term/>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") + ("owl:" "<http://www.w3.org/2002/07/owl#>") + ("skos:" "<http://www.w3.org/2004/02/skos/core#>") ("xsd:" "<http://www.w3.org/2001/XMLSchema#>"))) (inputs (list dump-genofreeze |