From 0e9401165a0c3ad2b891c01efc485431f3229ef8 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 20 Jul 2023 09:25:18 +0300 Subject: Update metadata information Signed-off-by: Munyoki Kilyungi --- rdf-documentation/dump-gene-chip.md | 100 ++++++++++++++ rdf-documentation/dump-genotype.md | 112 +++++++++++++++ rdf-documentation/dump-info-pages.md | 203 +++++++++++----------------- rdf-documentation/dump-phenotype.md | 122 +++++++++++++++++ rdf-documentation/dump-probeset-metadata.md | 58 ++++++++ rdf-documentation/dump-publication.md | 44 +++--- rdf-documentation/dump-species-metadata.md | 112 ++++++--------- rdf-documentation/dump-tissue.md | 16 +-- 8 files changed, 536 insertions(+), 231 deletions(-) create mode 100644 rdf-documentation/dump-gene-chip.md create mode 100644 rdf-documentation/dump-genotype.md create mode 100644 rdf-documentation/dump-phenotype.md create mode 100644 rdf-documentation/dump-probeset-metadata.md diff --git a/rdf-documentation/dump-gene-chip.md b/rdf-documentation/dump-gene-chip.md new file mode 100644 index 0000000..e25573a --- /dev/null +++ b/rdf-documentation/dump-gene-chip.md @@ -0,0 +1,100 @@ +# Probeset freeze metadata +## 'dump-gene-chip' + + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT GeneChip.Name, GeneChip.GeneChipName, GeneChip.GeoPlatform FROM GeneChip +``` + +The above query results in triples that have the form: + +```text +gn:platform_genechip_name -> rdf:type -> gn:platform +gn:platform_genechip_name -> gn-term:name -> GeneChip(GeneChipName) +gn:platform_genechip_name -> gn-term:geoPlatform -> geoSeries:GeneChip(GeoPlatform) +``` +Here's an example query: + +```sparql +@prefix geoSeries: . +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . + +SELECT ?s ?p ?o WHERE { + ?s rdf:type gn:platform . + ?s gn-term:name "Affy Mouse Genome U74Av2 (GPL81)" . + ?s gn-term:geoPlatform geoSeries:GPL81 . + ?s ?p ?o . 
+} +``` + +Expected Result: + +```rdf +gn:platform_mg_u74av2 rdf:type gn:platform . +gn:platform_mg_u74av2 gn-term:name "Affy Mouse Genome U74Av2 (GPL81)" . +gn:platform_mg_u74av2 gn-term:geoPlatform geoSeries:GPL81 . +``` + + +## 'dump-probesetfreeze' + + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT ProbeSetFreeze.Name, AvgMethod.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.CreateTime, ProbeSetFreeze.DataScale, Tissue.Short_Name, InbredSet.Name AS InbredSetName FROM ProbeSetFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = ProbeSetFreeze.Name LEFT JOIN ProbeFreeze USING (ProbeFreezeId) LEFT JOIN AvgMethod ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID LEFT JOIN InbredSet ON ProbeFreeze.InbredSetId = InbredSet.Id LEFT JOIN Tissue ON ProbeFreeze.TissueId = Tissue.TissueId WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id +``` + +The above query results in triples that have the form: + +```text +gn:Probesetfreeze_name_ -> rdf:type -> gn:probesetDataset +gn:Probesetfreeze_name_ -> gn-term:avgMethod -> gn:avgmethod_avgmethod_name +gn:Probesetfreeze_name_ -> gn-term:fullName -> ProbeSetFreeze(FullName) +gn:Probesetfreeze_name_ -> gn-term:shortName -> ProbeSetFreeze(ShortName) +gn:Probesetfreeze_name_ -> dct:created -> "ProbeSetFreeze(CreateTime)"^^xsd:datetime +gn:Probesetfreeze_name_ -> gn-term:dataScale -> ProbeSetFreeze(DataScale) +gn:Probesetfreeze_name_ -> gn-term:tissueName -> gn:tissue_tissue_short_name +gn:Probesetfreeze_name_ -> gn-term:datasetOfInbredSet -> gn:inbredSet_inbredset_inbredsetname +``` +Here's an example query: + +```sparql +@prefix geoSeries: . +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . + +SELECT ?s ?p ?o WHERE { + ?s rdf:type gn:probesetDataset . + ?s gn-term:avgMethod gn:avgmethod_cmmtubcbxdp00cerilm0513 . + ?s gn-term:fullName "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . 
+ ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:Cmmtubcbxdp00cerilm0513 rdf:type gn:probesetDataset . +gn:Cmmtubcbxdp00cerilm0513 gn-term:avgMethod gn:avgmethod_cmmtubcbxdp00cerilm0513 . +gn:Cmmtubcbxdp00cerilm0513 gn-term:fullName "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . +gn:Cmmtubcbxdp00cerilm0513 gn-term:shortName "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . +gn:Cmmtubcbxdp00cerilm0513 dct:created "2013-04-22"^^xsd:datetime . +gn:Cmmtubcbxdp00cerilm0513 gn-term:dataScale "log2" . +gn:Cmmtubcbxdp00cerilm0513 gn-term:tissueName gn:tissue_cb . +gn:Cmmtubcbxdp00cerilm0513 gn-term:datasetOfInbredSet gn:inbredSet_bxd . +``` + diff --git a/rdf-documentation/dump-genotype.md b/rdf-documentation/dump-genotype.md new file mode 100644 index 0000000..cc35bc4 --- /dev/null +++ b/rdf-documentation/dump-genotype.md @@ -0,0 +1,112 @@ +# Genotype Metadata +## 'dump-genofreeze' + + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT GenoFreeze.Name, GenoFreeze.Name, GenoFreeze.FullName, GenoFreeze.ShortName, GenoFreeze.CreateTime, InbredSet.Name AS InbredSetName FROM GenoFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN InbredSet ON GenoFreeze.InbredSetId = InbredSet.InbredSetId WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL +``` + +The above query results in triples that have the form: + +```text +gn:Genofreeze_name_ -> rdf:type -> gn:genotypeDataset +gn:Genofreeze_name_ -> gn-term:name -> GenoFreeze(Name) +gn:Genofreeze_name_ -> gn-term:fullName -> GenoFreeze(FullName) +gn:Genofreeze_name_ -> gn-term:shortName -> GenoFreeze(ShortName) +gn:Genofreeze_name_ -> dct:created -> "GenoFreeze(CreateTime)"^^xsd:date +gn:Genofreeze_name_ -> gn-term:datasetOfInbredSet -> gn:_inbredset_inbredsetname +``` +Here's an example query: + +```sparql +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . 
+ +SELECT ?s ?p ?o WHERE { + ?s rdf:type gn:genotypeDataset . + ?s gn-term:name "B6D2RIGeno" . + ?s gn-term:fullName "B6D2RI Genotypes" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:B6d2rigeno rdf:type gn:genotypeDataset . +gn:B6d2rigeno gn-term:name "B6D2RIGeno" . +gn:B6d2rigeno gn-term:fullName "B6D2RI Genotypes" . +gn:B6d2rigeno gn-term:shortName "B6D2RIGeno" . +gn:B6d2rigeno dct:created "2022-10-24"^^xsd:date . +gn:B6d2rigeno gn-term:datasetOfInbredSet gn:_b6d2ri . +``` + + +## 'dump-genotypes' + + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT CONCAT(IF(GenoFreeze.Name IS NULL, '', CONCAT(GenoFreeze.Name, ':')), Geno.Name) AS abbrev, Geno.Name, Geno.Marker_Name, Geno.Chr, IFNULL(Geno.Mb, '') AS Mb, Geno.Sequence, Geno.Source, Geno.Source2, IFNULL(GenoFreeze.Name, '') AS DatasetName, IFNULL(Geno.chr_num, '') AS chr_num, CAST(CONVERT(BINARY CONVERT(Geno.Comments USING latin1) USING utf8) AS VARCHAR(255)) AS Comments, IFNULL(GenoXRef.cM, '') AS Chr_mm8 FROM Geno LEFT JOIN GenoXRef ON Geno.Id = GenoXRef.GenoId LEFT JOIN GenoFreeze ON GenoFreeze.Id = GenoXRef.GenoFreezeId LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = GenoFreeze.Name +``` + +The above query results in triples that have the form: + +```text +gn:Abbrev -> rdf:type -> gn:genotype +gn:Abbrev -> gn-term:name -> GenoName +gn:Abbrev -> gn-term:markerName -> GenoMarker_Name +gn:Abbrev -> gn-term:chr -> Geno(Chr) +gn:Abbrev -> gn-term:mb -> "Mb"^^xsd:double +gn:Abbrev -> gn-term:sequence -> Geno(Sequence) +gn:Abbrev -> gn-term:source -> Geno(Source) +gn:Abbrev -> gn-term:source2 -> Geno(Source2) +gn:Abbrev -> gn-term:genotypeOfDataset -> gn:Datasetname +gn:Abbrev -> gn-term:chrNum -> "chr_num"^^xsd:int +gn:Abbrev -> gn:comments -> Comments +gn:Abbrev -> gn-term:cM -> "Chr_mm8"^^xsd:int +``` +Here's an example query: + +```sparql +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . 
+ +SELECT ?s ?p ?o WHERE { + ?s rdf:type gn:genotype . + ?s gn-term:name "D1Mit296" . + ?s gn-term:markerName "D1Mit296" . + ?s gn-term:chr "1" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:Axbxageno:d1mit296 rdf:type gn:genotype . +gn:Axbxageno:d1mit296 gn-term:name "D1Mit296" . +gn:Axbxageno:d1mit296 gn-term:markerName "D1Mit296" . +gn:Axbxageno:d1mit296 gn-term:chr "1" . +gn:Axbxageno:d1mit296 gn-term:mb "9.749729"^^xsd:double . +gn:Axbxageno:d1mit296 gn-term:sequence "CTTGCATGCCTGCGGNTNCGNACTCTAGAGGATCTCCCTATTATTNTNACATNACTTTNAATTAAAATAATAATCAGATAACTTCAACNNNNTGNNCACTTCTGTCAAGTGGACAGAAATAAACATAGAGCCTAATTATCCTGAATTTNAGAGAAAAGAGTGTGTTTANCACAANAGAACAGTTATAGATCTACACACACACACACACACACACACACACACACATACAGTTTGAAAAATGCATCAGTTGAGACC" . +gn:Axbxageno:d1mit296 gn-term:source "Mit" . +gn:Axbxageno:d1mit296 gn-term:source2 "Mit" . +gn:Axbxageno:d1mit296 gn-term:genotypeOfDataset gn:Axbxageno . +gn:Axbxageno:d1mit296 gn-term:chrNum "1"^^xsd:int . +gn:Axbxageno:d1mit296 gn-term:cM "0"^^xsd:int . 
+``` + diff --git a/rdf-documentation/dump-info-pages.md b/rdf-documentation/dump-info-pages.md index e51f33b..7cd2ecd 100644 --- a/rdf-documentation/dump-info-pages.md +++ b/rdf-documentation/dump-info-pages.md @@ -1,42 +1,7 @@ # Info files / Investigators Metadata ## 'dump-info-files' -## Schema Triples: -```text -gn:dataset -> rdfs:range -> rdfs:Literal -gn:datasetOfInvestigator -> rdfs:domain -> gn:dataset -gn:datasetOfOrganization -> rdfs:domain -> gn:dataset -gn:datasetOfInvestigator -> rdfs:range -> foaf:Person -gn:datasetOfInbredSet -> rdfs:domain -> gn:dataset -gn:datasetOfInbredSet -> rdfs:range -> gn:inbredSet -gn:datasetOfSpecies -> rdfs:domain -> gn:dataset -gn:datasetOfSpecies -> rdfs:range -> gn:inbredSet -gn:datasetOfTissue -> rdfs:domain -> gn:dataset -gn:datasetOfTissue -> rdfs:range -> gn:tissue -gn:normalization -> rdfs:domain -> gn:dataset -gn:normalization -> rdfs:range -> gn:avgMethod -gn:datasetOfPlatform -> rdfs:domain -> gn:dataset -gn:datasetOfPlatform -> rdfs:range -> gn:geneChip -gn:accessionId -> rdfs:range -> rdfs:Literal -gn:datasetStatusName -> rdfs:range -> rdfs:Literal -gn:summary -> rdfs:range -> rdfs:Literal -gn:aboutTissue -> rdfs:range -> rdfs:Literal -gn:geoSeries -> rdfs:range -> rdfs:Literal -gn:name -> rdfs:range -> rdfs:Literal -gn:title -> rdfs:range -> rdfs:Literal -gn:publicationTitle -> rdfs:range -> rdfs:Literal -gn:specifics -> rdfs:range -> rdfs:Literal -gn:datasetGroup -> rdfs:range -> rdfs:Literal -gn:aboutCases -> rdfs:range -> rdfs:Literal -gn:aboutPlatform -> rdfs:range -> rdfs:Literal -gn:aboutDataProcessing -> rdfs:range -> rdfs:Literal -gn:notes -> rdfs:range -> rdfs:Literal -gn:experimentDesign -> rdfs:range -> rdfs:Literal -gn:contributors -> rdfs:range -> rdfs:Literal -gn:citation -> rdfs:range -> rdfs:Literal -gn:acknowledgment -> rdfs:range -> rdfs:Literal -``` ## Generated Triples: The following SQL query was executed: @@ -48,53 +13,51 @@ SELECT InfoFiles.InfoPageName, IF(GenoFreeze.Id IS NOT NULL, 
'gn:genotypeDataset The above query results to triples that have the form: ```text -dataset:InfoFiles_InfoPageName_ -> rdf:type -> rdfType -dataset:InfoFiles_InfoPageName_ -> gn:name -> InfoFiles(InfoPageName) -dataset:InfoFiles_InfoPageName_ -> gn:fullName -> DatasetFullName -dataset:InfoFiles_InfoPageName_ -> dct:created -> createTimeGenoFreeze -dataset:InfoFiles_InfoPageName_ -> gn:datasetOfInvestigator -> gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -dataset:InfoFiles_InfoPageName_ -> gn:datasetOfOrganization -> Organizations -dataset:InfoFiles_InfoPageName_ -> gn:accessionId -> GNInfoFiles(GN_AccesionId) -dataset:InfoFiles_InfoPageName_ -> gn:datasetStatusName -> datasetstatus(datasetstatusname) -dataset:InfoFiles_InfoPageName_ -> gn:datasetOfInbredSet -> gn:inbredSet_inbredset_inbredsetname_ -dataset:InfoFiles_InfoPageName_ -> gn:datasetOfTissue -> gn:tissue_tissue_short_name_ -dataset:InfoFiles_InfoPageName_ -> gn:normalization -> gn:avgmethod_avgmethod_avgmethodname_ -dataset:InfoFiles_InfoPageName_ -> gn:datasetOfPlatform -> gn:platform_genechip_genechip_ -dataset:InfoFiles_InfoPageName_ -> gn:summary -> DatasetsSummary -dataset:InfoFiles_InfoPageName_ -> gn:aboutTissue -> DatasetsAboutTissue -dataset:InfoFiles_InfoPageName_ -> gn:geoSeries -> -dataset:InfoFiles_InfoPageName_ -> gn:title -> InfoFiles(InfoFileTitle) -dataset:InfoFiles_InfoPageName_ -> gn:publicationTitle -> Datasets(PublicationTitle) -dataset:InfoFiles_InfoPageName_ -> gn:specifics -> InfoFilesSpecifics -dataset:InfoFiles_InfoPageName_ -> gn:datasetGroup -> Datasets(DatasetGroup) -dataset:InfoFiles_InfoPageName_ -> gn:aboutCases -> AboutCases -dataset:InfoFiles_InfoPageName_ -> gn:aboutPlatform -> AboutPlatform -dataset:InfoFiles_InfoPageName_ -> gn:aboutDataProcessing -> AboutDataProcessing -dataset:InfoFiles_InfoPageName_ -> gn:notes -> GNNotes -dataset:InfoFiles_InfoPageName_ -> gn:experimentDesign -> ExperimentDesign 
-dataset:InfoFiles_InfoPageName_ -> gn:contributors -> Contributors -dataset:InfoFiles_InfoPageName_ -> gn:citation -> Citation -dataset:InfoFiles_InfoPageName_ -> gn:dataSourceAcknowledgment -> Data_Source_Acknowledge -dataset:InfoFiles_InfoPageName_ -> gn:acknowledgment -> DatasetsAcknowledgment +gn:Infofiles_infopagename_ -> rdf:type -> rdfType +gn:Infofiles_infopagename_ -> gn-term:name -> InfoFiles(InfoPageName) +gn:Infofiles_infopagename_ -> gn-term:fullName -> DatasetFullName +gn:Infofiles_infopagename_ -> dct:created -> createTimeGenoFreeze +gn:Infofiles_infopagename_ -> gn-term:datasetOfInvestigator -> gn:investigator_investigators_firstname_investigators_lastname_investigators_email +gn:Infofiles_infopagename_ -> gn-term:datasetOfOrganization -> Organizations +gn:Infofiles_infopagename_ -> gn-term:accessionId -> GNInfoFiles(GN_AccesionId) +gn:Infofiles_infopagename_ -> gn-term:datasetStatusName -> datasetstatus(datasetstatusname) +gn:Infofiles_infopagename_ -> gn-term:datasetOfInbredSet -> gn:inbredSet_inbredset_inbredsetname +gn:Infofiles_infopagename_ -> gn-term:datasetOfTissue -> gn:tissue_tissue_short_name +gn:Infofiles_infopagename_ -> gn-term:normalization -> gn:avgmethod_avgmethod_avgmethodname +gn:Infofiles_infopagename_ -> gn-term:datasetOfPlatform -> gn:platform_genechip_genechip +gn:Infofiles_infopagename_ -> gn-term:summary -> DatasetsSummary +gn:Infofiles_infopagename_ -> gn-term:aboutTissue -> DatasetsAboutTissue +gn:Infofiles_infopagename_ -> gn-term:geoSeries -> +gn:Infofiles_infopagename_ -> gn-term:title -> InfoFiles(InfoFileTitle) +gn:Infofiles_infopagename_ -> gn-term:publicationTitle -> Datasets(PublicationTitle) +gn:Infofiles_infopagename_ -> gn-term:specifics -> InfoFilesSpecifics +gn:Infofiles_infopagename_ -> gn-term:datasetGroup -> Datasets(DatasetGroup) +gn:Infofiles_infopagename_ -> gn-term:aboutCases -> AboutCases +gn:Infofiles_infopagename_ -> gn-term:aboutPlatform -> AboutPlatform +gn:Infofiles_infopagename_ -> 
gn-term:aboutDataProcessing -> AboutDataProcessing +gn:Infofiles_infopagename_ -> gn-term:notes -> GNNotes +gn:Infofiles_infopagename_ -> gn-term:experimentDesign -> ExperimentDesign +gn:Infofiles_infopagename_ -> gn-term:contributors -> Contributors +gn:Infofiles_infopagename_ -> gn-term:citation -> Citation +gn:Infofiles_infopagename_ -> gn-term:dataSourceAcknowledgment -> Data_Source_Acknowledge +gn:Infofiles_infopagename_ -> gn-term:acknowledgment -> DatasetsAcknowledgment ``` Here's an example query: ```sparql -PREFIX dct: -PREFIX geoSeries: -PREFIX rdf: -PREFIX rdfs: -PREFIX gn: -PREFIX foaf: -PREFIX taxon: -PREFIX dataset: +@prefix foaf: . +@prefix geoSeries: . +@prefix gn-term: . +@prefix gn: . +@prefix rdf: . +@prefix rdfs: . +@prefix taxon: . +@prefix dct: . SELECT ?s ?p ?o WHERE { ?s rdf:type gn:dataset . - ?s gn:name "Br_U_0803_M" . + ?s gn-term:name "Br_U_0803_M" . ?s dct:created "2003-08-01" . - ?s gn:datasetOfInvestigator gn:investigator_robert_williams_rwilliams_uthsc.edu . - ?s gn:datasetOfOrganization "University of Tennessee Health Science Center" . ?s ?p ?o . } ``` @@ -102,45 +65,33 @@ SELECT ?s ?p ?o WHERE { Expected Result: ```rdf -dataset:Br_U_0803_M rdf:type gn:dataset . -dataset:Br_U_0803_M gn:name "Br_U_0803_M" . -dataset:Br_U_0803_M dct:created "2003-08-01" . -dataset:Br_U_0803_M gn:datasetOfInvestigator gn:investigator_robert_williams_rwilliams_uthsc.edu . -dataset:Br_U_0803_M gn:datasetOfOrganization "University of Tennessee Health Science Center" . -dataset:Br_U_0803_M gn:accessionId "GN1" . -dataset:Br_U_0803_M gn:datasetStatusName "public" . -dataset:Br_U_0803_M gn:datasetOfInbredSet gn:inbredSet_bxd . -dataset:Br_U_0803_M gn:datasetOfTissue gn:tissue_brn . -dataset:Br_U_0803_M gn:normalization gn:avgmethod_mas5 . -dataset:Br_U_0803_M gn:datasetOfPlatform gn:platform_mg_u74av2 . -dataset:Br_U_0803_M gn:summary "

This August 2003 freeze provides estimates of mRNA expression in brains of BXD recombinant inbred mice measured using Affymetrix U74Av2 microarrays. This is data set includes six arrays which are of marginal quality. New users are encouraged to use one of the more recent data sets December 2003 or March 2004 from which these six arrays have been excluded. Data were generated at the University of Tennessee Health Science Center UTHSC. Over 300 brain samples from 35 strains were hybridized in small pools n=3 to 106 arrays. Data were processed using the Microarray Suite 5 MAS 5 protocol of Affymetrix. To simplify comparison between transforms, MAS 5 values of each array were adjusted to an average of 8 units and a variance of 2 units. In general, the MAS 5 transform does not perform as well as RMA, PDNN, or the new heritability weighted transforms HW1PM.

" . -dataset:Br_U_0803_M gn:aboutTissue "

Each array was hybridized with labeled cRNA generated from a pool of three brains from adult animals usually of the same age and always of the same sex. The brain region included most of the forebrain and midbrain, bilaterally. However, the sample excluded the olfactory bulbs, retinas, or the posterior pituitary all formally part of the forebrain. A total of 100 such pooled samples were arrayed: 74 from females and 26 from males. Animals ranged in age from 56 to 441 days, usually with a balanced design: one pool at approximately 8 weeks, one pool at approximately 20 weeks, and one pool at approximately 1 year. Strain averages of mRNA expression level are therefore typically based on three pooled biological replicate arrays. This data set does not incorporate statistical adjustment for possible effects of age and sex. Users can select the strain symbol in the table above to review details about the specific cases and array processing center DP = Divyen Patel at Genome Explorations, Inc; TS = Thomas Sutter at University of Memphis. You can also click on the individual symbols males or females to view the array image.

" . -dataset:Br_U_0803_M gn:title "UTHSC Brain mRNA U74Av2 (Aug03) MAS5" . -dataset:Br_U_0803_M gn:datasetGroup "UTHSC Brain mRNA U74Av2 (Aug-Sep03)" . -dataset:Br_U_0803_M gn:aboutCases "

This data set includes estimate of gene expression for 35 genetically uniform lines of mice: C57BL/6J B6, or simply B, DBA/2J D2 or D, their B6D2 F1 intercross, and 32 BXD recombinant inbred RI strains derived by crossing female B6 mice with male D2 mice and then inbreeding progeny for over 21 generations. This set of RI strains is a remarkable resource because many of these strains have been extensively phenotyped for hundreds of interesting traits over a 25-year period. A significant advantage of this RI set is that the two parental strains B6 and D2 have both been extensively sequenced and are known to differ at approximately 1.8 million SNPs. Coding variants mostly single nucleotide polymorphisms and insertion-deletions that may produce interesting phenotypes can be rapidly identified in this particular RI set.

\r\n\r\n

BXD1 through BXD32 were produced by Benjamin A. Taylor starting in the late 1970s. BXD33 through BXD42 were also produced by Taylor, but from a second set of crosses initiated in the early 1990s. These strains are all available from the Jackson Laboratory, Bar Harbor, Maine. BXD43 through BXD99 were produced by Lu Lu, Jeremy Peirce, Lee M. Silver, and Robert W. Williams in the late 1990s and early 2000s using advanced intercross progeny Peirce et al. 2004. Only two of these incipient strains are included in the current database BXD67 and BXD68.

\r\n\r\n

In this mRNA expression database we generally used progeny of stock obtained from The Jackson Laboratory between 1999 and 2001. Animals were generated in-house at the University of Alabama by John Mountz and Hui-Chen Hsu and at the University of Tennessee Health Science Center by Lu Lu and Robert Williams.

\r\n\r\n

The table below lists the arrays by strain, sex, and age. Each array was hybridized to a pool of mRNA from three mice. Note that this table includes six arrays dropped from the December 2003 data sets BXD6, n=2; BXD12, BXD16, BXD40, and BXD67, n=1 each.

\r\n\r\n\r\n \r\n \r\n
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n
Strain\r\n

Age

\r\n
Strain\r\n

Age

\r\n
\r\n

8 Wks

\r\n
\r\n

20 Wks

\r\n
\r\n

52 Wks

\r\n
\r\n

8 Wks

\r\n
\r\n

20 Wks

\r\n
\r\n

52 Wks

\r\n
C57BL/6J B6???????????????DBA/2J D2???????????? 
B6D2F1 F1??? ?????? BXD1?????? ???
BXD2?????????BXD5?????????  
BXD6?????????BXD8????????? 
BXD9?????????BXD11?????? ???
BXD12 ?????????BXD13???   
BXD14 ?????????BXD15??? ???
BXD16????????? BXD18?????????
BXD19?????????BXD21???????????? 
BXD22??????http://genome.ucsc.edu/cgi-bin/hgBlat?command=start&org=mouse. We thank Yan Cui UTHSC for allowing us to use his Linux cluster to perform this analysis. It is possiible to confirm the BLAT alignment results yourself simply by clicking on the Verify link in the Trait Data and Editing Form right side of the Location line.

" . -dataset:Br_U_0803_M gn:aboutDataProcessing "
Probe cell level data from the CEL file: Probe signal intensity estimates in the Affymetrix CEL files are the 75% quantile value taken from a set of 36 6x6 pixels per probe cell in the DAT image file.\r\n
    \r\n
  • Step 1: We added an offset of 1.0 to the CEL expression values for each cell to ensure that all values could be logged without generating negative values.
  • \r\n
  • Step 2: We took the log2 of each cell signal intensity.
  • \r\n
  • Step 3: We computed the Z score for each of these log2 cell signal intensity values within a single array.
  • \r\n
  • Step 4: We multiplied all Z scores by 2.
  • \r\n
  • Step 5: We added a constant of 8 units to the value of the Z score. The consequence of this simple set of transformations is to produce a set of Z scores that have a mean of 8 units, a variance of 4 units, and a standard deviation of 2 units. The advantage of this modified Z score is that a 2-fold difference in expression level corresponds roughly to 1 unit.
  • \r\n
  • Step 6: We computed the arithmetic mean of the values for the set of microarrays for each strain. We have not corrected for variance introduced by sex, age, source of animals, or any possible interaction. We have not corrected for background beyond that implemented by Affymetrix in generating the CEL file.
  • \r\n
\r\nProbe set data from the CHP file: Probe set estimates of expression were initi" . -dataset:Br_U_0803_M gn:notes "

This text file originally generated by RWW, EJC, and YHQ, August 2003. Updated by RWW, October 30, 2004.

" . -dataset:Br_U_0803_M gn:dataSourceAcknowledgment "

Data were generated with funds to RWW from the Dunavant Chair of\r\nExcellence, University of Tennessee Health Science Center, Department\r\nof Pediatrics. The majority of arrays were processed at Genome Explorations by Divyen Patel. We thank Guomin Zhou for generating advanced intercross stock used to produce most of the new BXD RI strains.\r\n

" . -dataset:Br_U_0803_M gn:acknowledgment "

Data were generated with funds to RWW from the Dunavant Chair of Excellence, University of Tennessee Health Science Center, Department of Pediatrics. The majority of arrays were processed at Genome Explorations by Divyen Patel. We thank Guomin Zhou for generating advanced intercross stock used to produce most of the new BXD RI strains.

" . +gn:Br_u_0803_m rdf:type gn:dataset . +gn:Br_u_0803_m gn-term:name "Br_U_0803_M" . +gn:Br_u_0803_m dct:created "2003-08-01" . +gn:Br_u_0803_m gn-term:datasetOfInvestigator gn:investigator_robert_williams_rwilliams_uthsc.edu . +gn:Br_u_0803_m gn-term:datasetOfOrganization "University of Tennessee Health Science Center" . +gn:Br_u_0803_m gn-term:accessionId "GN1" . +gn:Br_u_0803_m gn-term:datasetStatusName "public" . +gn:Br_u_0803_m gn-term:datasetOfInbredSet gn:inbredSet_bxd . +gn:Br_u_0803_m gn-term:datasetOfTissue gn:tissue_brn . +gn:Br_u_0803_m gn-term:normalization gn:avgmethod_mas5 . +gn:Br_u_0803_m gn-term:datasetOfPlatform gn:platform_mg_u74av2 . +gn:Br_u_0803_m gn-term:summary "

This August 2003 freeze provides estimates of mRNA expression in brains of BXD recombinant inbred mice measured using Affymetrix U74Av2 microarrays. This is data set includes six arrays which are of marginal quality. New users are encouraged to use one of the more recent data sets December 2003 or March 2004 from which these six arrays have been excluded. Data were generated at the University of Tennessee Health Science Center UTHSC. Over 300 brain samples from 35 strains were hybridized in small pools n=3 to 106 arrays. Data were processed using the Microarray Suite 5 MAS 5 protocol of Affymetrix. To simplify comparison between transforms, MAS 5 values of each array were adjusted to an average of 8 units and a variance of 2 units. In general, the MAS 5 transform does not perform as well as RMA, PDNN, or the new heritability weighted transforms HW1PM.

" . +gn:Br_u_0803_m gn-term:aboutTissue "

Each array was hybridized with labeled cRNA generated from a pool of three brains from adult animals usually of the same age and always of the same sex. The brain region included most of the forebrain and midbrain, bilaterally. However, the sample excluded the olfactory bulbs, retinas, or the posterior pituitary all formally part of the forebrain. A total of 100 such pooled samples were arrayed: 74 from females and 26 from males. Animals ranged in age from 56 to 441 days, usually with a balanced design: one pool at approximately 8 weeks, one pool at approximately 20 weeks, and one pool at approximately 1 year. Strain averages of mRNA expression level are therefore typically based on three pooled biological replicate arrays. This data set does not incorporate statistical adjustment for possible effects of age and sex. Users can select the strain symbol in the table above to review details about the specific cases and array processing center DP = Divyen Patel at Genome Explorations, Inc; TS = Thomas Sutter at University of Memphis. You can also click on the individual symbols males or females to view the array image.

" . +gn:Br_u_0803_m gn-term:title "UTHSC Brain mRNA U74Av2 (Aug03) MAS5" . +gn:Br_u_0803_m gn-term:datasetGroup "UTHSC Brain mRNA U74Av2 (Aug-Sep03)" . +gn:Br_u_0803_m gn-term:aboutCases "

This data set includes estimate of gene expression for 35 genetically uniform lines of mice: C57BL/6J B6, or simply B, DBA/2J D2 or D, their B6D2 F1 intercross, and 32 BXD recombinant inbred RI strains derived by crossing female B6 mice with male D2 mice and then inbreeding progeny for over 21 generations. This set of RI strains is a remarkable resource because many of these strains have been extensively phenotyped for hundreds of interesting traits over a 25-year period. A significant advantage of this RI set is that the two parental strains B6 and D2 have both been extensively sequenced and are known to differ at approximately 1.8 million SNPs. Coding variants mostly single nucleotide polymorphisms and insertion-deletions that may produce interesting phenotypes can be rapidly identified in this particular RI set.

\r\n\r\n

BXD1 through BXD32 were produced by Benjamin A. Taylor starting in the late 1970s. BXD33 through BXD42 were also produced by Taylor, but from a second set of crosses initiated in the early 1990s. These strains are all available from the Jackson Laboratory, Bar Harbor, Maine. BXD43 through BXD99 were produced by Lu Lu, Jeremy Peirce, Lee M. Silver, and Robert W. Williams in the late 1990s and early 2000s using advanced intercross progeny Peirce et al. 2004. Only two of these incipient strains are included in the current database BXD67 and BXD68.

\r\n\r\n

In this mRNA expression database we generally used progeny of stock obtained from The Jackson Laboratory between 1999 and 2001. Animals were generated in-house at the University of Alabama by John Mountz and Hui-Chen Hsu and at the University of Tennessee Health Science Center by Lu Lu and Robert Williams.

\r\n\r\n

The table below lists the arrays by strain, sex, and age. Each array was hybridized to a pool of mRNA from three mice. Note that this table includes six arrays dropped from the December 2003 data sets BXD6, n=2; BXD12, BXD16, BXD40, and BXD67, n=1 each.

\r\n\r\n\r\n \r\n \r\n
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n
Strain\r\n

Age

\r\n
Strain\r\n

Age

\r\n
\r\n

8 Wks

\r\n
\r\n

20 Wks

\r\n
\r\n

52 Wks

\r\n
\r\n

8 Wks

\r\n
\r\n

20 Wks

\r\n
\r\n

52 Wks

\r\n
C57BL/6J B6???????????????DBA/2J D2???????????? 
B6D2F1 F1??? ?????? BXD1?????? ???
BXD2?????????BXD5?????????  
BXD6?????????BXD8????????? 
BXD9?????????BXD11?????? ???
BXD12 ?????????BXD13???   
BXD14 ?????????BXD15??? ???
BXD16????????? BXD18?????????
BXD19?????????BXD21???????????? 
BXD22??????http://genome.ucsc.edu/cgi-bin/hgBlat?command=start&org=mouse. We thank Yan Cui UTHSC for allowing us to use his Linux cluster to perform this analysis. It is possiible to confirm the BLAT alignment results yourself simply by clicking on the Verify link in the Trait Data and Editing Form right side of the Location line.

" . +gn:Br_u_0803_m gn-term:aboutDataProcessing "
Probe cell level data from the CEL file: Probe signal intensity estimates in the Affymetrix CEL files are the 75% quantile value taken from a set of 36 6x6 pixels per probe cell in the DAT image file.\r\n
    \r\n
  • Step 1: We added an offset of 1.0 to the CEL expression values for each cell to ensure that all values could be logged without generating negative values.
  • \r\n
  • Step 2: We took the log2 of each cell signal intensity.
  • \r\n
  • Step 3: We computed the Z score for each of these log2 cell signal intensity values within a single array.
  • \r\n
  • Step 4: We multiplied all Z scores by 2.
  • \r\n
  • Step 5: We added a constant of 8 units to the value of the Z score. The consequence of this simple set of transformations is to produce a set of Z scores that have a mean of 8 units, a variance of 4 units, and a standard deviation of 2 units. The advantage of this modified Z score is that a 2-fold difference in expression level corresponds roughly to 1 unit.
  • \r\n
  • Step 6: We computed the arithmetic mean of the values for the set of microarrays for each strain. We have not corrected for variance introduced by sex, age, source of animals, or any possible interaction. We have not corrected for background beyond that implemented by Affymetrix in generating the CEL file.
  • \r\n
\r\nProbe set data from the CHP file: Probe set estimates of expression were initi" . +gn:Br_u_0803_m gn-term:notes "

This text file originally generated by RWW, EJC, and YHQ, August 2003. Updated by RWW, October 30, 2004.

" . +gn:Br_u_0803_m gn-term:dataSourceAcknowledgment "

Data were generated with funds to RWW from the Dunavant Chair of\r\nExcellence, University of Tennessee Health Science Center, Department\r\nof Pediatrics. The majority of arrays were processed at Genome Explorations by Divyen Patel. We thank Guomin Zhou for generating advanced intercross stock used to produce most of the new BXD RI strains.\r\n

" . +gn:Br_u_0803_m gn-term:acknowledgment "

Data were generated with funds to RWW from the Dunavant Chair of Excellence, University of Tennessee Health Science Center, Department of Pediatrics. The majority of arrays were processed at Genome Explorations by Divyen Patel. We thank Guomin Zhou for generating advanced intercross stock used to produce most of the new BXD RI strains.

" . ``` ## 'dump-investigators' -## Schema Triples: -```text -foaf:name -> rdfs:range -> rdfs:Literal -foaf:givenName -> rdfs:range -> rdfs:Literal -foaf:familyName -> rdfs:range -> rdfs:Literal -foaf:homepage -> rdfs:range -> rdfs:Literal -gn:address -> rdfs:range -> rdfs:Literal -gn:city -> rdfs:range -> rdfs:Literal -gn:state -> rdfs:range -> rdfs:Literal -gn:zipCode -> rdfs:range -> rdfs:Literal -gn:country -> rdfs:range -> rdfs:Literal -``` ## Generated Triples: The following SQL query was executed: @@ -152,28 +103,28 @@ SELECT Investigators.FirstName, Investigators.LastName, Investigators.Email, Inv The above query results to triples that have the form: ```text -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> rdf:type -> foaf:Person -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> foaf:name -> Investigators(FirstName) Investigators(LastName) -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> foaf:givenName -> FirstName -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> foaf:familyName -> LastName -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> foaf:homepage -> Investigators(Url) -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> gn:address -> Investigators(Address) -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> gn:city -> Investigators(City) -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> gn:state -> Investigators(State) -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> gn:zipCode -> Investigators(ZipCode) -gn:investigator_investigators_firstname__investigators_lastname__investigators_email_ -> gn:country -> Investigators(Country) 
+gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> rdf:type -> foaf:Person +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:name -> Investigators(FirstName) Investigators(LastName) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:givenName -> FirstName +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:familyName -> LastName +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:homepage -> Investigators(Url) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> gn-term:address -> Investigators(Address) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> gn-term:city -> Investigators(City) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> gn-term:state -> Investigators(State) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> gn-term:zipCode -> Investigators(ZipCode) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> gn-term:country -> Investigators(Country) ``` Here's an example query: ```sparql -PREFIX dct: -PREFIX geoSeries: -PREFIX rdf: -PREFIX rdfs: -PREFIX gn: -PREFIX foaf: -PREFIX taxon: -PREFIX dataset: +@prefix foaf: . +@prefix geoSeries: . +@prefix gn-term: . +@prefix gn: . +@prefix rdf: . +@prefix rdfs: . +@prefix taxon: . +@prefix dct: . SELECT ?s ?p ?o WHERE { ?s rdf:type foaf:Person . @@ -191,6 +142,6 @@ gn:investigator_evan_williams_ rdf:type foaf:Person . gn:investigator_evan_williams_ foaf:name "Evan Williams" . gn:investigator_evan_williams_ foaf:givenName "Evan" . gn:investigator_evan_williams_ foaf:familyName "Williams" . -gn:investigator_evan_williams_ gn:country "Switzerland" . +gn:investigator_evan_williams_ gn-term:country "Switzerland" . 
``` diff --git a/rdf-documentation/dump-phenotype.md b/rdf-documentation/dump-phenotype.md new file mode 100644 index 0000000..62e5488 --- /dev/null +++ b/rdf-documentation/dump-phenotype.md @@ -0,0 +1,122 @@ +# Phenotypes Metadata +## 'dump-publishfreeze' + + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT PublishFreeze.Name, PublishFreeze.Name, PublishFreeze.FullName, PublishFreeze.ShortName, PublishFreeze.CreateTime, InbredSet.Name AS InbredSetName FROM PublishFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = PublishFreeze.Name LEFT JOIN InbredSet ON PublishFreeze.InbredSetId = InbredSet.InbredSetId WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL +``` + +The above query results to triples that have the form: + +```text +gn:Publishfreeze_name_ -> rdf:type -> gn:phenotypeDataset +gn:Publishfreeze_name_ -> gn-term:name -> PublishFreeze(Name) +gn:Publishfreeze_name_ -> gn-term:fullName -> PublishFreeze(FullName) +gn:Publishfreeze_name_ -> gn-term:shortName -> PublishFreeze(ShortName) +gn:Publishfreeze_name_ -> dc-termt:created -> "PublishFreeze(CreateTime)"^^xsd:date +gn:Publishfreeze_name_ -> gn-term:datasetOfInbredSet -> gn:inbredSet_inbredset_inbredsetname +``` +Here's an example query: + +```sparql +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix pubmed: . + +SELECT ?s ?p ?o WHERE { + ?s rdf:type gn:phenotypeDataset . + ?s gn-term:name "B6D2F2-PSUPublish" . + ?s gn-term:fullName "B6D2F2 PSU Phenotypes" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:B6d2f2_psupublish rdf:type gn:phenotypeDataset . +gn:B6d2f2_psupublish gn-term:name "B6D2F2-PSUPublish" . +gn:B6d2f2_psupublish gn-term:fullName "B6D2F2 PSU Phenotypes" . +gn:B6d2f2_psupublish gn-term:shortName "B6D2F2 PSU Publish" . +gn:B6d2f2_psupublish dc-termt:created "2015-03-18"^^xsd:date . +gn:B6d2f2_psupublish gn-term:datasetOfInbredSet gn:inbredSet_b6d2f2-psu . 
+``` + + +## 'dump-phenotypes' + + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT CONCAT(IF(PublishFreeze.Name IS NULL, '', CONCAT(PublishFreeze.Name, '_')), IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation)) AS abbrev, CAST(CONVERT(BINARY CONVERT(IF(Phenotype.Post_publication_abbreviation IS NULL, IF(Phenotype.Pre_publication_abbreviation IS NULL, Phenotype.Id, Phenotype.Pre_publication_abbreviation), Phenotype.Post_publication_abbreviation) USING latin1) USING utf8) AS VARCHAR(100)) AS PhenotypeName, CAST(CONVERT(BINARY CONVERT(Phenotype.Post_publication_description USING latin1) USING utf8) AS CHAR(10000)) AS postPubDescr, Phenotype.Original_description, CAST(CONVERT(BINARY CONVERT(Phenotype.Pre_publication_description USING latin1) USING utf8) AS VARCHAR(15000)) AS prePubDesc, Phenotype.Pre_publication_abbreviation, Phenotype.Post_publication_abbreviation, Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, IFNULL(PublishXRef.mean, '') AS mean, PublishXRef.Locus, IFNULL(PublishXRef.LRS, '') AS lrs, IFNULL(PublishXRef.additive, '') AS additive, PublishXRef.Sequence, IFNULL(InfoFiles.InfoPageName, IFNULL(PublishFreeze.Name, '')) AS DatasetName, IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT)) AS pmid, Publication.Id FROM Phenotype LEFT JOIN PublishXRef ON Phenotype.Id = PublishXRef.PhenotypeId LEFT JOIN Publication ON Publication.Id = PublishXRef.PublicationId LEFT JOIN PublishFreeze ON PublishFreeze.InbredSetId = PublishXRef.InbredSetId LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = PublishFreeze.Name +``` + +The above query results to triples that have the form: + +```text +gn:Abbrev -> rdf:type -> gn:phenotype +gn:Abbrev -> gn-term:name -> PhenotypeName +gn:Abbrev -> gn-term:publicationDescription -> postPubDescr +gn:Abbrev -> gn-term:originalDescription -> 
PhenotypeOriginal_description +gn:Abbrev -> gn-term:prePublicationDescription -> prePubDesc +gn:Abbrev -> gn-term:prePublicationAbbreviation -> PhenotypePre_publication_abbreviation +gn:Abbrev -> gn-term:postPublicationAbbreviation -> PhenotypePost_publication_abbreviation +gn:Abbrev -> gn-term:labCode -> Phenotype(Lab_code) +gn:Abbrev -> gn-term:submitter -> PhenotypeSubmitter +gn:Abbrev -> gn-term:owner -> PhenotypeOwner +gn:Abbrev -> gn-term:mean -> "mean"^^xsd:double +gn:Abbrev -> gn-term:locus -> PublishXRef(Locus) +gn:Abbrev -> gn-term:LRS -> "lrs"^^xsd:float +gn:Abbrev -> gn-term:additive -> "additive"^^xsd:decimal +gn:Abbrev -> gn-term:sequence -> "PublishXRef(Sequence)"^^xsd:int +gn:Abbrev -> gn-term:phenotypeOfDataset -> gn:Datasetname +gn:Abbrev -> gn-term:phenotypeOfPublication -> pubmed:pmid +``` +Here's an example query: + +```sparql +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix pubmed: . + +SELECT ?s ?p ?o WHERE { + ?s rdf:type gn:phenotype . + ?s gn-term:name "CBLWT2" . + ?s gn-term:publicationDescription "Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]" . + ?s gn-term:originalDescription "Cerebellum weight [mg]" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:Bxdpublish_cblwt2 rdf:type gn:phenotype . +gn:Bxdpublish_cblwt2 gn-term:name "CBLWT2" . +gn:Bxdpublish_cblwt2 gn-term:publicationDescription "Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]" . +gn:Bxdpublish_cblwt2 gn-term:originalDescription "Cerebellum weight [mg]" . +gn:Bxdpublish_cblwt2 gn-term:prePublicationDescription "Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]" . +gn:Bxdpublish_cblwt2 gn-term:postPublicationAbbreviation "CBLWT2" . +gn:Bxdpublish_cblwt2 gn-term:submitter "robwilliams" . +gn:Bxdpublish_cblwt2 gn-term:mean "52.13529418496525"^^xsd:double . 
+gn:Bxdpublish_cblwt2 gn-term:locus "rs48756159" . +gn:Bxdpublish_cblwt2 gn-term:LRS "13.4974911471087"^^xsd:float . +gn:Bxdpublish_cblwt2 gn-term:additive "2.39444435069444"^^xsd:decimal . +gn:Bxdpublish_cblwt2 gn-term:sequence "1"^^xsd:int . +gn:Bxdpublish_cblwt2 gn-term:phenotypeOfDataset gn:Bxdpublish . +gn:Bxdpublish_cblwt2 gn-term:phenotypeOfPublication pubmed:11438585 . +``` + diff --git a/rdf-documentation/dump-probeset-metadata.md b/rdf-documentation/dump-probeset-metadata.md new file mode 100644 index 0000000..b47ce20 --- /dev/null +++ b/rdf-documentation/dump-probeset-metadata.md @@ -0,0 +1,58 @@ +# Probeset Metadata +## 'dump-probeset-metadata' + + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id)) AS ProbeSetName, IFNULL(ProbeSet.Name, ProbeSet.Id) AS name, ProbeSetFreeze.Name, IFNULL(ProbeSetXRef.mean, '') AS mean, IFNULL(ProbeSetXRef.se, '') AS se, ProbeSetXRef.Locus, IFNULL(ProbeSetXRef.LRS, '') AS LRS, IFNULL(ProbeSetXRef.pValue, '') AS pValue, IFNULL(ProbeSetXRef.additive, '') AS additive, IFNULL(ProbeSetXRef.h2, '') AS h2 FROM ProbeSetXRef LEFT JOIN ProbeSet ON ProbeSetXRef.ProbeSetId = ProbeSet.Id LEFT JOIN ProbeSetFreeze ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1 +``` + +The above query results to triples that have the form: + +```text +gn:probesetData_probesetname -> rdf:type -> gn:probesetData +gn:probesetData_probesetname -> gn-term:hasProbeset -> probeset:name +gn:probesetData_probesetname -> gn-term:probesetOfDataset -> probeset:ProbeSetFreeze_Name_ +gn:probesetData_probesetname -> gn-term:mean -> "mean"^^xsd:double +gn:probesetData_probesetname -> gn-term:se -> "se"^^xsd:double +gn:probesetData_probesetname -> gn-term:locus -> ProbeSetXRef(Locus) +gn:probesetData_probesetname -> gn:LRS -> "LRS"^^xsd:double +gn:probesetData_probesetname -> gn-term:pValue -> 
"pValue"^^xsd:double +gn:probesetData_probesetname -> gn-term:additive -> "additive"^^xsd:double +gn:probesetData_probesetname -> gn-term:h2 -> "h2"^^xsd:float +``` +Here's an example query: + +```sparql +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . + +SELECT ?s ?p ?o WHERE { + ?s rdf:type gn:probesetData . + ?s gn-term:hasProbeset probeset:100001_at . + ?s gn-term:probesetOfDataset probeset:HC_U_0304_R . + ?s gn-term:mean "8.14033666666667"^^xsd:double . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:probesetData_hc_u_0304_r:100001_at rdf:type gn:probesetData . +gn:probesetData_hc_u_0304_r:100001_at gn-term:hasProbeset probeset:100001_at . +gn:probesetData_hc_u_0304_r:100001_at gn-term:probesetOfDataset probeset:HC_U_0304_R . +gn:probesetData_hc_u_0304_r:100001_at gn-term:mean "8.14033666666667"^^xsd:double . +gn:probesetData_hc_u_0304_r:100001_at gn-term:se "0.023595817125580502"^^xsd:double . +gn:probesetData_hc_u_0304_r:100001_at gn-term:locus "rsm10000021399" . +gn:probesetData_hc_u_0304_r:100001_at gn:LRS "12.2805314427567"^^xsd:double . +gn:probesetData_hc_u_0304_r:100001_at gn-term:pValue "0.118"^^xsd:double . +gn:probesetData_hc_u_0304_r:100001_at gn-term:additive "0.0803547619047631"^^xsd:double . 
+``` + diff --git a/rdf-documentation/dump-publication.md b/rdf-documentation/dump-publication.md index 248be48..a40b597 100644 --- a/rdf-documentation/dump-publication.md +++ b/rdf-documentation/dump-publication.md @@ -1,19 +1,7 @@ # Publications Metadata ## 'dump-publication' -## Schema Triples: -```text -gn:pubMedId -> rdfs:range -> rdfs:Literal -gn:title -> rdfs:range -> rdfs:Literal -gn:journal -> rdfs:range -> rdfs:Literal -gn:volume -> rdfs:range -> rdfs:Literal -gn:pages -> rdfs:range -> rdfs:Literal -gn:month -> rdfs:range -> rdfs:Literal -gn:year -> rdfs:range -> rdfs:Literal -gn:author -> rdfs:range -> rdfs:Literal -gn:abstract -> rdfs:range -> rdfs:Literal -``` ## Generated Triples: The following SQL query was executed: @@ -25,25 +13,25 @@ SELECT IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT) The above query results to triples that have the form: ```text -publication:pmid -> rdf:type -> gn:publication -publication:pmid -> gn:pubMedId -> pubmed:pubmedId -publication:pmid -> gn:title -> Publication(Title) -publication:pmid -> gn:journal -> Publication(Journal) -publication:pmid -> gn:volume -> Publication(Volume) -publication:pmid -> gn:pages -> Publication(Pages) -publication:pmid -> gn:month -> Publication(Month) -publication:pmid -> gn:year -> Publication(Year) -publication:pmid -> gn:abstract -> Abstract -publication:pmid -> gn:author -> PublicationAuthors +pubmed:pmid -> rdf:type -> gn:publication +pubmed:pmid -> gn-term:pubMedId -> pubmed:pubmedId +pubmed:pmid -> gn-term:title -> Publication(Title) +pubmed:pmid -> gn-term:journal -> Publication(Journal) +pubmed:pmid -> gn-term:volume -> Publication(Volume) +pubmed:pmid -> gn-term:pages -> Publication(Pages) +pubmed:pmid -> gn-term:month -> Publication(Month) +pubmed:pmid -> gn-term:year -> Publication(Year) +pubmed:pmid -> gn:abstract -> Abstract +pubmed:pmid -> gn:author -> PublicationAuthors ``` Here's an example query: ```sparql -PREFIX rdf: -PREFIX rdfs: -PREFIX gn: 
-PREFIX publication: -PREFIX pubmed: +@prefix gn-term: . +@prefix gn: . +@prefix pubmed: . +@prefix rdfs: . +@prefix rdf: . SELECT ?s ?p ?o WHERE { ?s rdf:type gn:publication . @@ -55,6 +43,6 @@ Expected Result: ```rdf gn:unpublished_1 rdf:type gn:publication . -gn:unpublished_1 gn:year "0" . +gn:unpublished_1 gn-term:year "0" . ``` diff --git a/rdf-documentation/dump-species-metadata.md b/rdf-documentation/dump-species-metadata.md index ca09458..f64232f 100644 --- a/rdf-documentation/dump-species-metadata.md +++ b/rdf-documentation/dump-species-metadata.md @@ -1,14 +1,7 @@ # Species Metadata ## 'dump-species' -## Schema Triples: -```text -gn-term:name -> rdfs:range -> rdfs:Literal -gn-term:displayName -> rdfs:range -> rdfs:Literal -gn-term:binomialName -> rdfs:range -> rdfs:Literal -gn-term:family -> rdfs:range -> rdfs:Literal -``` ## Generated Triples: The following SQL query was executed: @@ -20,24 +13,24 @@ SELECT Species.FullName, Species.SpeciesName, Species.MenuName, Species.FullName The above query results to triples that have the form: ```text -gn-id:Species_fullname -> rdf:type -> gn-id:species -gn-id:Species_fullname -> gn-term:name -> Species(SpeciesName) -gn-id:Species_fullname -> gn-term:displayName -> Species(MenuName) -gn-id:Species_fullname -> gn-term:binomialName -> Species(FullName) -gn-id:Species_fullname -> gn-term:family -> Species(Family) -gn-id:Species_fullname -> gn-term:organism -> taxon:Species(TaxonomyId) +gn:Species_fullname -> rdf:type -> gn:species +gn:Species_fullname -> gn-term:name -> Species(SpeciesName) +gn:Species_fullname -> gn-term:displayName -> Species(MenuName) +gn:Species_fullname -> gn-term:binomialName -> Species(FullName) +gn:Species_fullname -> gn-term:family -> Species(Family) +gn:Species_fullname -> gn-term:organism -> taxon:Species(TaxonomyId) ``` Here's an example query: ```sparql -@prefix gn-id: . -@prefix gn-term: . -@prefix rdf: . +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . @prefix rdfs: . 
@prefix taxon: . SELECT ?s ?p ?o WHERE { - ?s rdf:type gn-id:species . + ?s rdf:type gn:species . ?s gn-term:name "Mouse" . ?s gn-term:displayName "Mouse (Mus musculus, mm10)" . ?s ?p ?o . @@ -47,26 +40,18 @@ SELECT ?s ?p ?o WHERE { Expected Result: ```rdf -gn-id:Mus_musculus rdf:type gn-id:species . -gn-id:Mus_musculus gn-term:name "Mouse" . -gn-id:Mus_musculus gn-term:displayName "Mouse (Mus musculus, mm10)" . -gn-id:Mus_musculus gn-term:binomialName "Mus musculus" . -gn-id:Mus_musculus gn-term:family "Vertebrates" . -gn-id:Mus_musculus gn-term:organism taxon:10090 . +gn:Mus_musculus rdf:type gn:species . +gn:Mus_musculus gn-term:name "Mouse" . +gn:Mus_musculus gn-term:displayName "Mouse (Mus musculus, mm10)" . +gn:Mus_musculus gn-term:binomialName "Mus musculus" . +gn:Mus_musculus gn-term:family "Vertebrates" . +gn:Mus_musculus gn-term:organism taxon:10090 . ``` ## 'dump-strain' -## Schema Triples: -```text -gn-term:strainOfSpecies -> rdfs:domain -> gn-term:strain -gn-term:strainOfSpecies -> rdfs:range -> gn-term:species -gn-term:name -> rdfs:range -> rdfs:Literal -gn-term:alias -> rdfs:range -> rdfs:Literal -gn-term:symbol -> rdfs:range -> rdfs:Literal -``` ## Generated Triples: The following SQL query was executed: @@ -78,25 +63,25 @@ SELECT CAST(CONVERT(BINARY CONVERT(Strain.Name USING latin1) USING utf8) AS VARC The above query results to triples that have the form: ```text -gn-id:Strainname -> rdf:type -> gn-id:strain -gn-id:Strainname -> gn-term:strainOfSpecies -> gn-id:Species_fullname -gn-id:Strainname -> gn-term:name -> StrainName -gn-id:Strainname -> gn-term:name2 -> StrainName2 -gn-id:Strainname -> gn-term:alias -> StrainAlias -gn-id:Strainname -> gn-term:symbol -> Strain(Symbol) +gn:Strainname -> rdf:type -> gn:strain +gn:Strainname -> gn-term:strainOfSpecies -> gn:Species_fullname +gn:Strainname -> gn-term:name -> StrainName +gn:Strainname -> gn-term:name2 -> StrainName2 +gn:Strainname -> gn-term:alias -> StrainAlias +gn:Strainname -> gn-term:symbol 
-> Strain(Symbol) ``` Here's an example query: ```sparql -@prefix gn-id: . -@prefix gn-term: . -@prefix rdf: . +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . @prefix rdfs: . @prefix taxon: . SELECT ?s ?p ?o WHERE { - ?s rdf:type gn-id:strain . - ?s gn-term:strainOfSpecies gn-id:Mus_musculus . + ?s rdf:type gn:strain . + ?s gn-term:strainOfSpecies gn:Mus_musculus . ?s gn-term:name "B6D2F1" . ?s ?p ?o . } @@ -105,19 +90,16 @@ SELECT ?s ?p ?o WHERE { Expected Result: ```rdf -gn-id:B6d2f1 rdf:type gn-id:strain . -gn-id:B6d2f1 gn-term:strainOfSpecies gn-id:Mus_musculus . -gn-id:B6d2f1 gn-term:name "B6D2F1" . -gn-id:B6d2f1 gn-term:name2 "B6D2F1" . +gn:B6d2f1 rdf:type gn:strain . +gn:B6d2f1 gn-term:strainOfSpecies gn:Mus_musculus . +gn:B6d2f1 gn-term:name "B6D2F1" . +gn:B6d2f1 gn-term:name2 "B6D2F1" . ``` ## 'dump-mapping-method' -## Schema Triples: -```text -``` ## Generated Triples: The following SQL query was executed: @@ -129,19 +111,19 @@ SELECT MappingMethod.Name FROM MappingMethod The above query results to triples that have the form: ```text -gn-id:mappingMethod_mappingmethod_name -> rdf:type -> gn-id:mappingMethod +gn:mappingMethod_mappingmethod_name -> rdf:type -> gn:mappingMethod ``` Here's an example query: ```sparql -@prefix gn-id: . -@prefix gn-term: . -@prefix rdf: . +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . @prefix rdfs: . @prefix taxon: . SELECT ?s ?p ?o WHERE { - ?s rdf:type gn-id:mappingMethod . + ?s rdf:type gn:mappingMethod . ?s ?p ?o . } ``` @@ -149,17 +131,13 @@ SELECT ?s ?p ?o WHERE { Expected Result: ```rdf -gn-id:mappingMethod_qtlreaper rdf:type gn-id:mappingMethod . +gn:mappingMethod_qtlreaper rdf:type gn:mappingMethod . 
``` ## 'dump-avg-method' -## Schema Triples: -```text -gn-term:normalization -> rdfs:range -> rdfs:Literal -``` ## Generated Triples: The following SQL query was executed: @@ -171,20 +149,20 @@ SELECT AvgMethod.Name, AvgMethod.Normalization FROM AvgMethod The above query results to triples that have the form: ```text -gn-id:avgmethod_avgmethod_name -> rdf:type -> gn-id:avgMethod -gn-id:avgmethod_avgmethod_name -> gn-term:normalization -> AvgMethod(Normalization) +gn:avgmethod_avgmethod_name -> rdf:type -> gn:avgMethod +gn:avgmethod_avgmethod_name -> gn-term:normalization -> AvgMethod(Normalization) ``` Here's an example query: ```sparql -@prefix gn-id: . -@prefix gn-term: . -@prefix rdf: . +@prefix gn: . +@prefix gn-term: . +@prefix rdf: . @prefix rdfs: . @prefix taxon: . SELECT ?s ?p ?o WHERE { - ?s rdf:type gn-id:avgMethod . + ?s rdf:type gn:avgMethod . ?s gn-term:normalization "MAS5" . ?s ?p ?o . } @@ -193,7 +171,7 @@ SELECT ?s ?p ?o WHERE { Expected Result: ```rdf -gn-id:avgmethod_mas5 rdf:type gn-id:avgMethod . -gn-id:avgmethod_mas5 gn-term:normalization "MAS5" . +gn:avgmethod_mas5 rdf:type gn:avgMethod . +gn:avgmethod_mas5 gn-term:normalization "MAS5" . ``` diff --git a/rdf-documentation/dump-tissue.md b/rdf-documentation/dump-tissue.md index dd64f83..12271e8 100644 --- a/rdf-documentation/dump-tissue.md +++ b/rdf-documentation/dump-tissue.md @@ -1,11 +1,7 @@ # Tissue Metadata ## 'dump-tissue' -## Schema Triples: -```text -gn-term:name -> rdfs:range -> rdfs:Literal -``` ## Generated Triples: The following SQL query was executed: @@ -17,19 +13,19 @@ SELECT Tissue.Short_Name, Tissue.Name FROM Tissue The above query results to triples that have the form: ```text -gn-id:tissue_tissue_short_name -> rdf:type -> gn-id:tissue -gn-id:tissue_tissue_short_name -> gn-term:name -> Tissue(Name) +gn:tissue_tissue_short_name -> rdf:type -> gn:tissue +gn:tissue_tissue_short_name -> gn-term:name -> Tissue(Name) ``` Here's an example query: ```sparql -@prefix gn-id: . 
+@prefix gn: . @prefix gn-term: . @prefix rdf: . @prefix rdfs: . SELECT ?s ?p ?o WHERE { - ?s rdf:type gn-id:tissue . + ?s rdf:type gn:tissue . ?s gn-term:name "Brain mRNA" . ?s ?p ?o . } @@ -38,7 +34,7 @@ SELECT ?s ?p ?o WHERE { Expected Result: ```rdf -gn-id:tissue_brn rdf:type gn-id:tissue . -gn-id:tissue_brn gn-term:name "Brain mRNA" . +gn:tissue_brn rdf:type gn:tissue . +gn:tissue_brn gn-term:name "Brain mRNA" . ``` -- cgit v1.2.3