From edca04399ae950698a89a64160cd35f6164f4b1c Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 24 Aug 2023 13:08:59 +0300 Subject: Update autogenerated docs Signed-off-by: Munyoki Kilyungi --- rdf-documentation/dataset-metadata.md | 393 +++++++++++++++++++++++ rdf-documentation/dump-gene-chip.md | 118 ------- rdf-documentation/dump-genotype.md | 117 ------- rdf-documentation/dump-info-pages.md | 155 --------- rdf-documentation/dump-phenotype.md | 121 ------- rdf-documentation/dump-probeset-metadata.md | 58 ---- rdf-documentation/dump-probeset-summary-stats.md | 60 ---- rdf-documentation/dump-probeset.md | 95 ------ rdf-documentation/dump-publication.md | 51 --- rdf-documentation/dump-species-metadata.md | 373 --------------------- rdf-documentation/dump-tissue.md | 41 --- rdf-documentation/generif-metadata.md | 170 ++++++++++ rdf-documentation/genotype-metadata.md | 64 ++++ rdf-documentation/phenotype-metadata.md | 71 ++++ rdf-documentation/probeset-metadata.md | 95 ++++++ rdf-documentation/publication-metadata.md | 51 +++ rdf-documentation/species-metadata.md | 375 +++++++++++++++++++++ rdf-documentation/tissue-metadata.md | 41 +++ 18 files changed, 1260 insertions(+), 1189 deletions(-) create mode 100644 rdf-documentation/dataset-metadata.md delete mode 100644 rdf-documentation/dump-gene-chip.md delete mode 100644 rdf-documentation/dump-genotype.md delete mode 100644 rdf-documentation/dump-info-pages.md delete mode 100644 rdf-documentation/dump-phenotype.md delete mode 100644 rdf-documentation/dump-probeset-metadata.md delete mode 100644 rdf-documentation/dump-probeset-summary-stats.md delete mode 100644 rdf-documentation/dump-probeset.md delete mode 100644 rdf-documentation/dump-publication.md delete mode 100644 rdf-documentation/dump-species-metadata.md delete mode 100644 rdf-documentation/dump-tissue.md create mode 100644 rdf-documentation/generif-metadata.md create mode 100644 rdf-documentation/genotype-metadata.md create mode 100644 
rdf-documentation/phenotype-metadata.md create mode 100644 rdf-documentation/probeset-metadata.md create mode 100644 rdf-documentation/publication-metadata.md create mode 100644 rdf-documentation/species-metadata.md create mode 100644 rdf-documentation/tissue-metadata.md diff --git a/rdf-documentation/dataset-metadata.md b/rdf-documentation/dataset-metadata.md new file mode 100644 index 0000000..20f3e29 --- /dev/null +++ b/rdf-documentation/dataset-metadata.md @@ -0,0 +1,393 @@ +# Info files / Investigators Metadata +## 'info-files' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT InfoFiles.InfoPageName, IF(GenoFreeze.Id IS NOT NULL, 'gnc:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gnc:phenotypeDataset', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:probesetDataset', 'gnc:dataset'))) AS rdfType, InfoFiles.InfoPageName, IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, '')) AS DatasetFullName, Datasets.DatasetName AS DatasetGroup, InfoFiles.InfoFileTitle, Datasets.PublicationTitle, IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, ''))) AS createTimeGenoFreeze, Investigators.FirstName, Investigators.LastName, Investigators.Email, Organizations.OrganizationName, InfoFiles.GN_AccesionId, DatasetStatus.DatasetStatusName, InbredSet.Name, Tissue.Short_Name, AvgMethod.Name AS AvgMethodName, AvgMethod.Name AS AvgMethodName, GeneChip.Name AS GeneChip, Datasets.Summary, IFNULL(Datasets.GeoSeries, '') AS GeoSeries, Datasets.AboutTissue, InfoFiles.Specifics, Datasets.AboutCases, Datasets.AboutPlatform, Datasets.AboutDataProcessing, Datasets.Notes, Datasets.ExperimentDesign, Datasets.Contributors, Datasets.Citation, InfoFiles.Data_Source_Acknowledge, Datasets.Acknowledgment FROM InfoFiles LEFT JOIN PublishFreeze ON InfoFiles.InfoPageName = PublishFreeze.Name LEFT JOIN GenoFreeze ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN ProbeSetFreeze ON InfoFiles.InfoPageName = 
ProbeSetFreeze.Name LEFT JOIN InbredSet ON InfoFiles.InbredSetId = InbredSet.InbredSetId LEFT JOIN Species ON InfoFiles.SpeciesId = Species.SpeciesId LEFT JOIN Datasets USING (DatasetId) LEFT JOIN DatasetStatus USING (DatasetStatusId) LEFT JOIN Tissue USING (TissueId) LEFT JOIN Investigators USING (InvestigatorId) LEFT JOIN AvgMethod USING (AvgMethodId) LEFT JOIN Organizations USING (OrganizationId) LEFT JOIN GeneChip USING (GeneChipId) WHERE GN_AccesionId IS NOT NULL +``` + +The above query results to triples that have the form: + +```text +gn:Infofiles_infopagename_ -> rdf:type -> rdfType +gn:Infofiles_infopagename_ -> rdfs:label -> InfoFiles(InfoPageName) +gn:Infofiles_infopagename_ -> skos:prefLabel -> DatasetFullName +gn:Infofiles_infopagename_ -> skos:prefLabel -> Datasets(DatasetGroup) +gn:Infofiles_infopagename_ -> gdmt:hasTitleInfo -> InfoFiles(InfoFileTitle) +gn:Infofiles_infopagename_ -> dct:title -> Datasets(PublicationTitle) +gn:Infofiles_infopagename_ -> dct:created -> createTimeGenoFreeze +gn:Infofiles_infopagename_ -> gdmt:hasCreatorInfo -> gn:investigator_investigators_firstname_investigators_lastname_investigators_email +gn:Infofiles_infopagename_ -> gdmt:hasCreatorAffiliation -> Organizations(OrganizationName) +gn:Infofiles_infopagename_ -> gdmt:hasDatasetIdentifierSubType -> GNInfoFiles(GN_AccesionId) +gn:Infofiles_infopagename_ -> gdmt:hasRightsInfo -> datasetstatus(datasetstatusname) +gn:Infofiles_infopagename_ -> gnt:belongsToSet -> gn:setInbredset_name +gn:Infofiles_infopagename_ -> gnt:hasTissue -> gn:tissue_tissue_short_name +gn:Infofiles_infopagename_ -> gnt:usesNormalization -> gn:avgmethod_avgmethod_avgmethodname +gn:Infofiles_infopagename_ -> gnt:usesPlatform -> gn:platform_genechip_genechip +gn:Infofiles_infopagename_ -> gdmt:isDescribedBy -> DatasetsSummary +gn:Infofiles_infopagename_ -> gnt:hasGeoSeriesId -> +gn:Infofiles_infopagename_ -> gnt:hasTissueInfo -> DatasetsAboutTissue +gn:Infofiles_infopagename_ -> gnt:hasContentInfo -> 
InfoFilesSpecifics +gn:Infofiles_infopagename_ -> gnt:hasCaseInfo -> DatasetsAboutCases +gn:Infofiles_infopagename_ -> gnt:hasPlatformInfo -> DatasetsAboutPlatform +gn:Infofiles_infopagename_ -> gnt:hasDataProcessingInfo -> DatasetsAboutDataProcessing +gn:Infofiles_infopagename_ -> gnt:hasNotes -> DatasetsNotes +gn:Infofiles_infopagename_ -> gnt:hasExperimentDesignInfo -> DatasetsExperimentDesign +gn:Infofiles_infopagename_ -> gdmt:hasContributorInfo -> DatasetsContributors +gn:Infofiles_infopagename_ -> gdmt:IsCitedBy -> DatasetsCitation +gn:Infofiles_infopagename_ -> gnt:hasAcknowledgement -> InfoFilesData_Source_Acknowledge +gn:Infofiles_infopagename_ -> gnt:hasAcknowledgement -> DatasetsAcknowledgment +``` +Here's an example query: + +```sparql +PREFIX v: +PREFIX foaf: +PREFIX gdmt: +PREFIX skos: +PREFIX geoSeries: +PREFIX gnt: +PREFIX gn: +PREFIX gnc: +PREFIX rdf: +PREFIX owl: +PREFIX rdfs: +PREFIX taxon: +PREFIX dct: + +SELECT * WHERE { + ?s rdf:type gnc:probesetDataset . + ?s rdfs:label "Br_U_0803_M" . + ?s skos:prefLabel "UTHSC Brain mRNA U74Av2 (Aug-Sep03)" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:Br_u_0803_m rdf:type gnc:probesetDataset . +gn:Br_u_0803_m rdfs:label "Br_U_0803_M" . +gn:Br_u_0803_m skos:prefLabel "UTHSC Brain mRNA U74Av2 (Aug-Sep03)" . +gn:Br_u_0803_m gdmt:hasTitleInfo "UTHSC Brain mRNA U74Av2 (Aug03) MAS5" . +gn:Br_u_0803_m dct:created "2003-08-01" . +gn:Br_u_0803_m gdmt:hasCreatorInfo gn:investigator_robert_williams_rwilliams_uthsc.edu . +gn:Br_u_0803_m gdmt:hasCreatorAffiliation "University of Tennessee Health Science Center" . +gn:Br_u_0803_m gdmt:hasDatasetIdentifierSubType "GN1" . +gn:Br_u_0803_m gdmt:hasRightsInfo "public" . +gn:Br_u_0803_m gnt:belongsToSet gn:setBxd . +gn:Br_u_0803_m gnt:hasTissue gn:tissue_brn . +gn:Br_u_0803_m gnt:usesNormalization gn:avgmethod_mas5 . +gn:Br_u_0803_m gnt:usesPlatform gn:platform_mg_u74av2 . +gn:Br_u_0803_m gdmt:isDescribedBy "

This August 2003 freeze provides estimates of mRNA expression in brains of BXD recombinant inbred mice measured using Affymetrix U74Av2 microarrays. This data set includes six arrays which are of marginal quality. New users are encouraged to use one of the more recent data sets December 2003 or March 2004 from which these six arrays have been excluded. Data were generated at the University of Tennessee Health Science Center UTHSC. Over 300 brain samples from 35 strains were hybridized in small pools n=3 to 106 arrays. Data were processed using the Microarray Suite 5 MAS 5 protocol of Affymetrix. To simplify comparison between transforms, MAS 5 values of each array were adjusted to an average of 8 units and a variance of 2 units. In general, the MAS 5 transform does not perform as well as RMA, PDNN, or the new heritability weighted transforms HW1PM.

" . +gn:Br_u_0803_m gnt:hasTissueInfo "

Each array was hybridized with labeled cRNA generated from a pool of three brains from adult animals usually of the same age and always of the same sex. The brain region included most of the forebrain and midbrain, bilaterally. However, the sample excluded the olfactory bulbs, retinas, or the posterior pituitary all formally part of the forebrain. A total of 100 such pooled samples were arrayed: 74 from females and 26 from males. Animals ranged in age from 56 to 441 days, usually with a balanced design: one pool at approximately 8 weeks, one pool at approximately 20 weeks, and one pool at approximately 1 year. Strain averages of mRNA expression level are therefore typically based on three pooled biological replicate arrays. This data set does not incorporate statistical adjustment for possible effects of age and sex. Users can select the strain symbol in the table above to review details about the specific cases and array processing center DP = Divyen Patel at Genome Explorations, Inc; TS = Thomas Sutter at University of Memphis. You can also click on the individual symbols males or females to view the array image.

" . +gn:Br_u_0803_m gnt:hasCaseInfo "

This data set includes estimates of gene expression for 35 genetically uniform lines of mice: C57BL/6J B6, or simply B, DBA/2J D2 or D, their B6D2 F1 intercross, and 32 BXD recombinant inbred RI strains derived by crossing female B6 mice with male D2 mice and then inbreeding progeny for over 21 generations. This set of RI strains is a remarkable resource because many of these strains have been extensively phenotyped for hundreds of interesting traits over a 25-year period. A significant advantage of this RI set is that the two parental strains B6 and D2 have both been extensively sequenced and are known to differ at approximately 1.8 million SNPs. Coding variants mostly single nucleotide polymorphisms and insertion-deletions that may produce interesting phenotypes can be rapidly identified in this particular RI set.

\r\n\r\n

BXD1 through BXD32 were produced by Benjamin A. Taylor starting in the late 1970s. BXD33 through BXD42 were also produced by Taylor, but from a second set of crosses initiated in the early 1990s. These strains are all available from the Jackson Laboratory, Bar Harbor, Maine. BXD43 through BXD99 were produced by Lu Lu, Jeremy Peirce, Lee M. Silver, and Robert W. Williams in the late 1990s and early 2000s using advanced intercross progeny Peirce et al. 2004. Only two of these incipient strains are included in the current database BXD67 and BXD68.

\r\n\r\n

In this mRNA expression database we generally used progeny of stock obtained from The Jackson Laboratory between 1999 and 2001. Animals were generated in-house at the University of Alabama by John Mountz and Hui-Chen Hsu and at the University of Tennessee Health Science Center by Lu Lu and Robert Williams.

\r\n\r\n

The table below lists the arrays by strain, sex, and age. Each array was hybridized to a pool of mRNA from three mice. Note that this table includes six arrays dropped from the December 2003 data sets BXD6, n=2; BXD12, BXD16, BXD40, and BXD67, n=1 each.

\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n
Strain\r\n

Age

\r\n
Strain\r\n

Age

\r\n
\r\n

8 Wks

\r\n
\r\n

20 Wks

\r\n
\r\n

52 Wks

\r\n
\r\n

8 Wks

\r\n
\r\n

20 Wks

\r\n
\r\n

52 Wks

\r\n
C57BL/6J B6♂♂♂♀♀DBA/2J D2♀♀♂♂ 
B6D2F1 F1♀ ♀♀ BXD1♀♀ ♀
BXD2♂♀♀BXD5♂♂♀  
BXD6♀♀♀BXD8♀♂♀ 
BXD9♂♀♀BXD11♀♀ ♀
BXD12 ♂♀♀BXD13♀   
BXD14 ♀♀♀BXD15♀ ♀
BXD16♀♀♀ BXD18♀♂♀
BXD19♀♀♀BXD21♀♀♂♂ 
BXD22♀♀♀ BXD23♀  
BXD24♀♀ ♀BXD25♀♀ ♀♀  
BXD27  ♀♀BXD28♀♀♀
BXD29♂ ♀BXD31♀♀♀♀ 
BXD32♀♂♀♀BXD33♂♀♀ 
BXD34♂♀♀ BXD38♂♀♀  
BXD39♂♀ ♂ BXD40♂♂♀♀  
BXD42♂♂ ♀  BXD67 F8♀ ♀♂  
BXD68 F9♀ ♀♂     
\r\n
" . +gn:Br_u_0803_m gnt:hasPlatformInfo "

Affymetrix U74Av2 GeneChip: The expression data were generated using 100 U74Av2 arrays. The chromosomal locations of U74Av2 probe sets were determined by BLAT analysis of concatenated probe sequences using the Mouse Genome Sequencing Consortium May 2004 mm5 assembly. This BLAT analysis is performed periodically by Yanhua Qu as each new build of the mouse genome is released see http://genome.ucsc.edu/cgi-bin/hgBlat?command=start&org=mouse. We thank Yan Cui UTHSC for allowing us to use his Linux cluster to perform this analysis. It is possible to confirm the BLAT alignment results yourself simply by clicking on the Verify link in the Trait Data and Editing Form right side of the Location line.

" . +gn:Br_u_0803_m gnt:hasDataProcessingInfo "
Probe cell level data from the CEL file: Probe signal intensity estimates in the Affymetrix CEL files are the 75% quantile value taken from a set of 36 6x6 pixels per probe cell in the DAT image file.\r\n
    \r\n
  • Step 1: We added an offset of 1.0 to the CEL expression values for each cell to ensure that all values could be logged without generating negative values.
  • \r\n
  • Step 2: We took the log2 of each cell signal intensity.
  • \r\n
  • Step 3: We computed the Z score for each of these log2 cell signal intensity values within a single array.
  • \r\n
  • Step 4: We multiplied all Z scores by 2.
  • \r\n
  • Step 5: We added a constant of 8 units to the value of the Z score. The consequence of this simple set of transformations is to produce a set of Z scores that have a mean of 8 units, a variance of 4 units, and a standard deviation of 2 units. The advantage of this modified Z score is that a 2-fold difference in expression level corresponds roughly to 1 unit.
  • \r\n
  • Step 6: We computed the arithmetic mean of the values for the set of microarrays for each strain. We have not corrected for variance introduced by sex, age, source of animals, or any possible interaction. We have not corrected for background beyond that implemented by Affymetrix in generating the CEL file.
  • \r\n
\r\nProbe set data from the CHP file: Probe set estimates of expression were initially generated using the standard Affymetrix MAS 5 algorithm. The CHP values were then processed following precisely the same six steps listed above to normalize expression and stabilize the variance of all 106 arrays. The mean expression within each array is therefore 8 units with a standard deviation of 2 units. A 1-unit difference represents roughly a 2-fold difference in expression level. Expression levels below 5 are close to the background noise level. While a value of 8 units is nominally the average expression, this average includes all those transcripts with negligible expression in the brain that would often be eliminated from subsequent analysis so-called "absent" and "marginal" calls in the CHP file.
\r\n\r\n

About the array probe set names:

\r\n\r\n
\r\n

Most probe sets on the U74Av2 array consist of a total of 32 probes, divided into 16 perfect match probes and 16 mismatch controls. Each set of these 25-nucleotide-long probes has an identifier code that includes a unique number, an underscore character, and several suffix characters that highlight design features. The most common probe set suffix is at. This code indicates that the probes should hybridize relatively selectively with the complementary anti-sense target i.e., the complementary RNA produced from a single gene. Other codes include:

\r\n\r\n
    \r\n
  • f_at sequence family: Some probes in this probe set will hybridize to identical and/or slightly different sequences of related gene transcripts.
  • \r\n
  • s_at similarity constraint: All Probes in this probe set target common sequences found in transcripts from several genes.
  • \r\n
  • g_at common groups: Some probes in this set target identical sequences in multiple genes and some target unique sequences in the intended target gene.
  • \r\n
  • r_at rules dropped: Probe sets for which it was not possible to pick a full set of unique probes using the Affymetrix probe selection rules. Probes were picked after dropping some of the selection rules.
  • \r\n
  • i_at incomplete: Designates probe sets for which there are fewer than the standard numbers of unique probes specified in the design 16 perfect match for the U74Av2.
  • \r\n
  • st sense target: Designates a sense target; almost always generated in error.
  • \r\n
\r\n\r\n

Descriptions for the probe set extensions were taken from the Affymetrix GeneChip Expression Analysis Fundamentals.

\r\n
" . +gn:Br_u_0803_m gnt:hasNotes "

This text file originally generated by RWW, EJC, and YHQ, August 2003. Updated by RWW, October 30, 2004.

" . +gn:Br_u_0803_m gnt:hasAcknowledgement "

Data were generated with funds to RWW from the Dunavant Chair of\r\nExcellence, University of Tennessee Health Science Center, Department\r\nof Pediatrics. The majority of arrays were processed at Genome Explorations by Divyen Patel. We thank Guomin Zhou for generating advanced intercross stock used to produce most of the new BXD RI strains.\r\n

" . +gn:Br_u_0803_m gnt:hasAcknowledgement "

Data were generated with funds to RWW from the Dunavant Chair of Excellence, University of Tennessee Health Science Center, Department of Pediatrics. The majority of arrays were processed at Genome Explorations by Divyen Patel. We thank Guomin Zhou for generating advanced intercross stock used to produce most of the new BXD RI strains.

" . +``` + + +## 'publishfreeze' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT PublishFreeze.Name, PublishFreeze.Name, PublishFreeze.FullName, PublishFreeze.ShortName, PublishFreeze.CreateTime, InbredSet.Name FROM PublishFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = PublishFreeze.Name LEFT JOIN InbredSet ON PublishFreeze.InbredSetId = InbredSet.InbredSetId WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoFileId IS NULL +``` + +The above query results to triples that have the form: + +```text +gn:Publishfreeze_name_ -> rdf:type -> gnc:phenotypeDataset +gn:Publishfreeze_name_ -> rdfs:label -> PublishFreeze(Name) +gn:Publishfreeze_name_ -> skos:prefLabel -> PublishFreeze(FullName) +gn:Publishfreeze_name_ -> skos:altLabel -> PublishFreeze(ShortName) +gn:Publishfreeze_name_ -> dct:created -> "PublishFreeze(CreateTime)"^^xsd:date +gn:Publishfreeze_name_ -> gnt:belongsToSet -> gn:setInbredset_name +``` +Here's an example query: + +```sparql +PREFIX v: +PREFIX foaf: +PREFIX gdmt: +PREFIX skos: +PREFIX geoSeries: +PREFIX gnt: +PREFIX gn: +PREFIX gnc: +PREFIX rdf: +PREFIX owl: +PREFIX rdfs: +PREFIX taxon: +PREFIX dct: + +SELECT * WHERE { + ?s rdf:type gnc:phenotypeDataset . + ?s rdfs:label "B6D2F2-PSUPublish" . + ?s skos:prefLabel "B6D2F2 PSU Phenotypes" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:B6d2f2_psupublish rdf:type gnc:phenotypeDataset . +gn:B6d2f2_psupublish rdfs:label "B6D2F2-PSUPublish" . +gn:B6d2f2_psupublish skos:prefLabel "B6D2F2 PSU Phenotypes" . +gn:B6d2f2_psupublish skos:altLabel "B6D2F2 PSU Publish" . +gn:B6d2f2_psupublish dct:created "2015-03-18"^^xsd:date . +gn:B6d2f2_psupublish gnt:belongsToSet gn:setB6d2f2-psupublish . 
+``` + + +## 'genofreeze' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT GenoFreeze.Name, GenoFreeze.Name, GenoFreeze.FullName, GenoFreeze.ShortName, GenoFreeze.CreateTime, InbredSet.Name FROM GenoFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN InbredSet ON GenoFreeze.InbredSetId = InbredSet.InbredSetId WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL +``` + +The above query results to triples that have the form: + +```text +gn:Genofreeze_name_ -> rdf:type -> gnc:genotypeDataset +gn:Genofreeze_name_ -> rdfs:label -> GenoFreeze(Name) +gn:Genofreeze_name_ -> skos:prefLabel -> GenoFreeze(FullName) +gn:Genofreeze_name_ -> skos:altLabel -> GenoFreeze(ShortName) +gn:Genofreeze_name_ -> dct:created -> "GenoFreeze(CreateTime)"^^xsd:date +gn:Genofreeze_name_ -> gnt:belongsToSet -> gn:setInbredset_name +``` +Here's an example query: + +```sparql +PREFIX v: +PREFIX foaf: +PREFIX gdmt: +PREFIX skos: +PREFIX geoSeries: +PREFIX gnt: +PREFIX gn: +PREFIX gnc: +PREFIX rdf: +PREFIX owl: +PREFIX rdfs: +PREFIX taxon: +PREFIX dct: + +SELECT * WHERE { + ?s rdf:type gnc:genotypeDataset . + ?s rdfs:label "B6D2RIGeno" . + ?s skos:prefLabel "B6D2RI Genotypes" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:B6d2rigeno rdf:type gnc:genotypeDataset . +gn:B6d2rigeno rdfs:label "B6D2RIGeno" . +gn:B6d2rigeno skos:prefLabel "B6D2RI Genotypes" . +gn:B6d2rigeno skos:altLabel "B6D2RIGeno" . +gn:B6d2rigeno dct:created "2022-10-24"^^xsd:date . +gn:B6d2rigeno gnt:belongsToSet gn:setB6d2rigeno . 
+``` + + +## 'probesetfreeze' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT ProbeSetFreeze.Name, AvgMethod.Name AS AvgMethodName, AvgMethod.Name AS AvgMethodName, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.CreateTime, ProbeSetFreeze.DataScale, Tissue.Short_Name, InbredSet.Name FROM ProbeSetFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = ProbeSetFreeze.Name LEFT JOIN ProbeFreeze USING (ProbeFreezeId) LEFT JOIN AvgMethod ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID LEFT JOIN InbredSet ON ProbeFreeze.InbredSetId = InbredSet.Id LEFT JOIN Tissue ON ProbeFreeze.TissueId = Tissue.TissueId WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id +``` + +The above query results to triples that have the form: + +```text +gn:Probesetfreeze_name_ -> rdf:type -> gnc:probesetDataset +gn:Probesetfreeze_name_ -> gnt:usesNormalization -> gn:avgmethod_avgmethod_avgmethodname +gn:Probesetfreeze_name_ -> dct:title -> ProbeSetFreeze(FullName) +gn:Probesetfreeze_name_ -> rdfs:label -> ProbeSetFreeze(ShortName) +gn:Probesetfreeze_name_ -> skos:prefLabel -> ProbeSetFreeze(Name) +gn:Probesetfreeze_name_ -> skos:altLabel -> ProbeSetFreeze(Name2) +gn:Probesetfreeze_name_ -> dct:created -> "ProbeSetFreeze(CreateTime)"^^xsd:datetime +gn:Probesetfreeze_name_ -> gnt:usesDataScale -> ProbeSetFreeze(DataScale) +gn:Probesetfreeze_name_ -> gnt:hasTissue -> gn:tissue_tissue_short_name +gn:Probesetfreeze_name_ -> gnt:belongsToSet -> gn:setInbredset_name +``` +Here's an example query: + +```sparql +PREFIX v: +PREFIX foaf: +PREFIX gdmt: +PREFIX skos: +PREFIX geoSeries: +PREFIX gnt: +PREFIX gn: +PREFIX gnc: +PREFIX rdf: +PREFIX owl: +PREFIX rdfs: +PREFIX taxon: +PREFIX dct: + +SELECT * WHERE { + ?s rdf:type gnc:probesetDataset . + ?s gnt:usesNormalization gn:avgmethod_rankinv . + ?s dct:title "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . 
+ ?s rdfs:label "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:Cmmtubcbxdp00cerilm0513 rdf:type gnc:probesetDataset . +gn:Cmmtubcbxdp00cerilm0513 gnt:usesNormalization gn:avgmethod_rankinv . +gn:Cmmtubcbxdp00cerilm0513 dct:title "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . +gn:Cmmtubcbxdp00cerilm0513 rdfs:label "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . +gn:Cmmtubcbxdp00cerilm0513 skos:prefLabel "CMMTUBCBXDP00CerILM0513" . +gn:Cmmtubcbxdp00cerilm0513 skos:altLabel "CMMTUBCBXDP00CerILMMay13" . +gn:Cmmtubcbxdp00cerilm0513 dct:created "2013-04-22"^^xsd:datetime . +gn:Cmmtubcbxdp00cerilm0513 gnt:usesDataScale "log2" . +gn:Cmmtubcbxdp00cerilm0513 gnt:hasTissue gn:tissue_cb . +gn:Cmmtubcbxdp00cerilm0513 gnt:belongsToSet gn:setCmmtubcbxdp00cerilm0513 . +``` + + +## 'investigators' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT Investigators.FirstName, Investigators.LastName, Investigators.Email, Investigators.FirstName, Investigators.LastName, Investigators.FirstName, Investigators.LastName, Investigators.Url, Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country FROM Investigators GROUP BY Email +``` + +The above query results to triples that have the form: + +```text +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> rdf:type -> foaf:Person +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:name -> Investigators(FirstName) Investigators(LastName) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:givenName -> Investigators(FirstName) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:familyName -> Investigators(LastName) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email 
-> foaf:homepage -> Investigators(Url) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:adr -> Investigators(Address) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:locality -> Investigators(City) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:region -> Investigators(State) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:postal-code -> Investigators(ZipCode) +gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:country-name -> Investigators(Country) +``` +Here's an example query: + +```sparql +PREFIX v: +PREFIX foaf: +PREFIX gdmt: +PREFIX skos: +PREFIX geoSeries: +PREFIX gnt: +PREFIX gn: +PREFIX gnc: +PREFIX rdf: +PREFIX owl: +PREFIX rdfs: +PREFIX taxon: +PREFIX dct: + +SELECT * WHERE { + ?s rdf:type foaf:Person . + ?s foaf:name "Evan Williams" . + ?s foaf:givenName "Evan" . + ?s foaf:familyName "Williams" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:investigator_evan_williams_ rdf:type foaf:Person . +gn:investigator_evan_williams_ foaf:name "Evan Williams" . +gn:investigator_evan_williams_ foaf:givenName "Evan" . +gn:investigator_evan_williams_ foaf:familyName "Williams" . +gn:investigator_evan_williams_ v:country-name "Switzerland" . 
+``` + + +## 'gene-chip' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT GeneChip.Name, GeneChip.GeneChipName, GeneChip.Name, IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL) AS Title, GeneChip.Go_tree_value, Species.Fullname, GeneChip.GeoPlatform FROM GeneChip LEFT JOIN Species USING (SpeciesId) +``` + +The above query results to triples that have the form: + +```text +gn:platform_genechip_name -> rdf:type -> gnc:geneChip +gn:platform_genechip_name -> rdfs:label -> GeneChip(GeneChipName) +gn:platform_genechip_name -> skos:prefLabel -> GeneChip(Name) +gn:platform_genechip_name -> skos:altLabel -> Title +gn:platform_genechip_name -> gnt:hasGOTreeValue -> GeneChip(Go_tree_value) +gn:platform_genechip_name -> gnt:belongsToSpecies -> gn:Species_fullname +gn:platform_genechip_name -> gnt:hasGeoSeriesId -> geoSeries:GeneChip(GeoPlatform) +``` +Here's an example query: + +```sparql +PREFIX v: +PREFIX foaf: +PREFIX gdmt: +PREFIX skos: +PREFIX geoSeries: +PREFIX gnt: +PREFIX gn: +PREFIX gnc: +PREFIX rdf: +PREFIX owl: +PREFIX rdfs: +PREFIX taxon: +PREFIX dct: + +SELECT * WHERE { + ?s rdf:type gnc:geneChip . + ?s rdfs:label "Affy Mouse Genome U74Av2 (GPL81)" . + ?s skos:prefLabel "MG_U74AV2" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:platform_mg_u74av2 rdf:type gnc:geneChip . +gn:platform_mg_u74av2 rdfs:label "Affy Mouse Genome U74Av2 (GPL81)" . +gn:platform_mg_u74av2 skos:prefLabel "MG_U74AV2" . +gn:platform_mg_u74av2 gnt:hasGOTreeValue "affy_mg_u74av2" . +gn:platform_mg_u74av2 gnt:belongsToSpecies gn:Mus_musculus . +gn:platform_mg_u74av2 gnt:hasGeoSeriesId geoSeries:GPL81 . 
+``` + diff --git a/rdf-documentation/dump-gene-chip.md b/rdf-documentation/dump-gene-chip.md deleted file mode 100644 index abd68d8..0000000 --- a/rdf-documentation/dump-gene-chip.md +++ /dev/null @@ -1,118 +0,0 @@ -# Probeset freeze metadata -## 'dump-gene-chip' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT GeneChip.Name, GeneChip.GeneChipName, GeneChip.Name, IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL) AS Title, GeneChip.Go_tree_value, Species.Fullname, GeneChip.GeoPlatform FROM GeneChip LEFT JOIN Species USING (SpeciesId) -``` - -The above query results to triples that have the form: - -```text -gn:platform_genechip_name -> rdf:type -> gnc:geneChip -gn:platform_genechip_name -> rdfs:label -> GeneChip(GeneChipName) -gn:platform_genechip_name -> skos:prefLabel -> GeneChip(Name) -gn:platform_genechip_name -> skos:altLabel -> Title -gn:platform_genechip_name -> gnt:hasGOTreeValue -> GeneChip(Go_tree_value) -gn:platform_genechip_name -> gnt:belongsToSpecies -> gn:Species_fullname -gn:platform_genechip_name -> gnt:hasGeoSeriesId -> geoSeries:GeneChip(GeoPlatform) -``` -Here's an example query: - -```sparql -PREFIX geoSeries: -PREFIX gn: -PREFIX gnc: -PREFIX dct: -PREFIX owl: -PREFIX skos: -PREFIX gnt: -PREFIX rdf: -PREFIX rdfs: -PREFIX xsd: - -SELECT * WHERE { - ?s rdf:type gnc:geneChip . - ?s rdfs:label "Affy Mouse Genome U74Av2 (GPL81)" . - ?s skos:prefLabel "MG_U74AV2" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:platform_mg_u74av2 rdf:type gnc:geneChip . -gn:platform_mg_u74av2 rdfs:label "Affy Mouse Genome U74Av2 (GPL81)" . -gn:platform_mg_u74av2 skos:prefLabel "MG_U74AV2" . -gn:platform_mg_u74av2 gnt:hasGOTreeValue "affy_mg_u74av2" . -gn:platform_mg_u74av2 gnt:belongsToSpecies gn:Mus_musculus . -gn:platform_mg_u74av2 gnt:hasGeoSeriesId geoSeries:GPL81 . 
-``` - - -## 'dump-probesetfreeze' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT ProbeSetFreeze.Name, AvgMethod.Name AS AvgMethodName, AvgMethod.Name AS AvgMethodName, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.Name, ProbeSetFreeze.Name2, ProbeSetFreeze.CreateTime, ProbeSetFreeze.DataScale, Tissue.Short_Name, InbredSet.Name AS InbredSetName FROM ProbeSetFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = ProbeSetFreeze.Name LEFT JOIN ProbeFreeze USING (ProbeFreezeId) LEFT JOIN AvgMethod ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID LEFT JOIN InbredSet ON ProbeFreeze.InbredSetId = InbredSet.Id LEFT JOIN Tissue ON ProbeFreeze.TissueId = Tissue.TissueId WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id -``` - -The above query results to triples that have the form: - -```text -gn:Probesetfreeze_name_ -> rdf:type -> gnc:probesetDataset -gn:Probesetfreeze_name_ -> gnt:usesNormalization -> gn:avgmethod_avgmethod_avgmethodname -gn:Probesetfreeze_name_ -> dct:title -> ProbeSetFreeze(FullName) -gn:Probesetfreeze_name_ -> rdfs:label -> ProbeSetFreeze(ShortName) -gn:Probesetfreeze_name_ -> skos:prefLabel -> ProbeSetFreeze(Name) -gn:Probesetfreeze_name_ -> skos:altLabel -> ProbeSetFreeze(Name2) -gn:Probesetfreeze_name_ -> dct:created -> "ProbeSetFreeze(CreateTime)"^^xsd:datetime -gn:Probesetfreeze_name_ -> gnt:usesDataScale -> ProbeSetFreeze(DataScale) -gn:Probesetfreeze_name_ -> gnt:hasTissue -> gn:tissue_tissue_short_name -gn:Probesetfreeze_name_ -> gnt:belongsToInbredSet -> gn:inbredSet_inbredset_inbredsetname -``` -Here's an example query: - -```sparql -PREFIX geoSeries: -PREFIX gn: -PREFIX gnc: -PREFIX dct: -PREFIX owl: -PREFIX skos: -PREFIX gnt: -PREFIX rdf: -PREFIX rdfs: -PREFIX xsd: - -SELECT * WHERE { - ?s rdf:type gnc:probesetDataset . - ?s gnt:usesNormalization gn:avgmethod_rankinv . 
- ?s dct:title "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . - ?s rdfs:label "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:Cmmtubcbxdp00cerilm0513 rdf:type gnc:probesetDataset . -gn:Cmmtubcbxdp00cerilm0513 gnt:usesNormalization gn:avgmethod_rankinv . -gn:Cmmtubcbxdp00cerilm0513 dct:title "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . -gn:Cmmtubcbxdp00cerilm0513 rdfs:label "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . -gn:Cmmtubcbxdp00cerilm0513 skos:prefLabel "CMMTUBCBXDP00CerILM0513" . -gn:Cmmtubcbxdp00cerilm0513 skos:altLabel "CMMTUBCBXDP00CerILMMay13" . -gn:Cmmtubcbxdp00cerilm0513 dct:created "2013-04-22"^^xsd:datetime . -gn:Cmmtubcbxdp00cerilm0513 gnt:usesDataScale "log2" . -gn:Cmmtubcbxdp00cerilm0513 gnt:hasTissue gn:tissue_cb . -gn:Cmmtubcbxdp00cerilm0513 gnt:belongsToInbredSet gn:inbredSet_bxd . -``` - diff --git a/rdf-documentation/dump-genotype.md b/rdf-documentation/dump-genotype.md deleted file mode 100644 index 2f5edbc..0000000 --- a/rdf-documentation/dump-genotype.md +++ /dev/null @@ -1,117 +0,0 @@ -# Genotype Metadata -## 'dump-genofreeze' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT GenoFreeze.Name, GenoFreeze.Name, GenoFreeze.FullName, GenoFreeze.ShortName, GenoFreeze.CreateTime, InbredSet.Name AS InbredSetName FROM GenoFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN InbredSet ON GenoFreeze.InbredSetId = InbredSet.InbredSetId WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL -``` - -The above query results to triples that have the form: - -```text -gn:Genofreeze_name_ -> rdf:type -> gnc:genotypeDataset -gn:Genofreeze_name_ -> rdfs:label -> GenoFreeze(Name) -gn:Genofreeze_name_ -> skos:prefLabel -> GenoFreeze(FullName) -gn:Genofreeze_name_ -> skos:altLabel -> GenoFreeze(ShortName) 
-gn:Genofreeze_name_ -> dct:created -> "GenoFreeze(CreateTime)"^^xsd:date -gn:Genofreeze_name_ -> gnt:belongsToInbredSet -> gn:_inbredset_inbredsetname -``` -Here's an example query: - -```sparql -PREFIX dct: -PREFIX gn: -PREFIX gnc: -PREFIX gnt: -PREFIX rdf: -PREFIX rdfs: -PREFIX owl: -PREFIX skos: -PREFIX xsd: - -SELECT * WHERE { - ?s rdf:type gnc:genotypeDataset . - ?s rdfs:label "B6D2RIGeno" . - ?s skos:prefLabel "B6D2RI Genotypes" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:B6d2rigeno rdf:type gnc:genotypeDataset . -gn:B6d2rigeno rdfs:label "B6D2RIGeno" . -gn:B6d2rigeno skos:prefLabel "B6D2RI Genotypes" . -gn:B6d2rigeno skos:altLabel "B6D2RIGeno" . -gn:B6d2rigeno dct:created "2022-10-24"^^xsd:date . -gn:B6d2rigeno gnt:belongsToInbredSet gn:_b6d2ri . -``` - - -## 'dump-genotypes' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT Geno.Name, Geno.Name, Geno.Chr, IFNULL(Geno.Mb, '') AS Mb, IFNULL(Geno.Mb_mm8, '') AS Mb_mm8, IFNULL(Geno.Mb_2016, '') AS Mb_2016, Geno.Sequence, Geno.Source, IF((Source2 = Source), NULL, Source2) AS Source2, Species.Fullname, Geno.chr_num, Geno.Comments FROM Geno LEFT JOIN Species USING (SpeciesId) -``` - -The above query results to triples that have the form: - -```text -gn:Geno_name_ -> rdf:type -> gnc:genotype -gn:Geno_name_ -> skos:prefLabel -> GenoName -gn:Geno_name_ -> gnt:chr -> Geno(Chr) -gn:Geno_name_ -> gnt:mb -> "Mb"^^xsd:double -gn:Geno_name_ -> gnt:mbMm8 -> "Mb_mm8"^^xsd:double -gn:Geno_name_ -> gnt:mb2016 -> "Mb_2016"^^xsd:double -gn:Geno_name_ -> gnt:hasSequence -> Geno(Sequence) -gn:Geno_name_ -> gnt:hasSource -> Geno(Source) -gn:Geno_name_ -> gnt:hasAltSourceName -> Source2 -gn:Geno_name_ -> gnt:belongsToSpecies -> gn:Species_fullname -gn:Geno_name_ -> gnt:chrNum -> "Geno(chr_num)"^^xsd:int -gn:Geno_name_ -> rdfs:comments -> Geno(Comments) -``` -Here's an example query: - -```sparql -PREFIX dct: -PREFIX gn: -PREFIX gnc: -PREFIX gnt: -PREFIX rdf: -PREFIX rdfs: -PREFIX owl: 
-PREFIX skos: -PREFIX xsd: - -SELECT * WHERE { - ?s rdf:type gnc:genotype . - ?s skos:prefLabel "D1Mit296" . - ?s gnt:chr "1" . - ?s gnt:mb #{"9.749729"^^xsd:double}# . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:D1mit296 rdf:type gnc:genotype . -gn:D1mit296 skos:prefLabel "D1Mit296" . -gn:D1mit296 gnt:chr "1" . -gn:D1mit296 gnt:mb "9.749729"^^xsd:double . -gn:D1mit296 gnt:mbMm8 "9.734943"^^xsd:double . -gn:D1mit296 gnt:mb2016 "9.73981"^^xsd:double . -gn:D1mit296 gnt:hasSequence "CTTGCATGCCTGCGGNTNCGNACTCTAGAGGATCTCCCTATTATTNTNACATNACTTTNAATTAAAATAATAATCAGATAACTTCAACNNNNTGNNCACTTCTGTCAAGTGGACAGAAATAAACATAGAGCCTAATTATCCTGAATTTNAGAGAAAAGAGTGTGTTTANCACAANAGAACAGTTATAGATCTACACACACACACACACACACACACACACACACATACAGTTTGAAAAATGCATCAGTTGAGACC" . -gn:D1mit296 gnt:hasSource "Mit" . -gn:D1mit296 gnt:belongsToSpecies gn:Mus_musculus . -gn:D1mit296 gnt:chrNum "1"^^xsd:int . -``` - diff --git a/rdf-documentation/dump-info-pages.md b/rdf-documentation/dump-info-pages.md deleted file mode 100644 index e7b42bf..0000000 --- a/rdf-documentation/dump-info-pages.md +++ /dev/null @@ -1,155 +0,0 @@ -# Info files / Investigators Metadata -## 'dump-info-files' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT InfoFiles.InfoPageName, IF(GenoFreeze.Id IS NOT NULL, 'gnc:genotypeDataset', IF(PublishFreeze.Id IS NOT NULL, 'gnc:phenotypeDataset', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:probesetDataset', 'gnc:dataset'))) AS rdfType, InfoFiles.InfoPageName, IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, '')) AS DatasetFullName, Datasets.DatasetName AS DatasetGroup, InfoFiles.InfoFileTitle, Datasets.PublicationTitle, IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, ''))) AS createTimeGenoFreeze, Investigators.FirstName, Investigators.LastName, Investigators.Email, Organizations.OrganizationName, InfoFiles.GN_AccesionId, DatasetStatus.DatasetStatusName, InbredSet.Name AS InbredSetName, 
Tissue.Short_Name, AvgMethod.Name AS AvgMethodName, AvgMethod.Name AS AvgMethodName, GeneChip.Name AS GeneChip, Datasets.Summary, IFNULL(Datasets.GeoSeries, '') AS GeoSeries, Datasets.AboutTissue, InfoFiles.Specifics, Datasets.AboutCases, Datasets.AboutPlatform, Datasets.AboutDataProcessing, Datasets.Notes, Datasets.ExperimentDesign, Datasets.Contributors, Datasets.Citation, InfoFiles.Data_Source_Acknowledge, Datasets.Acknowledgment FROM InfoFiles LEFT JOIN PublishFreeze ON InfoFiles.InfoPageName = PublishFreeze.Name LEFT JOIN GenoFreeze ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN ProbeSetFreeze ON InfoFiles.InfoPageName = ProbeSetFreeze.Name LEFT JOIN InbredSet ON InfoFiles.InbredSetId = InbredSet.InbredSetId LEFT JOIN Species ON InfoFiles.SpeciesId = Species.SpeciesId LEFT JOIN Datasets USING (DatasetId) LEFT JOIN DatasetStatus USING (DatasetStatusId) LEFT JOIN Tissue USING (TissueId) LEFT JOIN Investigators USING (InvestigatorId) LEFT JOIN AvgMethod USING (AvgMethodId) LEFT JOIN Organizations USING (OrganizationId) LEFT JOIN GeneChip USING (GeneChipId) WHERE GN_AccesionId IS NOT NULL -``` - -The above query results to triples that have the form: - -```text -gn:Infofiles_infopagename_ -> rdf:type -> rdfType -gn:Infofiles_infopagename_ -> rdfs:label -> InfoFiles(InfoPageName) -gn:Infofiles_infopagename_ -> skos:prefLabel -> DatasetFullName -gn:Infofiles_infopagename_ -> skos:prefLabel -> Datasets(DatasetGroup) -gn:Infofiles_infopagename_ -> gdmt:hasTitleInfo -> InfoFiles(InfoFileTitle) -gn:Infofiles_infopagename_ -> dct:title -> Datasets(PublicationTitle) -gn:Infofiles_infopagename_ -> dct:created -> createTimeGenoFreeze -gn:Infofiles_infopagename_ -> gdmt:hasCreatorInfo -> gn:investigator_investigators_firstname_investigators_lastname_investigators_email -gn:Infofiles_infopagename_ -> gdmt:hasCreatorAffiliation -> Organizations(OrganizationName) -gn:Infofiles_infopagename_ -> gdmt:hasDatasetIdentifierSubType -> GNInfoFiles(GN_AccesionId) 
-gn:Infofiles_infopagename_ -> gdmt:hasRightsInfo -> datasetstatus(datasetstatusname) -gn:Infofiles_infopagename_ -> gnt:belongsToInbredSet -> gn:inbredSet_inbredset_inbredsetname -gn:Infofiles_infopagename_ -> gnt:hasTissue -> gn:tissue_tissue_short_name -gn:Infofiles_infopagename_ -> gnt:usesNormalization -> gn:avgmethod_avgmethod_avgmethodname -gn:Infofiles_infopagename_ -> gnt:usesPlatform -> gn:platform_genechip_genechip -gn:Infofiles_infopagename_ -> gdmt:isDescribedBy -> DatasetsSummary -gn:Infofiles_infopagename_ -> gnt:hasGeoSeriesId -> -gn:Infofiles_infopagename_ -> gnt:hasTissueInfo -> DatasetsAboutTissue -gn:Infofiles_infopagename_ -> gnt:hasContentInfo -> InfoFilesSpecifics -gn:Infofiles_infopagename_ -> gnt:hasCaseInfo -> DatasetsAboutCases -gn:Infofiles_infopagename_ -> gnt:hasPlatformInfo -> DatasetsAboutPlatform -gn:Infofiles_infopagename_ -> gnt:hasDataProcessingInfo -> DatasetsAboutDataProcessing -gn:Infofiles_infopagename_ -> gnt:hasNotes -> DatasetsNotes -gn:Infofiles_infopagename_ -> gnt:hasExperimentDesignInfo -> DatasetsExperimentDesign -gn:Infofiles_infopagename_ -> gdmt:hasContributorInfo -> DatasetsContributors -gn:Infofiles_infopagename_ -> gdmt:IsCitedBy -> DatasetsCitation -gn:Infofiles_infopagename_ -> gnt:hasAcknowledgement -> InfoFilesData_Source_Acknowledge -gn:Infofiles_infopagename_ -> gnt:hasAcknowledgement -> DatasetsAcknowledgment -``` -Here's an example query: - -```sparql -PREFIX v: -PREFIX foaf: -PREFIX gdmt: -PREFIX skos: -PREFIX geoSeries: -PREFIX gnt: -PREFIX gn: -PREFIX gnc: -PREFIX rdf: -PREFIX owl: -PREFIX rdfs: -PREFIX taxon: -PREFIX dct: - -SELECT * WHERE { - ?s rdf:type gnc:probesetDataset . - ?s rdfs:label "Br_U_0803_M" . - ?s skos:prefLabel "UTHSC Brain mRNA U74Av2 (Aug-Sep03)" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:Br_u_0803_m rdf:type gnc:probesetDataset . -gn:Br_u_0803_m rdfs:label "Br_U_0803_M" . -gn:Br_u_0803_m skos:prefLabel "UTHSC Brain mRNA U74Av2 (Aug-Sep03)" . 
-gn:Br_u_0803_m gdmt:hasTitleInfo "UTHSC Brain mRNA U74Av2 (Aug03) MAS5" . -gn:Br_u_0803_m dct:created "2003-08-01" . -gn:Br_u_0803_m gdmt:hasCreatorInfo gn:investigator_robert_williams_rwilliams_uthsc.edu . -gn:Br_u_0803_m gdmt:hasCreatorAffiliation "University of Tennessee Health Science Center" . -gn:Br_u_0803_m gdmt:hasDatasetIdentifierSubType "GN1" . -gn:Br_u_0803_m gdmt:hasRightsInfo "public" . -gn:Br_u_0803_m gnt:belongsToInbredSet gn:inbredSet_bxd . -gn:Br_u_0803_m gnt:hasTissue gn:tissue_brn . -gn:Br_u_0803_m gnt:usesNormalization gn:avgmethod_mas5 . -gn:Br_u_0803_m gnt:usesPlatform gn:platform_mg_u74av2 . -gn:Br_u_0803_m gdmt:isDescribedBy "

This August 2003 freeze provides estimates of mRNA expression in brains of BXD recombinant inbred mice measured using Affymetrix U74Av2 microarrays. This data set includes six arrays which are of marginal quality. New users are encouraged to use one of the more recent data sets December 2003 or March 2004 from which these six arrays have been excluded. Data were generated at the University of Tennessee Health Science Center UTHSC. Over 300 brain samples from 35 strains were hybridized in small pools n=3 to 106 arrays. Data were processed using the Microarray Suite 5 MAS 5 protocol of Affymetrix. To simplify comparison between transforms, MAS 5 values of each array were adjusted to an average of 8 units and a variance of 2 units. In general, the MAS 5 transform does not perform as well as RMA, PDNN, or the new heritability weighted transforms HW1PM.

" . -gn:Br_u_0803_m gnt:hasTissueInfo "

Each array was hybridized with labeled cRNA generated from a pool of three brains from adult animals usually of the same age and always of the same sex. The brain region included most of the forebrain and midbrain, bilaterally. However, the sample excluded the olfactory bulbs, retinas, or the posterior pituitary all formally part of the forebrain. A total of 100 such pooled samples were arrayed: 74 from females and 26 from males. Animals ranged in age from 56 to 441 days, usually with a balanced design: one pool at approximately 8 weeks, one pool at approximately 20 weeks, and one pool at approximately 1 year. Strain averages of mRNA expression level are therefore typically based on three pooled biological replicate arrays. This data set does not incorporate statistical adjustment for possible effects of age and sex. Users can select the strain symbol in the table above to review details about the specific cases and array processing center DP = Divyen Patel at Genome Explorations, Inc; TS = Thomas Sutter at University of Memphis. You can also click on the individual symbols males or females to view the array image.

" . -gn:Br_u_0803_m gnt:hasCaseInfo "

This data set includes estimate of gene expression for 35 genetically uniform lines of mice: C57BL/6J B6, or simply B, DBA/2J D2 or D, their B6D2 F1 intercross, and 32 BXD recombinant inbred RI strains derived by crossing female B6 mice with male D2 mice and then inbreeding progeny for over 21 generations. This set of RI strains is a remarkable resource because many of these strains have been extensively phenotyped for hundreds of interesting traits over a 25-year period. A significant advantage of this RI set is that the two parental strains B6 and D2 have both been extensively sequenced and are known to differ at approximately 1.8 million SNPs. Coding variants mostly single nucleotide polymorphisms and insertion-deletions that may produce interesting phenotypes can be rapidly identified in this particular RI set.

\r\n\r\n

BXD1 through BXD32 were produced by Benjamin A. Taylor starting in the late 1970s. BXD33 through BXD42 were also produced by Taylor, but from a second set of crosses initiated in the early 1990s. These strains are all available from the Jackson Laboratory, Bar Harbor, Maine. BXD43 through BXD99 were produced by Lu Lu, Jeremy Peirce, Lee M. Silver, and Robert W. Williams in the late 1990s and early 2000s using advanced intercross progeny Peirce et al. 2004. Only two of these incipient strains are included in the current database BXD67 and BXD68.

\r\n\r\n

In this mRNA expression database we generally used progeny of stock obtained from The Jackson Laboratory between 1999 and 2001. Animals were generated in-house at the University of Alabama by John Mountz and Hui-Chen Hsu and at the University of Tennessee Health Science Center by Lu Lu and Robert Williams.

\r\n\r\n

The table below lists the arrays by strain, sex, and age. Each array was hybridized to a pool of mRNA from three mice. Note that this table includes six arrays dropped from the December 2003 data sets BXD6, n=2; BXD12, BXD16, BXD40, and BXD67, n=1 each.

\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n
Strain\r\n

Age

\r\n
Strain\r\n

Age

\r\n
\r\n

8 Wks

\r\n
\r\n

20 Wks

\r\n
\r\n

52 Wks

\r\n
\r\n

8 Wks

\r\n
\r\n

20 Wks

\r\n
\r\n

52 Wks

\r\n
C57BL/6J B6♂♂♂♀♀DBA/2J D2♀♀♂♂ 
B6D2F1 F1♀ ♀♀ BXD1♀♀ ♀
BXD2♂♀♀BXD5♂♂♀  
BXD6♀♀♀BXD8♀♂♀ 
BXD9♂♀♀BXD11♀♀ ♀
BXD12 ♂♀♀BXD13♀   
BXD14 ♀♀♀BXD15♀ ♀
BXD16♀♀♀ BXD18♀♂♀
BXD19♀♀♀BXD21♀♀♂♂ 
BXD22♀♀♀ BXD23♀  
BXD24♀♀ ♀BXD25♀♀ ♀♀  
BXD27  ♀♀BXD28♀♀♀
BXD29♂ ♀BXD31♀♀♀♀ 
BXD32♀♂♀♀BXD33♂♀♀ 
BXD34♂♀♀ BXD38♂♀♀  
BXD39♂♀ ♂ BXD40♂♂♀♀  
BXD42♂♂ ♀  BXD67 F8♀ ♀♂  
BXD68 F9♀ ♀♂     
\r\n
" . -gn:Br_u_0803_m gnt:hasPlatformInfo "

Affymetrix U74Av2 GeneChip: The expression data were generated using 100 U74Av2 arrays. The chromosomal locations of U74Av2 probe sets were determined by BLAT analysis of concatenated probe sequences using the Mouse Genome Sequencing Consortium May 2004 mm5 assembly. This BLAT analysis is performed periodically by Yanhua Qu as each new build of the mouse genome is released see http://genome.ucsc.edu/cgi-bin/hgBlat?command=start&org=mouse. We thank Yan Cui UTHSC for allowing us to use his Linux cluster to perform this analysis. It is possible to confirm the BLAT alignment results yourself simply by clicking on the Verify link in the Trait Data and Editing Form right side of the Location line.

" . -gn:Br_u_0803_m gnt:hasDataProcessingInfo "
Probe cell level data from the CEL file: Probe signal intensity estimates in the Affymetrix CEL files are the 75% quantile value taken from a set of 36 6x6 pixels per probe cell in the DAT image file.\r\n
    \r\n
  • Step 1: We added an offset of 1.0 to the CEL expression values for each cell to ensure that all values could be logged without generating negative values.
  • \r\n
  • Step 2: We took the log2 of each cell signal intensity.
  • \r\n
  • Step 3: We computed the Z score for each of these log2 cell signal intensity values within a single array.
  • \r\n
  • Step 4: We multiplied all Z scores by 2.
  • \r\n
  • Step 5: We added a constant of 8 units to the value of the Z score. The consequence of this simple set of transformations is to produce a set of Z scores that have a mean of 8 units, a variance of 4 units, and a standard deviation of 2 units. The advantage of this modified Z score is that a 2-fold difference in expression level corresponds roughly to 1 unit.
  • \r\n
  • Step 6: We computed the arithmetic mean of the values for the set of microarrays for each strain. We have not corrected for variance introduced by sex, age, source of animals, or any possible interaction. We have not corrected for background beyond that implemented by Affymetrix in generating the CEL file.
  • \r\n
\r\nProbe set data from the CHP file: Probe set estimates of expression were initially generated using the standard Affymetrix MAS 5 algorithm. The CHP values were then processed following precisely the same six steps listed above to normalize expression and stabilize the variance of all 106 arrays. The mean expression within each array is therefore 8 units with a standard deviation of 2 units. A 1-unit difference represents roughly a 2-fold difference in expression level. Expression levels below 5 are close to the background noise level. While a value of 8 unit is nominally the average expression, this average includes all those transcripts with negligible expression in the brain that would often be eliminated from subsequent analysis so-called "absent" and "marginal" calls in the CHP file.
\r\n\r\n

About the array probe set names:

\r\n\r\n
\r\n

Most probe sets on the U74Av2 array consist of a total of 32 probes, divided into 16 perfect match probes and 16 mismatch controls. Each set of these 25-nucleotide-long probes has an identifier code that includes a unique number, an underscore character, and several suffix characters that highlight design features. The most common probe set suffix is at. This code indicates that the probes should hybridize relatively selectively with the complementary anti-sense target i.e., the complementary RNA produced from a single gene. Other codes include:

\r\n\r\n
    \r\n
  • f_at sequence family: Some probes in this probe set will hybridize to identical and/or slightly different sequences of related gene transcripts.
  • \r\n
  • s_at similarity constraint: All Probes in this probe set target common sequences found in transcripts from several genes.
  • \r\n
  • g_at common groups: Some probes in this set target identical sequences in multiple genes and some target unique sequences in the intended target gene.
  • \r\n
  • r_at rules dropped: Probe sets for which it was not possible to pick a full set of unique probes using the Affymetrix probe selection rules. Probes were picked after dropping some of the selection rules.
  • \r\n
  • i_at incomplete: Designates probe sets for which there are fewer than the standard numbers of unique probes specified in the design 16 perfect match for the U74Av2.
  • \r\n
  • st sense target: Designates a sense target; almost always generated in error.
  • \r\n
\r\n\r\n

Descriptions for the probe set extensions were taken from the Affymetrix GeneChip Expression Analysis Fundamentals.

\r\n
" . -gn:Br_u_0803_m gnt:hasNotes "

This text file originally generated by RWW, EJC, and YHQ, August 2003. Updated by RWW, October 30, 2004.

" . -gn:Br_u_0803_m gnt:hasAcknowledgement "

Data were generated with funds to RWW from the Dunavant Chair of\r\nExcellence, University of Tennessee Health Science Center, Department\r\nof Pediatrics. The majority of arrays were processed at Genome Explorations by Divyen Patel. We thank Guomin Zhou for generating advanced intercross stock used to produce most of the new BXD RI strains.\r\n

" . -gn:Br_u_0803_m gnt:hasAcknowledgement "

Data were generated with funds to RWW from the Dunavant Chair of Excellence, University of Tennessee Health Science Center, Department of Pediatrics. The majority of arrays were processed at Genome Explorations by Divyen Patel. We thank Guomin Zhou for generating advanced intercross stock used to produce most of the new BXD RI strains.

" . -``` - - -## 'dump-investigators' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT Investigators.FirstName, Investigators.LastName, Investigators.Email, Investigators.FirstName, Investigators.LastName, Investigators.FirstName, Investigators.LastName, Investigators.Url, Investigators.Address, Investigators.City, Investigators.State, Investigators.ZipCode, Investigators.Country FROM Investigators GROUP BY Email -``` - -The above query results to triples that have the form: - -```text -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> rdf:type -> foaf:Person -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:name -> Investigators(FirstName) Investigators(LastName) -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:givenName -> Investigators(FirstName) -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:familyName -> Investigators(LastName) -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> foaf:homepage -> Investigators(Url) -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:adr -> Investigators(Address) -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:locality -> Investigators(City) -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:region -> Investigators(State) -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:postal-code -> Investigators(ZipCode) -gn:investigator_investigators_firstname_investigators_lastname_investigators_email -> v:country-name -> Investigators(Country) -``` -Here's an example query: - -```sparql -PREFIX v: -PREFIX foaf: -PREFIX gdmt: -PREFIX skos: -PREFIX geoSeries: -PREFIX gnt: -PREFIX gn: -PREFIX gnc: -PREFIX rdf: -PREFIX owl: -PREFIX rdfs: -PREFIX taxon: 
-PREFIX dct: - -SELECT * WHERE { - ?s rdf:type foaf:Person . - ?s foaf:name "Evan Williams" . - ?s foaf:givenName "Evan" . - ?s foaf:familyName "Williams" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:investigator_evan_williams_ rdf:type foaf:Person . -gn:investigator_evan_williams_ foaf:name "Evan Williams" . -gn:investigator_evan_williams_ foaf:givenName "Evan" . -gn:investigator_evan_williams_ foaf:familyName "Williams" . -gn:investigator_evan_williams_ v:country-name "Switzerland" . -``` - diff --git a/rdf-documentation/dump-phenotype.md b/rdf-documentation/dump-phenotype.md deleted file mode 100644 index c9436a3..0000000 --- a/rdf-documentation/dump-phenotype.md +++ /dev/null @@ -1,121 +0,0 @@ -# Phenotypes Metadata -## 'dump-publishfreeze' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT PublishFreeze.Name, PublishFreeze.Name, PublishFreeze.FullName, PublishFreeze.ShortName, PublishFreeze.CreateTime, InbredSet.Name FROM PublishFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = PublishFreeze.Name LEFT JOIN InbredSet ON PublishFreeze.InbredSetId = InbredSet.InbredSetId WHERE PublishFreeze.public > 0 AND PublishFreeze.confidentiality < 1 AND InfoFiles.InfoFileId IS NULL -``` - -The above query results to triples that have the form: - -```text -gn:Publishfreeze_name_ -> rdf:type -> gnc:phenotypeDataset -gn:Publishfreeze_name_ -> rdfs:label -> PublishFreeze(Name) -gn:Publishfreeze_name_ -> skos:prefLabel -> PublishFreeze(FullName) -gn:Publishfreeze_name_ -> skos:altLabel -> PublishFreeze(ShortName) -gn:Publishfreeze_name_ -> dct:created -> "PublishFreeze(CreateTime)"^^xsd:date -gn:Publishfreeze_name_ -> gnt:belongsToInbredSet -> gn:Inbredset_name -``` -Here's an example query: - -```sparql -PREFIX dct: -PREFIX gn: -PREFIX owl: -PREFIX gnc: -PREFIX gnt: -PREFIX skos: -PREFIX rdf: -PREFIX rdfs: -PREFIX xsd: -PREFIX pubmed: - -SELECT * WHERE { - ?s rdf:type gnc:phenotypeDataset . - ?s rdfs:label "B6D2F2-PSUPublish" . 
- ?s skos:prefLabel "B6D2F2 PSU Phenotypes" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:B6d2f2_psupublish rdf:type gnc:phenotypeDataset . -gn:B6d2f2_psupublish rdfs:label "B6D2F2-PSUPublish" . -gn:B6d2f2_psupublish skos:prefLabel "B6D2F2 PSU Phenotypes" . -gn:B6d2f2_psupublish skos:altLabel "B6D2F2 PSU Publish" . -gn:B6d2f2_psupublish dct:created "2015-03-18"^^xsd:date . -gn:B6d2f2_psupublish gnt:belongsToInbredSet gn:B6d2f2-psupublish . -``` - - -## 'dump-phenotypes' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id) AS Phenotype, CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id) AS Phenotype, Phenotype.Post_publication_description, Phenotype.Post_publication_abbreviation, Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, IFNULL(PublishXRef.mean, '') AS mean, PublishXRef.Locus, IFNULL(PublishXRef.LRS, '') AS lrs, IFNULL(PublishXRef.additive, '') AS additive, PublishXRef.Sequence, IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT)) AS pmid, Publication.Id FROM PublishXRef LEFT JOIN InbredSet ON InbredSet.InbredSetId = PublishXRef.InbredSetId LEFT JOIN Publication ON Publication.Id = PublishXRef.PublicationId LEFT JOIN Phenotype ON Phenotype.Id = PublishXRef.PhenotypeId WHERE PublishXRef.InbredSetId IN (SELECT PublishFreeze.InbredSetId FROM PublishFreeze) -``` - -The above query results to triples that have the form: - -```text -gn:trait_phenotype -> rdf:type -> gnc:phenotype -gn:trait_phenotype -> rdfs:label -> Phenotype -gn:trait_phenotype -> dct:description -> PhenotypePost_publication_description -gn:trait_phenotype -> gnt:abbreviation -> Phenotype(Post_publication_abbreviation) -gn:trait_phenotype -> gnt:labCode -> Phenotype(Lab_code) -gn:trait_phenotype -> gnt:submitter -> PhenotypeSubmitter -gn:trait_phenotype -> gnt:mean -> "mean"^^xsd:double -gn:trait_phenotype -> gnt:locus -> 
PublishXRef(Locus) -gn:trait_phenotype -> gnt:LRS -> "lrs"^^xsd:double -gn:trait_phenotype -> gnt:additive -> "additive"^^xsd:double -gn:trait_phenotype -> gnt:sequence -> "PublishXRef(Sequence)"^^xsd:integer -gn:trait_phenotype -> dct:isReferencedBy -> pubmed:pmid -gn:trait_phenotype -> dct:contributor -> PhenotypeOwner -``` -Here's an example query: - -```sparql -PREFIX dct: -PREFIX gn: -PREFIX owl: -PREFIX gnc: -PREFIX gnt: -PREFIX skos: -PREFIX rdf: -PREFIX rdfs: -PREFIX xsd: -PREFIX pubmed: - -SELECT * WHERE { - ?s rdf:type gnc:phenotype . - ?s rdfs:label "BXD_10001" . - ?s dct:description "Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]" . - ?s gnt:abbreviation "CBLWT2" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:trait_bxd_10001 rdf:type gnc:phenotype . -gn:trait_bxd_10001 rdfs:label "BXD_10001" . -gn:trait_bxd_10001 dct:description "Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]" . -gn:trait_bxd_10001 gnt:abbreviation "CBLWT2" . -gn:trait_bxd_10001 gnt:submitter "robwilliams" . -gn:trait_bxd_10001 gnt:mean "52.13529418496525"^^xsd:double . -gn:trait_bxd_10001 gnt:locus "rs48756159" . -gn:trait_bxd_10001 gnt:LRS "13.4974911471087"^^xsd:double . -gn:trait_bxd_10001 gnt:additive "2.39444435069444"^^xsd:double . -gn:trait_bxd_10001 gnt:sequence "1"^^xsd:integer . -gn:trait_bxd_10001 dct:isReferencedBy pubmed:11438585 . 
-``` - diff --git a/rdf-documentation/dump-probeset-metadata.md b/rdf-documentation/dump-probeset-metadata.md deleted file mode 100644 index 37aa2c5..0000000 --- a/rdf-documentation/dump-probeset-metadata.md +++ /dev/null @@ -1,58 +0,0 @@ -# Probeset Metadata -## 'dump-probeset-metadata' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT ProbeSetFreeze.Name, IFNULL(ProbeSet.Name, ProbeSet.Id) AS name, ProbeSetFreeze.Name, IFNULL(ProbeSetXRef.mean, '') AS mean, IFNULL(ProbeSetXRef.se, '') AS se, ProbeSetXRef.Locus, IFNULL(ProbeSetXRef.LRS, '') AS LRS, IFNULL(ProbeSetXRef.pValue, '') AS pValue, IFNULL(ProbeSetXRef.additive, '') AS additive, IFNULL(ProbeSetXRef.h2, '') AS h2 FROM ProbeSetXRef LEFT JOIN ProbeSet ON ProbeSetXRef.ProbeSetId = ProbeSet.Id LEFT JOIN ProbeSetFreeze ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1 -``` - -The above query results to triples that have the form: - -```text -gn:Probesetfreeze_name_ -> rdf:type -> gnc:probesetData -gn:Probesetfreeze_name_ -> gnt:hasProbeset -> probeset:name -gn:Probesetfreeze_name_ -> gnt:probesetOfDataset -> probeset:ProbeSetFreeze_Name_ -gn:Probesetfreeze_name_ -> gnt:mean -> "mean"^^xsd:double -gn:Probesetfreeze_name_ -> gnt:se -> "se"^^xsd:double -gn:Probesetfreeze_name_ -> gnt:locus -> ProbeSetXRef(Locus) -gn:Probesetfreeze_name_ -> gn:LRS -> "LRS"^^xsd:double -gn:Probesetfreeze_name_ -> gnt:pValue -> "pValue"^^xsd:double -gn:Probesetfreeze_name_ -> gnt:additive -> "additive"^^xsd:double -gn:Probesetfreeze_name_ -> gnt:h2 -> "h2"^^xsd:float -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX gnc: -PREFIX gnt: -PREFIX rdf: -PREFIX rdfs: -PREFIX xsd: - -SELECT * WHERE { - ?s rdf:type gnc:probesetData . - ?s gnt:hasProbeset probeset:100001_at . - ?s gnt:probesetOfDataset probeset:HC_U_0304_R . - ?s gnt:mean #{"8.14033666666667"^^xsd:double}# . - ?s ?p ?o . 
-} -``` - -Expected Result: - -```rdf -gn:Hc_u_0304_r rdf:type gnc:probesetData . -gn:Hc_u_0304_r gnt:hasProbeset probeset:100001_at . -gn:Hc_u_0304_r gnt:probesetOfDataset probeset:HC_U_0304_R . -gn:Hc_u_0304_r gnt:mean "8.14033666666667"^^xsd:double . -gn:Hc_u_0304_r gnt:se "0.023595817125580502"^^xsd:double . -gn:Hc_u_0304_r gnt:locus "rsm10000021399" . -gn:Hc_u_0304_r gn:LRS "12.2805314427567"^^xsd:double . -gn:Hc_u_0304_r gnt:pValue "0.118"^^xsd:double . -gn:Hc_u_0304_r gnt:additive "0.0803547619047631"^^xsd:double . -``` - diff --git a/rdf-documentation/dump-probeset-summary-stats.md b/rdf-documentation/dump-probeset-summary-stats.md deleted file mode 100644 index 422513b..0000000 --- a/rdf-documentation/dump-probeset-summary-stats.md +++ /dev/null @@ -1,60 +0,0 @@ -# Probeset Summary Statistics -## 'dump-probeset-data' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT CONCAT(ProbeSetFreeze.Name, '_', IF(NULLIF(TRIM(ProbeSet.Name), ProbeSet.Id) IS NULL, '', TRIM(ProbeSet.Name))) AS probesetData, IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name)) AS ProbeSetIdName, ProbeSet.Id, IFNULL(ProbeSetXRef.mean, '') AS mean, ProbeSetXRef.Locus, IFNULL(ProbeSetXRef.LRS, '') AS lrs, IFNULL(ProbeSetXRef.additive, '') AS additive, IFNULL(ProbeSetXRef.se, '') AS stdErr, IFNULL(ProbeSetXRef.pValue, '') AS pValue, IFNULL(ProbeSetXRef.h2, '') AS h2, ProbeSetFreeze.Name FROM ProbeSetXRef LEFT JOIN ProbeSet ON ProbeSetXRef.ProbeSetId = ProbeSet.Id LEFT JOIN ProbeSetFreeze ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1 -``` - -The above query results to triples that have the form: - -```text -gn:Probesetdata -> rdf:type -> gnc:probesetStatistics -gn:Probesetdata -> gnt:hasProbeSet -> gn:probeset_probesetidname -gn:Probesetdata -> gnt:mean -> "mean"^^xsd:double -gn:Probesetdata -> gnt:locus -> ProbeSetXRef(Locus) -gn:Probesetdata -> gnt:LRS -> 
"lrs"^^xsd:double -gn:Probesetdata -> gnt:additive -> "additive"^^xsd:double -gn:Probesetdata -> gnt:stdErr -> "stdErr"^^xsd:double -gn:Probesetdata -> gnt:pValue -> "pValue"^^xsd:double -gn:Probesetdata -> gnt:h2 -> "h2"^^xsd:double -gn:Probesetdata -> gnt:belongsToDataset -> gn:Probesetfreeze_name_ -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX gnc: -PREFIX gnt: -PREFIX skos: -PREFIX owl: -PREFIX rdf: -PREFIX rdfs: -PREFIX xsd: - -SELECT * WHERE { - ?s rdf:type gnc:probesetStatistics . - ?s gnt:hasProbeSet gn:probeset_100001_at . - ?s gnt:mean #{"8.14033666666667"^^xsd:double}# . - ?s gnt:locus "rsm10000021399" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:Hc_u_0304_r_100001_at rdf:type gnc:probesetStatistics . -gn:Hc_u_0304_r_100001_at gnt:hasProbeSet gn:probeset_100001_at . -gn:Hc_u_0304_r_100001_at gnt:mean "8.14033666666667"^^xsd:double . -gn:Hc_u_0304_r_100001_at gnt:locus "rsm10000021399" . -gn:Hc_u_0304_r_100001_at gnt:LRS "12.2805314427567"^^xsd:double . -gn:Hc_u_0304_r_100001_at gnt:additive "0.0803547619047631"^^xsd:double . -gn:Hc_u_0304_r_100001_at gnt:stdErr "0.023595817125580502"^^xsd:double . -gn:Hc_u_0304_r_100001_at gnt:pValue "0.118"^^xsd:double . -gn:Hc_u_0304_r_100001_at gnt:belongsToDataset gn:Hc_u_0304_r . 
-``` - diff --git a/rdf-documentation/dump-probeset.md b/rdf-documentation/dump-probeset.md deleted file mode 100644 index d5729cf..0000000 --- a/rdf-documentation/dump-probeset.md +++ /dev/null @@ -1,95 +0,0 @@ -# ProbeSet Metadata -## 'dump-probeset' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name)) AS ProbeSetIdName, ProbeSet.Id, ProbeSet.Name, ProbeSet.alias, IFNULL(GeneChip.Name, '') AS GeneChipName, NULLIF(TRIM(ProbeSet.TargetId), '') AS TargetId, ProbeSet.Symbol, ProbeSet.description, NULLIF(TRIM(ProbeSet.Probe_set_target_region), '') AS Probe_set_target_region, ProbeSet.Chr, IFNULL(ProbeSet.Mb, '') AS Mb, IFNULL(ProbeSet.Mb_mm8, '') AS Mb_mm8, IFNULL(ProbeSet.Mb_2016, '') AS Mb_2016, IFNULL(ProbeSet.Probe_set_specificity, '') AS Probe_set_specificity, IFNULL(ProbeSet.Probe_set_BLAT_score, '') AS Probe_set_BLAT_score, IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '') AS Probe_set_Blat_Mb_start, IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '') AS Probe_set_Blat_Mb_start_2016, IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '') AS Probe_set_Blat_Mb_end, IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '') AS Probe_set_Blat_Mb_start_2016, ProbeSet.BlatSeq, ProbeSet.TargetSeq, IFNULL(ProbeSet.HomoloGeneID, '') AS HomoloGeneID, IFNULL(ProbeSet.UniProtID, '') AS UniProtID, IFNULL(ProbeSet.PubChem_ID, '') AS PubChem_ID, IFNULL(ProbeSet.KEGG_ID, '') AS KEGG_ID, IFNULL(ProbeSet.OMIM, '') AS OMIM, IFNULL(ProbeSet.ChEBI_ID, '') AS ChEBI_ID FROM ProbeSet LEFT JOIN GeneChip ON GeneChip.Id = ProbeSet.ChipId -``` - -The above query results to triples that have the form: - -```text -gn:probeset_probesetidname -> rdf:type -> gnc:probeset -gn:probeset_probesetidname -> rdfs:label -> ProbeSet(Name) -gn:probeset_probesetidname -> skos:altLabel -> ProbeSet(alias) -gn:probeset_probesetidname -> gnt:hasChip -> gn:platform_genechipname -gn:probeset_probesetidname -> gnt:hasTargetId -> TargetId 
-gn:probeset_probesetidname -> gnt:symbol -> ProbeSet(Symbol) -gn:probeset_probesetidname -> dct:description -> ProbeSetdescription -gn:probeset_probesetidname -> gnt:targetsRegion -> Probe_set_target_region -gn:probeset_probesetidname -> gnt:chr -> ProbeSet(Chr) -gn:probeset_probesetidname -> gnt:mb -> "Mb"^^xsd:double -gn:probeset_probesetidname -> gnt:mbMm8 -> "Mb_mm8"^^xsd:double -gn:probeset_probesetidname -> gnt:mb2016 -> "Mb_2016"^^xsd:double -gn:probeset_probesetidname -> gnt:hasSpecificity -> Probe_set_specificity -gn:probeset_probesetidname -> gnt:hasBlatScore -> Probe_set_BLAT_score -gn:probeset_probesetidname -> gnt:hasBlatMbStart -> "Probe_set_Blat_Mb_start"^^xsd:double -gn:probeset_probesetidname -> gnt:hasBlatMbStart2016 -> "Probe_set_Blat_Mb_start_2016"^^xsd:double -gn:probeset_probesetidname -> gnt:hasBlatMbEnd -> "Probe_set_Blat_Mb_end"^^xsd:double -gn:probeset_probesetidname -> gnt:hasBlatMbEnd2016 -> "Probe_set_Blat_Mb_start_2016"^^xsd:double -gn:probeset_probesetidname -> gnt:hasBlatSeq -> ProbeSetBlatSeq -gn:probeset_probesetidname -> gnt:hasTargetSeq -> ProbeSetTargetSeq -gn:probeset_probesetidname -> gnt:hasHomologeneId -> homologene:HomoloGeneID -gn:probeset_probesetidname -> gnt:hasUniprotId -> uniprot:UniProtID -gn:probeset_probesetidname -> gnt:hasPubChemId -> pubchem:PubChem_ID -gn:probeset_probesetidname -> gnt:hasKeggId -> kegg:KEGG_ID -gn:probeset_probesetidname -> gnt:hasOmimId -> -gn:probeset_probesetidname -> gnt:hasChebiId -> chebi:ChEBI_ID -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX probeset: -PREFIX gnc: -PREFIX gnt: -PREFIX rdf: -PREFIX kegg: -PREFIX pubchem: -PREFIX omim: -PREFIX rdfs: -PREFIX uniprot: -PREFIX chebi: -PREFIX dct: -PREFIX owl: -PREFIX homologene: -PREFIX xsd: -PREFIX skos: - -SELECT * WHERE { - ?s rdf:type gnc:probeset . - ?s rdfs:label "100001_at" . - ?s skos:altLabel "T3g; Ctg3; Ctg-3" . - ?s gnt:hasChip gn:platform_mg_u74av2 . - ?s ?p ?o . 
-} -``` - -Expected Result: - -```rdf -gn:probeset_100001_at rdf:type gnc:probeset . -gn:probeset_100001_at rdfs:label "100001_at" . -gn:probeset_100001_at skos:altLabel "T3g; Ctg3; Ctg-3" . -gn:probeset_100001_at gnt:hasChip gn:platform_mg_u74av2 . -gn:probeset_100001_at gnt:symbol "Cd3g" . -gn:probeset_100001_at dct:description "CD3d antigen, gamma polypeptide" . -gn:probeset_100001_at gnt:chr "9" . -gn:probeset_100001_at gnt:mb "44.970689"^^xsd:double . -gn:probeset_100001_at gnt:mbMm8 "44.721684"^^xsd:double . -gn:probeset_100001_at gnt:mb2016 "44.778772"^^xsd:double . -gn:probeset_100001_at gnt:hasSpecificity "9.3" . -gn:probeset_100001_at gnt:hasBlatScore "186" . -gn:probeset_100001_at gnt:hasBlatMbStart "44.970689"^^xsd:double . -gn:probeset_100001_at gnt:hasBlatMbStart2016 "44.778772"^^xsd:double . -gn:probeset_100001_at gnt:hasBlatMbEnd "44.971291"^^xsd:double . -gn:probeset_100001_at gnt:hasBlatMbEnd2016 "44.778772"^^xsd:double . -gn:probeset_100001_at gnt:hasBlatSeq "CTCTGTTGCAAAATGAACAGCTGTACAGCCCCTCAAGGACCGGGAATATGACCAGTACAGCCATCTCCAAGGAAACCAACTGAGGAAGAAGTGAACTCAGCAGGACTCAGGGTGTCCCCACAATGCATTTTGGAGAGAGCCCAGACTGCAAGCAGAGAGGAAGAACTGAGGAAAACAAGCACAGCGTGGTGTT" . -gn:probeset_100001_at gnt:hasTargetSeq "ctctgttgcaaaatgaacagctgtaccagcccctcaaggaccgggaatatgaccagtacagccatctccaaggaaaccaactgaggaagaagtgaactcagcaggactcagggtgtccccccttntatccagcacccagaatcaaaacaatgcattttggagagagcccagtagagagattttcaaccctacaggtagactgcaagcagagaggaagaactgtcaaagaaattttggtcttttttttttttttnncaaaataaaataaaagcttggaggagccagtggtatgantnnnnnntgnancanttgtcaaccttgtttggggttnncagcaccccacccccagaccccccaaaaaaattcagtgaaggaaaacaagcacagcgtggtgtt" . -gn:probeset_100001_at gnt:hasHomologeneId homologene:55 . -gn:probeset_100001_at gnt:hasOmimId omim:186740 . 
-``` - diff --git a/rdf-documentation/dump-publication.md b/rdf-documentation/dump-publication.md deleted file mode 100644 index 708e47f..0000000 --- a/rdf-documentation/dump-publication.md +++ /dev/null @@ -1,51 +0,0 @@ -# Publications Metadata -## 'dump-publication' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT)) AS pmid, Publication.Id, IFNULL(PubMed_ID, '') AS pubmedId, Publication.Title, Publication.Journal, Publication.Volume, Publication.Pages, Publication.Month, IF(Publication.Year = 0, NULL, Publication.Year) AS Year, Publication.Authors, Publication.Abstract FROM Publication -``` - -The above query results to triples that have the form: - -```text -pubmed:pmid -> rdf:type -> fabio:ResearchPaper -pubmed:pmid -> fabio:hasPubMedId -> pubmed:pubmedId -pubmed:pmid -> dct:title -> Publication(Title) -pubmed:pmid -> fabio:Journal -> Publication(Journal) -pubmed:pmid -> prism:volume -> Publication(Volume) -pubmed:pmid -> fabio:page -> Publication(Pages) -pubmed:pmid -> prism:publicationDate -> "Publication(Month)"^^xsd:gMonth -pubmed:pmid -> fabio:hasPublicationYear -> "Year"^^xsd:gYear -pubmed:pmid -> dct:abstract -> PublicationAbstract -pubmed:pmid -> dct:creator -> PublicationAuthors -``` -Here's an example query: - -```sparql -PREFIX gnt: -PREFIX fabio: -PREFIX dct: -PREFIX prism: -PREFIX gn: -PREFIX gnc: -PREFIX pubmed: -PREFIX rdfs: -PREFIX xsd: -PREFIX rdf: - -SELECT * WHERE { - ?s rdf:type fabio:ResearchPaper . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:unpublished_1 rdf:type fabio:ResearchPaper . 
-``` - diff --git a/rdf-documentation/dump-species-metadata.md b/rdf-documentation/dump-species-metadata.md deleted file mode 100644 index 160cf44..0000000 --- a/rdf-documentation/dump-species-metadata.md +++ /dev/null @@ -1,373 +0,0 @@ -# Species Metadata -## 'dump-inbred-set' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT InbredSet.Name, InbredSet.FullName, InbredSet.GeneticType, InbredSet.Family, MappingMethod.Name, InbredSet.InbredSetCode, Species.Fullname, IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '') AS genotypeP, IF ((SELECT GenoFreeze.Name FROM GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'DNA Markers and SNPs', '') AS phenotypeP, (SELECT GROUP_CONCAT(DISTINCT Tissue.Short_Name SEPARATOR'||') AS MolecularTraits FROM ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species WHERE ProbeFreeze.TissueId = Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id ORDER BY Tissue.Name) AS molecularTrait FROM InbredSet LEFT JOIN Species ON InbredSet.SpeciesId=Species.Id LEFT JOIN MappingMethod ON InbredSet.MappingMethodId=MappingMethod.Id -``` - -The above query results to triples that have the form: - -```text -gn:Inbredset_name -> rdf:type -> gnc:inbredSet -gn:Inbredset_name -> rdfs:label -> InbredSet(FullName) -gn:Inbredset_name -> gnt:geneticType -> InbredSet(GeneticType) -gn:Inbredset_name -> gnt:family -> InbredSet(Family) -gn:Inbredset_name -> gnt:mappingMethod -> MappingMethod(Name) -gn:Inbredset_name -> gnt:code -> InbredSet(InbredSetCode) -gn:Inbredset_name -> gnt:belongsToSpecies -> gn:Species_fullname -gn:Inbredset_name -> gnt:genotype -> genotypeP -gn:Inbredset_name -> gnt:phenotype -> phenotypeP -gn:Inbredset_name -> gnt:hasTissue -> gn:tissue_moleculartrait -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX gnc: -PREFIX owl: 
-PREFIX gnt: -PREFIX skos: -PREFIX rdf: -PREFIX rdfs: -PREFIX taxon: - -SELECT * WHERE { - ?s rdf:type gnc:inbredSet . - ?s rdfs:label "BXD Family" . - ?s gnt:geneticType "riset" . - ?s gnt:family "Reference Populations (replicate average, SE, N)" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:Bxd rdf:type gnc:inbredSet . -gn:Bxd rdfs:label "BXD Family" . -gn:Bxd gnt:geneticType "riset" . -gn:Bxd gnt:family "Reference Populations (replicate average, SE, N)" . -gn:Bxd gnt:mappingMethod "BXD" . -gn:Bxd gnt:code "BXD" . -gn:Bxd gnt:belongsToSpecies gn:Mus_musculus . -gn:Bxd gnt:genotype "Traits and Cofactors" . -gn:Bxd gnt:phenotype "DNA Markers and SNPs" . -gn:Bxd gnt:hasTissue gn:tissue_a1c . -gn:Bxd gnt:hasTissue gn:tissue_acc . -gn:Bxd gnt:hasTissue gn:tissue_adr . -gn:Bxd gnt:hasTissue gn:tissue_amg . -gn:Bxd gnt:hasTissue gn:tissue_bebv . -gn:Bxd gnt:hasTissue gn:tissue_bla . -gn:Bxd gnt:hasTissue gn:tissue_brmet . -gn:Bxd gnt:hasTissue gn:tissue_brmicrorna . -gn:Bxd gnt:hasTissue gn:tissue_brn . -gn:Bxd gnt:hasTissue gn:tissue_cart . -gn:Bxd gnt:hasTissue gn:tissue_cb . -gn:Bxd gnt:hasTissue gn:tissue_cbc . -gn:Bxd gnt:hasTissue gn:tissue_ctx . -gn:Bxd gnt:hasTissue gn:tissue_dfc . -gn:Bxd gnt:hasTissue gn:tissue_drg . -gn:Bxd gnt:hasTissue gn:tissue_ec . -gn:Bxd gnt:hasTissue gn:tissue_emb . -gn:Bxd gnt:hasTissue gn:tissue_eye . -gn:Bxd gnt:hasTissue gn:tissue_fat . -gn:Bxd gnt:hasTissue gn:tissue_fecmet . -gn:Bxd gnt:hasTissue gn:tissue_femur . -gn:Bxd gnt:hasTissue gn:tissue_gtex_aor . -gn:Bxd gnt:hasTissue gn:tissue_gtex_atr . -gn:Bxd gnt:hasTissue gn:tissue_gtex_blo . -gn:Bxd gnt:hasTissue gn:tissue_gtex_bonm . -gn:Bxd gnt:hasTissue gn:tissue_gtex_bre . -gn:Bxd gnt:hasTissue gn:tissue_gtex_cau . -gn:Bxd gnt:hasTissue gn:tissue_gtex_cer . -gn:Bxd gnt:hasTissue gn:tissue_gtex_cerv . -gn:Bxd gnt:hasTissue gn:tissue_gtex_cml . -gn:Bxd gnt:hasTissue gn:tissue_gtex_col . -gn:Bxd gnt:hasTissue gn:tissue_gtex_colsig . 
-gn:Bxd gnt:hasTissue gn:tissue_gtex_cor . -gn:Bxd gnt:hasTissue gn:tissue_gtex_ebv . -gn:Bxd gnt:hasTissue gn:tissue_gtex_eso . -gn:Bxd gnt:hasTissue gn:tissue_gtex_esogas . -gn:Bxd gnt:hasTissue gn:tissue_gtex_fal . -gn:Bxd gnt:hasTissue gn:tissue_gtex_fro . -gn:Bxd gnt:hasTissue gn:tissue_gtex_muc . -gn:Bxd gnt:hasTissue gn:tissue_gtex_ner . -gn:Bxd gnt:hasTissue gn:tissue_gtex_pan . -gn:Bxd gnt:hasTissue gn:tissue_gtex_put . -gn:Bxd gnt:hasTissue gn:tissue_gtex_sintter . -gn:Bxd gnt:hasTissue gn:tissue_gtex_skinex . -gn:Bxd gnt:hasTissue gn:tissue_gtex_skisex . -gn:Bxd gnt:hasTissue gn:tissue_gtex_sn . -gn:Bxd gnt:hasTissue gn:tissue_gtex_sto . -gn:Bxd gnt:hasTissue gn:tissue_gtex_sub . -gn:Bxd gnt:hasTissue gn:tissue_gtex_tf . -gn:Bxd gnt:hasTissue gn:tissue_gtex_thy . -gn:Bxd gnt:hasTissue gn:tissue_gtex_tib . -gn:Bxd gnt:hasTissue gn:tissue_gtex_vag . -gn:Bxd gnt:hasTissue gn:tissue_gtex_ven . -gn:Bxd gnt:hasTissue gn:tissue_gtex_vis . -gn:Bxd gnt:hasTissue gn:tissue_gtex_who . -gn:Bxd gnt:hasTissue gn:tissue_gut . -gn:Bxd gnt:hasTissue gn:tissue_hea . -gn:Bxd gnt:hasTissue gn:tissue_hip . -gn:Bxd gnt:hasTissue gn:tissue_hippreccel . -gn:Bxd gnt:hasTissue gn:tissue_hipprot . -gn:Bxd gnt:hasTissue gn:tissue_hip_mirna . -gn:Bxd gnt:hasTissue gn:tissue_hsc . -gn:Bxd gnt:hasTissue gn:tissue_hyp . -gn:Bxd gnt:hasTissue gn:tissue_ifra_ctx . -gn:Bxd gnt:hasTissue gn:tissue_ipc . -gn:Bxd gnt:hasTissue gn:tissue_isl . -gn:Bxd gnt:hasTissue gn:tissue_itc . -gn:Bxd gnt:hasTissue gn:tissue_kid . -gn:Bxd gnt:hasTissue gn:tissue_lathab . -gn:Bxd gnt:hasTissue gn:tissue_lcm_brreg . -gn:Bxd gnt:hasTissue gn:tissue_leaf . -gn:Bxd gnt:hasTissue gn:tissue_liv . -gn:Bxd gnt:hasTissue gn:tissue_livdnam . -gn:Bxd gnt:hasTissue gn:tissue_livmet . -gn:Bxd gnt:hasTissue gn:tissue_livpro . -gn:Bxd gnt:hasTissue gn:tissue_lung . -gn:Bxd gnt:hasTissue gn:tissue_m1c . -gn:Bxd gnt:hasTissue gn:tissue_mam . -gn:Bxd gnt:hasTissue gn:tissue_mamtum . -gn:Bxd gnt:hasTissue gn:tissue_mbr . 
-gn:Bxd gnt:hasTissue gn:tissue_md . -gn:Bxd gnt:hasTissue gn:tissue_methyl . -gn:Bxd gnt:hasTissue gn:tissue_mfc . -gn:Bxd gnt:hasTissue gn:tissue_musmet . -gn:Bxd gnt:hasTissue gn:tissue_nac . -gn:Bxd gnt:hasTissue gn:tissue_nbcb . -gn:Bxd gnt:hasTissue gn:tissue_neutrophil . -gn:Bxd gnt:hasTissue gn:tissue_ocl . -gn:Bxd gnt:hasTissue gn:tissue_ofc . -gn:Bxd gnt:hasTissue gn:tissue_of_ctx . -gn:Bxd gnt:hasTissue gn:tissue_ova . -gn:Bxd gnt:hasTissue gn:tissue_pcg . -gn:Bxd gnt:hasTissue gn:tissue_pfc . -gn:Bxd gnt:hasTissue gn:tissue_pg . -gn:Bxd gnt:hasTissue gn:tissue_pln . -gn:Bxd gnt:hasTissue gn:tissue_pl_ctx . -gn:Bxd gnt:hasTissue gn:tissue_pons . -gn:Bxd gnt:hasTissue gn:tissue_pro . -gn:Bxd gnt:hasTissue gn:tissue_ret . -gn:Bxd gnt:hasTissue gn:tissue_ret_mirna . -gn:Bxd gnt:hasTissue gn:tissue_ret_sc-rna-s . -gn:Bxd gnt:hasTissue gn:tissue_s1c . -gn:Bxd gnt:hasTissue gn:tissue_sal . -gn:Bxd gnt:hasTissue gn:tissue_sg . -gn:Bxd gnt:hasTissue gn:tissue_skm . -gn:Bxd gnt:hasTissue gn:tissue_spi . -gn:Bxd gnt:hasTissue gn:tissue_spl . -gn:Bxd gnt:hasTissue gn:tissue_stc . -gn:Bxd gnt:hasTissue gn:tissue_str . -gn:Bxd gnt:hasTissue gn:tissue_tc . -gn:Bxd gnt:hasTissue gn:tissue_tes . -gn:Bxd gnt:hasTissue gn:tissue_tes_dna_met . -gn:Bxd gnt:hasTissue gn:tissue_thelp . -gn:Bxd gnt:hasTissue gn:tissue_thy . -gn:Bxd gnt:hasTissue gn:tissue_treg . -gn:Bxd gnt:hasTissue gn:tissue_ute . -gn:Bxd gnt:hasTissue gn:tissue_v1 . -gn:Bxd gnt:hasTissue gn:tissue_vfc . -gn:Bxd gnt:hasTissue gn:tissue_vta . -gn:Bxd gnt:hasTissue gn:tissue_wb . -gn:Bxd gnt:hasTissue gn:tissue_wbc . -gn:Bxd gnt:hasTissue gn:tissue_wbpr . -gn:Bxd gnt:hasTissue gn:tissue_wfat . -gn:Bxd gnt:hasTissue gn:tissue_wfat_pro . 
-``` - - -## 'dump-species' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT Species.Fullname, Species.SpeciesName, Species.Name, Species.MenuName, Species.FullName, Species.Family, Species.TaxonomyId FROM Species -``` - -The above query results to triples that have the form: - -```text -gn:Species_fullname -> rdf:type -> gnc:species -gn:Species_fullname -> skos:label -> Species(SpeciesName) -gn:Species_fullname -> skos:altLabel -> Species(Name) -gn:Species_fullname -> rdfs:label -> Species(MenuName) -gn:Species_fullname -> gnt:binomialName -> Species(FullName) -gn:Species_fullname -> gnt:family -> Species(Family) -gn:Species_fullname -> gnt:organism -> taxon:Species(TaxonomyId) -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX gnc: -PREFIX owl: -PREFIX gnt: -PREFIX skos: -PREFIX rdf: -PREFIX rdfs: -PREFIX taxon: - -SELECT * WHERE { - ?s rdf:type gnc:species . - ?s skos:label "Mouse" . - ?s skos:altLabel "mouse" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:Mus_musculus rdf:type gnc:species . -gn:Mus_musculus skos:label "Mouse" . -gn:Mus_musculus skos:altLabel "mouse" . -gn:Mus_musculus rdfs:label "Mouse (Mus musculus, mm10)" . -gn:Mus_musculus gnt:binomialName "Mus musculus" . -gn:Mus_musculus gnt:family "Vertebrates" . -gn:Mus_musculus gnt:organism taxon:10090 . 
-``` - - -## 'dump-strain' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT Strain.Name, Species.Fullname, Strain.Name, IF ((Strain.Name2 != Strain.Name), Strain.Name2, '') AS Name2, IF ((Strain.Alias != Strain.Name), Strain.Alias, '') AS Alias, IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '') AS Symbol FROM Strain LEFT JOIN Species ON Strain.SpeciesId = Species.SpeciesId -``` - -The above query results to triples that have the form: - -```text -gn:Strain_name_ -> rdf:type -> gnc:strain -gn:Strain_name_ -> gnt:belongsToSpecies -> gn:Species_fullname -gn:Strain_name_ -> rdfs:label -> StrainName -gn:Strain_name_ -> rdfs:label -> Name2 -gn:Strain_name_ -> gnt:alias -> Alias -gn:Strain_name_ -> gnt:symbol -> Symbol -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX gnc: -PREFIX owl: -PREFIX gnt: -PREFIX skos: -PREFIX rdf: -PREFIX rdfs: -PREFIX taxon: - -SELECT * WHERE { - ?s rdf:type gnc:strain . - ?s gnt:belongsToSpecies gn:Mus_musculus . - ?s rdfs:label "B6D2F1" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:B6d2f1 rdf:type gnc:strain . -gn:B6d2f1 gnt:belongsToSpecies gn:Mus_musculus . -gn:B6d2f1 rdfs:label "B6D2F1" . -``` - - -## 'dump-mapping-method' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT MappingMethod.Name, MappingMethod.Name FROM MappingMethod -``` - -The above query results to triples that have the form: - -```text -gn:mappingMethod_mappingmethod_name -> rdf:type -> gnc:mappingMethod -gn:mappingMethod_mappingmethod_name -> rdfs:label -> MappingMethod(Name) -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX gnc: -PREFIX owl: -PREFIX gnt: -PREFIX skos: -PREFIX rdf: -PREFIX rdfs: -PREFIX taxon: - -SELECT * WHERE { - ?s rdf:type gnc:mappingMethod . - ?s rdfs:label "qtlreaper" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:mappingMethod_qtlreaper rdf:type gnc:mappingMethod . -gn:mappingMethod_qtlreaper rdfs:label "qtlreaper" . 
-``` - - -## 'dump-avg-method' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT AvgMethod.Name, AvgMethod.Normalization FROM AvgMethod -``` - -The above query results to triples that have the form: - -```text -gn:avgmethod_avgmethod_name -> rdf:type -> gnc:avgMethod -gn:avgmethod_avgmethod_name -> rdfs:label -> AvgMethod(Normalization) -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX gnc: -PREFIX owl: -PREFIX gnt: -PREFIX skos: -PREFIX rdf: -PREFIX rdfs: -PREFIX taxon: - -SELECT * WHERE { - ?s rdf:type gnc:avgMethod . - ?s rdfs:label "MAS5" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:avgmethod_mas5 rdf:type gnc:avgMethod . -gn:avgmethod_mas5 rdfs:label "MAS5" . -``` - diff --git a/rdf-documentation/dump-tissue.md b/rdf-documentation/dump-tissue.md deleted file mode 100644 index b7f45d7..0000000 --- a/rdf-documentation/dump-tissue.md +++ /dev/null @@ -1,41 +0,0 @@ -# Tissue Metadata -## 'dump-tissue' - -## Generated Triples: - -The following SQL query was executed: - -```sql -SELECT Tissue.Short_Name, Tissue.Name FROM Tissue -``` - -The above query results to triples that have the form: - -```text -gn:tissue_tissue_short_name -> rdf:type -> gnc:tissue -gn:tissue_tissue_short_name -> rdfs:label -> Tissue(Name) -``` -Here's an example query: - -```sparql -PREFIX gn: -PREFIX gnt: -PREFIX skos: -PREFIX gnc: -PREFIX rdf: -PREFIX rdfs: - -SELECT * WHERE { - ?s rdf:type gnc:tissue . - ?s rdfs:label "Brain mRNA" . - ?s ?p ?o . -} -``` - -Expected Result: - -```rdf -gn:tissue_brn rdf:type gnc:tissue . -gn:tissue_brn rdfs:label "Brain mRNA" . 
-``` - diff --git a/rdf-documentation/generif-metadata.md b/rdf-documentation/generif-metadata.md new file mode 100644 index 0000000..fc058bc --- /dev/null +++ b/rdf-documentation/generif-metadata.md @@ -0,0 +1,170 @@ +# GeneRIF Metadata +## 'genewiki-symbols' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT GeneRIF_BASIC.GeneId, GROUP_CONCAT(DISTINCT symbol) AS symbol, GROUP_CONCAT(DISTINCT Species.SpeciesName) AS species, GROUP_CONCAT(DISTINCT TaxID) AS taxId FROM GeneRIF_BASIC LEFT JOIN Species USING (SpeciesId) GROUP BY GeneId ORDER BY BINARY symbol +``` + +The above query results in triples that have the form: + +```text +generif:GeneRIF_BASIC(GeneId) -> gnt:symbol -> symbol +generif:GeneRIF_BASIC(GeneId) -> gnt:belongsToSpecies -> species +generif:GeneRIF_BASIC(GeneId) -> dct:relation -> ncbiTaxon:taxId +``` +Here's an example query: + +```sparql +PREFIX rdf: +PREFIX rdfs: +PREFIX gn: +PREFIX gnc: +PREFIX gnt: +PREFIX dct: +PREFIX foaf: +PREFIX pubmed: +PREFIX ncbiTaxon: +PREFIX generif: +PREFIX xsd: +PREFIX owl: + +SELECT * WHERE { + ?s gnt:belongsToSpecies "Human" . + ?s dct:relation ncbiTaxon:9606 . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +generif:233 gnt:belongsToSpecies "Human" . +generif:233 dct:relation ncbiTaxon:9606 . 
+``` + + +## 'gn-genewiki-entries' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT Species.FullName, GeneRIF.comment, GeneRIF.symbol, GeneRIF.createtime, GeneRIF.comment, GeneRIF.symbol, GROUP_CONCAT(DISTINCT GeneCategory.Name SEPARATOR '$$') AS GeneCategory, Species.Fullname, IFNULL(GeneRIF.PubMed_ID, '') AS PubMed_ID, GeneRIF.createtime, GeneRIF.email, Investigators.Email, Investigators.FirstName, Investigators.LastName, Investigators.Email, GeneRIF.weburl FROM GeneRIF LEFT JOIN Species ON Species.SpeciesId = GeneRIF.SpeciesId LEFT JOIN GeneRIFXRef ON GeneRIFXRef.GeneRIFId = GeneRIF.Id LEFT JOIN GeneCategory ON GeneRIFXRef.GeneCategoryId = GeneCategory.Id LEFT JOIN Investigators ON Investigators.Email = GeneRIF.email WHERE GeneRIF.display > 0 AND GeneRIF.VersionId = 0 AND GeneRIF.comment IS NOT NULL GROUP BY GeneRIF.comment, GeneRIF.createtime +``` + +The above query results in triples that have the form: + +```text +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> rdf:type -> gnc:GNWikiEntry +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> rdfs:label -> GeneRIFcomment +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> gnt:symbol -> GeneRIF(symbol) +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> gnt:belongsToSpecies -> gn:Species_fullname +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> dct:created -> +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> dct:creator -> gn:investigator_investigators_firstname_investigators_lastname_investigators_email +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> foaf:homepage -> GeneRIF(weburl) +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> gnt:belongsToCategory -> GeneCategory +gn:generif_e72e92f4-59b5-3bbd-ac46-a39a23f25e55 -> dct:references -> pubmed:PubMed_ID +``` +Here's an example query: + +```sparql +PREFIX rdf: +PREFIX rdfs: +PREFIX gn: +PREFIX gnc: +PREFIX gnt: +PREFIX dct: +PREFIX foaf: +PREFIX pubmed: +PREFIX ncbiTaxon: +PREFIX generif: +PREFIX xsd: +PREFIX owl: 
+ +SELECT * WHERE { + ?s rdf:type gnc:GNWikiEntry . + ?s rdfs:label "Part 2 of the Slc9a1 wiki.\r\n\r\nThe human SLC9A1 gene was cloned and mapped to human chromosome 1p Lifton et al., 1990.\r\n\r\nThe mouse Slc9a1 gene maps to chromosome 4. Morahan et al., 1993. There are three common alleles of Slc9a1, originally detected by RFLP analyses. Each of these allelic SLC9A1 proteins have different levels of antiporter activity. Morahan et al. 1994 Remarkably, intracellular pH varies between strains based on their Slc9a1 alleles. McClive et al. 1996." . + ?s gnt:symbol "Slc9a1" . + ?s gnt:belongsToSpecies gn:Mus_musculus . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f rdf:type gnc:GNWikiEntry . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f rdfs:label "Part 2 of the Slc9a1 wiki.\r\n\r\nThe human SLC9A1 gene was cloned and mapped to human chromosome 1p Lifton et al., 1990.\r\n\r\nThe mouse Slc9a1 gene maps to chromosome 4. Morahan et al., 1993. There are three common alleles of Slc9a1, originally detected by RFLP analyses. Each of these allelic SLC9A1 proteins have different levels of antiporter activity. Morahan et al. 1994 Remarkably, intracellular pH varies between strains based on their Slc9a1 alleles. McClive et al. 1996." . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f gnt:symbol "Slc9a1" . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f gnt:belongsToSpecies gn:Mus_musculus . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f dct:created "2011-06-10T12:06:30"^^xsd:datetime . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f dct:creator gn:investigator_grant_morahan_gem_waimr.uwa.edu.au . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f gnt:belongsToCategory "Biochemistry" . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f gnt:belongsToCategory "Genetic variation and alleles" . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f gnt:belongsToCategory "Physiology and function" . 
+gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f dct:references pubmed:094369 . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f dct:references pubmed:8016086 . +gn:generif_beb6fe8c-d5bc-36b7-9a9f-9030f19e605f dct:references pubmed:8550102 . +``` + + +## 'ncbi-genewiki-entries' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT GeneRIF_BASIC.GeneId, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.comment, GeneRIF_BASIC.symbol, GeneRIF_BASIC.createtime, GeneRIF_BASIC.comment, GeneRIF_BASIC.symbol, GROUP_CONCAT(PubMed_ID) AS pmids, GeneRIF_BASIC.VersionId, GeneRIF_BASIC.createtime FROM GeneRIF_BASIC GROUP BY GeneId, comment, createtime +``` + +The above query results in triples that have the form: + +```text +gn:generif_d6552bcc-c798-34ab-a533-e23323255d1b -> rdf:type -> gnc:NCBIWikiEntry +gn:generif_d6552bcc-c798-34ab-a533-e23323255d1b -> rdfs:label -> "GeneRIF_BASIC(comment)"^^xsd:string +gn:generif_d6552bcc-c798-34ab-a533-e23323255d1b -> gnt:symbol -> GeneRIF_BASIC(symbol) +gn:generif_d6552bcc-c798-34ab-a533-e23323255d1b -> gnt:hasVersion -> GeneRIF_BASIC(VersionId) +gn:generif_d6552bcc-c798-34ab-a533-e23323255d1b -> dct:created -> +gn:generif_d6552bcc-c798-34ab-a533-e23323255d1b -> dct:references -> pubmed:pmids +``` +Here's an example query: + +```sparql +PREFIX rdf: +PREFIX rdfs: +PREFIX gn: +PREFIX gnc: +PREFIX gnt: +PREFIX dct: +PREFIX foaf: +PREFIX pubmed: +PREFIX ncbiTaxon: +PREFIX generif: +PREFIX xsd: +PREFIX owl: + +SELECT * WHERE { + ?s rdf:type gnc:NCBIWikiEntry . + ?s rdfs:label #{"A1BG-cysteine-rich secretory protein 3 complex displays a similar function in protecting the circulation from a potentially harmful effect of free CRISP-3"^^xsd:string}# . + ?s gnt:symbol "A1BG" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:generif_fc8da76d-e68e-3622-b23a-d5a5daf06c99 rdf:type gnc:NCBIWikiEntry . 
+gn:generif_fc8da76d-e68e-3622-b23a-d5a5daf06c99 rdfs:label "A1BG-cysteine-rich secretory protein 3 complex displays a similar function in protecting the circulation from a potentially harmful effect of free CRISP-3"^^xsd:string . +gn:generif_fc8da76d-e68e-3622-b23a-d5a5daf06c99 gnt:symbol "A1BG" . +gn:generif_fc8da76d-e68e-3622-b23a-d5a5daf06c99 gnt:hasVersion "1" . +gn:generif_fc8da76d-e68e-3622-b23a-d5a5daf06c99 dct:created "2010-01-20T18:00:00"^^xsd:datetime . +gn:generif_fc8da76d-e68e-3622-b23a-d5a5daf06c99 dct:references pubmed:15461460 . +``` + diff --git a/rdf-documentation/genotype-metadata.md b/rdf-documentation/genotype-metadata.md new file mode 100644 index 0000000..fc2682c --- /dev/null +++ b/rdf-documentation/genotype-metadata.md @@ -0,0 +1,64 @@ +# Genotype Metadata +## 'genotypes' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT Geno.Name, Geno.Name, Geno.Chr, IFNULL(Geno.Mb, '') AS Mb, IFNULL(Geno.Mb_mm8, '') AS Mb_mm8, IFNULL(Geno.Mb_2016, '') AS Mb_2016, Geno.Sequence, Geno.Source, IF((Source2 = Source), NULL, Source2) AS Source2, Species.Fullname, Geno.chr_num, Geno.Comments FROM Geno LEFT JOIN Species USING (SpeciesId) +``` + +The above query results in triples that have the form: + +```text +gn:Geno_name_ -> rdf:type -> gnc:genotype +gn:Geno_name_ -> skos:prefLabel -> GenoName +gn:Geno_name_ -> gnt:chr -> Geno(Chr) +gn:Geno_name_ -> gnt:mb -> "Mb"^^xsd:double +gn:Geno_name_ -> gnt:mbMm8 -> "Mb_mm8"^^xsd:double +gn:Geno_name_ -> gnt:mb2016 -> "Mb_2016"^^xsd:double +gn:Geno_name_ -> gnt:hasSequence -> Geno(Sequence) +gn:Geno_name_ -> gnt:hasSource -> Geno(Source) +gn:Geno_name_ -> gnt:hasAltSourceName -> Source2 +gn:Geno_name_ -> gnt:belongsToSpecies -> gn:Species_fullname +gn:Geno_name_ -> gnt:chrNum -> "Geno(chr_num)"^^xsd:int +gn:Geno_name_ -> rdfs:comments -> Geno(Comments) +``` +Here's an example query: + +```sparql +PREFIX dct: +PREFIX gn: +PREFIX gnc: +PREFIX gnt: +PREFIX rdf: +PREFIX rdfs: +PREFIX owl: 
+PREFIX skos: +PREFIX xsd: + +SELECT * WHERE { + ?s rdf:type gnc:genotype . + ?s skos:prefLabel "D1Mit296" . + ?s gnt:chr "1" . + ?s gnt:mb #{"9.749729"^^xsd:double}# . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:D1mit296 rdf:type gnc:genotype . +gn:D1mit296 skos:prefLabel "D1Mit296" . +gn:D1mit296 gnt:chr "1" . +gn:D1mit296 gnt:mb "9.749729"^^xsd:double . +gn:D1mit296 gnt:mbMm8 "9.734943"^^xsd:double . +gn:D1mit296 gnt:mb2016 "9.73981"^^xsd:double . +gn:D1mit296 gnt:hasSequence "CTTGCATGCCTGCGGNTNCGNACTCTAGAGGATCTCCCTATTATTNTNACATNACTTTNAATTAAAATAATAATCAGATAACTTCAACNNNNTGNNCACTTCTGTCAAGTGGACAGAAATAAACATAGAGCCTAATTATCCTGAATTTNAGAGAAAAGAGTGTGTTTANCACAANAGAACAGTTATAGATCTACACACACACACACACACACACACACACACACATACAGTTTGAAAAATGCATCAGTTGAGACC" . +gn:D1mit296 gnt:hasSource "Mit" . +gn:D1mit296 gnt:belongsToSpecies gn:Mus_musculus . +gn:D1mit296 gnt:chrNum "1"^^xsd:int . +``` + diff --git a/rdf-documentation/phenotype-metadata.md b/rdf-documentation/phenotype-metadata.md new file mode 100644 index 0000000..b673bb1 --- /dev/null +++ b/rdf-documentation/phenotype-metadata.md @@ -0,0 +1,71 @@ +# Phenotypes Metadata +## 'phenotypes' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id) AS Phenotype, InbredSet.Name, PublishXRef.Id, CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id) AS Phenotype, Phenotype.Post_publication_description, Phenotype.Post_publication_abbreviation, Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, IFNULL(PublishXRef.mean, '') AS mean, PublishXRef.Locus, IFNULL(PublishXRef.LRS, '') AS lrs, IFNULL(PublishXRef.additive, '') AS additive, PublishXRef.Sequence, IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT)) AS pmid, Publication.Id FROM PublishXRef LEFT JOIN InbredSet ON InbredSet.InbredSetId = PublishXRef.InbredSetId LEFT JOIN Publication ON Publication.Id = PublishXRef.PublicationId 
LEFT JOIN Phenotype ON Phenotype.Id = PublishXRef.PhenotypeId WHERE PublishXRef.InbredSetId IN (SELECT PublishFreeze.InbredSetId FROM PublishFreeze) +``` + +The above query results in triples that have the form: + +```text +gn:trait_phenotype -> rdf:type -> gnc:phenotype +gn:trait_phenotype -> gnt:belongsToSet -> gn:setInbredset_name +gn:trait_phenotype -> rdfs:label -> PublishXRef(Id) +gn:trait_phenotype -> skos:altLabel -> Phenotype +gn:trait_phenotype -> dct:description -> PhenotypePost_publication_description +gn:trait_phenotype -> gnt:abbreviation -> Phenotype(Post_publication_abbreviation) +gn:trait_phenotype -> gnt:labCode -> Phenotype(Lab_code) +gn:trait_phenotype -> gnt:submitter -> PhenotypeSubmitter +gn:trait_phenotype -> gnt:mean -> "mean"^^xsd:double +gn:trait_phenotype -> gnt:locus -> PublishXRef(Locus) +gn:trait_phenotype -> gnt:LRS -> "lrs"^^xsd:double +gn:trait_phenotype -> gnt:additive -> "additive"^^xsd:double +gn:trait_phenotype -> gnt:sequence -> "PublishXRef(Sequence)"^^xsd:integer +gn:trait_phenotype -> dct:isReferencedBy -> pubmed:pmid +gn:trait_phenotype -> dct:contributor -> PhenotypeOwner +``` +Here's an example query: + +```sparql +PREFIX dct: +PREFIX gn: +PREFIX owl: +PREFIX gnc: +PREFIX gnt: +PREFIX skos: +PREFIX rdf: +PREFIX rdfs: +PREFIX xsd: +PREFIX pubmed: + +SELECT * WHERE { + ?s rdf:type gnc:phenotype . + ?s gnt:belongsToSet gn:setBxd . + ?s rdfs:label "10001" . + ?s skos:altLabel "BXD_10001" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:trait_bxd_10001 rdf:type gnc:phenotype . +gn:trait_bxd_10001 gnt:belongsToSet gn:setBxd . +gn:trait_bxd_10001 rdfs:label "10001" . +gn:trait_bxd_10001 skos:altLabel "BXD_10001" . +gn:trait_bxd_10001 dct:description "Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]" . +gn:trait_bxd_10001 gnt:abbreviation "CBLWT2" . +gn:trait_bxd_10001 gnt:submitter "robwilliams" . +gn:trait_bxd_10001 gnt:mean "52.13529418496525"^^xsd:double . 
+gn:trait_bxd_10001 gnt:locus "rs48756159" . +gn:trait_bxd_10001 gnt:LRS "13.4974911471087"^^xsd:double . +gn:trait_bxd_10001 gnt:additive "2.39444435069444"^^xsd:double . +gn:trait_bxd_10001 gnt:sequence "1"^^xsd:integer . +gn:trait_bxd_10001 dct:isReferencedBy pubmed:11438585 . +``` + diff --git a/rdf-documentation/probeset-metadata.md b/rdf-documentation/probeset-metadata.md new file mode 100644 index 0000000..2387145 --- /dev/null +++ b/rdf-documentation/probeset-metadata.md @@ -0,0 +1,95 @@ +# ProbeSet Metadata +## 'probeset' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name)) AS ProbeSetIdName, ProbeSet.Id, ProbeSet.Name, ProbeSet.alias, IFNULL(GeneChip.Name, '') AS GeneChipName, NULLIF(TRIM(ProbeSet.TargetId), '') AS TargetId, ProbeSet.Symbol, ProbeSet.description, NULLIF(TRIM(ProbeSet.Probe_set_target_region), '') AS Probe_set_target_region, ProbeSet.Chr, IFNULL(ProbeSet.Mb, '') AS Mb, IFNULL(ProbeSet.Mb_mm8, '') AS Mb_mm8, IFNULL(ProbeSet.Mb_2016, '') AS Mb_2016, IFNULL(ProbeSet.Probe_set_specificity, '') AS Probe_set_specificity, IFNULL(ProbeSet.Probe_set_BLAT_score, '') AS Probe_set_BLAT_score, IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '') AS Probe_set_Blat_Mb_start, IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '') AS Probe_set_Blat_Mb_start_2016, IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '') AS Probe_set_Blat_Mb_end, IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '') AS Probe_set_Blat_Mb_start_2016, ProbeSet.BlatSeq, ProbeSet.TargetSeq, IFNULL(ProbeSet.HomoloGeneID, '') AS HomoloGeneID, IFNULL(ProbeSet.UniProtID, '') AS UniProtID, IFNULL(ProbeSet.PubChem_ID, '') AS PubChem_ID, IFNULL(ProbeSet.KEGG_ID, '') AS KEGG_ID, IFNULL(ProbeSet.OMIM, '') AS OMIM, IFNULL(ProbeSet.ChEBI_ID, '') AS ChEBI_ID FROM ProbeSet LEFT JOIN GeneChip ON GeneChip.Id = ProbeSet.ChipId +``` + +The above query results in triples that have the form: + +```text +gn:probeset_probesetidname 
-> rdf:type -> gnc:probeset +gn:probeset_probesetidname -> rdfs:label -> ProbeSet(Name) +gn:probeset_probesetidname -> skos:altLabel -> ProbeSet(alias) +gn:probeset_probesetidname -> gnt:hasChip -> gn:platform_genechipname +gn:probeset_probesetidname -> gnt:hasTargetId -> TargetId +gn:probeset_probesetidname -> gnt:symbol -> ProbeSet(Symbol) +gn:probeset_probesetidname -> dct:description -> ProbeSetdescription +gn:probeset_probesetidname -> gnt:targetsRegion -> Probe_set_target_region +gn:probeset_probesetidname -> gnt:chr -> ProbeSet(Chr) +gn:probeset_probesetidname -> gnt:mb -> "Mb"^^xsd:double +gn:probeset_probesetidname -> gnt:mbMm8 -> "Mb_mm8"^^xsd:double +gn:probeset_probesetidname -> gnt:mb2016 -> "Mb_2016"^^xsd:double +gn:probeset_probesetidname -> gnt:hasSpecificity -> Probe_set_specificity +gn:probeset_probesetidname -> gnt:hasBlatScore -> Probe_set_BLAT_score +gn:probeset_probesetidname -> gnt:hasBlatMbStart -> "Probe_set_Blat_Mb_start"^^xsd:double +gn:probeset_probesetidname -> gnt:hasBlatMbStart2016 -> "Probe_set_Blat_Mb_start_2016"^^xsd:double +gn:probeset_probesetidname -> gnt:hasBlatMbEnd -> "Probe_set_Blat_Mb_end"^^xsd:double +gn:probeset_probesetidname -> gnt:hasBlatMbEnd2016 -> "Probe_set_Blat_Mb_start_2016"^^xsd:double +gn:probeset_probesetidname -> gnt:hasBlatSeq -> ProbeSetBlatSeq +gn:probeset_probesetidname -> gnt:hasTargetSeq -> ProbeSetTargetSeq +gn:probeset_probesetidname -> gnt:hasHomologeneId -> homologene:HomoloGeneID +gn:probeset_probesetidname -> gnt:hasUniprotId -> uniprot:UniProtID +gn:probeset_probesetidname -> gnt:hasPubChemId -> pubchem:PubChem_ID +gn:probeset_probesetidname -> gnt:hasKeggId -> kegg:KEGG_ID +gn:probeset_probesetidname -> gnt:hasOmimId -> +gn:probeset_probesetidname -> gnt:hasChebiId -> chebi:ChEBI_ID +``` +Here's an example query: + +```sparql +PREFIX gn: +PREFIX probeset: +PREFIX gnc: +PREFIX gnt: +PREFIX rdf: +PREFIX kegg: +PREFIX pubchem: +PREFIX omim: +PREFIX rdfs: +PREFIX uniprot: +PREFIX chebi: +PREFIX dct: 
+PREFIX owl: +PREFIX homologene: +PREFIX xsd: +PREFIX skos: + +SELECT * WHERE { + ?s rdf:type gnc:probeset . + ?s rdfs:label "100001_at" . + ?s skos:altLabel "T3g; Ctg3; Ctg-3" . + ?s gnt:hasChip gn:platform_mg_u74av2 . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:probeset_100001_at rdf:type gnc:probeset . +gn:probeset_100001_at rdfs:label "100001_at" . +gn:probeset_100001_at skos:altLabel "T3g; Ctg3; Ctg-3" . +gn:probeset_100001_at gnt:hasChip gn:platform_mg_u74av2 . +gn:probeset_100001_at gnt:symbol "Cd3g" . +gn:probeset_100001_at dct:description "CD3d antigen, gamma polypeptide" . +gn:probeset_100001_at gnt:chr "9" . +gn:probeset_100001_at gnt:mb "44.970689"^^xsd:double . +gn:probeset_100001_at gnt:mbMm8 "44.721684"^^xsd:double . +gn:probeset_100001_at gnt:mb2016 "44.778772"^^xsd:double . +gn:probeset_100001_at gnt:hasSpecificity "9.3" . +gn:probeset_100001_at gnt:hasBlatScore "186" . +gn:probeset_100001_at gnt:hasBlatMbStart "44.970689"^^xsd:double . +gn:probeset_100001_at gnt:hasBlatMbStart2016 "44.778772"^^xsd:double . +gn:probeset_100001_at gnt:hasBlatMbEnd "44.971291"^^xsd:double . +gn:probeset_100001_at gnt:hasBlatMbEnd2016 "44.778772"^^xsd:double . +gn:probeset_100001_at gnt:hasBlatSeq "CTCTGTTGCAAAATGAACAGCTGTACAGCCCCTCAAGGACCGGGAATATGACCAGTACAGCCATCTCCAAGGAAACCAACTGAGGAAGAAGTGAACTCAGCAGGACTCAGGGTGTCCCCACAATGCATTTTGGAGAGAGCCCAGACTGCAAGCAGAGAGGAAGAACTGAGGAAAACAAGCACAGCGTGGTGTT" . +gn:probeset_100001_at gnt:hasTargetSeq "ctctgttgcaaaatgaacagctgtaccagcccctcaaggaccgggaatatgaccagtacagccatctccaaggaaaccaactgaggaagaagtgaactcagcaggactcagggtgtccccccttntatccagcacccagaatcaaaacaatgcattttggagagagcccagtagagagattttcaaccctacaggtagactgcaagcagagaggaagaactgtcaaagaaattttggtcttttttttttttttnncaaaataaaataaaagcttggaggagccagtggtatgantnnnnnntgnancanttgtcaaccttgtttggggttnncagcaccccacccccagaccccccaaaaaaattcagtgaaggaaaacaagcacagcgtggtgtt" . +gn:probeset_100001_at gnt:hasHomologeneId homologene:55 . +gn:probeset_100001_at gnt:hasOmimId omim:186740 . 
+``` + diff --git a/rdf-documentation/publication-metadata.md b/rdf-documentation/publication-metadata.md new file mode 100644 index 0000000..018f6da --- /dev/null +++ b/rdf-documentation/publication-metadata.md @@ -0,0 +1,51 @@ +# Publications Metadata +## 'publication' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT)) AS pmid, Publication.Id, IFNULL(PubMed_ID, '') AS pubmedId, Publication.Title, Publication.Journal, Publication.Volume, Publication.Pages, Publication.Month, IF(Publication.Year = 0, NULL, Publication.Year) AS Year, Publication.Authors, Publication.Abstract FROM Publication +``` + +The above query results in triples that have the form: + +```text +pubmed:pmid -> rdf:type -> fabio:ResearchPaper +pubmed:pmid -> fabio:hasPubMedId -> pubmed:pubmedId +pubmed:pmid -> dct:title -> Publication(Title) +pubmed:pmid -> fabio:Journal -> Publication(Journal) +pubmed:pmid -> prism:volume -> Publication(Volume) +pubmed:pmid -> fabio:page -> Publication(Pages) +pubmed:pmid -> prism:publicationDate -> "Publication(Month)"^^xsd:gMonth +pubmed:pmid -> fabio:hasPublicationYear -> "Year"^^xsd:gYear +pubmed:pmid -> dct:abstract -> PublicationAbstract +pubmed:pmid -> dct:creator -> PublicationAuthors +``` +Here's an example query: + +```sparql +PREFIX gnt: +PREFIX fabio: +PREFIX dct: +PREFIX prism: +PREFIX gn: +PREFIX gnc: +PREFIX pubmed: +PREFIX rdfs: +PREFIX xsd: +PREFIX rdf: + +SELECT * WHERE { + ?s rdf:type fabio:ResearchPaper . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:unpublished_1 rdf:type fabio:ResearchPaper . 
+``` + diff --git a/rdf-documentation/species-metadata.md b/rdf-documentation/species-metadata.md new file mode 100644 index 0000000..79e897c --- /dev/null +++ b/rdf-documentation/species-metadata.md @@ -0,0 +1,375 @@ +# Species Metadata +## 'inbred-set' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT InbredSet.Name, InbredSet.FullName, InbredSet.Name, InbredSet.GeneticType, InbredSet.Family, MappingMethod.Name, InbredSet.InbredSetCode, Species.Fullname, IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '') AS genotypeP, IF ((SELECT GenoFreeze.Name FROM GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'DNA Markers and SNPs', '') AS phenotypeP, (SELECT GROUP_CONCAT(DISTINCT Tissue.Short_Name SEPARATOR'||') AS MolecularTraits FROM ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species WHERE ProbeFreeze.TissueId = Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id ORDER BY Tissue.Name) AS molecularTrait FROM InbredSet LEFT JOIN Species ON InbredSet.SpeciesId=Species.Id LEFT JOIN MappingMethod ON InbredSet.MappingMethodId=MappingMethod.Id +``` + +The above query results in triples that have the form: + +```text +gn:setInbredset_name -> rdf:type -> gnc:set +gn:setInbredset_name -> rdfs:label -> InbredSet(FullName) +gn:setInbredset_name -> skos:altLabel -> InbredSet(Name) +gn:setInbredset_name -> gnt:geneticType -> InbredSet(GeneticType) +gn:setInbredset_name -> gnt:family -> InbredSet(Family) +gn:setInbredset_name -> gnt:mappingMethod -> MappingMethod(Name) +gn:setInbredset_name -> gnt:code -> InbredSet(InbredSetCode) +gn:setInbredset_name -> gnt:belongsToSpecies -> gn:Species_fullname +gn:setInbredset_name -> gnt:genotype -> genotypeP +gn:setInbredset_name -> gnt:phenotype -> phenotypeP +gn:setInbredset_name -> gnt:hasTissue -> gn:tissue_moleculartrait +``` 
+Here's an example query: + +```sparql +PREFIX gn: +PREFIX gnc: +PREFIX owl: +PREFIX gnt: +PREFIX skos: +PREFIX rdf: +PREFIX rdfs: +PREFIX taxon: + +SELECT * WHERE { + ?s rdf:type gnc:set . + ?s rdfs:label "BXD Family" . + ?s skos:altLabel "BXD" . + ?s gnt:geneticType "riset" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:setBxd rdf:type gnc:set . +gn:setBxd rdfs:label "BXD Family" . +gn:setBxd skos:altLabel "BXD" . +gn:setBxd gnt:geneticType "riset" . +gn:setBxd gnt:family "Reference Populations (replicate average, SE, N)" . +gn:setBxd gnt:mappingMethod "BXD" . +gn:setBxd gnt:code "BXD" . +gn:setBxd gnt:belongsToSpecies gn:Mus_musculus . +gn:setBxd gnt:genotype "Traits and Cofactors" . +gn:setBxd gnt:phenotype "DNA Markers and SNPs" . +gn:setBxd gnt:hasTissue gn:tissue_a1c . +gn:setBxd gnt:hasTissue gn:tissue_acc . +gn:setBxd gnt:hasTissue gn:tissue_adr . +gn:setBxd gnt:hasTissue gn:tissue_amg . +gn:setBxd gnt:hasTissue gn:tissue_bebv . +gn:setBxd gnt:hasTissue gn:tissue_bla . +gn:setBxd gnt:hasTissue gn:tissue_brmet . +gn:setBxd gnt:hasTissue gn:tissue_brmicrorna . +gn:setBxd gnt:hasTissue gn:tissue_brn . +gn:setBxd gnt:hasTissue gn:tissue_cart . +gn:setBxd gnt:hasTissue gn:tissue_cb . +gn:setBxd gnt:hasTissue gn:tissue_cbc . +gn:setBxd gnt:hasTissue gn:tissue_ctx . +gn:setBxd gnt:hasTissue gn:tissue_dfc . +gn:setBxd gnt:hasTissue gn:tissue_drg . +gn:setBxd gnt:hasTissue gn:tissue_ec . +gn:setBxd gnt:hasTissue gn:tissue_emb . +gn:setBxd gnt:hasTissue gn:tissue_eye . +gn:setBxd gnt:hasTissue gn:tissue_fat . +gn:setBxd gnt:hasTissue gn:tissue_fecmet . +gn:setBxd gnt:hasTissue gn:tissue_femur . +gn:setBxd gnt:hasTissue gn:tissue_gtex_aor . +gn:setBxd gnt:hasTissue gn:tissue_gtex_atr . +gn:setBxd gnt:hasTissue gn:tissue_gtex_blo . +gn:setBxd gnt:hasTissue gn:tissue_gtex_bonm . +gn:setBxd gnt:hasTissue gn:tissue_gtex_bre . +gn:setBxd gnt:hasTissue gn:tissue_gtex_cau . +gn:setBxd gnt:hasTissue gn:tissue_gtex_cer . 
+gn:setBxd gnt:hasTissue gn:tissue_gtex_cerv . +gn:setBxd gnt:hasTissue gn:tissue_gtex_cml . +gn:setBxd gnt:hasTissue gn:tissue_gtex_col . +gn:setBxd gnt:hasTissue gn:tissue_gtex_colsig . +gn:setBxd gnt:hasTissue gn:tissue_gtex_cor . +gn:setBxd gnt:hasTissue gn:tissue_gtex_ebv . +gn:setBxd gnt:hasTissue gn:tissue_gtex_eso . +gn:setBxd gnt:hasTissue gn:tissue_gtex_esogas . +gn:setBxd gnt:hasTissue gn:tissue_gtex_fal . +gn:setBxd gnt:hasTissue gn:tissue_gtex_fro . +gn:setBxd gnt:hasTissue gn:tissue_gtex_muc . +gn:setBxd gnt:hasTissue gn:tissue_gtex_ner . +gn:setBxd gnt:hasTissue gn:tissue_gtex_pan . +gn:setBxd gnt:hasTissue gn:tissue_gtex_put . +gn:setBxd gnt:hasTissue gn:tissue_gtex_sintter . +gn:setBxd gnt:hasTissue gn:tissue_gtex_skinex . +gn:setBxd gnt:hasTissue gn:tissue_gtex_skisex . +gn:setBxd gnt:hasTissue gn:tissue_gtex_sn . +gn:setBxd gnt:hasTissue gn:tissue_gtex_sto . +gn:setBxd gnt:hasTissue gn:tissue_gtex_sub . +gn:setBxd gnt:hasTissue gn:tissue_gtex_tf . +gn:setBxd gnt:hasTissue gn:tissue_gtex_thy . +gn:setBxd gnt:hasTissue gn:tissue_gtex_tib . +gn:setBxd gnt:hasTissue gn:tissue_gtex_vag . +gn:setBxd gnt:hasTissue gn:tissue_gtex_ven . +gn:setBxd gnt:hasTissue gn:tissue_gtex_vis . +gn:setBxd gnt:hasTissue gn:tissue_gtex_who . +gn:setBxd gnt:hasTissue gn:tissue_gut . +gn:setBxd gnt:hasTissue gn:tissue_hea . +gn:setBxd gnt:hasTissue gn:tissue_hip . +gn:setBxd gnt:hasTissue gn:tissue_hippreccel . +gn:setBxd gnt:hasTissue gn:tissue_hipprot . +gn:setBxd gnt:hasTissue gn:tissue_hip_mirna . +gn:setBxd gnt:hasTissue gn:tissue_hsc . +gn:setBxd gnt:hasTissue gn:tissue_hyp . +gn:setBxd gnt:hasTissue gn:tissue_ifra_ctx . +gn:setBxd gnt:hasTissue gn:tissue_ipc . +gn:setBxd gnt:hasTissue gn:tissue_isl . +gn:setBxd gnt:hasTissue gn:tissue_itc . +gn:setBxd gnt:hasTissue gn:tissue_kid . +gn:setBxd gnt:hasTissue gn:tissue_lathab . +gn:setBxd gnt:hasTissue gn:tissue_lcm_brreg . +gn:setBxd gnt:hasTissue gn:tissue_leaf . +gn:setBxd gnt:hasTissue gn:tissue_liv . 
+gn:setBxd gnt:hasTissue gn:tissue_livdnam . +gn:setBxd gnt:hasTissue gn:tissue_livmet . +gn:setBxd gnt:hasTissue gn:tissue_livpro . +gn:setBxd gnt:hasTissue gn:tissue_lung . +gn:setBxd gnt:hasTissue gn:tissue_m1c . +gn:setBxd gnt:hasTissue gn:tissue_mam . +gn:setBxd gnt:hasTissue gn:tissue_mamtum . +gn:setBxd gnt:hasTissue gn:tissue_mbr . +gn:setBxd gnt:hasTissue gn:tissue_md . +gn:setBxd gnt:hasTissue gn:tissue_methyl . +gn:setBxd gnt:hasTissue gn:tissue_mfc . +gn:setBxd gnt:hasTissue gn:tissue_musmet . +gn:setBxd gnt:hasTissue gn:tissue_nac . +gn:setBxd gnt:hasTissue gn:tissue_nbcb . +gn:setBxd gnt:hasTissue gn:tissue_neutrophil . +gn:setBxd gnt:hasTissue gn:tissue_ocl . +gn:setBxd gnt:hasTissue gn:tissue_ofc . +gn:setBxd gnt:hasTissue gn:tissue_of_ctx . +gn:setBxd gnt:hasTissue gn:tissue_ova . +gn:setBxd gnt:hasTissue gn:tissue_pcg . +gn:setBxd gnt:hasTissue gn:tissue_pfc . +gn:setBxd gnt:hasTissue gn:tissue_pg . +gn:setBxd gnt:hasTissue gn:tissue_pln . +gn:setBxd gnt:hasTissue gn:tissue_pl_ctx . +gn:setBxd gnt:hasTissue gn:tissue_pons . +gn:setBxd gnt:hasTissue gn:tissue_pro . +gn:setBxd gnt:hasTissue gn:tissue_ret . +gn:setBxd gnt:hasTissue gn:tissue_ret_mirna . +gn:setBxd gnt:hasTissue gn:tissue_ret_sc-rna-s . +gn:setBxd gnt:hasTissue gn:tissue_s1c . +gn:setBxd gnt:hasTissue gn:tissue_sal . +gn:setBxd gnt:hasTissue gn:tissue_sg . +gn:setBxd gnt:hasTissue gn:tissue_skm . +gn:setBxd gnt:hasTissue gn:tissue_spi . +gn:setBxd gnt:hasTissue gn:tissue_spl . +gn:setBxd gnt:hasTissue gn:tissue_stc . +gn:setBxd gnt:hasTissue gn:tissue_str . +gn:setBxd gnt:hasTissue gn:tissue_tc . +gn:setBxd gnt:hasTissue gn:tissue_tes . +gn:setBxd gnt:hasTissue gn:tissue_tes_dna_met . +gn:setBxd gnt:hasTissue gn:tissue_thelp . +gn:setBxd gnt:hasTissue gn:tissue_thy . +gn:setBxd gnt:hasTissue gn:tissue_treg . +gn:setBxd gnt:hasTissue gn:tissue_ute . +gn:setBxd gnt:hasTissue gn:tissue_v1 . +gn:setBxd gnt:hasTissue gn:tissue_vfc . +gn:setBxd gnt:hasTissue gn:tissue_vta . 
+gn:setBxd gnt:hasTissue gn:tissue_wb . +gn:setBxd gnt:hasTissue gn:tissue_wbc . +gn:setBxd gnt:hasTissue gn:tissue_wbpr . +gn:setBxd gnt:hasTissue gn:tissue_wfat . +gn:setBxd gnt:hasTissue gn:tissue_wfat_pro . +``` + + +## 'species' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT Species.Fullname, Species.SpeciesName, Species.Name, Species.MenuName, Species.FullName, Species.Family, Species.TaxonomyId FROM Species +``` + +The above query results in triples that have the form: + +```text +gn:Species_fullname -> rdf:type -> gnc:species +gn:Species_fullname -> skos:label -> Species(SpeciesName) +gn:Species_fullname -> skos:altLabel -> Species(Name) +gn:Species_fullname -> rdfs:label -> Species(MenuName) +gn:Species_fullname -> gnt:binomialName -> Species(FullName) +gn:Species_fullname -> gnt:family -> Species(Family) +gn:Species_fullname -> gnt:organism -> taxon:Species(TaxonomyId) +``` +Here's an example query: + +```sparql +PREFIX gn: +PREFIX gnc: +PREFIX owl: +PREFIX gnt: +PREFIX skos: +PREFIX rdf: +PREFIX rdfs: +PREFIX taxon: + +SELECT * WHERE { + ?s rdf:type gnc:species . + ?s skos:label "Mouse" . + ?s skos:altLabel "mouse" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:Mus_musculus rdf:type gnc:species . +gn:Mus_musculus skos:label "Mouse" . +gn:Mus_musculus skos:altLabel "mouse" . +gn:Mus_musculus rdfs:label "Mouse (Mus musculus, mm10)" . +gn:Mus_musculus gnt:binomialName "Mus musculus" . +gn:Mus_musculus gnt:family "Vertebrates" . +gn:Mus_musculus gnt:organism taxon:10090 . 
+``` + + +## 'strain' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT Strain.Name, Species.Fullname, Strain.Name, IF ((Strain.Name2 != Strain.Name), Strain.Name2, '') AS Name2, IF ((Strain.Alias != Strain.Name), Strain.Alias, '') AS Alias, IF ((Strain.Symbol != Strain.Name), Strain.Symbol, '') AS Symbol FROM Strain LEFT JOIN Species ON Strain.SpeciesId = Species.SpeciesId +``` + +The above query results in triples that have the form: + +```text +gn:Strain_name_ -> rdf:type -> gnc:strain +gn:Strain_name_ -> gnt:belongsToSpecies -> gn:Species_fullname +gn:Strain_name_ -> rdfs:label -> StrainName +gn:Strain_name_ -> rdfs:label -> Name2 +gn:Strain_name_ -> gnt:alias -> Alias +gn:Strain_name_ -> gnt:symbol -> Symbol +``` +Here's an example query: + +```sparql +PREFIX gn: +PREFIX gnc: +PREFIX owl: +PREFIX gnt: +PREFIX skos: +PREFIX rdf: +PREFIX rdfs: +PREFIX taxon: + +SELECT * WHERE { + ?s rdf:type gnc:strain . + ?s gnt:belongsToSpecies gn:Mus_musculus . + ?s rdfs:label "B6D2F1" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:B6d2f1 rdf:type gnc:strain . +gn:B6d2f1 gnt:belongsToSpecies gn:Mus_musculus . +gn:B6d2f1 rdfs:label "B6D2F1" . +``` + + +## 'mapping-method' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT MappingMethod.Name, MappingMethod.Name FROM MappingMethod +``` + +The above query results in triples that have the form: + +```text +gn:mappingMethod_mappingmethod_name -> rdf:type -> gnc:mappingMethod +gn:mappingMethod_mappingmethod_name -> rdfs:label -> MappingMethod(Name) +``` +Here's an example query: + +```sparql +PREFIX gn: +PREFIX gnc: +PREFIX owl: +PREFIX gnt: +PREFIX skos: +PREFIX rdf: +PREFIX rdfs: +PREFIX taxon: + +SELECT * WHERE { + ?s rdf:type gnc:mappingMethod . + ?s rdfs:label "qtlreaper" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:mappingMethod_qtlreaper rdf:type gnc:mappingMethod . +gn:mappingMethod_qtlreaper rdfs:label "qtlreaper" . 
+``` + + +## 'avg-method' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT AvgMethod.Name, AvgMethod.Normalization FROM AvgMethod +``` + +The above query results in triples that have the form: + +```text +gn:avgmethod_avgmethod_name -> rdf:type -> gnc:avgMethod +gn:avgmethod_avgmethod_name -> rdfs:label -> AvgMethod(Normalization) +``` +Here's an example query: + +```sparql +PREFIX gn: +PREFIX gnc: +PREFIX owl: +PREFIX gnt: +PREFIX skos: +PREFIX rdf: +PREFIX rdfs: +PREFIX taxon: + +SELECT * WHERE { + ?s rdf:type gnc:avgMethod . + ?s rdfs:label "MAS5" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:avgmethod_mas5 rdf:type gnc:avgMethod . +gn:avgmethod_mas5 rdfs:label "MAS5" . +``` + diff --git a/rdf-documentation/tissue-metadata.md b/rdf-documentation/tissue-metadata.md new file mode 100644 index 0000000..3c4fae9 --- /dev/null +++ b/rdf-documentation/tissue-metadata.md @@ -0,0 +1,41 @@ +# Tissue Metadata +## 'tissue' + +## Generated Triples: + +The following SQL query was executed: + +```sql +SELECT Tissue.Short_Name, Tissue.Name FROM Tissue +``` + +The above query results in triples that have the form: + +```text +gn:tissue_tissue_short_name -> rdf:type -> gnc:tissue +gn:tissue_tissue_short_name -> rdfs:label -> Tissue(Name) +``` +Here's an example query: + +```sparql +PREFIX gn: +PREFIX gnt: +PREFIX skos: +PREFIX gnc: +PREFIX rdf: +PREFIX rdfs: + +SELECT * WHERE { + ?s rdf:type gnc:tissue . + ?s rdfs:label "Brain mRNA" . + ?s ?p ?o . +} +``` + +Expected Result: + +```rdf +gn:tissue_brn rdf:type gnc:tissue . +gn:tissue_brn rdfs:label "Brain mRNA" . +``` + -- cgit v1.2.3