From 9d2fd59a7b4c7a6a269c33b16e956b4b5a975267 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Thu, 7 Dec 2023 16:14:45 +0300 Subject: Update documentation. Signed-off-by: Munyoki Kilyungi --- api/GN3-REST-API-Implementation.md | 55 +++++++++++++--- rdf-documentation/dataset-metadata.md | 32 ++++++---- rdf-documentation/genbank-metadata.md | 6 +- rdf-documentation/generif-metadata.md | 12 ++-- rdf-documentation/genotype-metadata.md | 7 +- rdf-documentation/phenotype-metadata.md | 19 +++--- rdf-documentation/probeset-metadata.md | 102 ++++++++++++++++++------------ rdf-documentation/publication-metadata.md | 2 +- rdf-documentation/strains.md | 12 ++-- rdf-documentation/tissue-metadata.md | 2 +- 10 files changed, 159 insertions(+), 90 deletions(-) diff --git a/api/GN3-REST-API-Implementation.md b/api/GN3-REST-API-Implementation.md index 31248fb..200ca72 100644 --- a/api/GN3-REST-API-Implementation.md +++ b/api/GN3-REST-API-Implementation.md @@ -198,13 +198,18 @@ Example Result: ``` { "@context": { + "GoTree": "gnt:hasGoTreeValue", "accessRights": "dct:accessRights", "accessionId": "dct:identifier", "acknowledgement": "gnt:hasAcknowledgement", "altLabel": "skos:altLabel", "caseInfo": "gnt:hasCaseInfo", + "citation": "dct:isReferencedBy", "classifiedUnder": "xkos:classifiedUnder", + "contactName": "foaf:name", "contactPoint": "dcat:contactPoint", + "contactWebUrl": "foaf:homepage", + "contributors": "dct:creator", "created": "dct:created", "data": "@graph", "dcat": "http://www.w3.org/ns/dcat#", @@ -216,39 +221,71 @@ Example Result: "geoSeriesId": "gnt:hasGeoSeriesId", "gnt": "http://genenetwork.org/term/", "id": "@id", - "inbredSet": "ex:belongsToInbredSet", - "info": "ex:info", + "inbredSet": "gnt:belongsToGroup", "label": "rdfs:label", "normalization": "gnt:usesNormalization", "notes": "gnt:hasNotes", "organization": "foaf:Organization", - "platform": "ex:platform", + "platform": "gnt:usesPlatform", + "platformInfo": "gnt:hasPlatformInfo", "prefLabel": 
"skos:prefLabel", "processingInfo": "gnt:hasDataProcessingInfo", "rdfs": "http://www.w3.org/2000/01/rdf-schema#", "skos": "http://www.w3.org/2004/02/skos/core#", - "tissue": "ex:tissue", + "specifics": "gnt:hasContentInfo", + "tissue": "gnt:hasTissue", + "tissueInfo": "gnt:hasTissueInfo", "title": "dct:title", "type": "@type", "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#" }, + "accessRights": "public", "accessionId": "GN11", - "contactPoint": "Michael Miles", + "acknowledgement": "
Data for the microarrays were generously provided by support from NIAAA INIA grants to RWW and Thomas Sutter. Support for sample acquisition and WebQTL have been provided by NIMH Human Brain Project, and the Dunavant Chair of Excellence, University of Tennessee Health Science Center. All arrays were processed at the University of Memphis by Dr. Thomas Sutter and colleagues with support of the INIA Bioanalytical Core.
", + "altLabel": "INIA Brain mRNA M430 (Feb04)", + "caseInfo": "

We have exploited a set of BXD recombinant inbred strains. The parental strains from which all BXD lines are derived are C57BL/6J B and DBA/2J D. Both B and D strains have been almost fully sequenced 8x coverage for B by a public consortium and approximately 1.5x coverage for D by Celera.

\n\n

BXD1 through BXD32 were produced by Benjamin A. Taylor starting in the late 1970s. BXD33 through BXD42 were also produced by Taylor, but from a second set of crosses initiated in the early 1990s. These strains are all available from the Jackson Laboratory, Bar Harbor, Maine.

", + "classifiedUnder": { + "id": "http://genenetwork.org/category/Probeset", + "prefLabel": "mRNA Assay Datasets" + }, + "contactPoint": { + "contactName": "Michael Miles", + "contactWebUrl": "http://www.brainchip.vcu.edu", + "id": "http://genenetwork.org/id/investigatorMichael_miles_mfmiles_vcu.edu" + }, + "created": "2004-02-01", + "description": "

This October 2004 data freeze provides initial estimates of mRNA expression in brains of adult BXD recombinant inbred mice measured using Affymetrix M430AB microarrays. In contrast to the U74Av2 array, this new data set provides broader coverage ~45,000 transcripts but does not include replicates or as many strains 25 vs 35. Data were generated at UTHSC and the University of Memphis with support from grants from the NIAAA Integrative Neuroscience Initiative on Alcoholism INIA. Data were processed using the PDNN method of Zhang. To simplify comparison among transforms, PDNN values of each array were adjusted to an average of 8 units and a variance of 2 units.

", "id": "http://genenetwork.org/id/Ibr_m_0204_m", - "inbredSet": "BXD Family", + "inbredSet": { + "id": "http://genenetwork.org/id/setBxd", + "label": "BXD Family" + }, "label": "IBR_M_0204_M", - "normalization": "MAS5", + "normalization": { + "id": "http://genenetwork.org/id/avgMethodMas5", + "label": "MAS5" + }, + "notes": "

This text file originally generated by RWW, YHQ, and EJC, Oct 2004. Updated by RWW, Nov 5, 2004.

", + "organization": "Virginia Commonwealth University", "platform": { + "classifiedUnder": { + "id": "http://genenetwork.org/id/Mus_musculus" + }, + "geoSeriesId": { + "id": "http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GPL339_340" + }, + "gnt:hasGOTreeValue": "GN_GPL339_340", "id": "http://genenetwork.org/id/platformMoe430", - "info": "[...]", "label": "Affy Mouse Genome 430A, 430B, 430A 2.0 (GPL339,GPL340)", + "platformInfo": "

Affymetrix 430A and 430B GeneChip Set: Expression data were generated using 430AB array pairs. The chromosomal locations of probe sets were determined by BLAT analysis of concatenated probe sequences using the Mouse Genome Sequencing Consortium May 2004 mm5 assembly. This BLAT analysis is performed periodically by Yanhua Qu as each new build of the mouse genome is released. We thank Yan Cui UTHSC for allowing us to use his Linux cluster to perform this analysis. It is possible to confirm the BLAT alignment results yourself simply by clicking on either the Verify UCSC and Verify Ensembl links in the Trait Data and Editing Form right side of the Location line.

", "prefLabel": "MOE430", "type": "http://genenetwork.org/category/geneChip" }, + "processingInfo": "
Probe cell level data from the CEL file: These CEL values produced by GCOS are 75% quantiles from a set of 91 pixel values per cell.
\n\n
\n\n
\n\n
\n

Probe set data: The original expression values in the Affymetrix CEL files were read into PerfectMatch to generate the normalized PDNN data set.

\n\n

PDNN values of each array were subsequently normalized to achieve a mean value of 8 units and a variance of 2 units.

\n\n

When necessary, we computed the arithmetic mean for technical replicates and treated these as single samples. We then computed the arithmetic mean for the set of 2 to 5 biological replicates for each strain.

\n
\n\n

About the array probe sets names:

\n\n
\n

Most probe sets on the mouse 430A and 430B arrays consist of a total of 22 probes, divided into 11 perfect match PM probes and 11 mismatch MM controls. Each set of these 25-nucleotide-long probes has an identifier code that includes a unique number, an underscore character, several suffix characters that highlight design features, and a final A or B character to specify the array pair member. The most common probe set suffix is at. This code indicates that the probes should hybridize relatively selectively with the complementary anti-sense target i.e., the complementary RNA produced from a single gene.

\n
", "tissue": { "id": "http://genenetwork.org/id/tissueBrn", - "info": "[...]", "label": "Brain mRNA", + "tissueInfo": "

The data set consists of a single batch of Affymetrix mouse expression 430A and 430B GeneChip array pairs. Each AB pair was hybridized in sequence A array first, B array second with a pool of brain tissue forebrain minus olfactory bulb, plus the entire midbrain taken from three adult animals of closely matched age and the same sex. RNA was extracted at UTHSC by Lu Lu, Zhiping Jia, and Hongtao Zhai. All samples were subsequently processed in the INIA Bioanalytical Core at the W. Harry Feinstone Center of Excellence by Thomas R. Sutter and colleagues at the University of Memphis. Before running the main batch of 30 pairs of array, we ran four "test" samples one male and one female pool from each of the two parental strains, C57BL/6J and DBA/2J. The main set of 30 array pairs includes the same four samples in other words we have four technical replicates, two F1 hybrid sample each run two times for a within-batch technical replication, and 22 BXD strains. The data set therefore consists of one male and one female pool from C57BL/6J, DBA/2J, the B6D2F1 hybrid, 11 female BXD samples, and 11 male BXD samples. We should note that the four technical replicates between batches were eventually combined with a correction for a highly significant batch effect. This was done at both the probe and probe set levels to "align" the test batch values with the two main batches. The ratio of the probe average in the four test arrays to the average of the same probe in the four corresponding main batch arrays was used as a correction factor. The F1 within-batch technical replicates were simply averaged. In the next batch we will reverse the sex of the BXD samples to achieve a balance with at least 22 BXD strains with one male and one female sample each.

\n\n

The table below lists the arrays by strain, sex, age, sample identifier, and date results were obtained from the Bioanalytical Core at the University of Memphis. Each array was hybridized to a pool of mRNA from three mice.

\n\n\n\t\n\t\t\n\t\t\t\n\t\t\n\t\n
\n\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t
\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t
StrainSexAgeSampleIDDate
B6D2F1F127919-F1Jan04
B6D2F1F127919-F2Jan04
B6D2F1M127920-F1Jan04
B6D2F1M127920-F2Jan04
C57BL/6JF65903-F1Nov03
C57BL/6JF65903-F2Jan03
C57BL/6JM66906-F1Nov03
C57BL/6JM66906-F2Jan04
DBA/2JF60917-F1Nov03
DBA/2JF60917-F2Jan04
DBA/2JM60918-F1Nov03
DBA/2JM60918-F2Jan04
BXD1F95895-F1Jan04
BXD5M71728-F1Jan04
BXD6M92902-F1Jan04
BXD8F72S167-F1Jan04
BXD9M86909-F1Jan04
BXD12M64897-F1Jan04
BXD13F86748-F1Jan04
BXD14M91912-F1Jan04
BXD18F108771-F1Jan04
BXD19F56S236-F1Jan04
BXD21F67740-F1Jan04
BXD23F88815-F1Jan04
BXD24M71913-F1Jan04
BXD25F74S373-F1Jan04
BXD28F79910-F1Jan04
BXD29F76693-F1Jan04
BXD32F93898-F1Jan04
BXD33M77915-F1Jan04
BXD34M72916-F1Jan04
BXD36M77926-F1Jan04
BXD38M69731-F1Jan04
BXD42M97936-F1Jan04
\n\t\t\t
", "type": "http://genenetwork.org/category/tissue" }, "type": "dcat:Dataset" diff --git a/rdf-documentation/dataset-metadata.md b/rdf-documentation/dataset-metadata.md index 59246b2..d5f4f2b 100644 --- a/rdf-documentation/dataset-metadata.md +++ b/rdf-documentation/dataset-metadata.md @@ -6,7 +6,7 @@ The following SQL query was executed: ```sql -SELECT InfoFiles.InfoPageName, IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', ''))) AS DatasetType, InfoFiles.InfoPageName, IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, '')) AS DatasetFullName, Datasets.DatasetName AS DatasetGroup, Datasets.PublicationTitle, InfoFiles.InfoFileTitle, IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, ''))) AS createTimeGenoFreeze, Investigators.FirstName, Investigators.LastName, Investigators.Email, Organizations.OrganizationName, InfoFiles.GN_AccesionId, DatasetStatus.DatasetStatusName, InbredSet.Name AS InbredSetName, Tissue.Short_Name, AvgMethod.Name AS AvgMethodName, AvgMethod.Name AS AvgMethodName, GeneChip.Name AS GeneChip, Datasets.Summary, IFNULL(Datasets.GeoSeries, '') AS GeoSeries, Datasets.AboutTissue, InfoFiles.Specifics, Datasets.AboutCases, Datasets.AboutPlatform, Datasets.AboutDataProcessing, Datasets.Notes, Datasets.ExperimentDesign, Datasets.Contributors, Datasets.Citation, Datasets.Acknowledgment FROM InfoFiles LEFT JOIN PublishFreeze ON InfoFiles.InfoPageName = PublishFreeze.Name LEFT JOIN GenoFreeze ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN ProbeSetFreeze ON InfoFiles.InfoPageName = ProbeSetFreeze.Name LEFT JOIN InbredSet ON InfoFiles.InbredSetId = InbredSet.InbredSetId LEFT JOIN Species ON InfoFiles.SpeciesId = Species.SpeciesId LEFT JOIN Datasets USING (DatasetId) LEFT JOIN DatasetStatus USING (DatasetStatusId) LEFT JOIN Tissue USING (TissueId) LEFT JOIN Investigators USING (InvestigatorId) LEFT 
JOIN AvgMethod USING (AvgMethodId) LEFT JOIN Organizations USING (OrganizationId) LEFT JOIN GeneChip USING (GeneChipId) WHERE GN_AccesionId IS NOT NULL +SELECT InfoFiles.InfoPageName, IF(GenoFreeze.Id IS NOT NULL, 'gnc:Genotype', IF(PublishFreeze.Id IS NOT NULL, 'gnc:Phenotype', IF(ProbeSetFreeze.Name IS NOT NULL, 'gnc:Probeset', ''))) AS DatasetType, InfoFiles.InfoPageName, IFNULL(GenoFreeze.FullName, IFNULL(PublishFreeze.FullName, '')) AS DatasetFullName, Datasets.DatasetName AS DatasetGroup, Datasets.PublicationTitle, InfoFiles.InfoFileTitle, IFNULL(GenoFreeze.CreateTime, IFNULL(PublishFreeze.CreateTime, IFNULL(ProbeSetFreeze.CreateTime, ''))) AS createTimeGenoFreeze, Investigators.FirstName, Investigators.LastName, Investigators.Email, Organizations.OrganizationName, InfoFiles.GN_AccesionId, DatasetStatus.DatasetStatusName, IFNULL(InbredSet.Name, IFNULL(PublishInbredSet.Name, GenoInbredSet.Name)) AS InbredSetName, Tissue.Short_Name, AvgMethod.Name AS AvgMethodName, AvgMethod.Name AS AvgMethodName, GeneChip.Name AS GeneChip, Datasets.Summary, IFNULL(Datasets.GeoSeries, '') AS GeoSeries, Datasets.AboutTissue, InfoFiles.Specifics, Datasets.AboutCases, Datasets.AboutPlatform, Datasets.AboutDataProcessing, Datasets.Notes, Datasets.ExperimentDesign, Datasets.Contributors, Datasets.Citation, Datasets.Acknowledgment FROM InfoFiles LEFT JOIN PublishFreeze ON InfoFiles.InfoPageName = PublishFreeze.Name LEFT JOIN GenoFreeze ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN ProbeSetFreeze ON InfoFiles.InfoPageName = ProbeSetFreeze.Name LEFT JOIN InbredSet ON InfoFiles.InbredSetId = InbredSet.InbredSetId LEFT JOIN Species ON InfoFiles.SpeciesId = Species.SpeciesId LEFT JOIN Datasets USING (DatasetId) LEFT JOIN DatasetStatus USING (DatasetStatusId) LEFT JOIN Tissue USING (TissueId) LEFT JOIN Investigators USING (InvestigatorId) LEFT JOIN AvgMethod USING (AvgMethodId) LEFT JOIN Organizations USING (OrganizationId) LEFT JOIN GeneChip USING (GeneChipId) LEFT JOIN InbredSet 
PublishInbredSet ON PublishFreeze.InbredSetId = PublishInbredSet.InbredSetId LEFT JOIN InbredSet GenoInbredSet ON GenoFreeze.InbredSetId = GenoInbredSet.InbredSetId WHERE GN_AccesionId IS NOT NULL ``` The above query results to triples that have the form: @@ -23,7 +23,7 @@ gn:Infofiles_infopagename_ -> dcat:contactPoint -> gn:investigatorInvestigators_ gn:Infofiles_infopagename_ -> foaf:Organization -> Organizations(OrganizationName) gn:Infofiles_infopagename_ -> dct:identifier -> GNInfoFiles(GN_AccesionId) gn:Infofiles_infopagename_ -> dct:accessRights -> datasetstatus(datasetstatusname) -gn:Infofiles_infopagename_ -> xkos:classifiedUnder -> gn:setInbredset_inbredsetname +gn:Infofiles_infopagename_ -> gnt:belongsToGroup -> gn:setInbredsetname gn:Infofiles_infopagename_ -> gnt:hasTissue -> gn:tissueTissue_short_name gn:Infofiles_infopagename_ -> gnt:usesNormalization -> gn:avgMethodAvgmethod_avgmethodname gn:Infofiles_infopagename_ -> gnt:usesPlatform -> gn:platformGenechip_genechip @@ -79,7 +79,7 @@ gn:Br_u_0803_m dcat:contactPoint gn:investigatorRobert_williams_rwilliams_uthsc. gn:Br_u_0803_m foaf:Organization "University of Tennessee Health Science Center" . gn:Br_u_0803_m dct:identifier "GN1" . gn:Br_u_0803_m dct:accessRights "public" . -gn:Br_u_0803_m xkos:classifiedUnder gn:setBxd . +gn:Br_u_0803_m gnt:belongsToGroup gn:setBxd . gn:Br_u_0803_m gnt:hasTissue gn:tissueBrn . gn:Br_u_0803_m gnt:usesNormalization gn:avgMethodMas5 . gn:Br_u_0803_m gnt:usesPlatform gn:platformMg_u74av2 . 
@@ -106,12 +106,13 @@ SELECT PublishFreeze.Name, PublishFreeze.FullName, PublishFreeze.Name, PublishFr The above query results to triples that have the form: ```text +gn:Publishfreeze_name_ -> rdf:type -> dcat:Dataset gn:Publishfreeze_name_ -> xkos:classifiedUnder -> gnc:Phenotype gn:Publishfreeze_name_ -> dct:title -> PublishFreeze(FullName) gn:Publishfreeze_name_ -> rdfs:label -> PublishFreeze(Name) gn:Publishfreeze_name_ -> skos:altLabel -> PublishFreeze(ShortName) gn:Publishfreeze_name_ -> dct:created -> "PublishFreeze(CreateTime)"^^xsd:date -gn:Publishfreeze_name_ -> xkos:classifiedUnder -> gn:setInbredset_inbredsetname +gn:Publishfreeze_name_ -> gnt:belongsToGroup -> gn:setInbredset_inbredsetname ``` Here's an example query: @@ -133,9 +134,9 @@ PREFIX taxon: PREFIX dct: SELECT * WHERE { + ?s rdf:type dcat:Dataset . ?s xkos:classifiedUnder gnc:Phenotype . ?s dct:title "B6D2F2 PSU Phenotypes" . - ?s rdfs:label "B6D2F2-PSUPublish" . ?s ?p ?o . } ``` @@ -143,12 +144,13 @@ SELECT * WHERE { Expected Result: ```rdf +gn:B6d2f2_psupublish rdf:type dcat:Dataset . gn:B6d2f2_psupublish xkos:classifiedUnder gnc:Phenotype . gn:B6d2f2_psupublish dct:title "B6D2F2 PSU Phenotypes" . gn:B6d2f2_psupublish rdfs:label "B6D2F2-PSUPublish" . gn:B6d2f2_psupublish skos:altLabel "B6D2F2 PSU Publish" . gn:B6d2f2_psupublish dct:created "2015-03-18"^^xsd:date . -gn:B6d2f2_psupublish xkos:classifiedUnder gn:setB6d2f2-psu . +gn:B6d2f2_psupublish gnt:belongsToGroup gn:setB6d2f2-psu . ``` @@ -159,18 +161,19 @@ gn:B6d2f2_psupublish xkos:classifiedUnder gn:setB6d2f2-psu . 
The following SQL query was executed: ```sql -SELECT GenoFreeze.Name, GenoFreeze.Name, GenoFreeze.FullName, GenoFreeze.ShortName, GenoFreeze.CreateTime, InbredSet.Name FROM GenoFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN InbredSet ON GenoFreeze.InbredSetId = InbredSet.InbredSetId WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL +SELECT GenoFreeze.Name, GenoFreeze.Name, GenoFreeze.FullName, GenoFreeze.ShortName, GenoFreeze.CreateTime, InbredSet.Name AS InbredSetName FROM GenoFreeze LEFT JOIN InfoFiles ON InfoFiles.InfoPageName = GenoFreeze.Name LEFT JOIN InbredSet ON GenoFreeze.InbredSetId = InbredSet.InbredSetId WHERE GenoFreeze.public > 0 AND GenoFreeze.confidentiality < 1 AND InfoFiles.InfoPageName IS NULL ``` The above query results to triples that have the form: ```text +gn:Genofreeze_name_ -> rdf:type -> dcat:Dataset gn:Genofreeze_name_ -> xkos:classifiedUnder -> gnc:Genotype gn:Genofreeze_name_ -> rdfs:label -> GenoFreeze(Name) gn:Genofreeze_name_ -> dct:title -> GenoFreeze(FullName) gn:Genofreeze_name_ -> skos:altLabel -> GenoFreeze(ShortName) gn:Genofreeze_name_ -> dct:created -> "GenoFreeze(CreateTime)"^^xsd:date -gn:Genofreeze_name_ -> xkos:classifiedUnder -> gn:setInbredset_name +gn:Genofreeze_name_ -> gnt:belongsToGroup -> gn:setInbredset_inbredsetname ``` Here's an example query: @@ -192,9 +195,9 @@ PREFIX taxon: PREFIX dct: SELECT * WHERE { + ?s rdf:type dcat:Dataset . ?s xkos:classifiedUnder gnc:Genotype . ?s rdfs:label "B6D2RIGeno" . - ?s dct:title "B6D2RI Genotypes" . ?s ?p ?o . } ``` @@ -202,12 +205,13 @@ SELECT * WHERE { Expected Result: ```rdf +gn:B6d2rigeno rdf:type dcat:Dataset . gn:B6d2rigeno xkos:classifiedUnder gnc:Genotype . gn:B6d2rigeno rdfs:label "B6D2RIGeno" . gn:B6d2rigeno dct:title "B6D2RI Genotypes" . gn:B6d2rigeno skos:altLabel "B6D2RIGeno" . gn:B6d2rigeno dct:created "2022-10-24"^^xsd:date . -gn:B6d2rigeno xkos:classifiedUnder gn:setB6d2rigeno . 
+gn:B6d2rigeno gnt:belongsToGroup gn:setB6d2ri . ``` @@ -224,6 +228,7 @@ SELECT ProbeSetFreeze.Name, AvgMethod.Name AS AvgMethodName, AvgMethod.Name AS A The above query results to triples that have the form: ```text +gn:Probesetfreeze_name_ -> rdf:type -> dcat:Dataset gn:Probesetfreeze_name_ -> xkos:classifiedUnder -> gnc:Probeset gn:Probesetfreeze_name_ -> gnt:usesNormalization -> gn:avgMethodAvgmethod_avgmethodname gn:Probesetfreeze_name_ -> dct:title -> ProbeSetFreeze(FullName) @@ -233,7 +238,7 @@ gn:Probesetfreeze_name_ -> skos:altLabel -> ProbeSetFreeze(Name2) gn:Probesetfreeze_name_ -> dct:created -> "ProbeSetFreeze(CreateTime)"^^xsd:datetime gn:Probesetfreeze_name_ -> gnt:usesDataScale -> ProbeSetFreeze(DataScale) gn:Probesetfreeze_name_ -> gnt:hasTissue -> gn:tissueTissue_short_name -gn:Probesetfreeze_name_ -> xkos:classifiedUnder -> gn:setInbredset_inbredsetname +gn:Probesetfreeze_name_ -> gnt:belongsToGroup -> gn:setInbredset_inbredsetname ``` Here's an example query: @@ -255,10 +260,10 @@ PREFIX taxon: PREFIX dct: SELECT * WHERE { + ?s rdf:type dcat:Dataset . ?s xkos:classifiedUnder gnc:Probeset . ?s gnt:usesNormalization gn:avgMethodRankinv . ?s dct:title "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . - ?s rdfs:label "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . ?s ?p ?o . } ``` @@ -266,6 +271,7 @@ SELECT * WHERE { Expected Result: ```rdf +gn:Cmmtubcbxdp00cerilm0513 rdf:type dcat:Dataset . gn:Cmmtubcbxdp00cerilm0513 xkos:classifiedUnder gnc:Probeset . gn:Cmmtubcbxdp00cerilm0513 gnt:usesNormalization gn:avgMethodRankinv . gn:Cmmtubcbxdp00cerilm0513 dct:title "UBC/CMMT BXD P0 Cerebellum ILM Mouse WG-6 v2.0 (May13) RankInv" . @@ -275,7 +281,7 @@ gn:Cmmtubcbxdp00cerilm0513 skos:altLabel "CMMTUBCBXDP00CerILMMay13" . gn:Cmmtubcbxdp00cerilm0513 dct:created "2013-04-22"^^xsd:datetime . gn:Cmmtubcbxdp00cerilm0513 gnt:usesDataScale "log2" . gn:Cmmtubcbxdp00cerilm0513 gnt:hasTissue gn:tissueCb . 
-gn:Cmmtubcbxdp00cerilm0513 xkos:classifiedUnder gn:setBxd . +gn:Cmmtubcbxdp00cerilm0513 gnt:belongsToGroup gn:setBxd . ``` diff --git a/rdf-documentation/genbank-metadata.md b/rdf-documentation/genbank-metadata.md index 7e519fd..e91b459 100644 --- a/rdf-documentation/genbank-metadata.md +++ b/rdf-documentation/genbank-metadata.md @@ -13,7 +13,7 @@ The above query results to triples that have the form: ```text genbank:Genbank(Id) -> gnt:hasSequence -> Genbank(Sequence) -genbank:Genbank(Id) -> xkos:classifiedUnder -> gn:Species_fullname +genbank:Genbank(Id) -> gnt:belongsToSpecies -> gn:Species_fullname ``` Here's an example query: @@ -36,7 +36,7 @@ PREFIX owl: SELECT * WHERE { ?s gnt:hasSequence "GAAAAGGACGAGAGAAAATTATTTTTAAGATAATTAAACATAAAAACCCTGGTGCTTATTACATTATAAAGTACGTTTTTAAAAACCCACAAACTATTATACATACGTTTATGAATCAATTAAATACTCTGCACTTGTTAGGAACACGCATATCCCTTCTTTGTTGAGTTTAACGGAACGGGACAGCGGCGTGCGCCCGCGGCTGGGCTGCTCTGGCCGCGGGTCTCCCCAGGCG" . - ?s xkos:classifiedUnder gn:Mus_musculus . + ?s gnt:belongsToSpecies gn:Mus_musculus . ?s ?p ?o . } ``` @@ -45,6 +45,6 @@ Expected Result: ```rdf genbank:AA002843 gnt:hasSequence "GAAAAGGACGAGAGAAAATTATTTTTAAGATAATTAAACATAAAAACCCTGGTGCTTATTACATTATAAAGTACGTTTTTAAAAACCCACAAACTATTATACATACGTTTATGAATCAATTAAATACTCTGCACTTGTTAGGAACACGCATATCCCTTCTTTGTTGAGTTTAACGGAACGGGACAGCGGCGTGCGCCCGCGGCTGGGCTGCTCTGGCCGCGGGTCTCCCCAGGCG" . -genbank:AA002843 xkos:classifiedUnder gn:Mus_musculus . +genbank:AA002843 gnt:belongsToSpecies gn:Mus_musculus . 
``` diff --git a/rdf-documentation/generif-metadata.md b/rdf-documentation/generif-metadata.md index 7e3c4d6..37424f8 100644 --- a/rdf-documentation/generif-metadata.md +++ b/rdf-documentation/generif-metadata.md @@ -56,7 +56,7 @@ SELECT GeneRIF.symbol, GeneRIF.comment, GeneRIF.createtime AS EntryCreateTime, G The above query results to triples that have the form: ```text -gn:symbolGeneRIF_symbol_ -> rdfs:comment -> [ rdf:type gnc:GNWikiEntry ; xkos:classifiedUnder gn:Species_fullname ; rdfs:comment "GeneRIFcomment"^^xsd:string ; dct:references pubmed:GeneRIF(PMID) ; dct:creator gn:investigatorInvestigators_firstname_investigators_lastname_investigators_email ; gnt:belongsToCategory "GeneCategory" ; foaf:homepage "GeneRIF(weburl)" ; ] +gn:symbolGeneRIF_symbol_ -> rdfs:comment -> [ rdf:type gnc:GNWikiEntry ; gnt:belongsToSpecies gn:Species_fullname ; rdfs:comment "GeneRIFcomment"^^xsd:string ; dct:references pubmed:GeneRIF(PMID) ; dct:creator gn:investigatorInvestigators_firstname_investigators_lastname_investigators_email ; gnt:belongsToCategory "GeneCategory" ; foaf:homepage "GeneRIF(weburl)" ; ] ``` Here's an example query: @@ -77,7 +77,7 @@ PREFIX xsd: PREFIX owl: SELECT * WHERE { - ?s rdfs:comment #{\x5b; rdf:type gnc:GNWikiEntry ; xkos:classifiedUnder gn:Mus_musculus ; rdfs:comment "Part 2 of the Slc9a1 wiki.\\r\\n\\r\\nThe human SLC9A1 gene was cloned and mapped to human chromosome 1p Lifton et al., 1990.\\r\\n\\r\\nThe mouse Slc9a1 gene maps to chromosome 4. Morahan et al., 1993. There are three common alleles of Slc9a1, originally detected by RFLP analyses. Each of these allelic SLC9A1 proteins have different levels of antiporter activity. Morahan et al. 1994 Remarkably, intracellular pH varies between strains based on their Slc9a1 alleles. McClive et al. 
1996."^^xsd:string ; dct:created "2011-06-10T12:06:30"^^xsd:datetime ; dct:references pubmed:094369 ; dct:references pubmed:8016086 ; dct:references pubmed:8550102 ; dct:creator gn:investigatorGrant_morahan_gem_waimr.uwa.edu.au ; gnt:belongsToCategory "Biochemistry" ; gnt:belongsToCategory "Genetic variation and alleles" ; gnt:belongsToCategory "Physiology and function" ; \x5d; }# . + ?s rdfs:comment #{\x5b; rdf:type gnc:GNWikiEntry ; gnt:belongsToSpecies gn:Mus_musculus ; rdfs:comment "Part 2 of the Slc9a1 wiki.\\r\\n\\r\\nThe human SLC9A1 gene was cloned and mapped to human chromosome 1p Lifton et al., 1990.\\r\\n\\r\\nThe mouse Slc9a1 gene maps to chromosome 4. Morahan et al., 1993. There are three common alleles of Slc9a1, originally detected by RFLP analyses. Each of these allelic SLC9A1 proteins have different levels of antiporter activity. Morahan et al. 1994 Remarkably, intracellular pH varies between strains based on their Slc9a1 alleles. McClive et al. 1996."^^xsd:string ; dct:created "2011-06-10T12:06:30"^^xsd:datetime ; dct:references pubmed:094369 ; dct:references pubmed:8016086 ; dct:references pubmed:8550102 ; dct:creator gn:investigatorGrant_morahan_gem_waimr.uwa.edu.au ; gnt:belongsToCategory "Biochemistry" ; gnt:belongsToCategory "Genetic variation and alleles" ; gnt:belongsToCategory "Physiology and function" ; \x5d; }# . ?s ?p ?o . } ``` @@ -85,7 +85,7 @@ SELECT * WHERE { Expected Result: ```rdf -gn:symbolSlc9a1 rdfs:comment [ rdf:type gnc:GNWikiEntry ; xkos:classifiedUnder gn:Mus_musculus ; rdfs:comment "Part 2 of the Slc9a1 wiki.\\r\\n\\r\\nThe human SLC9A1 gene was cloned and mapped to human chromosome 1p Lifton et al., 1990.\\r\\n\\r\\nThe mouse Slc9a1 gene maps to chromosome 4. Morahan et al., 1993. There are three common alleles of Slc9a1, originally detected by RFLP analyses. Each of these allelic SLC9A1 proteins have different levels of antiporter activity. Morahan et al. 
1994 Remarkably, intracellular pH varies between strains based on their Slc9a1 alleles. McClive et al. 1996."^^xsd:string ; dct:created "2011-06-10T12:06:30"^^xsd:datetime ; dct:references pubmed:094369 ; dct:references pubmed:8016086 ; dct:references pubmed:8550102 ; dct:creator gn:investigatorGrant_morahan_gem_waimr.uwa.edu.au ; gnt:belongsToCategory "Biochemistry" ; gnt:belongsToCategory "Genetic variation and alleles" ; gnt:belongsToCategory "Physiology and function" ; ] . +gn:symbolSlc9a1 rdfs:comment [ rdf:type gnc:GNWikiEntry ; gnt:belongsToSpecies gn:Mus_musculus ; rdfs:comment "Part 2 of the Slc9a1 wiki.\\r\\n\\r\\nThe human SLC9A1 gene was cloned and mapped to human chromosome 1p Lifton et al., 1990.\\r\\n\\r\\nThe mouse Slc9a1 gene maps to chromosome 4. Morahan et al., 1993. There are three common alleles of Slc9a1, originally detected by RFLP analyses. Each of these allelic SLC9A1 proteins have different levels of antiporter activity. Morahan et al. 1994 Remarkably, intracellular pH varies between strains based on their Slc9a1 alleles. McClive et al. 1996."^^xsd:string ; dct:created "2011-06-10T12:06:30"^^xsd:datetime ; dct:references pubmed:094369 ; dct:references pubmed:8016086 ; dct:references pubmed:8550102 ; dct:creator gn:investigatorGrant_morahan_gem_waimr.uwa.edu.au ; gnt:belongsToCategory "Biochemistry" ; gnt:belongsToCategory "Genetic variation and alleles" ; gnt:belongsToCategory "Physiology and function" ; ] . 
``` @@ -102,7 +102,7 @@ SELECT GeneRIF_BASIC.symbol AS GeneRIFSymbol, GeneRIF_BASIC.comment, Species.Ful The above query results to triples that have the form: ```text -gn:symbolGeneRIF_BASIC_GeneRIFSymbol_ -> rdfs:comment -> [ rdf:type gnc:NCBIWikiEntry ; rdfs:comment "GeneRIF_BASICcomment"^^xsd:string ; xkos:classifiedUnder gn:Species_speciesfullname ; skos:notation taxon:GeneRIF_BASIC(TaxonomicId) ; gnt:hasGeneId generif:GeneRIF_BASIC(GeneId) ; gnt:hasVersionId 'GeneRIF_BASIC(VersionId)'^^xsd:integer ; dct:references pubmed:GeneRIF_BASIC(PMID) ; ] +gn:symbolGeneRIF_BASIC_GeneRIFSymbol_ -> rdfs:comment -> [ rdf:type gnc:NCBIWikiEntry ; rdfs:comment "GeneRIF_BASICcomment"^^xsd:string ; gnt:belongsToSpecies gn:Species_speciesfullname ; skos:notation taxon:GeneRIF_BASIC(TaxonomicId) ; gnt:hasGeneId generif:GeneRIF_BASIC(GeneId) ; gnt:hasVersionId 'GeneRIF_BASIC(VersionId)'^^xsd:integer ; dct:references pubmed:GeneRIF_BASIC(PMID) ; ] ``` Here's an example query: @@ -123,7 +123,7 @@ PREFIX xsd: PREFIX owl: SELECT * WHERE { - ?s rdfs:comment #{\x5b; rdf:type gnc:NCBIWikiEntry ; rdfs:comment "he results demonstrate that apoM-S1P inhibits ox-LDL-induced inflammation in HUVECs via the S1PR2-mediated PI3K/Akt signaling pathway."^^xsd:string ; xkos:classifiedUnder gn:Homo_sapiens ; skos:notation taxon:9606 ; gnt:hasGeneId generif:55937 ; gnt:hasVersionId '1'^^xsd:integer ; dct:created "2019-08-03T07:43:00"^^xsd:datetime ; \x5d;}# . + ?s rdfs:comment #{\x5b; rdf:type gnc:NCBIWikiEntry ; rdfs:comment "he results demonstrate that apoM-S1P inhibits ox-LDL-induced inflammation in HUVECs via the S1PR2-mediated PI3K/Akt signaling pathway."^^xsd:string ; gnt:belongsToSpecies gn:Homo_sapiens ; skos:notation taxon:9606 ; gnt:hasGeneId generif:55937 ; gnt:hasVersionId '1'^^xsd:integer ; dct:created "2019-08-03T07:43:00"^^xsd:datetime ; \x5d;}# . ?s ?p ?o . 
} ``` @@ -131,6 +131,6 @@ SELECT * WHERE { Expected Result: ```rdf -gn:symbolAPOM rdfs:comment [ rdf:type gnc:NCBIWikiEntry ; rdfs:comment "he results demonstrate that apoM-S1P inhibits ox-LDL-induced inflammation in HUVECs via the S1PR2-mediated PI3K/Akt signaling pathway."^^xsd:string ; xkos:classifiedUnder gn:Homo_sapiens ; skos:notation taxon:9606 ; gnt:hasGeneId generif:55937 ; gnt:hasVersionId '1'^^xsd:integer ; dct:created "2019-08-03T07:43:00"^^xsd:datetime ; ] . +gn:symbolAPOM rdfs:comment [ rdf:type gnc:NCBIWikiEntry ; rdfs:comment "he results demonstrate that apoM-S1P inhibits ox-LDL-induced inflammation in HUVECs via the S1PR2-mediated PI3K/Akt signaling pathway."^^xsd:string ; gnt:belongsToSpecies gn:Homo_sapiens ; skos:notation taxon:9606 ; gnt:hasGeneId generif:55937 ; gnt:hasVersionId '1'^^xsd:integer ; dct:created "2019-08-03T07:43:00"^^xsd:datetime ; ] . ``` diff --git a/rdf-documentation/genotype-metadata.md b/rdf-documentation/genotype-metadata.md index 4ca1bd2..e64be09 100644 --- a/rdf-documentation/genotype-metadata.md +++ b/rdf-documentation/genotype-metadata.md @@ -13,7 +13,7 @@ The above query results in triples that have the form: ```text gn:Geno_name_ -> rdf:type -> gnc:Genotype -gn:Geno_name_ -> skos:prefLabel -> GenoName +gn:Geno_name_ -> rdfs:label -> GenoName gn:Geno_name_ -> gnt:chr -> Geno(Chr) gn:Geno_name_ -> gnt:mb -> "Mb"^^xsd:double gn:Geno_name_ -> gnt:mbMm8 -> "Mb_mm8"^^xsd:double @@ -36,11 +36,12 @@ PREFIX rdf: PREFIX rdfs: PREFIX owl: PREFIX skos: +PREFIX xkos: PREFIX xsd: SELECT * WHERE { ?s rdf:type gnc:Genotype . - ?s skos:prefLabel "D1Mit296" . + ?s rdfs:label "D1Mit296" . ?s gnt:chr "1" . ?s gnt:mb #{"9.749729"^^xsd:double}# . ?s ?p ?o . @@ -51,7 +52,7 @@ Expected Result: ```rdf gn:D1mit296 rdf:type gnc:Genotype . -gn:D1mit296 skos:prefLabel "D1Mit296" . +gn:D1mit296 rdfs:label "D1Mit296" . gn:D1mit296 gnt:chr "1" . gn:D1mit296 gnt:mb "9.749729"^^xsd:double . gn:D1mit296 gnt:mbMm8 "9.734943"^^xsd:double . 
diff --git a/rdf-documentation/phenotype-metadata.md b/rdf-documentation/phenotype-metadata.md index 8a7e421..d811d40 100644 --- a/rdf-documentation/phenotype-metadata.md +++ b/rdf-documentation/phenotype-metadata.md @@ -6,14 +6,14 @@ The following SQL query was executed: ```sql -SELECT CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id) AS Phenotype, InbredSet.Name AS InbredSetName, PublishXRef.Id, CONCAT(IFNULL(InbredSet.Name, PublishXRef.InbredSetId), '_', PublishXRef.Id) AS Phenotype, Phenotype.Post_publication_description, Phenotype.Post_publication_abbreviation, Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, IFNULL(PublishXRef.mean, '') AS mean, PublishXRef.Locus, IFNULL(PublishXRef.LRS, '') AS lrs, IFNULL(PublishXRef.additive, '') AS additive, PublishXRef.Sequence, IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT)) AS pmid, Publication.Id FROM PublishXRef LEFT JOIN InbredSet ON InbredSet.InbredSetId = PublishXRef.InbredSetId LEFT JOIN Publication ON Publication.Id = PublishXRef.PublicationId LEFT JOIN Phenotype ON Phenotype.Id = PublishXRef.PhenotypeId WHERE PublishXRef.InbredSetId IN (SELECT PublishFreeze.InbredSetId FROM PublishFreeze) +SELECT CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id) AS Phenotype, InbredSet.Name AS InbredSetName, PublishXRef.Id, CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id) AS Phenotype, Phenotype.Post_publication_description, Phenotype.Post_publication_abbreviation, Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, IFNULL(PublishXRef.mean, '') AS mean, PublishXRef.Locus, IFNULL((PublishXRef.LRS/4.604), '') AS lrs, IFNULL(PublishXRef.additive, '') AS additive, PublishXRef.Sequence, IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT)) AS pmid, Publication.Id AS PublicationId FROM PublishXRef LEFT JOIN InbredSet ON InbredSet.InbredSetId = PublishXRef.InbredSetId LEFT 
JOIN Publication ON Publication.Id = PublishXRef.PublicationId LEFT JOIN Phenotype ON Phenotype.Id = PublishXRef.PhenotypeId ``` The above query results to triples that have the form: ```text gn:traitPhenotype -> rdf:type -> gnc:Phenotype -gn:traitPhenotype -> xkos:classifiedUnder -> gn:setInbredset_inbredsetname +gn:traitPhenotype -> gnt:belongsToGroup -> gn:setInbredset_inbredsetname gn:traitPhenotype -> rdfs:label -> PublishXRef(Id) gn:traitPhenotype -> skos:altLabel -> Phenotype gn:traitPhenotype -> dct:description -> PhenotypePost_publication_description @@ -21,8 +21,8 @@ gn:traitPhenotype -> gnt:abbreviation -> Phenotype(Post_publication_abbreviation gn:traitPhenotype -> gnt:labCode -> Phenotype(Lab_code) gn:traitPhenotype -> gnt:submitter -> PhenotypeSubmitter gn:traitPhenotype -> gnt:mean -> "mean"^^xsd:double -gn:traitPhenotype -> gnt:locus -> PublishXRef(Locus) -gn:traitPhenotype -> gnt:LRS -> "lrs"^^xsd:double +gn:traitPhenotype -> gnt:locus -> gn:Publishxreflocus +gn:traitPhenotype -> gnt:lodScore -> "lrs"^^xsd:double gn:traitPhenotype -> gnt:additive -> "additive"^^xsd:double gn:traitPhenotype -> gnt:sequence -> "PublishXRef(Sequence)"^^xsd:integer gn:traitPhenotype -> dct:isReferencedBy -> pubmed:pmid @@ -35,18 +35,19 @@ PREFIX dct: PREFIX gn: PREFIX owl: PREFIX gnc: -PREFIX gnt: +PREFIX gnt: PREFIX sdmx-measure: PREFIX skos: PREFIX rdf: PREFIX rdfs: PREFIX xsd: PREFIX qb: +PREFIX xkos: PREFIX pubmed: SELECT * WHERE { ?s rdf:type gnc:Phenotype . - ?s xkos:classifiedUnder gn:setBxd . + ?s gnt:belongsToGroup gn:setBxd . ?s rdfs:label "10001" . ?s skos:altLabel "BXD_10001" . ?s ?p ?o . @@ -57,15 +58,15 @@ Expected Result: ```rdf gn:traitBxd_10001 rdf:type gnc:Phenotype . -gn:traitBxd_10001 xkos:classifiedUnder gn:setBxd . +gn:traitBxd_10001 gnt:belongsToGroup gn:setBxd . gn:traitBxd_10001 rdfs:label "10001" . gn:traitBxd_10001 skos:altLabel "BXD_10001" . 
gn:traitBxd_10001 dct:description "Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]" . gn:traitBxd_10001 gnt:abbreviation "CBLWT2" . gn:traitBxd_10001 gnt:submitter "robwilliams" . gn:traitBxd_10001 gnt:mean "52.13529418496525"^^xsd:double . -gn:traitBxd_10001 gnt:locus "rs48756159" . -gn:traitBxd_10001 gnt:LRS "13.4974911471087"^^xsd:double . +gn:traitBxd_10001 gnt:locus gn:Rs48756159 . +gn:traitBxd_10001 gnt:lodScore "2.9316879120566246"^^xsd:double . gn:traitBxd_10001 gnt:additive "2.39444435069444"^^xsd:double . gn:traitBxd_10001 gnt:sequence "1"^^xsd:integer . gn:traitBxd_10001 dct:isReferencedBy pubmed:11438585 . diff --git a/rdf-documentation/probeset-metadata.md b/rdf-documentation/probeset-metadata.md index cde4e43..ce51854 100644 --- a/rdf-documentation/probeset-metadata.md +++ b/rdf-documentation/probeset-metadata.md @@ -6,7 +6,7 @@ The following SQL query was executed: ```sql -SELECT IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name)) AS ProbeSetIdName, ProbeSet.Id, ProbeSet.Name, ProbeSet.alias, IFNULL(GeneChip.Name, '') AS GeneChipName, NULLIF(TRIM(ProbeSet.TargetId), '') AS TargetId, ProbeSet.Symbol, ProbeSet.description, NULLIF(TRIM(ProbeSet.Probe_set_target_region), '') AS Probe_set_target_region, ProbeSet.Chr, IFNULL(ProbeSet.Mb, '') AS Mb, IFNULL(ProbeSet.Mb_mm8, '') AS Mb_mm8, IFNULL(ProbeSet.Mb_2016, '') AS Mb_2016, IFNULL(ProbeSet.Probe_set_specificity, '') AS Probe_set_specificity, IFNULL(ProbeSet.Probe_set_BLAT_score, '') AS Probe_set_BLAT_score, IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '') AS Probe_set_Blat_Mb_start, IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '') AS Probe_set_Blat_Mb_start_2016, IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '') AS Probe_set_Blat_Mb_end, IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '') AS Probe_set_Blat_Mb_start_2016, ProbeSet.BlatSeq, ProbeSet.TargetSeq, IFNULL(ProbeSet.HomoloGeneID, '') AS HomoloGeneID, IFNULL(ProbeSet.UniProtID, '') AS 
UniProtID, IFNULL(ProbeSet.PubChem_ID, '') AS PubChem_ID, IFNULL(ProbeSet.KEGG_ID, '') AS KEGG_ID, IFNULL(ProbeSet.OMIM, '') AS OMIM, IFNULL(ProbeSet.ChEBI_ID, '') AS ChEBI_ID FROM ProbeSet LEFT JOIN GeneChip ON GeneChip.Id = ProbeSet.ChipId +SELECT IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name)) AS ProbeSetIdName, ProbeSet.Id, ProbeSet.Name, ProbeSet.alias, IFNULL(GeneChip.Name, '') AS GeneChipName, NULLIF(TRIM(ProbeSet.TargetId), '') AS TargetId, ProbeSet.Symbol, ProbeSet.description, NULLIF(TRIM(ProbeSet.Probe_set_target_region), '') AS Probe_set_target_region, ProbeSet.Chr, IFNULL(ProbeSet.Mb, '') AS Mb, ProbeSet.Mb, ProbeSet.Chr, ProbeSet.Strand_Probe, ProbeSet.GeneId, ProbeSet.OMIM, ProbeSet.HomoloGeneID, ProbeSet.UniProtID, ProbeSet.Symbol, ProbeSet.Symbol, ProbeSet.Symbol, ProbeSet.Symbol, ProbeSet.Symbol, Species.Name, ProbeSet.RefSeq_TranscriptId, GeneList_rn33.kgId, (GeneList.txStart * 1000000) AS TranscriptStartMm10, (GeneList_rn33.txStart * 1000000) AS TranscriptStartRn7, GeneList.Chromosome, GeneList_rn33.Chromosome, (GeneList.txEnd * 1000000) AS TranscriptEndMm10, (GeneList_rn33.txEnd * 1000000) AS TranscriptEndRn7, ProbeSet.Symbol, ProbeSet.GeneId, Species.FullName, ProbeSet.Symbol, ProbeSet.GeneId, Species.name, ProbeSet.GeneId, ProbeSet.GeneId, Species.Name, ProbeSet.Strand_Probe, IFNULL(ProbeSet.Probe_set_specificity, '') AS Probe_set_specificity, IFNULL(ProbeSet.Probe_set_BLAT_score, '') AS Probe_set_BLAT_score, IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '') AS Probe_set_Blat_Mb_start, IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '') AS Probe_set_Blat_Mb_end, ProbeSet.BlatSeq, ProbeSet.TargetSeq FROM ProbeSet LEFT JOIN GeneChip ON GeneChip.Id = ProbeSet.ChipId LEFT JOIN GeneList ON GeneList.GeneID = ProbeSet.GeneId LEFT JOIN GeneList_rn33 ON GeneList.geneSymbol = ProbeSet.Symbol LEFT JOIN Species ON GeneChip.SpeciesId = Species.Id ``` The above query results to triples that have the form: @@ -22,22 +22,38 @@ 
gn:probesetProbesetidname -> dct:description -> ProbeSetdescription gn:probesetProbesetidname -> gnt:targetsRegion -> Probe_set_target_region gn:probesetProbesetidname -> gnt:chr -> ProbeSet(Chr) gn:probesetProbesetidname -> gnt:mb -> "Mb"^^xsd:double -gn:probesetProbesetidname -> gnt:mbMm8 -> "Mb_mm8"^^xsd:double -gn:probesetProbesetidname -> gnt:mb2016 -> "Mb_2016"^^xsd:double +gn:probesetProbesetidname -> gnt:location -> Chr ProbeSet(Chr) @ ProbeSet(Mb) +gn:probesetProbesetidname -> dct:references -> . + a gnc:NCBIGeneLink +gn:probesetProbesetidname -> dct:references -> . + a gnc:omimLink +gn:probesetProbesetidname -> dct:references -> . + a gnc:homologeneLink +gn:probesetProbesetidname -> dct:references -> . + a gnc:uniprotLink +gn:probesetProbesetidname -> dct:references -> . + a gnc:stringLink +gn:probesetProbesetidname -> dct:references -> . + a gnc:gtexLink +gn:probesetProbesetidname -> dct:references -> . + a gnc:ebiGwasLink +gn:probesetProbesetidname -> dct:references -> . + a gnc:proteinAtlasLink +gn:probesetProbesetidname -> dct:references -> +gn:probesetProbesetidname -> dct:references -> . + a gnc:PantherLink +gn:probesetProbesetidname -> dct:references -> +gn:probesetProbesetidname -> dct:references -> +gn:probesetProbesetidname -> dct:references -> . 
+ a gnc:gemmaLink +gn:probesetProbesetidname -> dct:references -> +gn:probesetProbesetidname -> gnt:strandProbe -> ProbeSet(Strand_Probe) gn:probesetProbesetidname -> gnt:hasSpecificity -> Probe_set_specificity gn:probesetProbesetidname -> gnt:hasBlatScore -> Probe_set_BLAT_score gn:probesetProbesetidname -> gnt:hasBlatMbStart -> "Probe_set_Blat_Mb_start"^^xsd:double -gn:probesetProbesetidname -> gnt:hasBlatMbStart2016 -> "Probe_set_Blat_Mb_start_2016"^^xsd:double gn:probesetProbesetidname -> gnt:hasBlatMbEnd -> "Probe_set_Blat_Mb_end"^^xsd:double -gn:probesetProbesetidname -> gnt:hasBlatMbEnd2016 -> "Probe_set_Blat_Mb_start_2016"^^xsd:double gn:probesetProbesetidname -> gnt:hasBlatSeq -> ProbeSetBlatSeq gn:probesetProbesetidname -> gnt:hasTargetSeq -> ProbeSetTargetSeq -gn:probesetProbesetidname -> gnt:hasHomologeneId -> homologene:HomoloGeneID -gn:probesetProbesetidname -> gnt:hasUniprotId -> uniprot:UniProtID -gn:probesetProbesetidname -> gnt:hasPubChemId -> pubchem:PubChem_ID -gn:probesetProbesetidname -> gnt:hasKeggId -> kegg:KEGG_ID -gn:probesetProbesetidname -> gnt:hasOmimId -> -gn:probesetProbesetidname -> gnt:hasChebiId -> chebi:ChEBI_ID ``` Here's an example query: @@ -47,15 +63,9 @@ PREFIX probeset: PREFIX gnc: PREFIX gnt: PREFIX rdf: -PREFIX kegg: -PREFIX pubchem: -PREFIX omim: PREFIX rdfs: -PREFIX uniprot: -PREFIX chebi: PREFIX dct: PREFIX owl: -PREFIX homologene: PREFIX xsd: PREFIX qb: PREFIX sdmx-measure: @@ -63,8 +73,8 @@ PREFIX skos: SELECT * WHERE { ?s rdf:type gnc:Probeset . - ?s rdfs:label "100001_at" . - ?s skos:altLabel "T3g; Ctg3; Ctg-3" . + ?s rdfs:label "100322_at" . + ?s skos:altLabel "IGHG2A; AU044919; MGC102604; MGC102659; Ighg" . ?s gnt:hasChip gn:platformMg_u74av2 . ?s ?p ?o . } @@ -73,25 +83,39 @@ SELECT * WHERE { Expected Result: ```rdf -gn:probeset100001_at rdf:type gnc:Probeset . -gn:probeset100001_at rdfs:label "100001_at" . -gn:probeset100001_at skos:altLabel "T3g; Ctg3; Ctg-3" . 
-gn:probeset100001_at gnt:hasChip gn:platformMg_u74av2 . -gn:probeset100001_at gnt:symbol "Cd3g" . -gn:probeset100001_at dct:description "CD3d antigen, gamma polypeptide" . -gn:probeset100001_at gnt:chr "9" . -gn:probeset100001_at gnt:mb "44.970689"^^xsd:double . -gn:probeset100001_at gnt:mbMm8 "44.721684"^^xsd:double . -gn:probeset100001_at gnt:mb2016 "44.778772"^^xsd:double . -gn:probeset100001_at gnt:hasSpecificity "9.3" . -gn:probeset100001_at gnt:hasBlatScore "186" . -gn:probeset100001_at gnt:hasBlatMbStart "44.970689"^^xsd:double . -gn:probeset100001_at gnt:hasBlatMbStart2016 "44.778772"^^xsd:double . -gn:probeset100001_at gnt:hasBlatMbEnd "44.971291"^^xsd:double . -gn:probeset100001_at gnt:hasBlatMbEnd2016 "44.778772"^^xsd:double . -gn:probeset100001_at gnt:hasBlatSeq "CTCTGTTGCAAAATGAACAGCTGTACAGCCCCTCAAGGACCGGGAATATGACCAGTACAGCCATCTCCAAGGAAACCAACTGAGGAAGAAGTGAACTCAGCAGGACTCAGGGTGTCCCCACAATGCATTTTGGAGAGAGCCCAGACTGCAAGCAGAGAGGAAGAACTGAGGAAAACAAGCACAGCGTGGTGTT" . -gn:probeset100001_at gnt:hasTargetSeq "ctctgttgcaaaatgaacagctgtaccagcccctcaaggaccgggaatatgaccagtacagccatctccaaggaaaccaactgaggaagaagtgaactcagcaggactcagggtgtccccccttntatccagcacccagaatcaaaacaatgcattttggagagagcccagtagagagattttcaaccctacaggtagactgcaagcagagaggaagaactgtcaaagaaattttggtcttttttttttttttnncaaaataaaataaaagcttggaggagccagtggtatgantnnnnnntgnancanttgtcaaccttgtttggggttnncagcaccccacccccagaccccccaaaaaaattcagtgaaggaaaacaagcacagcgtggtgtt" . -gn:probeset100001_at gnt:hasHomologeneId homologene:55 . -gn:probeset100001_at gnt:hasOmimId omim:186740 . +gn:probeset100322_at rdf:type gnc:Probeset . +gn:probeset100322_at rdfs:label "100322_at" . +gn:probeset100322_at skos:altLabel "IGHG2A; AU044919; MGC102604; MGC102659; Ighg" . +gn:probeset100322_at gnt:hasChip gn:platformMg_u74av2 . +gn:probeset100322_at gnt:symbol "Ighg" . +gn:probeset100322_at dct:description "immunoglobulin heavy chain gamma polypeptide" . +gn:probeset100322_at gnt:chr "12" . +gn:probeset100322_at gnt:mb "114.322406"^^xsd:double . 
+gn:probeset100322_at gnt:location "Chr 12 @ 114.322406 on the minus strand" . +gn:probeset100322_at dct:references . + a gnc:NCBIGeneLink . +gn:probeset100322_at dct:references . + a gnc:stringLink . +gn:probeset100322_at dct:references . + a gnc:gtexLink . +gn:probeset100322_at dct:references . + a gnc:ebiGwasLink . +gn:probeset100322_at dct:references . + a gnc:proteinAtlasLink . +gn:probeset100322_at dct:references . + a gnc:PantherLink . +gn:probeset100322_at dct:references . + a gnc:genemaniaLink . +gn:probeset100322_at dct:references . + Ighg . +gn:probeset100322_at dct:references . + a gnc:gemmaLink . +gn:probeset100322_at gnt:strandProbe "-" . +gn:probeset100322_at gnt:hasSpecificity "3.1" . +gn:probeset100322_at gnt:hasBlatScore "62" . +gn:probeset100322_at gnt:hasBlatMbStart "114.322406"^^xsd:double . +gn:probeset100322_at gnt:hasBlatMbEnd "114.322571"^^xsd:double . +gn:probeset100322_at gnt:hasBlatSeq "TGGTCACAGCTTTCCGCTCACGTTCACTGAAACGGGCTGATGCTGCACCAACTGTATCTTCCCACCATCCAGTAAGCTTGGGCCCGGTGGTTACTGGAACTGGATCCGGAAATTCCCAGGGAATATTACCTGCAGTTGAATTCTGTGACTACT" . +gn:probeset100322_at gnt:hasTargetSeq "tggtcacagctttccgctcacgttcggtgctgggaccaagctggaactgaaacgggctgatgctgcaccaactgtatccatcttcccaccatccagtaagcttgggcccggtgggggcnnnnngnnnngnnnnnnntnnnnnnngnnngncnnnnngnnnnncnnntcngaggtgcagcttcaggagtcaggacctngcctngnnaaaccttctcagactctgtccctcacctgttctgtcactggcnactccatcaccagtgnttactggaactggatccggaaattcccagggaataaacttgantacatgggntacataanctacagtggtnncacttactacaatccatctctcaaaagtcgaatctccatnactnnagacacatccaagaaccantattacctgcagttgaattctgtgactact" . 
``` diff --git a/rdf-documentation/publication-metadata.md b/rdf-documentation/publication-metadata.md index c4262dc..be809a2 100644 --- a/rdf-documentation/publication-metadata.md +++ b/rdf-documentation/publication-metadata.md @@ -26,7 +26,7 @@ pubmed:pmid -> dct:creator -> PublicationAuthors Here's an example query: ```sparql -PREFIX gnt: +PREFIX gnt: <http://genenetwork.org/term/> PREFIX fabio: PREFIX dct: PREFIX prism: diff --git a/rdf-documentation/strains.md b/rdf-documentation/strains.md index 4ecb12f..87a8ce9 100644 --- a/rdf-documentation/strains.md +++ b/rdf-documentation/strains.md @@ -13,7 +13,7 @@ The above query results in triples that have the form: ```text gn:Strain_name_ -> rdf:type -> gnc:strain -gn:Strain_name_ -> xkos:classifiedUnder -> gn:Species_fullname +gn:Strain_name_ -> gnt:belongsToSpecies -> gn:Species_fullname gn:Strain_name_ -> rdfs:label -> StrainName gn:Strain_name_ -> skos:altLabel -> Name2 gn:Strain_name_ -> gnt:alias -> Alias @@ -34,7 +34,7 @@ PREFIX taxon: SELECT * WHERE { ?s rdf:type gnc:strain . - ?s xkos:classifiedUnder gn:Mus_musculus . + ?s gnt:belongsToSpecies gn:Mus_musculus . ?s rdfs:label "B6D2F1" . ?s ?p ?o . } @@ -44,7 +44,7 @@ Expected Result: ```rdf gn:B6d2f1 rdf:type gnc:strain . -gn:B6d2f1 xkos:classifiedUnder gn:Mus_musculus . +gn:B6d2f1 gnt:belongsToSpecies gn:Mus_musculus . gn:B6d2f1 rdfs:label "B6D2F1" . ``` @@ -100,14 +100,14 @@ gn:mappingMethodQtlreaper rdfs:label "qtlreaper" . 
The following SQL query was executed: ```sql -SELECT AvgMethod.Name, AvgMethod.Normalization FROM AvgMethod +SELECT AvgMethod.Name AS AvgMethodName, AvgMethod.Normalization FROM AvgMethod ``` The above query results in triples that have the form: ```text -gn:avgMethodAvgmethod_name -> rdf:type -> gnc:avgMethod -gn:avgMethodAvgmethod_name -> rdfs:label -> AvgMethod(Normalization) +gn:avgMethodAvgmethod_avgmethodname -> rdf:type -> gnc:avgMethod +gn:avgMethodAvgmethod_avgmethodname -> rdfs:label -> AvgMethod(Normalization) ``` Here's an example query: diff --git a/rdf-documentation/tissue-metadata.md b/rdf-documentation/tissue-metadata.md index 37145c4..61ef899 100644 --- a/rdf-documentation/tissue-metadata.md +++ b/rdf-documentation/tissue-metadata.md @@ -19,7 +19,7 @@ Here's an example query: ```sparql PREFIX gn: -PREFIX gnt: +PREFIX gnt: <http://genenetwork.org/term/> PREFIX skos: PREFIX gnc: PREFIX rdf: -- cgit v1.2.3