diff options
-rw-r--r-- | issues/fetch_trait_data_using_genofiles.gmi | 199 |
1 files changed, 195 insertions, 4 deletions
diff --git a/issues/fetch_trait_data_using_genofiles.gmi b/issues/fetch_trait_data_using_genofiles.gmi index 633d15e..1108166 100644 --- a/issues/fetch_trait_data_using_genofiles.gmi +++ b/issues/fetch_trait_data_using_genofiles.gmi @@ -11,6 +11,8 @@ ## Notes since Genenetwork database does not have all genotype files when fetching sample data use genotypes to fetch trat data given a dataset and the trait +Having fetched the sample names of a given group from the genofiles use +that to fetch the sample values + CaseAttributes example of this @@ -19,15 +21,204 @@ example of this DATASET_NAME:HC_M2_0606_P TRAIT: 1454998_at GROUP: BXD - //expected results are[id,strain,Value, CaseAttributes data()] using the genofiles ``` -[] indentify all required fields for this -[] code implementation -[] handling null values +### TODOS + +[x] indentify all required fields for this +[x] code implementation +[ ] handling null values + + +Documented for Probeset,Genoset and PublishData + +### Publish Dataset + +Example for phenotype fetching data for phenotype 35 +```sql +SELECT A.Sample, A.Value, A.SE, A.Count, B.CaseAttributes FROM +(SELECT DISTINCT st.Name as 'Sample', ifnull(pd.value, 'x') as 'Value', +ifnull(ps.error, 'x') as 'SE', ifnull(ns.count, 'x') as 'Count', ps.StrainId as 'StrainId' +FROM PublishFreeze pf JOIN PublishXRef px ON px.InbredSetId = pf.InbredSetId +JOIN PublishData pd ON pd.Id = px.DataId JOIN Strain st ON pd.StrainId = st.Id +LEFT JOIN PublishSE ps ON ps.DataId = pd.Id AND ps.StrainId = pd.StrainId +LEFT JOIN NStrain ns ON ns.DataId = pd.Id AND ns.StrainId = pd.StrainId +WHERE px.PhenotypeId = 35 ORDER BY st.Name) A +JOIN +(SELECT cxref.StrainId as StrainId, group_concat(ca.Name, '=', cxref.Value) as "CaseAttributes" +FROM CaseAttributeXRefNew cxref LEFT JOIN CaseAttribute ca +ON ca.Id = cxref.CaseAttributeId +GROUP BY InbredSetId, cxref.StrainId) B ON A.StrainId = B.StrainId +LIMIT 5; +//results ++--------+-----------+------+-------+----------------------------------------------------------------+ +| Sample | Value | SE | Count | CaseAttributes | ++--------+-----------+------+-------+----------------------------------------------------------------+ +| BXD1 | 18.700001 | 1.6 | x | Status=Live,Strain=BXD1,RRID=JAX:000036,Epoch=1,SeqCvge=36+39L | +| BXD11 | 18.900000 | 0.78 | x | Status=UTHSC,Strain=BXD11,RRID=JAX:000012,Epoch=1,SeqCvge=42 | +| BXD12 | 16.000000 | 0.86 | x | Status=Live,Strain=BXD12,RRID=JAX:000045,Epoch=1,SeqCvge=43 | ++--------+-----------+------+-------+----------------------------------------------------------------+ + + +fetching specific trait sample data and case attributes for Publish using the parsed genofiles + +``` sql + +SELECT +Strain.Name, GenoData.value, GenoSE.error, GenoData.Id +FROM +(GenoData, GenoFreeze, Strain, Geno, GenoXRef) +left join GenoSE on +(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) +WHERE +Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND +GenoXRef.GenoFreezeId = GenoFreeze.Id AND +GenoFreeze.Name = '%s' AND +GenoXRef.DataId = GenoData.Id AND +GenoData.StrainId = Strain.Id +Order BY +Strain.Name +``` + +### Probeset Dataset +Dataset:Exon, +Dataset_id:206 +trait:4336695 +```sql + +SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2,B.CaseAttributes +FROM +(ProbeSetData, ProbeSetFreeze, + Strain, ProbeSet, ProbeSetXRef) +left join ProbeSetSE on +(ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) +left join NStrain on +(NStrain.DataId = ProbeSetData.Id AND +NStrain.StrainId = ProbeSetData.StrainId) +left join +(SELECT cxref.StrainId as StrainId, group_concat(ca.Name, '=', cxref.Value) as "CaseAttributes" +FROM CaseAttributeXRefNew cxref LEFT JOIN CaseAttribute ca +ON ca.Id = cxref.CaseAttributeId +GROUP BY InbredSetId, cxref.StrainId) B ON ProbeSetData.StrainId= B.StrainId +WHERE +ProbeSet.Name = '4336695' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND +ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND +ProbeSetFreeze.Name = 'UMUTAffyExon_0209_RMA' AND +ProbeSetXRef.DataId = ProbeSetData.Id AND +ProbeSetData.StrainId = Strain.Id +Order BY +Strain.Name; +//results + ++--------------+---------+----------+-------+--------------+-----------------------------------------------------------------+ +| Name | value | error | count | Name2 | CaseAttributes | ++--------------+---------+----------+-------+--------------+-----------------------------------------------------------------+ +| 129S1/SvImJ | 5.83203 | 0.26209 | NULL | 129S1/SvImJ | NULL | +| A/J | 6.15107 | 0.013283 | NULL | A/J | NULL | +| AKR/J | 6.2494 | 0.01774 | NULL | AKR/J | NULL | +| B6D2F1 | 6.50013 | 0.175722 | NULL | B6D2F1 | Status=Live,Strain=B6D2F1,RRID=JAX:100006,Epoch=0,SeqCvge= | +``` +Fetch specific traits +```sql + +SELECT +Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id +FROM +(ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) +left join ProbeSetSE on +(ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) +WHERE +ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND +ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND +ProbeSetFreeze.Name = '%s' AND +ProbeSetXRef.DataId = ProbeSetData.Id AND +ProbeSetData.StrainId = Strain.Id +Order BY +Strain.Name + +``` + + +``` +for count +```sql +SELECT COUNT(*) QUERY_STATEMENT +``` + +### Genotype + +to fetch trait and dataset samples + case attributes +``` sql +SELECT + Strain.Name, GenoData.value, GenoSE.error, GenoData.Id +FROM (GenoData, GenoFreeze, Strain, GenoXRef) +left join GenoSE on +GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId +WHERE + GenoXRef.GenoFreezeId = GenoFreeze.Id AND + GenoXRef.DataId = GenoData.Id AND + GenoData.StrainId = Strain.Id +JOIN +(SELECT cxref.StrainId as StrainId, group_concat(ca.Name, '=', cxref.Value) as "CaseAttributes" +FROM CaseAttributeXRefNew cxref LEFT JOIN CaseAttribute ca +ON ca.Id = cxref.CaseAttributeId +GROUP BY InbredSetId, cxref.StrainId) B ON GenoData.StrainId = B.StrainId +LIMIT 5; +``` + +individual trait +``` + + SELECT + Strain.Name, GenoData.value, GenoSE.error, GenoData.Id + FROM + (GenoData, GenoFreeze, Strain, Geno, GenoXRef) + left join GenoSE on + (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) + WHERE + Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND + GenoXRef.GenoFreezeId = GenoFreeze.Id AND + GenoFreeze.Name = '%s' AND + GenoXRef.DataId = GenoData.Id AND + GenoData.StrainId = Strain.Id + Order BY + Strain.Name + +``` + + +### general fetching case attributes + +sql code for fetching case attributes + +```sql +SELECT DISTINCT CaseAttribute.Id, CaseAttribute.Name, CaseAttribute.Description, CaseAttributeXRefNew.Value +FROM CaseAttribute, CaseAttributeXRefNew +WHERE CaseAttributeXRefNew.CaseAttributeId = CaseAttribute.Id + AND CaseAttributeXRefNew.InbredSetId = %s + ORDER BY CaseAttribute.Id''' + +%s -> requires id for the group for example BXD->1 +``` + +### fetch extra attributes with sample names + +```sql +SELECT Strain.Name AS SampleName, CaseAttributeId AS Id, CaseAttributeXRefNew.Value +FROM Strain, StrainXRef, InbredSet, CaseAttributeXRefNew +WHERE StrainXRef.StrainId = Strain.Id +AND InbredSet.Id = StrainXRef.InbredSetId +AND CaseAttributeXRefNew.StrainId = Strain.Id +AND InbredSet.Id = CaseAttributeXRefNew.InbredSetId +AND CaseAttributeXRefNew.InbredSetId = 1 +ORDER BY SampleName + +fetch sample case attributes for bxd + +``` |