gn-gemtext - GeneNetwork gemtext knowledge base and issue tracker (mirror right now)

diff options


context:
space:
mode:

Diffstat (limited to 'issues/fetch_trait_data_using_genofiles.gmi')

-rw-r--r--

issues/fetch_trait_data_using_genofiles.gmi

199

1 files changed, 195 insertions, 4 deletions

diff --git a/issues/fetch_trait_data_using_genofiles.gmi b/issues/fetch_trait_data_using_genofiles.gmi
index 633d15e..1108166 100644
--- a/issues/fetch_trait_data_using_genofiles.gmi
+++ b/issues/fetch_trait_data_using_genofiles.gmi

@@ -11,6 +11,8 @@

## Notes

since Genenetwork database does not have all genotype files when fetching sample data use genotypes to fetch trat data given a dataset and the trait

+Having fetched the sample names of a given group from the genofiles use

+that to fetch the sample values + CaseAttributes

example of this

@@ -19,15 +21,204 @@ example of this

DATASET_NAME:HC_M2_0606_P

TRAIT: 1454998_at

GROUP: BXD

//expected results are[id,strain,Value, CaseAttributes data()] using the genofiles

```

-[] indentify all required fields for this

-[] code implementation

-[] handling null values

+### TODOS

+[x] indentify all required fields for this

+[x] code implementation

+[ ] handling null values

+Documented for Probeset,Genoset and PublishData

+### Publish Dataset

+Example for phenotype fetching data for phenotype 35

+```sql

+SELECT A.Sample, A.Value, A.SE, A.Count, B.CaseAttributes FROM

+(SELECT DISTINCT st.Name as 'Sample', ifnull(pd.value, 'x') as 'Value',

+ifnull(ps.error, 'x') as 'SE', ifnull(ns.count, 'x') as 'Count', ps.StrainId as 'StrainId'

+FROM PublishFreeze pf JOIN PublishXRef px ON px.InbredSetId = pf.InbredSetId

+JOIN PublishData pd ON pd.Id = px.DataId JOIN Strain st ON pd.StrainId = st.Id

+LEFT JOIN PublishSE ps ON ps.DataId = pd.Id AND ps.StrainId = pd.StrainId

+LEFT JOIN NStrain ns ON ns.DataId = pd.Id AND ns.StrainId = pd.StrainId

+WHERE px.PhenotypeId = 35 ORDER BY st.Name) A

+JOIN

+(SELECT cxref.StrainId as StrainId, group_concat(ca.Name, '=', cxref.Value) as "CaseAttributes"

+FROM CaseAttributeXRefNew cxref LEFT JOIN CaseAttribute ca

+ON ca.Id = cxref.CaseAttributeId

+GROUP BY InbredSetId, cxref.StrainId) B ON A.StrainId = B.StrainId

+LIMIT 5;

+//results

++--------+-----------+------+-------+----------------------------------------------------------------+

++--------+-----------+------+-------+----------------------------------------------------------------+

+| BXD1 | 18.700001 | 1.6 | x | Status=Live,Strain=BXD1,RRID=JAX:000036,Epoch=1,SeqCvge=36+39L |

+| BXD11 | 18.900000 | 0.78 | x | Status=UTHSC,Strain=BXD11,RRID=JAX:000012,Epoch=1,SeqCvge=42 |

+| BXD12 | 16.000000 | 0.86 | x | Status=Live,Strain=BXD12,RRID=JAX:000045,Epoch=1,SeqCvge=43 |

++--------+-----------+------+-------+----------------------------------------------------------------+

+fetching specific trait sample data and case attributes for Publish using the parsed genofiles

+``` sql

+SELECT

+Strain.Name, GenoData.value, GenoSE.error, GenoData.Id

+FROM

+(GenoData, GenoFreeze, Strain, Geno, GenoXRef)

+left join GenoSE on

+(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)

+WHERE

+Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND

+GenoXRef.GenoFreezeId = GenoFreeze.Id AND

+GenoFreeze.Name = '%s' AND

+GenoXRef.DataId = GenoData.Id AND

+GenoData.StrainId = Strain.Id

+Order BY

+Strain.Name

+```

+### Probeset Dataset

+Dataset:Exon,

+Dataset_id:206

+trait:4336695

+```sql

+SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2,B.CaseAttributes

+FROM

+(ProbeSetData, ProbeSetFreeze,

+ Strain, ProbeSet, ProbeSetXRef)

+left join ProbeSetSE on

+(ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)

+left join NStrain on

+(NStrain.DataId = ProbeSetData.Id AND

+NStrain.StrainId = ProbeSetData.StrainId)

+left join

+(SELECT cxref.StrainId as StrainId, group_concat(ca.Name, '=', cxref.Value) as "CaseAttributes"

+FROM CaseAttributeXRefNew cxref LEFT JOIN CaseAttribute ca

+ON ca.Id = cxref.CaseAttributeId

+GROUP BY InbredSetId, cxref.StrainId) B ON ProbeSetData.StrainId= B.StrainId

+WHERE

+ProbeSet.Name = '4336695' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND

+ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND

+ProbeSetFreeze.Name = 'UMUTAffyExon_0209_RMA' AND

+ProbeSetXRef.DataId = ProbeSetData.Id AND

+ProbeSetData.StrainId = Strain.Id

+Order BY

+Strain.Name;

+//results

++--------------+---------+----------+-------+--------------+-----------------------------------------------------------------+

++--------------+---------+----------+-------+--------------+-----------------------------------------------------------------+

+| 129S1/SvImJ | 5.83203 | 0.26209 | NULL | 129S1/SvImJ | NULL |

+| A/J | 6.15107 | 0.013283 | NULL | A/J | NULL |

+| AKR/J | 6.2494 | 0.01774 | NULL | AKR/J | NULL |

+| B6D2F1 | 6.50013 | 0.175722 | NULL | B6D2F1 | Status=Live,Strain=B6D2F1,RRID=JAX:100006,Epoch=0,SeqCvge= |

+```

+Fetch specific traits

+```sql

+SELECT

+Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id

+FROM

+(ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)

+left join ProbeSetSE on

+(ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)

+WHERE

+ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND

+ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND

+ProbeSetFreeze.Name = '%s' AND

+ProbeSetXRef.DataId = ProbeSetData.Id AND

+ProbeSetData.StrainId = Strain.Id

+Order BY

+Strain.Name

+```

+for count

+```sql

+SELECT COUNT(*) QUERY_STATEMENT

+```

+### Genotype

+to fetch trait and dataset samples + case attributes

+``` sql

+SELECT

+ Strain.Name, GenoData.value, GenoSE.error, GenoData.Id

+FROM (GenoData, GenoFreeze, Strain, GenoXRef)

+left join GenoSE on

+GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId

+WHERE

+ GenoXRef.GenoFreezeId = GenoFreeze.Id AND

+ GenoXRef.DataId = GenoData.Id AND

+ GenoData.StrainId = Strain.Id

+JOIN

+(SELECT cxref.StrainId as StrainId, group_concat(ca.Name, '=', cxref.Value) as "CaseAttributes"

+FROM CaseAttributeXRefNew cxref LEFT JOIN CaseAttribute ca

+ON ca.Id = cxref.CaseAttributeId

+GROUP BY InbredSetId, cxref.StrainId) B ON GenoData.StrainId = B.StrainId

+LIMIT 5;

+```

+individual trait

+```

+ SELECT

+ Strain.Name, GenoData.value, GenoSE.error, GenoData.Id

+ FROM

+ (GenoData, GenoFreeze, Strain, Geno, GenoXRef)

+ left join GenoSE on

+ (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)

+ WHERE

+ Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND

+ GenoXRef.GenoFreezeId = GenoFreeze.Id AND

+ GenoFreeze.Name = '%s' AND

+ GenoXRef.DataId = GenoData.Id AND

+ GenoData.StrainId = Strain.Id

+ Order BY

+ Strain.Name

+```

+### general fetching case attributes

+sql code for fetching case attributes

+```sql

+SELECT DISTINCT CaseAttribute.Id, CaseAttribute.Name, CaseAttribute.Description, CaseAttributeXRefNew.Value

+FROM CaseAttribute, CaseAttributeXRefNew

+WHERE CaseAttributeXRefNew.CaseAttributeId = CaseAttribute.Id

+ AND CaseAttributeXRefNew.InbredSetId = %s

+ ORDER BY CaseAttribute.Id'''

+%s -> requires id for the group for example BXD->1

+```

+### fetch extra attributes with sample names

+```sql

+SELECT Strain.Name AS SampleName, CaseAttributeId AS Id, CaseAttributeXRefNew.Value

+FROM Strain, StrainXRef, InbredSet, CaseAttributeXRefNew

+WHERE StrainXRef.StrainId = Strain.Id

+AND InbredSet.Id = StrainXRef.InbredSetId

+AND CaseAttributeXRefNew.StrainId = Strain.Id

+AND InbredSet.Id = CaseAttributeXRefNew.InbredSetId

+AND CaseAttributeXRefNew.InbredSetId = 1

+ORDER BY SampleName

+fetch sample case attributes for bxd

+```