diff options
author | SoloDShelby | 2024-07-19 14:41:40 +0300 |
---|---|---|
committer | SoloDShelby | 2024-07-19 14:41:40 +0300 |
commit | 3fa31b50af2861382fbe2c76406f5a04c3fefc93 (patch) | |
tree | 34d581648b0e0d3fc8dbe6577752a4fd433a3258 /gnqa/paper1_eval/src/data/queries | |
parent | 74616897e30c7daafe5e74d34073466464921316 (diff) | |
download | gn-ai-3fa31b50af2861382fbe2c76406f5a04c3fefc93.tar.gz |
Evaluation code for paper 1
Diffstat (limited to 'gnqa/paper1_eval/src/data/queries')
4 files changed, 250 insertions, 0 deletions
diff --git a/gnqa/paper1_eval/src/data/queries/gpt4o-queries-partial.json b/gnqa/paper1_eval/src/data/queries/gpt4o-queries-partial.json new file mode 100644 index 0000000..02e7b48 --- /dev/null +++ b/gnqa/paper1_eval/src/data/queries/gpt4o-queries-partial.json @@ -0,0 +1,45 @@ +[ + { + "level": "domainexpert", + "domain": "gn", + "query": [ + "How are spatial transcriptomics approaches being integrated into GeneNetwork.org to enhance understanding of tissue architecture and function?", + "How do recent developments in quantitative trait locus (QTL) mapping refine our understanding of gene regulatory variants?", + "What are the implications of incorporating epigenomic data, such as histone modification maps, into the gene networks on GeneNetwork.org?", + "How do recent findings on 3D genome organization contribute to our understanding of functional genomic networks?", + "What are the potential applications of artificial intelligence in improving the annotation and interpretation of gene networks?", + "How has the study of genetic pleiotropy been advanced by data available on GeneNetwork.org?", + "What novel genetic pathways have been identified in GeneNetwork.org studies related to aging and lifespan?", + "How do polygenic risk scores (PRS) developed using GeneNetwork.org data enhance the prediction and prevention of complex diseases?", + "How do recent advancements in network-based integrative genomics alter our understanding of complex trait architectures?", + "What are the latest methodological improvements in evaluating gene-environment interactions using GeneNetwork.org?" 
+ ] + }, + { + "level": "citizenscientist", + "domain": "gn", + "query": [ + "What is GeneNetwork.org, and how does it help scientists understand genetics?", + "How do researchers use GeneNetwork.org to study diseases?", + "What can GeneNetwork.org tell us about how genes interact with each other?", + "How does GeneNetwork.org help in finding the genetic causes of common diseases?", + "Can GeneNetwork.org predict my risk of developing certain health conditions based on my genes?", + "How does GeneNetwork.org make use of data from different populations around the world?", + "What kinds of genetic data are available on GeneNetwork.org?", + "How do scientists use GeneNetwork.org to study differences in gene expression?", + "Can GeneNetwork.org be used to learn about genetic influences on behavior?", + "What role does GeneNetwork.org play in personalized medicine?", + "How does the information on GeneNetwork.org help in developing new treatments for diseases?", + "What is a gene network, and why is it important for understanding genetics?", + "How do researchers identify which genes are important for certain traits using GeneNetwork.org?", + "How can GeneNetwork.org help in understanding complex traits like height or intelligence?", + "Are there any known genetic mutations that cause premature aging?", + "What are the practical applications of the research done through GeneNetwork.org?", + "How can I access and use the data available on GeneNetwork.org?", + "What are some recent discoveries made using GeneNetwork.org?", + "How do scientists ensure the accuracy of the data on GeneNetwork.org?", + "What’s the difference between looking at one gene and studying a whole gene network?", + "How can GeneNetwork.org contribute to advancements in genetic engineering?" 
+ ] + } +] diff --git a/gnqa/paper1_eval/src/data/queries/gpt4o-queries.json b/gnqa/paper1_eval/src/data/queries/gpt4o-queries.json new file mode 100644 index 0000000..74c18b0 --- /dev/null +++ b/gnqa/paper1_eval/src/data/queries/gpt4o-queries.json @@ -0,0 +1,159 @@ +[ + { + "level": "domainexpert", + "domain": "diabetes", + "query": [ + "How do recent advancements in multi-omics approaches, including proteomics and metabolomics, contribute to our understanding of Type 2 diabetes pathogenesis?", + "What novel diabetic loci have been identified through the latest meta-analyses of large-scale genome-wide association studies (GWAS)?", + "How do epigenetic modifications, such as DNA methylation and histone modification, influence the expression of diabetes-related genes?", + "Can you elaborate on the role of the gut microbiome in modulating host genetic predispositions to diabetes?", + "How effective are machine learning algorithms in integrating genomic data to predict individual risk and progression of diabetes?", + "What are the implications of recent findings on the role of long non-coding RNAs (lncRNAs) in the regulation of insulin secretion and sensitivity?", + "How do post-translational modifications of proteins affect key signaling pathways involved in glucose homeostasis?", + "What insights have been gained from studying the genetic basis of syndromic forms of diabetes, such as Wolfram Syndrome and Alström Syndrome?", + "How do genetic and epigenetic differences between monozygotic twins discordant for diabetes inform our understanding of its etiology?", + "What potential therapeutic targets have been identified through recent studies on the interaction between genetic variants and environmental factors in diabetes development?", + "How do rare variants identified through whole-genome sequencing contribute to the heritability of Type 2 diabetes?", + "What are the latest findings on the role of non-coding RNAs in the pathogenesis of diabetes?", + "How does the 
interaction between multiple polygenic risk scores (PRS) improve the prediction of Type 1 and Type 2 diabetes?", + "What are the mechanistic insights into the beta-cell failure pathways gleaned from recent single-cell RNA-sequencing studies?", + "How does the epigenetic landscape of key metabolic tissues change in diabetic versus non-diabetic individuals?", + "What recent advancements have been made in leveraging CRISPR-based approaches to correct monogenic forms of diabetes in vivo?", + "How do genome-wide association studies (GWAS) integrate with multi-omics data to elucidate the complex genetic architectures of diabetes?", + "What is the impact of genomic imprinting on the susceptibility and progression of diabetes?", + "How do longitudinal genomics studies help in understanding gene-environment interactions in diabetes onset and management?", + "How have recent integrative genomics approaches, such as the use of single-cell RNA sequencing combined with epigenomic profiling, advanced our understanding of cellular heterogeneity and gene regulatory networks in pancreatic beta cells under diabetic conditions?" 
+ ] + }, + { + "level": "citizenscientist", + "domain": "diabetes", + "query": [ + "How do genetic mutations in the insulin gene affect glucose metabolism?", + "What are the most common genetic loci associated with an increased risk of Type 2 diabetes?", + "How do genome-wide association studies (GWAS) help in identifying diabetes-related genes?", + "What is the role of the HLA region in the genetic predisposition to Type 1 diabetes?", + "How do genetic differences contribute to variations in diabetes prevalence among different populations?", + "What is the function of the PPAR-gamma gene in diabetes, and how do its variants impact the disease?", + "How can CRISPR/Cas9 technology be used to study or treat genetic forms of diabetes?", + "What is the significance of genetic polymorphisms in the GLUT4 gene for Type 2 diabetes?", + "How do microRNAs regulate gene expression related to diabetes?", + "What insights have been gained from studying the genetic basis of MODY (Maturity Onset Diabetes of the Young)?", + "What genes are most commonly associated with an increased risk of developing diabetes?", + "How can genetic testing help predict a person's risk for diabetes?", + "What role do family genetics play in the likelihood of getting diabetes?", + "Can lifestyle changes affect genetic risk factors for diabetes?", + "What recent breakthroughs have been made in understanding the genetic causes of diabetes?", + "How do genes influence how our bodies respond to sugar and insulin?", + "Are there specific genetic markers that can indicate a higher risk for Type 1 versus Type 2 diabetes?", + "How can new gene therapies potentially cure or treat diabetes?", + "What is the difference between monogenic and polygenic diabetes?", + "How does studying the DNA of people with diabetes help scientists find better treatments or cures?" 
+ ] + }, + { + "level":"citizenscientist", + "domain": "aging", + "query": [ + "What are the main genetic factors that influence aging?", + "How do genes affect the aging process in humans?", + "What lifestyle choices can help slow down genetic aging?", + "How do scientists study the genetics of aging in animals?", + "Are there specific genes that have been linked to longer lifespans?", + "How do telomeres affect the aging process?", + "What role does DNA repair play in aging?", + "Can genetic research lead to treatments that slow down aging?", + "How does mitochondrial DNA influence aging?", + "Are there any known genetic mutations that cause premature aging?", + "What recent discoveries have been made about the genetics of aging?", + "How do epigenetic changes affect aging?", + "What is the role of the gene FOXO3 in longevity?", + "How does the environment interact with genes to influence aging?", + "What are senescent cells and how do they contribute to aging?", + "Are there any known lifestyle interventions that can positively impact genes related to aging?", + "What is the 'epigenetic clock,' and how is it used in aging research?", + "How do researchers use model organisms like yeast or worms to study human aging?", + "Are there any promising anti-aging therapies being developed based on genetic research?", + "How do caloric restriction and diet impact the genetics of aging?" 
+ ] + }, + { + "level":"domainexpert", + "domain":"aging", + "query": [ + "How do recent single-cell transcriptomics studies enhance our understanding of cellular heterogeneity in aging tissues?", + "What are the latest findings on the role of senescence-associated secretory phenotype (SASP) factors in age-related tissue dysfunction?", + "How do age-related changes in chromatin architecture contribute to the decline in cellular function?", + "What insights have been gained from studying the epigenetic reprogramming of aged cells to a more youthful state?", + "How do alterations in the mitochondrial genome and bioenergetics influence the aging process in humans?", + "What are the therapeutic potentials and challenges of targeting the insulin/IGF-1 signaling pathway for extending healthspan and lifespan?", + "How can the integration of proteomics and metabolomics data shed light on age-associated metabolic shifts?", + "What role do long non-coding RNAs (lncRNAs) play in the regulation of aging and age-related diseases?", + "How do recent advancements in CRISPR/Cas9 technology open new avenues for studying and potentially reversing aging?", + "What is the significance of the DNA damage response (DDR) in the context of both replicative and chronological aging?", + "How do age-dependent changes in the immune system, such as immunosenescence, contribute to increased susceptibility to diseases?", + "How do advancements in machine learning and artificial intelligence aid in the identification of biomarkers for biological aging?", + "What recent discoveries have been made regarding the impact of systemic factors, such as circulating microvesicles, on aging phenotypes?", + "How do changes in the gut microbiome composition correlate with aging and longevity?", + "What are the key molecular mechanisms through which caloric restriction exerts its lifespan-extending effects across different species?", + "How do oxidative stress and the subsequent accumulation of damaged 
macromolecules contribute to cellular aging?", + "How are extracellular matrix remodeling and tissue stiffness implicated in the aging process?", + "How do recent developments in autophagy research contribute to our understanding of its role in aging and longevity?", + "What are the implications of age-related shifts in stem cell niche composition and function for tissue regeneration capacity?", + "How do cross-links and advanced glycation end-products (AGEs) accumulation affect the structural integrity and function of aging tissues?" + ] + }, + { + "level": "domainexpert", + "domain": "gn", + "query": [ + "How do recent advancements in network-based integrative genomics alter our understanding of complex trait architectures?", + "What are the latest methodological improvements in evaluating gene-environment interactions using GeneNetwork.org?", + "How do multi-omics data integration techniques enhance the prediction accuracy of phenotypic traits in GeneNetwork datasets?", + "What are the computational challenges and solutions in analyzing large-scale transcriptomic data within GeneNetwork.org?", + "How has the inclusion of data from diverse populations impacted the generalizability of findings on GeneNetwork.org?", + "What novel insights have been obtained from GeneNetwork.org regarding the genetic basis of psychiatric disorders?", + "How do advancements in machine learning algorithms contribute to the deconvolution of gene expression data in complex tissues?", + "What role do enhancer-promoter interactions play in the regulation of gene networks uncovered through GeneNetwork.org?", + "How can the integration of ATAC-seq data with RNA-seq data on GeneNetwork.org inform about chromatin accessibility and gene regulation?", + "What are the latest strategies for inferring causal relationships within gene networks using data from GeneNetwork.org?", + "How do advancements in single-nucleus RNA sequencing provide more granular insights into cell-type-specific gene 
expression networks?", + "What impact have recent discoveries in non-coding RNA regulation had on refining gene interaction maps on GeneNetwork.org?", + "How are spatial transcriptomics approaches being integrated into GeneNetwork.org to enhance understanding of tissue architecture and function?", + "How do recent developments in quantitative trait locus (QTL) mapping refine our understanding of gene regulatory variants?", + "What are the implications of incorporating epigenomic data, such as histone modification maps, into the gene networks on GeneNetwork.org?", + "How do recent findings on 3D genome organization contribute to our understanding of functional genomic networks?", + "What are the potential applications of artificial intelligence in improving the annotation and interpretation of gene networks?", + "How has the study of genetic pleiotropy been advanced by data available on GeneNetwork.org?", + "What novel genetic pathways have been identified in GeneNetwork.org studies related to aging and lifespan?", + "How do polygenic risk scores (PRS) developed using GeneNetwork.org data enhance the prediction and prevention of complex diseases?" 
+ ] + }, + { + "level": "citizenscientist", + "domain": "gn", + "query": [ + "What is GeneNetwork.org, and how does it help scientists understand genetics?", + "How do researchers use GeneNetwork.org to study diseases?", + "What can GeneNetwork.org tell us about how genes interact with each other?", + "How does GeneNetwork.org help in finding the genetic causes of common diseases?", + "Can GeneNetwork.org predict my risk of developing certain health conditions based on my genes?", + "How does GeneNetwork.org make use of data from different populations around the world?", + "What kinds of genetic data are available on GeneNetwork.org?", + "How do scientists use GeneNetwork.org to study differences in gene expression?", + "Can GeneNetwork.org be used to learn about genetic influences on behavior?", + "What role does GeneNetwork.org play in personalized medicine?", + "How does the information on GeneNetwork.org help in developing new treatments for diseases?", + "What is a gene network, and why is it important for understanding genetics?", + "How do researchers identify which genes are important for certain traits using GeneNetwork.org?", + "How can GeneNetwork.org help in understanding complex traits like height or intelligence?", + "Are there any known genetic mutations that cause premature aging?", + "What are the practical applications of the research done through GeneNetwork.org?", + "How can I access and use the data available on GeneNetwork.org?", + "What are some recent discoveries made using GeneNetwork.org?", + "How do scientists ensure the accuracy of the data on GeneNetwork.org?", + "What’s the difference between looking at one gene and studying a whole gene network?", + "How can GeneNetwork.org contribute to advancements in genetic engineering?" 
+ ] + } +] diff --git a/gnqa/paper1_eval/src/data/queries/query_generation_prompt.md b/gnqa/paper1_eval/src/data/queries/query_generation_prompt.md new file mode 100644 index 0000000..5b09832 --- /dev/null +++ b/gnqa/paper1_eval/src/data/queries/query_generation_prompt.md @@ -0,0 +1,14 @@ +# OpenAI gpt4o Query Generation prompt + +## System settings +There is a retrieval augmented generation system, called GNQA, that holds a corpus of 3000 research documents. The documents span the topics of research related to genenetwork.org, research about the genetics and genomics of diabetes and aging. The system's topics will be referred to as GN, aging, and sugah. Two types of individuals query GNQA, citizen scientists and domain experts. A citizen scientist is someone with no more than an undergraduate-level understanding of biology and is someone who did not major or minor in biology. A domain expert has studied advanced biology and has a graduate degree in a type of biology or majored in biology for undergraduate school. + +## User messages + +Generate 20 questions, for GNQA, about research on GN from the perspective of a citizen scientist. +Generate 20 questions, for GNQA, about research on GN from the perspective of a domain expert. +Generate 20 questions, for GNQA, about research on aging from the perspective of a domain expert. +Generate 20 questions, for GNQA, about research on aging from the perspective of a citizen scientist. +Generate 20 questions, for GNQA, about research on sugah from the perspective of a domain expert. +Generate 20 questions, for GNQA, about research on sugah from the perspective of a citizen scientist. 
+ diff --git a/gnqa/paper1_eval/src/data/queries/voluteer_queries.json b/gnqa/paper1_eval/src/data/queries/voluteer_queries.json new file mode 100644 index 0000000..d855140 --- /dev/null +++ b/gnqa/paper1_eval/src/data/queries/voluteer_queries.json @@ -0,0 +1,32 @@ +"stuff_a": { + "level": "domainexpert", + "domain": "gn", + "query": [ + "What is ensembl", + "Which database can I use for genetic, genomics, phenotype, and disease-related data generated from rat research?", + "What is RGD?", + "What resources can I use to do pathway analyses?", + "Which genes give a predisposition to developing T1D?" + ], + "task_id": [ + "7C028B1D0013EA11574B094986ABE4C2", + "55562016699AFE4B8AD9A7F29A806CB5", + "C9B1B98F9207B79EBBC98790A769CB51", + "242918F32291CC085DEB319A7EE3284B", + "029A427CEEBABE644F12EE390469B134" + ] +}, +"stuff_b": { + "level": "domainexpert", + "domain": "diabetes", + "query": [ + "Is any of the genes SH2B3, IFIH1 or ERBB3 related to diabetes?", + "How can I use genenetwork to find genes related with diabetes in humans?", + "How can I use the GeneNetwork tool to find genes related with diabetes in humans?" + ], + "task_id": [ + "173BE1F01E4A6074A0EBB7CC6137AC8D", + "F55160D302C899B2131AA8502A640684", + "B52AF52D46499DE2B98933F1786EC9E0" + ] +} |