From 835e229909e9bdb6e084c5112672065886517adb Mon Sep 17 00:00:00 2001 From: Nyeusi D. Shebes Date: Thu, 27 Feb 2025 22:04:47 -0600 Subject: refactoring codebase --- gnqa/paper2_eval/README.md | 27 ++++ gnqa/paper2_eval/data/gpt4o-queries.json | 159 --------------------- gnqa/paper2_eval/data/human-questions.json | 172 ----------------------- gnqa/paper2_eval/data/lists/gpt4o-queries.json | 159 +++++++++++++++++++++ gnqa/paper2_eval/data/lists/human-questions.json | 172 +++++++++++++++++++++++ 5 files changed, 358 insertions(+), 331 deletions(-) delete mode 100644 gnqa/paper2_eval/data/gpt4o-queries.json delete mode 100644 gnqa/paper2_eval/data/human-questions.json create mode 100644 gnqa/paper2_eval/data/lists/gpt4o-queries.json create mode 100644 gnqa/paper2_eval/data/lists/human-questions.json (limited to 'gnqa/paper2_eval') diff --git a/gnqa/paper2_eval/README.md b/gnqa/paper2_eval/README.md index 13cb113..8dff6f6 100644 --- a/gnqa/paper2_eval/README.md +++ b/gnqa/paper2_eval/README.md @@ -4,3 +4,30 @@ This directory contains the code created to evaluate questions submitted to GNQA. Unlike the evaluation in paper 1, this work uses different LLMs and a different RAG engine. RAGAS is still used to evaluate the queries. + +The RAG engine being used is [R2R](https://github.com/SciPhi-AI/R2R). It is open source and has performance similar to the engine we used for our first GNQA paper. + +The evaluation workflow is organized around reading questions that can be classified with two sets of categories, e.g. category 1 - who asked the questions, category 2 - the field to which the question belongs. +In our initial work, category 1 consists of citizen scientists and domain experts, +while category 2 consists of three fields or specializations: GeneNetwork.org systems genetics, the genetics of diabetes, and the genetics of aging. + +We will have to make the code more configurable by pulling the categories out of the source code and keeping them strictly in settings files. 
+ +It is best to define a structure for your different types of data: sets, lists, responses, and scores. + +| File Operator | From directory | To directory | command | +|:---:|---:|---:|:--| +| create_dataset | list | dataset | python create_dataset.py \ +| | | |     ../data/lists/list_catA_catB.json \ | +| | | |     ../data/dataset/catA_catB.json | +| run_questions | list | responses | +| | | |     ../data/list/catA_question_list.json \ | +| | | |     ../data/responses/resp_catA_catB.json | +| parse_r2r_result | responses | dataset | | +| | | |     ../data/responses/resp_catA_catB.json \ | +| | | |     ../data/dataset/intermediate_files/catA_catB_.json | +| ragas_eval | dataset | scores | python3 ragas_eval.py \ | +| | | |     ../data/datasets/catA/catB_1.json \ | +| | | |     ../data/scores/catA/catB_1.json \ | +| | | |     3 # run evaluation 3 times | + \ No newline at end of file diff --git a/gnqa/paper2_eval/data/gpt4o-queries.json b/gnqa/paper2_eval/data/gpt4o-queries.json deleted file mode 100644 index 74c18b0..0000000 --- a/gnqa/paper2_eval/data/gpt4o-queries.json +++ /dev/null @@ -1,159 +0,0 @@ -[ - { - "level": "domainexpert", - "domain": "diabetes", - "query": [ - "How do recent advancements in multi-omics approaches, including proteomics and metabolomics, contribute to our understanding of Type 2 diabetes pathogenesis?", - "What novel diabetic loci have been identified through the latest meta-analyses of large-scale genome-wide association studies (GWAS)?", - "How do epigenetic modifications, such as DNA methylation and histone modification, influence the expression of diabetes-related genes?", - "Can you elaborate on the role of the gut microbiome in modulating host genetic predispositions to diabetes?", - "How effective are machine learning algorithms in integrating genomic data to predict individual risk and progression of diabetes?", - "What are the implications of recent findings on the role of long non-coding RNAs (lncRNAs) in the regulation of 
insulin secretion and sensitivity?", - "How do post-translational modifications of proteins affect key signaling pathways involved in glucose homeostasis?", - "What insights have been gained from studying the genetic basis of syndromic forms of diabetes, such as Wolfram Syndrome and Alström Syndrome?", - "How do genetic and epigenetic differences between monozygotic twins discordant for diabetes inform our understanding of its etiology?", - "What potential therapeutic targets have been identified through recent studies on the interaction between genetic variants and environmental factors in diabetes development?", - "How do rare variants identified through whole-genome sequencing contribute to the heritability of Type 2 diabetes?", - "What are the latest findings on the role of non-coding RNAs in the pathogenesis of diabetes?", - "How does the interaction between multiple polygenic risk scores (PRS) improve the prediction of Type 1 and Type 2 diabetes?", - "What are the mechanistic insights into the beta-cell failure pathways gleaned from recent single-cell RNA-sequencing studies?", - "How does the epigenetic landscape of key metabolic tissues change in diabetic versus non-diabetic individuals?", - "What recent advancements have been made in leveraging CRISPR-based approaches to correct monogenic forms of diabetes in vivo?", - "How do genome-wide association studies (GWAS) integrate with multi-omics data to elucidate the complex genetic architectures of diabetes?", - "What is the impact of genomic imprinting on the susceptibility and progression of diabetes?,", - "How do longitudinal genomics studies help in understanding gene-environment interactions in diabetes onset and management?", - "How have recent integrative genomics approaches, such as the use of single-cell RNA sequencing combined with epigenomic profiling, advanced our understanding of cellular heterogeneity and gene regulatory networks in pancreatic beta cells under diabetic conditions?" 
- ] - }, - { - "level": "citizenscientist", - "domain": "diabetes", - "query": [ - "How do genetic mutations in the insulin gene affect glucose metabolism?", - "What are the most common genetic loci associated with an increased risk of Type 2 diabetes?", - "How does genome-wide association studies (GWAS) help in identifying diabetes-related genes?", - "What is the role of the HLA region in the genetic predisposition to Type 1 diabetes?", - "How do genetic differences contribute to variations in diabetes prevalence among different populations?", - "What is the function of the PPAR-gamma gene in diabetes, and how do its variants impact the disease?", - "How can CRISPR/Cas9 technology be used to study or treat genetic forms of diabetes?", - "What is the significance of genetic polymorphisms in the GLUT4 gene for Type 2 diabetes?", - "How do microRNAs regulate gene expression related to diabetes?", - "What insights have been gained from studying the genetic basis of MODY (Maturity Onset Diabetes of the Young)?", - "What genes are most commonly associated with an increased risk of developing diabetes?", - "How can genetic testing help predict a person's risk for diabetes?", - "What role do family genetics play in the likelihood of getting diabetes?", - "Can lifestyle changes affect genetic risk factors for diabetes?", - "What recent breakthroughs have been made in understanding the genetic causes of diabetes?", - "How do genes influence how our bodies respond to sugar and insulin?", - "Are there specific genetic markers that can indicate a higher risk for Type 1 versus Type 2 diabetes?", - "How can new gene therapies potentially cure or treat diabetes?", - "What is the difference between monogenic and polygenic diabetes?", - "How does studying the DNA of people with diabetes help scientists find better treatments or cures?" 
- ] - }, - { - "level":"citizenscientist", - "domain": "aging", - "query": [ - "What are the main genetic factors that influence aging?", - "How do genes affect the aging process in humans?", - "What lifestyle choices can help slow down genetic aging?", - "How do scientists study the genetics of aging in animals?", - "Are there specific genes that have been linked to longer lifespans?", - "How do telomeres affect the aging process?", - "What role does DNA repair play in aging?", - "Can genetic research lead to treatments that slow down aging?", - "How does mitochondrial DNA influence aging?", - "Are there any known genetic mutations that cause premature aging?", - "What recent discoveries have been made about the genetics of aging?", - "How do epigenetic changes affect aging?", - "What is the role of the gene FOXO3 in longevity?", - "How does the environment interact with genes to influence aging?", - "What are senescent cells and how do they contribute to aging?", - "Are there any known lifestyle interventions that can positively impact genes related to aging?", - "What is the 'epigenetic clock,' and how is it used in aging research?", - "How do researchers use model organisms like yeast or worms to study human aging?", - "Are there any promising anti-aging therapies being developed based on genetic research?", - "How do caloric restriction and diet impact the genetics of aging?" 
- ] - }, - { - "level":"domainexpert", - "domain":"aging", - "query": [ - "How do recent single-cell transcriptomics studies enhance our understanding of cellular heterogeneity in aging tissues?", - "What are the latest findings on the role of senescence-associated secretory phenotype (SASP) factors in age-related tissue dysfunction?", - "How do age-related changes in chromatin architecture contribute to the decline in cellular function?", - "What insights have been gained from studying the epigenetic reprogramming of aged cells to a more youthful state?", - "How do alterations in the mitochondrial genome and bioenergetics influence the aging process in humans?", - "What are the therapeutic potentials and challenges of targeting the insulin/IGF-1 signaling pathway for extending healthspan and lifespan?", - "How can the integration of proteomics and metabolomics data shed light on age-associated metabolic shifts?", - "What role do long non-coding RNAs (lncRNAs) play in the regulation of aging and age-related diseases?", - "How do recent advancements in CRISPR/Cas9 technology open new avenues for studying and potentially reversing aging?", - "What is the significance of the DNA damage response (DDR) in the context of both replicative and chronological aging?", - "How do age-dependent changes in the immune system, such as immunosenescence, contribute to increased susceptibility to diseases?", - "How do advancements in machine learning and artificial intelligence aid in the identification of biomarkers for biological aging?", - "What recent discoveries have been made regarding the impact of systemic factors, such as circulating microvesicles, on aging phenotypes?", - "How do changes in the gut microbiome composition correlate with aging and longevity?", - "What are the key molecular mechanisms through which caloric restriction exerts its lifespan-extending effects across different species?", - "How do oxidative stress and the subsequent accumulation of damaged 
macromolecules contribute to cellular aging?", - "How are extracellular matrix remodeling and tissue stiffness implicated in the aging process?", - "How do recent developments in autophagy research contribute to our understanding of its role in aging and longevity?", - "What are the implications of age-related shifts in stem cell niche composition and function for tissue regeneration capacity?", - "How do cross-links and advanced glycation end-products (AGEs) accumulation affect the structural integrity and function of aging tissues?" - ] - }, - { - "level": "domainexpert", - "domain": "gn", - "query": [ - "How do recent advancements in network-based integrative genomics alter our understanding of complex trait architectures?", - "What are the latest methodological improvements in evaluating gene-environment interactions using GeneNetwork.org?", - "How do multi-omics data integration techniques enhance the prediction accuracy of phenotypic traits in GeneNetwork datasets?", - "What are the computational challenges and solutions in analyzing large-scale transcriptomic data within GeneNetwork.org?", - "How has the inclusion of data from diverse populations impacted the generalizability of findings on GeneNetwork.org?", - "What novel insights have been obtained from GeneNetwork.org regarding the genetic basis of psychiatric disorders?", - "How do advancements in machine learning algorithms contribute to the deconvolution of gene expression data in complex tissues?", - "What role do enhancer-promoter interactions play in the regulation of gene networks uncovered through GeneNetwork.org?", - "How can the integration of ATAC-seq data with RNA-seq data on GeneNetwork.org inform about chromatin accessibility and gene regulation?", - "What are the latest strategies for inferring causal relationships within gene networks using data from GeneNetwork.org?", - "How do advancements in single-nucleus RNA sequencing provide more granular insights into cell-type-specific gene 
expression networks?", - "What impact have recent discoveries in non-coding RNA regulation had on refining gene interaction maps on GeneNetwork.org?", - "How are spatial transcriptomics approaches being integrated into GeneNetwork.org to enhance understanding of tissue architecture and function?", - "How do recent developments in quantitative trait locus (QTL) mapping refine our understanding of gene regulatory variants?", - "What are the implications of incorporating epigenomic data, such as histone modification maps, into the gene networks on GeneNetwork.org?", - "How do recent findings on 3D genome organization contribute to our understanding of functional genomic networks?", - "What are the potential applications of artificial intelligence in improving the annotation and interpretation of gene networks?", - "How has the study of genetic pleiotropy been advanced by data available on GeneNetwork.org?", - "What novel genetic pathways have been identified in GeneNetwork.org studies related to aging and lifespan?", - "How do polygenic risk scores (PRS) developed using GeneNetwork.org data enhance the prediction and prevention of complex diseases?" 
- ] - }, - { - "level": "citizenscientist", - "domain": "gn", - "query": [ - "What is GeneNetwork.org, and how does it help scientists understand genetics?", - "How do researchers use GeneNetwork.org to study diseases?", - "What can GeneNetwork.org tell us about how genes interact with each other?", - "How does GeneNetwork.org help in finding the genetic causes of common diseases?", - "Can GeneNetwork.org predict my risk of developing certain health conditions based on my genes?", - "How does GeneNetwork.org make use of data from different populations around the world?", - "What kinds of genetic data are available on GeneNetwork.org?", - "How do scientists use GeneNetwork.org to study differences in gene expression?", - "Can GeneNetwork.org be used to learn about genetic influences on behavior?", - "What role does GeneNetwork.org play in personalized medicine?", - "How does the information on GeneNetwork.org help in developing new treatments for diseases?", - "What is a gene network, and why is it important for understanding genetics?", - "How do researchers identify which genes are important for certain traits using GeneNetwork.org?", - "How can GeneNetwork.org help in understanding complex traits like height or intelligence?", - "Are there any known genetic mutations that cause premature aging?", - "What are the practical applications of the research done through GeneNetwork.org?", - "How can I access and use the data available on GeneNetwork.org?", - "What are some recent discoveries made using GeneNetwork.org?", - "How do scientists ensure the accuracy of the data on GeneNetwork.org?", - "What’s the difference between looking at one gene and studying a whole gene network?", - "How can GeneNetwork.org contribute to advancements in genetic engineering?" 
- ] - } -] diff --git a/gnqa/paper2_eval/data/human-questions.json b/gnqa/paper2_eval/data/human-questions.json deleted file mode 100644 index 4142e5b..0000000 --- a/gnqa/paper2_eval/data/human-questions.json +++ /dev/null @@ -1,172 +0,0 @@ -[ - { - "level": "domainexpert", - "domain": "gn", - "query": [ - "What are the potential benefits and risk associated with gene editing technologies like CRISPRR-Cas9?", - "How does epigenetics inluence gene expression without changing the underlying DNA sequence?", - "Describe the role of mitochondrial DNA in heredity and how it differs from nuclear DNA.", - "What are the ethical considerations surrounding prenatal genetic testing and the selective termination of pregnancies based on genetic factors?", - "Create a how-to guide for genetic sequencing.", - - "Which genes give a predisposition to developing T1D?", - "What is ensembl", - "Which database can I use for genetic, genomics, phenotype, and disease-related data generated from rat research?", - "What is RGD?", - "What resources can I use to do pathway analyses?", - - "Once a sperm combines with an egg, what determines how traits are passed onto the resulting lifeform?", - "Why is genetic tracing matrilineal rather than patrilineal?", - "Explain the process of DNA replication and how it ensures accurate copying of genetic information during cell division.", - "What are the potential benefits and risks associated with gene editing technologies like CRISPR-Cas9?", - "How does one tell the difference between X and Y DNA, with repsect to DNA tracing and determining QTLs?", - - - "For text and biological resources, do you mean add some books (on biology stuff) or/and web resources (as ensembl) on your system?", - "what is ensembl?", - "What is the difference between QTL mapping and GWAS?", - "How do I determine which gene in my QTL is causal for the trait?", - "Why do males have two Y chromosomes and females only one?", - - "How does one tell the difference between X and Y 
DNA, with respect to DNA tracing and determining QTLs", - "Once a sperm combines with an egg, what determines how traits are passed onto the resulting lifeform?", - "How can I add a new species to the GeneNetwork database?", - "which genes are typically associated with diabetes in QTL analyses?", - "In which diseases is the gene TCF7L2 involved?", - - "Once a sperm combines with an egg, what determines how traits are passed onto the resulting lifeform?", - "Can you explain what a ribosomal binding site at a high level and make it accessable to a non-expert?", - "Once a sperm combines with an egg, what determines how traits are passed onto the resulting lifeform?", - "Can you explain the difference between sequencing with short reads vs long reads? Please make you answer accessible to a non-expert.", - "Can you explain why using a pangenome-based reference might be more useful than simply using a single linear reference? Please make you answer accessible to a non-expert.", - - "Is all genetic regulation done through DNA (e.g., prompters, repressors, activators) or are there other forms of genetic regulation? Please make you answer accessible to a non-expert.", - "What are the different relationship between traits?", - "Can landscape of QTL and GWAS hits be used to find relationships between traits?" - ] - }, - { - "level": "domainexpert", - "domain": "diabetes", - "query": [ - "How is gene expression in the liver affected by diabetes?", - "Is any of the genes SH2B3, IFIH1 or ERBB3 related to diabetes?", - "nutrition is a factor for diabetes. how can genomics be use to better understand nutritional factors of diabetes", - "nutrition is a factor for diabetes. 
construct an abstract about how can genomics be use to better understand nutritional factors of diabetes", - "Is the gene TCF7L2 involved in diabetes?", - - "Is any of the genes SH2B3, IFIH1 or ERBB3 related to diabetes?", - "How can I use genenetwork to find genes related with diabetes in humans?", - "How can I use the GeneNetwork tool to find genes related with diabetes in humans?", - "what are confounding factors in diabetes?", - "How is the immune system related to diabetes?", - - "What are the genomic variants associated with immune system components and diabetes?", - "What is the role of the immune system in the metabolomics of diabetes and associated conditions?", - "Can the landscape of QTL and GWAS hits be used to dissect the role of immune system in diabetes and complications?" - - ] - }, - { - "level": "domainexpert", - "domain": "aging", - "query": [ - "What is the significance of the length of telomeres?", - "Which mouse genes have been associated with longevity?", - "what genetic factor are associated with aging", - "which genes are typically associated with early aging?", - "How do I generate a linkage or association mapping study in mice to understand aging?", - "is there a specific genetic variation that can cause someone to live longer? please make your answer accessible to a non-expert" - ] - }, - { - "level": "citizenscientist", - "domain": "gn", - "query": [ - "What is the most cited environmental factor for the onset of asthma?", - "How would one extract the DNA, from say, flora or fauna?","genetics", - "what is bioinformatics", - "Explain the process of finding a genetic marker followed by a quantitative trait loci.", - - "What about recombination in human centromeres?", - "How does recombination work in human centromeres?", - "What about recombination in the human genome?", - "Create a how to guide for genetic sequencing", - "What is the significance of the length of telomeres? 
", - - "Once a sperm combines with an egg, what determines how traits are passed on to the resulting lifeform? ", - "Why is genetic tracing matrilineal rather than patrilineal? ", - "How does one tell the difference between X and Y DNA, with respect to DNA tracing and determining QTLs?", - "what type of dataset is useful for qtl mapping analysis in genenetwork2?", - "what are the bioinformatics tools for QTLs analysis?", - - "what are the statistical approaches for qtls analysis?", - "Create a how-to guide for GWAS analysis?", - "Create a how-to guide for genetic sequencing", - "Create a how-to guide for genetic sequencing.", - "What is the significance of the length of telomeres?", - - "Create a how-to guide for genetic sequencing", - "Create a guide for genetic sequencing", - "Define dyslipidemia.", - "What is cytochrome?", - "How does one tell the difference between X and Y DNA, with respect to DNA tracing and determining QTLs?", - - "how does environment influence fertilisation", - "how does diet impact someone's height", - "which animal has the same number of chromosomes as human", - "what ensures brains work", - "how do our brains maintain emotions", - - "what hormones do our brains release during stressful experiences?", - "what is the use of corticosterone?" 
- ] - }, - { - "level": "citizenscientist", - "domain": "diabetes", - "query": [ - "What are the genetic bases for the varying efficacy of diabetes treatments among individuals?", - "Explain Protective Genetic Factors Against Diabetes in Elderly Populations", - "Explain Effect of Lifestyle Modifications on Aging-Associated Diabetes Risk", - "Explain The Role of Longevity Genes in Protecting Against Diabetes", - "What are the types of diabetes", - - "How many types of diabetes exist?", - "Is there a direct association between aging and susceptibility to having diabetes?", - "How does genetics influence the emergency of diabetes?", - "what genes are associated with diabetes?", - "What causes diabetes?", - - "Does cycling reduce risk of diabetes?", - "How can GeneNetwork assist in identifying genetic factors involved in diabetes?", - "What specific tools within GeneNetwork are most useful for diabetes research, and how are they applied?", - "What role does insulin play in the regulation of blood glucose levels?", - "How does aging affect the risk of developing type 2 diabetes?", - "Can lifestyle changes reverse type 2 diabetes?" - - ] - }, - { - "level": "citizenscientist", - "domain": "aging", - "query": [ - "List as many studies as you can that include rapamycin.", - "Why is it so diffuclut to map gene loci that control aging in humans?", - "What is apoptosis?", - "which genes are involved in the aging process", - "what causes the aging process", - - "which genes are involved in aging", - "what genes are involved in the aging process", - "Describe the genotypes related to Alzheimers and dementia which have commonalities with those for aging.", - "Describe the genotypes related to Alzheimer's and dementia which have commonalities with those for aging.", - "What genetic factors influence aging in humans?", - - "what genes are associated with aging?", - "Which genes are associated with aging in human", - "What is GeneNetwork and how does it relate to aging research?" 
- - ] - } -] \ No newline at end of file diff --git a/gnqa/paper2_eval/data/lists/gpt4o-queries.json b/gnqa/paper2_eval/data/lists/gpt4o-queries.json new file mode 100644 index 0000000..74c18b0 --- /dev/null +++ b/gnqa/paper2_eval/data/lists/gpt4o-queries.json @@ -0,0 +1,159 @@ +[ + { + "level": "domainexpert", + "domain": "diabetes", + "query": [ + "How do recent advancements in multi-omics approaches, including proteomics and metabolomics, contribute to our understanding of Type 2 diabetes pathogenesis?", + "What novel diabetic loci have been identified through the latest meta-analyses of large-scale genome-wide association studies (GWAS)?", + "How do epigenetic modifications, such as DNA methylation and histone modification, influence the expression of diabetes-related genes?", + "Can you elaborate on the role of the gut microbiome in modulating host genetic predispositions to diabetes?", + "How effective are machine learning algorithms in integrating genomic data to predict individual risk and progression of diabetes?", + "What are the implications of recent findings on the role of long non-coding RNAs (lncRNAs) in the regulation of insulin secretion and sensitivity?", + "How do post-translational modifications of proteins affect key signaling pathways involved in glucose homeostasis?", + "What insights have been gained from studying the genetic basis of syndromic forms of diabetes, such as Wolfram Syndrome and Alström Syndrome?", + "How do genetic and epigenetic differences between monozygotic twins discordant for diabetes inform our understanding of its etiology?", + "What potential therapeutic targets have been identified through recent studies on the interaction between genetic variants and environmental factors in diabetes development?", + "How do rare variants identified through whole-genome sequencing contribute to the heritability of Type 2 diabetes?", + "What are the latest findings on the role of non-coding RNAs in the pathogenesis of diabetes?", + 
"How does the interaction between multiple polygenic risk scores (PRS) improve the prediction of Type 1 and Type 2 diabetes?", + "What are the mechanistic insights into the beta-cell failure pathways gleaned from recent single-cell RNA-sequencing studies?", + "How does the epigenetic landscape of key metabolic tissues change in diabetic versus non-diabetic individuals?", + "What recent advancements have been made in leveraging CRISPR-based approaches to correct monogenic forms of diabetes in vivo?", + "How do genome-wide association studies (GWAS) integrate with multi-omics data to elucidate the complex genetic architectures of diabetes?", + "What is the impact of genomic imprinting on the susceptibility and progression of diabetes?,", + "How do longitudinal genomics studies help in understanding gene-environment interactions in diabetes onset and management?", + "How have recent integrative genomics approaches, such as the use of single-cell RNA sequencing combined with epigenomic profiling, advanced our understanding of cellular heterogeneity and gene regulatory networks in pancreatic beta cells under diabetic conditions?" 
+ ] + }, + { + "level": "citizenscientist", + "domain": "diabetes", + "query": [ + "How do genetic mutations in the insulin gene affect glucose metabolism?", + "What are the most common genetic loci associated with an increased risk of Type 2 diabetes?", + "How does genome-wide association studies (GWAS) help in identifying diabetes-related genes?", + "What is the role of the HLA region in the genetic predisposition to Type 1 diabetes?", + "How do genetic differences contribute to variations in diabetes prevalence among different populations?", + "What is the function of the PPAR-gamma gene in diabetes, and how do its variants impact the disease?", + "How can CRISPR/Cas9 technology be used to study or treat genetic forms of diabetes?", + "What is the significance of genetic polymorphisms in the GLUT4 gene for Type 2 diabetes?", + "How do microRNAs regulate gene expression related to diabetes?", + "What insights have been gained from studying the genetic basis of MODY (Maturity Onset Diabetes of the Young)?", + "What genes are most commonly associated with an increased risk of developing diabetes?", + "How can genetic testing help predict a person's risk for diabetes?", + "What role do family genetics play in the likelihood of getting diabetes?", + "Can lifestyle changes affect genetic risk factors for diabetes?", + "What recent breakthroughs have been made in understanding the genetic causes of diabetes?", + "How do genes influence how our bodies respond to sugar and insulin?", + "Are there specific genetic markers that can indicate a higher risk for Type 1 versus Type 2 diabetes?", + "How can new gene therapies potentially cure or treat diabetes?", + "What is the difference between monogenic and polygenic diabetes?", + "How does studying the DNA of people with diabetes help scientists find better treatments or cures?" 
+ ] + }, + { + "level":"citizenscientist", + "domain": "aging", + "query": [ + "What are the main genetic factors that influence aging?", + "How do genes affect the aging process in humans?", + "What lifestyle choices can help slow down genetic aging?", + "How do scientists study the genetics of aging in animals?", + "Are there specific genes that have been linked to longer lifespans?", + "How do telomeres affect the aging process?", + "What role does DNA repair play in aging?", + "Can genetic research lead to treatments that slow down aging?", + "How does mitochondrial DNA influence aging?", + "Are there any known genetic mutations that cause premature aging?", + "What recent discoveries have been made about the genetics of aging?", + "How do epigenetic changes affect aging?", + "What is the role of the gene FOXO3 in longevity?", + "How does the environment interact with genes to influence aging?", + "What are senescent cells and how do they contribute to aging?", + "Are there any known lifestyle interventions that can positively impact genes related to aging?", + "What is the 'epigenetic clock,' and how is it used in aging research?", + "How do researchers use model organisms like yeast or worms to study human aging?", + "Are there any promising anti-aging therapies being developed based on genetic research?", + "How do caloric restriction and diet impact the genetics of aging?" 
+ ] + }, + { + "level":"domainexpert", + "domain":"aging", + "query": [ + "How do recent single-cell transcriptomics studies enhance our understanding of cellular heterogeneity in aging tissues?", + "What are the latest findings on the role of senescence-associated secretory phenotype (SASP) factors in age-related tissue dysfunction?", + "How do age-related changes in chromatin architecture contribute to the decline in cellular function?", + "What insights have been gained from studying the epigenetic reprogramming of aged cells to a more youthful state?", + "How do alterations in the mitochondrial genome and bioenergetics influence the aging process in humans?", + "What are the therapeutic potentials and challenges of targeting the insulin/IGF-1 signaling pathway for extending healthspan and lifespan?", + "How can the integration of proteomics and metabolomics data shed light on age-associated metabolic shifts?", + "What role do long non-coding RNAs (lncRNAs) play in the regulation of aging and age-related diseases?", + "How do recent advancements in CRISPR/Cas9 technology open new avenues for studying and potentially reversing aging?", + "What is the significance of the DNA damage response (DDR) in the context of both replicative and chronological aging?", + "How do age-dependent changes in the immune system, such as immunosenescence, contribute to increased susceptibility to diseases?", + "How do advancements in machine learning and artificial intelligence aid in the identification of biomarkers for biological aging?", + "What recent discoveries have been made regarding the impact of systemic factors, such as circulating microvesicles, on aging phenotypes?", + "How do changes in the gut microbiome composition correlate with aging and longevity?", + "What are the key molecular mechanisms through which caloric restriction exerts its lifespan-extending effects across different species?", + "How do oxidative stress and the subsequent accumulation of damaged 
macromolecules contribute to cellular aging?", + "How are extracellular matrix remodeling and tissue stiffness implicated in the aging process?", + "How do recent developments in autophagy research contribute to our understanding of its role in aging and longevity?", + "What are the implications of age-related shifts in stem cell niche composition and function for tissue regeneration capacity?", + "How do cross-links and advanced glycation end-products (AGEs) accumulation affect the structural integrity and function of aging tissues?" + ] + }, + { + "level": "domainexpert", + "domain": "gn", + "query": [ + "How do recent advancements in network-based integrative genomics alter our understanding of complex trait architectures?", + "What are the latest methodological improvements in evaluating gene-environment interactions using GeneNetwork.org?", + "How do multi-omics data integration techniques enhance the prediction accuracy of phenotypic traits in GeneNetwork datasets?", + "What are the computational challenges and solutions in analyzing large-scale transcriptomic data within GeneNetwork.org?", + "How has the inclusion of data from diverse populations impacted the generalizability of findings on GeneNetwork.org?", + "What novel insights have been obtained from GeneNetwork.org regarding the genetic basis of psychiatric disorders?", + "How do advancements in machine learning algorithms contribute to the deconvolution of gene expression data in complex tissues?", + "What role do enhancer-promoter interactions play in the regulation of gene networks uncovered through GeneNetwork.org?", + "How can the integration of ATAC-seq data with RNA-seq data on GeneNetwork.org inform about chromatin accessibility and gene regulation?", + "What are the latest strategies for inferring causal relationships within gene networks using data from GeneNetwork.org?", + "How do advancements in single-nucleus RNA sequencing provide more granular insights into cell-type-specific gene 
expression networks?", + "What impact have recent discoveries in non-coding RNA regulation had on refining gene interaction maps on GeneNetwork.org?", + "How are spatial transcriptomics approaches being integrated into GeneNetwork.org to enhance understanding of tissue architecture and function?", + "How do recent developments in quantitative trait locus (QTL) mapping refine our understanding of gene regulatory variants?", + "What are the implications of incorporating epigenomic data, such as histone modification maps, into the gene networks on GeneNetwork.org?", + "How do recent findings on 3D genome organization contribute to our understanding of functional genomic networks?", + "What are the potential applications of artificial intelligence in improving the annotation and interpretation of gene networks?", + "How has the study of genetic pleiotropy been advanced by data available on GeneNetwork.org?", + "What novel genetic pathways have been identified in GeneNetwork.org studies related to aging and lifespan?", + "How do polygenic risk scores (PRS) developed using GeneNetwork.org data enhance the prediction and prevention of complex diseases?" 
+ ] + }, + { + "level": "citizenscientist", + "domain": "gn", + "query": [ + "What is GeneNetwork.org, and how does it help scientists understand genetics?", + "How do researchers use GeneNetwork.org to study diseases?", + "What can GeneNetwork.org tell us about how genes interact with each other?", + "How does GeneNetwork.org help in finding the genetic causes of common diseases?", + "Can GeneNetwork.org predict my risk of developing certain health conditions based on my genes?", + "How does GeneNetwork.org make use of data from different populations around the world?", + "What kinds of genetic data are available on GeneNetwork.org?", + "How do scientists use GeneNetwork.org to study differences in gene expression?", + "Can GeneNetwork.org be used to learn about genetic influences on behavior?", + "What role does GeneNetwork.org play in personalized medicine?", + "How does the information on GeneNetwork.org help in developing new treatments for diseases?", + "What is a gene network, and why is it important for understanding genetics?", + "How do researchers identify which genes are important for certain traits using GeneNetwork.org?", + "How can GeneNetwork.org help in understanding complex traits like height or intelligence?", + "Are there any known genetic mutations that cause premature aging?", + "What are the practical applications of the research done through GeneNetwork.org?", + "How can I access and use the data available on GeneNetwork.org?", + "What are some recent discoveries made using GeneNetwork.org?", + "How do scientists ensure the accuracy of the data on GeneNetwork.org?", + "What’s the difference between looking at one gene and studying a whole gene network?", + "How can GeneNetwork.org contribute to advancements in genetic engineering?" 
+ ] + } +] diff --git a/gnqa/paper2_eval/data/lists/human-questions.json b/gnqa/paper2_eval/data/lists/human-questions.json new file mode 100644 index 0000000..4142e5b --- /dev/null +++ b/gnqa/paper2_eval/data/lists/human-questions.json @@ -0,0 +1,172 @@ +[ + { + "level": "domainexpert", + "domain": "gn", + "query": [ + "What are the potential benefits and risk associated with gene editing technologies like CRISPRR-Cas9?", + "How does epigenetics inluence gene expression without changing the underlying DNA sequence?", + "Describe the role of mitochondrial DNA in heredity and how it differs from nuclear DNA.", + "What are the ethical considerations surrounding prenatal genetic testing and the selective termination of pregnancies based on genetic factors?", + "Create a how-to guide for genetic sequencing.", + + "Which genes give a predisposition to developing T1D?", + "What is ensembl", + "Which database can I use for genetic, genomics, phenotype, and disease-related data generated from rat research?", + "What is RGD?", + "What resources can I use to do pathway analyses?", + + "Once a sperm combines with an egg, what determines how traits are passed onto the resulting lifeform?", + "Why is genetic tracing matrilineal rather than patrilineal?", + "Explain the process of DNA replication and how it ensures accurate copying of genetic information during cell division.", + "What are the potential benefits and risks associated with gene editing technologies like CRISPR-Cas9?", + "How does one tell the difference between X and Y DNA, with repsect to DNA tracing and determining QTLs?", + + + "For text and biological resources, do you mean add some books (on biology stuff) or/and web resources (as ensembl) on your system?", + "what is ensembl?", + "What is the difference between QTL mapping and GWAS?", + "How do I determine which gene in my QTL is causal for the trait?", + "Why do males have two Y chromosomes and females only one?", + + "How does one tell the difference 
between X and Y DNA, with respect to DNA tracing and determining QTLs", + "Once a sperm combines with an egg, what determines how traits are passed onto the resulting lifeform?", + "How can I add a new species to the GeneNetwork database?", + "which genes are typically associated with diabetes in QTL analyses?", + "In which diseases is the gene TCF7L2 involved?", + + "Once a sperm combines with an egg, what determines how traits are passed onto the resulting lifeform?", + "Can you explain what a ribosomal binding site at a high level and make it accessable to a non-expert?", + "Once a sperm combines with an egg, what determines how traits are passed onto the resulting lifeform?", + "Can you explain the difference between sequencing with short reads vs long reads? Please make you answer accessible to a non-expert.", + "Can you explain why using a pangenome-based reference might be more useful than simply using a single linear reference? Please make you answer accessible to a non-expert.", + + "Is all genetic regulation done through DNA (e.g., prompters, repressors, activators) or are there other forms of genetic regulation? Please make you answer accessible to a non-expert.", + "What are the different relationship between traits?", + "Can landscape of QTL and GWAS hits be used to find relationships between traits?" + ] + }, + { + "level": "domainexpert", + "domain": "diabetes", + "query": [ + "How is gene expression in the liver affected by diabetes?", + "Is any of the genes SH2B3, IFIH1 or ERBB3 related to diabetes?", + "nutrition is a factor for diabetes. how can genomics be use to better understand nutritional factors of diabetes", + "nutrition is a factor for diabetes. 
construct an abstract about how can genomics be use to better understand nutritional factors of diabetes", + "Is the gene TCF7L2 involved in diabetes?", + + "Is any of the genes SH2B3, IFIH1 or ERBB3 related to diabetes?", + "How can I use genenetwork to find genes related with diabetes in humans?", + "How can I use the GeneNetwork tool to find genes related with diabetes in humans?", + "what are confounding factors in diabetes?", + "How is the immune system related to diabetes?", + + "What are the genomic variants associated with immune system components and diabetes?", + "What is the role of the immune system in the metabolomics of diabetes and associated conditions?", + "Can the landscape of QTL and GWAS hits be used to dissect the role of immune system in diabetes and complications?" + + ] + }, + { + "level": "domainexpert", + "domain": "aging", + "query": [ + "What is the significance of the length of telomeres?", + "Which mouse genes have been associated with longevity?", + "what genetic factor are associated with aging", + "which genes are typically associated with early aging?", + "How do I generate a linkage or association mapping study in mice to understand aging?", + "is there a specific genetic variation that can cause someone to live longer? please make your answer accessible to a non-expert" + ] + }, + { + "level": "citizenscientist", + "domain": "gn", + "query": [ + "What is the most cited environmental factor for the onset of asthma?", + "How would one extract the DNA, from say, flora or fauna?","genetics", + "what is bioinformatics", + "Explain the process of finding a genetic marker followed by a quantitative trait loci.", + + "What about recombination in human centromeres?", + "How does recombination work in human centromeres?", + "What about recombination in the human genome?", + "Create a how to guide for genetic sequencing", + "What is the significance of the length of telomeres? 
", + + "Once a sperm combines with an egg, what determines how traits are passed on to the resulting lifeform? ", + "Why is genetic tracing matrilineal rather than patrilineal? ", + "How does one tell the difference between X and Y DNA, with respect to DNA tracing and determining QTLs?", + "what type of dataset is useful for qtl mapping analysis in genenetwork2?", + "what are the bioinformatics tools for QTLs analysis?", + + "what are the statistical approaches for qtls analysis?", + "Create a how-to guide for GWAS analysis?", + "Create a how-to guide for genetic sequencing", + "Create a how-to guide for genetic sequencing.", + "What is the significance of the length of telomeres?", + + "Create a how-to guide for genetic sequencing", + "Create a guide for genetic sequencing", + "Define dyslipidemia.", + "What is cytochrome?", + "How does one tell the difference between X and Y DNA, with respect to DNA tracing and determining QTLs?", + + "how does environment influence fertilisation", + "how does diet impact someone's height", + "which animal has the same number of chromosomes as human", + "what ensures brains work", + "how do our brains maintain emotions", + + "what hormones do our brains release during stressful experiences?", + "what is the use of corticosterone?" 
+ ] + }, + { + "level": "citizenscientist", + "domain": "diabetes", + "query": [ + "What are the genetic bases for the varying efficacy of diabetes treatments among individuals?", + "Explain Protective Genetic Factors Against Diabetes in Elderly Populations", + "Explain Effect of Lifestyle Modifications on Aging-Associated Diabetes Risk", + "Explain The Role of Longevity Genes in Protecting Against Diabetes", + "What are the types of diabetes", + + "How many types of diabetes exist?", + "Is there a direct association between aging and susceptibility to having diabetes?", + "How does genetics influence the emergency of diabetes?", + "what genes are associated with diabetes?", + "What causes diabetes?", + + "Does cycling reduce risk of diabetes?", + "How can GeneNetwork assist in identifying genetic factors involved in diabetes?", + "What specific tools within GeneNetwork are most useful for diabetes research, and how are they applied?", + "What role does insulin play in the regulation of blood glucose levels?", + "How does aging affect the risk of developing type 2 diabetes?", + "Can lifestyle changes reverse type 2 diabetes?" + + ] + }, + { + "level": "citizenscientist", + "domain": "aging", + "query": [ + "List as many studies as you can that include rapamycin.", + "Why is it so diffuclut to map gene loci that control aging in humans?", + "What is apoptosis?", + "which genes are involved in the aging process", + "what causes the aging process", + + "which genes are involved in aging", + "what genes are involved in the aging process", + "Describe the genotypes related to Alzheimers and dementia which have commonalities with those for aging.", + "Describe the genotypes related to Alzheimer's and dementia which have commonalities with those for aging.", + "What genetic factors influence aging in humans?", + + "what genes are associated with aging?", + "Which genes are associated with aging in human", + "What is GeneNetwork and how does it relate to aging research?" 
+ + ] + } +] \ No newline at end of file -- cgit v1.2.3