diff options
-rw-r--r-- | wqflask/scripts/index.py | 142 |
1 files changed, 71 insertions, 71 deletions
diff --git a/wqflask/scripts/index.py b/wqflask/scripts/index.py index f59a8788..c871db43 100644 --- a/wqflask/scripts/index.py +++ b/wqflask/scripts/index.py @@ -30,47 +30,47 @@ def write_document(db, idterm, doctype, doc): # pylint: disable=missing-function-docstring def main(): - with database_connection() as conn, conn.cursor(MonadicDictCursor) as cursor: - # FIXME: Some Max LRS values in the DB are wrongly listed as - # 0.000, but shouldn't be displayed. Make them NULLs in the - # database. - cursor.execute(""" - SELECT ProbeSet.Name AS name, - ProbeSet.Symbol AS symbol, - ProbeSet.description AS description, - ProbeSet.Chr AS chr, - ProbeSet.Mb AS mb, - ProbeSet.alias AS alias, - ProbeSet.GenbankId AS genbankid, - ProbeSet.UniGeneId AS unigeneid, - ProbeSet.Probe_Target_Description AS probe_target_description, - ProbeSetFreeze.Name AS dataset, - ProbeSetFreeze.FullName AS dataset_fullname, - ProbeSetFreeze.Id AS dataset_id, - Species.Name AS species, - InbredSet.Name AS `group`, - Tissue.Name AS tissue, - ProbeSetXRef.Mean AS mean, - ProbeSetXRef.LRS AS lrs, - ProbeSetXRef.additive AS additive, - Geno.Chr as geno_chr, - Geno.Mb as geno_mb - FROM Species - INNER JOIN InbredSet ON InbredSet.SpeciesId = Species.Id - INNER JOIN ProbeFreeze ON ProbeFreeze.InbredSetId = InbredSet.Id - INNER JOIN Tissue ON ProbeFreeze.TissueId = Tissue.Id - INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id - INNER JOIN ProbeSetXRef ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id - INNER JOIN ProbeSet ON ProbeSet.Id = ProbeSetXRef.ProbeSetId - LEFT JOIN Geno ON ProbeSetXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id - WHERE ProbeSetFreeze.confidentiality < 1 AND ProbeSetFreeze.public > 0 - """) - termgenerator = xapian.TermGenerator() - termgenerator.set_stemmer(xapian.Stem("en")) - indexer = partial(index_text, termgenerator) - - # pylint: disable=invalid-name - with xapian_writable_database() as db: + termgenerator = xapian.TermGenerator() + termgenerator.set_stemmer(xapian.Stem("en")) + indexer = partial(index_text, termgenerator) + + # FIXME: Some Max LRS values in the DB are wrongly listed as + # 0.000, but shouldn't be displayed. Make them NULLs in the + # database. + # pylint: disable=invalid-name + with xapian_writable_database() as db: + with database_connection() as conn, conn.cursor(MonadicDictCursor) as cursor: + cursor.execute(""" + SELECT ProbeSet.Name AS name, + ProbeSet.Symbol AS symbol, + ProbeSet.description AS description, + ProbeSet.Chr AS chr, + ProbeSet.Mb AS mb, + ProbeSet.alias AS alias, + ProbeSet.GenbankId AS genbankid, + ProbeSet.UniGeneId AS unigeneid, + ProbeSet.Probe_Target_Description AS probe_target_description, + ProbeSetFreeze.Name AS dataset, + ProbeSetFreeze.FullName AS dataset_fullname, + ProbeSetFreeze.Id AS dataset_id, + Species.Name AS species, + InbredSet.Name AS `group`, + Tissue.Name AS tissue, + ProbeSetXRef.Mean AS mean, + ProbeSetXRef.LRS AS lrs, + ProbeSetXRef.additive AS additive, + Geno.Chr as geno_chr, + Geno.Mb as geno_mb + FROM Species + INNER JOIN InbredSet ON InbredSet.SpeciesId = Species.Id + INNER JOIN ProbeFreeze ON ProbeFreeze.InbredSetId = InbredSet.Id + INNER JOIN Tissue ON ProbeFreeze.TissueId = Tissue.Id + INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id + INNER JOIN ProbeSetXRef ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id + INNER JOIN ProbeSet ON ProbeSet.Id = ProbeSetXRef.ProbeSetId + LEFT JOIN Geno ON ProbeSetXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id + WHERE ProbeSetFreeze.confidentiality < 1 AND ProbeSetFreeze.public > 0 + """) for trait in cursor.fetchall(): doc = xapian.Document() termgenerator.set_document(doc) @@ -87,36 +87,36 @@ def main(): doc.set_data(json.dumps(trait.data)) write_document(db, trait["name"].bind(lambda name: f"Q{name}"), "gene", doc) - cursor.execute(""" - SELECT Species.Name AS species, - InbredSet.Name AS `group`, - PublishFreeze.Name AS dataset, - PublishFreeze.FullName AS dataset_fullname, - PublishXRef.Id AS name, - COALESCE(Phenotype.Post_publication_abbreviation, Phenotype.Pre_publication_abbreviation) AS abbreviation, - COALESCE(Phenotype.Post_publication_description, Phenotype.Pre_publication_description) AS description, - Phenotype.Lab_code, - Publication.Abstract, - Publication.Title, - Publication.Authors AS authors, - Publication.Year AS year, - Publication.PubMed_ID AS pubmed_id, - PublishXRef.LRS as lrs, - PublishXRef.additive, - InbredSet.InbredSetCode AS inbredsetcode, - PublishXRef.mean, - PublishFreeze.Id AS dataset_id, - Geno.Chr as geno_chr, - Geno.Mb as geno_mb - FROM Species - INNER JOIN InbredSet ON InbredSet.SpeciesId = Species.Id - INNER JOIN PublishFreeze ON PublishFreeze.InbredSetId = InbredSet.Id - INNER JOIN PublishXRef ON PublishXRef.InbredSetId = InbredSet.Id - INNER JOIN Phenotype ON PublishXRef.PhenotypeId = Phenotype.Id - INNER JOIN Publication ON PublishXRef.PublicationId = Publication.Id - LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id - """) - with xapian_writable_database() as db: + with database_connection() as conn, conn.cursor(MonadicDictCursor) as cursor: + cursor.execute(""" + SELECT Species.Name AS species, + InbredSet.Name AS `group`, + PublishFreeze.Name AS dataset, + PublishFreeze.FullName AS dataset_fullname, + PublishXRef.Id AS name, + COALESCE(Phenotype.Post_publication_abbreviation, Phenotype.Pre_publication_abbreviation) AS abbreviation, + COALESCE(Phenotype.Post_publication_description, Phenotype.Pre_publication_description) AS description, + Phenotype.Lab_code, + Publication.Abstract, + Publication.Title, + Publication.Authors AS authors, + Publication.Year AS year, + Publication.PubMed_ID AS pubmed_id, + PublishXRef.LRS as lrs, + PublishXRef.additive, + InbredSet.InbredSetCode AS inbredsetcode, + PublishXRef.mean, + PublishFreeze.Id AS dataset_id, + Geno.Chr as geno_chr, + Geno.Mb as geno_mb + FROM Species + INNER JOIN InbredSet ON InbredSet.SpeciesId = Species.Id + INNER JOIN PublishFreeze ON PublishFreeze.InbredSetId = InbredSet.Id + INNER JOIN PublishXRef ON PublishXRef.InbredSetId = InbredSet.Id + INNER JOIN Phenotype ON PublishXRef.PhenotypeId = Phenotype.Id + INNER JOIN Publication ON PublishXRef.PublicationId = Publication.Id + LEFT JOIN Geno ON PublishXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id + """) for i, trait in enumerate(cursor.fetchall()): doc = xapian.Document() termgenerator.set_document(doc) |