from __future__ import print_function, division, absolute_import

"""
Results will be returned for each of several trait types: mRNA assays, phenotypes, genotypes, and
(maybe later) genes

For each trait type, the results for each species should be given; for example, have a "Mouse" tab
with the mouse traits in a table inside it

This table will then list each trait, its dataset, and several columns determined by its trait type
(phenotype, genotype, etc)

"""


import sys
sys.path.append("../../..")

import simplejson as json

import sqlalchemy as sa
from sqlalchemy.dialects import mysql
from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.ext.declarative import declarative_base

from BeautifulSoup import UnicodeDammit

import zach_settings as settings

Engine = sa.create_engine(settings.SQLALCHEMY_DATABASE_URI,
                       #encoding='utf-8',
                       #client_encoding='utf-8',
                       #echo="debug",
                       )

Session = scoped_session(sessionmaker(bind=Engine)) #, extension=VersionedListener()))
#Xsession = Session()

Base = declarative_base(bind=Engine)
Metadata = sa.MetaData()
Metadata.bind = Engine


class ProbeSet(Base):
    __tablename__ = 'ProbeSetXRef'
    __table_args__ = {'autoload': True}
    
QuickSearch = sa.Table("QuickSearch", Metadata,
        sa.Column('table_name', sa.String(15),
                  primary_key=True, nullable=False, autoincrement=False), # table that item is inserted from
        sa.Column('the_key', sa.String(30),
                  primary_key=True, nullable=False, autoincrement=False), # key in database table
        sa.Column('terms', sa.Text), # terms to compare search string with
        #sa.Column('species', sa.Text),
        sa.Column('result_fields', sa.Text)  # json
                    )

QuickSearch.drop(Engine, checkfirst=True)
Metadata.create_all(Engine)

#class QuickSearch(Base):
#    table_name = Column(String)
#    the_key = Column(String)
#    terms = Column(String)
#    
#    def __init__(self, table_name, the_key, terms, category, species, result_fields):
#        self.table_name = table_name
#        self.the_key = the_key
#        self.terms = terms
#        self.species = species
#        self.category = category
#        self.result_fields = json.dumps(sort_keys=True)


def get_unique_terms(trait_type, trait_id):
    #if not args:
    #    return None
    
    if trait_type="ProbeSet":
        results = Session.query(
                "name",
                "symbol",
                "description",
                "alias"
            ).from_statement(
                "SELECT ProbeSet.Name as name, "
                "ProbeSet.Symbol as symbol, "
                "ProbeSet.description as description, "
                "ProbeSet.alias as alias "
                "FROM ProbeSet"
                "WHERE ProbeSet.Id = :probeset_id ").params(probeset_id=trait_id).all()
    
    unique = set()
    for item in results[0]:
        #print("locals:", locals())
        if not item:
            continue
        for token in item.split():
            if token.startswith(('(','[')):
                token = token[1:]
            if token.endswith((')', ']')):
                token = token[:-1]
            if token.endswith(';'):
                token = token[:-1]
            if len(token) > 2:
                try:
                    # This hopefully ensures that the token is utf-8
                    token = token.encode('utf-8')
                    print(" ->", token)
                except UnicodeDecodeError:
                    print("\n-- UDE \n")
                    # Can't get it into utf-8, we won't use it
                    continue 
                
                unique.add(token)
    print("\nUnique terms are: {}\n".format(unique))
    return " ".join(unique)

def main():
    conn = Engine.connect()
    counter = 0
    for ps in page_query(Session.query(ProbeSet)):   #all()
        values = {}
        values['table_name'] = "ProbeSetXRef"
        values['the_key'] = json.dumps([ps.ProbeSetId, ps.ProbeSetFreezeId])
        values['terms'] = get_unique_terms("ProbeSet", ps.ProbeSetId)
        print("terms is:", values['terms'])
        #values['species'] = get_species("ProbeSet", ps.Id)
        values['result_fields'] = get_result_fields("ProbeSet", ps.ProbeSetId, ps.ProbeSetFreezeId)
        ins = QuickSearch.insert().values(**values)
        conn.execute(ins)
        counter += 1
        print("Done:", counter)


def get_species(trait_type, trait_id):
    if trait_type == "ProbeSet":
        print("Before species query")
        results = Session.query("Name").from_statement("SELECT Species.Name "
                    "FROM ProbeSetXRef, "
                    "ProbeSetFreeze, "
                    "ProbeFreeze, "
                    "InbredSet, "
                    "Species "
                    "WHERE ProbeSetXRef.ProbeSetId =:probeset_id and "
                    "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and "
                    "ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and "
                    "ProbeFreeze.InbredSetId = InbredSet.Id and "
                    "InbredSet.SpeciesId = Species.Id").params(probeset_id=trait_id).all()
        print("After query")
        
        assert len(set([result.Name for result in results])) == 1, "Multiple names?"

    print("species is:", results[0].Name)

    return results[0].Name

def get_result_fields(trait_type, *args):
    if trait_type == "ProbeSet":
        print("qs1")
        results = Session.query(
                "name",
                "symbol",
                "description",
                "chr", "mb",
                "genbank_id",
                "gene_id",
                "chip_id",
                "chip_name"
            ).from_statement(
                "SELECT ProbeSet.Name as name, "
                "ProbeSet.Symbol as symbol, "
                "ProbeSet.description as description, "
                "ProbeSet.Chr as chr, "
                "ProbeSet.Mb as mb, "
                "ProbeSet.GenbankId as genbank_id, "
                "ProbeSet.GeneId as gene_id, "
                "ProbeSet.ChipId as chip_id, "
                "GeneChip.Name as chip_name "
                "FROM ProbeSet, GeneChip "
                "WHERE ProbeSet.ChipId = GeneChip.Id and "
                "ProbeSet.Id = :probeset_id ").params(probeset_id=*args[0], dataset_id=*args[1]).all()
        print("qs2")
        for result in results:
            print(result)
        assert len(set(result for result in results)) == 1, "Different results"
    
    print("results are:", results)
    result = results[0]
    result = row2dict(result)
    try:
        json_results = json.dumps(result, sort_keys=True)
    except UnicodeDecodeError:
        print("\n\nTrying to massage unicode\n\n")
        #print("result.__dict__ is [{}]: {}".format(type(result.__dict__), result.__dict__))
        #resultd = dict(**result.__dict__)
        for key, value in result.iteritems():
            print("   key is:", key)
            print("   value is:", value)
            if isinstance(value, basestring):
                result[key] = value.decode('utf-8', errors='ignore')
        json_results = json.dumps(result, sort_keys=True)
    
    #print("json is: ", json_results)
    
    return json_results


def row2dict(row):
    return dict(zip(row.keys(), row))  # http://stackoverflow.com/a/2848519/1175849
    #"""http://stackoverflow.com/a/1960546/1175849"""
    #d = {}
    #for column in row.__table__.columns:
    #    d[column.name] = getattr(row, column.name)
    #
    #return d

def page_query(q):
    """http://stackoverflow.com/a/1217947/1175849"""
    offset = 0
    while True:
        r = False
        for elem in q.limit(1000).offset(offset):
           r = True
           yield elem
        offset += 1000
        if not r:
            break

if __name__ == "__main__":
    main()