# coding:utf-8

import ply.lex as lex
from datetime import date

# Short aliases for field names: lets a query say g="XXX" instead of
# groups__name="XXX".
shortcuts = dict(
    g='groups__name',
    s='state__name',
)

# =============================
#           LEXER
# =============================

# Operator keywords.  They are first matched by the FIELD rule and then
# re-tagged as U_OP / B_OP tokens.
u_ops = ('NOT',)         # unary operators
b_ops = ('AND', 'OR')    # binary operators

# Every token type the lexer can emit.
tokens = (
    'NUMBER',
    'DATE',
    'STRING',
    'FIELD',
    'U_OP',
    'B_OP',
    'COMPA',
)

# Single characters handed to the parser unchanged.
literals = '()'

# Comparison operators: =  <  <=  >  >=  ~  ~~
t_COMPA = r'=|[<>]=?|~~?'

# Double-quoted string literal.  The token value becomes the text between
# the quotes.  The pattern has no escape syntax, so a literal cannot itself
# contain a double quote.
# NOTE: the docstring below is the token regex read by PLY, not prose.
def t_STRING(t):
    r'"[^"]*"'
    quoted = t.value
    t.value = quoted[1:len(quoted) - 1]
    return t

# Date literal in day/month/year order: dd/mm/yyyy (day and month may be one
# or two digits, the year is always four).  The token value becomes a
# datetime.date.
#
# Raises CompileException when the digits match the pattern but do not form
# a real calendar date (e.g. 31/02/2011), so callers only have to handle one
# exception type for malformed input.
# NOTE: the docstring below is the token regex read by PLY, not prose.
def t_DATE(t):
    r'(?P<day>\d{1,2})/(?P<month>\d{1,2})/(?P<year>\d{4})'
    match = t.lexer.lexmatch
    day = int(match.group('day'))
    month = int(match.group('month'))
    year = int(match.group('year'))
    try:
        parsed = date(year, month, day)
    except ValueError:
        # t.value still holds the raw matched text at this point.
        raise CompileException(u"Invalid date: %s" % t.value)
    t.value = parsed
    return t

# Unsigned integer literal; the token value is converted to int.
# NOTE: the docstring below is the token regex read by PLY, not prose.
def t_NUMBER(t):
    r'\d+'
    digits = t.value
    t.value = int(digits)
    return t

# Identifier.  Words listed in u_ops / b_ops are re-tagged as operator
# tokens; anything else stays a FIELD token.
# NOTE: the docstring below is the token regex read by PLY, not prose.
def t_FIELD(t):
    r'[A-Za-z_][\w_]*'
    word = t.value
    for token_type, keywords in (('U_OP', u_ops), ('B_OP', b_ops)):
        if word in keywords:
            t.type = token_type
            break
    return t

# Lexer error hook: called with the remaining input when no token rule
# matches.  Reports the first offending character.
def t_error(t):
    offending_char = t.value[0]
    raise CompileException(u"Cannot make sense of char: %s" % offending_char)


# Characters skipped between tokens: space and tab.
t_ignore = ' \t'

# =============================
#           PARSER
# =============================

import ply.yacc as yacc
from django.db.models import Q

# Maps each comparison operator to the Django lookup suffix appended to the
# field name ('' means no suffix, i.e. a plain equality test).
# Not supported yet: i* (case-insensitive variants), in, startswith,
# endswith, day & co, isnull.
# TODO: range
compa2lookup = dict([
    ('=', ''),
    ('~', 'contains'),
    ('~~', 'regex'),
    ('>', 'gt'),
    ('>=', 'gte'),
    ('<', 'lt'),
    ('<=', 'lte'),
])

# Binary operator: combines the two operand values with | (OR) or & (AND).
# Any other operator text leaves the result unset.
# NOTE: the docstring below is the grammar production read by PLY.
def p_expression_b_op(p):
    '''expression : expression B_OP expression'''
    operator_name = p[2]
    if operator_name == 'OR':
        p[0] = p[1] | p[3]
        return
    if operator_name == 'AND':
        p[0] = p[1] & p[3]

# Unary operator: NOT inverts the operand value with ~.
# Any other operator text leaves the result unset.
# NOTE: the docstring below is the grammar production read by PLY.
def p_expression_u_op(p):
    '''expression : U_OP expression'''
    if p[1] != 'NOT':
        return
    operand = p[2]
    p[0] = ~operand

# Parenthesised expression: the result is the value of the inner expression.
# NOTE: the docstring below is the grammar production read by PLY.
def p_expression_paren(p):
    "expression : '(' expression ')' "
    inner = p[2]
    p[0] = inner

# Comparison "FIELD COMPA value": builds Q(<field>[__<lookup>]=<value>).
#
# The field name is first expanded through `shortcuts` (unknown names are
# used as-is), then the lookup suffix for the comparison operator is
# appended unless it is empty (plain equality).
# NOTE: the docstring below is the grammar production read by PLY.
def p_expression_ID(p):
    'expression : FIELD COMPA value'

    lookup = compa2lookup[p[2]]

    # Fall back to the name as written when it is not a known shortcut.
    field = shortcuts.get(p[1], p[1])

    if lookup:
        field = '%s__%s' % (field, lookup)

    # The key is forced to a plain str before being used as a keyword name.
    # NOTE(review): presumably needed because some older Python 2 releases
    # reject unicode keys in **kwargs -- confirm before removing.
    p[0] = Q(**{str(field): p[3]})


# A value is any literal token; its value is passed through unchanged.
# NOTE: the docstring below is the grammar production read by PLY.
def p_value(p):
    '''value : STRING
            | NUMBER
            | DATE'''
    literal = p[1]
    p[0] = literal

# Parser error hook.  `p` is the offending token, or a false value when the
# input ended before the expression was complete.
def p_error(p):
    if not p:
        raise CompileException(u"Parsing error: unexpected end of expression")
    raise CompileException(u"Parsing error around token: %s" % p.value)

# Operator precedence, lowest level first.  AND and OR are both B_OP tokens
# and therefore share a single left-associative level; NOT (U_OP) is listed
# after them and is right-associative.
precedence = (('left', 'B_OP'), ('right', 'U_OP'))


class CompileException(Exception):
    """Raised when an expression cannot be tokenized or parsed.

    The human-readable description is available as ``message`` and, because
    it is also forwarded to ``Exception``, through ``args`` and tracebacks.
    """

    def __init__(self, message):
        # Forward the message to the base class so that args is populated;
        # without this, the exception prints as empty and cannot be
        # re-created from its args (e.g. when unpickled).
        Exception.__init__(self, message)
        self.message = message

def compile(expr):
    """Parse the query expression `expr` and return the parser's result.

    The grammar actions in this module build django Q objects, so the
    result is expected to be a Q instance.  Lexing and parsing failures are
    reported through t_error / p_error, which raise CompileException.

    Note that this function shadows the builtin `compile` within this
    module.
    """
    # Build a fresh lexer and parser for every compilation so that no
    # lexer/parser state is shared between calls (intended for thread
    # safety).
    # NOTE(review): lex.lex() and yacc.yacc() are called without arguments,
    # so PLY presumably discovers the t_* / p_* rules from the calling
    # module -- keep these calls inside this module.
    lexer = lex.lex()
    parser = yacc.yacc()
    # Run the parser over the expression, pulling tokens from our lexer.
    return parser.parse(expr,lexer=lexer)

if __name__ == '__main__':
    # Manual smoke test: compile one sample expression and print either the
    # result or the compilation error message.
    #
    # NOTE(review): the sample uses g=="XXX".  t_COMPA has no '==' operator,
    # so this is lexed as two '=' tokens and is expected to end in the
    # CompileException branch -- confirm whether that is intentional.
    sample_expr = '(modified > 1/4/2011 OR NOT s="OK") AND g=="XXX"'

    # print(...) with a single argument and "except ... as" are valid on
    # both Python 2.6+ and Python 3, so the module can be imported on either.
    try:
        print(compile(sample_expr))
    except CompileException as e:
        print(e.message)