=PigParser.g=

// Parser-only grammar; token definitions come from a separate lexer vocabulary.
parser grammar PigParser;

options {
    // Token types are imported from the PigLexer vocabulary.
    tokenVocab=PigLexer;
    // Rules build an AST (via the ^ / ! operators and -> rewrites below).
    output=AST;
    // Ambiguous decisions are resolved by speculatively trying alternatives in order.
    backtrack=true;
}

// Imaginary tokens: they never appear in the input and are used as root nodes
// in the "->" tree rewrites of the rules below.
// NOTE(review): SCHEMA and COND are not referenced by any rule visible in this
// grammar - confirm whether they are used elsewhere (e.g. by a tree grammar).
tokens {
    QUERY;
    STATEMENT;
    SCHEMA;
    FUNC;
    COND;
    CAST_EXPR;
    BIN_EXPR;
    TUPLE_VAL;
    MAP_VAL;
    BAG_VAL;
    KEY_VAL_PAIR;
    TUPLE_DEF;
    FIELD;
}

// Code in this action is copied to the top of the generated parser source;
// here it declares the package of the generated class.
@header {
package pig;
}

// Entry rule: zero or more statements collected under the imaginary QUERY root.
// EOF is matched explicitly so that input remaining after the last parseable
// statement is reported as a syntax error instead of being silently ignored.
// EOF is not part of the rewritten tree.
query
    : statement* EOF
      -> ^( QUERY statement* )
    ;

// A single statement: an optional "alias EQUAL" prefix, one operator clause,
// and a terminating SEMI_COLON. The rewrite keeps only the alias (if present)
// and the clause under a STATEMENT root.
statement
    : ( alias EQUAL )? op_clause SEMI_COLON
      -> ^( STATEMENT alias? op_clause )
    ;

// An alias is a single IDENTIFIER token.
alias
    : IDENTIFIER
    ;

// The operator clauses this grammar accepts inside a statement.
op_clause
    : load_clause
    | store_clause
    | filter_clause
    | distinct_clause
    ;

// LOAD becomes the root of the subtree; the file name, the optional function
// clause and the optional AS schema become its children. The USING keyword is
// excluded from the tree.
load_clause
    : LOAD^ filename
      ( USING! func_clause )?
      as_clause?
    ;

// A file name is written as a QUOTEDSTRING token.
filename
    : QUOTEDSTRING
    ;

// "AS (...)": the AS keyword is excluded from the tree, leaving the tuple_def subtree.
as_clause
    : AS! tuple_def
    ;

// Parenthesised, comma-separated list of one or more fields. Parentheses and
// commas are discarded; the fields hang under a TUPLE_DEF root.
tuple_def
    : LEFT_PAREN field ( COMMA field )* RIGHT_PAREN
      -> ^( TUPLE_DEF field+ )
    ;

// A field name with an optional ": type" annotation. The COLON is discarded;
// the result is ^(FIELD name type?).
field
    : IDENTIFIER ( COLON type )?
      -> ^( FIELD IDENTIFIER type? )
    ;

// A type is either one of the simple types or a tuple, bag or map type.
type
    : simple_type
    | tuple_type
    | bag_type
    | map_type
    ;

// Simple type keywords; the matched token itself is the AST node.
simple_type
    : INT
    | LONG
    | FLOAT
    | DOUBLE
    | CHARARRAY
    | BYTEARRAY
    ;

// Tuple type: the TUPLE keyword followed by a field list.
// TUPLE is kept as the subtree root (rather than dropped) so the tree records
// that this is a tuple type; a bare TUPLE_DEF subtree alone does not say which
// keyword introduced it. Result: ^(TUPLE ^(TUPLE_DEF field+)).
tuple_type
    : TUPLE^ tuple_def
    ;

// Bag type: the BAG keyword followed by a field list.
// BAG is kept as the subtree root (rather than dropped) so the tree records
// that this is a bag type; a bare TUPLE_DEF subtree alone does not say which
// keyword introduced it. Result: ^(BAG ^(TUPLE_DEF field+)).
bag_type
    : BAG^ tuple_def
    ;

// Map type: MAP followed by an empty pair of brackets.
// The MAP token is kept as the AST node. If every token were excluded the rule
// would produce an empty tree, so a map-typed field would look untyped and the
// mandatory "type" element of the CAST_EXPR rewrite would be missing.
// The brackets carry no information and are still excluded.
map_type
    : MAP LEFT_BRACKET! RIGHT_BRACKET!
    ;

// Function reference: a name followed by a parenthesised, optional argument
// list. Parentheses are discarded; result is ^(FUNC name-parts args?).
func_clause
    : func_name LEFT_PAREN func_args? RIGHT_PAREN
      -> ^( FUNC func_name func_args? )
    ;

// Possibly dotted function name. The PERIOD separators are discarded and only
// the IDENTIFIER tokens are emitted, as a flat list.
func_name
    : IDENTIFIER ( PERIOD IDENTIFIER )*
      -> IDENTIFIER+
    ;

// One or more comma-separated QUOTEDSTRING arguments; commas are discarded.
func_args
    : QUOTEDSTRING ( COMMA QUOTEDSTRING )*
      -> QUOTEDSTRING+
    ;

// STORE becomes the root; alias, file name and the optional function clause
// are its children. INTO and USING are excluded from the tree.
store_clause
    : STORE^ alias
      INTO! filename
      ( USING! func_clause )?
    ;

// FILTER becomes the root with the alias and the condition as children; BY is
// excluded from the tree. The embedded action prints a trace line to stdout.
filter_clause
    : FILTER^ alias BY! cond
      { System.out.println( "in filter_clause" ); }
    ;

// Top of the boolean-condition hierarchy; delegates to or_cond.
cond
    : or_cond
    ;

// Chain of and_cond operands joined by OR; each OR becomes the root of the
// tree built so far.
or_cond
    : and_cond ( OR^ and_cond )*
    ;

// Chain of unary_cond operands joined by AND; each AND becomes the root of
// the tree built so far.
and_cond
    : unary_cond ( AND^ unary_cond )*
    ;

// A single condition. Alternatives are listed in the order in which they are
// to be attempted (the grammar runs with backtrack=true). Each alternative
// carries an action that prints a trace line to stdout.
unary_cond
    : // parenthesised condition; the parentheses are excluded from the tree
      LEFT_PAREN! cond RIGHT_PAREN!
      { System.out.println( "in uni_cond 1" ); }
    | // comparison; FILTEROP becomes the root of the two operands
      expr FILTEROP^ expr
      { System.out.println( "in exp op exp" ); }
    | // function call used as a condition
      func_clause
      { System.out.println( "in uni_cond 4" ); }
    | // "expr IS [NOT] NULL"
      null_check_cond
      { System.out.println( "in uni_cond 5" ); }
    | // negation
      not_cond
      { System.out.println( "in uni_cond 6" ); }
    ;

// Negated condition; NOT is the root and the negated condition its child.
not_cond
    : NOT^ unary_cond
    ;

// "expr IS [NOT] NULL". NULL becomes the root, IS is excluded from the tree,
// and the optional NOT is kept as a child next to the expression.
null_check_cond
    : expr IS! NOT? NULL^
    ;

// Top of the arithmetic-expression hierarchy; delegates to add_expr.
expr
    : add_expr
    ;

// Additive level: multi_expr operands joined by PLUS or MINUS, with each
// operator becoming the root of the tree built so far.
add_expr
    : multi_expr
      ( ( PLUS | MINUS )^ multi_expr )*
    ;

// Multiplicative level: cast_expr operands joined by an operator token, with
// each operator becoming the root of the tree built so far.
// NOTE(review): START is presumably the multiplication token and may be a
// misspelling of STAR - confirm the token name against PigLexer.
multi_expr
    : cast_expr
      ( ( START | DIV | PERCENT )^ cast_expr )*
    ;

// Either an explicit cast "(type) operand", rewritten to
// ^(CAST_EXPR type operand), or a plain unary_expr passed through unchanged.
cast_expr
    : ( LEFT_PAREN type RIGHT_PAREN ) unary_expr
      -> ^( CAST_EXPR type unary_expr )
    | unary_expr
    ;

// An evaluable expression, a parenthesised infix expression (parentheses are
// excluded from the tree), or a negated expression.
unary_expr
    : eval_expr
    | ( LEFT_PAREN! infix_expr RIGHT_PAREN! )
    | neg_expr
    ;

// A constant or a variable expression; the constant alternative is listed first.
eval_expr
    : const_expr
    | var_expr
    ;

// A projectable expression followed by any number of dot or pound
// projections. The projectable expression's tree is made the root and the
// projections are attached as its children.
var_expr
    : projectable_expr^
      ( dot_proj | pound_proj )*
    ;

// Expressions that a projection may follow: a function call, a column
// reference, or a parenthesised conditional expression.
projectable_expr
    : func_clause
    | col_ref
    | bin_expr
    ;

// Dot projection: PERIOD followed by one column reference or a parenthesised,
// comma-separated list of them. Result is ^(PERIOD col_ref+); parentheses and
// commas are discarded.
dot_proj
    : PERIOD
      ( col_ref
      | ( LEFT_PAREN col_ref ( COMMA col_ref )* RIGHT_PAREN )
      )
      -> ^( PERIOD col_ref+ )
    ;

// Pound projection: POUND is the root, with a QUOTEDSTRING or NULL key as child.
pound_proj
    : POUND^
      ( QUOTEDSTRING | NULL )
    ;

// Parenthesised conditional expression "( cond ? a : b )", rewritten to
// ^(BIN_EXPR cond a b). Labels exp1/exp2 keep the two branches distinct in
// the rewrite.
bin_expr
    : LEFT_PAREN cond QMARK exp1=infix_expr COLON exp2=infix_expr RIGHT_PAREN
      -> ^( BIN_EXPR cond $exp1 $exp2 )
    ;

// Arithmetic negation; MINUS is the root and the operand its only child.
neg_expr
    : MINUS^ cast_expr
    ;

// DISTINCT is the root with the alias as its child.
distinct_clause
    : DISTINCT^ alias
    ;

// A column reference, either by name or by "$" position.
col_ref
    : alias_col_ref
    | dollar_col_ref
    ;

// Named column reference: the GROUP keyword or an IDENTIFIER.
alias_col_ref
    : GROUP
    | IDENTIFIER
    ;

// Positional column reference: DOLLAR followed by an INTEGER.
// DOLLAR is kept as the subtree root so the result, ^(DOLLAR INTEGER), differs
// from the bare INTEGER node that the scalar rule produces for an integer
// literal; with DOLLAR dropped the two would be the same node in the tree.
dollar_col_ref
    : DOLLAR^ INTEGER
    ;

// Infix arithmetic expression; delegates to add_expr.
infix_expr
    : add_expr
    ;

// A constant: a scalar literal or a map, bag or tuple literal.
const_expr
    : scalar
    | map
    | bag
    | tuple
    ;

// Scalar literal tokens; the matched token itself is the AST node.
// NOTE(review): LONGINEGER looks like a misspelling of LONGINTEGER - confirm
// the token name defined in PigLexer before changing it.
scalar
    : INTEGER
    | LONGINEGER
    | FLOATNUMBER
    | DOUBLENUMBER
    | QUOTEDSTRING
    | NULL
    ;

// Map literal: bracketed, comma-separated, possibly empty list of key/value
// pairs. Brackets and commas are discarded; the pairs hang under MAP_VAL.
// The rewrite uses "keyvalue*" because the list is optional in the input: an
// empty literal "[]" must yield a childless MAP_VAL node, whereas "keyvalue+"
// would fail at runtime with an empty rewrite stream.
map
    : LEFT_BRACKET ( keyvalue ( COMMA keyvalue )* )? RIGHT_BRACKET
      -> ^( MAP_VAL keyvalue* )
    ;

// One map entry "key # value". POUND is discarded; the result is
// ^(KEY_VAL_PAIR key value).
keyvalue
    : string_val POUND const_expr
      -> ^( KEY_VAL_PAIR string_val const_expr )
    ;

// A map key: a QUOTEDSTRING or NULL.
string_val
    : QUOTEDSTRING
    | NULL
    ;

// Bag literal: curly-braced, comma-separated, possibly empty list of tuples.
// Braces and commas are discarded; the tuples hang under BAG_VAL.
// The rewrite uses "tuple*" because the list is optional in the input: an
// empty literal "{}" must yield a childless BAG_VAL node, whereas "tuple+"
// would fail at runtime with an empty rewrite stream.
bag
    : LEFT_CURLY ( tuple ( COMMA tuple )* )? RIGHT_CURLY
      -> ^( BAG_VAL tuple* )
    ;

// Tuple literal: parenthesised, comma-separated, possibly empty list of
// constants. Parentheses and commas are discarded; the constants hang under
// TUPLE_VAL.
// The rewrite uses "const_expr*" because the list is optional in the input:
// an empty literal "()" must yield a childless TUPLE_VAL node, whereas
// "const_expr+" would fail at runtime with an empty rewrite stream.
tuple
    : LEFT_PAREN ( const_expr ( COMMA const_expr )* )? RIGHT_PAREN
      -> ^( TUPLE_VAL const_expr* )
    ;

PigParser (last edited 2010-10-08 22:17:52 by XuefuZhang)