// Generate the Python lexer/parser with:
//
//     antlr4 -Dlanguage=Python3 ./dateparse_utils.g4
//
// Note to self: ANTLR grammars have two separate kinds of symbols, those for
// the lexer and those for the parser.  Lexer symbols begin with a CAPITAL
// letter whereas parser symbols begin with a lowercase letter.  The order of
// the lexer rules matters: when two lexer rules match input of the same
// length, the rule that appears first in this file wins.  There is a good
// tutorial on all of this at:
//
//     https://tomassetti.me/antlr-mega-tutorial/
//
// There are also a zillion premade grammars at:
//
//     https://github.com/antlr/grammars-v4

grammar dateparse_utils;

// ---------------------------------------------------------------------------
// Parser rules
// ---------------------------------------------------------------------------

// Entry point.  The explicit EOF forces the parser to consume the whole
// input; without it a valid prefix is accepted and any trailing tokens are
// silently ignored.
parse
    : dateExpr EOF
    ;

// A date is either a self-contained date or a base date plus/minus a delta.
dateExpr
    : singleDateExpr
    | baseAndOffsetDateExpr
    ;

// Alternative order is significant: when more than one alternative can match,
// ANTLR resolves the ambiguity in favor of the earliest one.
singleDateExpr
    : monthDayMaybeYearExpr
    | dayMonthMaybeYearExpr
    | yearMonthDayExpr
    | specialDateMaybeYearExpr
    | nthWeekdayInMonthMaybeYearExpr
    | firstLastWeekdayInMonthMaybeYearExpr
    ;

// Month (name or number), then day of month, then an optional 4-digit year.
monthDayMaybeYearExpr
    : monthExpr DIV* dayOfMonth (DIV* year)?
    ;

// Day of month, then month name, then an optional 4-digit year.
dayMonthMaybeYearExpr
    : dayOfMonth DIV* monthName (DIV* year)?
    ;

// 4-digit year, then month name, then day of month.
yearMonthDayExpr
    : year DIV* monthName DIV* dayOfMonth
    ;

// <nth> <weekday> in|of <month> [year]; the 'in'/'of' connector is required.
nthWeekdayInMonthMaybeYearExpr
    : nth dayName ('in'|'of') monthName (DIV* year)?
    ;

// first|last <weekday> [in|of|DIV] <month> [year]; the connector is optional.
firstLastWeekdayInMonthMaybeYearExpr
    : firstOrLast dayName ('in'|'of'|DIV)? monthName (DIV* year)?
    ;

// A named special date (see SPECIAL_DATE) with an optional 4-digit year.
specialDateMaybeYearExpr
    : specialDate (DIV* year)?
    ;

// A base date with a delta on either side of it.
baseAndOffsetDateExpr
    : baseDate deltaPlusMinusExpr
    | deltaPlusMinusExpr baseDate
    ;

baseDate
    : singleDateExpr
    ;

// Optionally signed count, a unit, and an optional before/after direction.
deltaPlusMinusExpr
    : deltaInt deltaUnit deltaBeforeAfter?
    ;

// NOTE(review): the lexer should never emit a SUN token because WEEKDAY is
// declared before SUN and matches all of the same text, so the SUN
// alternative here looks unreachable.  It is left in place so the generated
// context class keeps the same accessors -- confirm before removing.
deltaUnit
    : (WEEK|DAY|SUN|WEEKDAY)
    ;

deltaBeforeAfter
    : (BEFORE|AFTER)
    ;

monthExpr
    : monthName
    | monthNumber
    ;

// Exactly four digits.
year
    : DIGIT DIGIT DIGIT DIGIT
    ;

specialDate
    : SPECIAL_DATE
    ;

// One or two digits with an optional ordinal suffix.
dayOfMonth
    : DIGIT? DIGIT ('st'|'nd'|'rd'|'th')?
    ;

firstOrLast
    : (FIRST|LAST)
    ;

// A single digit with an optional ordinal suffix.
nth
    : DIGIT ('st'|'nd'|'rd'|'th')?
    ;

// One or more digits with an optional leading sign.
deltaInt
    : ('+'|'-')? DIGIT+
    ;

dayName
    : WEEKDAY
    ;

monthName
    : MONTH
    ;

// One or two digits.
monthNumber
    : DIGIT? DIGIT
    ;

// ---------------------------------------------------------------------------
// Lexer rules.  Do not reorder these: rule order decides which token type is
// produced when several rules match the same text.  All literals are
// lowercase only.
// ---------------------------------------------------------------------------

// '#' through the end of the line is discarded.
COMMENT: '#' ~[\r\n]* -> skip ;

// Whitespace is discarded.
SPACE: [ \t\r\n] -> skip ;

// The word 'the' is discarded.
THE: 'the' -> skip ;

// Separators allowed between the parts of a date.
DIV: ('/'|','|'.') ;

// MONTH is declared ahead of the per-month rules it references so that a
// month name is tokenized as MONTH rather than as JAN, FEB, etc.
MONTH: (JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC) ;

JAN : 'jan'
    | 'january'
    ;

FEB : 'feb'
    | 'february'
    ;

MAR : 'mar'
    | 'march'
    ;

APR : 'apr'
    | 'april'
    ;

MAY : 'may'
    ;

JUN : 'jun'
    | 'june'
    ;

JUL : 'jul'
    | 'july'
    ;

AUG : 'aug'
    | 'august'
    ;

SEP : 'sep'
    | 'sept'
    | 'september'
    ;

OCT : 'oct'
    | 'october'
    ;

NOV : 'nov'
    | 'november'
    ;

DEC : 'dec'
    | 'december'
    ;

// WEEKDAY is declared ahead of the per-day rules it references so that a day
// name is tokenized as WEEKDAY rather than as SUN, MON, etc.
WEEKDAY: (SUN|MON|TUE|WED|THU|FRI|SAT) ;

SUN : 'sun'
    | 'suns'
    | 'sunday'
    | 'sundays'
    ;

MON : 'mon'
    | 'mons'
    | 'monday'
    | 'mondays'
    ;

TUE : 'tue'
    | 'tues'
    | 'tuesday'
    | 'tuesdays'
    ;

WED : 'wed'
    | 'weds'
    | 'wednesday'
    | 'wednesdays'
    ;

THU : 'thu'
    | 'thur'
    | 'thurs'
    | 'thursday'
    | 'thursdays'
    ;

FRI : 'fri'
    | 'fris'
    | 'friday'
    | 'fridays'
    ;

SAT : 'sat'
    | 'sats'
    | 'saturday'
    | 'saturdays'
    ;

WEEK
    : 'week'
    | 'weeks'
    ;

DAY
    : 'day'
    | 'days'
    ;

// SPECIAL_DATE is declared ahead of the individual holiday rules it
// references so that a holiday name is tokenized as SPECIAL_DATE.
SPECIAL_DATE
    : TODAY
    | NEW_YEARS_EVE
    | NEW_YEARS_DAY
    | MARTIN_LUTHER_KING_DAY
    | PRESIDENTS_DAY
    | EASTER
    | MEMORIAL_DAY
    | INDEPENDENCE_DAY
    | LABOR_DAY
    | COLUMBUS_DAY
    | VETERANS_DAY
    | THANKSGIVING_DAY
    | CHRISTMAS_EVE
    | CHRISTMAS
    ;

// today
TODAY
    : 'today'
    ;

// easte
EASTER
    : 'easter'
    | 'easter sunday'
    ;

// newye
NEW_YEARS_DAY
    : 'new years'
    | 'new years day'
    | 'new year\'s'
    | 'new year\'s day'
    ;

// newyeeve
NEW_YEARS_EVE
    : 'nye'
    | 'new years eve'
    | 'new year\'s eve'
    ;

// chris
CHRISTMAS
    : 'christmas'
    | 'christmas day'
    | 'xmas'
    | 'xmas day'
    ;

// chriseve
CHRISTMAS_EVE
    : 'christmas eve'
    | 'xmas eve'
    ;

// mlk
MARTIN_LUTHER_KING_DAY
    : 'martin luther king day'
    | 'mlk day'
    | 'mlk'
    ;

// memor
MEMORIAL_DAY
    : 'memorial'
    | 'memorial day'
    ;

// indep
INDEPENDENCE_DAY
    : 'independence day'
    ;

// labor
LABOR_DAY
    : 'labor'
    | 'labor day'
    ;

// presi
PRESIDENTS_DAY
    : 'presidents\' day'
    | 'president\'s day'
    | 'presidents day'
    | 'presidents'
    | 'president\'s'
    | 'presidents\''
    ;

// colum
// The misspelled 'indiginous' forms are kept for backward compatibility; the
// correctly spelled 'indigenous' forms are accepted as well.
// NOTE(review): whatever code consumes the SPECIAL_DATE text must recognize
// the 'indigenous' spelling too -- confirm against the listener.
COLUMBUS_DAY
    : 'columbus'
    | 'columbus day'
    | 'indiginous peoples day'
    | 'indiginous peoples\' day'
    | 'indigenous peoples day'
    | 'indigenous peoples\' day'
    ;

// veter
VETERANS_DAY
    : 'veterans'
    | 'veterans day'
    | 'veterans\' day'
    ;

// thank
THANKSGIVING_DAY
    : 'thanksgiving'
    | 'thanksgiving day'
    ;

FIRST: 'first' ;

LAST: 'last' ;

BEFORE: 'before' ;

// 'from' is accepted as a synonym for 'after'.
AFTER: ('after'|'from') ;

DIGIT: ('0'..'9') ;