X-Git-Url: https://wannabe.guru.org/gitweb/?a=blobdiff_plain;f=dateparse%2Fdateparse_utils.g4;h=364aa0f985592f84deb1e41b5bcd09c3c7126873;hb=e46158e49121b8a955bb07b73f5bcf9928b79c90;hp=86a9cd40c4dfb7e2e13aae870551033cc248673c;hpb=497fb9e21f45ec08e1486abaee6dfa7b20b8a691;p=python_utils.git diff --git a/dateparse/dateparse_utils.g4 b/dateparse/dateparse_utils.g4 index 86a9cd4..364aa0f 100644 --- a/dateparse/dateparse_utils.g4 +++ b/dateparse/dateparse_utils.g4 @@ -1,5 +1,7 @@ +// © Copyright 2021-2022, Scott Gasch +// // antlr4 -Dlanguage=Python3 ./dateparse_utils.g4 - +// // Hi, self. In ANTLR grammars, there are two separate types of symbols: those // for the lexer and those for the parser. The former begin with a CAPITAL // whereas the latter begin with lowercase. The order of the lexer symbols @@ -14,13 +16,39 @@ grammar dateparse_utils; -parse: dateExpr ; +parse + : SPACE* dateExpr + | SPACE* timeExpr + | SPACE* dateExpr SPACE* dtdiv? SPACE* timeExpr + | SPACE* timeExpr SPACE* tddiv? SPACE+ dateExpr + ; dateExpr : singleDateExpr | baseAndOffsetDateExpr ; +timeExpr + : singleTimeExpr + | baseAndOffsetTimeExpr + ; + +singleTimeExpr + : twentyFourHourTimeExpr + | twelveHourTimeExpr + | specialTimeExpr + ; + +twentyFourHourTimeExpr + : hour ((SPACE|tdiv)+ minute ((SPACE|tdiv)+ second ((SPACE|tdiv)+ micros)? )? )? SPACE* tzExpr? + ; + +twelveHourTimeExpr + : hour ((SPACE|tdiv)+ minute ((SPACE|tdiv)+ second ((SPACE|tdiv)+ micros)? )? )? SPACE* ampm SPACE* tzExpr? + ; + +ampm: ('a'|'am'|'p'|'pm'|'AM'|'PM'|'A'|'P'); + singleDateExpr : monthDayMaybeYearExpr | dayMonthMaybeYearExpr @@ -28,45 +56,92 @@ singleDateExpr | specialDateMaybeYearExpr | nthWeekdayInMonthMaybeYearExpr | firstLastWeekdayInMonthMaybeYearExpr + | deltaDateExprRelativeToTodayImplied + | dayName (SPACE|ddiv)+ monthDayMaybeYearExpr (SPACE|ddiv)* singleTimeExpr* + | dayName ; monthDayMaybeYearExpr - : monthExpr DIV* dayOfMonth (DIV* year)? + : monthExpr (SPACE|ddiv)+ dayOfMonth ((SPACE|ddiv)+ year)? ; dayMonthMaybeYearExpr - : dayOfMonth DIV* monthName (DIV* year)? + : dayOfMonth (SPACE|ddiv)+ monthName ((SPACE|ddiv)+ year)? ; yearMonthDayExpr - : year DIV* monthName DIV* dayOfMonth + : year (SPACE|ddiv)+ monthExpr (SPACE|ddiv)+ dayOfMonth ; nthWeekdayInMonthMaybeYearExpr - : nth dayName ('in'|'of') monthName (DIV* year)? + : nth SPACE+ dayName SPACE+ ('in'|'of') SPACE+ monthName ((ddiv|SPACE)+ year)? ; firstLastWeekdayInMonthMaybeYearExpr - : firstOrLast dayName ('in'|'of'|DIV)? monthName (DIV* year)? + : firstOrLast SPACE+ dayName (SPACE+ ('in'|'of'))? SPACE+ monthName ((ddiv|SPACE)+ year)? ; specialDateMaybeYearExpr - : specialDate (DIV* year)? + : (thisNextLast SPACE+)? specialDate ((SPACE|ddiv)+ year)? ; +thisNextLast: (THIS|NEXT|LAST) ; + baseAndOffsetDateExpr - : baseDate deltaPlusMinusExpr - | deltaPlusMinusExpr baseDate + : baseDate SPACE+ deltaPlusMinusExpr + | deltaPlusMinusExpr SPACE+ baseDate + ; + +deltaDateExprRelativeToTodayImplied + : nFoosFromTodayAgoExpr + | deltaRelativeToTodayExpr + ; + +deltaRelativeToTodayExpr + : thisNextLast SPACE+ deltaUnit + ; + +nFoosFromTodayAgoExpr + : unsignedInt SPACE+ deltaUnit SPACE+ AGO_FROM_NOW ; baseDate: singleDateExpr ; -deltaPlusMinusExpr: deltaInt deltaUnit deltaBeforeAfter? ; +baseAndOffsetTimeExpr + : deltaPlusMinusTimeExpr SPACE+ baseTime + | baseTime SPACE+ deltaPlusMinusTimeExpr + ; + +baseTime: singleTimeExpr ; + +deltaPlusMinusExpr + : nth SPACE+ deltaUnit (SPACE+ deltaBeforeAfter)? + ; + +deltaNextLast + : (NEXT|LAST) ; -deltaUnit: (WEEK|DAY|SUN|WEEKDAY) ; +deltaPlusMinusTimeExpr + : countUnitsBeforeAfterTimeExpr + | fractionBeforeAfterTimeExpr + ; + +countUnitsBeforeAfterTimeExpr + : nth (SPACE+ deltaTimeUnit)? SPACE+ deltaTimeBeforeAfter + ; + +fractionBeforeAfterTimeExpr + : deltaTimeFraction SPACE+ deltaTimeBeforeAfter + ; + +deltaUnit: (YEAR|MONTH|WEEK|DAY|WEEKDAY|WORKDAY) ; + +deltaTimeUnit: (SECOND|MINUTE|HOUR|WORKDAY) ; deltaBeforeAfter: (BEFORE|AFTER) ; +deltaTimeBeforeAfter: (BEFORE|AFTER) ; + monthExpr : monthName | monthNumber @@ -76,137 +151,318 @@ year: DIGIT DIGIT DIGIT DIGIT ; specialDate: SPECIAL_DATE ; -dayOfMonth: DIGIT? DIGIT ('st'|'nd'|'rd'|'th')? ; +dayOfMonth + : DIGIT DIGIT? ('st'|'ST'|'nd'|'ND'|'rd'|'RD'|'th'|'TH')? + | KALENDS (SPACE+ 'of')? + | IDES (SPACE+ 'of')? + | NONES (SPACE+ 'of')? + ; firstOrLast: (FIRST|LAST) ; -nth: DIGIT ('st'|'nd'|'rd'|'th')? ; +nth: (DASH|PLUS)? DIGIT+ ('st'|'ST'|'nd'|'ND'|'rd'|'RD'|'th'|'TH')? ; + +unsignedInt: DIGIT+ ; -deltaInt: ('+'|'-')? DIGIT+ ; +deltaTimeFraction: DELTA_TIME_FRACTION ; + +specialTimeExpr: specialTime (SPACE+ tzExpr)? ; + +specialTime: SPECIAL_TIME ; dayName: WEEKDAY ; -monthName: MONTH ; +monthName: MONTH_NAME ; + +monthNumber: DIGIT DIGIT? ; + +hour: DIGIT DIGIT? ; -monthNumber: DIGIT? DIGIT ; +minute: DIGIT DIGIT ; + +second: DIGIT DIGIT ; + +micros: DIGIT DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? DIGIT? ; + +ddiv: (SLASH|DASH|',') ; + +tdiv: (COLON|DOT) ; + +dtdiv: ('T'|'t'|'at'|'AT'|','|';') ; + +tddiv: ('on'|'ON'|','|';') ; + +tzExpr + : ntz + | ltz + ; + +ntz: (PLUS|DASH) DIGIT DIGIT COLON? DIGIT DIGIT ; + +ltz: UPPERCASE_STRING ; // ---------------------------------- +SPACE: [ \t\r\n] ; + COMMENT: '#' ~[\r\n]* -> skip ; -SPACE: [ \t\r\n] -> skip ; +THE: ('the'|'The') SPACE* -> skip ; + +DASH: '-' ; + +PLUS: '+' ; -THE: 'the' -> skip ; +SLASH: '/' ; -DIV: ('/'|','|'.') ; +DOT: '.' ; -MONTH: (JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC) ; +COLON: ':' ; + +MONTH_NAME: (JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC) ; JAN : 'jan' + | 'Jan' + | 'JAN' + | 'January' | 'january' ; FEB : 'feb' + | 'Feb' + | 'FEB' + | 'February' | 'february' ; MAR : 'mar' + | 'Mar' + | 'MAR' + | 'March' | 'march' ; APR : 'apr' + | 'Apr' + | 'APR' + | 'April' | 'april' ; MAY : 'may' + | 'May' + | 'MAY' ; JUN : 'jun' + | 'Jun' + | 'JUN' + | 'June' | 'june' ; JUL : 'jul' + | 'Jul' + | 'JUL' + | 'July' | 'july' ; AUG : 'aug' + | 'Aug' + | 'AUG' + | 'August' | 'august' ; SEP : 'sep' + | 'Sep' + | 'SEP' | 'sept' + | 'Sept' + | 'SEPT' + | 'September' | 'september' ; OCT : 'oct' + | 'Oct' + | 'OCT' + | 'October' | 'october' ; NOV : 'nov' + | 'Nov' + | 'NOV' + | 'November' | 'november' ; DEC : 'dec' + | 'Dec' + | 'DEC' + | 'December' | 'december' ; WEEKDAY: (SUN|MON|TUE|WED|THU|FRI|SAT) ; SUN : 'sun' + | 'Sun' + | 'SUN' | 'suns' + | 'Suns' + | 'SUNS' | 'sunday' + | 'Sunday' | 'sundays' + | 'Sundays' ; MON : 'mon' + | 'Mon' + | 'MON' | 'mons' + | 'Mons' + | 'MONS' | 'monday' + | 'Monday' | 'mondays' + | 'Mondays' ; TUE : 'tue' + | 'Tue' + | 'TUE' | 'tues' + | 'Tues' + | 'TUES' | 'tuesday' + | 'Tuesday' | 'tuesdays' + | 'Tuesdays' ; WED : 'wed' + | 'Wed' + | 'WED' | 'weds' + | 'Weds' + | 'WEDS' | 'wednesday' + | 'Wednesday' | 'wednesdays' + | 'Wednesdays' ; THU : 'thu' + | 'Thu' + | 'THU' | 'thur' + | 'Thur' + | 'THUR' | 'thurs' + | 'Thurs' + | 'THURS' | 'thursday' + | 'Thursday' | 'thursdays' + | 'Thursdays' ; FRI : 'fri' + | 'Fri' + | 'FRI' | 'fris' + | 'Fris' + | 'FRIS' | 'friday' + | 'Friday' | 'fridays' + | 'Fridays' ; SAT : 'sat' + | 'Sat' + | 'SAT' | 'sats' + | 'Sats' + | 'SATS' | 'saturday' + | 'Saturday' | 'saturdays' + | 'Saturdays' ; WEEK : 'week' + | 'Week' | 'weeks' + | 'Weeks' + | 'wks' ; DAY : 'day' + | 'Day' | 'days' + | 'Days' + ; + +HOUR + : 'hour' + | 'Hour' + | 'hours' + | 'Hours' + | 'hrs' + ; + +MINUTE + : 'min' + | 'Min' + | 'MIN' + | 'mins' + | 'Mins' + | 'MINS' + | 'minute' + | 'Minute' + | 'minutes' + | 'Minutes' + ; + +SECOND + : 'sec' + | 'Sec' + | 'SEC' + | 'secs' + | 'Secs' + | 'SECS' + | 'second' + | 'Second' + | 'seconds' + | 'Seconds' + ; + +MONTH + : 'month' + | 'Month' + | 'months' + | 'Months' + ; + +YEAR + : 'year' + | 'Year' + | 'years' + | 'Years' + | 'yrs' ; SPECIAL_DATE : TODAY + | YESTERDAY + | TOMORROW | NEW_YEARS_EVE | NEW_YEARS_DAY | MARTIN_LUTHER_KING_DAY @@ -217,73 +473,120 @@ SPECIAL_DATE | LABOR_DAY | COLUMBUS_DAY | VETERANS_DAY + | HALLOWEEN | THANKSGIVING_DAY | CHRISTMAS_EVE | CHRISTMAS ; +SPECIAL_TIME + : NOON + | MIDNIGHT + ; + +NOON + : ('noon'|'Noon'|'midday'|'Midday') + ; + +MIDNIGHT + : ('midnight'|'Midnight') + ; + // today TODAY - : 'today' + : ('today'|'Today'|'now'|'Now') + ; + +// yeste +YESTERDAY + : ('yesterday'|'Yesterday') + ; + +// tomor +TOMORROW + : ('tomorrow'|'Tomorrow') ; // easte EASTER - : 'easter' - | 'easter sunday' + : 'easter' SUN? + | 'Easter' SUN? ; // newye NEW_YEARS_DAY : 'new years' + | 'New Years' | 'new years day' + | 'New Years Day' | 'new year\'s' + | 'New Year\'s' | 'new year\'s day' + | 'New year\'s Day' ; // newyeeve NEW_YEARS_EVE : 'nye' + | 'NYE' | 'new years eve' + | 'New Years Eve' | 'new year\'s eve' + | 'New Year\'s Eve' ; // chris CHRISTMAS : 'christmas' + | 'Christmas' | 'christmas day' + | 'Christmas Day' | 'xmas' + | 'Xmas' | 'xmas day' + | 'Xmas Day' ; // chriseve CHRISTMAS_EVE : 'christmas eve' + | 'Christmas Eve' | 'xmas eve' + | 'Xmas Eve' ; // mlk MARTIN_LUTHER_KING_DAY : 'martin luther king day' + | 'Martin Luther King Day' | 'mlk day' + | 'MLK Day' + | 'MLK day' | 'mlk' + | 'MLK' ; // memor MEMORIAL_DAY : 'memorial' + | 'Memorial' | 'memorial day' + | 'Memorial Day' ; // indep INDEPENDENCE_DAY : 'independence day' + | 'Independence day' + | 'Independence Day' ; // labor LABOR_DAY : 'labor' + | 'Labor' | 'labor day' + | 'Labor Day' ; // presi @@ -294,6 +597,12 @@ PRESIDENTS_DAY | 'presidents' | 'president\'s' | 'presidents\'' + | 'Presidents\' Day' + | 'President\'s Day' + | 'Presidents Day' + | 'Presidents' + | 'President\'s' + | 'Presidents\'' ; // colum @@ -302,6 +611,10 @@ COLUMBUS_DAY | 'columbus day' | 'indiginous peoples day' | 'indiginous peoples\' day' + | 'Columbus' + | 'Columbus Day' + | 'Indiginous Peoples Day' + | 'Indiginous Peoples\' Day' ; // veter @@ -309,20 +622,62 @@ VETERANS_DAY : 'veterans' | 'veterans day' | 'veterans\' day' + | 'Veterans' + | 'Veterans Day' + | 'Veterans\' Day' + ; + +// hallo +HALLOWEEN + : 'halloween' + | 'Halloween' ; // thank THANKSGIVING_DAY : 'thanksgiving' | 'thanksgiving day' + | 'Thanksgiving' + | 'Thanksgiving Day' ; -FIRST: 'first' ; +FIRST: ('first'|'First') ; + +LAST: ('last'|'Last'|'this past') ; + +THIS: ('this'|'This'|'this coming') ; + +NEXT: ('next'|'Next') ; -LAST: 'last' ; +AGO_FROM_NOW: (AGO|FROM_NOW) ; -BEFORE: 'before' ; +AGO: ('ago'|'Ago'|'back'|'Back') ; -AFTER: ('after'|'from') ; +FROM_NOW: ('from now'|'From Now') ; + +BEFORE: ('to'|'To'|'before'|'Before'|'til'|'until'|'Until') ; + +AFTER: ('after'|'After'|'from'|'From'|'past'|'Past') ; + +DELTA_TIME_FRACTION: ('quarter'|'Quarter'|'half'|'Half') ; DIGIT: ('0'..'9') ; + +IDES: ('ides'|'Ides') ; + +NONES: ('nones'|'Nones') ; + +KALENDS: ('kalends'|'Kalends') ; + +WORKDAY + : 'workday' + | 'workdays' + | 'work days' + | 'working days' + | 'Workday' + | 'Workdays' + | 'Work Days' + | 'Working Days' + ; + +UPPERCASE_STRING: [A-Z]+ ;