-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathJFlexScannerWallStreetBets.flex
More file actions
63 lines (58 loc) · 1.64 KB
/
JFlexScannerWallStreetBets.flex
File metadata and controls
63 lines (58 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
package scanner;
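/**
* JFlex specification of a simple lexer for the compilers course 2017-2018.
* The generated scanner reads WallStreetBets post data and returns each
* recognised lexeme as a String prefixed with a label for its kind
* (user ID, subreddit ID, creation time, URL, domain, word, number, ...).
*/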
import java.io.*;
%%
/* Scanner generation options */

/* Generated class: public, named ScannerWSB. */
%public
%class ScannerWSB

/* Input handling: Unicode input, with line counting switched on. */
%unicode
%line

/* Scanning method: nextToken(), which returns every token as a String
 * (a label followed by the matched lexeme).
 */
%function nextToken
%type String

/* At end of input the scanning method returns the String "EOF". */
%eofval{
  return "EOF";
%eofval}
/**
* Pattern definitions
*
* Each macro below is referenced elsewhere as {Name}. Inside a character
* class every listed character is already an alternative of its own, so the
* members are written back to back; a pipe inside a class is a literal pipe,
* not an alternation operator.
*/

/* Basic character classes. */
AllLetters = [a-zA-Z]
LowercaseLetters = [a-z]
Digit = [0-9]

/* Single punctuation / symbol characters.
 * NOTE(review): the literal pipe is listed in both classes; because the
 * Punctuation rule precedes the Symbol rule in the rules section, a pipe is
 * reported as punctuation. Confirm that this classification is intended.
 */
Punctuation = [._\-'’\"\“\”:!?()\[\]/\|]
Symbol = [$#@%\^&*+=<>~\|]

/* Line endings and blanks. */
LineTerminator = \r|\n|\r\n
WhiteSpace = {LineTerminator} | [ \t\f]

/* Tail of a URL: everything up to the next comma or the end of the line.
 * Line terminators are excluded so that a URL which is the last item on a
 * line cannot absorb the lines that follow it.
 */
AllCharacters = [^,\r\n]+

/* "pf" followed by exactly four lowercase letters or digits. */
UserID = ("pf")({LowercaseLetters} | {Digit}){4}
/* The fixed identifier 2th52. */
SubredditID = (2th52)
/* Exactly ten digits (presumably a Unix timestamp in seconds - TODO confirm). */
Time = {Digit}{10}

/* URLs: a post URL is a URL under the old.reddit.com wallstreetbets prefix;
 * any other https URL is treated as an attachment URL.
 */
PostURL = ("https://old.reddit.com/r/wallstreetbets/"){AllCharacters}
AttachmentURL = ("https://"){AllCharacters}

/* Two or three dot-separated alphabetic labels, e.g. imgur.com or bbc.co.uk.
 * The optional third label may be of any length, like the first two.
 */
Domain = {AllLetters}+[.]{AllLetters}+([.]{AllLetters}+)?

/* Generic fallbacks: runs of letters and runs of digits. */
Word = {AllLetters}+
Number = {Digit}+
%%
/**
* Lexical rules
*
* The longest match wins; when two rules match the same length, the one
* listed first is chosen. The specific patterns therefore stay ahead of the
* general ones they overlap with (UserID before Word, Time before Number,
* PostURL before AttachmentURL, Punctuation before Symbol).
*/

/* Skipped input: whitespace and commas produce no token. */
{WhiteSpace}      { /* ignored */ }
","               { /* ignored */ }

/* Identifiers and timestamp. */
{UserID}          { return "USER ID: " + yytext(); }
{SubredditID}     { return "SUBREDDIT ID: " + yytext(); }
{Time}            { return "POST CREATION TIME: " + yytext(); }

/* URLs and bare domain names. */
{PostURL}         { return "POST URL: " + yytext(); }
{AttachmentURL}   { return "ATTACHMENT URL: " + yytext(); }
{Domain}          { return "DOMAIN: " + yytext(); }

/* Generic text. */
{Word}            { return "WORD: " + yytext(); }
{Number}          { return "NUMBER: " + yytext(); }
{Punctuation}     { return "PUNCTUATION: " + yytext(); }
{Symbol}          { return "SYMBOL: " + yytext(); }

/* Fallback: any single character that nothing above matched. */
[^]               { return "UNKNOWN: " + yytext(); }