#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* global variables...
 * ...could be avoided */

/* Next character read from the input stream.  Declared int rather than
 * char because getc() returns an int: only an int can hold every byte
 * value AND the out-of-band EOF marker, and only such values are valid
 * arguments for the <ctype.h> classification functions. */
int nextChar;
int charClass; /* character class of nextChar */

/* character classes: */
#define LETTER 0  /* [a-zA-Z] */
#define DIGIT 1   /* [0-9] */
#define UNKNOWN 2 /* .* */
/* also EOF from stdio.h (a negative int, typically -1) */

/* tokens: */
#define GENERIC_TOKEN 1
#define INTEGER_LITERAL 2
#define UNKNOWN_TOKEN 3
#define INT_DATA_TYPE_TOKEN 4

#define MAXLENGTH 100
char lexeme[MAXLENGTH]; /* NUL-terminated text of the current token */
int lexemeLength;       /* number of characters stored in lexeme */

/* getChar() reads the next character from stdin into nextChar and
 * stores its character class (LETTER, DIGIT, UNKNOWN or EOF) in
 * charClass. */
void getChar(void)
{
    nextChar = getc(stdin);

    /* Determine what the next character is... */
    if (nextChar == EOF) {
        charClass = EOF;
    } else if (isalpha(nextChar)) {
        charClass = LETTER;
    } else if (isdigit(nextChar)) {
        charClass = DIGIT;
    } else {
        charClass = UNKNOWN;
    }
}

/* addChar() appends nextChar to lexeme and keeps it NUL-terminated.
 * If the buffer is full the character is dropped and an error is
 * reported on stderr. */
void addChar(void)
{
    /* One slot must stay free for the terminating '\0', so at most
     * MAXLENGTH - 1 characters may be stored. */
    if (lexemeLength < MAXLENGTH - 1) {
        lexeme[lexemeLength++] = (char)nextChar;
        lexeme[lexemeLength] = '\0';
    } else {
        fprintf(stderr, "ERROR: Lexeme is too long\n");
    }
}

/* lookup() returns the token for keywords, identifiers, etc.:
 * INT_DATA_TYPE_TOKEN for the keyword "int", GENERIC_TOKEN for any
 * other string.  The parameter intentionally has the same name as the
 * global buffer; only the argument is inspected. */
int lookup(const char *lexeme)
{
    if (strcmp(lexeme, "int") == 0) {
        return INT_DATA_TYPE_TOKEN;
    }
    return GENERIC_TOKEN;
}

/* lex -- a simple lexical analyzer in C
 *
 * Skips whitespace, collects the next token's text into the global
 * lexeme and returns its token code:
 *   - GENERIC_TOKEN / INT_DATA_TYPE_TOKEN for letter (letter|digit)*
 *   - INTEGER_LITERAL for digit+
 *   - UNKNOWN_TOKEN for any other single character
 *   - EOF at end of input (lexeme is then the empty string)
 */
int lex(void)
{
    static int first = 1;

    /* Start every token with an empty lexeme so that a previous
     * token's text is never reported again (e.g. at end of input). */
    lexemeLength = 0;
    lexeme[0] = '\0';

    if (first) {   /* if we're just starting up, */
        getChar(); /* get the first character from */
        first = 0; /* the input stream */
    }

    /* Keep reading input until a non-whitespace char */
    while (isspace(nextChar)) {
        getChar();
    }

    switch (charClass) {
    case LETTER:
        addChar();
        getChar();
        while (charClass == LETTER || charClass == DIGIT) {
            addChar();
            getChar();
        }
        return lookup(lexeme);

    case DIGIT:
        addChar();
        getChar();
        while (charClass == DIGIT) {
            addChar();
            getChar();
        }
        return INTEGER_LITERAL;

    case UNKNOWN:
        addChar();
        getChar();
        return UNKNOWN_TOKEN;

    case EOF:
    default:
        /* getChar() only ever produces the four classes above; the
         * default label guarantees a defined return value and ends
         * the scan if charClass is ever anything else. */
        return EOF;
    }
}

/* Reads tokens from stdin until end of input and prints one line per
 * token: the lexeme in brackets followed by the name of its token.
 * The EOF sentinel ends the loop and is not printed. */
int main(void)
{
    int token;

    while ((token = lex()) != EOF) {
        printf("[%s]", lexeme);
        if (token == GENERIC_TOKEN) {
            printf("GENERIC_LEXEME\n");
        } else if (token == INTEGER_LITERAL) {
            printf("INTEGER_LITERAL\n");
        } else if (token == INT_DATA_TYPE_TOKEN) {
            printf("INT_DATA_TYPE_TOKEN\n");
        } else {
            printf("UNKNOWN TOKEN\n");
        }
    }
    return 0;
}