/*-------------------------------------------------------------------*/ /* dfa.c */ /* Illustrate lexical analysis using a deterministic finite */ /* automaton (DFA) */ /*-------------------------------------------------------------------*/ #include "list.h" #include #include #include #define BOOLEAN int #define TRUE 1 #define FALSE 0 #define MAX_LINE_SIZE 2048 #define MAX_TOKEN_SIZE 1024 enum LexState {START_STATE, IN_OCT_OR_HEX_CONST_STATE, IN_DEC_CONST_STATE, IN_OCT_CONST_STATE, IN_HEX_CONST_STATE, DEC_CONST_FOUND_STATE, OCT_CONST_FOUND_STATE, HEX_CONST_FOUND_STATE, NO_TOKEN_FOUND_STATE}; BOOLEAN isNewline(int iChar) { return iChar == '\n'; } BOOLEAN isZero(int iChar) { return iChar == '0'; } BOOLEAN isDigit(int iChar) { return (iChar >= '0') && (iChar <= '9'); } BOOLEAN isNonzeroDigit(int iChar) { return (iChar >= '1') && (iChar <= '9'); } BOOLEAN isOctalDigit(int iChar) { return (iChar >= '0') && (iChar <= '7'); } BOOLEAN isHexDigit(int iChar) { return ((iChar >= '0') && (iChar <= '9')) || ((iChar >= 'a') && (iChar <= 'f')) || ((iChar >= 'A') && (iChar <= 'F')); } BOOLEAN isHexIndicator(int iChar) { return (iChar == 'x') || (iChar == 'X'); } char *getToken(char **ppcCurrentChar) { enum LexState iState = START_STATE; static char pcBuffer[MAX_TOKEN_SIZE]; int i = 0; int iChar; char *pcToken; while (TRUE) { switch (iState) { case START_STATE: iChar = **ppcCurrentChar; ++(*ppcCurrentChar); if (isNewline(iChar)) { iState = NO_TOKEN_FOUND_STATE; } else if (isZero(iChar)) { pcBuffer[i++] = (char)iChar; iState = IN_OCT_OR_HEX_CONST_STATE; } else if (isNonzeroDigit(iChar)) { pcBuffer[i++] = (char)iChar; iState = IN_DEC_CONST_STATE; } else { iState = START_STATE; } break; case IN_OCT_OR_HEX_CONST_STATE: iChar = **ppcCurrentChar; ++(*ppcCurrentChar); if (isNewline(iChar)) { --(*ppcCurrentChar); iState = DEC_CONST_FOUND_STATE; } else if (isOctalDigit(iChar)) { pcBuffer[i++] = (char)iChar; iState = IN_OCT_CONST_STATE; } else if (isHexIndicator(iChar)) { pcBuffer[i++] = (char)iChar; iState = IN_HEX_CONST_STATE; } else { --(*ppcCurrentChar); iState = DEC_CONST_FOUND_STATE; } break; case IN_DEC_CONST_STATE: iChar = **ppcCurrentChar; ++(*ppcCurrentChar); if (isNewline(iChar)) { --(*ppcCurrentChar); iState = DEC_CONST_FOUND_STATE; } else if (isDigit(iChar)) { pcBuffer[i++] = (char)iChar; iState = IN_DEC_CONST_STATE; } else { --(*ppcCurrentChar); iState = DEC_CONST_FOUND_STATE; } break; case IN_OCT_CONST_STATE: iChar = **ppcCurrentChar; ++(*ppcCurrentChar); if (isNewline(iChar)) { --(*ppcCurrentChar); iState = OCT_CONST_FOUND_STATE; } else if (isOctalDigit(iChar)) { pcBuffer[i++] = (char)iChar; iState = IN_OCT_CONST_STATE; } else { --(*ppcCurrentChar); iState = OCT_CONST_FOUND_STATE; } break; case IN_HEX_CONST_STATE: iChar = **ppcCurrentChar; ++(*ppcCurrentChar); if (isNewline(iChar)) { --(*ppcCurrentChar); iState = HEX_CONST_FOUND_STATE; } else if (isHexDigit(iChar)) { pcBuffer[i++] = (char)iChar; iState = IN_HEX_CONST_STATE; } else { --(*ppcCurrentChar); iState = HEX_CONST_FOUND_STATE; } break; case DEC_CONST_FOUND_STATE: pcBuffer[i++] = '\0'; pcToken = (char*)malloc(i); strcpy(pcToken, pcBuffer); return pcToken; case OCT_CONST_FOUND_STATE: pcBuffer[i++] = '\0'; pcToken = (char*)malloc(i); strcpy(pcToken, pcBuffer); return pcToken; case HEX_CONST_FOUND_STATE: pcBuffer[i++] = '\0'; pcToken = (char*)malloc(i); strcpy(pcToken, pcBuffer); return pcToken; case NO_TOKEN_FOUND_STATE: return NULL; } } } void printToken(void **ppvToken, void *pvExtra) { char *pcToken = *((char**)ppvToken); printf("%s\n", pcToken); } void freeToken(void **ppvToken, void *pvExtra) { char *pcToken = *((char**)ppvToken); free(pcToken); } int main(int argc, char *argv[]) /* Read a C program from stdin, and write the integer constants that it contains to stdout. For simplicity, assume that the C program contains no: -- Invalid integer constants. -- Floating point constants. -- Integer constants that use a long-suffix (L or l) or an unsigned-suffix (U or u). -- Comments that contain characters that could be construed as integer constants. */ { char *pcToken; List_T oTokenList; char pcLine[MAX_LINE_SIZE]; char *pcCurrentChar; oTokenList = List_new(); while (fgets(pcLine, MAX_LINE_SIZE, stdin) != NULL) { pcCurrentChar = pcLine; while ((pcToken = getToken(&pcCurrentChar)) != NULL) List_addLast(oTokenList, pcToken); } printf("Integer constants:\n"); List_map(oTokenList, printToken, NULL); List_map(oTokenList, freeToken, NULL); return 0; }