C:/Users/Dennis/src/lang/bertrand/BERTRAND/bertrand/scanner.c

Go to the documentation of this file.
00001 /******************************************************************
00002  *
00003  * scanner for a constraint satisfaction system
00004  * EOF must be -1 for character translation to work
00005  * Entry point is scan()
00006  *
00007  ******************************************************************/
00008 
00009 #include "def.h"
00010 
00011 /* global values set by scan() as side effects of returning a token */
00012 double token_val;               /* numeric value; assigned when scan() returns NUMBER */
00013 char token_prval[MAXTOKEN + 1]; /* NUL-terminated print value of token (MAXTOKEN chars + NUL) */
00014 OP *token_op;                   /* list node matched by scan() for OPER and TYPE tokens */
00015 
00016 /* globals for printing informative error messages */
00017 int lineno = 1;                 /* current line number; scan() bumps it at each newline-class char */
00018 int charno = 0;                 /* characters read so far on the current line; reset at each newline */
00019 
00020 /* input file pointers */
00021 FILE *infile;                   /* stream scan() reads; not assigned here (intended default: standard input) */
00022 char *infilename;               /* input file name; scan() passes it to char_free() when an #include ends */
00023 
00024 /* used for #include file stacks; scan() pops one entry when the current file hits EOF (pushed elsewhere, presumably prep.c) */
00025 int filespushed = 0;            /* depth of #includes, i.e. number of saved entries */
00026 FILE *infiles[MAXFILES];        /* saved file pointers of the enclosing files */
00027 char *infilenames[MAXFILES];    /* saved file names of the enclosing files */
00028 int inlinenos[MAXFILES];        /* saved line numbers, restored into lineno */
00029 int verboses[MAXFILES];         /* saved verbose flags, restored into the global verbose */
00030 
00031 /* lists of user defined operators, from ops.c; scan() walks them through ->next and compares ->pname */
00032 extern OP *single_op;           /* list of single-character operators (matched on pname[0]) */
00033 extern OP *double_op;           /* list of two character operators (matched on pname[0] and pname[1]) */
00034 extern OP *name_op;             /* list of alphanumeric operators (matched with strcmp) */
00035 extern OP *type_op;             /* list of types (matched against the name after the leading ') */
00036 
00037 /* helpers defined in other files; NOTE(review): declared without parameter lists, so calls are not type checked */
00038 extern void preprocess();       /* from prep.c; called with the text of a # line in token_prval */
00039 extern void char_free();        /* from util.c; scan() calls it with infilename */
00040 
00041 /*  Character input class translations (the class is the column index into stab and atab): */
00042 /*  These definitions are in def.h */
00043 /* C_EOF        0       eof (-1), NUL (0)               */
00044 /* C_CTRL       1       (invalid) control character     */
00045 /* C_NL         2       lf, cr, ff (lineno++)           */
00046 /* C_WS         3       blank, tab (whitespace)         */
00047 /* C_SPC        4       special character               */
00048 /* C_NUM        5       numeric 0-9                     */
00049 /* C_ALPH       6       alphabetic a-z A-Z, underscore _ */
00050 /* C_PER        7       period .                        */
00051 /* C_DQ         8       double quote " (string)         */
00052 /* C_BQ         9       back quote ` (string escape)    */
00053 /* C_SQ         10      single quote ' (type)           */
00054 /* C_BRC        11      braces { } (rule body)          */
00055 /* C_LB         12      pound sign # (preprocessor)     */
00056 /*  special characters are all symbols such as + * & , etc. */
00057 static char transtab[] =
00058 /*    -1        0       1       2       3       4       5       6       7   (first row has 9 entries: EOF plus 0x00-0x07) */
00059     {C_EOF, C_EOF, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL,
00060 /* 0x08 */ C_CTRL,   C_WS,   C_NL, C_CTRL,   C_NL,   C_NL, C_CTRL, C_CTRL,
00061 /* 0x10 */ C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL,
00062 /* 0x18 */ C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL, C_CTRL,
00063 /* 0x20 */   C_WS,  C_SPC,   C_DQ,   C_LB,  C_SPC,  C_SPC,  C_SPC,   C_SQ,
00064 /* 0x28 */  C_SPC,  C_SPC,  C_SPC,  C_SPC,  C_SPC,  C_SPC,  C_PER,  C_SPC,
00065 /* 0x30 */  C_NUM,  C_NUM,  C_NUM,  C_NUM,  C_NUM,  C_NUM,  C_NUM,  C_NUM,
00066 /* 0x38 */  C_NUM,  C_NUM,  C_SPC,  C_SPC,  C_SPC,  C_SPC,  C_SPC,  C_SPC,
00067 /* 0x40 */  C_SPC, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH,
00068 /* 0x48 */ C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH,
00069 /* 0x50 */ C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH,
00070 /* 0x58 */ C_ALPH, C_ALPH, C_ALPH,  C_SPC,  C_SPC,  C_SPC,  C_SPC, C_ALPH,
00071 /* 0x60 */   C_BQ, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH,
00072 /* 0x68 */ C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH,
00073 /* 0x70 */ C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH, C_ALPH,
00074 /* 0x78 */ C_ALPH, C_ALPH, C_ALPH,  C_BRC,  C_SPC,  C_BRC,  C_SPC, C_CTRL};
00075 /* trans[c] is class of input character c, for c in -1 (EOF) .. 0x7F only; NOTE(review): 0x80 and above have no entry, so c must be range-checked before indexing */
00076 char *trans = transtab + 1;     /* offset by one so that trans[-1] == transtab[0] (the EOF entry) */
00077 
00078 /* Finite state machine states (row index into stab and atab): */
00079 #define ST  0   /* start                */
00080 #define SP  1   /* special (one special char seen) */
00081 #define S2  2   /* second special (two char operator accepted) */
00082 #define SL  3   /* leading period: a digit makes it a fraction, a second period a comment, anything else a reserved char */
00083 #define SN  4   /* number (integer part) */
00084 #define SF  5   /* fraction of number   */
00085 #define SI  6   /* identifier           */
00086 #define SS  7   /* string (after the opening double quote) */
00087 #define SE  8   /* string escape (after a back quote inside a string) */
00088 #define SC  9   /* comment (after two periods; ends at newline or eof) */
00089 #define SX 10   /* preprocessor syntaX (after #; ends at newline, control char or eof) */
00090 
00091 #define TERM_STATE 50   /* start of terminal states: any state >= this ends the scan loop */
00092 /* temporary terminal states */
00093 #define XR 51   /* Reserved char { } . (scan() returns the character itself) */
00094 /* scanner input errors (each is reported through error()) */
00095 #define EC 55   /* Character error      */
00096 #define EN 56   /* Number error         */
00097 #define ES 57   /* String error         */
00098 /* terminal states */
00099 #define TI 61   /* Identifier (also type names and alphanumeric operators) */
00100 #define TN 62   /* Number               */
00101 #define TO 63   /* single char Operator */
00102 #define T2 64   /* 2 char operator      */
00103 #define TS 65   /* String               */
00104 static char stab[11][13] = {    /* state table: stab[state][class] is the next state */
00105 /*0   1   2   3   4   5   6   7   8   9  10  11  12
00106 EOF CTL  `n  SP   *   0   a   .   "   `  '   {}  #
00107          `f  `t   +   9   Z                             */
00108 {ST, EC, ST, ST, SP, SN, SI, SL, SS, SI, SI, XR, SX},  /* STart */
00109 {TO, TO, TO, TO, S2, TO, TO, TO, TO, TO, TO, TO, TO},  /* SPecial */
00110 {T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2},  /* S 2 special */
00111 {XR, XR, XR, XR, XR, SF, XR, SC, XR, XR, XR, XR, XR},  /* S Leading decimal */
00112 {TN, TN, TN, TN, TN, SN, EN, SF, TN, TN, TN, TN, TN},  /* S Number */
00113 {TN, TN, TN, TN, TN, SF, EN, EN, TN, TN, TN, TN, TN},  /* S Fraction */
00114 {TI, TI, TI, TI, TI, SI, SI, TI, TI, TI, TI, TI, TI},  /* S Ident */
00115 {ES, SS, ES, SS, SS, SS, SS, SS, TS, SE, SS, SS, SS},  /* S String */
00116 {ES, SS, SS, SS, SS, SS, SS, SS, SS, SS, SS, SS, SS},  /* S Escape */
00117 {ST, SC, ST, SC, SC, SC, SC, SC, SC, SC, SC, SC, SC},  /* S Comment */
00118 {ST, ST, ST, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX}}; /* SyntaX */
00119 
00120 /* action table: atab[state][class] is performed before moving to stab[state][class] */
00121 #define AU 0    /* unget: leave the current char for the next step */
00122 #define AE 1    /* eat character: read the next one from infile */
00123 #define AA 2    /* add to symbol (token_prval), then eat */
00124 #define AN 3    /* add numeric: accumulate the digit into the value, then add */
00125 #define AF 4    /* add decimal fraction: accumulate the digit after the point, then add */
00126 #define AX 5    /* add escape: translate n t b r f (anything else is an error), then add */
00127 #define AS 6    /* check special: add if the two chars form a double operator, else finish as a single operator */
00128 #define AR 7    /* Restart: throw away the token text */
00129 #define AP 8    /* preprocessor: pass the line to preprocess(), then restart */
00130 #define AC 9    /* check end of file: resume the enclosing #include file, or return EOF */
00131 static char atab[11][13] = {    /* action table, same row and column layout as stab */
00132 /*0   1   2   3   4   5   6   7   8   9  10  11  12
00133 EOF CTL  `n  SP   *   0   a   .   "   `  '   {}  #
00134          `f  `t   +   9   Z                             */
00135 {AC, AA, AE, AE, AA, AN, AA, AA, AE, AA, AA, AA, AE},  /* STart */
00136 {AU, AU, AE, AE, AS, AU, AU, AU, AU, AU, AU, AU, AU},  /* SPecial */
00137 {AU, AU, AE, AE, AU, AU, AU, AU, AU, AU, AU, AU, AU},  /* S 2 special */
00138 {AU, AU, AE, AE, AU, AF, AU, AE, AU, AU, AU, AU, AU},  /* S Leading decimal */
00139 {AU, AU, AE, AE, AU, AN, AA, AA, AU, AU, AU, AU, AU},  /* S Number */
00140 {AU, AU, AE, AE, AU, AF, AA, AA, AU, AU, AU, AU, AU},  /* S Fraction */
00141 {AU, AU, AE, AE, AU, AA, AA, AU, AU, AU, AU, AU, AU},  /* S Ident */
00142 {AU, AA, AU, AA, AA, AA, AA, AA, AE, AE, AA, AA, AA},  /* S String */
00143 {AU, AX, AE, AX, AX, AX, AX, AX, AA, AA, AX, AX, AX},  /* S Escape */
00144 {AR, AE, AR, AE, AE, AE, AE, AE, AE, AE, AE, AE, AE},  /* S Comment */
00145 {AP, AP, AP, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA}}; /* SyntaX */
00146 
00147 /* Class of input character ch.  transtab only covers EOF (-1) through
00148  * 0x7F, but getc() can return any unsigned char value, so anything
00149  * outside the table is classed as an (invalid) control character
00150  * instead of being used as an out-of-range index. */
00151 static int
00152 char_class(int ch)
00153 {
00154     if (ch < -1 || ch >= (int) sizeof(transtab) - 1)
00155         return C_CTRL;
00156     return transtab[ch + 1];
00157 }
00158 
00159 /*****************************************************************
00160  * Finite state automaton scanner.  Reads infile.
00161  *
00162  * Each step performs the action atab[state][class] and then moves
00163  * to stab[state][class], where class is the class of the lookahead
00164  * character c.  c and class are static: the character that ended
00165  * one token is the first one examined by the next call.
00166  *
00167  * A token of more than MAXTOKEN characters is reported through
00168  * error(); an over-long preprocessor line is truncated so that it
00169  * still fits in token_prval together with its newline.
00170  *
00171  * exit:        returns token type (defined in def.h): TYPE, OPER,
00172  *              IDENT, NUMBER or STRING; a reserved character
00173  *              . { } as its own character code; or EOF when the
00174  *              outermost input file is exhausted
00175  *              sets global values:
00176  *              token_prval - print name of token
00177  *              token_val - if token is of type NUMBER
00178  *              token_op - if token is of type OPER or TYPE
00179  *
00180  * NOTE(review): error() is presumed not to return (TODO confirm
00181  * against its definition); if it does return, scan() reports the
00182  * problem and then returns EOF instead of running off its end.
00183  *
00184  *****************************************************************/
00185 int
00186 scan()
00187 {
00188     /* static variables, initial values used only first time scanner called */
00189     static int class = C_NL;    /* class of c, initially newline */
00190     static int c = '\n';        /* lookahead: last character read */
00191 
00192     /* these variables are reinitialized every time scanner is called */
00193     int state = ST;             /* current state, initially start */
00194     int tlength = 0;            /* length of token, initially zero */
00195     int toolong = 0;            /* set when a character did not fit */
00196     double fvalue = 0.0;        /* floating point value */
00197     double place = 0.1;         /* place after decimal point */
00198 
00199 #ifdef DEBUG
00200     printf("enter scanner: state %d, class %d, char '%c'\n", state, class, c);
00201     fflush(stdout);
00202 #endif
00203 
00204     /* states >= TERM_STATE mean that a token has been recognized */
00205     while (state < TERM_STATE) {
00206         int add = 0;            /* append c to token_prval, then eat */
00207         int eat = 0;            /* read the next character into c */
00208 
00209 #ifdef DEBUG
00210         printf("scanner: action %d,", atab[state][class]);
00211         fflush(stdout);
00212 #endif
00213 
00214         switch (atab[state][class]) {   /* perform action */
00215         case AU:                /* unget: c stays as the lookahead */
00216             break;
00217         case AE:                /* eat */
00218             eat = 1;
00219             break;
00220         case AA:                /* add */
00221             add = 1;
00222             break;
00223         case AN:                /* numeric */
00224             fvalue = (fvalue * 10) + (c - '0');
00225             add = 1;
00226             break;
00227         case AF:                /* decimal fraction */
00228             fvalue += (c - '0') * place;
00229             place /= 10;
00230             add = 1;
00231             break;
00232         case AX:                /* escape: the translated char is added */
00233             switch (c) {
00234             case 'n': c = '\n'; break;  /* newline */
00235             case 't': c = '\t'; break;  /* tab */
00236             case 'b': c = '\b'; break;  /* backspace */
00237             case 'r': c = '\r'; break;  /* return */
00238             case 'f': c = '\f'; break;  /* formfeed */
00239             default:
00240                 fprintf(stderr, "character '%c':\n", c);
00241                 error("illegal string escape");
00242                 break;
00243             }
00244             add = 1;
00245             break;
00246         case AS:                /* check for special operator */
00247             /* see if *token_prval and c form a double operator */
00248             for (token_op = double_op; token_op; token_op = token_op->next) {
00249                 if (*token_prval == token_op->pname[0] &&
00250                   c == token_op->pname[1])
00251                     break;      /* found, token_op is the operator */
00252             }
00253             if (!token_op) {
00254                 /* not a double op: *token_prval must be a single char
00255                    oper, and c stays as the lookahead for the next call */
00256                 token_prval[1] = '\0';
00257 #ifdef DEBUG
00258                 printf("\tchange state to %d\n", TO);
00259                 fflush(stdout);
00260 #endif
00261                 state = TO;
00262                 goto classify;  /* exit to terminal state TO */
00263             }
00264             add = 1;            /* found, do add action */
00265             break;
00266         case AP:                /* interpret preprocessor statement */
00267             /* keep room for the newline and the terminating NUL */
00268             if (tlength >= MAXTOKEN) tlength = MAXTOKEN - 1;
00269             token_prval[tlength++] = '\n';
00270             token_prval[tlength] = '\0';
00271             preprocess();
00272             /* fall through */
00273         case AR:                /* restart, throw away token */
00274             tlength = 0;
00275             toolong = 0;
00276             break;
00277         case AC:                /* end of file */
00278             if (!filespushed) {         /* outermost file: really done */
00279                 class = C_NL;
00280                 c = '\n';
00281                 return EOF;
00282             }
00283             /* end of an #include: resume the enclosing file */
00284             fclose(infile);
00285             filespushed--;
00286             char_free(infilename);      /* free character string */
00287             infile = infiles[filespushed];
00288             infilename = infilenames[filespushed];
00289             lineno = inlinenos[filespushed];
00290             verbose = verboses[filespushed];
00291             c = '\n';
00292             charno = 0;
00293 #ifdef DEBUG
00294             printf("back to file %s, line %d\n", infilename, lineno);
00295 #endif
00296             break;
00297         default:
00298             break;
00299         }
00300 
00301         if (add) {
00302             /* never write past token_prval[MAXTOKEN - 1]; remember the
00303                overflow so it can be reported once the token ends */
00304             if (tlength < MAXTOKEN) token_prval[tlength++] = c;
00305             else toolong = 1;
00306             eat = 1;
00307         }
00308         if (eat) {
00309             c = getc(infile);
00310             if (char_class(c) == C_NL) { lineno++; charno = 0; }
00311             else charno++;
00312         }
00313 
00314         state = stab[state][class];     /* calculate new state */
00315         class = char_class(c);
00316 #ifdef DEBUG
00317         printf(" new state %d,", state);
00318         if (c != EOF) printf(" next char '%c',", c);
00319         else printf(" next char 'EOF',");
00320         printf(" next class %d\n", class);
00321         fflush(stdout);
00322 #endif
00323     }   /* if you leave this loop, you have a token */
00324 
00325     token_prval[tlength] = '\0';    /* null terminate token print name */
00326 
00327 #ifdef DEBUG
00328     printf("terminal state %d, token is '%s'\n", state, token_prval);
00329     fflush(stdout);
00330 #endif
00331 
00332     if (toolong) {
00333         switch (state) {
00334         case TS: error("string too long"); break;
00335         case TI: error("name too long"); break;
00336         case TN: error("number too big"); break;
00337         default: error("object too long"); break;
00338         }
00339     }
00340 
00341 classify:
00342     switch (state) {
00343     case TI:        /* identifier (could be a name operator) */
00344         if (*token_prval == '\'') {     /* a type */
00345             for (token_op = type_op; token_op; token_op = token_op->next) {
00346                 if (strcmp(token_prval + 1, token_op->pname) == 0) return TYPE;
00347             }
00348             fprintf(stderr, "type name %s not declared\n", token_prval);
00349             error("invalid type");
00350         }
00351         for (token_op = name_op; token_op; token_op = token_op->next) {
00352             if (strcmp(token_prval, token_op->pname) == 0) return OPER;
00353         }
00354         /* otherwise */ return IDENT;
00355 
00356     case TO:        /* single character operator */
00357         for (token_op = single_op; token_op; token_op = token_op->next) {
00358             if (*token_prval == token_op->pname[0]) return OPER;
00359         }
00360         /* error, special char that is not an operator */
00361         fprintf(stderr, "character is: '%c'\n", *token_prval);
00362         error("invalid character");
00363         break;
00364     case T2:        /* two character operator */
00365         /* token_op was set by the AS action */
00366         return OPER;
00367     case TN:        /* number */
00368         token_val = fvalue;
00369         return NUMBER;
00370     case TS:        /* string */
00371         return STRING;
00372     case XR:        /* reserved character . { } */
00373         return (int) *token_prval;
00374     case EC:
00375         error("illegal character");
00376         break;
00377     case EN:
00378         error("mis-formed number (or possibly name)");
00379         break;
00380     case ES:
00381         error("unterminated string");
00382         break;
00383     default:
00384         break;
00385     }
00386 
00387     /* only reached if error() returned: do not fall off the end */
00388     return EOF;
00389 }

Generated on Fri Jan 25 09:58:43 2008 for Bertrand by  doxygen 1.5.4