637 lines
12 KiB
C
637 lines
12 KiB
C
/* Jim Noble at Planning Research Corporation, June 1987. Fixes for */
|
|
/* miscellaneous bugs found when reformatting state transititon code in */
|
|
/* CKCPRO.W. */
|
|
|
|
char *wartv = "Wart Version 1A(005) Jan 1988";
|
|
|
|
/* W A R T */
|
|
|
|
/*
|
|
pre-process a lex-like file into a C program.
|
|
|
|
Author:Jeff Damens, Columbia University Center for Computing Activites, 11/84.
|
|
Copyright (C) 1985, Trustees of Columbia University in the City of New York.
|
|
Permission is granted to any individual or institution to use, copy, or
|
|
redistribute this software so long as it is not sold for profit, provided this
|
|
copyright notice is retained.
|
|
|
|
* input format is:
|
|
* lines to be copied | %state <state names...>
|
|
* %%
|
|
* <state> | <state,state,...> CHAR { actions }
|
|
* ...
|
|
* %%
|
|
*/
|
|
|
|
#include "ckcdeb.h" /* Includes */
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
|
|
#define C_L 014 /* Formfeed */
|
|
|
|
#define SEP 1 /* Token types */
|
|
#define LBRACK 2
|
|
#define RBRACK 3
|
|
#define WORD 4
|
|
#define COMMA 5
|
|
|
|
/* Storage sizes */
|
|
|
|
#define MAXSTATES 50 /* max number of states */
|
|
#define MAXWORD 50 /* max # of chars/word */
|
|
#define SBYTES ((MAXSTATES+7)/8) /* # of bytes for state bitmask */
|
|
|
|
/* Name of wart function in generated program */
|
|
|
|
#ifndef FNAME
|
|
#define FNAME "wart"
|
|
#endif
|
|
|
|
/* Structure for state information */
|
|
|
|
struct trans { CHAR states[SBYTES]; /* included states */
|
|
int anyst; /* true if this good from any state */
|
|
CHAR inchr; /* input character */
|
|
int actno; /* associated action */
|
|
struct trans *nxt; }; /* next transition */
|
|
|
|
typedef struct trans *Trans;
|
|
|
|
char *malloc(); /* Returns pointer (not int) */
|
|
|
|
|
|
/* Variables and tables */
|
|
|
|
int lines,nstates,nacts;
|
|
|
|
char tokval[MAXWORD];
|
|
|
|
int tbl[MAXSTATES*128];
|
|
|
|
|
|
|
|
char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\n";
|
|
|
|
char *fname = FNAME; /* function name goes here */
|
|
|
|
/* rest of program... */
|
|
|
|
char *txt2 = "()\n\
|
|
{\n\
|
|
int c,actno;\n\
|
|
extern int tbl[];\n\
|
|
while (1) {\n\
|
|
c = input();\n\
|
|
if ((actno = tbl[c + state*128]) != -1)\n\
|
|
switch(actno) {\n";
|
|
|
|
/* this program's output goes here, followed by final text... */
|
|
|
|
char *txt3 = "\n }\n }\n\}\n\n";
|
|
|
|
|
|
/*
|
|
* turn on the bit associated with the given state
|
|
*
|
|
*/
|
|
setstate(state,t)
|
|
int state;
|
|
Trans t;
|
|
{
|
|
int idx,msk;
|
|
idx = state/8; /* byte associated with state */
|
|
msk = 0x80 >> (state % 8); /* bit mask for state */
|
|
t->states[idx] |= msk;
|
|
}
|
|
|
|
/*
|
|
* see if the state is involved in the transition
|
|
*
|
|
*/
|
|
|
|
teststate(state,t)
|
|
int state;
|
|
Trans t;
|
|
{
|
|
int idx,msk;
|
|
idx = state/8;
|
|
msk = 0x80 >> (state % 8);
|
|
return(t->states[idx] & msk);
|
|
}
|
|
|
|
|
|
/*
|
|
* read input from here...
|
|
*
|
|
*/
|
|
|
|
Trans
|
|
rdinput(infp,outfp)
|
|
FILE *infp,*outfp;
|
|
{
|
|
Trans x,rdrules();
|
|
lines = 1; /* line counter */
|
|
nstates = 0; /* no states */
|
|
nacts = 0; /* no actions yet */
|
|
fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
|
|
fprintf(outfp,"Wart preprocessor. */\n");
|
|
fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/');
|
|
fprintf(outfp,"source file instead, */\n");
|
|
fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/');
|
|
fprintf(outfp,"C source file. */\n\n");
|
|
fprintf(outfp,"%c* Wart Version Info: */\n",'/');
|
|
fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv);
|
|
|
|
initial(infp,outfp); /* read state names, initial defs */
|
|
prolog(outfp); /* write out our initial code */
|
|
x = rdrules(infp,outfp); /* read rules */
|
|
epilogue(outfp); /* write out epilogue code */
|
|
return(x);
|
|
}
|
|
|
|
|
|
/*
|
|
* initial - read initial definitions and state names. Returns
|
|
* on EOF or %%.
|
|
*
|
|
*/
|
|
|
|
initial(infp,outfp)
|
|
FILE *infp,*outfp;
|
|
{
|
|
int c;
|
|
char wordbuf[MAXWORD];
|
|
while ((c = getc(infp)) != EOF) {
|
|
if (c == '%') {
|
|
rdword(infp,wordbuf);
|
|
if (strcmp(wordbuf,"states") == 0)
|
|
rdstates(infp,outfp);
|
|
else if (strcmp(wordbuf,"%") == 0) return;
|
|
else fprintf(outfp,"%%%s",wordbuf);
|
|
}
|
|
else putc(c,outfp);
|
|
if (c == '\n') lines++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* boolean function to tell if the given character can be part of
|
|
* a word.
|
|
*
|
|
*/
|
|
isin(s,c) char *s; int c; {
|
|
for (; *s != '\0'; s++)
|
|
if (*s == c) return(1);
|
|
return(0);
|
|
}
|
|
isword(c)
|
|
int c;
|
|
{
|
|
static char special[] = ".%_-$@"; /* these are allowable */
|
|
return(isalnum(c) || isin(special,c));
|
|
}
|
|
|
|
/*
|
|
* read the next word into the given buffer.
|
|
*
|
|
*/
|
|
rdword(fp,buf)
|
|
FILE *fp;
|
|
char *buf;
|
|
{
|
|
int len = 0,c;
|
|
while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c;
|
|
*buf++ = '\0'; /* tie off word */
|
|
ungetc(c,fp); /* put break char back */
|
|
}
|
|
|
|
|
|
/*
|
|
* read state names, up to a newline.
|
|
*
|
|
*/
|
|
|
|
rdstates(fp,ofp)
|
|
FILE *fp,*ofp;
|
|
{
|
|
int c;
|
|
char wordbuf[MAXWORD];
|
|
while ((c = getc(fp)) != EOF && c != '\n')
|
|
{
|
|
if (isspace(c) || c == C_L) continue; /* skip whitespace */
|
|
ungetc(c,fp); /* put char back */
|
|
rdword(fp,wordbuf); /* read the whole word */
|
|
enter(wordbuf,++nstates); /* put into symbol tbl */
|
|
fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
|
|
}
|
|
lines++;
|
|
}
|
|
|
|
/*
|
|
* allocate a new, empty transition node
|
|
*
|
|
*/
|
|
|
|
Trans
|
|
newtrans()
|
|
{
|
|
Trans new;
|
|
int i;
|
|
new = (Trans) malloc(sizeof (struct trans));
|
|
for (i=0; i<SBYTES; i++) new->states[i] = 0;
|
|
new->anyst = 0;
|
|
new->nxt = NULL;
|
|
return(new);
|
|
}
|
|
|
|
|
|
/*
|
|
* read all the rules.
|
|
*
|
|
*/
|
|
|
|
Trans
|
|
rdrules(fp,out)
|
|
FILE *fp,*out;
|
|
{
|
|
Trans head,cur,prev;
|
|
int curtok,i;
|
|
head = cur = NULL;
|
|
while ((curtok = gettoken(fp)) != SEP)
|
|
|
|
switch(curtok) {
|
|
case LBRACK: if (cur == NULL) cur = newtrans();
|
|
else fatal("duplicate state list");
|
|
statelist(fp,cur);/* set states */
|
|
continue; /* prepare to read char */
|
|
|
|
case WORD: if (strlen(tokval) != 1)
|
|
fatal("multiple chars in state");
|
|
if (cur == NULL) {
|
|
cur = newtrans();
|
|
cur->anyst = 1;
|
|
}
|
|
cur->actno = ++nacts;
|
|
cur->inchr = tokval[0];
|
|
if (head == NULL) head = cur;
|
|
else prev->nxt = cur;
|
|
prev = cur;
|
|
cur = NULL;
|
|
copyact(fp,out,nacts);
|
|
break;
|
|
default: fatal("bad input format");
|
|
}
|
|
|
|
return(head);
|
|
}
|
|
|
|
|
|
/*
|
|
* read a list of (comma-separated) states, set them in the
|
|
* given transition.
|
|
*
|
|
*/
|
|
statelist(fp,t)
|
|
FILE *fp;
|
|
Trans t;
|
|
{
|
|
int curtok,sval;
|
|
curtok = COMMA;
|
|
while (curtok != RBRACK) {
|
|
if (curtok != COMMA) fatal("missing comma");
|
|
if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
|
|
if ((sval = lkup(tokval)) == -1) {
|
|
fprintf(stderr,"state %s undefined\n",tokval);
|
|
fatal("undefined state");
|
|
}
|
|
setstate(sval,t);
|
|
curtok = gettoken(fp);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* copy an action from the input to the output file
|
|
*
|
|
*/
|
|
copyact(inp,outp,actno)
|
|
FILE *inp,*outp;
|
|
int actno;
|
|
{
|
|
int c,bcnt;
|
|
fprintf(outp,"case %d:\n",actno);
|
|
while (c = getc(inp), (isspace(c) || c == C_L))
|
|
if (c == '\n') lines++;
|
|
if (c == '{') {
|
|
bcnt = 1;
|
|
fputs(" {",outp);
|
|
while (bcnt > 0 && (c = getc(inp)) != EOF) {
|
|
if (c == '{') bcnt++;
|
|
else if (c == '}') bcnt--;
|
|
else if (c == '\n') lines++;
|
|
putc(c,outp);
|
|
}
|
|
if (bcnt > 0) fatal("action doesn't end");
|
|
}
|
|
else {
|
|
while (c != '\n' && c != EOF) {
|
|
putc(c,outp);
|
|
c = getc(inp);
|
|
}
|
|
lines++;
|
|
}
|
|
fprintf(outp,"\n break;\n");
|
|
}
|
|
|
|
|
|
/*
|
|
* find the action associated with a given character and state.
|
|
* returns -1 if one can't be found.
|
|
*
|
|
*/
|
|
faction(hd,state,chr)
|
|
Trans hd;
|
|
int state,chr;
|
|
{
|
|
while (hd != NULL) {
|
|
if (hd->anyst || teststate(state,hd))
|
|
if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno);
|
|
hd = hd->nxt;
|
|
}
|
|
return(-1);
|
|
}
|
|
|
|
|
|
/*
|
|
* empty the table...
|
|
*
|
|
*/
|
|
emptytbl()
|
|
{
|
|
int i;
|
|
for (i=0; i<nstates*128; i++) tbl[i] = -1;
|
|
}
|
|
|
|
/*
|
|
* add the specified action to the output for the given state and chr.
|
|
*
|
|
*/
|
|
|
|
addaction(act,state,chr)
|
|
int act,state,chr;
|
|
{
|
|
tbl[state*128 + chr] = act;
|
|
}
|
|
|
|
writetbl(fp)
|
|
FILE *fp;
|
|
{
|
|
warray(fp,"tbl",tbl,128*(nstates+1));
|
|
}
|
|
|
|
|
|
/*
|
|
* write an array to the output file, given its name and size.
|
|
*
|
|
*/
|
|
warray(fp,nam,cont,siz)
|
|
FILE *fp;
|
|
char *nam;
|
|
int cont[],siz;
|
|
{
|
|
int i;
|
|
fprintf(fp,"int %s[] = {\n",nam);
|
|
for (i = 0; i < siz; ) {
|
|
fprintf(fp,"%2d, ",cont[i]);
|
|
if ((++i % 16) == 0) putc('\n',fp);
|
|
}
|
|
fprintf(fp,"};\n");
|
|
}
|
|
|
|
main(argc,argv)
|
|
int argc;
|
|
char *argv[];
|
|
{
|
|
Trans head;
|
|
int state,c;
|
|
FILE *infile,*outfile;
|
|
|
|
if (argc > 1) {
|
|
if ((infile = fopen(argv[1],"r")) == NULL) {
|
|
fprintf(stderr,"Can't open %s\n",argv[1]);
|
|
fatal("unreadable input file"); } }
|
|
else infile = stdin;
|
|
|
|
if (argc > 2) {
|
|
if ((outfile = fopen(argv[2],"w")) == NULL) {
|
|
fprintf(stderr,"Can't write to %s\n",argv[2]);
|
|
fatal("bad output file"); } }
|
|
else outfile = stdout;
|
|
|
|
clrhash(); /* empty hash table */
|
|
head = rdinput(infile,outfile); /* read input file */
|
|
emptytbl(); /* empty our tables */
|
|
for (state = 0; state <= nstates; state++)
|
|
for (c = 1; c < 128; c++)
|
|
addaction(faction(head,state,c),state,c); /* find actions, add to tbl */
|
|
writetbl(outfile);
|
|
copyrest(infile,outfile);
|
|
printf("%d states, %d actions\n",nstates,nacts);
|
|
#ifdef undef
|
|
for (state = 1; state <= nstates; state ++)
|
|
for (c = 1; c < 128; c++)
|
|
if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n",
|
|
state,c,tbl[state*128 + c]);
|
|
#endif
|
|
exit(GOOD_EXIT);
|
|
}
|
|
|
|
|
|
/*
|
|
* fatal error handler
|
|
*
|
|
*/
|
|
|
|
fatal(msg)
|
|
char *msg;
|
|
{
|
|
fprintf(stderr,"error in line %d: %s\n",lines,msg);
|
|
exit(BAD_EXIT);
|
|
}
|
|
|
|
prolog(outfp)
|
|
FILE *outfp;
|
|
{
|
|
int c;
|
|
while ((c = *txt1++) != '\0') putc(c,outfp);
|
|
while ((c = *fname++) != '\0') putc(c,outfp);
|
|
while ((c = *txt2++) != '\0') putc(c,outfp);
|
|
}
|
|
|
|
epilogue(outfp)
|
|
FILE *outfp;
|
|
{
|
|
int c;
|
|
while ((c = *txt3++) != '\0') putc(c,outfp);
|
|
}
|
|
|
|
copyrest(in,out)
|
|
FILE *in,*out;
|
|
{
|
|
int c;
|
|
while ((c = getc(in)) != EOF) putc(c,out);
|
|
}
|
|
|
|
|
|
/*
|
|
* gettoken - returns token type of next token, sets tokval
|
|
* to the string value of the token if appropriate.
|
|
*
|
|
*/
|
|
|
|
gettoken(fp)
|
|
FILE *fp;
|
|
{
|
|
int c;
|
|
while (1) { /* loop if reading comments... */
|
|
do {
|
|
c = getc(fp);
|
|
if (c == '\n') lines++;
|
|
} while ((isspace(c) || c == C_L)); /* skip whitespace */
|
|
switch(c) {
|
|
case EOF: return(SEP);
|
|
case '%': if ((c = getc(fp)) == '%') return(SEP);
|
|
tokval[0] = '%';
|
|
tokval[1] = c;
|
|
rdword(fp,tokval+2);
|
|
return(WORD);
|
|
case '<': return(LBRACK);
|
|
case '>': return(RBRACK);
|
|
case ',': return(COMMA);
|
|
case '/': if ((c = getc(fp)) == '*') {
|
|
rdcmnt(fp); /* skip over the comment */
|
|
continue; } /* and keep looping */
|
|
else {
|
|
ungetc(c,fp); /* put this back into input */
|
|
c = '/'; } /* put character back, fall thru */
|
|
|
|
default: if (isword(c)) {
|
|
ungetc(c,fp);
|
|
rdword(fp,tokval);
|
|
return(WORD);
|
|
}
|
|
else fatal("Invalid character in input");
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* skip over a comment
|
|
*
|
|
*/
|
|
|
|
rdcmnt(fp)
|
|
FILE *fp;
|
|
{
|
|
int c,star,prcnt;
|
|
prcnt = star = 0; /* no star seen yet */
|
|
while (!((c = getc(fp)) == '/' && star)) {
|
|
if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
|
|
prcnt = (c == '%');
|
|
star = (c == '*');
|
|
if (c == '\n') lines++; }
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* symbol table management for wart
|
|
*
|
|
* entry points:
|
|
* clrhash - empty hash table.
|
|
* enter - enter a name into the symbol table
|
|
* lkup - find a name's value in the symbol table.
|
|
*
|
|
*/
|
|
|
|
#define HASHSIZE 101 /* # of entries in hash table */
|
|
|
|
struct sym { char *name; /* symbol name */
|
|
int val; /* value */
|
|
struct sym *hnxt; } /* next on collision chain */
|
|
*htab[HASHSIZE]; /* the hash table */
|
|
|
|
|
|
/*
|
|
* empty the hash table before using it...
|
|
*
|
|
*/
|
|
clrhash()
|
|
{
|
|
int i;
|
|
for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
|
|
}
|
|
|
|
/*
|
|
* compute the value of the hash for a symbol
|
|
*
|
|
*/
|
|
hash(name)
|
|
char *name;
|
|
{
|
|
int sum;
|
|
for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
|
|
sum %= HASHSIZE; /* take sum mod hashsize */
|
|
if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */
|
|
return(sum);
|
|
}
|
|
|
|
/*
|
|
* make a private copy of a string...
|
|
*
|
|
*/
|
|
char *
|
|
copy(s)
|
|
char *s;
|
|
{
|
|
char *new;
|
|
new = (char *) malloc(strlen(s) + 1);
|
|
strcpy(new,s);
|
|
return(new);
|
|
}
|
|
|
|
|
|
/*
|
|
* enter state name into the hash table
|
|
*
|
|
*/
|
|
enter(name,svalue)
|
|
char *name;
|
|
int svalue;
|
|
{
|
|
int h;
|
|
struct sym *cur;
|
|
if (lkup(name) != -1) {
|
|
fprintf(stderr,"state %s appears twice...\n");
|
|
exit(BAD_EXIT); }
|
|
h = hash(name);
|
|
cur = (struct sym *)malloc(sizeof (struct sym));
|
|
cur->name = copy(name);
|
|
cur->val = svalue;
|
|
cur->hnxt = htab[h];
|
|
htab[h] = cur;
|
|
}
|
|
|
|
/*
|
|
* find name in the symbol table, return its value. Returns -1
|
|
* if not found.
|
|
*
|
|
*/
|
|
lkup(name)
|
|
char *name;
|
|
{
|
|
struct sym *cur;
|
|
for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
|
|
if (strcmp(cur->name,name) == 0) return(cur->val);
|
|
return(-1);
|
|
}
|