add directory study

This commit is contained in:
gohigh
2024-02-19 00:25:23 -05:00
parent b1306b38b1
commit f3774e2f8c
4001 changed files with 2285787 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
#
# Makefile for BAWK (MINIX)
#
# Makefile @(#)1.1
#
# Targets:
#   bawk  - link the modules listed in OBJ into the "bawk" executable
#   clean - remove *.bak files, the *.s module files and the executable
#
# OPSYS is handed to the compiler as a -D define through CFLAGS.
OPSYS = MINIX
CFLAGS = -D$(OPSYS)
# The per-module targets that make up the program.
OBJ = bawk.s bawkact.s bawksym.s bawkpat.s bawkdo.s
bawk: $(OBJ)
@echo Start linking BAWK
@cc -i -o bawk $(OBJ)
# Each module depends on the shared header and on its own source file.
bawk.s: bawk.h bawk.c
bawkact.s: bawk.h bawkact.c
bawkpat.s: bawk.h bawkpat.c
bawksym.s: bawk.h bawksym.c
bawkdo.s: bawk.h bawkdo.c
clean:
@rm -f *.bak *.s bawk

View File

@@ -0,0 +1,224 @@
/*
* Bawk constants and variable declarations.
*/
/*
* EXTERN is empty in a translation unit that defines MAIN before
* including this header, so the variables declared with it below are
* defined there; everywhere else it expands to "extern" and they are
* only declared.
*/
#ifdef MAIN
# define EXTERN
#else
# define EXTERN extern
#endif
#include <minix/config.h>
/*
* INT is the integer type carried in a DATUM (see union datum below).
* ALIGN(p) is applied to a char pointer just before it is cast to a
* wider pointer type.  In the M68000 variant an odd p is incremented
* (p itself is modified, so it must be an lvalue and is evaluated more
* than once); in the other variant ALIGN(p) is simply p.
*/
#if (CHIP == M68000)
# define INT long
# define ALIGN(p) (((long)(p) & 1) ? ++(p) : (p) )
#else
# define INT int
# define ALIGN(p) (p)
#endif
/*#define DEBUG 1 remove this line to compile without debug statements */
#ifdef DEBUG
EXTERN char Debug; /* debug print flag */
#endif
/*
* Table and buffer sizes
*/
#define MAXLINELEN 128 /* longest input line */
#define MAXWORDS (MAXLINELEN/2) /* max # of words in a line */
#define MAXWORKBUFLEN 4096 /* longest action or regular expression */
#define MAXVARTABSZ 50 /* max # of symbols */
#define MAXVARLEN 10 /* symbol name length; a name of exactly this */
/* length is stored without a terminating NUL */
#define MAXSTACKSZ 40 /* max value stack length (for expressions) */
/**********************************************************
* Current Input File variables *
**********************************************************/
/*
* Current Input File pointer:
* (FILE is not declared here; <stdio.h> must be included before this
* header.)
*/
EXTERN FILE *Fileptr;
EXTERN char *Filename; /* current input file name */
EXTERN char *Filechar; /* ptr to next input char if input is string */
EXTERN int Linecount; /* current input line number */
EXTERN int Recordcount; /* record count */
/*
* Working buffers.
*/
EXTERN char Linebuf[ MAXLINELEN ]; /* current input line buffer */
EXTERN char *Fields[ MAXWORDS ]; /* pointers to the words in Linebuf */
EXTERN int Fieldcount; /* and the # of words */
EXTERN char Workbuf[ MAXWORKBUFLEN ]; /* work area for C action and */
/* regular expression parsers */
/**********************************************************
* Regular Expression Parser variables *
**********************************************************/
/*
* Tokens:
* These are the op codes re_compile() and cclass() write into a compiled
* pattern and pmatch() interprets.
*/
#define CHAR 1 /* literal character; the character follows */
#define BOL 2 /* '^': beginning of line */
#define EOL 3 /* '$': end of line */
#define ANY 4 /* '.': any character except end of line */
#define CLASS 5 /* '[...]': followed by a byte count and members */
#define NCLASS 6 /* '[^...]': negated class, same layout as CLASS */
#define STAR 7 /* '*': zero or more of the operand that follows */
#define PLUS 8 /* '+': one or more of the operand that follows */
#define MINUS 9 /* '-': optional operand (match is tried once) */
#define ALPHA 10 /* ':a': a letter */
#define DIGIT 11 /* ':d': a digit */
#define NALPHA 12 /* ':n': a letter or a digit */
#define PUNCT 13 /* ': ': a non-NUL character <= ' ' */
#define RANGE 14 /* inside a class: followed by range start and end */
#define ENDPAT 15 /* ends a pattern and each STAR/PLUS/MINUS operand */
/**********************************************************
* C Actions Interpreter variables *
**********************************************************/
/*
* Tokens:
*/
#define T_STRING 'S' /* primaries: */
#define T_DOLLAR '$'
#define T_REGEXP 'r'
#define T_CONSTANT 'C'
#define T_VARIABLE 'V'
#define T_FUNCTION 'F'
#define T_SEMICOLON ';' /* punctuation */
#define T_EOF 'Z'
#define T_LBRACE '{'
#define T_RBRACE '}'
#define T_LPAREN '('
#define T_RPAREN ')'
#define T_LBRACKET '['
#define T_RBRACKET ']'
#define T_COMMA ','
#define T_ASSIGN '=' /* operators: */
#define T_MUL '*'
#define T_DIV '/'
#define T_MOD '%'
#define T_ADD '+'
#define T_SUB '-'
#define T_SHL 'L'
#define T_SHR 'R'
#define T_LT '<'
#define T_LE 'l'
#define T_GT '>'
#define T_GE 'g'
#define T_EQ 'q'
#define T_NE 'n'
#define T_NOT '~'
#define T_AND '&'
#define T_XOR '^'
#define T_IOR '|'
#define T_LNOT '!'
#define T_LAND 'a'
#define T_LIOR 'o'
#define T_INCR 'p'
#define T_DECR 'm'
#define T_IF 'i' /* keywords: */
#define T_ELSE 'e'
#define T_WHILE 'w'
#define T_BREAK 'b'
#define T_CHAR 'c'
#define T_INT 't'
#define T_BEGIN 'B'
#define T_END 'E'
#define T_NF 'f'
#define T_NR '#'
#define T_FS ' '
#define T_RS '\n'
#define T_FILENAME 'z'
#define PATTERN 'P' /* indicates C statement is within a pattern */
#define ACTION 'A' /* indicates C statement is within an action */
/*
* Symbol Table values
*/
#define ACTUAL 0
#define LVALUE 1
#define BYTE 1 /* size of a "char" object */
#define WORD sizeof(INT) /* ugh ! */
/*
* Symbol table
* Entries live in Vartab[]; Nextvar points at the first unused entry.
*/
struct variable {
char vname[ MAXVARLEN ]; /* name; no NUL when MAXVARLEN chars long */
char vclass; /* 0 for a simple variable; raised by decl() for */
/* arrays and pointers */
char vsize; /* BYTE or WORD */
int vlen; /* array length, 0 for a simple variable */
char *vptr; /* storage obtained from getmem() */
};
#define VARIABLE struct variable
EXTERN VARIABLE Vartab[ MAXVARTABSZ ], *Nextvar;
/*
* Value stack
*/
union datum {
INT ival; /* integer value */
char *dptr; /* pointer to data */
char **ptrptr;
};
#define DATUM union datum
struct item {
char class; /* copied from the pushed class argument */
char lvalue; /* non-zero: value.dptr addresses the object */
char size; /* BYTE or WORD */
DATUM value; /* ival when class and lvalue are both 0, */
/* otherwise dptr (see push()) */
};
#define ITEM struct item
EXTERN ITEM Stackbtm[ MAXSTACKSZ ], *Stackptr, *Stacktop;
/*
* Miscellaneous
*/
EXTERN char *Actptr; /* pointer into Workbuf during compilation; */
/* also the read position used by getoken() */
EXTERN char Token; /* current input token */
EXTERN DATUM Value; /* and its value */
EXTERN char Saw_break; /* set when break stmt seen */
EXTERN char Where; /* indicates whether C stmt is a PATTERN or ACTION */
EXTERN char Fieldsep[20]; /* field separator */
EXTERN char Recordsep[20]; /* record separator */
EXTERN char *Beginact; /* BEGINning of input actions */
EXTERN char *Endact; /* END of input actions */
/**********************************************************
* Rules structure *
**********************************************************/
/*
* One entry per pattern/action rule; entries are chained through
* nextrule.
*/
struct rule {
struct {
char *start; /* C statements that match pattern start */
char *stop; /* C statements that match pattern end */
char startseen; /* set if both a start and stop pattern */
/* given and if an input line matched the */
/* start pattern */
} pattern;
char *action; /* contains quasi-C statements of actions */
struct rule *nextrule; /* pointer to next rule */
};
#define RULE struct rule
EXTERN RULE *Rules, /* rule structures linked list head */
*Rulep; /* working pointer */
/**********************************************************
* Miscellaneous *
**********************************************************/
/*
* Error exit values (returned to command shell)
*/
#define USAGE_ERROR 1 /* error in invocation */
#define FILE_ERROR 2 /* file not found errors */
#define RE_ERROR 3 /* bad regular expression */
#define ACT_ERROR 4 /* bad C action stmt */
#define MEM_ERROR 5 /* out of memory errors */
/*
* Functions that return something special:
* (everything not listed here is used with the implicit "int" return
* type; the declarations are old-style, without parameter lists)
*/
extern char *str_compile(), *getmem(), *cclass(), *pmatch(), *fetchptr();
extern VARIABLE *findvar(), *addvar(), *decl();
extern char *newfile();
extern INT pop(), popint(), dopattern();

View File

@@ -0,0 +1,430 @@
/*
* Bawk C actions compiler
*/
#include <ctype.h>
#include <stdio.h>
#include "bawk.h"
/*
 * Compile an action: mark the compilation context as ACTION and let
 * stmt_compile() tokenize into "actbuf".  Returns whatever
 * stmt_compile() returns (the number of bytes it produced).
 */
int act_compile( actbuf )
char *actbuf;	/* where tokenized actions are compiled into */
{
	int length;

	Where = ACTION;
	length = stmt_compile( actbuf );
	return length;
}
/*
 * Compile a pattern: mark the compilation context as PATTERN and let
 * stmt_compile() tokenize into "actbuf".  Returns whatever
 * stmt_compile() returns (the number of bytes it produced).
 */
int pat_compile( actbuf )
char *actbuf;	/* where tokenized actions are compiled into */
{
	int length;

	Where = PATTERN;
	length = stmt_compile( actbuf );
	return length;
}
stmt_compile( actbuf )
char *actbuf; /* where tokenized actions are compiled into */
{
/*
* Read and tokenize C actions from current input file into the
* action buffer. Strip out comments and whitespace in the
* process.
*/
char *actptr, /* actbuf pointer */
*cp, /* work pointer */
buf[MAXLINELEN];/* string buffer */
int braces, /* counts '{}' pairs - return when 0 */
parens, /* counts '()' pairs */
i, /* temp */
c; /* current input character */
braces = parens = 0;
actptr = actbuf;
while ( (c = getcharacter()) != -1 )
{
/*
* Skip over spaces, tabs and newlines
*/
if ( c==' ' || c=='\t' || c=='\n' )
continue;
if ( c=='#' )
{
/*
* Skip comments. Comments start with a '#' and
* end at the next newline.
*/
while ( (c = getcharacter()) != -1 && c!='\n' )
;
continue;
}
if ( c=='{' )
{
if ( Where==PATTERN )
{
/*
* We're compiling a pattern. The '{' marks
* the beginning of an action statement.
* Push the character back and return.
*/
ungetcharacter( '{' );
break;
}
else
{
/*
* We must be compiling an action statement.
* '{'s mark beginning of action or compound
* statements.
*/
++braces;
*actptr++ = T_LBRACE;
}
}
else if ( c=='}' )
{
*actptr++ = T_RBRACE;
if ( ! --braces )
/*
* Found the end of the action string
*/
break;
}
else if ( c=='(' )
{
++parens;
*actptr++ = T_LPAREN;
}
else if ( c==')' )
{
if ( --parens < 0 )
error( "mismatched '()'", ACT_ERROR );
*actptr++ = T_RPAREN;
}
else if ( c==',' && !braces && !parens && Where==PATTERN )
{
/*
* found a comma outside of any braces or parens-
* this must be a regular expression seperator.
*/
ungetcharacter( ',' );
break;
}
/*
* Check if it's a regular expression:
*/
else if ( c=='/' )
{
/*
* A '/' inside a pattern string starts a regular
* expression. Inside action strings, a '/' is
* the division operator.
*/
if ( Where == PATTERN )
goto dopattern;
else
*actptr++ = T_DIV;
}
else if ( c=='@' )
{
dopattern:
/*
* Within action strings, only the '@' may be used to
* delimit regular expressions
*/
*actptr++ = T_REGEXP;
ungetcharacter( c );
actptr += re_compile( actptr );
}
/*
* symbol, string or constant:
*/
else if ( alpha( c ) )
{
/*
* It's a symbol reference. Copy the symbol into
* string buffer.
*/
cp = buf;
do
*cp++ = c;
while ( (c=getcharacter()) != -1 && alphanum( c ) );
ungetcharacter( c );
*cp = 0;
/*
* Check if a keyword, builtin function or variable.
*/
if ( c = iskeyword( buf ) )
*actptr++ = c;
else if ( i = isfunction( buf ) )
{
*actptr++ = T_FUNCTION;
*(int *) ALIGN(actptr) = i;
actptr += sizeof( int )/sizeof( char );
}
else
{
/*
* It's a symbol name.
*/
*actptr++ = T_VARIABLE;
if ( !(cp = (char *) findvar( buf )) )
cp = (char *) addvar( buf );
*(char **) ALIGN(actptr) = cp;
actptr += sizeof( char * )/sizeof( char );
}
}
else if ( c == '"' )
{
/*
* It's a string constant
*/
*actptr++ = T_STRING;
actptr = str_compile( actptr, '"' );
}
else if ( c == '\'' )
{
/*
* It's a character constant
*/
*actptr++ = T_CONSTANT;
str_compile( buf, '\'' );
*(INT *) ALIGN(actptr) = *buf;
actptr += sizeof( INT )/sizeof( char );
}
else if ( isdigit( c ) )
{
/*
* It's a numeric constant
*/
*actptr++ = T_CONSTANT;
cp = buf;
do
*cp++ = c;
while ( (c=getcharacter()) != -1 && isdigit( c ) );
ungetcharacter( c );
*cp = 0;
*(INT *) ALIGN(actptr) = atoi( buf );
actptr += sizeof( INT )/sizeof( char );
}
/*
* unary operator:
*/
else if ( c == '$' )
*actptr++ = T_DOLLAR;
/*
* or binary operator:
*/
else if ( c == '=' )
{
if ( (c=getcharacter()) == '=' )
*actptr++ = T_EQ;
else
{
ungetcharacter( c );
*actptr++ = T_ASSIGN;
}
}
else if ( c == '!' )
{
if ( (c=getcharacter()) == '=' )
*actptr++ = T_NE;
else
{
ungetcharacter( c );
*actptr++ = T_LNOT;
}
}
else if ( c == '<' )
{
if ( (c=getcharacter()) == '<' )
*actptr++ = T_SHL;
else if ( c == '=' )
*actptr++ = T_LE;
else
{
ungetcharacter( c );
*actptr++ = T_LT;
}
}
else if ( c == '>' )
{
if ( (c=getcharacter()) == '>' )
*actptr++ = T_SHR;
else if ( c == '=' )
*actptr++ = T_GE;
else
{
ungetcharacter( c );
*actptr++ = T_GT;
}
}
else if ( c == '&' )
{
if ( (c=getcharacter()) == '&' )
*actptr++ = T_LAND;
else
{
ungetcharacter( c );
*actptr++ = T_AND;
}
}
else if ( c == '|' )
{
if ( (c=getcharacter()) == '|' )
*actptr++ = T_LIOR;
else
{
ungetcharacter( c );
*actptr++ = T_IOR;
}
}
else if ( c == '+' )
{
if ( (c=getcharacter()) == '+' )
*actptr++ = T_INCR;
else
{
ungetcharacter( c );
*actptr++ = T_ADD;
}
}
else if ( c == '-' )
{
if ( (c=getcharacter()) == '-' )
*actptr++ = T_DECR;
else
{
ungetcharacter( c );
*actptr++ = T_SUB;
}
}
/*
* punctuation
*/
else if ( instr( c, "[](),;*/%+-^~" ) )
*actptr++ = c;
else
{
/*
* Bad character in input line
*/
error( "lexical error", ACT_ERROR );
}
if ( actptr >= Workbuf + MAXWORKBUFLEN )
error( "action too long", MEM_ERROR );
}
if ( braces || parens )
error( "mismatched '{}' or '()'", ACT_ERROR );
*actptr++ = T_EOF;
return actptr - actbuf;
}
/* FIX FvK: was INT */
char *str_compile( str, delim )
char *str, delim;
{
/*
* Compile a string from current input file into the given string
* buffer. Stop when input character is the delimiter in "delim".
* Returns a pointer to the first character after the string.
*/
int c;
char buf[ MAXLINELEN ];
while ( (c = getcharacter()) != -1 && c != delim)
{
if ( c == '\\' )
{
switch ( c = getcharacter() )
{
case -1: goto err;
case 'b': c = '\b'; break;
case 'n': c = '\n'; break;
case 't': c = '\t'; break;
case 'f': c = '\f'; break;
case 'r': c = '\r'; break;
case '0':
case '1':
case '2':
case '3':
*buf = c;
for ( c=1; c<3; ++c )
{
if ( (buf[c]=getcharacter()) == -1 )
goto err;
}
buf[c] = 0;
sscanf( buf, "%o", &c );
break;
case '\n':
if ( getcharacter() == -1 )
goto err;
default:
if ( (c = getcharacter()) == -1 )
goto err;
}
}
*str++ = c;
}
*str++ = 0;
return str;
err:
sprintf( buf, "missing %c delimiter", delim );
error( buf, 4 );
}
/*
 * Fetch the next token from the compiled buffer at Actptr.
 * Sets Token to the token byte and Value to its operand:
 *	T_STRING, T_REGEXP	Value.dptr -> the NUL terminated data that
 *				follows the token
 *	T_VARIABLE		Value.dptr = the stored pointer
 *	T_FUNCTION		Value.ival = the stored int
 *	T_CONSTANT		Value.ival = the stored INT
 *	anything else		Value.dptr = 0
 * Actptr is advanced past the token and its operand, except that it is
 * left pointing at T_EOF so that token is returned again next time.
 * Returns Token.
 */
int getoken()
{
	char tok;

	tok = *Actptr++;
	Token = tok;
	switch ( tok )
	{
	case T_STRING:
	case T_REGEXP:
		Value.dptr = Actptr;
		Actptr += strlen( Actptr ) + 1;
		break;
	case T_VARIABLE:
		Value.dptr = *(char **) ALIGN(Actptr);
		Actptr += sizeof( char * )/sizeof( char );
		break;
	case T_FUNCTION:
		Value.ival = *(int *) ALIGN(Actptr);
		Actptr += sizeof( int )/sizeof( char );
		break;
	case T_CONSTANT:
		Value.ival = *(INT *) ALIGN(Actptr);
		Actptr += sizeof( INT )/sizeof( char );
		break;
	case T_EOF:
		/* stay on the end marker */
		--Actptr;
		Value.dptr = 0;
		break;
	default:
		Value.dptr = 0;
		break;
	}
#ifdef DEBUG
	if ( Debug > 1 )
		printf( "Token='%c' (0x%x), Value=%d\n",
			Token,Token,Value.ival );
#endif
	return Token;
}

View File

@@ -0,0 +1,369 @@
/*
* Bawk regular expression compiler/interpreter
*/
#include <ctype.h>
#include <stdio.h>
#include "bawk.h"
/*
 * Compile a regular expression from the current input file into the
 * pattern buffer "patbuf".  The first character read is taken as the
 * delimiter; compilation stops at the next occurrence of it or at end
 * of input.  The compiled pattern ends with ENDPAT and a 0 byte.
 * Returns the number of bytes written to patbuf.
 */
int re_compile( patbuf )
char *patbuf;	/* where to put compiled pattern */
{
	int	c,		/* Current character */
		o;		/* Temp */
	char	*patptr,	/* destination string pntr */
		*lp,		/* Last pattern pointer */
		*spp,		/* Save beginning of pattern */
		delim,		/* pattern delimiter */
		*cclass();	/* Compile class routine */

	lp = patptr = patbuf;
	delim = getcharacter();
	for ( ;; )
	{
		c = getcharacter();
		if ( c == -1 || c == delim )
			break;
		/*
		 * STAR, PLUS and MINUS are special: they apply to the
		 * element compiled last, and their op code has to be
		 * inserted in front of it.
		 */
		if ( c == '*' || c == '+' || c == '-' )
		{
			if ( patptr == patbuf ||
			     (o=patptr[-1]) == BOL ||
			     o == EOL ||
			     o == STAR ||
			     o == PLUS ||
			     o == MINUS )
				error( "illegal occurrance op", RE_ERROR );
			*patptr++ = ENDPAT;
			*patptr++ = ENDPAT;
			spp = patptr;		/* remember the new end */
			/* shift the last element up by one byte */
			for ( --patptr; patptr > lp; --patptr )
				*patptr = patptr[-1];
			if ( c == '*' )
				*patptr = STAR;
			else if ( c == '-' )
				*patptr = MINUS;
			else
				*patptr = PLUS;
			patptr = spp;		/* back to the end */
			continue;
		}
		/*
		 * All the rest.
		 */
		lp = patptr;			/* element starts here */
		switch ( c )
		{
		case '^':
			*patptr++ = BOL;
			break;
		case '$':
			*patptr++ = EOL;
			break;
		case '.':
			*patptr++ = ANY;
			break;
		case '[':
			patptr = cclass( patptr );
			break;
		case ':':
			c = getcharacter();
			if ( c == -1 )
			{
				error( "no ':' type", RE_ERROR );
				break;
			}
			switch ( tolower( c ) )
			{
			case 'a':
				*patptr++ = ALPHA;
				break;
			case 'd':
				*patptr++ = DIGIT;
				break;
			case 'n':
				*patptr++ = NALPHA;
				break;
			case ' ':
				*patptr++ = PUNCT;
				break;
			default:
				error( "unknown ':' type", RE_ERROR );
			}
			break;
		case '\\':
			/* quoted character: take the next one literally */
			c = getcharacter();
			/* fall through */
		default:
			*patptr++ = CHAR;
			*patptr++ = c;
		}
	}
	*patptr++ = ENDPAT;
	*patptr++ = 0;				/* Terminate string */
#ifdef DEBUG
	if ( Debug>1 )
	{
		for ( lp=patbuf; lp<patptr; ++lp )
		{
			switch ( c = *lp )
			{
			case CHAR: printf("char "); break;
			case BOL: printf("bol "); break;
			case EOL: printf("eol "); break;
			case ANY: printf("any "); break;
			case CLASS: printf("class(%d) ", *++lp); break;
			case NCLASS: printf("notclass(%d) ",*++lp); break;
			case STAR: printf("star "); break;
			case PLUS: printf("plus "); break;
			case MINUS: printf("minus "); break;
			case ALPHA: printf("alpha "); break;
			case DIGIT: printf("digit "); break;
			case NALPHA: printf("notalpha "); break;
			case PUNCT: printf("punct "); break;
			case RANGE: printf("range "); break;
			case ENDPAT: printf("endpat "); break;
			default: printf("<%c> ", c); break;
			}
		}
		printf( "\n" );
	}
#endif
	return patptr - patbuf;
}
char *
cclass( patbuf )
char *patbuf; /* destination pattern buffer */
{
/*
* Compile a class (within [])
*/
char *patptr, /* destination pattern pointer */
*cp; /* Pattern start */
int c, /* Current character */
o; /* Temp */
patptr = patbuf;
if ( (c = getcharacter()) == -1 )
error( "class terminates badly", RE_ERROR );
else if ( c == '^')
{
/*
* Class exclusion, for example: [^abc]
* Swallow the "^" and set token type to class exclusion.
*/
o = NCLASS;
}
else
{
/*
* Normal class, for example: [abc]
* push back the character and set token type to class
*/
ungetcharacter( c );
o = CLASS;
}
*patptr++ = o;
cp = patptr; /* remember where byte count is */
*patptr++ = 0; /* and initialize byte count */
while ( (c = getcharacter()) != -1 && c!=']' )
{
o = getcharacter(); /* peek at next char */
if (c == '\\') /* Store quoted chars */
{
if ( o == -1) /* Gotta get something */
error( "class terminates badly", RE_ERROR );
*patptr++ = o;
}
else if ( c=='-' && (patptr-cp)>1 && o!=']' && o != -1 )
{
c = patptr[-1]; /* Range start */
patptr[-1] = RANGE; /* Range signal */
*patptr++ = c; /* Re-store start */
*patptr++ = o; /* Store end char */
}
else
{
*patptr++ = c; /* Store normal char */
ungetcharacter( o );
}
}
if (c != ']')
error( "unterminated class", RE_ERROR );
if ( (c = (patptr - cp)) >= 256 )
error( "class too large", RE_ERROR );
if ( c == 0 )
error( "empty class", RE_ERROR );
*cp = c; /* fill in byte count */
return patptr;
}
/*
 * Count the non-overlapping places in "line" where the compiled
 * "pattern" matches.  Every position of the line is tried in turn;
 * after a match the search resumes behind the matched text.
 *
 * A match that consumes no characters is counted once and the search
 * then moves on to the next position, so such a pattern cannot stall
 * the scan.
 *
 * Returns the number of matches (0 when there are none).  The loop
 * body never runs for an empty line, so the result is then 0.
 */
int match( line, pattern )
char *line;	/* line to match */
char *pattern;	/* pattern to match */
{
	char	*l;		/* Line pointer */
	char	*pmatch();
	char	*next;		/* end of the match found at l */
	int	matches;

	matches = 0;
	for ( l = line; *l; l++ )
	{
		if ( (next = pmatch( line, l, pattern )) != 0 )
		{
			/*
			 * Skip the matched text; the loop increment then
			 * lands on "next".  For an empty match leave l
			 * alone so that the increment still advances.
			 */
			if ( next > l )
				l = next - 1;
			++matches;
#ifdef DEBUG
			if ( Debug )
				printf( "match!\n" );
#endif
		}
	}
	return matches;
}
/*
 * Match the compiled "pattern" against the text at "line".
 * "linestart" is the beginning of the whole line and is only used to
 * decide whether BOL matches.
 *
 * Returns a pointer to the first character after the matched text, or
 * 0 if the pattern does not match at this position.
 *
 * STAR/PLUS/MINUS are followed by their operand (terminated by its own
 * ENDPAT); the operand is matched by calling pmatch() recursively.
 */
char *
pmatch(linestart, line, pattern)
char *linestart;	/* start of line to match */
char *line;		/* (partial) line to match */
char *pattern;		/* (partial) pattern to match */
{
	char	*l;	/* Current line pointer */
	char	*p;	/* Current pattern pointer */
	char	c;	/* Current character */
	char	*e;	/* End for STAR and PLUS match */
	int	op;	/* Pattern operation */
	int	n;	/* Class counter */
	char	*are;	/* Start of STAR match */

	l = line;
#ifdef DEBUG
	if (Debug > 1)
		printf("pmatch(\"%s\")\n", line);
#endif
	p = pattern;
	while ((op = *p++) != ENDPAT) {
#ifdef DEBUG
		if (Debug > 1)
			printf("byte[%d] = 0%o, '%c', op = 0%o\n",
				l-line, *l, *l, op);
#endif
		switch(op) {
		case CHAR:
			if ( *l++ != *p++)
				return 0;
			break;
		case BOL:
			if (l != linestart)
				return 0;
			break;
		case EOL:
			if (*l != '\0')
				return 0;
			break;
		case ANY:
			if (*l++ == '\0')
				return 0;
			break;
		case DIGIT:
			if ((c = *l++) < '0' || (c > '9'))
				return 0;
			break;
		case ALPHA:
			c = tolower( *l++ );
			if (c < 'a' || c > 'z')
				return 0;
			break;
		case NALPHA:
			/* letter or digit */
			c = tolower(*l++);
			if (c >= 'a' && c <= 'z')
				break;
			else if (c < '0' || c > '9')
				return 0;
			break;
		case PUNCT:
			c = *l++;
			if (c == 0 || c > ' ')
				return 0;
			break;
		case CLASS:
		case NCLASS:
			c = *l++;
			/*
			 * The end of the line is never a class member.
			 * Without this test a negated class would accept
			 * the terminating NUL and step past the line.
			 */
			if (c == '\0')
				return 0;
			n = *p++ & 0377;	/* bytes left, count included */
			do {
				if (*p == RANGE) {
					p += 3;
					n -= 2;
					if (c >= p[-2] && c <= p[-1])
						break;
				}
				else if (c == *p++)
					break;
			} while (--n > 1);
			/* n <= 1 here means no member matched */
			if ((op == CLASS) == (n <= 1))
				return 0;
			if (op == CLASS)
				p += n - 2;	/* skip unexamined members */
			break;
		case MINUS:
			e = pmatch(linestart,l,p);	/* Look for a match */
			while (*p++ != ENDPAT)		/* Skip over pattern */
				;
			if (e)				/* Got a match? */
				l = e;			/* Yes, update string */
			break;				/* Always succeeds */
		case PLUS:				/* One or more ... */
			if ((l = pmatch(linestart,l,p)) == 0)
				return 0;		/* Gotta have a match */
			/* fall through */
		case STAR:				/* Zero or more ... */
			are = l;			/* Remember line start */
			/*
			 * Get the longest match.  Stop as soon as the
			 * operand matches without consuming anything,
			 * otherwise this loop would never end.
			 */
			while (*l && (e = pmatch(linestart,l,p)) != 0 && e > l)
				l = e;
			while (*p++ != ENDPAT)		/* Skip over pattern */
				;
			/*
			 * Try to match the rest, giving back one character
			 * at a time down to (and including) "are".
			 */
			for (;;) {
				if ((e = pmatch(linestart,l,p)) != 0)
					return e;
				if (l == are)
					break;
				--l;			/* Nope, try earlier */
			}
			return 0;			/* Nothing else worked */
		default:
			fprintf( stderr, "bad op code %d\n", op );
			error( "can't happen -- match", RE_ERROR );
		}
	}
	return l;
}

View File

@@ -0,0 +1,538 @@
/*
* Bawk C actions builtin functions, variable declaration, and
* stack management routines.
*/
#include <ctype.h>
#include <stdio.h>
#include "bawk.h"
#define MAXARGS 10 /* max # of arguments to a builtin func */
/*
 * Builtin function numbers, as returned by isfunction() and dispatched
 * on by function().  Zero is reserved for "not a function".
 */
#define F_PRINT 1
#define F_PRINTF 2
#define F_GETLINE 3
#define F_STRLEN 4
#define F_STRCPY 5
#define F_STRCMP 6
#define F_TOUPPER 7
#define F_TOLOWER 8
#define F_MATCH 9
#define F_NEXTFILE 10
/*
 * Compare the string "s" to the list of builtin functions and return
 * its (non-zero) function number.  The comparison is exact and case
 * sensitive.  Return zero if "s" is not a builtin function.
 */
int isfunction( s )
char *s;
{
	static struct
	{
		char	*name;		/* spelling in the source text */
		int	number;		/* F_xxx value */
	} builtins[] =
	{
		{ "print",	F_PRINT },
		{ "printf",	F_PRINTF },
		{ "getline",	F_GETLINE },
		{ "strlen",	F_STRLEN },
		{ "strcpy",	F_STRCPY },
		{ "strcmp",	F_STRCMP },
		{ "toupper",	F_TOUPPER },
		{ "tolower",	F_TOLOWER },
		{ "match",	F_MATCH },
		{ "nextfile",	F_NEXTFILE }
	};
	int i;

	for ( i = 0; i < (int) (sizeof builtins / sizeof builtins[0]); i++ )
	{
		if ( strcmp( s, builtins[i].name ) == 0 )
			return builtins[i].number;
	}
	return 0;
}
/*
 * Look "s" up in the keyword list and return its (non-zero) token.
 * The comparison is exact and case sensitive.  Return zero if "s" is
 * not a keyword.
 */
int iskeyword( s )
char *s;
{
	static struct
	{
		char	*word;		/* spelling in the source text */
		int	token;		/* T_xxx value */
	} keywords[] =
	{
		{ "char",	T_CHAR },
		{ "int",	T_INT },
		{ "if",		T_IF },
		{ "else",	T_ELSE },
		{ "while",	T_WHILE },
		{ "break",	T_BREAK },
		{ "NF",		T_NF },
		{ "NR",		T_NR },
		{ "FS",		T_FS },
		{ "RS",		T_RS },
		{ "FILENAME",	T_FILENAME },
		{ "BEGIN",	T_BEGIN },
		{ "END",	T_END }
	};
	int k;

	for ( k = 0; k < (int) (sizeof keywords / sizeof keywords[0]); k++ )
	{
		if ( strcmp( s, keywords[k].word ) == 0 )
			return keywords[k].token;
	}
	return 0;
}
function( funcnum )
{
int argc, j;
char lpar;
DATUM args[ MAXARGS ];
argc = 0;
if ( Token==T_LPAREN )
{
lpar = 1;
getoken();
}
else
lpar = 0;
/*
* If there are any arguments, evaluate them and copy their values
* to a local array. Clear the array first.
*/
for (j = 0; j < MAXARGS; j++) args[j].ival = 0;
if ( Token!=T_RPAREN && Token!=T_EOF )
{
for ( ;; )
{
expression();
if ( argc<MAXARGS )
args[ argc++ ].ival = popint();
else
popint();
if ( Token==T_COMMA )
getoken();
else
break;
}
}
if ( lpar )
{
if ( Token!=T_RPAREN )
error( "missing ')'", ACT_ERROR );
else
getoken();
}
switch ( funcnum )
{
case F_PRINT: /* quick and simple string printing */
pushint( (INT)pprint( args ) );
break;
case F_PRINTF: /* just like the real printf() function */
pushint( (INT)pprntf( args[0].dptr, &args[1] ) );
break;
case F_GETLINE:
/*
* Get the next line of input from the current input file
* and parse according to the current field seperator.
* Don't forget to free up the previous line's words first...
*/
while ( Fieldcount )
free( Fields[ --Fieldcount ] );
pushint( (INT)getline() );
Fieldcount = parse( Linebuf, Fields, Fieldsep );
break;
case F_STRLEN: /* calculate length of string argument */
pushint( (INT)strlen( args[0].dptr ) );
break;
case F_STRCPY: /* copy second string argument to first string */
pushint( (INT)strcpy( args[0].dptr, args[1].dptr ) );
break;
case F_STRCMP: /* compare two strings */
pushint( (INT)strcmp( args[0].dptr, args[1].dptr ) );
break;
case F_TOUPPER: /* convert the character argument to upper case */
pushint( (INT)toupper( args[0].ival ) );
break;
case F_TOLOWER: /* convert the character argument to lower case */
pushint( (INT)tolower( args[0].ival ) );
break;
case F_MATCH: /* match a string argument to a regular expression */
pushint( (INT)match( args[0].dptr, args[1].dptr ) );
break;
case F_NEXTFILE:/* close current input file and process next file */
pushint( (INT)endfile() );
break;
default: /* oops! */
error( "bad function call", ACT_ERROR );
}
}
/*
 * Search the symbol table for the variable named by the leading
 * alphanumeric characters of "s".  Only the first MAXVARLEN characters
 * are significant.  Returns the table entry, or NULL if there is none.
 */
VARIABLE *
findvar( s )
char *s;
{
	char	key[ MAXVARLEN ];	/* name to look for */
	VARIABLE *entry;
	int	n;

	/* copy the name; it is NUL terminated only if shorter than the key */
	for ( n = 0; n < MAXVARLEN && alphanum( *s ); ++n )
		key[n] = *s++;
	if ( n<MAXVARLEN )
		key[n] = 0;
	entry = Vartab;
	while ( entry<Nextvar )
	{
		if ( strncmp( entry->vname, key, MAXVARLEN ) == 0 )
			return entry;
		++entry;
	}
	return NULL;
}
/*
 * Add a new variable to the symbol table and give it the default
 * attributes of "int name;": class 0, size WORD, length 0 and WORD
 * bytes of zeroed storage.  Only the first MAXVARLEN alphanumeric
 * characters of "name" are stored.
 *
 * Returns the new entry and advances Nextvar.  A full table is reported
 * through error(); the last valid entry is Vartab[MAXVARTABSZ-1], so
 * there is room only while Nextvar is strictly below the end.
 */
VARIABLE *
addvar( name )
char *name;
{
	int i;

	if ( Nextvar < Vartab + MAXVARTABSZ )
	{
		i = 0;
		while ( i<MAXVARLEN && alphanum( *name ) )
			Nextvar->vname[i++] = *name++;
		if ( i<MAXVARLEN )
			Nextvar->vname[i] = 0;
		Nextvar->vclass = 0;
		Nextvar->vsize = WORD;
		Nextvar->vlen = 0;
		/*
		 * Allocate some new room
		 */
		Nextvar->vptr = getmem((unsigned int) WORD );
		fillmem( Nextvar->vptr, WORD, 0 );
	}
	else
		error( "symbol table overflow", MEM_ERROR );
	return Nextvar++;
}
/*
 * Parse a "char" or "int" statement: the type keyword (the current
 * token) followed by one or more comma separated declarators and an
 * optional ';'.
 */
int declist()
{
	char type;	/* T_CHAR or T_INT */

	type = Token;
	do
	{
		/* step over the keyword or the comma, then one declarator */
		getoken();
		decl( type );
	}
	while ( Token==T_COMMA );
	if ( Token==T_SEMICOLON )
		getoken();
}
/*
 * Parse one element of a "char" or "int" declaration list.
 * stmt_compile() has already entered the variable into the symbol
 * table as an integer; this routine only changes the symbol's class,
 * size or length according to the declaration.
 *
 * "type" is T_CHAR or T_INT.  A leading '*' declares a pointer: the
 * rest is parsed recursively and the class raised by one.
 *
 * Returns the symbol table entry, or a null pointer if the declarator
 * was malformed and syntaxerror() returned.
 *
 * WARNING: The interpreter depends on the fact that pointers are
 * the same length as int's.  If your machine uses long's for
 * pointers either change the code or #define int long (or whatever).
 * NOTE(review): a pointer declarator keeps the storage size computed
 * for its element type - confirm that is what the interpreter expects.
 */
VARIABLE *
decl( type )
int type;
{
	char class, size;
	int len;
	unsigned oldsize, newsize;
	VARIABLE *pvar;

	pvar = (VARIABLE *) 0;	/* result if the declarator is bad */
	if ( Token==T_MUL )
	{
		/*
		 * it's a pointer
		 */
		getoken();
		pvar = decl( type );
		if ( pvar )
			++pvar->vclass;
	}
	else if ( Token==T_VARIABLE )
	{
		/*
		 * Simple variable so far.  The token value (in the global
		 * "Value" variable) is a pointer to the variable's symbol
		 * table entry.
		 */
		pvar = (VARIABLE *)Value.dptr;
		getoken();
		class = 0;
		/*
		 * Compute its length
		 */
		if ( Token==T_LBRACKET )
		{
			/*
			 * It's an array.
			 */
			getoken();
			++class;
			/*
			 * Compute the dimension
			 */
			expression();
			if ( Token!=T_RBRACKET )
				error( "missing ']'", ACT_ERROR );
			getoken();
			len = popint();
		}
		else
			/*
			 * It's a simple variable - array length is zero.
			 */
			len = 0;
		size = (type==T_CHAR) ? BYTE : WORD;
		newsize = (len ? len : 1) * size;
		oldsize = (pvar->vlen ? pvar->vlen : 1) * pvar->vsize;
		if ( newsize != oldsize )
		{
			/*
			 * The amount of storage needed for the variable
			 * has changed - free up memory allocated initially
			 * and reallocate for new size.
			 */
			free( pvar->vptr );
			pvar->vptr = getmem( newsize );
		}
		/*
		 * Now change the variable's attributes.
		 */
		pvar->vclass = class;
		pvar->vsize = size;
		pvar->vlen = len;
	}
	else
		syntaxerror();
	return pvar;
}
assignment()
{
/*
* Perform an assignment
*/
INT ival;
ival = popint();
/*
* make sure we've got an lvalue
*/
if ( Stackptr->lvalue )
{
if ( Stackptr->class )
movemem((char *) &ival, Stackptr->value.dptr, WORD );
else
movemem((char *) &ival, Stackptr->value.dptr, Stackptr->size);
pop();
pushint( ival );
}
else
error( "'=' needs an lvalue", ACT_ERROR );
}
/*
 * Pop the stack and return the integer member of the popped item.
 * Popping below Stackbtm is reported through error(), whose result is
 * then returned.
 */
INT pop()
{
	if ( Stackptr < Stackbtm )
		return error( "stack underflow", ACT_ERROR );
	return (Stackptr--)->value.ival;
}
/*
 * Push an item, given as its parts, onto the value stack.
 * The integer member of *pdatum is stored when the item is a plain
 * value (class 0 and not an lvalue); otherwise its pointer member is
 * stored.  Running past Stacktop is reported through error().
 */
int push( pclass, plvalue, psize, pdatum )
char pclass, plvalue, psize;
DATUM *pdatum;
{
	++Stackptr;
	if ( Stackptr > Stacktop )
		error( "stack overflow", MEM_ERROR );
	else
	{
		Stackptr->class = pclass;
		Stackptr->lvalue = plvalue;
		Stackptr->size = psize;
		if ( pclass || plvalue )
			Stackptr->value.dptr = pdatum->dptr;
		else
			Stackptr->value.ival = pdatum->ival;
	}
}
/*
 * Push an integer onto the value stack as a plain WORD sized value
 * (class 0, not an lvalue).  Running past Stacktop is reported through
 * error().
 */
int pushint( intvalue )
INT intvalue;
{
	++Stackptr;
	if ( Stackptr > Stacktop )
		error( "stack overflow", MEM_ERROR );
	else
	{
		Stackptr->class = 0;
		Stackptr->lvalue = 0;
		Stackptr->size = WORD;
		Stackptr->value.ival = intvalue;
	}
}
INT popint()
{
/*
* Resolve the item on the top of the stack and return it
*/
INT intvalue;
if ( Stackptr->lvalue )
{
/*
* if it's a byte indirect, sign extend it
*/
if ( Stackptr->size == BYTE && !Stackptr->class )
intvalue = *Stackptr->value.dptr;
else
{
/*
* otherwise, it's an unsigned int
*/
intvalue = *(int *)Stackptr->value.dptr;
}
pop();
return intvalue;
}
else
{
/*
* else it's an ACTUAL, just pop it
*/
return pop();
}
}
/*
 * Execute the "print string, ..." command: write every non-null
 * string among the MAXARGS argument slots, each preceded by a blank,
 * followed by a newline.  Always returns 0.
 */
int pprint( args )
DATUM args[];
{
	DATUM *arg;

	for ( arg = args; arg < args + MAXARGS; ++arg )
	{
		if ( arg->dptr == (char *)NULL )
			continue;
		printf(" %s", arg->dptr);
	}
	printf("\n");	/* AWK's default behaviour */
	return 0;
}
/*
 * Execute the "printf fmt, data ..." command.
 *
 * "fmt" is scanned one conversion at a time.  Each conversion is copied
 * into a local format and handed to printf() together with the next
 * entry of "args" (up to MAXARGS-1 entries are available):
 *	d u x o (any case)	passed as int, or as long after an 'l'
 *	p s (any case)		passed as a pointer
 *	anything else		passed as int
 * "%%" prints a percent sign and uses no argument.  Ordinary text is
 * always printed; output stops at the first conversion for which no
 * argument is left.  A null "fmt" prints nothing.
 *
 * Width and precision are each limited to 14 characters so that the
 * local format can never overflow; excess digits are skipped.
 * NOTE(review): a '*' width or precision is copied through but no extra
 * argument is supplied for it.
 *
 * Always returns 0.
 */
int pprntf( fmt, args )
char *fmt;
DATUM args[];
{
	char lfmt[40], *t;
	register char *s;
	int i, type, lflg;
#define LINT 0
#define SINT 1
#define PNTR 2
	if ( fmt == (char *)0 )
		return 0;
	s = fmt;
	i = 0;
	while ( *s != '\0' )
	{
		if ( *s != '%' )
		{
			putc( *s++, stdout );
			continue;
		}
		if ( s[1] == '%' )
		{
			/* literal percent sign */
			putc( '%', stdout );
			s += 2;
			continue;
		}
		if ( i >= MAXARGS-1 )
			break;			/* no argument left */
		/*
		 * Worst case layout of lfmt: '%', sign, 14 width chars,
		 * '.', 14 precision chars, 'l', conversion, NUL = 34 bytes.
		 */
		t = lfmt;
		*t++ = *s++;			/* % */
		if ( *s == '-' || *s == '+' )
			*t++ = *s++;		/* sign */
		while ( (*s >= '0' && *s <= '9') || *s == '*' )
		{
			if ( t < lfmt + 16 )
				*t++ = *s;	/* width */
			s++;
		}
		if ( *s == '.' )
			*t++ = *s++;		/* . */
		while ( (*s >= '0' && *s <= '9') || *s == '*' )
		{
			if ( t < lfmt + 31 )
				*t++ = *s;	/* digits */
			s++;
		}
		lflg = (*s == 'l' || *s == 'L');
		if ( lflg )
			*t++ = *s++ ;
		if ( *s == 'd' || *s == 'D' || *s == 'u' || *s == 'U' ||
		     *s == 'x' || *s == 'X' || *s == 'o' || *s == 'O' )
		{
			type = lflg ? LINT : SINT;
			*t++ = *s++;
		}
		else if ( *s == 'p' || *s == 'P' || *s == 's' || *s == 'S' )
		{
			type = PNTR;
			*t++ = *s++;
		}
		else	/* yuk. Better to forbid %e, %f and %g here ? */
		{
			type = SINT;
			if (*s) *t++ = *s++;
		}
		*t++ = '\0';
		switch ( type )
		{
		case LINT:
			printf( lfmt, (long) args[i++].ival );
			break;
		case SINT:
			printf( lfmt, (int) args[i++].ival );
			break;
		case PNTR:
			printf( lfmt, args[i++].dptr );
			break;
		default:
			/* impossible */
			break;
		}
	}
	return 0;
}