Path: icaen!news.uiowa.edu!uunet!dziuxsolim.rutgers.edu!pilot.njin.net!not-for-mail From: comp-sources-apple2@pilot.njin.net Newsgroups: comp.sources.apple2 Subject: v001SRC095: AWK -- 16-bit Port of AT&T AWK (GNO) 02/06 Date: 1 Jan 1995 17:39:14 -0500 Organization: Rutgers University Lines: 1668 Sender: jac@pilot.njin.net Approved: jac@pilot.njin.net Distribution: world Message-ID: <3e7aui$d60@pilot.njin.net> NNTP-Posting-Host: pilot.njin.net Submitted-By: Jawaid Bazyar (bazyar@netcom.com) Posting-number: Volume 1, Source 95 Archive-Name: gno/util/awk.02 Architecture: 2gs,UNIX Version-Number: 1.00 =b.c -/**************************************************************** -Copyright (C) AT&T 1993 -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of AT&T or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -****************************************************************/ - -/* lasciate ogni speranza, voi ch'entrate. 
*/ - -#ifdef __ORCAC__ -segment "b"; -#endif - -#define DEBUG - -#include -#include -#include -#include -#include "awk.h" -#include "y.tab.h" - -#define HAT (NCHARS-1) /* matches ^ in regular expr */ - /* NCHARS is 2**n */ -#define MAXLIN 512 - -#define type(v) (v)->nobj -#define left(v) (v)->narg[0] -#define right(v) (v)->narg[1] -#define parent(v) (v)->nnext - -#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: -#define UNARY case STAR: case PLUS: case QUEST: - -/* encoding in tree Nodes: - leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): - left is index, right contains value or pointer to value - unary (STAR, PLUS, QUEST): left is child, right is null - binary (CAT, OR): left and right are children - parent contains pointer to parent -*/ - - -uchar chars[MAXLIN]; -int setvec[MAXLIN]; -int tmpset[MAXLIN]; -Node *point[MAXLIN]; - -int rtok; /* next token in current re */ -int rlxval; -uchar *rlxstr; -uchar *prestr; /* current position in current re */ -uchar *lastre; /* origin of last re */ - -static int setcnt; -static int poscnt; - -uchar *patbeg; -int patlen; - -#define NFA 20 /* cache this many dynamic fa's */ -fa *fatab[NFA]; -int nfatab = 0; /* entries in fatab */ - -fa *makedfa(uchar *s, int anchor) /* returns dfa for reg expr s */ -{ - int i, use, nuse; - fa *pfa; - - if (compile_time) /* a constant for sure */ - return mkdfa(s, anchor); - for (i = 0; i < nfatab; i++) /* is it there already? 
*/ - if (fatab[i]->anchor == anchor && strcmp(fatab[i]->restr,s) == 0) { - fatab[i]->use++; - return fatab[i]; - } - pfa = mkdfa(s, anchor); - if (nfatab < NFA) { /* room for another */ - fatab[nfatab] = pfa; - fatab[nfatab]->use = 1; - nfatab++; - return pfa; - } - use = fatab[0]->use; /* replace least-recently used */ - nuse = 0; - for (i = 1; i < nfatab; i++) - if (fatab[i]->use < use) { - use = fatab[i]->use; - nuse = i; - } - freefa(fatab[nuse]); - fatab[nuse] = pfa; - pfa->use = 1; - return pfa; -} - -fa *mkdfa(uchar *s, int anchor) /* does the real work of making a dfa */ - /* anchor = 1 for anchored matches, else 0 */ -{ - Node *p, *p1; - fa *f; - - p = reparse(s); - p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); - /* put ALL STAR in front of reg. exp. */ - p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); - /* put FINAL after reg. exp. */ - - poscnt = 0; - penter(p1); /* enter parent pointers and leaf indices */ - if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) - overflo("out of space for fa"); - f->accept = poscnt-1; /* penter has computed number of positions in re */ - cfoll(f, p1); /* set up follow sets */ - freetr(p1); - if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) - overflo("out of space in makedfa"); - if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) - overflo("out of space in makedfa"); - *f->posns[1] = 0; - f->initstat = makeinit(f, anchor); - f->anchor = anchor; - f->restr = tostring(s); - return f; -} - -int makeinit(fa *f, int anchor) -{ - register int i, k; - - f->curstat = 2; - f->out[2] = 0; - f->reset = 0; - k = *(f->re[0].lfollow); - xfree(f->posns[2]); - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of space in makeinit"); - for (i=0; i<=k; i++) { - (f->posns[2])[i] = (f->re[0].lfollow)[i]; - } - if ((f->posns[2])[1] == f->accept) - f->out[2] = 1; - for (i=0; igototab[2][i] = 0; - f->curstat = cgoto(f, 2, HAT); - if (anchor) { - 
*f->posns[2] = k-1; /* leave out position 0 */ - for (i=0; iposns[0])[i] = (f->posns[2])[i]; - } - - f->out[0] = f->out[2]; - if (f->curstat != 2) - --(*f->posns[f->curstat]); - } - return f->curstat; -} - -void penter(Node *p) /* set up parent pointers and leaf indices */ -{ - switch (type(p)) { - LEAF - left(p) = (Node *) poscnt; - if (poscnt >= MAXLIN) - overflo("leaf index overflow in penter"); - point[poscnt++] = p; - break; - UNARY - penter(left(p)); - parent(left(p)) = p; - break; - case CAT: - case OR: - penter(left(p)); - penter(right(p)); - parent(left(p)) = p; - parent(right(p)) = p; - break; - default: /* can't happen */ - ERROR "unknown type %d in penter", type(p) FATAL; - break; - } -} - -void freetr(Node *p) /* free parse tree */ -{ - switch (type(p)) { - LEAF - xfree(p); - break; - UNARY - freetr(left(p)); - xfree(p); - break; - case CAT: - case OR: - freetr(left(p)); - freetr(right(p)); - xfree(p); - break; - default: /* can't happen */ - ERROR "unknown type %d in freetr", type(p) FATAL; - break; - } -} - -/* in the parsing of regular expressions, metacharacters like . have */ -/* to be seen literally; \056 is not a metacharacter. 
/*
 * hexstr: evaluate the run of hexadecimal digits starting at *pp.
 *
 * Returns the accumulated value (0 when there are no hex digits) and
 * advances *pp to the first character that is not a hex digit.
 */
int hexstr(char **pp)
{
	char *p;
	int n = 0;

	/* <ctype.h> functions need an unsigned char value, not a plain char */
	for (p = *pp; isxdigit((unsigned char) *p); p++) {
		if (isdigit((unsigned char) *p))
			n = 16 * n + *p - '0';
		else if (*p >= 'a' && *p <= 'f')
			n = 16 * n + *p - 'a' + 10;
		else if (*p >= 'A' && *p <= 'F')
			n = 16 * n + *p - 'A' + 10;
	}
	*pp = p;
	return n;
}

/* octal digits are 0..7; '8' must not be accepted */
#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */

/*
 * quoted: decode the escape sequence that follows a backslash.
 *
 * On entry *pp points at the character after the backslash; on return
 * *pp points past the characters that were consumed.  Recognised forms
 * are \t \n \f \r \b \\, \x followed by hex digits, and one to three
 * octal digits.  Any other character stands for itself.  If the string
 * ends right after the backslash, 0 is returned and *pp is left on the
 * terminating NUL so that callers never scan past the end of the string.
 */
int quoted(char **pp)
{
	char *p = *pp;
	int c;

	c = *p++;
	switch (c) {
	case '\0':
		p--;		/* do not step over the terminator */
		break;
	case 't':
		c = '\t';
		break;
	case 'n':
		c = '\n';
		break;
	case 'f':
		c = '\f';
		break;
	case 'r':
		c = '\r';
		break;
	case 'b':
		c = '\b';
		break;
	case '\\':
		c = '\\';
		break;
	case 'x':		/* hexadecimal goo follows */
		c = hexstr(&p);
		break;
	default:
		if (isoctdigit(c)) {	/* \d \dd \ddd */
			int n = c - '0';

			if (isoctdigit(*p)) {
				n = 8 * n + *p++ - '0';
				if (isoctdigit(*p))
					n = 8 * n + *p++ - '0';
			}
			c = n;
		}
		/* otherwise the character stands for itself */
		break;
	}
	*pp = p;
	return c;
}
type(v); - f->re[(int) left(v)].lval = (long) right(v); /* assumes ptr & long fit */ - for (i=0; i<=f->accept; i++) - setvec[i] = 0; - setcnt = 0; - follow(v); /* computes setvec and setcnt */ - if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) - overflo("out of space building follow set"); - f->re[(int) left(v)].lfollow = p; - *p = setcnt; - for (i = f->accept; i >= 0; i--) - if (setvec[i] == 1) *++p = i; - break; - UNARY - cfoll(f,left(v)); - break; - case CAT: - case OR: - cfoll(f,left(v)); - cfoll(f,right(v)); - break; - default: /* can't happen */ - ERROR "unknown type %d in cfoll", type(v) FATAL; - } -} - -int first(Node *p) /* collects initially active leaves of p into setvec */ - /* returns 0 or 1 depending on whether p matches empty string */ -{ - register int b; - - switch (type(p)) { - LEAF - if (setvec[(int) left(p)] != 1) { - setvec[(int) left(p)] = 1; - setcnt++; - } - if (type(p) == CCL && (*(uchar *) right(p)) == '\0') - return(0); /* empty CCL */ - else return(1); - case PLUS: - if (first(left(p)) == 0) return(0); - return(1); - case STAR: - case QUEST: - first(left(p)); - return(0); - case CAT: - if (first(left(p)) == 0 && first(right(p)) == 0) return(0); - return(1); - case OR: - b = first(right(p)); - if (first(left(p)) == 0 || b == 0) return(0); - return(1); - } - ERROR "unknown type %d in first", type(p) FATAL; /* can't happen */ - return(-1); -} - -void follow(Node *v) /* collects leaves that can follow v into setvec */ -{ - Node *p; - - if (type(v) == FINAL) - return; - p = parent(v); - switch (type(p)) { - case STAR: - case PLUS: - first(v); - follow(p); - return; - - case OR: - case QUEST: - follow(p); - return; - - case CAT: - if (v == left(p)) { /* v is left child of p */ - if (first(right(p)) == 0) { - follow(p); - return; - } - } else /* v is right child */ - follow(p); - return; - } -} - -int member(int c, uchar *s) /* is c in s? 
*/ -{ - while (*s) - if (c == *s++) - return(1); - return(0); -} - -int match(fa *f, uchar *p) /* shortest match ? */ -{ - register int s, ns; - - s = f->reset ? makeinit(f,0) : f->initstat; - if (f->out[s]) - return(1); - do { - if (ns=f->gototab[s][*p]) - s = ns; - else - s = cgoto(f,s,*p); - if (f->out[s]) - return(1); - } while (*p++ != 0); - return(0); -} - -int pmatch(fa *f, uchar *p) /* longest match, for sub */ -{ - register int s, ns; - register uchar *q; - int i, k; - - s = f->reset ? makeinit(f,1) : f->initstat; - patbeg = p; - patlen = -1; - do { - q = p; - do { - if (f->out[s]) /* final state */ - patlen = q-p; - if (ns=f->gototab[s][*q]) - s = ns; - else - s = cgoto(f,s,*q); - if (s == 1) /* no transition */ - if (patlen >= 0) { - patbeg = p; - return(1); - } - else - goto nextin; /* no match */ - } while (*q++ != 0); - if (f->out[s]) - patlen = q-p-1; /* don't count $ */ - if (patlen >= 0) { - patbeg = p; - return(1); - } - nextin: - s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of space in pmatch"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } - } while (*p++ != 0); - return (0); -} - -int nematch(fa *f, uchar *p) /* non-empty match, for sub */ -{ - register int s, ns; - register uchar *q; - int i, k; - - s = f->reset ? 
makeinit(f,1) : f->initstat; - patlen = -1; - while (*p) { - q = p; - do { - if (f->out[s]) /* final state */ - patlen = q-p; - if (ns = f->gototab[s][*q]) - s = ns; - else - s = cgoto(f,s,*q); - if (s == 1) /* no transition */ - if (patlen > 0) { - patbeg = p; - return(1); - } else - goto nnextin; /* no nonempty match */ - } while (*q++ != 0); - if (f->out[s]) - patlen = q-p-1; /* don't count $ */ - if (patlen > 0 ) { - patbeg = p; - return(1); - } - nnextin: - s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of state space"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } - p++; - } - return (0); -} - -Node *reparse(uchar *p) /* parses regular expression pointed to by p */ -{ /* uses relex() to scan regular expression */ - Node *np; - - dprintf( ("reparse <%s>\n", p) ); - lastre = prestr = p; /* prestr points to string to be parsed */ - rtok = relex(); - if (rtok == '\0') - ERROR "empty regular expression" FATAL; - np = regexp(); - if (rtok != '\0') - ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL; - return(np); -} - -Node *regexp(void) /* top-level parse of reg expr */ -{ - return (alt(concat(primary()))); -} - -Node *primary(void) -{ - Node *np; - - switch (rtok) { - case CHAR: - np = op2(CHAR, NIL, (Node *) rlxval); - rtok = relex(); - return (unary(np)); - case ALL: - rtok = relex(); - return (unary(op2(ALL, NIL, NIL))); - case DOT: - rtok = relex(); - return (unary(op2(DOT, NIL, NIL))); - case CCL: - np = op2(CCL, NIL, (Node*) cclenter(rlxstr)); - rtok = relex(); - return (unary(np)); - case NCCL: - np = op2(NCCL, NIL, (Node *) cclenter(rlxstr)); - rtok = relex(); - return (unary(np)); - case '^': - rtok = relex(); - return (unary(op2(CHAR, NIL, (Node *) HAT))); - 
case '$': - rtok = relex(); - return (unary(op2(CHAR, NIL, NIL))); - case '(': - rtok = relex(); - if (rtok == ')') { /* special pleading for () */ - rtok = relex(); - return unary(op2(CCL, NIL, (Node *) tostring(""))); - } - np = regexp(); - if (rtok == ')') { - rtok = relex(); - return (unary(np)); - } - else - ERROR "syntax error in regular expression %s at %s", lastre, prestr FATAL; - default: - ERROR "illegal primary in regular expression %s at %s", lastre, prestr FATAL; - } - return 0; /*NOTREACHED*/ -} - -Node *concat(Node *np) -{ - switch (rtok) { - case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': - return (concat(op2(CAT, np, primary()))); - } - return (np); -} - -Node *alt(Node *np) -{ - if (rtok == OR) { - rtok = relex(); - return (alt(op2(OR, np, concat(primary())))); - } - return (np); -} - -Node *unary(Node *np) -{ - switch (rtok) { - case STAR: - rtok = relex(); - return (unary(op2(STAR, np, NIL))); - case PLUS: - rtok = relex(); - return (unary(op2(PLUS, np, NIL))); - case QUEST: - rtok = relex(); - return (unary(op2(QUEST, np, NIL))); - default: - return (np); - } -} - -int relex(void) /* lexical analyzer for reparse */ -{ - register int c; - static uchar cbuf[MAXLIN]; - int clen, cflag; - - switch (c = *prestr++) { - case '|': return OR; - case '*': return STAR; - case '+': return PLUS; - case '?': return QUEST; - case '.': return DOT; - case '\0': prestr--; return '\0'; - case '^': - case '$': - case '(': - case ')': - return c; - case '\\': - rlxval = quoted(&prestr); - return CHAR; - default: - rlxval = c; - return CHAR; - case '[': - clen = 0; - if (*prestr == '^') { - cflag = 1; - prestr++; - } - else - cflag = 0; - for ( ; clen < MAXLIN-1; ) { - if ((c = *prestr++) == '\\') { - cbuf[clen++] = '\\'; - if ((c = *prestr++) == '\0') - ERROR "nonterminated character class %.20s...", lastre FATAL; - cbuf[clen++] = c; - } else if (c == ']') { - cbuf[clen] = 0; - rlxstr = tostring(cbuf); - if (cflag == 0) - return CCL; - else 
- return NCCL; - } else if (c == '\n') { - ERROR "newline in character class %.20s...", lastre FATAL; - } else if (c == '\0') { - ERROR "nonterminated character class %.20s", lastre FATAL; - } else - cbuf[clen++] = c; - } - if (clen >= MAXLIN-1) - ERROR "character class %.20s... too long", cbuf FATAL; - } - /* can't happen */ - return 0; -} - -int cgoto(fa *f, int s, int c) -{ - register int i, j, k; - register int *p, *q; - - for (i = 0; i <= f->accept; i++) - setvec[i] = 0; - setcnt = 0; - /* compute positions of gototab[s,c] into setvec */ - p = f->posns[s]; - for (i = 1; i <= *p; i++) { - if ((k = f->re[p[i]].ltype) != FINAL) { - if (k == CHAR && c == f->re[p[i]].lval - || k == DOT && c != 0 && c != HAT - || k == ALL && c != 0 - || k == CCL && member(c, (uchar *) f->re[p[i]].lval) - || k == NCCL && !member(c, (uchar *) f->re[p[i]].lval) && c != 0 && c != HAT) { - q = f->re[p[i]].lfollow; - for (j = 1; j <= *q; j++) { - if (setvec[q[j]] == 0) { - setcnt++; - setvec[q[j]] = 1; - } - } - } - } - } - /* determine if setvec is a previous state */ - tmpset[0] = setcnt; - j = 1; - for (i = f->accept; i >= 0; i--) - if (setvec[i]) { - tmpset[j++] = i; - } - /* tmpset == previous state? 
*/ - for (i = 1; i <= f->curstat; i++) { - p = f->posns[i]; - if ((k = tmpset[0]) != p[0]) - goto different; - for (j = 1; j <= k; j++) - if (tmpset[j] != p[j]) - goto different; - /* setvec is state i */ - f->gototab[s][c] = i; - return i; - different:; - } - - /* add tmpset to current set of states */ - if (f->curstat >= NSTATES-1) { - f->curstat = 2; - f->reset = 1; - for (i = 2; i < NSTATES; i++) - xfree(f->posns[i]); - } else - ++(f->curstat); - for (i = 0; i < NCHARS; i++) - f->gototab[f->curstat][i] = 0; - xfree(f->posns[f->curstat]); - if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) - overflo("out of space in cgoto"); - - f->posns[f->curstat] = p; - f->gototab[s][c] = f->curstat; - for (i = 0; i <= setcnt; i++) - p[i] = tmpset[i]; - if (setvec[f->accept]) - f->out[f->curstat] = 1; - else - f->out[f->curstat] = 0; - return f->curstat; -} - - -void freefa(fa *f) /* free a finite automaton */ -{ - register int i; - - if (f == NULL) - return; - for (i = 0; i <= f->curstat; i++) - xfree(f->posns[i]); - for (i = 0; i <= f->accept; i++) { - xfree(f->re[i].lfollow); - if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) - xfree(f->re[i].lval); - } - xfree(f->restr); - xfree(f); -} =lib.c -/**************************************************************** -Copyright (C) AT&T 1993 -All Rights Reserved - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appear in all -copies and that both that the copyright notice and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of AT&T or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
-IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -****************************************************************/ - -#ifdef __ORCAC__ -segment "lib"; -#endif - -#define DEBUG -#include -#include -#include -#include -#include -#include "awk.h" -#include "y.tab.h" - -#define getfval(p) (((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p)) -#define getsval(p) (((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p)) - -FILE *infile = NULL; -uchar *file = (uchar*) ""; -int recsize = RECSIZE; -uchar *recdata; -uchar *record; -uchar *fields; -Cell *fldtab; - -#define MAXFLD 200 -int nfields = MAXFLD; /* can be set from commandline in main */ - -int donefld; /* 1 = implies rec broken into fields */ -int donerec; /* 1 = record is valid (no flds have changed) */ - -int maxfld = 0; /* last used field */ -int argno = 1; /* current input argument number */ -extern Awkfloat *ARGC; - -void recinit(unsigned int n) -{ - static Cell dollar0 = { - OCELL, CFLD, (uchar*) "$0", /*recdata*/0, 0.0, REC|STR|DONTFREE }; - static Cell dollar1 = { - OCELL, CFLD, NULL, (uchar*) "", 0.0, FLD|STR|DONTFREE }; - int i; - - record = recdata = (uchar *) malloc(n); - fields = (uchar *) malloc(n); - fldtab = (Cell *) malloc(nfields * sizeof(Cell)); - if (recdata == NULL || fields == NULL || fldtab == NULL) - ERROR "out of space for $0 and fields" FATAL; - fldtab[0] = dollar0; - fldtab[0].sval = recdata; - for (i = 1; i < nfields; i++) - fldtab[i] = dollar1; -} - -void initgetrec(void) -{ - int i; - uchar *p; - - dprintf( ("inside initgetrec %06lX", lookup("FILENAME",symtab)) ); - for (i = 1; i < *ARGC; i++) { - if (!isclvar(p = getargv(i))) { /* find 1st real filename */ - 
setsval(lookup("FILENAME", symtab), getargv(i)); - return; - } - setclvar(p); /* a commandline assignment before filename */ - argno++; - } - infile = stdin; /* no filenames, so use stdin */ -} - -int getrec(uchar *buf) /* get next input record from whatever source */ -{ /* note: tests whether buf == record */ - int c; - static int firsttime = 1; - - if (firsttime) { - firsttime = 0; - initgetrec(); - } - dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n", - *RS, *FS, *ARGC, *FILENAME) ); - donefld = 0; - donerec = 1; - buf[0] = 0; - while (argno < *ARGC || infile == stdin) { - dprintf( ("argno=%d, file=|%s|\n", (int)argno, file) ); - if (infile == NULL) { /* have to open a new file */ - file = getargv(argno); - if (*file == '\0') { /* it's been zapped */ - argno++; - continue; - } - if (isclvar(file)) { /* a var=value arg */ - setclvar(file); - argno++; - continue; - } - *FILENAME = file; - dprintf( ("opening file %s\n", file) ); - if (*file == '-' && *(file+1) == '\0') - infile = stdin; - else if ((infile = fopen((char *)file, "r")) == NULL) - ERROR "can't open file %s", file FATAL; - setfval(fnrloc, 0.0); - } - c = readrec(buf, recsize, infile); - if (c != 0 || buf[0] != '\0') { /* normal record */ - if (buf == record) { - if (!(recloc->tval & DONTFREE)) - xfree(recloc->sval); - recloc->sval = record; - recloc->tval = REC | STR | DONTFREE; - if (isnumber(recloc->sval)) { - recloc->fval = atof(recloc->sval); - recloc->tval |= NUM; - } - } - setfval(nrloc, nrloc->fval+1); - setfval(fnrloc, fnrloc->fval+1); - return 1; - } - /* EOF arrived on this file; set up next */ - if (infile != stdin) - fclose(infile); - infile = NULL; - argno++; - } - return 0; /* true end of file */ -} - -int readrec(uchar *buf, int bufsize, FILE *inf) /* read one record into buf */ -{ - register int sep, c; - register uchar *rr; - register int nrr; - - if ((sep = **RS) == 0) { - sep = '\n'; - while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ - ; - if (c != EOF) - 
ungetc(c, inf); - } - for (rr = buf, nrr = bufsize; ; ) { - for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c) - if (--nrr < 0) - ERROR "input record `%.30s...' too long; try -mr n", buf FATAL; - if (**RS == sep || c == EOF) - break; - if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ - break; - *rr++ = '\n'; - *rr++ = c; - } - if (rr > buf + bufsize) - ERROR "input record `%.30s...' too long; try -mr n", buf FATAL; - *rr = 0; - dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) ); - return c == EOF && rr == buf ? 0 : 1; -} - -uchar *getargv(int n) /* get ARGV[n] */ -{ - Cell *x; - uchar *s, temp[10]; - extern Array *ARGVtab; - - sprintf((char *)temp, "%d", n); - x = setsymtab(temp, "", 0.0, STR, ARGVtab); - s = getsval(x); - dprintf( ("getargv(%d) returns |%s|\n", (int)n, s) ); - return s; -} - -void setclvar(uchar *s) /* set var=value from s */ -{ - uchar *p; - Cell *q; - - for (p=s; *p != '='; p++) - ; - *p++ = 0; - p = qstring(p, '\0'); - q = setsymtab(s, p, 0.0, STR, symtab); - setsval(q, p); - if (isnumber(q->sval)) { - q->fval = atof(q->sval); - q->tval |= NUM; - } - dprintf( ("command line set %s to |%s|\n", s, p) ); -} - - -void fldbld(void) /* create fields from current record */ -{ - register uchar *r, *fr, sep; - Cell *p; - int i; - - if (donefld) - return; - if (!(recloc->tval & STR)) - getsval(recloc); - r = recloc->sval; - fr = fields; - i = 0; /* number of fields accumulated here */ - if (strlen(*FS) > 1) { /* it's a regular expression */ - i = refldbld(r, *FS); - } else if ((sep = **FS) == ' ') { /* default whitespace */ - for (i = 0; ; ) { - while (*r == ' ' || *r == '\t' || *r == '\n') - r++; - if (*r == 0) - break; - i++; - if (i >= nfields) - break; - if (!(fldtab[i].tval & DONTFREE)) - xfree(fldtab[i].sval); - fldtab[i].sval = fr; - fldtab[i].tval = FLD | STR | DONTFREE; - do - *fr++ = *r++; - while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0'); - *fr++ = 0; - } - *fr = 0; - } else if (*r != 0) { 
/* if 0, it's a null field */ - for (;;) { - i++; - if (i >= nfields) - break; - if (!(fldtab[i].tval & DONTFREE)) - xfree(fldtab[i].sval); - fldtab[i].sval = fr; - fldtab[i].tval = FLD | STR | DONTFREE; - while (*r != sep && *r != '\n' && *r != '\0') /* \n is always a separator */ - *fr++ = *r++; - *fr++ = 0; - if (*r++ == 0) - break; - } - *fr = 0; - } - if (i >= nfields) - ERROR "record `%.30s...' has too many fields; try -mf n", record FATAL; - /* clean out junk from previous record */ - cleanfld(i, maxfld); - maxfld = i; - donefld = 1; - for (p = fldtab+1; p <= fldtab+maxfld; p++) { - if(isnumber(p->sval)) { - p->fval = atof(p->sval); - p->tval |= NUM; - } - } - setfval(nfloc, (Awkfloat) maxfld); - if (dbg) - for (p = fldtab; p <= fldtab+maxfld; p++) - printf("field %d: |%s|\n", p-fldtab, p->sval); -} - -void cleanfld(int n1, int n2) /* clean out fields n1..n2 inclusive */ -{ - static uchar *nullstat = (uchar *) ""; - register Cell *p, *q; - - for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) { - if (!(p->tval & DONTFREE)) - xfree(p->sval); - p->tval = FLD | STR | DONTFREE; - p->sval = nullstat; - } -} - -void newfld(int n) /* add field n (after end) */ -{ - if (n >= nfields) - ERROR "creating too many fields (%d); try -mf n", n, record FATAL; - cleanfld(maxfld, n); - maxfld = n; - setfval(nfloc, (Awkfloat) n); -} - -int refldbld(uchar *rec, uchar *fs) /* build fields from reg expr in FS */ -{ - uchar *fr; - int i, tempstat; - fa *pfa; - - fr = fields; - *fr = '\0'; - if (*rec == '\0') - return 0; - pfa = makedfa(fs, 1); - dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) ); - tempstat = pfa->initstat; - for (i = 1; i < nfields; i++) { - if (!(fldtab[i].tval & DONTFREE)) - xfree(fldtab[i].sval); - fldtab[i].tval = FLD | STR | DONTFREE; - fldtab[i].sval = fr; - dprintf( ("refldbld: i=%d\n", (int)i) ); - if (nematch(pfa, rec)) { - pfa->initstat = 2; /* horrible coupling */ - dprintf( ("match %s (%d chars)\n", patbeg, (int)patlen) ); - strncpy(fr, 
rec, patbeg-rec); - fr += patbeg - rec + 1; - *(fr-1) = '\0'; - rec = patbeg + patlen; - } else { - dprintf( ("no match %s\n", rec) ); - strcpy(fr, rec); - pfa->initstat = tempstat; - break; - } - } - return i; -} - -void recbld(void) /* create $0 from $1..$NF if necessary */ -{ - register int i; - register uchar *r, *p; - static uchar *rec = 0; - - if (donerec == 1) - return; - if (rec == 0) { - rec = (uchar *) malloc(recsize); - if (rec == 0) - ERROR "out of space building $0, record size %d", recsize FATAL; - } - r = rec; - for (i = 1; i <= *NF; i++) { - p = getsval(&fldtab[i]); - while (r < rec+recsize-1 && (*r = *p++)) - r++; - if (i < *NF) - for (p = *OFS; r < rec+recsize-1 && (*r = *p++); ) - r++; - } - if (r > rec + recsize - 1) - ERROR "built giant record `%.30s...'; try -mr n", record FATAL; - *r = '\0'; - dprintf( ("in recbld FS=%o, recloc=%o\n", (int)**FS, (int)recloc) ); - recloc->tval = REC | STR | DONTFREE; - recloc->sval = record = rec; - dprintf( ("in recbld FS=%o, recloc=%o\n", (int)**FS, (int)recloc) ); - dprintf( ("recbld = |%s|\n", record) ); - donerec = 1; -} - -Cell *fieldadr(int n) -{ - if (n < 0 || n >= nfields) - ERROR "trying to access field %d; try -mf n", n FATAL; - return(&fldtab[n]); -} - -int errorflag = 0; -char errbuf[200]; - -void yyerror(uchar *s) -{ - extern uchar *cmdname, *curfname; - static int been_here = 0; - - if (been_here++ > 2) - return; - fprintf(stderr, "%s: %s", cmdname, s); - fprintf(stderr, " at source line %d", lineno); - if (curfname != NULL) - fprintf(stderr, " in function %s", curfname); - fprintf(stderr, "\n"); - errorflag = 2; - eprint(); -} - -void fpecatch(int n) -{ - ERROR "floating point exception %d", n FATAL; -} - -extern int bracecnt, brackcnt, parencnt; - -void bracecheck(void) -{ - int c; - static int beenhere = 0; - - if (beenhere++) - return; - while ((c = input()) != EOF && c != '\0') - bclass(c); - bcheck2(bracecnt, '{', '}'); - bcheck2(brackcnt, '[', ']'); - bcheck2(parencnt, '(', ')'); -} - 
/*
 * bcheck2: report an imbalance of one bracket kind on stderr.
 *
 *	n	imbalance count; positive values are reported as missing
 *		closers, negative values as extra closers, 0 prints
 *		nothing
 *	c1	opening character (not used in the messages)
 *	c2	closing character named in the messages
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;
	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
		return;
	}
	if (n == -1)
		fprintf(stderr, "\textra %c\n", c2);
	else
		fprintf(stderr, "\t%d extra %c's\n", -n, c2);
}
/*
 * popen: stub; no process is started and NULL is always returned.
 */
FILE *popen(char *a, char *b)
{
	return NULL;
}

/*
 * pclose: stub matching popen() above.
 *
 * Always returns -1.  The earlier body was empty, so a caller that
 * looked at the result read an indeterminate value.
 */
int pclose(FILE *x)
{
	return -1;
}
and this -permission notice and warranty disclaimer appear in supporting -documentation, and that the name of AT&T or any of its entities -not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior -permission. - -AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -****************************************************************/ - -char *version = "version July 23, 1993"; - -#define DEBUG -#include -#include -#include -#include -#include -#include "awk.h" -#include "y.tab.h" - -extern char **environ; -extern int nfields; - -int dbg = 0; -uchar *cmdname; /* gets argv[0] for error messages */ -extern FILE *yyin; /* lex input file */ -uchar *lexprog; /* points to program argument if it exists */ -extern int errorflag; /* non-zero if any syntax errors; set by yyerror */ -int compile_time = 2; /* for error printing: */ - /* 2 = cmdline, 1 = compile, 0 = running */ - -uchar *pfile[20]; /* program filenames from -f's */ -int npfile = 0; /* number of filenames */ -int curpfile = 0; /* current filename */ - - -#ifdef __ORCAC__ -int main(int argc, uchar **argv) -#else -main(int argc, char *argv[]) -#endif -{ - uchar *fs = NULL, *marg; - int temp; - extern void initlex(void); - extern void initfiles(void); - extern int initenv(void); - - initlex(); - initfiles(); - initenv(); - cmdname = argv[0]; - if (argc == 1) { - fprintf(stderr, "Usage: %s [-f programfile | 'program'] [-Ffieldsep] [-v var=value] [-mf n] [-mr n] [files]\n", cmdname); - exit(1); - } -#ifndef __ORCAC__ - signal(SIGFPE, fpecatch); -#endif - yyin = 
NULL; - symtab = makesymtab(NSYMTAB); - while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { - if (strcmp((char *) argv[1], "--") == 0) { /* explicit end of args */ - argc--; - argv++; - break; - } - switch (argv[1][1]) { - case 'f': /* next argument is program filename */ - argc--; - argv++; - if (argc <= 1) - ERROR "no program filename" FATAL; - pfile[npfile++] = argv[1]; - break; - case 'F': /* set field separator */ - if (argv[1][2] != 0) { /* arg is -Fsomething */ - if (argv[1][2] == 't' && argv[1][3] == 0) /* wart: t=>\t */ - fs = (uchar *) "\t"; - else if (argv[1][2] != 0) - fs = &argv[1][2]; - } else { /* arg is -F something */ - argc--; argv++; - if (argc > 1 && argv[1][0] == 't' && argv[1][1] == 0) /* wart: t=>\t */ - fs = (uchar *) "\t"; - else if (argc > 1 && argv[1][0] != 0) - fs = &argv[1][0]; - } - if (fs == NULL || *fs == '\0') - ERROR "field separator FS is empty" WARNING; - break; - case 'v': /* -v a=1 to be done NOW. one -v for each */ - if (argv[1][2] == '\0' && --argc > 1 && isclvar((++argv)[1])) - setclvar(argv[1]); - break; - case 'm': /* more memory: -mr=record, -mf=fields */ - marg = argv[1]; - if (argv[1][3]) - temp = atoi(&argv[1][3]); - else { - argv++; argc--; - temp = atoi(&argv[1][0]); - } - switch (marg[2]) { - case 'r': recsize = temp; break; - case 'f': nfields = temp; break; - default: ERROR "unknown option %s\n", marg FATAL; - } - break; - case 'd': - dbg = atoi(&argv[1][2]); - if (dbg == 0) - dbg = 1; - printf("awk %s\n", version); - break; - default: - ERROR "unknown option %s ignored", argv[1] WARNING; - break; - } - argc--; - argv++; - } - /* argv[1] is now the first argument */ - if (npfile == 0) { /* no -f; first argument is program */ - if (argc <= 1) { - if (dbg) - exit(0); - ERROR "no program given" FATAL; - } - dprintf( ("program = |%s|\n", argv[1]) ); - lexprog = argv[1]; - argc--; - argv++; - } - recinit(recsize); - syminit(); - compile_time = 1; - argv[0] = cmdname; /* put prog name at front of arglist */ - 
dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) ); - arginit(argc, argv); - envinit(environ); - yyparse(); - if (fs) - *FS = tostring(qstring(fs, '\0')); - dprintf( ("errorflag=%d\n", errorflag) ); - if (errorflag == 0) { - compile_time = 0; - run(winner); - } else - bracecheck(); - return(errorflag); -} - -int pgetc(void) /* get 1 character from awk program */ -{ - int c; - - for (;;) { - if (yyin == NULL) { - if (curpfile >= npfile) - return EOF; - if (strcmp((char *) pfile[curpfile], "-") == 0) - yyin = stdin; - else if ((yyin = fopen((char *) pfile[curpfile], "r")) == NULL) - ERROR "can't open file %s", pfile[curpfile] FATAL; - } - if ((c = getc(yyin)) != EOF) - return c; - if (yyin != stdin) - fclose(yyin); - yyin = NULL; - curpfile++; - } -} =makefile -# /**************************************************************** -# Copyright (C) AT&T 1993 -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software and -# its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that the copyright notice and this -# permission notice and warranty disclaimer appear in supporting -# documentation, and that the name of AT&T or any of its entities -# not be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior -# permission. -# -# AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. -# IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY -# SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER -# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
-# ****************************************************************/ - -YFLAGS = -d -CFLAGS = -g -N -I/usr/include/lcc -A -CC = lcc - -OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o - -SOURCE = awk.h proto.h awk.g.y awk.lx.l b.c main.c maketab.c parse.c lib.c run.c tran.c -CSOURCE = y.tab.c lex.yy.c b.c main.c proctab.c parse.c lib.c run.c tran.c - -a.out: awk.g.o awk.lx.o $(OFILES) $(ALLOC) awk.h - $(CC) $(CFLAGS) awk.g.o awk.lx.o $(OFILES) $(ALLOC) -lm - -$(OFILES) awk.lx.o: awk.h prevy.tab.h proto.h - -awk.g.o: awk.h proto.h - -prevy.tab.h: y.tab.h - -cmp -s y.tab.h prevy.tab.h || (cp y.tab.h prevy.tab.h; echo change maketab) - -proctab.c: maketab - ./maketab >proctab.c - -maketab: prevy.tab.h maketab.c - $(CC) $(CFLAGS) maketab.c -o maketab - -bundle: - @bundle README FIXES $(SOURCE) makefile awk.1 - -bowell: $(SOURCE) makefile NEW FIXES awktest.a - push bowell $? /usr/src/cmd/awk - touch bowell - -profile: - make 'CFLAGS=-p' - -install: a.out - cp a.out /usr/bin/awk - strip /usr/bin/awk - -clean: - rm -f a.out *.o t.* *temp* *.out *junk* y.tab.* lex.yy.c prevy.tab.h maketab proctab.c foo* glop* + END OF ARCHIVE