master xplshn/aruu / cmd / posix / awk / awkgram.y
  1/****************************************************************
  2Copyright (C) Lucent Technologies 1997
  3All Rights Reserved
  4
  5Permission to use, copy, modify, and distribute this software and
  6its documentation for any purpose and without fee is hereby
  7granted, provided that the above copyright notice appear in all
  8copies and that both that the copyright notice and this
  9permission notice and warranty disclaimer appear in supporting
 10documentation, and that the name Lucent Technologies or any of
 11its entities not be used in advertising or publicity pertaining
 12to distribution of the software without specific, written prior
 13permission.
 14
 15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
 17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
 18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 22THIS SOFTWARE.
 23****************************************************************/
 24
 25%{
/* C prologue: headers, forward declarations and parser-wide state used by the rule actions. */
 26#include <stdio.h>
 27#include <string.h>
 28#include "awk.h"
 29
 30void checkdup(Node *list, Cell *item);
 31int yywrap(void) { return(1); }
 32
 33Node	*beginloc = 0;	/* statement lists from XBEGIN actions, chained with linkum() */
 34Node	*endloc = 0;	/* statement lists from XEND actions, chained with linkum() */
 35bool	infunc	= false;	/* = true while the body of a func is parsed (set once its arglist is closed) */
 36int	inloop	= 0;	/* >= 1 if in while, for, do; can't be bool, since loops can nest */
 37char	*curfname = 0;	/* current function name */
 38Node	*arglist = 0;	/* list of args for current function */
 39%}
 40
 41%union {
	/* Semantic value of a grammar symbol; the member is selected by the <tag> in %token/%type. */
 42	Node	*p;	/* <p>: parse-tree node */
 43	Cell	*cp;	/* <cp>: Cell, used for VAR, IVAR, VARNF, CALL, NUMBER, STRING */
 44	int	i;	/* <i>: integer value, used for keyword, operator and punctuation tokens */
 45	char	*s;	/* <s>: character string, used for REGEXPR and reg_expr */
 46}
 47
 48%token	<i>	FIRSTTOKEN	/* must be first */
/* Token declarations, grouped by the union member that holds their value. */
 49%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
 50%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
 51%token	<i>	ARRAY
 52%token	<i>	MATCH NOTMATCH MATCHOP
 53%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
 54%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
 55%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
 56%token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
 57%token	<i>	ADD MINUS MULT DIVIDE MOD
 58%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
 59%token	<i>	PRINT PRINTF SPRINTF
 60%token	<p>	ELSE INTEST CONDEXPR
 61%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
 62%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
 63%token	<s>	REGEXPR
 64
/* Semantic value types of the nonterminals. */
 65%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
 66%type	<p>	pa_pat pa_stat pa_stats
 67%type	<s>	reg_expr
 68%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
 69%type	<p>	var varname funcname varlist
 70%type	<p>	for if else while
 71%type	<i>	do st
 72%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
 73%type	<i>	subop print
 74%type	<cp>	string
 75
/*
 * Precedence and associativity table; in yacc each later line has higher
 * precedence than the lines before it.  Names that appear only here
 * (GETLINE, RETURN, SPLIT, SUBSTR, WHILE, CAT, NOT, UMINUS, UPLUS, POWER,
 * DECR, INCR, INDIRECT) have no separate %token line above.
 */
 76%right	ASGNOP
 77%right	'?'
 78%right	':'
 79%left	BOR
 80%left	AND
 81%left	GETLINE
 82%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
 83%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
 84%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
 85%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
 86%left	REGEXPR VAR VARNF IVAR WHILE '('
 87%left	CAT
 88%left	'+' '-'
 89%left	'*' '/' '%'
 90%left	NOT UMINUS UPLUS
 91%right	POWER
 92%right	DECR INCR
 93%left	INDIRECT
 94%token	LASTTOKEN	/* must be last */
 95
 96%%
 97
 98program:
	/*
	 * Start symbol.  When errorflag is 0 the PROGRAM node built from beginloc,
	 * the pattern-action list ($1) and endloc is stored in winner.  On a parse
	 * error the lookahead is discarded, bracecheck() is called and
	 * "bailing out" is reported.
	 */
 99	  pas	{ if (errorflag==0)
100			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
101	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
102	;
103
/* AND token followed by any number of NL tokens (left-recursive). */
104and:
105	  AND | and NL
106	;
107
/* BOR token followed by any number of NL tokens. */
108bor:
109	  BOR | bor NL
110	;
111
/* ',' followed by any number of NL tokens. */
112comma:
113	  ',' | comma NL
114	;
115
/* DO keyword followed by any number of NL tokens. */
116do:
117	  DO | do NL
118	;
119
/* ELSE keyword followed by any number of NL tokens. */
120else:
121	  ELSE | else NL
122	;
123
/*
 * The three for-statement forms: with a test, with an empty test (NIL is
 * passed to stat4), and "for (var in array)".  The mid-rule action {inloop++;}
 * runs before the body is parsed and occupies a position of its own, which is
 * why the body is $12, $10 and $8 respectively; the final action undoes the
 * increment.
 */
124for:
125	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
126		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
127	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
128		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
129	| FOR '(' varname IN varname rparen {inloop++;} stmt
130		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
131	;
132
/*
 * Name in a function definition, either a VAR or a CALL token.  setfname()
 * validates the Cell and records its name in curfname.
 */
133funcname:
134	  VAR	{ setfname($1); }
135	| CALL	{ setfname($1); }
136	;
137
/* Header of an if statement; its value is the condition passed through notnull(). */
138if:
139	  IF '(' pattern rparen		{ $$ = notnull($3); }
140	;
141
/* '{' followed by any number of NL tokens. */
142lbrace:
143	  '{' | lbrace NL
144	;
145
/* One or more NL tokens. */
146nl:
147	  NL | nl NL
148	;
149
/* Zero or more NL tokens; the value is 0 when nothing is matched. */
150opt_nl:
151	  /* empty */	{ $$ = 0; }
152	| nl
153	;
154
/* Optional run of separators (see pst); the value is 0 when nothing is matched. */
155opt_pst:
156	  /* empty */	{ $$ = 0; }
157	| pst
158	;
159
160
/* Optional simple_stmt, used for the init and step parts of for; 0 when absent. */
161opt_simple_stmt:
162	  /* empty */			{ $$ = 0; }
163	| simple_stmt
164	;
165
/*
 * Whole program body: pattern-action statements with optional separators
 * before and after.  The value is the statement list, or 0 when the input
 * holds only separators.
 */
166pas:
167	  opt_pst			{ $$ = 0; }
168	| opt_pst pa_stats opt_pst	{ $$ = $2; }
169	;
170
/* Pattern of a pattern-action statement, passed through notnull(). */
171pa_pat:
172	  pattern	{ $$ = notnull($1); }
173	;
174
/*
 * One top-level item of the program:
 *   - pattern alone: PASTAT whose action is a PRINT of rectonode()
 *   - pattern { stmts }
 *   - pattern , pattern with or without an action, built with pa2stat()
 *   - { stmts } with no pattern (NIL pattern)
 *   - XBEGIN / XEND blocks: appended to beginloc / endloc, value 0
 *   - FUNC definition: infunc is set by the mid-rule action after the
 *     parameter list and cleared (together with curfname) after the body;
 *     the mid-rule action counts as $6, so the body is $8.  Value 0.
 */
175pa_stat:
176	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
177	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
178	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
179	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
180	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
181	| XBEGIN lbrace stmtlist '}'
182		{ beginloc = linkum(beginloc, $3); $$ = 0; }
183	| XEND lbrace stmtlist '}'
184		{ endloc = linkum(endloc, $3); $$ = 0; }
185	| FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
186		{ infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
187	;
188
/* List of pa_stat items with optional separators between them, chained with linkum(). */
189pa_stats:
190	  pa_stat
191	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
192	;
193
/* One or more comma-separated patterns, chained with linkum(). */
194patlist:
195	  pattern
196	| patlist comma pattern		{ $$ = linkum($1, $3); }
197	;
198
/*
 * Restricted expression form used for unparenthesized print arguments (via
 * pplist and prarg).  Compared with `pattern` it has no EQ/GE/GT/LE/LT/NE
 * alternatives and no '|' GETLINE alternatives.
 * NOTE(review): presumably so that '>' and '|' after print are read as
 * redirections rather than operators -- confirm.
 *
 * For MATCHOP, a literal regex or a constant operand (constnode) is compiled
 * with makedfa() at parse time and the first op3 argument is NIL; otherwise
 * the operand is kept as an expression and the first argument is (Node *)1.
 */
199ppattern:
200	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
201	| ppattern '?' ppattern ':' ppattern %prec '?'
202	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
203	| ppattern bor ppattern %prec BOR
204		{ $$ = op2(BOR, notnull($1), notnull($3)); }
205	| ppattern and ppattern %prec AND
206		{ $$ = op2(AND, notnull($1), notnull($3)); }
207	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
208	| ppattern MATCHOP ppattern
209		{ if (constnode($3)) {
210			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
211			free($3);
212		  } else
213			$$ = op3($2, (Node *)1, $1, $3); }
214	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
215	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
216	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
217	| re
218	| term
219	;
220
/*
 * General expression: assignment, ?:, logical or/and, comparisons, regex
 * match, array membership, "cmd | getline", concatenation (juxtaposition,
 * with CAT precedence), regex literals and terms.
 *
 * For MATCHOP, a literal regex or a constant operand (constnode) is compiled
 * with makedfa() at parse time and the first op3 argument is NIL; otherwise
 * the operand is kept as an expression and the first argument is (Node *)1.
 *
 * The two '|' GETLINE alternatives only report a syntax error when `safe` is
 * set; $$ is not assigned explicitly on that path.
 */
221pattern:
222	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
223	| pattern '?' pattern ':' pattern %prec '?'
224	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
225	| pattern bor pattern %prec BOR
226		{ $$ = op2(BOR, notnull($1), notnull($3)); }
227	| pattern and pattern %prec AND
228		{ $$ = op2(AND, notnull($1), notnull($3)); }
229	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
230	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
231	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
232	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
233	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
234	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
235	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
236	| pattern MATCHOP pattern
237		{ if (constnode($3)) {
238			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
239			free($3);
240		  } else
241			$$ = op3($2, (Node *)1, $1, $3); }
242	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
243	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
244	| pattern '|' GETLINE var	{
245			if (safe) SYNTAX("cmd | getline is unsafe");
246			else $$ = op3(GETLINE, $4, itonp($2), $1); }
247	| pattern '|' GETLINE		{
248			if (safe) SYNTAX("cmd | getline is unsafe");
249			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
250	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
251	| re
252	| term
253	;
254
/* Two or more comma-separated patterns, chained with linkum(); used inside parentheses. */
255plist:
256	  pattern comma pattern		{ $$ = linkum($1, $3); }
257	| plist comma pattern		{ $$ = linkum($1, $3); }
258	;
259
/* One or more comma-separated ppatterns, chained with linkum(). */
260pplist:
261	  ppattern
262	| pplist comma ppattern		{ $$ = linkum($1, $3); }
263	;
264
/*
 * Argument list of print/printf: nothing (rectonode() is used instead), an
 * unparenthesized pplist, or a parenthesized plist.
 */
265prarg:
266	  /* empty */			{ $$ = rectonode(); }
267	| pplist
268	| '(' plist ')'			{ $$ = $2; }
269	;
270
/* Either output keyword; the token value is what simple_stmt passes to stat3(). */
271print:
272	  PRINT | PRINTF
273	;
274
/* One or more separators, each an NL or a ';'. */
275pst:
276	  NL | ';' | pst NL | pst ';'
277	;
278
/* '}' followed by any number of NL tokens. */
279rbrace:
280	  '}' | rbrace NL
281	;
282
/*
 * Regex literal used as an expression: a MATCH node against rectonode(),
 * with the DFA built by makedfa(); the regex string is freed afterwards.
 * A NOT prefix wraps the result in a NOT node.
 */
283re:
284	   reg_expr
285		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); free($1); }
286	| NOT re	{ $$ = op1(NOT, notnull($2)); }
287	;
288
/*
 * /regex/ literal.  The mid-rule action calls startreg() right after the
 * opening '/'; it occupies position $2, so the REGEXPR string is $3.
 */
289reg_expr:
290	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
291	;
292
/* ')' followed by any number of NL tokens. */
293rparen:
294	  ')' | rparen NL
295	;
296
/*
 * Statements without a terminator: print/printf with optional redirection
 * ('|', APPEND, GT), delete (one element or the whole array), an expression
 * used as a statement, and error recovery.
 * When `safe` is set, the redirecting forms only report a syntax error and
 * do not assign $$ explicitly.
 */
297simple_stmt:
298	  print prarg '|' term		{
299			if (safe) SYNTAX("print | is unsafe");
300			else $$ = stat3($1, $2, itonp($3), $4); }
301	| print prarg APPEND term	{
302			if (safe) SYNTAX("print >> is unsafe");
303			else $$ = stat3($1, $2, itonp($3), $4); }
304	| print prarg GT term		{
305			if (safe) SYNTAX("print > is unsafe");
306			else $$ = stat3($1, $2, itonp($3), $4); }
307	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
308	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
309	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
310	| pattern			{ $$ = exptostat($1); }
311	| error				{ yyclearin; SYNTAX("illegal statement"); }
312	;
313
/* Statement terminator: one or more NL, or a ';' followed by optional NLs. */
314st:
315	  nl
316	| ';' opt_nl
317	;
318
/*
 * A complete statement.
 *   - BREAK / CONTINUE report a syntax error when inloop is 0; NEXT and
 *     NEXTFILE report one when infunc is set.  In each case the statement
 *     node is still built after the error is reported.
 *   - do/while bodies are bracketed by inloop++ / --inloop.  In the do form
 *     both adjustments are mid-rule actions ($2 and $4), so the body is $3
 *     and the condition is $7.
 *   - An empty statement (';') yields 0.
 */
319stmt:
320	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
321				  $$ = stat1(BREAK, NIL); }
322	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
323				  $$ = stat1(CONTINUE, NIL); }
324	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
325		{ $$ = stat2(DO, $3, notnull($7)); }
326	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
327	| EXIT st		{ $$ = stat1(EXIT, NIL); }
328	| for
329	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
330	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
331	| lbrace stmtlist rbrace { $$ = $2; }
332	| NEXT st	{ if (infunc)
333				SYNTAX("next is illegal inside a function");
334			  $$ = stat1(NEXT, NIL); }
335	| NEXTFILE st	{ if (infunc)
336				SYNTAX("nextfile is illegal inside a function");
337			  $$ = stat1(NEXTFILE, NIL); }
338	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
339	| RETURN st		{ $$ = stat1(RETURN, NIL); }
340	| simple_stmt st
341	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
342	| ';' opt_nl		{ $$ = 0; }
343	;
344
/* One or more statements, chained with linkum(). */
345stmtlist:
346	  stmt
347	| stmtlist stmt		{ $$ = linkum($1, $2); }
348	;
349
/* SUB or GSUB; the token value is used as the node type in the term rules. */
350subop:
351	  SUB | GSUB
352	;
353
/* One or more adjacent STRING tokens, merged with catstr(). */
354string:
355	  STRING
356	| string STRING		{ $$ = catstr($1, $2); }
357	;
358
/*
 * Primary and arithmetic expressions, function-like builtins and variables.
 * Points that are easy to miss:
 *   - "term '/' ASGNOP term" builds a DIVEQ node.
 *   - unary '+' and NOT reuse the UMINUS precedence via %prec.
 *   - BLTIN with no argument list or an empty one gets rectonode() as its argument.
 *   - index() with a regex literal reports a syntax error, but a node is
 *     still built from the raw string cast to Node *.
 *   - MATCHFCN and subop with a regex literal or a constant operand
 *     (constnode) compile the DFA at parse time with makedfa(..., 1) and
 *     pass NIL first; otherwise the expression is kept and (Node *)1 is
 *     passed first.
 *   - SPLIT passes a token code (STRING or REGEXPR) cast to Node * as its
 *     last operand, tagging the separator as described by the inline comments.
 *   - subop without a target operand uses rectonode().
 */
359term:
360 	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
361 	| term '+' term			{ $$ = op2(ADD, $1, $3); }
362	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
363	| term '*' term			{ $$ = op2(MULT, $1, $3); }
364	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
365	| term '%' term			{ $$ = op2(MOD, $1, $3); }
366	| term POWER term		{ $$ = op2(POWER, $1, $3); }
367	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
368	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
369	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
370	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
371	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
372	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
373	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
374	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
375	| CLOSE term			{ $$ = op1(CLOSE, $2); }
376	| DECR var			{ $$ = op1(PREDECR, $2); }
377	| INCR var			{ $$ = op1(PREINCR, $2); }
378	| var DECR			{ $$ = op1(POSTDECR, $1); }
379	| var INCR			{ $$ = op1(POSTINCR, $1); }
380	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
381	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
382	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
383	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
384	| INDEX '(' pattern comma pattern ')'
385		{ $$ = op2(INDEX, $3, $5); }
386	| INDEX '(' pattern comma reg_expr ')'
387		{ SYNTAX("index() doesn't permit regular expressions");
388		  $$ = op2(INDEX, $3, (Node*)$5); }
389	| '(' pattern ')'		{ $$ = $2; }
390	| MATCHFCN '(' pattern comma reg_expr ')'
391		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); free($5); }
392	| MATCHFCN '(' pattern comma pattern ')'
393		{ if (constnode($5)) {
394			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
395			free($5);
396		  } else
397			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
398	| NUMBER			{ $$ = celltonode($1, CCON); }
399	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
400		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
401	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
402		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); free($7); }
403	| SPLIT '(' pattern comma varname ')'
404		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
405	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
406	| string	 		{ $$ = celltonode($1, CCON); }
407	| subop '(' reg_expr comma pattern ')'
408		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); free($3); }
409	| subop '(' pattern comma pattern ')'
410		{ if (constnode($3)) {
411			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
412			free($3);
413		  } else
414			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
415	| subop '(' reg_expr comma pattern comma var ')'
416		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); free($3); }
417	| subop '(' pattern comma pattern comma var ')'
418		{ if (constnode($3)) {
419			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
420			free($3);
421		  } else
422			$$ = op4($1, (Node *)1, $3, $5, $7); }
423	| SUBSTR '(' pattern comma pattern comma pattern ')'
424		{ $$ = op3(SUBSTR, $3, $5, $7); }
425	| SUBSTR '(' pattern comma pattern ')'
426		{ $$ = op3(SUBSTR, $3, $5, NIL); }
427	| var
428	;
429
/*
 * Variable reference: a plain name, a subscripted name (ARRAY node over
 * makearr() of the name), an IVAR token, or INDIRECT applied to a term.
 * The last two both build INDIRECT nodes.
 */
430var:
431	  varname
432	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
433	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
434	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
435	;
436
/*
 * Parameter list of a function definition: empty, or comma-separated VAR
 * tokens.  Each action also stores the list built so far in the global
 * arglist.  checkdup() reports a name that already appears in the list.
 */
437varlist:
438	  /* nothing */		{ arglist = $$ = 0; }
439	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
440	| varlist comma VAR	{
441			checkdup($1, $3);
442			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
443	;
444
/*
 * Simple (unsubscripted) name: a VAR cell wrapped by celltonode(), an ARG
 * token wrapped in an ARG node, or the VARNF cell wrapped in a VARNF node.
 */
445varname:
446	  VAR			{ $$ = celltonode($1, CVAR); }
447	| ARG 			{ $$ = op1(ARG, itonp($1)); }
448	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
449	;
450
451
/* Header of a while statement; its value is the condition passed through notnull(). */
452while:
453	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
454	;
455
456%%
457
458void setfname(Cell *p)
459{
460	if (isarr(p))
461		SYNTAX("%s is an array, not a function", p->nval);
462	else if (isfcn(p))
463		SYNTAX("you can't define function %s more than once", p->nval);
464	curfname = p->nval;
465}
466
467int constnode(Node *p)
468{
469	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
470}
471
472char *strnode(Node *p)
473{
474	return ((Cell *)(p->narg[0]))->sval;
475}
476
477Node *notnull(Node *n)
478{
479	switch (n->nobj) {
480	case LE: case LT: case EQ: case NE: case GT: case GE:
481	case BOR: case AND: case NOT:
482		return n;
483	default:
484		return op2(NE, n, nullnode);
485	}
486}
487
488void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
489{
490	char *s = cp->nval;
491	for ( ; vl; vl = vl->nnext) {
492		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
493			SYNTAX("duplicate argument %s", s);
494			break;
495		}
496	}
497}