master xplshn/aruu / shared / libutf / mkrunetype.awk
  1# See LICENSE file for copyright and license details.
  2
  3BEGIN {
  4	FS = ";"
  5	# set up hexadecimal lookup table
  6	for(i = 0; i < 16; i++)
  7		hex[sprintf("%X",i)] = i;
  8	HEADER = "/* Automatically generated by mkrunetype.awk */\n#include <stdlib.h>\n\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
  9	HEADER_OTHER = "/* Automatically generated by mkrunetype.awk */\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
 10}
 11
 12$3  ~ /^L/ { alphav[alphac++] = $1; }
 13($3  ~ /^Z/) || ($5 == "WS") || ($5 == "S") || ($5 == "B") { spacev[spacec++] = $1; }
 14$3 == "Cc" { cntrlv[cntrlc++] = $1; }
 15$3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; }
 16$3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; }
 17$3 == "Lt" { titlev[titlec++] = $1; }
 18$3 == "Nd" { digitv[digitc++] = $1; }
 19
 20END {
 21	system("rm -f isalpharune.c isspacerune.c iscntrlrune.c upperrune.c lowerrune.c istitlerune.c isdigitrune.c");
 22
 23	mkis("alpha", alphav, alphac, "isalpharune.c", q, "");
 24	mkis("space", spacev, spacec, "isspacerune.c", q, "");
 25	mkis("cntrl", cntrlv, cntrlc, "iscntrlrune.c", q, "");
 26	mkis("upper", upperv, upperc,   "upperrune.c", tolowerv, "lower");
 27	mkis("lower", lowerv, lowerc,   "lowerrune.c", toupperv, "upper");
 28	mkis("title", titlev, titlec, "istitlerune.c", q, "");
 29	mkis("digit", digitv, digitc, "isdigitrune.c", q, "");
 30
 31	system("rm -f isalnumrune.c isblankrune.c isprintrune.c isgraphrune.c ispunctrune.c isxdigitrune.c");
 32
 33	otheris();
 34}
 35
 36# parse hexadecimal rune index to int
 37function code(s) {
 38	x = 0;
 39	for(i = 1; i <= length(s); i++) {
 40		c = substr(s, i, 1);
 41		x = (x*16) + hex[c];
 42	}
 43	return x;
 44}
 45
 46# generate 'is<name>rune' unicode lookup function
 47function mkis(name, runev, runec, file, casev, casename) {
 48	rune1c = 0;
 49	rune2c = 0;
 50	rune3c = 0;
 51	rune4c = 0;
 52	mode = 1;
 53
 54	#sort rune groups into singletons, ranges and laces
 55	for(j = 0; j < runec; j++) {
 56		# range
 57		if(code(runev[j+1]) == code(runev[j])+1 && ((length(casev) == 0) ||
 58		   code(casev[j+1]) == code(casev[j])+1) && j+1 < runec) {
 59			if (mode == 2) {
 60				continue;
 61			} else if (mode == 3) {
 62				rune3v1[rune3c] = runev[j];
 63				rune3c++;
 64			} else if (mode == 4) {
 65				rune4v1[rune4c] = runev[j];
 66				rune4c++;
 67			}
 68			mode = 2;
 69			rune2v0[rune2c] = runev[j];
 70			if(length(casev) > 0) {
 71				case2v[rune2c] = casev[j];
 72			}
 73			continue;
 74		}
 75		# lace 1
 76		if(code(runev[j+1]) == code(runev[j])+2 && ((length(casev) == 0) ||
 77		   (code(casev[j+1]) == code(runev[j+1])+1 && code(casev[j]) == code(runev[j])+1)) &&
 78		   j+1 < runec) {
 79			if (mode == 3) {
 80				continue;
 81			} else if (mode == 2) {
 82				rune2v1[rune2c] = runev[j];
 83				rune2c++;
 84			} else if (mode == 4) {
 85				rune4v1[rune2c] = runev[j];
 86				rune4c++;
 87			}
 88			mode = 3;
 89			rune3v0[rune3c] = runev[j];
 90			continue;
 91		}
 92		# lace 2
 93		if(code(runev[j+1]) == code(runev[j])+2 && ((length(casev) == 0) ||
 94		   (code(casev[j+1]) == code(runev[j+1])-1 && code(casev[j]) == code(runev[j])-1)) &&
 95		   j+1 < runec) {
 96			if (mode == 4) {
 97				continue;
 98			} else if (mode == 2) {
 99				rune2v1[rune2c] = runev[j];
100				rune2c++;
101			} else if (mode == 3) {
102				rune3v1[rune2c] = runev[j];
103				rune3c++;
104			}
105			mode = 4;
106			rune4v0[rune4c] = runev[j];
107			continue;
108		}
109		# terminating case
110		if (mode == 1) {
111			rune1v[rune1c] = runev[j];
112			if (length(casev) > 0) {
113				case1v[rune1c] = casev[j];
114			}
115			rune1c++;
116		} else if (mode == 2) {
117			rune2v1[rune2c] = runev[j];
118			rune2c++;
119		} else if (mode == 3) {
120			rune3v1[rune3c] = runev[j];
121			rune3c++;
122		} else { #lace 2
123			rune4v1[rune4c] = runev[j];
124			rune4c++;
125		}
126		mode = 1;
127	}
128	print HEADER > file;
129
130	#generate list of laces 1
131	if(rune3c > 0) {
132		print "static const Rune "name"3[][2] = {" > file;
133		for(j = 0; j < rune3c; j++) {
134			print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" }," > file;
135		}
136		print "};\n" > file;
137	}
138
139	#generate list of laces 2
140	if(rune4c > 0) {
141		print "static const Rune "name"4[][2] = {" > file;
142		for(j = 0; j < rune4c; j++) {
143			print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" }," > file;
144		}
145		print "};\n" > file;
146	}
147
148	# generate list of ranges
149	if(rune2c > 0) {
150		if(length(casev) > 0) {
151			print "static const Rune "name"2[][3] = {" > file;
152			for(j = 0; j < rune2c; j++) {
153				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" }," > file;
154			}
155		} else {
156			print "static const Rune "name"2[][2] = {" > file
157			for(j = 0; j < rune2c; j++) {
158				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" }," > file;
159			}
160		}
161		print "};\n" > file;
162	}
163
164	# generate list of singletons
165	if(rune1c > 0) {
166		if(length(casev) > 0) {
167			print "static const Rune "name"1[][2] = {" > file;
168			for(j = 0; j < rune1c; j++) {
169				print "\t{ 0x"rune1v[j]", 0x"case1v[j]" }," > file;
170			}
171		} else {
172			print "static const Rune "name"1[] = {" > file;
173			for(j = 0; j < rune1c; j++) {
174				print "\t0x"rune1v[j]"," > file;
175			}
176		}
177		print "};\n" > file;
178	}
179	# generate lookup function
180	print "int\nis"name"rune(Rune r)\n{" > file;
181	if(rune4c > 0 || rune3c > 0)
182		print "\tconst Rune *match;\n" > file;
183	if(rune4c > 0) {
184		print "\tif((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp)))" > file;
185		print "\t\treturn !((r - match[0]) % 2);" > file;
186	}
187	if(rune3c > 0) {
188		print "\tif((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp)))" > file;
189		print "\t\treturn !((r - match[0]) % 2);" > file;
190	}
191	if(rune2c > 0) {
192		print "\tif(bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp))\n\t\treturn 1;" > file;
193	}
194	if(rune1c > 0) {
195		print "\tif(bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp))\n\t\treturn 1;" > file;
196	}
197	print "\treturn 0;\n}" > file;
198
199	# generate case conversion function
200	if(length(casev) > 0) {
201		print "\nint\nto"casename"rune(Rune r)\n{\n\tRune *match;\n" > file;
202		if(rune4c > 0) {
203			print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp);" > file;
204			print "\tif (match)" > file;
205			print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;" > file;
206		}
207		if(rune3c > 0) {
208			print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp);" > file;
209			print "\tif (match)" > file;
210			print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;" > file;
211		}
212		if(rune2c > 0) {
213			print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp);" > file;
214			print "\tif (match)" > file;
215			print "\t\treturn match[2] + (r - match[0]);" > file;
216		}
217		if(rune1c > 0) {
218			print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp);" > file;
219			print "\tif (match)" > file;
220			print "\t\treturn match[1];" > file;
221		}
222		print "\treturn r;\n}" > file;
223	}
224}
225
# Write the C sources for the rune predicates that need no lookup table of
# their own: each file gets the HEADER_OTHER preamble followed by a single
# function defined by fixed comparisons or in terms of other is*rune()
# functions.
function otheris() {
	print HEADER_OTHER > "isalnumrune.c";
	print "int\nisalnumrune(Rune r)\n{\n\treturn isalpharune(r) || isdigitrune(r);\n}" > "isalnumrune.c";

	print HEADER_OTHER > "isblankrune.c";
	print "int\nisblankrune(Rune r)\n{\n\treturn r == ' ' || r == '\\t';\n}" > "isblankrune.c";

	# printable: not a control rune, not U+2028/U+2029, not U+FFF9..U+FFFB
	print HEADER_OTHER > "isprintrune.c";
	print "int\nisprintrune(Rune r)\n{\n\treturn !iscntrlrune(r) && (r != 0x2028) && (r != 0x2029) &&" > "isprintrune.c";
	print "\t       ((r < 0xFFF9) || (r > 0xFFFB));\n}" > "isprintrune.c";

	print HEADER_OTHER > "isgraphrune.c";
	print "int\nisgraphrune(Rune r)\n{\n\treturn !isspacerune(r) && isprintrune(r);\n}" > "isgraphrune.c";

	print HEADER_OTHER > "ispunctrune.c";
	print "int\nispunctrune(Rune r)\n{\n\treturn isgraphrune(r) && !isalnumrune(r);\n}" > "ispunctrune.c";

	# hexadecimal digits are 0-9, a-f and A-F; the uppercase range must be
	# accepted as well, matching C's isxdigit()
	print HEADER_OTHER > "isxdigitrune.c";
	print "int\nisxdigitrune(Rune r)\n{\n\treturn (r >= '0' && (r - '0') < 10) || (r >= 'a' && (r - 'a') < 6) ||\n\t       (r >= 'A' && (r - 'A') < 6);\n}" > "isxdigitrune.c";
}