commit 9faf0d2
pascalecu
·
2026-02-19 21:21:11 +0000 UTC
parent 67d10f2
reformat everything
93 files changed,
+24395,
-18432
+64,
-60
1@@ -33,8 +33,8 @@
2 #include <stdlib.h>
3 #include <string.h>
4 #include <sys/mman.h>
5-#include <sys/types.h>
6 #include <sys/stat.h>
7+#include <sys/types.h>
8
9 #define min(a, b) ((a) < (b) ? (a) : (b))
10 #define max(a, b) ((a) > (b) ? (a) : (b))
11@@ -53,7 +53,7 @@ struct glyph {
12 static struct {
13 int count;
14 struct glyph *glyphs;
15-} extracted_font = { 0, NULL };
16+} extracted_font = {0, NULL};
17
18 #define PCF_PROPERTIES (1 << 0)
19 #define PCF_ACCELERATORS (1 << 1)
20@@ -136,10 +136,8 @@ handle_compressed_metrics(int32_t count, struct compressed_metrics *m)
21 int i;
22 for (i = 0; i < count; ++i) {
23 struct glyph *glyph = &extracted_font.glyphs[i];
24- glyph->left_bearing =
25- ((int16_t)m[i].left_sided_bearing) - 0x80;
26- glyph->right_bearing =
27- ((int16_t)m[i].right_side_bearing) - 0x80;
28+ glyph->left_bearing = ((int16_t)m[i].left_sided_bearing) - 0x80;
29+ glyph->right_bearing = ((int16_t)m[i].right_side_bearing) - 0x80;
30 glyph->width = ((int16_t)m[i].character_width) - 0x80;
31 glyph->ascent = ((int16_t)m[i].character_ascent) - 0x80;
32 glyph->descent = ((int16_t)m[i].character_descent) - 0x80;
33@@ -161,9 +159,8 @@ handle_metrics(void *metricbuf)
34 if ((metrics->format & PCF_FORMAT_MASK) == PCF_DEFAULT_FORMAT) {
35 fprintf(stderr, "todo...\n");
36 } else if ((metrics->format & PCF_FORMAT_MASK) == PCF_COMPRESSED_METRICS) {
37- handle_compressed_metrics(
38- metrics->compressed.count,
39- &metrics->compressed.compressed_metrics[0]);
40+ handle_compressed_metrics(metrics->compressed.count,
41+ &metrics->compressed.compressed_metrics[0]);
42 } else {
43 fprintf(stderr, "incompatible format\n");
44 abort();
45@@ -181,8 +178,8 @@ handle_glyph_names(struct glyph_names *names)
46
47 fprintf(stderr, "glyph names format %x\n", names->format);
48
49- char *names_start = ((char *)names) + sizeof(struct glyph_names)
50- + (names->glyph_count + 1) * sizeof(int32_t);
51+ char *names_start = ((char *)names) + sizeof(struct glyph_names) +
52+ (names->glyph_count + 1) * sizeof(int32_t);
53
54 int i;
55 for (i = 0; i < names->glyph_count; ++i) {
56@@ -210,8 +207,8 @@ handle_bitmaps(struct bitmaps *bitmaps)
57 abort();
58 }
59
60- char *bitmaps_start = ((char *)bitmaps) + sizeof(struct bitmaps)
61- + (bitmaps->glyph_count + 4) * sizeof(int32_t);
62+ char *bitmaps_start = ((char *)bitmaps) + sizeof(struct bitmaps) +
63+ (bitmaps->glyph_count + 4) * sizeof(int32_t);
64
65 for (unsigned i = 0; i < bitmaps->glyph_count; ++i) {
66 int32_t offset = bitmaps->offsets[i];
67@@ -247,8 +244,9 @@ get_glyph_pixel(struct glyph *glyph, int x, int y)
68 int absx = glyph->hotx + x;
69 int absy = glyph->hoty + y;
70
71- if (absx < 0 || absx >= glyph->width || absy < 0 || absy >= glyph->height)
72+ if (absx < 0 || absx >= glyph->width || absy < 0 || absy >= glyph->height) {
73 return 0;
74+ }
75
76 int stride = (glyph->width + 31) / 32 * 4;
77 unsigned char block = glyph->data[absy * stride + (absx / 8)];
78@@ -275,8 +273,7 @@ add_pixel(uint32_t pixel)
79 if (data_buffer.size == data_buffer.capacity) {
80 data_buffer.capacity *= 2;
81 data_buffer.data =
82- realloc(data_buffer.data,
83- sizeof(uint32_t) * data_buffer.capacity);
84+ realloc(data_buffer.data, sizeof(uint32_t) * data_buffer.capacity);
85 }
86 data_buffer.data[data_buffer.size++] = pixel;
87 }
88@@ -296,8 +293,7 @@ reconstruct_glyph(struct glyph *cursor, struct glyph *mask, char *name,
89 int maxx = max(cursor->right_bearing, mask->right_bearing);
90
91 int miny = min(-cursor->hoty, -mask->hoty);
92- int maxy = max(cursor->height - cursor->hoty,
93- mask->height - mask->hoty);
94+ int maxy = max(cursor->height - cursor->hoty, mask->height - mask->hoty);
95
96 int width = maxx - minx;
97 int height = maxy - miny;
98@@ -315,10 +311,11 @@ reconstruct_glyph(struct glyph *cursor, struct glyph *mask, char *name,
99 char alpha = get_glyph_pixel(mask, x, y);
100 if (alpha) {
101 char color = get_glyph_pixel(cursor, x, y);
102- if (color)
103+ if (color) {
104 add_pixel(0xff000000);
105- else
106+ } else {
107 add_pixel(0xffffffff);
108+ }
109 } else {
110 add_pixel(0);
111 }
112@@ -326,25 +323,33 @@ reconstruct_glyph(struct glyph *cursor, struct glyph *mask, char *name,
113 }
114 }
115
116-/* From http://cgit.freedesktop.org/xorg/lib/libXfont/tree/src/builtins/fonts.c */
117+/* From http://cgit.freedesktop.org/xorg/lib/libXfont/tree/src/builtins/fonts.c
118+ */
119 static const char cursor_licence[] =
120 "/*\n"
121 "* Copyright 1999 SuSE, Inc.\n"
122 "*\n"
123- "* Permission to use, copy, modify, distribute, and sell this software and its\n"
124- "* documentation for any purpose is hereby granted without fee, provided that\n"
125+ "* Permission to use, copy, modify, distribute, and sell this software and "
126+ "its\n"
127+ "* documentation for any purpose is hereby granted without fee, provided "
128+ "that\n"
129 "* the above copyright notice appear in all copies and that both that\n"
130 "* copyright notice and this permission notice appear in supporting\n"
131 "* documentation, and that the name of SuSE not be used in advertising or\n"
132 "* publicity pertaining to distribution of the software without specific,\n"
133 "* written prior permission. SuSE makes no representations about the\n"
134- "* suitability of this software for any purpose. It is provided \"as is\"\n"
135+ "* suitability of this software for any purpose. It is provided \"as "
136+ "is\"\n"
137 "* without express or implied warranty.\n"
138 "*\n"
139- "* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL\n"
140- "* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE\n"
141- "* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES\n"
142- "* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION\n"
143+ "* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING "
144+ "ALL\n"
145+ "* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL "
146+ "SuSE\n"
147+ "* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY "
148+ "DAMAGES\n"
149+ "* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN "
150+ "ACTION\n"
151 "* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN\n"
152 "* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.\n"
153 "*\n"
154@@ -368,16 +373,18 @@ write_output_file(FILE *file, struct reconstructed_glyph *glyphs, int n)
155
156 for (j = 0; j < size; ++j) {
157 fprintf(file, "0x%08x, ", data[j]);
158- if (++counter % 6 == 0)
159+ if (++counter % 6 == 0) {
160 fprintf(file, "\n\t");
161+ }
162 }
163 }
164 fprintf(file, "\n};\n\n");
165
166 fputs("enum cursor_type {\n", file);
167
168- for (i = 0; i < n; ++i)
169+ for (i = 0; i < n; ++i) {
170 fprintf(file, "\tcursor_%s,\n", glyphs[i].name);
171+ }
172
173 fputs("};\n\n", file);
174
175@@ -388,11 +395,11 @@ write_output_file(FILE *file, struct reconstructed_glyph *glyphs, int n)
176 "\tsize_t offset;\n"
177 "} cursor_metadata[] = {\n");
178
179- for (i = 0; i < n; ++i)
180- fprintf(file, "\t{ %d, %d, %d, %d, %zu }, /* %s */\n",
181- glyphs[i].width, glyphs[i].height,
182- glyphs[i].hotspot_x, glyphs[i].hotspot_y,
183+ for (i = 0; i < n; ++i) {
184+ fprintf(file, "\t{ %d, %d, %d, %d, %zu }, /* %s */\n", glyphs[i].width,
185+ glyphs[i].height, glyphs[i].hotspot_x, glyphs[i].hotspot_y,
186 glyphs[i].offset, glyphs[i].name);
187+ }
188
189 fputs("};\n", file);
190 }
191@@ -408,7 +415,8 @@ find_mask_glyph(char *name)
192 for (i = 0; i < extracted_font.count; ++i) {
193 struct glyph *g = &extracted_font.glyphs[i];
194 int l2 = strlen(g->name);
195- if ((l2 == len + masklen) && (memcmp(g->name, name, len) == 0) && (memcmp(g->name + len, mask, masklen) == 0)) {
196+ if ((l2 == len + masklen) && (memcmp(g->name, name, len) == 0) &&
197+ (memcmp(g->name + len, mask, masklen) == 0)) {
198 return g;
199 }
200 }
201@@ -428,43 +436,41 @@ find_cursor_and_mask(const char *name,
202
203 for (i = 0; i < extracted_font.count && (!*mask || !*cursor); ++i) {
204 struct glyph *g = &extracted_font.glyphs[i];
205- if (!strcmp(name, g->name))
206+ if (!strcmp(name, g->name)) {
207 *cursor = g;
208- else if (!strcmp(mask_name, g->name))
209+ } else if (!strcmp(mask_name, g->name)) {
210 *mask = g;
211+ }
212 }
213 }
214
215 static struct {
216 char *target_name, *source_name;
217-} interesting_cursors[] = {
218- { "bottom_left_corner", "bottom_left_corner" },
219- { "bottom_right_corner", "bottom_right_corner" },
220- { "bottom_side", "bottom_side" },
221- { "grabbing", "fleur" },
222- { "left_ptr", "left_ptr" },
223- { "left_side", "left_side" },
224- { "right_side", "right_side" },
225- { "top_left_corner", "top_left_corner" },
226- { "top_right_corner", "top_right_corner" },
227- { "top_side", "top_side" },
228- { "xterm", "xterm" },
229- { "hand1", "hand1" },
230- { "watch", "watch" }
231-};
232+} interesting_cursors[] = {{"bottom_left_corner", "bottom_left_corner"},
233+ {"bottom_right_corner", "bottom_right_corner"},
234+ {"bottom_side", "bottom_side"},
235+ {"grabbing", "fleur"},
236+ {"left_ptr", "left_ptr"},
237+ {"left_side", "left_side"},
238+ {"right_side", "right_side"},
239+ {"top_left_corner", "top_left_corner"},
240+ {"top_right_corner", "top_right_corner"},
241+ {"top_side", "top_side"},
242+ {"xterm", "xterm"},
243+ {"hand1", "hand1"},
244+ {"watch", "watch"}};
245
246 static void
247 output_interesting_cursors(FILE *file)
248 {
249 int i;
250 int n = sizeof(interesting_cursors) / sizeof(interesting_cursors[0]);
251- struct reconstructed_glyph *glyphs =
252- malloc(n * sizeof(*glyphs));
253+ struct reconstructed_glyph *glyphs = malloc(n * sizeof(*glyphs));
254
255 for (i = 0; i < n; ++i) {
256 struct glyph *cursor, *mask;
257- find_cursor_and_mask(interesting_cursors[i].source_name,
258- &cursor, &mask);
259+ find_cursor_and_mask(interesting_cursors[i].source_name, &cursor,
260+ &mask);
261 if (!cursor) {
262 fprintf(stderr, "no cursor for %s\n",
263 interesting_cursors[i].source_name);
264@@ -475,8 +481,7 @@ output_interesting_cursors(FILE *file)
265 interesting_cursors[i].source_name);
266 abort();
267 }
268- reconstruct_glyph(cursor, mask,
269- interesting_cursors[i].target_name,
270+ reconstruct_glyph(cursor, mask, interesting_cursors[i].target_name,
271 &glyphs[i]);
272 }
273
274@@ -496,8 +501,7 @@ main(int argc, char *argv[])
275
276 fstat(fd, &filestat);
277
278- void *fontbuf = mmap(NULL, filestat.st_size, PROT_READ,
279- MAP_PRIVATE, fd, 0);
280+ void *fontbuf = mmap(NULL, filestat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
281
282 handle_pcf(fontbuf);
283
+45,
-36
1@@ -39,8 +39,8 @@ struct window {
2 struct wl_list link;
3 };
4
5-static const char *terminal_command[] = { "st-wl", NULL };
6-static const char *dmenu_command[] = { "dmenu_run-wl", NULL };
7+static const char *terminal_command[] = {"st-wl", NULL};
8+static const char *dmenu_command[] = {"dmenu_run-wl", NULL};
9 static const uint32_t border_width = 1;
10 static const uint32_t border_color_active = 0xff333388;
11 static const uint32_t border_color_normal = 0xff888888;
12@@ -60,27 +60,29 @@ arrange(struct screen *screen)
13 struct swc_rectangle geometry;
14 struct swc_rectangle *screen_geometry = &screen->swc->usable_geometry;
15
16- if (screen->num_windows == 0)
17+ if (screen->num_windows == 0) {
18 return;
19+ }
20
21 num_columns = ceil(sqrt(screen->num_windows));
22 num_rows = screen->num_windows / num_columns + 1;
23 window = wl_container_of(screen->windows.next, window, link);
24
25 for (column_index = 0; &window->link != &screen->windows; ++column_index) {
26- geometry.x = screen_geometry->x + border_width
27- + screen_geometry->width * column_index / num_columns;
28- geometry.width = screen_geometry->width / num_columns
29- - 2 * border_width;
30+ geometry.x = screen_geometry->x + border_width +
31+ screen_geometry->width * column_index / num_columns;
32+ geometry.width =
33+ screen_geometry->width / num_columns - 2 * border_width;
34
35- if (column_index == screen->num_windows % num_columns)
36+ if (column_index == screen->num_windows % num_columns) {
37 --num_rows;
38+ }
39
40 for (row_index = 0; row_index < num_rows; ++row_index) {
41- geometry.y = screen_geometry->y + border_width
42- + screen_geometry->height * row_index / num_rows;
43- geometry.height = screen_geometry->height / num_rows
44- - 2 * border_width;
45+ geometry.y = screen_geometry->y + border_width +
46+ screen_geometry->height * row_index / num_rows;
47+ geometry.height =
48+ screen_geometry->height / num_rows - 2 * border_width;
49
50 swc_window_set_geometry(window->swc, &geometry);
51 window = wl_container_of(window->link.next, window, link);
52@@ -112,15 +114,16 @@ static void
53 focus(struct window *window)
54 {
55 if (focused_window) {
56- swc_window_set_border(focused_window->swc,
57- border_color_normal, border_width);
58+ swc_window_set_border(focused_window->swc, border_color_normal,
59+ border_width);
60 }
61
62 if (window) {
63 swc_window_set_border(window->swc, border_color_active, border_width);
64 swc_window_focus(window->swc);
65- } else
66+ } else {
67 swc_window_focus(NULL);
68+ }
69
70 focused_window = window;
71 }
72@@ -145,8 +148,8 @@ screen_entered(void *data)
73 }
74
75 static const struct swc_screen_handler screen_handler = {
76- .usable_geometry_changed = &screen_usable_geometry_changed,
77- .entered = &screen_entered,
78+ .usable_geometry_changed = &screen_usable_geometry_changed,
79+ .entered = &screen_entered,
80 };
81
82 static void
83@@ -159,11 +162,11 @@ window_destroy(void *data)
84 next_focus = wl_container_of(window->link.next, window, link);
85
86 if (&next_focus->link == &window->screen->windows) {
87- next_focus = wl_container_of(window->link.prev,
88- window, link);
89+ next_focus = wl_container_of(window->link.prev, window, link);
90
91- if (&next_focus->link == &window->screen->windows)
92+ if (&next_focus->link == &window->screen->windows) {
93 next_focus = NULL;
94+ }
95 }
96
97 focus(next_focus);
98@@ -182,8 +185,8 @@ window_entered(void *data)
99 }
100
101 static const struct swc_window_handler window_handler = {
102- .destroy = &window_destroy,
103- .entered = &window_entered,
104+ .destroy = &window_destroy,
105+ .entered = &window_entered,
106 };
107
108 static void
109@@ -193,8 +196,9 @@ new_screen(struct swc_screen *swc)
110
111 screen = malloc(sizeof(*screen));
112
113- if (!screen)
114+ if (!screen) {
115 return;
116+ }
117
118 screen->swc = swc;
119 screen->num_windows = 0;
120@@ -210,8 +214,9 @@ new_window(struct swc_window *swc)
121
122 window = malloc(sizeof(*window));
123
124- if (!window)
125+ if (!window) {
126 return;
127+ }
128
129 window->swc = swc;
130 window->screen = NULL;
131@@ -221,15 +226,16 @@ new_window(struct swc_window *swc)
132 focus(window);
133 }
134
135-const struct swc_manager manager = { &new_screen, &new_window };
136+const struct swc_manager manager = {&new_screen, &new_window};
137
138 static void
139 spawn(void *data, uint32_t time, uint32_t value, uint32_t state)
140 {
141 char *const *command = data;
142
143- if (state != WL_KEYBOARD_KEY_STATE_PRESSED)
144+ if (state != WL_KEYBOARD_KEY_STATE_PRESSED) {
145 return;
146+ }
147
148 if (fork() == 0) {
149 execvp(command[0], command);
150@@ -240,8 +246,9 @@ spawn(void *data, uint32_t time, uint32_t value, uint32_t state)
151 static void
152 quit(void *data, uint32_t time, uint32_t value, uint32_t state)
153 {
154- if (state != WL_KEYBOARD_KEY_STATE_PRESSED)
155+ if (state != WL_KEYBOARD_KEY_STATE_PRESSED) {
156 return;
157+ }
158
159 wl_display_terminate(display);
160 }
161@@ -252,23 +259,25 @@ main(int argc, char *argv[])
162 const char *socket;
163
164 display = wl_display_create();
165- if (!display)
166+ if (!display) {
167 return EXIT_FAILURE;
168+ }
169
170 socket = wl_display_add_socket_auto(display);
171- if (!socket)
172+ if (!socket) {
173 return EXIT_FAILURE;
174+ }
175 setenv("WAYLAND_DISPLAY", socket, 1);
176
177- if (!swc_initialize(display, NULL, &manager))
178+ if (!swc_initialize(display, NULL, &manager)) {
179 return EXIT_FAILURE;
180+ }
181
182- swc_add_binding(SWC_BINDING_KEY, SWC_MOD_LOGO, XKB_KEY_Return,
183- &spawn, terminal_command);
184- swc_add_binding(SWC_BINDING_KEY, SWC_MOD_LOGO, XKB_KEY_r,
185- &spawn, dmenu_command);
186- swc_add_binding(SWC_BINDING_KEY, SWC_MOD_LOGO, XKB_KEY_q,
187- &quit, NULL);
188+ swc_add_binding(SWC_BINDING_KEY, SWC_MOD_LOGO, XKB_KEY_Return, &spawn,
189+ terminal_command);
190+ swc_add_binding(SWC_BINDING_KEY, SWC_MOD_LOGO, XKB_KEY_r, &spawn,
191+ dmenu_command);
192+ swc_add_binding(SWC_BINDING_KEY, SWC_MOD_LOGO, XKB_KEY_q, &quit, NULL);
193
194 event_loop = wl_display_get_event_loop(display);
195 wl_display_run(display);
+2,
-2
1@@ -25,9 +25,9 @@
2 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3 * SOFTWARE.
4 */
5-#include <sys/sysmacros.h>
6-#include <linux/major.h>
7 #include "devmajor.h"
8+#include <linux/major.h>
9+#include <sys/sysmacros.h>
10
11 #ifndef DRM_MAJOR
12 #define DRM_MAJOR 226
+8,
-6
1@@ -20,19 +20,22 @@
2 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3 * SOFTWARE.
4 */
5-#include <sys/stat.h>
6-#include <stdlib.h>
7 #include "devmajor.h"
8+#include <stdlib.h>
9+#include <sys/stat.h>
10
11 bool
12 device_is_input(dev_t rdev)
13 {
14- if (major(rdev) == getdevmajor("wskbd", S_IFCHR))
15+ if (major(rdev) == getdevmajor("wskbd", S_IFCHR)) {
16 return true;
17- if (major(rdev) == getdevmajor("wsmouse", S_IFCHR))
18+ }
19+ if (major(rdev) == getdevmajor("wsmouse", S_IFCHR)) {
20 return true;
21- if (major(rdev) == getdevmajor("wsmux", S_IFCHR))
22+ }
23+ if (major(rdev) == getdevmajor("wsmux", S_IFCHR)) {
24 return true;
25+ }
26 return false;
27 }
28
29@@ -47,4 +50,3 @@ device_is_drm(dev_t rdev)
30 {
31 return major(rdev) == getdevmajor("drm", S_IFCHR);
32 }
33-
+14,
-11
1@@ -21,10 +21,10 @@
2 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3 * SOFTWARE.
4 */
5-#include <sys/stat.h>
6+#include "devmajor.h"
7 #include <stdlib.h>
8 #include <string.h>
9-#include "devmajor.h"
10+#include <sys/stat.h>
11
12 static bool
13 devname_is(dev_t rdev, const char *prefix)
14@@ -33,8 +33,9 @@ devname_is(dev_t rdev, const char *prefix)
15 size_t len;
16
17 name = devname(rdev, S_IFCHR);
18- if (!name || name[0] == '?' || name[1] == '?')
19+ if (!name || name[0] == '?' || name[1] == '?') {
20 return false;
21+ }
22
23 len = strlen(prefix);
24 return strncmp(name, prefix, len) == 0;
25@@ -43,12 +44,15 @@ devname_is(dev_t rdev, const char *prefix)
26 bool
27 device_is_input(dev_t rdev)
28 {
29- if (devname_is(rdev, "wskbd"))
30+ if (devname_is(rdev, "wskbd")) {
31 return true;
32- if (devname_is(rdev, "wsmouse"))
33+ }
34+ if (devname_is(rdev, "wsmouse")) {
35 return true;
36- if (devname_is(rdev, "wsmux"))
37+ }
38+ if (devname_is(rdev, "wsmux")) {
39 return true;
40+ }
41 return false;
42 }
43
44@@ -64,11 +68,10 @@ device_is_drm(dev_t rdev)
45 const char *n;
46
47 n = devname(rdev, S_IFCHR);
48- if (!n)
49+ if (!n) {
50 return false;
51+ }
52
53- return
54- strncmp(n, "drm", 3) == 0 ||
55- strncmp(n, "dri/card", 8) == 0 ||
56- strncmp(n, "dri/renderD", 11) == 0;
57+ return strncmp(n, "drm", 3) == 0 || strncmp(n, "dri/card", 8) == 0 ||
58+ strncmp(n, "dri/renderD", 11) == 0;
59 }
+1,
-1
1@@ -24,8 +24,8 @@
2 #ifndef DEVMAJOR_H
3 #define DEVMAJOR_H
4
5-#include <sys/stat.h>
6 #include <stdbool.h>
7+#include <sys/stat.h>
8
9 bool device_is_input(dev_t);
10
+119,
-69
1@@ -25,13 +25,14 @@
2 * SOFTWARE.
3 */
4
5-#include "protocol.h"
6 #include "devmajor.h"
7+#include "protocol.h"
8
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <limits.h>
12 #include <poll.h>
13+#include <signal.h>
14 #include <spawn.h>
15 #include <stdbool.h>
16 #include <stdio.h>
17@@ -39,13 +40,12 @@
18 #include <stdnoreturn.h>
19 #include <string.h>
20 #include <unistd.h>
21-#include <signal.h>
22
23+#include <sys/ioctl.h>
24 #include <sys/socket.h>
25 #include <sys/stat.h>
26-#include <sys/wait.h>
27-#include <sys/ioctl.h>
28 #include <sys/types.h>
29+#include <sys/wait.h>
30 #ifdef __linux__
31 #include <sys/sysmacros.h>
32 #endif
33@@ -69,8 +69,10 @@
34
35 #define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array)[0])
36
37-static void activate(void);
38-static void deactivate(void);
39+static void
40+activate(void);
41+static void
42+deactivate(void);
43
44 static bool nflag;
45 static int sigfd[2], sock[2];
46@@ -86,9 +88,11 @@ static struct {
47 long console_mode;
48 } original_vt_state;
49
50-static void cleanup(void);
51+static void
52+cleanup(void);
53
54-static noreturn void usage(const char *name)
55+static noreturn void
56+usage(const char *name)
57 {
58 fprintf(stderr, "usage: %s [-n] [-t tty] [--] server [args...]\n", name);
59 exit(2);
60@@ -103,8 +107,9 @@ die(const char *format, ...)
61 vfprintf(stderr, format, args);
62 va_end(args);
63
64- if (format[0] && format[strlen(format) - 1] == ':')
65+ if (format[0] && format[strlen(format) - 1] == ':') {
66 fprintf(stderr, " %s", strerror(errno));
67+ }
68 fputc('\n', stderr);
69
70 cleanup();
71@@ -117,8 +122,9 @@ start_devices(void)
72 int i;
73
74 for (i = 0; i < num_drm_fds; ++i) {
75- if (drmSetMaster(drm_fds[i]) < 0)
76+ if (drmSetMaster(drm_fds[i]) < 0) {
77 die("failed to set DRM master");
78+ }
79 }
80 }
81
82@@ -128,13 +134,16 @@ stop_devices(bool fatal)
83 int i;
84
85 for (i = 0; i < num_drm_fds; ++i) {
86- if (drmDropMaster(drm_fds[i]) < 0 && fatal)
87+ if (drmDropMaster(drm_fds[i]) < 0 && fatal) {
88 die("drmDropMaster:");
89+ }
90 }
91 for (i = 0; i < num_input_fds; ++i) {
92 #ifdef EVIOCREVOKE
93- if (ioctl(input_fds[i], EVIOCREVOKE, 0) < 0 && errno != ENODEV && fatal)
94+ if (ioctl(input_fds[i], EVIOCREVOKE, 0) < 0 && errno != ENODEV &&
95+ fatal) {
96 die("ioctl EVIOCREVOKE:");
97+ }
98 #endif
99 close(input_fds[i]);
100 }
101@@ -148,11 +157,12 @@ cleanup(void)
102 struct vt_mode mode = {.mode = VT_AUTO};
103 #endif
104
105- if (!original_vt_state.altered)
106+ if (!original_vt_state.altered) {
107 return;
108+ }
109
110- /* Stop devices before switching the VT to make sure we have released the DRM
111- * device before the next session tries to claim it. */
112+ /* Stop devices before switching the VT to make sure we have released the
113+ * DRM device before the next session tries to claim it. */
114 stop_devices(false);
115
116 /* Cleanup VT */
117@@ -202,19 +212,20 @@ handle_socket_data(int socket)
118 struct swc_launch_event response;
119 char path[PATH_MAX];
120 struct iovec request_iov[2] = {
121- {.iov_base = &request, .iov_len = sizeof(request)},
122- {.iov_base = path, .iov_len = sizeof(path)},
123+ {.iov_base = &request, .iov_len = sizeof(request)},
124+ {.iov_base = path, .iov_len = sizeof(path)},
125 };
126 struct iovec response_iov[1] = {
127- {.iov_base = &response, .iov_len = sizeof(response)},
128+ {.iov_base = &response, .iov_len = sizeof(response)},
129 };
130 int fd = -1;
131 struct stat st;
132 ssize_t size;
133
134 size = receive_fd(socket, &fd, request_iov, 2);
135- if (size == -1 || size == 0 || size < sizeof(request))
136+ if (size == -1 || size == 0 || size < sizeof(request)) {
137 return;
138+ }
139 size -= sizeof(request);
140
141 response.type = SWC_LAUNCH_EVENT_RESPONSE;
142@@ -226,7 +237,8 @@ handle_socket_data(int socket)
143 fprintf(stderr, "path is not NULL terminated\n");
144 goto fail;
145 }
146- if ((request.flags & (O_ACCMODE|O_NONBLOCK|O_CLOEXEC)) != request.flags) {
147+ if ((request.flags & (O_ACCMODE | O_NONBLOCK | O_CLOEXEC)) !=
148+ request.flags) {
149 fprintf(stderr, "invalid open flags\n");
150 goto fail;
151 }
152@@ -242,8 +254,9 @@ handle_socket_data(int socket)
153 }
154
155 if (device_is_input(st.st_rdev)) {
156- if (!active)
157+ if (!active) {
158 goto fail;
159+ }
160 if (num_input_fds == ARRAY_LENGTH(input_fds)) {
161 fprintf(stderr, "too many input devices opened\n");
162 goto fail;
163@@ -261,11 +274,14 @@ handle_socket_data(int socket)
164 }
165 break;
166 case SWC_LAUNCH_REQUEST_ACTIVATE_VT:
167- if (!active)
168+ if (!active) {
169 goto fail;
170+ }
171
172- if (ioctl(tty_fd, VT_ACTIVATE, request.vt) == -1)
173- fprintf(stderr, "failed to activate VT %d: %s\n", request.vt, strerror(errno));
174+ if (ioctl(tty_fd, VT_ACTIVATE, request.vt) == -1) {
175+ fprintf(stderr, "failed to activate VT %d: %s\n", request.vt,
176+ strerror(errno));
177+ }
178 break;
179 default:
180 fprintf(stderr, "unknown request %u\n", request.type);
181@@ -277,8 +293,9 @@ handle_socket_data(int socket)
182
183 fail:
184 response.success = false;
185- if (fd != -1)
186+ if (fd != -1) {
187 close(fd);
188+ }
189 fd = -1;
190 done:
191 send_fd(socket, fd, response_iov, 1);
192@@ -288,33 +305,41 @@ static void
193 find_vt(char *vt, size_t size)
194 {
195 #if defined(__NetBSD__)
196- if (snprintf(vt, size, "/dev/ttyE1") >= size)
197+ if (snprintf(vt, size, "/dev/ttyE1") >= size) {
198 die("VT number is too large");
199+ }
200 #elif defined(__OpenBSD__)
201 const char *tty;
202 tty = ttyname(STDIN_FILENO);
203- if (!tty || strncmp(tty, "/dev/ttyC", 8) != 0)
204+ if (!tty || strncmp(tty, "/dev/ttyC", 8) != 0) {
205 die("must be run from wscons VT (/dev/ttyC*)");
206- if (snprintf(vt, size, "%s", tty) >= size)
207+ }
208+ if (snprintf(vt, size, "%s", tty) >= size) {
209 die("VT number is too large");
210+ }
211 #else
212 char *vtnr;
213 int tty0_fd, vt_num;
214
215- /* If we are running from an existing X or wayland session, always open a new
216- * VT instead of using the current one. */
217- if (getenv("DISPLAY") || getenv("WAYLAND_DISPLAY") || !(vtnr = getenv("XDG_VTNR"))) {
218+ /* If we are running from an existing X or wayland session, always open a
219+ * new VT instead of using the current one. */
220+ if (getenv("DISPLAY") || getenv("WAYLAND_DISPLAY") ||
221+ !(vtnr = getenv("XDG_VTNR"))) {
222 tty0_fd = open("/dev/tty0", O_RDWR);
223- if (tty0_fd == -1)
224+ if (tty0_fd == -1) {
225 die("open /dev/tty0:");
226- if (ioctl(tty0_fd, VT_OPENQRY, &vt_num) != 0)
227+ }
228+ if (ioctl(tty0_fd, VT_OPENQRY, &vt_num) != 0) {
229 die("VT open query failed:");
230+ }
231 close(tty0_fd);
232- if (snprintf(vt, size, "/dev/tty%d", vt_num) >= size)
233+ if (snprintf(vt, size, "/dev/tty%d", vt_num) >= size) {
234 die("VT number is too large");
235+ }
236 } else {
237- if (snprintf(vt, size, "/dev/tty%s", vtnr) >= size)
238+ if (snprintf(vt, size, "/dev/tty%s", vtnr) >= size) {
239 die("XDG_VTNR is too long");
240+ }
241 }
242 #endif
243 }
244@@ -326,12 +351,15 @@ open_tty(const char *tty_name)
245 int fd;
246
247 /* Check if we are already running on the desired VT */
248- if ((stdin_tty = ttyname(STDIN_FILENO)) && strcmp(tty_name, stdin_tty) == 0)
249+ if ((stdin_tty = ttyname(STDIN_FILENO)) &&
250+ strcmp(tty_name, stdin_tty) == 0) {
251 return STDIN_FILENO;
252+ }
253
254 fd = open(tty_name, O_RDWR | O_NOCTTY);
255- if (fd < 0)
256+ if (fd < 0) {
257 die("open %s:", tty_name);
258+ }
259
260 return fd;
261 }
262@@ -344,29 +372,30 @@ setup_tty(int fd)
263 #ifndef __OpenBSD__
264 struct vt_stat state;
265 struct vt_mode mode = {
266- .mode = VT_PROCESS,
267- .relsig = SIGUSR1,
268- .acqsig = SIGUSR2
269- };
270+ .mode = VT_PROCESS, .relsig = SIGUSR1, .acqsig = SIGUSR2};
271 #endif
272
273- if (fstat(fd, &st) == -1)
274+ if (fstat(fd, &st) == -1) {
275 die("failed to stat TTY fd:");
276+ }
277 vt = minor(st.st_rdev);
278
279 #ifdef __OpenBSD__
280- if (!device_is_tty(st.st_rdev))
281+ if (!device_is_tty(st.st_rdev)) {
282 die("not a valid VT");
283+ }
284 #else
285- if (!device_is_tty(st.st_rdev) || vt == 0)
286+ if (!device_is_tty(st.st_rdev) || vt == 0) {
287 die("not a valid VT");
288+ }
289 #endif
290
291 #ifdef __OpenBSD__
292 /* OpenBSD wscons has no VT_GETSTATE */
293 #else
294- if (ioctl(fd, VT_GETSTATE, &state) == -1)
295+ if (ioctl(fd, VT_GETSTATE, &state) == -1) {
296 die("failed to get the current VT state:");
297+ }
298 #endif
299
300 #ifndef __OpenBSD__
301@@ -376,18 +405,21 @@ setup_tty(int fd)
302 #endif
303
304 #ifdef KDGETMODE
305- if (ioctl(fd, KDGKBMODE, &original_vt_state.kb_mode))
306+ if (ioctl(fd, KDGKBMODE, &original_vt_state.kb_mode)) {
307 die("failed to get keyboard mode:");
308- if (ioctl(fd, KDGETMODE, &original_vt_state.console_mode))
309+ }
310+ if (ioctl(fd, KDGETMODE, &original_vt_state.console_mode)) {
311 die("failed to get console mode:");
312+ }
313 #else
314 original_vt_state.kb_mode = K_XLATE;
315 original_vt_state.console_mode = KD_TEXT;
316 #endif
317
318 #ifdef K_OFF
319- if (ioctl(fd, KDSKBMODE, K_OFF) == -1)
320+ if (ioctl(fd, KDSKBMODE, K_OFF) == -1) {
321 die("failed to set keyboard mode to K_OFF:");
322+ }
323 #endif
324 if (ioctl(fd, KDSETMODE, KD_GRAPHICS) == -1) {
325 perror("KDSETMODE KD_GRAPHICS");
326@@ -445,7 +477,7 @@ setup_tty(int fd)
327
328 #ifndef __OpenBSD__
329 error2:
330- mode = (struct vt_mode){.mode = VT_AUTO };
331+ mode = (struct vt_mode){.mode = VT_AUTO};
332 ioctl(fd, VT_SETMODE, &mode);
333 error1:
334 ioctl(fd, KDSKBMODE, original_vt_state.kb_mode);
335@@ -456,25 +488,29 @@ error0:
336 }
337
338 static void
339-run(int fd) {
340+run(int fd)
341+{
342 struct pollfd fds[] = {
343- {.fd = fd, .events = POLLIN},
344- {.fd = sigfd[0], .events = POLLIN},
345+ {.fd = fd, .events = POLLIN},
346+ {.fd = sigfd[0], .events = POLLIN},
347 };
348 int status;
349 char sig;
350
351 for (;;) {
352 if (poll(fds, ARRAY_LENGTH(fds), -1) < 0) {
353- if (errno == EINTR)
354+ if (errno == EINTR) {
355 continue;
356+ }
357 die("poll:");
358 }
359- if (fds[0].revents)
360+ if (fds[0].revents) {
361 handle_socket_data(fd);
362+ }
363 if (fds[1].revents) {
364- if (read(sigfd[0], &sig, 1) <= 0)
365+ if (read(sigfd[0], &sig, 1) <= 0) {
366 continue;
367+ }
368 switch (sig) {
369 case SIGCHLD:
370 wait(&status);
371@@ -500,8 +536,8 @@ main(int argc, char *argv[])
372 int option;
373 char *vt = NULL, buf[64];
374 struct sigaction action = {
375- .sa_handler = handle_signal,
376- .sa_flags = SA_RESTART,
377+ .sa_handler = handle_signal,
378+ .sa_flags = SA_RESTART,
379 };
380 sigset_t set;
381 pid_t pid;
382@@ -520,22 +556,29 @@ main(int argc, char *argv[])
383 }
384 }
385
386- if (argc - optind < 1)
387+ if (argc - optind < 1) {
388 usage(argv[0]);
389+ }
390
391- if (socketpair(AF_LOCAL, SOCK_SEQPACKET, 0, sock) == -1)
392+ if (socketpair(AF_LOCAL, SOCK_SEQPACKET, 0, sock) == -1) {
393 die("socketpair:");
394- if (fcntl(sock[0], F_SETFD, FD_CLOEXEC) == -1)
395+ }
396+ if (fcntl(sock[0], F_SETFD, FD_CLOEXEC) == -1) {
397 die("failed set CLOEXEC on socket:");
398+ }
399
400- if (pipe2(sigfd, O_CLOEXEC) == -1)
401+ if (pipe2(sigfd, O_CLOEXEC) == -1) {
402 die("pipe:");
403- if (sigaction(SIGCHLD, &action, NULL) == -1)
404+ }
405+ if (sigaction(SIGCHLD, &action, NULL) == -1) {
406 die("sigaction SIGCHLD:");
407- if (sigaction(SIGUSR1, &action, NULL) == -1)
408+ }
409+ if (sigaction(SIGUSR1, &action, NULL) == -1) {
410 die("sigaction SIGUSR1:");
411- if (sigaction(SIGUSR2, &action, NULL) == -1)
412+ }
413+ if (sigaction(SIGUSR2, &action, NULL) == -1) {
414 die("sigaction SIGUSR2:");
415+ }
416
417 sigfillset(&set);
418 sigdelset(&set, SIGCHLD);
419@@ -559,21 +602,28 @@ main(int argc, char *argv[])
420 if (!getenv("XDG_RUNTIME_DIR")) {
421 uid_t uid = getuid();
422 snprintf(buf, sizeof(buf), "/tmp/XDG_RUNTIME_DIR_%d", uid);
423- if (mkdir(buf, 0700) == -1 && errno != EEXIST)
424+ if (mkdir(buf, 0700) == -1 && errno != EEXIST) {
425 die("mkdir %s:", buf);
426+ }
427 setenv("XDG_RUNTIME_DIR", buf, 1);
428 fprintf(stderr, "set XDG_RUNTIME_DIR=%s\n", buf);
429 }
430
431- if ((errno = posix_spawnattr_init(&attr)))
432+ if ((errno = posix_spawnattr_init(&attr))) {
433 die("posix_spawnattr_init:");
434- if ((errno = posix_spawnattr_setflags(&attr, POSIX_SPAWN_RESETIDS|POSIX_SPAWN_SETSIGMASK)))
435+ }
436+ if ((errno = posix_spawnattr_setflags(&attr, POSIX_SPAWN_RESETIDS |
437+ POSIX_SPAWN_SETSIGMASK))) {
438 die("posix_spawnattr_setflags:");
439+ }
440 sigemptyset(&set);
441- if ((errno = posix_spawnattr_setsigmask(&attr, &set)))
442+ if ((errno = posix_spawnattr_setsigmask(&attr, &set))) {
443 die("posix_spawnattr_setsigmask:");
444- if ((errno = posix_spawnp(&pid, argv[optind], NULL, &attr, argv + optind, environ)))
445+ }
446+ if ((errno = posix_spawnp(&pid, argv[optind], NULL, &attr, argv + optind,
447+ environ))) {
448 die("posix_spawnp %s:", argv[optind]);
449+ }
450 posix_spawnattr_destroy(&attr);
451
452 close(sock[1]);
+15,
-13
1@@ -1,24 +1,23 @@
2 #include "protocol.h"
3
4-#include <sys/socket.h>
5 #include <stdio.h>
6 #include <string.h>
7+#include <sys/socket.h>
8
9 ssize_t
10 send_fd(int socket, int fd, struct iovec *iov, int iovlen)
11 {
12 char control[CMSG_SPACE(sizeof(fd))];
13 struct msghdr message = {
14- .msg_name = NULL,
15- .msg_namelen = 0,
16- .msg_iov = iov,
17- .msg_iovlen = iovlen,
18+ .msg_name = NULL,
19+ .msg_namelen = 0,
20+ .msg_iov = iov,
21+ .msg_iovlen = iovlen,
22 };
23 struct cmsghdr *cmsg;
24
25 if (fd != -1) {
26- message.msg_control = control,
27- message.msg_controllen = sizeof(control);
28+ message.msg_control = control, message.msg_controllen = sizeof(control);
29
30 cmsg = CMSG_FIRSTHDR(&message);
31 cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
32@@ -40,10 +39,10 @@ receive_fd(int socket, int *fd, struct iovec *iov, int iovlen)
33 ssize_t size;
34 char control[CMSG_SPACE(sizeof(*fd))];
35 struct msghdr message = {
36- .msg_name = NULL,
37- .msg_namelen = 0,
38- .msg_iov = iov,
39- .msg_iovlen = iovlen,
40+ .msg_name = NULL,
41+ .msg_namelen = 0,
42+ .msg_iov = iov,
43+ .msg_iovlen = iovlen,
44 };
45 struct cmsghdr *cmsg;
46
47@@ -54,12 +53,15 @@ receive_fd(int socket, int *fd, struct iovec *iov, int iovlen)
48 }
49
50 size = recvmsg(socket, &message, MSG_CMSG_CLOEXEC);
51- if (size < 0)
52+ if (size < 0) {
53 return -1;
54+ }
55
56 cmsg = CMSG_FIRSTHDR(&message);
57- if (fd && cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(*fd)) && cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
58+ if (fd && cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(*fd)) &&
59+ cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
60 memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
61+ }
62
63 return size;
64 }
+4,
-2
1@@ -65,7 +65,9 @@ struct swc_launch_event {
2 };
3 };
4
5-ssize_t send_fd(int socket, int fd, struct iovec *iov, int iovlen);
6-ssize_t receive_fd(int socket, int *fd, struct iovec *iov, int iovlen);
7+ssize_t
8+send_fd(int socket, int fd, struct iovec *iov, int iovlen);
9+ssize_t
10+receive_fd(int socket, int *fd, struct iovec *iov, int iovlen);
11
12 #endif
+59,
-31
1@@ -21,12 +21,12 @@
2 * SOFTWARE.
3 */
4
5-#include "swc.h"
6 #include "bindings.h"
7 #include "internal.h"
8 #include "keyboard.h"
9 #include "pointer.h"
10 #include "seat.h"
11+#include "swc.h"
12 #include "util.h"
13
14 #include <errno.h>
15@@ -47,26 +47,32 @@ struct axis_binding {
16 void *data;
17 };
18
19-static bool handle_key(struct keyboard *keyboard, uint32_t time, struct key *key, uint32_t state);
20+static bool
21+handle_key(struct keyboard *keyboard, uint32_t time, struct key *key,
22+ uint32_t state);
23
24 static struct keyboard_handler key_binding_handler = {
25- .key = handle_key,
26+ .key = handle_key,
27 };
28
29-static bool handle_button(struct pointer_handler *handler, uint32_t time, struct button *button, uint32_t state);
30-static bool handle_axis(struct pointer_handler *handler, uint32_t time, enum wl_pointer_axis axis,
31- enum wl_pointer_axis_source source, wl_fixed_t value, int value120);
32+static bool
33+handle_button(struct pointer_handler *handler, uint32_t time,
34+ struct button *button, uint32_t state);
35+static bool
36+handle_axis(struct pointer_handler *handler, uint32_t time,
37+ enum wl_pointer_axis axis, enum wl_pointer_axis_source source,
38+ wl_fixed_t value, int value120);
39
40 static struct pointer_handler button_binding_handler = {
41- .button = handle_button,
42- .axis = handle_axis,
43+ .button = handle_button,
44+ .axis = handle_axis,
45 };
46
47 static struct wl_array key_bindings, button_bindings, axis_bindings;
48
49 const struct swc_bindings swc_bindings = {
50- .keyboard_handler = &key_binding_handler,
51- .pointer_handler = &button_binding_handler,
52+ .keyboard_handler = &key_binding_handler,
53+ .pointer_handler = &button_binding_handler,
54 };
55
56 static struct binding *
57@@ -74,9 +80,12 @@ find_binding(struct wl_array *bindings, uint32_t modifiers, uint32_t value)
58 {
59 struct binding *binding;
60
61- wl_array_for_each (binding, bindings) {
62- if (binding->value == value && (binding->modifiers == modifiers || binding->modifiers == SWC_MOD_ANY))
63+ wl_array_for_each(binding, bindings)
64+ {
65+ if (binding->value == value && (binding->modifiers == modifiers ||
66+ binding->modifiers == SWC_MOD_ANY)) {
67 return binding;
68+ }
69 }
70
71 return NULL;
72@@ -93,18 +102,21 @@ find_key_binding(uint32_t modifiers, uint32_t key)
73 keysym = xkb_state_key_get_one_sym(xkb->state, XKB_KEY(key));
74 binding = find_binding(&key_bindings, modifiers, keysym);
75
76- if (binding)
77+ if (binding) {
78 return binding;
79+ }
80
81 xkb_layout_index_t layout;
82 const xkb_keysym_t *keysyms;
83
84 /* Then try the keysym associated with shift-level 0 for the key. */
85 layout = xkb_state_key_get_layout(xkb->state, XKB_KEY(key));
86- xkb_keymap_key_get_syms_by_level(xkb->keymap.map, XKB_KEY(key), layout, 0, &keysyms);
87+ xkb_keymap_key_get_syms_by_level(xkb->keymap.map, XKB_KEY(key), layout, 0,
88+ &keysyms);
89
90- if (!keysyms)
91+ if (!keysyms) {
92 return NULL;
93+ }
94
95 binding = find_binding(&key_bindings, modifiers, keysyms[0]);
96
97@@ -122,25 +134,31 @@ find_axis_binding(uint32_t modifiers, uint32_t axis)
98 {
99 struct axis_binding *binding;
100
101- wl_array_for_each (binding, &axis_bindings) {
102- if (binding->axis == axis && (binding->modifiers == modifiers || binding->modifiers == SWC_MOD_ANY))
103+ wl_array_for_each(binding, &axis_bindings)
104+ {
105+ if (binding->axis == axis && (binding->modifiers == modifiers ||
106+ binding->modifiers == SWC_MOD_ANY)) {
107 return binding;
108+ }
109 }
110
111 return NULL;
112 }
113
114 static bool
115-handle_binding(uint32_t time, struct press *press, uint32_t state, struct binding *(*find_binding)(uint32_t, uint32_t))
116+handle_binding(uint32_t time, struct press *press, uint32_t state,
117+ struct binding *(*find_binding)(uint32_t, uint32_t))
118 {
119 struct binding *binding;
120- uint32_t modifiers = swc.seat && swc.seat->keyboard ? swc.seat->keyboard->modifiers : 0;
121+ uint32_t modifiers =
122+ swc.seat && swc.seat->keyboard ? swc.seat->keyboard->modifiers : 0;
123
124 if (state) {
125 binding = find_binding(modifiers, press->value);
126
127- if (!binding)
128+ if (!binding) {
129 return false;
130+ }
131
132 press->data = binding;
133 } else {
134@@ -153,35 +171,41 @@ handle_binding(uint32_t time, struct press *press, uint32_t state, struct bindin
135 }
136
137 bool
138-handle_key(struct keyboard *keyboard, uint32_t time, struct key *key, uint32_t state)
139+handle_key(struct keyboard *keyboard, uint32_t time, struct key *key,
140+ uint32_t state)
141 {
142 return handle_binding(time, &key->press, state, &find_key_binding);
143 }
144
145 bool
146-handle_button(struct pointer_handler *handler, uint32_t time, struct button *button, uint32_t state)
147+handle_button(struct pointer_handler *handler, uint32_t time,
148+ struct button *button, uint32_t state)
149 {
150 return handle_binding(time, &button->press, state, &find_button_binding);
151 }
152
153 bool
154-handle_axis(struct pointer_handler *handler, uint32_t time, enum wl_pointer_axis axis,
155- enum wl_pointer_axis_source source, wl_fixed_t value, int value120)
156+handle_axis(struct pointer_handler *handler, uint32_t time,
157+ enum wl_pointer_axis axis, enum wl_pointer_axis_source source,
158+ wl_fixed_t value, int value120)
159 {
160 (void)handler;
161 (void)source;
162
163- uint32_t modifiers = swc.seat && swc.seat->keyboard ? swc.seat->keyboard->modifiers : 0;
164+ uint32_t modifiers =
165+ swc.seat && swc.seat->keyboard ? swc.seat->keyboard->modifiers : 0;
166 struct axis_binding *binding = find_axis_binding(modifiers, axis);
167 int32_t delta120 = value120;
168
169- if (!binding || !binding->handler)
170+ if (!binding || !binding->handler) {
171 return false;
172+ }
173
174 if (!delta120 && value) {
175 delta120 = (int32_t)(wl_fixed_to_double(value) * 120.0);
176- if (!delta120)
177+ if (!delta120) {
178 delta120 = value > 0 ? 1 : -1;
179+ }
180 }
181
182 binding->handler(binding->data, time, axis, delta120);
183@@ -207,7 +231,8 @@ bindings_finalize(void)
184 }
185
186 EXPORT int
187-swc_add_binding(enum swc_binding_type type, uint32_t modifiers, uint32_t value, swc_binding_handler handler, void *data)
188+swc_add_binding(enum swc_binding_type type, uint32_t modifiers, uint32_t value,
189+ swc_binding_handler handler, void *data)
190 {
191 struct binding *binding;
192 struct wl_array *bindings;
193@@ -223,8 +248,9 @@ swc_add_binding(enum swc_binding_type type, uint32_t modifiers, uint32_t value,
194 return -EINVAL;
195 }
196
197- if (!(binding = wl_array_add(bindings, sizeof(*binding))))
198+ if (!(binding = wl_array_add(bindings, sizeof(*binding)))) {
199 return -ENOMEM;
200+ }
201
202 binding->value = value;
203 binding->modifiers = modifiers;
204@@ -235,12 +261,14 @@ swc_add_binding(enum swc_binding_type type, uint32_t modifiers, uint32_t value,
205 }
206
207 EXPORT int
208-swc_add_axis_binding(uint32_t modifiers, uint32_t axis, swc_axis_binding_handler handler, void *data)
209+swc_add_axis_binding(uint32_t modifiers, uint32_t axis,
210+ swc_axis_binding_handler handler, void *data)
211 {
212 struct axis_binding *binding;
213
214- if (!(binding = wl_array_add(&axis_bindings, sizeof(*binding))))
215+ if (!(binding = wl_array_add(&axis_bindings, sizeof(*binding)))) {
216 return -ENOMEM;
217+ }
218
219 binding->axis = axis;
220 binding->modifiers = modifiers;
+4,
-2
1@@ -31,7 +31,9 @@ struct swc_bindings {
2 struct pointer_handler *pointer_handler;
3 };
4
5-bool bindings_initialize(void);
6-void bindings_finalize(void);
7+bool
8+bindings_initialize(void);
9+void
10+bindings_finalize(void);
11
12 #endif
+459,
-287
1@@ -27,7 +27,6 @@
2 * SOFTWARE.
3 */
4
5-#include "swc.h"
6 #include "compositor.h"
7 #include "data_device_manager.h"
8 #include "drm.h"
9@@ -40,29 +39,32 @@
10 #include "screen.h"
11 #include "seat.h"
12 #include "shm.h"
13-#include "surface.h"
14 #include "subsurface.h"
15+#include "surface.h"
16+#include "swc.h"
17 #include "util.h"
18 #include "view.h"
19 #include "wallpaper.h"
20 #include "window.h"
21
22-#include <errno.h>
23-#include <stdlib.h>
24-#include <stdio.h>
25 #include <assert.h>
26+#include <errno.h>
27 #include <limits.h>
28-#include <wld/wld.h>
29+#include <stdio.h>
30+#include <stdlib.h>
31 #include <wld/drm.h>
32+#include <wld/wld.h>
33 #include <xkbcommon/xkbcommon-keysyms.h>
34
35 static inline int32_t
36 clamp_i32(int64_t v)
37 {
38- if (v > INT32_MAX)
39+ if (v > INT32_MAX) {
40 return INT32_MAX;
41- if (v < INT32_MIN)
42+ }
43+ if (v < INT32_MIN) {
44 return INT32_MIN;
45+ }
46 return (int32_t)v;
47 }
48
49@@ -71,10 +73,12 @@ span_u32(int32_t a, int32_t b)
50 {
51 int64_t d = (int64_t)b - (int64_t)a;
52
53- if (d <= 0)
54+ if (d <= 0) {
55 return 0;
56- if (d > UINT32_MAX)
57+ }
58+ if (d > UINT32_MAX) {
59 return UINT32_MAX;
60+ }
61 return (uint32_t)d;
62 }
63
64@@ -88,13 +92,18 @@ struct target {
65 struct wl_listener screen_destroy_listener;
66 };
67
68-static bool handle_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t x, wl_fixed_t y);
69-static bool handle_button(struct pointer_handler *handler, uint32_t time, struct button *button, uint32_t state);
70-static void perform_update(void *data);
71+static bool
72+handle_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t x,
73+ wl_fixed_t y);
74+static bool
75+handle_button(struct pointer_handler *handler, uint32_t time,
76+ struct button *button, uint32_t state);
77+static void
78+perform_update(void *data);
79
80 static struct pointer_handler pointer_handler = {
81- .motion = handle_motion,
82- .button = handle_button,
83+ .motion = handle_motion,
84+ .button = handle_button,
85 };
86
87 static struct {
88@@ -102,7 +111,8 @@ static struct {
89 pixman_region32_t damage, opaque;
90 struct wl_listener swc_listener;
91
92- /* A mask of screens that have been repainted but are waiting on a page flip. */
93+ /* A mask of screens that have been repainted but are waiting on a page
94+ * flip. */
95 uint32_t pending_flips;
96
97 /* A mask of screens that are scheduled to be repainted on the next idle. */
98@@ -126,13 +136,14 @@ static struct {
99 } overlay;
100
101 struct swc_compositor swc_compositor = {
102- .pointer_handler = &pointer_handler,
103+ .pointer_handler = &pointer_handler,
104 };
105
106 static void
107 handle_screen_destroy(struct wl_listener *listener, void *data)
108 {
109- struct target *target = wl_container_of(listener, target, screen_destroy_listener);
110+ struct target *target =
111+ wl_container_of(listener, target, screen_destroy_listener);
112
113 wld_destroy_surface(target->surface);
114 free(target);
115@@ -141,10 +152,12 @@ handle_screen_destroy(struct wl_listener *listener, void *data)
116 static struct target *
117 target_get(struct screen *screen)
118 {
119- struct wl_listener *listener = wl_signal_get(&screen->destroy_signal, &handle_screen_destroy);
120+ struct wl_listener *listener =
121+ wl_signal_get(&screen->destroy_signal, &handle_screen_destroy);
122 struct target *target;
123
124- return listener ? wl_container_of(listener, target, screen_destroy_listener) : NULL;
125+ return listener ? wl_container_of(listener, target, screen_destroy_listener)
126+ : NULL;
127 }
128
129 static void
130@@ -155,26 +168,30 @@ handle_screen_frame(struct view_handler *handler, uint32_t time)
131
132 compositor.pending_flips &= ~target->mask;
133
134- wl_list_for_each (view, &compositor.views, link) {
135- if (view->visible && view->base.screens & target->mask)
136+ wl_list_for_each(view, &compositor.views, link)
137+ {
138+ if (view->visible && view->base.screens & target->mask) {
139 view_frame(&view->base, time);
140+ }
141 }
142
143- if (target->current_buffer)
144+ if (target->current_buffer) {
145 wld_surface_release(target->surface, target->current_buffer);
146+ }
147
148 target->current_buffer = target->next_buffer;
149
150- /* If we had scheduled updates that couldn't run because we were waiting on a
151- * page flip, run them now. If the compositor is currently updating, then the
152- * frame finished immediately, and we can be sure that there are no pending
153- * updates. */
154- if (compositor.scheduled_updates && !compositor.updating)
155+ /* If we had scheduled updates that couldn't run because we were waiting on
156+ * a page flip, run them now. If the compositor is currently updating, then
157+ * the frame finished immediately, and we can be sure that there are no
158+ * pending updates. */
159+ if (compositor.scheduled_updates && !compositor.updating) {
160 perform_update(NULL);
161+ }
162 }
163
164 static const struct view_handler_impl screen_view_handler = {
165- .frame = handle_screen_frame,
166+ .frame = handle_screen_frame,
167 };
168
169 static int
170@@ -190,13 +207,17 @@ target_new(struct screen *screen)
171 struct target *target;
172 struct swc_rectangle *geom = &screen->base.geometry;
173
174- if (!(target = malloc(sizeof(*target))))
175+ if (!(target = malloc(sizeof(*target)))) {
176 goto error0;
177+ }
178
179- target->surface = wld_create_surface(swc.drm->context, geom->width, geom->height, WLD_FORMAT_XRGB8888, WLD_DRM_FLAG_SCANOUT);
180+ target->surface =
181+ wld_create_surface(swc.drm->context, geom->width, geom->height,
182+ WLD_FORMAT_XRGB8888, WLD_DRM_FLAG_SCANOUT);
183
184- if (!target->surface)
185+ if (!target->surface) {
186 goto error1;
187+ }
188
189 target->view = &screen->planes.primary.view;
190 target->view_handler.impl = &screen_view_handler;
191@@ -218,34 +239,41 @@ error0:
192 /* Rendering {{{ */
193
194 static void
195-repaint_view(struct target *target, struct compositor_view *view, pixman_region32_t *damage)
196+repaint_view(struct target *target, struct compositor_view *view,
197+ pixman_region32_t *damage)
198 {
199- pixman_region32_t geom_region, buffer_region, border_region, view_damage, buffer_damage, border_damage;
200- const struct swc_rectangle *geom = &view->base.geometry, *target_geom = &target->view->geometry;
201+ pixman_region32_t geom_region, buffer_region, border_region, view_damage,
202+ buffer_damage, border_damage;
203+ const struct swc_rectangle *geom = &view->base.geometry,
204+ *target_geom = &target->view->geometry;
205 int32_t buf_x, buf_y;
206 uint32_t buf_w, buf_h;
207 int64_t total_border;
208
209- if (!view->base.buffer)
210+ if (!view->base.buffer) {
211 return;
212+ }
213
214 buf_w = view->base.buffer->width;
215 buf_h = view->base.buffer->height;
216 buf_x = geom->x - view->buffer_offset_x;
217 buf_y = geom->y - view->buffer_offset_y;
218
219- total_border = (int64_t)view->border.outwidth + (int64_t)view->border.inwidth;
220- pixman_region32_init_rect(&geom_region, geom->x, geom->y, geom->width, geom->height);
221+ total_border =
222+ (int64_t)view->border.outwidth + (int64_t)view->border.inwidth;
223+ pixman_region32_init_rect(&geom_region, geom->x, geom->y, geom->width,
224+ geom->height);
225 if (view->window) {
226- pixman_region32_init_rect(&buffer_region, geom->x, geom->y, geom->width, geom->height);
227+ pixman_region32_init_rect(&buffer_region, geom->x, geom->y, geom->width,
228+ geom->height);
229 } else {
230 pixman_region32_init_rect(&buffer_region, buf_x, buf_y, buf_w, buf_h);
231 }
232 pixman_region32_init_rect(&border_region,
233- geom->x - (int32_t)total_border,
234- geom->y - (int32_t)total_border,
235- geom->width + (uint32_t)(2 * total_border),
236- geom->height + (uint32_t)(2 * total_border));
237+ geom->x - (int32_t)total_border,
238+ geom->y - (int32_t)total_border,
239+ geom->width + (uint32_t)(2 * total_border),
240+ geom->height + (uint32_t)(2 * total_border));
241 pixman_region32_subtract(&border_region, &border_region, &geom_region);
242 pixman_region32_init_with_extents(&view_damage, &view->extents);
243 pixman_region32_init(&buffer_damage);
244@@ -257,20 +285,22 @@ repaint_view(struct target *target, struct compositor_view *view, pixman_region3
245 pixman_region32_intersect(&buffer_damage, &view_damage, &buffer_region);
246
247 if (pixman_region32_not_empty(&buffer_damage)) {
248- pixman_region32_translate(&buffer_damage, -geom->x + view->buffer_offset_x, -geom->y + view->buffer_offset_y);
249- wld_copy_region(swc.drm->renderer, view->buffer,
250- buf_x - target_geom->x, buf_y - target_geom->y, &buffer_damage);
251+ pixman_region32_translate(&buffer_damage,
252+ -geom->x + view->buffer_offset_x,
253+ -geom->y + view->buffer_offset_y);
254+ wld_copy_region(swc.drm->renderer, view->buffer, buf_x - target_geom->x,
255+ buf_y - target_geom->y, &buffer_damage);
256 }
257
258 pixman_region32_fini(&view_damage);
259 pixman_region32_fini(&buffer_damage);
260
261 pixman_region32_t in_rect;
262- pixman_region32_init_rect(&in_rect,
263- geom->x - view->border.inwidth,
264- geom->y - view->border.inwidth,
265- geom->width + (2 * view->border.inwidth),
266- geom->height + (2 * view->border.inwidth));
267+ pixman_region32_init_rect(&in_rect,
268+ geom->x - view->border.inwidth,
269+ geom->y - view->border.inwidth,
270+ geom->width + (2 * view->border.inwidth),
271+ geom->height + (2 * view->border.inwidth));
272
273 pixman_region32_t out_border;
274 pixman_region32_init(&out_border);
275@@ -280,14 +310,15 @@ repaint_view(struct target *target, struct compositor_view *view, pixman_region3
276 pixman_region32_init(&in_border);
277 pixman_region32_subtract(&in_border, &in_rect, &geom_region);
278 pixman_region32_intersect(&in_border, &in_border, &border_damage);
279-
280+
281 pixman_region32_fini(&geom_region);
282 pixman_region32_fini(&buffer_region);
283 pixman_region32_fini(&border_region);
284
285 /* Draw border */
286 if (view->border.outwidth > 0 && pixman_region32_not_empty(&out_border)) {
287- pixman_region32_translate(&out_border, -target_geom->x, -target_geom->y);
288+ pixman_region32_translate(&out_border, -target_geom->x,
289+ -target_geom->y);
290 wld_fill_region(swc.drm->renderer, view->border.outcolor, &out_border);
291 }
292
293@@ -300,11 +331,12 @@ repaint_view(struct target *target, struct compositor_view *view, pixman_region3
294 pixman_region32_fini(&in_rect);
295 pixman_region32_fini(&out_border);
296 pixman_region32_fini(&in_border);
297-
298 }
299
300 static void
301-renderer_repaint(struct target *target, pixman_region32_t *damage, pixman_region32_t *base_damage, struct wl_list *views, struct screen *screen)
302+renderer_repaint(struct target *target, pixman_region32_t *damage,
303+ pixman_region32_t *base_damage, struct wl_list *views,
304+ struct screen *screen)
305 {
306 struct compositor_view *view;
307 const struct swc_rectangle *target_geom = &target->view->geometry;
308@@ -318,54 +350,66 @@ renderer_repaint(struct target *target, pixman_region32_t *damage, pixman_region
309 if (pixman_region32_not_empty(base_damage)) {
310 struct wld_buffer *background = swc_wallpaper_buffer_for_screen(screen);
311
312- pixman_region32_translate(base_damage, -target->view->geometry.x, -target->view->geometry.y);
313-
314- if (background)
315+ pixman_region32_translate(base_damage, -target->view->geometry.x,
316+ -target->view->geometry.y);
317+
318+ if (background) {
319 wld_copy_region(swc.drm->renderer, background, 0, 0, base_damage);
320+ }
321
322- else
323+ else {
324 wld_fill_region(swc.drm->renderer, bgcolor, base_damage);
325+ }
326 }
327
328- wl_list_for_each_reverse (view, views, link) {
329- if (view->visible && view->base.screens & target->mask)
330+ wl_list_for_each_reverse(view, views, link)
331+ {
332+ if (view->visible && view->base.screens & target->mask) {
333 repaint_view(target, view, damage);
334+ }
335 }
336
337 if (overlay.active && overlay.border_width > 0) {
338 int32_t x = overlay.x - target_geom->x;
339 int32_t y = overlay.y - target_geom->y;
340- uint32_t w = overlay.width, h = overlay.height, bw = overlay.border_width;
341+ uint32_t w = overlay.width, h = overlay.height,
342+ bw = overlay.border_width;
343 int32_t tx = (int32_t)target_geom->width;
344 int32_t ty = (int32_t)target_geom->height;
345
346- /* draw box as 4 rectangles with wld */
347- #define CLAMP_LOW(v, lo) ((v) < (lo) ? (lo) : (v))
348- #define CLAMP_HIGH(v, hi) ((v) > (hi) ? (hi) : (v))
349- #define DRAW_CLIPPED(rx, ry, rw, rh) do { \
350- int32_t _x1 = CLAMP_LOW((rx), 0); \
351- int32_t _y1 = CLAMP_LOW((ry), 0); \
352- int32_t _x2 = CLAMP_HIGH((rx) + (int32_t)(rw), tx); \
353- int32_t _y2 = CLAMP_HIGH((ry) + (int32_t)(rh), ty); \
354- if (_x2 > _x1 && _y2 > _y1) \
355- wld_fill_rectangle(swc.drm->renderer, overlay.color, _x1, _y1, (uint32_t)(_x2 - _x1), (uint32_t)(_y2 - _y1)); \
356- } while (0)
357+/* draw box as 4 rectangles with wld */
358+#define CLAMP_LOW(v, lo) ((v) < (lo) ? (lo) : (v))
359+#define CLAMP_HIGH(v, hi) ((v) > (hi) ? (hi) : (v))
360+#define DRAW_CLIPPED(rx, ry, rw, rh) \
361+ do { \
362+ int32_t _x1 = CLAMP_LOW((rx), 0); \
363+ int32_t _y1 = CLAMP_LOW((ry), 0); \
364+ int32_t _x2 = CLAMP_HIGH((rx) + (int32_t)(rw), tx); \
365+ int32_t _y2 = CLAMP_HIGH((ry) + (int32_t)(rh), ty); \
366+ if (_x2 > _x1 && _y2 > _y1) \
367+ wld_fill_rectangle(swc.drm->renderer, overlay.color, _x1, _y1, \
368+ (uint32_t)(_x2 - _x1), (uint32_t)(_y2 - _y1)); \
369+ } while (0)
370
371 if (w > 0 && h > 0) {
372- if (bw > w)
373+ if (bw > w) {
374 bw = w;
375- if (bw > h)
376+ }
377+ if (bw > h) {
378 bw = h;
379+ }
380
381- DRAW_CLIPPED(x, y, (int32_t)w, (int32_t)bw); /* top */
382- DRAW_CLIPPED(x, y + (int32_t)h - (int32_t)bw, (int32_t)w, (int32_t)bw); /* bottom */
383- DRAW_CLIPPED(x, y, (int32_t)bw, (int32_t)h); /* left */
384- DRAW_CLIPPED(x + (int32_t)w - (int32_t)bw, y, (int32_t)bw, (int32_t)h); /* right */
385+ DRAW_CLIPPED(x, y, (int32_t)w, (int32_t)bw); /* top */
386+ DRAW_CLIPPED(x, y + (int32_t)h - (int32_t)bw, (int32_t)w,
387+ (int32_t)bw); /* bottom */
388+ DRAW_CLIPPED(x, y, (int32_t)bw, (int32_t)h); /* left */
389+ DRAW_CLIPPED(x + (int32_t)w - (int32_t)bw, y, (int32_t)bw,
390+ (int32_t)h); /* right */
391 }
392
393- #undef DRAW_CLIPPED
394- #undef CLAMP_HIGH
395- #undef CLAMP_LOW
396+#undef DRAW_CLIPPED
397+#undef CLAMP_HIGH
398+#undef CLAMP_LOW
399 }
400
401 wld_flush(swc.drm->renderer);
402@@ -376,19 +420,26 @@ renderer_attach(struct compositor_view *view, struct wld_buffer *client_buffer)
403 {
404 struct wld_buffer *buffer;
405 bool was_proxy = view->buffer != view->base.buffer;
406- bool needs_proxy = client_buffer && !(wld_capabilities(swc.drm->renderer, client_buffer) & WLD_CAPABILITY_READ);
407- bool resized = view->buffer && client_buffer && (view->buffer->width != client_buffer->width || view->buffer->height != client_buffer->height);
408+ bool needs_proxy =
409+ client_buffer && !(wld_capabilities(swc.drm->renderer, client_buffer) &
410+ WLD_CAPABILITY_READ);
411+ bool resized = view->buffer && client_buffer &&
412+ (view->buffer->width != client_buffer->width ||
413+ view->buffer->height != client_buffer->height);
414
415 if (client_buffer) {
416- /* Create a proxy buffer if necessary (for example a hardware buffer backing
417- * a SHM buffer). */
418+ /* Create a proxy buffer if necessary (for example a hardware buffer
419+ * backing a SHM buffer). */
420 if (needs_proxy) {
421 if (!was_proxy || resized) {
422 DEBUG("Creating a proxy buffer\n");
423- buffer = wld_create_buffer(swc.drm->context, client_buffer->width, client_buffer->height, client_buffer->format, WLD_FLAG_MAP);
424+ buffer = wld_create_buffer(
425+ swc.drm->context, client_buffer->width,
426+ client_buffer->height, client_buffer->format, WLD_FLAG_MAP);
427
428- if (!buffer)
429+ if (!buffer) {
430 return -ENOMEM;
431+ }
432 } else {
433 /* Otherwise we can keep the original proxy buffer. */
434 buffer = view->buffer;
435@@ -402,8 +453,10 @@ renderer_attach(struct compositor_view *view, struct wld_buffer *client_buffer)
436
437 /* If we no longer need a proxy buffer, or the original buffer is of a
438 * different size, destroy the old proxy image. */
439- if (view->buffer && ((!needs_proxy && was_proxy) || (needs_proxy && resized)))
440+ if (view->buffer &&
441+ ((!needs_proxy && was_proxy) || (needs_proxy && resized))) {
442 wld_buffer_unreference(view->buffer);
443+ }
444
445 view->buffer = buffer;
446
447@@ -413,11 +466,13 @@ renderer_attach(struct compositor_view *view, struct wld_buffer *client_buffer)
448 static void
449 renderer_flush_view(struct compositor_view *view)
450 {
451- if (view->buffer == view->base.buffer)
452+ if (view->buffer == view->base.buffer) {
453 return;
454+ }
455
456 wld_set_target_buffer(swc.shm->renderer, view->buffer);
457- wld_copy_region(swc.shm->renderer, view->base.buffer, 0, 0, &view->surface->state.damage);
458+ wld_copy_region(swc.shm->renderer, view->base.buffer, 0, 0,
459+ &view->surface->state.damage);
460 wld_flush(swc.shm->renderer);
461 }
462
463@@ -434,7 +489,8 @@ damage_below_view(struct compositor_view *view)
464 pixman_region32_t damage_below;
465
466 pixman_region32_init_with_extents(&damage_below, &view->extents);
467- pixman_region32_union(&compositor.damage, &compositor.damage, &damage_below);
468+ pixman_region32_union(&compositor.damage, &compositor.damage,
469+ &damage_below);
470 pixman_region32_fini(&damage_below);
471 }
472
473@@ -452,7 +508,8 @@ damage_view(struct compositor_view *view)
474 static void
475 update_extents(struct compositor_view *view)
476 {
477- int64_t total_border = (int64_t)view->border.outwidth + (int64_t)view->border.inwidth;
478+ int64_t total_border =
479+ (int64_t)view->border.outwidth + (int64_t)view->border.inwidth;
480 int64_t geom_x = view->base.geometry.x;
481 int64_t geom_y = view->base.geometry.y;
482 int64_t geom_w = view->base.geometry.width;
483@@ -465,46 +522,55 @@ update_extents(struct compositor_view *view)
484
485 int64_t buffer_x1 = geom_x - view->buffer_offset_x;
486 int64_t buffer_y1 = geom_y - view->buffer_offset_y;
487- int64_t buffer_x2 = buffer_x1 + (view->base.buffer ? view->base.buffer->width : (uint32_t)geom_w);
488- int64_t buffer_y2 = buffer_y1 + (view->base.buffer ? view->base.buffer->height : (uint32_t)geom_h);
489+ int64_t buffer_x2 =
490+ buffer_x1 +
491+ (view->base.buffer ? view->base.buffer->width : (uint32_t)geom_w);
492+ int64_t buffer_y2 =
493+ buffer_y1 +
494+ (view->base.buffer ? view->base.buffer->height : (uint32_t)geom_h);
495
496 view->extents.x1 = clamp_i32(MIN(border_x1, buffer_x1));
497 view->extents.y1 = clamp_i32(MIN(border_y1, buffer_y1));
498 view->extents.x2 = clamp_i32(MAX(border_x2, buffer_x2));
499 view->extents.y2 = clamp_i32(MAX(border_y2, buffer_y2));
500
501- if (view->extents.x2 < view->extents.x1)
502+ if (view->extents.x2 < view->extents.x1) {
503 view->extents.x2 = view->extents.x1;
504- if (view->extents.y2 < view->extents.y1)
505+ }
506+ if (view->extents.y2 < view->extents.y1) {
507 view->extents.y2 = view->extents.y1;
508+ }
509
510 /* Damage border. */
511 view->border.damaged_border1 = true;
512 view->border.damaged_border2 = true;
513 }
514
515-
516 static void
517 schedule_updates(uint32_t screens)
518 {
519- if (compositor.scheduled_updates == 0)
520+ if (compositor.scheduled_updates == 0) {
521 wl_event_loop_add_idle(swc.event_loop, &perform_update, NULL);
522+ }
523
524 if (screens == -1) {
525 struct screen *screen;
526
527 screens = 0;
528- wl_list_for_each (screen, &swc.screens, link)
529- screens |= screen_mask(screen);
530+ wl_list_for_each(screen, &swc.screens, link) screens |=
531+ screen_mask(screen);
532 }
533
534- /* when zoomed, force full screen damage since actual area differs from world coords */
535+ /* when zoomed, force full screen damage since actual area differs from
536+ * world coords */
537 if (compositor.zoom != 1.0f) {
538 struct screen *screen;
539- wl_list_for_each (screen, &swc.screens, link) {
540- pixman_region32_union_rect(&compositor.damage, &compositor.damage,
541- screen->base.geometry.x, screen->base.geometry.y,
542- screen->base.geometry.width, screen->base.geometry.height);
543+ wl_list_for_each(screen, &swc.screens, link)
544+ {
545+ pixman_region32_union_rect(
546+ &compositor.damage, &compositor.damage, screen->base.geometry.x,
547+ screen->base.geometry.y, screen->base.geometry.width,
548+ screen->base.geometry.height);
549 screens |= screen_mask(screen);
550 }
551 }
552@@ -517,38 +583,47 @@ compositor_damage_all(void)
553 {
554 struct screen *screen;
555
556- if (!compositor.initialized)
557+ if (!compositor.initialized) {
558 return;
559+ }
560
561- wl_list_for_each (screen, &swc.screens, link) {
562- pixman_region32_union_rect(&compositor.damage, &compositor.damage,
563- screen->base.geometry.x, screen->base.geometry.y,
564- screen->base.geometry.width, screen->base.geometry.height);
565+ wl_list_for_each(screen, &swc.screens, link)
566+ {
567+ pixman_region32_union_rect(
568+ &compositor.damage, &compositor.damage, screen->base.geometry.x,
569+ screen->base.geometry.y, screen->base.geometry.width,
570+ screen->base.geometry.height);
571 }
572
573 schedule_updates(-1);
574 }
575
576 static void
577-overlay_damage_region(int32_t x, int32_t y, uint32_t width, uint32_t height, uint32_t border_width)
578+overlay_damage_region(int32_t x, int32_t y, uint32_t width, uint32_t height,
579+ uint32_t border_width)
580 {
581 (void)border_width;
582- pixman_region32_union_rect(&compositor.damage, &compositor.damage, x, y, width, height);
583+ pixman_region32_union_rect(&compositor.damage, &compositor.damage, x, y,
584+ width, height);
585 }
586
587 EXPORT void
588-swc_overlay_set_box(int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color, uint32_t border_width)
589+swc_overlay_set_box(int32_t x1, int32_t y1, int32_t x2, int32_t y2,
590+ uint32_t color, uint32_t border_width)
591 {
592 int32_t x = x1 < x2 ? x1 : x2;
593 int32_t y = y1 < y2 ? y1 : y2;
594 uint32_t width = (uint32_t)abs(x2 - x1);
595 uint32_t height = (uint32_t)abs(y2 - y1);
596
597- if (border_width == 0)
598+ if (border_width == 0) {
599 border_width = 1;
600+ }
601
602- if (overlay.active)
603- overlay_damage_region(overlay.x, overlay.y, overlay.width, overlay.height, overlay.border_width);
604+ if (overlay.active) {
605+ overlay_damage_region(overlay.x, overlay.y, overlay.width,
606+ overlay.height, overlay.border_width);
607+ }
608
609 overlay.active = true;
610 overlay.x = x;
611@@ -558,17 +633,20 @@ swc_overlay_set_box(int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t col
612 overlay.color = color;
613 overlay.border_width = border_width;
614
615- overlay_damage_region(overlay.x, overlay.y, overlay.width, overlay.height, overlay.border_width);
616+ overlay_damage_region(overlay.x, overlay.y, overlay.width, overlay.height,
617+ overlay.border_width);
618 schedule_updates(-1);
619 }
620
621 EXPORT void
622 swc_overlay_clear(void)
623 {
624- if (!overlay.active)
625+ if (!overlay.active) {
626 return;
627+ }
628
629- overlay_damage_region(overlay.x, overlay.y, overlay.width, overlay.height, overlay.border_width);
630+ overlay_damage_region(overlay.x, overlay.y, overlay.width, overlay.height,
631+ overlay.border_width);
632 overlay.active = false;
633 schedule_updates(-1);
634 }
635@@ -576,10 +654,12 @@ swc_overlay_clear(void)
636 EXPORT void
637 swc_set_zoom(float level)
638 {
639- if (level < 0.1f)
640+ if (level < 0.1f) {
641 level = 0.1f;
642- if (level > 10.0f)
643+ }
644+ if (level > 10.0f) {
645 level = 10.0f;
646+ }
647
648 if (compositor.zoom != level) {
649 compositor.zoom = level;
650@@ -620,10 +700,11 @@ render_zoomed_to_shm(struct screen *screen, float zoom)
651 struct compositor_view *view;
652 struct wld_buffer *background;
653
654- struct wld_buffer *buffer = wld_create_buffer(swc.shm->context,
655- width, height, WLD_FORMAT_XRGB8888, WLD_FLAG_MAP);
656- if (!buffer)
657+ struct wld_buffer *buffer = wld_create_buffer(
658+ swc.shm->context, width, height, WLD_FORMAT_XRGB8888, WLD_FLAG_MAP);
659+ if (!buffer) {
660 return NULL;
661+ }
662
663 if (!wld_set_target_buffer(swc.shm->renderer, buffer)) {
664 wld_buffer_unreference(buffer);
665@@ -633,10 +714,11 @@ render_zoomed_to_shm(struct screen *screen, float zoom)
666 pixman_region32_t full;
667 pixman_region32_init_rect(&full, 0, 0, width, height);
668 background = swc_wallpaper_buffer_for_screen(screen);
669- if (background)
670+ if (background) {
671 wld_copy_region(swc.shm->renderer, background, 0, 0, &full);
672- else
673+ } else {
674 wld_fill_region(swc.shm->renderer, bgcolor, &full);
675+ }
676 pixman_region32_fini(&full);
677 wld_flush(swc.shm->renderer);
678
679@@ -646,9 +728,8 @@ render_zoomed_to_shm(struct screen *screen, float zoom)
680 }
681
682 pixman_image_t *dst_img = pixman_image_create_bits(
683- wld_to_pixman_format(buffer->format),
684- buffer->width, buffer->height,
685- buffer->map, buffer->pitch);
686+ wld_to_pixman_format(buffer->format), buffer->width, buffer->height,
687+ buffer->map, buffer->pitch);
688
689 if (!dst_img) {
690 wld_unmap(buffer);
691@@ -657,17 +738,21 @@ render_zoomed_to_shm(struct screen *screen, float zoom)
692 }
693
694 /* render each view with scaling */
695- wl_list_for_each_reverse(view, &compositor.views, link) {
696+ wl_list_for_each_reverse(view, &compositor.views, link)
697+ {
698 struct wld_buffer *src = view->buffer;
699 const struct swc_rectangle *geom = &view->base.geometry;
700
701- if (!src)
702+ if (!src) {
703 continue;
704+ }
705
706- if (!(wld_capabilities(swc.shm->renderer, src) & WLD_CAPABILITY_READ))
707+ if (!(wld_capabilities(swc.shm->renderer, src) & WLD_CAPABILITY_READ)) {
708 src = view->base.buffer;
709- if (!src)
710+ }
711+ if (!src) {
712 continue;
713+ }
714
715 /* maths zoom position and size */
716 float zoomed_x = (geom->x - cx) * zoom + width / 2.0f;
717@@ -679,9 +764,12 @@ render_zoomed_to_shm(struct screen *screen, float zoom)
718 float border_in = view->border.inwidth * zoom;
719 float total_border = border_out + border_in;
720
721- if (zoomed_x + zoomed_w + total_border < 0 || zoomed_x - total_border >= (int32_t)width ||
722- zoomed_y + zoomed_h + total_border < 0 || zoomed_y - total_border >= (int32_t)height)
723+ if (zoomed_x + zoomed_w + total_border < 0 ||
724+ zoomed_x - total_border >= (int32_t)width ||
725+ zoomed_y + zoomed_h + total_border < 0 ||
726+ zoomed_y - total_border >= (int32_t)height) {
727 continue;
728+ }
729
730 if (view->border.outwidth > 0 && border_out >= 1) {
731 int32_t bx = (int32_t)(zoomed_x - total_border);
732@@ -691,21 +779,21 @@ render_zoomed_to_shm(struct screen *screen, float zoom)
733 int32_t bo = (int32_t)border_out;
734
735 pixman_color_t color = {
736- .red = ((view->border.outcolor >> 16) & 0xff) * 257,
737- .green = ((view->border.outcolor >> 8) & 0xff) * 257,
738- .blue = (view->border.outcolor & 0xff) * 257,
739- .alpha = 0xffff
740- };
741+ .red = ((view->border.outcolor >> 16) & 0xff) * 257,
742+ .green = ((view->border.outcolor >> 8) & 0xff) * 257,
743+ .blue = (view->border.outcolor & 0xff) * 257,
744+ .alpha = 0xffff};
745 pixman_image_t *fill = pixman_image_create_solid_fill(&color);
746 if (fill) {
747- pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img,
748- 0, 0, 0, 0, bx, by, bw, bo);
749- pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img,
750- 0, 0, 0, 0, bx, by + bh - bo, bw, bo);
751- pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img,
752- 0, 0, 0, 0, bx, by + bo, bo, bh - 2 * bo);
753- pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img,
754- 0, 0, 0, 0, bx + bw - bo, by + bo, bo, bh - 2 * bo);
755+ pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img, 0,
756+ 0, 0, 0, bx, by, bw, bo);
757+ pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img, 0,
758+ 0, 0, 0, bx, by + bh - bo, bw, bo);
759+ pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img, 0,
760+ 0, 0, 0, bx, by + bo, bo, bh - 2 * bo);
761+ pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img, 0,
762+ 0, 0, 0, bx + bw - bo, by + bo, bo,
763+ bh - 2 * bo);
764 pixman_image_unref(fill);
765 }
766 }
767@@ -718,32 +806,32 @@ render_zoomed_to_shm(struct screen *screen, float zoom)
768 int32_t bi = (int32_t)border_in;
769
770 pixman_color_t color = {
771- .red = ((view->border.incolor >> 16) & 0xff) * 257,
772- .green = ((view->border.incolor >> 8) & 0xff) * 257,
773- .blue = (view->border.incolor & 0xff) * 257,
774- .alpha = 0xffff
775- };
776+ .red = ((view->border.incolor >> 16) & 0xff) * 257,
777+ .green = ((view->border.incolor >> 8) & 0xff) * 257,
778+ .blue = (view->border.incolor & 0xff) * 257,
779+ .alpha = 0xffff};
780 pixman_image_t *fill = pixman_image_create_solid_fill(&color);
781 if (fill) {
782- pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img,
783- 0, 0, 0, 0, bx, by, bw, bi);
784- pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img,
785- 0, 0, 0, 0, bx, by + bh - bi, bw, bi);
786- pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img,
787- 0, 0, 0, 0, bx, by + bi, bi, bh - 2 * bi);
788- pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img,
789- 0, 0, 0, 0, bx + bw - bi, by + bi, bi, bh - 2 * bi);
790+ pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img, 0,
791+ 0, 0, 0, bx, by, bw, bi);
792+ pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img, 0,
793+ 0, 0, 0, bx, by + bh - bi, bw, bi);
794+ pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img, 0,
795+ 0, 0, 0, bx, by + bi, bi, bh - 2 * bi);
796+ pixman_image_composite32(PIXMAN_OP_OVER, fill, NULL, dst_img, 0,
797+ 0, 0, 0, bx + bw - bi, by + bi, bi,
798+ bh - 2 * bi);
799 pixman_image_unref(fill);
800 }
801 }
802
803- if (!wld_map(src))
804+ if (!wld_map(src)) {
805 continue;
806+ }
807
808 pixman_image_t *src_img = pixman_image_create_bits(
809- wld_to_pixman_format(src->format),
810- src->width, src->height,
811- src->map, src->pitch);
812+ wld_to_pixman_format(src->format), src->width, src->height,
813+ src->map, src->pitch);
814
815 if (src_img) {
816 pixman_transform_t transform;
817@@ -753,11 +841,10 @@ render_zoomed_to_shm(struct screen *screen, float zoom)
818 pixman_image_set_transform(src_img, &transform);
819 pixman_image_set_filter(src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
820
821- pixman_image_composite32(PIXMAN_OP_OVER,
822- src_img, NULL, dst_img,
823- 0, 0, 0, 0,
824- (int32_t)zoomed_x, (int32_t)zoomed_y,
825- (int32_t)(zoomed_w + 1), (int32_t)(zoomed_h + 1));
826+ pixman_image_composite32(PIXMAN_OP_OVER, src_img, NULL, dst_img, 0,
827+ 0, 0, 0, (int32_t)zoomed_x,
828+ (int32_t)zoomed_y, (int32_t)(zoomed_w + 1),
829+ (int32_t)(zoomed_h + 1));
830
831 pixman_image_unref(src_img);
832 }
833@@ -776,8 +863,9 @@ update(struct view *base)
834 {
835 struct compositor_view *view = (void *)base;
836
837- if (!swc.active || !view->visible)
838+ if (!swc.active || !view->visible) {
839 return false;
840+ }
841
842 schedule_updates(view->base.screens);
843
844@@ -795,8 +883,9 @@ attach(struct view *base, struct wld_buffer *buffer)
845 uint32_t new_height = buffer ? buffer->height : 0;
846 int ret;
847
848- if ((ret = renderer_attach(view, buffer)) < 0)
849+ if ((ret = renderer_attach(view, buffer)) < 0) {
850 return ret;
851+ }
852
853 /* Schedule updates on the screens the view was previously
854 * visible on. */
855@@ -853,8 +942,8 @@ move(struct view *base, int32_t x, int32_t y)
856 update_extents(view);
857
858 if (view->visible) {
859- /* Assume worst-case no clipping until we draw the next frame (in case the
860- * surface gets moved again before that). */
861+ /* Assume worst-case no clipping until we draw the next frame (in
862+ * case the surface gets moved again before that). */
863 pixman_region32_init(&view->clip);
864
865 view_update_screens(&view->base);
866@@ -867,9 +956,9 @@ move(struct view *base, int32_t x, int32_t y)
867 }
868
869 static const struct view_impl view_impl = {
870- .update = update,
871- .attach = attach,
872- .move = move,
873+ .update = update,
874+ .attach = attach,
875+ .move = move,
876 };
877
878 static struct compositor_view *
879@@ -879,29 +968,33 @@ view_at(int32_t x, int32_t y)
880 struct swc_rectangle *geom;
881 struct swc_rectangle buffer_geom;
882
883- wl_list_for_each (view, &compositor.views, link) {
884- if (!view->visible)
885+ wl_list_for_each(view, &compositor.views, link)
886+ {
887+ if (!view->visible) {
888 continue;
889+ }
890
891 geom = &view->base.geometry;
892 if (view->window) {
893- if (!rectangle_contains_point(geom, x, y))
894+ if (!rectangle_contains_point(geom, x, y)) {
895 continue;
896+ }
897 } else if (view->base.buffer) {
898 buffer_geom.x = geom->x - view->buffer_offset_x;
899 buffer_geom.y = geom->y - view->buffer_offset_y;
900 buffer_geom.width = view->base.buffer->width;
901 buffer_geom.height = view->base.buffer->height;
902- if (!rectangle_contains_point(&buffer_geom, x, y))
903+ if (!rectangle_contains_point(&buffer_geom, x, y)) {
904 continue;
905+ }
906 } else if (!rectangle_contains_point(geom, x, y)) {
907 continue;
908 }
909
910 if (pixman_region32_contains_point(&view->surface->state.input,
911 x - geom->x + view->buffer_offset_x,
912- y - geom->y + view->buffer_offset_y, NULL))
913- {
914+ y - geom->y + view->buffer_offset_y,
915+ NULL)) {
916 return view;
917 }
918 }
919@@ -912,8 +1005,9 @@ view_at(int32_t x, int32_t y)
920 static struct compositor_view *
921 window_view(struct compositor_view *view)
922 {
923- while (view && !view->window && view->parent && view->parent != view)
924+ while (view && !view->window && view->parent && view->parent != view) {
925 view = view->parent;
926+ }
927 return (view && view->window) ? view : NULL;
928 }
929
930@@ -925,14 +1019,17 @@ raise_window(struct compositor_view *view)
931 uint32_t screens;
932
933 view = window_view(view);
934- if (!view || !view->visible)
935+ if (!view || !view->visible) {
936 return;
937+ }
938
939 top_window = NULL;
940 insert_after = &compositor.views;
941- wl_list_for_each (other, &compositor.views, link) {
942- if (!other->visible)
943+ wl_list_for_each(other, &compositor.views, link)
944+ {
945+ if (!other->visible) {
946 continue;
947+ }
948
949 if (other->window) {
950 top_window = other;
951@@ -941,8 +1038,9 @@ raise_window(struct compositor_view *view)
952 insert_after = &other->link;
953 }
954
955- if (view == top_window)
956+ if (view == top_window) {
957 return;
958+ }
959
960 screens = view->base.screens;
961
962@@ -970,8 +1068,9 @@ view_for_window(struct swc_window *base)
963 {
964 struct window *window;
965
966- if (!base)
967+ if (!base) {
968 return NULL;
969+ }
970
971 window = (struct window *)base;
972 return window->view;
973@@ -986,8 +1085,9 @@ prev_window_view(struct compositor_view *view)
974 for (link = view->link.prev; link != &compositor.views; link = link->prev) {
975 other = wl_container_of(link, other, link);
976
977- if (other->visible && other->window)
978+ if (other->visible && other->window) {
979 return other;
980+ }
981 }
982
983 return NULL;
984@@ -1002,8 +1102,9 @@ next_window_view(struct compositor_view *view)
985 for (link = view->link.next; link != &compositor.views; link = link->next) {
986 other = wl_container_of(link, other, link);
987
988- if (other->visible && other->window)
989+ if (other->visible && other->window) {
990 return other;
991+ }
992 }
993
994 return NULL;
995@@ -1039,19 +1140,22 @@ swc_window_stack(struct swc_window *window, int32_t direction)
996 struct compositor_view *view = view_for_window(window);
997 struct compositor_view *other = NULL;
998
999- if (!view || !view->visible || direction == 0)
1000+ if (!view || !view->visible || direction == 0) {
1001 return;
1002+ }
1003
1004 if (direction < 0) {
1005 other = prev_window_view(view);
1006- if (!other)
1007+ if (!other) {
1008 return;
1009+ }
1010 wl_list_remove(&view->link);
1011 wl_list_insert(other->link.prev, &view->link);
1012 } else {
1013 other = next_window_view(view);
1014- if (!other)
1015+ if (!other) {
1016 return;
1017+ }
1018 wl_list_remove(&view->link);
1019 wl_list_insert(&other->link, &view->link);
1020 }
1021@@ -1066,8 +1170,9 @@ compositor_create_view(struct surface *surface)
1022
1023 view = malloc(sizeof(*view));
1024
1025- if (!view)
1026+ if (!view) {
1027 return NULL;
1028+ }
1029
1030 view_initialize(&view->base, &view_impl);
1031 view->surface = surface;
1032@@ -1114,30 +1219,36 @@ compositor_view(struct view *view)
1033 }
1034
1035 void
1036-compositor_view_set_parent(struct compositor_view *view, struct compositor_view *parent)
1037+compositor_view_set_parent(struct compositor_view *view,
1038+ struct compositor_view *parent)
1039 {
1040 view->parent = parent;
1041
1042- if (parent->visible)
1043+ if (parent->visible) {
1044 compositor_view_show(view);
1045- else
1046+ } else {
1047 compositor_view_hide(view);
1048+ }
1049 }
1050
1051 void
1052-compositor_view_restack(struct compositor_view *view, struct compositor_view *sibling, bool above)
1053+compositor_view_restack(struct compositor_view *view,
1054+ struct compositor_view *sibling, bool above)
1055 {
1056- if (!view || !sibling || view == sibling)
1057+ if (!view || !sibling || view == sibling) {
1058 return;
1059+ }
1060
1061 if (above) {
1062- if (view->link.next == &sibling->link)
1063+ if (view->link.next == &sibling->link) {
1064 return;
1065+ }
1066 wl_list_remove(&view->link);
1067 wl_list_insert(sibling->link.prev, &view->link);
1068 } else {
1069- if (view->link.prev == &sibling->link)
1070+ if (view->link.prev == &sibling->link) {
1071 return;
1072+ }
1073 wl_list_remove(&view->link);
1074 wl_list_insert(&sibling->link, &view->link);
1075 }
1076@@ -1151,13 +1262,15 @@ compositor_view_show(struct compositor_view *view)
1077 struct compositor_view *other;
1078 struct subsurface *subsurface;
1079
1080- if (view->visible)
1081+ if (view->visible) {
1082 return;
1083+ }
1084
1085 subsurface = view->surface ? view->surface->subsurface : NULL;
1086 if (subsurface) {
1087- if (!subsurface->added || !view->surface->state.buffer)
1088+ if (!subsurface->added || !view->surface->state.buffer) {
1089 return;
1090+ }
1091 }
1092
1093 view->visible = true;
1094@@ -1169,9 +1282,11 @@ compositor_view_show(struct compositor_view *view)
1095 damage_view(view);
1096 update(&view->base);
1097
1098- wl_list_for_each (other, &compositor.views, link) {
1099- if (other->parent == view)
1100+ wl_list_for_each(other, &compositor.views, link)
1101+ {
1102+ if (other->parent == view) {
1103 compositor_view_show(other);
1104+ }
1105 }
1106 }
1107
1108@@ -1180,8 +1295,9 @@ compositor_view_hide(struct compositor_view *view)
1109 {
1110 struct compositor_view *other;
1111
1112- if (!view->visible)
1113+ if (!view->visible) {
1114 return;
1115+ }
1116
1117 /* Update all the screens the view was on. */
1118 update(&view->base);
1119@@ -1190,17 +1306,21 @@ compositor_view_hide(struct compositor_view *view)
1120 view_set_screens(&view->base, 0);
1121 view->visible = false;
1122
1123- wl_list_for_each (other, &compositor.views, link) {
1124- if (other->parent == view)
1125+ wl_list_for_each(other, &compositor.views, link)
1126+ {
1127+ if (other->parent == view) {
1128 compositor_view_hide(other);
1129+ }
1130 }
1131 }
1132
1133 void
1134-compositor_view_set_border_width(struct compositor_view *view, uint32_t outwidth, uint32_t inwidth)
1135+compositor_view_set_border_width(struct compositor_view *view,
1136+ uint32_t outwidth, uint32_t inwidth)
1137 {
1138- if (view->border.outwidth == outwidth && view->border.inwidth == inwidth)
1139+ if (view->border.outwidth == outwidth && view->border.inwidth == inwidth) {
1140 return;
1141+ }
1142
1143 view->border.outwidth = outwidth;
1144 view->border.damaged_border1 = true;
1145@@ -1215,17 +1335,18 @@ compositor_view_set_border_width(struct compositor_view *view, uint32_t outwidth
1146 }
1147
1148 void
1149-compositor_view_set_border_color(struct compositor_view *view, uint32_t outcolor, uint32_t incolor)
1150+compositor_view_set_border_color(struct compositor_view *view,
1151+ uint32_t outcolor, uint32_t incolor)
1152 {
1153- if (view->border.outcolor == outcolor && view->border.incolor == incolor)
1154+ if (view->border.outcolor == outcolor && view->border.incolor == incolor) {
1155 return;
1156+ }
1157
1158 view->border.outcolor = outcolor;
1159 view->border.damaged_border1 = true;
1160
1161 view->border.incolor = incolor;
1162 view->border.damaged_border2 = true;
1163-
1164
1165 /* XXX: Damage above surface for transparent surfaces? */
1166
1167@@ -1245,14 +1366,17 @@ calculate_damage(void)
1168 pixman_region32_init(&surface_opaque);
1169
1170 /* Go through views top-down to calculate clipping regions. */
1171- wl_list_for_each (view, &compositor.views, link) {
1172- if (!view->visible)
1173+ wl_list_for_each(view, &compositor.views, link)
1174+ {
1175+ if (!view->visible) {
1176 continue;
1177+ }
1178
1179 geom = &view->base.geometry;
1180 pixman_region32_t view_region;
1181
1182- pixman_region32_init_rect(&view_region, geom->x, geom->y, geom->width, geom->height);
1183+ pixman_region32_init_rect(&view_region, geom->x, geom->y, geom->width,
1184+ geom->height);
1185
1186 /* Clip the surface by the opaque region covering it. */
1187 pixman_region32_copy(&view->clip, &compositor.opaque);
1188@@ -1262,10 +1386,12 @@ calculate_damage(void)
1189 pixman_region32_translate(&surface_opaque,
1190 geom->x - view->buffer_offset_x,
1191 geom->y - view->buffer_offset_y);
1192- pixman_region32_intersect(&surface_opaque, &surface_opaque, &view_region);
1193+ pixman_region32_intersect(&surface_opaque, &surface_opaque,
1194+ &view_region);
1195
1196 /* Add the surface's opaque region to the accumulated opaque region. */
1197- pixman_region32_union(&compositor.opaque, &compositor.opaque, &surface_opaque);
1198+ pixman_region32_union(&compositor.opaque, &compositor.opaque,
1199+ &surface_opaque);
1200
1201 surface_damage = &view->surface->state.damage;
1202
1203@@ -1278,27 +1404,30 @@ calculate_damage(void)
1204 geom->y - view->buffer_offset_y);
1205
1206 /* Add the surface damage to the compositor damage. */
1207- pixman_region32_union(&compositor.damage, &compositor.damage, surface_damage);
1208+ pixman_region32_union(&compositor.damage, &compositor.damage,
1209+ surface_damage);
1210 pixman_region32_clear(surface_damage);
1211 }
1212
1213- /* redraw entire thingy if either */
1214- if (view->border.damaged_border1 || view->border.damaged_border2) {
1215- pixman_region32_t border_region;
1216+ /* redraw entire thingy if either */
1217+ if (view->border.damaged_border1 || view->border.damaged_border2) {
1218+ pixman_region32_t border_region;
1219
1220- pixman_region32_init_with_extents(&border_region, &view->extents);
1221+ pixman_region32_init_with_extents(&border_region, &view->extents);
1222
1223- pixman_region32_subtract(&border_region, &border_region, &view_region);
1224+ pixman_region32_subtract(&border_region, &border_region,
1225+ &view_region);
1226
1227- pixman_region32_union(&compositor.damage, &compositor.damage, &border_region);
1228+ pixman_region32_union(&compositor.damage, &compositor.damage,
1229+ &border_region);
1230
1231- pixman_region32_fini(&border_region);
1232+ pixman_region32_fini(&border_region);
1233
1234- view->border.damaged_border1 = false;
1235- view->border.damaged_border2 = false;
1236- }
1237+ view->border.damaged_border1 = false;
1238+ view->border.damaged_border2 = false;
1239+ }
1240
1241- pixman_region32_fini(&view_region);
1242+ pixman_region32_fini(&view_region);
1243 }
1244
1245 pixman_region32_fini(&surface_opaque);
1246@@ -1311,14 +1440,17 @@ update_screen(struct screen *screen)
1247 const struct swc_rectangle *geom = &screen->base.geometry;
1248 pixman_region32_t damage, *total_damage;
1249
1250- if (!(compositor.scheduled_updates & screen_mask(screen)))
1251+ if (!(compositor.scheduled_updates & screen_mask(screen))) {
1252 return;
1253+ }
1254
1255- if (!(target = target_get(screen)))
1256+ if (!(target = target_get(screen))) {
1257 return;
1258+ }
1259
1260 pixman_region32_init(&damage);
1261- pixman_region32_intersect_rect(&damage, &compositor.damage, geom->x, geom->y, geom->width, geom->height);
1262+ pixman_region32_intersect_rect(&damage, &compositor.damage, geom->x,
1263+ geom->y, geom->width, geom->height);
1264 pixman_region32_translate(&damage, -geom->x, -geom->y);
1265 total_damage = wld_surface_damage(target->surface, &damage);
1266
1267@@ -1332,9 +1464,11 @@ update_screen(struct screen *screen)
1268 if (compositor.zoom != 1.0f) {
1269 pixman_region32_fini(&damage);
1270
1271- struct wld_buffer *zoomed = render_zoomed_to_shm(screen, compositor.zoom);
1272- if (!zoomed)
1273+ struct wld_buffer *zoomed =
1274+ render_zoomed_to_shm(screen, compositor.zoom);
1275+ if (!zoomed) {
1276 return;
1277+ }
1278
1279 pixman_region32_t full;
1280 pixman_region32_init_rect(&full, 0, 0, geom->width, geom->height);
1281@@ -1350,15 +1484,16 @@ update_screen(struct screen *screen)
1282 pixman_region32_translate(&damage, geom->x, geom->y);
1283 pixman_region32_init(&base_damage);
1284 pixman_region32_subtract(&base_damage, &damage, &compositor.opaque);
1285- renderer_repaint(target, &damage, &base_damage, &compositor.views, screen);
1286+ renderer_repaint(target, &damage, &base_damage, &compositor.views,
1287+ screen);
1288 pixman_region32_fini(&damage);
1289 pixman_region32_fini(&base_damage);
1290 }
1291
1292 switch (target_swap_buffers(target)) {
1293 case -EACCES:
1294- /* If we get an EACCES, it is because this session is being deactivated, but
1295- * we haven't yet received the deactivate signal from swc-launch. */
1296+ /* If we get an EACCES, it is because this session is being deactivated,
1297+ * but we haven't yet received the deactivate signal from swc-launch. */
1298 swc_deactivate();
1299 break;
1300 case 0:
1301@@ -1373,16 +1508,16 @@ perform_update(void *data)
1302 struct screen *screen;
1303 uint32_t updates = compositor.scheduled_updates & ~compositor.pending_flips;
1304
1305- if (!swc.active || !updates)
1306+ if (!swc.active || !updates) {
1307 return;
1308+ }
1309
1310 DEBUG("Performing update\n");
1311
1312 compositor.updating = true;
1313 calculate_damage();
1314
1315- wl_list_for_each (screen, &swc.screens, link)
1316- update_screen(screen);
1317+ wl_list_for_each(screen, &swc.screens, link) update_screen(screen);
1318
1319 /* XXX: Should assert that all damage was covered by some output */
1320 pixman_region32_clear(&compositor.damage);
1321@@ -1391,13 +1526,15 @@ perform_update(void *data)
1322 }
1323
1324 bool
1325-handle_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx, wl_fixed_t fy)
1326+handle_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx,
1327+ wl_fixed_t fy)
1328 {
1329 int32_t x = wl_fixed_to_int(fx), y = wl_fixed_to_int(fy);
1330
1331 /* If buttons are pressed, don't change pointer focus. */
1332- if (swc.seat->pointer->buttons.size > 0)
1333+ if (swc.seat->pointer->buttons.size > 0) {
1334 return false;
1335+ }
1336
1337 struct compositor_view *view = view_at(x, y);
1338
1339@@ -1407,14 +1544,16 @@ handle_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx, wl_
1340 }
1341
1342 static bool
1343-handle_button(struct pointer_handler *handler, uint32_t time, struct button *button, uint32_t state)
1344+handle_button(struct pointer_handler *handler, uint32_t time,
1345+ struct button *button, uint32_t state)
1346 {
1347 (void)handler;
1348 (void)time;
1349 (void)button;
1350
1351- if (state != WL_POINTER_BUTTON_STATE_PRESSED)
1352+ if (state != WL_POINTER_BUTTON_STATE_PRESSED) {
1353 return false;
1354+ }
1355
1356 int32_t x = wl_fixed_to_int(swc.seat->pointer->x);
1357 int32_t y = wl_fixed_to_int(swc.seat->pointer->y);
1358@@ -1429,8 +1568,9 @@ handle_button(struct pointer_handler *handler, uint32_t time, struct button *but
1359 static void
1360 handle_terminate(void *data, uint32_t time, uint32_t value, uint32_t state)
1361 {
1362- if (state == WL_KEYBOARD_KEY_STATE_PRESSED)
1363+ if (state == WL_KEYBOARD_KEY_STATE_PRESSED) {
1364 wl_display_terminate(swc.display);
1365+ }
1366 }
1367
1368 static void
1369@@ -1438,8 +1578,9 @@ handle_switch_vt(void *data, uint32_t time, uint32_t value, uint32_t state)
1370 {
1371 uint8_t vt = value - XKB_KEY_XF86Switch_VT_1 + 1;
1372
1373- if (state == WL_KEYBOARD_KEY_STATE_PRESSED)
1374+ if (state == WL_KEYBOARD_KEY_STATE_PRESSED) {
1375 launch_activate_vt(vt);
1376+ }
1377 }
1378
1379 static void
1380@@ -1458,7 +1599,8 @@ handle_swc_event(struct wl_listener *listener, void *data)
1381 }
1382
1383 static void
1384-create_surface(struct wl_client *client, struct wl_resource *resource, uint32_t id)
1385+create_surface(struct wl_client *client, struct wl_resource *resource,
1386+ uint32_t id)
1387 {
1388 struct surface *surface;
1389
1390@@ -1474,23 +1616,27 @@ create_surface(struct wl_client *client, struct wl_resource *resource, uint32_t
1391 }
1392
1393 static void
1394-create_region(struct wl_client *client, struct wl_resource *resource, uint32_t id)
1395+create_region(struct wl_client *client, struct wl_resource *resource,
1396+ uint32_t id)
1397 {
1398- if (!region_new(client, wl_resource_get_version(resource), id))
1399+ if (!region_new(client, wl_resource_get_version(resource), id)) {
1400 wl_resource_post_no_memory(resource);
1401+ }
1402 }
1403
1404 static const struct wl_compositor_interface compositor_impl = {
1405- .create_surface = create_surface,
1406- .create_region = create_region,
1407+ .create_surface = create_surface,
1408+ .create_region = create_region,
1409 };
1410
1411 static void
1412-bind_compositor(struct wl_client *client, void *data, uint32_t version, uint32_t id)
1413+bind_compositor(struct wl_client *client, void *data, uint32_t version,
1414+ uint32_t id)
1415 {
1416 struct wl_resource *resource;
1417
1418- resource = wl_resource_create(client, &wl_compositor_interface, version, id);
1419+ resource =
1420+ wl_resource_create(client, &wl_compositor_interface, version, id);
1421 if (!resource) {
1422 wl_client_post_no_memory(client);
1423 return;
1424@@ -1504,10 +1650,12 @@ compositor_initialize(void)
1425 struct screen *screen;
1426 uint32_t keysym;
1427
1428- compositor.global = wl_global_create(swc.display, &wl_compositor_interface, 4, NULL, &bind_compositor);
1429+ compositor.global = wl_global_create(swc.display, &wl_compositor_interface,
1430+ 4, NULL, &bind_compositor);
1431
1432- if (!compositor.global)
1433+ if (!compositor.global) {
1434 return false;
1435+ }
1436
1437 compositor.scheduled_updates = 0;
1438 compositor.pending_flips = 0;
1439@@ -1521,15 +1669,19 @@ compositor_initialize(void)
1440 compositor.swc_listener.notify = &handle_swc_event;
1441 wl_signal_add(&swc.event_signal, &compositor.swc_listener);
1442
1443- wl_list_for_each (screen, &swc.screens, link)
1444- target_new(screen);
1445- if (swc.active)
1446+ wl_list_for_each(screen, &swc.screens, link) target_new(screen);
1447+ if (swc.active) {
1448 schedule_updates(-1);
1449+ }
1450
1451- swc_add_binding(SWC_BINDING_KEY, SWC_MOD_CTRL | SWC_MOD_ALT, XKB_KEY_BackSpace, &handle_terminate, NULL);
1452+ swc_add_binding(SWC_BINDING_KEY, SWC_MOD_CTRL | SWC_MOD_ALT,
1453+ XKB_KEY_BackSpace, &handle_terminate, NULL);
1454
1455- for (keysym = XKB_KEY_XF86Switch_VT_1; keysym <= XKB_KEY_XF86Switch_VT_12; ++keysym)
1456- swc_add_binding(SWC_BINDING_KEY, SWC_MOD_ANY, keysym, &handle_switch_vt, NULL);
1457+ for (keysym = XKB_KEY_XF86Switch_VT_1; keysym <= XKB_KEY_XF86Switch_VT_12;
1458+ ++keysym) {
1459+ swc_add_binding(SWC_BINDING_KEY, SWC_MOD_ANY, keysym, &handle_switch_vt,
1460+ NULL);
1461+ }
1462
1463 compositor.initialized = true;
1464
1465@@ -1541,8 +1693,9 @@ compositor_finalize(void)
1466 {
1467 compositor.initialized = false;
1468
1469- if (compositor.zoom_buffer)
1470+ if (compositor.zoom_buffer) {
1471 wld_buffer_unreference(compositor.zoom_buffer);
1472+ }
1473 pixman_region32_fini(&compositor.damage);
1474 pixman_region32_fini(&compositor.opaque);
1475 wl_global_destroy(compositor.global);
1476@@ -1552,8 +1705,9 @@ struct wld_buffer *
1477 compositor_get_buffer(struct screen *screen)
1478 {
1479 struct target *target = target_get(screen);
1480- if (!target)
1481+ if (!target) {
1482 return NULL;
1483+ }
1484 return target->current_buffer;
1485 }
1486
1487@@ -1572,8 +1726,9 @@ compositor_render_to_shm(struct screen *screen)
1488 /* create shm buf */
1489 buffer = wld_create_buffer(swc.shm->context, width, height,
1490 WLD_FORMAT_ARGB8888, WLD_FLAG_MAP);
1491- if (!buffer)
1492+ if (!buffer) {
1493 return NULL;
1494+ }
1495
1496 caps = wld_capabilities(swc.shm->renderer, buffer);
1497 if (!(caps & WLD_CAPABILITY_WRITE) ||
1498@@ -1584,52 +1739,63 @@ compositor_render_to_shm(struct screen *screen)
1499
1500 /* set reigon */
1501 pixman_region32_init_rect(&region, 0, 0, width, height);
1502- pixman_region32_init_rect(&damage, screen->base.geometry.x, screen->base.geometry.y, width, height);
1503+ pixman_region32_init_rect(&damage, screen->base.geometry.x,
1504+ screen->base.geometry.y, width, height);
1505
1506 /* background */
1507 background = swc_wallpaper_buffer_for_screen(screen);
1508- if (background)
1509+ if (background) {
1510 wld_copy_region(swc.shm->renderer, background, 0, 0, &region);
1511- else
1512+ } else {
1513 wld_fill_region(swc.shm->renderer, bgcolor, &region);
1514+ }
1515
1516- wl_list_for_each_reverse(view, &compositor.views, link) {
1517+ wl_list_for_each_reverse(view, &compositor.views, link)
1518+ {
1519 struct wld_buffer *src = view->buffer;
1520
1521- if (!view->visible)
1522+ if (!view->visible) {
1523 continue;
1524+ }
1525
1526- if (src && !(wld_capabilities(swc.shm->renderer, src) & WLD_CAPABILITY_READ))
1527+ if (src &&
1528+ !(wld_capabilities(swc.shm->renderer, src) & WLD_CAPABILITY_READ)) {
1529 src = view->base.buffer;
1530+ }
1531
1532- if (src && (wld_capabilities(swc.shm->renderer, src) & WLD_CAPABILITY_READ)) {
1533+ if (src &&
1534+ (wld_capabilities(swc.shm->renderer, src) & WLD_CAPABILITY_READ)) {
1535 int32_t x = view->base.geometry.x - screen->base.geometry.x;
1536 int32_t y = view->base.geometry.y - screen->base.geometry.y;
1537
1538- wld_copy_rectangle(swc.shm->renderer, src,
1539- x, y, 0, 0,
1540- view->base.geometry.width, view->base.geometry.height);
1541+ wld_copy_rectangle(swc.shm->renderer, src, x, y, 0, 0,
1542+ view->base.geometry.width,
1543+ view->base.geometry.height);
1544 }
1545
1546- if ((view->border.outwidth > 0 || view->border.inwidth > 0) && view->base.buffer) {
1547+ if ((view->border.outwidth > 0 || view->border.inwidth > 0) &&
1548+ view->base.buffer) {
1549 pixman_region32_t view_region, view_damage, border_damage;
1550 const struct swc_rectangle *geom = &view->base.geometry;
1551 const struct swc_rectangle *target_geom = &screen->base.geometry;
1552
1553- pixman_region32_init_rect(&view_region, geom->x, geom->y, geom->width, geom->height);
1554+ pixman_region32_init_rect(&view_region, geom->x, geom->y,
1555+ geom->width, geom->height);
1556 pixman_region32_init_with_extents(&view_damage, &view->extents);
1557 pixman_region32_init(&border_damage);
1558
1559 pixman_region32_intersect(&view_damage, &view_damage, &damage);
1560 pixman_region32_subtract(&view_damage, &view_damage, &view->clip);
1561- pixman_region32_subtract(&border_damage, &view_damage, &view_region);
1562+ pixman_region32_subtract(&border_damage, &view_damage,
1563+ &view_region);
1564
1565 pixman_region32_t in_rect;
1566 pixman_region32_init_rect(&in_rect,
1567 geom->x - view->border.inwidth,
1568 geom->y - view->border.inwidth,
1569 geom->width + (2 * view->border.inwidth),
1570- geom->height + (2 * view->border.inwidth));
1571+ geom->height +
1572+ (2 * view->border.inwidth));
1573
1574 pixman_region32_t out_border;
1575 pixman_region32_init(&out_border);
1576@@ -1640,14 +1806,20 @@ compositor_render_to_shm(struct screen *screen)
1577 pixman_region32_subtract(&in_border, &in_rect, &view_region);
1578 pixman_region32_intersect(&in_border, &in_border, &border_damage);
1579
1580- if (view->border.outwidth > 0 && pixman_region32_not_empty(&out_border)) {
1581- pixman_region32_translate(&out_border, -target_geom->x, -target_geom->y);
1582- wld_fill_region(swc.shm->renderer, view->border.outcolor, &out_border);
1583+ if (view->border.outwidth > 0 &&
1584+ pixman_region32_not_empty(&out_border)) {
1585+ pixman_region32_translate(&out_border, -target_geom->x,
1586+ -target_geom->y);
1587+ wld_fill_region(swc.shm->renderer, view->border.outcolor,
1588+ &out_border);
1589 }
1590
1591- if (view->border.inwidth > 0 && pixman_region32_not_empty(&in_border)) {
1592- pixman_region32_translate(&in_border, -target_geom->x, -target_geom->y);
1593- wld_fill_region(swc.shm->renderer, view->border.incolor, &in_border);
1594+ if (view->border.inwidth > 0 &&
1595+ pixman_region32_not_empty(&in_border)) {
1596+ pixman_region32_translate(&in_border, -target_geom->x,
1597+ -target_geom->y);
1598+ wld_fill_region(swc.shm->renderer, view->border.incolor,
1599+ &in_border);
1600 }
1601
1602 pixman_region32_fini(&border_damage);
+39,
-21
1@@ -26,8 +26,8 @@
2
3 #include "view.h"
4
5-#include <stdbool.h>
6 #include <pixman.h>
7+#include <stdbool.h>
8 #include <wayland-server.h>
9
10 struct screen;
11@@ -46,9 +46,12 @@ struct swc_compositor {
12 } signal;
13 };
14
15-bool compositor_initialize(void);
16-void compositor_finalize(void);
17-void compositor_damage_all(void);
18+bool
19+compositor_initialize(void);
20+void
21+compositor_finalize(void);
22+void
23+compositor_damage_all(void);
24
25 struct compositor_view {
26 struct view base;
27@@ -74,11 +77,11 @@ struct compositor_view {
28 uint32_t outcolor;
29
30 bool damaged_border1;
31-
32+
33 /* sir, a second border has hit the compositor! */
34 uint32_t inwidth;
35 uint32_t incolor;
36-
37+
38 bool damaged_border2;
39 } border;
40
41@@ -86,34 +89,49 @@ struct compositor_view {
42 struct wl_signal destroy_signal;
43 };
44
45-struct compositor_view *compositor_create_view(struct surface *surface);
46+struct compositor_view *
47+compositor_create_view(struct surface *surface);
48
49-void compositor_view_destroy(struct compositor_view *view);
50+void
51+compositor_view_destroy(struct compositor_view *view);
52
53 /**
54 * Returns view as a compositor_view, or NULL if view is not a compositor_view.
55 */
56-struct compositor_view *compositor_view(struct view *view);
57-
58-void compositor_view_set_parent(struct compositor_view *view, struct compositor_view *parent);
59-void compositor_view_restack(struct compositor_view *view, struct compositor_view *sibling, bool above);
60-
61-void compositor_view_show(struct compositor_view *view);
62-void compositor_view_hide(struct compositor_view *view);
63-
64-void compositor_view_set_border_color(struct compositor_view *view, uint32_t outcolor, uint32_t incolor);
65-void compositor_view_set_border_width(struct compositor_view *view, uint32_t outwidth, uint32_t inwidth);
66+struct compositor_view *
67+compositor_view(struct view *view);
68+
69+void
70+compositor_view_set_parent(struct compositor_view *view,
71+ struct compositor_view *parent);
72+void
73+compositor_view_restack(struct compositor_view *view,
74+ struct compositor_view *sibling, bool above);
75+
76+void
77+compositor_view_show(struct compositor_view *view);
78+void
79+compositor_view_hide(struct compositor_view *view);
80+
81+void
82+compositor_view_set_border_color(struct compositor_view *view,
83+ uint32_t outcolor, uint32_t incolor);
84+void
85+compositor_view_set_border_width(struct compositor_view *view,
86+ uint32_t outwidth, uint32_t inwidth);
87
88 /**
89 * get the current composited buffer for a screen for screenshotss.
90 * returns null if no buffer
91 */
92-struct wld_buffer *compositor_get_buffer(struct screen *screen);
93+struct wld_buffer *
94+compositor_get_buffer(struct screen *screen);
95
96 /**
97- * render the compositor scene into a shm buffer
98+ * render the compositor scene into a shm buffer
99 * caller must free with wld_buffer_unreference()
100 */
101-struct wld_buffer *compositor_render_to_shm(struct screen *screen);
102+struct wld_buffer *
103+compositor_render_to_shm(struct screen *screen);
104
105 #endif
+43,
-29
1@@ -36,48 +36,55 @@ struct data {
2 };
3
4 static void
5-offer_accept(struct wl_client *client, struct wl_resource *offer, uint32_t serial, const char *mime_type)
6+offer_accept(struct wl_client *client, struct wl_resource *offer,
7+ uint32_t serial, const char *mime_type)
8 {
9 struct data *data = wl_resource_get_user_data(offer);
10
11 /* Protect against expired data_offers being used. */
12- if (!data)
13+ if (!data) {
14 return;
15+ }
16
17 wl_data_source_send_target(data->source, mime_type);
18 }
19
20 static void
21-offer_receive(struct wl_client *client, struct wl_resource *offer, const char *mime_type, int fd)
22+offer_receive(struct wl_client *client, struct wl_resource *offer,
23+ const char *mime_type, int fd)
24 {
25 struct data *data = wl_resource_get_user_data(offer);
26
27 /* Protect against expired data_offers being used. */
28- if (!data)
29+ if (!data) {
30 return;
31+ }
32
33 wl_data_source_send_send(data->source, mime_type, fd);
34 close(fd);
35 }
36
37 static const struct wl_data_offer_interface data_offer_impl = {
38- .accept = offer_accept,
39- .receive = offer_receive,
40- .destroy = destroy_resource,
41+ .accept = offer_accept,
42+ .receive = offer_receive,
43+ .destroy = destroy_resource,
44 };
45
46 static void
47-source_offer(struct wl_client *client, struct wl_resource *source, const char *mime_type)
48+source_offer(struct wl_client *client, struct wl_resource *source,
49+ const char *mime_type)
50 {
51 struct data *data = wl_resource_get_user_data(source);
52 char *s, **dst;
53
54 s = strdup(mime_type);
55- if (!s)
56+ if (!s) {
57 goto error0;
58+ }
59 dst = wl_array_add(&data->mime_types, sizeof(*dst));
60- if (!dst)
61+ if (!dst) {
62 goto error1;
63+ }
64 *dst = s;
65 return;
66
67@@ -88,8 +95,8 @@ error0:
68 }
69
70 static const struct wl_data_source_interface data_source_impl = {
71- .offer = source_offer,
72- .destroy = destroy_resource,
73+ .offer = source_offer,
74+ .destroy = destroy_resource,
75 };
76
77 static void
78@@ -99,19 +106,19 @@ data_destroy(struct wl_resource *source)
79 struct wl_resource *offer;
80 char **mime_type;
81
82- wl_array_for_each (mime_type, &data->mime_types)
83- free(*mime_type);
84+ wl_array_for_each(mime_type, &data->mime_types) free(*mime_type);
85 wl_array_release(&data->mime_types);
86
87 /* After this data_source is destroyed, each of the data_offer objects
88- * associated with the data_source has a pointer to a free'd struct. We can't
89- * destroy the resources because this results in a segfault on the client when
90- * it correctly tries to call data_source.destroy. However, a misbehaving
91- * client could still attempt to call accept or receive on the data_offer,
92- * which would crash the server.
93+ * associated with the data_source has a pointer to a free'd struct. We
94+ * can't destroy the resources because this results in a segfault on the
95+ * client when it correctly tries to call data_source.destroy. However, a
96+ * misbehaving client could still attempt to call accept or receive on the
97+ * data_offer, which would crash the server.
98 *
99 * So, we clear the user data on each of the offers to protect us. */
100- wl_resource_for_each (offer, &data->offers) {
101+ wl_resource_for_each(offer, &data->offers)
102+ {
103 wl_resource_set_user_data(offer, NULL);
104 wl_resource_set_destructor(offer, NULL);
105 }
106@@ -125,15 +132,19 @@ data_source_new(struct wl_client *client, uint32_t version, uint32_t id)
107 struct data *data;
108
109 data = malloc(sizeof(*data));
110- if (!data)
111+ if (!data) {
112 goto error0;
113+ }
114 wl_array_init(&data->mime_types);
115 wl_list_init(&data->offers);
116
117- data->source = wl_resource_create(client, &wl_data_source_interface, version, id);
118- if (!data->source)
119+ data->source =
120+ wl_resource_create(client, &wl_data_source_interface, version, id);
121+ if (!data->source) {
122 goto error1;
123- wl_resource_set_implementation(data->source, &data_source_impl, data, &data_destroy);
124+ }
125+ wl_resource_set_implementation(data->source, &data_source_impl, data,
126+ &data_destroy);
127
128 return data->source;
129
130@@ -144,15 +155,18 @@ error0:
131 }
132
133 struct wl_resource *
134-data_offer_new(struct wl_client *client, struct wl_resource *source, uint32_t version)
135+data_offer_new(struct wl_client *client, struct wl_resource *source,
136+ uint32_t version)
137 {
138 struct data *data = wl_resource_get_user_data(source);
139 struct wl_resource *offer;
140
141 offer = wl_resource_create(client, &wl_data_offer_interface, version, 0);
142- if (!offer)
143+ if (!offer) {
144 return NULL;
145- wl_resource_set_implementation(offer, &data_offer_impl, data, &remove_resource);
146+ }
147+ wl_resource_set_implementation(offer, &data_offer_impl, data,
148+ &remove_resource);
149 wl_list_insert(&data->offers, wl_resource_get_link(offer));
150
151 return offer;
152@@ -164,6 +178,6 @@ data_send_mime_types(struct wl_resource *source, struct wl_resource *offer)
153 struct data *data = wl_resource_get_user_data(source);
154 char **mime_type;
155
156- wl_array_for_each (mime_type, &data->mime_types)
157- wl_data_offer_send_offer(offer, *mime_type);
158+ wl_array_for_each(mime_type, &data->mime_types)
159+ wl_data_offer_send_offer(offer, *mime_type);
160 }
+7,
-3
1@@ -28,8 +28,12 @@
2
3 struct wl_client;
4
5-struct wl_resource *data_source_new(struct wl_client *client, uint32_t version, uint32_t id);
6-struct wl_resource *data_offer_new(struct wl_client *client, struct wl_resource *source, uint32_t version);
7-void data_send_mime_types(struct wl_resource *source, struct wl_resource *offer);
8+struct wl_resource *
9+data_source_new(struct wl_client *client, uint32_t version, uint32_t id);
10+struct wl_resource *
11+data_offer_new(struct wl_client *client, struct wl_resource *source,
12+ uint32_t version);
13+void
14+data_send_mime_types(struct wl_resource *source, struct wl_resource *offer);
15
16 #endif
+41,
-23
1@@ -28,20 +28,23 @@
2
3 static void
4 start_drag(struct wl_client *client, struct wl_resource *resource,
5- struct wl_resource *source_resource, struct wl_resource *origin_resource,
6+ struct wl_resource *source_resource,
7+ struct wl_resource *origin_resource,
8 struct wl_resource *icon_resource, uint32_t serial)
9 {
10 /* XXX: Implement */
11 }
12
13 static void
14-set_selection(struct wl_client *client, struct wl_resource *resource, struct wl_resource *data_source, uint32_t serial)
15+set_selection(struct wl_client *client, struct wl_resource *resource,
16+ struct wl_resource *data_source, uint32_t serial)
17 {
18 struct data_device *data_device = wl_resource_get_user_data(resource);
19
20 /* Check if this data source is already the current selection. */
21- if (data_source == data_device->selection)
22+ if (data_source == data_device->selection) {
23 return;
24+ }
25
26 if (data_device->selection) {
27 wl_data_source_send_cancelled(data_device->selection);
28@@ -50,25 +53,30 @@ set_selection(struct wl_client *client, struct wl_resource *resource, struct wl_
29
30 data_device->selection = data_source;
31
32- if (data_source)
33- wl_resource_add_destroy_listener(data_source, &data_device->selection_destroy_listener);
34+ if (data_source) {
35+ wl_resource_add_destroy_listener(
36+ data_source, &data_device->selection_destroy_listener);
37+ }
38
39- send_event(&data_device->event_signal, DATA_DEVICE_EVENT_SELECTION_CHANGED, NULL);
40+ send_event(&data_device->event_signal, DATA_DEVICE_EVENT_SELECTION_CHANGED,
41+ NULL);
42 }
43
44 static const struct wl_data_device_interface data_device_impl = {
45- .start_drag = start_drag,
46- .set_selection = set_selection,
47- .release = destroy_resource,
48+ .start_drag = start_drag,
49+ .set_selection = set_selection,
50+ .release = destroy_resource,
51 };
52
53 static void
54 handle_selection_destroy(struct wl_listener *listener, void *data)
55 {
56- struct data_device *data_device = wl_container_of(listener, data_device, selection_destroy_listener);
57+ struct data_device *data_device =
58+ wl_container_of(listener, data_device, selection_destroy_listener);
59
60 data_device->selection = NULL;
61- send_event(&data_device->event_signal, DATA_DEVICE_EVENT_SELECTION_CHANGED, NULL);
62+ send_event(&data_device->event_signal, DATA_DEVICE_EVENT_SELECTION_CHANGED,
63+ NULL);
64 }
65
66 struct data_device *
67@@ -77,8 +85,9 @@ data_device_create(void)
68 struct data_device *data_device;
69
70 data_device = malloc(sizeof(*data_device));
71- if (!data_device)
72+ if (!data_device) {
73 return NULL;
74+ }
75 data_device->selection = NULL;
76 data_device->selection_destroy_listener.notify = &handle_selection_destroy;
77 wl_signal_init(&data_device->event_signal);
78@@ -92,33 +101,39 @@ data_device_destroy(struct data_device *data_device)
79 {
80 struct wl_resource *resource, *tmp;
81
82- wl_list_for_each_safe (resource, tmp, &data_device->resources, link)
83- wl_resource_destroy(resource);
84+ wl_list_for_each_safe(resource, tmp, &data_device->resources, link)
85+ wl_resource_destroy(resource);
86 free(data_device);
87 }
88
89 struct wl_resource *
90-data_device_bind(struct data_device *data_device, struct wl_client *client, uint32_t version, uint32_t id)
91+data_device_bind(struct data_device *data_device, struct wl_client *client,
92+ uint32_t version, uint32_t id)
93 {
94 struct wl_resource *resource;
95
96- resource = wl_resource_create(client, &wl_data_device_interface, version, id);
97- if (!resource)
98+ resource =
99+ wl_resource_create(client, &wl_data_device_interface, version, id);
100+ if (!resource) {
101 return NULL;
102- wl_resource_set_implementation(resource, &data_device_impl, data_device, &remove_resource);
103+ }
104+ wl_resource_set_implementation(resource, &data_device_impl, data_device,
105+ &remove_resource);
106 wl_list_insert(&data_device->resources, &resource->link);
107
108 return resource;
109 }
110
111 static struct wl_resource *
112-new_offer(struct wl_resource *resource, struct wl_client *client, struct wl_resource *source)
113+new_offer(struct wl_resource *resource, struct wl_client *client,
114+ struct wl_resource *source)
115 {
116 struct wl_resource *offer;
117
118 offer = data_offer_new(client, source, wl_resource_get_version(resource));
119- if (!offer)
120+ if (!offer) {
121 return NULL;
122+ }
123 wl_data_device_send_data_offer(resource, offer);
124 data_send_mime_types(source, offer);
125
126@@ -126,7 +141,8 @@ new_offer(struct wl_resource *resource, struct wl_client *client, struct wl_reso
127 }
128
129 void
130-data_device_offer_selection(struct data_device *data_device, struct wl_client *client)
131+data_device_offer_selection(struct data_device *data_device,
132+ struct wl_client *client)
133 {
134 struct wl_resource *resource;
135 struct wl_resource *offer = NULL;
136@@ -135,12 +151,14 @@ data_device_offer_selection(struct data_device *data_device, struct wl_client *c
137 resource = wl_resource_find_for_client(&data_device->resources, client);
138
139 /* If the client does not have a data device, there is nothing to do. */
140- if (!resource)
141+ if (!resource) {
142 return;
143+ }
144
145 /* If we have a selection, create a new offer for the client. */
146- if (data_device->selection)
147+ if (data_device->selection) {
148 offer = new_offer(resource, client, data_device->selection);
149+ }
150
151 wl_data_device_send_selection(resource, offer);
152 }
+11,
-7
1@@ -27,9 +27,7 @@
2 #include <stdbool.h>
3 #include <wayland-server.h>
4
5-enum {
6- DATA_DEVICE_EVENT_SELECTION_CHANGED
7-};
8+enum { DATA_DEVICE_EVENT_SELECTION_CHANGED };
9
10 struct data_device {
11 /* The data source corresponding to the current selection. */
12@@ -40,9 +38,15 @@ struct data_device {
13 struct wl_list resources;
14 };
15
16-struct data_device *data_device_create(void);
17-void data_device_destroy(struct data_device *data_device);
18-struct wl_resource *data_device_bind(struct data_device *data_device, struct wl_client *client, uint32_t version, uint32_t id);
19-void data_device_offer_selection(struct data_device *data_device, struct wl_client *client);
20+struct data_device *
21+data_device_create(void);
22+void
23+data_device_destroy(struct data_device *data_device);
24+struct wl_resource *
25+data_device_bind(struct data_device *data_device, struct wl_client *client,
26+ uint32_t version, uint32_t id);
27+void
28+data_device_offer_selection(struct data_device *data_device,
29+ struct wl_client *client);
30
31 #endif
+21,
-11
1@@ -28,41 +28,51 @@
2 #include "seat.h"
3
4 static void
5-create_data_source(struct wl_client *client, struct wl_resource *resource, uint32_t id)
6+create_data_source(struct wl_client *client, struct wl_resource *resource,
7+ uint32_t id)
8 {
9- if (!data_source_new(client, wl_resource_get_version(resource), id))
10+ if (!data_source_new(client, wl_resource_get_version(resource), id)) {
11 wl_resource_post_no_memory(resource);
12+ }
13 }
14
15 static void
16-get_data_device(struct wl_client *client, struct wl_resource *resource, uint32_t id, struct wl_resource *seat_resource)
17+get_data_device(struct wl_client *client, struct wl_resource *resource,
18+ uint32_t id, struct wl_resource *seat_resource)
19 {
20 struct swc_seat *seat = wl_resource_get_user_data(seat_resource);
21
22- if (!data_device_bind(seat->data_device, client, wl_resource_get_version(resource), id))
23+ if (!data_device_bind(seat->data_device, client,
24+ wl_resource_get_version(resource), id)) {
25 wl_resource_post_no_memory(resource);
26+ }
27 }
28
29-static const struct wl_data_device_manager_interface data_device_manager_impl = {
30- .create_data_source = create_data_source,
31- .get_data_device = get_data_device,
32+static const struct wl_data_device_manager_interface data_device_manager_impl =
33+ {
34+ .create_data_source = create_data_source,
35+ .get_data_device = get_data_device,
36 };
37
38 static void
39-bind_data_device_manager(struct wl_client *client, void *data, uint32_t version, uint32_t id)
40+bind_data_device_manager(struct wl_client *client, void *data, uint32_t version,
41+ uint32_t id)
42 {
43 struct wl_resource *resource;
44
45- resource = wl_resource_create(client, &wl_data_device_manager_interface, version, id);
46+ resource = wl_resource_create(client, &wl_data_device_manager_interface,
47+ version, id);
48 if (!resource) {
49 wl_client_post_no_memory(client);
50 return;
51 }
52- wl_resource_set_implementation(resource, &data_device_manager_impl, NULL, NULL);
53+ wl_resource_set_implementation(resource, &data_device_manager_impl, NULL,
54+ NULL);
55 }
56
57 struct wl_global *
58 data_device_manager_create(struct wl_display *display)
59 {
60- return wl_global_create(display, &wl_data_device_manager_interface, 2, NULL, &bind_data_device_manager);
61+ return wl_global_create(display, &wl_data_device_manager_interface, 2, NULL,
62+ &bind_data_device_manager);
63 }
+2,
-1
1@@ -26,6 +26,7 @@
2
3 struct wl_display;
4
5-struct wl_global *data_device_manager_create(struct wl_display *display);
6+struct wl_global *
7+data_device_manager_create(struct wl_display *display);
8
9 #endif
+74,
-39
1@@ -27,13 +27,13 @@
2 #include "util.h"
3 #include "wayland_buffer.h"
4
5+#include "linux-dmabuf-unstable-v1-server-protocol.h"
6+#include <drm_fourcc.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9-#include <drm_fourcc.h>
10 #include <unistd.h>
11-#include <wld/wld.h>
12 #include <wld/drm.h>
13-#include "linux-dmabuf-unstable-v1-server-protocol.h"
14+#include <wld/wld.h>
15
16 struct params {
17 struct wl_resource *resource;
18@@ -45,20 +45,28 @@ struct params {
19 };
20
21 static void
22-add(struct wl_client *client, struct wl_resource *resource, int32_t fd, uint32_t i, uint32_t offset, uint32_t stride, uint32_t modifier_hi, uint32_t modifier_lo)
23+add(struct wl_client *client, struct wl_resource *resource, int32_t fd,
24+ uint32_t i, uint32_t offset, uint32_t stride, uint32_t modifier_hi,
25+ uint32_t modifier_lo)
26 {
27 struct params *params = wl_resource_get_user_data(resource);
28
29 if (params->created) {
30- wl_resource_post_error(resource, ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_ALREADY_USED, "buffer already created");
31+ wl_resource_post_error(resource,
32+ ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_ALREADY_USED,
33+ "buffer already created");
34 return;
35 }
36 if (i > ARRAY_LENGTH(params->fd)) {
37- wl_resource_post_error(resource, ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_PLANE_IDX, "plane index too large");
38+ wl_resource_post_error(resource,
39+ ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_PLANE_IDX,
40+ "plane index too large");
41 return;
42 }
43 if (params->fd[i] != -1) {
44- wl_resource_post_error(resource, ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_PLANE_SET, "buffer plane already set");
45+ wl_resource_post_error(resource,
46+ ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_PLANE_SET,
47+ "buffer plane already set");
48 return;
49 }
50 params->fd[i] = fd;
51@@ -68,8 +76,9 @@ add(struct wl_client *client, struct wl_resource *resource, int32_t fd, uint32_t
52 }
53
54 static void
55-create_immed(struct wl_client *client, struct wl_resource *resource, uint32_t id,
56- int32_t width, int32_t height, uint32_t format, uint32_t flags)
57+create_immed(struct wl_client *client, struct wl_resource *resource,
58+ uint32_t id, int32_t width, int32_t height, uint32_t format,
59+ uint32_t flags)
60 {
61 struct params *params = wl_resource_get_user_data(resource);
62 struct wld_buffer *buffer;
63@@ -78,7 +87,9 @@ create_immed(struct wl_client *client, struct wl_resource *resource, uint32_t id
64 int num_planes, i;
65
66 if (params->created) {
67- wl_resource_post_error(resource, ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_ALREADY_USED, "buffer already created");
68+ wl_resource_post_error(resource,
69+ ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_ALREADY_USED,
70+ "buffer already created");
71 return;
72 }
73 params->created = true;
74@@ -88,49 +99,62 @@ create_immed(struct wl_client *client, struct wl_resource *resource, uint32_t id
75 num_planes = 1;
76 break;
77 default:
78- wl_resource_post_error(resource, ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_INVALID_FORMAT, "unsupported format %#" PRIx32, format);
79+ wl_resource_post_error(resource,
80+ ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_INVALID_FORMAT,
81+ "unsupported format %#" PRIx32, format);
82 return;
83 }
84 for (i = 0; i < num_planes; ++i) {
85- if (params->fd[i] == -1)
86- wl_resource_post_error(resource, ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_INCOMPLETE, "missing plane %d", i);
87+ if (params->fd[i] == -1) {
88+ wl_resource_post_error(resource,
89+ ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_INCOMPLETE,
90+ "missing plane %d", i);
91+ }
92 }
93 for (; i < ARRAY_LENGTH(params->fd); ++i) {
94- if (params->fd[i] != -1)
95- wl_resource_post_error(resource, ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_INCOMPLETE, "too many planes");
96+ if (params->fd[i] != -1) {
97+ wl_resource_post_error(resource,
98+ ZWP_LINUX_BUFFER_PARAMS_V1_ERROR_INCOMPLETE,
99+ "too many planes");
100+ }
101 }
102 object.i = params->fd[0];
103- buffer = wld_import_buffer(swc.drm->context, WLD_DRM_OBJECT_PRIME_FD, object, width, height, format, params->stride[0]);
104+ buffer =
105+ wld_import_buffer(swc.drm->context, WLD_DRM_OBJECT_PRIME_FD, object,
106+ width, height, format, params->stride[0]);
107 for (i = 0; i < num_planes; ++i) {
108 close(params->fd[i]);
109 params->fd[i] = -1;
110 }
111- if (!buffer)
112+ if (!buffer) {
113 zwp_linux_buffer_params_v1_send_failed(resource);
114+ }
115
116 buffer_resource = wayland_buffer_create_resource(client, 1, id, buffer);
117 if (!buffer_resource) {
118- if (buffer)
119+ if (buffer) {
120 wld_buffer_unreference(buffer);
121+ }
122 wl_resource_post_no_memory(resource);
123 return;
124 }
125- if (id == 0 && buffer)
126+ if (id == 0 && buffer) {
127 zwp_linux_buffer_params_v1_send_created(resource, buffer_resource);
128+ }
129 }
130
131 static void
132-create(struct wl_client *client, struct wl_resource *resource,
133- int32_t width, int32_t height, uint32_t format, uint32_t flags)
134+create(struct wl_client *client, struct wl_resource *resource, int32_t width,
135+ int32_t height, uint32_t format, uint32_t flags)
136 {
137 create_immed(client, resource, 0, width, height, format, flags);
138 }
139
140 static const struct zwp_linux_buffer_params_v1_interface params_impl = {
141- .destroy = destroy_resource,
142- .add = add,
143- .create = create,
144- .create_immed = create_immed,
145+ .destroy = destroy_resource,
146+ .add = add,
147+ .create = create,
148+ .create_immed = create_immed,
149 };
150
151 static void
152@@ -139,26 +163,34 @@ params_destroy(struct wl_resource *resource)
153 struct params *params = wl_resource_get_user_data(resource);
154 int i;
155
156- for (i = 0; i < ARRAY_LENGTH(params->fd); ++i)
157+ for (i = 0; i < ARRAY_LENGTH(params->fd); ++i) {
158 close(params->fd[i]);
159+ }
160 }
161
162 static void
163-create_params(struct wl_client *client, struct wl_resource *resource, uint32_t id)
164+create_params(struct wl_client *client, struct wl_resource *resource,
165+ uint32_t id)
166 {
167 struct params *params;
168 int i;
169
170 params = malloc(sizeof(*params));
171- if (!params)
172+ if (!params) {
173 goto error0;
174+ }
175 params->created = false;
176- params->resource = wl_resource_create(client, &zwp_linux_buffer_params_v1_interface, wl_resource_get_version(resource), id);
177- if (!params->resource)
178+ params->resource =
179+ wl_resource_create(client, &zwp_linux_buffer_params_v1_interface,
180+ wl_resource_get_version(resource), id);
181+ if (!params->resource) {
182 goto error1;
183- for (i = 0; i < ARRAY_LENGTH(params->fd); ++i)
184+ }
185+ for (i = 0; i < ARRAY_LENGTH(params->fd); ++i) {
186 params->fd[i] = -1;
187- wl_resource_set_implementation(params->resource, &params_impl, params, params_destroy);
188+ }
189+ wl_resource_set_implementation(params->resource, &params_impl, params,
190+ params_destroy);
191 return;
192
193 error1:
194@@ -168,22 +200,23 @@ error0:
195 }
196
197 static const struct zwp_linux_dmabuf_v1_interface dmabuf_impl = {
198- .destroy = destroy_resource,
199- .create_params = create_params,
200+ .destroy = destroy_resource,
201+ .create_params = create_params,
202 };
203
204 static void
205 bind_dmabuf(struct wl_client *client, void *data, uint32_t version, uint32_t id)
206 {
207 static const uint32_t formats[] = {
208- DRM_FORMAT_XRGB8888,
209- DRM_FORMAT_ARGB8888,
210+ DRM_FORMAT_XRGB8888,
211+ DRM_FORMAT_ARGB8888,
212 };
213 uint64_t modifier = DRM_FORMAT_MOD_INVALID;
214 struct wl_resource *resource;
215 size_t i;
216
217- resource = wl_resource_create(client, &zwp_linux_dmabuf_v1_interface, version, id);
218+ resource =
219+ wl_resource_create(client, &zwp_linux_dmabuf_v1_interface, version, id);
220 if (!resource) {
221 wl_client_post_no_memory(client);
222 return;
223@@ -192,7 +225,8 @@ bind_dmabuf(struct wl_client *client, void *data, uint32_t version, uint32_t id)
224 for (i = 0; i < ARRAY_LENGTH(formats); ++i) {
225 if (version >= 3) {
226 /* TODO: need a way to query DRM modifiers of wld */
227- zwp_linux_dmabuf_v1_send_modifier(resource, formats[i], modifier >> 32, modifier & 0xffffffff);
228+ zwp_linux_dmabuf_v1_send_modifier(
229+ resource, formats[i], modifier >> 32, modifier & 0xffffffff);
230 } else {
231 zwp_linux_dmabuf_v1_send_format(resource, formats[i]);
232 }
233@@ -202,5 +236,6 @@ bind_dmabuf(struct wl_client *client, void *data, uint32_t version, uint32_t id)
234 struct wl_global *
235 swc_dmabuf_create(struct wl_display *display)
236 {
237- return wl_global_create(display, &zwp_linux_dmabuf_v1_interface, 3, NULL, &bind_dmabuf);
238+ return wl_global_create(display, &zwp_linux_dmabuf_v1_interface, 3, NULL,
239+ &bind_dmabuf);
240 }
+2,
-1
1@@ -26,6 +26,7 @@
2
3 struct wl_display;
4
5-struct wl_global *swc_dmabuf_create(struct wl_display *display);
6+struct wl_global *
7+swc_dmabuf_create(struct wl_display *display);
8
9 #endif
+103,
-67
1@@ -32,21 +32,21 @@
2 #include "util.h"
3 #include "wayland_buffer.h"
4
5+#include "wayland-drm-server-protocol.h"
6 #include <dirent.h>
7+#include <drm.h>
8 #include <errno.h>
9+#include <fcntl.h>
10 #include <limits.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <strings.h>
15-#include <fcntl.h>
16 #include <unistd.h>
17-#include <drm.h>
18-#include <xf86drm.h>
19-#include <wld/wld.h>
20-#include <wld/drm.h>
21 #include <wayland-server.h>
22-#include "wayland-drm-server-protocol.h"
23+#include <wld/drm.h>
24+#include <wld/wld.h>
25+#include <xf86drm.h>
26
27 struct swc_drm swc_drm;
28
29@@ -59,49 +59,58 @@ static struct {
30 } drm;
31
32 static void
33-authenticate(struct wl_client *client, struct wl_resource *resource, uint32_t magic)
34+authenticate(struct wl_client *client, struct wl_resource *resource,
35+ uint32_t magic)
36 {
37 wl_drm_send_authenticated(resource);
38 }
39
40 static void
41-create_buffer(struct wl_client *client, struct wl_resource *resource, uint32_t id,
42- uint32_t name, int32_t width, int32_t height, uint32_t stride, uint32_t format)
43+create_buffer(struct wl_client *client, struct wl_resource *resource,
44+ uint32_t id, uint32_t name, int32_t width, int32_t height,
45+ uint32_t stride, uint32_t format)
46 {
47- wl_resource_post_error(resource, WL_DRM_ERROR_INVALID_NAME, "GEM names are not supported, use a PRIME fd instead");
48+ wl_resource_post_error(
49+ resource, WL_DRM_ERROR_INVALID_NAME,
50+ "GEM names are not supported, use a PRIME fd instead");
51 }
52
53 static void
54-create_planar_buffer(struct wl_client *client, struct wl_resource *resource, uint32_t id,
55- uint32_t name, int32_t width, int32_t height, uint32_t format,
56- int32_t offset0, int32_t stride0,
57- int32_t offset1, int32_t stride1,
58- int32_t offset2, int32_t stride2)
59+create_planar_buffer(struct wl_client *client, struct wl_resource *resource,
60+ uint32_t id, uint32_t name, int32_t width, int32_t height,
61+ uint32_t format, int32_t offset0, int32_t stride0,
62+ int32_t offset1, int32_t stride1, int32_t offset2,
63+ int32_t stride2)
64 {
65- wl_resource_post_error(resource, WL_DRM_ERROR_INVALID_FORMAT, "planar buffers are not supported\n");
66+ wl_resource_post_error(resource, WL_DRM_ERROR_INVALID_FORMAT,
67+ "planar buffers are not supported\n");
68 }
69
70 static void
71-create_prime_buffer(struct wl_client *client, struct wl_resource *resource, uint32_t id,
72- int32_t fd, int32_t width, int32_t height, uint32_t format,
73- int32_t offset0, int32_t stride0,
74- int32_t offset1, int32_t stride1,
75- int32_t offset2, int32_t stride2)
76+create_prime_buffer(struct wl_client *client, struct wl_resource *resource,
77+ uint32_t id, int32_t fd, int32_t width, int32_t height,
78+ uint32_t format, int32_t offset0, int32_t stride0,
79+ int32_t offset1, int32_t stride1, int32_t offset2,
80+ int32_t stride2)
81 {
82 struct wld_buffer *buffer;
83 struct wl_resource *buffer_resource;
84- union wld_object object = { .i = fd };
85+ union wld_object object = {.i = fd};
86
87- buffer = wld_import_buffer(swc.drm->context, WLD_DRM_OBJECT_PRIME_FD, object, width, height, format, stride0);
88+ buffer = wld_import_buffer(swc.drm->context, WLD_DRM_OBJECT_PRIME_FD,
89+ object, width, height, format, stride0);
90 close(fd);
91
92- if (!buffer)
93+ if (!buffer) {
94 goto error0;
95+ }
96
97- buffer_resource = wayland_buffer_create_resource(client, wl_resource_get_version(resource), id, buffer);
98+ buffer_resource = wayland_buffer_create_resource(
99+ client, wl_resource_get_version(resource), id, buffer);
100
101- if (!buffer_resource)
102+ if (!buffer_resource) {
103 goto error1;
104+ }
105
106 return;
107
108@@ -112,10 +121,10 @@ error0:
109 }
110
111 static const struct wl_drm_interface drm_impl = {
112- .authenticate = authenticate,
113- .create_buffer = create_buffer,
114- .create_planar_buffer = create_planar_buffer,
115- .create_prime_buffer = create_prime_buffer,
116+ .authenticate = authenticate,
117+ .create_buffer = create_buffer,
118+ .create_planar_buffer = create_planar_buffer,
119+ .create_prime_buffer = create_prime_buffer,
120 };
121
122 static int
123@@ -136,11 +145,13 @@ find_primary_drm_device(char *path, size_t size)
124
125 num_cards = scandir("/dev/dri", &cards, &select_card, &alphasort);
126
127- if (num_cards == -1)
128+ if (num_cards == -1) {
129 return false;
130+ }
131
132 for (index = 0; index < num_cards; ++index) {
133- snprintf(path, size, "/sys/class/drm/%s/device/boot_vga", cards[index]->d_name);
134+ snprintf(path, size, "/sys/class/drm/%s/device/boot_vga",
135+ cards[index]->d_name);
136
137 if ((file = fopen(path, "r"))) {
138 ret = fscanf(file, "%hhu", &boot_vga);
139@@ -154,26 +165,30 @@ find_primary_drm_device(char *path, size_t size)
140 }
141 }
142
143- if (!card)
144+ if (!card) {
145 card = cards[index];
146- else
147+ } else {
148 free(cards[index]);
149+ }
150 }
151
152 free(cards);
153
154- if (!card)
155+ if (!card) {
156 return false;
157+ }
158
159- if (snprintf(path, size, "/dev/dri/%s", card->d_name) >= size)
160+ if (snprintf(path, size, "/dev/dri/%s", card->d_name) >= size) {
161 return false;
162+ }
163
164 free(card);
165 return true;
166 }
167
168 static bool
169-find_available_crtc(drmModeRes *resources, drmModeConnector *connector, uint32_t taken_crtcs, int *crtc_index)
170+find_available_crtc(drmModeRes *resources, drmModeConnector *connector,
171+ uint32_t taken_crtcs, int *crtc_index)
172 {
173 int i, j;
174 uint32_t possible_crtcs;
175@@ -196,12 +211,14 @@ find_available_crtc(drmModeRes *resources, drmModeConnector *connector, uint32_t
176 }
177
178 static void
179-handle_vblank(int fd, unsigned int sequence, unsigned int sec, unsigned int usec, void *data)
180+handle_vblank(int fd, unsigned int sequence, unsigned int sec,
181+ unsigned int usec, void *data)
182 {
183 }
184
185 static void
186-handle_page_flip(int fd, unsigned int sequence, unsigned int sec, unsigned int usec, unsigned int crtc_id, void *data)
187+handle_page_flip(int fd, unsigned int sequence, unsigned int sec,
188+ unsigned int usec, unsigned int crtc_id, void *data)
189 {
190 struct drm_handler *handler = data;
191
192@@ -209,9 +226,9 @@ handle_page_flip(int fd, unsigned int sequence, unsigned int sec, unsigned int u
193 }
194
195 static drmEventContext event_context = {
196- .version = DRM_EVENT_CONTEXT_VERSION,
197- .vblank_handler = handle_vblank,
198- .page_flip_handler2 = handle_page_flip,
199+ .version = DRM_EVENT_CONTEXT_VERSION,
200+ .vblank_handler = handle_vblank,
201+ .page_flip_handler2 = handle_page_flip,
202 };
203
204 static int
205@@ -233,8 +250,9 @@ bind_drm(struct wl_client *client, void *data, uint32_t version, uint32_t id)
206 }
207 wl_resource_set_implementation(resource, &drm_impl, NULL, NULL);
208
209- if (version >= 2)
210+ if (version >= 2) {
211 wl_drm_send_capabilities(resource, WL_DRM_CAPABILITY_PRIME);
212+ }
213
214 wl_drm_send_device(resource, drm.path);
215 wl_drm_send_format(resource, WL_DRM_FORMAT_XRGB8888);
216@@ -261,11 +279,13 @@ drm_initialize(void)
217 ERROR("Could not enable DRM universal planes\n");
218 goto error1;
219 }
220- if (drmGetCap(swc.drm->fd, DRM_CAP_CURSOR_WIDTH, &val) < 0)
221+ if (drmGetCap(swc.drm->fd, DRM_CAP_CURSOR_WIDTH, &val) < 0) {
222 val = 64;
223+ }
224 swc.drm->cursor_w = val;
225- if (drmGetCap(swc.drm->fd, DRM_CAP_CURSOR_HEIGHT, &val) < 0)
226+ if (drmGetCap(swc.drm->fd, DRM_CAP_CURSOR_HEIGHT, &val) < 0) {
227 val = 64;
228+ }
229 swc.drm->cursor_h = val;
230
231 drm.path = drmGetRenderDeviceNameFromFd(swc.drm->fd);
232@@ -284,7 +304,8 @@ drm_initialize(void)
233 goto error2;
234 }
235
236- drm.event_source = wl_event_loop_add_fd(swc.event_loop, swc.drm->fd, WL_EVENT_READABLE, &handle_data, NULL);
237+ drm.event_source = wl_event_loop_add_fd(
238+ swc.event_loop, swc.drm->fd, WL_EVENT_READABLE, &handle_data, NULL);
239
240 if (!drm.event_source) {
241 ERROR("Could not create DRM event source\n");
242@@ -292,7 +313,8 @@ drm_initialize(void)
243 }
244
245 if (!wld_drm_is_dumb(swc.drm->context)) {
246- drm.global = wl_global_create(swc.display, &wl_drm_interface, 2, NULL, &bind_drm);
247+ drm.global = wl_global_create(swc.display, &wl_drm_interface, 2, NULL,
248+ &bind_drm);
249 if (!drm.global) {
250 ERROR("Could not create wl_drm global\n");
251 goto error4;
252@@ -321,8 +343,9 @@ error0:
253 void
254 drm_finalize(void)
255 {
256- if (drm.global)
257+ if (drm.global) {
258 wl_global_destroy(drm.global);
259+ }
260 wl_event_source_remove(drm.event_source);
261 wld_destroy_renderer(swc.drm->renderer);
262 wld_destroy_context(swc.drm->context);
263@@ -349,8 +372,9 @@ drm_create_screens(struct wl_list *screens)
264 wl_list_init(&planes);
265 for (i = 0; i < plane_ids->count_planes; ++i) {
266 plane = plane_new(plane_ids->planes[i]);
267- if (plane)
268+ if (plane) {
269 wl_list_insert(&planes, &plane->link);
270+ }
271 }
272 drmModeFreePlaneResources(plane_ids);
273
274@@ -359,33 +383,40 @@ drm_create_screens(struct wl_list *screens)
275 ERROR("Could not get DRM resources\n");
276 return false;
277 }
278- for (i = 0; i < resources->count_connectors; ++i, drmModeFreeConnector(connector)) {
279+ for (i = 0; i < resources->count_connectors;
280+ ++i, drmModeFreeConnector(connector)) {
281 connector = drmModeGetConnector(swc.drm->fd, resources->connectors[i]);
282
283 if (connector->connection == DRM_MODE_CONNECTED) {
284 int crtc_index;
285
286- if (!find_available_crtc(resources, connector, taken_crtcs, &crtc_index)) {
287+ if (!find_available_crtc(resources, connector, taken_crtcs,
288+ &crtc_index)) {
289 WARNING("Could not find CRTC for connector %d\n", i);
290 continue;
291 }
292
293 cursor_plane = NULL;
294- wl_list_for_each (plane, &planes, link) {
295- if (plane->type == DRM_PLANE_TYPE_CURSOR && plane->possible_crtcs & 1 << crtc_index) {
296+ wl_list_for_each(plane, &planes, link)
297+ {
298+ if (plane->type == DRM_PLANE_TYPE_CURSOR &&
299+ plane->possible_crtcs & 1 << crtc_index) {
300 wl_list_remove(&plane->link);
301 cursor_plane = plane;
302 break;
303 }
304 }
305 if (!cursor_plane) {
306- WARNING("Could not find cursor plane for CRTC %d\n", crtc_index);
307+ WARNING("Could not find cursor plane for CRTC %d\n",
308+ crtc_index);
309 }
310
311- if (!(output = output_new(connector)))
312+ if (!(output = output_new(connector))) {
313 continue;
314+ }
315
316- output->screen = screen_new(resources->crtcs[crtc_index], output, cursor_plane);
317+ output->screen =
318+ screen_new(resources->crtcs[crtc_index], output, cursor_plane);
319 output->screen->id = crtc_index;
320 taken_crtcs |= 1 << crtc_index;
321
322@@ -397,9 +428,7 @@ drm_create_screens(struct wl_list *screens)
323 return true;
324 }
325
326-enum {
327- WLD_USER_OBJECT_FRAMEBUFFER = WLD_USER_ID
328-};
329+enum { WLD_USER_OBJECT_FRAMEBUFFER = WLD_USER_ID };
330
331 struct framebuffer {
332 struct wld_exporter exporter;
333@@ -408,9 +437,11 @@ struct framebuffer {
334 };
335
336 static bool
337-framebuffer_export(struct wld_exporter *exporter, struct wld_buffer *buffer, uint32_t type, union wld_object *object)
338+framebuffer_export(struct wld_exporter *exporter, struct wld_buffer *buffer,
339+ uint32_t type, union wld_object *object)
340 {
341- struct framebuffer *framebuffer = wl_container_of(exporter, framebuffer, exporter);
342+ struct framebuffer *framebuffer =
343+ wl_container_of(exporter, framebuffer, exporter);
344
345 switch (type) {
346 case WLD_USER_OBJECT_FRAMEBUFFER:
347@@ -426,7 +457,8 @@ framebuffer_export(struct wld_exporter *exporter, struct wld_buffer *buffer, uin
348 static void
349 framebuffer_destroy(struct wld_destructor *destructor)
350 {
351- struct framebuffer *framebuffer = wl_container_of(destructor, framebuffer, destructor);
352+ struct framebuffer *framebuffer =
353+ wl_container_of(destructor, framebuffer, destructor);
354
355 drmModeRmFB(swc.drm->fd, framebuffer->id);
356 free(framebuffer);
357@@ -439,22 +471,26 @@ drm_get_framebuffer(struct wld_buffer *buffer)
358 union wld_object object;
359 int ret;
360
361- if (!buffer)
362+ if (!buffer) {
363 return 0;
364+ }
365
366- if (wld_export(buffer, WLD_USER_OBJECT_FRAMEBUFFER, &object))
367+ if (wld_export(buffer, WLD_USER_OBJECT_FRAMEBUFFER, &object)) {
368 return object.u32;
369+ }
370
371 if (!wld_export(buffer, WLD_DRM_OBJECT_HANDLE, &object)) {
372 ERROR("Could not get buffer handle\n");
373 return 0;
374 }
375
376- if (!(framebuffer = malloc(sizeof(*framebuffer))))
377+ if (!(framebuffer = malloc(sizeof(*framebuffer)))) {
378 return 0;
379+ }
380
381- ret = drmModeAddFB2(swc.drm->fd, buffer->width, buffer->height, buffer->format,
382- (uint32_t[4]){object.u32}, (uint32_t[4]){buffer->pitch}, (uint32_t[4]){0},
383+ ret = drmModeAddFB2(swc.drm->fd, buffer->width, buffer->height,
384+ buffer->format, (uint32_t[4]){object.u32},
385+ (uint32_t[4]){buffer->pitch}, (uint32_t[4]){0},
386 &framebuffer->id, 0);
387 if (ret < 0) {
388 free(framebuffer);
+8,
-4
1@@ -18,10 +18,14 @@ struct swc_drm {
2 struct wld_renderer *renderer;
3 };
4
5-bool drm_initialize(void);
6-void drm_finalize(void);
7+bool
8+drm_initialize(void);
9+void
10+drm_finalize(void);
11
12-bool drm_create_screens(struct wl_list *screens);
13-uint32_t drm_get_framebuffer(struct wld_buffer *buffer);
14+bool
15+drm_create_screens(struct wl_list *screens);
16+uint32_t
17+drm_get_framebuffer(struct wld_buffer *buffer);
18
19 #endif
+1,
-1
1@@ -29,7 +29,7 @@ struct event {
2 static inline void
3 send_event(struct wl_signal *signal, uint32_t type, void *event_data)
4 {
5- struct event event = { .type = type, .data = event_data };
6+ struct event event = {.type = type, .data = event_data};
7 wl_signal_emit(signal, &event);
8 }
9
+26,
-13
1@@ -35,26 +35,32 @@ focus(struct input_focus *input_focus, struct compositor_view *view)
2
3 if (view) {
4 client = wl_resource_get_client(view->surface->resource);
5- wl_resource_for_each_safe (resource, tmp, &input_focus->inactive) {
6+ wl_resource_for_each_safe(resource, tmp, &input_focus->inactive)
7+ {
8 if (wl_resource_get_client(resource) == client) {
9 wl_list_remove(wl_resource_get_link(resource));
10- wl_list_insert(&input_focus->active, wl_resource_get_link(resource));
11+ wl_list_insert(&input_focus->active,
12+ wl_resource_get_link(resource));
13 }
14 }
15- wl_signal_add(&view->destroy_signal, &input_focus->view_destroy_listener);
16+ wl_signal_add(&view->destroy_signal,
17+ &input_focus->view_destroy_listener);
18 }
19
20 input_focus->client = client;
21 input_focus->view = view;
22- input_focus->handler->enter(input_focus->handler, &input_focus->active, view);
23+ input_focus->handler->enter(input_focus->handler, &input_focus->active,
24+ view);
25 }
26
27 static void
28 unfocus(struct input_focus *input_focus)
29 {
30- if (input_focus->view)
31+ if (input_focus->view) {
32 wl_list_remove(&input_focus->view_destroy_listener.link);
33- input_focus->handler->leave(input_focus->handler, &input_focus->active, input_focus->view);
34+ }
35+ input_focus->handler->leave(input_focus->handler, &input_focus->active,
36+ input_focus->view);
37 wl_list_insert_list(&input_focus->inactive, &input_focus->active);
38 wl_list_init(&input_focus->active);
39 }
40@@ -62,7 +68,8 @@ unfocus(struct input_focus *input_focus)
41 static void
42 handle_focus_view_destroy(struct wl_listener *listener, void *data)
43 {
44- struct input_focus *input_focus = wl_container_of(listener, input_focus, view_destroy_listener);
45+ struct input_focus *input_focus =
46+ wl_container_of(listener, input_focus, view_destroy_listener);
47
48 /* XXX: Should this call unfocus? */
49 wl_list_insert_list(&input_focus->inactive, &input_focus->active);
50@@ -72,7 +79,8 @@ handle_focus_view_destroy(struct wl_listener *listener, void *data)
51 }
52
53 bool
54-input_focus_initialize(struct input_focus *input_focus, struct input_focus_handler *handler)
55+input_focus_initialize(struct input_focus *input_focus,
56+ struct input_focus_handler *handler)
57 {
58 input_focus->client = NULL;
59 input_focus->view = NULL;
60@@ -93,16 +101,19 @@ input_focus_finalize(struct input_focus *input_focus)
61 }
62
63 void
64-input_focus_add_resource(struct input_focus *input_focus, struct wl_resource *resource)
65+input_focus_add_resource(struct input_focus *input_focus,
66+ struct wl_resource *resource)
67 {
68 struct wl_list resources, *target = &input_focus->inactive;
69
70 wl_list_init(&resources);
71 wl_list_insert(&resources, wl_resource_get_link(resource));
72
73- /* If this new input resource corresponds to the focused client, send an enter event. */
74+ /* If this new input resource corresponds to the focused client, send an
75+ * enter event. */
76 if (wl_resource_get_client(resource) == input_focus->client) {
77- input_focus->handler->enter(input_focus->handler, &resources, input_focus->view);
78+ input_focus->handler->enter(input_focus->handler, &resources,
79+ input_focus->view);
80 target = &input_focus->active;
81 }
82
83@@ -110,7 +121,8 @@ input_focus_add_resource(struct input_focus *input_focus, struct wl_resource *re
84 }
85
86 void
87-input_focus_remove_resource(struct input_focus *input_focus, struct wl_resource *resource)
88+input_focus_remove_resource(struct input_focus *input_focus,
89+ struct wl_resource *resource)
90 {
91 wl_list_remove(wl_resource_get_link(resource));
92 }
93@@ -120,8 +132,9 @@ input_focus_set(struct input_focus *input_focus, struct compositor_view *view)
94 {
95 struct input_focus_event_data data;
96
97- if (view == input_focus->view)
98+ if (view == input_focus->view) {
99 return;
100+ }
101
102 data.old = input_focus->view;
103 data.new = view;
+18,
-10
1@@ -29,17 +29,17 @@
2
3 /* Focus {{{ */
4
5-enum {
6- INPUT_FOCUS_EVENT_CHANGED
7-};
8+enum { INPUT_FOCUS_EVENT_CHANGED };
9
10 struct input_focus_event_data {
11 struct compositor_view *old, *new;
12 };
13
14 struct input_focus_handler {
15- void (*enter)(struct input_focus_handler *handler, struct wl_list *resources, struct compositor_view *view);
16- void (*leave)(struct input_focus_handler *handler, struct wl_list *resources, struct compositor_view *view);
17+ void (*enter)(struct input_focus_handler *handler,
18+ struct wl_list *resources, struct compositor_view *view);
19+ void (*leave)(struct input_focus_handler *handler,
20+ struct wl_list *resources, struct compositor_view *view);
21 };
22
23 struct input_focus {
24@@ -53,11 +53,19 @@ struct input_focus {
25 struct wl_signal event_signal;
26 };
27
28-bool input_focus_initialize(struct input_focus *input_focus, struct input_focus_handler *input_handler);
29-void input_focus_finalize(struct input_focus *input_focus);
30-void input_focus_add_resource(struct input_focus *input_focus, struct wl_resource *resource);
31-void input_focus_remove_resource(struct input_focus *input_focus, struct wl_resource *resource);
32-void input_focus_set(struct input_focus *input_focus, struct compositor_view *view);
33+bool
34+input_focus_initialize(struct input_focus *input_focus,
35+ struct input_focus_handler *input_handler);
36+void
37+input_focus_finalize(struct input_focus *input_focus);
38+void
39+input_focus_add_resource(struct input_focus *input_focus,
40+ struct wl_resource *resource);
41+void
42+input_focus_remove_resource(struct input_focus *input_focus,
43+ struct wl_resource *resource);
44+void
45+input_focus_set(struct input_focus *input_focus, struct compositor_view *view);
46
47 /* }}} */
48
+5,
-3
1@@ -24,8 +24,8 @@
2 #ifndef SWC_INTERNAL_H
3 #define SWC_INTERNAL_H
4
5-#include <wayland-server.h>
6 #include <stdbool.h>
7+#include <wayland-server.h>
8
9 enum {
10 SWC_EVENT_ACTIVATED,
11@@ -62,7 +62,9 @@ struct swc {
12
13 extern struct swc swc;
14
15-void swc_activate(void);
16-void swc_deactivate(void);
17+void
18+swc_activate(void);
19+void
20+swc_deactivate(void);
21
22 #endif
+26,
-14
1@@ -24,11 +24,12 @@
2 #include "kde_decoration.h"
3 #include "util.h"
4
5-#include <wayland-server.h>
6 #include "server-decoration-server-protocol.h"
7+#include <wayland-server.h>
8
9 static void
10-request_mode(struct wl_client *client, struct wl_resource *resource, uint32_t mode)
11+request_mode(struct wl_client *client, struct wl_resource *resource,
12+ uint32_t mode)
13 {
14 /* Server is required to send back the mode requested by
15 * the client, we just don't plan to do anything with it. */
16@@ -36,44 +37,55 @@ request_mode(struct wl_client *client, struct wl_resource *resource, uint32_t mo
17 }
18
19 static const struct org_kde_kwin_server_decoration_interface decoration_impl = {
20- .release = destroy_resource,
21- .request_mode = request_mode,
22+ .release = destroy_resource,
23+ .request_mode = request_mode,
24 };
25
26 static void
27-create(struct wl_client *client, struct wl_resource *resource, uint32_t id, struct wl_resource *toplevel_resource)
28+create(struct wl_client *client, struct wl_resource *resource, uint32_t id,
29+ struct wl_resource *toplevel_resource)
30 {
31 struct wl_resource *decoration;
32
33- decoration = wl_resource_create(client, &org_kde_kwin_server_decoration_interface, wl_resource_get_version(resource), id);
34+ decoration =
35+ wl_resource_create(client, &org_kde_kwin_server_decoration_interface,
36+ wl_resource_get_version(resource), id);
37 if (!decoration) {
38 wl_resource_post_no_memory(resource);
39 return;
40 }
41 wl_resource_set_implementation(decoration, &decoration_impl, NULL, NULL);
42- org_kde_kwin_server_decoration_send_mode(decoration, ORG_KDE_KWIN_SERVER_DECORATION_MANAGER_MODE_SERVER);
43+ org_kde_kwin_server_decoration_send_mode(
44+ decoration, ORG_KDE_KWIN_SERVER_DECORATION_MANAGER_MODE_SERVER);
45 }
46
47-static const struct org_kde_kwin_server_decoration_manager_interface decoration_manager_impl = {
48- .create = create,
49+static const struct org_kde_kwin_server_decoration_manager_interface
50+ decoration_manager_impl = {
51+ .create = create,
52 };
53
54 static void
55-bind_decoration_manager(struct wl_client *client, void *data, uint32_t version, uint32_t id)
56+bind_decoration_manager(struct wl_client *client, void *data, uint32_t version,
57+ uint32_t id)
58 {
59 struct wl_resource *resource;
60
61- resource = wl_resource_create(client, &org_kde_kwin_server_decoration_manager_interface, version, id);
62+ resource = wl_resource_create(
63+ client, &org_kde_kwin_server_decoration_manager_interface, version, id);
64 if (!resource) {
65 wl_client_post_no_memory(client);
66 return;
67 }
68- wl_resource_set_implementation(resource, &decoration_manager_impl, NULL, NULL);
69- org_kde_kwin_server_decoration_manager_send_default_mode(resource, ORG_KDE_KWIN_SERVER_DECORATION_MANAGER_MODE_SERVER);
70+ wl_resource_set_implementation(resource, &decoration_manager_impl, NULL,
71+ NULL);
72+ org_kde_kwin_server_decoration_manager_send_default_mode(
73+ resource, ORG_KDE_KWIN_SERVER_DECORATION_MANAGER_MODE_SERVER);
74 }
75
76 struct wl_global *
77 kde_decoration_manager_create(struct wl_display *display)
78 {
79- return wl_global_create(display, &org_kde_kwin_server_decoration_manager_interface, 1, NULL, &bind_decoration_manager);
80+ return wl_global_create(display,
81+ &org_kde_kwin_server_decoration_manager_interface,
82+ 1, NULL, &bind_decoration_manager);
83 }
+2,
-1
1@@ -26,6 +26,7 @@
2
3 struct wl_display;
4
5-struct wl_global *kde_decoration_manager_create(struct wl_display *display);
6+struct wl_global *
7+kde_decoration_manager_create(struct wl_display *display);
8
9 #endif
+141,
-67
1@@ -25,11 +25,11 @@
2 * SOFTWARE.
3 */
4
5-#include "swc.h"
6+#include "keyboard.h"
7 #include "compositor.h"
8 #include "internal.h"
9-#include "keyboard.h"
10 #include "surface.h"
11+#include "swc.h"
12 #include "util.h"
13
14 #include <assert.h>
15@@ -44,44 +44,61 @@
16 static const int repeat_delay = 500, repeat_rate = 40;
17
18 static void
19-enter(struct input_focus_handler *handler, struct wl_list *resources, struct compositor_view *view)
20+enter(struct input_focus_handler *handler,
21+ struct wl_list *resources,
22+ struct compositor_view *view)
23 {
24- struct keyboard *keyboard = wl_container_of(handler, keyboard, focus_handler);
25+ struct keyboard *keyboard =
26+ wl_container_of(handler, keyboard, focus_handler);
27 struct keyboard_modifier_state *state = &keyboard->modifier_state;
28 struct wl_resource *resource;
29 uint32_t serial;
30
31 serial = wl_display_next_serial(swc.display);
32- wl_resource_for_each (resource, resources) {
33- wl_keyboard_send_modifiers(resource, serial, state->depressed, state->locked, state->latched, state->group);
34- wl_keyboard_send_enter(resource, serial, view->surface->resource, &keyboard->client_keys);
35+ wl_resource_for_each(resource, resources)
36+ {
37+ wl_keyboard_send_modifiers(resource,
38+ serial,
39+ state->depressed,
40+ state->locked,
41+ state->latched,
42+ state->group);
43+ wl_keyboard_send_enter(
44+ resource, serial, view->surface->resource, &keyboard->client_keys);
45 }
46 }
47
48 static void
49-leave(struct input_focus_handler *handler, struct wl_list *resources, struct compositor_view *view)
50+leave(struct input_focus_handler *handler,
51+ struct wl_list *resources,
52+ struct compositor_view *view)
53 {
54 struct wl_resource *resource;
55 uint32_t serial;
56
57 serial = wl_display_next_serial(swc.display);
58- wl_resource_for_each (resource, resources)
59- wl_keyboard_send_leave(resource, serial, view->surface->resource);
60+ wl_resource_for_each(resource, resources)
61+ wl_keyboard_send_leave(resource, serial, view->surface->resource);
62 }
63
64 static bool
65-client_handle_key(struct keyboard *keyboard, uint32_t time, struct key *key, uint32_t state)
66+client_handle_key(struct keyboard *keyboard,
67+ uint32_t time,
68+ struct key *key,
69+ uint32_t state)
70 {
71 uint32_t *value;
72 struct wl_resource *resource;
73
74 if (state == WL_KEYBOARD_KEY_STATE_PRESSED) {
75- if (!(value = wl_array_add(&keyboard->client_keys, sizeof(*value))))
76+ if (!(value = wl_array_add(&keyboard->client_keys, sizeof(*value)))) {
77 return false;
78+ }
79
80 *value = key->press.value;
81 } else {
82- wl_array_for_each (value, &keyboard->client_keys) {
83+ wl_array_for_each(value, &keyboard->client_keys)
84+ {
85 if (*value == key->press.value) {
86 array_remove(&keyboard->client_keys, value, sizeof(*value));
87 break;
88@@ -89,23 +106,31 @@ client_handle_key(struct keyboard *keyboard, uint32_t time, struct key *key, uin
89 }
90 }
91
92- wl_resource_for_each (resource, &keyboard->focus.active)
93- wl_keyboard_send_key(resource, key->press.serial, time, key->press.value, state);
94+ wl_resource_for_each(resource, &keyboard->focus.active)
95+ wl_keyboard_send_key(
96+ resource, key->press.serial, time, key->press.value, state);
97 return true;
98 }
99
100 static bool
101-client_handle_modifiers(struct keyboard *keyboard, const struct keyboard_modifier_state *state)
102+client_handle_modifiers(struct keyboard *keyboard,
103+ const struct keyboard_modifier_state *state)
104 {
105 struct wl_resource *resource;
106 uint32_t serial;
107
108- if (wl_list_empty(&keyboard->focus.active))
109+ if (wl_list_empty(&keyboard->focus.active)) {
110 return false;
111+ }
112
113 serial = wl_display_next_serial(swc.display);
114- wl_resource_for_each (resource, &keyboard->focus.active)
115- wl_keyboard_send_modifiers(resource, serial, state->depressed, state->locked, state->latched, state->group);
116+ wl_resource_for_each(resource, &keyboard->focus.active)
117+ wl_keyboard_send_modifiers(resource,
118+ serial,
119+ state->depressed,
120+ state->locked,
121+ state->latched,
122+ state->group);
123 return true;
124 }
125
126@@ -117,24 +142,33 @@ update_keymap(struct xkb *xkb)
127 char *keymap_string;
128 int ret;
129
130- if (!(keymap_directory = getenv("XDG_RUNTIME_DIR")))
131+ if (!(keymap_directory = getenv("XDG_RUNTIME_DIR"))) {
132 keymap_directory = "/tmp";
133+ }
134
135- xkb->indices.ctrl = xkb_keymap_mod_get_index(xkb->keymap.map, XKB_MOD_NAME_CTRL);
136- xkb->indices.alt = xkb_keymap_mod_get_index(xkb->keymap.map, XKB_MOD_NAME_ALT);
137- xkb->indices.super = xkb_keymap_mod_get_index(xkb->keymap.map, XKB_MOD_NAME_LOGO);
138- xkb->indices.shift = xkb_keymap_mod_get_index(xkb->keymap.map, XKB_MOD_NAME_SHIFT);
139+ xkb->indices.ctrl =
140+ xkb_keymap_mod_get_index(xkb->keymap.map, XKB_MOD_NAME_CTRL);
141+ xkb->indices.alt =
142+ xkb_keymap_mod_get_index(xkb->keymap.map, XKB_MOD_NAME_ALT);
143+ xkb->indices.super =
144+ xkb_keymap_mod_get_index(xkb->keymap.map, XKB_MOD_NAME_LOGO);
145+ xkb->indices.shift =
146+ xkb_keymap_mod_get_index(xkb->keymap.map, XKB_MOD_NAME_SHIFT);
147
148 /* In order to send the keymap to clients, we must first convert it to a
149 * string and then mmap it to a file. */
150- keymap_string = xkb_keymap_get_as_string(xkb->keymap.map, XKB_KEYMAP_FORMAT_TEXT_V1);
151+ keymap_string =
152+ xkb_keymap_get_as_string(xkb->keymap.map, XKB_KEYMAP_FORMAT_TEXT_V1);
153
154 if (!keymap_string) {
155 WARNING("Could not get XKB keymap as a string\n");
156 goto error0;
157 }
158
159- ret = snprintf(keymap_path, sizeof(keymap_path), "%s/swc-xkb-keymap-XXXXXX", keymap_directory);
160+ ret = snprintf(keymap_path,
161+ sizeof(keymap_path),
162+ "%s/swc-xkb-keymap-XXXXXX",
163+ keymap_directory);
164 if (ret < 0 || (size_t)ret >= sizeof(keymap_path)) {
165 WARNING("Could not determine XKB keymap path\n");
166 goto error1;
167@@ -165,7 +199,12 @@ update_keymap(struct xkb *xkb)
168 }
169 #endif
170
171- xkb->keymap.area = mmap(NULL, xkb->keymap.size, PROT_READ | PROT_WRITE, MAP_SHARED, xkb->keymap.fd, 0);
172+ xkb->keymap.area = mmap(NULL,
173+ xkb->keymap.size,
174+ PROT_READ | PROT_WRITE,
175+ MAP_SHARED,
176+ xkb->keymap.fd,
177+ 0);
178
179 if (xkb->keymap.area == MAP_FAILED) {
180 WARNING("Could not mmap XKB keymap string\n");
181@@ -192,8 +231,9 @@ keyboard_create(struct xkb_rule_names *names)
182 struct xkb *xkb;
183
184 keyboard = malloc(sizeof(*keyboard));
185- if (!keyboard)
186+ if (!keyboard) {
187 goto error0;
188+ }
189
190 xkb = &keyboard->xkb;
191 if (!(xkb->context = xkb_context_new(0))) {
192@@ -201,7 +241,8 @@ keyboard_create(struct xkb_rule_names *names)
193 goto error1;
194 }
195
196- if (!(xkb->keymap.map = xkb_keymap_new_from_names(xkb->context, names, 0))) {
197+ if (!(xkb->keymap.map =
198+ xkb_keymap_new_from_names(xkb->context, names, 0))) {
199 ERROR("Could not create XKB keymap\n");
200 goto error2;
201 }
202@@ -216,8 +257,9 @@ keyboard_create(struct xkb_rule_names *names)
203 goto error4;
204 }
205
206- if (!input_focus_initialize(&keyboard->focus, &keyboard->focus_handler))
207+ if (!input_focus_initialize(&keyboard->focus, &keyboard->focus_handler)) {
208 goto error4;
209+ }
210
211 keyboard->modifier_state = (struct keyboard_modifier_state){0};
212 keyboard->modifiers = 0;
213@@ -266,13 +308,15 @@ keyboard_reset(struct keyboard *keyboard)
214 struct xkb_state *state;
215
216 /* Send simulated key release events for all current key handlers. */
217- wl_array_for_each (key, &keyboard->keys) {
218+ wl_array_for_each(key, &keyboard->keys)
219+ {
220 if (key->handler) {
221 key->press.serial = wl_display_next_serial(swc.display);
222- key->handler->key(keyboard, time, key, WL_KEYBOARD_KEY_STATE_RELEASED);
223- /* Don't bother updating the XKB state because we will be resetting it
224- * later on and it is unlikely that a key handler cares about the keyboard
225- * state for release events. */
226+ key->handler->key(
227+ keyboard, time, key, WL_KEYBOARD_KEY_STATE_RELEASED);
228+ /* Don't bother updating the XKB state because we will be resetting
229+ * it later on and it is unlikely that a key handler cares about the
230+ * keyboard state for release events. */
231 }
232 }
233
234@@ -304,7 +348,7 @@ keyboard_set_focus(struct keyboard *keyboard, struct compositor_view *view)
235 }
236
237 static const struct wl_keyboard_interface keyboard_impl = {
238- .release = destroy_resource,
239+ .release = destroy_resource,
240 };
241
242 static void
243@@ -315,28 +359,42 @@ unbind(struct wl_resource *resource)
244 }
245
246 struct wl_resource *
247-keyboard_bind(struct keyboard *keyboard, struct wl_client *client, uint32_t version, uint32_t id)
248+keyboard_bind(struct keyboard *keyboard,
249+ struct wl_client *client,
250+ uint32_t version,
251+ uint32_t id)
252 {
253 struct wl_resource *client_resource;
254
255- client_resource = wl_resource_create(client, &wl_keyboard_interface, version, id);
256- if (!client_resource)
257+ client_resource =
258+ wl_resource_create(client, &wl_keyboard_interface, version, id);
259+ if (!client_resource) {
260 return NULL;
261- wl_resource_set_implementation(client_resource, &keyboard_impl, keyboard, &unbind);
262+ }
263+ wl_resource_set_implementation(
264+ client_resource, &keyboard_impl, keyboard, &unbind);
265
266 /* Subtract one to remove terminating NULL character. */
267- wl_keyboard_send_keymap(client_resource, WL_KEYBOARD_KEYMAP_FORMAT_XKB_V1, keyboard->xkb.keymap.fd, keyboard->xkb.keymap.size - 1);
268+ wl_keyboard_send_keymap(client_resource,
269+ WL_KEYBOARD_KEYMAP_FORMAT_XKB_V1,
270+ keyboard->xkb.keymap.fd,
271+ keyboard->xkb.keymap.size - 1);
272
273 input_focus_add_resource(&keyboard->focus, client_resource);
274
275- if (version >= 4)
276- wl_keyboard_send_repeat_info(client_resource, repeat_rate, repeat_delay);
277+ if (version >= 4) {
278+ wl_keyboard_send_repeat_info(
279+ client_resource, repeat_rate, repeat_delay);
280+ }
281
282 return client_resource;
283 }
284
285 void
286-keyboard_handle_key(struct keyboard *keyboard, uint32_t time, uint32_t value, uint32_t state)
287+keyboard_handle_key(struct keyboard *keyboard,
288+ uint32_t time,
289+ uint32_t value,
290+ uint32_t state)
291 {
292 struct key *key;
293 struct keyboard_modifier_state modifier_state;
294@@ -348,11 +406,13 @@ keyboard_handle_key(struct keyboard *keyboard, uint32_t time, uint32_t value, ui
295 serial = wl_display_next_serial(swc.display);
296
297 /* First handle key release events associated with a particular handler. */
298- wl_array_for_each (key, &keyboard->keys) {
299+ wl_array_for_each(key, &keyboard->keys)
300+ {
301 if (key->press.value == value) {
302 /* Ignore repeat events. */
303- if (state == WL_KEYBOARD_KEY_STATE_PRESSED)
304+ if (state == WL_KEYBOARD_KEY_STATE_PRESSED) {
305 return;
306+ }
307
308 if (key->handler) {
309 key->press.serial = serial;
310@@ -365,18 +425,21 @@ keyboard_handle_key(struct keyboard *keyboard, uint32_t time, uint32_t value, ui
311 }
312
313 /* If we get a unpaired release event, just ignore it. */
314- if (state == WL_KEYBOARD_KEY_STATE_RELEASED)
315+ if (state == WL_KEYBOARD_KEY_STATE_RELEASED) {
316 return;
317+ }
318
319- if (!(key = wl_array_add(&keyboard->keys, sizeof(*key))))
320+ if (!(key = wl_array_add(&keyboard->keys, sizeof(*key)))) {
321 goto update_xkb_state;
322+ }
323
324 key->press.value = value;
325 key->press.serial = serial;
326 key->handler = NULL;
327
328 /* Go through handlers to see if any will accept this key event. */
329- wl_list_for_each (handler, &keyboard->handlers, link) {
330+ wl_list_for_each(handler, &keyboard->handlers, link)
331+ {
332 if (handler->key && handler->key(keyboard, time, key, state)) {
333 key->handler = handler;
334 break;
335@@ -385,37 +448,48 @@ keyboard_handle_key(struct keyboard *keyboard, uint32_t time, uint32_t value, ui
336
337 /* Update XKB state. */
338 update_xkb_state:
339- direction = state == WL_KEYBOARD_KEY_STATE_PRESSED ? XKB_KEY_DOWN : XKB_KEY_UP;
340+ direction =
341+ state == WL_KEYBOARD_KEY_STATE_PRESSED ? XKB_KEY_DOWN : XKB_KEY_UP;
342 xkb_state_update_key(xkb->state, XKB_KEY(value), direction);
343
344- modifier_state.depressed = xkb_state_serialize_mods(xkb->state, XKB_STATE_DEPRESSED);
345- modifier_state.latched = xkb_state_serialize_mods(xkb->state, XKB_STATE_LATCHED);
346- modifier_state.locked = xkb_state_serialize_mods(xkb->state, XKB_STATE_LOCKED);
347- modifier_state.group = xkb_state_serialize_layout(xkb->state, XKB_STATE_LAYOUT_EFFECTIVE);
348-
349- if (modifier_state.depressed != keyboard->modifier_state.depressed
350- || modifier_state.latched != keyboard->modifier_state.latched
351- || modifier_state.locked != keyboard->modifier_state.locked
352- || modifier_state.group != keyboard->modifier_state.group)
353- {
354- uint32_t mods_active = modifier_state.depressed | modifier_state.latched;
355+ modifier_state.depressed =
356+ xkb_state_serialize_mods(xkb->state, XKB_STATE_DEPRESSED);
357+ modifier_state.latched =
358+ xkb_state_serialize_mods(xkb->state, XKB_STATE_LATCHED);
359+ modifier_state.locked =
360+ xkb_state_serialize_mods(xkb->state, XKB_STATE_LOCKED);
361+ modifier_state.group =
362+ xkb_state_serialize_layout(xkb->state, XKB_STATE_LAYOUT_EFFECTIVE);
363+
364+ if (modifier_state.depressed != keyboard->modifier_state.depressed ||
365+ modifier_state.latched != keyboard->modifier_state.latched ||
366+ modifier_state.locked != keyboard->modifier_state.locked ||
367+ modifier_state.group != keyboard->modifier_state.group) {
368+ uint32_t mods_active =
369+ modifier_state.depressed | modifier_state.latched;
370
371 /* Update keyboard modifier state. */
372 keyboard->modifier_state = modifier_state;
373 keyboard->modifiers = 0;
374- if (mods_active & (1 << keyboard->xkb.indices.ctrl))
375+ if (mods_active & (1 << keyboard->xkb.indices.ctrl)) {
376 keyboard->modifiers |= SWC_MOD_CTRL;
377- if (mods_active & (1 << keyboard->xkb.indices.alt))
378+ }
379+ if (mods_active & (1 << keyboard->xkb.indices.alt)) {
380 keyboard->modifiers |= SWC_MOD_ALT;
381- if (mods_active & (1 << keyboard->xkb.indices.super))
382+ }
383+ if (mods_active & (1 << keyboard->xkb.indices.super)) {
384 keyboard->modifiers |= SWC_MOD_LOGO;
385- if (mods_active & (1 << keyboard->xkb.indices.shift))
386+ }
387+ if (mods_active & (1 << keyboard->xkb.indices.shift)) {
388 keyboard->modifiers |= SWC_MOD_SHIFT;
389+ }
390
391 /* Run any modifier handlers. */
392- wl_list_for_each (handler, &keyboard->handlers, link) {
393- if (handler->modifiers)
394+ wl_list_for_each(handler, &keyboard->handlers, link)
395+ {
396+ if (handler->modifiers) {
397 handler->modifiers(keyboard, &modifier_state);
398+ }
399 }
400 }
401 }
+19,
-9
1@@ -26,8 +26,8 @@
2
3 #include "input.h"
4
5-#include <xkbcommon/xkbcommon.h>
6 #include <wayland-util.h>
7+#include <xkbcommon/xkbcommon.h>
8
9 /* Keycodes are offset by 8 in XKB. */
10 #define XKB_KEY(key) ((key) + 8)
11@@ -48,8 +48,10 @@ struct keyboard_modifier_state {
12 };
13
14 struct keyboard_handler {
15- bool (*key)(struct keyboard *keyboard, uint32_t time, struct key *key, uint32_t state);
16- bool (*modifiers)(struct keyboard *keyboard, const struct keyboard_modifier_state *state);
17+ bool (*key)(struct keyboard *keyboard, uint32_t time, struct key *key,
18+ uint32_t state);
19+ bool (*modifiers)(struct keyboard *keyboard,
20+ const struct keyboard_modifier_state *state);
21
22 struct wl_list link;
23 };
24@@ -84,11 +86,19 @@ struct keyboard {
25 uint32_t modifiers;
26 };
27
28-struct keyboard *keyboard_create(struct xkb_rule_names *names);
29-void keyboard_destroy(struct keyboard *keyboard);
30-bool keyboard_reset(struct keyboard *keyboard);
31-void keyboard_set_focus(struct keyboard *keyboard, struct compositor_view *view);
32-struct wl_resource *keyboard_bind(struct keyboard *keyboard, struct wl_client *client, uint32_t version, uint32_t id);
33-void keyboard_handle_key(struct keyboard *keyboard, uint32_t time, uint32_t key, uint32_t state);
34+struct keyboard *
35+keyboard_create(struct xkb_rule_names *names);
36+void
37+keyboard_destroy(struct keyboard *keyboard);
38+bool
39+keyboard_reset(struct keyboard *keyboard);
40+void
41+keyboard_set_focus(struct keyboard *keyboard, struct compositor_view *view);
42+struct wl_resource *
43+keyboard_bind(struct keyboard *keyboard, struct wl_client *client,
44+ uint32_t version, uint32_t id);
45+void
46+keyboard_handle_key(struct keyboard *keyboard, uint32_t time, uint32_t key,
47+ uint32_t state);
48
49 #endif
+28,
-16
1@@ -27,8 +27,8 @@
2 #include "launch/protocol.h"
3 #include "util.h"
4
5-#include <sys/uio.h>
6 #include <fcntl.h>
7+#include <sys/uio.h>
8 #include <unistd.h>
9 #include <wayland-server.h>
10
11@@ -60,11 +60,12 @@ handle_data(int fd, uint32_t mask, void *data)
12 {
13 struct swc_launch_event event;
14 struct iovec iov[1] = {
15- {.iov_base = &event, .iov_len = sizeof(event)},
16+ {.iov_base = &event, .iov_len = sizeof(event)},
17 };
18
19- if (receive_fd(fd, NULL, iov, 1) != -1)
20+ if (receive_fd(fd, NULL, iov, 1) != -1) {
21 handle_event(&event);
22+ }
23 return 1;
24 }
25
26@@ -73,20 +74,25 @@ launch_initialize(void)
27 {
28 char *socket_string, *end;
29
30- if (!(socket_string = getenv(SWC_LAUNCH_SOCKET_ENV)))
31+ if (!(socket_string = getenv(SWC_LAUNCH_SOCKET_ENV))) {
32 return false;
33+ }
34
35 launch.socket = strtol(socket_string, &end, 10);
36- if (*end != '\0')
37+ if (*end != '\0') {
38 return false;
39+ }
40
41 unsetenv(SWC_LAUNCH_SOCKET_ENV);
42- if (fcntl(launch.socket, F_SETFD, FD_CLOEXEC) < 0)
43+ if (fcntl(launch.socket, F_SETFD, FD_CLOEXEC) < 0) {
44 return false;
45+ }
46
47- launch.source = wl_event_loop_add_fd(swc.event_loop, launch.socket, WL_EVENT_READABLE, &handle_data, NULL);
48- if (!launch.source)
49+ launch.source = wl_event_loop_add_fd(swc.event_loop, launch.socket,
50+ WL_EVENT_READABLE, &handle_data, NULL);
51+ if (!launch.source) {
52 return false;
53+ }
54
55 return true;
56 }
57@@ -99,24 +105,28 @@ launch_finalize(void)
58 }
59
60 static bool
61-send_request(struct swc_launch_request *request, const void *data, size_t size, struct swc_launch_event *event, int out_fd, int *in_fd)
62+send_request(struct swc_launch_request *request, const void *data, size_t size,
63+ struct swc_launch_event *event, int out_fd, int *in_fd)
64 {
65 struct iovec request_iov[2] = {
66- {.iov_base = request, .iov_len = sizeof(*request)},
67- {.iov_base = (void *)data, .iov_len = size},
68+ {.iov_base = request, .iov_len = sizeof(*request)},
69+ {.iov_base = (void *)data, .iov_len = size},
70 };
71 struct iovec response_iov[1] = {
72- {.iov_base = event, .iov_len = sizeof(*event)},
73+ {.iov_base = event, .iov_len = sizeof(*event)},
74 };
75
76 request->serial = ++launch.next_serial;
77
78- if (send_fd(launch.socket, out_fd, request_iov, 1 + (size > 0)) == -1)
79+ if (send_fd(launch.socket, out_fd, request_iov, 1 + (size > 0)) == -1) {
80 return false;
81+ }
82
83 while (receive_fd(launch.socket, in_fd, response_iov, 1) != -1) {
84- if (event->type == SWC_LAUNCH_EVENT_RESPONSE && event->serial == request->serial)
85+ if (event->type == SWC_LAUNCH_EVENT_RESPONSE &&
86+ event->serial == request->serial) {
87 return true;
88+ }
89 handle_event(event);
90 }
91
92@@ -133,8 +143,9 @@ launch_open_device(const char *path, int flags)
93 request.type = SWC_LAUNCH_REQUEST_OPEN_DEVICE;
94 request.flags = flags;
95
96- if (!send_request(&request, path, strlen(path) + 1, &response, -1, &fd))
97+ if (!send_request(&request, path, strlen(path) + 1, &response, -1, &fd)) {
98 return -1;
99+ }
100
101 return fd;
102 }
103@@ -148,8 +159,9 @@ launch_activate_vt(unsigned vt)
104 request.type = SWC_LAUNCH_REQUEST_ACTIVATE_VT;
105 request.vt = vt;
106
107- if (!send_request(&request, NULL, 0, &response, -1, NULL))
108+ if (!send_request(&request, NULL, 0, &response, -1, NULL)) {
109 return false;
110+ }
111
112 return response.success;
113 }
+8,
-4
1@@ -26,9 +26,13 @@
2
3 #include <stdbool.h>
4
5-bool launch_initialize(void);
6-void launch_finalize(void);
7-int launch_open_device(const char *path, int flags);
8-bool launch_activate_vt(unsigned vt);
9+bool
10+launch_initialize(void);
11+void
12+launch_finalize(void);
13+int
14+launch_open_device(const char *path, int flags);
15+bool
16+launch_activate_vt(unsigned vt);
17
18 #endif
+2,
-3
1@@ -37,7 +37,6 @@ mode_initialize(struct mode *mode, drmModeModeInfo *mode_info)
2 bool
3 mode_equal(const struct mode *mode1, const struct mode *mode2)
4 {
5- return mode1->width == mode2->width
6- && mode1->height == mode2->height
7- && mode1->refresh == mode2->refresh;
8+ return mode1->width == mode2->width && mode1->height == mode2->height &&
9+ mode1->refresh == mode2->refresh;
10 }
+4,
-2
1@@ -38,7 +38,9 @@ struct mode {
2 drmModeModeInfo info;
3 };
4
5-bool mode_initialize(struct mode *mode, drmModeModeInfo *mode_info);
6-bool mode_equal(const struct mode *mode1, const struct mode *mode2);
7+bool
8+mode_initialize(struct mode *mode, drmModeModeInfo *mode_info);
9+bool
10+mode_equal(const struct mode *mode1, const struct mode *mode2);
11
12 #endif
+30,
-17
1@@ -5,14 +5,14 @@
2 #include "screen.h"
3 #include "util.h"
4
5+#include <drm.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9-#include <drm.h>
10 #include <xf86drm.h>
11
12 static const struct wl_output_interface output_impl = {
13- .release = destroy_resource,
14+ .release = destroy_resource,
15 };
16
17 static void
18@@ -31,25 +31,32 @@ bind_output(struct wl_client *client, void *data, uint32_t version, uint32_t id)
19 return;
20 }
21
22- wl_resource_set_implementation(resource, &output_impl, output, &remove_resource);
23+ wl_resource_set_implementation(resource, &output_impl, output,
24+ &remove_resource);
25 wl_list_insert(&output->resources, wl_resource_get_link(resource));
26
27- wl_output_send_geometry(resource, screen->base.geometry.x, screen->base.geometry.y,
28- output->physical_width, output->physical_height,
29- 0, "unknown", "unknown", WL_OUTPUT_TRANSFORM_NORMAL);
30+ wl_output_send_geometry(resource, screen->base.geometry.x,
31+ screen->base.geometry.y, output->physical_width,
32+ output->physical_height, 0, "unknown", "unknown",
33+ WL_OUTPUT_TRANSFORM_NORMAL);
34
35- wl_array_for_each (mode, &output->modes) {
36+ wl_array_for_each(mode, &output->modes)
37+ {
38 flags = 0;
39- if (mode->preferred)
40+ if (mode->preferred) {
41 flags |= WL_OUTPUT_MODE_PREFERRED;
42- if (mode_equal(&screen->planes.primary.mode, mode))
43+ }
44+ if (mode_equal(&screen->planes.primary.mode, mode)) {
45 flags |= WL_OUTPUT_MODE_CURRENT;
46+ }
47
48- wl_output_send_mode(resource, flags, mode->width, mode->height, mode->refresh);
49+ wl_output_send_mode(resource, flags, mode->width, mode->height,
50+ mode->refresh);
51 }
52
53- if (version >= 2)
54+ if (version >= 2) {
55 wl_output_send_done(resource);
56+ }
57 }
58
59 struct output *
60@@ -64,7 +71,8 @@ output_new(drmModeConnectorPtr connector)
61 goto error0;
62 }
63
64- output->global = wl_global_create(swc.display, &wl_output_interface, 3, output, &bind_output);
65+ output->global = wl_global_create(swc.display, &wl_output_interface, 3,
66+ output, &bind_output);
67
68 if (!output->global) {
69 ERROR("Failed to create output global\n");
70@@ -82,22 +90,27 @@ output_new(drmModeConnectorPtr connector)
71
72 output->connector = connector->connector_id;
73
74- if (connector->count_modes == 0)
75+ if (connector->count_modes == 0) {
76 goto error2;
77+ }
78
79- modes = wl_array_add(&output->modes, connector->count_modes * sizeof(*modes));
80- if (!modes)
81+ modes =
82+ wl_array_add(&output->modes, connector->count_modes * sizeof(*modes));
83+ if (!modes) {
84 goto error2;
85+ }
86
87 for (i = 0; i < connector->count_modes; ++i) {
88 mode_initialize(&modes[i], &connector->modes[i]);
89
90- if (modes[i].preferred)
91+ if (modes[i].preferred) {
92 output->preferred_mode = &modes[i];
93+ }
94 }
95
96- if (!output->preferred_mode)
97+ if (!output->preferred_mode) {
98 output->preferred_mode = &modes[0];
99+ }
100
101 return output;
102
+5,
-3
1@@ -1,8 +1,8 @@
2 #ifndef SWC_OUTPUT_H
3 #define SWC_OUTPUT_H
4
5-#include <stdint.h>
6 #include <pixman.h>
7+#include <stdint.h>
8 #include <wayland-util.h>
9 #include <xf86drmMode.h>
10
11@@ -27,7 +27,9 @@ struct output {
12 struct wl_list link;
13 };
14
15-struct output *output_new(drmModeConnector *connector);
16-void output_destroy(struct output *output);
17+struct output *
18+output_new(drmModeConnector *connector);
19+void
20+output_destroy(struct output *output);
21
22 #endif
+50,
-34
1@@ -32,9 +32,9 @@
2 #include "util.h"
3 #include "view.h"
4
5+#include "swc-server-protocol.h"
6 #include <assert.h>
7 #include <stdlib.h>
8-#include "swc-server-protocol.h"
9
10 struct panel {
11 struct wl_resource *resource;
12@@ -53,7 +53,8 @@ static void
13 update_position(struct panel *panel)
14 {
15 int32_t x, y;
16- struct swc_rectangle *screen = &panel->screen->base.geometry, *view = &panel->view->base.geometry;
17+ struct swc_rectangle *screen = &panel->screen->base.geometry,
18+ *view = &panel->view->base.geometry;
19
20 switch (panel->edge) {
21 case SWC_PANEL_EDGE_TOP:
22@@ -80,16 +81,18 @@ update_position(struct panel *panel)
23 }
24
25 static void
26-dock(struct wl_client *client, struct wl_resource *resource, uint32_t edge, struct wl_resource *screen_resource, uint32_t focus)
27+dock(struct wl_client *client, struct wl_resource *resource, uint32_t edge,
28+ struct wl_resource *screen_resource, uint32_t focus)
29 {
30 struct panel *panel = wl_resource_get_user_data(resource);
31 struct screen *screen;
32 uint32_t length;
33
34- if (screen_resource)
35+ if (screen_resource) {
36 screen = wl_resource_get_user_data(screen_resource);
37- else
38+ } else {
39 screen = wl_container_of(swc.screens.next, screen, link);
40+ }
41
42 switch (edge) {
43 case SWC_PANEL_EDGE_TOP:
44@@ -117,64 +120,69 @@ dock(struct wl_client *client, struct wl_resource *resource, uint32_t edge, stru
45 compositor_view_show(panel->view);
46 wl_list_insert(&screen->modifiers, &panel->modifier.link);
47
48- if (focus)
49+ if (focus) {
50 keyboard_set_focus(swc.seat->keyboard, panel->view);
51+ }
52
53 swc_panel_send_docked(resource, length);
54 }
55
56 static void
57-set_offset(struct wl_client *client, struct wl_resource *resource, uint32_t offset)
58+set_offset(struct wl_client *client, struct wl_resource *resource,
59+ uint32_t offset)
60 {
61 struct panel *panel = wl_resource_get_user_data(resource);
62
63 panel->offset = offset;
64- if (panel->docked)
65+ if (panel->docked) {
66 update_position(panel);
67+ }
68 }
69
70 static void
71-set_strut(struct wl_client *client, struct wl_resource *resource, uint32_t size, uint32_t begin, uint32_t end)
72+set_strut(struct wl_client *client, struct wl_resource *resource, uint32_t size,
73+ uint32_t begin, uint32_t end)
74 {
75 struct panel *panel = wl_resource_get_user_data(resource);
76
77 panel->strut_size = size;
78- if (panel->docked)
79+ if (panel->docked) {
80 screen_update_usable_geometry(panel->screen);
81+ }
82 }
83
84 static const struct swc_panel_interface panel_impl = {
85- .dock = dock,
86- .set_offset = set_offset,
87- .set_strut = set_strut,
88+ .dock = dock,
89+ .set_offset = set_offset,
90+ .set_strut = set_strut,
91 };
92
93 static void
94-handle_resize(struct view_handler *handler, uint32_t old_width, uint32_t old_height)
95+handle_resize(struct view_handler *handler, uint32_t old_width,
96+ uint32_t old_height)
97 {
98 struct panel *panel = wl_container_of(handler, panel, view_handler);
99 update_position(panel);
100 }
101
102 static const struct view_handler_impl view_handler_impl = {
103- .resize = handle_resize,
104+ .resize = handle_resize,
105 };
106
107 static void
108-modify(struct screen_modifier *modifier, const struct swc_rectangle *geom, pixman_region32_t *usable)
109+modify(struct screen_modifier *modifier, const struct swc_rectangle *geom,
110+ pixman_region32_t *usable)
111 {
112 struct panel *panel = wl_container_of(modifier, panel, modifier);
113- pixman_box32_t box = {
114- .x1 = geom->x,
115- .y1 = geom->y,
116- .x2 = geom->x + geom->width,
117- .y2 = geom->y + geom->height
118- };
119+ pixman_box32_t box = {.x1 = geom->x,
120+ .y1 = geom->y,
121+ .x2 = geom->x + geom->width,
122+ .y2 = geom->y + geom->height};
123
124 assert(panel->docked);
125
126- DEBUG("Original geometry { x1: %d, y1: %d, x2: %d, y2: %d }\n",
127- box.x1, box.y1, box.x2, box.y2);
128+ DEBUG("Original geometry { x1: %d, y1: %d, x2: %d, y2: %d }\n", box.x1,
129+ box.y1, box.x2, box.y2);
130
131 switch (panel->edge) {
132 case SWC_PANEL_EDGE_TOP:
133@@ -191,8 +199,8 @@ modify(struct screen_modifier *modifier, const struct swc_rectangle *geom, pixma
134 break;
135 }
136
137- DEBUG("Usable region { x1: %d, y1: %d, x2: %d, y2: %d }\n",
138- box.x1, box.y1, box.x2, box.y2);
139+ DEBUG("Usable region { x1: %d, y1: %d, x2: %d, y2: %d }\n", box.x1, box.y1,
140+ box.x2, box.y2);
141
142 pixman_region32_reset(usable, &box);
143 }
144@@ -214,29 +222,36 @@ destroy_panel(struct wl_resource *resource)
145 static void
146 handle_surface_destroy(struct wl_listener *listener, void *data)
147 {
148- struct panel *panel = wl_container_of(listener, panel, surface_destroy_listener);
149+ struct panel *panel =
150+ wl_container_of(listener, panel, surface_destroy_listener);
151 wl_resource_destroy(panel->resource);
152 }
153
154 struct panel *
155-panel_new(struct wl_client *client, uint32_t version, uint32_t id, struct surface *surface)
156+panel_new(struct wl_client *client, uint32_t version, uint32_t id,
157+ struct surface *surface)
158 {
159 struct panel *panel;
160
161 panel = malloc(sizeof(*panel));
162
163- if (!panel)
164+ if (!panel) {
165 goto error0;
166+ }
167
168- panel->resource = wl_resource_create(client, &swc_panel_interface, version, id);
169+ panel->resource =
170+ wl_resource_create(client, &swc_panel_interface, version, id);
171
172- if (!panel->resource)
173+ if (!panel->resource) {
174 goto error1;
175+ }
176
177- if (!(panel->view = compositor_create_view(surface)))
178+ if (!(panel->view = compositor_create_view(surface))) {
179 goto error2;
180+ }
181
182- wl_resource_set_implementation(panel->resource, &panel_impl, panel, &destroy_panel);
183+ wl_resource_set_implementation(panel->resource, &panel_impl, panel,
184+ &destroy_panel);
185 panel->surface_destroy_listener.notify = &handle_surface_destroy;
186 panel->view_handler.impl = &view_handler_impl;
187 panel->modifier.modify = &modify;
188@@ -245,7 +260,8 @@ panel_new(struct wl_client *client, uint32_t version, uint32_t id, struct surfac
189 panel->strut_size = 0;
190 panel->docked = false;
191 wl_list_insert(&panel->view->base.handlers, &panel->view_handler.link);
192- wl_resource_add_destroy_listener(surface->resource, &panel->surface_destroy_listener);
193+ wl_resource_add_destroy_listener(surface->resource,
194+ &panel->surface_destroy_listener);
195
196 return panel;
197
+3,
-1
1@@ -29,6 +29,8 @@
2 struct surface;
3 struct wl_client;
4
5-struct panel *panel_new(struct wl_client *client, uint32_t version, uint32_t id, struct surface *surface);
6+struct panel *
7+panel_new(struct wl_client *client, uint32_t version, uint32_t id,
8+ struct surface *surface);
9
10 #endif
+12,
-7
1@@ -25,28 +25,32 @@
2 #include "internal.h"
3 #include "panel.h"
4
5-#include <wayland-server.h>
6 #include "swc-server-protocol.h"
7+#include <wayland-server.h>
8
9 static void
10-create_panel(struct wl_client *client, struct wl_resource *resource, uint32_t id, struct wl_resource *surface_resource)
11+create_panel(struct wl_client *client, struct wl_resource *resource,
12+ uint32_t id, struct wl_resource *surface_resource)
13 {
14 struct surface *surface = wl_resource_get_user_data(surface_resource);
15
16- if (!panel_new(client, wl_resource_get_version(resource), id, surface))
17+ if (!panel_new(client, wl_resource_get_version(resource), id, surface)) {
18 wl_client_post_no_memory(client);
19+ }
20 }
21
22 static const struct swc_panel_manager_interface panel_manager_impl = {
23- .create_panel = create_panel,
24+ .create_panel = create_panel,
25 };
26
27 static void
28-bind_panel_manager(struct wl_client *client, void *data, uint32_t version, uint32_t id)
29+bind_panel_manager(struct wl_client *client, void *data, uint32_t version,
30+ uint32_t id)
31 {
32 struct wl_resource *resource;
33
34- resource = wl_resource_create(client, &swc_panel_manager_interface, version, id);
35+ resource =
36+ wl_resource_create(client, &swc_panel_manager_interface, version, id);
37 if (!resource) {
38 wl_client_post_no_memory(client);
39 return;
40@@ -57,5 +61,6 @@ bind_panel_manager(struct wl_client *client, void *data, uint32_t version, uint3
41 struct wl_global *
42 panel_manager_create(struct wl_display *display)
43 {
44- return wl_global_create(display, &swc_panel_manager_interface, 1, NULL, &bind_panel_manager);
45+ return wl_global_create(display, &swc_panel_manager_interface, 1, NULL,
46+ &bind_panel_manager);
47 }
+2,
-1
1@@ -26,6 +26,7 @@
2
3 struct wl_display;
4
5-struct wl_global *panel_manager_create(struct wl_display *display);
6+struct wl_global *
7+panel_manager_create(struct wl_display *display);
8
9 #endif
+24,
-22
1@@ -22,16 +22,16 @@
2 */
3
4 #include "plane.h"
5-#include "event.h"
6 #include "drm.h"
7+#include "event.h"
8 #include "internal.h"
9 #include "screen.h"
10 #include "util.h"
11
12 #include <errno.h>
13 #include <stdlib.h>
14-#include <wld/wld.h>
15 #include <wld/drm.h>
16+#include <wld/wld.h>
17 #include <xf86drmMode.h>
18
19 enum plane_property {
20@@ -54,13 +54,16 @@ update(struct view *view)
21 struct plane *plane = wl_container_of(view, plane, view);
22 uint32_t x, y, w, h;
23
24- if (!plane->screen)
25+ if (!plane->screen) {
26 return false;
27+ }
28 x = view->geometry.x - plane->screen->base.geometry.x;
29 y = view->geometry.y - plane->screen->base.geometry.y;
30 w = view->geometry.width;
31 h = view->geometry.height;
32- if (swc.active && drmModeSetPlane(swc.drm->fd, plane->id, plane->screen->crtc, plane->fb, 0, x, y, w, h, 0, 0, w << 16, h << 16) < 0) {
33+ if (swc.active &&
34+ drmModeSetPlane(swc.drm->fd, plane->id, plane->screen->crtc, plane->fb,
35+ 0, x, y, w, h, 0, 0, w << 16, h << 16) < 0) {
36 ERROR("Could not set cursor: %s\n", strerror(errno));
37 return false;
38 }
39@@ -86,32 +89,28 @@ move(struct view *view, int32_t x, int32_t y)
40 }
41
42 static const struct view_impl view_impl = {
43- .update = update,
44- .attach = attach,
45- .move = move,
46+ .update = update,
47+ .attach = attach,
48+ .move = move,
49 };
50
51 static enum plane_property
52 find_prop(const char *name)
53 {
54 static const char property_names[][16] = {
55- [PLANE_TYPE] = "type",
56- [PLANE_IN_FENCE_FD] = "IN_FENCE_FD",
57- [PLANE_CRTC_ID] = "CRTC_ID",
58- [PLANE_CRTC_X] = "CRTC_X",
59- [PLANE_CRTC_Y] = "CRTC_Y",
60- [PLANE_CRTC_W] = "CRTC_W",
61- [PLANE_CRTC_H] = "CRTC_H",
62- [PLANE_SRC_X] = "SRC_X",
63- [PLANE_SRC_Y] = "SRC_Y",
64- [PLANE_SRC_W] = "SRC_W",
65- [PLANE_SRC_H] = "SRC_H",
66+ [PLANE_TYPE] = "type", [PLANE_IN_FENCE_FD] = "IN_FENCE_FD",
67+ [PLANE_CRTC_ID] = "CRTC_ID", [PLANE_CRTC_X] = "CRTC_X",
68+ [PLANE_CRTC_Y] = "CRTC_Y", [PLANE_CRTC_W] = "CRTC_W",
69+ [PLANE_CRTC_H] = "CRTC_H", [PLANE_SRC_X] = "SRC_X",
70+ [PLANE_SRC_Y] = "SRC_Y", [PLANE_SRC_W] = "SRC_W",
71+ [PLANE_SRC_H] = "SRC_H",
72 };
73 size_t i;
74
75 for (i = 0; i < ARRAY_LENGTH(property_names); ++i) {
76- if (strcmp(name, property_names[i]) == 0)
77+ if (strcmp(name, property_names[i]) == 0) {
78 return i;
79+ }
80 }
81 return -1;
82 }
83@@ -139,11 +138,13 @@ plane_new(uint32_t id)
84 drmModePlane *drm_plane;
85
86 plane = malloc(sizeof(*plane));
87- if (!plane)
88+ if (!plane) {
89 goto error0;
90+ }
91 drm_plane = drmModeGetPlane(swc.drm->fd, id);
92- if (!drm_plane)
93+ if (!drm_plane) {
94 goto error1;
95+ }
96 plane->id = id;
97 plane->fb = 0;
98 plane->screen = NULL;
99@@ -153,8 +154,9 @@ plane_new(uint32_t id)
100 props = drmModeObjectGetProperties(swc.drm->fd, id, DRM_MODE_OBJECT_PLANE);
101 for (i = 0; i < props->count_props; ++i, drmModeFreeProperty(prop)) {
102 prop = drmModeGetProperty(swc.drm->fd, props->props[i]);
103- if (prop && find_prop(prop->name) == PLANE_TYPE)
104+ if (prop && find_prop(prop->name) == PLANE_TYPE) {
105 plane->type = props->prop_values[i];
106+ }
107 }
108 plane->swc_listener.notify = &handle_swc_event;
109 wl_signal_add(&swc.event_signal, &plane->swc_listener);
+4,
-2
1@@ -39,7 +39,9 @@ struct plane {
2 struct wl_list link;
3 };
4
5-struct plane *plane_new(uint32_t id);
6-void plane_destroy(struct plane *plane);
7+struct plane *
8+plane_new(uint32_t id);
9+void
10+plane_destroy(struct plane *plane);
11
12 #endif
+193,
-105
1@@ -23,6 +23,7 @@
2
3 #include "pointer.h"
4 #include "compositor.h"
5+#include "cursor/cursor_data.h"
6 #include "event.h"
7 #include "internal.h"
8 #include "plane.h"
9@@ -31,7 +32,6 @@
10 #include "shm.h"
11 #include "surface.h"
12 #include "util.h"
13-#include "cursor/cursor_data.h"
14
15 #include <assert.h>
16 #include <stdio.h>
17@@ -55,15 +55,19 @@ swc_pointer_send_button(uint32_t time, uint32_t button, uint32_t state)
18 struct wl_resource *resource;
19 uint32_t serial;
20
21- if (!pointer || wl_list_empty(&pointer->focus.active))
22+ if (!pointer || wl_list_empty(&pointer->focus.active)) {
23 return;
24+ }
25
26 serial = wl_display_next_serial(swc.display);
27- wl_resource_for_each (resource, &pointer->focus.active)
28- wl_pointer_send_button(resource, serial, time, button, state);
29- wl_resource_for_each (resource, &pointer->focus.active) {
30- if (wl_resource_get_version(resource) >= WL_POINTER_FRAME_SINCE_VERSION)
31+ wl_resource_for_each(resource, &pointer->focus.active)
32+ wl_pointer_send_button(resource, serial, time, button, state);
33+ wl_resource_for_each(resource, &pointer->focus.active)
34+ {
35+ if (wl_resource_get_version(resource) >=
36+ WL_POINTER_FRAME_SINCE_VERSION) {
37 wl_pointer_send_frame(resource);
38+ }
39 }
40 pointer->client_axis_source = -1;
41 }
42@@ -75,38 +79,47 @@ swc_pointer_send_axis(uint32_t time, uint32_t axis, int32_t value120)
43 struct wl_resource *resource;
44 wl_fixed_t value;
45
46- if (!pointer || wl_list_empty(&pointer->focus.active))
47+ if (!pointer || wl_list_empty(&pointer->focus.active)) {
48 return;
49+ }
50
51 value = wl_fixed_from_double((double)value120 / 120.0);
52
53- wl_resource_for_each (resource, &pointer->focus.active) {
54+ wl_resource_for_each(resource, &pointer->focus.active)
55+ {
56 int ver = wl_resource_get_version(resource);
57
58- if (ver >= WL_POINTER_AXIS_SOURCE_SINCE_VERSION)
59+ if (ver >= WL_POINTER_AXIS_SOURCE_SINCE_VERSION) {
60 wl_pointer_send_axis_source(resource, WL_POINTER_AXIS_SOURCE_WHEEL);
61+ }
62 if (value120) {
63- if (ver >= WL_POINTER_AXIS_VALUE120_SINCE_VERSION)
64+ if (ver >= WL_POINTER_AXIS_VALUE120_SINCE_VERSION) {
65 wl_pointer_send_axis_value120(resource, axis, value120);
66- else if (ver >= WL_POINTER_AXIS_DISCRETE_SINCE_VERSION)
67+ } else if (ver >= WL_POINTER_AXIS_DISCRETE_SINCE_VERSION) {
68 wl_pointer_send_axis_discrete(resource, axis, value120 / 120);
69+ }
70 }
71
72- if (value)
73+ if (value) {
74 wl_pointer_send_axis(resource, time, axis, value);
75- else if (ver >= WL_POINTER_AXIS_STOP_SINCE_VERSION)
76+ } else if (ver >= WL_POINTER_AXIS_STOP_SINCE_VERSION) {
77 wl_pointer_send_axis_stop(resource, time, axis);
78+ }
79 }
80
81- wl_resource_for_each (resource, &pointer->focus.active) {
82- if (wl_resource_get_version(resource) >= WL_POINTER_FRAME_SINCE_VERSION)
83+ wl_resource_for_each(resource, &pointer->focus.active)
84+ {
85+ if (wl_resource_get_version(resource) >=
86+ WL_POINTER_FRAME_SINCE_VERSION) {
87 wl_pointer_send_frame(resource);
88+ }
89 }
90 pointer->client_axis_source = -1;
91 }
92
93 static void
94-enter(struct input_focus_handler *handler, struct wl_list *resources, struct compositor_view *view)
95+enter(struct input_focus_handler *handler, struct wl_list *resources,
96+ struct compositor_view *view)
97 {
98 struct pointer *pointer = wl_container_of(handler, pointer, focus_handler);
99 struct wl_resource *resource;
100@@ -124,25 +137,27 @@ enter(struct input_focus_handler *handler, struct wl_list *resources, struct com
101 origin_y = view->base.geometry.y - view->buffer_offset_y;
102 surface_x = pointer->x - wl_fixed_from_int(origin_x);
103 surface_y = pointer->y - wl_fixed_from_int(origin_y);
104- wl_resource_for_each (resource, resources)
105- wl_pointer_send_enter(resource, serial, view->surface->resource, surface_x, surface_y);
106+ wl_resource_for_each(resource, resources) wl_pointer_send_enter(
107+ resource, serial, view->surface->resource, surface_x, surface_y);
108 }
109
110 static void
111-leave(struct input_focus_handler *handler, struct wl_list *resources, struct compositor_view *view)
112+leave(struct input_focus_handler *handler, struct wl_list *resources,
113+ struct compositor_view *view)
114 {
115 struct wl_resource *resource;
116 uint32_t serial;
117
118 serial = wl_display_next_serial(swc.display);
119- wl_resource_for_each (resource, resources)
120- wl_pointer_send_leave(resource, serial, view->surface->resource);
121+ wl_resource_for_each(resource, resources)
122+ wl_pointer_send_leave(resource, serial, view->surface->resource);
123 }
124
125 static void
126 handle_cursor_surface_destroy(struct wl_listener *listener, void *data)
127 {
128- struct pointer *pointer = wl_container_of(listener, pointer, cursor.destroy_listener);
129+ struct pointer *pointer =
130+ wl_container_of(listener, pointer, cursor.destroy_listener);
131
132 view_attach(&pointer->cursor.view, NULL);
133 pointer->cursor.surface = NULL;
134@@ -162,27 +177,36 @@ attach(struct view *view, struct wld_buffer *buffer)
135 struct surface *surface = pointer->cursor.surface;
136 struct screen *screen;
137
138- if (surface && !pixman_region32_not_empty(&surface->state.damage))
139+ if (surface && !pixman_region32_not_empty(&surface->state.damage)) {
140 return 0;
141+ }
142
143 wld_set_target_buffer(swc.shm->renderer, pointer->cursor.buffer);
144- wld_fill_rectangle(swc.shm->renderer, 0x00000000, 0, 0, pointer->cursor.buffer->width, pointer->cursor.buffer->height);
145+ wld_fill_rectangle(swc.shm->renderer, 0x00000000, 0, 0,
146+ pointer->cursor.buffer->width,
147+ pointer->cursor.buffer->height);
148
149- if (buffer)
150- wld_copy_rectangle(swc.shm->renderer, buffer, 0, 0, 0, 0, buffer->width, buffer->height);
151+ if (buffer) {
152+ wld_copy_rectangle(swc.shm->renderer, buffer, 0, 0, 0, 0, buffer->width,
153+ buffer->height);
154+ }
155
156 wld_flush(swc.shm->renderer);
157
158- if (surface)
159+ if (surface) {
160 pixman_region32_clear(&surface->state.damage);
161+ }
162
163 /* TODO: Send an early release to the buffer */
164
165- if (view_set_size_from_buffer(view, buffer))
166+ if (view_set_size_from_buffer(view, buffer)) {
167 view_update_screens(view);
168+ }
169
170- wl_list_for_each (screen, &swc.screens, link) {
171- view_attach(&screen->planes.cursor->view, buffer ? pointer->cursor.buffer : NULL);
172+ wl_list_for_each(screen, &swc.screens, link)
173+ {
174+ view_attach(&screen->planes.cursor->view,
175+ buffer ? pointer->cursor.buffer : NULL);
176 view_update(&screen->planes.cursor->view);
177 }
178
179@@ -194,11 +218,14 @@ move(struct view *view, int32_t x, int32_t y)
180 {
181 struct screen *screen;
182
183- if (view_set_position(view, x, y))
184+ if (view_set_position(view, x, y)) {
185 view_update_screens(view);
186+ }
187
188- wl_list_for_each (screen, &swc.screens, link) {
189- view_move(&screen->planes.cursor->view, view->geometry.x, view->geometry.y);
190+ wl_list_for_each(screen, &swc.screens, link)
191+ {
192+ view_move(&screen->planes.cursor->view, view->geometry.x,
193+ view->geometry.y);
194 view_update(&screen->planes.cursor->view);
195 }
196
197@@ -206,9 +233,9 @@ move(struct view *view, int32_t x, int32_t y)
198 }
199
200 static const struct view_impl view_impl = {
201- .update = update,
202- .attach = attach,
203- .move = move,
204+ .update = update,
205+ .attach = attach,
206+ .move = move,
207 };
208
209 static inline void
210@@ -223,8 +250,9 @@ update_cursor(struct pointer *pointer)
211 static void
212 drop_client_cursor_surface(struct pointer *pointer)
213 {
214- if (!pointer || !pointer->cursor.surface)
215+ if (!pointer || !pointer->cursor.surface) {
216 return;
217+ }
218 surface_set_view(pointer->cursor.surface, NULL);
219 wl_list_remove(&pointer->cursor.destroy_listener.link);
220 pointer->cursor.surface = NULL;
221@@ -233,8 +261,9 @@ drop_client_cursor_surface(struct pointer *pointer)
222 static void
223 apply_cursor_override(struct pointer *pointer)
224 {
225- if (!pointer || pointer->cursor.surface)
226+ if (!pointer || pointer->cursor.surface) {
227 return;
228+ }
229
230 pointer_set_cursor(pointer, cursor_left_ptr);
231 }
232@@ -257,23 +286,25 @@ swc_set_cursor_mode(enum swc_cursor_mode mode)
233 struct pointer *pointer = swc.seat ? swc.seat->pointer : NULL;
234
235 cursor_mode = mode;
236- if (cursor_mode == SWC_CURSOR_MODE_COMPOSITOR)
237+ if (cursor_mode == SWC_CURSOR_MODE_COMPOSITOR) {
238 drop_client_cursor_surface(pointer);
239+ }
240 apply_cursor_override(pointer);
241 }
242
243 EXPORT void
244-swc_set_cursor_image(enum swc_cursor_kind kind,
245- const uint32_t *argb8888,
246- uint32_t width, uint32_t height,
247- int32_t hotspot_x, int32_t hotspot_y)
248+swc_set_cursor_image(enum swc_cursor_kind kind, const uint32_t *argb8888,
249+ uint32_t width, uint32_t height, int32_t hotspot_x,
250+ int32_t hotspot_y)
251 {
252 struct pointer *pointer = swc.seat ? swc.seat->pointer : NULL;
253
254- if (kind < 0 || kind >= (int)ARRAY_LENGTH(cursor_images))
255+ if (kind < 0 || kind >= (int)ARRAY_LENGTH(cursor_images)) {
256 return;
257- if (!argb8888 || width == 0 || height == 0)
258+ }
259+ if (!argb8888 || width == 0 || height == 0) {
260 return;
261+ }
262
263 cursor_images[kind].data = argb8888;
264 cursor_images[kind].width = width;
265@@ -282,8 +313,9 @@ swc_set_cursor_image(enum swc_cursor_kind kind,
266 cursor_images[kind].hotspot_y = hotspot_y;
267 cursor_images[kind].active = true;
268
269- if (cursor_mode == SWC_CURSOR_MODE_COMPOSITOR)
270+ if (cursor_mode == SWC_CURSOR_MODE_COMPOSITOR) {
271 drop_client_cursor_surface(pointer);
272+ }
273 apply_cursor_override(pointer);
274 }
275
276@@ -292,8 +324,9 @@ swc_clear_cursor_image(enum swc_cursor_kind kind)
277 {
278 struct pointer *pointer = swc.seat ? swc.seat->pointer : NULL;
279
280- if (kind < 0 || kind >= (int)ARRAY_LENGTH(cursor_images))
281+ if (kind < 0 || kind >= (int)ARRAY_LENGTH(cursor_images)) {
282 return;
283+ }
284
285 cursor_images[kind].active = false;
286 cursor_images[kind].data = NULL;
287@@ -306,13 +339,14 @@ pointer_set_cursor(struct pointer *pointer, uint32_t id)
288 {
289 struct cursor *cursor = &cursor_metadata[id];
290 const uint32_t *data = cursor_data;
291- union wld_object object = { .ptr = &cursor_data[cursor->offset] };
292+ union wld_object object = {.ptr = &cursor_data[cursor->offset]};
293 struct wld_buffer *buffer;
294
295 if (id == cursor_left_ptr) {
296 enum swc_cursor_kind kind = cursor_override;
297- if (kind < 0 || kind >= (int)ARRAY_LENGTH(cursor_images))
298+ if (kind < 0 || kind >= (int)ARRAY_LENGTH(cursor_images)) {
299 kind = SWC_CURSOR_DEFAULT;
300+ }
301
302 if (cursor_images[kind].active) {
303 static struct cursor custom_cursor;
304@@ -328,8 +362,9 @@ pointer_set_cursor(struct pointer *pointer, uint32_t id)
305 }
306 }
307
308- if (pointer->cursor.internal_buffer)
309+ if (pointer->cursor.internal_buffer) {
310 wld_buffer_unreference(pointer->cursor.internal_buffer);
311+ }
312 if (pointer->cursor.surface) {
313 surface_set_view(pointer->cursor.surface, NULL);
314 wl_list_remove(&pointer->cursor.destroy_listener.link);
315@@ -337,9 +372,11 @@ pointer_set_cursor(struct pointer *pointer, uint32_t id)
316 }
317
318 buffer = wld_import_buffer(swc.shm->context, WLD_OBJECT_DATA, object,
319- cursor->width, cursor->height, WLD_FORMAT_ARGB8888, cursor->width * 4);
320- if (!buffer)
321+ cursor->width, cursor->height,
322+ WLD_FORMAT_ARGB8888, cursor->width * 4);
323+ if (!buffer) {
324 WARNING("Failed to create cursor buffer\n");
325+ }
326 pointer->cursor.internal_buffer = buffer;
327 pointer->cursor.hotspot.x = cursor->hotspot_x;
328 pointer->cursor.hotspot.y = cursor->hotspot_y;
329@@ -348,48 +385,59 @@ pointer_set_cursor(struct pointer *pointer, uint32_t id)
330 }
331
332 static bool
333-client_handle_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t x, wl_fixed_t y)
334+client_handle_motion(struct pointer_handler *handler, uint32_t time,
335+ wl_fixed_t x, wl_fixed_t y)
336 {
337 struct pointer *pointer = wl_container_of(handler, pointer, client_handler);
338 struct wl_resource *resource;
339 wl_fixed_t sx, sy;
340 int32_t origin_x, origin_y;
341
342- if (wl_list_empty(&pointer->focus.active))
343+ if (wl_list_empty(&pointer->focus.active)) {
344 return false;
345+ }
346
347- origin_x = pointer->focus.view->base.geometry.x - pointer->focus.view->buffer_offset_x;
348- origin_y = pointer->focus.view->base.geometry.y - pointer->focus.view->buffer_offset_y;
349+ origin_x = pointer->focus.view->base.geometry.x -
350+ pointer->focus.view->buffer_offset_x;
351+ origin_y = pointer->focus.view->base.geometry.y -
352+ pointer->focus.view->buffer_offset_y;
353 sx = x - wl_fixed_from_int(origin_x);
354 sy = y - wl_fixed_from_int(origin_y);
355- wl_resource_for_each (resource, &pointer->focus.active)
356- wl_pointer_send_motion(resource, time, sx, sy);
357+ wl_resource_for_each(resource, &pointer->focus.active)
358+ wl_pointer_send_motion(resource, time, sx, sy);
359 return true;
360 }
361
362 static bool
363-client_handle_button(struct pointer_handler *handler, uint32_t time, struct button *button, uint32_t state)
364+client_handle_button(struct pointer_handler *handler, uint32_t time,
365+ struct button *button, uint32_t state)
366 {
367 struct pointer *pointer = wl_container_of(handler, pointer, client_handler);
368 struct wl_resource *resource;
369
370- if (wl_list_empty(&pointer->focus.active))
371+ if (wl_list_empty(&pointer->focus.active)) {
372 return false;
373+ }
374
375- wl_resource_for_each (resource, &pointer->focus.active)
376- wl_pointer_send_button(resource, button->press.serial, time, button->press.value, state);
377+ wl_resource_for_each(resource, &pointer->focus.active)
378+ wl_pointer_send_button(resource, button->press.serial, time,
379+ button->press.value, state);
380 return true;
381 }
382
383 static bool
384-client_handle_axis(struct pointer_handler *handler, uint32_t time, enum wl_pointer_axis axis, enum wl_pointer_axis_source source, wl_fixed_t value, int value120)
385+client_handle_axis(struct pointer_handler *handler, uint32_t time,
386+ enum wl_pointer_axis axis,
387+ enum wl_pointer_axis_source source, wl_fixed_t value,
388+ int value120)
389 {
390 struct pointer *pointer = wl_container_of(handler, pointer, client_handler);
391 struct wl_resource *resource;
392 int ver;
393
394- if (wl_list_empty(&pointer->focus.active))
395+ if (wl_list_empty(&pointer->focus.active)) {
396 return false;
397+ }
398
399 if (pointer->client_axis_source != -1) {
400 assert(pointer->client_axis_source == source);
401@@ -398,20 +446,24 @@ client_handle_axis(struct pointer_handler *handler, uint32_t time, enum wl_point
402 pointer->client_axis_source = source;
403 }
404
405- wl_resource_for_each (resource, &pointer->focus.active) {
406+ wl_resource_for_each(resource, &pointer->focus.active)
407+ {
408 ver = wl_resource_get_version(resource);
409- if (source != -1 && ver >= WL_POINTER_AXIS_SOURCE_SINCE_VERSION)
410+ if (source != -1 && ver >= WL_POINTER_AXIS_SOURCE_SINCE_VERSION) {
411 wl_pointer_send_axis_source(resource, source);
412+ }
413 if (value120) {
414- if (ver >= WL_POINTER_AXIS_VALUE120_SINCE_VERSION)
415+ if (ver >= WL_POINTER_AXIS_VALUE120_SINCE_VERSION) {
416 wl_pointer_send_axis_value120(resource, axis, value120);
417- else if (ver >= WL_POINTER_AXIS_DISCRETE_SINCE_VERSION)
418+ } else if (ver >= WL_POINTER_AXIS_DISCRETE_SINCE_VERSION) {
419 wl_pointer_send_axis_discrete(resource, axis, value120 / 120);
420+ }
421 }
422- if (value)
423+ if (value) {
424 wl_pointer_send_axis(resource, time, axis, value);
425- else if (ver >= WL_POINTER_AXIS_STOP_SINCE_VERSION)
426+ } else if (ver >= WL_POINTER_AXIS_STOP_SINCE_VERSION) {
427 wl_pointer_send_axis_stop(resource, time, axis);
428+ }
429 }
430 return true;
431 }
432@@ -422,9 +474,12 @@ client_handle_frame(struct pointer_handler *handler)
433 struct pointer *pointer = wl_container_of(handler, pointer, client_handler);
434 struct wl_resource *resource;
435
436- wl_resource_for_each (resource, &pointer->focus.active) {
437- if (wl_resource_get_version(resource) >= WL_POINTER_FRAME_SINCE_VERSION)
438+ wl_resource_for_each(resource, &pointer->focus.active)
439+ {
440+ if (wl_resource_get_version(resource) >=
441+ WL_POINTER_FRAME_SINCE_VERSION) {
442 wl_pointer_send_frame(resource);
443+ }
444 }
445 pointer->client_axis_source = -1;
446 }
447@@ -454,16 +509,19 @@ pointer_initialize(struct pointer *pointer)
448 view_initialize(&pointer->cursor.view, &view_impl);
449 pointer->cursor.surface = NULL;
450 pointer->cursor.destroy_listener.notify = &handle_cursor_surface_destroy;
451- pointer->cursor.buffer = wld_create_buffer(swc.drm->context, swc.drm->cursor_w, swc.drm->cursor_h, WLD_FORMAT_ARGB8888, WLD_FLAG_MAP | WLD_FLAG_CURSOR);
452+ pointer->cursor.buffer = wld_create_buffer(
453+ swc.drm->context, swc.drm->cursor_w, swc.drm->cursor_h,
454+ WLD_FORMAT_ARGB8888, WLD_FLAG_MAP | WLD_FLAG_CURSOR);
455 pointer->cursor.internal_buffer = NULL;
456
457- if (!pointer->cursor.buffer)
458+ if (!pointer->cursor.buffer) {
459 return false;
460+ }
461
462 pointer_set_cursor(pointer, cursor_left_ptr);
463
464- wl_list_for_each (screen, &swc.screens, link)
465- view_attach(&screen->planes.cursor->view, pointer->cursor.buffer);
466+ wl_list_for_each(screen, &swc.screens, link)
467+ view_attach(&screen->planes.cursor->view, pointer->cursor.buffer);
468
469 input_focus_initialize(&pointer->focus, &pointer->focus_handler);
470 pixman_region32_init(&pointer->region);
471@@ -496,7 +554,8 @@ clip_position(struct pointer *pointer, wl_fixed_t fx, wl_fixed_t fy)
472 last_y = wl_fixed_to_int(pointer->y);
473
474 if (!pixman_region32_contains_point(&pointer->region, x, y, NULL)) {
475- if (!pixman_region32_contains_point(&pointer->region, last_x, last_y, &box)) {
476+ if (!pixman_region32_contains_point(&pointer->region, last_x, last_y,
477+ &box)) {
478 WARNING("cursor is not in the visible screen area\n");
479 pointer->x = 0;
480 pointer->y = 0;
481@@ -521,40 +580,46 @@ pointer_set_region(struct pointer *pointer, pixman_region32_t *region)
482
483 static void
484 set_cursor(struct wl_client *client, struct wl_resource *resource,
485- uint32_t serial, struct wl_resource *surface_resource, int32_t hotspot_x, int32_t hotspot_y)
486+ uint32_t serial, struct wl_resource *surface_resource,
487+ int32_t hotspot_x, int32_t hotspot_y)
488 {
489 struct pointer *pointer = wl_resource_get_user_data(resource);
490 struct surface *surface;
491
492 (void)serial;
493
494- if (client != pointer->focus.client)
495+ if (client != pointer->focus.client) {
496 return;
497+ }
498
499 /* If forcing compositor cursor, ignore client cursor surfaces. */
500- if (cursor_mode == SWC_CURSOR_MODE_COMPOSITOR || cursor_override != SWC_CURSOR_DEFAULT)
501+ if (cursor_mode == SWC_CURSOR_MODE_COMPOSITOR ||
502+ cursor_override != SWC_CURSOR_DEFAULT) {
503 return;
504+ }
505
506 if (pointer->cursor.surface) {
507 surface_set_view(pointer->cursor.surface, NULL);
508 wl_list_remove(&pointer->cursor.destroy_listener.link);
509 }
510
511- surface = surface_resource ? wl_resource_get_user_data(surface_resource) : NULL;
512+ surface =
513+ surface_resource ? wl_resource_get_user_data(surface_resource) : NULL;
514 pointer->cursor.surface = surface;
515 pointer->cursor.hotspot.x = hotspot_x;
516 pointer->cursor.hotspot.y = hotspot_y;
517
518 if (surface) {
519 surface_set_view(surface, &pointer->cursor.view);
520- wl_resource_add_destroy_listener(surface->resource, &pointer->cursor.destroy_listener);
521+ wl_resource_add_destroy_listener(surface->resource,
522+ &pointer->cursor.destroy_listener);
523 update_cursor(pointer);
524 }
525 }
526
527 static const struct wl_pointer_interface pointer_impl = {
528- .set_cursor = set_cursor,
529- .release = destroy_resource,
530+ .set_cursor = set_cursor,
531+ .release = destroy_resource,
532 };
533
534 static void
535@@ -565,14 +630,18 @@ unbind(struct wl_resource *resource)
536 }
537
538 struct wl_resource *
539-pointer_bind(struct pointer *pointer, struct wl_client *client, uint32_t version, uint32_t id)
540+pointer_bind(struct pointer *pointer, struct wl_client *client,
541+ uint32_t version, uint32_t id)
542 {
543 struct wl_resource *client_resource;
544
545- client_resource = wl_resource_create(client, &wl_pointer_interface, version, id);
546- if (!client_resource)
547+ client_resource =
548+ wl_resource_create(client, &wl_pointer_interface, version, id);
549+ if (!client_resource) {
550 return NULL;
551- wl_resource_set_implementation(client_resource, &pointer_impl, pointer, &unbind);
552+ }
553+ wl_resource_set_implementation(client_resource, &pointer_impl, pointer,
554+ &unbind);
555 input_focus_add_resource(&pointer->focus, client_resource);
556
557 return client_resource;
558@@ -583,16 +652,19 @@ pointer_get_button(struct pointer *pointer, uint32_t serial)
559 {
560 struct button *button;
561
562- wl_array_for_each (button, &pointer->buttons) {
563- if (button->press.serial == serial)
564+ wl_array_for_each(button, &pointer->buttons)
565+ {
566+ if (button->press.serial == serial) {
567 return button;
568+ }
569 }
570
571 return NULL;
572 }
573
574 void
575-pointer_handle_button(struct pointer *pointer, uint32_t time, uint32_t value, uint32_t state)
576+pointer_handle_button(struct pointer *pointer, uint32_t time, uint32_t value,
577+ uint32_t state)
578 {
579 struct pointer_handler *handler;
580 struct button *button;
581@@ -601,11 +673,13 @@ pointer_handle_button(struct pointer *pointer, uint32_t time, uint32_t value, ui
582 serial = wl_display_next_serial(swc.display);
583
584 if (state == WL_POINTER_BUTTON_STATE_RELEASED) {
585- wl_array_for_each (button, &pointer->buttons) {
586+ wl_array_for_each(button, &pointer->buttons)
587+ {
588 if (button->press.value == value) {
589 if (button->handler) {
590 button->press.serial = serial;
591- button->handler->button(button->handler, time, button, state);
592+ button->handler->button(button->handler, time, button,
593+ state);
594 button->handler->pending = true;
595 }
596
597@@ -616,15 +690,18 @@ pointer_handle_button(struct pointer *pointer, uint32_t time, uint32_t value, ui
598 } else {
599 button = wl_array_add(&pointer->buttons, sizeof(*button));
600
601- if (!button)
602+ if (!button) {
603 return;
604+ }
605
606 button->press.value = value;
607 button->press.serial = serial;
608 button->handler = NULL;
609
610- wl_list_for_each (handler, &pointer->handlers, link) {
611- if (handler->button && handler->button(handler, time, button, state)) {
612+ wl_list_for_each(handler, &pointer->handlers, link)
613+ {
614+ if (handler->button &&
615+ handler->button(handler, time, button, state)) {
616 button->handler = handler;
617 handler->pending = true;
618 break;
619@@ -634,12 +711,17 @@ pointer_handle_button(struct pointer *pointer, uint32_t time, uint32_t value, ui
620 }
621
622 void
623-pointer_handle_axis(struct pointer *pointer, uint32_t time, enum wl_pointer_axis axis, enum wl_pointer_axis_source source, wl_fixed_t value, int value120)
624+pointer_handle_axis(struct pointer *pointer, uint32_t time,
625+ enum wl_pointer_axis axis,
626+ enum wl_pointer_axis_source source, wl_fixed_t value,
627+ int value120)
628 {
629 struct pointer_handler *handler;
630
631- wl_list_for_each (handler, &pointer->handlers, link) {
632- if (handler->axis && handler->axis(handler, time, axis, source, value, value120)) {
633+ wl_list_for_each(handler, &pointer->handlers, link)
634+ {
635+ if (handler->axis &&
636+ handler->axis(handler, time, axis, source, value, value120)) {
637 handler->pending = true;
638 break;
639 }
640@@ -647,20 +729,25 @@ pointer_handle_axis(struct pointer *pointer, uint32_t time, enum wl_pointer_axis
641 }
642
643 void
644-pointer_handle_relative_motion(struct pointer *pointer, uint32_t time, wl_fixed_t dx, wl_fixed_t dy)
645+pointer_handle_relative_motion(struct pointer *pointer, uint32_t time,
646+ wl_fixed_t dx, wl_fixed_t dy)
647 {
648- pointer_handle_absolute_motion(pointer, time, pointer->x + dx, pointer->y + dy);
649+ pointer_handle_absolute_motion(pointer, time, pointer->x + dx,
650+ pointer->y + dy);
651 }
652
653 void
654-pointer_handle_absolute_motion(struct pointer *pointer, uint32_t time, wl_fixed_t x, wl_fixed_t y)
655+pointer_handle_absolute_motion(struct pointer *pointer, uint32_t time,
656+ wl_fixed_t x, wl_fixed_t y)
657 {
658 struct pointer_handler *handler;
659
660 clip_position(pointer, x, y);
661
662- wl_list_for_each (handler, &pointer->handlers, link) {
663- if (handler->motion && handler->motion(handler, time, pointer->x, pointer->y)) {
664+ wl_list_for_each(handler, &pointer->handlers, link)
665+ {
666+ if (handler->motion &&
667+ handler->motion(handler, time, pointer->x, pointer->y)) {
668 handler->pending = true;
669 break;
670 }
671@@ -674,7 +761,8 @@ pointer_handle_frame(struct pointer *pointer)
672 {
673 struct pointer_handler *handler;
674
675- wl_list_for_each (handler, &pointer->handlers, link) {
676+ wl_list_for_each(handler, &pointer->handlers, link)
677+ {
678 if (handler->pending && handler->frame) {
679 handler->frame(handler);
680 handler->pending = false;
+40,
-17
1@@ -36,9 +36,13 @@ struct button {
2 };
3
4 struct pointer_handler {
5- bool (*motion)(struct pointer_handler *handler, uint32_t time, wl_fixed_t x, wl_fixed_t y);
6- bool (*button)(struct pointer_handler *handler, uint32_t time, struct button *button, uint32_t state);
7- bool (*axis)(struct pointer_handler *handler, uint32_t time, enum wl_pointer_axis axis, enum wl_pointer_axis_source source, wl_fixed_t value, int value120);
8+ bool (*motion)(struct pointer_handler *handler, uint32_t time, wl_fixed_t x,
9+ wl_fixed_t y);
10+ bool (*button)(struct pointer_handler *handler, uint32_t time,
11+ struct button *button, uint32_t state);
12+ bool (*axis)(struct pointer_handler *handler, uint32_t time,
13+ enum wl_pointer_axis axis, enum wl_pointer_axis_source source,
14+ wl_fixed_t value, int value120);
15 void (*frame)(struct pointer_handler *handler);
16
17 int pending;
18@@ -72,19 +76,38 @@ struct pointer {
19 pixman_region32_t region;
20 };
21
22-bool pointer_initialize(struct pointer *pointer);
23-void pointer_finalize(struct pointer *pointer);
24-void pointer_set_focus(struct pointer *pointer, struct compositor_view *view);
25-void pointer_set_region(struct pointer *pointer, pixman_region32_t *region);
26-void pointer_set_cursor(struct pointer *pointer, uint32_t id);
27-
28-struct button *pointer_get_button(struct pointer *pointer, uint32_t serial);
29-
30-struct wl_resource *pointer_bind(struct pointer *pointer, struct wl_client *client, uint32_t version, uint32_t id);
31-void pointer_handle_button(struct pointer *pointer, uint32_t time, uint32_t button, uint32_t state);
32-void pointer_handle_axis(struct pointer *pointer, uint32_t time, enum wl_pointer_axis axis, enum wl_pointer_axis_source source, wl_fixed_t value, int value120);
33-void pointer_handle_relative_motion(struct pointer *pointer, uint32_t time, wl_fixed_t dx, wl_fixed_t dy);
34-void pointer_handle_absolute_motion(struct pointer *pointer, uint32_t time, wl_fixed_t x, wl_fixed_t y);
35-void pointer_handle_frame(struct pointer *pointer);
36+bool
37+pointer_initialize(struct pointer *pointer);
38+void
39+pointer_finalize(struct pointer *pointer);
40+void
41+pointer_set_focus(struct pointer *pointer, struct compositor_view *view);
42+void
43+pointer_set_region(struct pointer *pointer, pixman_region32_t *region);
44+void
45+pointer_set_cursor(struct pointer *pointer, uint32_t id);
46+
47+struct button *
48+pointer_get_button(struct pointer *pointer, uint32_t serial);
49+
50+struct wl_resource *
51+pointer_bind(struct pointer *pointer, struct wl_client *client,
52+ uint32_t version, uint32_t id);
53+void
54+pointer_handle_button(struct pointer *pointer, uint32_t time, uint32_t button,
55+ uint32_t state);
56+void
57+pointer_handle_axis(struct pointer *pointer, uint32_t time,
58+ enum wl_pointer_axis axis,
59+ enum wl_pointer_axis_source source, wl_fixed_t value,
60+ int value120);
61+void
62+pointer_handle_relative_motion(struct pointer *pointer, uint32_t time,
63+ wl_fixed_t dx, wl_fixed_t dy);
64+void
65+pointer_handle_absolute_motion(struct pointer *pointer, uint32_t time,
66+ wl_fixed_t x, wl_fixed_t y);
67+void
68+pointer_handle_frame(struct pointer *pointer);
69
70 #endif
+24,
-13
1@@ -29,8 +29,8 @@
2 #include "util.h"
3
4 #include <errno.h>
5-#include <wld/wld.h>
6 #include <wld/drm.h>
7+#include <wld/wld.h>
8 #include <xf86drm.h>
9 #include <xf86drmMode.h>
10
11@@ -57,17 +57,21 @@ attach(struct view *view, struct wld_buffer *buffer)
12
13 fb = drm_get_framebuffer(buffer);
14 if (plane->need_modeset) {
15- ret = drmModeSetCrtc(swc.drm->fd, plane->crtc, fb, 0, 0, plane->connectors.data, plane->connectors.size / 4, &plane->mode.info);
16+ ret = drmModeSetCrtc(swc.drm->fd, plane->crtc, fb, 0, 0,
17+ plane->connectors.data, plane->connectors.size / 4,
18+ &plane->mode.info);
19
20 if (ret == 0) {
21 wl_event_loop_add_idle(swc.event_loop, &send_frame, plane);
22 plane->need_modeset = false;
23 } else {
24- ERROR("Could not set CRTC to next framebuffer: %s\n", strerror(-ret));
25+ ERROR("Could not set CRTC to next framebuffer: %s\n",
26+ strerror(-ret));
27 return ret;
28 }
29 } else {
30- ret = drmModePageFlip(swc.drm->fd, plane->crtc, fb, DRM_MODE_PAGE_FLIP_EVENT, &plane->drm_handler);
31+ ret = drmModePageFlip(swc.drm->fd, plane->crtc, fb,
32+ DRM_MODE_PAGE_FLIP_EVENT, &plane->drm_handler);
33
34 if (ret < 0) {
35 ERROR("Page flip failed: %s\n", strerror(errno));
36@@ -86,9 +90,9 @@ move(struct view *view, int32_t x, int32_t y)
37 }
38
39 static const struct view_impl view_impl = {
40- .update = update,
41- .attach = attach,
42- .move = move,
43+ .update = update,
44+ .attach = attach,
45+ .move = move,
46 };
47
48 static void
49@@ -102,7 +106,8 @@ static void
50 handle_swc_event(struct wl_listener *listener, void *data)
51 {
52 struct event *event = data;
53- struct primary_plane *plane = wl_container_of(listener, plane, swc_listener);
54+ struct primary_plane *plane =
55+ wl_container_of(listener, plane, swc_listener);
56
57 switch (event->type) {
58 case SWC_EVENT_ACTIVATED:
59@@ -112,24 +117,29 @@ handle_swc_event(struct wl_listener *listener, void *data)
60 }
61
62 bool
63-primary_plane_initialize(struct primary_plane *plane, uint32_t crtc, struct mode *mode, uint32_t *connectors, uint32_t num_connectors)
64+primary_plane_initialize(struct primary_plane *plane, uint32_t crtc,
65+ struct mode *mode, uint32_t *connectors,
66+ uint32_t num_connectors)
67 {
68 uint32_t *plane_connectors;
69
70 if (!(plane->original_crtc_state = drmModeGetCrtc(swc.drm->fd, crtc))) {
71- ERROR("Failed to get CRTC state for CRTC %u: %s\n", crtc, strerror(errno));
72+ ERROR("Failed to get CRTC state for CRTC %u: %s\n", crtc,
73+ strerror(errno));
74 goto error0;
75 }
76
77 wl_array_init(&plane->connectors);
78- plane_connectors = wl_array_add(&plane->connectors, num_connectors * sizeof(connectors[0]));
79+ plane_connectors = wl_array_add(&plane->connectors,
80+ num_connectors * sizeof(connectors[0]));
81
82 if (!plane_connectors) {
83 ERROR("Failed to allocate connector array\n");
84 goto error1;
85 }
86
87- memcpy(plane_connectors, connectors, num_connectors * sizeof(connectors[0]));
88+ memcpy(plane_connectors, connectors,
89+ num_connectors * sizeof(connectors[0]));
90 plane->crtc = crtc;
91 plane->need_modeset = true;
92 view_initialize(&plane->view, &view_impl);
93@@ -153,6 +163,7 @@ primary_plane_finalize(struct primary_plane *plane)
94 {
95 wl_array_release(&plane->connectors);
96 drmModeCrtcPtr crtc = plane->original_crtc_state;
97- drmModeSetCrtc(swc.drm->fd, crtc->crtc_id, crtc->buffer_id, crtc->x, crtc->y, NULL, 0, &crtc->mode);
98+ drmModeSetCrtc(swc.drm->fd, crtc->crtc_id, crtc->buffer_id, crtc->x,
99+ crtc->y, NULL, 0, &crtc->mode);
100 drmModeFreeCrtc(crtc);
101 }
+7,
-3
1@@ -28,8 +28,8 @@
2 #include "mode.h"
3 #include "view.h"
4
5-#include <stdint.h>
6 #include <stdbool.h>
7+#include <stdint.h>
8 #include <wayland-server.h>
9
10 struct primary_plane {
11@@ -43,7 +43,11 @@ struct primary_plane {
12 struct wl_listener swc_listener;
13 };
14
15-bool primary_plane_initialize(struct primary_plane *plane, uint32_t crtc, struct mode *mode, uint32_t *connectors, uint32_t num_connectors);
16-void primary_plane_finalize(struct primary_plane *plane);
17+bool
18+primary_plane_initialize(struct primary_plane *plane, uint32_t crtc,
19+ struct mode *mode, uint32_t *connectors,
20+ uint32_t num_connectors);
21+void
22+primary_plane_finalize(struct primary_plane *plane);
23
24 #endif
+13,
-8
1@@ -6,7 +6,8 @@
2 #include <wayland-server.h>
3
4 static void
5-add(struct wl_client *client, struct wl_resource *resource, int32_t x, int32_t y, int32_t width, int32_t height)
6+add(struct wl_client *client, struct wl_resource *resource, int32_t x,
7+ int32_t y, int32_t width, int32_t height)
8 {
9 pixman_region32_t *region = wl_resource_get_user_data(resource);
10
11@@ -14,7 +15,8 @@ add(struct wl_client *client, struct wl_resource *resource, int32_t x, int32_t y
12 }
13
14 static void
15-subtract(struct wl_client *client, struct wl_resource *resource, int32_t x, int32_t y, int32_t width, int32_t height)
16+subtract(struct wl_client *client, struct wl_resource *resource, int32_t x,
17+ int32_t y, int32_t width, int32_t height)
18 {
19 pixman_region32_t *region = wl_resource_get_user_data(resource);
20 pixman_region32_t operand;
21@@ -24,9 +26,9 @@ subtract(struct wl_client *client, struct wl_resource *resource, int32_t x, int3
22 }
23
24 static const struct wl_region_interface region_impl = {
25- .destroy = destroy_resource,
26- .add = add,
27- .subtract = subtract,
28+ .destroy = destroy_resource,
29+ .add = add,
30+ .subtract = subtract,
31 };
32
33 static void
34@@ -45,13 +47,16 @@ region_new(struct wl_client *client, uint32_t version, uint32_t id)
35 struct wl_resource *resource;
36
37 region = malloc(sizeof(*region));
38- if (!region)
39+ if (!region) {
40 goto error0;
41+ }
42
43 resource = wl_resource_create(client, &wl_region_interface, version, id);
44- if (!resource)
45+ if (!resource) {
46 goto error1;
47- wl_resource_set_implementation(resource, &region_impl, region, &region_destroy);
48+ }
49+ wl_resource_set_implementation(resource, &region_impl, region,
50+ &region_destroy);
51
52 pixman_region32_init(region);
53
+2,
-1
1@@ -5,6 +5,7 @@
2
3 struct wl_client;
4
5-struct wl_resource *region_new(struct wl_client *client, uint32_t version, uint32_t id);
6+struct wl_resource *
7+region_new(struct wl_client *client, uint32_t version, uint32_t id);
8
9 #endif
+51,
-28
1@@ -31,22 +31,28 @@
2 #include "pointer.h"
3 #include "util.h"
4
5-#include <stdlib.h>
6 #include "swc-server-protocol.h"
7+#include <stdlib.h>
8
9 #define INTERNAL(s) ((struct screen *)(s))
10
11 static struct screen *active_screen;
12 static const struct swc_screen_handler null_handler;
13
14-static bool handle_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t x, wl_fixed_t y);
15+static bool
16+handle_motion(struct pointer_handler *handler,
17+ uint32_t time,
18+ wl_fixed_t x,
19+ wl_fixed_t y);
20
21 struct pointer_handler screens_pointer_handler = {
22- .motion = handle_motion,
23+ .motion = handle_motion,
24 };
25
26 EXPORT void
27-swc_screen_set_handler(struct swc_screen *base, const struct swc_screen_handler *handler, void *data)
28+swc_screen_set_handler(struct swc_screen *base,
29+ const struct swc_screen_handler *handler,
30+ void *data)
31 {
32 struct screen *screen = INTERNAL(base);
33
34@@ -59,11 +65,13 @@ screens_initialize(void)
35 {
36 wl_list_init(&swc.screens);
37
38- if (!drm_create_screens(&swc.screens))
39+ if (!drm_create_screens(&swc.screens)) {
40 return false;
41+ }
42
43- if (wl_list_empty(&swc.screens))
44+ if (wl_list_empty(&swc.screens)) {
45 return false;
46+ }
47
48 return true;
49 }
50@@ -73,8 +81,8 @@ screens_finalize(void)
51 {
52 struct screen *screen, *tmp;
53
54- wl_list_for_each_safe (screen, tmp, &swc.screens, link)
55- screen_destroy(screen);
56+ wl_list_for_each_safe(screen, tmp, &swc.screens, link)
57+ screen_destroy(screen);
58 }
59
60 static void
61@@ -101,13 +109,15 @@ screen_new(uint32_t crtc, struct output *output, struct plane *cursor_plane)
62 int32_t x = 0;
63
64 /* Simple heuristic for initial screen positioning. */
65- wl_list_for_each (screen, &swc.screens, link)
66- x = MAX(x, screen->base.geometry.x + screen->base.geometry.width);
67+ wl_list_for_each(screen, &swc.screens, link) x =
68+ MAX(x, screen->base.geometry.x + screen->base.geometry.width);
69
70- if (!(screen = malloc(sizeof(*screen))))
71+ if (!(screen = malloc(sizeof(*screen)))) {
72 goto error0;
73+ }
74
75- screen->global = wl_global_create(swc.display, &swc_screen_interface, 1, screen, &bind_screen);
76+ screen->global = wl_global_create(
77+ swc.display, &swc_screen_interface, 1, screen, &bind_screen);
78
79 if (!screen->global) {
80 ERROR("Failed to create screen global\n");
81@@ -116,7 +126,11 @@ screen_new(uint32_t crtc, struct output *output, struct plane *cursor_plane)
82
83 screen->crtc = crtc;
84
85- if (!primary_plane_initialize(&screen->planes.primary, crtc, output->preferred_mode, &output->connector, 1)) {
86+ if (!primary_plane_initialize(&screen->planes.primary,
87+ crtc,
88+ output->preferred_mode,
89+ &output->connector,
90+ 1)) {
91 ERROR("Failed to initialize primary plane\n");
92 goto error2;
93 }
94@@ -152,13 +166,15 @@ screen_destroy(struct screen *screen)
95 {
96 struct output *output, *next;
97
98- if (active_screen == screen)
99+ if (active_screen == screen) {
100 active_screen = NULL;
101- if (screen->handler->destroy)
102+ }
103+ if (screen->handler->destroy) {
104 screen->handler->destroy(screen->handler_data);
105+ }
106 wl_signal_emit(&screen->destroy_signal, NULL);
107- wl_list_for_each_safe (output, next, &screen->outputs, link)
108- output_destroy(output);
109+ wl_list_for_each_safe(output, next, &screen->outputs, link)
110+ output_destroy(output);
111 primary_plane_finalize(&screen->planes.primary);
112 plane_destroy(screen->planes.cursor);
113 free(screen);
114@@ -174,44 +190,51 @@ screen_update_usable_geometry(struct screen *screen)
115
116 DEBUG("Updating usable geometry\n");
117
118- pixman_region32_init_rect(&total_usable, geom->x, geom->y, geom->width, geom->height);
119+ pixman_region32_init_rect(
120+ &total_usable, geom->x, geom->y, geom->width, geom->height);
121 pixman_region32_init(&usable);
122
123- wl_list_for_each (modifier, &screen->modifiers, link) {
124+ wl_list_for_each(modifier, &screen->modifiers, link)
125+ {
126 modifier->modify(modifier, geom, &usable);
127 pixman_region32_intersect(&total_usable, &total_usable, &usable);
128 }
129
130 extents = pixman_region32_extents(&total_usable);
131
132- if (extents->x1 != screen->base.usable_geometry.x
133- || extents->y1 != screen->base.usable_geometry.y
134- || (extents->x2 - extents->x1) != screen->base.usable_geometry.width
135- || (extents->y2 - extents->y1) != screen->base.usable_geometry.height)
136- {
137+ if (extents->x1 != screen->base.usable_geometry.x ||
138+ extents->y1 != screen->base.usable_geometry.y ||
139+ (extents->x2 - extents->x1) != screen->base.usable_geometry.width ||
140+ (extents->y2 - extents->y1) != screen->base.usable_geometry.height) {
141 screen->base.usable_geometry.x = extents->x1;
142 screen->base.usable_geometry.y = extents->y1;
143 screen->base.usable_geometry.width = extents->x2 - extents->x1;
144 screen->base.usable_geometry.height = extents->y2 - extents->y1;
145
146- if (screen->handler->usable_geometry_changed)
147+ if (screen->handler->usable_geometry_changed) {
148 screen->handler->usable_geometry_changed(screen->handler_data);
149+ }
150 }
151 }
152
153 bool
154-handle_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx, wl_fixed_t fy)
155+handle_motion(struct pointer_handler *handler,
156+ uint32_t time,
157+ wl_fixed_t fx,
158+ wl_fixed_t fy)
159 {
160 struct screen *screen;
161 int32_t x = wl_fixed_to_int(fx), y = wl_fixed_to_int(fy);
162
163- wl_list_for_each (screen, &swc.screens, link) {
164+ wl_list_for_each(screen, &swc.screens, link)
165+ {
166 if (rectangle_contains_point(&screen->base.geometry, x, y)) {
167 if (screen != active_screen) {
168 active_screen = screen;
169
170- if (screen->handler->entered)
171+ if (screen->handler->entered) {
172 screen->handler->entered(screen->handler_data);
173+ }
174 }
175 break;
176 }
+14,
-7
1@@ -24,8 +24,8 @@
2 #ifndef SWC_SCREEN_H
3 #define SWC_SCREEN_H
4
5-#include "swc.h"
6 #include "primary_plane.h"
7+#include "swc.h"
8
9 #include <wayland-util.h>
10
11@@ -37,7 +37,9 @@ struct screen_modifier {
12 * Takes the screen geometry and sets 'usable' to the usable region of the
13 * screen. 'usable' is an already initialized pixman region.
14 */
15- void (*modify)(struct screen_modifier *modifier, const struct swc_rectangle *geometry, struct pixman_region32 *usable);
16+ void (*modify)(struct screen_modifier *modifier,
17+ const struct swc_rectangle *geometry,
18+ struct pixman_region32 *usable);
19
20 struct wl_list link;
21 };
22@@ -64,11 +66,15 @@ struct screen {
23 struct wl_list link;
24 };
25
26-bool screens_initialize(void);
27-void screens_finalize(void);
28+bool
29+screens_initialize(void);
30+void
31+screens_finalize(void);
32
33-struct screen *screen_new(uint32_t crtc, struct output *output, struct plane *cursor_plane);
34-void screen_destroy(struct screen *screen);
35+struct screen *
36+screen_new(uint32_t crtc, struct output *output, struct plane *cursor_plane);
37+void
38+screen_destroy(struct screen *screen);
39
40 static inline uint32_t
41 screen_mask(struct screen *screen)
42@@ -76,6 +82,7 @@ screen_mask(struct screen *screen)
43 return 1 << screen->id;
44 }
45
46-void screen_update_usable_geometry(struct screen *screen);
47+void
48+screen_update_usable_geometry(struct screen *screen);
49
50 #endif
+173,
-95
1@@ -1,4 +1,3 @@
2-#include "seat.h"
3 #include "compositor.h"
4 #include "data_device.h"
5 #include "event.h"
6@@ -7,9 +6,11 @@
7 #include "launch.h"
8 #include "pointer.h"
9 #include "screen.h"
10+#include "seat.h"
11 #include "surface.h"
12 #include "util.h"
13
14+#include <ctype.h>
15 #include <dirent.h>
16 #include <errno.h>
17 #include <fcntl.h>
18@@ -19,9 +20,8 @@
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22-#include <unistd.h>
23-#include <ctype.h>
24 #include <sys/ioctl.h>
25+#include <unistd.h>
26
27 #include <linux/input.h>
28
29@@ -67,15 +67,18 @@ struct seat {
30 static void
31 handle_keyboard_focus_event(struct wl_listener *listener, void *data)
32 {
33- struct seat *seat = wl_container_of(listener, seat, keyboard_focus_listener);
34+ struct seat *seat =
35+ wl_container_of(listener, seat, keyboard_focus_listener);
36 struct event *ev = data;
37 struct input_focus_event_data *event_data = ev->data;
38
39- if (ev->type != INPUT_FOCUS_EVENT_CHANGED)
40+ if (ev->type != INPUT_FOCUS_EVENT_CHANGED) {
41 return;
42+ }
43
44 if (event_data->new) {
45- struct wl_client *client = wl_resource_get_client(event_data->new->surface->resource);
46+ struct wl_client *client =
47+ wl_resource_get_client(event_data->new->surface->resource);
48
49 /* offer the selection to the new focus */
50 data_device_offer_selection(seat->base.data_device, client);
51@@ -88,11 +91,14 @@ handle_data_device_event(struct wl_listener *listener, void *data)
52 struct seat *seat = wl_container_of(listener, seat, data_device_listener);
53 struct event *ev = data;
54
55- if (ev->type != DATA_DEVICE_EVENT_SELECTION_CHANGED)
56+ if (ev->type != DATA_DEVICE_EVENT_SELECTION_CHANGED) {
57 return;
58+ }
59
60- if (seat->base.keyboard->focus.client)
61- data_device_offer_selection(seat->base.data_device, seat->base.keyboard->focus.client);
62+ if (seat->base.keyboard->focus.client) {
63+ data_device_offer_selection(seat->base.data_device,
64+ seat->base.keyboard->focus.client);
65+ }
66 }
67
68 static void
69@@ -122,11 +128,13 @@ get_pointer(struct wl_client *client, struct wl_resource *resource, uint32_t id)
70 }
71
72 static void
73-get_keyboard(struct wl_client *client, struct wl_resource *resource, uint32_t id)
74+get_keyboard(struct wl_client *client, struct wl_resource *resource,
75+ uint32_t id)
76 {
77 struct seat *seat = wl_resource_get_user_data(resource);
78
79- keyboard_bind(seat->base.keyboard, client, wl_resource_get_version(resource), id);
80+ keyboard_bind(seat->base.keyboard, client,
81+ wl_resource_get_version(resource), id);
82 }
83
84 static void
85@@ -135,9 +143,9 @@ get_touch(struct wl_client *client, struct wl_resource *resource, uint32_t id)
86 }
87
88 static struct wl_seat_interface seat_impl = {
89- .get_pointer = get_pointer,
90- .get_keyboard = get_keyboard,
91- .get_touch = get_touch,
92+ .get_pointer = get_pointer,
93+ .get_keyboard = get_keyboard,
94+ .get_touch = get_touch,
95 };
96
97 static void
98@@ -146,15 +154,18 @@ bind_seat(struct wl_client *client, void *data, uint32_t version, uint32_t id)
99 struct seat *seat = data;
100 struct wl_resource *resource;
101
102- if (version > 4)
103+ if (version > 4) {
104 version = 4;
105+ }
106
107 resource = wl_resource_create(client, &wl_seat_interface, version, id);
108- wl_resource_set_implementation(resource, &seat_impl, seat, &remove_resource);
109+ wl_resource_set_implementation(resource, &seat_impl, seat,
110+ &remove_resource);
111 wl_list_insert(&seat->resources, wl_resource_get_link(resource));
112
113- if (version >= 2)
114+ if (version >= 2) {
115 wl_seat_send_name(resource, seat->name);
116+ }
117
118 wl_seat_send_capabilities(resource, seat->capabilities);
119 }
120@@ -171,16 +182,20 @@ handle_evdev_key(struct seat *seat, const struct input_event *ev)
121 uint32_t state;
122 uint32_t time = event_time_ms(ev);
123
124- if (ev->value == 2)
125+ if (ev->value == 2) {
126 return;
127+ }
128
129- if (ev->code >= BTN_MISC)
130+ if (ev->code >= BTN_MISC) {
131 pointer_handle_button(seat->base.pointer, time, ev->code,
132- ev->value ? WL_POINTER_BUTTON_STATE_PRESSED : WL_POINTER_BUTTON_STATE_RELEASED);
133- else {
134- if (ev->code > 255)
135+ ev->value ? WL_POINTER_BUTTON_STATE_PRESSED
136+ : WL_POINTER_BUTTON_STATE_RELEASED);
137+ } else {
138+ if (ev->code > 255) {
139 return;
140- state = (ev->value ? WL_KEYBOARD_KEY_STATE_PRESSED : WL_KEYBOARD_KEY_STATE_RELEASED);
141+ }
142+ state = (ev->value ? WL_KEYBOARD_KEY_STATE_PRESSED
143+ : WL_KEYBOARD_KEY_STATE_RELEASED);
144 keyboard_handle_key(seat->base.keyboard, time, ev->code, state);
145 }
146 }
147@@ -193,18 +208,24 @@ handle_evdev_rel(struct seat *seat, const struct input_event *ev)
148
149 switch (ev->code) {
150 case REL_X:
151- pointer_handle_relative_motion(seat->base.pointer, time, wl_fixed_from_int(ev->value), 0);
152+ pointer_handle_relative_motion(seat->base.pointer, time,
153+ wl_fixed_from_int(ev->value), 0);
154 break;
155 case REL_Y:
156- pointer_handle_relative_motion(seat->base.pointer, time, 0, wl_fixed_from_int(ev->value));
157+ pointer_handle_relative_motion(seat->base.pointer, time, 0,
158+ wl_fixed_from_int(ev->value));
159 break;
160 case REL_WHEEL:
161 value = wl_fixed_from_int(ev->value * 10);
162- pointer_handle_axis(seat->base.pointer, time, WL_POINTER_AXIS_VERTICAL_SCROLL, WL_POINTER_AXIS_SOURCE_WHEEL, value, ev->value * 120);
163+ pointer_handle_axis(
164+ seat->base.pointer, time, WL_POINTER_AXIS_VERTICAL_SCROLL,
165+ WL_POINTER_AXIS_SOURCE_WHEEL, value, ev->value * 120);
166 break;
167 case REL_HWHEEL:
168 value = wl_fixed_from_int(ev->value * 10);
169- pointer_handle_axis(seat->base.pointer, time, WL_POINTER_AXIS_HORIZONTAL_SCROLL, WL_POINTER_AXIS_SOURCE_WHEEL, value, ev->value * 120);
170+ pointer_handle_axis(
171+ seat->base.pointer, time, WL_POINTER_AXIS_HORIZONTAL_SCROLL,
172+ WL_POINTER_AXIS_SOURCE_WHEEL, value, ev->value * 120);
173 break;
174 default:
175 break;
176@@ -229,8 +250,10 @@ handle_evdev_abs(struct seat *seat, const struct input_event *ev)
177 return;
178 }
179
180- if (seat->abs_initialized)
181- pointer_handle_absolute_motion(seat->base.pointer, time, seat->abs_x, seat->abs_y);
182+ if (seat->abs_initialized) {
183+ pointer_handle_absolute_motion(seat->base.pointer, time, seat->abs_x,
184+ seat->abs_y);
185+ }
186 }
187
188 static int
189@@ -243,12 +266,14 @@ handle_evdev_data(int fd, uint32_t mask, void *data)
190 while (!seat->ignore) {
191 n = read(fd, &ev, sizeof(ev));
192 if (n == -1) {
193- if (errno == EAGAIN || errno == EINTR)
194+ if (errno == EAGAIN || errno == EINTR) {
195 break;
196+ }
197 return 0;
198 }
199- if (n != (ssize_t)sizeof(ev))
200+ if (n != (ssize_t)sizeof(ev)) {
201 break;
202+ }
203
204 switch (ev.type) {
205 case EV_KEY:
206@@ -261,8 +286,9 @@ handle_evdev_data(int fd, uint32_t mask, void *data)
207 handle_evdev_abs(seat, &ev);
208 break;
209 case EV_SYN:
210- if (ev.code == SYN_REPORT)
211+ if (ev.code == SYN_REPORT) {
212 pointer_handle_frame(seat->base.pointer);
213+ }
214 break;
215 default:
216 break;
217@@ -275,7 +301,9 @@ handle_evdev_data(int fd, uint32_t mask, void *data)
218 static bool
219 test_bit(const unsigned long *bits, size_t bit)
220 {
221- return (bits[bit / (sizeof(unsigned long) * 8)] >> (bit % (sizeof(unsigned long) * 8))) & 1;
222+ return (bits[bit / (sizeof(unsigned long) * 8)] >>
223+ (bit % (sizeof(unsigned long) * 8))) &
224+ 1;
225 }
226
227 static bool
228@@ -284,8 +312,9 @@ contains_ci(const char *haystack, const char *needle)
229 size_t nlen;
230 const char *h;
231
232- if (!haystack || !needle || !*needle)
233+ if (!haystack || !needle || !*needle) {
234 return false;
235+ }
236
237 nlen = strlen(needle);
238 for (h = haystack; *h; ++h) {
239@@ -293,11 +322,13 @@ contains_ci(const char *haystack, const char *needle)
240 for (i = 0; i < nlen; ++i) {
241 unsigned char hc = (unsigned char)h[i];
242 unsigned char nc = (unsigned char)needle[i];
243- if (!h[i] || tolower(hc) != tolower(nc))
244+ if (!h[i] || tolower(hc) != tolower(nc)) {
245 break;
246+ }
247 }
248- if (i == nlen)
249+ if (i == nlen) {
250 return true;
251+ }
252 }
253 return false;
254 }
255@@ -305,52 +336,63 @@ contains_ci(const char *haystack, const char *needle)
256 static bool
257 is_keyboard_device(int fd)
258 {
259- unsigned long ev_bits[(EV_MAX + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long))];
260- unsigned long key_bits[(KEY_MAX + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long))];
261+ unsigned long ev_bits[(EV_MAX + 8 * sizeof(unsigned long) - 1) /
262+ (8 * sizeof(unsigned long))];
263+ unsigned long key_bits[(KEY_MAX + 8 * sizeof(unsigned long) - 1) /
264+ (8 * sizeof(unsigned long))];
265
266 memset(ev_bits, 0, sizeof(ev_bits));
267 memset(key_bits, 0, sizeof(key_bits));
268
269- if (ioctl(fd, EVIOCGBIT(0, sizeof(ev_bits)), ev_bits) < 0)
270+ if (ioctl(fd, EVIOCGBIT(0, sizeof(ev_bits)), ev_bits) < 0) {
271 return false;
272- if (!test_bit(ev_bits, EV_KEY))
273+ }
274+ if (!test_bit(ev_bits, EV_KEY)) {
275 return false;
276- if (ioctl(fd, EVIOCGBIT(EV_KEY, sizeof(key_bits)), key_bits) < 0)
277+ }
278+ if (ioctl(fd, EVIOCGBIT(EV_KEY, sizeof(key_bits)), key_bits) < 0) {
279 return false;
280+ }
281
282- return test_bit(key_bits, KEY_A) &&
283- test_bit(key_bits, KEY_Z) &&
284- test_bit(key_bits, KEY_ENTER) &&
285- test_bit(key_bits, KEY_ESC) &&
286- test_bit(key_bits, KEY_SPACE);
287+ return test_bit(key_bits, KEY_A) && test_bit(key_bits, KEY_Z) &&
288+ test_bit(key_bits, KEY_ENTER) && test_bit(key_bits, KEY_ESC) &&
289+ test_bit(key_bits, KEY_SPACE);
290 }
291
292 static bool
293 is_pointer_device(int fd)
294 {
295- unsigned long ev_bits[(EV_MAX + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long))];
296- unsigned long rel_bits[(REL_MAX + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long))];
297- unsigned long key_bits[(KEY_MAX + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long))];
298+ unsigned long ev_bits[(EV_MAX + 8 * sizeof(unsigned long) - 1) /
299+ (8 * sizeof(unsigned long))];
300+ unsigned long rel_bits[(REL_MAX + 8 * sizeof(unsigned long) - 1) /
301+ (8 * sizeof(unsigned long))];
302+ unsigned long key_bits[(KEY_MAX + 8 * sizeof(unsigned long) - 1) /
303+ (8 * sizeof(unsigned long))];
304
305 memset(ev_bits, 0, sizeof(ev_bits));
306 memset(rel_bits, 0, sizeof(rel_bits));
307 memset(key_bits, 0, sizeof(key_bits));
308
309- if (ioctl(fd, EVIOCGBIT(0, sizeof(ev_bits)), ev_bits) < 0)
310+ if (ioctl(fd, EVIOCGBIT(0, sizeof(ev_bits)), ev_bits) < 0) {
311 return false;
312+ }
313
314 if (test_bit(ev_bits, EV_REL)) {
315- if (ioctl(fd, EVIOCGBIT(EV_REL, sizeof(rel_bits)), rel_bits) < 0)
316+ if (ioctl(fd, EVIOCGBIT(EV_REL, sizeof(rel_bits)), rel_bits) < 0) {
317 return false;
318- if (test_bit(rel_bits, REL_X) && test_bit(rel_bits, REL_Y))
319+ }
320+ if (test_bit(rel_bits, REL_X) && test_bit(rel_bits, REL_Y)) {
321 return true;
322+ }
323 }
324
325 if (test_bit(ev_bits, EV_KEY)) {
326- if (ioctl(fd, EVIOCGBIT(EV_KEY, sizeof(key_bits)), key_bits) < 0)
327+ if (ioctl(fd, EVIOCGBIT(EV_KEY, sizeof(key_bits)), key_bits) < 0) {
328 return false;
329- if (test_bit(key_bits, BTN_LEFT) && test_bit(key_bits, BTN_RIGHT))
330+ }
331+ if (test_bit(key_bits, BTN_LEFT) && test_bit(key_bits, BTN_RIGHT)) {
332 return true;
333+ }
334 }
335
336 return false;
337@@ -367,7 +409,8 @@ static int
338 score_candidate(int fd, bool want_keyboard, const char *id_name)
339 {
340 char name[256];
341- unsigned long ev_bits[(EV_MAX + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long))];
342+ unsigned long ev_bits[(EV_MAX + 8 * sizeof(unsigned long) - 1) /
343+ (8 * sizeof(unsigned long))];
344 bool is_kbd;
345 bool is_ptr;
346 int score = 10;
347@@ -375,45 +418,59 @@ score_candidate(int fd, bool want_keyboard, const char *id_name)
348 is_kbd = is_keyboard_device(fd);
349 is_ptr = is_pointer_device(fd);
350
351- if (want_keyboard && !is_kbd)
352+ if (want_keyboard && !is_kbd) {
353 return -1;
354- if (!want_keyboard && !is_ptr)
355+ }
356+ if (!want_keyboard && !is_ptr) {
357 return -1;
358+ }
359
360- if (ioctl(fd, EVIOCGNAME(sizeof(name)), name) < 0)
361+ if (ioctl(fd, EVIOCGNAME(sizeof(name)), name) < 0) {
362 name[0] = '\0';
363+ }
364
365- if (!get_ev_bits(fd, ev_bits, sizeof(ev_bits)))
366+ if (!get_ev_bits(fd, ev_bits, sizeof(ev_bits))) {
367 memset(ev_bits, 0, sizeof(ev_bits));
368+ }
369
370 if (want_keyboard) {
371- if (is_ptr)
372+ if (is_ptr) {
373 score -= 6;
374- if (contains_ci(id_name, "mouse") || contains_ci(name, "mouse"))
375+ }
376+ if (contains_ci(id_name, "mouse") || contains_ci(name, "mouse")) {
377 score -= 12;
378- if (contains_ci(id_name, "kbd") || contains_ci(id_name, "keyboard"))
379+ }
380+ if (contains_ci(id_name, "kbd") || contains_ci(id_name, "keyboard")) {
381 score += 4;
382- if (contains_ci(name, "keyboard"))
383+ }
384+ if (contains_ci(name, "keyboard")) {
385 score += 2;
386- if (test_bit(ev_bits, EV_LED))
387+ }
388+ if (test_bit(ev_bits, EV_LED)) {
389 score += 3;
390- if (test_bit(ev_bits, EV_REP))
391+ }
392+ if (test_bit(ev_bits, EV_REP)) {
393 score += 1;
394+ }
395 } else {
396- if (contains_ci(id_name, "mouse") || contains_ci(name, "mouse"))
397+ if (contains_ci(id_name, "mouse") || contains_ci(name, "mouse")) {
398 score += 4;
399- if (contains_ci(id_name, "kbd") || contains_ci(id_name, "keyboard"))
400+ }
401+ if (contains_ci(id_name, "kbd") || contains_ci(id_name, "keyboard")) {
402 score -= 6;
403- if (contains_ci(name, "keyboard"))
404+ }
405+ if (contains_ci(name, "keyboard")) {
406 score -= 4;
407+ }
408 }
409
410 return score;
411 }
412
413 static bool
414-pick_best_device(const char *dir_path, const char *name_prefix, const char *name_substr,
415- bool want_keyboard, char *out, size_t out_len)
416+pick_best_device(const char *dir_path, const char *name_prefix,
417+ const char *name_substr, bool want_keyboard, char *out,
418+ size_t out_len)
419 {
420 DIR *dir;
421 struct dirent *ent;
422@@ -422,25 +479,30 @@ pick_best_device(const char *dir_path, const char *name_prefix, const char *name
423 size_t prefix_len = name_prefix ? strlen(name_prefix) : 0;
424
425 dir = opendir(dir_path);
426- if (!dir)
427+ if (!dir) {
428 return false;
429+ }
430
431 while ((ent = readdir(dir)) != NULL) {
432 char path[PATH_MAX];
433 int fd;
434 int score;
435
436- if (ent->d_name[0] == '.')
437+ if (ent->d_name[0] == '.') {
438 continue;
439- if (name_prefix && strncmp(ent->d_name, name_prefix, prefix_len) != 0)
440+ }
441+ if (name_prefix && strncmp(ent->d_name, name_prefix, prefix_len) != 0) {
442 continue;
443- if (name_substr && !strstr(ent->d_name, name_substr))
444+ }
445+ if (name_substr && !strstr(ent->d_name, name_substr)) {
446 continue;
447+ }
448
449 snprintf(path, sizeof(path), "%s/%s", dir_path, ent->d_name);
450 fd = launch_open_device(path, O_RDONLY | O_NONBLOCK);
451- if (fd == -1)
452+ if (fd == -1) {
453 continue;
454+ }
455
456 score = score_candidate(fd, want_keyboard, ent->d_name);
457 if (score < 0) {
458@@ -469,19 +531,27 @@ initialize_evdev(struct seat *seat)
459 const char *kbd_dev = EVDEV_KBD_DEVICE;
460 const char *mouse_dev = EVDEV_POINTER_DEVICE;
461
462- if (pick_best_device("/dev/input/by-id", NULL, "event-kbd", true, kbd_path, sizeof(kbd_path)))
463+ if (pick_best_device("/dev/input/by-id", NULL, "event-kbd", true, kbd_path,
464+ sizeof(kbd_path))) {
465 kbd_dev = kbd_path;
466- else if (pick_best_device("/dev/input/by-path", NULL, "event-kbd", true, kbd_path, sizeof(kbd_path)))
467+ } else if (pick_best_device("/dev/input/by-path", NULL, "event-kbd", true,
468+ kbd_path, sizeof(kbd_path))) {
469 kbd_dev = kbd_path;
470- else if (pick_best_device("/dev/input", "event", NULL, true, kbd_path, sizeof(kbd_path)))
471+ } else if (pick_best_device("/dev/input", "event", NULL, true, kbd_path,
472+ sizeof(kbd_path))) {
473 kbd_dev = kbd_path;
474+ }
475
476- if (pick_best_device("/dev/input/by-id", NULL, "event-mouse", false, mouse_path, sizeof(mouse_path)))
477+ if (pick_best_device("/dev/input/by-id", NULL, "event-mouse", false,
478+ mouse_path, sizeof(mouse_path))) {
479 mouse_dev = mouse_path;
480- else if (pick_best_device("/dev/input/by-path", NULL, "event-mouse", false, mouse_path, sizeof(mouse_path)))
481+ } else if (pick_best_device("/dev/input/by-path", NULL, "event-mouse",
482+ false, mouse_path, sizeof(mouse_path))) {
483 mouse_dev = mouse_path;
484- else if (pick_best_device("/dev/input", "event", NULL, false, mouse_path, sizeof(mouse_path)))
485+ } else if (pick_best_device("/dev/input", "event", NULL, false, mouse_path,
486+ sizeof(mouse_path))) {
487 mouse_dev = mouse_path;
488+ }
489
490 DEBUG("evdev devices: keyboard=%s pointer=%s\n", kbd_dev, mouse_dev);
491
492@@ -517,8 +587,9 @@ seat_create(struct wl_display *display, const char *seat_name)
493 struct seat *seat;
494
495 seat = malloc(sizeof(*seat));
496- if (!seat)
497+ if (!seat) {
498 goto error0;
499+ }
500
501 memset(&seat->names, 0, sizeof(seat->names));
502 seat->names.rules = "base";
503@@ -533,13 +604,17 @@ seat_create(struct wl_display *display, const char *seat_name)
504 goto error1;
505 }
506
507- if (!initialize_evdev(seat))
508+ if (!initialize_evdev(seat)) {
509 goto error2;
510+ }
511
512- seat->global = wl_global_create(display, &wl_seat_interface, 4, seat, &bind_seat);
513- if (!seat->global)
514+ seat->global =
515+ wl_global_create(display, &wl_seat_interface, 4, seat, &bind_seat);
516+ if (!seat->global) {
517 goto error2;
518- seat->capabilities = WL_SEAT_CAPABILITY_KEYBOARD | WL_SEAT_CAPABILITY_POINTER;
519+ }
520+ seat->capabilities =
521+ WL_SEAT_CAPABILITY_KEYBOARD | WL_SEAT_CAPABILITY_POINTER;
522 wl_list_init(&seat->resources);
523
524 seat->swc_listener.notify = &handle_swc_event;
525@@ -551,7 +626,8 @@ seat_create(struct wl_display *display, const char *seat_name)
526 goto error3;
527 }
528 seat->data_device_listener.notify = &handle_data_device_event;
529- wl_signal_add(&seat->base.data_device->event_signal, &seat->data_device_listener);
530+ wl_signal_add(&seat->base.data_device->event_signal,
531+ &seat->data_device_listener);
532
533 seat->base.keyboard = keyboard_create(&seat->names);
534 if (!seat->base.keyboard) {
535@@ -559,7 +635,8 @@ seat_create(struct wl_display *display, const char *seat_name)
536 goto error4;
537 }
538 seat->keyboard_focus_listener.notify = handle_keyboard_focus_event;
539- wl_signal_add(&seat->base.keyboard->focus.event_signal, &seat->keyboard_focus_listener);
540+ wl_signal_add(&seat->base.keyboard->focus.event_signal,
541+ &seat->keyboard_focus_listener);
542
543 if (!pointer_initialize(&seat->pointer)) {
544 ERROR("Could not initialize pointer\n");
545@@ -567,13 +644,13 @@ seat_create(struct wl_display *display, const char *seat_name)
546 }
547 seat->base.pointer = &seat->pointer;
548
549- seat->kbd_source = wl_event_loop_add_fd
550- (swc.event_loop, seat->kbd_fd, WL_EVENT_READABLE,
551- &handle_evdev_data, seat);
552+ seat->kbd_source =
553+ wl_event_loop_add_fd(swc.event_loop, seat->kbd_fd, WL_EVENT_READABLE,
554+ &handle_evdev_data, seat);
555 if (!seat->shared_fd) {
556- seat->mouse_source = wl_event_loop_add_fd
557- (swc.event_loop, seat->mouse_fd, WL_EVENT_READABLE,
558- &handle_evdev_data, seat);
559+ seat->mouse_source =
560+ wl_event_loop_add_fd(swc.event_loop, seat->mouse_fd,
561+ WL_EVENT_READABLE, &handle_evdev_data, seat);
562 } else {
563 seat->mouse_source = NULL;
564 }
565@@ -601,8 +678,9 @@ seat_destroy(struct swc_seat *seat_base)
566 {
567 struct seat *seat = wl_container_of(seat_base, seat, base);
568
569- if (seat->mouse_source)
570+ if (seat->mouse_source) {
571 wl_event_source_remove(seat->mouse_source);
572+ }
573 wl_event_source_remove(seat->kbd_source);
574 if (seat->mouse_source) {
575 close(seat->mouse_fd);
+71,
-62
1@@ -22,9 +22,6 @@
2 * SOFTWARE.
3 */
4
5-#include "wscons/atKeynames.h"
6-#include "wscons/bsd_KbdMap.h"
7-#include "seat.h"
8 #include "compositor.h"
9 #include "data_device.h"
10 #include "event.h"
11@@ -33,15 +30,18 @@
12 #include "launch.h"
13 #include "pointer.h"
14 #include "screen.h"
15+#include "seat.h"
16 #include "surface.h"
17 #include "util.h"
18+#include "wscons/atKeynames.h"
19+#include "wscons/bsd_KbdMap.h"
20
21+#include <errno.h>
22+#include <fcntl.h>
23 #include <stdbool.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27-#include <errno.h>
28-#include <fcntl.h>
29 #include <unistd.h>
30
31 #include <dev/wscons/wsconsio.h>
32@@ -51,35 +51,21 @@
33 /* Map wscons encodings to libxkbcommon layout names. */
34 struct ws_xkb_map {
35 const int ws;
36- const char * const xkb;
37+ const char *const xkb;
38 };
39
40 static const struct ws_xkb_map ws_xkb_encodings[] = {
41- { KB_UK, "gb" },
42- { KB_BE, "be" },
43+ {KB_UK, "gb"}, {KB_BE, "be"},
44 #ifdef KB_CZ
45- { KB_CZ, "cz" },
46+ {KB_CZ, "cz"},
47 #endif
48- { KB_DK, "dk" },
49- { KB_NL, "nl" },
50- { KB_DE, "de" },
51+ {KB_DK, "dk"}, {KB_NL, "nl"}, {KB_DE, "de"},
52 #ifdef KB_GR
53- { KB_GR, "gr" },
54+ {KB_GR, "gr"},
55 #endif
56- { KB_HU, "hu" },
57- { KB_IT, "it" },
58- { KB_JP, "jp" },
59- { KB_NO, "no" },
60- { KB_PL, "pl" },
61- { KB_PT, "pt" },
62- { KB_RU, "ru" },
63- { KB_ES, "es" },
64- { KB_SV, "sv" },
65- { KB_SG, "sg" },
66- { KB_TR, "tr" },
67- { KB_UA, "ua" },
68- { -1, NULL }
69-};
70+ {KB_HU, "hu"}, {KB_IT, "it"}, {KB_JP, "jp"}, {KB_NO, "no"}, {KB_PL, "pl"},
71+ {KB_PT, "pt"}, {KB_RU, "ru"}, {KB_ES, "es"}, {KB_SV, "sv"}, {KB_SG, "sg"},
72+ {KB_TR, "tr"}, {KB_UA, "ua"}, {-1, NULL}};
73
74 struct seat {
75 struct swc_seat base;
76@@ -111,15 +97,18 @@ struct seat {
77 static void
78 handle_keyboard_focus_event(struct wl_listener *listener, void *data)
79 {
80- struct seat *seat = wl_container_of(listener, seat, keyboard_focus_listener);
81+ struct seat *seat =
82+ wl_container_of(listener, seat, keyboard_focus_listener);
83 struct event *ev = data;
84 struct input_focus_event_data *event_data = ev->data;
85
86- if (ev->type != INPUT_FOCUS_EVENT_CHANGED)
87+ if (ev->type != INPUT_FOCUS_EVENT_CHANGED) {
88 return;
89+ }
90
91 if (event_data->new) {
92- struct wl_client *client = wl_resource_get_client(event_data->new->surface->resource);
93+ struct wl_client *client =
94+ wl_resource_get_client(event_data->new->surface->resource);
95
96 /* Offer the selection to the new focus. */
97 data_device_offer_selection(seat->base.data_device, client);
98@@ -132,11 +121,14 @@ handle_data_device_event(struct wl_listener *listener, void *data)
99 struct seat *seat = wl_container_of(listener, seat, data_device_listener);
100 struct event *ev = data;
101
102- if (ev->type != DATA_DEVICE_EVENT_SELECTION_CHANGED)
103+ if (ev->type != DATA_DEVICE_EVENT_SELECTION_CHANGED) {
104 return;
105+ }
106
107- if (seat->base.keyboard->focus.client)
108- data_device_offer_selection(seat->base.data_device, seat->base.keyboard->focus.client);
109+ if (seat->base.keyboard->focus.client) {
110+ data_device_offer_selection(seat->base.data_device,
111+ seat->base.keyboard->focus.client);
112+ }
113 }
114
115 static void
116@@ -166,11 +158,13 @@ get_pointer(struct wl_client *client, struct wl_resource *resource, uint32_t id)
117 }
118
119 static void
120-get_keyboard(struct wl_client *client, struct wl_resource *resource, uint32_t id)
121+get_keyboard(struct wl_client *client, struct wl_resource *resource,
122+ uint32_t id)
123 {
124 struct seat *seat = wl_resource_get_user_data(resource);
125
126- keyboard_bind(seat->base.keyboard, client, wl_resource_get_version(resource), id);
127+ keyboard_bind(seat->base.keyboard, client,
128+ wl_resource_get_version(resource), id);
129 }
130
131 static void
132@@ -180,9 +174,9 @@ get_touch(struct wl_client *client, struct wl_resource *resource, uint32_t id)
133 }
134
135 static struct wl_seat_interface seat_impl = {
136- .get_pointer = get_pointer,
137- .get_keyboard = get_keyboard,
138- .get_touch = get_touch,
139+ .get_pointer = get_pointer,
140+ .get_keyboard = get_keyboard,
141+ .get_touch = get_touch,
142 };
143
144 static void
145@@ -191,15 +185,18 @@ bind_seat(struct wl_client *client, void *data, uint32_t version, uint32_t id)
146 struct seat *seat = data;
147 struct wl_resource *resource;
148
149- if (version > 4)
150+ if (version > 4) {
151 version = 4;
152+ }
153
154 resource = wl_resource_create(client, &wl_seat_interface, version, id);
155- wl_resource_set_implementation(resource, &seat_impl, seat, &remove_resource);
156+ wl_resource_set_implementation(resource, &seat_impl, seat,
157+ &remove_resource);
158 wl_list_insert(&seat->resources, wl_resource_get_link(resource));
159
160- if (version >= 2)
161+ if (version >= 2) {
162 wl_seat_send_name(resource, seat->name);
163+ }
164
165 wl_seat_send_capabilities(resource, seat->capabilities);
166 }
167@@ -321,11 +318,13 @@ initialize_wscons(struct seat *seat)
168 int kbd_ver = WSKBDIO_EVENT_VERSION;
169 #endif
170
171- if ((seat->mouse_fd = launch_open_device("/dev/wsmouse", O_RDWR | O_NONBLOCK)) == -1) {
172+ if ((seat->mouse_fd =
173+ launch_open_device("/dev/wsmouse", O_RDWR | O_NONBLOCK)) == -1) {
174 ERROR("Could not open mouse device\n");
175 goto error0;
176 }
177- if ((seat->kbd_fd = launch_open_device("/dev/wskbd", O_RDWR | O_NONBLOCK)) == -1) {
178+ if ((seat->kbd_fd =
179+ launch_open_device("/dev/wskbd", O_RDWR | O_NONBLOCK)) == -1) {
180 ERROR("Could not open keyboard device\n");
181 goto error1;
182 }
183@@ -340,16 +339,20 @@ initialize_wscons(struct seat *seat)
184 /* set devices to nativemode to receive events */
185 #ifdef WSMOUSEIO_SETMODE
186 {
187- int mode = WSMOUSE_COMPAT; /* use compat mode; it sends events */
188- if (ioctl(seat->mouse_fd, WSMOUSEIO_SETMODE, &mode) == -1)
189- fprintf(stderr, "wscons: WSMOUSEIO_SETMODE failed: %s\n", strerror(errno));
190+ int mode = WSMOUSE_COMPAT; /* use compat mode; it sends events */
191+ if (ioctl(seat->mouse_fd, WSMOUSEIO_SETMODE, &mode) == -1) {
192+ fprintf(stderr, "wscons: WSMOUSEIO_SETMODE failed: %s\n",
193+ strerror(errno));
194+ }
195 }
196 #endif /* WSMOUSEIO_SETMODE */
197 #ifdef WSKBDIO_SETMODE
198 {
199- int mode = WSKBD_TRANSLATED; /* use translated mode for key events */
200- if (ioctl(seat->kbd_fd, WSKBDIO_SETMODE, &mode) == -1)
201- fprintf(stderr, "wscons: WSKBDIO_SETMODE failed: %s\n", strerror(errno));
202+ int mode = WSKBD_TRANSLATED; /* use translated mode for key events */
203+ if (ioctl(seat->kbd_fd, WSKBDIO_SETMODE, &mode) == -1) {
204+ fprintf(stderr, "wscons: WSKBDIO_SETMODE failed: %s\n",
205+ strerror(errno));
206+ }
207 }
208 #endif /* WSKBDIO_SETMODE */
209
210@@ -397,8 +400,9 @@ seat_create(struct wl_display *display, const char *seat_name)
211 struct seat *seat;
212
213 seat = malloc(sizeof(*seat));
214- if (!seat)
215+ if (!seat) {
216 goto error0;
217+ }
218
219 seat->ignore = false;
220 memset(&seat->names, 0, sizeof(seat->names));
221@@ -413,13 +417,17 @@ seat_create(struct wl_display *display, const char *seat_name)
222 goto error1;
223 }
224
225- if (!initialize_wscons(seat))
226+ if (!initialize_wscons(seat)) {
227 goto error2;
228+ }
229
230- seat->global = wl_global_create(display, &wl_seat_interface, 4, seat, &bind_seat);
231- if (!seat->global)
232+ seat->global =
233+ wl_global_create(display, &wl_seat_interface, 4, seat, &bind_seat);
234+ if (!seat->global) {
235 goto error2;
236- seat->capabilities = WL_SEAT_CAPABILITY_KEYBOARD | WL_SEAT_CAPABILITY_POINTER;
237+ }
238+ seat->capabilities =
239+ WL_SEAT_CAPABILITY_KEYBOARD | WL_SEAT_CAPABILITY_POINTER;
240 wl_list_init(&seat->resources);
241
242 seat->swc_listener.notify = &handle_swc_event;
243@@ -431,7 +439,8 @@ seat_create(struct wl_display *display, const char *seat_name)
244 goto error3;
245 }
246 seat->data_device_listener.notify = &handle_data_device_event;
247- wl_signal_add(&seat->base.data_device->event_signal, &seat->data_device_listener);
248+ wl_signal_add(&seat->base.data_device->event_signal,
249+ &seat->data_device_listener);
250
251 seat->base.keyboard = keyboard_create(&seat->names);
252 if (!seat->base.keyboard) {
253@@ -439,7 +448,8 @@ seat_create(struct wl_display *display, const char *seat_name)
254 goto error4;
255 }
256 seat->keyboard_focus_listener.notify = handle_keyboard_focus_event;
257- wl_signal_add(&seat->base.keyboard->focus.event_signal, &seat->keyboard_focus_listener);
258+ wl_signal_add(&seat->base.keyboard->focus.event_signal,
259+ &seat->keyboard_focus_listener);
260
261 if (!pointer_initialize(&seat->pointer)) {
262 ERROR("Could not initialize pointer\n");
263@@ -447,12 +457,11 @@ seat_create(struct wl_display *display, const char *seat_name)
264 }
265 seat->base.pointer = &seat->pointer;
266
267- seat->kbd_source = wl_event_loop_add_fd
268- (swc.event_loop, seat->kbd_fd, WL_EVENT_READABLE,
269- &handle_ws_data, seat);
270- seat->mouse_source = wl_event_loop_add_fd
271- (swc.event_loop, seat->mouse_fd, WL_EVENT_READABLE,
272- &handle_ws_data, seat);
273+ seat->kbd_source = wl_event_loop_add_fd(
274+ swc.event_loop, seat->kbd_fd, WL_EVENT_READABLE, &handle_ws_data, seat);
275+ seat->mouse_source =
276+ wl_event_loop_add_fd(swc.event_loop, seat->mouse_fd, WL_EVENT_READABLE,
277+ &handle_ws_data, seat);
278
279 return &seat->base;
280
+100,
-49
1@@ -43,11 +43,11 @@
2 #include <libinput.h>
3 #include <linux/input.h>
4 #ifdef ENABLE_LIBUDEV
5-# include <libudev.h>
6+#include <libudev.h>
7 #endif
8
9 #ifndef NETLINK_MASK
10-# define NETLINK_MASK 4
11+#define NETLINK_MASK 4
12 #endif
13
14 struct seat {
15@@ -76,15 +76,18 @@ struct seat {
16 static void
17 handle_keyboard_focus_event(struct wl_listener *listener, void *data)
18 {
19- struct seat *seat = wl_container_of(listener, seat, keyboard_focus_listener);
20+ struct seat *seat =
21+ wl_container_of(listener, seat, keyboard_focus_listener);
22 struct event *ev = data;
23 struct input_focus_event_data *event_data = ev->data;
24
25- if (ev->type != INPUT_FOCUS_EVENT_CHANGED)
26+ if (ev->type != INPUT_FOCUS_EVENT_CHANGED) {
27 return;
28+ }
29
30 if (event_data->new) {
31- struct wl_client *client = wl_resource_get_client(event_data->new->surface->resource);
32+ struct wl_client *client =
33+ wl_resource_get_client(event_data->new->surface->resource);
34
35 /* Offer the selection to the new focus. */
36 data_device_offer_selection(seat->base.data_device, client);
37@@ -97,11 +100,14 @@ handle_data_device_event(struct wl_listener *listener, void *data)
38 struct seat *seat = wl_container_of(listener, seat, data_device_listener);
39 struct event *ev = data;
40
41- if (ev->type != DATA_DEVICE_EVENT_SELECTION_CHANGED)
42+ if (ev->type != DATA_DEVICE_EVENT_SELECTION_CHANGED) {
43 return;
44+ }
45
46- if (seat->base.keyboard->focus.client)
47- data_device_offer_selection(seat->base.data_device, seat->base.keyboard->focus.client);
48+ if (seat->base.keyboard->focus.client) {
49+ data_device_offer_selection(seat->base.data_device,
50+ seat->base.keyboard->focus.client);
51+ }
52 }
53
54 static void
55@@ -116,8 +122,9 @@ handle_swc_event(struct wl_listener *listener, void *data)
56 keyboard_reset(seat->base.keyboard);
57 break;
58 case SWC_EVENT_ACTIVATED:
59- if (libinput_resume(seat->libinput) != 0)
60+ if (libinput_resume(seat->libinput) != 0) {
61 WARNING("Failed to resume libinput context\n");
62+ }
63 break;
64 }
65 }
66@@ -128,17 +135,22 @@ get_pointer(struct wl_client *client, struct wl_resource *resource, uint32_t id)
67 {
68 struct seat *seat = wl_resource_get_user_data(resource);
69
70- if (!pointer_bind(&seat->pointer, client, wl_resource_get_version(resource), id))
71+ if (!pointer_bind(&seat->pointer, client, wl_resource_get_version(resource),
72+ id)) {
73 wl_resource_post_no_memory(resource);
74+ }
75 }
76
77 static void
78-get_keyboard(struct wl_client *client, struct wl_resource *resource, uint32_t id)
79+get_keyboard(struct wl_client *client, struct wl_resource *resource,
80+ uint32_t id)
81 {
82 struct seat *seat = wl_resource_get_user_data(resource);
83
84- if (!keyboard_bind(seat->base.keyboard, client, wl_resource_get_version(resource), id))
85+ if (!keyboard_bind(seat->base.keyboard, client,
86+ wl_resource_get_version(resource), id)) {
87 wl_resource_post_no_memory(resource);
88+ }
89 }
90
91 static void
92@@ -148,9 +160,9 @@ get_touch(struct wl_client *client, struct wl_resource *resource, uint32_t id)
93 }
94
95 static const struct wl_seat_interface seat_impl = {
96- .get_pointer = get_pointer,
97- .get_keyboard = get_keyboard,
98- .get_touch = get_touch,
99+ .get_pointer = get_pointer,
100+ .get_keyboard = get_keyboard,
101+ .get_touch = get_touch,
102 };
103
104 static void
105@@ -164,11 +176,13 @@ bind_seat(struct wl_client *client, void *data, uint32_t version, uint32_t id)
106 wl_client_post_no_memory(client);
107 return;
108 }
109- wl_resource_set_implementation(resource, &seat_impl, seat, &remove_resource);
110+ wl_resource_set_implementation(resource, &seat_impl, seat,
111+ &remove_resource);
112 wl_list_insert(&seat->resources, wl_resource_get_link(resource));
113
114- if (version >= 2)
115+ if (version >= 2) {
116 wl_seat_send_name(resource, seat->name);
117+ }
118
119 wl_seat_send_capabilities(resource, seat->capabilities);
120 }
121@@ -178,12 +192,13 @@ update_capabilities(struct seat *seat, uint32_t capabilities)
122 {
123 struct wl_resource *resource;
124
125- if (!(~seat->capabilities & capabilities))
126+ if (!(~seat->capabilities & capabilities)) {
127 return;
128+ }
129
130 seat->capabilities |= capabilities;
131 wl_list_for_each(resource, &seat->resources, link)
132- wl_seat_send_capabilities(resource, seat->capabilities);
133+ wl_seat_send_capabilities(resource, seat->capabilities);
134 }
135
136 static int
137@@ -199,8 +214,8 @@ close_restricted(int fd, void *user_data)
138 }
139
140 const struct libinput_interface libinput_interface = {
141- .open_restricted = open_restricted,
142- .close_restricted = close_restricted,
143+ .open_restricted = open_restricted,
144+ .close_restricted = close_restricted,
145 };
146
147 static uint32_t
148@@ -208,10 +223,12 @@ device_capabilities(struct libinput_device *device)
149 {
150 uint32_t capabilities = 0;
151
152- if (libinput_device_has_capability(device, LIBINPUT_DEVICE_CAP_KEYBOARD))
153+ if (libinput_device_has_capability(device, LIBINPUT_DEVICE_CAP_KEYBOARD)) {
154 capabilities |= WL_SEAT_CAPABILITY_KEYBOARD;
155- if (libinput_device_has_capability(device, LIBINPUT_DEVICE_CAP_POINTER))
156+ }
157+ if (libinput_device_has_capability(device, LIBINPUT_DEVICE_CAP_POINTER)) {
158 capabilities |= WL_SEAT_CAPABILITY_POINTER;
159+ }
160 /* TODO: Add touch device support
161 * if (libinput_device_has_capability(device, LIBINPUT_DEVICE_CAP_TOUCH))
162 * capabilities |= WL_SEAT_CAPABILITY_TOUCH;
163@@ -247,8 +264,9 @@ handle_libinput_data(int fd, uint32_t mask, void *data)
164 case LIBINPUT_EVENT_DEVICE_ADDED:
165 device = libinput_event_get_device(generic_event);
166 update_capabilities(seat, device_capabilities(device));
167- if (swc.manager->new_device)
168+ if (swc.manager->new_device) {
169 swc.manager->new_device(device);
170+ }
171 break;
172 case LIBINPUT_EVENT_KEYBOARD_KEY:
173 event.k = libinput_event_get_keyboard_event(generic_event);
174@@ -270,8 +288,12 @@ handle_libinput_data(int fd, uint32_t mask, void *data)
175 rect = &screen->base.geometry;
176 event.p = libinput_event_get_pointer_event(generic_event);
177 time = libinput_event_pointer_get_time(event.p);
178- x = wl_fixed_from_double(libinput_event_pointer_get_absolute_x_transformed(event.p, rect->width));
179- y = wl_fixed_from_double(libinput_event_pointer_get_absolute_y_transformed(event.p, rect->height));
180+ x = wl_fixed_from_double(
181+ libinput_event_pointer_get_absolute_x_transformed(event.p,
182+ rect->width));
183+ y = wl_fixed_from_double(
184+ libinput_event_pointer_get_absolute_y_transformed(
185+ event.p, rect->height));
186 pointer_handle_absolute_motion(&seat->pointer, time, x, y);
187 pointer_handle_frame(&seat->pointer);
188 break;
189@@ -282,15 +304,19 @@ handle_libinput_data(int fd, uint32_t mask, void *data)
190 state = libinput_event_pointer_get_button_state(event.p);
191 pointer_handle_button(&seat->pointer, time, key, state);
192 if (state == LIBINPUT_BUTTON_STATE_PRESSED) {
193- /* qemu generates GEAR_UP/GEAR_DOWN events on scroll, so pass
194+ /* qemu generates GEAR_UP/GEAR_DOWN events on scroll, so pass
195 * those through as axis events. */
196 source = WL_POINTER_AXIS_SOURCE_WHEEL;
197 switch (key) {
198 case BTN_GEAR_DOWN:
199- pointer_handle_axis(&seat->pointer, time, WL_POINTER_AXIS_VERTICAL_SCROLL, source, wl_fixed_from_int(10), 120);
200+ pointer_handle_axis(&seat->pointer, time,
201+ WL_POINTER_AXIS_VERTICAL_SCROLL, source,
202+ wl_fixed_from_int(10), 120);
203 break;
204 case BTN_GEAR_UP:
205- pointer_handle_axis(&seat->pointer, time, WL_POINTER_AXIS_VERTICAL_SCROLL, source, wl_fixed_from_int(-10), -120);
206+ pointer_handle_axis(&seat->pointer, time,
207+ WL_POINTER_AXIS_VERTICAL_SCROLL, source,
208+ wl_fixed_from_int(-10), -120);
209 break;
210 }
211 }
212@@ -309,17 +335,31 @@ handle_libinput_data(int fd, uint32_t mask, void *data)
213 event.p = libinput_event_get_pointer_event(generic_event);
214 time = libinput_event_pointer_get_time(event.p);
215 value120 = 0;
216- if (libinput_event_pointer_has_axis(event.p, LIBINPUT_POINTER_AXIS_SCROLL_VERTICAL)) {
217- value = wl_fixed_from_double(libinput_event_pointer_get_scroll_value(event.p, LIBINPUT_POINTER_AXIS_SCROLL_VERTICAL));
218- if (source == WL_POINTER_AXIS_SOURCE_WHEEL)
219- value120 = libinput_event_pointer_get_scroll_value_v120(event.p, LIBINPUT_POINTER_AXIS_SCROLL_VERTICAL);
220- pointer_handle_axis(&seat->pointer, time, WL_POINTER_AXIS_VERTICAL_SCROLL, source, value, value120);
221+ if (libinput_event_pointer_has_axis(
222+ event.p, LIBINPUT_POINTER_AXIS_SCROLL_VERTICAL)) {
223+ value = wl_fixed_from_double(
224+ libinput_event_pointer_get_scroll_value(
225+ event.p, LIBINPUT_POINTER_AXIS_SCROLL_VERTICAL));
226+ if (source == WL_POINTER_AXIS_SOURCE_WHEEL) {
227+ value120 = libinput_event_pointer_get_scroll_value_v120(
228+ event.p, LIBINPUT_POINTER_AXIS_SCROLL_VERTICAL);
229+ }
230+ pointer_handle_axis(&seat->pointer, time,
231+ WL_POINTER_AXIS_VERTICAL_SCROLL, source,
232+ value, value120);
233 }
234- if (libinput_event_pointer_has_axis(event.p, LIBINPUT_POINTER_AXIS_SCROLL_HORIZONTAL)) {
235- value = wl_fixed_from_double(libinput_event_pointer_get_scroll_value(event.p, LIBINPUT_POINTER_AXIS_SCROLL_HORIZONTAL));
236- if (source == WL_POINTER_AXIS_SOURCE_WHEEL)
237- value120 = libinput_event_pointer_get_scroll_value_v120(event.p, LIBINPUT_POINTER_AXIS_SCROLL_HORIZONTAL);
238- pointer_handle_axis(&seat->pointer, time, WL_POINTER_AXIS_HORIZONTAL_SCROLL, source, value, value120);
239+ if (libinput_event_pointer_has_axis(
240+ event.p, LIBINPUT_POINTER_AXIS_SCROLL_HORIZONTAL)) {
241+ value = wl_fixed_from_double(
242+ libinput_event_pointer_get_scroll_value(
243+ event.p, LIBINPUT_POINTER_AXIS_SCROLL_HORIZONTAL));
244+ if (source == WL_POINTER_AXIS_SOURCE_WHEEL) {
245+ value120 = libinput_event_pointer_get_scroll_value_v120(
246+ event.p, LIBINPUT_POINTER_AXIS_SCROLL_HORIZONTAL);
247+ }
248+ pointer_handle_axis(&seat->pointer, time,
249+ WL_POINTER_AXIS_HORIZONTAL_SCROLL, source,
250+ value, value120);
251 }
252 pointer_handle_frame(&seat->pointer);
253 break;
254@@ -342,9 +382,11 @@ initialize_libinput(struct seat *seat)
255 goto error0;
256 }
257
258- seat->libinput = libinput_udev_create_context(&libinput_interface, NULL, seat->udev);
259+ seat->libinput =
260+ libinput_udev_create_context(&libinput_interface, NULL, seat->udev);
261 #else
262- seat->libinput = libinput_netlink_create_context(&libinput_interface, NULL, NETLINK_MASK);
263+ seat->libinput = libinput_netlink_create_context(&libinput_interface, NULL,
264+ NETLINK_MASK);
265 #endif
266
267 if (!seat->libinput) {
268@@ -364,14 +406,17 @@ initialize_libinput(struct seat *seat)
269 }
270 #endif
271
272- seat->libinput_source = wl_event_loop_add_fd(swc.event_loop, libinput_get_fd(seat->libinput), WL_EVENT_READABLE, &handle_libinput_data, seat);
273+ seat->libinput_source =
274+ wl_event_loop_add_fd(swc.event_loop, libinput_get_fd(seat->libinput),
275+ WL_EVENT_READABLE, &handle_libinput_data, seat);
276 if (!seat->libinput_source) {
277 ERROR("Could not create event source for libinput\n");
278 goto error2;
279 }
280
281- if (!swc.active)
282+ if (!swc.active) {
283 libinput_suspend(seat->libinput);
284+ }
285
286 return true;
287
288@@ -391,16 +436,19 @@ seat_create(struct wl_display *display, const char *seat_name)
289 struct seat *seat;
290
291 seat = malloc(sizeof(*seat));
292- if (!seat)
293+ if (!seat) {
294 goto error0;
295+ }
296 seat->name = strdup(seat_name);
297 if (!seat->name) {
298 ERROR("Could not allocate seat name string\n");
299 goto error1;
300 }
301- seat->global = wl_global_create(display, &wl_seat_interface, 8, seat, &bind_seat);
302- if (!seat->global)
303+ seat->global =
304+ wl_global_create(display, &wl_seat_interface, 8, seat, &bind_seat);
305+ if (!seat->global) {
306 goto error2;
307+ }
308 seat->capabilities = 0;
309 wl_list_init(&seat->resources);
310
311@@ -413,7 +461,8 @@ seat_create(struct wl_display *display, const char *seat_name)
312 goto error3;
313 }
314 seat->data_device_listener.notify = &handle_data_device_event;
315- wl_signal_add(&seat->base.data_device->event_signal, &seat->data_device_listener);
316+ wl_signal_add(&seat->base.data_device->event_signal,
317+ &seat->data_device_listener);
318
319 seat->base.keyboard = keyboard_create(NULL);
320 if (!seat->base.keyboard) {
321@@ -421,7 +470,8 @@ seat_create(struct wl_display *display, const char *seat_name)
322 goto error4;
323 }
324 seat->keyboard_focus_listener.notify = handle_keyboard_focus_event;
325- wl_signal_add(&seat->base.keyboard->focus.event_signal, &seat->keyboard_focus_listener);
326+ wl_signal_add(&seat->base.keyboard->focus.event_signal,
327+ &seat->keyboard_focus_listener);
328
329 if (!pointer_initialize(&seat->pointer)) {
330 ERROR("Could not initialize pointer\n");
331@@ -429,8 +479,9 @@ seat_create(struct wl_display *display, const char *seat_name)
332 }
333 seat->base.pointer = &seat->pointer;
334
335- if (!initialize_libinput(seat))
336+ if (!initialize_libinput(seat)) {
337 goto error6;
338+ }
339
340 return &seat->base;
341
+4,
-2
1@@ -32,7 +32,9 @@ struct swc_seat {
2 struct data_device *data_device;
3 };
4
5-struct swc_seat *seat_create(struct wl_display *display, const char *name);
6-void seat_destroy(struct swc_seat *seat);
7+struct swc_seat *
8+seat_create(struct wl_display *display, const char *name);
9+void
10+seat_destroy(struct swc_seat *seat);
11
12 #endif
+7,
-4
1@@ -28,19 +28,22 @@
2 #include <wayland-server.h>
3
4 static void
5-get_shell_surface(struct wl_client *client, struct wl_resource *resource, uint32_t id, struct wl_resource *surface_resource)
6+get_shell_surface(struct wl_client *client, struct wl_resource *resource,
7+ uint32_t id, struct wl_resource *surface_resource)
8 {
9 struct surface *surface = wl_resource_get_user_data(surface_resource);
10 struct shell_surface *shell_surface;
11
12- shell_surface = shell_surface_new(client, wl_resource_get_version(resource), id, surface);
13+ shell_surface = shell_surface_new(client, wl_resource_get_version(resource),
14+ id, surface);
15
16- if (!shell_surface)
17+ if (!shell_surface) {
18 wl_resource_post_no_memory(resource);
19+ }
20 }
21
22 static const struct wl_shell_interface shell_implementation = {
23- .get_shell_surface = get_shell_surface,
24+ .get_shell_surface = get_shell_surface,
25 };
26
27 static void
+2,
-1
1@@ -26,6 +26,7 @@
2
3 struct wl_display;
4
5-struct wl_global *shell_create(struct wl_display *display);
6+struct wl_global *
7+shell_create(struct wl_display *display);
8
9 #endif
+63,
-37
1@@ -33,8 +33,8 @@
2 #include "view.h"
3 #include "window.h"
4
5-#include <stdlib.h>
6 #include <signal.h>
7+#include <stdlib.h>
8
9 struct shell_surface {
10 struct window window;
11@@ -46,9 +46,11 @@ struct shell_surface {
12 static void
13 configure(struct window *window, uint32_t width, uint32_t height)
14 {
15- struct shell_surface *shell_surface = wl_container_of(window, shell_surface, window);
16+ struct shell_surface *shell_surface =
17+ wl_container_of(window, shell_surface, window);
18
19- wl_shell_surface_send_configure(shell_surface->resource, WL_SHELL_SURFACE_RESIZE_NONE, width, height);
20+ wl_shell_surface_send_configure(
21+ shell_surface->resource, WL_SHELL_SURFACE_RESIZE_NONE, width, height);
22
23 /* wl_shell does not support acknowledging configures. */
24 window->configure.acknowledged = true;
25@@ -57,7 +59,8 @@ configure(struct window *window, uint32_t width, uint32_t height)
26 static void
27 close_(struct window *window)
28 {
29- struct shell_surface *shell_surface = wl_container_of(window, shell_surface, window);
30+ struct shell_surface *shell_surface =
31+ wl_container_of(window, shell_surface, window);
32 struct wl_client *client;
33 pid_t pid;
34
35@@ -67,8 +70,8 @@ close_(struct window *window)
36 }
37
38 static const struct window_impl window_impl = {
39- .configure = configure,
40- .close = close_,
41+ .configure = configure,
42+ .close = close_,
43 };
44
45 static void
46@@ -77,13 +80,15 @@ pong(struct wl_client *client, struct wl_resource *resource, uint32_t serial)
47 }
48
49 static void
50-move(struct wl_client *client, struct wl_resource *resource, struct wl_resource *seat_resource, uint32_t serial)
51+move(struct wl_client *client, struct wl_resource *resource,
52+ struct wl_resource *seat_resource, uint32_t serial)
53 {
54 struct shell_surface *shell_surface = wl_resource_get_user_data(resource);
55 struct button *button;
56
57- if (!(button = pointer_get_button(swc.seat->pointer, serial)))
58+ if (!(button = pointer_get_button(swc.seat->pointer, serial))) {
59 return;
60+ }
61
62 window_begin_move(&shell_surface->window, button);
63 }
64@@ -95,8 +100,9 @@ resize(struct wl_client *client, struct wl_resource *resource,
65 struct shell_surface *shell_surface = wl_resource_get_user_data(resource);
66 struct button *button;
67
68- if (!(button = pointer_get_button(swc.seat->pointer, serial)))
69+ if (!(button = pointer_get_button(swc.seat->pointer, serial))) {
70 return;
71+ }
72
73 window_begin_resize(&shell_surface->window, edges, button);
74 }
75@@ -112,14 +118,16 @@ set_toplevel(struct wl_client *client, struct wl_resource *resource)
76
77 static void
78 set_transient(struct wl_client *client, struct wl_resource *resource,
79- struct wl_resource *parent_resource, int32_t x, int32_t y, uint32_t flags)
80+ struct wl_resource *parent_resource, int32_t x, int32_t y,
81+ uint32_t flags)
82 {
83 struct shell_surface *shell_surface = wl_resource_get_user_data(resource);
84 struct surface *parent_surface = wl_resource_get_user_data(parent_resource);
85 struct compositor_view *parent_view = compositor_view(parent_surface->view);
86
87- if (!parent_view || !parent_view->window)
88+ if (!parent_view || !parent_view->window) {
89 return;
90+ }
91
92 window_manage(&shell_surface->window);
93 window_set_parent(&shell_surface->window, parent_view->window);
94@@ -127,13 +135,16 @@ set_transient(struct wl_client *client, struct wl_resource *resource,
95
96 static void
97 set_fullscreen(struct wl_client *client, struct wl_resource *resource,
98- uint32_t method, uint32_t framerate, struct wl_resource *output_resource)
99+ uint32_t method, uint32_t framerate,
100+ struct wl_resource *output_resource)
101 {
102 struct shell_surface *shell_surface = wl_resource_get_user_data(resource);
103- struct output *output = output_resource ? wl_resource_get_user_data(output_resource) : NULL;
104+ struct output *output =
105+ output_resource ? wl_resource_get_user_data(output_resource) : NULL;
106 struct screen *screen;
107
108- screen = output ? output->screen : wl_container_of(swc.screens.next, screen, link);
109+ screen = output ? output->screen
110+ : wl_container_of(swc.screens.next, screen, link);
111
112 /* TODO: Handle fullscreen windows. */
113
114@@ -144,22 +155,27 @@ set_fullscreen(struct wl_client *client, struct wl_resource *resource,
115 static void
116 set_popup(struct wl_client *client, struct wl_resource *resource,
117 struct wl_resource *seat_resource, uint32_t serial,
118- struct wl_resource *parent_resource, int32_t x, int32_t y, uint32_t flags)
119+ struct wl_resource *parent_resource, int32_t x, int32_t y,
120+ uint32_t flags)
121 {
122 struct shell_surface *shell_surface = wl_resource_get_user_data(resource);
123 struct surface *parent_surface = wl_resource_get_user_data(parent_resource);
124 struct compositor_view *parent_view = compositor_view(parent_surface->view);
125
126- if (!parent_view || !parent_view->window)
127+ if (!parent_view || !parent_view->window) {
128 return;
129+ }
130
131 window_unmanage(&shell_surface->window);
132 window_set_parent(&shell_surface->window, parent_view->window);
133- view_move(&shell_surface->window.view->base, parent_view->base.geometry.x + x, parent_view->base.geometry.y + y);
134+ view_move(&shell_surface->window.view->base,
135+ parent_view->base.geometry.x + x,
136+ parent_view->base.geometry.y + y);
137 }
138
139 static void
140-set_maximized(struct wl_client *client, struct wl_resource *resource, struct wl_resource *output_resource)
141+set_maximized(struct wl_client *client, struct wl_resource *resource,
142+ struct wl_resource *output_resource)
143 {
144 struct shell_surface *shell_surface = wl_resource_get_user_data(resource);
145
146@@ -170,36 +186,39 @@ set_maximized(struct wl_client *client, struct wl_resource *resource, struct wl_
147 }
148
149 static void
150-set_title(struct wl_client *client, struct wl_resource *resource, const char *title)
151+set_title(struct wl_client *client, struct wl_resource *resource,
152+ const char *title)
153 {
154 struct shell_surface *shell_surface = wl_resource_get_user_data(resource);
155 window_set_title(&shell_surface->window, title, -1);
156 }
157
158 static void
159-set_class(struct wl_client *client, struct wl_resource *resource, const char *class)
160+set_class(struct wl_client *client, struct wl_resource *resource,
161+ const char *class)
162 {
163 struct shell_surface *shell_surface = wl_resource_get_user_data(resource);
164 window_set_app_id(&shell_surface->window, class);
165 }
166
167 static const struct wl_shell_surface_interface shell_surface_implementation = {
168- .pong = pong,
169- .move = move,
170- .resize = resize,
171- .set_toplevel = set_toplevel,
172- .set_transient = set_transient,
173- .set_fullscreen = set_fullscreen,
174- .set_popup = set_popup,
175- .set_maximized = set_maximized,
176- .set_title = set_title,
177- .set_class = set_class,
178+ .pong = pong,
179+ .move = move,
180+ .resize = resize,
181+ .set_toplevel = set_toplevel,
182+ .set_transient = set_transient,
183+ .set_fullscreen = set_fullscreen,
184+ .set_popup = set_popup,
185+ .set_maximized = set_maximized,
186+ .set_title = set_title,
187+ .set_class = set_class,
188 };
189
190 static void
191 handle_surface_destroy(struct wl_listener *listener, void *data)
192 {
193- struct shell_surface *shell_surface = wl_container_of(listener, shell_surface, surface_destroy_listener);
194+ struct shell_surface *shell_surface =
195+ wl_container_of(listener, shell_surface, surface_destroy_listener);
196 wl_resource_destroy(shell_surface->resource);
197 }
198
199@@ -213,24 +232,31 @@ destroy_shell_surface(struct wl_resource *resource)
200 }
201
202 struct shell_surface *
203-shell_surface_new(struct wl_client *client, uint32_t version, uint32_t id, struct surface *surface)
204+shell_surface_new(struct wl_client *client, uint32_t version, uint32_t id,
205+ struct surface *surface)
206 {
207 struct shell_surface *shell_surface;
208
209 shell_surface = malloc(sizeof(*shell_surface));
210
211- if (!shell_surface)
212+ if (!shell_surface) {
213 goto error0;
214+ }
215
216- shell_surface->resource = wl_resource_create(client, &wl_shell_surface_interface, version, id);
217+ shell_surface->resource =
218+ wl_resource_create(client, &wl_shell_surface_interface, version, id);
219
220- if (!shell_surface->resource)
221+ if (!shell_surface->resource) {
222 goto error1;
223+ }
224
225- wl_resource_set_implementation(shell_surface->resource, &shell_surface_implementation, shell_surface, &destroy_shell_surface);
226+ wl_resource_set_implementation(shell_surface->resource,
227+ &shell_surface_implementation, shell_surface,
228+ &destroy_shell_surface);
229 window_initialize(&shell_surface->window, &window_impl, surface);
230 shell_surface->surface_destroy_listener.notify = &handle_surface_destroy;
231- wl_resource_add_destroy_listener(surface->resource, &shell_surface->surface_destroy_listener);
232+ wl_resource_add_destroy_listener(surface->resource,
233+ &shell_surface->surface_destroy_listener);
234
235 return shell_surface;
236
+3,
-1
1@@ -29,6 +29,8 @@
2 struct surface;
3 struct wl_client;
4
5-struct shell_surface *shell_surface_new(struct wl_client *client, uint32_t version, uint32_t id, struct surface *surface);
6+struct shell_surface *
7+shell_surface_new(struct wl_client *client, uint32_t version, uint32_t id,
8+ struct surface *surface);
9
10 #endif
+51,
-29
1@@ -65,8 +65,9 @@ swc_mremap(struct pool *pool, void *oldp, size_t oldsize, size_t newsize)
2 void *newp;
3
4 newp = mmap(NULL, newsize, PROT_READ, MAP_SHARED, pool->fd, 0);
5- if (newp == MAP_FAILED)
6+ if (newp == MAP_FAILED) {
7 return MAP_FAILED;
8+ }
9
10 (void)munmap(oldp, oldsize);
11 return newp;
12@@ -76,8 +77,9 @@ swc_mremap(struct pool *pool, void *oldp, size_t oldsize, size_t newsize)
13 static void
14 unref_pool(struct pool *pool)
15 {
16- if (--pool->references > 0)
17+ if (--pool->references > 0) {
18 return;
19+ }
20
21 munmap(pool->data, pool->size);
22 close(pool->fd);
23@@ -94,7 +96,8 @@ destroy_pool_resource(struct wl_resource *resource)
24 static void
25 handle_buffer_destroy(struct wld_destructor *destructor)
26 {
27- struct pool_reference *reference = wl_container_of(destructor, reference, destructor);
28+ struct pool_reference *reference =
29+ wl_container_of(destructor, reference, destructor);
30 unref_pool(reference->pool);
31 }
32
33@@ -113,7 +116,8 @@ format_shm_to_wld(uint32_t format)
34
35 static void
36 create_buffer(struct wl_client *client, struct wl_resource *resource,
37- uint32_t id, int32_t offset, int32_t width, int32_t height, int32_t stride, uint32_t format)
38+ uint32_t id, int32_t offset, int32_t width, int32_t height,
39+ int32_t stride, uint32_t format)
40 {
41 struct pool *pool = wl_resource_get_user_data(resource);
42 struct pool_reference *reference;
43@@ -122,23 +126,30 @@ create_buffer(struct wl_client *client, struct wl_resource *resource,
44 union wld_object object;
45
46 if (offset > pool->size || offset < 0) {
47- wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_STRIDE, "offset is too big or negative");
48+ wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_STRIDE,
49+ "offset is too big or negative");
50 return;
51 }
52
53 object.ptr = (void *)((uintptr_t)pool->data + offset);
54- buffer = wld_import_buffer(pool->shm->context, WLD_OBJECT_DATA, object, width, height, format_shm_to_wld(format), stride);
55+ buffer =
56+ wld_import_buffer(pool->shm->context, WLD_OBJECT_DATA, object, width,
57+ height, format_shm_to_wld(format), stride);
58
59- if (!buffer)
60+ if (!buffer) {
61 goto error0;
62+ }
63
64- buffer_resource = wayland_buffer_create_resource(client, wl_resource_get_version(resource), id, buffer);
65+ buffer_resource = wayland_buffer_create_resource(
66+ client, wl_resource_get_version(resource), id, buffer);
67
68- if (!buffer_resource)
69+ if (!buffer_resource) {
70 goto error1;
71+ }
72
73- if (!(reference = malloc(sizeof(*reference))))
74+ if (!(reference = malloc(sizeof(*reference)))) {
75 goto error2;
76+ }
77
78 reference->pool = pool;
79 reference->destructor.destroy = &handle_buffer_destroy;
80@@ -163,17 +174,20 @@ resize(struct wl_client *client, struct wl_resource *resource, int32_t size)
81 struct stat st;
82
83 if (fstat(pool->fd, &st) != 0) {
84- wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD, "fstat failed: %s", strerror(errno));
85+ wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD,
86+ "fstat failed: %s", strerror(errno));
87 return;
88 }
89 if (st.st_size < size) {
90 if (ftruncate(pool->fd, size) != 0) {
91 int saved = errno;
92 /* some clients seal the memfd; if its size is already sufficient, allow the resize */
93- if ((saved == EPERM || saved == EACCES) && fstat(pool->fd, &st) == 0 && st.st_size >= size) {
94+ if ((saved == EPERM || saved == EACCES) &&
95+ fstat(pool->fd, &st) == 0 && st.st_size >= size) {
96 goto remap;
97 }
98- wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD, "ftruncate failed: %s", strerror(saved));
99+ wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD,
100+ "ftruncate failed: %s", strerror(saved));
101 return;
102 }
103 }
104@@ -181,7 +195,8 @@ resize(struct wl_client *client, struct wl_resource *resource, int32_t size)
105 remap:
106 data = swc_mremap(pool, pool->data, pool->size, size);
107 if (data == MAP_FAILED) {
108- wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD, "mremap failed: %s", strerror(errno));
109+ wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD,
110+ "mremap failed: %s", strerror(errno));
111 return;
112 }
113 pool->data = data;
114@@ -189,13 +204,14 @@ remap:
115 }
116
117 static const struct wl_shm_pool_interface shm_pool_impl = {
118- .create_buffer = create_buffer,
119- .destroy = destroy_resource,
120- .resize = resize,
121+ .create_buffer = create_buffer,
122+ .destroy = destroy_resource,
123+ .resize = resize,
124 };
125
126 static void
127-create_pool(struct wl_client *client, struct wl_resource *resource, uint32_t id, int32_t fd, int32_t size)
128+create_pool(struct wl_client *client, struct wl_resource *resource, uint32_t id,
129+ int32_t fd, int32_t size)
130 {
131 struct swc_shm *shm = wl_resource_get_user_data(resource);
132 struct pool *pool;
133@@ -206,15 +222,18 @@ create_pool(struct wl_client *client, struct wl_resource *resource, uint32_t id,
134 goto error0;
135 }
136 pool->shm = shm;
137- pool->resource = wl_resource_create(client, &wl_shm_pool_interface, wl_resource_get_version(resource), id);
138+ pool->resource = wl_resource_create(client, &wl_shm_pool_interface,
139+ wl_resource_get_version(resource), id);
140 if (!pool->resource) {
141 wl_resource_post_no_memory(resource);
142 goto error1;
143 }
144- wl_resource_set_implementation(pool->resource, &shm_pool_impl, pool, &destroy_pool_resource);
145+ wl_resource_set_implementation(pool->resource, &shm_pool_impl, pool,
146+ &destroy_pool_resource);
147 pool->data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
148 if (pool->data == MAP_FAILED) {
149- wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD, "mmap failed: %s", strerror(errno));
150+ wl_resource_post_error(resource, WL_SHM_ERROR_INVALID_FD,
151+ "mmap failed: %s", strerror(errno));
152 goto error2;
153 }
154 /* close(fd); */
155@@ -231,9 +250,7 @@ error0:
156 close(fd);
157 }
158
159-static const struct wl_shm_interface shm_impl = {
160- .create_pool = &create_pool
161-};
162+static const struct wl_shm_interface shm_impl = {.create_pool = &create_pool};
163
164 static void
165 bind_shm(struct wl_client *client, void *data, uint32_t version, uint32_t id)
166@@ -258,17 +275,22 @@ shm_create(struct wl_display *display)
167 struct swc_shm *shm;
168
169 shm = malloc(sizeof(*shm));
170- if (!shm)
171+ if (!shm) {
172 goto error0;
173+ }
174 shm->context = wld_pixman_create_context();
175- if (!shm->context)
176+ if (!shm->context) {
177 goto error1;
178+ }
179 shm->renderer = wld_create_renderer(shm->context);
180- if (!shm->renderer)
181+ if (!shm->renderer) {
182 goto error2;
183- shm->global = wl_global_create(display, &wl_shm_interface, 1, shm, &bind_shm);
184- if (!shm->global)
185+ }
186+ shm->global =
187+ wl_global_create(display, &wl_shm_interface, 1, shm, &bind_shm);
188+ if (!shm->global) {
189 goto error3;
190+ }
191
192 return shm;
193
+4,
-2
1@@ -32,7 +32,9 @@ struct swc_shm {
2 struct wld_renderer *renderer;
3 };
4
5-struct swc_shm *shm_create(struct wl_display *display);
6-void shm_destroy(struct swc_shm *shm);
7+struct swc_shm *
8+shm_create(struct wl_display *display);
9+void
10+shm_destroy(struct swc_shm *shm);
11
12 #endif
+34,
-24
1@@ -1,19 +1,19 @@
2 #include "snap.h"
3+#include "compositor.h"
4 #include "internal.h"
5+#include "pointer.h"
6 #include "screen.h"
7-#include "compositor.h"
8-#include "shm.h"
9 #include "seat.h"
10-#include "pointer.h"
11+#include "shm.h"
12
13+#include "swc_snap-server-protocol.h"
14+#include <stdint.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17-#include <stdint.h>
18 #include <string.h>
19 #include <unistd.h>
20 #include <wayland-server.h>
21 #include <wld/wld.h>
22-#include "swc_snap-server-protocol.h"
23
24 static void
25 ppm(int fd, const uint8_t *pixels, uint32_t width, uint32_t height,
26@@ -34,19 +34,15 @@ ppm(int fd, const uint8_t *pixels, uint32_t width, uint32_t height,
27
28 for (uint32_t x = 0; x < width; x++) {
29 uint32_t pixel = row[x];
30- unsigned char rgb[3] = {
31- (pixel >> 16) & 0xFF,
32- (pixel >> 8) & 0xFF,
33- pixel & 0xFF
34- };
35+ unsigned char rgb[3] = {(pixel >> 16) & 0xFF, (pixel >> 8) & 0xFF,
36+ pixel & 0xFF};
37 fwrite(rgb, 1, 3, f);
38 }
39 }
40
41- fclose(f);
42+ fclose(f);
43 }
44
45-
46 /* get cursor */
47 static void
48 cursor(uint8_t *dst, uint32_t dst_width, uint32_t dst_height,
49@@ -59,15 +55,18 @@ cursor(uint8_t *dst, uint32_t dst_width, uint32_t dst_height,
50 int32_t src_x = 0, src_y = 0;
51 uint32_t copy_w, copy_h;
52
53- if (!pointer || !pointer->cursor.buffer || !pointer->cursor.view.buffer)
54+ if (!pointer || !pointer->cursor.buffer || !pointer->cursor.view.buffer) {
55 return;
56+ }
57
58- if (!(pointer->cursor.view.screens & screen_mask(screen)))
59+ if (!(pointer->cursor.view.screens & screen_mask(screen))) {
60 return;
61+ }
62
63 cursor_buf = pointer->cursor.buffer;
64- if (!wld_map(cursor_buf) || !cursor_buf->map)
65+ if (!wld_map(cursor_buf) || !cursor_buf->map) {
66 return;
67+ }
68
69 dst_x = pointer->cursor.view.geometry.x - screen->base.geometry.x;
70 dst_y = pointer->cursor.view.geometry.y - screen->base.geometry.y;
71@@ -89,24 +88,32 @@ cursor(uint8_t *dst, uint32_t dst_width, uint32_t dst_height,
72 }
73
74 copy_w = cursor_buf->width - (uint32_t)src_x;
75- if (copy_w > dst_width - (uint32_t)dst_x)
76+ if (copy_w > dst_width - (uint32_t)dst_x) {
77 copy_w = dst_width - (uint32_t)dst_x;
78+ }
79 copy_h = cursor_buf->height - (uint32_t)src_y;
80- if (copy_h > dst_height - (uint32_t)dst_y)
81+ if (copy_h > dst_height - (uint32_t)dst_y) {
82 copy_h = dst_height - (uint32_t)dst_y;
83+ }
84
85 src = cursor_buf->map;
86
87 for (uint32_t y = 0; y < copy_h; y++) {
88- const uint32_t *src_row = (const uint32_t *)(src + ((size_t)(src_y + (int32_t)y) * cursor_buf->pitch)) + src_x;
89- uint32_t *dst_row = (uint32_t *)(dst + ((size_t)(dst_y + (int32_t)y) * dst_pitch)) + dst_x;
90+ const uint32_t *src_row =
91+ (const uint32_t *)(src + ((size_t)(src_y + (int32_t)y) *
92+ cursor_buf->pitch)) +
93+ src_x;
94+ uint32_t *dst_row =
95+ (uint32_t *)(dst + ((size_t)(dst_y + (int32_t)y) * dst_pitch)) +
96+ dst_x;
97
98 for (uint32_t x = 0; x < copy_w; x++) {
99 uint32_t src_px = src_row[x];
100 uint32_t a = src_px >> 24;
101
102- if (a == 0)
103+ if (a == 0) {
104 continue;
105+ }
106 if (a == 255) {
107 dst_row[x] = 0xFF000000 | (src_px & 0x00FFFFFF);
108 continue;
109@@ -114,9 +121,12 @@ cursor(uint8_t *dst, uint32_t dst_width, uint32_t dst_height,
110
111 uint32_t dst_px = dst_row[x];
112 uint32_t inv = 255 - a;
113- uint32_t r = ((src_px >> 16) & 0xFF) + ((((dst_px >> 16) & 0xFF) * inv + 127) / 255);
114- uint32_t g = ((src_px >> 8) & 0xFF) + ((((dst_px >> 8) & 0xFF) * inv + 127) / 255);
115- uint32_t b = (src_px & 0xFF) + (((dst_px & 0xFF) * inv + 127) / 255);
116+ uint32_t r = ((src_px >> 16) & 0xFF) +
117+ ((((dst_px >> 16) & 0xFF) * inv + 127) / 255);
118+ uint32_t g = ((src_px >> 8) & 0xFF) +
119+ ((((dst_px >> 8) & 0xFF) * inv + 127) / 255);
120+ uint32_t b =
121+ (src_px & 0xFF) + (((dst_px & 0xFF) * inv + 127) / 255);
122
123 dst_row[x] = 0xFF000000 | (r << 16) | (g << 8) | b;
124 }
125@@ -170,7 +180,7 @@ capture(struct wl_client *client, struct wl_resource *resource, int32_t fd)
126 }
127
128 static const struct swc_snap_interface snap_impl = {
129- .capture = capture,
130+ .capture = capture,
131 };
132
133 static void
+2,
-1
1@@ -4,6 +4,7 @@
2 struct wl_display;
3 struct wl_global;
4
5-struct wl_global *snap_manager_create(struct wl_display *display);
6+struct wl_global *
7+snap_manager_create(struct wl_display *display);
8
9 #endif
+22,
-13
1@@ -21,11 +21,11 @@
2 * SOFTWARE.
3 */
4
5-#include "swc.h"
6-#include "internal.h"
7 #include "subcompositor.h"
8+#include "internal.h"
9 #include "subsurface.h"
10 #include "surface.h"
11+#include "swc.h"
12 #include "util.h"
13
14 static bool
15@@ -33,8 +33,9 @@ is_descendant_of(struct surface *ancestor, struct surface *surface)
16 {
17 while (surface && surface->subsurface) {
18 surface = surface->subsurface->parent;
19- if (surface == ancestor)
20+ if (surface == ancestor) {
21 return true;
22+ }
23 }
24
25 return false;
26@@ -42,28 +43,33 @@ is_descendant_of(struct surface *ancestor, struct surface *surface)
27
28 static void
29 get_subsurface(struct wl_client *client, struct wl_resource *resource,
30- uint32_t id, struct wl_resource *surface_resource, struct wl_resource *parent_resource)
31+ uint32_t id, struct wl_resource *surface_resource,
32+ struct wl_resource *parent_resource)
33 {
34 struct subsurface *subsurface;
35 struct surface *surface = wl_resource_get_user_data(surface_resource);
36 struct surface *parent = wl_resource_get_user_data(parent_resource);
37
38 if (!surface || !parent) {
39- wl_resource_post_error(resource, WL_SUBCOMPOSITOR_ERROR_BAD_SURFACE, "invalid surface");
40+ wl_resource_post_error(resource, WL_SUBCOMPOSITOR_ERROR_BAD_SURFACE,
41+ "invalid surface");
42 return;
43 }
44
45 if (surface == parent || is_descendant_of(surface, parent)) {
46- wl_resource_post_error(resource, WL_SUBCOMPOSITOR_ERROR_BAD_PARENT, "invalid parent surface");
47+ wl_resource_post_error(resource, WL_SUBCOMPOSITOR_ERROR_BAD_PARENT,
48+ "invalid parent surface");
49 return;
50 }
51
52 if (surface->subsurface) {
53- wl_resource_post_error(resource, WL_SUBCOMPOSITOR_ERROR_BAD_SURFACE, "surface already has a subsurface role");
54+ wl_resource_post_error(resource, WL_SUBCOMPOSITOR_ERROR_BAD_SURFACE,
55+ "surface already has a subsurface role");
56 return;
57 }
58
59- subsurface = subsurface_new(client, wl_resource_get_version(resource), id, surface, parent);
60+ subsurface = subsurface_new(client, wl_resource_get_version(resource), id,
61+ surface, parent);
62
63 if (!subsurface) {
64 wl_resource_post_no_memory(resource);
65@@ -73,16 +79,18 @@ get_subsurface(struct wl_client *client, struct wl_resource *resource,
66 }
67
68 static const struct wl_subcompositor_interface subcompositor_impl = {
69- .destroy = destroy_resource,
70- .get_subsurface = get_subsurface,
71+ .destroy = destroy_resource,
72+ .get_subsurface = get_subsurface,
73 };
74
75 static void
76-bind_subcompositor(struct wl_client *client, void *data, uint32_t version, uint32_t id)
77+bind_subcompositor(struct wl_client *client, void *data, uint32_t version,
78+ uint32_t id)
79 {
80 struct wl_resource *resource;
81
82- resource = wl_resource_create(client, &wl_subcompositor_interface, version, id);
83+ resource =
84+ wl_resource_create(client, &wl_subcompositor_interface, version, id);
85 if (!resource) {
86 wl_client_post_no_memory(client);
87 return;
88@@ -93,5 +101,6 @@ bind_subcompositor(struct wl_client *client, void *data, uint32_t version, uint3
89 struct wl_global *
90 subcompositor_create(struct wl_display *display)
91 {
92- return wl_global_create(display, &wl_subcompositor_interface, 1, NULL, &bind_subcompositor);
93+ return wl_global_create(display, &wl_subcompositor_interface, 1, NULL,
94+ &bind_subcompositor);
95 }
+2,
-1
1@@ -26,6 +26,7 @@
2
3 struct wl_display;
4
5-struct wl_global *subcompositor_create(struct wl_display *display);
6+struct wl_global *
7+subcompositor_create(struct wl_display *display);
8
9 #endif
+147,
-81
1@@ -34,10 +34,12 @@ bool
2 subsurface_is_synchronized(const struct subsurface *subsurface)
3 {
4 while (subsurface) {
5- if (subsurface->sync)
6+ if (subsurface->sync) {
7 return true;
8- if (!subsurface->parent)
9+ }
10+ if (!subsurface->parent) {
11 return false;
12+ }
13 subsurface = subsurface->parent->subsurface;
14 }
15
16@@ -50,17 +52,21 @@ subsurface_update_position(struct subsurface *subsurface)
17 struct compositor_view *parent_view;
18 struct compositor_view *view;
19
20- if (!subsurface->surface || !subsurface->parent)
21+ if (!subsurface->surface || !subsurface->parent) {
22 return;
23+ }
24
25 view = compositor_view(subsurface->surface->view);
26 parent_view = compositor_view(subsurface->parent->view);
27- if (!view || !parent_view)
28+ if (!view || !parent_view) {
29 return;
30+ }
31
32 view_move(&view->base,
33- parent_view->base.geometry.x + subsurface->x - parent_view->buffer_offset_x,
34- parent_view->base.geometry.y + subsurface->y - parent_view->buffer_offset_y);
35+ parent_view->base.geometry.x + subsurface->x -
36+ parent_view->buffer_offset_x,
37+ parent_view->base.geometry.y + subsurface->y -
38+ parent_view->buffer_offset_y);
39 }
40
41 static void
42@@ -78,29 +84,35 @@ subsurface_update_visibility(struct subsurface *subsurface)
43 struct compositor_view *view;
44 struct compositor_view *parent_view;
45
46- if (!subsurface || !subsurface->surface || !subsurface->parent)
47+ if (!subsurface || !subsurface->surface || !subsurface->parent) {
48 return;
49+ }
50
51 view = compositor_view(subsurface->surface->view);
52 parent_view = compositor_view(subsurface->parent->view);
53- if (!view || !parent_view)
54+ if (!view || !parent_view) {
55 return;
56+ }
57
58- if (subsurface->added && parent_view->visible && subsurface->surface->state.buffer)
59+ if (subsurface->added && parent_view->visible &&
60+ subsurface->surface->state.buffer) {
61 compositor_view_show(view);
62- else
63+ } else {
64 compositor_view_hide(view);
65+ }
66 }
67
68 static void
69 handle_parent_view_change(struct view_handler *handler)
70 {
71- struct subsurface *subsurface = wl_container_of(handler, subsurface, parent_view_handler);
72+ struct subsurface *subsurface =
73+ wl_container_of(handler, subsurface, parent_view_handler);
74 subsurface_update_position(subsurface);
75 }
76
77 static void
78-handle_parent_view_resize(struct view_handler *handler, uint32_t old_width, uint32_t old_height)
79+handle_parent_view_resize(struct view_handler *handler, uint32_t old_width,
80+ uint32_t old_height)
81 {
82 (void)old_width;
83 (void)old_height;
84@@ -108,9 +120,9 @@ handle_parent_view_resize(struct view_handler *handler, uint32_t old_width, uint
85 }
86
87 static const struct view_handler_impl parent_view_handler_impl = {
88- .attach = handle_parent_view_change,
89- .move = handle_parent_view_change,
90- .resize = handle_parent_view_resize,
91+ .attach = handle_parent_view_change,
92+ .move = handle_parent_view_change,
93+ .resize = handle_parent_view_resize,
94 };
95
96 static struct subsurface *
97@@ -119,17 +131,24 @@ subsurface_find_sibling(struct subsurface *subsurface, struct surface *surface)
98 struct surface *parent = subsurface->parent;
99 struct subsurface *sibling;
100
101- if (!parent)
102+ if (!parent) {
103 return NULL;
104+ }
105
106- wl_list_for_each (sibling, &parent->pending.state.subsurfaces_below, pending_link) {
107- if (sibling->surface == surface && sibling != subsurface)
108+ wl_list_for_each(sibling, &parent->pending.state.subsurfaces_below,
109+ pending_link)
110+ {
111+ if (sibling->surface == surface && sibling != subsurface) {
112 return sibling;
113+ }
114 }
115
116- wl_list_for_each (sibling, &parent->pending.state.subsurfaces_above, pending_link) {
117- if (sibling->surface == surface && sibling != subsurface)
118+ wl_list_for_each(sibling, &parent->pending.state.subsurfaces_above,
119+ pending_link)
120+ {
121+ if (sibling->surface == surface && sibling != subsurface) {
122 return sibling;
123+ }
124 }
125
126 return NULL;
127@@ -141,8 +160,10 @@ is_valid_sibling(struct subsurface *subsurface, struct surface *sibling_surface,
128 {
129 struct subsurface *sibling;
130
131- if (!subsurface->parent || !sibling_surface || sibling_surface == subsurface->surface)
132+ if (!subsurface->parent || !sibling_surface ||
133+ sibling_surface == subsurface->surface) {
134 return false;
135+ }
136
137 if (sibling_surface == subsurface->parent) {
138 *sibling_subsurface = NULL;
139@@ -150,8 +171,9 @@ is_valid_sibling(struct subsurface *subsurface, struct surface *sibling_surface,
140 }
141
142 sibling = subsurface_find_sibling(subsurface, sibling_surface);
143- if (!sibling)
144+ if (!sibling) {
145 return false;
146+ }
147
148 *sibling_subsurface = sibling;
149 return true;
150@@ -161,20 +183,24 @@ static void
151 handle_surface_destroy(struct wl_listener *listener, void *data)
152 {
153 (void)data;
154- struct subsurface *subsurface = wl_container_of(listener, subsurface, surface_destroy_listener);
155- if (subsurface->resource)
156+ struct subsurface *subsurface =
157+ wl_container_of(listener, subsurface, surface_destroy_listener);
158+ if (subsurface->resource) {
159 wl_resource_destroy(subsurface->resource);
160+ }
161 }
162
163 static void
164 handle_parent_destroy(struct wl_listener *listener, void *data)
165 {
166 (void)data;
167- struct subsurface *subsurface = wl_container_of(listener, subsurface, parent_destroy_listener);
168+ struct subsurface *subsurface =
169+ wl_container_of(listener, subsurface, parent_destroy_listener);
170 struct compositor_view *view = NULL;
171
172- if (subsurface->surface && subsurface->surface->view)
173+ if (subsurface->surface && subsurface->surface->view) {
174 view = compositor_view(subsurface->surface->view);
175+ }
176
177 if (view) {
178 view->parent = NULL;
179@@ -198,7 +224,8 @@ handle_parent_destroy(struct wl_listener *listener, void *data)
180 }
181
182 static void
183-set_position(struct wl_client *client, struct wl_resource *resource, int32_t x, int32_t y)
184+set_position(struct wl_client *client, struct wl_resource *resource, int32_t x,
185+ int32_t y)
186 {
187 (void)client;
188 struct subsurface *subsurface = wl_resource_get_user_data(resource);
189@@ -209,46 +236,56 @@ set_position(struct wl_client *client, struct wl_resource *resource, int32_t x,
190 }
191
192 static void
193-place_above(struct wl_client *client, struct wl_resource *resource, struct wl_resource *sibling_resource)
194+place_above(struct wl_client *client, struct wl_resource *resource,
195+ struct wl_resource *sibling_resource)
196 {
197 (void)client;
198 struct subsurface *subsurface = wl_resource_get_user_data(resource);
199- struct surface *sibling_surface = wl_resource_get_user_data(sibling_resource);
200+ struct surface *sibling_surface =
201+ wl_resource_get_user_data(sibling_resource);
202 struct subsurface *sibling_subsurface;
203
204 if (!is_valid_sibling(subsurface, sibling_surface, &sibling_subsurface)) {
205- wl_resource_post_error(resource, WL_SUBSURFACE_ERROR_BAD_SURFACE, "invalid sibling surface");
206+ wl_resource_post_error(resource, WL_SUBSURFACE_ERROR_BAD_SURFACE,
207+ "invalid sibling surface");
208 return;
209 }
210
211 if (!sibling_subsurface) {
212 wl_list_remove(&subsurface->pending_link);
213- wl_list_insert(&subsurface->parent->pending.state.subsurfaces_above, &subsurface->pending_link);
214+ wl_list_insert(&subsurface->parent->pending.state.subsurfaces_above,
215+ &subsurface->pending_link);
216 } else {
217 wl_list_remove(&subsurface->pending_link);
218- wl_list_insert(&sibling_subsurface->pending_link, &subsurface->pending_link);
219+ wl_list_insert(&sibling_subsurface->pending_link,
220+ &subsurface->pending_link);
221 }
222 }
223
224 static void
225-place_below(struct wl_client *client, struct wl_resource *resource, struct wl_resource *sibling_resource)
226+place_below(struct wl_client *client, struct wl_resource *resource,
227+ struct wl_resource *sibling_resource)
228 {
229 (void)client;
230 struct subsurface *subsurface = wl_resource_get_user_data(resource);
231- struct surface *sibling_surface = wl_resource_get_user_data(sibling_resource);
232+ struct surface *sibling_surface =
233+ wl_resource_get_user_data(sibling_resource);
234 struct subsurface *sibling_subsurface;
235
236 if (!is_valid_sibling(subsurface, sibling_surface, &sibling_subsurface)) {
237- wl_resource_post_error(resource, WL_SUBSURFACE_ERROR_BAD_SURFACE, "invalid sibling surface");
238+ wl_resource_post_error(resource, WL_SUBSURFACE_ERROR_BAD_SURFACE,
239+ "invalid sibling surface");
240 return;
241 }
242
243 if (!sibling_subsurface) {
244 wl_list_remove(&subsurface->pending_link);
245- wl_list_insert(subsurface->parent->pending.state.subsurfaces_below.prev, &subsurface->pending_link);
246+ wl_list_insert(subsurface->parent->pending.state.subsurfaces_below.prev,
247+ &subsurface->pending_link);
248 } else {
249 wl_list_remove(&subsurface->pending_link);
250- wl_list_insert(sibling_subsurface->pending_link.prev, &subsurface->pending_link);
251+ wl_list_insert(sibling_subsurface->pending_link.prev,
252+ &subsurface->pending_link);
253 }
254 }
255
256@@ -269,11 +306,8 @@ set_desync(struct wl_client *client, struct wl_resource *resource)
257
258 subsurface->sync = false;
259
260- if (synchronized
261- && !subsurface_is_synchronized(subsurface)
262- && subsurface->pending
263- && subsurface->surface)
264- {
265+ if (synchronized && !subsurface_is_synchronized(subsurface) &&
266+ subsurface->pending && subsurface->surface) {
267 surface_commit_pending(subsurface->surface);
268 }
269 }
270@@ -286,53 +320,67 @@ subsurface_parent_commit(struct surface *parent)
271 struct compositor_view *reference;
272 struct compositor_view *child_view;
273
274- if (!parent)
275+ if (!parent) {
276 return;
277+ }
278
279- wl_list_for_each (child, &parent->subsurfaces, link)
280- list_remove_if_linked(&child->current_link);
281+ wl_list_for_each(child, &parent->subsurfaces, link)
282+ list_remove_if_linked(&child->current_link);
283
284 wl_list_init(&parent->state.subsurfaces_below);
285 wl_list_init(&parent->state.subsurfaces_above);
286
287- wl_list_for_each (child, &parent->pending.state.subsurfaces_below, pending_link)
288- wl_list_insert(parent->state.subsurfaces_below.prev, &child->current_link);
289+ wl_list_for_each(child, &parent->pending.state.subsurfaces_below,
290+ pending_link)
291+ wl_list_insert(parent->state.subsurfaces_below.prev,
292+ &child->current_link);
293
294- wl_list_for_each (child, &parent->pending.state.subsurfaces_above, pending_link)
295- wl_list_insert(parent->state.subsurfaces_above.prev, &child->current_link);
296+ wl_list_for_each(child, &parent->pending.state.subsurfaces_above,
297+ pending_link)
298+ wl_list_insert(parent->state.subsurfaces_above.prev,
299+ &child->current_link);
300
301 parent_view = parent->view ? compositor_view(parent->view) : NULL;
302 if (parent_view) {
303 reference = parent_view;
304- wl_list_for_each_reverse (child, &parent->state.subsurfaces_below, current_link) {
305- if (!child->surface || !child->surface->view)
306+ wl_list_for_each_reverse(child, &parent->state.subsurfaces_below,
307+ current_link)
308+ {
309+ if (!child->surface || !child->surface->view) {
310 continue;
311+ }
312
313 child_view = compositor_view(child->surface->view);
314- if (!child_view)
315+ if (!child_view) {
316 continue;
317+ }
318
319 compositor_view_restack(child_view, reference, false);
320 reference = child_view;
321 }
322
323 reference = parent_view;
324- wl_list_for_each (child, &parent->state.subsurfaces_above, current_link) {
325- if (!child->surface || !child->surface->view)
326+ wl_list_for_each(child, &parent->state.subsurfaces_above, current_link)
327+ {
328+ if (!child->surface || !child->surface->view) {
329 continue;
330+ }
331
332 child_view = compositor_view(child->surface->view);
333- if (!child_view)
334+ if (!child_view) {
335 continue;
336+ }
337
338 compositor_view_restack(child_view, reference, true);
339 reference = child_view;
340 }
341 }
342
343- wl_list_for_each (child, &parent->subsurfaces, link) {
344- if (!child->pending_position)
345+ wl_list_for_each(child, &parent->subsurfaces, link)
346+ {
347+ if (!child->pending_position) {
348 continue;
349+ }
350
351 child->x = child->pending_x;
352 child->y = child->pending_y;
353@@ -340,25 +388,29 @@ subsurface_parent_commit(struct surface *parent)
354 subsurface_update_position(child);
355 }
356
357- wl_list_for_each (child, &parent->state.subsurfaces_below, current_link) {
358- if (!child->added)
359+ wl_list_for_each(child, &parent->state.subsurfaces_below, current_link)
360+ {
361+ if (!child->added) {
362 child->added = true;
363+ }
364 subsurface_update_visibility(child);
365 }
366- wl_list_for_each (child, &parent->state.subsurfaces_above, current_link) {
367- if (!child->added)
368+ wl_list_for_each(child, &parent->state.subsurfaces_above, current_link)
369+ {
370+ if (!child->added) {
371 child->added = true;
372+ }
373 subsurface_update_visibility(child);
374 }
375 }
376
377 static const struct wl_subsurface_interface subsurface_impl = {
378- .destroy = destroy_resource,
379- .set_position = set_position,
380- .place_above = place_above,
381- .place_below = place_below,
382- .set_sync = set_sync,
383- .set_desync = set_desync,
384+ .destroy = destroy_resource,
385+ .set_position = set_position,
386+ .place_above = place_above,
387+ .place_below = place_below,
388+ .set_sync = set_sync,
389+ .set_desync = set_desync,
390 };
391
392 static void
393@@ -367,8 +419,9 @@ subsurface_destroy(struct wl_resource *resource)
394 struct subsurface *subsurface = wl_resource_get_user_data(resource);
395
396 if (subsurface->surface) {
397- if (subsurface->surface->subsurface == subsurface)
398+ if (subsurface->surface->subsurface == subsurface) {
399 subsurface->surface->subsurface = NULL;
400+ }
401 }
402
403 if (!wl_list_empty(&subsurface->parent_destroy_listener.link)) {
404@@ -394,9 +447,11 @@ subsurface_destroy(struct wl_resource *resource)
405 list_remove_if_linked(&subsurface->current_link);
406
407 if (subsurface->surface && subsurface->surface->view) {
408- struct compositor_view *view = compositor_view(subsurface->surface->view);
409- if (view && !view->window)
410+ struct compositor_view *view =
411+ compositor_view(subsurface->surface->view);
412+ if (view && !view->window) {
413 compositor_view_destroy(view);
414+ }
415 }
416
417 free(subsurface);
418@@ -410,15 +465,19 @@ subsurface_new(struct wl_client *client, uint32_t version, uint32_t id,
419 struct compositor_view *parent_view;
420 struct compositor_view *view;
421
422- if (!(subsurface = malloc(sizeof(*subsurface))))
423+ if (!(subsurface = malloc(sizeof(*subsurface)))) {
424 goto error0;
425+ }
426
427- subsurface->resource = wl_resource_create(client, &wl_subsurface_interface, version, id);
428+ subsurface->resource =
429+ wl_resource_create(client, &wl_subsurface_interface, version, id);
430
431- if (!subsurface->resource)
432+ if (!subsurface->resource) {
433 goto error1;
434+ }
435
436- wl_resource_set_implementation(subsurface->resource, &subsurface_impl, subsurface, &subsurface_destroy);
437+ wl_resource_set_implementation(subsurface->resource, &subsurface_impl,
438+ subsurface, &subsurface_destroy);
439
440 subsurface->surface = surface;
441 subsurface->parent = parent;
442@@ -439,30 +498,37 @@ subsurface_new(struct wl_client *client, uint32_t version, uint32_t id,
443 wl_list_init(&subsurface->pending_link);
444 wl_list_init(&subsurface->current_link);
445
446- if (!surface->view)
447+ if (!surface->view) {
448 compositor_create_view(surface);
449- if (!parent->view)
450+ }
451+ if (!parent->view) {
452 compositor_create_view(parent);
453+ }
454
455 parent_view = compositor_view(parent->view);
456 view = compositor_view(surface->view);
457- if (!parent_view || !view)
458+ if (!parent_view || !view) {
459 goto error2;
460+ }
461
462 compositor_view_set_parent(view, parent_view);
463 wl_list_remove(&view->link);
464 wl_list_insert(parent_view->link.prev, &view->link);
465
466- wl_list_insert(&parent_view->base.handlers, &subsurface->parent_view_handler.link);
467+ wl_list_insert(&parent_view->base.handlers,
468+ &subsurface->parent_view_handler.link);
469 subsurface_update_position(subsurface);
470 wl_list_insert(&parent->subsurfaces, &subsurface->link);
471- wl_list_insert(parent->pending.state.subsurfaces_above.prev, &subsurface->pending_link);
472+ wl_list_insert(parent->pending.state.subsurfaces_above.prev,
473+ &subsurface->pending_link);
474 subsurface_update_visibility(subsurface);
475
476 subsurface->surface_destroy_listener.notify = handle_surface_destroy;
477- wl_resource_add_destroy_listener(surface->resource, &subsurface->surface_destroy_listener);
478+ wl_resource_add_destroy_listener(surface->resource,
479+ &subsurface->surface_destroy_listener);
480 subsurface->parent_destroy_listener.notify = handle_parent_destroy;
481- wl_resource_add_destroy_listener(parent->resource, &subsurface->parent_destroy_listener);
482+ wl_resource_add_destroy_listener(parent->resource,
483+ &subsurface->parent_destroy_listener);
484
485 return subsurface;
486
+9,
-5
1@@ -51,11 +51,15 @@ struct subsurface {
2 bool added;
3 };
4
5-bool subsurface_is_synchronized(const struct subsurface *subsurface);
6-void subsurface_update_visibility(struct subsurface *subsurface);
7-void subsurface_parent_commit(struct surface *parent);
8+bool
9+subsurface_is_synchronized(const struct subsurface *subsurface);
10+void
11+subsurface_update_visibility(struct subsurface *subsurface);
12+void
13+subsurface_parent_commit(struct surface *parent);
14
15-struct subsurface *subsurface_new(struct wl_client *client, uint32_t version, uint32_t id,
16- struct surface *surface, struct surface *parent);
17+struct subsurface *
18+subsurface_new(struct wl_client *client, uint32_t version, uint32_t id,
19+ struct surface *surface, struct surface *parent);
20
21 #endif
+109,
-61
1@@ -32,8 +32,8 @@
2 #include "view.h"
3 #include "wayland_buffer.h"
4
5-#include <stdlib.h>
6 #include <stdio.h>
7+#include <stdlib.h>
8 #include <wld/wld.h>
9
10 /**
11@@ -68,16 +68,17 @@ state_finalize(struct surface_state *state)
12 {
13 struct wl_resource *resource, *tmp;
14
15- if (state->buffer)
16+ if (state->buffer) {
17 wl_list_remove(&state->buffer_destroy_listener.link);
18+ }
19
20 pixman_region32_fini(&state->damage);
21 pixman_region32_fini(&state->opaque);
22 pixman_region32_fini(&state->input);
23
24 /* Remove all leftover callbacks. */
25- wl_list_for_each_safe (resource, tmp, &state->frame_callbacks, link)
26- wl_resource_destroy(resource);
27+ wl_list_for_each_safe(resource, tmp, &state->frame_callbacks, link)
28+ wl_resource_destroy(resource);
29 }
30
31 /**
32@@ -89,11 +90,14 @@ state_set_buffer(struct surface_state *state, struct wl_resource *resource)
33 {
34 struct wld_buffer *buffer = resource ? wayland_buffer_get(resource) : NULL;
35
36- if (state->buffer)
37+ if (state->buffer) {
38 wl_list_remove(&state->buffer_destroy_listener.link);
39+ }
40
41- if (buffer)
42- wl_resource_add_destroy_listener(resource, &state->buffer_destroy_listener);
43+ if (buffer) {
44+ wl_resource_add_destroy_listener(resource,
45+ &state->buffer_destroy_listener);
46+ }
47
48 state->buffer = buffer;
49 state->buffer_resource = resource;
50@@ -105,7 +109,8 @@ handle_frame(struct view_handler *handler, uint32_t time)
51 struct surface *surface = wl_container_of(handler, surface, view_handler);
52 struct wl_resource *resource, *tmp;
53
54- wl_list_for_each_safe (resource, tmp, &surface->state.frame_callbacks, link) {
55+ wl_list_for_each_safe(resource, tmp, &surface->state.frame_callbacks, link)
56+ {
57 wl_callback_send_done(resource, time);
58 wl_resource_destroy(resource);
59 }
60@@ -124,26 +129,30 @@ handle_screens(struct view_handler *handler, uint32_t entered, uint32_t left)
61
62 client = wl_resource_get_client(surface->resource);
63
64- wl_list_for_each (screen, &swc.screens, link) {
65- if (!((entered | left) & screen_mask(screen)))
66+ wl_list_for_each(screen, &swc.screens, link)
67+ {
68+ if (!((entered | left) & screen_mask(screen))) {
69 continue;
70+ }
71
72- wl_list_for_each (output, &screen->outputs, link) {
73+ wl_list_for_each(output, &screen->outputs, link)
74+ {
75 resource = wl_resource_find_for_client(&output->resources, client);
76
77 if (resource) {
78- if (entered & screen_mask(screen))
79+ if (entered & screen_mask(screen)) {
80 wl_surface_send_enter(surface->resource, resource);
81- else if (left & screen_mask(screen))
82+ } else if (left & screen_mask(screen)) {
83 wl_surface_send_leave(surface->resource, resource);
84+ }
85 }
86 }
87 }
88 }
89
90 static const struct view_handler_impl view_handler_impl = {
91- .frame = handle_frame,
92- .screens = handle_screens,
93+ .frame = handle_frame,
94+ .screens = handle_screens,
95 };
96
97 static void
98@@ -160,12 +169,15 @@ attach(struct wl_client *client, struct wl_resource *resource,
99 }
100
101 static void
102-damage(struct wl_client *client, struct wl_resource *resource, int32_t x, int32_t y, int32_t width, int32_t height)
103+damage(struct wl_client *client, struct wl_resource *resource, int32_t x,
104+ int32_t y, int32_t width, int32_t height)
105 {
106 struct surface *surface = wl_resource_get_user_data(resource);
107
108 surface->pending.commit |= SURFACE_COMMIT_DAMAGE;
109- pixman_region32_union_rect(&surface->pending.state.damage, &surface->pending.state.damage, x, y, width, height);
110+ pixman_region32_union_rect(&surface->pending.state.damage,
111+ &surface->pending.state.damage, x, y, width,
112+ height);
113 }
114
115 static void
116@@ -174,18 +186,22 @@ frame(struct wl_client *client, struct wl_resource *resource, uint32_t id)
117 struct surface *surface = wl_resource_get_user_data(resource);
118 struct wl_resource *callback_resource;
119
120- callback_resource = wl_resource_create(client, &wl_callback_interface, 1, id);
121+ callback_resource =
122+ wl_resource_create(client, &wl_callback_interface, 1, id);
123 if (!callback_resource) {
124 wl_resource_post_no_memory(resource);
125 return;
126 }
127 surface->pending.commit |= SURFACE_COMMIT_FRAME;
128- wl_resource_set_implementation(callback_resource, NULL, NULL, &remove_resource);
129- wl_list_insert(surface->pending.state.frame_callbacks.prev, wl_resource_get_link(callback_resource));
130+ wl_resource_set_implementation(callback_resource, NULL, NULL,
131+ &remove_resource);
132+ wl_list_insert(surface->pending.state.frame_callbacks.prev,
133+ wl_resource_get_link(callback_resource));
134 }
135
136 static void
137-set_opaque_region(struct wl_client *client, struct wl_resource *resource, struct wl_resource *region_resource)
138+set_opaque_region(struct wl_client *client, struct wl_resource *resource,
139+ struct wl_resource *region_resource)
140 {
141 struct surface *surface = wl_resource_get_user_data(resource);
142
143@@ -200,7 +216,8 @@ set_opaque_region(struct wl_client *client, struct wl_resource *resource, struct
144 }
145
146 static void
147-set_input_region(struct wl_client *client, struct wl_resource *resource, struct wl_resource *region_resource)
148+set_input_region(struct wl_client *client, struct wl_resource *resource,
149+ struct wl_resource *region_resource)
150 {
151 struct surface *surface = wl_resource_get_user_data(resource);
152
153@@ -217,7 +234,9 @@ set_input_region(struct wl_client *client, struct wl_resource *resource, struct
154 static inline void
155 trim_region(pixman_region32_t *region, struct wld_buffer *buffer)
156 {
157- pixman_region32_intersect_rect(region, region, 0, 0, buffer ? buffer->width : 0, buffer ? buffer->height : 0);
158+ pixman_region32_intersect_rect(region, region, 0, 0,
159+ buffer ? buffer->width : 0,
160+ buffer ? buffer->height : 0);
161 }
162
163 static void
164@@ -227,31 +246,40 @@ surface_apply_pending(struct surface *surface, bool flush_children)
165
166 /* Attach */
167 if (surface->pending.commit & SURFACE_COMMIT_ATTACH) {
168- if (surface->state.buffer && surface->state.buffer != surface->pending.state.buffer)
169+ if (surface->state.buffer &&
170+ surface->state.buffer != surface->pending.state.buffer) {
171 wl_buffer_send_release(surface->state.buffer_resource);
172+ }
173
174- state_set_buffer(&surface->state, surface->pending.state.buffer_resource);
175+ state_set_buffer(&surface->state,
176+ surface->pending.state.buffer_resource);
177 }
178
179 buffer = surface->state.buffer;
180
181 /* Damage */
182 if (surface->pending.commit & SURFACE_COMMIT_DAMAGE) {
183- pixman_region32_union(&surface->state.damage, &surface->state.damage, &surface->pending.state.damage);
184+ pixman_region32_union(&surface->state.damage, &surface->state.damage,
185+ &surface->pending.state.damage);
186 pixman_region32_clear(&surface->pending.state.damage);
187 }
188
189 /* Opaque */
190- if (surface->pending.commit & SURFACE_COMMIT_OPAQUE)
191- pixman_region32_copy(&surface->state.opaque, &surface->pending.state.opaque);
192+ if (surface->pending.commit & SURFACE_COMMIT_OPAQUE) {
193+ pixman_region32_copy(&surface->state.opaque,
194+ &surface->pending.state.opaque);
195+ }
196
197 /* Input */
198- if (surface->pending.commit & SURFACE_COMMIT_INPUT)
199- pixman_region32_copy(&surface->state.input, &surface->pending.state.input);
200+ if (surface->pending.commit & SURFACE_COMMIT_INPUT) {
201+ pixman_region32_copy(&surface->state.input,
202+ &surface->pending.state.input);
203+ }
204
205 /* Frame */
206 if (surface->pending.commit & SURFACE_COMMIT_FRAME) {
207- wl_list_insert_list(&surface->state.frame_callbacks, &surface->pending.state.frame_callbacks);
208+ wl_list_insert_list(&surface->state.frame_callbacks,
209+ &surface->pending.state.frame_callbacks);
210 wl_list_init(&surface->pending.state.frame_callbacks);
211 }
212
213@@ -259,28 +287,34 @@ surface_apply_pending(struct surface *surface, bool flush_children)
214 trim_region(&surface->state.opaque, buffer);
215
216 if (surface->view) {
217- if (surface->pending.commit & SURFACE_COMMIT_ATTACH)
218+ if (surface->pending.commit & SURFACE_COMMIT_ATTACH) {
219 view_attach(surface->view, buffer);
220+ }
221 view_update(surface->view);
222 }
223
224 surface->pending.commit = 0;
225
226- if (surface->subsurface)
227+ if (surface->subsurface) {
228 surface->subsurface->pending = false;
229+ }
230
231- if (surface->subsurface)
232+ if (surface->subsurface) {
233 subsurface_update_visibility(surface->subsurface);
234+ }
235
236 subsurface_parent_commit(surface);
237
238 if (flush_children) {
239 struct subsurface *child;
240- wl_list_for_each (child, &surface->subsurfaces, link) {
241- if (!child->pending || !subsurface_is_synchronized(child))
242+ wl_list_for_each(child, &surface->subsurfaces, link)
243+ {
244+ if (!child->pending || !subsurface_is_synchronized(child)) {
245 continue;
246- if (child->surface)
247+ }
248+ if (child->surface) {
249 surface_apply_pending(child->surface, true);
250+ }
251 }
252 }
253 }
254@@ -290,7 +324,8 @@ commit(struct wl_client *client, struct wl_resource *resource)
255 {
256 struct surface *surface = wl_resource_get_user_data(resource);
257
258- if (surface->subsurface && subsurface_is_synchronized(surface->subsurface)) {
259+ if (surface->subsurface &&
260+ subsurface_is_synchronized(surface->subsurface)) {
261 surface->subsurface->pending = true;
262 return;
263 }
264@@ -299,38 +334,44 @@ commit(struct wl_client *client, struct wl_resource *resource)
265 }
266
267 static void
268-set_buffer_transform(struct wl_client *client, struct wl_resource *surface, int32_t transform)
269+set_buffer_transform(struct wl_client *client, struct wl_resource *surface,
270+ int32_t transform)
271 {
272 if (transform != WL_OUTPUT_TRANSFORM_NORMAL) {
273 wl_resource_post_error(surface, WL_SURFACE_ERROR_INVALID_TRANSFORM,
274- "buffer transform %" PRId32 " not supported", transform);
275+ "buffer transform %" PRId32 " not supported",
276+ transform);
277 }
278 }
279
280 static void
281-set_buffer_scale(struct wl_client *client, struct wl_resource *surface, int32_t scale)
282+set_buffer_scale(struct wl_client *client, struct wl_resource *surface,
283+ int32_t scale)
284 {
285- if (scale != 1)
286- wl_resource_post_error(surface, WL_SURFACE_ERROR_INVALID_SCALE, "buffer scale not supported");
287+ if (scale != 1) {
288+ wl_resource_post_error(surface, WL_SURFACE_ERROR_INVALID_SCALE,
289+ "buffer scale not supported");
290+ }
291 }
292
293 static void
294-damage_buffer(struct wl_client *client, struct wl_resource *surface, int32_t x, int32_t y, int32_t w, int32_t h)
295+damage_buffer(struct wl_client *client, struct wl_resource *surface, int32_t x,
296+ int32_t y, int32_t w, int32_t h)
297 {
298 damage(client, surface, x, y, w, h);
299 }
300
301 static const struct wl_surface_interface surface_impl = {
302- .destroy = destroy_resource,
303- .attach = attach,
304- .damage = damage,
305- .frame = frame,
306- .set_opaque_region = set_opaque_region,
307- .set_input_region = set_input_region,
308- .commit = commit,
309- .set_buffer_transform = set_buffer_transform,
310- .set_buffer_scale = set_buffer_scale,
311- .damage_buffer = damage_buffer,
312+ .destroy = destroy_resource,
313+ .attach = attach,
314+ .damage = damage,
315+ .frame = frame,
316+ .set_opaque_region = set_opaque_region,
317+ .set_input_region = set_input_region,
318+ .commit = commit,
319+ .set_buffer_transform = set_buffer_transform,
320+ .set_buffer_scale = set_buffer_scale,
321+ .damage_buffer = damage_buffer,
322 };
323
324 static void
325@@ -341,8 +382,9 @@ surface_destroy(struct wl_resource *resource)
326 state_finalize(&surface->state);
327 state_finalize(&surface->pending.state);
328
329- if (surface->view)
330+ if (surface->view) {
331 wl_list_remove(&surface->view_handler.link);
332+ }
333
334 free(surface);
335 }
336@@ -360,13 +402,17 @@ surface_new(struct wl_client *client, uint32_t version, uint32_t id)
337 struct surface *surface;
338
339 surface = malloc(sizeof(*surface));
340- if (!surface)
341+ if (!surface) {
342 goto error0;
343+ }
344
345- surface->resource = wl_resource_create(client, &wl_surface_interface, version, id);
346- if (!surface->resource)
347+ surface->resource =
348+ wl_resource_create(client, &wl_surface_interface, version, id);
349+ if (!surface->resource) {
350 goto error1;
351- wl_resource_set_implementation(surface->resource, &surface_impl, surface, &surface_destroy);
352+ }
353+ wl_resource_set_implementation(surface->resource, &surface_impl, surface,
354+ &surface_destroy);
355
356 /* Initialize the surface. */
357 surface->pending.commit = 0;
358@@ -395,11 +441,13 @@ error0:
359 void
360 surface_set_view(struct surface *surface, struct view *view)
361 {
362- if (surface->view == view)
363+ if (surface->view == view) {
364 return;
365+ }
366
367- if (surface->view)
368+ if (surface->view) {
369 wl_list_remove(&surface->view_handler.link);
370+ }
371
372 surface->view = view;
373
+6,
-3
1@@ -82,8 +82,11 @@ struct surface {
2 bool window_geometry_applied;
3 };
4
5-struct surface *surface_new(struct wl_client *client, uint32_t version, uint32_t id);
6-void surface_set_view(struct surface *surface, struct view *view);
7-void surface_commit_pending(struct surface *surface);
8+struct surface *
9+surface_new(struct wl_client *client, uint32_t version, uint32_t id);
10+void
11+surface_set_view(struct surface *surface, struct view *view);
12+void
13+surface_commit_pending(struct surface *surface);
14
15 #endif
+36,
-21
1@@ -28,9 +28,9 @@
2 #include "drm.h"
3 #include "event.h"
4 #include "internal.h"
5-#include "launch.h"
6 #include "kde_decoration.h"
7 #include "keyboard.h"
8+#include "launch.h"
9 #include "panel_manager.h"
10 #include "pointer.h"
11 #include "screen.h"
12@@ -45,7 +45,7 @@
13 #include "xdg_decoration.h"
14 #include "xdg_shell.h"
15 #ifdef ENABLE_XWAYLAND
16-# include "xserver.h"
17+#include "xserver.h"
18 #endif
19
20 extern struct swc_launch swc_launch;
21@@ -59,11 +59,11 @@ extern struct swc_xserver swc_xserver;
22 extern struct pointer_handler screens_pointer_handler;
23
24 struct swc swc = {
25- .bindings = &swc_bindings,
26- .compositor = &swc_compositor,
27- .drm = &swc_drm,
28+ .bindings = &swc_bindings,
29+ .compositor = &swc_compositor,
30+ .drm = &swc_drm,
31 #ifdef ENABLE_XWAYLAND
32- .xserver = &swc_xserver,
33+ .xserver = &swc_xserver,
34 #endif
35 };
36
37@@ -74,18 +74,24 @@ setup_compositor(void)
38 struct screen *screen;
39 struct swc_rectangle *geom;
40
41- wl_list_insert(&swc.seat->keyboard->handlers, &swc.bindings->keyboard_handler->link);
42- wl_list_insert(&swc.seat->pointer->handlers, &swc.bindings->pointer_handler->link);
43- wl_list_insert(&swc.seat->pointer->handlers, &swc.compositor->pointer_handler->link);
44+ wl_list_insert(&swc.seat->keyboard->handlers,
45+ &swc.bindings->keyboard_handler->link);
46+ wl_list_insert(&swc.seat->pointer->handlers,
47+ &swc.bindings->pointer_handler->link);
48+ wl_list_insert(&swc.seat->pointer->handlers,
49+ &swc.compositor->pointer_handler->link);
50 wl_list_insert(&swc.seat->pointer->handlers, &screens_pointer_handler.link);
51- wl_signal_add(&swc.seat->pointer->focus.event_signal, &window_enter_listener);
52+ wl_signal_add(&swc.seat->pointer->focus.event_signal,
53+ &window_enter_listener);
54
55 /* Calculate pointer region */
56 pixman_region32_init(&pointer_region);
57
58- wl_list_for_each (screen, &swc.screens, link) {
59+ wl_list_for_each(screen, &swc.screens, link)
60+ {
61 geom = &screen->base.geometry;
62- pixman_region32_union_rect(&pointer_region, &pointer_region, geom->x, geom->y, geom->width, geom->height);
63+ pixman_region32_union_rect(&pointer_region, &pointer_region, geom->x,
64+ geom->y, geom->width, geom->height);
65 }
66
67 pointer_set_region(swc.seat->pointer, &pointer_region);
68@@ -97,8 +103,9 @@ swc_activate(void)
69 {
70 swc.active = true;
71 send_event(&swc.event_signal, SWC_EVENT_ACTIVATED, NULL);
72- if (swc.manager->activate)
73+ if (swc.manager->activate) {
74 swc.manager->activate();
75+ }
76 }
77
78 void
79@@ -106,34 +113,42 @@ swc_deactivate(void)
80 {
81 swc.active = false;
82 send_event(&swc.event_signal, SWC_EVENT_DEACTIVATED, NULL);
83- if (swc.manager->deactivate)
84+ if (swc.manager->deactivate) {
85 swc.manager->deactivate();
86+ }
87 }
88
89 EXPORT bool
90 swc_cursor_position(int32_t *x, int32_t *y)
91 {
92- if (x)
93+ if (x) {
94 *x = 0;
95- if (y)
96+ }
97+ if (y) {
98 *y = 0;
99+ }
100
101- if (!swc.seat || !swc.seat->pointer)
102+ if (!swc.seat || !swc.seat->pointer) {
103 return false;
104+ }
105
106- if (x)
107+ if (x) {
108 *x = swc.seat->pointer->x;
109- if (y)
110+ }
111+ if (y) {
112 *y = swc.seat->pointer->y;
113+ }
114
115 return true;
116 }
117
118 EXPORT bool
119-swc_initialize(struct wl_display *display, struct wl_event_loop *event_loop, const struct swc_manager *manager)
120+swc_initialize(struct wl_display *display, struct wl_event_loop *event_loop,
121+ const struct swc_manager *manager)
122 {
123 swc.display = display;
124- swc.event_loop = event_loop ? event_loop : wl_display_get_event_loop(display);
125+ swc.event_loop =
126+ event_loop ? event_loop : wl_display_get_event_loop(display);
127 swc.manager = manager;
128 const char *default_seat = "seat0";
129 wl_signal_init(&swc.event_signal);
+103,
-50
1@@ -45,7 +45,8 @@ struct wld_buffer;
2 * wayland headers
3 *
4 */
5-bool swc_cursor_position(int32_t *x, int32_t *y);
6+bool
7+swc_cursor_position(int32_t *x, int32_t *y);
8
9 /**
10 * Send a pointer button event to the currently focused client.
11@@ -53,7 +54,8 @@ bool swc_cursor_position(int32_t *x, int32_t *y);
12 * This is intended for window managers which intercept button events (for
13 * example for mouse chords) but want normal clicks to still reach clients.
14 */
15-void swc_pointer_send_button(uint32_t time, uint32_t button, uint32_t state);
16+void
17+swc_pointer_send_button(uint32_t time, uint32_t button, uint32_t state);
18
19 /**
20 * Send a pointer axis event to the currently focused client.
21@@ -63,7 +65,8 @@ void swc_pointer_send_button(uint32_t time, uint32_t button, uint32_t state);
22 *
23 * value120 uses the wl_pointer "120 units" convention.
24 */
25-void swc_pointer_send_axis(uint32_t time, uint32_t axis, int32_t value120);
26+void
27+swc_pointer_send_axis(uint32_t time, uint32_t axis, int32_t value120);
28
29 /* Cursor control (compositor-internal cursor) */
30 enum swc_cursor_kind {
31@@ -85,15 +88,18 @@ enum swc_cursor_mode {
32 /**
33 * override the compositor's internal cursor
34 *
35- * this is intended for window managers to show mode cursors (move/resize/select) like the ones in hevel
36- * If a client has set its own cursor surface, swc may ignore the override.
37+ * this is intended for window managers to show mode cursors
38+ * (move/resize/select) like the ones in hevel. If a client has set its own
39+ * cursor surface, swc may ignore the override.
40 */
41-void swc_set_cursor(enum swc_cursor_kind kind);
42+void
43+swc_set_cursor(enum swc_cursor_kind kind);
44
45 /**
46 * control whether client cursor surfaces are honored
47 */
48-void swc_set_cursor_mode(enum swc_cursor_mode mode);
49+void
50+swc_set_cursor_mode(enum swc_cursor_mode mode);
51
52 /**
53 * set a custom argb8888 cursor image for a given kind
54@@ -101,12 +107,13 @@ void swc_set_cursor_mode(enum swc_cursor_mode mode);
55 * `argb8888` is a pointer to `width*height` pixels in ARGB8888 order.
56 * the caller has to keep the pixel memory alive for as long as it may be used
57 */
58-void swc_set_cursor_image(enum swc_cursor_kind kind,
59- const uint32_t *argb8888,
60- uint32_t width, uint32_t height,
61- int32_t hotspot_x, int32_t hotspot_y);
62+void
63+swc_set_cursor_image(enum swc_cursor_kind kind, const uint32_t *argb8888,
64+ uint32_t width, uint32_t height, int32_t hotspot_x,
65+ int32_t hotspot_y);
66
67-void swc_clear_cursor_image(enum swc_cursor_kind kind);
68+void
69+swc_clear_cursor_image(enum swc_cursor_kind kind);
70
71 /**
72 * draw [or update] a simple box overlay
73@@ -115,12 +122,15 @@ void swc_clear_cursor_image(enum swc_cursor_kind kind);
74 * coordinates. this draws only the border. Call swc_overlay_clear() to remove
75 * it
76 */
77-void swc_overlay_set_box(int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color, uint32_t border_width);
78+void
79+swc_overlay_set_box(int32_t x1, int32_t y1, int32_t x2, int32_t y2,
80+ uint32_t color, uint32_t border_width);
81
82 /**
83 * Clear the current overlay, if any.
84 */
85-void swc_overlay_clear(void);
86+void
87+swc_overlay_clear(void);
88
89 /**
90 * Set the compositor zoom level.
91@@ -128,12 +138,14 @@ void swc_overlay_clear(void);
92 * 1.0 = normal, >1.0 = zoomed in, <1.0 = zoomed out
93 * Uses software (pixman) scaling.
94 */
95-void swc_set_zoom(float level);
96+void
97+swc_set_zoom(float level);
98
99 /**
100 * Get the current zoom level.
101 */
102-float swc_get_zoom(void);
103+float
104+swc_get_zoom(void);
105
106 /* Rectangles {{{ */
107
108@@ -160,8 +172,8 @@ struct swc_screen_handler {
109 void (*geometry_changed)(void *data);
110
111 /**
112- * Called when the geometry of the screen available for laying out windows has
113- * changed.
114+ * Called when the geometry of the screen available for laying out windows
115+ * has changed.
116 *
117 * A window manager should respond by making sure all visible windows are
118 * within this area.
119@@ -189,7 +201,9 @@ struct swc_screen {
120 /**
121 * Set the handler associated with this screen.
122 */
123-void swc_screen_set_handler(struct swc_screen *screen, const struct swc_screen_handler *handler, void *data);
124+void
125+swc_screen_set_handler(struct swc_screen *screen,
126+ const struct swc_screen_handler *handler, void *data);
127
128 /* }}} */
129
130@@ -260,29 +274,35 @@ struct swc_window {
131 /**
132 * Set the handler associated with this window.
133 */
134-void swc_window_set_handler(struct swc_window *window, const struct swc_window_handler *handler, void *data);
135+void
136+swc_window_set_handler(struct swc_window *window,
137+ const struct swc_window_handler *handler, void *data);
138
139 /**
140 * Request that the specified window close.
141 */
142-void swc_window_close(struct swc_window *window);
143+void
144+swc_window_close(struct swc_window *window);
145
146 /**
147 * Make the specified window visible.
148 */
149-void swc_window_show(struct swc_window *window);
150+void
151+swc_window_show(struct swc_window *window);
152
153 /**
154 * Make the specified window hidden.
155 */
156-void swc_window_hide(struct swc_window *window);
157+void
158+swc_window_hide(struct swc_window *window);
159
160 /**
161 * Set the keyboard focus to the specified window.
162 *
163 * If window is NULL, the keyboard will have no focus.
164 */
165-void swc_window_focus(struct swc_window *window);
166+void
167+swc_window_focus(struct swc_window *window);
168
169 /**
170 * Sets the window to stacked mode.
171@@ -293,7 +313,8 @@ void swc_window_focus(struct swc_window *window);
172 *
173 * Use of this mode is required to allow interactive moving and resizing.
174 */
175-void swc_window_set_stacked(struct swc_window *window);
176+void
177+swc_window_set_stacked(struct swc_window *window);
178
179 /**
180 * Sets the window to tiled mode.
181@@ -304,12 +325,14 @@ void swc_window_set_stacked(struct swc_window *window);
182 *
183 * It is invalid to interactively move or resize a window in tiled mode.
184 */
185-void swc_window_set_tiled(struct swc_window *window);
186+void
187+swc_window_set_tiled(struct swc_window *window);
188
189 /**
190 * Sets the window to fullscreen mode.
191 */
192-void swc_window_set_fullscreen(struct swc_window *window, struct swc_screen *screen);
193+void
194+swc_window_set_fullscreen(struct swc_window *window, struct swc_screen *screen);
195
196 /**
197 * Set the window's position.
198@@ -317,7 +340,8 @@ void swc_window_set_fullscreen(struct swc_window *window, struct swc_screen *scr
199 * The x and y coordinates refer to the top-left corner of the actual contents
200 * of the window and should be adjusted for the border size.
201 */
202-void swc_window_set_position(struct swc_window *window, int32_t x, int32_t y);
203+void
204+swc_window_set_position(struct swc_window *window, int32_t x, int32_t y);
205
206 /**
207 * Set the window's size.
208@@ -325,7 +349,8 @@ void swc_window_set_position(struct swc_window *window, int32_t x, int32_t y);
209 * The width and height refer to the dimension of the actual contents of the
210 * window and should be adjusted for the border size.
211 */
212-void swc_window_set_size(struct swc_window *window, uint32_t width, uint32_t height);
213+void
214+swc_window_set_size(struct swc_window *window, uint32_t width, uint32_t height);
215
216 /**
217 * Set the window's size and position.
218@@ -333,39 +358,49 @@ void swc_window_set_size(struct swc_window *window, uint32_t width, uint32_t hei
219 * This is a convenience function that is equivalent to calling
220 * swc_window_set_size and then swc_window_set_position.
221 */
222-void swc_window_set_geometry(struct swc_window *window, const struct swc_rectangle *geometry);
223+void
224+swc_window_set_geometry(struct swc_window *window,
225+ const struct swc_rectangle *geometry);
226
227 /**
228 * Get the window's current geometry in compositor-global coordinates.
229 */
230-bool swc_window_get_geometry(const struct swc_window *window, struct swc_rectangle *geometry);
231+bool
232+swc_window_get_geometry(const struct swc_window *window,
233+ struct swc_rectangle *geometry);
234
235 /**
236 * Get the pid of the client that owns this window
237 *
238 * returns pid, or 0 if unavailable
239 */
240-pid_t swc_window_get_pid(struct swc_window *window);
241+pid_t
242+swc_window_get_pid(struct swc_window *window);
243
244 /**
245 * Set the window's border color and width.
246 *
247 * NOTE: The window's geometry remains unchanged, and should be updated if a
248 * fixed top-left corner of the border is desired.
249- *
250+ *
251 * info from dalem: unsure how much double borders break!
252 */
253-void swc_window_set_border(struct swc_window *window, uint32_t inner_border_color, uint32_t inner_border_width, uint32_t outer_border_color, uint32_t outer_border_width);
254+void
255+swc_window_set_border(struct swc_window *window, uint32_t inner_border_color,
256+ uint32_t inner_border_width, uint32_t outer_border_color,
257+ uint32_t outer_border_width);
258
259 /**
260 * Begin an interactive move of the specified window.
261 */
262-void swc_window_begin_move(struct swc_window *window);
263+void
264+swc_window_begin_move(struct swc_window *window);
265
266 /**
267 * End an interactive move of the specified window.
268 */
269-void swc_window_end_move(struct swc_window *window);
270+void
271+swc_window_end_move(struct swc_window *window);
272
273 enum {
274 SWC_WINDOW_EDGE_AUTO = 0,
275@@ -378,19 +413,22 @@ enum {
276 /**
277 * Begin an interactive resize of the specified window.
278 */
279-void swc_window_begin_resize(struct swc_window *window, uint32_t edges);
280+void
281+swc_window_begin_resize(struct swc_window *window, uint32_t edges);
282
283 /**
284 * End an interactive resize of the specified window.
285 */
286-void swc_window_end_resize(struct swc_window *window);
287+void
288+swc_window_end_resize(struct swc_window *window);
289
290 /**
291 * returns the topmost window at any given compositor global coordinates
292 *
293 * returns null if there is no window at that point
294 */
295-struct swc_window *swc_window_at(int32_t x, int32_t y);
296+struct swc_window *
297+swc_window_at(int32_t x, int32_t y);
298
299 /**
300 * move a window in the stacking order by one step
301@@ -398,7 +436,8 @@ struct swc_window *swc_window_at(int32_t x, int32_t y);
302 * direction < 0 moves the window towards the front (higher)
303 * direction > 0 moves the window towards the back (lower)
304 */
305-void swc_window_stack(struct swc_window *window, int32_t direction);
306+void
307+swc_window_stack(struct swc_window *window, int32_t direction);
308
309 /* }}} */
310
311@@ -417,15 +456,19 @@ enum swc_binding_type {
312 SWC_BINDING_BUTTON,
313 };
314
315-typedef void (*swc_binding_handler)(void *data, uint32_t time, uint32_t value, uint32_t state);
316-typedef void (*swc_axis_binding_handler)(void *data, uint32_t time, uint32_t axis, int32_t value120);
317+typedef void (*swc_binding_handler)(void *data, uint32_t time, uint32_t value,
318+ uint32_t state);
319+typedef void (*swc_axis_binding_handler)(void *data, uint32_t time,
320+ uint32_t axis, int32_t value120);
321
322 /**
323 * Register a new input binding.
324 *
325 * Returns 0 on success, negative error code otherwise.
326 */
327-int swc_add_binding(enum swc_binding_type type, uint32_t modifiers, uint32_t value, swc_binding_handler handler, void *data);
328+int
329+swc_add_binding(enum swc_binding_type type, uint32_t modifiers, uint32_t value,
330+ swc_binding_handler handler, void *data);
331
332 /**
333 * register a new pointer axis binding
334@@ -433,7 +476,9 @@ int swc_add_binding(enum swc_binding_type type, uint32_t modifiers, uint32_t val
335 * this will intercept axis events from clients; use swc_pointer_send_axis()
336 * from the handler to forward events when appropriate
337 */
338-int swc_add_axis_binding(uint32_t modifiers, uint32_t axis, swc_axis_binding_handler handler, void *data);
339+int
340+swc_add_axis_binding(uint32_t modifiers, uint32_t axis,
341+ swc_axis_binding_handler handler, void *data);
342
343 /* }}} */
344
345@@ -442,14 +487,17 @@ int swc_add_axis_binding(uint32_t modifiers, uint32_t axis, swc_axis_binding_han
346 /**
347 * Set fallback wallpaper buffer for all screens that don't have an override.
348 */
349-void swc_wallpaper_set_buffer(struct wld_buffer *buffer);
350+void
351+swc_wallpaper_set_buffer(struct wld_buffer *buffer);
352
353 /**
354 * Set wallpaper buffer for specified screen id.
355 *
356 * Passing NULL clears the override for that screen.
357 */
358-void swc_wallpaper_set_buffer_for_screen(uint8_t screen_id, struct wld_buffer *buffer);
359+void
360+swc_wallpaper_set_buffer_for_screen(uint8_t screen_id,
361+ struct wld_buffer *buffer);
362
363 /**
364 * Set wallpaper to a single color
365@@ -458,7 +506,8 @@ void swc_wallpaper_set_buffer_for_screen(uint8_t screen_id, struct wld_buffer *b
366 */
367
368 extern uint32_t bgcolor;
369-void swc_wallpaper_color_set(uint32_t color);
370+void
371+swc_wallpaper_color_set(uint32_t color);
372
373 /* }}} */
374
375@@ -483,7 +532,8 @@ struct swc_manager {
376 void (*new_device)(struct libinput_device *device);
377
378 /**
379- * Called when the session gets activated (for example, startup or VT switch).
380+ * Called when the session gets activated (for example, startup or VT
381+ * switch).
382 */
383 void (*activate)(void);
384
385@@ -497,12 +547,15 @@ struct swc_manager {
386 * Initializes the compositor using the specified display, event_loop, and
387 * manager.
388 */
389-bool swc_initialize(struct wl_display *display, struct wl_event_loop *event_loop, const struct swc_manager *manager);
390+bool
391+swc_initialize(struct wl_display *display, struct wl_event_loop *event_loop,
392+ const struct swc_manager *manager);
393
394 /**
395 * Stops the compositor, releasing any used resources.
396 */
397-void swc_finalize(void);
398+void
399+swc_finalize(void);
400
401 #ifdef __cplusplus
402 }
+4,
-2
1@@ -26,8 +26,10 @@
2 #include <wayland-server.h>
3
4 pixman_box32_t infinite_extents = {
5- .x1 = INT32_MIN, .y1 = INT32_MIN,
6- .x2 = INT32_MAX, .y2 = INT32_MAX,
7+ .x1 = INT32_MIN,
8+ .y1 = INT32_MIN,
9+ .x2 = INT32_MAX,
10+ .y2 = INT32_MAX,
11 };
12
13 void
+27,
-21
1@@ -26,27 +26,26 @@
2
3 #include "swc.h"
4
5-#include <stdlib.h>
6-#include <stdio.h>
7+#include <pixman.h>
8 #include <stdbool.h>
9+#include <stdio.h>
10+#include <stdlib.h>
11 #include <string.h>
12 #include <sys/time.h>
13-#include <pixman.h>
14 #include <wayland-util.h>
15
16 #define EXPORT __attribute__((visibility("default")))
17
18 #if ENABLE_DEBUG
19-#define MESSAGE_SOURCE \
20- fprintf(stderr, "[swc:%s:%d] ", __FILE__, __LINE__);
21+#define MESSAGE_SOURCE fprintf(stderr, "[swc:%s:%d] ", __FILE__, __LINE__);
22 #else
23 #define MESSAGE_SOURCE
24 #endif
25
26-#define MESSAGE(type, format, ...) \
27- do { \
28- MESSAGE_SOURCE \
29- fprintf(stderr, type ": " format, ##__VA_ARGS__); \
30+#define MESSAGE(type, format, ...) \
31+ do { \
32+ MESSAGE_SOURCE \
33+ fprintf(stderr, type ": " format, ##__VA_ARGS__); \
34 } while (false)
35
36 #define WARNING(format, ...) MESSAGE("WARNING", format, ##__VA_ARGS__)
37@@ -66,8 +65,10 @@
38 struct wl_resource;
39 struct wl_client;
40
41-void remove_resource(struct wl_resource *resource);
42-void destroy_resource(struct wl_client *client, struct wl_resource *resource);
43+void
44+remove_resource(struct wl_resource *resource);
45+void
46+destroy_resource(struct wl_client *client, struct wl_resource *resource);
47
48 static inline uint32_t
49 get_time(void)
50@@ -81,27 +82,32 @@ get_time(void)
51 extern pixman_box32_t infinite_extents;
52
53 static inline bool
54-rectangle_contains_point(const struct swc_rectangle *rectangle, int32_t x, int32_t y)
55+rectangle_contains_point(const struct swc_rectangle *rectangle,
56+ int32_t x,
57+ int32_t y)
58 {
59- return x > rectangle->x && x < rectangle->x + rectangle->width
60- && y > rectangle->y && y < rectangle->y + rectangle->height;
61+ return x > rectangle->x && x < rectangle->x + rectangle->width &&
62+ y > rectangle->y && y < rectangle->y + rectangle->height;
63 }
64
65 static inline bool
66-rectangle_overlap(const struct swc_rectangle *r1, const struct swc_rectangle *r2)
67+rectangle_overlap(const struct swc_rectangle *r1,
68+ const struct swc_rectangle *r2)
69 {
70- return (MAX(r1->x + r1->width, r2->x + r2->width) - MIN(r1->x, r2->x)
71- < r1->width + r2->width)
72- && (MAX(r1->y + r1->height, r2->y + r2->height) - MIN(r1->y, r2->y)
73- < r1->height + r2->height);
74+ return (MAX(r1->x + r1->width, r2->x + r2->width) - MIN(r1->x, r2->x) <
75+ r1->width + r2->width) &&
76+ (MAX(r1->y + r1->height, r2->y + r2->height) - MIN(r1->y, r2->y) <
77+ r1->height + r2->height);
78 }
79
80 static inline void
81 array_remove(struct wl_array *array, void *item, size_t size)
82 {
83- size_t bytes = array->size - ((intptr_t)item + size - (intptr_t)array->data);
84- if (bytes > 0)
85+ size_t bytes =
86+ array->size - ((intptr_t)item + size - (intptr_t)array->data);
87+ if (bytes > 0) {
88 memmove(item, (void *)((intptr_t)item + size), bytes);
89+ }
90 array->size -= size;
91 }
92
+31,
-18
1@@ -29,12 +29,13 @@
2
3 #include <wld/wld.h>
4
5-#define HANDLE(view, handler, method, ...) \
6- do { \
7- wl_list_for_each (handler, &view->handlers, link) { \
8- if (handler->impl->method) \
9- handler->impl->method(handler, ##__VA_ARGS__); \
10- } \
11+#define HANDLE(view, handler, method, ...) \
12+ do { \
13+ wl_list_for_each(handler, &view->handlers, link) \
14+ { \
15+ if (handler->impl->method) \
16+ handler->impl->method(handler, ##__VA_ARGS__); \
17+ } \
18 } while (0)
19
20 void
21@@ -53,8 +54,9 @@ view_initialize(struct view *view, const struct view_impl *impl)
22 void
23 view_finalize(struct view *view)
24 {
25- if (view->buffer)
26+ if (view->buffer) {
27 wld_buffer_unreference(view->buffer);
28+ }
29 }
30
31 int
32@@ -63,14 +65,17 @@ view_attach(struct view *view, struct wld_buffer *buffer)
33 int ret;
34 struct view_handler *handler;
35
36- if ((ret = view->impl->attach(view, buffer)) < 0)
37+ if ((ret = view->impl->attach(view, buffer)) < 0) {
38 return ret;
39+ }
40
41- if (view->buffer)
42+ if (view->buffer) {
43 wld_buffer_unreference(view->buffer);
44+ }
45
46- if (buffer)
47+ if (buffer) {
48 wld_buffer_reference(buffer);
49+ }
50
51 view->buffer = buffer;
52 HANDLE(view, handler, attach);
53@@ -95,8 +100,9 @@ view_set_position(struct view *view, int32_t x, int32_t y)
54 {
55 struct view_handler *handler;
56
57- if (x == view->geometry.x && y == view->geometry.y)
58+ if (x == view->geometry.x && y == view->geometry.y) {
59 return false;
60+ }
61
62 view->geometry.x = x;
63 view->geometry.y = y;
64@@ -110,10 +116,12 @@ view_set_size(struct view *view, uint32_t width, uint32_t height)
65 {
66 struct view_handler *handler;
67
68- if (view->geometry.width == width && view->geometry.height == height)
69+ if (view->geometry.width == width && view->geometry.height == height) {
70 return false;
71+ }
72
73- uint32_t old_width = view->geometry.width, old_height = view->geometry.height;
74+ uint32_t old_width = view->geometry.width,
75+ old_height = view->geometry.height;
76
77 view->geometry.width = width;
78 view->geometry.height = height;
79@@ -125,16 +133,19 @@ view_set_size(struct view *view, uint32_t width, uint32_t height)
80 bool
81 view_set_size_from_buffer(struct view *view, struct wld_buffer *buffer)
82 {
83- return view_set_size(view, buffer ? buffer->width : 0, buffer ? buffer->height : 0);
84+ return view_set_size(view, buffer ? buffer->width : 0,
85+ buffer ? buffer->height : 0);
86 }
87
88 void
89 view_set_screens(struct view *view, uint32_t screens)
90 {
91- if (view->screens == screens)
92+ if (view->screens == screens) {
93 return;
94+ }
95
96- uint32_t entered = screens & ~view->screens, left = view->screens & ~screens;
97+ uint32_t entered = screens & ~view->screens,
98+ left = view->screens & ~screens;
99 struct view_handler *handler;
100
101 view->screens = screens;
102@@ -147,9 +158,11 @@ view_update_screens(struct view *view)
103 uint32_t screens = 0;
104 struct screen *screen;
105
106- wl_list_for_each (screen, &swc.screens, link) {
107- if (rectangle_overlap(&screen->base.geometry, &view->geometry))
108+ wl_list_for_each(screen, &swc.screens, link)
109+ {
110+ if (rectangle_overlap(&screen->base.geometry, &view->geometry)) {
111 screens |= screen_mask(screen);
112+ }
113 }
114
115 view_set_screens(view, screens);
+27,
-14
1@@ -74,9 +74,11 @@ struct view_handler_impl {
2 /* Called after the view's position changes. */
3 void (*move)(struct view_handler *handler);
4 /* Called after the view's size changes. */
5- void (*resize)(struct view_handler *handler, uint32_t old_width, uint32_t old_height);
6+ void (*resize)(struct view_handler *handler, uint32_t old_width,
7+ uint32_t old_height);
8 /* Called when the set of screens the view is visible on changes. */
9- void (*screens)(struct view_handler *handler, uint32_t left, uint32_t entered);
10+ void (*screens)(struct view_handler *handler, uint32_t left,
11+ uint32_t entered);
12 };
13
14 /**
15@@ -86,39 +88,49 @@ struct view_handler_impl {
16 *
17 * @return 0 on success, negative error code otherwise.
18 */
19-int view_attach(struct view *view, struct wld_buffer *buffer);
20+int
21+view_attach(struct view *view, struct wld_buffer *buffer);
22
23 /**
24 * Display a new frame consisting of the currently attached buffer.
25 *
26 * @return Whether or not the update succeeds.
27 */
28-bool view_update(struct view *view);
29+bool
30+view_update(struct view *view);
31
32 /**
33 * Move the view to the specified coordinates, if supported.
34 *
35 * @return Whether or not the move succeeds.
36 */
37-bool view_move(struct view *view, int32_t x, int32_t y);
38+bool
39+view_move(struct view *view, int32_t x, int32_t y);
40
41 /**** For internal view use only ****/
42
43 /**
44 * Initialize a new view with the specified implementation.
45 */
46-void view_initialize(struct view *view, const struct view_impl *impl);
47+void
48+view_initialize(struct view *view, const struct view_impl *impl);
49
50 /**
51 * Release any resources associated with this view.
52 */
53-void view_finalize(struct view *view);
54-
55-bool view_set_position(struct view *view, int32_t x, int32_t y);
56-bool view_set_size(struct view *view, uint32_t width, uint32_t height);
57-bool view_set_size_from_buffer(struct view *view, struct wld_buffer *bufer);
58-void view_set_screens(struct view *view, uint32_t screens);
59-void view_update_screens(struct view *view);
60+void
61+view_finalize(struct view *view);
62+
63+bool
64+view_set_position(struct view *view, int32_t x, int32_t y);
65+bool
66+view_set_size(struct view *view, uint32_t width, uint32_t height);
67+bool
68+view_set_size_from_buffer(struct view *view, struct wld_buffer *bufer);
69+void
70+view_set_screens(struct view *view, uint32_t screens);
71+void
72+view_update_screens(struct view *view);
73
74 /**
75 * Send a new frame event through the view's event signal.
76@@ -127,6 +139,7 @@ void view_update_screens(struct view *view);
77 * the user. If time information is not available, get_time() can be passed
78 * instead.
79 */
80-void view_frame(struct view *view, uint32_t time);
81+void
82+view_frame(struct view *view, uint32_t time);
83
84 #endif
+24,
-16
1@@ -1,12 +1,12 @@
2 #include <wld/wld.h>
3
4-#include "swc.h"
5 #include "compositor.h"
6 #include "screen.h"
7+#include "swc.h"
8+#include "swc_wallpaper-server-protocol.h"
9 #include "util.h"
10-#include "wayland_buffer.h"
11 #include "wallpaper.h"
12-#include "swc_wallpaper-server-protocol.h"
13+#include "wayland_buffer.h"
14
15 #define MAX_WALLPAPER_SCREENS 32
16
17@@ -17,10 +17,12 @@ uint32_t bgcolor = 0xff000000;
18 static void
19 set_buffer_slot(struct wld_buffer **slot, struct wld_buffer *buffer)
20 {
21- if (buffer)
22+ if (buffer) {
23 wld_buffer_reference(buffer);
24- if (*slot)
25+ }
26+ if (*slot) {
27 wld_buffer_unreference(*slot);
28+ }
29
30 *slot = buffer;
31 }
32@@ -28,10 +30,10 @@ set_buffer_slot(struct wld_buffer **slot, struct wld_buffer *buffer)
33 struct wld_buffer *
34 swc_wallpaper_buffer_for_screen(struct screen *screen)
35 {
36- if (screen
37- && screen->id < ARRAY_LENGTH(screen_wallbuf)
38- && screen_wallbuf[screen->id])
39+ if (screen && screen->id < ARRAY_LENGTH(screen_wallbuf) &&
40+ screen_wallbuf[screen->id]) {
41 return screen_wallbuf[screen->id];
42+ }
43
44 return wallbuf;
45 }
46@@ -44,10 +46,12 @@ swc_wallpaper_set_buffer(struct wld_buffer *buffer)
47 }
48
49 EXPORT void
50-swc_wallpaper_set_buffer_for_screen(uint8_t screen_id, struct wld_buffer *buffer)
51+swc_wallpaper_set_buffer_for_screen(uint8_t screen_id,
52+ struct wld_buffer *buffer)
53 {
54- if (screen_id >= ARRAY_LENGTH(screen_wallbuf))
55+ if (screen_id >= ARRAY_LENGTH(screen_wallbuf)) {
56 return;
57+ }
58
59 set_buffer_slot(&screen_wallbuf[screen_id], buffer);
60 compositor_damage_all();
61@@ -82,25 +86,28 @@ set_buffer(struct wl_client *client, struct wl_resource *resource,
62 return;
63 }
64
65- if (screen_id < 0 || screen_id >= ARRAY_LENGTH(screen_wallbuf))
66+ if (screen_id < 0 || screen_id >= ARRAY_LENGTH(screen_wallbuf)) {
67 return;
68+ }
69
70 swc_wallpaper_set_buffer_for_screen((uint8_t)screen_id, buffer);
71 }
72
73 static const struct swc_wallpaper_interface wallpaper_impl = {
74- .destroy = destroy_resource,
75- .set_buffer = set_buffer,
76+ .destroy = destroy_resource,
77+ .set_buffer = set_buffer,
78 };
79
80 static void
81-bind_wallpaper(struct wl_client *client, void *data, uint32_t version, uint32_t id)
82+bind_wallpaper(struct wl_client *client, void *data, uint32_t version,
83+ uint32_t id)
84 {
85 (void)data;
86
87 struct wl_resource *resource;
88
89- resource = wl_resource_create(client, &swc_wallpaper_interface, version, id);
90+ resource =
91+ wl_resource_create(client, &swc_wallpaper_interface, version, id);
92 if (!resource) {
93 wl_client_post_no_memory(client);
94 return;
95@@ -112,5 +119,6 @@ bind_wallpaper(struct wl_client *client, void *data, uint32_t version, uint32_t
96 struct wl_global *
97 swc_wallpaper_manager_create(struct wl_display *display)
98 {
99- return wl_global_create(display, &swc_wallpaper_interface, 1, NULL, bind_wallpaper);
100+ return wl_global_create(display, &swc_wallpaper_interface, 1, NULL,
101+ bind_wallpaper);
102 }
+4,
-2
1@@ -6,7 +6,9 @@ struct wl_global;
2 struct wld_buffer;
3 struct screen;
4
5-struct wl_global *swc_wallpaper_manager_create(struct wl_display *display);
6-struct wld_buffer *swc_wallpaper_buffer_for_screen(struct screen *screen);
7+struct wl_global *
8+swc_wallpaper_manager_create(struct wl_display *display);
9+struct wld_buffer *
10+swc_wallpaper_buffer_for_screen(struct screen *screen);
11
12 #endif
+10,
-6
1@@ -26,18 +26,19 @@
2 #include "shm.h"
3 #include "util.h"
4
5-#include <wld/wld.h>
6 #include <wld/pixman.h>
7+#include <wld/wld.h>
8
9 static const struct wl_buffer_interface buffer_impl = {
10- .destroy = destroy_resource,
11+ .destroy = destroy_resource,
12 };
13
14 struct wld_buffer *
15 wayland_buffer_get(struct wl_resource *resource)
16 {
17- if (wl_resource_instance_of(resource, &wl_buffer_interface, &buffer_impl))
18+ if (wl_resource_instance_of(resource, &wl_buffer_interface, &buffer_impl)) {
19 return wl_resource_get_user_data(resource);
20+ }
21
22 return NULL;
23 }
24@@ -50,12 +51,15 @@ destroy_buffer(struct wl_resource *resource)
25 }
26
27 struct wl_resource *
28-wayland_buffer_create_resource(struct wl_client *client, uint32_t version, uint32_t id, struct wld_buffer *buffer)
29+wayland_buffer_create_resource(struct wl_client *client, uint32_t version,
30+ uint32_t id, struct wld_buffer *buffer)
31 {
32 struct wl_resource *resource;
33
34 resource = wl_resource_create(client, &wl_buffer_interface, version, id);
35- if (resource)
36- wl_resource_set_implementation(resource, &buffer_impl, buffer, &destroy_buffer);
37+ if (resource) {
38+ wl_resource_set_implementation(resource, &buffer_impl, buffer,
39+ &destroy_buffer);
40+ }
41 return resource;
42 }
+5,
-2
1@@ -29,7 +29,10 @@
2 struct wl_client;
3 struct wl_resource;
4
5-struct wld_buffer *wayland_buffer_get(struct wl_resource *resource);
6-struct wl_resource *wayland_buffer_create_resource(struct wl_client *client, uint32_t version, uint32_t id, struct wld_buffer *buffer);
7+struct wld_buffer *
8+wayland_buffer_get(struct wl_resource *resource);
9+struct wl_resource *
10+wayland_buffer_create_resource(struct wl_client *client, uint32_t version,
11+ uint32_t id, struct wld_buffer *buffer);
12
13 #endif
+167,
-86
1@@ -45,11 +45,13 @@ static const struct swc_window_handler null_handler;
2 static bool
3 should_throttle_motion(uint32_t throttle_ms, uint32_t *last_time, uint32_t time)
4 {
5- if (!throttle_ms)
6+ if (!throttle_ms) {
7 return false;
8+ }
9
10- if (*last_time && time - *last_time < throttle_ms)
11+ if (*last_time && time - *last_time < throttle_ms) {
12 return true;
13+ }
14
15 *last_time = time;
16 return false;
17@@ -58,31 +60,39 @@ should_throttle_motion(uint32_t throttle_ms, uint32_t *last_time, uint32_t time)
18 static uint32_t
19 clamp_dimension(int32_t value, uint32_t min, uint32_t max)
20 {
21- if (value < 0)
22+ if (value < 0) {
23 value = 0;
24+ }
25
26- if (min && value < min)
27+ if (min && value < min) {
28 value = min;
29+ }
30
31 if (max) {
32- if (min && max < min)
33+ if (min && max < min) {
34 max = min;
35+ }
36
37- if (value > max)
38+ if (value > max) {
39 value = max;
40+ }
41 }
42
43- if (value > UINT32_MAX)
44+ if (value > UINT32_MAX) {
45 value = UINT32_MAX;
46+ }
47
48 return value;
49 }
50
51 static void
52-clamp_window_size(const struct window *window, uint32_t *width, uint32_t *height)
53+clamp_window_size(const struct window *window, uint32_t *width,
54+ uint32_t *height)
55 {
56- *width = clamp_dimension(*width, window->base.min_width, window->base.max_width);
57- *height = clamp_dimension(*height, window->base.min_height, window->base.max_height);
58+ *width =
59+ clamp_dimension(*width, window->base.min_width, window->base.max_width);
60+ *height = clamp_dimension(*height, window->base.min_height,
61+ window->base.max_height);
62 }
63
64 static void
65@@ -92,22 +102,26 @@ handle_window_enter(struct wl_listener *listener, void *data)
66 struct input_focus_event_data *event_data = event->data;
67 struct window *window;
68
69- if (event->type != INPUT_FOCUS_EVENT_CHANGED)
70+ if (event->type != INPUT_FOCUS_EVENT_CHANGED) {
71 return;
72+ }
73
74- if (!event_data->new || !(window = event_data->new->window))
75+ if (!event_data->new || !(window = event_data->new->window)) {
76 return;
77+ }
78
79- if (window->handler->entered)
80+ if (window->handler->entered) {
81 window->handler->entered(window->handler_data);
82+ }
83 }
84
85 struct wl_listener window_enter_listener = {
86- .notify = handle_window_enter,
87+ .notify = handle_window_enter,
88 };
89
90 static void
91-begin_interaction(struct window_pointer_interaction *interaction, struct button *button)
92+begin_interaction(struct window_pointer_interaction *interaction,
93+ struct button *button)
94 {
95 if (button) {
96 /* Store the serial of the button press so we are able to cancel the
97@@ -124,10 +138,12 @@ begin_interaction(struct window_pointer_interaction *interaction, struct button
98 }
99
100 static void
101-end_interaction(struct window_pointer_interaction *interaction, struct button *button)
102+end_interaction(struct window_pointer_interaction *interaction,
103+ struct button *button)
104 {
105- if (!interaction->active)
106+ if (!interaction->active) {
107 return;
108+ }
109
110 if (interaction->original_handler) {
111 if (!button) {
112@@ -139,7 +155,9 @@ end_interaction(struct window_pointer_interaction *interaction, struct button *b
113 }
114 }
115
116- interaction->original_handler->button(interaction->original_handler, get_time(), button, WL_POINTER_BUTTON_STATE_RELEASED);
117+ interaction->original_handler->button(interaction->original_handler,
118+ get_time(), button,
119+ WL_POINTER_BUTTON_STATE_RELEASED);
120 }
121
122 remove:
123@@ -151,8 +169,9 @@ static void
124 flush(struct window *window)
125 {
126 if (window->move.pending) {
127- if (window->impl->move)
128+ if (window->impl->move) {
129 window->impl->move(window, window->move.x, window->move.y);
130+ }
131
132 view_move(&window->view->base, window->move.x, window->move.y);
133 window->move.pending = false;
134@@ -160,7 +179,8 @@ flush(struct window *window)
135 }
136
137 EXPORT void
138-swc_window_set_handler(struct swc_window *base, const struct swc_window_handler *handler, void *data)
139+swc_window_set_handler(struct swc_window *base,
140+ const struct swc_window_handler *handler, void *data)
141 {
142 struct window *window = INTERNAL(base);
143
144@@ -173,8 +193,9 @@ swc_window_close(struct swc_window *base)
145 {
146 struct window *window = INTERNAL(base);
147
148- if (window->impl->close)
149+ if (window->impl->close) {
150 window->impl->close(window);
151+ }
152 }
153
154 EXPORT void
155@@ -193,18 +214,22 @@ EXPORT void
156 swc_window_focus(struct swc_window *base)
157 {
158 struct window *window = INTERNAL(base);
159- struct compositor_view *new = window ? window->view : NULL, *old = swc.seat->keyboard->focus.view;
160+ struct compositor_view *new = window ? window->view : NULL,
161+ *old = swc.seat->keyboard->focus.view;
162
163- if (new == old)
164+ if (new == old) {
165 return;
166+ }
167
168 /* Focus the new window before unfocusing the old one in case both are X11
169 * windows so the xwl_window implementation can handle this transition
170 * correctly. */
171- if (window && window->impl->focus)
172+ if (window && window->impl->focus) {
173 window->impl->focus(window);
174- if (old && old->window && old->window->impl->unfocus)
175+ }
176+ if (old && old->window && old->window->impl->unfocus) {
177 old->window->impl->unfocus(old->window);
178+ }
179
180 keyboard_set_focus(swc.seat->keyboard, new);
181 }
182@@ -218,8 +243,9 @@ swc_window_set_stacked(struct swc_window *base)
183 window->configure.pending = false;
184 window->configure.width = 0;
185 window->configure.height = 0;
186- if (window->impl->set_mode)
187+ if (window->impl->set_mode) {
188 window->impl->set_mode(window, WINDOW_MODE_STACKED);
189+ }
190 window->mode = WINDOW_MODE_STACKED;
191 }
192
193@@ -230,8 +256,9 @@ swc_window_set_tiled(struct swc_window *base)
194
195 end_interaction(&window->move.interaction, NULL);
196 end_interaction(&window->resize.interaction, NULL);
197- if (window->impl->set_mode)
198+ if (window->impl->set_mode) {
199 window->impl->set_mode(window, WINDOW_MODE_TILED);
200+ }
201 window->mode = WINDOW_MODE_TILED;
202 }
203
204@@ -242,14 +269,15 @@ swc_window_set_fullscreen(struct swc_window *base, struct swc_screen *screen)
205
206 struct swc_rectangle geom;
207 swc_window_get_geometry(base, &geom);
208-
209+
210 if (window->mode != WINDOW_MODE_FULLSCREEN) {
211 window->prev.geom = geom;
212 window->prev.mode = window->mode;
213 swc_window_set_geometry(base, &screen->usable_geometry);
214
215- if (window->impl->set_mode)
216+ if (window->impl->set_mode) {
217 window->impl->set_mode(window, WINDOW_MODE_FULLSCREEN);
218+ }
219 window->mode = WINDOW_MODE_FULLSCREEN;
220 }
221
222@@ -275,8 +303,9 @@ swc_window_set_position(struct swc_window *base, int32_t x, int32_t y)
223 window->move.pending = true;
224
225 /* If we don't have a configure pending, perform the move now. */
226- if (!window->configure.pending)
227+ if (!window->configure.pending) {
228 flush(window);
229+ }
230 }
231
232 EXPORT void
233@@ -287,9 +316,10 @@ swc_window_set_size(struct swc_window *base, uint32_t width, uint32_t height)
234
235 clamp_window_size(window, &width, &height);
236
237- if ((window->configure.pending && width == window->configure.width && height == window->configure.height)
238- || (!window->configure.pending && width == geom->width && height == geom->height))
239- {
240+ if ((window->configure.pending && width == window->configure.width &&
241+ height == window->configure.height) ||
242+ (!window->configure.pending && width == geom->width &&
243+ height == geom->height)) {
244 return;
245 }
246
247@@ -303,35 +333,40 @@ swc_window_set_size(struct swc_window *base, uint32_t width, uint32_t height)
248 }
249
250 EXPORT void
251-swc_window_set_geometry(struct swc_window *window, const struct swc_rectangle *geometry)
252+swc_window_set_geometry(struct swc_window *window,
253+ const struct swc_rectangle *geometry)
254 {
255 swc_window_set_size(window, geometry->width, geometry->height);
256 swc_window_set_position(window, geometry->x, geometry->y);
257 }
258
259 EXPORT bool
260-swc_window_get_geometry(const struct swc_window *base, struct swc_rectangle *geometry)
261+swc_window_get_geometry(const struct swc_window *base,
262+ struct swc_rectangle *geometry)
263 {
264 struct window *window = INTERNAL((struct swc_window *)base);
265
266- if (!window || !geometry)
267+ if (!window || !geometry) {
268 return false;
269+ }
270
271 *geometry = window->view->base.geometry;
272 return true;
273 }
274
275 EXPORT void
276-swc_window_set_border(struct swc_window *window, uint32_t inner_border_color, uint32_t inner_border_width,
277- uint32_t outer_border_color, uint32_t outer_border_width)
278+swc_window_set_border(struct swc_window *window, uint32_t inner_border_color,
279+ uint32_t inner_border_width, uint32_t outer_border_color,
280+ uint32_t outer_border_width)
281 {
282 struct compositor_view *view = INTERNAL(window)->view;
283
284- compositor_view_set_border_color(view, outer_border_color, inner_border_color);
285- compositor_view_set_border_width(view, outer_border_width, inner_border_width);
286+ compositor_view_set_border_color(view, outer_border_color,
287+ inner_border_color);
288+ compositor_view_set_border_width(view, outer_border_width,
289+ inner_border_width);
290 }
291
292-
293 EXPORT void
294 swc_window_begin_move(struct swc_window *window)
295 {
296@@ -357,12 +392,16 @@ swc_window_end_resize(struct swc_window *window)
297 }
298
299 static bool
300-move_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx, wl_fixed_t fy)
301+move_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx,
302+ wl_fixed_t fy)
303 {
304- struct window *window = wl_container_of(handler, window, move.interaction.handler);
305+ struct window *window =
306+ wl_container_of(handler, window, move.interaction.handler);
307
308- if (should_throttle_motion(window->base.motion_throttle_ms, &window->move.last_time, time))
309+ if (should_throttle_motion(window->base.motion_throttle_ms,
310+ &window->move.last_time, time)) {
311 return true;
312+ }
313
314 int32_t x = wl_fixed_to_int(fx) + window->move.offset.x,
315 y = wl_fixed_to_int(fy) + window->move.offset.y;
316@@ -372,24 +411,30 @@ move_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx, wl_fi
317 }
318
319 static bool
320-resize_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx, wl_fixed_t fy)
321+resize_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx,
322+ wl_fixed_t fy)
323 {
324- struct window *window = wl_container_of(handler, window, resize.interaction.handler);
325+ struct window *window =
326+ wl_container_of(handler, window, resize.interaction.handler);
327 const struct swc_rectangle *geometry = &window->view->base.geometry;
328 uint32_t width = geometry->width, height = geometry->height;
329
330- if (should_throttle_motion(window->base.motion_throttle_ms, &window->resize.last_time, time))
331+ if (should_throttle_motion(window->base.motion_throttle_ms,
332+ &window->resize.last_time, time)) {
333 return true;
334+ }
335
336- if (window->resize.edges & SWC_WINDOW_EDGE_LEFT)
337+ if (window->resize.edges & SWC_WINDOW_EDGE_LEFT) {
338 width -= wl_fixed_to_int(fx) + window->resize.offset.x - geometry->x;
339- else if (window->resize.edges & SWC_WINDOW_EDGE_RIGHT)
340+ } else if (window->resize.edges & SWC_WINDOW_EDGE_RIGHT) {
341 width = wl_fixed_to_int(fx) + window->resize.offset.x - geometry->x;
342+ }
343
344- if (window->resize.edges & SWC_WINDOW_EDGE_TOP)
345+ if (window->resize.edges & SWC_WINDOW_EDGE_TOP) {
346 height -= wl_fixed_to_int(fy) + window->resize.offset.y - geometry->y;
347- else if (window->resize.edges & SWC_WINDOW_EDGE_BOTTOM)
348+ } else if (window->resize.edges & SWC_WINDOW_EDGE_BOTTOM) {
349 height = wl_fixed_to_int(fy) + window->resize.offset.y - geometry->y;
350+ }
351
352 clamp_window_size(window, &width, &height);
353 window->impl->configure(window, width, height);
354@@ -398,12 +443,16 @@ resize_motion(struct pointer_handler *handler, uint32_t time, wl_fixed_t fx, wl_
355 }
356
357 static bool
358-handle_button(struct pointer_handler *handler, uint32_t time, struct button *button, uint32_t state)
359+handle_button(struct pointer_handler *handler, uint32_t time,
360+ struct button *button, uint32_t state)
361 {
362- struct window_pointer_interaction *interaction = wl_container_of(handler, interaction, handler);
363+ struct window_pointer_interaction *interaction =
364+ wl_container_of(handler, interaction, handler);
365
366- if (state != WL_POINTER_BUTTON_STATE_RELEASED || !interaction->original_handler)
367+ if (state != WL_POINTER_BUTTON_STATE_RELEASED ||
368+ !interaction->original_handler) {
369 return false;
370+ }
371
372 end_interaction(interaction, button);
373 return true;
374@@ -414,36 +463,42 @@ handle_attach(struct view_handler *handler)
375 {
376 struct window *window = wl_container_of(handler, window, view_handler);
377
378- if (window->configure.acknowledged)
379+ if (window->configure.acknowledged) {
380 flush(window);
381+ }
382 window->configure.pending = false;
383 }
384
385 static void
386-handle_resize(struct view_handler *handler, uint32_t old_width, uint32_t old_height)
387+handle_resize(struct view_handler *handler, uint32_t old_width,
388+ uint32_t old_height)
389 {
390 struct window *window = wl_container_of(handler, window, view_handler);
391
392- if (window->resize.interaction.active && window->resize.edges & (SWC_WINDOW_EDGE_TOP | SWC_WINDOW_EDGE_LEFT)) {
393+ if (window->resize.interaction.active &&
394+ window->resize.edges & (SWC_WINDOW_EDGE_TOP | SWC_WINDOW_EDGE_LEFT)) {
395 const struct swc_rectangle *geometry = &window->view->base.geometry;
396 int32_t x = geometry->x, y = geometry->y;
397
398- if (window->resize.edges & SWC_WINDOW_EDGE_LEFT)
399+ if (window->resize.edges & SWC_WINDOW_EDGE_LEFT) {
400 x += old_width - geometry->width;
401- if (window->resize.edges & SWC_WINDOW_EDGE_TOP)
402+ }
403+ if (window->resize.edges & SWC_WINDOW_EDGE_TOP) {
404 y += old_height - geometry->height;
405+ }
406
407 view_move(&window->view->base, x, y);
408 }
409 }
410
411 static const struct view_handler_impl view_handler_impl = {
412- .attach = handle_attach,
413- .resize = handle_resize,
414+ .attach = handle_attach,
415+ .resize = handle_resize,
416 };
417
418 bool
419-window_initialize(struct window *window, const struct window_impl *impl, struct surface *surface)
420+window_initialize(struct window *window, const struct window_impl *impl,
421+ struct surface *surface)
422 {
423 DEBUG("Initializing window, %p\n", window);
424
425@@ -453,11 +508,13 @@ window_initialize(struct window *window, const struct window_impl *impl, struct
426
427 if (surface->view) {
428 window->view = compositor_view(surface->view);
429- if (!window->view || window->view->window)
430+ if (!window->view || window->view->window) {
431 return false;
432+ }
433 } else {
434- if (!(window->view = compositor_create_view(surface)))
435+ if (!(window->view = compositor_create_view(surface))) {
436 return false;
437+ }
438 }
439
440 window->impl = impl;
441@@ -475,16 +532,16 @@ window_initialize(struct window *window, const struct window_impl *impl, struct
442 window->move.last_time = 0;
443 window->move.interaction.active = false;
444 window->move.interaction.handler = (struct pointer_handler){
445- .motion = move_motion,
446- .button = handle_button,
447+ .motion = move_motion,
448+ .button = handle_button,
449 };
450 window->configure.pending = false;
451 window->configure.width = 0;
452 window->configure.height = 0;
453 window->resize.interaction.active = false;
454 window->resize.interaction.handler = (struct pointer_handler){
455- .motion = resize_motion,
456- .button = handle_button,
457+ .motion = resize_motion,
458+ .button = handle_button,
459 };
460 window->resize.last_time = 0;
461
462@@ -507,8 +564,9 @@ window_finalize(struct window *window)
463 void
464 window_manage(struct window *window)
465 {
466- if (window->managed)
467+ if (window->managed) {
468 return;
469+ }
470
471 swc.manager->new_window(&window->base);
472 window->managed = true;
473@@ -517,11 +575,13 @@ window_manage(struct window *window)
474 void
475 window_unmanage(struct window *window)
476 {
477- if (!window->managed)
478+ if (!window->managed) {
479 return;
480+ }
481
482- if (window->handler->destroy)
483+ if (window->handler->destroy) {
484 window->handler->destroy(window->handler_data);
485+ }
486 window->handler = &null_handler;
487 window->managed = false;
488 }
489@@ -532,8 +592,9 @@ window_set_title(struct window *window, const char *title, size_t length)
490 free(window->base.title);
491 window->base.title = strndup(title, length);
492
493- if (window->handler->title_changed)
494+ if (window->handler->title_changed) {
495 window->handler->title_changed(window->handler_data);
496+ }
497 }
498
499 void
500@@ -542,31 +603,37 @@ window_set_app_id(struct window *window, const char *app_id)
501 free(window->base.app_id);
502 window->base.app_id = strdup(app_id);
503
504- if (window->handler->app_id_changed)
505+ if (window->handler->app_id_changed) {
506 window->handler->app_id_changed(window->handler_data);
507+ }
508 }
509
510 void
511 window_set_parent(struct window *window, struct window *parent)
512 {
513- if (window->base.parent == &parent->base)
514+ if (window->base.parent == &parent->base) {
515 return;
516+ }
517
518 compositor_view_set_parent(window->view, parent->view);
519 window->base.parent = &parent->base;
520
521- if (window->handler->parent_changed)
522+ if (window->handler->parent_changed) {
523 window->handler->parent_changed(window->handler_data);
524+ }
525 }
526
527 void
528 window_begin_move(struct window *window, struct button *button)
529 {
530- if (window->mode != WINDOW_MODE_STACKED && window->handler->move)
531+ if (window->mode != WINDOW_MODE_STACKED && window->handler->move) {
532 window->handler->move(window->handler_data);
533+ }
534
535- if (window->mode != WINDOW_MODE_STACKED || window->move.interaction.active)
536+ if (window->mode != WINDOW_MODE_STACKED ||
537+ window->move.interaction.active) {
538 return;
539+ }
540
541 struct swc_rectangle *geometry = &window->view->base.geometry;
542 int32_t px = wl_fixed_to_int(swc.seat->pointer->x),
543@@ -579,13 +646,17 @@ window_begin_move(struct window *window, struct button *button)
544 }
545
546 void
547-window_begin_resize(struct window *window, uint32_t edges, struct button *button)
548+window_begin_resize(struct window *window, uint32_t edges,
549+ struct button *button)
550 {
551- if (window->mode != WINDOW_MODE_STACKED && window->handler->resize)
552+ if (window->mode != WINDOW_MODE_STACKED && window->handler->resize) {
553 window->handler->resize(window->handler_data);
554+ }
555
556- if (window->mode != WINDOW_MODE_STACKED || window->resize.interaction.active)
557+ if (window->mode != WINDOW_MODE_STACKED ||
558+ window->resize.interaction.active) {
559 return;
560+ }
561
562 struct swc_rectangle *geometry = &window->view->base.geometry;
563 int32_t px = wl_fixed_to_int(swc.seat->pointer->x),
564@@ -595,12 +666,20 @@ window_begin_resize(struct window *window, uint32_t edges, struct button *button
565 window->resize.last_time = 0;
566
567 if (!edges) {
568- edges |= (px < geometry->x + geometry->width / 2) ? SWC_WINDOW_EDGE_LEFT : SWC_WINDOW_EDGE_RIGHT;
569- edges |= (py < geometry->y + geometry->height / 2) ? SWC_WINDOW_EDGE_TOP : SWC_WINDOW_EDGE_BOTTOM;
570+ edges |= (px < geometry->x + geometry->width / 2)
571+ ? SWC_WINDOW_EDGE_LEFT
572+ : SWC_WINDOW_EDGE_RIGHT;
573+ edges |= (py < geometry->y + geometry->height / 2)
574+ ? SWC_WINDOW_EDGE_TOP
575+ : SWC_WINDOW_EDGE_BOTTOM;
576 }
577
578- window->resize.offset.x = geometry->x - px + ((edges & SWC_WINDOW_EDGE_RIGHT) ? geometry->width : 0);
579- window->resize.offset.y = geometry->y - py + ((edges & SWC_WINDOW_EDGE_BOTTOM) ? geometry->height : 0);
580+ window->resize.offset.x =
581+ geometry->x - px +
582+ ((edges & SWC_WINDOW_EDGE_RIGHT) ? geometry->width : 0);
583+ window->resize.offset.y =
584+ geometry->y - py +
585+ ((edges & SWC_WINDOW_EDGE_BOTTOM) ? geometry->height : 0);
586 window->resize.edges = edges;
587 }
588
589@@ -614,12 +693,14 @@ swc_window_get_pid(struct swc_window *base)
590 uid_t uid;
591 gid_t gid;
592
593- if (!window || !window->view || !window->view->surface)
594+ if (!window || !window->view || !window->view->surface) {
595 return 0;
596+ }
597
598 surface = window->view->surface;
599- if (!surface->resource)
600+ if (!surface->resource) {
601 return 0;
602+ }
603
604 client = wl_resource_get_client(surface->resource);
605 wl_client_get_credentials(client, &pid, &uid, &gid);
+22,
-11
1@@ -24,8 +24,8 @@
2 #ifndef SWC_WINDOW_H
3 #define SWC_WINDOW_H
4
5-#include "swc.h"
6 #include "pointer.h"
7+#include "swc.h"
8
9 #include <stdint.h>
10 #include <wayland-server.h>
11@@ -52,7 +52,7 @@ struct window {
12 struct view_handler view_handler;
13 bool managed;
14 unsigned mode;
15-
16+
17 struct {
18 struct swc_rectangle geom;
19 unsigned mode;
20@@ -95,14 +95,25 @@ struct window_impl {
21
22 extern struct wl_listener window_enter_listener;
23
24-bool window_initialize(struct window *window, const struct window_impl *impl, struct surface *surface);
25-void window_finalize(struct window *window);
26-void window_manage(struct window *window);
27-void window_unmanage(struct window *window);
28-void window_set_title(struct window *window, const char *title, size_t length);
29-void window_set_app_id(struct window *window, const char *app_id);
30-void window_set_parent(struct window *window, struct window *parent);
31-void window_begin_move(struct window *window, struct button *button);
32-void window_begin_resize(struct window *window, uint32_t edges, struct button *button);
33+bool
34+window_initialize(struct window *window, const struct window_impl *impl,
35+ struct surface *surface);
36+void
37+window_finalize(struct window *window);
38+void
39+window_manage(struct window *window);
40+void
41+window_unmanage(struct window *window);
42+void
43+window_set_title(struct window *window, const char *title, size_t length);
44+void
45+window_set_app_id(struct window *window, const char *app_id);
46+void
47+window_set_parent(struct window *window, struct window *parent);
48+void
49+window_begin_move(struct window *window, struct button *button);
50+void
51+window_begin_resize(struct window *window, uint32_t edges,
52+ struct button *button);
53
54 #endif
+171,
-171
1@@ -51,7 +51,7 @@
2 #define _ATKEYNAMES_H
3
4 #define XK_TECHNICAL
5-#define XK_KATAKANA
6+#define XK_KATAKANA
7
8 /*
9 * NOTE: The AT/MF keyboards can generate (via the 8042) two (MF: three)
10@@ -77,151 +77,151 @@
11 * ---------------- ---------- ------- ------ ------
12 */
13
14-#define KEY_Escape /* Escape 0x01 */ 1
15-#define KEY_1 /* 1 ! 0x02 */ 2
16-#define KEY_2 /* 2 @ 0x03 */ 3
17-#define KEY_3 /* 3 # 0x04 */ 4
18-#define KEY_4 /* 4 $ 0x05 */ 5
19-#define KEY_5 /* 5 % 0x06 */ 6
20-#define KEY_6 /* 6 ^ 0x07 */ 7
21-#define KEY_7 /* 7 & 0x08 */ 8
22-#define KEY_8 /* 8 * 0x09 */ 9
23-#define KEY_9 /* 9 ( 0x0a */ 10
24-#define KEY_0 /* 0 ) 0x0b */ 11
25-#define KEY_Minus /* - (Minus) _ (Under) 0x0c */ 12
26-#define KEY_Equal /* = (Equal) + 0x0d */ 13
27-#define KEY_BackSpace /* Back Space 0x0e */ 14
28-#define KEY_Tab /* Tab 0x0f */ 15
29-#define KEY_Q /* Q 0x10 */ 16
30-#define KEY_W /* W 0x11 */ 17
31-#define KEY_E /* E 0x12 */ 18
32-#define KEY_R /* R 0x13 */ 19
33-#define KEY_T /* T 0x14 */ 20
34-#define KEY_Y /* Y 0x15 */ 21
35-#define KEY_U /* U 0x16 */ 22
36-#define KEY_I /* I 0x17 */ 23
37-#define KEY_O /* O 0x18 */ 24
38-#define KEY_P /* P 0x19 */ 25
39-#define KEY_LBrace /* [ { 0x1a */ 26
40-#define KEY_RBrace /* ] } 0x1b */ 27
41-#define KEY_Enter /* Enter 0x1c */ 28
42-#define KEY_LCtrl /* Ctrl(left) 0x1d */ 29
43-#define KEY_A /* A 0x1e */ 30
44-#define KEY_S /* S 0x1f */ 31
45-#define KEY_D /* D 0x20 */ 32
46-#define KEY_F /* F 0x21 */ 33
47-#define KEY_G /* G 0x22 */ 34
48-#define KEY_H /* H 0x23 */ 35
49-#define KEY_J /* J 0x24 */ 36
50-#define KEY_K /* K 0x25 */ 37
51-#define KEY_L /* L 0x26 */ 38
52-#define KEY_SemiColon /* ;(SemiColon) :(Colon) 0x27 */ 39
53-#define KEY_Quote /* ' (Apostr) " (Quote) 0x28 */ 40
54-#define KEY_Tilde /* ` (Accent) ~ (Tilde) 0x29 */ 41
55-#define KEY_ShiftL /* Shift(left) 0x2a */ 42
56-#define KEY_BSlash /* \(BckSlash) |(VertBar)0x2b */ 43
57-#define KEY_Z /* Z 0x2c */ 44
58-#define KEY_X /* X 0x2d */ 45
59-#define KEY_C /* C 0x2e */ 46
60-#define KEY_V /* V 0x2f */ 47
61-#define KEY_B /* B 0x30 */ 48
62-#define KEY_N /* N 0x31 */ 49
63-#define KEY_M /* M 0x32 */ 50
64-#define KEY_Comma /* , (Comma) < (Less) 0x33 */ 51
65-#define KEY_Period /* . (Period) >(Greater)0x34 */ 52
66-#define KEY_Slash /* / (Slash) ? 0x35 */ 53
67-#define KEY_ShiftR /* Shift(right) 0x36 */ 54
68-#define KEY_KP_Multiply /* * 0x37 */ 55
69-#define KEY_Alt /* Alt(left) 0x38 */ 56
70-#define KEY_Space /* (SpaceBar) 0x39 */ 57
71-#define KEY_CapsLock /* CapsLock 0x3a */ 58
72-#define KEY_F1 /* F1 0x3b */ 59
73-#define KEY_F2 /* F2 0x3c */ 60
74-#define KEY_F3 /* F3 0x3d */ 61
75-#define KEY_F4 /* F4 0x3e */ 62
76-#define KEY_F5 /* F5 0x3f */ 63
77-#define KEY_F6 /* F6 0x40 */ 64
78-#define KEY_F7 /* F7 0x41 */ 65
79-#define KEY_F8 /* F8 0x42 */ 66
80-#define KEY_F9 /* F9 0x43 */ 67
81-#define KEY_F10 /* F10 0x44 */ 68
82-#define KEY_NumLock /* NumLock 0x45 */ 69
83-#define KEY_ScrollLock /* ScrollLock 0x46 */ 70
84-#define KEY_KP_7 /* 7 Home 0x47 */ 71
85-#define KEY_KP_8 /* 8 Up 0x48 */ 72
86-#define KEY_KP_9 /* 9 PgUp 0x49 */ 73
87-#define KEY_KP_Minus /* - (Minus) 0x4a */ 74
88-#define KEY_KP_4 /* 4 Left 0x4b */ 75
89-#define KEY_KP_5 /* 5 0x4c */ 76
90-#define KEY_KP_6 /* 6 Right 0x4d */ 77
91-#define KEY_KP_Plus /* + (Plus) 0x4e */ 78
92-#define KEY_KP_1 /* 1 End 0x4f */ 79
93-#define KEY_KP_2 /* 2 Down 0x50 */ 80
94-#define KEY_KP_3 /* 3 PgDown 0x51 */ 81
95-#define KEY_KP_0 /* 0 Insert 0x52 */ 82
96-#define KEY_KP_Decimal /* . (Decimal) Delete 0x53 */ 83
97-#define KEY_SysReqest /* SysReqest 0x54 */ 84
98- /* NOTUSED 0x55 */
99-#define KEY_Less /* < (Less) >(Greater) 0x56 */ 86
100-#define KEY_F11 /* F11 0x57 */ 87
101-#define KEY_F12 /* F12 0x58 */ 88
102+#define KEY_Escape /* Escape 0x01 */ 1
103+#define KEY_1 /* 1 ! 0x02 */ 2
104+#define KEY_2 /* 2 @ 0x03 */ 3
105+#define KEY_3 /* 3 # 0x04 */ 4
106+#define KEY_4 /* 4 $ 0x05 */ 5
107+#define KEY_5 /* 5 % 0x06 */ 6
108+#define KEY_6 /* 6 ^ 0x07 */ 7
109+#define KEY_7 /* 7 & 0x08 */ 8
110+#define KEY_8 /* 8 * 0x09 */ 9
111+#define KEY_9 /* 9 ( 0x0a */ 10
112+#define KEY_0 /* 0 ) 0x0b */ 11
113+#define KEY_Minus /* - (Minus) _ (Under) 0x0c */ 12
114+#define KEY_Equal /* = (Equal) + 0x0d */ 13
115+#define KEY_BackSpace /* Back Space 0x0e */ 14
116+#define KEY_Tab /* Tab 0x0f */ 15
117+#define KEY_Q /* Q 0x10 */ 16
118+#define KEY_W /* W 0x11 */ 17
119+#define KEY_E /* E 0x12 */ 18
120+#define KEY_R /* R 0x13 */ 19
121+#define KEY_T /* T 0x14 */ 20
122+#define KEY_Y /* Y 0x15 */ 21
123+#define KEY_U /* U 0x16 */ 22
124+#define KEY_I /* I 0x17 */ 23
125+#define KEY_O /* O 0x18 */ 24
126+#define KEY_P /* P 0x19 */ 25
127+#define KEY_LBrace /* [ { 0x1a */ 26
128+#define KEY_RBrace /* ] } 0x1b */ 27
129+#define KEY_Enter /* Enter 0x1c */ 28
130+#define KEY_LCtrl /* Ctrl(left) 0x1d */ 29
131+#define KEY_A /* A 0x1e */ 30
132+#define KEY_S /* S 0x1f */ 31
133+#define KEY_D /* D 0x20 */ 32
134+#define KEY_F /* F 0x21 */ 33
135+#define KEY_G /* G 0x22 */ 34
136+#define KEY_H /* H 0x23 */ 35
137+#define KEY_J /* J 0x24 */ 36
138+#define KEY_K /* K 0x25 */ 37
139+#define KEY_L /* L 0x26 */ 38
140+#define KEY_SemiColon /* ;(SemiColon) :(Colon) 0x27 */ 39
141+#define KEY_Quote /* ' (Apostr) " (Quote) 0x28 */ 40
142+#define KEY_Tilde /* ` (Accent) ~ (Tilde) 0x29 */ 41
143+#define KEY_ShiftL /* Shift(left) 0x2a */ 42
144+#define KEY_BSlash /* \(BckSlash) |(VertBar)0x2b */ 43
145+#define KEY_Z /* Z 0x2c */ 44
146+#define KEY_X /* X 0x2d */ 45
147+#define KEY_C /* C 0x2e */ 46
148+#define KEY_V /* V 0x2f */ 47
149+#define KEY_B /* B 0x30 */ 48
150+#define KEY_N /* N 0x31 */ 49
151+#define KEY_M /* M 0x32 */ 50
152+#define KEY_Comma /* , (Comma) < (Less) 0x33 */ 51
153+#define KEY_Period /* . (Period) >(Greater)0x34 */ 52
154+#define KEY_Slash /* / (Slash) ? 0x35 */ 53
155+#define KEY_ShiftR /* Shift(right) 0x36 */ 54
156+#define KEY_KP_Multiply /* * 0x37 */ 55
157+#define KEY_Alt /* Alt(left) 0x38 */ 56
158+#define KEY_Space /* (SpaceBar) 0x39 */ 57
159+#define KEY_CapsLock /* CapsLock 0x3a */ 58
160+#define KEY_F1 /* F1 0x3b */ 59
161+#define KEY_F2 /* F2 0x3c */ 60
162+#define KEY_F3 /* F3 0x3d */ 61
163+#define KEY_F4 /* F4 0x3e */ 62
164+#define KEY_F5 /* F5 0x3f */ 63
165+#define KEY_F6 /* F6 0x40 */ 64
166+#define KEY_F7 /* F7 0x41 */ 65
167+#define KEY_F8 /* F8 0x42 */ 66
168+#define KEY_F9 /* F9 0x43 */ 67
169+#define KEY_F10 /* F10 0x44 */ 68
170+#define KEY_NumLock /* NumLock 0x45 */ 69
171+#define KEY_ScrollLock /* ScrollLock 0x46 */ 70
172+#define KEY_KP_7 /* 7 Home 0x47 */ 71
173+#define KEY_KP_8 /* 8 Up 0x48 */ 72
174+#define KEY_KP_9 /* 9 PgUp 0x49 */ 73
175+#define KEY_KP_Minus /* - (Minus) 0x4a */ 74
176+#define KEY_KP_4 /* 4 Left 0x4b */ 75
177+#define KEY_KP_5 /* 5 0x4c */ 76
178+#define KEY_KP_6 /* 6 Right 0x4d */ 77
179+#define KEY_KP_Plus /* + (Plus) 0x4e */ 78
180+#define KEY_KP_1 /* 1 End 0x4f */ 79
181+#define KEY_KP_2 /* 2 Down 0x50 */ 80
182+#define KEY_KP_3 /* 3 PgDown 0x51 */ 81
183+#define KEY_KP_0 /* 0 Insert 0x52 */ 82
184+#define KEY_KP_Decimal /* . (Decimal) Delete 0x53 */ 83
185+#define KEY_SysReqest /* SysReqest 0x54 */ 84
186+/* NOTUSED 0x55 */
187+#define KEY_Less /* < (Less) >(Greater) 0x56 */ 86
188+#define KEY_F11 /* F11 0x57 */ 87
189+#define KEY_F12 /* F12 0x58 */ 88
190
191-#define KEY_Prefix0 /* special 0x60 */ 96
192-#define KEY_Prefix1 /* specail 0x61 */ 97
193+#define KEY_Prefix0 /* special 0x60 */ 96
194+#define KEY_Prefix1 /* specail 0x61 */ 97
195
196 /*
197 * The 'scancodes' below are generated by the server, because the MF101/102
198 * keyboard sends them as sequence of other scancodes
199 */
200-#define KEY_Home /* Home 0x59 */ 89
201-#define KEY_Up /* Up 0x5a */ 90
202-#define KEY_PgUp /* PgUp 0x5b */ 91
203-#define KEY_Left /* Left 0x5c */ 92
204-#define KEY_Begin /* Begin 0x5d */ 93
205-#define KEY_Right /* Right 0x5e */ 94
206-#define KEY_End /* End 0x5f */ 95
207-#define KEY_Down /* Down 0x60 */ 96
208-#define KEY_PgDown /* PgDown 0x61 */ 97
209-#define KEY_Insert /* Insert 0x62 */ 98
210-#define KEY_Delete /* Delete 0x63 */ 99
211-#define KEY_KP_Enter /* Enter 0x64 */ 100
212-#define KEY_RCtrl /* Ctrl(right) 0x65 */ 101
213-#define KEY_Pause /* Pause 0x66 */ 102
214-#define KEY_Print /* Print 0x67 */ 103
215-#define KEY_KP_Divide /* Divide 0x68 */ 104
216-#define KEY_AltLang /* AtlLang(right) 0x69 */ 105
217-#define KEY_Break /* Break 0x6a */ 106
218-#define KEY_LMeta /* Left Meta 0x6b */ 107
219-#define KEY_RMeta /* Right Meta 0x6c */ 108
220-#define KEY_Menu /* Menu 0x6d */ 109
221-#define KEY_F13 /* F13 0x6e */ 110
222-#define KEY_F14 /* F14 0x6f */ 111
223-#define KEY_F15 /* F15 0x70 */ 112
224-#define KEY_HKTG /* Hirugana/Katakana tog 0x70 */ 112
225-#define KEY_F16 /* F16 0x71 */ 113
226-#define KEY_F17 /* F17 0x72 */ 114
227-#define KEY_KP_DEC /* KP_DEC 0x73 */ 115
228-#define KEY_BSlash2 /* \ _ 0x73 */ 115
229-#define KEY_KP_Equal /* Equal (Keypad) 0x76 */ 118
230-#define KEY_XFER /* Kanji Transfer 0x79 */ 121
231-#define KEY_NFER /* No Kanji Transfer 0x7b */ 123
232-#define KEY_Yen /* Yen 0x7d */ 125
233+#define KEY_Home /* Home 0x59 */ 89
234+#define KEY_Up /* Up 0x5a */ 90
235+#define KEY_PgUp /* PgUp 0x5b */ 91
236+#define KEY_Left /* Left 0x5c */ 92
237+#define KEY_Begin /* Begin 0x5d */ 93
238+#define KEY_Right /* Right 0x5e */ 94
239+#define KEY_End /* End 0x5f */ 95
240+#define KEY_Down /* Down 0x60 */ 96
241+#define KEY_PgDown /* PgDown 0x61 */ 97
242+#define KEY_Insert /* Insert 0x62 */ 98
243+#define KEY_Delete /* Delete 0x63 */ 99
244+#define KEY_KP_Enter /* Enter 0x64 */ 100
245+#define KEY_RCtrl /* Ctrl(right) 0x65 */ 101
246+#define KEY_Pause /* Pause 0x66 */ 102
247+#define KEY_Print /* Print 0x67 */ 103
248+#define KEY_KP_Divide /* Divide 0x68 */ 104
249+#define KEY_AltLang /* AtlLang(right) 0x69 */ 105
250+#define KEY_Break /* Break 0x6a */ 106
251+#define KEY_LMeta /* Left Meta 0x6b */ 107
252+#define KEY_RMeta /* Right Meta 0x6c */ 108
253+#define KEY_Menu /* Menu 0x6d */ 109
254+#define KEY_F13 /* F13 0x6e */ 110
255+#define KEY_F14 /* F14 0x6f */ 111
256+#define KEY_F15 /* F15 0x70 */ 112
257+#define KEY_HKTG /* Hirugana/Katakana tog 0x70 */ 112
258+#define KEY_F16 /* F16 0x71 */ 113
259+#define KEY_F17 /* F17 0x72 */ 114
260+#define KEY_KP_DEC /* KP_DEC 0x73 */ 115
261+#define KEY_BSlash2 /* \ _ 0x73 */ 115
262+#define KEY_KP_Equal /* Equal (Keypad) 0x76 */ 118
263+#define KEY_XFER /* Kanji Transfer 0x79 */ 121
264+#define KEY_NFER /* No Kanji Transfer 0x7b */ 123
265+#define KEY_Yen /* Yen 0x7d */ 125
266
267-#define KEY_Power /* Power Key 0x84 */ 132
268-#define KEY_Mute /* Audio Mute 0x85 */ 133
269-#define KEY_AudioLower /* Audio Lower 0x86 */ 134
270-#define KEY_AudioRaise /* Audio Raise 0x87 */ 135
271-#define KEY_Help /* Help 0x88 */ 136
272-#define KEY_L1 /* Stop 0x89 */ 137
273-#define KEY_L2 /* Again 0x8a */ 138
274-#define KEY_L3 /* Props 0x8b */ 139
275-#define KEY_L4 /* Undo 0x8c */ 140
276-#define KEY_L5 /* Front 0x8d */ 141
277-#define KEY_L6 /* Copy 0x8e */ 142
278-#define KEY_L7 /* Open 0x8f */ 143
279-#define KEY_L8 /* Paste 0x90 */ 144
280-#define KEY_L9 /* Find 0x91 */ 145
281-#define KEY_L10 /* Cut 0x92 */ 146
282+#define KEY_Power /* Power Key 0x84 */ 132
283+#define KEY_Mute /* Audio Mute 0x85 */ 133
284+#define KEY_AudioLower /* Audio Lower 0x86 */ 134
285+#define KEY_AudioRaise /* Audio Raise 0x87 */ 135
286+#define KEY_Help /* Help 0x88 */ 136
287+#define KEY_L1 /* Stop 0x89 */ 137
288+#define KEY_L2 /* Again 0x8a */ 138
289+#define KEY_L3 /* Props 0x8b */ 139
290+#define KEY_L4 /* Undo 0x8c */ 140
291+#define KEY_L5 /* Front 0x8d */ 141
292+#define KEY_L6 /* Copy 0x8e */ 142
293+#define KEY_L7 /* Open 0x8f */ 143
294+#define KEY_L8 /* Paste 0x90 */ 144
295+#define KEY_L9 /* Find 0x91 */ 145
296+#define KEY_L10 /* Cut 0x92 */ 146
297
298 /*
299 * Fake 'scancodes' in the following ranges are generated for 2-byte
300@@ -243,39 +243,39 @@
301 * 0x59-0x5f,0x62-0x76. These are used for some extra keys on some keyboards.
302 */
303
304-#define KEY_0x59 0x95
305-#define KEY_0x5A 0xA2
306-#define KEY_0x5B 0xAD
307-#define KEY_0x5C KEY_KP_EQUAL
308-#define KEY_0x5D 0xAE
309-#define KEY_0x5E 0xAF
310-#define KEY_0x5F 0xB0
311-#define KEY_0x62 0xB5
312-#define KEY_0x63 0xB6
313-#define KEY_0x64 0xB7
314-#define KEY_0x65 0xB8
315-#define KEY_0x66 0xB9
316-#define KEY_0x67 0xBE
317-#define KEY_0x68 0xBF
318-#define KEY_0x69 0xC0
319-#define KEY_0x6A 0xC1
320-#define KEY_0x6B 0xC3
321-#define KEY_0x6C 0xC4
322-#define KEY_0x6D 0xC5
323-#define KEY_0x6E 0xC6
324-#define KEY_0x6F 0xC7
325-#define KEY_0x70 0xC8
326-#define KEY_0x71 0xC9
327-#define KEY_0x72 0xCA
328-#define KEY_0x73 0xCB
329-#define KEY_0x74 0xD3
330-#define KEY_0x75 0xD4
331-#define KEY_0x76 0xD5
332-#define KEY_R_0xF4 0xF4
333-#define KEY_R_0xF5 0xF5
334+#define KEY_0x59 0x95
335+#define KEY_0x5A 0xA2
336+#define KEY_0x5B 0xAD
337+#define KEY_0x5C KEY_KP_EQUAL
338+#define KEY_0x5D 0xAE
339+#define KEY_0x5E 0xAF
340+#define KEY_0x5F 0xB0
341+#define KEY_0x62 0xB5
342+#define KEY_0x63 0xB6
343+#define KEY_0x64 0xB7
344+#define KEY_0x65 0xB8
345+#define KEY_0x66 0xB9
346+#define KEY_0x67 0xBE
347+#define KEY_0x68 0xBF
348+#define KEY_0x69 0xC0
349+#define KEY_0x6A 0xC1
350+#define KEY_0x6B 0xC3
351+#define KEY_0x6C 0xC4
352+#define KEY_0x6D 0xC5
353+#define KEY_0x6E 0xC6
354+#define KEY_0x6F 0xC7
355+#define KEY_0x70 0xC8
356+#define KEY_0x71 0xC9
357+#define KEY_0x72 0xCA
358+#define KEY_0x73 0xCB
359+#define KEY_0x74 0xD3
360+#define KEY_0x75 0xD4
361+#define KEY_0x76 0xD5
362+#define KEY_R_0xF4 0xF4
363+#define KEY_R_0xF5 0xF5
364
365 /* These are for "notused" and "unknown" entries in translation maps. */
366-#define KEY_NOTUSED 0
367-#define KEY_UNKNOWN 255
368+#define KEY_NOTUSED 0
369+#define KEY_UNKNOWN 255
370
371 #endif /* _ATKEYNAMES_H */
+458,
-458
1@@ -9,477 +9,477 @@
2 #include <stdint.h>
3
4 static uint8_t wsUsbMap[] = {
5- /* 0 */ KEY_NOTUSED,
6- /* 1 */ KEY_NOTUSED,
7- /* 2 */ KEY_NOTUSED,
8- /* 3 */ KEY_NOTUSED,
9- /* 4 */ KEY_A,
10- /* 5 */ KEY_B,
11- /* 6 */ KEY_C,
12- /* 7 */ KEY_D,
13- /* 8 */ KEY_E,
14- /* 9 */ KEY_F,
15- /* 10 */ KEY_G,
16- /* 11 */ KEY_H,
17- /* 12 */ KEY_I,
18- /* 13 */ KEY_J,
19- /* 14 */ KEY_K,
20- /* 15 */ KEY_L,
21- /* 16 */ KEY_M,
22- /* 17 */ KEY_N,
23- /* 18 */ KEY_O,
24- /* 19 */ KEY_P,
25- /* 20 */ KEY_Q,
26- /* 21 */ KEY_R,
27- /* 22 */ KEY_S,
28- /* 23 */ KEY_T,
29- /* 24 */ KEY_U,
30- /* 25 */ KEY_V,
31- /* 26 */ KEY_W,
32- /* 27 */ KEY_X,
33- /* 28 */ KEY_Y,
34- /* 29 */ KEY_Z,
35- /* 30 */ KEY_1, /* 1 !*/
36- /* 31 */ KEY_2, /* 2 @ */
37- /* 32 */ KEY_3, /* 3 # */
38- /* 33 */ KEY_4, /* 4 $ */
39- /* 34 */ KEY_5, /* 5 % */
40- /* 35 */ KEY_6, /* 6 ^ */
41- /* 36 */ KEY_7, /* 7 & */
42- /* 37 */ KEY_8, /* 8 * */
43- /* 38 */ KEY_9, /* 9 ( */
44- /* 39 */ KEY_0, /* 0 ) */
45- /* 40 */ KEY_Enter, /* Return */
46- /* 41 */ KEY_Escape, /* Escape */
47- /* 42 */ KEY_BackSpace, /* Backspace Delete */
48- /* 43 */ KEY_Tab, /* Tab */
49- /* 44 */ KEY_Space, /* Space */
50- /* 45 */ KEY_Minus, /* - _ */
51- /* 46 */ KEY_Equal, /* = + */
52- /* 47 */ KEY_LBrace, /* [ { */
53- /* 48 */ KEY_RBrace, /* ] } */
54- /* 49 */ KEY_BSlash, /* \ | */
55- /* 50 */ KEY_BSlash, /* \ _ # ~ on some keyboards */
56- /* 51 */ KEY_SemiColon, /* ; : */
57- /* 52 */ KEY_Quote, /* ' " */
58- /* 53 */ KEY_Tilde, /* ` ~ */
59- /* 54 */ KEY_Comma, /* , < */
60- /* 55 */ KEY_Period, /* . > */
61- /* 56 */ KEY_Slash, /* / ? */
62- /* 57 */ KEY_CapsLock, /* Caps Lock */
63- /* 58 */ KEY_F1, /* F1 */
64- /* 59 */ KEY_F2, /* F2 */
65- /* 60 */ KEY_F3, /* F3 */
66- /* 61 */ KEY_F4, /* F4 */
67- /* 62 */ KEY_F5, /* F5 */
68- /* 63 */ KEY_F6, /* F6 */
69- /* 64 */ KEY_F7, /* F7 */
70- /* 65 */ KEY_F8, /* F8 */
71- /* 66 */ KEY_F9, /* F9 */
72- /* 67 */ KEY_F10, /* F10 */
73- /* 68 */ KEY_F11, /* F11 */
74- /* 69 */ KEY_F12, /* F12 */
75- /* 70 */ KEY_Print, /* PrintScrn SysReq */
76- /* 71 */ KEY_ScrollLock, /* Scroll Lock */
77- /* 72 */ KEY_Pause, /* Pause Break */
78- /* 73 */ KEY_Insert, /* Insert XXX Help on some Mac Keyboards */
79- /* 74 */ KEY_Home, /* Home */
80- /* 75 */ KEY_PgUp, /* Page Up */
81- /* 76 */ KEY_Delete, /* Delete */
82- /* 77 */ KEY_End, /* End */
83- /* 78 */ KEY_PgDown, /* Page Down */
84- /* 79 */ KEY_Right, /* Right Arrow */
85- /* 80 */ KEY_Left, /* Left Arrow */
86- /* 81 */ KEY_Down, /* Down Arrow */
87- /* 82 */ KEY_Up, /* Up Arrow */
88- /* 83 */ KEY_NumLock, /* Num Lock */
89- /* 84 */ KEY_KP_Divide, /* Keypad / */
90- /* 85 */ KEY_KP_Multiply, /* Keypad * */
91- /* 86 */ KEY_KP_Minus, /* Keypad - */
92- /* 87 */ KEY_KP_Plus, /* Keypad + */
93- /* 88 */ KEY_KP_Enter, /* Keypad Enter */
94- /* 89 */ KEY_KP_1, /* Keypad 1 End */
95- /* 90 */ KEY_KP_2, /* Keypad 2 Down */
96- /* 91 */ KEY_KP_3, /* Keypad 3 Pg Down */
97- /* 92 */ KEY_KP_4, /* Keypad 4 Left */
98- /* 93 */ KEY_KP_5, /* Keypad 5 */
99- /* 94 */ KEY_KP_6, /* Keypad 6 */
100- /* 95 */ KEY_KP_7, /* Keypad 7 Home */
101- /* 96 */ KEY_KP_8, /* Keypad 8 Up */
102- /* 97 */ KEY_KP_9, /* KEypad 9 Pg Up */
103- /* 98 */ KEY_KP_0, /* Keypad 0 Ins */
104- /* 99 */ KEY_KP_Decimal, /* Keypad . Del */
105- /* 100 */ KEY_Less, /* < > on some keyboards */
106- /* 101 */ KEY_Menu, /* Menu */
107- /* 102 */ KEY_Power, /* sleep key on Sun USB */
108- /* 103 */ KEY_KP_Equal, /* Keypad = on Mac keyboards */
109- /* 104 */ KEY_F13,
110- /* 105 */ KEY_F14,
111- /* 106 */ KEY_F15,
112- /* 107 */ KEY_F16,
113- /* 108 */ KEY_NOTUSED,
114- /* 109 */ KEY_Power,
115- /* 110 */ KEY_NOTUSED,
116- /* 111 */ KEY_NOTUSED,
117- /* 112 */ KEY_NOTUSED,
118- /* 113 */ KEY_NOTUSED,
119- /* 114 */ KEY_NOTUSED,
120- /* 115 */ KEY_NOTUSED,
121- /* 116 */ KEY_L7,
122- /* 117 */ KEY_Help,
123- /* 118 */ KEY_L3,
124- /* 119 */ KEY_L5,
125- /* 120 */ KEY_L1,
126- /* 121 */ KEY_L2,
127- /* 122 */ KEY_L4,
128- /* 123 */ KEY_L10,
129- /* 124 */ KEY_L6,
130- /* 125 */ KEY_L8,
131- /* 126 */ KEY_L9,
132- /* 127 */ KEY_Mute,
133- /* 128 */ KEY_AudioRaise,
134- /* 129 */ KEY_AudioLower,
135- /* 130 */ KEY_NOTUSED,
136- /* 131 */ KEY_NOTUSED,
137- /* 132 */ KEY_NOTUSED,
138- /* 133 */ KEY_NOTUSED,
139- /* 134 */ KEY_NOTUSED,
140+ /* 0 */ KEY_NOTUSED,
141+ /* 1 */ KEY_NOTUSED,
142+ /* 2 */ KEY_NOTUSED,
143+ /* 3 */ KEY_NOTUSED,
144+ /* 4 */ KEY_A,
145+ /* 5 */ KEY_B,
146+ /* 6 */ KEY_C,
147+ /* 7 */ KEY_D,
148+ /* 8 */ KEY_E,
149+ /* 9 */ KEY_F,
150+ /* 10 */ KEY_G,
151+ /* 11 */ KEY_H,
152+ /* 12 */ KEY_I,
153+ /* 13 */ KEY_J,
154+ /* 14 */ KEY_K,
155+ /* 15 */ KEY_L,
156+ /* 16 */ KEY_M,
157+ /* 17 */ KEY_N,
158+ /* 18 */ KEY_O,
159+ /* 19 */ KEY_P,
160+ /* 20 */ KEY_Q,
161+ /* 21 */ KEY_R,
162+ /* 22 */ KEY_S,
163+ /* 23 */ KEY_T,
164+ /* 24 */ KEY_U,
165+ /* 25 */ KEY_V,
166+ /* 26 */ KEY_W,
167+ /* 27 */ KEY_X,
168+ /* 28 */ KEY_Y,
169+ /* 29 */ KEY_Z,
170+ /* 30 */ KEY_1, /* 1 !*/
171+ /* 31 */ KEY_2, /* 2 @ */
172+ /* 32 */ KEY_3, /* 3 # */
173+ /* 33 */ KEY_4, /* 4 $ */
174+ /* 34 */ KEY_5, /* 5 % */
175+ /* 35 */ KEY_6, /* 6 ^ */
176+ /* 36 */ KEY_7, /* 7 & */
177+ /* 37 */ KEY_8, /* 8 * */
178+ /* 38 */ KEY_9, /* 9 ( */
179+ /* 39 */ KEY_0, /* 0 ) */
180+ /* 40 */ KEY_Enter, /* Return */
181+ /* 41 */ KEY_Escape, /* Escape */
182+ /* 42 */ KEY_BackSpace, /* Backspace Delete */
183+ /* 43 */ KEY_Tab, /* Tab */
184+ /* 44 */ KEY_Space, /* Space */
185+ /* 45 */ KEY_Minus, /* - _ */
186+ /* 46 */ KEY_Equal, /* = + */
187+ /* 47 */ KEY_LBrace, /* [ { */
188+ /* 48 */ KEY_RBrace, /* ] } */
189+ /* 49 */ KEY_BSlash, /* \ | */
190+ /* 50 */ KEY_BSlash, /* \ _ # ~ on some keyboards */
191+ /* 51 */ KEY_SemiColon, /* ; : */
192+ /* 52 */ KEY_Quote, /* ' " */
193+ /* 53 */ KEY_Tilde, /* ` ~ */
194+ /* 54 */ KEY_Comma, /* , < */
195+ /* 55 */ KEY_Period, /* . > */
196+ /* 56 */ KEY_Slash, /* / ? */
197+ /* 57 */ KEY_CapsLock, /* Caps Lock */
198+ /* 58 */ KEY_F1, /* F1 */
199+ /* 59 */ KEY_F2, /* F2 */
200+ /* 60 */ KEY_F3, /* F3 */
201+ /* 61 */ KEY_F4, /* F4 */
202+ /* 62 */ KEY_F5, /* F5 */
203+ /* 63 */ KEY_F6, /* F6 */
204+ /* 64 */ KEY_F7, /* F7 */
205+ /* 65 */ KEY_F8, /* F8 */
206+ /* 66 */ KEY_F9, /* F9 */
207+ /* 67 */ KEY_F10, /* F10 */
208+ /* 68 */ KEY_F11, /* F11 */
209+ /* 69 */ KEY_F12, /* F12 */
210+ /* 70 */ KEY_Print, /* PrintScrn SysReq */
211+ /* 71 */ KEY_ScrollLock, /* Scroll Lock */
212+ /* 72 */ KEY_Pause, /* Pause Break */
213+ /* 73 */ KEY_Insert, /* Insert XXX Help on some Mac Keyboards */
214+ /* 74 */ KEY_Home, /* Home */
215+ /* 75 */ KEY_PgUp, /* Page Up */
216+ /* 76 */ KEY_Delete, /* Delete */
217+ /* 77 */ KEY_End, /* End */
218+ /* 78 */ KEY_PgDown, /* Page Down */
219+ /* 79 */ KEY_Right, /* Right Arrow */
220+ /* 80 */ KEY_Left, /* Left Arrow */
221+ /* 81 */ KEY_Down, /* Down Arrow */
222+ /* 82 */ KEY_Up, /* Up Arrow */
223+ /* 83 */ KEY_NumLock, /* Num Lock */
224+ /* 84 */ KEY_KP_Divide, /* Keypad / */
225+ /* 85 */ KEY_KP_Multiply, /* Keypad * */
226+ /* 86 */ KEY_KP_Minus, /* Keypad - */
227+ /* 87 */ KEY_KP_Plus, /* Keypad + */
228+ /* 88 */ KEY_KP_Enter, /* Keypad Enter */
229+ /* 89 */ KEY_KP_1, /* Keypad 1 End */
230+ /* 90 */ KEY_KP_2, /* Keypad 2 Down */
231+ /* 91 */ KEY_KP_3, /* Keypad 3 Pg Down */
232+ /* 92 */ KEY_KP_4, /* Keypad 4 Left */
233+ /* 93 */ KEY_KP_5, /* Keypad 5 */
234+ /* 94 */ KEY_KP_6, /* Keypad 6 */
235+ /* 95 */ KEY_KP_7, /* Keypad 7 Home */
236+ /* 96 */ KEY_KP_8, /* Keypad 8 Up */
237+ /* 97 */ KEY_KP_9, /* KEypad 9 Pg Up */
238+ /* 98 */ KEY_KP_0, /* Keypad 0 Ins */
239+ /* 99 */ KEY_KP_Decimal, /* Keypad . Del */
240+ /* 100 */ KEY_Less, /* < > on some keyboards */
241+ /* 101 */ KEY_Menu, /* Menu */
242+ /* 102 */ KEY_Power, /* sleep key on Sun USB */
243+ /* 103 */ KEY_KP_Equal, /* Keypad = on Mac keyboards */
244+ /* 104 */ KEY_F13,
245+ /* 105 */ KEY_F14,
246+ /* 106 */ KEY_F15,
247+ /* 107 */ KEY_F16,
248+ /* 108 */ KEY_NOTUSED,
249+ /* 109 */ KEY_Power,
250+ /* 110 */ KEY_NOTUSED,
251+ /* 111 */ KEY_NOTUSED,
252+ /* 112 */ KEY_NOTUSED,
253+ /* 113 */ KEY_NOTUSED,
254+ /* 114 */ KEY_NOTUSED,
255+ /* 115 */ KEY_NOTUSED,
256+ /* 116 */ KEY_L7,
257+ /* 117 */ KEY_Help,
258+ /* 118 */ KEY_L3,
259+ /* 119 */ KEY_L5,
260+ /* 120 */ KEY_L1,
261+ /* 121 */ KEY_L2,
262+ /* 122 */ KEY_L4,
263+ /* 123 */ KEY_L10,
264+ /* 124 */ KEY_L6,
265+ /* 125 */ KEY_L8,
266+ /* 126 */ KEY_L9,
267+ /* 127 */ KEY_Mute,
268+ /* 128 */ KEY_AudioRaise,
269+ /* 129 */ KEY_AudioLower,
270+ /* 130 */ KEY_NOTUSED,
271+ /* 131 */ KEY_NOTUSED,
272+ /* 132 */ KEY_NOTUSED,
273+ /* 133 */ KEY_NOTUSED,
274+ /* 134 */ KEY_NOTUSED,
275 /*
276 * Special keycodes for Japanese keyboards
277 * Override atKeyname HKTG and BSlash2 code to unique values for JP106 keyboards
278 */
279 #undef KEY_HKTG
280-#define KEY_HKTG 200 /* Japanese Hiragana Katakana Toggle */
281+#define KEY_HKTG 200 /* Japanese Hiragana Katakana Toggle */
282 #undef KEY_BSlash2
283-#define KEY_BSlash2 203 /* Japanese '\_' key */
284+#define KEY_BSlash2 203 /* Japanese '\_' key */
285
286- /* 135 */ KEY_BSlash2, /* Japanese 106 kbd: '\_' */
287- /* 136 */ KEY_HKTG, /* Japanese 106 kbd: Hiragana Katakana toggle */
288- /* 137 */ KEY_Yen, /* Japanese 106 kbd: '\|' */
289- /* 138 */ KEY_XFER, /* Japanese 106 kbd: Henkan */
290- /* 139 */ KEY_NFER, /* Japanese 106 kbd: Muhenkan */
291- /* 140 */ KEY_NOTUSED,
292- /* 141 */ KEY_NOTUSED,
293- /* 142 */ KEY_NOTUSED,
294- /* 143 */ KEY_NOTUSED,
295+ /* 135 */ KEY_BSlash2, /* Japanese 106 kbd: '\_' */
296+ /* 136 */ KEY_HKTG, /* Japanese 106 kbd: Hiragana Katakana toggle */
297+ /* 137 */ KEY_Yen, /* Japanese 106 kbd: '\|' */
298+ /* 138 */ KEY_XFER, /* Japanese 106 kbd: Henkan */
299+ /* 139 */ KEY_NFER, /* Japanese 106 kbd: Muhenkan */
300+ /* 140 */ KEY_NOTUSED,
301+ /* 141 */ KEY_NOTUSED,
302+ /* 142 */ KEY_NOTUSED,
303+ /* 143 */ KEY_NOTUSED,
304 /*
305 * Special keycodes for Korean keyboards
306 * Define Hangul and Hangul_Hanja unique key codes
307 * These keys also use KANA and EISU on some Macintosh Japanese USB keyboards
308 */
309-#define KEY_Hangul 201 /* Also KANA Key on Mac JP USB kbd */
310-#define KEY_Hangul_Hanja 202 /* Also EISU Key on Mac JP USB kbd */
311- /* 144 */ KEY_Hangul, /* Korean 106 kbd: Hangul */
312- /* 145 */ KEY_Hangul_Hanja, /* Korean 106 kbd: Hangul Hanja */
313- /* 146 */ KEY_NOTUSED,
314- /* 147 */ KEY_NOTUSED,
315- /* 148 */ KEY_NOTUSED,
316- /* 149 */ KEY_NOTUSED,
317- /* 150 */ KEY_NOTUSED,
318- /* 151 */ KEY_NOTUSED,
319- /* 152 */ KEY_NOTUSED,
320- /* 153 */ KEY_NOTUSED,
321- /* 154 */ KEY_NOTUSED,
322- /* 155 */ KEY_NOTUSED,
323- /* 156 */ KEY_NOTUSED,
324- /* 157 */ KEY_NOTUSED,
325- /* 158 */ KEY_NOTUSED,
326- /* 159 */ KEY_NOTUSED,
327- /* 160 */ KEY_NOTUSED,
328- /* 161 */ KEY_NOTUSED,
329- /* 162 */ KEY_NOTUSED,
330- /* 163 */ KEY_NOTUSED,
331- /* 164 */ KEY_NOTUSED,
332- /* 165 */ KEY_NOTUSED,
333- /* 166 */ KEY_NOTUSED,
334- /* 167 */ KEY_NOTUSED,
335- /* 168 */ KEY_NOTUSED,
336- /* 169 */ KEY_NOTUSED,
337- /* 170 */ KEY_NOTUSED,
338- /* 171 */ KEY_NOTUSED,
339- /* 172 */ KEY_NOTUSED,
340- /* 173 */ KEY_NOTUSED,
341- /* 174 */ KEY_NOTUSED,
342- /* 175 */ KEY_NOTUSED,
343- /* 176 */ KEY_NOTUSED,
344- /* 177 */ KEY_NOTUSED,
345- /* 178 */ KEY_NOTUSED,
346- /* 179 */ KEY_NOTUSED,
347- /* 180 */ KEY_NOTUSED,
348- /* 181 */ KEY_NOTUSED,
349- /* 182 */ KEY_NOTUSED,
350- /* 183 */ KEY_NOTUSED,
351- /* 184 */ KEY_NOTUSED,
352- /* 185 */ KEY_NOTUSED,
353- /* 186 */ KEY_NOTUSED,
354- /* 187 */ KEY_NOTUSED,
355- /* 188 */ KEY_NOTUSED,
356- /* 189 */ KEY_NOTUSED,
357- /* 190 */ KEY_NOTUSED,
358- /* 191 */ KEY_NOTUSED,
359- /* 192 */ KEY_NOTUSED,
360- /* 193 */ KEY_NOTUSED,
361- /* 194 */ KEY_NOTUSED,
362- /* 195 */ KEY_NOTUSED,
363- /* 196 */ KEY_NOTUSED,
364- /* 197 */ KEY_NOTUSED,
365- /* 198 */ KEY_NOTUSED,
366- /* 199 */ KEY_NOTUSED,
367- /* 200 */ KEY_NOTUSED,
368- /* 201 */ KEY_NOTUSED,
369- /* 202 */ KEY_NOTUSED,
370- /* 203 */ KEY_NOTUSED,
371- /* 204 */ KEY_NOTUSED,
372- /* 205 */ KEY_NOTUSED,
373- /* 206 */ KEY_NOTUSED,
374- /* 207 */ KEY_NOTUSED,
375- /* 208 */ KEY_NOTUSED,
376- /* 209 */ KEY_NOTUSED,
377- /* 210 */ KEY_NOTUSED,
378- /* 211 */ KEY_NOTUSED,
379- /* 212 */ KEY_NOTUSED,
380- /* 213 */ KEY_NOTUSED,
381- /* 214 */ KEY_NOTUSED,
382- /* 215 */ KEY_NOTUSED,
383- /* 216 */ KEY_NOTUSED,
384- /* 217 */ KEY_NOTUSED,
385- /* 218 */ KEY_NOTUSED,
386- /* 219 */ KEY_NOTUSED,
387- /* 220 */ KEY_NOTUSED,
388- /* 221 */ KEY_NOTUSED,
389- /* 222 */ KEY_NOTUSED,
390- /* 223 */ KEY_NOTUSED,
391- /* 224 */ KEY_LCtrl, /* Left Control */
392- /* 225 */ KEY_ShiftL, /* Left Shift */
393- /* 226 */ KEY_Alt, /* Left Alt */
394- /* 227 */ KEY_LMeta, /* Left Meta */
395- /* 228 */ KEY_RCtrl, /* Right Control */
396- /* 229 */ KEY_ShiftR, /* Right Shift */
397- /* 230 */ KEY_AltLang, /* Right Alt, AKA AltGr */
398- /* 231 */ KEY_LMeta, /* Right Meta XXX */
399+#define KEY_Hangul 201 /* Also KANA Key on Mac JP USB kbd */
400+#define KEY_Hangul_Hanja 202 /* Also EISU Key on Mac JP USB kbd */
401+ /* 144 */ KEY_Hangul, /* Korean 106 kbd: Hangul */
402+ /* 145 */ KEY_Hangul_Hanja, /* Korean 106 kbd: Hangul Hanja */
403+ /* 146 */ KEY_NOTUSED,
404+ /* 147 */ KEY_NOTUSED,
405+ /* 148 */ KEY_NOTUSED,
406+ /* 149 */ KEY_NOTUSED,
407+ /* 150 */ KEY_NOTUSED,
408+ /* 151 */ KEY_NOTUSED,
409+ /* 152 */ KEY_NOTUSED,
410+ /* 153 */ KEY_NOTUSED,
411+ /* 154 */ KEY_NOTUSED,
412+ /* 155 */ KEY_NOTUSED,
413+ /* 156 */ KEY_NOTUSED,
414+ /* 157 */ KEY_NOTUSED,
415+ /* 158 */ KEY_NOTUSED,
416+ /* 159 */ KEY_NOTUSED,
417+ /* 160 */ KEY_NOTUSED,
418+ /* 161 */ KEY_NOTUSED,
419+ /* 162 */ KEY_NOTUSED,
420+ /* 163 */ KEY_NOTUSED,
421+ /* 164 */ KEY_NOTUSED,
422+ /* 165 */ KEY_NOTUSED,
423+ /* 166 */ KEY_NOTUSED,
424+ /* 167 */ KEY_NOTUSED,
425+ /* 168 */ KEY_NOTUSED,
426+ /* 169 */ KEY_NOTUSED,
427+ /* 170 */ KEY_NOTUSED,
428+ /* 171 */ KEY_NOTUSED,
429+ /* 172 */ KEY_NOTUSED,
430+ /* 173 */ KEY_NOTUSED,
431+ /* 174 */ KEY_NOTUSED,
432+ /* 175 */ KEY_NOTUSED,
433+ /* 176 */ KEY_NOTUSED,
434+ /* 177 */ KEY_NOTUSED,
435+ /* 178 */ KEY_NOTUSED,
436+ /* 179 */ KEY_NOTUSED,
437+ /* 180 */ KEY_NOTUSED,
438+ /* 181 */ KEY_NOTUSED,
439+ /* 182 */ KEY_NOTUSED,
440+ /* 183 */ KEY_NOTUSED,
441+ /* 184 */ KEY_NOTUSED,
442+ /* 185 */ KEY_NOTUSED,
443+ /* 186 */ KEY_NOTUSED,
444+ /* 187 */ KEY_NOTUSED,
445+ /* 188 */ KEY_NOTUSED,
446+ /* 189 */ KEY_NOTUSED,
447+ /* 190 */ KEY_NOTUSED,
448+ /* 191 */ KEY_NOTUSED,
449+ /* 192 */ KEY_NOTUSED,
450+ /* 193 */ KEY_NOTUSED,
451+ /* 194 */ KEY_NOTUSED,
452+ /* 195 */ KEY_NOTUSED,
453+ /* 196 */ KEY_NOTUSED,
454+ /* 197 */ KEY_NOTUSED,
455+ /* 198 */ KEY_NOTUSED,
456+ /* 199 */ KEY_NOTUSED,
457+ /* 200 */ KEY_NOTUSED,
458+ /* 201 */ KEY_NOTUSED,
459+ /* 202 */ KEY_NOTUSED,
460+ /* 203 */ KEY_NOTUSED,
461+ /* 204 */ KEY_NOTUSED,
462+ /* 205 */ KEY_NOTUSED,
463+ /* 206 */ KEY_NOTUSED,
464+ /* 207 */ KEY_NOTUSED,
465+ /* 208 */ KEY_NOTUSED,
466+ /* 209 */ KEY_NOTUSED,
467+ /* 210 */ KEY_NOTUSED,
468+ /* 211 */ KEY_NOTUSED,
469+ /* 212 */ KEY_NOTUSED,
470+ /* 213 */ KEY_NOTUSED,
471+ /* 214 */ KEY_NOTUSED,
472+ /* 215 */ KEY_NOTUSED,
473+ /* 216 */ KEY_NOTUSED,
474+ /* 217 */ KEY_NOTUSED,
475+ /* 218 */ KEY_NOTUSED,
476+ /* 219 */ KEY_NOTUSED,
477+ /* 220 */ KEY_NOTUSED,
478+ /* 221 */ KEY_NOTUSED,
479+ /* 222 */ KEY_NOTUSED,
480+ /* 223 */ KEY_NOTUSED,
481+ /* 224 */ KEY_LCtrl, /* Left Control */
482+ /* 225 */ KEY_ShiftL, /* Left Shift */
483+ /* 226 */ KEY_Alt, /* Left Alt */
484+ /* 227 */ KEY_LMeta, /* Left Meta */
485+ /* 228 */ KEY_RCtrl, /* Right Control */
486+ /* 229 */ KEY_ShiftR, /* Right Shift */
487+ /* 230 */ KEY_AltLang, /* Right Alt, AKA AltGr */
488+ /* 231 */ KEY_LMeta, /* Right Meta XXX */
489 };
490
491 static uint8_t wsXtMap[] = {
492- /* 0 */ KEY_NOTUSED,
493- /* 1 */ KEY_Escape,
494- /* 2 */ KEY_1,
495- /* 3 */ KEY_2,
496- /* 4 */ KEY_3,
497- /* 5 */ KEY_4,
498- /* 6 */ KEY_5,
499- /* 7 */ KEY_6,
500- /* 8 */ KEY_7,
501- /* 9 */ KEY_8,
502- /* 10 */ KEY_9,
503- /* 11 */ KEY_0,
504- /* 12 */ KEY_Minus,
505- /* 13 */ KEY_Equal,
506- /* 14 */ KEY_BackSpace,
507- /* 15 */ KEY_Tab,
508- /* 16 */ KEY_Q,
509- /* 17 */ KEY_W,
510- /* 18 */ KEY_E,
511- /* 19 */ KEY_R,
512- /* 20 */ KEY_T,
513- /* 21 */ KEY_Y,
514- /* 22 */ KEY_U,
515- /* 23 */ KEY_I,
516- /* 24 */ KEY_O,
517- /* 25 */ KEY_P,
518- /* 26 */ KEY_LBrace,
519- /* 27 */ KEY_RBrace,
520- /* 28 */ KEY_Enter,
521- /* 29 */ KEY_LCtrl,
522- /* 30 */ KEY_A,
523- /* 31 */ KEY_S,
524- /* 32 */ KEY_D,
525- /* 33 */ KEY_F,
526- /* 34 */ KEY_G,
527- /* 35 */ KEY_H,
528- /* 36 */ KEY_J,
529- /* 37 */ KEY_K,
530- /* 38 */ KEY_L,
531- /* 39 */ KEY_SemiColon,
532- /* 40 */ KEY_Quote,
533- /* 41 */ KEY_Tilde,
534- /* 42 */ KEY_ShiftL,
535- /* 43 */ KEY_BSlash,
536- /* 44 */ KEY_Z,
537- /* 45 */ KEY_X,
538- /* 46 */ KEY_C,
539- /* 47 */ KEY_V,
540- /* 48 */ KEY_B,
541- /* 49 */ KEY_N,
542- /* 50 */ KEY_M,
543- /* 51 */ KEY_Comma,
544- /* 52 */ KEY_Period,
545- /* 53 */ KEY_Slash,
546- /* 54 */ KEY_ShiftR,
547- /* 55 */ KEY_KP_Multiply,
548- /* 56 */ KEY_Alt,
549- /* 57 */ KEY_Space,
550- /* 58 */ KEY_CapsLock,
551- /* 59 */ KEY_F1,
552- /* 60 */ KEY_F2,
553- /* 61 */ KEY_F3,
554- /* 62 */ KEY_F4,
555- /* 63 */ KEY_F5,
556- /* 64 */ KEY_F6,
557- /* 65 */ KEY_F7,
558- /* 66 */ KEY_F8,
559- /* 67 */ KEY_F9,
560- /* 68 */ KEY_F10,
561- /* 69 */ KEY_NumLock,
562- /* 70 */ KEY_ScrollLock,
563- /* 71 */ KEY_KP_7,
564- /* 72 */ KEY_KP_8,
565- /* 73 */ KEY_KP_9,
566- /* 74 */ KEY_KP_Minus,
567- /* 75 */ KEY_KP_4,
568- /* 76 */ KEY_KP_5,
569- /* 77 */ KEY_KP_6,
570- /* 78 */ KEY_KP_Plus,
571- /* 79 */ KEY_KP_1,
572- /* 80 */ KEY_KP_2,
573- /* 81 */ KEY_KP_3,
574- /* 82 */ KEY_KP_0,
575- /* 83 */ KEY_KP_Decimal,
576- /* 84 */ KEY_NOTUSED,
577- /* 85 */ KEY_NOTUSED,
578- /* 86 */ KEY_Less, /* backslash on uk, < on german */
579- /* 87 */ KEY_F11,
580- /* 88 */ KEY_F12,
581- /* 89 */ KEY_NOTUSED,
582- /* 90 */ KEY_NOTUSED,
583- /* 91 */ KEY_NOTUSED,
584- /* 92 */ KEY_NOTUSED,
585- /* 93 */ KEY_NOTUSED,
586- /* 94 */ KEY_NOTUSED,
587- /* 95 */ KEY_NOTUSED,
588- /* 96 */ KEY_NOTUSED,
589- /* 97 */ KEY_NOTUSED,
590- /* 98 */ KEY_NOTUSED,
591- /* 99 */ KEY_NOTUSED,
592- /* 100 */ KEY_NOTUSED,
593- /* 101 */ KEY_NOTUSED,
594- /* 102 */ KEY_NOTUSED,
595- /* 103 */ KEY_NOTUSED,
596- /* 104 */ KEY_NOTUSED,
597- /* 105 */ KEY_NOTUSED,
598- /* 106 */ KEY_NOTUSED,
599- /* 107 */ KEY_NOTUSED,
600- /* 108 */ KEY_NOTUSED,
601- /* 109 */ KEY_NOTUSED,
602- /* 110 */ KEY_NOTUSED,
603- /* 111 */ KEY_NOTUSED,
604- /* 112 */ KEY_NOTUSED,
605- /* 113 */ KEY_NOTUSED,
606- /* 114 */ KEY_NOTUSED,
607- /* 115 */ KEY_NOTUSED,
608- /* 116 */ KEY_NOTUSED,
609- /* 117 */ KEY_NOTUSED,
610- /* 118 */ KEY_NOTUSED,
611- /* 119 */ KEY_NOTUSED,
612- /* 120 */ KEY_NOTUSED,
613- /* 121 */ KEY_NOTUSED,
614- /* 122 */ KEY_NOTUSED,
615- /* 123 */ KEY_NOTUSED,
616- /* 124 */ KEY_NOTUSED,
617- /* 125 */ KEY_NOTUSED,
618- /* 126 */ KEY_NOTUSED,
619- /* 127 */ KEY_Pause,
620- /* 128 */ KEY_NOTUSED,
621- /* 129 */ KEY_NOTUSED,
622- /* 130 */ KEY_NOTUSED,
623- /* 131 */ KEY_NOTUSED,
624- /* 132 */ KEY_NOTUSED,
625- /* 133 */ KEY_NOTUSED,
626- /* 134 */ KEY_NOTUSED,
627- /* 135 */ KEY_NOTUSED,
628- /* 136 */ KEY_NOTUSED,
629- /* 137 */ KEY_NOTUSED,
630- /* 138 */ KEY_NOTUSED,
631- /* 139 */ KEY_NOTUSED,
632- /* 140 */ KEY_NOTUSED,
633- /* 141 */ KEY_NOTUSED,
634- /* 142 */ KEY_NOTUSED,
635- /* 143 */ KEY_NOTUSED,
636- /* 144 */ KEY_NOTUSED,
637- /* 145 */ KEY_NOTUSED,
638- /* 146 */ KEY_NOTUSED,
639- /* 147 */ KEY_NOTUSED,
640- /* 148 */ KEY_NOTUSED,
641- /* 149 */ KEY_NOTUSED,
642- /* 150 */ KEY_NOTUSED,
643- /* 151 */ KEY_NOTUSED,
644- /* 152 */ KEY_NOTUSED,
645- /* 153 */ KEY_NOTUSED,
646- /* 154 */ KEY_NOTUSED,
647- /* 155 */ KEY_NOTUSED,
648- /* 156 */ KEY_KP_Enter,
649- /* 157 */ KEY_RCtrl,
650- /* 158 */ KEY_NOTUSED,
651- /* 159 */ KEY_NOTUSED,
652- /* 160 */ KEY_Mute,
653- /* 161 */ KEY_NOTUSED,
654- /* 162 */ KEY_NOTUSED,
655- /* 163 */ KEY_NOTUSED,
656- /* 164 */ KEY_NOTUSED,
657- /* 165 */ KEY_NOTUSED,
658- /* 166 */ KEY_NOTUSED,
659- /* 167 */ KEY_NOTUSED,
660- /* 168 */ KEY_NOTUSED,
661- /* 169 */ KEY_NOTUSED,
662- /* 170 */ KEY_Print,
663- /* 171 */ KEY_NOTUSED,
664- /* 172 */ KEY_NOTUSED,
665- /* 173 */ KEY_NOTUSED,
666- /* 174 */ KEY_AudioLower,
667- /* 175 */ KEY_AudioRaise,
668- /* 176 */ KEY_NOTUSED,
669- /* 177 */ KEY_NOTUSED,
670- /* 178 */ KEY_NOTUSED,
671- /* 179 */ KEY_NOTUSED,
672- /* 180 */ KEY_NOTUSED,
673- /* 181 */ KEY_KP_Divide,
674- /* 182 */ KEY_NOTUSED,
675- /* 183 */ KEY_Print,
676- /* 184 */ KEY_AltLang,
677- /* 185 */ KEY_NOTUSED,
678- /* 186 */ KEY_NOTUSED,
679- /* 187 */ KEY_NOTUSED,
680- /* 188 */ KEY_NOTUSED,
681- /* 189 */ KEY_NOTUSED,
682- /* 190 */ KEY_NOTUSED,
683- /* 191 */ KEY_NOTUSED,
684- /* 192 */ KEY_NOTUSED,
685- /* 193 */ KEY_NOTUSED,
686- /* 194 */ KEY_NOTUSED,
687- /* 195 */ KEY_NOTUSED,
688- /* 196 */ KEY_NOTUSED,
689- /* 197 */ KEY_NOTUSED,
690- /* 198 */ KEY_NOTUSED,
691- /* 199 */ KEY_Home,
692- /* 200 */ KEY_Up,
693- /* 201 */ KEY_PgUp,
694- /* 202 */ KEY_NOTUSED,
695- /* 203 */ KEY_Left,
696- /* 204 */ KEY_NOTUSED,
697- /* 205 */ KEY_Right,
698- /* 206 */ KEY_NOTUSED,
699- /* 207 */ KEY_End,
700- /* 208 */ KEY_Down,
701- /* 209 */ KEY_PgDown,
702- /* 210 */ KEY_Insert,
703- /* 211 */ KEY_Delete,
704- /* 212 */ KEY_NOTUSED,
705- /* 213 */ KEY_NOTUSED,
706- /* 214 */ KEY_NOTUSED,
707- /* 215 */ KEY_NOTUSED,
708- /* 216 */ KEY_NOTUSED,
709- /* 217 */ KEY_NOTUSED,
710- /* 218 */ KEY_NOTUSED,
711- /* 219 */ KEY_LMeta,
712- /* 220 */ KEY_RMeta,
713- /* 221 */ KEY_Menu,
714+ /* 0 */ KEY_NOTUSED,
715+ /* 1 */ KEY_Escape,
716+ /* 2 */ KEY_1,
717+ /* 3 */ KEY_2,
718+ /* 4 */ KEY_3,
719+ /* 5 */ KEY_4,
720+ /* 6 */ KEY_5,
721+ /* 7 */ KEY_6,
722+ /* 8 */ KEY_7,
723+ /* 9 */ KEY_8,
724+ /* 10 */ KEY_9,
725+ /* 11 */ KEY_0,
726+ /* 12 */ KEY_Minus,
727+ /* 13 */ KEY_Equal,
728+ /* 14 */ KEY_BackSpace,
729+ /* 15 */ KEY_Tab,
730+ /* 16 */ KEY_Q,
731+ /* 17 */ KEY_W,
732+ /* 18 */ KEY_E,
733+ /* 19 */ KEY_R,
734+ /* 20 */ KEY_T,
735+ /* 21 */ KEY_Y,
736+ /* 22 */ KEY_U,
737+ /* 23 */ KEY_I,
738+ /* 24 */ KEY_O,
739+ /* 25 */ KEY_P,
740+ /* 26 */ KEY_LBrace,
741+ /* 27 */ KEY_RBrace,
742+ /* 28 */ KEY_Enter,
743+ /* 29 */ KEY_LCtrl,
744+ /* 30 */ KEY_A,
745+ /* 31 */ KEY_S,
746+ /* 32 */ KEY_D,
747+ /* 33 */ KEY_F,
748+ /* 34 */ KEY_G,
749+ /* 35 */ KEY_H,
750+ /* 36 */ KEY_J,
751+ /* 37 */ KEY_K,
752+ /* 38 */ KEY_L,
753+ /* 39 */ KEY_SemiColon,
754+ /* 40 */ KEY_Quote,
755+ /* 41 */ KEY_Tilde,
756+ /* 42 */ KEY_ShiftL,
757+ /* 43 */ KEY_BSlash,
758+ /* 44 */ KEY_Z,
759+ /* 45 */ KEY_X,
760+ /* 46 */ KEY_C,
761+ /* 47 */ KEY_V,
762+ /* 48 */ KEY_B,
763+ /* 49 */ KEY_N,
764+ /* 50 */ KEY_M,
765+ /* 51 */ KEY_Comma,
766+ /* 52 */ KEY_Period,
767+ /* 53 */ KEY_Slash,
768+ /* 54 */ KEY_ShiftR,
769+ /* 55 */ KEY_KP_Multiply,
770+ /* 56 */ KEY_Alt,
771+ /* 57 */ KEY_Space,
772+ /* 58 */ KEY_CapsLock,
773+ /* 59 */ KEY_F1,
774+ /* 60 */ KEY_F2,
775+ /* 61 */ KEY_F3,
776+ /* 62 */ KEY_F4,
777+ /* 63 */ KEY_F5,
778+ /* 64 */ KEY_F6,
779+ /* 65 */ KEY_F7,
780+ /* 66 */ KEY_F8,
781+ /* 67 */ KEY_F9,
782+ /* 68 */ KEY_F10,
783+ /* 69 */ KEY_NumLock,
784+ /* 70 */ KEY_ScrollLock,
785+ /* 71 */ KEY_KP_7,
786+ /* 72 */ KEY_KP_8,
787+ /* 73 */ KEY_KP_9,
788+ /* 74 */ KEY_KP_Minus,
789+ /* 75 */ KEY_KP_4,
790+ /* 76 */ KEY_KP_5,
791+ /* 77 */ KEY_KP_6,
792+ /* 78 */ KEY_KP_Plus,
793+ /* 79 */ KEY_KP_1,
794+ /* 80 */ KEY_KP_2,
795+ /* 81 */ KEY_KP_3,
796+ /* 82 */ KEY_KP_0,
797+ /* 83 */ KEY_KP_Decimal,
798+ /* 84 */ KEY_NOTUSED,
799+ /* 85 */ KEY_NOTUSED,
800+ /* 86 */ KEY_Less, /* backslash on uk, < on german */
801+ /* 87 */ KEY_F11,
802+ /* 88 */ KEY_F12,
803+ /* 89 */ KEY_NOTUSED,
804+ /* 90 */ KEY_NOTUSED,
805+ /* 91 */ KEY_NOTUSED,
806+ /* 92 */ KEY_NOTUSED,
807+ /* 93 */ KEY_NOTUSED,
808+ /* 94 */ KEY_NOTUSED,
809+ /* 95 */ KEY_NOTUSED,
810+ /* 96 */ KEY_NOTUSED,
811+ /* 97 */ KEY_NOTUSED,
812+ /* 98 */ KEY_NOTUSED,
813+ /* 99 */ KEY_NOTUSED,
814+ /* 100 */ KEY_NOTUSED,
815+ /* 101 */ KEY_NOTUSED,
816+ /* 102 */ KEY_NOTUSED,
817+ /* 103 */ KEY_NOTUSED,
818+ /* 104 */ KEY_NOTUSED,
819+ /* 105 */ KEY_NOTUSED,
820+ /* 106 */ KEY_NOTUSED,
821+ /* 107 */ KEY_NOTUSED,
822+ /* 108 */ KEY_NOTUSED,
823+ /* 109 */ KEY_NOTUSED,
824+ /* 110 */ KEY_NOTUSED,
825+ /* 111 */ KEY_NOTUSED,
826+ /* 112 */ KEY_NOTUSED,
827+ /* 113 */ KEY_NOTUSED,
828+ /* 114 */ KEY_NOTUSED,
829+ /* 115 */ KEY_NOTUSED,
830+ /* 116 */ KEY_NOTUSED,
831+ /* 117 */ KEY_NOTUSED,
832+ /* 118 */ KEY_NOTUSED,
833+ /* 119 */ KEY_NOTUSED,
834+ /* 120 */ KEY_NOTUSED,
835+ /* 121 */ KEY_NOTUSED,
836+ /* 122 */ KEY_NOTUSED,
837+ /* 123 */ KEY_NOTUSED,
838+ /* 124 */ KEY_NOTUSED,
839+ /* 125 */ KEY_NOTUSED,
840+ /* 126 */ KEY_NOTUSED,
841+ /* 127 */ KEY_Pause,
842+ /* 128 */ KEY_NOTUSED,
843+ /* 129 */ KEY_NOTUSED,
844+ /* 130 */ KEY_NOTUSED,
845+ /* 131 */ KEY_NOTUSED,
846+ /* 132 */ KEY_NOTUSED,
847+ /* 133 */ KEY_NOTUSED,
848+ /* 134 */ KEY_NOTUSED,
849+ /* 135 */ KEY_NOTUSED,
850+ /* 136 */ KEY_NOTUSED,
851+ /* 137 */ KEY_NOTUSED,
852+ /* 138 */ KEY_NOTUSED,
853+ /* 139 */ KEY_NOTUSED,
854+ /* 140 */ KEY_NOTUSED,
855+ /* 141 */ KEY_NOTUSED,
856+ /* 142 */ KEY_NOTUSED,
857+ /* 143 */ KEY_NOTUSED,
858+ /* 144 */ KEY_NOTUSED,
859+ /* 145 */ KEY_NOTUSED,
860+ /* 146 */ KEY_NOTUSED,
861+ /* 147 */ KEY_NOTUSED,
862+ /* 148 */ KEY_NOTUSED,
863+ /* 149 */ KEY_NOTUSED,
864+ /* 150 */ KEY_NOTUSED,
865+ /* 151 */ KEY_NOTUSED,
866+ /* 152 */ KEY_NOTUSED,
867+ /* 153 */ KEY_NOTUSED,
868+ /* 154 */ KEY_NOTUSED,
869+ /* 155 */ KEY_NOTUSED,
870+ /* 156 */ KEY_KP_Enter,
871+ /* 157 */ KEY_RCtrl,
872+ /* 158 */ KEY_NOTUSED,
873+ /* 159 */ KEY_NOTUSED,
874+ /* 160 */ KEY_Mute,
875+ /* 161 */ KEY_NOTUSED,
876+ /* 162 */ KEY_NOTUSED,
877+ /* 163 */ KEY_NOTUSED,
878+ /* 164 */ KEY_NOTUSED,
879+ /* 165 */ KEY_NOTUSED,
880+ /* 166 */ KEY_NOTUSED,
881+ /* 167 */ KEY_NOTUSED,
882+ /* 168 */ KEY_NOTUSED,
883+ /* 169 */ KEY_NOTUSED,
884+ /* 170 */ KEY_Print,
885+ /* 171 */ KEY_NOTUSED,
886+ /* 172 */ KEY_NOTUSED,
887+ /* 173 */ KEY_NOTUSED,
888+ /* 174 */ KEY_AudioLower,
889+ /* 175 */ KEY_AudioRaise,
890+ /* 176 */ KEY_NOTUSED,
891+ /* 177 */ KEY_NOTUSED,
892+ /* 178 */ KEY_NOTUSED,
893+ /* 179 */ KEY_NOTUSED,
894+ /* 180 */ KEY_NOTUSED,
895+ /* 181 */ KEY_KP_Divide,
896+ /* 182 */ KEY_NOTUSED,
897+ /* 183 */ KEY_Print,
898+ /* 184 */ KEY_AltLang,
899+ /* 185 */ KEY_NOTUSED,
900+ /* 186 */ KEY_NOTUSED,
901+ /* 187 */ KEY_NOTUSED,
902+ /* 188 */ KEY_NOTUSED,
903+ /* 189 */ KEY_NOTUSED,
904+ /* 190 */ KEY_NOTUSED,
905+ /* 191 */ KEY_NOTUSED,
906+ /* 192 */ KEY_NOTUSED,
907+ /* 193 */ KEY_NOTUSED,
908+ /* 194 */ KEY_NOTUSED,
909+ /* 195 */ KEY_NOTUSED,
910+ /* 196 */ KEY_NOTUSED,
911+ /* 197 */ KEY_NOTUSED,
912+ /* 198 */ KEY_NOTUSED,
913+ /* 199 */ KEY_Home,
914+ /* 200 */ KEY_Up,
915+ /* 201 */ KEY_PgUp,
916+ /* 202 */ KEY_NOTUSED,
917+ /* 203 */ KEY_Left,
918+ /* 204 */ KEY_NOTUSED,
919+ /* 205 */ KEY_Right,
920+ /* 206 */ KEY_NOTUSED,
921+ /* 207 */ KEY_End,
922+ /* 208 */ KEY_Down,
923+ /* 209 */ KEY_PgDown,
924+ /* 210 */ KEY_Insert,
925+ /* 211 */ KEY_Delete,
926+ /* 212 */ KEY_NOTUSED,
927+ /* 213 */ KEY_NOTUSED,
928+ /* 214 */ KEY_NOTUSED,
929+ /* 215 */ KEY_NOTUSED,
930+ /* 216 */ KEY_NOTUSED,
931+ /* 217 */ KEY_NOTUSED,
932+ /* 218 */ KEY_NOTUSED,
933+ /* 219 */ KEY_LMeta,
934+ /* 220 */ KEY_RMeta,
935+ /* 221 */ KEY_Menu,
936 };
+35,
-20
1@@ -24,8 +24,8 @@
2 #include "xdg_decoration.h"
3 #include "util.h"
4
5-#include <wayland-server.h>
6 #include "xdg-decoration-unstable-v1-server-protocol.h"
7+#include <wayland-server.h>
8
9 struct xdg_toplevel_decoration {
10 struct wl_resource *resource;
11@@ -43,15 +43,16 @@ unset_mode(struct wl_client *client, struct wl_resource *resource)
12 }
13
14 static const struct zxdg_toplevel_decoration_v1_interface decoration_impl = {
15- .destroy = destroy_resource,
16- .set_mode = set_mode,
17- .unset_mode = unset_mode,
18+ .destroy = destroy_resource,
19+ .set_mode = set_mode,
20+ .unset_mode = unset_mode,
21 };
22
23 static void
24 handle_toplevel_destroy(struct wl_listener *listener, void *data)
25 {
26- struct xdg_toplevel_decoration *decoration = wl_container_of(listener, decoration, toplevel_destroy_listener);
27+ struct xdg_toplevel_decoration *decoration =
28+ wl_container_of(listener, decoration, toplevel_destroy_listener);
29
30 wl_resource_destroy(decoration->resource);
31 }
32@@ -59,27 +60,36 @@ handle_toplevel_destroy(struct wl_listener *listener, void *data)
33 static void
34 decoration_destroy(struct wl_resource *resource)
35 {
36- struct xdg_toplevel_decoration *decoration = wl_resource_get_user_data(resource);
37+ struct xdg_toplevel_decoration *decoration =
38+ wl_resource_get_user_data(resource);
39
40 wl_list_remove(&decoration->toplevel_destroy_listener.link);
41 free(decoration);
42 }
43
44 static void
45-get_toplevel_decoration(struct wl_client *client, struct wl_resource *resource, uint32_t id, struct wl_resource *toplevel_resource)
46+get_toplevel_decoration(struct wl_client *client, struct wl_resource *resource,
47+ uint32_t id, struct wl_resource *toplevel_resource)
48 {
49 struct xdg_toplevel_decoration *decoration;
50
51 decoration = malloc(sizeof(*decoration));
52- if (!decoration)
53+ if (!decoration) {
54 goto error0;
55- decoration->resource = wl_resource_create(client, &zxdg_toplevel_decoration_v1_interface, wl_resource_get_version(resource), id);
56- if (!decoration->resource)
57+ }
58+ decoration->resource =
59+ wl_resource_create(client, &zxdg_toplevel_decoration_v1_interface,
60+ wl_resource_get_version(resource), id);
61+ if (!decoration->resource) {
62 goto error1;
63+ }
64 decoration->toplevel_destroy_listener.notify = &handle_toplevel_destroy;
65- wl_resource_add_destroy_listener(toplevel_resource, &decoration->toplevel_destroy_listener);
66- wl_resource_set_implementation(decoration->resource, &decoration_impl, decoration, decoration_destroy);
67- zxdg_toplevel_decoration_v1_send_configure(decoration->resource, ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE);
68+ wl_resource_add_destroy_listener(toplevel_resource,
69+ &decoration->toplevel_destroy_listener);
70+ wl_resource_set_implementation(decoration->resource, &decoration_impl,
71+ decoration, decoration_destroy);
72+ zxdg_toplevel_decoration_v1_send_configure(
73+ decoration->resource, ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE);
74 return;
75
76 error1:
77@@ -88,26 +98,31 @@ error0:
78 wl_resource_post_no_memory(resource);
79 }
80
81-static const struct zxdg_decoration_manager_v1_interface decoration_manager_impl = {
82- .destroy = destroy_resource,
83- .get_toplevel_decoration = get_toplevel_decoration,
84+static const struct zxdg_decoration_manager_v1_interface
85+ decoration_manager_impl = {
86+ .destroy = destroy_resource,
87+ .get_toplevel_decoration = get_toplevel_decoration,
88 };
89
90 static void
91-bind_decoration_manager(struct wl_client *client, void *data, uint32_t version, uint32_t id)
92+bind_decoration_manager(struct wl_client *client, void *data, uint32_t version,
93+ uint32_t id)
94 {
95 struct wl_resource *resource;
96
97- resource = wl_resource_create(client, &zxdg_decoration_manager_v1_interface, version, id);
98+ resource = wl_resource_create(client, &zxdg_decoration_manager_v1_interface,
99+ version, id);
100 if (!resource) {
101 wl_client_post_no_memory(client);
102 return;
103 }
104- wl_resource_set_implementation(resource, &decoration_manager_impl, NULL, NULL);
105+ wl_resource_set_implementation(resource, &decoration_manager_impl, NULL,
106+ NULL);
107 }
108
109 struct wl_global *
110 xdg_decoration_manager_create(struct wl_display *display)
111 {
112- return wl_global_create(display, &zxdg_decoration_manager_v1_interface, 1, NULL, &bind_decoration_manager);
113+ return wl_global_create(display, &zxdg_decoration_manager_v1_interface, 1,
114+ NULL, &bind_decoration_manager);
115 }
+2,
-1
1@@ -26,6 +26,7 @@
2
3 struct wl_display;
4
5-struct wl_global *xdg_decoration_manager_create(struct wl_display *display);
6+struct wl_global *
7+xdg_decoration_manager_create(struct wl_display *display);
8
9 #endif
+208,
-124
1@@ -22,17 +22,17 @@
2 */
3
4 #include "xdg_shell.h"
5-#include "internal.h"
6 #include "compositor.h"
7+#include "internal.h"
8 #include "seat.h"
9 #include "surface.h"
10 #include "util.h"
11 #include "window.h"
12
13+#include "xdg-shell-server-protocol.h"
14 #include <assert.h>
15 #include <stdlib.h>
16 #include <wayland-server.h>
17-#include "xdg-shell-server-protocol.h"
18
19 struct xdg_surface {
20 struct wl_resource *resource, *role;
21@@ -75,12 +75,14 @@ destroy_positioner(struct wl_resource *resource)
22 }
23
24 static void
25-set_size(struct wl_client *client, struct wl_resource *resource, int32_t width, int32_t height)
26+set_size(struct wl_client *client, struct wl_resource *resource, int32_t width,
27+ int32_t height)
28 {
29 struct xdg_positioner *positioner = wl_resource_get_user_data(resource);
30
31 if (width <= 0 || height <= 0) {
32- wl_resource_post_error(resource, XDG_POSITIONER_ERROR_INVALID_INPUT, "invalid size");
33+ wl_resource_post_error(resource, XDG_POSITIONER_ERROR_INVALID_INPUT,
34+ "invalid size");
35 return;
36 }
37 positioner->width = width;
38@@ -88,12 +90,14 @@ set_size(struct wl_client *client, struct wl_resource *resource, int32_t width,
39 }
40
41 static void
42-set_anchor_rect(struct wl_client *client, struct wl_resource *resource, int32_t x, int32_t y, int32_t width, int32_t height)
43+set_anchor_rect(struct wl_client *client, struct wl_resource *resource,
44+ int32_t x, int32_t y, int32_t width, int32_t height)
45 {
46 struct xdg_positioner *positioner = wl_resource_get_user_data(resource);
47
48 if (width <= 0 || height <= 0) {
49- wl_resource_post_error(resource, XDG_POSITIONER_ERROR_INVALID_INPUT, "invalid anchor size");
50+ wl_resource_post_error(resource, XDG_POSITIONER_ERROR_INVALID_INPUT,
51+ "invalid anchor size");
52 return;
53 }
54 positioner->anchor_x = x;
55@@ -103,7 +107,8 @@ set_anchor_rect(struct wl_client *client, struct wl_resource *resource, int32_t
56 }
57
58 static void
59-set_anchor(struct wl_client *client, struct wl_resource *resource, uint32_t anchor)
60+set_anchor(struct wl_client *client, struct wl_resource *resource,
61+ uint32_t anchor)
62 {
63 struct xdg_positioner *positioner = wl_resource_get_user_data(resource);
64
65@@ -111,7 +116,8 @@ set_anchor(struct wl_client *client, struct wl_resource *resource, uint32_t anch
66 }
67
68 static void
69-set_gravity(struct wl_client *client, struct wl_resource *resource, uint32_t gravity)
70+set_gravity(struct wl_client *client, struct wl_resource *resource,
71+ uint32_t gravity)
72 {
73 struct xdg_positioner *positioner = wl_resource_get_user_data(resource);
74
75@@ -119,7 +125,8 @@ set_gravity(struct wl_client *client, struct wl_resource *resource, uint32_t gra
76 }
77
78 static void
79-set_constraint_adjustment(struct wl_client *client, struct wl_resource *resource, uint32_t constraint)
80+set_constraint_adjustment(struct wl_client *client,
81+ struct wl_resource *resource, uint32_t constraint)
82 {
83 struct xdg_positioner *positioner = wl_resource_get_user_data(resource);
84
85@@ -127,7 +134,8 @@ set_constraint_adjustment(struct wl_client *client, struct wl_resource *resource
86 }
87
88 static void
89-set_offset(struct wl_client *client, struct wl_resource *resource, int32_t x, int32_t y)
90+set_offset(struct wl_client *client, struct wl_resource *resource, int32_t x,
91+ int32_t y)
92 {
93 struct xdg_positioner *positioner = wl_resource_get_user_data(resource);
94
95@@ -136,23 +144,23 @@ set_offset(struct wl_client *client, struct wl_resource *resource, int32_t x, in
96 }
97
98 static const struct xdg_positioner_interface positioner_impl = {
99- .destroy = destroy_resource,
100- .set_size = set_size,
101- .set_anchor_rect = set_anchor_rect,
102- .set_anchor = set_anchor,
103- .set_gravity = set_gravity,
104- .set_constraint_adjustment = set_constraint_adjustment,
105- .set_offset = set_offset,
106+ .destroy = destroy_resource,
107+ .set_size = set_size,
108+ .set_anchor_rect = set_anchor_rect,
109+ .set_anchor = set_anchor,
110+ .set_gravity = set_gravity,
111+ .set_constraint_adjustment = set_constraint_adjustment,
112+ .set_offset = set_offset,
113 };
114
115 static struct swc_rectangle
116 calculate_position(struct xdg_positioner *positioner)
117 {
118 struct swc_rectangle r = {
119- .x = positioner->offset_x,
120- .y = positioner->offset_y,
121- .width = positioner->width,
122- .height = positioner->height,
123+ .x = positioner->offset_x,
124+ .y = positioner->offset_y,
125+ .width = positioner->width,
126+ .height = positioner->height,
127 };
128
129 switch (positioner->anchor) {
130@@ -229,9 +237,11 @@ add_state(struct xdg_toplevel *toplevel, uint32_t state)
131 {
132 uint32_t *current_state;
133
134- wl_array_for_each (current_state, &toplevel->states) {
135- if (*current_state == state)
136+ wl_array_for_each(current_state, &toplevel->states)
137+ {
138+ if (*current_state == state) {
139 return false;
140+ }
141 }
142
143 if (!(current_state = wl_array_add(&toplevel->states, sizeof(state)))) {
144@@ -248,7 +258,8 @@ remove_state(struct xdg_toplevel *toplevel, uint32_t state)
145 {
146 uint32_t *current_state;
147
148- wl_array_for_each (current_state, &toplevel->states) {
149+ wl_array_for_each(current_state, &toplevel->states)
150+ {
151 if (*current_state == state) {
152 array_remove(&toplevel->states, current_state, sizeof(state));
153 return true;
154@@ -259,15 +270,19 @@ remove_state(struct xdg_toplevel *toplevel, uint32_t state)
155 }
156
157 static uint32_t
158-send_configure(struct xdg_toplevel *toplevel, int32_t width, int32_t height) {
159+send_configure(struct xdg_toplevel *toplevel, int32_t width, int32_t height)
160+{
161 uint32_t serial = wl_display_next_serial(swc.display);
162
163- if (width < 0)
164+ if (width < 0) {
165 width = toplevel->window.configure.width;
166- if (height < 0)
167+ }
168+ if (height < 0) {
169 height = toplevel->window.configure.height;
170+ }
171
172- xdg_toplevel_send_configure(toplevel->resource, width, height, &toplevel->states);
173+ xdg_toplevel_send_configure(toplevel->resource, width, height,
174+ &toplevel->states);
175 xdg_surface_send_configure(toplevel->xdg_surface->resource, serial);
176
177 return serial;
178@@ -279,7 +294,8 @@ configure(struct window *window, uint32_t width, uint32_t height)
179 struct xdg_toplevel *toplevel = wl_container_of(window, toplevel, window);
180
181 window->configure.acknowledged = false;
182- toplevel->xdg_surface->configure_serial = send_configure(toplevel, width, height);
183+ toplevel->xdg_surface->configure_serial =
184+ send_configure(toplevel, width, height);
185 }
186
187 static void
188@@ -291,7 +307,8 @@ focus(struct window *window)
189
190 add_state(toplevel, XDG_TOPLEVEL_STATE_ACTIVATED);
191 /* dont send 0x0 on focus change */
192- send_configure(toplevel, width ? (int32_t)width : -1, height ? (int32_t)height : -1);
193+ send_configure(toplevel, width ? (int32_t)width : -1,
194+ height ? (int32_t)height : -1);
195 }
196
197 static void
198@@ -302,7 +319,8 @@ unfocus(struct window *window)
199 uint32_t height = window->view->base.geometry.height;
200
201 remove_state(toplevel, XDG_TOPLEVEL_STATE_ACTIVATED);
202- send_configure(toplevel, width ? (int32_t)width : -1, height ? (int32_t)height : -1);
203+ send_configure(toplevel, width ? (int32_t)width : -1,
204+ height ? (int32_t)height : -1);
205 }
206
207 static void
208@@ -340,71 +358,84 @@ set_mode(struct window *window, unsigned mode)
209 }
210
211 static const struct window_impl toplevel_window_impl = {
212- .configure = configure,
213- .focus = focus,
214- .unfocus = unfocus,
215- .close = close_,
216- .set_mode = set_mode,
217+ .configure = configure,
218+ .focus = focus,
219+ .unfocus = unfocus,
220+ .close = close_,
221+ .set_mode = set_mode,
222 };
223
224 static void
225-set_parent(struct wl_client *client, struct wl_resource *resource, struct wl_resource *parent_resource)
226+set_parent(struct wl_client *client, struct wl_resource *resource,
227+ struct wl_resource *parent_resource)
228 {
229- struct xdg_toplevel *toplevel = wl_resource_get_user_data(resource), *parent = NULL;
230+ struct xdg_toplevel *toplevel = wl_resource_get_user_data(resource),
231+ *parent = NULL;
232
233- if (parent_resource)
234+ if (parent_resource) {
235 parent = wl_resource_get_user_data(parent_resource);
236+ }
237 window_set_parent(&toplevel->window, parent ? &parent->window : NULL);
238 }
239
240 static void
241-set_title(struct wl_client *client, struct wl_resource *resource, const char *title)
242+set_title(struct wl_client *client, struct wl_resource *resource,
243+ const char *title)
244 {
245 struct xdg_toplevel *toplevel = wl_resource_get_user_data(resource);
246 window_set_title(&toplevel->window, title, -1);
247 }
248
249 static void
250-set_app_id(struct wl_client *client, struct wl_resource *resource, const char *app_id)
251+set_app_id(struct wl_client *client, struct wl_resource *resource,
252+ const char *app_id)
253 {
254 struct xdg_toplevel *toplevel = wl_resource_get_user_data(resource);
255 window_set_app_id(&toplevel->window, app_id);
256 }
257
258 static void
259-show_window_menu(struct wl_client *client, struct wl_resource *resource, struct wl_resource *seat, uint32_t serial, int32_t x, int32_t y)
260+show_window_menu(struct wl_client *client, struct wl_resource *resource,
261+ struct wl_resource *seat, uint32_t serial, int32_t x,
262+ int32_t y)
263 {
264 }
265
266 static void
267-move(struct wl_client *client, struct wl_resource *resource, struct wl_resource *seat, uint32_t serial)
268+move(struct wl_client *client, struct wl_resource *resource,
269+ struct wl_resource *seat, uint32_t serial)
270 {
271 struct xdg_toplevel *toplevel = wl_resource_get_user_data(resource);
272 struct button *button;
273
274 button = pointer_get_button(swc.seat->pointer, serial);
275- if (button)
276+ if (button) {
277 window_begin_move(&toplevel->window, button);
278+ }
279 }
280
281 static void
282-resize(struct wl_client *client, struct wl_resource *resource, struct wl_resource *seat, uint32_t serial, uint32_t edges)
283+resize(struct wl_client *client, struct wl_resource *resource,
284+ struct wl_resource *seat, uint32_t serial, uint32_t edges)
285 {
286 struct xdg_toplevel *toplevel = wl_resource_get_user_data(resource);
287 struct button *button;
288
289 button = pointer_get_button(swc.seat->pointer, serial);
290- if (button)
291+ if (button) {
292 window_begin_resize(&toplevel->window, edges, button);
293+ }
294 }
295
296 static void
297-set_max_size(struct wl_client *client, struct wl_resource *resource, int32_t width, int32_t height)
298+set_max_size(struct wl_client *client, struct wl_resource *resource,
299+ int32_t width, int32_t height)
300 {
301 }
302
303 static void
304-set_min_size(struct wl_client *client, struct wl_resource *resource, int32_t width, int32_t height)
305+set_min_size(struct wl_client *client, struct wl_resource *resource,
306+ int32_t width, int32_t height)
307 {
308 }
309
310@@ -419,7 +450,8 @@ unset_maximized(struct wl_client *client, struct wl_resource *resource)
311 }
312
313 static void
314-set_fullscreen(struct wl_client *client, struct wl_resource *resource, struct wl_resource *output)
315+set_fullscreen(struct wl_client *client, struct wl_resource *resource,
316+ struct wl_resource *output)
317 {
318 }
319
320@@ -434,37 +466,43 @@ set_minimized(struct wl_client *client, struct wl_resource *resource)
321 }
322
323 static const struct xdg_toplevel_interface toplevel_impl = {
324- .destroy = destroy_resource,
325- .set_parent = set_parent,
326- .set_title = set_title,
327- .set_app_id = set_app_id,
328- .show_window_menu = show_window_menu,
329- .move = move,
330- .resize = resize,
331- .set_max_size = set_max_size,
332- .set_min_size = set_min_size,
333- .set_maximized = set_maximized,
334- .unset_maximized = unset_maximized,
335- .set_fullscreen = set_fullscreen,
336- .unset_fullscreen = unset_fullscreen,
337- .set_minimized = set_minimized,
338+ .destroy = destroy_resource,
339+ .set_parent = set_parent,
340+ .set_title = set_title,
341+ .set_app_id = set_app_id,
342+ .show_window_menu = show_window_menu,
343+ .move = move,
344+ .resize = resize,
345+ .set_max_size = set_max_size,
346+ .set_min_size = set_min_size,
347+ .set_maximized = set_maximized,
348+ .unset_maximized = unset_maximized,
349+ .set_fullscreen = set_fullscreen,
350+ .unset_fullscreen = unset_fullscreen,
351+ .set_minimized = set_minimized,
352 };
353
354 static struct xdg_toplevel *
355-xdg_toplevel_new(struct wl_client *client, uint32_t version, uint32_t id, struct xdg_surface *xdg_surface)
356+xdg_toplevel_new(struct wl_client *client, uint32_t version, uint32_t id,
357+ struct xdg_surface *xdg_surface)
358 {
359 struct xdg_toplevel *toplevel;
360
361 toplevel = malloc(sizeof(*toplevel));
362- if (!toplevel)
363+ if (!toplevel) {
364 goto error0;
365+ }
366 toplevel->xdg_surface = xdg_surface;
367- toplevel->resource = wl_resource_create(client, &xdg_toplevel_interface, version, id);
368- if (!toplevel->resource)
369+ toplevel->resource =
370+ wl_resource_create(client, &xdg_toplevel_interface, version, id);
371+ if (!toplevel->resource) {
372 goto error1;
373- window_initialize(&toplevel->window, &toplevel_window_impl, xdg_surface->surface);
374+ }
375+ window_initialize(&toplevel->window, &toplevel_window_impl,
376+ xdg_surface->surface);
377 wl_array_init(&toplevel->states);
378- wl_resource_set_implementation(toplevel->resource, &toplevel_impl, toplevel, &destroy_toplevel);
379+ wl_resource_set_implementation(toplevel->resource, &toplevel_impl, toplevel,
380+ &destroy_toplevel);
381 window_manage(&toplevel->window);
382
383 return toplevel;
384@@ -486,42 +524,54 @@ destroy_popup(struct wl_resource *resource)
385 }
386
387 static void
388-grab(struct wl_client *client, struct wl_resource *resource, struct wl_resource *seat, uint32_t serial)
389+grab(struct wl_client *client, struct wl_resource *resource,
390+ struct wl_resource *seat, uint32_t serial)
391 {
392 }
393
394 static const struct xdg_popup_interface popup_impl = {
395- .destroy = destroy_resource,
396- .grab = grab,
397+ .destroy = destroy_resource,
398+ .grab = grab,
399 };
400
401 static struct xdg_popup *
402-xdg_popup_new(struct wl_client *client, uint32_t version, uint32_t id, struct xdg_surface *xdg_surface, struct xdg_surface *parent, struct xdg_positioner *positioner)
403+xdg_popup_new(struct wl_client *client, uint32_t version, uint32_t id,
404+ struct xdg_surface *xdg_surface, struct xdg_surface *parent,
405+ struct xdg_positioner *positioner)
406 {
407 struct xdg_popup *popup;
408- struct compositor_view *parent_view = compositor_view(parent->surface->view);
409+ struct compositor_view *parent_view =
410+ compositor_view(parent->surface->view);
411 uint32_t serial = wl_display_next_serial(swc.display);
412 struct swc_rectangle rect;
413
414- if (!parent_view)
415+ if (!parent_view) {
416 goto error0;
417+ }
418 popup = malloc(sizeof(*popup));
419- if (!popup)
420+ if (!popup) {
421 goto error0;
422+ }
423 popup->xdg_surface = xdg_surface;
424 popup->positioner = *positioner;
425- popup->resource = wl_resource_create(client, &xdg_popup_interface, version, id);
426- if (!popup->resource)
427+ popup->resource =
428+ wl_resource_create(client, &xdg_popup_interface, version, id);
429+ if (!popup->resource) {
430 goto error1;
431- wl_resource_set_implementation(popup->resource, &popup_impl, popup, &destroy_popup);
432+ }
433+ wl_resource_set_implementation(popup->resource, &popup_impl, popup,
434+ &destroy_popup);
435 popup->view = compositor_create_view(xdg_surface->surface);
436- if (!popup->view)
437+ if (!popup->view) {
438 goto error2;
439+ }
440
441 rect = calculate_position(positioner);
442 compositor_view_set_parent(popup->view, parent_view);
443- view_move(&popup->view->base, parent_view->base.geometry.x + rect.x, parent_view->base.geometry.y + rect.y);
444- xdg_popup_send_configure(popup->resource, rect.x, rect.y, rect.width, rect.height);
445+ view_move(&popup->view->base, parent_view->base.geometry.x + rect.x,
446+ parent_view->base.geometry.y + rect.y);
447+ xdg_popup_send_configure(popup->resource, rect.x, rect.y, rect.width,
448+ rect.height);
449 xdg_surface_send_configure(xdg_surface->resource, serial);
450
451 return popup;
452@@ -536,60 +586,74 @@ error0:
453
454 /* xdg_surface */
455 static void
456-get_toplevel(struct wl_client *client, struct wl_resource *resource, uint32_t id)
457+get_toplevel(struct wl_client *client, struct wl_resource *resource,
458+ uint32_t id)
459 {
460 struct xdg_surface *xdg_surface = wl_resource_get_user_data(resource);
461 struct xdg_toplevel *toplevel;
462
463 if (xdg_surface->role) {
464- wl_resource_post_error(resource, XDG_WM_BASE_ERROR_ROLE, "surface already has a role");
465+ wl_resource_post_error(resource, XDG_WM_BASE_ERROR_ROLE,
466+ "surface already has a role");
467 return;
468 }
469- toplevel = xdg_toplevel_new(client, wl_resource_get_version(resource), id, xdg_surface);
470+ toplevel = xdg_toplevel_new(client, wl_resource_get_version(resource), id,
471+ xdg_surface);
472 if (!toplevel) {
473 wl_client_post_no_memory(client);
474 return;
475 }
476 xdg_surface->role = toplevel->resource;
477- wl_resource_add_destroy_listener(xdg_surface->role, &xdg_surface->role_destroy_listener);
478+ wl_resource_add_destroy_listener(xdg_surface->role,
479+ &xdg_surface->role_destroy_listener);
480 }
481
482 static void
483-get_popup(struct wl_client *client, struct wl_resource *resource, uint32_t id, struct wl_resource *parent_resource, struct wl_resource *positioner_resource)
484+get_popup(struct wl_client *client, struct wl_resource *resource, uint32_t id,
485+ struct wl_resource *parent_resource,
486+ struct wl_resource *positioner_resource)
487 {
488 struct xdg_surface *xdg_surface = wl_resource_get_user_data(resource);
489 struct xdg_surface *parent = wl_resource_get_user_data(parent_resource);
490- struct xdg_positioner *positioner = wl_resource_get_user_data(positioner_resource);
491+ struct xdg_positioner *positioner =
492+ wl_resource_get_user_data(positioner_resource);
493 struct xdg_popup *popup;
494
495 if (xdg_surface->role) {
496- wl_resource_post_error(resource, XDG_WM_BASE_ERROR_ROLE, "surface already has a role");
497+ wl_resource_post_error(resource, XDG_WM_BASE_ERROR_ROLE,
498+ "surface already has a role");
499 return;
500 }
501- popup = xdg_popup_new(client, wl_resource_get_version(resource), id, xdg_surface, parent, positioner);
502+ popup = xdg_popup_new(client, wl_resource_get_version(resource), id,
503+ xdg_surface, parent, positioner);
504 if (!popup) {
505 wl_client_post_no_memory(client);
506 return;
507 }
508 xdg_surface->role = popup->resource;
509- wl_resource_add_destroy_listener(xdg_surface->role, &xdg_surface->role_destroy_listener);
510+ wl_resource_add_destroy_listener(xdg_surface->role,
511+ &xdg_surface->role_destroy_listener);
512 }
513
514 static void
515-ack_configure(struct wl_client *client, struct wl_resource *resource, uint32_t serial)
516+ack_configure(struct wl_client *client, struct wl_resource *resource,
517+ uint32_t serial)
518 {
519 struct xdg_surface *xdg_surface = wl_resource_get_user_data(resource);
520 struct window *window;
521
522- if (!xdg_surface->role)
523+ if (!xdg_surface->role) {
524 return;
525+ }
526 window = wl_resource_get_user_data(xdg_surface->role);
527- if (window && serial == xdg_surface->configure_serial)
528+ if (window && serial == xdg_surface->configure_serial) {
529 window->configure.acknowledged = true;
530+ }
531 }
532
533 static void
534-set_window_geometry(struct wl_client *client, struct wl_resource *resource, int32_t x, int32_t y, int32_t width, int32_t height)
535+set_window_geometry(struct wl_client *client, struct wl_resource *resource,
536+ int32_t x, int32_t y, int32_t width, int32_t height)
537 {
538 (void)client;
539 struct xdg_surface *xdg_surface = wl_resource_get_user_data(resource);
540@@ -605,7 +669,8 @@ set_window_geometry(struct wl_client *client, struct wl_resource *resource, int3
541 surface->window_y = y;
542 surface->window_width = width;
543 surface->window_height = height;
544- if (!surface->window_geometry_applied && surface->view && (x != 0 || y != 0)) {
545+ if (!surface->window_geometry_applied && surface->view &&
546+ (x != 0 || y != 0)) {
547 struct swc_rectangle *geom = &surface->view->geometry;
548 view_move(surface->view, geom->x - x, geom->y - y);
549 surface->window_geometry_applied = true;
550@@ -613,17 +678,18 @@ set_window_geometry(struct wl_client *client, struct wl_resource *resource, int3
551 }
552
553 static const struct xdg_surface_interface xdg_surface_impl = {
554- .destroy = destroy_resource,
555- .get_toplevel = get_toplevel,
556- .get_popup = get_popup,
557- .ack_configure = ack_configure,
558- .set_window_geometry = set_window_geometry,
559+ .destroy = destroy_resource,
560+ .get_toplevel = get_toplevel,
561+ .get_popup = get_popup,
562+ .ack_configure = ack_configure,
563+ .set_window_geometry = set_window_geometry,
564 };
565
566 static void
567 handle_surface_destroy(struct wl_listener *listener, void *data)
568 {
569- struct xdg_surface *xdg_surface = wl_container_of(listener, xdg_surface, surface_destroy_listener);
570+ struct xdg_surface *xdg_surface =
571+ wl_container_of(listener, xdg_surface, surface_destroy_listener);
572
573 wl_resource_destroy(xdg_surface->resource);
574 }
575@@ -631,7 +697,8 @@ handle_surface_destroy(struct wl_listener *listener, void *data)
576 static void
577 handle_role_destroy(struct wl_listener *listener, void *data)
578 {
579- struct xdg_surface *xdg_surface = wl_container_of(listener, xdg_surface, role_destroy_listener);
580+ struct xdg_surface *xdg_surface =
581+ wl_container_of(listener, xdg_surface, role_destroy_listener);
582
583 xdg_surface->role = NULL;
584 }
585@@ -642,28 +709,35 @@ destroy_xdg_surface(struct wl_resource *resource)
586 struct xdg_surface *xdg_surface = wl_resource_get_user_data(resource);
587
588 wl_list_remove(&xdg_surface->surface_destroy_listener.link);
589- if (xdg_surface->role)
590+ if (xdg_surface->role) {
591 wl_resource_destroy(xdg_surface->role);
592+ }
593 free(xdg_surface);
594 }
595
596 static struct xdg_surface *
597-xdg_surface_new(struct wl_client *client, uint32_t version, uint32_t id, struct surface *surface)
598+xdg_surface_new(struct wl_client *client, uint32_t version, uint32_t id,
599+ struct surface *surface)
600 {
601 struct xdg_surface *xdg_surface;
602
603 xdg_surface = malloc(sizeof(*xdg_surface));
604- if (!xdg_surface)
605+ if (!xdg_surface) {
606 goto error0;
607- xdg_surface->resource = wl_resource_create(client, &xdg_surface_interface, version, id);
608- if (!xdg_surface->resource)
609+ }
610+ xdg_surface->resource =
611+ wl_resource_create(client, &xdg_surface_interface, version, id);
612+ if (!xdg_surface->resource) {
613 goto error1;
614+ }
615 xdg_surface->surface = surface;
616 xdg_surface->surface_destroy_listener.notify = &handle_surface_destroy;
617 xdg_surface->role = NULL;
618 xdg_surface->role_destroy_listener.notify = &handle_role_destroy;
619- wl_resource_add_destroy_listener(surface->resource, &xdg_surface->surface_destroy_listener);
620- wl_resource_set_implementation(xdg_surface->resource, &xdg_surface_impl, xdg_surface, destroy_xdg_surface);
621+ wl_resource_add_destroy_listener(surface->resource,
622+ &xdg_surface->surface_destroy_listener);
623+ wl_resource_set_implementation(xdg_surface->resource, &xdg_surface_impl,
624+ xdg_surface, destroy_xdg_surface);
625
626 return xdg_surface;
627
628@@ -675,21 +749,26 @@ error0:
629
630 /* xdg_shell */
631 static void
632-create_positioner(struct wl_client *client, struct wl_resource *resource, uint32_t id)
633+create_positioner(struct wl_client *client, struct wl_resource *resource,
634+ uint32_t id)
635 {
636 struct xdg_positioner *positioner;
637 struct wl_resource *positioner_resource;
638 uint32_t version;
639
640 positioner = calloc(1, sizeof(*positioner));
641- if (!positioner)
642+ if (!positioner) {
643 goto error0;
644+ }
645
646 version = wl_resource_get_version(resource);
647- positioner_resource = wl_resource_create(client, &xdg_positioner_interface, version, id);
648- if (!positioner_resource)
649+ positioner_resource =
650+ wl_resource_create(client, &xdg_positioner_interface, version, id);
651+ if (!positioner_resource) {
652 goto error1;
653- wl_resource_set_implementation(positioner_resource, &positioner_impl, positioner, &destroy_positioner);
654+ }
655+ wl_resource_set_implementation(positioner_resource, &positioner_impl,
656+ positioner, &destroy_positioner);
657 return;
658
659 error1:
660@@ -699,14 +778,17 @@ error0:
661 }
662
663 static void
664-get_xdg_surface(struct wl_client *client, struct wl_resource *resource, uint32_t id, struct wl_resource *surface_resource)
665+get_xdg_surface(struct wl_client *client, struct wl_resource *resource,
666+ uint32_t id, struct wl_resource *surface_resource)
667 {
668 struct xdg_surface *xdg_surface;
669 struct surface *surface = wl_resource_get_user_data(surface_resource);
670
671- xdg_surface = xdg_surface_new(client, wl_resource_get_version(resource), id, surface);
672- if (!xdg_surface)
673+ xdg_surface =
674+ xdg_surface_new(client, wl_resource_get_version(resource), id, surface);
675+ if (!xdg_surface) {
676 wl_client_post_no_memory(client);
677+ }
678 }
679
680 static void
681@@ -715,14 +797,15 @@ pong(struct wl_client *client, struct wl_resource *resource, uint32_t serial)
682 }
683
684 static const struct xdg_wm_base_interface wm_base_impl = {
685- .destroy = destroy_resource,
686- .create_positioner = create_positioner,
687- .get_xdg_surface = get_xdg_surface,
688- .pong = pong,
689+ .destroy = destroy_resource,
690+ .create_positioner = create_positioner,
691+ .get_xdg_surface = get_xdg_surface,
692+ .pong = pong,
693 };
694
695 static void
696-bind_wm_base(struct wl_client *client, void *data, uint32_t version, uint32_t id)
697+bind_wm_base(struct wl_client *client, void *data, uint32_t version,
698+ uint32_t id)
699 {
700 struct wl_resource *resource;
701
702@@ -737,5 +820,6 @@ bind_wm_base(struct wl_client *client, void *data, uint32_t version, uint32_t id
703 struct wl_global *
704 xdg_shell_create(struct wl_display *display)
705 {
706- return wl_global_create(display, &xdg_wm_base_interface, 1, NULL, &bind_wm_base);
707+ return wl_global_create(display, &xdg_wm_base_interface, 1, NULL,
708+ &bind_wm_base);
709 }
+2,
-1
1@@ -26,6 +26,7 @@
2
3 struct wl_display;
4
5-struct wl_global *xdg_shell_create(struct wl_display *display);
6+struct wl_global *
7+xdg_shell_create(struct wl_display *display);
8
9 #endif
+60,
-42
1@@ -30,15 +30,15 @@
2 #include "util.h"
3 #include "xwm.h"
4
5+#include <errno.h>
6+#include <fcntl.h>
7 #include <signal.h>
8-#include <stdlib.h>
9 #include <stdio.h>
10-#include <unistd.h>
11-#include <fcntl.h>
12-#include <errno.h>
13-#include <sys/stat.h>
14+#include <stdlib.h>
15 #include <sys/socket.h>
16+#include <sys/stat.h>
17 #include <sys/un.h>
18+#include <unistd.h>
19 #include <wayland-server.h>
20
21 #define LOCK_FMT "/tmp/.X%d-lock"
22@@ -61,24 +61,28 @@ open_socket(struct sockaddr_un *addr)
23 {
24 int fd;
25
26- if ((fd = socket(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0)) < 0)
27+ if ((fd = socket(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0)) < 0) {
28 goto error0;
29+ }
30
31 /* Unlink the socket location in case it was being used by a process which
32 * left around a stale lockfile. */
33 unlink(addr->sun_path);
34
35- if (bind(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0)
36+ if (bind(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) {
37 goto error1;
38+ }
39
40- if (listen(fd, 1) < 0)
41+ if (listen(fd, 1) < 0) {
42 goto error2;
43+ }
44
45 return fd;
46
47 error2:
48- if (addr->sun_path[0])
49+ if (addr->sun_path[0]) {
50 unlink(addr->sun_path);
51+ }
52 error1:
53 close(fd);
54 error0:
55@@ -116,22 +120,27 @@ begin:
56 pid_t owner;
57
58 /* Check if the owning process is still alive. */
59- if ((lock_fd = open(lock_name, O_RDONLY)) == -1)
60+ if ((lock_fd = open(lock_name, O_RDONLY)) == -1) {
61 goto retry0;
62+ }
63
64- if (read(lock_fd, pid, sizeof(pid) - 1) != sizeof(pid) - 1)
65+ if (read(lock_fd, pid, sizeof(pid) - 1) != sizeof(pid) - 1) {
66 goto retry0;
67+ }
68
69 owner = strtol(pid, &end, 10);
70
71- if (end != pid + 10)
72+ if (end != pid + 10) {
73 goto retry0;
74+ }
75
76- if (kill(owner, 0) == 0 || errno != ESRCH)
77+ if (kill(owner, 0) == 0 || errno != ESRCH) {
78 goto retry0;
79+ }
80
81- if (unlink(lock_name) != 0)
82+ if (unlink(lock_name) != 0) {
83 goto retry0;
84+ }
85
86 goto begin;
87 }
88@@ -148,17 +157,21 @@ begin:
89
90 /* Bind to abstract socket */
91 addr.sun_path[0] = '\0';
92- snprintf(addr.sun_path + 1, sizeof(addr.sun_path) - 1, SOCKET_FMT, xserver.display);
93- if ((xserver.abstract_fd = open_socket(&addr)) < 0)
94+ snprintf(addr.sun_path + 1, sizeof(addr.sun_path) - 1, SOCKET_FMT,
95+ xserver.display);
96+ if ((xserver.abstract_fd = open_socket(&addr)) < 0) {
97 goto retry1;
98+ }
99
100 /* Bind to unix socket */
101 mkdir(SOCKET_DIR, 0777);
102 snprintf(addr.sun_path, sizeof(addr.sun_path), SOCKET_FMT, xserver.display);
103- if ((xserver.unix_fd = open_socket(&addr)) < 0)
104+ if ((xserver.unix_fd = open_socket(&addr)) < 0) {
105 goto retry2;
106+ }
107
108- snprintf(xserver.display_name, sizeof(xserver.display_name), ":%d", xserver.display);
109+ snprintf(xserver.display_name, sizeof(xserver.display_name), ":%d",
110+ xserver.display);
111 setenv("DISPLAY", xserver.display_name, true);
112
113 return true;
114@@ -196,12 +209,13 @@ handle_usr1(int signal_number, void *data)
115 }
116
117 static void
118-handle_client_destroy(struct wl_listener *listener, void *data) {
119+handle_client_destroy(struct wl_listener *listener, void *data)
120+{
121 swc_xserver.client = NULL;
122 }
123
124 static struct wl_listener client_destroy_listener = {
125- .notify = handle_client_destroy,
126+ .notify = handle_client_destroy,
127 };
128
129 bool
130@@ -215,7 +229,8 @@ xserver_initialize(void)
131 goto error0;
132 }
133
134- xserver.usr1_source = wl_event_loop_add_signal(swc.event_loop, SIGUSR1, &handle_usr1, NULL);
135+ xserver.usr1_source =
136+ wl_event_loop_add_signal(swc.event_loop, SIGUSR1, &handle_usr1, NULL);
137
138 if (!xserver.usr1_source) {
139 ERROR("Failed to create SIGUSR1 event source\n");
140@@ -234,56 +249,57 @@ xserver_initialize(void)
141 goto error3;
142 }
143
144- if (!(swc_xserver.client = wl_client_create(swc.display, wl[0])))
145+ if (!(swc_xserver.client = wl_client_create(swc.display, wl[0]))) {
146 goto error4;
147+ }
148
149- wl_client_add_destroy_listener(swc_xserver.client, &client_destroy_listener);
150+ wl_client_add_destroy_listener(swc_xserver.client,
151+ &client_destroy_listener);
152 xserver.wm_fd = wm[0];
153
154 /* Start the X server */
155 switch (fork()) {
156 case 0: {
157- int fds[] = { wl[1], wm[1], xserver.abstract_fd, xserver.unix_fd };
158+ int fds[] = {wl[1], wm[1], xserver.abstract_fd, xserver.unix_fd};
159 char strings[ARRAY_LENGTH(fds)][16];
160 unsigned index;
161- struct sigaction action = {.sa_handler = SIG_IGN };
162+ struct sigaction action = {.sa_handler = SIG_IGN};
163
164- /* Unset the FD_CLOEXEC flag on the FDs that will get passed to Xwayland. */
165+ /* Unset the FD_CLOEXEC flag on the FDs that will get passed to
166+ * Xwayland. */
167 for (index = 0; index < ARRAY_LENGTH(fds); ++index) {
168 if (fcntl(fds[index], F_SETFD, 0) != 0) {
169 ERROR("fcntl() failed: %s\n", strerror(errno));
170 goto fail;
171 }
172
173- if (snprintf(strings[index], sizeof(strings[index]), "%d", fds[index]) >= sizeof(strings[index])) {
174+ if (snprintf(strings[index], sizeof(strings[index]), "%d",
175+ fds[index]) >= sizeof(strings[index])) {
176 ERROR("FD is too large\n");
177 goto fail;
178 }
179 }
180
181- /* Ignore the USR1 signal so that Xwayland will send a USR1 signal to the
182- * parent process (us) after it finishes initializing. See Xserver(1) for
183- * more details. */
184+ /* Ignore the USR1 signal so that Xwayland will send a USR1 signal to
185+ * the parent process (us) after it finishes initializing. See
186+ * Xserver(1) for more details. */
187 if (sigaction(SIGUSR1, &action, NULL) != 0) {
188- ERROR("Failed to set SIGUSR1 handler to SIG_IGN: %s\n", strerror(errno));
189+ ERROR("Failed to set SIGUSR1 handler to SIG_IGN: %s\n",
190+ strerror(errno));
191 goto fail;
192 }
193
194 setenv("WAYLAND_SOCKET", strings[0], true);
195- execlp("Xwayland", "Xwayland",
196- xserver.display_name,
197- "-rootless",
198- "-terminate",
199- "-listen", strings[2],
200- "-listen", strings[3],
201- "-wm", strings[1],
202- NULL);
203+ execlp("Xwayland", "Xwayland", xserver.display_name, "-rootless",
204+ "-terminate", "-listen", strings[2], "-listen", strings[3],
205+ "-wm", strings[1], NULL);
206
207 fail:
208 exit(EXIT_FAILURE);
209 }
210 case -1:
211- ERROR("fork() failed when trying to start X server: %s\n", strerror(errno));
212+ ERROR("fork() failed when trying to start X server: %s\n",
213+ strerror(errno));
214 goto error5;
215 }
216
217@@ -311,9 +327,11 @@ error0:
218 void
219 xserver_finalize(void)
220 {
221- if (xserver.xwm_initialized)
222+ if (xserver.xwm_initialized) {
223 xwm_finalize();
224- if (swc_xserver.client)
225+ }
226+ if (swc_xserver.client) {
227 wl_client_destroy(swc_xserver.client);
228+ }
229 close_display();
230 }
+4,
-2
1@@ -30,7 +30,9 @@ struct swc_xserver {
2 struct wl_client *client;
3 };
4
5-bool xserver_initialize(void);
6-void xserver_finalize(void);
7+bool
8+xserver_initialize(void);
9+void
10+xserver_finalize(void);
11
12 #endif
+102,
-67
1@@ -69,14 +69,12 @@ static struct {
2 xcb_intern_atom_cookie_t cookie;
3 xcb_atom_t value;
4 } atoms[4];
5-} xwm = {
6- .atoms = {
7- [ATOM_WL_SURFACE_ID] = {"WL_SURFACE_ID"},
8- [ATOM_WM_DELETE_WINDOW] = {"WM_DELETE_WINDOW"},
9- [ATOM_WM_PROTOCOLS] = {"WM_PROTOCOLS"},
10- [ATOM_WM_S0] = {"WM_S0"},
11- }
12-};
13+} xwm = {.atoms = {
14+ [ATOM_WL_SURFACE_ID] = {"WL_SURFACE_ID"},
15+ [ATOM_WM_DELETE_WINDOW] = {"WM_DELETE_WINDOW"},
16+ [ATOM_WM_PROTOCOLS] = {"WM_PROTOCOLS"},
17+ [ATOM_WM_S0] = {"WM_S0"},
18+ }};
19
20 static void
21 update_name(struct xwl_window *xwl_window)
22@@ -86,8 +84,10 @@ update_name(struct xwl_window *xwl_window)
23
24 wm_name_cookie = xcb_ewmh_get_wm_name(&xwm.ewmh, xwl_window->id);
25
26- if (xcb_ewmh_get_wm_name_reply(&xwm.ewmh, wm_name_cookie, &wm_name_reply, NULL)) {
27- window_set_title(&xwl_window->window, wm_name_reply.strings, wm_name_reply.strings_len);
28+ if (xcb_ewmh_get_wm_name_reply(&xwm.ewmh, wm_name_cookie, &wm_name_reply,
29+ NULL)) {
30+ window_set_title(&xwl_window->window, wm_name_reply.strings,
31+ wm_name_reply.strings_len);
32 xcb_ewmh_get_utf8_strings_reply_wipe(&wm_name_reply);
33 } else {
34 window_set_title(&xwl_window->window, NULL, 0);
35@@ -101,15 +101,19 @@ update_protocols(struct xwl_window *xwl_window)
36 xcb_icccm_get_wm_protocols_reply_t reply;
37 unsigned index;
38
39- cookie = xcb_icccm_get_wm_protocols(xwm.connection, xwl_window->id, xwm.atoms[ATOM_WM_PROTOCOLS].value);
40+ cookie = xcb_icccm_get_wm_protocols(xwm.connection, xwl_window->id,
41+ xwm.atoms[ATOM_WM_PROTOCOLS].value);
42 xwl_window->supports_delete = true;
43
44- if (!xcb_icccm_get_wm_protocols_reply(xwm.connection, cookie, &reply, NULL))
45+ if (!xcb_icccm_get_wm_protocols_reply(xwm.connection, cookie, &reply,
46+ NULL)) {
47 return;
48+ }
49
50 for (index = 0; index < reply.atoms_len; ++index) {
51- if (reply.atoms[index] == xwm.atoms[ATOM_WM_DELETE_WINDOW].value)
52+ if (reply.atoms[index] == xwm.atoms[ATOM_WM_DELETE_WINDOW].value) {
53 xwl_window->supports_delete = true;
54+ }
55 }
56
57 xcb_icccm_get_wm_protocols_reply_wipe(&reply);
58@@ -120,9 +124,11 @@ find_window(struct wl_list *list, xcb_window_t id)
59 {
60 struct xwl_window *window;
61
62- wl_list_for_each (window, list, link) {
63- if (window->id == id)
64+ wl_list_for_each(window, list, link)
65+ {
66+ if (window->id == id) {
67 return window;
68+ }
69 }
70
71 return NULL;
72@@ -133,9 +139,11 @@ find_window_by_surface_id(struct wl_list *list, uint32_t id)
73 {
74 struct xwl_window *window;
75
76- wl_list_for_each (window, list, link) {
77- if (window->surface_id == id)
78+ wl_list_for_each(window, list, link)
79+ {
80+ if (window->surface_id == id) {
81 return window;
82+ }
83 }
84
85 return NULL;
86@@ -175,7 +183,8 @@ focus(struct window *window)
87 {
88 struct xwl_window *xwl_window = wl_container_of(window, xwl_window, window);
89
90- xcb_set_input_focus(xwm.connection, XCB_INPUT_FOCUS_NONE, xwl_window->id, XCB_CURRENT_TIME);
91+ xcb_set_input_focus(xwm.connection, XCB_INPUT_FOCUS_NONE, xwl_window->id,
92+ XCB_CURRENT_TIME);
93 xcb_flush(xwm.connection);
94 xwm.focus = xwl_window;
95 }
96@@ -185,12 +194,13 @@ unfocus(struct window *window)
97 {
98 struct xwl_window *xwl_window = wl_container_of(window, xwl_window, window);
99
100- /* If the window we are unfocusing is the latest xwl_window to be focused, we
101- * know we have transitioned to some other window type, so the X11 focus can
102- * be set to XCB_NONE. Otherwise, we have transitioned to another X11 window,
103- * and the X11 focus has already been updated. */
104+ /* If the window we are unfocusing is the latest xwl_window to be focused,
105+ * we know we have transitioned to some other window type, so the X11 focus
106+ * can be set to XCB_NONE. Otherwise, we have transitioned to another X11
107+ * window, and the X11 focus has already been updated. */
108 if (xwl_window == xwm.focus) {
109- xcb_set_input_focus(xwm.connection, XCB_INPUT_FOCUS_NONE, XCB_NONE, XCB_CURRENT_TIME);
110+ xcb_set_input_focus(xwm.connection, XCB_INPUT_FOCUS_NONE, XCB_NONE,
111+ XCB_CURRENT_TIME);
112 xcb_flush(xwm.connection);
113 }
114 }
115@@ -202,17 +212,19 @@ close_(struct window *window)
116
117 if (xwl_window->supports_delete) {
118 xcb_client_message_event_t event = {
119- .response_type = XCB_CLIENT_MESSAGE,
120- .format = 32,
121- .window = xwl_window->id,
122- .type = xwm.atoms[ATOM_WM_PROTOCOLS].value,
123- .data.data32 = {
124- xwm.atoms[ATOM_WM_DELETE_WINDOW].value,
125- XCB_CURRENT_TIME,
126- },
127+ .response_type = XCB_CLIENT_MESSAGE,
128+ .format = 32,
129+ .window = xwl_window->id,
130+ .type = xwm.atoms[ATOM_WM_PROTOCOLS].value,
131+ .data.data32 =
132+ {
133+ xwm.atoms[ATOM_WM_DELETE_WINDOW].value,
134+ XCB_CURRENT_TIME,
135+ },
136 };
137
138- xcb_send_event(xwm.connection, false, xwl_window->id, XCB_EVENT_MASK_NO_EVENT, (const char *)&event);
139+ xcb_send_event(xwm.connection, false, xwl_window->id,
140+ XCB_EVENT_MASK_NO_EVENT, (const char *)&event);
141 } else {
142 xcb_kill_client(xwm.connection, xwl_window->id);
143 }
144@@ -221,20 +233,22 @@ close_(struct window *window)
145 }
146
147 static const struct window_impl xwl_window_handler = {
148- .move = move,
149- .configure = configure,
150- .focus = focus,
151- .unfocus = unfocus,
152- .close = close_,
153+ .move = move,
154+ .configure = configure,
155+ .focus = focus,
156+ .unfocus = unfocus,
157+ .close = close_,
158 };
159
160 static void
161 handle_surface_destroy(struct wl_listener *listener, void *data)
162 {
163- struct xwl_window *xwl_window = wl_container_of(listener, xwl_window, surface_destroy_listener);
164+ struct xwl_window *xwl_window =
165+ wl_container_of(listener, xwl_window, surface_destroy_listener);
166
167- if (xwm.focus == xwl_window)
168+ if (xwm.focus == xwl_window) {
169 xwm.focus = NULL;
170+ }
171
172 window_finalize(&xwl_window->window);
173 wl_list_remove(&xwl_window->link);
174@@ -250,19 +264,23 @@ manage_window(struct xwl_window *xwl_window)
175 xcb_get_geometry_cookie_t geometry_cookie;
176 xcb_get_geometry_reply_t *geometry_reply;
177
178- resource = wl_client_get_object(swc.xserver->client, xwl_window->surface_id);
179+ resource =
180+ wl_client_get_object(swc.xserver->client, xwl_window->surface_id);
181
182- if (!resource)
183+ if (!resource) {
184 return false;
185+ }
186
187 surface = wl_resource_get_user_data(resource);
188 geometry_cookie = xcb_get_geometry(xwm.connection, xwl_window->id);
189
190 window_initialize(&xwl_window->window, &xwl_window_handler, surface);
191 xwl_window->surface_destroy_listener.notify = &handle_surface_destroy;
192- wl_resource_add_destroy_listener(surface->resource, &xwl_window->surface_destroy_listener);
193+ wl_resource_add_destroy_listener(surface->resource,
194+ &xwl_window->surface_destroy_listener);
195
196- if ((geometry_reply = xcb_get_geometry_reply(xwm.connection, geometry_cookie, NULL))) {
197+ if ((geometry_reply =
198+ xcb_get_geometry_reply(xwm.connection, geometry_cookie, NULL))) {
199 view_move(surface->view, geometry_reply->x, geometry_reply->y);
200 free(geometry_reply);
201 }
202@@ -274,7 +292,8 @@ manage_window(struct xwl_window *xwl_window)
203
204 mask = XCB_CW_EVENT_MASK;
205 values[0] = XCB_EVENT_MASK_PROPERTY_CHANGE;
206- xcb_change_window_attributes(xwm.connection, xwl_window->id, mask, values);
207+ xcb_change_window_attributes(xwm.connection, xwl_window->id, mask,
208+ values);
209 mask = XCB_CONFIG_WINDOW_BORDER_WIDTH;
210 values[0] = 0;
211 xcb_configure_window(xwm.connection, xwl_window->id, mask, values);
212@@ -295,17 +314,18 @@ handle_new_surface(struct wl_listener *listener, void *data)
213 struct surface *surface = data;
214 struct xwl_window *window;
215
216- window = find_window_by_surface_id(&xwm.unpaired_windows, wl_resource_get_id(surface->resource));
217+ window = find_window_by_surface_id(&xwm.unpaired_windows,
218+ wl_resource_get_id(surface->resource));
219
220- if (!window)
221+ if (!window) {
222 return;
223+ }
224
225 manage_window(window);
226 }
227
228-static struct wl_listener new_surface_listener = {
229- .notify = &handle_new_surface
230-};
231+static struct wl_listener new_surface_listener = {.notify =
232+ &handle_new_surface};
233
234 /* X event handlers */
235 static void
236@@ -313,8 +333,9 @@ create_notify(xcb_create_notify_event_t *event)
237 {
238 struct xwl_window *xwl_window;
239
240- if (!(xwl_window = malloc(sizeof *xwl_window)))
241+ if (!(xwl_window = malloc(sizeof *xwl_window))) {
242 return;
243+ }
244
245 xwl_window->id = event->window;
246 xwl_window->surface_id = 0;
247@@ -330,7 +351,8 @@ destroy_notify(xcb_destroy_notify_event_t *event)
248 if ((xwl_window = find_window(&xwm.windows, event->window))) {
249 wl_list_remove(&xwl_window->surface_destroy_listener.link);
250 window_finalize(&xwl_window->window);
251- } else if (!(xwl_window = find_window(&xwm.unpaired_windows, event->window))) {
252+ } else if (!(xwl_window =
253+ find_window(&xwm.unpaired_windows, event->window))) {
254 return;
255 }
256
257@@ -354,13 +376,16 @@ property_notify(xcb_property_notify_event_t *event)
258 {
259 struct xwl_window *xwl_window;
260
261- if (!(xwl_window = find_window(&xwm.windows, event->window)))
262+ if (!(xwl_window = find_window(&xwm.windows, event->window))) {
263 return;
264+ }
265
266- if (event->atom == xwm.ewmh._NET_WM_NAME && event->state == XCB_PROPERTY_NEW_VALUE)
267+ if (event->atom == xwm.ewmh._NET_WM_NAME &&
268+ event->state == XCB_PROPERTY_NEW_VALUE) {
269 update_name(xwl_window);
270- else if (event->atom == xwm.atoms[ATOM_WM_PROTOCOLS].value)
271+ } else if (event->atom == xwm.atoms[ATOM_WM_PROTOCOLS].value) {
272 update_protocols(xwl_window);
273+ }
274 }
275
276 static void
277@@ -369,8 +394,9 @@ client_message(xcb_client_message_event_t *event)
278 if (event->type == xwm.atoms[ATOM_WL_SURFACE_ID].value) {
279 struct xwl_window *xwl_window;
280
281- if (!(xwl_window = find_window(&xwm.unpaired_windows, event->window)))
282+ if (!(xwl_window = find_window(&xwm.unpaired_windows, event->window))) {
283 return;
284+ }
285
286 xwl_window->surface_id = event->data.data32[0];
287 manage_window(xwl_window);
288@@ -446,7 +472,8 @@ xwm_initialize(int fd)
289
290 for (index = 0; index < ARRAY_LENGTH(xwm.atoms); ++index) {
291 name = xwm.atoms[index].name;
292- xwm.atoms[index].cookie = xcb_intern_atom(xwm.connection, 0, strlen(name), name);
293+ xwm.atoms[index].cookie =
294+ xcb_intern_atom(xwm.connection, 0, strlen(name), name);
295 }
296
297 setup = xcb_get_setup(xwm.connection);
298@@ -455,10 +482,13 @@ xwm_initialize(int fd)
299
300 /* Try to select for substructure redirect. */
301 mask = XCB_CW_EVENT_MASK;
302- values[0] = XCB_EVENT_MASK_SUBSTRUCTURE_NOTIFY | XCB_EVENT_MASK_SUBSTRUCTURE_REDIRECT;
303- change_attributes_cookie = xcb_change_window_attributes(xwm.connection, xwm.screen->root, mask, values);
304+ values[0] = XCB_EVENT_MASK_SUBSTRUCTURE_NOTIFY |
305+ XCB_EVENT_MASK_SUBSTRUCTURE_REDIRECT;
306+ change_attributes_cookie = xcb_change_window_attributes(
307+ xwm.connection, xwm.screen->root, mask, values);
308
309- xwm.source = wl_event_loop_add_fd(swc.event_loop, fd, WL_EVENT_READABLE, &connection_data, NULL);
310+ xwm.source = wl_event_loop_add_fd(swc.event_loop, fd, WL_EVENT_READABLE,
311+ &connection_data, NULL);
312 wl_list_init(&xwm.windows);
313 wl_list_init(&xwm.unpaired_windows);
314
315@@ -467,14 +497,16 @@ xwm_initialize(int fd)
316 goto error2;
317 }
318
319- composite_extension = xcb_get_extension_data(xwm.connection, &xcb_composite_id);
320+ composite_extension =
321+ xcb_get_extension_data(xwm.connection, &xcb_composite_id);
322
323 if (!composite_extension->present) {
324 ERROR("xwm: X server does not have composite extension\n");
325 goto error3;
326 }
327
328- redirect_subwindows_cookie = xcb_composite_redirect_subwindows_checked(xwm.connection, xwm.screen->root, XCB_COMPOSITE_REDIRECT_MANUAL);
329+ redirect_subwindows_cookie = xcb_composite_redirect_subwindows_checked(
330+ xwm.connection, xwm.screen->root, XCB_COMPOSITE_REDIRECT_MANUAL);
331
332 if ((error = xcb_request_check(xwm.connection, change_attributes_cookie))) {
333 ERROR("xwm: Another window manager is running\n");
334@@ -482,16 +514,17 @@ xwm_initialize(int fd)
335 goto error3;
336 }
337
338- if ((error = xcb_request_check(xwm.connection, redirect_subwindows_cookie))) {
339+ if ((error =
340+ xcb_request_check(xwm.connection, redirect_subwindows_cookie))) {
341 ERROR("xwm: Could not redirect subwindows of root for compositing\n");
342 free(error);
343 goto error3;
344 }
345
346 xwm.window = xcb_generate_id(xwm.connection);
347- xcb_create_window(xwm.connection, 0, xwm.window, xwm.screen->root,
348- 0, 0, 1, 1, 0, XCB_WINDOW_CLASS_INPUT_ONLY,
349- XCB_COPY_FROM_PARENT, 0, NULL);
350+ xcb_create_window(xwm.connection, 0, xwm.window, xwm.screen->root, 0, 0, 1,
351+ 1, 0, XCB_WINDOW_CLASS_INPUT_ONLY, XCB_COPY_FROM_PARENT,
352+ 0, NULL);
353
354 xcb_ewmh_init_atoms_replies(&xwm.ewmh, ewmh_cookies, &error);
355
356@@ -501,7 +534,8 @@ xwm_initialize(int fd)
357 }
358
359 for (index = 0; index < ARRAY_LENGTH(xwm.atoms); ++index) {
360- atom_reply = xcb_intern_atom_reply(xwm.connection, xwm.atoms[index].cookie, &error);
361+ atom_reply = xcb_intern_atom_reply(xwm.connection,
362+ xwm.atoms[index].cookie, &error);
363
364 if (error) {
365 ERROR("xwm: Failed to get atom reply: %u\n", error->error_code);
366@@ -512,7 +546,8 @@ xwm_initialize(int fd)
367 free(atom_reply);
368 }
369
370- xcb_set_selection_owner(xwm.connection, xwm.window, xwm.atoms[ATOM_WM_S0].value, XCB_CURRENT_TIME);
371+ xcb_set_selection_owner(xwm.connection, xwm.window,
372+ xwm.atoms[ATOM_WM_S0].value, XCB_CURRENT_TIME);
373 xcb_flush(xwm.connection);
374
375 wl_signal_add(&swc.compositor->signal.new_surface, &new_surface_listener);
+4,
-2
1@@ -26,7 +26,9 @@
2
3 #include <stdbool.h>
4
5-bool xwm_initialize(int fd);
6-void xwm_finalize(void);
7+bool
8+xwm_initialize(int fd);
9+void
10+xwm_finalize(void);
11
12 #endif
+8344,
-6457
1@@ -3,7 +3,8 @@
2
3 Do this:
4 #define STB_IMAGE_IMPLEMENTATION
5- before you include this file in *one* C or C++ file to create the implementation.
6+ before you include this file in *one* C or C++ file to create the
7+implementation.
8
9 // i.e. it should look like this:
10 #include ...
11@@ -13,15 +14,16 @@
12 #include "stb_image.h"
13
14 You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
15- And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
16+ And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using
17+malloc,realloc,free
18
19
20 QUICK NOTES:
21 Primarily of interest to game developers and other people who can
22 avoid problematic images and only need the trivial interface
23
24- JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
25- PNG 1/2/4/8/16-bit-per-channel
26+ JPEG baseline & progressive (12 bpc/arithmetic not supported, same as
27+stock IJG lib) PNG 1/2/4/8/16-bit-per-channel
28
29 TGA (not sure what subset, if a subset)
30 BMP non-1bpp, non-RLE
31@@ -51,23 +53,19 @@ RECENT REVISION HISTORY:
32 2.30 (2024-05-31) avoid erroneous gcc warning
33 2.29 (2023-05-xx) optimizations
34 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff
35- 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
36- 2.26 (2020-07-13) many minor fixes
37- 2.25 (2020-02-02) fix warnings
38- 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
39- 2.23 (2019-08-11) fix clang static analysis warning
40- 2.22 (2019-03-04) gif fixes, fix warnings
41- 2.21 (2019-02-25) fix typo in comment
42- 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
43- 2.19 (2018-02-11) fix warning
44- 2.18 (2018-01-30) fix warnings
45- 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
46- 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
47- 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
48- 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
49- 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
50- 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
51- 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
52+ 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug
53+fixes 2.26 (2020-07-13) many minor fixes 2.25 (2020-02-02) fix warnings 2.24
54+(2020-02-02) fix warnings; thread-local failure_reason and flip_vertically 2.23
55+(2019-08-11) fix clang static analysis warning 2.22 (2019-03-04) gif fixes, fix
56+warnings 2.21 (2019-02-25) fix typo in comment 2.20 (2019-02-07) support utf8
57+filenames in Windows; fix warnings and platform ifdefs 2.19 (2018-02-11) fix
58+warning 2.18 (2018-01-30) fix warnings 2.17 (2018-01-29) bugfix, 1-bit BMP,
59+16-bitness query, fix warnings 2.16 (2017-07-23) all functions have 16-bit
60+variants; optimizations; bugfixes 2.15 (2017-03-18) fix png-1,2,4; all Imagenet
61+JPGs; no runtime SSE detection on GCC 2.14 (2017-03-03) remove deprecated
62+STBI_JPEG_OLD; fixes for Imagenet JPGs 2.13 (2016-12-04) experimental 16-bit
63+API, only for PNG so far; fixes 2.12 (2016-04-02) fix typo in 2.11 PSD fix that
64+caused crashes 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
65 RGB-format JPEG; remove white matting in PSD;
66 allocate large structures on the stack;
67 correct channel count for PNG & BMP
68@@ -90,40 +88,39 @@ RECENT REVISION HISTORY:
69 github:urraka (animated gif) Junggon Kim (PNM comments)
70 Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA)
71 socks-the-fox (16-bit PNG)
72- Jeremy Sawicki (handle all ImageNet JPGs)
73- Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
74+ Jeremy Sawicki (handle all ImageNet JPGs)
75+ Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
76 Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
77 Arseny Kapoulkine Simon Breuss (16-bit PNM)
78 John-Mark Allen
79 Carmelo J Fdez-Aguera
80
81 Bug & warning fixes
82- Marc LeBlanc David Woo Guillaume George Martins Mozeiko
83- Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski
84- Phil Jordan Dave Moore Roy Eltham
85- Hayaki Saito Nathan Reed Won Chun
86- Luke Graham Johan Duparc Nick Verigakis the Horde3D community
87- Thomas Ruf Ronny Chevalier github:rlyeh
88- Janez Zemva John Bartholomew Michal Cichon github:romigrou
89- Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
90- Eugene Golushkov Laurent Gomila Cort Stratton github:snagar
91- Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex
92- Cass Everitt Ryamond Barbiero github:grim210
93- Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw
94- Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus
95- Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo
96- Julian Raschke Gregory Mullen Christian Floisand github:darealshinji
97- Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007
98- Brad Weinberger Matvey Cherevko github:mosra
99- Luca Sas Alexander Veselov Zack Middleton [reserved]
100- Ryan C. Gordon [reserved] [reserved]
101- DO NOT ADD YOUR NAME HERE
102+ Marc LeBlanc David Woo Guillaume George Martins
103+Mozeiko Christpher Lloyd Jerry Jansson Joseph Thomson Blazej
104+Dariusz Roszkowski Phil Jordan Dave Moore Roy
105+Eltham Hayaki Saito Nathan Reed Won Chun Luke Graham Johan
106+Duparc Nick Verigakis the Horde3D community Thomas Ruf Ronny
107+Chevalier github:rlyeh Janez Zemva John
108+Bartholomew Michal Cichon github:romigrou Jonathan Blow Ken
109+Hamada Tero Hanninen github:svdijk Eugene Golushkov Laurent
110+Gomila Cort Stratton github:snagar Aruelien Pocheville Sergio
111+Gonzalez Thibault Reuille github:Zelex Cass Everitt Ryamond
112+Barbiero github:grim210 Paul Du Bois Engin
113+Manap Aldo Culquicondor github:sammyhw Philipp Wiesemann Dale
114+Weiler Oriol Ferrer Mesia github:phprus Josh Tobin Neil
115+Bickford Matthew Gregan github:poppolopoppo Julian Raschke Gregory
116+Mullen Christian Floisand github:darealshinji Baldur Karlsson Kevin
117+Schmidt JR Smith github:Michaelangel007 Brad Weinberger Matvey
118+Cherevko github:mosra Luca Sas Alexander Veselov Zack
119+Middleton [reserved] Ryan C. Gordon [reserved] [reserved] DO NOT
120+ADD YOUR NAME HERE
121
122 Jacko Dirks
123
124- To add your name to the credits, pick a random blank space in the middle and fill it.
125- 80% of merge conflicts on stb PRs are due to people adding their name at the end
126- of the credits.
127+ To add your name to the credits, pick a random blank space in the middle and
128+fill it. 80% of merge conflicts on stb PRs are due to people adding their name
129+at the end of the credits.
130 */
131
132 #ifndef STBI_INCLUDE_STB_IMAGE_H
133@@ -142,14 +139,15 @@ RECENT REVISION HISTORY:
134 // // ... process data if not NULL ...
135 // // ... x = width, y = height, n = # 8-bit components per pixel ...
136 // // ... replace '0' with '1'..'4' to force that many components per pixel
137-// // ... but 'n' will always be the number that it would have been if you said 0
138-// stbi_image_free(data);
139+// // ... but 'n' will always be the number that it would have been if you said 0
140+// stbi_image_free(data);
141 //
142 // Standard parameters:
143 // int *x -- outputs image width in pixels
144 // int *y -- outputs image height in pixels
145 // int *channels_in_file -- outputs # of image components in image file
146-// int desired_channels -- if non-zero, # of image components requested in result
147+// int desired_channels -- if non-zero, # of image components requested in
148+// result
149 //
150 // The return value from an image loader is an 'unsigned char *' which points
151 // to the pixel data, or NULL on an allocation failure or if the image is
152@@ -177,8 +175,8 @@ RECENT REVISION HISTORY:
153 // and *x, *y, *channels_in_file will be unchanged. The function
154 // stbi_failure_reason() can be queried for an extremely brief, end-user
155 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
156-// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
157-// more user-friendly ones.
158+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get
159+// slightly more user-friendly ones.
160 //
161 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
162 //
163@@ -228,11 +226,12 @@ RECENT REVISION HISTORY:
164 // 2. easy to maintain
165 // 3. good performance
166 //
167-// Sometimes I let "good performance" creep up in priority over "easy to maintain",
168-// and for best performance I may provide less-easy-to-use APIs that give higher
169-// performance, in addition to the easy-to-use ones. Nevertheless, it's important
170-// to keep in mind that from the standpoint of you, a client of this library,
171-// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
172+// Sometimes I let "good performance" creep up in priority over "easy to
173+// maintain", and for best performance I may provide less-easy-to-use APIs that
174+// give higher performance, in addition to the easy-to-use ones. Nevertheless,
175+// it's important to keep in mind that from the standpoint of you, a client of
176+// this library, all you care about is #1 and #3, and stb libraries DO NOT
177+// emphasize #3 above all.
178 //
179 // Some secondary priorities arise directly from the first two, some of which
180 // provide more explicit reasons why performance can't be emphasized.
181@@ -251,7 +250,8 @@ RECENT REVISION HISTORY:
182 // overhead.
183 //
184 // The three functions you must define are "read" (reads some bytes of data),
185-// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
186+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the
187+// end).
188 //
189 // ===========================================================================
190 //
191@@ -279,10 +279,11 @@ RECENT REVISION HISTORY:
192 // HDR image support (disable by defining STBI_NO_HDR)
193 //
194 // stb_image supports loading HDR images in general, and currently the Radiance
195-// .HDR file format specifically. You can still load any file through the existing
196-// interface; if you attempt to load an HDR file, it will be automatically remapped
197-// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
198-// both of these constants can be reconfigured through this interface:
199+// .HDR file format specifically. You can still load any file through the
200+// existing interface; if you attempt to load an HDR file, it will be
201+// automatically remapped to LDR, assuming gamma 2.2 and an arbitrary scale
202+// factor defaulting to 1; both of these constants can be reconfigured through
203+// this interface:
204 //
205 // stbi_hdr_to_ldr_gamma(2.2f);
206 // stbi_hdr_to_ldr_scale(1.0f);
207@@ -373,14 +374,13 @@ RECENT REVISION HISTORY:
208
209 #define STBI_VERSION 1
210
211-enum
212-{
213- STBI_default = 0, // only used for desired_channels
214+enum {
215+ STBI_default = 0, // only used for desired_channels
216
217- STBI_grey = 1,
218- STBI_grey_alpha = 2,
219- STBI_rgb = 3,
220- STBI_rgb_alpha = 4
221+ STBI_grey = 1,
222+ STBI_grey_alpha = 2,
223+ STBI_rgb = 3,
224+ STBI_rgb_alpha = 4
225 };
226
227 #include <stdlib.h>
228@@ -408,11 +408,13 @@ extern "C" {
229 // load image by filename, open file, or memory buffer
230 //
231
232-typedef struct
233-{
234- int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. return number of bytes actually read
235- void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
236- int (*eof) (void *user); // returns nonzero if we are at end of file/data
237+typedef struct {
238+ int (*read)(void *user, char *data,
239+ int size); // fill 'data' with 'size' bytes. return number of
240+ // bytes actually read
241+ void (*skip)(void *user, int n); // skip the next 'n' bytes, or 'unget' the
242+ // last -n bytes if negative
243+ int (*eof)(void *user); // returns nonzero if we are at end of file/data
244 } stbi_io_callbacks;
245
246 ////////////////////////////////////
247@@ -420,21 +422,34 @@ typedef struct
248 // 8-bits-per-channel interface
249 //
250
251-STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels);
252-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
253+STBIDEF stbi_uc *
254+stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y,
255+ int *channels_in_file, int desired_channels);
256+STBIDEF stbi_uc *
257+stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x,
258+ int *y, int *channels_in_file, int desired_channels);
259
260 #ifndef STBI_NO_STDIO
261-STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
262-STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
263-// for stbi_load_from_file, file pointer is left pointing immediately after image
264+STBIDEF stbi_uc *
265+stbi_load(char const *filename, int *x, int *y, int *channels_in_file,
266+ int desired_channels);
267+STBIDEF stbi_uc *
268+stbi_load_from_file(FILE *f, int *x, int *y, int *channels_in_file,
269+ int desired_channels);
270+// for stbi_load_from_file, file pointer is left pointing immediately after
271+// image
272 #endif
273
274 #ifndef STBI_NO_GIF
275-STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
276+STBIDEF stbi_uc *
277+stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x,
278+ int *y, int *z, int *comp, int req_comp);
279 #endif
280
281 #ifdef STBI_WINDOWS_UTF8
282-STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
283+STBIDEF int
284+stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen,
285+ const wchar_t *input);
286 #endif
287
288 ////////////////////////////////////
289@@ -442,12 +457,21 @@ STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wch
290 // 16-bits-per-channel interface
291 //
292
293-STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
294-STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
295+STBIDEF stbi_us *
296+stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y,
297+ int *channels_in_file, int desired_channels);
298+STBIDEF stbi_us *
299+stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x,
300+ int *y, int *channels_in_file,
301+ int desired_channels);
302
303 #ifndef STBI_NO_STDIO
304-STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
305-STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
306+STBIDEF stbi_us *
307+stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file,
308+ int desired_channels);
309+STBIDEF stbi_us *
310+stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file,
311+ int desired_channels);
312 #endif
313
314 ////////////////////////////////////
315@@ -455,85 +479,126 @@ STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_i
316 // float-per-channel interface
317 //
318 #ifndef STBI_NO_LINEAR
319- STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
320- STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
321+STBIDEF float *
322+stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y,
323+ int *channels_in_file, int desired_channels);
324+STBIDEF float *
325+stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x,
326+ int *y, int *channels_in_file, int desired_channels);
327
328- #ifndef STBI_NO_STDIO
329- STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
330- STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
331- #endif
332+#ifndef STBI_NO_STDIO
333+STBIDEF float *
334+stbi_loadf(char const *filename, int *x, int *y, int *channels_in_file,
335+ int desired_channels);
336+STBIDEF float *
337+stbi_loadf_from_file(FILE *f, int *x, int *y, int *channels_in_file,
338+ int desired_channels);
339+#endif
340 #endif
341
342 #ifndef STBI_NO_HDR
343- STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
344- STBIDEF void stbi_hdr_to_ldr_scale(float scale);
345+STBIDEF void
346+stbi_hdr_to_ldr_gamma(float gamma);
347+STBIDEF void
348+stbi_hdr_to_ldr_scale(float scale);
349 #endif // STBI_NO_HDR
350
351 #ifndef STBI_NO_LINEAR
352- STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
353- STBIDEF void stbi_ldr_to_hdr_scale(float scale);
354+STBIDEF void
355+stbi_ldr_to_hdr_gamma(float gamma);
356+STBIDEF void
357+stbi_ldr_to_hdr_scale(float scale);
358 #endif // STBI_NO_LINEAR
359
360 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
361-STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
362-STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
363+STBIDEF int
364+stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
365+STBIDEF int
366+stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
367 #ifndef STBI_NO_STDIO
368-STBIDEF int stbi_is_hdr (char const *filename);
369-STBIDEF int stbi_is_hdr_from_file(FILE *f);
370+STBIDEF int
371+stbi_is_hdr(char const *filename);
372+STBIDEF int
373+stbi_is_hdr_from_file(FILE *f);
374 #endif // STBI_NO_STDIO
375
376-
377 // get a VERY brief reason for failure
378 // on most compilers (and ALL modern mainstream compilers) this is threadsafe
379-STBIDEF const char *stbi_failure_reason (void);
380+STBIDEF const char *
381+stbi_failure_reason(void);
382
383 // free the loaded image -- this is just free()
384-STBIDEF void stbi_image_free (void *retval_from_stbi_load);
385+STBIDEF void
386+stbi_image_free(void *retval_from_stbi_load);
387
388 // get image dimensions & components without fully decoding
389-STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
390-STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
391-STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
392-STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
393+STBIDEF int
394+stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y,
395+ int *comp);
396+STBIDEF int
397+stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x,
398+ int *y, int *comp);
399+STBIDEF int
400+stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
401+STBIDEF int
402+stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
403
404 #ifndef STBI_NO_STDIO
405-STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp);
406-STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
407-STBIDEF int stbi_is_16_bit (char const *filename);
408-STBIDEF int stbi_is_16_bit_from_file(FILE *f);
409+STBIDEF int
410+stbi_info(char const *filename, int *x, int *y, int *comp);
411+STBIDEF int
412+stbi_info_from_file(FILE *f, int *x, int *y, int *comp);
413+STBIDEF int
414+stbi_is_16_bit(char const *filename);
415+STBIDEF int
416+stbi_is_16_bit_from_file(FILE *f);
417 #endif
418
419-
420-
421 // for image formats that explicitly notate that they have premultiplied alpha,
422 // we just return the colors as stored in the file. set this flag to force
423 // unpremultiplication. results are undefined if the unpremultiply overflow.
424-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
425+STBIDEF void
426+stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
427
428 // indicate whether we should process iphone images back to canonical format,
429 // or just pass them through "as-is"
430-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
431-
432-// flip the image vertically, so the first pixel in the output array is the bottom left
433-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
434-
435-// as above, but only applies to images loaded on the thread that calls the function
436-// this function is only available if your compiler supports thread-local variables;
437-// calling it will fail to link if your compiler doesn't
438-STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
439-STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
440-STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
441+STBIDEF void
442+stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
443+
444+// flip the image vertically, so the first pixel in the output array is the
445+// bottom left
446+STBIDEF void
447+stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
448+
449+// as above, but only applies to images loaded on the thread that calls the
450+// function. This function is only available if your compiler supports
451+// thread-local variables; calling it will fail to link if your compiler doesn't
452+STBIDEF void
453+stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
454+STBIDEF void
455+stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
456+STBIDEF void
457+stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
458
459 // ZLIB client - used by PNG, available for other purposes
460
461-STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
462-STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
463-STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
464-STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
465-
466-STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
467-STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
468-
469+STBIDEF char *
470+stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size,
471+ int *outlen);
472+STBIDEF char *
473+stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len,
474+ int initial_size, int *outlen,
475+ int parse_header);
476+STBIDEF char *
477+stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
478+STBIDEF int
479+stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
480+
481+STBIDEF char *
482+stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
483+STBIDEF int
484+stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer,
485+ int ilen);
486
487 #ifdef __cplusplus
488 }
489@@ -546,52 +611,53 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
490
491 #ifdef STB_IMAGE_IMPLEMENTATION
492
493-#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
494- || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
495- || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
496- || defined(STBI_ONLY_ZLIB)
497- #ifndef STBI_ONLY_JPEG
498- #define STBI_NO_JPEG
499- #endif
500- #ifndef STBI_ONLY_PNG
501- #define STBI_NO_PNG
502- #endif
503- #ifndef STBI_ONLY_BMP
504- #define STBI_NO_BMP
505- #endif
506- #ifndef STBI_ONLY_PSD
507- #define STBI_NO_PSD
508- #endif
509- #ifndef STBI_ONLY_TGA
510- #define STBI_NO_TGA
511- #endif
512- #ifndef STBI_ONLY_GIF
513- #define STBI_NO_GIF
514- #endif
515- #ifndef STBI_ONLY_HDR
516- #define STBI_NO_HDR
517- #endif
518- #ifndef STBI_ONLY_PIC
519- #define STBI_NO_PIC
520- #endif
521- #ifndef STBI_ONLY_PNM
522- #define STBI_NO_PNM
523- #endif
524-#endif
525-
526-#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
527-#define STBI_NO_ZLIB
528+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || \
529+ defined(STBI_ONLY_BMP) || defined(STBI_ONLY_TGA) || \
530+ defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) || \
531+ defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || \
532+ defined(STBI_ONLY_PNM) || defined(STBI_ONLY_ZLIB)
533+#ifndef STBI_ONLY_JPEG
534+#define STBI_NO_JPEG
535+#endif
536+#ifndef STBI_ONLY_PNG
537+#define STBI_NO_PNG
538+#endif
539+#ifndef STBI_ONLY_BMP
540+#define STBI_NO_BMP
541+#endif
542+#ifndef STBI_ONLY_PSD
543+#define STBI_NO_PSD
544+#endif
545+#ifndef STBI_ONLY_TGA
546+#define STBI_NO_TGA
547+#endif
548+#ifndef STBI_ONLY_GIF
549+#define STBI_NO_GIF
550+#endif
551+#ifndef STBI_ONLY_HDR
552+#define STBI_NO_HDR
553+#endif
554+#ifndef STBI_ONLY_PIC
555+#define STBI_NO_PIC
556+#endif
557+#ifndef STBI_ONLY_PNM
558+#define STBI_NO_PNM
559+#endif
560 #endif
561
562+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && \
563+ !defined(STBI_NO_ZLIB)
564+#define STBI_NO_ZLIB
565+#endif
566
567+#include <limits.h>
568 #include <stdarg.h>
569 #include <stddef.h> // ptrdiff_t on osx
570 #include <stdlib.h>
571 #include <string.h>
572-#include <limits.h>
573
574 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
575-#include <math.h> // ldexp, pow
576+#include <math.h> // ldexp, pow
577 #endif
578
579 #ifndef STBI_NO_STDIO
580@@ -609,55 +675,55 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
581 #define STBI_EXTERN extern
582 #endif
583
584-
585 #ifndef _MSC_VER
586- #ifdef __cplusplus
587- #define stbi_inline inline
588- #else
589- #define stbi_inline
590- #endif
591+#ifdef __cplusplus
592+#define stbi_inline inline
593+#else
594+#define stbi_inline
595+#endif
596 #else
597- #define stbi_inline __forceinline
598+#define stbi_inline __forceinline
599 #endif
600
601 #ifndef STBI_NO_THREAD_LOCALS
602- #if defined(__cplusplus) && __cplusplus >= 201103L
603- #define STBI_THREAD_LOCAL thread_local
604- #elif defined(__GNUC__) && __GNUC__ < 5
605- #define STBI_THREAD_LOCAL __thread
606- #elif defined(_MSC_VER)
607- #define STBI_THREAD_LOCAL __declspec(thread)
608- #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
609- #define STBI_THREAD_LOCAL _Thread_local
610- #endif
611-
612- #ifndef STBI_THREAD_LOCAL
613- #if defined(__GNUC__)
614- #define STBI_THREAD_LOCAL __thread
615- #endif
616- #endif
617+#if defined(__cplusplus) && __cplusplus >= 201103L
618+#define STBI_THREAD_LOCAL thread_local
619+#elif defined(__GNUC__) && __GNUC__ < 5
620+#define STBI_THREAD_LOCAL __thread
621+#elif defined(_MSC_VER)
622+#define STBI_THREAD_LOCAL __declspec(thread)
623+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && \
624+ !defined(__STDC_NO_THREADS__)
625+#define STBI_THREAD_LOCAL _Thread_local
626+#endif
627+
628+#ifndef STBI_THREAD_LOCAL
629+#if defined(__GNUC__)
630+#define STBI_THREAD_LOCAL __thread
631+#endif
632+#endif
633 #endif
634
635 #if defined(_MSC_VER) || defined(__SYMBIAN32__)
636 typedef unsigned short stbi__uint16;
637-typedef signed short stbi__int16;
638-typedef unsigned int stbi__uint32;
639-typedef signed int stbi__int32;
640+typedef signed short stbi__int16;
641+typedef unsigned int stbi__uint32;
642+typedef signed int stbi__int32;
643 #else
644 #include <stdint.h>
645 typedef uint16_t stbi__uint16;
646-typedef int16_t stbi__int16;
647+typedef int16_t stbi__int16;
648 typedef uint32_t stbi__uint32;
649-typedef int32_t stbi__int32;
650+typedef int32_t stbi__int32;
651 #endif
652
653 // should produce compiler error if size is wrong
654-typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
655+typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
656
657 #ifdef _MSC_VER
658-#define STBI_NOTUSED(v) (void)(v)
659+#define STBI_NOTUSED(v) (void)(v)
660 #else
661-#define STBI_NOTUSED(v) (void)sizeof(v)
662+#define STBI_NOTUSED(v) (void)sizeof(v)
663 #endif
664
665 #ifdef _MSC_VER
666@@ -665,27 +731,30 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
667 #endif
668
669 #ifdef STBI_HAS_LROTL
670- #define stbi_lrot(x,y) _lrotl(x,y)
671+#define stbi_lrot(x, y) _lrotl(x, y)
672 #else
673- #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31)))
674+#define stbi_lrot(x, y) (((x) << (y)) | ((x) >> (-(y) & 31)))
675 #endif
676
677-#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
678+#if defined(STBI_MALLOC) && defined(STBI_FREE) && \
679+ (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
680 // ok
681-#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
682+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && \
683+ !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
684 // ok
685 #else
686-#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
687+#error \
688+ "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
689 #endif
690
691 #ifndef STBI_MALLOC
692-#define STBI_MALLOC(sz) malloc(sz)
693-#define STBI_REALLOC(p,newsz) realloc(p,newsz)
694-#define STBI_FREE(p) free(p)
695+#define STBI_MALLOC(sz) malloc(sz)
696+#define STBI_REALLOC(p, newsz) realloc(p, newsz)
697+#define STBI_FREE(p) free(p)
698 #endif
699
700 #ifndef STBI_REALLOC_SIZED
701-#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
702+#define STBI_REALLOC_SIZED(p, oldsz, newsz) STBI_REALLOC(p, newsz)
703 #endif
704
705 // x86/x64 detection
706@@ -695,7 +764,8 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
707 #define STBI__X86_TARGET
708 #endif
709
710-#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
711+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && \
712+ !defined(STBI_NO_SIMD)
713 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
714 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
715 // but previous attempts to provide the SSE2 functions with runtime
716@@ -706,8 +776,10 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
717 #define STBI_NO_SIMD
718 #endif
719
720-#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
721-// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
722+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && \
723+ !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
724+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid
725+// STBI__X64_TARGET
726 //
727 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
728 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
729@@ -717,44 +789,49 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
730 // See https://github.com/nothings/stb/issues/81 for more information.
731 //
732 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
733-// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
734+// -mstackrealign to your build settings, feel free to #define
735+// STBI_MINGW_ENABLE_SSE2.
736 #define STBI_NO_SIMD
737 #endif
738
739-#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
740+#if !defined(STBI_NO_SIMD) && \
741+ (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
742 #define STBI_SSE2
743 #include <emmintrin.h>
744
745 #ifdef _MSC_VER
746
747-#if _MSC_VER >= 1400 // not VC6
748-#include <intrin.h> // __cpuid
749-static int stbi__cpuid3(void)
750+#if _MSC_VER >= 1400 // not VC6
751+#include <intrin.h> // __cpuid
752+static int
753+stbi__cpuid3(void)
754 {
755- int info[4];
756- __cpuid(info,1);
757- return info[3];
758+ int info[4];
759+ __cpuid(info, 1);
760+ return info[3];
761 }
762 #else
763-static int stbi__cpuid3(void)
764+static int
765+stbi__cpuid3(void)
766 {
767- int res;
768- __asm {
769+ int res;
770+ __asm {
771 mov eax,1
772 cpuid
773 mov res,edx
774- }
775- return res;
776+ }
777+ return res;
778 }
779 #endif
780
781 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
782
783 #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
784-static int stbi__sse2_available(void)
785+static int
786+stbi__sse2_available(void)
787 {
788- int info3 = stbi__cpuid3();
789- return ((info3 >> 26) & 1) != 0;
790+ int info3 = stbi__cpuid3();
791+ return ((info3 >> 26) & 1) != 0;
792 }
793 #endif
794
795@@ -762,12 +839,13 @@ static int stbi__sse2_available(void)
796 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
797
798 #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
799-static int stbi__sse2_available(void)
800+static int
801+stbi__sse2_available(void)
802 {
803- // If we're even attempting to compile this on GCC/Clang, that means
804- // -msse2 is on, which means the compiler is allowed to use SSE2
805- // instructions at will, and so are we.
806- return 1;
807+ // If we're even attempting to compile this on GCC/Clang, that means
808+ // -msse2 is on, which means the compiler is allowed to use SSE2
809+ // instructions at will, and so are we.
810+ return 1;
811 }
812 #endif
813
814@@ -802,189 +880,234 @@ static int stbi__sse2_available(void)
815
816 // stbi__context structure is our basic context used by all images, so it
817 // contains all the IO context, plus some basic image information
818-typedef struct
819-{
820- stbi__uint32 img_x, img_y;
821- int img_n, img_out_n;
822+typedef struct {
823+ stbi__uint32 img_x, img_y;
824+ int img_n, img_out_n;
825
826- stbi_io_callbacks io;
827- void *io_user_data;
828+ stbi_io_callbacks io;
829+ void *io_user_data;
830
831- int read_from_callbacks;
832- int buflen;
833- stbi_uc buffer_start[128];
834- int callback_already_read;
835+ int read_from_callbacks;
836+ int buflen;
837+ stbi_uc buffer_start[128];
838+ int callback_already_read;
839
840- stbi_uc *img_buffer, *img_buffer_end;
841- stbi_uc *img_buffer_original, *img_buffer_original_end;
842+ stbi_uc *img_buffer, *img_buffer_end;
843+ stbi_uc *img_buffer_original, *img_buffer_original_end;
844 } stbi__context;
845
846-
847-static void stbi__refill_buffer(stbi__context *s);
848+static void
849+stbi__refill_buffer(stbi__context *s);
850
851 // initialize a memory-decode context
852-static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
853+static void
854+stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
855 {
856- s->io.read = NULL;
857- s->read_from_callbacks = 0;
858- s->callback_already_read = 0;
859- s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
860- s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
861+ s->io.read = NULL;
862+ s->read_from_callbacks = 0;
863+ s->callback_already_read = 0;
864+ s->img_buffer = s->img_buffer_original = (stbi_uc *)buffer;
865+ s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *)buffer + len;
866 }
867
868 // initialize a callback-based context
869-static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
870+static void
871+stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
872 {
873- s->io = *c;
874- s->io_user_data = user;
875- s->buflen = sizeof(s->buffer_start);
876- s->read_from_callbacks = 1;
877- s->callback_already_read = 0;
878- s->img_buffer = s->img_buffer_original = s->buffer_start;
879- stbi__refill_buffer(s);
880- s->img_buffer_original_end = s->img_buffer_end;
881+ s->io = *c;
882+ s->io_user_data = user;
883+ s->buflen = sizeof(s->buffer_start);
884+ s->read_from_callbacks = 1;
885+ s->callback_already_read = 0;
886+ s->img_buffer = s->img_buffer_original = s->buffer_start;
887+ stbi__refill_buffer(s);
888+ s->img_buffer_original_end = s->img_buffer_end;
889 }
890
891 #ifndef STBI_NO_STDIO
892
893-static int stbi__stdio_read(void *user, char *data, int size)
894+static int
895+stbi__stdio_read(void *user, char *data, int size)
896 {
897- return (int) fread(data,1,size,(FILE*) user);
898+ return (int)fread(data, 1, size, (FILE *)user);
899 }
900
901-static void stbi__stdio_skip(void *user, int n)
902+static void
903+stbi__stdio_skip(void *user, int n)
904 {
905- int ch;
906- fseek((FILE*) user, n, SEEK_CUR);
907- ch = fgetc((FILE*) user); /* have to read a byte to reset feof()'s flag */
908- if (ch != EOF) {
909- ungetc(ch, (FILE *) user); /* push byte back onto stream if valid. */
910- }
911+ int ch;
912+ fseek((FILE *)user, n, SEEK_CUR);
913+ ch = fgetc((FILE *)user); /* have to read a byte to reset feof()'s flag */
914+ if (ch != EOF) {
915+ ungetc(ch, (FILE *)user); /* push byte back onto stream if valid. */
916+ }
917 }
918
919-static int stbi__stdio_eof(void *user)
920+static int
921+stbi__stdio_eof(void *user)
922 {
923- return feof((FILE*) user) || ferror((FILE *) user);
924+ return feof((FILE *)user) || ferror((FILE *)user);
925 }
926
927-static stbi_io_callbacks stbi__stdio_callbacks =
928-{
929- stbi__stdio_read,
930- stbi__stdio_skip,
931- stbi__stdio_eof,
932+static stbi_io_callbacks stbi__stdio_callbacks = {
933+ stbi__stdio_read,
934+ stbi__stdio_skip,
935+ stbi__stdio_eof,
936 };
937
938-static void stbi__start_file(stbi__context *s, FILE *f)
939+static void
940+stbi__start_file(stbi__context *s, FILE *f)
941 {
942- stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
943+ stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *)f);
944 }
945
946-//static void stop_file(stbi__context *s) { }
947+// static void stop_file(stbi__context *s) { }
948
949 #endif // !STBI_NO_STDIO
950
951-static void stbi__rewind(stbi__context *s)
952+static void
953+stbi__rewind(stbi__context *s)
954 {
955- // conceptually rewind SHOULD rewind to the beginning of the stream,
956- // but we just rewind to the beginning of the initial buffer, because
957- // we only use it after doing 'test', which only ever looks at at most 92 bytes
958- s->img_buffer = s->img_buffer_original;
959- s->img_buffer_end = s->img_buffer_original_end;
960+ // conceptually rewind SHOULD rewind to the beginning of the stream,
961+ // but we just rewind to the beginning of the initial buffer, because
962+ // we only use it after doing 'test', which only ever looks at at most 92
963+ // bytes
964+ s->img_buffer = s->img_buffer_original;
965+ s->img_buffer_end = s->img_buffer_original_end;
966 }
967
968-enum
969-{
970- STBI_ORDER_RGB,
971- STBI_ORDER_BGR
972-};
973+enum { STBI_ORDER_RGB, STBI_ORDER_BGR };
974
975-typedef struct
976-{
977- int bits_per_channel;
978- int num_channels;
979- int channel_order;
980+typedef struct {
981+ int bits_per_channel;
982+ int num_channels;
983+ int channel_order;
984 } stbi__result_info;
985
986 #ifndef STBI_NO_JPEG
987-static int stbi__jpeg_test(stbi__context *s);
988-static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
989-static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
990+static int
991+stbi__jpeg_test(stbi__context *s);
992+static void *
993+stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
994+ stbi__result_info *ri);
995+static int
996+stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
997 #endif
998
999 #ifndef STBI_NO_PNG
1000-static int stbi__png_test(stbi__context *s);
1001-static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
1002-static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
1003-static int stbi__png_is16(stbi__context *s);
1004+static int
1005+stbi__png_test(stbi__context *s);
1006+static void *
1007+stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1008+ stbi__result_info *ri);
1009+static int
1010+stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
1011+static int
1012+stbi__png_is16(stbi__context *s);
1013 #endif
1014
1015 #ifndef STBI_NO_BMP
1016-static int stbi__bmp_test(stbi__context *s);
1017-static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
1018-static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
1019+static int
1020+stbi__bmp_test(stbi__context *s);
1021+static void *
1022+stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1023+ stbi__result_info *ri);
1024+static int
1025+stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
1026 #endif
1027
1028 #ifndef STBI_NO_TGA
1029-static int stbi__tga_test(stbi__context *s);
1030-static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
1031-static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
1032+static int
1033+stbi__tga_test(stbi__context *s);
1034+static void *
1035+stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1036+ stbi__result_info *ri);
1037+static int
1038+stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
1039 #endif
1040
1041 #ifndef STBI_NO_PSD
1042-static int stbi__psd_test(stbi__context *s);
1043-static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
1044-static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
1045-static int stbi__psd_is16(stbi__context *s);
1046+static int
1047+stbi__psd_test(stbi__context *s);
1048+static void *
1049+stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1050+ stbi__result_info *ri, int bpc);
1051+static int
1052+stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
1053+static int
1054+stbi__psd_is16(stbi__context *s);
1055 #endif
1056
1057 #ifndef STBI_NO_HDR
1058-static int stbi__hdr_test(stbi__context *s);
1059-static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
1060-static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
1061+static int
1062+stbi__hdr_test(stbi__context *s);
1063+static float *
1064+stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1065+ stbi__result_info *ri);
1066+static int
1067+stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
1068 #endif
1069
1070 #ifndef STBI_NO_PIC
1071-static int stbi__pic_test(stbi__context *s);
1072-static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
1073-static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
1074+static int
1075+stbi__pic_test(stbi__context *s);
1076+static void *
1077+stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1078+ stbi__result_info *ri);
1079+static int
1080+stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
1081 #endif
1082
1083 #ifndef STBI_NO_GIF
1084-static int stbi__gif_test(stbi__context *s);
1085-static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
1086-static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
1087-static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
1088+static int
1089+stbi__gif_test(stbi__context *s);
1090+static void *
1091+stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1092+ stbi__result_info *ri);
1093+static void *
1094+stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z,
1095+ int *comp, int req_comp);
1096+static int
1097+stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
1098 #endif
1099
1100 #ifndef STBI_NO_PNM
1101-static int stbi__pnm_test(stbi__context *s);
1102-static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
1103-static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
1104-static int stbi__pnm_is16(stbi__context *s);
1105+static int
1106+stbi__pnm_test(stbi__context *s);
1107+static void *
1108+stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1109+ stbi__result_info *ri);
1110+static int
1111+stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
1112+static int
1113+stbi__pnm_is16(stbi__context *s);
1114 #endif
1115
1116 static
1117 #ifdef STBI_THREAD_LOCAL
1118-STBI_THREAD_LOCAL
1119+ STBI_THREAD_LOCAL
1120 #endif
1121-const char *stbi__g_failure_reason;
1122+ const char *stbi__g_failure_reason;
1123
1124-STBIDEF const char *stbi_failure_reason(void)
1125+STBIDEF const char *
1126+stbi_failure_reason(void)
1127 {
1128- return stbi__g_failure_reason;
1129+ return stbi__g_failure_reason;
1130 }
1131
1132 #ifndef STBI_NO_FAILURE_STRINGS
1133-static int stbi__err(const char *str)
1134+static int
1135+stbi__err(const char *str)
1136 {
1137- stbi__g_failure_reason = str;
1138- return 0;
1139+ stbi__g_failure_reason = str;
1140+ return 0;
1141 }
1142 #endif
1143
1144-static void *stbi__malloc(size_t size)
1145+static void *
1146+stbi__malloc(size_t size)
1147 {
1148- return STBI_MALLOC(size);
1149+ return STBI_MALLOC(size);
1150 }
1151
1152 // stb_image uses ints pervasively, including for offset calculations.
1153@@ -999,88 +1122,128 @@ static void *stbi__malloc(size_t size)
1154
1155 // return 1 if the sum is valid, 0 on overflow.
1156 // negative terms are considered invalid.
1157-static int stbi__addsizes_valid(int a, int b)
1158+static int
1159+stbi__addsizes_valid(int a, int b)
1160 {
1161- if (b < 0) return 0;
1162- // now 0 <= b <= INT_MAX, hence also
1163- // 0 <= INT_MAX - b <= INTMAX.
1164- // And "a + b <= INT_MAX" (which might overflow) is the
1165- // same as a <= INT_MAX - b (no overflow)
1166- return a <= INT_MAX - b;
1167+ if (b < 0) {
1168+ return 0;
1169+ }
1170+ // now 0 <= b <= INT_MAX, hence also
1171+ // 0 <= INT_MAX - b <= INTMAX.
1172+ // And "a + b <= INT_MAX" (which might overflow) is the
1173+ // same as a <= INT_MAX - b (no overflow)
1174+ return a <= INT_MAX - b;
1175 }
1176
1177 // returns 1 if the product is valid, 0 on overflow.
1178 // negative factors are considered invalid.
1179-static int stbi__mul2sizes_valid(int a, int b)
1180+static int
1181+stbi__mul2sizes_valid(int a, int b)
1182 {
1183- if (a < 0 || b < 0) return 0;
1184- if (b == 0) return 1; // mul-by-0 is always safe
1185- // portable way to check for no overflows in a*b
1186- return a <= INT_MAX/b;
1187+ if (a < 0 || b < 0) {
1188+ return 0;
1189+ }
1190+ if (b == 0) {
1191+ return 1; // mul-by-0 is always safe
1192+ }
1193+ // portable way to check for no overflows in a*b
1194+ return a <= INT_MAX / b;
1195 }
1196
1197-#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
1198+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || \
1199+ !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
1200 // returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
1201-static int stbi__mad2sizes_valid(int a, int b, int add)
1202+static int
1203+stbi__mad2sizes_valid(int a, int b, int add)
1204 {
1205- return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
1206+ return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a * b, add);
1207 }
1208 #endif
1209
1210 // returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
1211-static int stbi__mad3sizes_valid(int a, int b, int c, int add)
1212+static int
1213+stbi__mad3sizes_valid(int a, int b, int c, int add)
1214 {
1215- return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
1216- stbi__addsizes_valid(a*b*c, add);
1217+ return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) &&
1218+ stbi__addsizes_valid(a * b * c, add);
1219 }
1220
1221-// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
1222+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't
1223+// overflow
1224 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
1225-static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
1226+static int
1227+stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
1228 {
1229- return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
1230- stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
1231+ return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) &&
1232+ stbi__mul2sizes_valid(a * b * c, d) &&
1233+ stbi__addsizes_valid(a * b * c * d, add);
1234 }
1235 #endif
1236
1237-#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
1238+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || \
1239+ !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
1240 // mallocs with size overflow checking
1241-static void *stbi__malloc_mad2(int a, int b, int add)
1242+static void *
1243+stbi__malloc_mad2(int a, int b, int add)
1244 {
1245- if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
1246- return stbi__malloc(a*b + add);
1247+ if (!stbi__mad2sizes_valid(a, b, add)) {
1248+ return NULL;
1249+ }
1250+ return stbi__malloc(a * b + add);
1251 }
1252 #endif
1253
1254-static void *stbi__malloc_mad3(int a, int b, int c, int add)
1255+static void *
1256+stbi__malloc_mad3(int a, int b, int c, int add)
1257 {
1258- if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
1259- return stbi__malloc(a*b*c + add);
1260+ if (!stbi__mad3sizes_valid(a, b, c, add)) {
1261+ return NULL;
1262+ }
1263+ return stbi__malloc(a * b * c + add);
1264 }
1265
1266 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
1267-static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
1268+static void *
1269+stbi__malloc_mad4(int a, int b, int c, int d, int add)
1270 {
1271- if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
1272- return stbi__malloc(a*b*c*d + add);
1273+ if (!stbi__mad4sizes_valid(a, b, c, d, add)) {
1274+ return NULL;
1275+ }
1276+ return stbi__malloc(a * b * c * d + add);
1277 }
1278 #endif
1279
1280-// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow.
1281-static int stbi__addints_valid(int a, int b)
1282+// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1
1283+// inclusive), 0 on overflow.
1284+static int
1285+stbi__addints_valid(int a, int b)
1286 {
1287- if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow
1288- if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0.
1289- return a <= INT_MAX - b;
1290+ if ((a >= 0) != (b >= 0)) {
1291+ return 1; // a and b have different signs, so no overflow
1292+ }
1293+ if (a < 0 && b < 0) {
1294+ return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot
1295+ // overflow since b < 0.
1296+ }
1297+ return a <= INT_MAX - b;
1298 }
1299
1300 // returns 1 if the product of two ints fits in a signed short, 0 on overflow.
1301-static int stbi__mul2shorts_valid(int a, int b)
1302-{
1303- if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow
1304- if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid
1305- if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN
1306- return a >= SHRT_MIN / b;
1307+static int
1308+stbi__mul2shorts_valid(int a, int b)
1309+{
1310+ if (b == 0 || b == -1) {
1311+ return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b
1312+ // doesn't overflow
1313+ }
1314+ if ((a >= 0) == (b >= 0)) {
1315+ return a <= SHRT_MAX /
1316+ b; // product is positive, so similar to mul2sizes_valid
1317+ }
1318+ if (b < 0) {
1319+ return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN
1320+ }
1321+ return a >= SHRT_MIN / b;
1322 }
1323
1324 // stbi__err - error
1325@@ -1088,423 +1251,524 @@ static int stbi__mul2shorts_valid(int a, int b)
1326 // stbi__errpuc - error returning pointer to unsigned char
1327
1328 #ifdef STBI_NO_FAILURE_STRINGS
1329- #define stbi__err(x,y) 0
1330+#define stbi__err(x, y) 0
1331 #elif defined(STBI_FAILURE_USERMSG)
1332- #define stbi__err(x,y) stbi__err(y)
1333+#define stbi__err(x, y) stbi__err(y)
1334 #else
1335- #define stbi__err(x,y) stbi__err(x)
1336+#define stbi__err(x, y) stbi__err(x)
1337 #endif
1338
1339-#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
1340-#define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
1341+#define stbi__errpf(x, y) ((float *)(size_t)(stbi__err(x, y) ? NULL : NULL))
1342+#define stbi__errpuc(x, y) \
1343+ ((unsigned char *)(size_t)(stbi__err(x, y) ? NULL : NULL))
1344
1345-STBIDEF void stbi_image_free(void *retval_from_stbi_load)
1346+STBIDEF void
1347+stbi_image_free(void *retval_from_stbi_load)
1348 {
1349- STBI_FREE(retval_from_stbi_load);
1350+ STBI_FREE(retval_from_stbi_load);
1351 }
1352
1353 #ifndef STBI_NO_LINEAR
1354-static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
1355+static float *
1356+stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
1357 #endif
1358
1359 #ifndef STBI_NO_HDR
1360-static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
1361+static stbi_uc *
1362+stbi__hdr_to_ldr(float *data, int x, int y, int comp);
1363 #endif
1364
1365 static int stbi__vertically_flip_on_load_global = 0;
1366
1367-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1368+STBIDEF void
1369+stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1370 {
1371- stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
1372+ stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
1373 }
1374
1375 #ifndef STBI_THREAD_LOCAL
1376-#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global
1377+#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global
1378 #else
1379-static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;
1380+static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local,
1381+ stbi__vertically_flip_on_load_set;
1382
1383-STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
1384+STBIDEF void
1385+stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
1386 {
1387- stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
1388- stbi__vertically_flip_on_load_set = 1;
1389+ stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
1390+ stbi__vertically_flip_on_load_set = 1;
1391 }
1392
1393-#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \
1394- ? stbi__vertically_flip_on_load_local \
1395- : stbi__vertically_flip_on_load_global)
1396+#define stbi__vertically_flip_on_load \
1397+ (stbi__vertically_flip_on_load_set ? stbi__vertically_flip_on_load_local \
1398+ : stbi__vertically_flip_on_load_global)
1399 #endif // STBI_THREAD_LOCAL
1400
1401-static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
1402-{
1403- memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
1404- ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
1405- ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
1406- ri->num_channels = 0;
1407-
1408- // test the formats with a very explicit header first (at least a FOURCC
1409- // or distinctive magic number first)
1410- #ifndef STBI_NO_PNG
1411- if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
1412- #endif
1413- #ifndef STBI_NO_BMP
1414- if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri);
1415- #endif
1416- #ifndef STBI_NO_GIF
1417- if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri);
1418- #endif
1419- #ifndef STBI_NO_PSD
1420- if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
1421- #else
1422- STBI_NOTUSED(bpc);
1423- #endif
1424- #ifndef STBI_NO_PIC
1425- if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
1426- #endif
1427-
1428- // then the formats that can end up attempting to load with just 1 or 2
1429- // bytes matching expectations; these are prone to false positives, so
1430- // try them later
1431- #ifndef STBI_NO_JPEG
1432- if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
1433- #endif
1434- #ifndef STBI_NO_PNM
1435- if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
1436- #endif
1437-
1438- #ifndef STBI_NO_HDR
1439- if (stbi__hdr_test(s)) {
1440- float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
1441- return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
1442- }
1443- #endif
1444-
1445- #ifndef STBI_NO_TGA
1446- // test tga last because it's a crappy test!
1447- if (stbi__tga_test(s))
1448- return stbi__tga_load(s,x,y,comp,req_comp, ri);
1449- #endif
1450-
1451- return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
1452-}
1453-
1454-static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1455-{
1456- int i;
1457- int img_len = w * h * channels;
1458- stbi_uc *reduced;
1459-
1460- reduced = (stbi_uc *) stbi__malloc(img_len);
1461- if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1462-
1463- for (i = 0; i < img_len; ++i)
1464- reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1465-
1466- STBI_FREE(orig);
1467- return reduced;
1468-}
1469-
1470-static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1471-{
1472- int i;
1473- int img_len = w * h * channels;
1474- stbi__uint16 *enlarged;
1475-
1476- enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1477- if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1478-
1479- for (i = 0; i < img_len; ++i)
1480- enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1481-
1482- STBI_FREE(orig);
1483- return enlarged;
1484-}
1485-
1486-static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1487-{
1488- int row;
1489- size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1490- stbi_uc temp[2048];
1491- stbi_uc *bytes = (stbi_uc *)image;
1492-
1493- for (row = 0; row < (h>>1); row++) {
1494- stbi_uc *row0 = bytes + row*bytes_per_row;
1495- stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1496- // swap row0 with row1
1497- size_t bytes_left = bytes_per_row;
1498- while (bytes_left) {
1499- size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1500- memcpy(temp, row0, bytes_copy);
1501- memcpy(row0, row1, bytes_copy);
1502- memcpy(row1, temp, bytes_copy);
1503- row0 += bytes_copy;
1504- row1 += bytes_copy;
1505- bytes_left -= bytes_copy;
1506- }
1507- }
1508+static void *
1509+stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp,
1510+ stbi__result_info *ri, int bpc)
1511+{
1512+ memset(ri, 0,
1513+ sizeof(*ri)); // make sure it's initialized if we add new fields
1514+ ri->bits_per_channel =
1515+ 8; // default is 8 so most paths don't have to be changed
1516+ ri->channel_order =
1517+ STBI_ORDER_RGB; // all current input & output are this, but this is here
1518+ // so we can add BGR order
1519+ ri->num_channels = 0;
1520+
1521+// test the formats with a very explicit header first (at least a FOURCC
1522+// or distinctive magic number first)
1523+#ifndef STBI_NO_PNG
1524+ if (stbi__png_test(s)) {
1525+ return stbi__png_load(s, x, y, comp, req_comp, ri);
1526+ }
1527+#endif
1528+#ifndef STBI_NO_BMP
1529+ if (stbi__bmp_test(s)) {
1530+ return stbi__bmp_load(s, x, y, comp, req_comp, ri);
1531+ }
1532+#endif
1533+#ifndef STBI_NO_GIF
1534+ if (stbi__gif_test(s)) {
1535+ return stbi__gif_load(s, x, y, comp, req_comp, ri);
1536+ }
1537+#endif
1538+#ifndef STBI_NO_PSD
1539+ if (stbi__psd_test(s)) {
1540+ return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
1541+ }
1542+#else
1543+ STBI_NOTUSED(bpc);
1544+#endif
1545+#ifndef STBI_NO_PIC
1546+ if (stbi__pic_test(s)) {
1547+ return stbi__pic_load(s, x, y, comp, req_comp, ri);
1548+ }
1549+#endif
1550+
1551+// then the formats that can end up attempting to load with just 1 or 2
1552+// bytes matching expectations; these are prone to false positives, so
1553+// try them later
1554+#ifndef STBI_NO_JPEG
1555+ if (stbi__jpeg_test(s)) {
1556+ return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
1557+ }
1558+#endif
1559+#ifndef STBI_NO_PNM
1560+ if (stbi__pnm_test(s)) {
1561+ return stbi__pnm_load(s, x, y, comp, req_comp, ri);
1562+ }
1563+#endif
1564+
1565+#ifndef STBI_NO_HDR
1566+ if (stbi__hdr_test(s)) {
1567+ float *hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
1568+ return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
1569+ }
1570+#endif
1571+
1572+#ifndef STBI_NO_TGA
1573+ // test tga last because it's a crappy test!
1574+ if (stbi__tga_test(s)) {
1575+ return stbi__tga_load(s, x, y, comp, req_comp, ri);
1576+ }
1577+#endif
1578+
1579+ return stbi__errpuc("unknown image type",
1580+ "Image not of any known type, or corrupt");
1581+}
1582+
1583+static stbi_uc *
1584+stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1585+{
1586+ int i;
1587+ int img_len = w * h * channels;
1588+ stbi_uc *reduced;
1589+
1590+ reduced = (stbi_uc *)stbi__malloc(img_len);
1591+ if (reduced == NULL) {
1592+ return stbi__errpuc("outofmem", "Out of memory");
1593+ }
1594+
1595+ for (i = 0; i < img_len; ++i) {
1596+ reduced[i] = (stbi_uc)((orig[i] >> 8) &
1597+ 0xFF); // top half of each byte is sufficient
1598+ // approx of 16->8 bit scaling
1599+ }
1600+
1601+ STBI_FREE(orig);
1602+ return reduced;
1603+}
1604+
1605+static stbi__uint16 *
1606+stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1607+{
1608+ int i;
1609+ int img_len = w * h * channels;
1610+ stbi__uint16 *enlarged;
1611+
1612+ enlarged = (stbi__uint16 *)stbi__malloc(img_len * 2);
1613+ if (enlarged == NULL) {
1614+ return (stbi__uint16 *)stbi__errpuc("outofmem", "Out of memory");
1615+ }
1616+
1617+ for (i = 0; i < img_len; ++i) {
1618+ enlarged[i] = (stbi__uint16)((orig[i] << 8) +
1619+ orig[i]); // replicate to high and low
1620+ // byte, maps 0->0, 255->0xffff
1621+ }
1622+
1623+ STBI_FREE(orig);
1624+ return enlarged;
1625+}
1626+
1627+static void
1628+stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1629+{
1630+ int row;
1631+ size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1632+ stbi_uc temp[2048];
1633+ stbi_uc *bytes = (stbi_uc *)image;
1634+
1635+ for (row = 0; row < (h >> 1); row++) {
1636+ stbi_uc *row0 = bytes + row * bytes_per_row;
1637+ stbi_uc *row1 = bytes + (h - row - 1) * bytes_per_row;
1638+ // swap row0 with row1
1639+ size_t bytes_left = bytes_per_row;
1640+ while (bytes_left) {
1641+ size_t bytes_copy =
1642+ (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1643+ memcpy(temp, row0, bytes_copy);
1644+ memcpy(row0, row1, bytes_copy);
1645+ memcpy(row1, temp, bytes_copy);
1646+ row0 += bytes_copy;
1647+ row1 += bytes_copy;
1648+ bytes_left -= bytes_copy;
1649+ }
1650+ }
1651 }
1652
1653 #ifndef STBI_NO_GIF
1654-static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1655+static void
1656+stbi__vertical_flip_slices(void *image, int w, int h, int z,
1657+ int bytes_per_pixel)
1658 {
1659- int slice;
1660- int slice_size = w * h * bytes_per_pixel;
1661+ int slice;
1662+ int slice_size = w * h * bytes_per_pixel;
1663
1664- stbi_uc *bytes = (stbi_uc *)image;
1665- for (slice = 0; slice < z; ++slice) {
1666- stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1667- bytes += slice_size;
1668- }
1669+ stbi_uc *bytes = (stbi_uc *)image;
1670+ for (slice = 0; slice < z; ++slice) {
1671+ stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1672+ bytes += slice_size;
1673+ }
1674 }
1675 #endif
1676
1677-static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1678+static unsigned char *
1679+stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp,
1680+ int req_comp)
1681 {
1682- stbi__result_info ri;
1683- void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1684+ stbi__result_info ri;
1685+ void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1686
1687- if (result == NULL)
1688- return NULL;
1689+ if (result == NULL) {
1690+ return NULL;
1691+ }
1692
1693- // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
1694- STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1695+ // it is the responsibility of the loaders to make sure we get either 8 or
1696+ // 16 bit.
1697+ STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1698
1699- if (ri.bits_per_channel != 8) {
1700- result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1701- ri.bits_per_channel = 8;
1702- }
1703+ if (ri.bits_per_channel != 8) {
1704+ result = stbi__convert_16_to_8((stbi__uint16 *)result, *x, *y,
1705+ req_comp == 0 ? *comp : req_comp);
1706+ ri.bits_per_channel = 8;
1707+ }
1708
1709- // @TODO: move stbi__convert_format to here
1710+ // @TODO: move stbi__convert_format to here
1711
1712- if (stbi__vertically_flip_on_load) {
1713- int channels = req_comp ? req_comp : *comp;
1714- stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1715- }
1716+ if (stbi__vertically_flip_on_load) {
1717+ int channels = req_comp ? req_comp : *comp;
1718+ stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1719+ }
1720
1721- return (unsigned char *) result;
1722+ return (unsigned char *)result;
1723 }
1724
1725-static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1726+static stbi__uint16 *
1727+stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp,
1728+ int req_comp)
1729 {
1730- stbi__result_info ri;
1731- void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1732+ stbi__result_info ri;
1733+ void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1734
1735- if (result == NULL)
1736- return NULL;
1737+ if (result == NULL) {
1738+ return NULL;
1739+ }
1740
1741- // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
1742- STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1743+ // it is the responsibility of the loaders to make sure we get either 8 or
1744+ // 16 bit.
1745+ STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1746
1747- if (ri.bits_per_channel != 16) {
1748- result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1749- ri.bits_per_channel = 16;
1750- }
1751+ if (ri.bits_per_channel != 16) {
1752+ result = stbi__convert_8_to_16((stbi_uc *)result, *x, *y,
1753+ req_comp == 0 ? *comp : req_comp);
1754+ ri.bits_per_channel = 16;
1755+ }
1756
1757- // @TODO: move stbi__convert_format16 to here
1758- // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1759+ // @TODO: move stbi__convert_format16 to here
1760+ // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to
1761+ // keep more precision
1762
1763- if (stbi__vertically_flip_on_load) {
1764- int channels = req_comp ? req_comp : *comp;
1765- stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1766- }
1767+ if (stbi__vertically_flip_on_load) {
1768+ int channels = req_comp ? req_comp : *comp;
1769+ stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1770+ }
1771
1772- return (stbi__uint16 *) result;
1773+ return (stbi__uint16 *)result;
1774 }
1775
1776 #if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
1777-static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1778+static void
1779+stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1780 {
1781- if (stbi__vertically_flip_on_load && result != NULL) {
1782- int channels = req_comp ? req_comp : *comp;
1783- stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1784- }
1785+ if (stbi__vertically_flip_on_load && result != NULL) {
1786+ int channels = req_comp ? req_comp : *comp;
1787+ stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1788+ }
1789 }
1790 #endif
1791
1792 #ifndef STBI_NO_STDIO
1793
1794 #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
1795-STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
1796-STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
1797+STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(
1798+ unsigned int cp, unsigned long flags, const char *str, int cbmb,
1799+ wchar_t *widestr, int cchwide);
1800+STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(
1801+ unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide,
1802+ char *str, int cbmb, const char *defchar, int *used_default);
1803 #endif
1804
1805 #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
1806-STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
1807+STBIDEF int
1808+stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t *input)
1809 {
1810- return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
1811+ return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer,
1812+ (int)bufferlen, NULL, NULL);
1813 }
1814 #endif
1815
1816-static FILE *stbi__fopen(char const *filename, char const *mode)
1817+static FILE *
1818+stbi__fopen(char const *filename, char const *mode)
1819 {
1820- FILE *f;
1821+ FILE *f;
1822 #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
1823- wchar_t wMode[64];
1824- wchar_t wFilename[1024];
1825- if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
1826- return 0;
1827-
1828- if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
1829- return 0;
1830+ wchar_t wMode[64];
1831+ wchar_t wFilename[1024];
1832+ if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename,
1833+ sizeof(wFilename) / sizeof(*wFilename))) {
1834+ return 0;
1835+ }
1836+
1837+ if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode,
1838+ sizeof(wMode) / sizeof(*wMode))) {
1839+ return 0;
1840+ }
1841
1842 #if defined(_MSC_VER) && _MSC_VER >= 1400
1843- if (0 != _wfopen_s(&f, wFilename, wMode))
1844+ if (0 != _wfopen_s(&f, wFilename, wMode)) {
1845 f = 0;
1846+ }
1847 #else
1848- f = _wfopen(wFilename, wMode);
1849+ f = _wfopen(wFilename, wMode);
1850 #endif
1851
1852 #elif defined(_MSC_VER) && _MSC_VER >= 1400
1853- if (0 != fopen_s(&f, filename, mode))
1854- f=0;
1855+ if (0 != fopen_s(&f, filename, mode)) {
1856+ f = 0;
1857+ }
1858 #else
1859- f = fopen(filename, mode);
1860+ f = fopen(filename, mode);
1861 #endif
1862- return f;
1863+ return f;
1864 }
1865
1866-
1867-STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1868+STBIDEF stbi_uc *
1869+stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1870 {
1871- FILE *f = stbi__fopen(filename, "rb");
1872- unsigned char *result;
1873- if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1874- result = stbi_load_from_file(f,x,y,comp,req_comp);
1875- fclose(f);
1876- return result;
1877+ FILE *f = stbi__fopen(filename, "rb");
1878+ unsigned char *result;
1879+ if (!f) {
1880+ return stbi__errpuc("can't fopen", "Unable to open file");
1881+ }
1882+ result = stbi_load_from_file(f, x, y, comp, req_comp);
1883+ fclose(f);
1884+ return result;
1885 }
1886
1887-STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1888+STBIDEF stbi_uc *
1889+stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1890 {
1891- unsigned char *result;
1892- stbi__context s;
1893- stbi__start_file(&s,f);
1894- result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1895- if (result) {
1896- // need to 'unget' all the characters in the IO buffer
1897- fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1898- }
1899- return result;
1900+ unsigned char *result;
1901+ stbi__context s;
1902+ stbi__start_file(&s, f);
1903+ result = stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
1904+ if (result) {
1905+ // need to 'unget' all the characters in the IO buffer
1906+ fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
1907+ }
1908+ return result;
1909 }
1910
1911-STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1912+STBIDEF stbi__uint16 *
1913+stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1914 {
1915- stbi__uint16 *result;
1916- stbi__context s;
1917- stbi__start_file(&s,f);
1918- result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1919- if (result) {
1920- // need to 'unget' all the characters in the IO buffer
1921- fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1922- }
1923- return result;
1924+ stbi__uint16 *result;
1925+ stbi__context s;
1926+ stbi__start_file(&s, f);
1927+ result = stbi__load_and_postprocess_16bit(&s, x, y, comp, req_comp);
1928+ if (result) {
1929+ // need to 'unget' all the characters in the IO buffer
1930+ fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
1931+ }
1932+ return result;
1933 }
1934
1935-STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1936+STBIDEF stbi_us *
1937+stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1938 {
1939- FILE *f = stbi__fopen(filename, "rb");
1940- stbi__uint16 *result;
1941- if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1942- result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1943- fclose(f);
1944- return result;
1945+ FILE *f = stbi__fopen(filename, "rb");
1946+ stbi__uint16 *result;
1947+ if (!f) {
1948+ return (stbi_us *)stbi__errpuc("can't fopen", "Unable to open file");
1949+ }
1950+ result = stbi_load_from_file_16(f, x, y, comp, req_comp);
1951+ fclose(f);
1952+ return result;
1953 }
1954
1955+#endif //! STBI_NO_STDIO
1956
1957-#endif //!STBI_NO_STDIO
1958-
1959-STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1960+STBIDEF stbi_us *
1961+stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y,
1962+ int *channels_in_file, int desired_channels)
1963 {
1964- stbi__context s;
1965- stbi__start_mem(&s,buffer,len);
1966- return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1967+ stbi__context s;
1968+ stbi__start_mem(&s, buffer, len);
1969+ return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file,
1970+ desired_channels);
1971 }
1972
1973-STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1974+STBIDEF stbi_us *
1975+stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x,
1976+ int *y, int *channels_in_file, int desired_channels)
1977 {
1978- stbi__context s;
1979- stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1980- return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1981+ stbi__context s;
1982+ stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1983+ return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file,
1984+ desired_channels);
1985 }
1986
1987-STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1988+STBIDEF stbi_uc *
1989+stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp,
1990+ int req_comp)
1991 {
1992- stbi__context s;
1993- stbi__start_mem(&s,buffer,len);
1994- return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1995+ stbi__context s;
1996+ stbi__start_mem(&s, buffer, len);
1997+ return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
1998 }
1999
2000-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
2001+STBIDEF stbi_uc *
2002+stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x,
2003+ int *y, int *comp, int req_comp)
2004 {
2005- stbi__context s;
2006- stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
2007- return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
2008+ stbi__context s;
2009+ stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
2010+ return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
2011 }
2012
2013 #ifndef STBI_NO_GIF
2014-STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
2015+STBIDEF stbi_uc *
2016+stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x,
2017+ int *y, int *z, int *comp, int req_comp)
2018 {
2019- unsigned char *result;
2020- stbi__context s;
2021- stbi__start_mem(&s,buffer,len);
2022+ unsigned char *result;
2023+ stbi__context s;
2024+ stbi__start_mem(&s, buffer, len);
2025
2026- result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
2027- if (stbi__vertically_flip_on_load) {
2028- stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
2029- }
2030+ result = (unsigned char *)stbi__load_gif_main(&s, delays, x, y, z, comp,
2031+ req_comp);
2032+ if (stbi__vertically_flip_on_load) {
2033+ stbi__vertical_flip_slices(result, *x, *y, *z, *comp);
2034+ }
2035
2036- return result;
2037+ return result;
2038 }
2039 #endif
2040
2041 #ifndef STBI_NO_LINEAR
2042-static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
2043+static float *
2044+stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
2045 {
2046- unsigned char *data;
2047- #ifndef STBI_NO_HDR
2048- if (stbi__hdr_test(s)) {
2049- stbi__result_info ri;
2050- float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
2051- if (hdr_data)
2052- stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
2053- return hdr_data;
2054- }
2055- #endif
2056- data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
2057- if (data)
2058- return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
2059- return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
2060+ unsigned char *data;
2061+#ifndef STBI_NO_HDR
2062+ if (stbi__hdr_test(s)) {
2063+ stbi__result_info ri;
2064+ float *hdr_data = stbi__hdr_load(s, x, y, comp, req_comp, &ri);
2065+ if (hdr_data) {
2066+ stbi__float_postprocess(hdr_data, x, y, comp, req_comp);
2067+ }
2068+ return hdr_data;
2069+ }
2070+#endif
2071+ data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
2072+ if (data) {
2073+ return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
2074+ }
2075+ return stbi__errpf("unknown image type",
2076+ "Image not of any known type, or corrupt");
2077 }
2078
2079-STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2080+STBIDEF float *
2081+stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y,
2082+ int *comp, int req_comp)
2083 {
2084- stbi__context s;
2085- stbi__start_mem(&s,buffer,len);
2086- return stbi__loadf_main(&s,x,y,comp,req_comp);
2087+ stbi__context s;
2088+ stbi__start_mem(&s, buffer, len);
2089+ return stbi__loadf_main(&s, x, y, comp, req_comp);
2090 }
2091
2092-STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
2093+STBIDEF float *
2094+stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x,
2095+ int *y, int *comp, int req_comp)
2096 {
2097- stbi__context s;
2098- stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
2099- return stbi__loadf_main(&s,x,y,comp,req_comp);
2100+ stbi__context s;
2101+ stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
2102+ return stbi__loadf_main(&s, x, y, comp, req_comp);
2103 }
2104
2105 #ifndef STBI_NO_STDIO
2106-STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
2107+STBIDEF float *
2108+stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
2109 {
2110- float *result;
2111- FILE *f = stbi__fopen(filename, "rb");
2112- if (!f) return stbi__errpf("can't fopen", "Unable to open file");
2113- result = stbi_loadf_from_file(f,x,y,comp,req_comp);
2114- fclose(f);
2115- return result;
2116+ float *result;
2117+ FILE *f = stbi__fopen(filename, "rb");
2118+ if (!f) {
2119+ return stbi__errpf("can't fopen", "Unable to open file");
2120+ }
2121+ result = stbi_loadf_from_file(f, x, y, comp, req_comp);
2122+ fclose(f);
2123+ return result;
2124 }
2125
2126-STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2127+STBIDEF float *
2128+stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2129 {
2130- stbi__context s;
2131- stbi__start_file(&s,f);
2132- return stbi__loadf_main(&s,x,y,comp,req_comp);
2133+ stbi__context s;
2134+ stbi__start_file(&s, f);
2135+ return stbi__loadf_main(&s, x, y, comp, req_comp);
2136 }
2137 #endif // !STBI_NO_STDIO
2138
2139@@ -1514,222 +1778,262 @@ STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_
2140 // defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
2141 // reports false!
2142
2143-STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
2144+STBIDEF int
2145+stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
2146 {
2147- #ifndef STBI_NO_HDR
2148- stbi__context s;
2149- stbi__start_mem(&s,buffer,len);
2150- return stbi__hdr_test(&s);
2151- #else
2152- STBI_NOTUSED(buffer);
2153- STBI_NOTUSED(len);
2154- return 0;
2155- #endif
2156+#ifndef STBI_NO_HDR
2157+ stbi__context s;
2158+ stbi__start_mem(&s, buffer, len);
2159+ return stbi__hdr_test(&s);
2160+#else
2161+ STBI_NOTUSED(buffer);
2162+ STBI_NOTUSED(len);
2163+ return 0;
2164+#endif
2165 }
2166
2167 #ifndef STBI_NO_STDIO
2168-STBIDEF int stbi_is_hdr (char const *filename)
2169-{
2170- FILE *f = stbi__fopen(filename, "rb");
2171- int result=0;
2172- if (f) {
2173- result = stbi_is_hdr_from_file(f);
2174- fclose(f);
2175- }
2176- return result;
2177-}
2178-
2179-STBIDEF int stbi_is_hdr_from_file(FILE *f)
2180-{
2181- #ifndef STBI_NO_HDR
2182- long pos = ftell(f);
2183- int res;
2184- stbi__context s;
2185- stbi__start_file(&s,f);
2186- res = stbi__hdr_test(&s);
2187- fseek(f, pos, SEEK_SET);
2188- return res;
2189- #else
2190- STBI_NOTUSED(f);
2191- return 0;
2192- #endif
2193+STBIDEF int
2194+stbi_is_hdr(char const *filename)
2195+{
2196+ FILE *f = stbi__fopen(filename, "rb");
2197+ int result = 0;
2198+ if (f) {
2199+ result = stbi_is_hdr_from_file(f);
2200+ fclose(f);
2201+ }
2202+ return result;
2203+}
2204+
2205+STBIDEF int
2206+stbi_is_hdr_from_file(FILE *f)
2207+{
2208+#ifndef STBI_NO_HDR
2209+ long pos = ftell(f);
2210+ int res;
2211+ stbi__context s;
2212+ stbi__start_file(&s, f);
2213+ res = stbi__hdr_test(&s);
2214+ fseek(f, pos, SEEK_SET);
2215+ return res;
2216+#else
2217+ STBI_NOTUSED(f);
2218+ return 0;
2219+#endif
2220 }
2221 #endif // !STBI_NO_STDIO
2222
2223-STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
2224+STBIDEF int
2225+stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
2226 {
2227- #ifndef STBI_NO_HDR
2228- stbi__context s;
2229- stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
2230- return stbi__hdr_test(&s);
2231- #else
2232- STBI_NOTUSED(clbk);
2233- STBI_NOTUSED(user);
2234- return 0;
2235- #endif
2236+#ifndef STBI_NO_HDR
2237+ stbi__context s;
2238+ stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
2239+ return stbi__hdr_test(&s);
2240+#else
2241+ STBI_NOTUSED(clbk);
2242+ STBI_NOTUSED(user);
2243+ return 0;
2244+#endif
2245 }
2246
2247 #ifndef STBI_NO_LINEAR
2248-static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
2249+static float stbi__l2h_gamma = 2.2f, stbi__l2h_scale = 1.0f;
2250
2251-STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
2252-STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
2253+STBIDEF void
2254+stbi_ldr_to_hdr_gamma(float gamma)
2255+{
2256+ stbi__l2h_gamma = gamma;
2257+}
2258+STBIDEF void
2259+stbi_ldr_to_hdr_scale(float scale)
2260+{
2261+ stbi__l2h_scale = scale;
2262+}
2263 #endif
2264
2265-static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
2266-
2267-STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
2268-STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
2269+static float stbi__h2l_gamma_i = 1.0f / 2.2f, stbi__h2l_scale_i = 1.0f;
2270
2271+STBIDEF void
2272+stbi_hdr_to_ldr_gamma(float gamma)
2273+{
2274+ stbi__h2l_gamma_i = 1 / gamma;
2275+}
2276+STBIDEF void
2277+stbi_hdr_to_ldr_scale(float scale)
2278+{
2279+ stbi__h2l_scale_i = 1 / scale;
2280+}
2281
2282 //////////////////////////////////////////////////////////////////////////////
2283 //
2284 // Common code used by all image loaders
2285 //
2286
2287-enum
2288-{
2289- STBI__SCAN_load=0,
2290- STBI__SCAN_type,
2291- STBI__SCAN_header
2292-};
2293-
2294-static void stbi__refill_buffer(stbi__context *s)
2295-{
2296- int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
2297- s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original);
2298- if (n == 0) {
2299- // at end of file, treat same as if from memory, but need to handle case
2300- // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
2301- s->read_from_callbacks = 0;
2302- s->img_buffer = s->buffer_start;
2303- s->img_buffer_end = s->buffer_start+1;
2304- *s->img_buffer = 0;
2305- } else {
2306- s->img_buffer = s->buffer_start;
2307- s->img_buffer_end = s->buffer_start + n;
2308- }
2309-}
2310-
2311-stbi_inline static stbi_uc stbi__get8(stbi__context *s)
2312-{
2313- if (s->img_buffer < s->img_buffer_end)
2314- return *s->img_buffer++;
2315- if (s->read_from_callbacks) {
2316- stbi__refill_buffer(s);
2317- return *s->img_buffer++;
2318- }
2319- return 0;
2320-}
2321-
2322-#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
2323+enum { STBI__SCAN_load = 0, STBI__SCAN_type, STBI__SCAN_header };
2324+
2325+static void
2326+stbi__refill_buffer(stbi__context *s)
2327+{
2328+ int n = (s->io.read)(s->io_user_data, (char *)s->buffer_start, s->buflen);
2329+ s->callback_already_read += (int)(s->img_buffer - s->img_buffer_original);
2330+ if (n == 0) {
2331+ // at end of file, treat same as if from memory, but need to handle case
2332+ // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
2333+ s->read_from_callbacks = 0;
2334+ s->img_buffer = s->buffer_start;
2335+ s->img_buffer_end = s->buffer_start + 1;
2336+ *s->img_buffer = 0;
2337+ } else {
2338+ s->img_buffer = s->buffer_start;
2339+ s->img_buffer_end = s->buffer_start + n;
2340+ }
2341+}
2342+
2343+stbi_inline static stbi_uc
2344+stbi__get8(stbi__context *s)
2345+{
2346+ if (s->img_buffer < s->img_buffer_end) {
2347+ return *s->img_buffer++;
2348+ }
2349+ if (s->read_from_callbacks) {
2350+ stbi__refill_buffer(s);
2351+ return *s->img_buffer++;
2352+ }
2353+ return 0;
2354+}
2355+
2356+#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && \
2357+ defined(STBI_NO_PNM)
2358 // nothing
2359 #else
2360-stbi_inline static int stbi__at_eof(stbi__context *s)
2361-{
2362- if (s->io.read) {
2363- if (!(s->io.eof)(s->io_user_data)) return 0;
2364- // if feof() is true, check if buffer = end
2365- // special case: we've only got the special 0 character at the end
2366- if (s->read_from_callbacks == 0) return 1;
2367- }
2368-
2369- return s->img_buffer >= s->img_buffer_end;
2370+stbi_inline static int
2371+stbi__at_eof(stbi__context *s)
2372+{
2373+ if (s->io.read) {
2374+ if (!(s->io.eof)(s->io_user_data)) {
2375+ return 0;
2376+ }
2377+ // if feof() is true, check if buffer = end
2378+ // special case: we've only got the special 0 character at the end
2379+ if (s->read_from_callbacks == 0) {
2380+ return 1;
2381+ }
2382+ }
2383+
2384+ return s->img_buffer >= s->img_buffer_end;
2385 }
2386 #endif
2387
2388-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
2389+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && \
2390+ defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && \
2391+ defined(STBI_NO_PIC)
2392 // nothing
2393 #else
2394-static void stbi__skip(stbi__context *s, int n)
2395-{
2396- if (n == 0) return; // already there!
2397- if (n < 0) {
2398- s->img_buffer = s->img_buffer_end;
2399- return;
2400- }
2401- if (s->io.read) {
2402- int blen = (int) (s->img_buffer_end - s->img_buffer);
2403- if (blen < n) {
2404- s->img_buffer = s->img_buffer_end;
2405- (s->io.skip)(s->io_user_data, n - blen);
2406- return;
2407- }
2408- }
2409- s->img_buffer += n;
2410-}
2411-#endif
2412-
2413-#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
2414+static void
2415+stbi__skip(stbi__context *s, int n)
2416+{
2417+ if (n == 0) {
2418+ return; // already there!
2419+ }
2420+ if (n < 0) {
2421+ s->img_buffer = s->img_buffer_end;
2422+ return;
2423+ }
2424+ if (s->io.read) {
2425+ int blen = (int)(s->img_buffer_end - s->img_buffer);
2426+ if (blen < n) {
2427+ s->img_buffer = s->img_buffer_end;
2428+ (s->io.skip)(s->io_user_data, n - blen);
2429+ return;
2430+ }
2431+ }
2432+ s->img_buffer += n;
2433+}
2434+#endif
2435+
2436+#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && \
2437+ defined(STBI_NO_PNM)
2438 // nothing
2439 #else
2440-static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
2441-{
2442- if (s->io.read) {
2443- int blen = (int) (s->img_buffer_end - s->img_buffer);
2444- if (blen < n) {
2445- int res, count;
2446-
2447- memcpy(buffer, s->img_buffer, blen);
2448-
2449- count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
2450- res = (count == (n-blen));
2451- s->img_buffer = s->img_buffer_end;
2452- return res;
2453- }
2454- }
2455-
2456- if (s->img_buffer+n <= s->img_buffer_end) {
2457- memcpy(buffer, s->img_buffer, n);
2458- s->img_buffer += n;
2459- return 1;
2460- } else
2461- return 0;
2462+static int
2463+stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
2464+{
2465+ if (s->io.read) {
2466+ int blen = (int)(s->img_buffer_end - s->img_buffer);
2467+ if (blen < n) {
2468+ int res, count;
2469+
2470+ memcpy(buffer, s->img_buffer, blen);
2471+
2472+ count =
2473+ (s->io.read)(s->io_user_data, (char *)buffer + blen, n - blen);
2474+ res = (count == (n - blen));
2475+ s->img_buffer = s->img_buffer_end;
2476+ return res;
2477+ }
2478+ }
2479+
2480+ if (s->img_buffer + n <= s->img_buffer_end) {
2481+ memcpy(buffer, s->img_buffer, n);
2482+ s->img_buffer += n;
2483+ return 1;
2484+ } else {
2485+ return 0;
2486+ }
2487 }
2488 #endif
2489
2490-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
2491+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && \
2492+ defined(STBI_NO_PIC)
2493 // nothing
2494 #else
2495-static int stbi__get16be(stbi__context *s)
2496+static int
2497+stbi__get16be(stbi__context *s)
2498 {
2499- int z = stbi__get8(s);
2500- return (z << 8) + stbi__get8(s);
2501+ int z = stbi__get8(s);
2502+ return (z << 8) + stbi__get8(s);
2503 }
2504 #endif
2505
2506 #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
2507 // nothing
2508 #else
2509-static stbi__uint32 stbi__get32be(stbi__context *s)
2510+static stbi__uint32
2511+stbi__get32be(stbi__context *s)
2512 {
2513- stbi__uint32 z = stbi__get16be(s);
2514- return (z << 16) + stbi__get16be(s);
2515+ stbi__uint32 z = stbi__get16be(s);
2516+ return (z << 16) + stbi__get16be(s);
2517 }
2518 #endif
2519
2520 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
2521 // nothing
2522 #else
2523-static int stbi__get16le(stbi__context *s)
2524+static int
2525+stbi__get16le(stbi__context *s)
2526 {
2527- int z = stbi__get8(s);
2528- return z + (stbi__get8(s) << 8);
2529+ int z = stbi__get8(s);
2530+ return z + (stbi__get8(s) << 8);
2531 }
2532 #endif
2533
2534 #ifndef STBI_NO_BMP
2535-static stbi__uint32 stbi__get32le(stbi__context *s)
2536+static stbi__uint32
2537+stbi__get32le(stbi__context *s)
2538 {
2539- stbi__uint32 z = stbi__get16le(s);
2540- z += (stbi__uint32)stbi__get16le(s) << 16;
2541- return z;
2542+ stbi__uint32 z = stbi__get16le(s);
2543+ z += (stbi__uint32)stbi__get16le(s) << 16;
2544+ return z;
2545 }
2546 #endif
2547
2548-#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings
2549+#define STBI__BYTECAST(x) \
2550+ ((stbi_uc)((x) & 255)) // truncate int to byte without warnings
2551
2552-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
2553+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && \
2554+ defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && \
2555+ defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
2556 // nothing
2557 #else
2558 //////////////////////////////////////////////////////////////////////////////
2559@@ -1743,169 +2047,327 @@ static stbi__uint32 stbi__get32le(stbi__context *s)
2560 // assume data buffer is malloced, so malloc a new one and free that one
2561 // only failure mode is malloc failing
2562
2563-static stbi_uc stbi__compute_y(int r, int g, int b)
2564+static stbi_uc
2565+stbi__compute_y(int r, int g, int b)
2566 {
2567- return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
2568+ return (stbi_uc)(((r * 77) + (g * 150) + (29 * b)) >> 8);
2569 }
2570 #endif
2571
2572-#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
2573+#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && \
2574+ defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && \
2575+ defined(STBI_NO_PNM)
2576 // nothing
2577 #else
2578-static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
2579-{
2580- int i,j;
2581- unsigned char *good;
2582-
2583- if (req_comp == img_n) return data;
2584- STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
2585-
2586- good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
2587- if (good == NULL) {
2588- STBI_FREE(data);
2589- return stbi__errpuc("outofmem", "Out of memory");
2590- }
2591-
2592- for (j=0; j < (int) y; ++j) {
2593- unsigned char *src = data + j * x * img_n ;
2594- unsigned char *dest = good + j * x * req_comp;
2595-
2596- #define STBI__COMBO(a,b) ((a)*8+(b))
2597- #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
2598- // convert source image with img_n components to one with req_comp components;
2599- // avoid switch per pixel, so use switch per scanline and massive macros
2600- switch (STBI__COMBO(img_n, req_comp)) {
2601- STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break;
2602- STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
2603- STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break;
2604- STBI__CASE(2,1) { dest[0]=src[0]; } break;
2605- STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
2606- STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break;
2607- STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break;
2608- STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break;
2609- STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break;
2610- STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break;
2611- STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
2612- STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break;
2613- default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion");
2614- }
2615- #undef STBI__CASE
2616- }
2617-
2618- STBI_FREE(data);
2619- return good;
2620+static unsigned char *
2621+stbi__convert_format(unsigned char *data, int img_n, int req_comp,
2622+ unsigned int x, unsigned int y)
2623+{
2624+ int i, j;
2625+ unsigned char *good;
2626+
2627+ if (req_comp == img_n) {
2628+ return data;
2629+ }
2630+ STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
2631+
2632+ good = (unsigned char *)stbi__malloc_mad3(req_comp, x, y, 0);
2633+ if (good == NULL) {
2634+ STBI_FREE(data);
2635+ return stbi__errpuc("outofmem", "Out of memory");
2636+ }
2637+
2638+ for (j = 0; j < (int)y; ++j) {
2639+ unsigned char *src = data + j * x * img_n;
2640+ unsigned char *dest = good + j * x * req_comp;
2641+
2642+#define STBI__COMBO(a, b) ((a) * 8 + (b))
2643+#define STBI__CASE(a, b) \
2644+ case STBI__COMBO(a, b): \
2645+ for (i = x - 1; i >= 0; --i, src += a, dest += b)
2646+ // convert source image with img_n components to one with req_comp
2647+ // components; avoid switch per pixel, so use switch per scanline and
2648+ // massive macros
2649+ switch (STBI__COMBO(img_n, req_comp)) {
2650+ STBI__CASE(1, 2)
2651+ {
2652+ dest[0] = src[0];
2653+ dest[1] = 255;
2654+ }
2655+ break;
2656+ STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
2657+ break;
2658+ STBI__CASE(1, 4)
2659+ {
2660+ dest[0] = dest[1] = dest[2] = src[0];
2661+ dest[3] = 255;
2662+ }
2663+ break;
2664+ STBI__CASE(2, 1) { dest[0] = src[0]; }
2665+ break;
2666+ STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
2667+ break;
2668+ STBI__CASE(2, 4)
2669+ {
2670+ dest[0] = dest[1] = dest[2] = src[0];
2671+ dest[3] = src[1];
2672+ }
2673+ break;
2674+ STBI__CASE(3, 4)
2675+ {
2676+ dest[0] = src[0];
2677+ dest[1] = src[1];
2678+ dest[2] = src[2];
2679+ dest[3] = 255;
2680+ }
2681+ break;
2682+ STBI__CASE(3, 1)
2683+ {
2684+ dest[0] = stbi__compute_y(src[0], src[1], src[2]);
2685+ }
2686+ break;
2687+ STBI__CASE(3, 2)
2688+ {
2689+ dest[0] = stbi__compute_y(src[0], src[1], src[2]);
2690+ dest[1] = 255;
2691+ }
2692+ break;
2693+ STBI__CASE(4, 1)
2694+ {
2695+ dest[0] = stbi__compute_y(src[0], src[1], src[2]);
2696+ }
2697+ break;
2698+ STBI__CASE(4, 2)
2699+ {
2700+ dest[0] = stbi__compute_y(src[0], src[1], src[2]);
2701+ dest[1] = src[3];
2702+ }
2703+ break;
2704+ STBI__CASE(4, 3)
2705+ {
2706+ dest[0] = src[0];
2707+ dest[1] = src[1];
2708+ dest[2] = src[2];
2709+ }
2710+ break;
2711+ default:
2712+ STBI_ASSERT(0);
2713+ STBI_FREE(data);
2714+ STBI_FREE(good);
2715+ return stbi__errpuc("unsupported", "Unsupported format conversion");
2716+ }
2717+#undef STBI__CASE
2718+ }
2719+
2720+ STBI_FREE(data);
2721+ return good;
2722 }
2723 #endif
2724
2725 #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
2726 // nothing
2727 #else
2728-static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
2729+static stbi__uint16
2730+stbi__compute_y_16(int r, int g, int b)
2731 {
2732- return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8);
2733+ return (stbi__uint16)(((r * 77) + (g * 150) + (29 * b)) >> 8);
2734 }
2735 #endif
2736
2737 #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
2738 // nothing
2739 #else
2740-static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
2741-{
2742- int i,j;
2743- stbi__uint16 *good;
2744-
2745- if (req_comp == img_n) return data;
2746- STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
2747-
2748- good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
2749- if (good == NULL) {
2750- STBI_FREE(data);
2751- return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
2752- }
2753-
2754- for (j=0; j < (int) y; ++j) {
2755- stbi__uint16 *src = data + j * x * img_n ;
2756- stbi__uint16 *dest = good + j * x * req_comp;
2757-
2758- #define STBI__COMBO(a,b) ((a)*8+(b))
2759- #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
2760- // convert source image with img_n components to one with req_comp components;
2761- // avoid switch per pixel, so use switch per scanline and massive macros
2762- switch (STBI__COMBO(img_n, req_comp)) {
2763- STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break;
2764- STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
2765- STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break;
2766- STBI__CASE(2,1) { dest[0]=src[0]; } break;
2767- STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
2768- STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break;
2769- STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break;
2770- STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
2771- STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
2772- STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
2773- STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
2774- STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break;
2775- default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
2776- }
2777- #undef STBI__CASE
2778- }
2779-
2780- STBI_FREE(data);
2781- return good;
2782+static stbi__uint16 *
2783+stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp,
2784+ unsigned int x, unsigned int y)
2785+{
2786+ int i, j;
2787+ stbi__uint16 *good;
2788+
2789+ if (req_comp == img_n) {
2790+ return data;
2791+ }
2792+ STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
2793+
2794+ good = (stbi__uint16 *)stbi__malloc(req_comp * x * y * 2);
2795+ if (good == NULL) {
2796+ STBI_FREE(data);
2797+ return (stbi__uint16 *)stbi__errpuc("outofmem", "Out of memory");
2798+ }
2799+
2800+ for (j = 0; j < (int)y; ++j) {
2801+ stbi__uint16 *src = data + j * x * img_n;
2802+ stbi__uint16 *dest = good + j * x * req_comp;
2803+
2804+#define STBI__COMBO(a, b) ((a) * 8 + (b))
2805+#define STBI__CASE(a, b) \
2806+ case STBI__COMBO(a, b): \
2807+ for (i = x - 1; i >= 0; --i, src += a, dest += b)
2808+ // convert source image with img_n components to one with req_comp
2809+ // components; avoid switch per pixel, so use switch per scanline and
2810+ // massive macros
2811+ switch (STBI__COMBO(img_n, req_comp)) {
2812+ STBI__CASE(1, 2)
2813+ {
2814+ dest[0] = src[0];
2815+ dest[1] = 0xffff;
2816+ }
2817+ break;
2818+ STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
2819+ break;
2820+ STBI__CASE(1, 4)
2821+ {
2822+ dest[0] = dest[1] = dest[2] = src[0];
2823+ dest[3] = 0xffff;
2824+ }
2825+ break;
2826+ STBI__CASE(2, 1) { dest[0] = src[0]; }
2827+ break;
2828+ STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
2829+ break;
2830+ STBI__CASE(2, 4)
2831+ {
2832+ dest[0] = dest[1] = dest[2] = src[0];
2833+ dest[3] = src[1];
2834+ }
2835+ break;
2836+ STBI__CASE(3, 4)
2837+ {
2838+ dest[0] = src[0];
2839+ dest[1] = src[1];
2840+ dest[2] = src[2];
2841+ dest[3] = 0xffff;
2842+ }
2843+ break;
2844+ STBI__CASE(3, 1)
2845+ {
2846+ dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
2847+ }
2848+ break;
2849+ STBI__CASE(3, 2)
2850+ {
2851+ dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
2852+ dest[1] = 0xffff;
2853+ }
2854+ break;
2855+ STBI__CASE(4, 1)
2856+ {
2857+ dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
2858+ }
2859+ break;
2860+ STBI__CASE(4, 2)
2861+ {
2862+ dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
2863+ dest[1] = src[3];
2864+ }
2865+ break;
2866+ STBI__CASE(4, 3)
2867+ {
2868+ dest[0] = src[0];
2869+ dest[1] = src[1];
2870+ dest[2] = src[2];
2871+ }
2872+ break;
2873+ default:
2874+ STBI_ASSERT(0);
2875+ STBI_FREE(data);
2876+ STBI_FREE(good);
2877+ return (stbi__uint16 *)stbi__errpuc(
2878+ "unsupported", "Unsupported format conversion");
2879+ }
2880+#undef STBI__CASE
2881+ }
2882+
2883+ STBI_FREE(data);
2884+ return good;
2885 }
2886 #endif
2887
2888 #ifndef STBI_NO_LINEAR
2889-static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
2890-{
2891- int i,k,n;
2892- float *output;
2893- if (!data) return NULL;
2894- output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
2895- if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
2896- // compute number of non-alpha components
2897- if (comp & 1) n = comp; else n = comp-1;
2898- for (i=0; i < x*y; ++i) {
2899- for (k=0; k < n; ++k) {
2900- output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
2901- }
2902- }
2903- if (n < comp) {
2904- for (i=0; i < x*y; ++i) {
2905- output[i*comp + n] = data[i*comp + n]/255.0f;
2906- }
2907- }
2908- STBI_FREE(data);
2909- return output;
2910+static float *
2911+stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
2912+{
2913+ int i, k, n;
2914+ float *output;
2915+ if (!data) {
2916+ return NULL;
2917+ }
2918+ output = (float *)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
2919+ if (output == NULL) {
2920+ STBI_FREE(data);
2921+ return stbi__errpf("outofmem", "Out of memory");
2922+ }
2923+ // compute number of non-alpha components
2924+ if (comp & 1) {
2925+ n = comp;
2926+ } else {
2927+ n = comp - 1;
2928+ }
2929+ for (i = 0; i < x * y; ++i) {
2930+ for (k = 0; k < n; ++k) {
2931+ output[i * comp + k] =
2932+ (float)(pow(data[i * comp + k] / 255.0f, stbi__l2h_gamma) *
2933+ stbi__l2h_scale);
2934+ }
2935+ }
2936+ if (n < comp) {
2937+ for (i = 0; i < x * y; ++i) {
2938+ output[i * comp + n] = data[i * comp + n] / 255.0f;
2939+ }
2940+ }
2941+ STBI_FREE(data);
2942+ return output;
2943 }
2944 #endif
2945
2946 #ifndef STBI_NO_HDR
2947-#define stbi__float2int(x) ((int) (x))
2948-static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
2949-{
2950- int i,k,n;
2951- stbi_uc *output;
2952- if (!data) return NULL;
2953- output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
2954- if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
2955- // compute number of non-alpha components
2956- if (comp & 1) n = comp; else n = comp-1;
2957- for (i=0; i < x*y; ++i) {
2958- for (k=0; k < n; ++k) {
2959- float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
2960- if (z < 0) z = 0;
2961- if (z > 255) z = 255;
2962- output[i*comp + k] = (stbi_uc) stbi__float2int(z);
2963- }
2964- if (k < comp) {
2965- float z = data[i*comp+k] * 255 + 0.5f;
2966- if (z < 0) z = 0;
2967- if (z > 255) z = 255;
2968- output[i*comp + k] = (stbi_uc) stbi__float2int(z);
2969- }
2970- }
2971- STBI_FREE(data);
2972- return output;
2973+#define stbi__float2int(x) ((int)(x))
2974+static stbi_uc *
2975+stbi__hdr_to_ldr(float *data, int x, int y, int comp)
2976+{
2977+ int i, k, n;
2978+ stbi_uc *output;
2979+ if (!data) {
2980+ return NULL;
2981+ }
2982+ output = (stbi_uc *)stbi__malloc_mad3(x, y, comp, 0);
2983+ if (output == NULL) {
2984+ STBI_FREE(data);
2985+ return stbi__errpuc("outofmem", "Out of memory");
2986+ }
2987+ // compute number of non-alpha components
2988+ if (comp & 1) {
2989+ n = comp;
2990+ } else {
2991+ n = comp - 1;
2992+ }
2993+ for (i = 0; i < x * y; ++i) {
2994+ for (k = 0; k < n; ++k) {
2995+ float z = (float)pow(data[i * comp + k] * stbi__h2l_scale_i,
2996+ stbi__h2l_gamma_i) *
2997+ 255 +
2998+ 0.5f;
2999+ if (z < 0) {
3000+ z = 0;
3001+ }
3002+ if (z > 255) {
3003+ z = 255;
3004+ }
3005+ output[i * comp + k] = (stbi_uc)stbi__float2int(z);
3006+ }
3007+ if (k < comp) {
3008+ float z = data[i * comp + k] * 255 + 0.5f;
3009+ if (z < 0) {
3010+ z = 0;
3011+ }
3012+ if (z > 255) {
3013+ z = 255;
3014+ }
3015+ output[i * comp + k] = (stbi_uc)stbi__float2int(z);
3016+ }
3017+ }
3018+ STBI_FREE(data);
3019+ return output;
3020 }
3021 #endif
3022
3023@@ -1933,763 +2395,899 @@ static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
3024 #ifndef STBI_NO_JPEG
3025
3026 // huffman decoding acceleration
3027-#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
3028-
3029-typedef struct
3030-{
3031- stbi_uc fast[1 << FAST_BITS];
3032- // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
3033- stbi__uint16 code[256];
3034- stbi_uc values[256];
3035- stbi_uc size[257];
3036- unsigned int maxcode[18];
3037- int delta[17]; // old 'firstsymbol' - old 'firstcode'
3038+#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
3039+
3040+typedef struct {
3041+ stbi_uc fast[1 << FAST_BITS];
3042+ // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
3043+ stbi__uint16 code[256];
3044+ stbi_uc values[256];
3045+ stbi_uc size[257];
3046+ unsigned int maxcode[18];
3047+ int delta[17]; // old 'firstsymbol' - old 'firstcode'
3048 } stbi__huffman;
3049
3050-typedef struct
3051-{
3052- stbi__context *s;
3053- stbi__huffman huff_dc[4];
3054- stbi__huffman huff_ac[4];
3055- stbi__uint16 dequant[4][64];
3056- stbi__int16 fast_ac[4][1 << FAST_BITS];
3057-
3058-// sizes for components, interleaved MCUs
3059- int img_h_max, img_v_max;
3060- int img_mcu_x, img_mcu_y;
3061- int img_mcu_w, img_mcu_h;
3062-
3063-// definition of jpeg image component
3064- struct
3065- {
3066- int id;
3067- int h,v;
3068- int tq;
3069- int hd,ha;
3070- int dc_pred;
3071-
3072- int x,y,w2,h2;
3073- stbi_uc *data;
3074- void *raw_data, *raw_coeff;
3075- stbi_uc *linebuf;
3076- short *coeff; // progressive only
3077- int coeff_w, coeff_h; // number of 8x8 coefficient blocks
3078- } img_comp[4];
3079-
3080- stbi__uint32 code_buffer; // jpeg entropy-coded buffer
3081- int code_bits; // number of valid bits
3082- unsigned char marker; // marker seen while filling entropy buffer
3083- int nomore; // flag if we saw a marker so must stop
3084-
3085- int progressive;
3086- int spec_start;
3087- int spec_end;
3088- int succ_high;
3089- int succ_low;
3090- int eob_run;
3091- int jfif;
3092- int app14_color_transform; // Adobe APP14 tag
3093- int rgb;
3094-
3095- int scan_n, order[4];
3096- int restart_interval, todo;
3097-
3098-// kernels
3099- void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
3100- void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
3101- stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
3102+typedef struct {
3103+ stbi__context *s;
3104+ stbi__huffman huff_dc[4];
3105+ stbi__huffman huff_ac[4];
3106+ stbi__uint16 dequant[4][64];
3107+ stbi__int16 fast_ac[4][1 << FAST_BITS];
3108+
3109+ // sizes for components, interleaved MCUs
3110+ int img_h_max, img_v_max;
3111+ int img_mcu_x, img_mcu_y;
3112+ int img_mcu_w, img_mcu_h;
3113+
3114+ // definition of jpeg image component
3115+ struct {
3116+ int id;
3117+ int h, v;
3118+ int tq;
3119+ int hd, ha;
3120+ int dc_pred;
3121+
3122+ int x, y, w2, h2;
3123+ stbi_uc *data;
3124+ void *raw_data, *raw_coeff;
3125+ stbi_uc *linebuf;
3126+ short *coeff; // progressive only
3127+ int coeff_w, coeff_h; // number of 8x8 coefficient blocks
3128+ } img_comp[4];
3129+
3130+ stbi__uint32 code_buffer; // jpeg entropy-coded buffer
3131+ int code_bits; // number of valid bits
3132+ unsigned char marker; // marker seen while filling entropy buffer
3133+ int nomore; // flag if we saw a marker so must stop
3134+
3135+ int progressive;
3136+ int spec_start;
3137+ int spec_end;
3138+ int succ_high;
3139+ int succ_low;
3140+ int eob_run;
3141+ int jfif;
3142+ int app14_color_transform; // Adobe APP14 tag
3143+ int rgb;
3144+
3145+ int scan_n, order[4];
3146+ int restart_interval, todo;
3147+
3148+ // kernels
3149+ void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
3150+ void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y,
3151+ const stbi_uc *pcb, const stbi_uc *pcr,
3152+ int count, int step);
3153+ stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near,
3154+ stbi_uc *in_far, int w, int hs);
3155 } stbi__jpeg;
3156
3157-static int stbi__build_huffman(stbi__huffman *h, int *count)
3158-{
3159- int i,j,k=0;
3160- unsigned int code;
3161- // build size list for each symbol (from JPEG spec)
3162- for (i=0; i < 16; ++i) {
3163- for (j=0; j < count[i]; ++j) {
3164- h->size[k++] = (stbi_uc) (i+1);
3165- if(k >= 257) return stbi__err("bad size list","Corrupt JPEG");
3166- }
3167- }
3168- h->size[k] = 0;
3169-
3170- // compute actual symbols (from jpeg spec)
3171- code = 0;
3172- k = 0;
3173- for(j=1; j <= 16; ++j) {
3174- // compute delta to add to code to compute symbol id
3175- h->delta[j] = k - code;
3176- if (h->size[k] == j) {
3177- while (h->size[k] == j)
3178- h->code[k++] = (stbi__uint16) (code++);
3179- if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
3180- }
3181- // compute largest code + 1 for this size, preshifted as needed later
3182- h->maxcode[j] = code << (16-j);
3183- code <<= 1;
3184- }
3185- h->maxcode[j] = 0xffffffff;
3186-
3187- // build non-spec acceleration table; 255 is flag for not-accelerated
3188- memset(h->fast, 255, 1 << FAST_BITS);
3189- for (i=0; i < k; ++i) {
3190- int s = h->size[i];
3191- if (s <= FAST_BITS) {
3192- int c = h->code[i] << (FAST_BITS-s);
3193- int m = 1 << (FAST_BITS-s);
3194- for (j=0; j < m; ++j) {
3195- h->fast[c+j] = (stbi_uc) i;
3196- }
3197- }
3198- }
3199- return 1;
3200+static int
3201+stbi__build_huffman(stbi__huffman *h, int *count)
3202+{
3203+ int i, j, k = 0;
3204+ unsigned int code;
3205+ // build size list for each symbol (from JPEG spec)
3206+ for (i = 0; i < 16; ++i) {
3207+ for (j = 0; j < count[i]; ++j) {
3208+ h->size[k++] = (stbi_uc)(i + 1);
3209+ if (k >= 257) {
3210+ return stbi__err("bad size list", "Corrupt JPEG");
3211+ }
3212+ }
3213+ }
3214+ h->size[k] = 0;
3215+
3216+ // compute actual symbols (from jpeg spec)
3217+ code = 0;
3218+ k = 0;
3219+ for (j = 1; j <= 16; ++j) {
3220+ // compute delta to add to code to compute symbol id
3221+ h->delta[j] = k - code;
3222+ if (h->size[k] == j) {
3223+ while (h->size[k] == j) {
3224+ h->code[k++] = (stbi__uint16)(code++);
3225+ }
3226+ if (code - 1 >= (1u << j)) {
3227+ return stbi__err("bad code lengths", "Corrupt JPEG");
3228+ }
3229+ }
3230+ // compute largest code + 1 for this size, preshifted as needed later
3231+ h->maxcode[j] = code << (16 - j);
3232+ code <<= 1;
3233+ }
3234+ h->maxcode[j] = 0xffffffff;
3235+
3236+ // build non-spec acceleration table; 255 is flag for not-accelerated
3237+ memset(h->fast, 255, 1 << FAST_BITS);
3238+ for (i = 0; i < k; ++i) {
3239+ int s = h->size[i];
3240+ if (s <= FAST_BITS) {
3241+ int c = h->code[i] << (FAST_BITS - s);
3242+ int m = 1 << (FAST_BITS - s);
3243+ for (j = 0; j < m; ++j) {
3244+ h->fast[c + j] = (stbi_uc)i;
3245+ }
3246+ }
3247+ }
3248+ return 1;
3249 }
3250
3251 // build a table that decodes both magnitude and value of small ACs in
3252 // one go.
3253-static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
3254-{
3255- int i;
3256- for (i=0; i < (1 << FAST_BITS); ++i) {
3257- stbi_uc fast = h->fast[i];
3258- fast_ac[i] = 0;
3259- if (fast < 255) {
3260- int rs = h->values[fast];
3261- int run = (rs >> 4) & 15;
3262- int magbits = rs & 15;
3263- int len = h->size[fast];
3264-
3265- if (magbits && len + magbits <= FAST_BITS) {
3266- // magnitude code followed by receive_extend code
3267- int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
3268- int m = 1 << (magbits - 1);
3269- if (k < m) k += (~0U << magbits) + 1;
3270- // if the result is small enough, we can fit it in fast_ac table
3271- if (k >= -128 && k <= 127)
3272- fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
3273- }
3274- }
3275- }
3276-}
3277-
3278-static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
3279-{
3280- do {
3281- unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
3282- if (b == 0xff) {
3283- int c = stbi__get8(j->s);
3284- while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
3285- if (c != 0) {
3286- j->marker = (unsigned char) c;
3287- j->nomore = 1;
3288- return;
3289- }
3290- }
3291- j->code_buffer |= b << (24 - j->code_bits);
3292- j->code_bits += 8;
3293- } while (j->code_bits <= 24);
3294+static void
3295+stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
3296+{
3297+ int i;
3298+ for (i = 0; i < (1 << FAST_BITS); ++i) {
3299+ stbi_uc fast = h->fast[i];
3300+ fast_ac[i] = 0;
3301+ if (fast < 255) {
3302+ int rs = h->values[fast];
3303+ int run = (rs >> 4) & 15;
3304+ int magbits = rs & 15;
3305+ int len = h->size[fast];
3306+
3307+ if (magbits && len + magbits <= FAST_BITS) {
3308+ // magnitude code followed by receive_extend code
3309+ int k = ((i << len) & ((1 << FAST_BITS) - 1)) >>
3310+ (FAST_BITS - magbits);
3311+ int m = 1 << (magbits - 1);
3312+ if (k < m) {
3313+ k += (~0U << magbits) + 1;
3314+ }
3315+ // if the result is small enough, we can fit it in fast_ac table
3316+ if (k >= -128 && k <= 127) {
3317+ fast_ac[i] =
3318+ (stbi__int16)((k * 256) + (run * 16) + (len + magbits));
3319+ }
3320+ }
3321+ }
3322+ }
3323+}
3324+
3325+static void
3326+stbi__grow_buffer_unsafe(stbi__jpeg *j)
3327+{
3328+ do {
3329+ unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
3330+ if (b == 0xff) {
3331+ int c = stbi__get8(j->s);
3332+ while (c == 0xff) {
3333+ c = stbi__get8(j->s); // consume fill bytes
3334+ }
3335+ if (c != 0) {
3336+ j->marker = (unsigned char)c;
3337+ j->nomore = 1;
3338+ return;
3339+ }
3340+ }
3341+ j->code_buffer |= b << (24 - j->code_bits);
3342+ j->code_bits += 8;
3343+ } while (j->code_bits <= 24);
3344 }
3345
3346 // (1 << n) - 1
3347-static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
3348+static const stbi__uint32 stbi__bmask[17] = {
3349+ 0, 1, 3, 7, 15, 31, 63, 127, 255,
3350+ 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535};
3351
3352 // decode a jpeg huffman value from the bitstream
3353-stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
3354-{
3355- unsigned int temp;
3356- int c,k;
3357-
3358- if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
3359-
3360- // look at the top FAST_BITS and determine what symbol ID it is,
3361- // if the code is <= FAST_BITS
3362- c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
3363- k = h->fast[c];
3364- if (k < 255) {
3365- int s = h->size[k];
3366- if (s > j->code_bits)
3367- return -1;
3368- j->code_buffer <<= s;
3369- j->code_bits -= s;
3370- return h->values[k];
3371- }
3372-
3373- // naive test is to shift the code_buffer down so k bits are
3374- // valid, then test against maxcode. To speed this up, we've
3375- // preshifted maxcode left so that it has (16-k) 0s at the
3376- // end; in other words, regardless of the number of bits, it
3377- // wants to be compared against something shifted to have 16;
3378- // that way we don't need to shift inside the loop.
3379- temp = j->code_buffer >> 16;
3380- for (k=FAST_BITS+1 ; ; ++k)
3381- if (temp < h->maxcode[k])
3382- break;
3383- if (k == 17) {
3384- // error! code not found
3385- j->code_bits -= 16;
3386- return -1;
3387- }
3388-
3389- if (k > j->code_bits)
3390- return -1;
3391-
3392- // convert the huffman code to the symbol id
3393- c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
3394- if(c < 0 || c >= 256) // symbol id out of bounds!
3395- return -1;
3396- STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
3397-
3398- // convert the id to a symbol
3399- j->code_bits -= k;
3400- j->code_buffer <<= k;
3401- return h->values[c];
3402+stbi_inline static int
3403+stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
3404+{
3405+ unsigned int temp;
3406+ int c, k;
3407+
3408+ if (j->code_bits < 16) {
3409+ stbi__grow_buffer_unsafe(j);
3410+ }
3411+
3412+ // look at the top FAST_BITS and determine what symbol ID it is,
3413+ // if the code is <= FAST_BITS
3414+ c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
3415+ k = h->fast[c];
3416+ if (k < 255) {
3417+ int s = h->size[k];
3418+ if (s > j->code_bits) {
3419+ return -1;
3420+ }
3421+ j->code_buffer <<= s;
3422+ j->code_bits -= s;
3423+ return h->values[k];
3424+ }
3425+
3426+ // naive test is to shift the code_buffer down so k bits are
3427+ // valid, then test against maxcode. To speed this up, we've
3428+ // preshifted maxcode left so that it has (16-k) 0s at the
3429+ // end; in other words, regardless of the number of bits, it
3430+ // wants to be compared against something shifted to have 16;
3431+ // that way we don't need to shift inside the loop.
3432+ temp = j->code_buffer >> 16;
3433+ for (k = FAST_BITS + 1;; ++k) {
3434+ if (temp < h->maxcode[k]) {
3435+ break;
3436+ }
3437+ }
3438+ if (k == 17) {
3439+ // error! code not found
3440+ j->code_bits -= 16;
3441+ return -1;
3442+ }
3443+
3444+ if (k > j->code_bits) {
3445+ return -1;
3446+ }
3447+
3448+ // convert the huffman code to the symbol id
3449+ c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
3450+ if (c < 0 || c >= 256) { // symbol id out of bounds!
3451+ return -1;
3452+ }
3453+ STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) &
3454+ stbi__bmask[h->size[c]]) == h->code[c]);
3455+
3456+ // convert the id to a symbol
3457+ j->code_bits -= k;
3458+ j->code_buffer <<= k;
3459+ return h->values[c];
3460 }
3461
3462 // bias[n] = (-1<<n) + 1
3463-static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
3464+static const int stbi__jbias[16] = {0, -1, -3, -7, -15, -31,
3465+ -63, -127, -255, -511, -1023, -2047,
3466+ -4095, -8191, -16383, -32767};
3467
3468 // combined JPEG 'receive' and JPEG 'extend', since baseline
3469 // always extends everything it receives.
3470-stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
3471-{
3472- unsigned int k;
3473- int sgn;
3474- if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
3475- if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
3476-
3477- sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
3478- k = stbi_lrot(j->code_buffer, n);
3479- j->code_buffer = k & ~stbi__bmask[n];
3480- k &= stbi__bmask[n];
3481- j->code_bits -= n;
3482- return k + (stbi__jbias[n] & (sgn - 1));
3483+stbi_inline static int
3484+stbi__extend_receive(stbi__jpeg *j, int n)
3485+{
3486+ unsigned int k;
3487+ int sgn;
3488+ if (j->code_bits < n) {
3489+ stbi__grow_buffer_unsafe(j);
3490+ }
3491+ if (j->code_bits < n) {
3492+ return 0; // ran out of bits from stream, return 0s intead of continuing
3493+ }
3494+
3495+ sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear
3496+ // (positive), 1 if MSB set (negative)
3497+ k = stbi_lrot(j->code_buffer, n);
3498+ j->code_buffer = k & ~stbi__bmask[n];
3499+ k &= stbi__bmask[n];
3500+ j->code_bits -= n;
3501+ return k + (stbi__jbias[n] & (sgn - 1));
3502 }
3503
3504 // get some unsigned bits
3505-stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
3506-{
3507- unsigned int k;
3508- if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
3509- if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
3510- k = stbi_lrot(j->code_buffer, n);
3511- j->code_buffer = k & ~stbi__bmask[n];
3512- k &= stbi__bmask[n];
3513- j->code_bits -= n;
3514- return k;
3515-}
3516-
3517-stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
3518-{
3519- unsigned int k;
3520- if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
3521- if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing
3522- k = j->code_buffer;
3523- j->code_buffer <<= 1;
3524- --j->code_bits;
3525- return k & 0x80000000;
3526+stbi_inline static int
3527+stbi__jpeg_get_bits(stbi__jpeg *j, int n)
3528+{
3529+ unsigned int k;
3530+ if (j->code_bits < n) {
3531+ stbi__grow_buffer_unsafe(j);
3532+ }
3533+ if (j->code_bits < n) {
3534+ return 0; // ran out of bits from stream, return 0s intead of continuing
3535+ }
3536+ k = stbi_lrot(j->code_buffer, n);
3537+ j->code_buffer = k & ~stbi__bmask[n];
3538+ k &= stbi__bmask[n];
3539+ j->code_bits -= n;
3540+ return k;
3541+}
3542+
3543+stbi_inline static int
3544+stbi__jpeg_get_bit(stbi__jpeg *j)
3545+{
3546+ unsigned int k;
3547+ if (j->code_bits < 1) {
3548+ stbi__grow_buffer_unsafe(j);
3549+ }
3550+ if (j->code_bits < 1) {
3551+ return 0; // ran out of bits from stream, return 0s intead of continuing
3552+ }
3553+ k = j->code_buffer;
3554+ j->code_buffer <<= 1;
3555+ --j->code_bits;
3556+ return k & 0x80000000;
3557 }
3558
3559 // given a value that's at position X in the zigzag stream,
3560 // where does it appear in the 8x8 matrix coded as row-major?
3561-static const stbi_uc stbi__jpeg_dezigzag[64+15] =
3562-{
3563- 0, 1, 8, 16, 9, 2, 3, 10,
3564- 17, 24, 32, 25, 18, 11, 4, 5,
3565- 12, 19, 26, 33, 40, 48, 41, 34,
3566- 27, 20, 13, 6, 7, 14, 21, 28,
3567- 35, 42, 49, 56, 57, 50, 43, 36,
3568- 29, 22, 15, 23, 30, 37, 44, 51,
3569- 58, 59, 52, 45, 38, 31, 39, 46,
3570- 53, 60, 61, 54, 47, 55, 62, 63,
3571- // let corrupt input sample past end
3572- 63, 63, 63, 63, 63, 63, 63, 63,
3573- 63, 63, 63, 63, 63, 63, 63
3574-};
3575+static const stbi_uc stbi__jpeg_dezigzag[64 + 15] = {
3576+ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40,
3577+ 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36,
3578+ 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61,
3579+ 54, 47, 55, 62, 63,
3580+ // let corrupt input sample past end
3581+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63};
3582
3583 // decode one 64-entry block--
3584-static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
3585-{
3586- int diff,dc,k;
3587- int t;
3588-
3589- if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
3590- t = stbi__jpeg_huff_decode(j, hdc);
3591- if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
3592-
3593- // 0 all the ac values now so we can do it 32-bits at a time
3594- memset(data,0,64*sizeof(data[0]));
3595-
3596- diff = t ? stbi__extend_receive(j, t) : 0;
3597- if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG");
3598- dc = j->img_comp[b].dc_pred + diff;
3599- j->img_comp[b].dc_pred = dc;
3600- if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3601- data[0] = (short) (dc * dequant[0]);
3602-
3603- // decode AC components, see JPEG spec
3604- k = 1;
3605- do {
3606- unsigned int zig;
3607- int c,r,s;
3608- if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
3609- c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
3610- r = fac[c];
3611- if (r) { // fast-AC path
3612- k += (r >> 4) & 15; // run
3613- s = r & 15; // combined length
3614- if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
3615- j->code_buffer <<= s;
3616- j->code_bits -= s;
3617- // decode into unzigzag'd location
3618- zig = stbi__jpeg_dezigzag[k++];
3619- data[zig] = (short) ((r >> 8) * dequant[zig]);
3620- } else {
3621- int rs = stbi__jpeg_huff_decode(j, hac);
3622- if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
3623- s = rs & 15;
3624- r = rs >> 4;
3625- if (s == 0) {
3626- if (rs != 0xf0) break; // end block
3627- k += 16;
3628- } else {
3629- k += r;
3630- // decode into unzigzag'd location
3631- zig = stbi__jpeg_dezigzag[k++];
3632- data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
3633- }
3634- }
3635- } while (k < 64);
3636- return 1;
3637-}
3638-
3639-static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
3640-{
3641- int diff,dc;
3642- int t;
3643- if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3644-
3645- if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
3646-
3647- if (j->succ_high == 0) {
3648- // first scan for DC coefficient, must be first
3649- memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
3650- t = stbi__jpeg_huff_decode(j, hdc);
3651- if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3652- diff = t ? stbi__extend_receive(j, t) : 0;
3653-
3654- if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG");
3655- dc = j->img_comp[b].dc_pred + diff;
3656- j->img_comp[b].dc_pred = dc;
3657- if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3658- data[0] = (short) (dc * (1 << j->succ_low));
3659- } else {
3660- // refinement scan for DC coefficient
3661- if (stbi__jpeg_get_bit(j))
3662- data[0] += (short) (1 << j->succ_low);
3663- }
3664- return 1;
3665+static int
3666+stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc,
3667+ stbi__huffman *hac, stbi__int16 *fac, int b,
3668+ stbi__uint16 *dequant)
3669+{
3670+ int diff, dc, k;
3671+ int t;
3672+
3673+ if (j->code_bits < 16) {
3674+ stbi__grow_buffer_unsafe(j);
3675+ }
3676+ t = stbi__jpeg_huff_decode(j, hdc);
3677+ if (t < 0 || t > 15) {
3678+ return stbi__err("bad huffman code", "Corrupt JPEG");
3679+ }
3680+
3681+ // 0 all the ac values now so we can do it 32-bits at a time
3682+ memset(data, 0, 64 * sizeof(data[0]));
3683+
3684+ diff = t ? stbi__extend_receive(j, t) : 0;
3685+ if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) {
3686+ return stbi__err("bad delta", "Corrupt JPEG");
3687+ }
3688+ dc = j->img_comp[b].dc_pred + diff;
3689+ j->img_comp[b].dc_pred = dc;
3690+ if (!stbi__mul2shorts_valid(dc, dequant[0])) {
3691+ return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3692+ }
3693+ data[0] = (short)(dc * dequant[0]);
3694+
3695+ // decode AC components, see JPEG spec
3696+ k = 1;
3697+ do {
3698+ unsigned int zig;
3699+ int c, r, s;
3700+ if (j->code_bits < 16) {
3701+ stbi__grow_buffer_unsafe(j);
3702+ }
3703+ c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
3704+ r = fac[c];
3705+ if (r) { // fast-AC path
3706+ k += (r >> 4) & 15; // run
3707+ s = r & 15; // combined length
3708+ if (s > j->code_bits) {
3709+ return stbi__err(
3710+ "bad huffman code",
3711+ "Combined length longer than code bits available");
3712+ }
3713+ j->code_buffer <<= s;
3714+ j->code_bits -= s;
3715+ // decode into unzigzag'd location
3716+ zig = stbi__jpeg_dezigzag[k++];
3717+ data[zig] = (short)((r >> 8) * dequant[zig]);
3718+ } else {
3719+ int rs = stbi__jpeg_huff_decode(j, hac);
3720+ if (rs < 0) {
3721+ return stbi__err("bad huffman code", "Corrupt JPEG");
3722+ }
3723+ s = rs & 15;
3724+ r = rs >> 4;
3725+ if (s == 0) {
3726+ if (rs != 0xf0) {
3727+ break; // end block
3728+ }
3729+ k += 16;
3730+ } else {
3731+ k += r;
3732+ // decode into unzigzag'd location
3733+ zig = stbi__jpeg_dezigzag[k++];
3734+ data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
3735+ }
3736+ }
3737+ } while (k < 64);
3738+ return 1;
3739+}
3740+
3741+static int
3742+stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64],
3743+ stbi__huffman *hdc, int b)
3744+{
3745+ int diff, dc;
3746+ int t;
3747+ if (j->spec_end != 0) {
3748+ return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3749+ }
3750+
3751+ if (j->code_bits < 16) {
3752+ stbi__grow_buffer_unsafe(j);
3753+ }
3754+
3755+ if (j->succ_high == 0) {
3756+ // first scan for DC coefficient, must be first
3757+ memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
3758+ t = stbi__jpeg_huff_decode(j, hdc);
3759+ if (t < 0 || t > 15) {
3760+ return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3761+ }
3762+ diff = t ? stbi__extend_receive(j, t) : 0;
3763+
3764+ if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) {
3765+ return stbi__err("bad delta", "Corrupt JPEG");
3766+ }
3767+ dc = j->img_comp[b].dc_pred + diff;
3768+ j->img_comp[b].dc_pred = dc;
3769+ if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) {
3770+ return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3771+ }
3772+ data[0] = (short)(dc * (1 << j->succ_low));
3773+ } else {
3774+ // refinement scan for DC coefficient
3775+ if (stbi__jpeg_get_bit(j)) {
3776+ data[0] += (short)(1 << j->succ_low);
3777+ }
3778+ }
3779+ return 1;
3780 }
3781
3782 // @OPTIMIZE: store non-zigzagged during the decode passes,
3783 // and only de-zigzag when dequantizing
3784-static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
3785-{
3786- int k;
3787- if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3788-
3789- if (j->succ_high == 0) {
3790- int shift = j->succ_low;
3791-
3792- if (j->eob_run) {
3793- --j->eob_run;
3794- return 1;
3795- }
3796-
3797- k = j->spec_start;
3798- do {
3799- unsigned int zig;
3800- int c,r,s;
3801- if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
3802- c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
3803- r = fac[c];
3804- if (r) { // fast-AC path
3805- k += (r >> 4) & 15; // run
3806- s = r & 15; // combined length
3807- if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
3808- j->code_buffer <<= s;
3809- j->code_bits -= s;
3810- zig = stbi__jpeg_dezigzag[k++];
3811- data[zig] = (short) ((r >> 8) * (1 << shift));
3812- } else {
3813- int rs = stbi__jpeg_huff_decode(j, hac);
3814- if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
3815- s = rs & 15;
3816- r = rs >> 4;
3817- if (s == 0) {
3818- if (r < 15) {
3819- j->eob_run = (1 << r);
3820- if (r)
3821- j->eob_run += stbi__jpeg_get_bits(j, r);
3822- --j->eob_run;
3823- break;
3824- }
3825- k += 16;
3826- } else {
3827- k += r;
3828- zig = stbi__jpeg_dezigzag[k++];
3829- data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
3830- }
3831- }
3832- } while (k <= j->spec_end);
3833- } else {
3834- // refinement scan for these AC coefficients
3835-
3836- short bit = (short) (1 << j->succ_low);
3837-
3838- if (j->eob_run) {
3839- --j->eob_run;
3840- for (k = j->spec_start; k <= j->spec_end; ++k) {
3841- short *p = &data[stbi__jpeg_dezigzag[k]];
3842- if (*p != 0)
3843- if (stbi__jpeg_get_bit(j))
3844- if ((*p & bit)==0) {
3845- if (*p > 0)
3846- *p += bit;
3847- else
3848- *p -= bit;
3849- }
3850- }
3851- } else {
3852- k = j->spec_start;
3853- do {
3854- int r,s;
3855- int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
3856- if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
3857- s = rs & 15;
3858- r = rs >> 4;
3859- if (s == 0) {
3860- if (r < 15) {
3861- j->eob_run = (1 << r) - 1;
3862- if (r)
3863- j->eob_run += stbi__jpeg_get_bits(j, r);
3864- r = 64; // force end of block
3865- } else {
3866- // r=15 s=0 should write 16 0s, so we just do
3867- // a run of 15 0s and then write s (which is 0),
3868- // so we don't have to do anything special here
3869- }
3870- } else {
3871- if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
3872- // sign bit
3873- if (stbi__jpeg_get_bit(j))
3874- s = bit;
3875- else
3876- s = -bit;
3877- }
3878-
3879- // advance by r
3880- while (k <= j->spec_end) {
3881- short *p = &data[stbi__jpeg_dezigzag[k++]];
3882- if (*p != 0) {
3883- if (stbi__jpeg_get_bit(j))
3884- if ((*p & bit)==0) {
3885- if (*p > 0)
3886- *p += bit;
3887- else
3888- *p -= bit;
3889- }
3890- } else {
3891- if (r == 0) {
3892- *p = (short) s;
3893- break;
3894- }
3895- --r;
3896- }
3897- }
3898- } while (k <= j->spec_end);
3899- }
3900- }
3901- return 1;
3902+static int
3903+stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64],
3904+ stbi__huffman *hac, stbi__int16 *fac)
3905+{
3906+ int k;
3907+ if (j->spec_start == 0) {
3908+ return stbi__err("can't merge dc and ac", "Corrupt JPEG");
3909+ }
3910+
3911+ if (j->succ_high == 0) {
3912+ int shift = j->succ_low;
3913+
3914+ if (j->eob_run) {
3915+ --j->eob_run;
3916+ return 1;
3917+ }
3918+
3919+ k = j->spec_start;
3920+ do {
3921+ unsigned int zig;
3922+ int c, r, s;
3923+ if (j->code_bits < 16) {
3924+ stbi__grow_buffer_unsafe(j);
3925+ }
3926+ c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
3927+ r = fac[c];
3928+ if (r) { // fast-AC path
3929+ k += (r >> 4) & 15; // run
3930+ s = r & 15; // combined length
3931+ if (s > j->code_bits) {
3932+ return stbi__err(
3933+ "bad huffman code",
3934+ "Combined length longer than code bits available");
3935+ }
3936+ j->code_buffer <<= s;
3937+ j->code_bits -= s;
3938+ zig = stbi__jpeg_dezigzag[k++];
3939+ data[zig] = (short)((r >> 8) * (1 << shift));
3940+ } else {
3941+ int rs = stbi__jpeg_huff_decode(j, hac);
3942+ if (rs < 0) {
3943+ return stbi__err("bad huffman code", "Corrupt JPEG");
3944+ }
3945+ s = rs & 15;
3946+ r = rs >> 4;
3947+ if (s == 0) {
3948+ if (r < 15) {
3949+ j->eob_run = (1 << r);
3950+ if (r) {
3951+ j->eob_run += stbi__jpeg_get_bits(j, r);
3952+ }
3953+ --j->eob_run;
3954+ break;
3955+ }
3956+ k += 16;
3957+ } else {
3958+ k += r;
3959+ zig = stbi__jpeg_dezigzag[k++];
3960+ data[zig] =
3961+ (short)(stbi__extend_receive(j, s) * (1 << shift));
3962+ }
3963+ }
3964+ } while (k <= j->spec_end);
3965+ } else {
3966+ // refinement scan for these AC coefficients
3967+
3968+ short bit = (short)(1 << j->succ_low);
3969+
3970+ if (j->eob_run) {
3971+ --j->eob_run;
3972+ for (k = j->spec_start; k <= j->spec_end; ++k) {
3973+ short *p = &data[stbi__jpeg_dezigzag[k]];
3974+ if (*p != 0) {
3975+ if (stbi__jpeg_get_bit(j)) {
3976+ if ((*p & bit) == 0) {
3977+ if (*p > 0) {
3978+ *p += bit;
3979+ } else {
3980+ *p -= bit;
3981+ }
3982+ }
3983+ }
3984+ }
3985+ }
3986+ } else {
3987+ k = j->spec_start;
3988+ do {
3989+ int r, s;
3990+ int rs = stbi__jpeg_huff_decode(
3991+ j, hac); // @OPTIMIZE see if we can use the fast path here,
3992+ // advance-by-r is so slow, eh
3993+ if (rs < 0) {
3994+ return stbi__err("bad huffman code", "Corrupt JPEG");
3995+ }
3996+ s = rs & 15;
3997+ r = rs >> 4;
3998+ if (s == 0) {
3999+ if (r < 15) {
4000+ j->eob_run = (1 << r) - 1;
4001+ if (r) {
4002+ j->eob_run += stbi__jpeg_get_bits(j, r);
4003+ }
4004+ r = 64; // force end of block
4005+ } else {
4006+ // r=15 s=0 should write 16 0s, so we just do
4007+ // a run of 15 0s and then write s (which is 0),
4008+ // so we don't have to do anything special here
4009+ }
4010+ } else {
4011+ if (s != 1) {
4012+ return stbi__err("bad huffman code", "Corrupt JPEG");
4013+ }
4014+ // sign bit
4015+ if (stbi__jpeg_get_bit(j)) {
4016+ s = bit;
4017+ } else {
4018+ s = -bit;
4019+ }
4020+ }
4021+
4022+ // advance by r
4023+ while (k <= j->spec_end) {
4024+ short *p = &data[stbi__jpeg_dezigzag[k++]];
4025+ if (*p != 0) {
4026+ if (stbi__jpeg_get_bit(j)) {
4027+ if ((*p & bit) == 0) {
4028+ if (*p > 0) {
4029+ *p += bit;
4030+ } else {
4031+ *p -= bit;
4032+ }
4033+ }
4034+ }
4035+ } else {
4036+ if (r == 0) {
4037+ *p = (short)s;
4038+ break;
4039+ }
4040+ --r;
4041+ }
4042+ }
4043+ } while (k <= j->spec_end);
4044+ }
4045+ }
4046+ return 1;
4047 }
4048
4049 // take a -128..127 value and stbi__clamp it and convert to 0..255
4050-stbi_inline static stbi_uc stbi__clamp(int x)
4051+stbi_inline static stbi_uc
4052+stbi__clamp(int x)
4053 {
4054- // trick to use a single test to catch both cases
4055- if ((unsigned int) x > 255) {
4056- if (x < 0) return 0;
4057- if (x > 255) return 255;
4058- }
4059- return (stbi_uc) x;
4060+ // trick to use a single test to catch both cases
4061+ if ((unsigned int)x > 255) {
4062+ if (x < 0) {
4063+ return 0;
4064+ }
4065+ if (x > 255) {
4066+ return 255;
4067+ }
4068+ }
4069+ return (stbi_uc)x;
4070 }
4071
4072-#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5)))
4073-#define stbi__fsh(x) ((x) * 4096)
4074+#define stbi__f2f(x) ((int)(((x) * 4096 + 0.5)))
4075+#define stbi__fsh(x) ((x) * 4096)
4076
4077 // derived from jidctint -- DCT_ISLOW
4078-#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
4079- int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
4080- p2 = s2; \
4081- p3 = s6; \
4082- p1 = (p2+p3) * stbi__f2f(0.5411961f); \
4083- t2 = p1 + p3*stbi__f2f(-1.847759065f); \
4084- t3 = p1 + p2*stbi__f2f( 0.765366865f); \
4085- p2 = s0; \
4086- p3 = s4; \
4087- t0 = stbi__fsh(p2+p3); \
4088- t1 = stbi__fsh(p2-p3); \
4089- x0 = t0+t3; \
4090- x3 = t0-t3; \
4091- x1 = t1+t2; \
4092- x2 = t1-t2; \
4093- t0 = s7; \
4094- t1 = s5; \
4095- t2 = s3; \
4096- t3 = s1; \
4097- p3 = t0+t2; \
4098- p4 = t1+t3; \
4099- p1 = t0+t3; \
4100- p2 = t1+t2; \
4101- p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
4102- t0 = t0*stbi__f2f( 0.298631336f); \
4103- t1 = t1*stbi__f2f( 2.053119869f); \
4104- t2 = t2*stbi__f2f( 3.072711026f); \
4105- t3 = t3*stbi__f2f( 1.501321110f); \
4106- p1 = p5 + p1*stbi__f2f(-0.899976223f); \
4107- p2 = p5 + p2*stbi__f2f(-2.562915447f); \
4108- p3 = p3*stbi__f2f(-1.961570560f); \
4109- p4 = p4*stbi__f2f(-0.390180644f); \
4110- t3 += p1+p4; \
4111- t2 += p2+p3; \
4112- t1 += p2+p4; \
4113- t0 += p1+p3;
4114-
4115-static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
4116-{
4117- int i,val[64],*v=val;
4118- stbi_uc *o;
4119- short *d = data;
4120-
4121- // columns
4122- for (i=0; i < 8; ++i,++d, ++v) {
4123- // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
4124- if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
4125- && d[40]==0 && d[48]==0 && d[56]==0) {
4126- // no shortcut 0 seconds
4127- // (1|2|3|4|5|6|7)==0 0 seconds
4128- // all separate -0.047 seconds
4129- // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
4130- int dcterm = d[0]*4;
4131- v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
4132- } else {
4133- STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
4134- // constants scaled things up by 1<<12; let's bring them back
4135- // down, but keep 2 extra bits of precision
4136- x0 += 512; x1 += 512; x2 += 512; x3 += 512;
4137- v[ 0] = (x0+t3) >> 10;
4138- v[56] = (x0-t3) >> 10;
4139- v[ 8] = (x1+t2) >> 10;
4140- v[48] = (x1-t2) >> 10;
4141- v[16] = (x2+t1) >> 10;
4142- v[40] = (x2-t1) >> 10;
4143- v[24] = (x3+t0) >> 10;
4144- v[32] = (x3-t0) >> 10;
4145- }
4146- }
4147-
4148- for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
4149- // no fast case since the first 1D IDCT spread components out
4150- STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
4151- // constants scaled things up by 1<<12, plus we had 1<<2 from first
4152- // loop, plus horizontal and vertical each scale by sqrt(8) so together
4153- // we've got an extra 1<<3, so 1<<17 total we need to remove.
4154- // so we want to round that, which means adding 0.5 * 1<<17,
4155- // aka 65536. Also, we'll end up with -128 to 127 that we want
4156- // to encode as 0..255 by adding 128, so we'll add that before the shift
4157- x0 += 65536 + (128<<17);
4158- x1 += 65536 + (128<<17);
4159- x2 += 65536 + (128<<17);
4160- x3 += 65536 + (128<<17);
4161- // tried computing the shifts into temps, or'ing the temps to see
4162- // if any were out of range, but that was slower
4163- o[0] = stbi__clamp((x0+t3) >> 17);
4164- o[7] = stbi__clamp((x0-t3) >> 17);
4165- o[1] = stbi__clamp((x1+t2) >> 17);
4166- o[6] = stbi__clamp((x1-t2) >> 17);
4167- o[2] = stbi__clamp((x2+t1) >> 17);
4168- o[5] = stbi__clamp((x2-t1) >> 17);
4169- o[3] = stbi__clamp((x3+t0) >> 17);
4170- o[4] = stbi__clamp((x3-t0) >> 17);
4171- }
4172+#define STBI__IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7) \
4173+ int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3; \
4174+ p2 = s2; \
4175+ p3 = s6; \
4176+ p1 = (p2 + p3) * stbi__f2f(0.5411961f); \
4177+ t2 = p1 + p3 * stbi__f2f(-1.847759065f); \
4178+ t3 = p1 + p2 * stbi__f2f(0.765366865f); \
4179+ p2 = s0; \
4180+ p3 = s4; \
4181+ t0 = stbi__fsh(p2 + p3); \
4182+ t1 = stbi__fsh(p2 - p3); \
4183+ x0 = t0 + t3; \
4184+ x3 = t0 - t3; \
4185+ x1 = t1 + t2; \
4186+ x2 = t1 - t2; \
4187+ t0 = s7; \
4188+ t1 = s5; \
4189+ t2 = s3; \
4190+ t3 = s1; \
4191+ p3 = t0 + t2; \
4192+ p4 = t1 + t3; \
4193+ p1 = t0 + t3; \
4194+ p2 = t1 + t2; \
4195+ p5 = (p3 + p4) * stbi__f2f(1.175875602f); \
4196+ t0 = t0 * stbi__f2f(0.298631336f); \
4197+ t1 = t1 * stbi__f2f(2.053119869f); \
4198+ t2 = t2 * stbi__f2f(3.072711026f); \
4199+ t3 = t3 * stbi__f2f(1.501321110f); \
4200+ p1 = p5 + p1 * stbi__f2f(-0.899976223f); \
4201+ p2 = p5 + p2 * stbi__f2f(-2.562915447f); \
4202+ p3 = p3 * stbi__f2f(-1.961570560f); \
4203+ p4 = p4 * stbi__f2f(-0.390180644f); \
4204+ t3 += p1 + p4; \
4205+ t2 += p2 + p3; \
4206+ t1 += p2 + p4; \
4207+ t0 += p1 + p3;
4208+
4209+static void
4210+stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
4211+{
4212+ int i, val[64], *v = val;
4213+ stbi_uc *o;
4214+ short *d = data;
4215+
4216+ // columns
4217+ for (i = 0; i < 8; ++i, ++d, ++v) {
4218+ // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
4219+ if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 &&
4220+ d[48] == 0 && d[56] == 0) {
4221+ // no shortcut 0 seconds
4222+ // (1|2|3|4|5|6|7)==0 0 seconds
4223+ // all separate -0.047 seconds
4224+ // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
4225+ int dcterm = d[0] * 4;
4226+ v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] =
4227+ dcterm;
4228+ } else {
4229+ STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
4230+ // constants scaled things up by 1<<12; let's bring them back
4231+ // down, but keep 2 extra bits of precision
4232+ x0 += 512;
4233+ x1 += 512;
4234+ x2 += 512;
4235+ x3 += 512;
4236+ v[0] = (x0 + t3) >> 10;
4237+ v[56] = (x0 - t3) >> 10;
4238+ v[8] = (x1 + t2) >> 10;
4239+ v[48] = (x1 - t2) >> 10;
4240+ v[16] = (x2 + t1) >> 10;
4241+ v[40] = (x2 - t1) >> 10;
4242+ v[24] = (x3 + t0) >> 10;
4243+ v[32] = (x3 - t0) >> 10;
4244+ }
4245+ }
4246+
4247+ for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {
4248+ // no fast case since the first 1D IDCT spread components out
4249+ STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
4250+ // constants scaled things up by 1<<12, plus we had 1<<2 from first
4251+ // loop, plus horizontal and vertical each scale by sqrt(8) so together
4252+ // we've got an extra 1<<3, so 1<<17 total we need to remove.
4253+ // so we want to round that, which means adding 0.5 * 1<<17,
4254+ // aka 65536. Also, we'll end up with -128 to 127 that we want
4255+ // to encode as 0..255 by adding 128, so we'll add that before the shift
4256+ x0 += 65536 + (128 << 17);
4257+ x1 += 65536 + (128 << 17);
4258+ x2 += 65536 + (128 << 17);
4259+ x3 += 65536 + (128 << 17);
4260+ // tried computing the shifts into temps, or'ing the temps to see
4261+ // if any were out of range, but that was slower
4262+ o[0] = stbi__clamp((x0 + t3) >> 17);
4263+ o[7] = stbi__clamp((x0 - t3) >> 17);
4264+ o[1] = stbi__clamp((x1 + t2) >> 17);
4265+ o[6] = stbi__clamp((x1 - t2) >> 17);
4266+ o[2] = stbi__clamp((x2 + t1) >> 17);
4267+ o[5] = stbi__clamp((x2 - t1) >> 17);
4268+ o[3] = stbi__clamp((x3 + t0) >> 17);
4269+ o[4] = stbi__clamp((x3 - t0) >> 17);
4270+ }
4271 }
4272
4273 #ifdef STBI_SSE2
4274 // sse2 integer IDCT. not the fastest possible implementation but it
4275 // produces bit-identical results to the generic C version so it's
4276 // fully "transparent".
4277-static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
4278-{
4279- // This is constructed to match our regular (generic) integer IDCT exactly.
4280- __m128i row0, row1, row2, row3, row4, row5, row6, row7;
4281- __m128i tmp;
4282-
4283- // dot product constant: even elems=x, odd elems=y
4284- #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
4285-
4286- // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
4287- // out(1) = c1[even]*x + c1[odd]*y
4288- #define dct_rot(out0,out1, x,y,c0,c1) \
4289- __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
4290- __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
4291- __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
4292- __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
4293- __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
4294- __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
4295-
4296- // out = in << 12 (in 16-bit, out 32-bit)
4297- #define dct_widen(out, in) \
4298- __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
4299- __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
4300-
4301- // wide add
4302- #define dct_wadd(out, a, b) \
4303- __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
4304- __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
4305-
4306- // wide sub
4307- #define dct_wsub(out, a, b) \
4308- __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
4309- __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
4310-
4311- // butterfly a/b, add bias, then shift by "s" and pack
4312- #define dct_bfly32o(out0, out1, a,b,bias,s) \
4313- { \
4314- __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
4315- __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
4316- dct_wadd(sum, abiased, b); \
4317- dct_wsub(dif, abiased, b); \
4318- out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
4319- out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
4320- }
4321-
4322- // 8-bit interleave step (for transposes)
4323- #define dct_interleave8(a, b) \
4324- tmp = a; \
4325- a = _mm_unpacklo_epi8(a, b); \
4326- b = _mm_unpackhi_epi8(tmp, b)
4327-
4328- // 16-bit interleave step (for transposes)
4329- #define dct_interleave16(a, b) \
4330- tmp = a; \
4331- a = _mm_unpacklo_epi16(a, b); \
4332- b = _mm_unpackhi_epi16(tmp, b)
4333-
4334- #define dct_pass(bias,shift) \
4335- { \
4336- /* even part */ \
4337- dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
4338- __m128i sum04 = _mm_add_epi16(row0, row4); \
4339- __m128i dif04 = _mm_sub_epi16(row0, row4); \
4340- dct_widen(t0e, sum04); \
4341- dct_widen(t1e, dif04); \
4342- dct_wadd(x0, t0e, t3e); \
4343- dct_wsub(x3, t0e, t3e); \
4344- dct_wadd(x1, t1e, t2e); \
4345- dct_wsub(x2, t1e, t2e); \
4346- /* odd part */ \
4347- dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
4348- dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
4349- __m128i sum17 = _mm_add_epi16(row1, row7); \
4350- __m128i sum35 = _mm_add_epi16(row3, row5); \
4351- dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
4352- dct_wadd(x4, y0o, y4o); \
4353- dct_wadd(x5, y1o, y5o); \
4354- dct_wadd(x6, y2o, y5o); \
4355- dct_wadd(x7, y3o, y4o); \
4356- dct_bfly32o(row0,row7, x0,x7,bias,shift); \
4357- dct_bfly32o(row1,row6, x1,x6,bias,shift); \
4358- dct_bfly32o(row2,row5, x2,x5,bias,shift); \
4359- dct_bfly32o(row3,row4, x3,x4,bias,shift); \
4360- }
4361-
4362- __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
4363- __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
4364- __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
4365- __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
4366- __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
4367- __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
4368- __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
4369- __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
4370-
4371- // rounding biases in column/row passes, see stbi__idct_block for explanation.
4372- __m128i bias_0 = _mm_set1_epi32(512);
4373- __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
4374-
4375- // load
4376- row0 = _mm_load_si128((const __m128i *) (data + 0*8));
4377- row1 = _mm_load_si128((const __m128i *) (data + 1*8));
4378- row2 = _mm_load_si128((const __m128i *) (data + 2*8));
4379- row3 = _mm_load_si128((const __m128i *) (data + 3*8));
4380- row4 = _mm_load_si128((const __m128i *) (data + 4*8));
4381- row5 = _mm_load_si128((const __m128i *) (data + 5*8));
4382- row6 = _mm_load_si128((const __m128i *) (data + 6*8));
4383- row7 = _mm_load_si128((const __m128i *) (data + 7*8));
4384-
4385- // column pass
4386- dct_pass(bias_0, 10);
4387-
4388- {
4389- // 16bit 8x8 transpose pass 1
4390- dct_interleave16(row0, row4);
4391- dct_interleave16(row1, row5);
4392- dct_interleave16(row2, row6);
4393- dct_interleave16(row3, row7);
4394-
4395- // transpose pass 2
4396- dct_interleave16(row0, row2);
4397- dct_interleave16(row1, row3);
4398- dct_interleave16(row4, row6);
4399- dct_interleave16(row5, row7);
4400-
4401- // transpose pass 3
4402- dct_interleave16(row0, row1);
4403- dct_interleave16(row2, row3);
4404- dct_interleave16(row4, row5);
4405- dct_interleave16(row6, row7);
4406- }
4407-
4408- // row pass
4409- dct_pass(bias_1, 17);
4410-
4411- {
4412- // pack
4413- __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
4414- __m128i p1 = _mm_packus_epi16(row2, row3);
4415- __m128i p2 = _mm_packus_epi16(row4, row5);
4416- __m128i p3 = _mm_packus_epi16(row6, row7);
4417-
4418- // 8bit 8x8 transpose pass 1
4419- dct_interleave8(p0, p2); // a0e0a1e1...
4420- dct_interleave8(p1, p3); // c0g0c1g1...
4421-
4422- // transpose pass 2
4423- dct_interleave8(p0, p1); // a0c0e0g0...
4424- dct_interleave8(p2, p3); // b0d0f0h0...
4425-
4426- // transpose pass 3
4427- dct_interleave8(p0, p2); // a0b0c0d0...
4428- dct_interleave8(p1, p3); // a4b4c4d4...
4429-
4430- // store
4431- _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
4432- _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
4433- _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
4434- _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
4435- _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
4436- _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
4437- _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
4438- _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
4439- }
4440+static void
4441+stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
4442+{
4443+ // This is constructed to match our regular (generic) integer IDCT exactly.
4444+ __m128i row0, row1, row2, row3, row4, row5, row6, row7;
4445+ __m128i tmp;
4446+
4447+// dot product constant: even elems=x, odd elems=y
4448+#define dct_const(x, y) _mm_setr_epi16((x), (y), (x), (y), (x), (y), (x), (y))
4449+
4450+// out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
4451+// out(1) = c1[even]*x + c1[odd]*y
4452+#define dct_rot(out0, out1, x, y, c0, c1) \
4453+ __m128i c0##lo = _mm_unpacklo_epi16((x), (y)); \
4454+ __m128i c0##hi = _mm_unpackhi_epi16((x), (y)); \
4455+ __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
4456+ __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
4457+ __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
4458+ __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
4459+
4460+// out = in << 12 (in 16-bit, out 32-bit)
4461+#define dct_widen(out, in) \
4462+ __m128i out##_l = \
4463+ _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
4464+ __m128i out##_h = \
4465+ _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
4466+
4467+// wide add
4468+#define dct_wadd(out, a, b) \
4469+ __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
4470+ __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
4471+
4472+// wide sub
4473+#define dct_wsub(out, a, b) \
4474+ __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
4475+ __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
4476+
4477+// butterfly a/b, add bias, then shift by "s" and pack
4478+#define dct_bfly32o(out0, out1, a, b, bias, s) \
4479+ { \
4480+ __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
4481+ __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
4482+ dct_wadd(sum, abiased, b); \
4483+ dct_wsub(dif, abiased, b); \
4484+ out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), \
4485+ _mm_srai_epi32(sum_h, s)); \
4486+ out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), \
4487+ _mm_srai_epi32(dif_h, s)); \
4488+ }
4489+
4490+// 8-bit interleave step (for transposes)
4491+#define dct_interleave8(a, b) \
4492+ tmp = a; \
4493+ a = _mm_unpacklo_epi8(a, b); \
4494+ b = _mm_unpackhi_epi8(tmp, b)
4495+
4496+// 16-bit interleave step (for transposes)
4497+#define dct_interleave16(a, b) \
4498+ tmp = a; \
4499+ a = _mm_unpacklo_epi16(a, b); \
4500+ b = _mm_unpackhi_epi16(tmp, b)
4501+
4502+#define dct_pass(bias, shift) \
4503+ { \
4504+ /* even part */ \
4505+ dct_rot(t2e, t3e, row2, row6, rot0_0, rot0_1); \
4506+ __m128i sum04 = _mm_add_epi16(row0, row4); \
4507+ __m128i dif04 = _mm_sub_epi16(row0, row4); \
4508+ dct_widen(t0e, sum04); \
4509+ dct_widen(t1e, dif04); \
4510+ dct_wadd(x0, t0e, t3e); \
4511+ dct_wsub(x3, t0e, t3e); \
4512+ dct_wadd(x1, t1e, t2e); \
4513+ dct_wsub(x2, t1e, t2e); \
4514+ /* odd part */ \
4515+ dct_rot(y0o, y2o, row7, row3, rot2_0, rot2_1); \
4516+ dct_rot(y1o, y3o, row5, row1, rot3_0, rot3_1); \
4517+ __m128i sum17 = _mm_add_epi16(row1, row7); \
4518+ __m128i sum35 = _mm_add_epi16(row3, row5); \
4519+ dct_rot(y4o, y5o, sum17, sum35, rot1_0, rot1_1); \
4520+ dct_wadd(x4, y0o, y4o); \
4521+ dct_wadd(x5, y1o, y5o); \
4522+ dct_wadd(x6, y2o, y5o); \
4523+ dct_wadd(x7, y3o, y4o); \
4524+ dct_bfly32o(row0, row7, x0, x7, bias, shift); \
4525+ dct_bfly32o(row1, row6, x1, x6, bias, shift); \
4526+ dct_bfly32o(row2, row5, x2, x5, bias, shift); \
4527+ dct_bfly32o(row3, row4, x3, x4, bias, shift); \
4528+ }
4529+
4530+ __m128i rot0_0 =
4531+ dct_const(stbi__f2f(0.5411961f),
4532+ stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
4533+ __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f),
4534+ stbi__f2f(0.5411961f));
4535+ __m128i rot1_0 =
4536+ dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f),
4537+ stbi__f2f(1.175875602f));
4538+ __m128i rot1_1 =
4539+ dct_const(stbi__f2f(1.175875602f),
4540+ stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
4541+ __m128i rot2_0 =
4542+ dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f),
4543+ stbi__f2f(-1.961570560f));
4544+ __m128i rot2_1 =
4545+ dct_const(stbi__f2f(-1.961570560f),
4546+ stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));
4547+ __m128i rot3_0 =
4548+ dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f),
4549+ stbi__f2f(-0.390180644f));
4550+ __m128i rot3_1 =
4551+ dct_const(stbi__f2f(-0.390180644f),
4552+ stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));
4553+
4554+ // rounding biases in column/row passes, see stbi__idct_block for
4555+ // explanation.
4556+ __m128i bias_0 = _mm_set1_epi32(512);
4557+ __m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));
4558+
4559+ // load
4560+ row0 = _mm_load_si128((const __m128i *)(data + 0 * 8));
4561+ row1 = _mm_load_si128((const __m128i *)(data + 1 * 8));
4562+ row2 = _mm_load_si128((const __m128i *)(data + 2 * 8));
4563+ row3 = _mm_load_si128((const __m128i *)(data + 3 * 8));
4564+ row4 = _mm_load_si128((const __m128i *)(data + 4 * 8));
4565+ row5 = _mm_load_si128((const __m128i *)(data + 5 * 8));
4566+ row6 = _mm_load_si128((const __m128i *)(data + 6 * 8));
4567+ row7 = _mm_load_si128((const __m128i *)(data + 7 * 8));
4568+
4569+ // column pass
4570+ dct_pass(bias_0, 10);
4571+
4572+ {
4573+ // 16bit 8x8 transpose pass 1
4574+ dct_interleave16(row0, row4);
4575+ dct_interleave16(row1, row5);
4576+ dct_interleave16(row2, row6);
4577+ dct_interleave16(row3, row7);
4578+
4579+ // transpose pass 2
4580+ dct_interleave16(row0, row2);
4581+ dct_interleave16(row1, row3);
4582+ dct_interleave16(row4, row6);
4583+ dct_interleave16(row5, row7);
4584+
4585+ // transpose pass 3
4586+ dct_interleave16(row0, row1);
4587+ dct_interleave16(row2, row3);
4588+ dct_interleave16(row4, row5);
4589+ dct_interleave16(row6, row7);
4590+ }
4591+
4592+ // row pass
4593+ dct_pass(bias_1, 17);
4594+
4595+ {
4596+ // pack
4597+ __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
4598+ __m128i p1 = _mm_packus_epi16(row2, row3);
4599+ __m128i p2 = _mm_packus_epi16(row4, row5);
4600+ __m128i p3 = _mm_packus_epi16(row6, row7);
4601+
4602+ // 8bit 8x8 transpose pass 1
4603+ dct_interleave8(p0, p2); // a0e0a1e1...
4604+ dct_interleave8(p1, p3); // c0g0c1g1...
4605+
4606+ // transpose pass 2
4607+ dct_interleave8(p0, p1); // a0c0e0g0...
4608+ dct_interleave8(p2, p3); // b0d0f0h0...
4609+
4610+ // transpose pass 3
4611+ dct_interleave8(p0, p2); // a0b0c0d0...
4612+ dct_interleave8(p1, p3); // a4b4c4d4...
4613+
4614+ // store
4615+ _mm_storel_epi64((__m128i *)out, p0);
4616+ out += out_stride;
4617+ _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p0, 0x4e));
4618+ out += out_stride;
4619+ _mm_storel_epi64((__m128i *)out, p2);
4620+ out += out_stride;
4621+ _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p2, 0x4e));
4622+ out += out_stride;
4623+ _mm_storel_epi64((__m128i *)out, p1);
4624+ out += out_stride;
4625+ _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p1, 0x4e));
4626+ out += out_stride;
4627+ _mm_storel_epi64((__m128i *)out, p3);
4628+ out += out_stride;
4629+ _mm_storel_epi64((__m128i *)out, _mm_shuffle_epi32(p3, 0x4e));
4630+ }
4631
4632 #undef dct_const
4633 #undef dct_rot
4634@@ -2708,198 +3306,240 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
4635
4636 // NEON integer IDCT. should produce bit-identical
4637 // results to the generic C version.
4638-static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
4639-{
4640- int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
4641-
4642- int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
4643- int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
4644- int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
4645- int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
4646- int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
4647- int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
4648- int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
4649- int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
4650- int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
4651- int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
4652- int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
4653- int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
4654-
4655-#define dct_long_mul(out, inq, coeff) \
4656- int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
4657- int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
4658-
4659-#define dct_long_mac(out, acc, inq, coeff) \
4660- int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
4661- int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
4662-
4663-#define dct_widen(out, inq) \
4664- int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
4665- int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
4666+static void
4667+stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
4668+{
4669+ int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
4670+
4671+ int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
4672+ int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
4673+ int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));
4674+ int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));
4675+ int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
4676+ int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
4677+ int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
4678+ int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
4679+ int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));
4680+ int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));
4681+ int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));
4682+ int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));
4683+
4684+#define dct_long_mul(out, inq, coeff) \
4685+ int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
4686+ int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
4687+
4688+#define dct_long_mac(out, acc, inq, coeff) \
4689+ int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
4690+ int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
4691+
4692+#define dct_widen(out, inq) \
4693+ int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
4694+ int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
4695
4696 // wide add
4697-#define dct_wadd(out, a, b) \
4698- int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
4699- int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
4700+#define dct_wadd(out, a, b) \
4701+ int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
4702+ int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
4703
4704 // wide sub
4705-#define dct_wsub(out, a, b) \
4706- int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
4707- int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
4708+#define dct_wsub(out, a, b) \
4709+ int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
4710+ int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
4711
4712 // butterfly a/b, then shift using "shiftop" by "s" and pack
4713-#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
4714- { \
4715- dct_wadd(sum, a, b); \
4716- dct_wsub(dif, a, b); \
4717- out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
4718- out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
4719- }
4720-
4721-#define dct_pass(shiftop, shift) \
4722- { \
4723- /* even part */ \
4724- int16x8_t sum26 = vaddq_s16(row2, row6); \
4725- dct_long_mul(p1e, sum26, rot0_0); \
4726- dct_long_mac(t2e, p1e, row6, rot0_1); \
4727- dct_long_mac(t3e, p1e, row2, rot0_2); \
4728- int16x8_t sum04 = vaddq_s16(row0, row4); \
4729- int16x8_t dif04 = vsubq_s16(row0, row4); \
4730- dct_widen(t0e, sum04); \
4731- dct_widen(t1e, dif04); \
4732- dct_wadd(x0, t0e, t3e); \
4733- dct_wsub(x3, t0e, t3e); \
4734- dct_wadd(x1, t1e, t2e); \
4735- dct_wsub(x2, t1e, t2e); \
4736- /* odd part */ \
4737- int16x8_t sum15 = vaddq_s16(row1, row5); \
4738- int16x8_t sum17 = vaddq_s16(row1, row7); \
4739- int16x8_t sum35 = vaddq_s16(row3, row5); \
4740- int16x8_t sum37 = vaddq_s16(row3, row7); \
4741- int16x8_t sumodd = vaddq_s16(sum17, sum35); \
4742- dct_long_mul(p5o, sumodd, rot1_0); \
4743- dct_long_mac(p1o, p5o, sum17, rot1_1); \
4744- dct_long_mac(p2o, p5o, sum35, rot1_2); \
4745- dct_long_mul(p3o, sum37, rot2_0); \
4746- dct_long_mul(p4o, sum15, rot2_1); \
4747- dct_wadd(sump13o, p1o, p3o); \
4748- dct_wadd(sump24o, p2o, p4o); \
4749- dct_wadd(sump23o, p2o, p3o); \
4750- dct_wadd(sump14o, p1o, p4o); \
4751- dct_long_mac(x4, sump13o, row7, rot3_0); \
4752- dct_long_mac(x5, sump24o, row5, rot3_1); \
4753- dct_long_mac(x6, sump23o, row3, rot3_2); \
4754- dct_long_mac(x7, sump14o, row1, rot3_3); \
4755- dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
4756- dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
4757- dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
4758- dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
4759- }
4760-
4761- // load
4762- row0 = vld1q_s16(data + 0*8);
4763- row1 = vld1q_s16(data + 1*8);
4764- row2 = vld1q_s16(data + 2*8);
4765- row3 = vld1q_s16(data + 3*8);
4766- row4 = vld1q_s16(data + 4*8);
4767- row5 = vld1q_s16(data + 5*8);
4768- row6 = vld1q_s16(data + 6*8);
4769- row7 = vld1q_s16(data + 7*8);
4770-
4771- // add DC bias
4772- row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
4773-
4774- // column pass
4775- dct_pass(vrshrn_n_s32, 10);
4776-
4777- // 16bit 8x8 transpose
4778- {
4779+#define dct_bfly32o(out0, out1, a, b, shiftop, s) \
4780+ { \
4781+ dct_wadd(sum, a, b); \
4782+ dct_wsub(dif, a, b); \
4783+ out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
4784+ out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
4785+ }
4786+
4787+#define dct_pass(shiftop, shift) \
4788+ { \
4789+ /* even part */ \
4790+ int16x8_t sum26 = vaddq_s16(row2, row6); \
4791+ dct_long_mul(p1e, sum26, rot0_0); \
4792+ dct_long_mac(t2e, p1e, row6, rot0_1); \
4793+ dct_long_mac(t3e, p1e, row2, rot0_2); \
4794+ int16x8_t sum04 = vaddq_s16(row0, row4); \
4795+ int16x8_t dif04 = vsubq_s16(row0, row4); \
4796+ dct_widen(t0e, sum04); \
4797+ dct_widen(t1e, dif04); \
4798+ dct_wadd(x0, t0e, t3e); \
4799+ dct_wsub(x3, t0e, t3e); \
4800+ dct_wadd(x1, t1e, t2e); \
4801+ dct_wsub(x2, t1e, t2e); \
4802+ /* odd part */ \
4803+ int16x8_t sum15 = vaddq_s16(row1, row5); \
4804+ int16x8_t sum17 = vaddq_s16(row1, row7); \
4805+ int16x8_t sum35 = vaddq_s16(row3, row5); \
4806+ int16x8_t sum37 = vaddq_s16(row3, row7); \
4807+ int16x8_t sumodd = vaddq_s16(sum17, sum35); \
4808+ dct_long_mul(p5o, sumodd, rot1_0); \
4809+ dct_long_mac(p1o, p5o, sum17, rot1_1); \
4810+ dct_long_mac(p2o, p5o, sum35, rot1_2); \
4811+ dct_long_mul(p3o, sum37, rot2_0); \
4812+ dct_long_mul(p4o, sum15, rot2_1); \
4813+ dct_wadd(sump13o, p1o, p3o); \
4814+ dct_wadd(sump24o, p2o, p4o); \
4815+ dct_wadd(sump23o, p2o, p3o); \
4816+ dct_wadd(sump14o, p1o, p4o); \
4817+ dct_long_mac(x4, sump13o, row7, rot3_0); \
4818+ dct_long_mac(x5, sump24o, row5, rot3_1); \
4819+ dct_long_mac(x6, sump23o, row3, rot3_2); \
4820+ dct_long_mac(x7, sump14o, row1, rot3_3); \
4821+ dct_bfly32o(row0, row7, x0, x7, shiftop, shift); \
4822+ dct_bfly32o(row1, row6, x1, x6, shiftop, shift); \
4823+ dct_bfly32o(row2, row5, x2, x5, shiftop, shift); \
4824+ dct_bfly32o(row3, row4, x3, x4, shiftop, shift); \
4825+ }
4826+
4827+ // load
4828+ row0 = vld1q_s16(data + 0 * 8);
4829+ row1 = vld1q_s16(data + 1 * 8);
4830+ row2 = vld1q_s16(data + 2 * 8);
4831+ row3 = vld1q_s16(data + 3 * 8);
4832+ row4 = vld1q_s16(data + 4 * 8);
4833+ row5 = vld1q_s16(data + 5 * 8);
4834+ row6 = vld1q_s16(data + 6 * 8);
4835+ row7 = vld1q_s16(data + 7 * 8);
4836+
4837+ // add DC bias
4838+ row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
4839+
4840+ // column pass
4841+ dct_pass(vrshrn_n_s32, 10);
4842+
4843+ // 16bit 8x8 transpose
4844+ {
4845 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
4846 // whether compilers actually get this is another story, sadly.
4847-#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
4848-#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
4849-#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
4850-
4851- // pass 1
4852- dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
4853- dct_trn16(row2, row3);
4854- dct_trn16(row4, row5);
4855- dct_trn16(row6, row7);
4856-
4857- // pass 2
4858- dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
4859- dct_trn32(row1, row3);
4860- dct_trn32(row4, row6);
4861- dct_trn32(row5, row7);
4862-
4863- // pass 3
4864- dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
4865- dct_trn64(row1, row5);
4866- dct_trn64(row2, row6);
4867- dct_trn64(row3, row7);
4868+#define dct_trn16(x, y) \
4869+ { \
4870+ int16x8x2_t t = vtrnq_s16(x, y); \
4871+ x = t.val[0]; \
4872+ y = t.val[1]; \
4873+ }
4874+#define dct_trn32(x, y) \
4875+ { \
4876+ int32x4x2_t t = \
4877+ vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); \
4878+ x = vreinterpretq_s16_s32(t.val[0]); \
4879+ y = vreinterpretq_s16_s32(t.val[1]); \
4880+ }
4881+#define dct_trn64(x, y) \
4882+ { \
4883+ int16x8_t x0 = x; \
4884+ int16x8_t y0 = y; \
4885+ x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); \
4886+ y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); \
4887+ }
4888+
4889+ // pass 1
4890+ dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
4891+ dct_trn16(row2, row3);
4892+ dct_trn16(row4, row5);
4893+ dct_trn16(row6, row7);
4894+
4895+ // pass 2
4896+ dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
4897+ dct_trn32(row1, row3);
4898+ dct_trn32(row4, row6);
4899+ dct_trn32(row5, row7);
4900+
4901+ // pass 3
4902+ dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
4903+ dct_trn64(row1, row5);
4904+ dct_trn64(row2, row6);
4905+ dct_trn64(row3, row7);
4906
4907 #undef dct_trn16
4908 #undef dct_trn32
4909 #undef dct_trn64
4910- }
4911-
4912- // row pass
4913- // vrshrn_n_s32 only supports shifts up to 16, we need
4914- // 17. so do a non-rounding shift of 16 first then follow
4915- // up with a rounding shift by 1.
4916- dct_pass(vshrn_n_s32, 16);
4917-
4918- {
4919- // pack and round
4920- uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
4921- uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
4922- uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
4923- uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
4924- uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
4925- uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
4926- uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
4927- uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
4928-
4929- // again, these can translate into one instruction, but often don't.
4930-#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
4931-#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
4932-#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
4933-
4934- // sadly can't use interleaved stores here since we only write
4935- // 8 bytes to each scan line!
4936-
4937- // 8x8 8-bit transpose pass 1
4938- dct_trn8_8(p0, p1);
4939- dct_trn8_8(p2, p3);
4940- dct_trn8_8(p4, p5);
4941- dct_trn8_8(p6, p7);
4942-
4943- // pass 2
4944- dct_trn8_16(p0, p2);
4945- dct_trn8_16(p1, p3);
4946- dct_trn8_16(p4, p6);
4947- dct_trn8_16(p5, p7);
4948-
4949- // pass 3
4950- dct_trn8_32(p0, p4);
4951- dct_trn8_32(p1, p5);
4952- dct_trn8_32(p2, p6);
4953- dct_trn8_32(p3, p7);
4954-
4955- // store
4956- vst1_u8(out, p0); out += out_stride;
4957- vst1_u8(out, p1); out += out_stride;
4958- vst1_u8(out, p2); out += out_stride;
4959- vst1_u8(out, p3); out += out_stride;
4960- vst1_u8(out, p4); out += out_stride;
4961- vst1_u8(out, p5); out += out_stride;
4962- vst1_u8(out, p6); out += out_stride;
4963- vst1_u8(out, p7);
4964+ }
4965+
4966+ // row pass
4967+ // vrshrn_n_s32 only supports shifts up to 16, we need
4968+ // 17. so do a non-rounding shift of 16 first then follow
4969+ // up with a rounding shift by 1.
4970+ dct_pass(vshrn_n_s32, 16);
4971+
4972+ {
4973+ // pack and round
4974+ uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
4975+ uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
4976+ uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
4977+ uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
4978+ uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
4979+ uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
4980+ uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
4981+ uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
4982+
4983+ // again, these can translate into one instruction, but often don't.
4984+#define dct_trn8_8(x, y) \
4985+ { \
4986+ uint8x8x2_t t = vtrn_u8(x, y); \
4987+ x = t.val[0]; \
4988+ y = t.val[1]; \
4989+ }
4990+#define dct_trn8_16(x, y) \
4991+ { \
4992+ uint16x4x2_t t = \
4993+ vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); \
4994+ x = vreinterpret_u8_u16(t.val[0]); \
4995+ y = vreinterpret_u8_u16(t.val[1]); \
4996+ }
4997+#define dct_trn8_32(x, y) \
4998+ { \
4999+ uint32x2x2_t t = \
5000+ vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); \
5001+ x = vreinterpret_u8_u32(t.val[0]); \
5002+ y = vreinterpret_u8_u32(t.val[1]); \
5003+ }
5004+
5005+ // sadly can't use interleaved stores here since we only write
5006+ // 8 bytes to each scan line!
5007+
5008+ // 8x8 8-bit transpose pass 1
5009+ dct_trn8_8(p0, p1);
5010+ dct_trn8_8(p2, p3);
5011+ dct_trn8_8(p4, p5);
5012+ dct_trn8_8(p6, p7);
5013+
5014+ // pass 2
5015+ dct_trn8_16(p0, p2);
5016+ dct_trn8_16(p1, p3);
5017+ dct_trn8_16(p4, p6);
5018+ dct_trn8_16(p5, p7);
5019+
5020+ // pass 3
5021+ dct_trn8_32(p0, p4);
5022+ dct_trn8_32(p1, p5);
5023+ dct_trn8_32(p2, p6);
5024+ dct_trn8_32(p3, p7);
5025+
5026+ // store
5027+ vst1_u8(out, p0);
5028+ out += out_stride;
5029+ vst1_u8(out, p1);
5030+ out += out_stride;
5031+ vst1_u8(out, p2);
5032+ out += out_stride;
5033+ vst1_u8(out, p3);
5034+ out += out_stride;
5035+ vst1_u8(out, p4);
5036+ out += out_stride;
5037+ vst1_u8(out, p5);
5038+ out += out_stride;
5039+ vst1_u8(out, p6);
5040+ out += out_stride;
5041+ vst1_u8(out, p7);
5042
5043 #undef dct_trn8_8
5044 #undef dct_trn8_16
5045 #undef dct_trn8_32
5046- }
5047+ }
5048
5049 #undef dct_long_mul
5050 #undef dct_long_mac
5051@@ -2912,1169 +3552,1498 @@ static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
5052
5053 #endif // STBI_NEON
5054
5055-#define STBI__MARKER_none 0xff
5056+#define STBI__MARKER_none 0xff
5057 // if there's a pending marker from the entropy stream, return that
5058 // otherwise, fetch from the stream and get a marker. if there's no
5059 // marker, return 0xff, which is never a valid marker value
5060-static stbi_uc stbi__get_marker(stbi__jpeg *j)
5061-{
5062- stbi_uc x;
5063- if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
5064- x = stbi__get8(j->s);
5065- if (x != 0xff) return STBI__MARKER_none;
5066- while (x == 0xff)
5067- x = stbi__get8(j->s); // consume repeated 0xff fill bytes
5068- return x;
5069+static stbi_uc
5070+stbi__get_marker(stbi__jpeg *j)
5071+{
5072+ stbi_uc x;
5073+ if (j->marker != STBI__MARKER_none) {
5074+ x = j->marker;
5075+ j->marker = STBI__MARKER_none;
5076+ return x;
5077+ }
5078+ x = stbi__get8(j->s);
5079+ if (x != 0xff) {
5080+ return STBI__MARKER_none;
5081+ }
5082+ while (x == 0xff) {
5083+ x = stbi__get8(j->s); // consume repeated 0xff fill bytes
5084+ }
5085+ return x;
5086 }
5087
5088 // in each scan, we'll have scan_n components, and the order
5089 // of the components is specified by order[]
5090-#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
5091+#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
5092
5093 // after a restart interval, stbi__jpeg_reset the entropy decoder and
5094 // the dc prediction
5095-static void stbi__jpeg_reset(stbi__jpeg *j)
5096-{
5097- j->code_bits = 0;
5098- j->code_buffer = 0;
5099- j->nomore = 0;
5100- j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
5101- j->marker = STBI__MARKER_none;
5102- j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
5103- j->eob_run = 0;
5104- // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
5105- // since we don't even allow 1<<30 pixels
5106-}
5107-
5108-static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
5109-{
5110- stbi__jpeg_reset(z);
5111- if (!z->progressive) {
5112- if (z->scan_n == 1) {
5113- int i,j;
5114- STBI_SIMD_ALIGN(short, data[64]);
5115- int n = z->order[0];
5116- // non-interleaved data, we just need to process one block at a time,
5117- // in trivial scanline order
5118- // number of blocks to do just depends on how many actual "pixels" this
5119- // component has, independent of interleaved MCU blocking and such
5120- int w = (z->img_comp[n].x+7) >> 3;
5121- int h = (z->img_comp[n].y+7) >> 3;
5122- for (j=0; j < h; ++j) {
5123- for (i=0; i < w; ++i) {
5124- int ha = z->img_comp[n].ha;
5125- if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
5126- z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
5127- // every data block is an MCU, so countdown the restart interval
5128- if (--z->todo <= 0) {
5129- if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
5130- // if it's NOT a restart, then just bail, so we get corrupt data
5131- // rather than no data
5132- if (!STBI__RESTART(z->marker)) return 1;
5133- stbi__jpeg_reset(z);
5134- }
5135- }
5136- }
5137- return 1;
5138- } else { // interleaved
5139- int i,j,k,x,y;
5140- STBI_SIMD_ALIGN(short, data[64]);
5141- for (j=0; j < z->img_mcu_y; ++j) {
5142- for (i=0; i < z->img_mcu_x; ++i) {
5143- // scan an interleaved mcu... process scan_n components in order
5144- for (k=0; k < z->scan_n; ++k) {
5145- int n = z->order[k];
5146- // scan out an mcu's worth of this component; that's just determined
5147- // by the basic H and V specified for the component
5148- for (y=0; y < z->img_comp[n].v; ++y) {
5149- for (x=0; x < z->img_comp[n].h; ++x) {
5150- int x2 = (i*z->img_comp[n].h + x)*8;
5151- int y2 = (j*z->img_comp[n].v + y)*8;
5152- int ha = z->img_comp[n].ha;
5153- if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
5154- z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
5155- }
5156- }
5157- }
5158- // after all interleaved components, that's an interleaved MCU,
5159- // so now count down the restart interval
5160- if (--z->todo <= 0) {
5161- if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
5162- if (!STBI__RESTART(z->marker)) return 1;
5163- stbi__jpeg_reset(z);
5164- }
5165- }
5166- }
5167- return 1;
5168- }
5169- } else {
5170- if (z->scan_n == 1) {
5171- int i,j;
5172- int n = z->order[0];
5173- // non-interleaved data, we just need to process one block at a time,
5174- // in trivial scanline order
5175- // number of blocks to do just depends on how many actual "pixels" this
5176- // component has, independent of interleaved MCU blocking and such
5177- int w = (z->img_comp[n].x+7) >> 3;
5178- int h = (z->img_comp[n].y+7) >> 3;
5179- for (j=0; j < h; ++j) {
5180- for (i=0; i < w; ++i) {
5181- short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
5182- if (z->spec_start == 0) {
5183- if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
5184- return 0;
5185- } else {
5186- int ha = z->img_comp[n].ha;
5187- if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
5188- return 0;
5189- }
5190- // every data block is an MCU, so countdown the restart interval
5191- if (--z->todo <= 0) {
5192- if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
5193- if (!STBI__RESTART(z->marker)) return 1;
5194- stbi__jpeg_reset(z);
5195- }
5196- }
5197- }
5198- return 1;
5199- } else { // interleaved
5200- int i,j,k,x,y;
5201- for (j=0; j < z->img_mcu_y; ++j) {
5202- for (i=0; i < z->img_mcu_x; ++i) {
5203- // scan an interleaved mcu... process scan_n components in order
5204- for (k=0; k < z->scan_n; ++k) {
5205- int n = z->order[k];
5206- // scan out an mcu's worth of this component; that's just determined
5207- // by the basic H and V specified for the component
5208- for (y=0; y < z->img_comp[n].v; ++y) {
5209- for (x=0; x < z->img_comp[n].h; ++x) {
5210- int x2 = (i*z->img_comp[n].h + x);
5211- int y2 = (j*z->img_comp[n].v + y);
5212- short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
5213- if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
5214- return 0;
5215- }
5216- }
5217- }
5218- // after all interleaved components, that's an interleaved MCU,
5219- // so now count down the restart interval
5220- if (--z->todo <= 0) {
5221- if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
5222- if (!STBI__RESTART(z->marker)) return 1;
5223- stbi__jpeg_reset(z);
5224- }
5225- }
5226- }
5227- return 1;
5228- }
5229- }
5230-}
5231-
5232-static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
5233-{
5234- int i;
5235- for (i=0; i < 64; ++i)
5236- data[i] *= dequant[i];
5237-}
5238-
5239-static void stbi__jpeg_finish(stbi__jpeg *z)
5240-{
5241- if (z->progressive) {
5242- // dequantize and idct the data
5243- int i,j,n;
5244- for (n=0; n < z->s->img_n; ++n) {
5245- int w = (z->img_comp[n].x+7) >> 3;
5246- int h = (z->img_comp[n].y+7) >> 3;
5247- for (j=0; j < h; ++j) {
5248- for (i=0; i < w; ++i) {
5249- short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
5250- stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
5251- z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
5252- }
5253- }
5254- }
5255- }
5256-}
5257-
5258-static int stbi__process_marker(stbi__jpeg *z, int m)
5259-{
5260- int L;
5261- switch (m) {
5262- case STBI__MARKER_none: // no marker found
5263- return stbi__err("expected marker","Corrupt JPEG");
5264-
5265- case 0xDD: // DRI - specify restart interval
5266- if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
5267- z->restart_interval = stbi__get16be(z->s);
5268- return 1;
5269-
5270- case 0xDB: // DQT - define quantization table
5271- L = stbi__get16be(z->s)-2;
5272- while (L > 0) {
5273- int q = stbi__get8(z->s);
5274- int p = q >> 4, sixteen = (p != 0);
5275- int t = q & 15,i;
5276- if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
5277- if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
5278-
5279- for (i=0; i < 64; ++i)
5280- z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
5281- L -= (sixteen ? 129 : 65);
5282- }
5283- return L==0;
5284-
5285- case 0xC4: // DHT - define huffman table
5286- L = stbi__get16be(z->s)-2;
5287- while (L > 0) {
5288- stbi_uc *v;
5289- int sizes[16],i,n=0;
5290- int q = stbi__get8(z->s);
5291- int tc = q >> 4;
5292- int th = q & 15;
5293- if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
5294- for (i=0; i < 16; ++i) {
5295- sizes[i] = stbi__get8(z->s);
5296- n += sizes[i];
5297- }
5298- if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
5299- L -= 17;
5300- if (tc == 0) {
5301- if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
5302- v = z->huff_dc[th].values;
5303- } else {
5304- if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
5305- v = z->huff_ac[th].values;
5306- }
5307- for (i=0; i < n; ++i)
5308- v[i] = stbi__get8(z->s);
5309- if (tc != 0)
5310- stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
5311- L -= n;
5312- }
5313- return L==0;
5314- }
5315-
5316- // check for comment block or APP blocks
5317- if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
5318- L = stbi__get16be(z->s);
5319- if (L < 2) {
5320- if (m == 0xFE)
5321- return stbi__err("bad COM len","Corrupt JPEG");
5322- else
5323- return stbi__err("bad APP len","Corrupt JPEG");
5324- }
5325- L -= 2;
5326-
5327- if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
5328- static const unsigned char tag[5] = {'J','F','I','F','\0'};
5329- int ok = 1;
5330- int i;
5331- for (i=0; i < 5; ++i)
5332- if (stbi__get8(z->s) != tag[i])
5333- ok = 0;
5334- L -= 5;
5335- if (ok)
5336- z->jfif = 1;
5337- } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
5338- static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
5339- int ok = 1;
5340- int i;
5341- for (i=0; i < 6; ++i)
5342- if (stbi__get8(z->s) != tag[i])
5343- ok = 0;
5344- L -= 6;
5345- if (ok) {
5346- stbi__get8(z->s); // version
5347- stbi__get16be(z->s); // flags0
5348- stbi__get16be(z->s); // flags1
5349- z->app14_color_transform = stbi__get8(z->s); // color transform
5350- L -= 6;
5351- }
5352- }
5353-
5354- stbi__skip(z->s, L);
5355- return 1;
5356- }
5357-
5358- return stbi__err("unknown marker","Corrupt JPEG");
5359+static void
5360+stbi__jpeg_reset(stbi__jpeg *j)
5361+{
5362+ j->code_bits = 0;
5363+ j->code_buffer = 0;
5364+ j->nomore = 0;
5365+ j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred =
5366+ j->img_comp[3].dc_pred = 0;
5367+ j->marker = STBI__MARKER_none;
5368+ j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
5369+ j->eob_run = 0;
5370+ // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
5371+ // since we don't even allow 1<<30 pixels
5372+}
5373+
5374+static int
5375+stbi__parse_entropy_coded_data(stbi__jpeg *z)
5376+{
5377+ stbi__jpeg_reset(z);
5378+ if (!z->progressive) {
5379+ if (z->scan_n == 1) {
5380+ int i, j;
5381+ STBI_SIMD_ALIGN(short, data[64]);
5382+ int n = z->order[0];
5383+ // non-interleaved data, we just need to process one block at a
5384+ // time, in trivial scanline order number of blocks to do just
5385+ // depends on how many actual "pixels" this component has,
5386+ // independent of interleaved MCU blocking and such
5387+ int w = (z->img_comp[n].x + 7) >> 3;
5388+ int h = (z->img_comp[n].y + 7) >> 3;
5389+ for (j = 0; j < h; ++j) {
5390+ for (i = 0; i < w; ++i) {
5391+ int ha = z->img_comp[n].ha;
5392+ if (!stbi__jpeg_decode_block(
5393+ z, data, z->huff_dc + z->img_comp[n].hd,
5394+ z->huff_ac + ha, z->fast_ac[ha], n,
5395+ z->dequant[z->img_comp[n].tq])) {
5396+ return 0;
5397+ }
5398+ z->idct_block_kernel(z->img_comp[n].data +
5399+ z->img_comp[n].w2 * j * 8 + i * 8,
5400+ z->img_comp[n].w2, data);
5401+ // every data block is an MCU, so countdown the restart
5402+ // interval
5403+ if (--z->todo <= 0) {
5404+ if (z->code_bits < 24) {
5405+ stbi__grow_buffer_unsafe(z);
5406+ }
5407+ // if it's NOT a restart, then just bail, so we get
5408+ // corrupt data rather than no data
5409+ if (!STBI__RESTART(z->marker)) {
5410+ return 1;
5411+ }
5412+ stbi__jpeg_reset(z);
5413+ }
5414+ }
5415+ }
5416+ return 1;
5417+ } else { // interleaved
5418+ int i, j, k, x, y;
5419+ STBI_SIMD_ALIGN(short, data[64]);
5420+ for (j = 0; j < z->img_mcu_y; ++j) {
5421+ for (i = 0; i < z->img_mcu_x; ++i) {
5422+ // scan an interleaved mcu... process scan_n components in
5423+ // order
5424+ for (k = 0; k < z->scan_n; ++k) {
5425+ int n = z->order[k];
5426+ // scan out an mcu's worth of this component; that's
5427+ // just determined by the basic H and V specified for
5428+ // the component
5429+ for (y = 0; y < z->img_comp[n].v; ++y) {
5430+ for (x = 0; x < z->img_comp[n].h; ++x) {
5431+ int x2 = (i * z->img_comp[n].h + x) * 8;
5432+ int y2 = (j * z->img_comp[n].v + y) * 8;
5433+ int ha = z->img_comp[n].ha;
5434+ if (!stbi__jpeg_decode_block(
5435+ z, data, z->huff_dc + z->img_comp[n].hd,
5436+ z->huff_ac + ha, z->fast_ac[ha], n,
5437+ z->dequant[z->img_comp[n].tq])) {
5438+ return 0;
5439+ }
5440+ z->idct_block_kernel(
5441+ z->img_comp[n].data +
5442+ z->img_comp[n].w2 * y2 + x2,
5443+ z->img_comp[n].w2, data);
5444+ }
5445+ }
5446+ }
5447+ // after all interleaved components, that's an interleaved
5448+ // MCU, so now count down the restart interval
5449+ if (--z->todo <= 0) {
5450+ if (z->code_bits < 24) {
5451+ stbi__grow_buffer_unsafe(z);
5452+ }
5453+ if (!STBI__RESTART(z->marker)) {
5454+ return 1;
5455+ }
5456+ stbi__jpeg_reset(z);
5457+ }
5458+ }
5459+ }
5460+ return 1;
5461+ }
5462+ } else {
5463+ if (z->scan_n == 1) {
5464+ int i, j;
5465+ int n = z->order[0];
5466+ // non-interleaved data, we just need to process one block at a
5467+ // time, in trivial scanline order number of blocks to do just
5468+ // depends on how many actual "pixels" this component has,
5469+ // independent of interleaved MCU blocking and such
5470+ int w = (z->img_comp[n].x + 7) >> 3;
5471+ int h = (z->img_comp[n].y + 7) >> 3;
5472+ for (j = 0; j < h; ++j) {
5473+ for (i = 0; i < w; ++i) {
5474+ short *data = z->img_comp[n].coeff +
5475+ 64 * (i + j * z->img_comp[n].coeff_w);
5476+ if (z->spec_start == 0) {
5477+ if (!stbi__jpeg_decode_block_prog_dc(
5478+ z, data, &z->huff_dc[z->img_comp[n].hd], n)) {
5479+ return 0;
5480+ }
5481+ } else {
5482+ int ha = z->img_comp[n].ha;
5483+ if (!stbi__jpeg_decode_block_prog_ac(
5484+ z, data, &z->huff_ac[ha], z->fast_ac[ha])) {
5485+ return 0;
5486+ }
5487+ }
5488+ // every data block is an MCU, so countdown the restart
5489+ // interval
5490+ if (--z->todo <= 0) {
5491+ if (z->code_bits < 24) {
5492+ stbi__grow_buffer_unsafe(z);
5493+ }
5494+ if (!STBI__RESTART(z->marker)) {
5495+ return 1;
5496+ }
5497+ stbi__jpeg_reset(z);
5498+ }
5499+ }
5500+ }
5501+ return 1;
5502+ } else { // interleaved
5503+ int i, j, k, x, y;
5504+ for (j = 0; j < z->img_mcu_y; ++j) {
5505+ for (i = 0; i < z->img_mcu_x; ++i) {
5506+ // scan an interleaved mcu... process scan_n components in
5507+ // order
5508+ for (k = 0; k < z->scan_n; ++k) {
5509+ int n = z->order[k];
5510+ // scan out an mcu's worth of this component; that's
5511+ // just determined by the basic H and V specified for
5512+ // the component
5513+ for (y = 0; y < z->img_comp[n].v; ++y) {
5514+ for (x = 0; x < z->img_comp[n].h; ++x) {
5515+ int x2 = (i * z->img_comp[n].h + x);
5516+ int y2 = (j * z->img_comp[n].v + y);
5517+ short *data =
5518+ z->img_comp[n].coeff +
5519+ 64 * (x2 + y2 * z->img_comp[n].coeff_w);
5520+ if (!stbi__jpeg_decode_block_prog_dc(
5521+ z, data, &z->huff_dc[z->img_comp[n].hd],
5522+ n)) {
5523+ return 0;
5524+ }
5525+ }
5526+ }
5527+ }
5528+ // after all interleaved components, that's an interleaved
5529+ // MCU, so now count down the restart interval
5530+ if (--z->todo <= 0) {
5531+ if (z->code_bits < 24) {
5532+ stbi__grow_buffer_unsafe(z);
5533+ }
5534+ if (!STBI__RESTART(z->marker)) {
5535+ return 1;
5536+ }
5537+ stbi__jpeg_reset(z);
5538+ }
5539+ }
5540+ }
5541+ return 1;
5542+ }
5543+ }
5544+}
5545+
5546+static void
5547+stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
5548+{
5549+ int i;
5550+ for (i = 0; i < 64; ++i) {
5551+ data[i] *= dequant[i];
5552+ }
5553+}
5554+
5555+static void
5556+stbi__jpeg_finish(stbi__jpeg *z)
5557+{
5558+ if (z->progressive) {
5559+ // dequantize and idct the data
5560+ int i, j, n;
5561+ for (n = 0; n < z->s->img_n; ++n) {
5562+ int w = (z->img_comp[n].x + 7) >> 3;
5563+ int h = (z->img_comp[n].y + 7) >> 3;
5564+ for (j = 0; j < h; ++j) {
5565+ for (i = 0; i < w; ++i) {
5566+ short *data = z->img_comp[n].coeff +
5567+ 64 * (i + j * z->img_comp[n].coeff_w);
5568+ stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
5569+ z->idct_block_kernel(z->img_comp[n].data +
5570+ z->img_comp[n].w2 * j * 8 + i * 8,
5571+ z->img_comp[n].w2, data);
5572+ }
5573+ }
5574+ }
5575+ }
5576+}
5577+
// Handles one marker segment whose marker byte is `m`, reading the segment
// payload from z->s. Recognised markers: DRI (0xDD), DQT (0xDB), DHT (0xC4),
// APP0..APP15 (0xE0..0xEF) and COM (0xFE).
// Returns nonzero on success. On failure returns 0 or the value of
// stbi__err(...) (presumably 0 -- confirm against stbi__err).
5578+static int
5579+stbi__process_marker(stbi__jpeg *z, int m)
5580+{
5581+ int L;
5582+ switch (m) {
5583+ case STBI__MARKER_none: // no marker found
5584+ return stbi__err("expected marker", "Corrupt JPEG");
5585+
5586+ case 0xDD: // DRI - specify restart interval
// the 16-bit length field must be exactly 4; the following 16-bit value
// becomes the restart interval
5587+ if (stbi__get16be(z->s) != 4) {
5588+ return stbi__err("bad DRI len", "Corrupt JPEG");
5589+ }
5590+ z->restart_interval = stbi__get16be(z->s);
5591+ return 1;
5592+
5593+ case 0xDB: // DQT - define quantization table
// L counts the payload bytes left after the 2-byte length field; one
// segment may carry several tables
5594+ L = stbi__get16be(z->s) - 2;
5595+ while (L > 0) {
5596+ int q = stbi__get8(z->s);
// high nibble p: 0 = 8-bit entries, 1 = 16-bit entries;
// low nibble t: destination table index (0..3)
5597+ int p = q >> 4, sixteen = (p != 0);
5598+ int t = q & 15, i;
5599+ if (p != 0 && p != 1) {
5600+ return stbi__err("bad DQT type", "Corrupt JPEG");
5601+ }
5602+ if (t > 3) {
5603+ return stbi__err("bad DQT table", "Corrupt JPEG");
5604+ }
5605+
// the i-th entry read is stored at index stbi__jpeg_dezigzag[i]
5606+ for (i = 0; i < 64; ++i) {
5607+ z->dequant[t][stbi__jpeg_dezigzag[i]] =
5608+ (stbi__uint16)(sixteen ? stbi__get16be(z->s)
5609+ : stbi__get8(z->s));
5610+ }
// bytes consumed per table: 1 header byte + 64 or 128 entry bytes
5611+ L -= (sixteen ? 129 : 65);
5612+ }
// succeeds only if the tables consumed exactly the declared length
5613+ return L == 0;
5614+
5615+ case 0xC4: // DHT - define huffman table
5616+ L = stbi__get16be(z->s) - 2;
5617+ while (L > 0) {
5618+ stbi_uc *v;
5619+ int sizes[16], i, n = 0;
5620+ int q = stbi__get8(z->s);
// tc == 0 selects z->huff_dc, tc == 1 selects z->huff_ac; th is the
// table index (0..3)
5621+ int tc = q >> 4;
5622+ int th = q & 15;
5623+ if (tc > 1 || th > 3) {
5624+ return stbi__err("bad DHT header", "Corrupt JPEG");
5625+ }
// 16 count bytes; n accumulates the total number of value bytes to follow
5626+ for (i = 0; i < 16; ++i) {
5627+ sizes[i] = stbi__get8(z->s);
5628+ n += sizes[i];
5629+ }
5630+ if (n > 256) {
5631+ return stbi__err("bad DHT header",
5632+ "Corrupt JPEG"); // Loop over i < n would write
5633+ // past end of values!
5634+ }
// 17 = table id byte + the 16 count bytes just read
5635+ L -= 17;
5636+ if (tc == 0) {
5637+ if (!stbi__build_huffman(z->huff_dc + th, sizes)) {
5638+ return 0;
5639+ }
5640+ v = z->huff_dc[th].values;
5641+ } else {
5642+ if (!stbi__build_huffman(z->huff_ac + th, sizes)) {
5643+ return 0;
5644+ }
5645+ v = z->huff_ac[th].values;
5646+ }
5647+ for (i = 0; i < n; ++i) {
5648+ v[i] = stbi__get8(z->s);
5649+ }
// the fast_ac lookup is rebuilt only for AC tables
5650+ if (tc != 0) {
5651+ stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
5652+ }
5653+ L -= n;
5654+ }
5655+ return L == 0;
5656+ }
5657+
5658+ // check for comment block or APP blocks
5659+ if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
// here L still includes the 2-byte length field itself, so values
// below 2 are rejected
5660+ L = stbi__get16be(z->s);
5661+ if (L < 2) {
5662+ if (m == 0xFE) {
5663+ return stbi__err("bad COM len", "Corrupt JPEG");
5664+ } else {
5665+ return stbi__err("bad APP len", "Corrupt JPEG");
5666+ }
5667+ }
5668+ L -= 2;
5669+
5670+ if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
5671+ static const unsigned char tag[5] = {'J', 'F', 'I', 'F', '\0'};
5672+ int ok = 1;
5673+ int i;
// all 5 tag bytes are consumed even after a mismatch, keeping L in sync
// with the stream position
5674+ for (i = 0; i < 5; ++i) {
5675+ if (stbi__get8(z->s) != tag[i]) {
5676+ ok = 0;
5677+ }
5678+ }
5679+ L -= 5;
5680+ if (ok) {
5681+ z->jfif = 1;
5682+ }
5683+ } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
5684+ static const unsigned char tag[6] = {'A', 'd', 'o', 'b', 'e', '\0'};
5685+ int ok = 1;
5686+ int i;
5687+ for (i = 0; i < 6; ++i) {
5688+ if (stbi__get8(z->s) != tag[i]) {
5689+ ok = 0;
5690+ }
5691+ }
5692+ L -= 6;
5693+ if (ok) {
// 1 + 2 + 2 + 1 = 6 more bytes; only the last one is kept
5694+ stbi__get8(z->s); // version
5695+ stbi__get16be(z->s); // flags0
5696+ stbi__get16be(z->s); // flags1
5697+ z->app14_color_transform = stbi__get8(z->s); // color transform
5698+ L -= 6;
5699+ }
5700+ }
5701+
// skip whatever remains of the segment payload
5702+ stbi__skip(z->s, L);
5703+ return 1;
5704+ }
5705+
5706+ return stbi__err("unknown marker", "Corrupt JPEG");
5707 }
5708
5709 // after we see SOS
5710-static int stbi__process_scan_header(stbi__jpeg *z)
5711-{
5712- int i;
5713- int Ls = stbi__get16be(z->s);
5714- z->scan_n = stbi__get8(z->s);
5715- if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
5716- if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
5717- for (i=0; i < z->scan_n; ++i) {
5718- int id = stbi__get8(z->s), which;
5719- int q = stbi__get8(z->s);
5720- for (which = 0; which < z->s->img_n; ++which)
5721- if (z->img_comp[which].id == id)
5722- break;
5723- if (which == z->s->img_n) return 0; // no match
5724- z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
5725- z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
5726- z->order[i] = which;
5727- }
5728-
5729- {
5730- int aa;
5731- z->spec_start = stbi__get8(z->s);
5732- z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
5733- aa = stbi__get8(z->s);
5734- z->succ_high = (aa >> 4);
5735- z->succ_low = (aa & 15);
5736- if (z->progressive) {
5737- if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
5738- return stbi__err("bad SOS", "Corrupt JPEG");
5739- } else {
5740- if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
5741- if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
5742- z->spec_end = 63;
5743- }
5744- }
5745-
5746- return 1;
5747-}
5748-
5749-static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
5750-{
5751- int i;
5752- for (i=0; i < ncomp; ++i) {
5753- if (z->img_comp[i].raw_data) {
5754- STBI_FREE(z->img_comp[i].raw_data);
5755- z->img_comp[i].raw_data = NULL;
5756- z->img_comp[i].data = NULL;
5757- }
5758- if (z->img_comp[i].raw_coeff) {
5759- STBI_FREE(z->img_comp[i].raw_coeff);
5760- z->img_comp[i].raw_coeff = 0;
5761- z->img_comp[i].coeff = 0;
5762- }
5763- if (z->img_comp[i].linebuf) {
5764- STBI_FREE(z->img_comp[i].linebuf);
5765- z->img_comp[i].linebuf = NULL;
5766- }
5767- }
5768- return why;
5769-}
5770-
5771-static int stbi__process_frame_header(stbi__jpeg *z, int scan)
5772-{
5773- stbi__context *s = z->s;
5774- int Lf,p,i,q, h_max=1,v_max=1,c;
5775- Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
5776- p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
5777- s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
5778- s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
5779- if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
5780- if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
5781- c = stbi__get8(s);
5782- if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
5783- s->img_n = c;
5784- for (i=0; i < c; ++i) {
5785- z->img_comp[i].data = NULL;
5786- z->img_comp[i].linebuf = NULL;
5787- }
5788-
5789- if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
5790-
5791- z->rgb = 0;
5792- for (i=0; i < s->img_n; ++i) {
5793- static const unsigned char rgb[3] = { 'R', 'G', 'B' };
5794- z->img_comp[i].id = stbi__get8(s);
5795- if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
5796- ++z->rgb;
5797- q = stbi__get8(s);
5798- z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
5799- z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
5800- z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
5801- }
5802-
5803- if (scan != STBI__SCAN_load) return 1;
5804-
5805- if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
5806-
5807- for (i=0; i < s->img_n; ++i) {
5808- if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
5809- if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
5810- }
5811-
5812- // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
5813- // and I've never seen a non-corrupted JPEG file actually use them
5814- for (i=0; i < s->img_n; ++i) {
5815- if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
5816- if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
5817- }
5818-
5819- // compute interleaved mcu info
5820- z->img_h_max = h_max;
5821- z->img_v_max = v_max;
5822- z->img_mcu_w = h_max * 8;
5823- z->img_mcu_h = v_max * 8;
5824- // these sizes can't be more than 17 bits
5825- z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
5826- z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
5827-
5828- for (i=0; i < s->img_n; ++i) {
5829- // number of effective pixels (e.g. for non-interleaved MCU)
5830- z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
5831- z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
5832- // to simplify generation, we'll allocate enough memory to decode
5833- // the bogus oversized data from using interleaved MCUs and their
5834- // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
5835- // discard the extra data until colorspace conversion
5836- //
5837- // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
5838- // so these muls can't overflow with 32-bit ints (which we require)
5839- z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
5840- z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
5841- z->img_comp[i].coeff = 0;
5842- z->img_comp[i].raw_coeff = 0;
5843- z->img_comp[i].linebuf = NULL;
5844- z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
5845- if (z->img_comp[i].raw_data == NULL)
5846- return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
5847- // align blocks for idct using mmx/sse
5848- z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
5849- if (z->progressive) {
5850- // w2, h2 are multiples of 8 (see above)
5851- z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
5852- z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
5853- z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
5854- if (z->img_comp[i].raw_coeff == NULL)
5855- return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
5856- z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
5857- }
5858- }
5859-
5860- return 1;
// Parses the scan header that follows an SOS marker: the component list with
// per-component Huffman table selectors, then the spectral-selection and
// successive-approximation bytes. Results are stored in z->scan_n, z->order[],
// z->img_comp[].hd/.ha, z->spec_start, z->spec_end, z->succ_high, z->succ_low.
// Returns 1 on success; otherwise 0 or the value of stbi__err(...).
5861+static int
5862+stbi__process_scan_header(stbi__jpeg *z)
5863+{
5864+ int i;
5865+ int Ls = stbi__get16be(z->s);
5866+ z->scan_n = stbi__get8(z->s);
5867+ if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n) {
5868+ return stbi__err("bad SOS component count", "Corrupt JPEG");
5869+ }
// the declared length must equal 6 + 2 bytes per scan component
5870+ if (Ls != 6 + 2 * z->scan_n) {
5871+ return stbi__err("bad SOS len", "Corrupt JPEG");
5872+ }
5873+ for (i = 0; i < z->scan_n; ++i) {
// each entry is a component id byte followed by a table-selector byte
5874+ int id = stbi__get8(z->s), which;
5875+ int q = stbi__get8(z->s);
// look up the frame component carrying this id
5876+ for (which = 0; which < z->s->img_n; ++which) {
5877+ if (z->img_comp[which].id == id) {
5878+ break;
5879+ }
5880+ }
// NOTE(review): this failure path returns 0 without calling stbi__err,
// unlike the other error paths in this function
5881+ if (which == z->s->img_n) {
5882+ return 0; // no match
5883+ }
// high nibble: DC Huffman table index; low nibble: AC table index (both 0..3)
5884+ z->img_comp[which].hd = q >> 4;
5885+ if (z->img_comp[which].hd > 3) {
5886+ return stbi__err("bad DC huff", "Corrupt JPEG");
5887+ }
5888+ z->img_comp[which].ha = q & 15;
5889+ if (z->img_comp[which].ha > 3) {
5890+ return stbi__err("bad AC huff", "Corrupt JPEG");
5891+ }
5892+ z->order[i] = which;
5893+ }
5894+
5895+ {
5896+ int aa;
5897+ z->spec_start = stbi__get8(z->s);
5898+ z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
5899+ aa = stbi__get8(z->s);
5900+ z->succ_high = (aa >> 4);
5901+ z->succ_low = (aa & 15);
5902+ if (z->progressive) {
// progressive: range-check the spectral band and both approximation values
5903+ if (z->spec_start > 63 || z->spec_end > 63 ||
5904+ z->spec_start > z->spec_end || z->succ_high > 13 ||
5905+ z->succ_low > 13) {
5906+ return stbi__err("bad SOS", "Corrupt JPEG");
5907+ }
5908+ } else {
// non-progressive: start and both approximation values must be 0, and
// spec_end is forced to 63 regardless of the value read
5909+ if (z->spec_start != 0) {
5910+ return stbi__err("bad SOS", "Corrupt JPEG");
5911+ }
5912+ if (z->succ_high != 0 || z->succ_low != 0) {
5913+ return stbi__err("bad SOS", "Corrupt JPEG");
5914+ }
5915+ z->spec_end = 63;
5916+ }
5917+ }
5918+
5919+ return 1;
5920+}
5921+
// Releases the raw_data, raw_coeff and linebuf buffers of the first `ncomp`
// components and clears the pointers (including the aligned data/coeff
// aliases). Returns `why` unchanged, which lets callers write
// `return stbi__free_jpeg_components(z, n, error_value);`.
5922+static int
5923+stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
5924+{
5925+ int i;
5926+ for (i = 0; i < ncomp; ++i) {
5927+ if (z->img_comp[i].raw_data) {
5928+ STBI_FREE(z->img_comp[i].raw_data);
5929+ z->img_comp[i].raw_data = NULL;
5930+ z->img_comp[i].data = NULL;
5931+ }
5932+ if (z->img_comp[i].raw_coeff) {
5933+ STBI_FREE(z->img_comp[i].raw_coeff);
5934+ z->img_comp[i].raw_coeff = 0;
5935+ z->img_comp[i].coeff = 0;
5936+ }
5937+ if (z->img_comp[i].linebuf) {
5938+ STBI_FREE(z->img_comp[i].linebuf);
5939+ z->img_comp[i].linebuf = NULL;
5940+ }
5941+ }
5942+ return why;
5943+}
5944+
// Parses the frame header that follows an SOF marker: sample precision, image
// height and width, component count, and per-component id, sampling factors
// and quantization table index. When `scan` is STBI__SCAN_load it also derives
// the MCU geometry and allocates the per-component sample buffers (plus
// coefficient buffers for progressive images).
// Returns 1 on success; otherwise the value of stbi__err(...), passed through
// stbi__free_jpeg_components on allocation failure.
5945+static int
5946+stbi__process_frame_header(stbi__jpeg *z, int scan)
5947+{
5948+ stbi__context *s = z->s;
5949+ int Lf, p, i, q, h_max = 1, v_max = 1, c;
5950+ Lf = stbi__get16be(s);
// 11 is the smallest legal length: 8 + 3 * 1 component (see the exact check below)
5951+ if (Lf < 11) {
5952+ return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG
5953+ }
5954+ p = stbi__get8(s);
5955+ if (p != 8) {
5956+ return stbi__err(
5957+ "only 8-bit",
5958+ "JPEG format not supported: 8-bit only"); // JPEG baseline
5959+ }
5960+ s->img_y = stbi__get16be(s);
5961+ if (s->img_y == 0) {
5962+ return stbi__err(
5963+ "no header height",
5964+ "JPEG format not supported: delayed height"); // Legal, but we don't
5965+ // handle it--but
5966+ // neither does IJG
5967+ }
5968+ s->img_x = stbi__get16be(s);
5969+ if (s->img_x == 0) {
5970+ return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires
5971+ }
5972+ if (s->img_y > STBI_MAX_DIMENSIONS) {
5973+ return stbi__err("too large", "Very large image (corrupt?)");
5974+ }
5975+ if (s->img_x > STBI_MAX_DIMENSIONS) {
5976+ return stbi__err("too large", "Very large image (corrupt?)");
5977+ }
// only 1, 3 or 4 components are accepted
5978+ c = stbi__get8(s);
5979+ if (c != 3 && c != 1 && c != 4) {
5980+ return stbi__err("bad component count", "Corrupt JPEG");
5981+ }
5982+ s->img_n = c;
5983+ for (i = 0; i < c; ++i) {
5984+ z->img_comp[i].data = NULL;
5985+ z->img_comp[i].linebuf = NULL;
5986+ }
5987+
5988+ if (Lf != 8 + 3 * s->img_n) {
5989+ return stbi__err("bad SOF len", "Corrupt JPEG");
5990+ }
5991+
// z->rgb counts how many of the three component ids equal 'R', 'G', 'B' in
// that order (only counted for 3-component images)
5992+ z->rgb = 0;
5993+ for (i = 0; i < s->img_n; ++i) {
5994+ static const unsigned char rgb[3] = {'R', 'G', 'B'};
5995+ z->img_comp[i].id = stbi__get8(s);
5996+ if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) {
5997+ ++z->rgb;
5998+ }
// one byte packs both sampling factors: high nibble h, low nibble v, each 1..4
5999+ q = stbi__get8(s);
6000+ z->img_comp[i].h = (q >> 4);
6001+ if (!z->img_comp[i].h || z->img_comp[i].h > 4) {
6002+ return stbi__err("bad H", "Corrupt JPEG");
6003+ }
6004+ z->img_comp[i].v = q & 15;
6005+ if (!z->img_comp[i].v || z->img_comp[i].v > 4) {
6006+ return stbi__err("bad V", "Corrupt JPEG");
6007+ }
6008+ z->img_comp[i].tq = stbi__get8(s);
6009+ if (z->img_comp[i].tq > 3) {
6010+ return stbi__err("bad TQ", "Corrupt JPEG");
6011+ }
6012+ }
6013+
// any mode other than a full load stops here, before buffers are allocated
6014+ if (scan != STBI__SCAN_load) {
6015+ return 1;
6016+ }
6017+
6018+ if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) {
6019+ return stbi__err("too large", "Image too large to decode");
6020+ }
6021+
// largest horizontal / vertical sampling factor over all components
6022+ for (i = 0; i < s->img_n; ++i) {
6023+ if (z->img_comp[i].h > h_max) {
6024+ h_max = z->img_comp[i].h;
6025+ }
6026+ if (z->img_comp[i].v > v_max) {
6027+ v_max = z->img_comp[i].v;
6028+ }
6029+ }
6030+
6031+ // check that plane subsampling factors are integer ratios; our resamplers
6032+ // can't deal with fractional ratios and I've never seen a non-corrupted
6033+ // JPEG file actually use them
6034+ for (i = 0; i < s->img_n; ++i) {
6035+ if (h_max % z->img_comp[i].h != 0) {
6036+ return stbi__err("bad H", "Corrupt JPEG");
6037+ }
6038+ if (v_max % z->img_comp[i].v != 0) {
6039+ return stbi__err("bad V", "Corrupt JPEG");
6040+ }
6041+ }
6042+
6043+ // compute interleaved mcu info
6044+ z->img_h_max = h_max;
6045+ z->img_v_max = v_max;
6046+ z->img_mcu_w = h_max * 8;
6047+ z->img_mcu_h = v_max * 8;
6048+ // these sizes can't be more than 17 bits
// MCU counts per row / column, rounded up
6049+ z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
6050+ z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
6051+
6052+ for (i = 0; i < s->img_n; ++i) {
6053+ // number of effective pixels (e.g. for non-interleaved MCU)
6054+ z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
6055+ z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
6056+ // to simplify generation, we'll allocate enough memory to decode
6057+ // the bogus oversized data from using interleaved MCUs and their
6058+ // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
6059+ // discard the extra data until colorspace conversion
6060+ //
6061+ // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked
6062+ // earlier) so these muls can't overflow with 32-bit ints (which we
6063+ // require)
6064+ z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
6065+ z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
6066+ z->img_comp[i].coeff = 0;
6067+ z->img_comp[i].raw_coeff = 0;
6068+ z->img_comp[i].linebuf = NULL;
// the trailing 15 is presumably slack for the 16-byte alignment applied
// below -- confirm against stbi__malloc_mad2
6069+ z->img_comp[i].raw_data =
6070+ stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
// on failure, release components 0..i (this one included) and return the
// stbi__err value
6071+ if (z->img_comp[i].raw_data == NULL) {
6072+ return stbi__free_jpeg_components(
6073+ z, i + 1, stbi__err("outofmem", "Out of memory"));
6074+ }
6075+ // align blocks for idct using mmx/sse
// rounds the raw pointer up to the next multiple of 16
6076+ z->img_comp[i].data =
6077+ (stbi_uc *)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
6078+ if (z->progressive) {
6079+ // w2, h2 are multiples of 8 (see above)
6080+ z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
6081+ z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
6082+ z->img_comp[i].raw_coeff = stbi__malloc_mad3(
6083+ z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
6084+ if (z->img_comp[i].raw_coeff == NULL) {
6085+ return stbi__free_jpeg_components(
6086+ z, i + 1, stbi__err("outofmem", "Out of memory"));
6087+ }
6088+ z->img_comp[i].coeff =
6089+ (short *)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);
6090+ }
6091+ }
6092+
6093+ return 1;
6094 }
6095
6096 // use comparisons since in some cases we handle more than one case (e.g. SOF)
6097-#define stbi__DNL(x) ((x) == 0xdc)
6098-#define stbi__SOI(x) ((x) == 0xd8)
6099-#define stbi__EOI(x) ((x) == 0xd9)
6100-#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
6101-#define stbi__SOS(x) ((x) == 0xda)
6102-
6103-#define stbi__SOF_progressive(x) ((x) == 0xc2)
6104-
6105-static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
6106-{
6107- int m;
6108- z->jfif = 0;
6109- z->app14_color_transform = -1; // valid values are 0,1,2
6110- z->marker = STBI__MARKER_none; // initialize cached marker to empty
6111- m = stbi__get_marker(z);
6112- if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
6113- if (scan == STBI__SCAN_type) return 1;
6114- m = stbi__get_marker(z);
6115- while (!stbi__SOF(m)) {
6116- if (!stbi__process_marker(z,m)) return 0;
6117- m = stbi__get_marker(z);
6118- while (m == STBI__MARKER_none) {
6119- // some files have extra padding after their blocks, so ok, we'll scan
6120- if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
6121- m = stbi__get_marker(z);
6122- }
6123- }
6124- z->progressive = stbi__SOF_progressive(m);
6125- if (!stbi__process_frame_header(z, scan)) return 0;
6126- return 1;
6127-}
6128-
6129-static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j)
6130-{
6131- // some JPEGs have junk at end, skip over it but if we find what looks
6132- // like a valid marker, resume there
6133- while (!stbi__at_eof(j->s)) {
6134- stbi_uc x = stbi__get8(j->s);
6135- while (x == 0xff) { // might be a marker
6136- if (stbi__at_eof(j->s)) return STBI__MARKER_none;
6137- x = stbi__get8(j->s);
6138- if (x != 0x00 && x != 0xff) {
6139- // not a stuffed zero or lead-in to another marker, looks
6140- // like an actual marker, return it
6141- return x;
6142- }
6143- // stuffed zero has x=0 now which ends the loop, meaning we go
6144- // back to regular scan loop.
6145- // repeated 0xff keeps trying to read the next byte of the marker.
6146- }
6147- }
6148- return STBI__MARKER_none;
// Predicates on a marker byte value (the byte that follows 0xff).
// DNL = 0xdc, SOI = 0xd8, EOI = 0xd9, SOS = 0xda; SOF accepts any of
// 0xc0, 0xc1, 0xc2.
6149+#define stbi__DNL(x) ((x) == 0xdc)
6150+#define stbi__SOI(x) ((x) == 0xd8)
6151+#define stbi__EOI(x) ((x) == 0xd9)
6152+#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
6153+#define stbi__SOS(x) ((x) == 0xda)
6154+
// True only for the SOF variant 0xc2, which selects progressive decoding.
6155+#define stbi__SOF_progressive(x) ((x) == 0xc2)
6156+
// Reads the stream up to and including the frame header. Resets the JFIF /
// APP14 / cached-marker state, requires an SOI marker first, then hands every
// marker to stbi__process_marker until an SOF marker appears, and finally
// parses the frame header in the requested `scan` mode.
// With scan == STBI__SCAN_type it returns 1 immediately after a valid SOI.
// Returns 1 on success, 0 (or the stbi__err value) on failure.
6157+static int
6158+stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
6159+{
6160+ int m;
6161+ z->jfif = 0;
6162+ z->app14_color_transform = -1; // valid values are 0,1,2
6163+ z->marker = STBI__MARKER_none; // initialize cached marker to empty
6164+ m = stbi__get_marker(z);
6165+ if (!stbi__SOI(m)) {
6166+ return stbi__err("no SOI", "Corrupt JPEG");
6167+ }
6168+ if (scan == STBI__SCAN_type) {
6169+ return 1;
6170+ }
6171+ m = stbi__get_marker(z);
6172+ while (!stbi__SOF(m)) {
6173+ if (!stbi__process_marker(z, m)) {
6174+ return 0;
6175+ }
6176+ m = stbi__get_marker(z);
// keep asking for a marker until one is found or the input is exhausted
6177+ while (m == STBI__MARKER_none) {
6178+ // some files have extra padding after their blocks, so ok, we'll
6179+ // scan
6180+ if (stbi__at_eof(z->s)) {
6181+ return stbi__err("no SOF", "Corrupt JPEG");
6182+ }
6183+ m = stbi__get_marker(z);
6184+ }
6185+ }
// m is the SOF marker that ended the loop; 0xc2 selects progressive mode
6186+ z->progressive = stbi__SOF_progressive(m);
6187+ if (!stbi__process_frame_header(z, scan)) {
6188+ return 0;
6189+ }
6190+ return 1;
6191+}
6192+
// Scans forward through j->s for the next 0xff byte that is followed by a
// byte other than 0x00 or 0xff, and returns that following byte.
// Returns STBI__MARKER_none if the end of input is reached first.
6193+static stbi_uc
6194+stbi__skip_jpeg_junk_at_end(stbi__jpeg *j)
6195+{
6196+ // some JPEGs have junk at end, skip over it but if we find what looks
6197+ // like a valid marker, resume there
6198+ while (!stbi__at_eof(j->s)) {
6199+ stbi_uc x = stbi__get8(j->s);
6200+ while (x == 0xff) { // might be a marker
6201+ if (stbi__at_eof(j->s)) {
6202+ return STBI__MARKER_none;
6203+ }
6204+ x = stbi__get8(j->s);
6205+ if (x != 0x00 && x != 0xff) {
6206+ // not a stuffed zero or lead-in to another marker, looks
6207+ // like an actual marker, return it
6208+ return x;
6209+ }
6210+ // stuffed zero has x=0 now which ends the loop, meaning we go
6211+ // back to regular scan loop.
6212+ // repeated 0xff keeps trying to read the next byte of the marker.
6213+ }
6214+ }
6215+ return STBI__MARKER_none;
6216 }
6217
6218 // decode image to YCbCr format
6219-static int stbi__decode_jpeg_image(stbi__jpeg *j)
6220-{
6221- int m;
6222- for (m = 0; m < 4; m++) {
6223- j->img_comp[m].raw_data = NULL;
6224- j->img_comp[m].raw_coeff = NULL;
6225- }
6226- j->restart_interval = 0;
6227- if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
6228- m = stbi__get_marker(j);
6229- while (!stbi__EOI(m)) {
6230- if (stbi__SOS(m)) {
6231- if (!stbi__process_scan_header(j)) return 0;
6232- if (!stbi__parse_entropy_coded_data(j)) return 0;
6233- if (j->marker == STBI__MARKER_none ) {
6234- j->marker = stbi__skip_jpeg_junk_at_end(j);
6235- // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
6236- }
6237- m = stbi__get_marker(j);
6238- if (STBI__RESTART(m))
6239- m = stbi__get_marker(j);
6240- } else if (stbi__DNL(m)) {
6241- int Ld = stbi__get16be(j->s);
6242- stbi__uint32 NL = stbi__get16be(j->s);
6243- if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
6244- if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
6245- m = stbi__get_marker(j);
6246- } else {
6247- if (!stbi__process_marker(j, m)) return 1;
6248- m = stbi__get_marker(j);
6249- }
6250- }
6251- if (j->progressive)
6252- stbi__jpeg_finish(j);
6253- return 1;
// Decodes a whole JPEG stream into the per-component buffers of `j`.
// Clears the raw buffer pointers of all 4 component slots, parses the header
// in STBI__SCAN_load mode, then dispatches on each marker until EOI:
//   SOS   -> scan header followed by the entropy-coded data
//   DNL   -> validated against the height already read from the frame header
//   other -> stbi__process_marker
// For progressive images stbi__jpeg_finish runs once EOI is reached.
// Returns 1 on success, 0 (or the stbi__err value) on failure.
6254+static int
6255+stbi__decode_jpeg_image(stbi__jpeg *j)
6256+{
6257+ int m;
6258+ for (m = 0; m < 4; m++) {
6259+ j->img_comp[m].raw_data = NULL;
6260+ j->img_comp[m].raw_coeff = NULL;
6261+ }
6262+ j->restart_interval = 0;
6263+ if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) {
6264+ return 0;
6265+ }
6266+ m = stbi__get_marker(j);
6267+ while (!stbi__EOI(m)) {
6268+ if (stbi__SOS(m)) {
6269+ if (!stbi__process_scan_header(j)) {
6270+ return 0;
6271+ }
6272+ if (!stbi__parse_entropy_coded_data(j)) {
6273+ return 0;
6274+ }
6275+ if (j->marker == STBI__MARKER_none) {
6276+ j->marker = stbi__skip_jpeg_junk_at_end(j);
6277+ // if we reach eof without hitting a marker, stbi__get_marker()
6278+ // below will fail and we'll eventually return 0
6279+ }
6280+ m = stbi__get_marker(j);
// a restart marker directly after the scan is discarded
6281+ if (STBI__RESTART(m)) {
6282+ m = stbi__get_marker(j);
6283+ }
6284+ } else if (stbi__DNL(m)) {
// DNL: length must be 4 and the line count must equal the header height
6285+ int Ld = stbi__get16be(j->s);
6286+ stbi__uint32 NL = stbi__get16be(j->s);
6287+ if (Ld != 4) {
6288+ return stbi__err("bad DNL len", "Corrupt JPEG");
6289+ }
6290+ if (NL != j->s->img_y) {
6291+ return stbi__err("bad DNL height", "Corrupt JPEG");
6292+ }
6293+ m = stbi__get_marker(j);
6294+ } else {
// NOTE(review): a failed stbi__process_marker returns 1 (success) here and
// skips stbi__jpeg_finish, whereas stbi__decode_jpeg_header returns 0 in the
// same situation -- confirm this leniency is intended
6295+ if (!stbi__process_marker(j, m)) {
6296+ return 1;
6297+ }
6298+ m = stbi__get_marker(j);
6299+ }
6300+ }
6301+ if (j->progressive) {
6302+ stbi__jpeg_finish(j);
6303+ }
6304+ return 1;
6305 }
6306
6307 // static jfif-centered resampling (across block boundaries)
6308
// Function-pointer type for a row resampler: takes an output buffer, two
// input rows, a width `w` and a factor `hs`, and returns a pointer to the
// resampled row.
6309 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
6310- int w, int hs);
6311+ int w, int hs);
6312
// Shifts x right by 2 (divide by 4) and narrows the result to stbi_uc.
6313-#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
6314+#define stbi__div4(x) ((stbi_uc)((x) >> 2))
6315
// Pass-through resampler: returns `in_near` unchanged and writes nothing to
// `out`; every other parameter is unused.
6316-static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
6317+static stbi_uc *
6318+resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
6319 {
6320- STBI_NOTUSED(out);
6321- STBI_NOTUSED(in_far);
6322- STBI_NOTUSED(w);
6323- STBI_NOTUSED(hs);
6324- return in_near;
6325+ STBI_NOTUSED(out);
6326+ STBI_NOTUSED(in_far);
6327+ STBI_NOTUSED(w);
6328+ STBI_NOTUSED(hs);
6329+ return in_near;
6330 }
6331
// Vertical 2x resampler: for each of the `w` samples writes
// (3 * in_near[i] + in_far[i] + 2) >> 2 to out[i], i.e. a 3:1 blend of the
// two input rows with rounding. `hs` is unused. Returns `out`.
6332-static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
6333+static stbi_uc *
6334+stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w,
6335+ int hs)
6336 {
6337- // need to generate two samples vertically for every one in input
6338- int i;
6339- STBI_NOTUSED(hs);
6340- for (i=0; i < w; ++i)
6341- out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
6342- return out;
6343+ // need to generate two samples vertically for every one in input
6344+ int i;
6345+ STBI_NOTUSED(hs);
6346+ for (i = 0; i < w; ++i) {
6347+ out[i] = stbi__div4(3 * in_near[i] + in_far[i] + 2);
6348+ }
6349+ return out;
6350 }
6351
// Horizontal 2x resampler: reads `w` samples from `in_near` and writes 2*w
// samples to `out`. The first and last outputs copy the end samples; every
// other output is (3 * sample + neighbour + 2) >> 2. When w == 1 the single
// sample is duplicated. `in_far` and `hs` are unused. Returns `out`.
// After the loop i == w - 1, so the two trailing stores land at indices
// 2*w - 2 and 2*w - 1.
6352-static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
6353+static stbi_uc *
6354+stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w,
6355+ int hs)
6356 {
6357- // need to generate two samples horizontally for every one in input
6358- int i;
6359- stbi_uc *input = in_near;
6360+ // need to generate two samples horizontally for every one in input
6361+ int i;
6362+ stbi_uc *input = in_near;
6363
6364- if (w == 1) {
6365- // if only one sample, can't do any interpolation
6366- out[0] = out[1] = input[0];
6367- return out;
6368- }
6369+ if (w == 1) {
6370+ // if only one sample, can't do any interpolation
6371+ out[0] = out[1] = input[0];
6372+ return out;
6373+ }
6374
6375- out[0] = input[0];
6376- out[1] = stbi__div4(input[0]*3 + input[1] + 2);
6377- for (i=1; i < w-1; ++i) {
6378- int n = 3*input[i]+2;
6379- out[i*2+0] = stbi__div4(n+input[i-1]);
6380- out[i*2+1] = stbi__div4(n+input[i+1]);
6381- }
6382- out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
6383- out[i*2+1] = input[w-1];
6384+ out[0] = input[0];
6385+ out[1] = stbi__div4(input[0] * 3 + input[1] + 2);
6386+ for (i = 1; i < w - 1; ++i) {
6387+ int n = 3 * input[i] + 2;
6388+ out[i * 2 + 0] = stbi__div4(n + input[i - 1]);
6389+ out[i * 2 + 1] = stbi__div4(n + input[i + 1]);
6390+ }
6391+ out[i * 2 + 0] = stbi__div4(input[w - 2] * 3 + input[w - 1] + 2);
6392+ out[i * 2 + 1] = input[w - 1];
6393
6394- STBI_NOTUSED(in_far);
6395- STBI_NOTUSED(hs);
6396+ STBI_NOTUSED(in_far);
6397+ STBI_NOTUSED(hs);
6398
6399- return out;
6400+ return out;
6401 }
6402
// Shifts x right by 4 (divide by 16) and narrows the result to stbi_uc.
6403-#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
6404+#define stbi__div16(x) ((stbi_uc)((x) >> 4))
6405
// Combined horizontal + vertical 2x resampler: reads `w` samples from each of
// `in_near` and `in_far` and writes 2*w samples to `out`.
// t0 / t1 hold the vertical blend 3 * in_near + in_far (scaled by 4) of the
// previous and current column. Interior outputs blend two adjacent columns
// 3:1 and divide by 16 with rounding (+8); the first and last outputs use the
// vertical blend alone, divided by 4 with rounding (+2).
// When w == 1 both outputs are that single vertical blend. `hs` is unused.
// Returns `out`.
6406-static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
6407+static stbi_uc *
6408+stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w,
6409+ int hs)
6410 {
6411- // need to generate 2x2 samples for every one in input
6412- int i,t0,t1;
6413- if (w == 1) {
6414- out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
6415- return out;
6416- }
6417+ // need to generate 2x2 samples for every one in input
6418+ int i, t0, t1;
6419+ if (w == 1) {
6420+ out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
6421+ return out;
6422+ }
6423
6424- t1 = 3*in_near[0] + in_far[0];
6425- out[0] = stbi__div4(t1+2);
6426- for (i=1; i < w; ++i) {
6427- t0 = t1;
6428- t1 = 3*in_near[i]+in_far[i];
6429- out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
6430- out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
6431- }
6432- out[w*2-1] = stbi__div4(t1+2);
6433+ t1 = 3 * in_near[0] + in_far[0];
6434+ out[0] = stbi__div4(t1 + 2);
6435+ for (i = 1; i < w; ++i) {
6436+ t0 = t1;
6437+ t1 = 3 * in_near[i] + in_far[i];
6438+ out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
6439+ out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
6440+ }
6441+ out[w * 2 - 1] = stbi__div4(t1 + 2);
6442
6443- STBI_NOTUSED(hs);
6444+ STBI_NOTUSED(hs);
6445
6446- return out;
6447+ return out;
6448 }
6449
6450 #if defined(STBI_SSE2) || defined(STBI_NEON)
6451-static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
6452-{
6453- // need to generate 2x2 samples for every one in input
6454- int i=0,t0,t1;
6455-
6456- if (w == 1) {
6457- out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
6458- return out;
6459- }
6460-
6461- t1 = 3*in_near[0] + in_far[0];
6462- // process groups of 8 pixels for as long as we can.
6463- // note we can't handle the last pixel in a row in this loop
6464- // because we need to handle the filter boundary conditions.
6465- for (; i < ((w-1) & ~7); i += 8) {
6466+static stbi_uc *
6467+stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far,
6468+ int w, int hs)
6469+{
6470+ // need to generate 2x2 samples for every one in input
6471+ int i = 0, t0, t1;
6472+
6473+ if (w == 1) {
6474+ out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
6475+ return out;
6476+ }
6477+
6478+ t1 = 3 * in_near[0] + in_far[0];
6479+ // process groups of 8 pixels for as long as we can.
6480+ // note we can't handle the last pixel in a row in this loop
6481+ // because we need to handle the filter boundary conditions.
6482+ for (; i < ((w - 1) & ~7); i += 8) {
6483 #if defined(STBI_SSE2)
6484- // load and perform the vertical filtering pass
6485- // this uses 3*x + y = 4*x + (y - x)
6486- __m128i zero = _mm_setzero_si128();
6487- __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
6488- __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
6489- __m128i farw = _mm_unpacklo_epi8(farb, zero);
6490- __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
6491- __m128i diff = _mm_sub_epi16(farw, nearw);
6492- __m128i nears = _mm_slli_epi16(nearw, 2);
6493- __m128i curr = _mm_add_epi16(nears, diff); // current row
6494-
6495- // horizontal filter works the same based on shifted vers of current
6496- // row. "prev" is current row shifted right by 1 pixel; we need to
6497- // insert the previous pixel value (from t1).
6498- // "next" is current row shifted left by 1 pixel, with first pixel
6499- // of next block of 8 pixels added in.
6500- __m128i prv0 = _mm_slli_si128(curr, 2);
6501- __m128i nxt0 = _mm_srli_si128(curr, 2);
6502- __m128i prev = _mm_insert_epi16(prv0, t1, 0);
6503- __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
6504-
6505- // horizontal filter, polyphase implementation since it's convenient:
6506- // even pixels = 3*cur + prev = cur*4 + (prev - cur)
6507- // odd pixels = 3*cur + next = cur*4 + (next - cur)
6508- // note the shared term.
6509- __m128i bias = _mm_set1_epi16(8);
6510- __m128i curs = _mm_slli_epi16(curr, 2);
6511- __m128i prvd = _mm_sub_epi16(prev, curr);
6512- __m128i nxtd = _mm_sub_epi16(next, curr);
6513- __m128i curb = _mm_add_epi16(curs, bias);
6514- __m128i even = _mm_add_epi16(prvd, curb);
6515- __m128i odd = _mm_add_epi16(nxtd, curb);
6516-
6517- // interleave even and odd pixels, then undo scaling.
6518- __m128i int0 = _mm_unpacklo_epi16(even, odd);
6519- __m128i int1 = _mm_unpackhi_epi16(even, odd);
6520- __m128i de0 = _mm_srli_epi16(int0, 4);
6521- __m128i de1 = _mm_srli_epi16(int1, 4);
6522-
6523- // pack and write output
6524- __m128i outv = _mm_packus_epi16(de0, de1);
6525- _mm_storeu_si128((__m128i *) (out + i*2), outv);
6526+ // load and perform the vertical filtering pass
6527+ // this uses 3*x + y = 4*x + (y - x)
6528+ __m128i zero = _mm_setzero_si128();
6529+ __m128i farb = _mm_loadl_epi64((__m128i *)(in_far + i));
6530+ __m128i nearb = _mm_loadl_epi64((__m128i *)(in_near + i));
6531+ __m128i farw = _mm_unpacklo_epi8(farb, zero);
6532+ __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
6533+ __m128i diff = _mm_sub_epi16(farw, nearw);
6534+ __m128i nears = _mm_slli_epi16(nearw, 2);
6535+ __m128i curr = _mm_add_epi16(nears, diff); // current row
6536+
6537+ // horizontal filter works the same based on shifted vers of current
6538+ // row. "prev" is current row shifted right by 1 pixel; we need to
6539+ // insert the previous pixel value (from t1).
6540+ // "next" is current row shifted left by 1 pixel, with first pixel
6541+ // of next block of 8 pixels added in.
6542+ __m128i prv0 = _mm_slli_si128(curr, 2);
6543+ __m128i nxt0 = _mm_srli_si128(curr, 2);
6544+ __m128i prev = _mm_insert_epi16(prv0, t1, 0);
6545+ __m128i next =
6546+ _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);
6547+
6548+ // horizontal filter, polyphase implementation since it's convenient:
6549+ // even pixels = 3*cur + prev = cur*4 + (prev - cur)
6550+ // odd pixels = 3*cur + next = cur*4 + (next - cur)
6551+ // note the shared term.
6552+ __m128i bias = _mm_set1_epi16(8);
6553+ __m128i curs = _mm_slli_epi16(curr, 2);
6554+ __m128i prvd = _mm_sub_epi16(prev, curr);
6555+ __m128i nxtd = _mm_sub_epi16(next, curr);
6556+ __m128i curb = _mm_add_epi16(curs, bias);
6557+ __m128i even = _mm_add_epi16(prvd, curb);
6558+ __m128i odd = _mm_add_epi16(nxtd, curb);
6559+
6560+ // interleave even and odd pixels, then undo scaling.
6561+ __m128i int0 = _mm_unpacklo_epi16(even, odd);
6562+ __m128i int1 = _mm_unpackhi_epi16(even, odd);
6563+ __m128i de0 = _mm_srli_epi16(int0, 4);
6564+ __m128i de1 = _mm_srli_epi16(int1, 4);
6565+
6566+ // pack and write output
6567+ __m128i outv = _mm_packus_epi16(de0, de1);
6568+ _mm_storeu_si128((__m128i *)(out + i * 2), outv);
6569 #elif defined(STBI_NEON)
6570- // load and perform the vertical filtering pass
6571- // this uses 3*x + y = 4*x + (y - x)
6572- uint8x8_t farb = vld1_u8(in_far + i);
6573- uint8x8_t nearb = vld1_u8(in_near + i);
6574- int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
6575- int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
6576- int16x8_t curr = vaddq_s16(nears, diff); // current row
6577-
6578- // horizontal filter works the same based on shifted vers of current
6579- // row. "prev" is current row shifted right by 1 pixel; we need to
6580- // insert the previous pixel value (from t1).
6581- // "next" is current row shifted left by 1 pixel, with first pixel
6582- // of next block of 8 pixels added in.
6583- int16x8_t prv0 = vextq_s16(curr, curr, 7);
6584- int16x8_t nxt0 = vextq_s16(curr, curr, 1);
6585- int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
6586- int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
6587-
6588- // horizontal filter, polyphase implementation since it's convenient:
6589- // even pixels = 3*cur + prev = cur*4 + (prev - cur)
6590- // odd pixels = 3*cur + next = cur*4 + (next - cur)
6591- // note the shared term.
6592- int16x8_t curs = vshlq_n_s16(curr, 2);
6593- int16x8_t prvd = vsubq_s16(prev, curr);
6594- int16x8_t nxtd = vsubq_s16(next, curr);
6595- int16x8_t even = vaddq_s16(curs, prvd);
6596- int16x8_t odd = vaddq_s16(curs, nxtd);
6597-
6598- // undo scaling and round, then store with even/odd phases interleaved
6599- uint8x8x2_t o;
6600- o.val[0] = vqrshrun_n_s16(even, 4);
6601- o.val[1] = vqrshrun_n_s16(odd, 4);
6602- vst2_u8(out + i*2, o);
6603-#endif
6604-
6605- // "previous" value for next iter
6606- t1 = 3*in_near[i+7] + in_far[i+7];
6607- }
6608-
6609- t0 = t1;
6610- t1 = 3*in_near[i] + in_far[i];
6611- out[i*2] = stbi__div16(3*t1 + t0 + 8);
6612-
6613- for (++i; i < w; ++i) {
6614- t0 = t1;
6615- t1 = 3*in_near[i]+in_far[i];
6616- out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
6617- out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
6618- }
6619- out[w*2-1] = stbi__div4(t1+2);
6620-
6621- STBI_NOTUSED(hs);
6622-
6623- return out;
6624-}
6625-#endif
6626-
6627-static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
6628-{
6629- // resample with nearest-neighbor
6630- int i,j;
6631- STBI_NOTUSED(in_far);
6632- for (i=0; i < w; ++i)
6633- for (j=0; j < hs; ++j)
6634- out[i*hs+j] = in_near[i];
6635- return out;
6636+ // load and perform the vertical filtering pass
6637+ // this uses 3*x + y = 4*x + (y - x)
6638+ uint8x8_t farb = vld1_u8(in_far + i);
6639+ uint8x8_t nearb = vld1_u8(in_near + i);
6640+ int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
6641+ int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
6642+ int16x8_t curr = vaddq_s16(nears, diff); // current row
6643+
6644+ // horizontal filter works the same based on shifted vers of current
6645+ // row. "prev" is current row shifted right by 1 pixel; we need to
6646+ // insert the previous pixel value (from t1).
6647+ // "next" is current row shifted left by 1 pixel, with first pixel
6648+ // of next block of 8 pixels added in.
6649+ int16x8_t prv0 = vextq_s16(curr, curr, 7);
6650+ int16x8_t nxt0 = vextq_s16(curr, curr, 1);
6651+ int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
6652+ int16x8_t next =
6653+ vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);
6654+
6655+ // horizontal filter, polyphase implementation since it's convenient:
6656+ // even pixels = 3*cur + prev = cur*4 + (prev - cur)
6657+ // odd pixels = 3*cur + next = cur*4 + (next - cur)
6658+ // note the shared term.
6659+ int16x8_t curs = vshlq_n_s16(curr, 2);
6660+ int16x8_t prvd = vsubq_s16(prev, curr);
6661+ int16x8_t nxtd = vsubq_s16(next, curr);
6662+ int16x8_t even = vaddq_s16(curs, prvd);
6663+ int16x8_t odd = vaddq_s16(curs, nxtd);
6664+
6665+ // undo scaling and round, then store with even/odd phases interleaved
6666+ uint8x8x2_t o;
6667+ o.val[0] = vqrshrun_n_s16(even, 4);
6668+ o.val[1] = vqrshrun_n_s16(odd, 4);
6669+ vst2_u8(out + i * 2, o);
6670+#endif
6671+
6672+ // "previous" value for next iter
6673+ t1 = 3 * in_near[i + 7] + in_far[i + 7];
6674+ }
6675+
6676+ t0 = t1;
6677+ t1 = 3 * in_near[i] + in_far[i];
6678+ out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
6679+
6680+ for (++i; i < w; ++i) {
6681+ t0 = t1;
6682+ t1 = 3 * in_near[i] + in_far[i];
6683+ out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
6684+ out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
6685+ }
6686+ out[w * 2 - 1] = stbi__div4(t1 + 2);
6687+
6688+ STBI_NOTUSED(hs);
6689+
6690+ return out;
6691+}
6692+#endif
6693+
6694+static stbi_uc *
6695+stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far,
6696+ int w, int hs)
6697+{
6698+ // resample with nearest-neighbor
6699+ int i, j;
6700+ STBI_NOTUSED(in_far);
6701+ for (i = 0; i < w; ++i) {
6702+ for (j = 0; j < hs; ++j) {
6703+ out[i * hs + j] = in_near[i];
6704+ }
6705+ }
6706+ return out;
6707 }
6708
6709 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
6710 // to make sure the code produces the same results in both SIMD and scalar
6711-#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
6712-static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
6713-{
6714- int i;
6715- for (i=0; i < count; ++i) {
6716- int y_fixed = (y[i] << 20) + (1<<19); // rounding
6717- int r,g,b;
6718- int cr = pcr[i] - 128;
6719- int cb = pcb[i] - 128;
6720- r = y_fixed + cr* stbi__float2fixed(1.40200f);
6721- g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
6722- b = y_fixed + cb* stbi__float2fixed(1.77200f);
6723- r >>= 20;
6724- g >>= 20;
6725- b >>= 20;
6726- if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
6727- if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
6728- if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
6729- out[0] = (stbi_uc)r;
6730- out[1] = (stbi_uc)g;
6731- out[2] = (stbi_uc)b;
6732- out[3] = 255;
6733- out += step;
6734- }
6735+#define stbi__float2fixed(x) (((int)((x) * 4096.0f + 0.5f)) << 8)
6736+static void
6737+stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb,
6738+ const stbi_uc *pcr, int count, int step)
6739+{
6740+ int i;
6741+ for (i = 0; i < count; ++i) {
6742+ int y_fixed = (y[i] << 20) + (1 << 19); // rounding
6743+ int r, g, b;
6744+ int cr = pcr[i] - 128;
6745+ int cb = pcb[i] - 128;
6746+ r = y_fixed + cr * stbi__float2fixed(1.40200f);
6747+ g = y_fixed + (cr * -stbi__float2fixed(0.71414f)) +
6748+ ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
6749+ b = y_fixed + cb * stbi__float2fixed(1.77200f);
6750+ r >>= 20;
6751+ g >>= 20;
6752+ b >>= 20;
6753+ if ((unsigned)r > 255) {
6754+ if (r < 0) {
6755+ r = 0;
6756+ } else {
6757+ r = 255;
6758+ }
6759+ }
6760+ if ((unsigned)g > 255) {
6761+ if (g < 0) {
6762+ g = 0;
6763+ } else {
6764+ g = 255;
6765+ }
6766+ }
6767+ if ((unsigned)b > 255) {
6768+ if (b < 0) {
6769+ b = 0;
6770+ } else {
6771+ b = 255;
6772+ }
6773+ }
6774+ out[0] = (stbi_uc)r;
6775+ out[1] = (stbi_uc)g;
6776+ out[2] = (stbi_uc)b;
6777+ out[3] = 255;
6778+ out += step;
6779+ }
6780 }
6781
6782 #if defined(STBI_SSE2) || defined(STBI_NEON)
6783-static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
6784+static void
6785+stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb,
6786+ stbi_uc const *pcr, int count, int step)
6787 {
6788- int i = 0;
6789+ int i = 0;
6790
6791 #ifdef STBI_SSE2
6792- // step == 3 is pretty ugly on the final interleave, and i'm not convinced
6793- // it's useful in practice (you wouldn't use it for textures, for example).
6794- // so just accelerate step == 4 case.
6795- if (step == 4) {
6796- // this is a fairly straightforward implementation and not super-optimized.
6797- __m128i signflip = _mm_set1_epi8(-0x80);
6798- __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f));
6799- __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
6800- __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
6801- __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f));
6802- __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
6803- __m128i xw = _mm_set1_epi16(255); // alpha channel
6804-
6805- for (; i+7 < count; i += 8) {
6806- // load
6807- __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
6808- __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
6809- __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
6810- __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
6811- __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
6812-
6813- // unpack to short (and left-shift cr, cb by 8)
6814- __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
6815- __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
6816- __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
6817-
6818- // color transform
6819- __m128i yws = _mm_srli_epi16(yw, 4);
6820- __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
6821- __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
6822- __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
6823- __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
6824- __m128i rws = _mm_add_epi16(cr0, yws);
6825- __m128i gwt = _mm_add_epi16(cb0, yws);
6826- __m128i bws = _mm_add_epi16(yws, cb1);
6827- __m128i gws = _mm_add_epi16(gwt, cr1);
6828-
6829- // descale
6830- __m128i rw = _mm_srai_epi16(rws, 4);
6831- __m128i bw = _mm_srai_epi16(bws, 4);
6832- __m128i gw = _mm_srai_epi16(gws, 4);
6833-
6834- // back to byte, set up for transpose
6835- __m128i brb = _mm_packus_epi16(rw, bw);
6836- __m128i gxb = _mm_packus_epi16(gw, xw);
6837-
6838- // transpose to interleave channels
6839- __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
6840- __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
6841- __m128i o0 = _mm_unpacklo_epi16(t0, t1);
6842- __m128i o1 = _mm_unpackhi_epi16(t0, t1);
6843-
6844- // store
6845- _mm_storeu_si128((__m128i *) (out + 0), o0);
6846- _mm_storeu_si128((__m128i *) (out + 16), o1);
6847- out += 32;
6848- }
6849- }
6850+ // step == 3 is pretty ugly on the final interleave, and i'm not convinced
6851+ // it's useful in practice (you wouldn't use it for textures, for example).
6852+ // so just accelerate step == 4 case.
6853+ if (step == 4) {
6854+ // this is a fairly straightforward implementation and not
6855+ // super-optimized.
6856+ __m128i signflip = _mm_set1_epi8(-0x80);
6857+ __m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f));
6858+ __m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f));
6859+ __m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f));
6860+ __m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f));
6861+ __m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);
6862+ __m128i xw = _mm_set1_epi16(255); // alpha channel
6863+
6864+ for (; i + 7 < count; i += 8) {
6865+ // load
6866+ __m128i y_bytes = _mm_loadl_epi64((__m128i *)(y + i));
6867+ __m128i cr_bytes = _mm_loadl_epi64((__m128i *)(pcr + i));
6868+ __m128i cb_bytes = _mm_loadl_epi64((__m128i *)(pcb + i));
6869+ __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
6870+ __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
6871+
6872+ // unpack to short (and left-shift cr, cb by 8)
6873+ __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
6874+ __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
6875+ __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
6876+
6877+ // color transform
6878+ __m128i yws = _mm_srli_epi16(yw, 4);
6879+ __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
6880+ __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
6881+ __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
6882+ __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
6883+ __m128i rws = _mm_add_epi16(cr0, yws);
6884+ __m128i gwt = _mm_add_epi16(cb0, yws);
6885+ __m128i bws = _mm_add_epi16(yws, cb1);
6886+ __m128i gws = _mm_add_epi16(gwt, cr1);
6887+
6888+ // descale
6889+ __m128i rw = _mm_srai_epi16(rws, 4);
6890+ __m128i bw = _mm_srai_epi16(bws, 4);
6891+ __m128i gw = _mm_srai_epi16(gws, 4);
6892+
6893+ // back to byte, set up for transpose
6894+ __m128i brb = _mm_packus_epi16(rw, bw);
6895+ __m128i gxb = _mm_packus_epi16(gw, xw);
6896+
6897+ // transpose to interleave channels
6898+ __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
6899+ __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
6900+ __m128i o0 = _mm_unpacklo_epi16(t0, t1);
6901+ __m128i o1 = _mm_unpackhi_epi16(t0, t1);
6902+
6903+ // store
6904+ _mm_storeu_si128((__m128i *)(out + 0), o0);
6905+ _mm_storeu_si128((__m128i *)(out + 16), o1);
6906+ out += 32;
6907+ }
6908+ }
6909 #endif
6910
6911 #ifdef STBI_NEON
6912- // in this version, step=3 support would be easy to add. but is there demand?
6913- if (step == 4) {
6914- // this is a fairly straightforward implementation and not super-optimized.
6915- uint8x8_t signflip = vdup_n_u8(0x80);
6916- int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f));
6917- int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
6918- int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
6919- int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f));
6920-
6921- for (; i+7 < count; i += 8) {
6922- // load
6923- uint8x8_t y_bytes = vld1_u8(y + i);
6924- uint8x8_t cr_bytes = vld1_u8(pcr + i);
6925- uint8x8_t cb_bytes = vld1_u8(pcb + i);
6926- int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
6927- int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
6928-
6929- // expand to s16
6930- int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
6931- int16x8_t crw = vshll_n_s8(cr_biased, 7);
6932- int16x8_t cbw = vshll_n_s8(cb_biased, 7);
6933-
6934- // color transform
6935- int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
6936- int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
6937- int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
6938- int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
6939- int16x8_t rws = vaddq_s16(yws, cr0);
6940- int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
6941- int16x8_t bws = vaddq_s16(yws, cb1);
6942-
6943- // undo scaling, round, convert to byte
6944- uint8x8x4_t o;
6945- o.val[0] = vqrshrun_n_s16(rws, 4);
6946- o.val[1] = vqrshrun_n_s16(gws, 4);
6947- o.val[2] = vqrshrun_n_s16(bws, 4);
6948- o.val[3] = vdup_n_u8(255);
6949-
6950- // store, interleaving r/g/b/a
6951- vst4_u8(out, o);
6952- out += 8*4;
6953- }
6954- }
6955-#endif
6956-
6957- for (; i < count; ++i) {
6958- int y_fixed = (y[i] << 20) + (1<<19); // rounding
6959- int r,g,b;
6960- int cr = pcr[i] - 128;
6961- int cb = pcb[i] - 128;
6962- r = y_fixed + cr* stbi__float2fixed(1.40200f);
6963- g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
6964- b = y_fixed + cb* stbi__float2fixed(1.77200f);
6965- r >>= 20;
6966- g >>= 20;
6967- b >>= 20;
6968- if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
6969- if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
6970- if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
6971- out[0] = (stbi_uc)r;
6972- out[1] = (stbi_uc)g;
6973- out[2] = (stbi_uc)b;
6974- out[3] = 255;
6975- out += step;
6976- }
6977+ // in this version, step=3 support would be easy to add. but is there
6978+ // demand?
6979+ if (step == 4) {
6980+ // this is a fairly straightforward implementation and not
6981+ // super-optimized.
6982+ uint8x8_t signflip = vdup_n_u8(0x80);
6983+ int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f));
6984+ int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f));
6985+ int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f));
6986+ int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f));
6987+
6988+ for (; i + 7 < count; i += 8) {
6989+ // load
6990+ uint8x8_t y_bytes = vld1_u8(y + i);
6991+ uint8x8_t cr_bytes = vld1_u8(pcr + i);
6992+ uint8x8_t cb_bytes = vld1_u8(pcb + i);
6993+ int8x8_t cr_biased =
6994+ vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
6995+ int8x8_t cb_biased =
6996+ vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
6997+
6998+ // expand to s16
6999+ int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
7000+ int16x8_t crw = vshll_n_s8(cr_biased, 7);
7001+ int16x8_t cbw = vshll_n_s8(cb_biased, 7);
7002+
7003+ // color transform
7004+ int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
7005+ int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
7006+ int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
7007+ int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
7008+ int16x8_t rws = vaddq_s16(yws, cr0);
7009+ int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
7010+ int16x8_t bws = vaddq_s16(yws, cb1);
7011+
7012+ // undo scaling, round, convert to byte
7013+ uint8x8x4_t o;
7014+ o.val[0] = vqrshrun_n_s16(rws, 4);
7015+ o.val[1] = vqrshrun_n_s16(gws, 4);
7016+ o.val[2] = vqrshrun_n_s16(bws, 4);
7017+ o.val[3] = vdup_n_u8(255);
7018+
7019+ // store, interleaving r/g/b/a
7020+ vst4_u8(out, o);
7021+ out += 8 * 4;
7022+ }
7023+ }
7024+#endif
7025+
7026+ for (; i < count; ++i) {
7027+ int y_fixed = (y[i] << 20) + (1 << 19); // rounding
7028+ int r, g, b;
7029+ int cr = pcr[i] - 128;
7030+ int cb = pcb[i] - 128;
7031+ r = y_fixed + cr * stbi__float2fixed(1.40200f);
7032+ g = y_fixed + cr * -stbi__float2fixed(0.71414f) +
7033+ ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
7034+ b = y_fixed + cb * stbi__float2fixed(1.77200f);
7035+ r >>= 20;
7036+ g >>= 20;
7037+ b >>= 20;
7038+ if ((unsigned)r > 255) {
7039+ if (r < 0) {
7040+ r = 0;
7041+ } else {
7042+ r = 255;
7043+ }
7044+ }
7045+ if ((unsigned)g > 255) {
7046+ if (g < 0) {
7047+ g = 0;
7048+ } else {
7049+ g = 255;
7050+ }
7051+ }
7052+ if ((unsigned)b > 255) {
7053+ if (b < 0) {
7054+ b = 0;
7055+ } else {
7056+ b = 255;
7057+ }
7058+ }
7059+ out[0] = (stbi_uc)r;
7060+ out[1] = (stbi_uc)g;
7061+ out[2] = (stbi_uc)b;
7062+ out[3] = 255;
7063+ out += step;
7064+ }
7065 }
7066 #endif
7067
7068 // set up the kernels
7069-static void stbi__setup_jpeg(stbi__jpeg *j)
7070+static void
7071+stbi__setup_jpeg(stbi__jpeg *j)
7072 {
7073- j->idct_block_kernel = stbi__idct_block;
7074- j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
7075- j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
7076+ j->idct_block_kernel = stbi__idct_block;
7077+ j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
7078+ j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
7079
7080 #ifdef STBI_SSE2
7081- if (stbi__sse2_available()) {
7082- j->idct_block_kernel = stbi__idct_simd;
7083- j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
7084- j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
7085- }
7086+ if (stbi__sse2_available()) {
7087+ j->idct_block_kernel = stbi__idct_simd;
7088+ j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
7089+ j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
7090+ }
7091 #endif
7092
7093 #ifdef STBI_NEON
7094- j->idct_block_kernel = stbi__idct_simd;
7095- j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
7096- j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
7097+ j->idct_block_kernel = stbi__idct_simd;
7098+ j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
7099+ j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
7100 #endif
7101 }
7102
7103 // clean up the temporary component buffers
7104-static void stbi__cleanup_jpeg(stbi__jpeg *j)
7105+static void
7106+stbi__cleanup_jpeg(stbi__jpeg *j)
7107 {
7108- stbi__free_jpeg_components(j, j->s->img_n, 0);
7109+ stbi__free_jpeg_components(j, j->s->img_n, 0);
7110 }
7111
7112-typedef struct
7113-{
7114- resample_row_func resample;
7115- stbi_uc *line0,*line1;
7116- int hs,vs; // expansion factor in each axis
7117- int w_lores; // horizontal pixels pre-expansion
7118- int ystep; // how far through vertical expansion we are
7119- int ypos; // which pre-expansion row we're on
7120+typedef struct {
7121+ resample_row_func resample;
7122+ stbi_uc *line0, *line1;
7123+ int hs, vs; // expansion factor in each axis
7124+ int w_lores; // horizontal pixels pre-expansion
7125+ int ystep; // how far through vertical expansion we are
7126+ int ypos; // which pre-expansion row we're on
7127 } stbi__resample;
7128
7129 // fast 0..255 * 0..255 => 0..255 rounded multiplication
7130-static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
7131-{
7132- unsigned int t = x*y + 128;
7133- return (stbi_uc) ((t + (t >>8)) >> 8);
7134-}
7135-
7136-static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
7137-{
7138- int n, decode_n, is_rgb;
7139- z->s->img_n = 0; // make stbi__cleanup_jpeg safe
7140-
7141- // validate req_comp
7142- if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
7143-
7144- // load a jpeg image from whichever source, but leave in YCbCr format
7145- if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
7146-
7147- // determine actual number of components to generate
7148- n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
7149-
7150- is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
7151-
7152- if (z->s->img_n == 3 && n < 3 && !is_rgb)
7153- decode_n = 1;
7154- else
7155- decode_n = z->s->img_n;
7156-
7157- // nothing to do if no components requested; check this now to avoid
7158- // accessing uninitialized coutput[0] later
7159- if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
7160-
7161- // resample and color-convert
7162- {
7163- int k;
7164- unsigned int i,j;
7165- stbi_uc *output;
7166- stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
7167-
7168- stbi__resample res_comp[4];
7169-
7170- for (k=0; k < decode_n; ++k) {
7171- stbi__resample *r = &res_comp[k];
7172-
7173- // allocate line buffer big enough for upsampling off the edges
7174- // with upsample factor of 4
7175- z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
7176- if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
7177-
7178- r->hs = z->img_h_max / z->img_comp[k].h;
7179- r->vs = z->img_v_max / z->img_comp[k].v;
7180- r->ystep = r->vs >> 1;
7181- r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
7182- r->ypos = 0;
7183- r->line0 = r->line1 = z->img_comp[k].data;
7184-
7185- if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
7186- else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
7187- else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
7188- else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
7189- else r->resample = stbi__resample_row_generic;
7190- }
7191-
7192- // can't error after this so, this is safe
7193- output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
7194- if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
7195-
7196- // now go ahead and resample
7197- for (j=0; j < z->s->img_y; ++j) {
7198- stbi_uc *out = output + n * z->s->img_x * j;
7199- for (k=0; k < decode_n; ++k) {
7200- stbi__resample *r = &res_comp[k];
7201- int y_bot = r->ystep >= (r->vs >> 1);
7202- coutput[k] = r->resample(z->img_comp[k].linebuf,
7203- y_bot ? r->line1 : r->line0,
7204- y_bot ? r->line0 : r->line1,
7205- r->w_lores, r->hs);
7206- if (++r->ystep >= r->vs) {
7207- r->ystep = 0;
7208- r->line0 = r->line1;
7209- if (++r->ypos < z->img_comp[k].y)
7210- r->line1 += z->img_comp[k].w2;
7211- }
7212- }
7213- if (n >= 3) {
7214- stbi_uc *y = coutput[0];
7215- if (z->s->img_n == 3) {
7216- if (is_rgb) {
7217- for (i=0; i < z->s->img_x; ++i) {
7218- out[0] = y[i];
7219- out[1] = coutput[1][i];
7220- out[2] = coutput[2][i];
7221- out[3] = 255;
7222- out += n;
7223- }
7224- } else {
7225- z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
7226- }
7227- } else if (z->s->img_n == 4) {
7228- if (z->app14_color_transform == 0) { // CMYK
7229- for (i=0; i < z->s->img_x; ++i) {
7230- stbi_uc m = coutput[3][i];
7231- out[0] = stbi__blinn_8x8(coutput[0][i], m);
7232- out[1] = stbi__blinn_8x8(coutput[1][i], m);
7233- out[2] = stbi__blinn_8x8(coutput[2][i], m);
7234- out[3] = 255;
7235- out += n;
7236- }
7237- } else if (z->app14_color_transform == 2) { // YCCK
7238- z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
7239- for (i=0; i < z->s->img_x; ++i) {
7240- stbi_uc m = coutput[3][i];
7241- out[0] = stbi__blinn_8x8(255 - out[0], m);
7242- out[1] = stbi__blinn_8x8(255 - out[1], m);
7243- out[2] = stbi__blinn_8x8(255 - out[2], m);
7244- out += n;
7245- }
7246- } else { // YCbCr + alpha? Ignore the fourth channel for now
7247- z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
7248- }
7249- } else
7250- for (i=0; i < z->s->img_x; ++i) {
7251- out[0] = out[1] = out[2] = y[i];
7252- out[3] = 255; // not used if n==3
7253- out += n;
7254- }
7255- } else {
7256- if (is_rgb) {
7257- if (n == 1)
7258- for (i=0; i < z->s->img_x; ++i)
7259- *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
7260- else {
7261- for (i=0; i < z->s->img_x; ++i, out += 2) {
7262- out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
7263- out[1] = 255;
7264- }
7265- }
7266- } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
7267- for (i=0; i < z->s->img_x; ++i) {
7268- stbi_uc m = coutput[3][i];
7269- stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
7270- stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
7271- stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
7272- out[0] = stbi__compute_y(r, g, b);
7273- out[1] = 255;
7274- out += n;
7275- }
7276- } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
7277- for (i=0; i < z->s->img_x; ++i) {
7278- out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
7279- out[1] = 255;
7280- out += n;
7281- }
7282- } else {
7283- stbi_uc *y = coutput[0];
7284- if (n == 1)
7285- for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
7286- else
7287- for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
7288- }
7289- }
7290- }
7291- stbi__cleanup_jpeg(z);
7292- *out_x = z->s->img_x;
7293- *out_y = z->s->img_y;
7294- if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
7295- return output;
7296- }
7297-}
7298-
7299-static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7300-{
7301- unsigned char* result;
7302- stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
7303- if (!j) return stbi__errpuc("outofmem", "Out of memory");
7304- memset(j, 0, sizeof(stbi__jpeg));
7305- STBI_NOTUSED(ri);
7306- j->s = s;
7307- stbi__setup_jpeg(j);
7308- result = load_jpeg_image(j, x,y,comp,req_comp);
7309- STBI_FREE(j);
7310- return result;
7311-}
7312-
7313-static int stbi__jpeg_test(stbi__context *s)
7314-{
7315- int r;
7316- stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
7317- if (!j) return stbi__err("outofmem", "Out of memory");
7318- memset(j, 0, sizeof(stbi__jpeg));
7319- j->s = s;
7320- stbi__setup_jpeg(j);
7321- r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
7322- stbi__rewind(s);
7323- STBI_FREE(j);
7324- return r;
7325-}
7326-
7327-static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
7328-{
7329- if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
7330- stbi__rewind( j->s );
7331- return 0;
7332- }
7333- if (x) *x = j->s->img_x;
7334- if (y) *y = j->s->img_y;
7335- if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
7336- return 1;
7337-}
7338-
7339-static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
7340-{
7341- int result;
7342- stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
7343- if (!j) return stbi__err("outofmem", "Out of memory");
7344- memset(j, 0, sizeof(stbi__jpeg));
7345- j->s = s;
7346- result = stbi__jpeg_info_raw(j, x, y, comp);
7347- STBI_FREE(j);
7348- return result;
7349+static stbi_uc
7350+stbi__blinn_8x8(stbi_uc x, stbi_uc y)
7351+{
7352+ unsigned int t = x * y + 128;
7353+ return (stbi_uc)((t + (t >> 8)) >> 8);
7354+}
7355+
7356+static stbi_uc *
7357+load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
7358+{
7359+ int n, decode_n, is_rgb;
7360+ z->s->img_n = 0; // make stbi__cleanup_jpeg safe
7361+
7362+ // validate req_comp
7363+ if (req_comp < 0 || req_comp > 4) {
7364+ return stbi__errpuc("bad req_comp", "Internal error");
7365+ }
7366+
7367+ // load a jpeg image from whichever source, but leave in YCbCr format
7368+ if (!stbi__decode_jpeg_image(z)) {
7369+ stbi__cleanup_jpeg(z);
7370+ return NULL;
7371+ }
7372+
7373+ // determine actual number of components to generate
7374+ n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
7375+
7376+ is_rgb = z->s->img_n == 3 &&
7377+ (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
7378+
7379+ if (z->s->img_n == 3 && n < 3 && !is_rgb) {
7380+ decode_n = 1;
7381+ } else {
7382+ decode_n = z->s->img_n;
7383+ }
7384+
7385+ // nothing to do if no components requested; check this now to avoid
7386+ // accessing uninitialized coutput[0] later
7387+ if (decode_n <= 0) {
7388+ stbi__cleanup_jpeg(z);
7389+ return NULL;
7390+ }
7391+
7392+ // resample and color-convert
7393+ {
7394+ int k;
7395+ unsigned int i, j;
7396+ stbi_uc *output;
7397+ stbi_uc *coutput[4] = {NULL, NULL, NULL, NULL};
7398+
7399+ stbi__resample res_comp[4];
7400+
7401+ for (k = 0; k < decode_n; ++k) {
7402+ stbi__resample *r = &res_comp[k];
7403+
7404+ // allocate line buffer big enough for upsampling off the edges
7405+ // with upsample factor of 4
7406+ z->img_comp[k].linebuf = (stbi_uc *)stbi__malloc(z->s->img_x + 3);
7407+ if (!z->img_comp[k].linebuf) {
7408+ stbi__cleanup_jpeg(z);
7409+ return stbi__errpuc("outofmem", "Out of memory");
7410+ }
7411+
7412+ r->hs = z->img_h_max / z->img_comp[k].h;
7413+ r->vs = z->img_v_max / z->img_comp[k].v;
7414+ r->ystep = r->vs >> 1;
7415+ r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
7416+ r->ypos = 0;
7417+ r->line0 = r->line1 = z->img_comp[k].data;
7418+
7419+ if (r->hs == 1 && r->vs == 1) {
7420+ r->resample = resample_row_1;
7421+ } else if (r->hs == 1 && r->vs == 2) {
7422+ r->resample = stbi__resample_row_v_2;
7423+ } else if (r->hs == 2 && r->vs == 1) {
7424+ r->resample = stbi__resample_row_h_2;
7425+ } else if (r->hs == 2 && r->vs == 2) {
7426+ r->resample = z->resample_row_hv_2_kernel;
7427+ } else {
7428+ r->resample = stbi__resample_row_generic;
7429+ }
7430+ }
7431+
7432+ // can't error after this so, this is safe
7433+ output = (stbi_uc *)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
7434+ if (!output) {
7435+ stbi__cleanup_jpeg(z);
7436+ return stbi__errpuc("outofmem", "Out of memory");
7437+ }
7438+
7439+ // now go ahead and resample
7440+ for (j = 0; j < z->s->img_y; ++j) {
7441+ stbi_uc *out = output + n * z->s->img_x * j;
7442+ for (k = 0; k < decode_n; ++k) {
7443+ stbi__resample *r = &res_comp[k];
7444+ int y_bot = r->ystep >= (r->vs >> 1);
7445+ coutput[k] = r->resample(
7446+ z->img_comp[k].linebuf, y_bot ? r->line1 : r->line0,
7447+ y_bot ? r->line0 : r->line1, r->w_lores, r->hs);
7448+ if (++r->ystep >= r->vs) {
7449+ r->ystep = 0;
7450+ r->line0 = r->line1;
7451+ if (++r->ypos < z->img_comp[k].y) {
7452+ r->line1 += z->img_comp[k].w2;
7453+ }
7454+ }
7455+ }
7456+ if (n >= 3) {
7457+ stbi_uc *y = coutput[0];
7458+ if (z->s->img_n == 3) {
7459+ if (is_rgb) {
7460+ for (i = 0; i < z->s->img_x; ++i) {
7461+ out[0] = y[i];
7462+ out[1] = coutput[1][i];
7463+ out[2] = coutput[2][i];
7464+ out[3] = 255;
7465+ out += n;
7466+ }
7467+ } else {
7468+ z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2],
7469+ z->s->img_x, n);
7470+ }
7471+ } else if (z->s->img_n == 4) {
7472+ if (z->app14_color_transform == 0) { // CMYK
7473+ for (i = 0; i < z->s->img_x; ++i) {
7474+ stbi_uc m = coutput[3][i];
7475+ out[0] = stbi__blinn_8x8(coutput[0][i], m);
7476+ out[1] = stbi__blinn_8x8(coutput[1][i], m);
7477+ out[2] = stbi__blinn_8x8(coutput[2][i], m);
7478+ out[3] = 255;
7479+ out += n;
7480+ }
7481+ } else if (z->app14_color_transform == 2) { // YCCK
7482+ z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2],
7483+ z->s->img_x, n);
7484+ for (i = 0; i < z->s->img_x; ++i) {
7485+ stbi_uc m = coutput[3][i];
7486+ out[0] = stbi__blinn_8x8(255 - out[0], m);
7487+ out[1] = stbi__blinn_8x8(255 - out[1], m);
7488+ out[2] = stbi__blinn_8x8(255 - out[2], m);
7489+ out += n;
7490+ }
7491+ } else { // YCbCr + alpha? Ignore the fourth channel for
7492+ // now
7493+ z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2],
7494+ z->s->img_x, n);
7495+ }
7496+ } else {
7497+ for (i = 0; i < z->s->img_x; ++i) {
7498+ out[0] = out[1] = out[2] = y[i];
7499+ out[3] = 255; // not used if n==3
7500+ out += n;
7501+ }
7502+ }
7503+ } else {
7504+ if (is_rgb) {
7505+ if (n == 1) {
7506+ for (i = 0; i < z->s->img_x; ++i) {
7507+ *out++ = stbi__compute_y(
7508+ coutput[0][i], coutput[1][i], coutput[2][i]);
7509+ }
7510+ } else {
7511+ for (i = 0; i < z->s->img_x; ++i, out += 2) {
7512+ out[0] = stbi__compute_y(
7513+ coutput[0][i], coutput[1][i], coutput[2][i]);
7514+ out[1] = 255;
7515+ }
7516+ }
7517+ } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
7518+ for (i = 0; i < z->s->img_x; ++i) {
7519+ stbi_uc m = coutput[3][i];
7520+ stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
7521+ stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
7522+ stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
7523+ out[0] = stbi__compute_y(r, g, b);
7524+ out[1] = 255;
7525+ out += n;
7526+ }
7527+ } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
7528+ for (i = 0; i < z->s->img_x; ++i) {
7529+ out[0] =
7530+ stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
7531+ out[1] = 255;
7532+ out += n;
7533+ }
7534+ } else {
7535+ stbi_uc *y = coutput[0];
7536+ if (n == 1) {
7537+ for (i = 0; i < z->s->img_x; ++i) {
7538+ out[i] = y[i];
7539+ }
7540+ } else {
7541+ for (i = 0; i < z->s->img_x; ++i) {
7542+ *out++ = y[i];
7543+ *out++ = 255;
7544+ }
7545+ }
7546+ }
7547+ }
7548+ }
7549+ stbi__cleanup_jpeg(z);
7550+ *out_x = z->s->img_x;
7551+ *out_y = z->s->img_y;
7552+ if (comp) {
7553+ *comp = z->s->img_n >= 3
7554+ ? 3
7555+ : 1; // report original components, not output
7556+ }
7557+ return output;
7558+ }
7559+}
7560+
7561+static void *
7562+stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
7563+ stbi__result_info *ri)
7564+{
7565+ unsigned char *result;
7566+ stbi__jpeg *j = (stbi__jpeg *)stbi__malloc(sizeof(stbi__jpeg));
7567+ if (!j) {
7568+ return stbi__errpuc("outofmem", "Out of memory");
7569+ }
7570+ memset(j, 0, sizeof(stbi__jpeg));
7571+ STBI_NOTUSED(ri);
7572+ j->s = s;
7573+ stbi__setup_jpeg(j);
7574+ result = load_jpeg_image(j, x, y, comp, req_comp);
7575+ STBI_FREE(j);
7576+ return result;
7577+}
7578+
7579+static int
7580+stbi__jpeg_test(stbi__context *s)
7581+{
7582+ int r;
7583+ stbi__jpeg *j = (stbi__jpeg *)stbi__malloc(sizeof(stbi__jpeg));
7584+ if (!j) {
7585+ return stbi__err("outofmem", "Out of memory");
7586+ }
7587+ memset(j, 0, sizeof(stbi__jpeg));
7588+ j->s = s;
7589+ stbi__setup_jpeg(j);
7590+ r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
7591+ stbi__rewind(s);
7592+ STBI_FREE(j);
7593+ return r;
7594+}
7595+
7596+static int
7597+stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
7598+{
7599+ if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
7600+ stbi__rewind(j->s);
7601+ return 0;
7602+ }
7603+ if (x) {
7604+ *x = j->s->img_x;
7605+ }
7606+ if (y) {
7607+ *y = j->s->img_y;
7608+ }
7609+ if (comp) {
7610+ *comp = j->s->img_n >= 3 ? 3 : 1;
7611+ }
7612+ return 1;
7613+}
7614+
7615+static int
7616+stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
7617+{
7618+ int result;
7619+ stbi__jpeg *j = (stbi__jpeg *)(stbi__malloc(sizeof(stbi__jpeg)));
7620+ if (!j) {
7621+ return stbi__err("outofmem", "Out of memory");
7622+ }
7623+ memset(j, 0, sizeof(stbi__jpeg));
7624+ j->s = s;
7625+ result = stbi__jpeg_info_raw(j, x, y, comp);
7626+ STBI_FREE(j);
7627+ return result;
7628 }
7629 #endif
7630
7631@@ -4088,84 +5057,92 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
7632 #ifndef STBI_NO_ZLIB
7633
7634 // fast-way is faster to check than jpeg huffman, but slow way is slower
7635-#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
7636-#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
7637+#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables
7638+#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
7639 #define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
7640
7641 // zlib-style huffman encoding
7642 // (jpegs packs from left, zlib from right, so can't share code)
7643-typedef struct
7644-{
7645- stbi__uint16 fast[1 << STBI__ZFAST_BITS];
7646- stbi__uint16 firstcode[16];
7647- int maxcode[17];
7648- stbi__uint16 firstsymbol[16];
7649- stbi_uc size[STBI__ZNSYMS];
7650- stbi__uint16 value[STBI__ZNSYMS];
7651+typedef struct {
7652+ stbi__uint16 fast[1 << STBI__ZFAST_BITS];
7653+ stbi__uint16 firstcode[16];
7654+ int maxcode[17];
7655+ stbi__uint16 firstsymbol[16];
7656+ stbi_uc size[STBI__ZNSYMS];
7657+ stbi__uint16 value[STBI__ZNSYMS];
7658 } stbi__zhuffman;
7659
7660-stbi_inline static int stbi__bitreverse16(int n)
7661-{
7662- n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
7663- n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
7664- n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
7665- n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
7666- return n;
7667-}
7668-
7669-stbi_inline static int stbi__bit_reverse(int v, int bits)
7670-{
7671- STBI_ASSERT(bits <= 16);
7672- // to bit reverse n bits, reverse 16 and shift
7673- // e.g. 11 bits, bit reverse and shift away 5
7674- return stbi__bitreverse16(v) >> (16-bits);
7675-}
7676-
7677-static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
7678-{
7679- int i,k=0;
7680- int code, next_code[16], sizes[17];
7681-
7682- // DEFLATE spec for generating codes
7683- memset(sizes, 0, sizeof(sizes));
7684- memset(z->fast, 0, sizeof(z->fast));
7685- for (i=0; i < num; ++i)
7686- ++sizes[sizelist[i]];
7687- sizes[0] = 0;
7688- for (i=1; i < 16; ++i)
7689- if (sizes[i] > (1 << i))
7690- return stbi__err("bad sizes", "Corrupt PNG");
7691- code = 0;
7692- for (i=1; i < 16; ++i) {
7693- next_code[i] = code;
7694- z->firstcode[i] = (stbi__uint16) code;
7695- z->firstsymbol[i] = (stbi__uint16) k;
7696- code = (code + sizes[i]);
7697- if (sizes[i])
7698- if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
7699- z->maxcode[i] = code << (16-i); // preshift for inner loop
7700- code <<= 1;
7701- k += sizes[i];
7702- }
7703- z->maxcode[16] = 0x10000; // sentinel
7704- for (i=0; i < num; ++i) {
7705- int s = sizelist[i];
7706- if (s) {
7707- int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
7708- stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
7709- z->size [c] = (stbi_uc ) s;
7710- z->value[c] = (stbi__uint16) i;
7711- if (s <= STBI__ZFAST_BITS) {
7712- int j = stbi__bit_reverse(next_code[s],s);
7713- while (j < (1 << STBI__ZFAST_BITS)) {
7714- z->fast[j] = fastv;
7715- j += (1 << s);
7716- }
7717- }
7718- ++next_code[s];
7719- }
7720- }
7721- return 1;
7722+stbi_inline static int
7723+stbi__bitreverse16(int n)
7724+{
7725+ n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
7726+ n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
7727+ n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
7728+ n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
7729+ return n;
7730+}
7731+
7732+stbi_inline static int
7733+stbi__bit_reverse(int v, int bits)
7734+{
7735+ STBI_ASSERT(bits <= 16);
7736+ // to bit reverse n bits, reverse 16 and shift
7737+ // e.g. 11 bits, bit reverse and shift away 5
7738+ return stbi__bitreverse16(v) >> (16 - bits);
7739+}
7740+
7741+static int
7742+stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
7743+{
7744+ int i, k = 0;
7745+ int code, next_code[16], sizes[17];
7746+
7747+ // DEFLATE spec for generating codes
7748+ memset(sizes, 0, sizeof(sizes));
7749+ memset(z->fast, 0, sizeof(z->fast));
7750+ for (i = 0; i < num; ++i) {
7751+ ++sizes[sizelist[i]];
7752+ }
7753+ sizes[0] = 0;
7754+ for (i = 1; i < 16; ++i) {
7755+ if (sizes[i] > (1 << i)) {
7756+ return stbi__err("bad sizes", "Corrupt PNG");
7757+ }
7758+ }
7759+ code = 0;
7760+ for (i = 1; i < 16; ++i) {
7761+ next_code[i] = code;
7762+ z->firstcode[i] = (stbi__uint16)code;
7763+ z->firstsymbol[i] = (stbi__uint16)k;
7764+ code = (code + sizes[i]);
7765+ if (sizes[i]) {
7766+ if (code - 1 >= (1 << i)) {
7767+ return stbi__err("bad codelengths", "Corrupt PNG");
7768+ }
7769+ }
7770+ z->maxcode[i] = code << (16 - i); // preshift for inner loop
7771+ code <<= 1;
7772+ k += sizes[i];
7773+ }
7774+ z->maxcode[16] = 0x10000; // sentinel
7775+ for (i = 0; i < num; ++i) {
7776+ int s = sizelist[i];
7777+ if (s) {
7778+ int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
7779+ stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);
7780+ z->size[c] = (stbi_uc)s;
7781+ z->value[c] = (stbi__uint16)i;
7782+ if (s <= STBI__ZFAST_BITS) {
7783+ int j = stbi__bit_reverse(next_code[s], s);
7784+ while (j < (1 << STBI__ZFAST_BITS)) {
7785+ z->fast[j] = fastv;
7786+ j += (1 << s);
7787+ }
7788+ }
7789+ ++next_code[s];
7790+ }
7791+ }
7792+ return 1;
7793 }
7794
7795 // zlib-from-memory implementation for PNG reading
7796@@ -4174,297 +5151,397 @@ static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int
7797 // we require PNG read all the IDATs and combine them into a single
7798 // memory buffer
7799
7800-typedef struct
7801-{
7802- stbi_uc *zbuffer, *zbuffer_end;
7803- int num_bits;
7804- int hit_zeof_once;
7805- stbi__uint32 code_buffer;
7806+typedef struct {
7807+ stbi_uc *zbuffer, *zbuffer_end;
7808+ int num_bits;
7809+ int hit_zeof_once;
7810+ stbi__uint32 code_buffer;
7811
7812- char *zout;
7813- char *zout_start;
7814- char *zout_end;
7815- int z_expandable;
7816+ char *zout;
7817+ char *zout_start;
7818+ char *zout_end;
7819+ int z_expandable;
7820
7821- stbi__zhuffman z_length, z_distance;
7822+ stbi__zhuffman z_length, z_distance;
7823 } stbi__zbuf;
7824
7825-stbi_inline static int stbi__zeof(stbi__zbuf *z)
7826-{
7827- return (z->zbuffer >= z->zbuffer_end);
7828-}
7829-
7830-stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
7831-{
7832- return stbi__zeof(z) ? 0 : *z->zbuffer++;
7833-}
7834-
7835-static void stbi__fill_bits(stbi__zbuf *z)
7836-{
7837- do {
7838- if (z->code_buffer >= (1U << z->num_bits)) {
7839- z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */
7840- return;
7841- }
7842- z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
7843- z->num_bits += 8;
7844- } while (z->num_bits <= 24);
7845-}
7846-
7847-stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
7848-{
7849- unsigned int k;
7850- if (z->num_bits < n) stbi__fill_bits(z);
7851- k = z->code_buffer & ((1 << n) - 1);
7852- z->code_buffer >>= n;
7853- z->num_bits -= n;
7854- return k;
7855-}
7856-
7857-static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
7858-{
7859- int b,s,k;
7860- // not resolved by fast table, so compute it the slow way
7861- // use jpeg approach, which requires MSbits at top
7862- k = stbi__bit_reverse(a->code_buffer, 16);
7863- for (s=STBI__ZFAST_BITS+1; ; ++s)
7864- if (k < z->maxcode[s])
7865- break;
7866- if (s >= 16) return -1; // invalid code!
7867- // code size is s, so:
7868- b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
7869- if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
7870- if (z->size[b] != s) return -1; // was originally an assert, but report failure instead.
7871- a->code_buffer >>= s;
7872- a->num_bits -= s;
7873- return z->value[b];
7874-}
7875-
7876-stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
7877-{
7878- int b,s;
7879- if (a->num_bits < 16) {
7880- if (stbi__zeof(a)) {
7881- if (!a->hit_zeof_once) {
7882- // This is the first time we hit eof, insert 16 extra padding btis
7883- // to allow us to keep going; if we actually consume any of them
7884- // though, that is invalid data. This is caught later.
7885- a->hit_zeof_once = 1;
7886- a->num_bits += 16; // add 16 implicit zero bits
7887- } else {
7888- // We already inserted our extra 16 padding bits and are again
7889- // out, this stream is actually prematurely terminated.
7890- return -1;
7891- }
7892- } else {
7893- stbi__fill_bits(a);
7894- }
7895- }
7896- b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
7897- if (b) {
7898- s = b >> 9;
7899- a->code_buffer >>= s;
7900- a->num_bits -= s;
7901- return b & 511;
7902- }
7903- return stbi__zhuffman_decode_slowpath(a, z);
7904-}
7905-
7906-static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
7907-{
7908- char *q;
7909- unsigned int cur, limit, old_limit;
7910- z->zout = zout;
7911- if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
7912- cur = (unsigned int) (z->zout - z->zout_start);
7913- limit = old_limit = (unsigned) (z->zout_end - z->zout_start);
7914- if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory");
7915- while (cur + n > limit) {
7916- if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory");
7917- limit *= 2;
7918- }
7919- q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
7920- STBI_NOTUSED(old_limit);
7921- if (q == NULL) return stbi__err("outofmem", "Out of memory");
7922- z->zout_start = q;
7923- z->zout = q + cur;
7924- z->zout_end = q + limit;
7925- return 1;
7926+stbi_inline static int
7927+stbi__zeof(stbi__zbuf *z)
7928+{
7929+ return (z->zbuffer >= z->zbuffer_end);
7930+}
7931+
7932+stbi_inline static stbi_uc
7933+stbi__zget8(stbi__zbuf *z)
7934+{
7935+ return stbi__zeof(z) ? 0 : *z->zbuffer++;
7936+}
7937+
7938+static void
7939+stbi__fill_bits(stbi__zbuf *z)
7940+{
7941+ do {
7942+ if (z->code_buffer >= (1U << z->num_bits)) {
7943+ z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */
7944+ return;
7945+ }
7946+ z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;
7947+ z->num_bits += 8;
7948+ } while (z->num_bits <= 24);
7949+}
7950+
7951+stbi_inline static unsigned int
7952+stbi__zreceive(stbi__zbuf *z, int n)
7953+{
7954+ unsigned int k;
7955+ if (z->num_bits < n) {
7956+ stbi__fill_bits(z);
7957+ }
7958+ k = z->code_buffer & ((1 << n) - 1);
7959+ z->code_buffer >>= n;
7960+ z->num_bits -= n;
7961+ return k;
7962+}
7963+
7964+static int
7965+stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
7966+{
7967+ int b, s, k;
7968+ // not resolved by fast table, so compute it the slow way
7969+ // use jpeg approach, which requires MSbits at top
7970+ k = stbi__bit_reverse(a->code_buffer, 16);
7971+ for (s = STBI__ZFAST_BITS + 1;; ++s) {
7972+ if (k < z->maxcode[s]) {
7973+ break;
7974+ }
7975+ }
7976+ if (s >= 16) {
7977+ return -1; // invalid code!
7978+ }
7979+ // code size is s, so:
7980+ b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
7981+ if (b >= STBI__ZNSYMS) {
7982+ return -1; // some data was corrupt somewhere!
7983+ }
7984+ if (z->size[b] != s) {
7985+ return -1; // was originally an assert, but report failure instead.
7986+ }
7987+ a->code_buffer >>= s;
7988+ a->num_bits -= s;
7989+ return z->value[b];
7990+}
7991+
7992+stbi_inline static int
7993+stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
7994+{
7995+ int b, s;
7996+ if (a->num_bits < 16) {
7997+ if (stbi__zeof(a)) {
7998+ if (!a->hit_zeof_once) {
7999+ // This is the first time we hit eof, insert 16 extra padding
8000+ // btis to allow us to keep going; if we actually consume any of
8001+ // them though, that is invalid data. This is caught later.
8002+ a->hit_zeof_once = 1;
8003+ a->num_bits += 16; // add 16 implicit zero bits
8004+ } else {
8005+ // We already inserted our extra 16 padding bits and are again
8006+ // out, this stream is actually prematurely terminated.
8007+ return -1;
8008+ }
8009+ } else {
8010+ stbi__fill_bits(a);
8011+ }
8012+ }
8013+ b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
8014+ if (b) {
8015+ s = b >> 9;
8016+ a->code_buffer >>= s;
8017+ a->num_bits -= s;
8018+ return b & 511;
8019+ }
8020+ return stbi__zhuffman_decode_slowpath(a, z);
8021+}
8022+
8023+static int
8024+stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
8025+{
8026+ char *q;
8027+ unsigned int cur, limit, old_limit;
8028+ z->zout = zout;
8029+ if (!z->z_expandable) {
8030+ return stbi__err("output buffer limit", "Corrupt PNG");
8031+ }
8032+ cur = (unsigned int)(z->zout - z->zout_start);
8033+ limit = old_limit = (unsigned)(z->zout_end - z->zout_start);
8034+ if (UINT_MAX - cur < (unsigned)n) {
8035+ return stbi__err("outofmem", "Out of memory");
8036+ }
8037+ while (cur + n > limit) {
8038+ if (limit > UINT_MAX / 2) {
8039+ return stbi__err("outofmem", "Out of memory");
8040+ }
8041+ limit *= 2;
8042+ }
8043+ q = (char *)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
8044+ STBI_NOTUSED(old_limit);
8045+ if (q == NULL) {
8046+ return stbi__err("outofmem", "Out of memory");
8047+ }
8048+ z->zout_start = q;
8049+ z->zout = q + cur;
8050+ z->zout_end = q + limit;
8051+ return 1;
8052 }
8053
8054 static const int stbi__zlength_base[31] = {
8055- 3,4,5,6,7,8,9,10,11,13,
8056- 15,17,19,23,27,31,35,43,51,59,
8057- 67,83,99,115,131,163,195,227,258,0,0 };
8058-
8059-static const int stbi__zlength_extra[31]=
8060-{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
8061-
8062-static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
8063-257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
8064-
8065-static const int stbi__zdist_extra[32] =
8066-{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
8067-
8068-static int stbi__parse_huffman_block(stbi__zbuf *a)
8069-{
8070- char *zout = a->zout;
8071- for(;;) {
8072- int z = stbi__zhuffman_decode(a, &a->z_length);
8073- if (z < 256) {
8074- if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
8075- if (zout >= a->zout_end) {
8076- if (!stbi__zexpand(a, zout, 1)) return 0;
8077- zout = a->zout;
8078- }
8079- *zout++ = (char) z;
8080- } else {
8081- stbi_uc *p;
8082- int len,dist;
8083- if (z == 256) {
8084- a->zout = zout;
8085- if (a->hit_zeof_once && a->num_bits < 16) {
8086- // The first time we hit zeof, we inserted 16 extra zero bits into our bit
8087- // buffer so the decoder can just do its speculative decoding. But if we
8088- // actually consumed any of those bits (which is the case when num_bits < 16),
8089- // the stream actually read past the end so it is malformed.
8090- return stbi__err("unexpected end","Corrupt PNG");
8091- }
8092- return 1;
8093- }
8094- if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
8095- z -= 257;
8096- len = stbi__zlength_base[z];
8097- if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
8098- z = stbi__zhuffman_decode(a, &a->z_distance);
8099- if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
8100- dist = stbi__zdist_base[z];
8101- if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
8102- if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
8103- if (len > a->zout_end - zout) {
8104- if (!stbi__zexpand(a, zout, len)) return 0;
8105- zout = a->zout;
8106- }
8107- p = (stbi_uc *) (zout - dist);
8108- if (dist == 1) { // run of one byte; common in images.
8109- stbi_uc v = *p;
8110- if (len) { do *zout++ = v; while (--len); }
8111- } else {
8112- if (len) { do *zout++ = *p++; while (--len); }
8113- }
8114- }
8115- }
8116-}
8117-
8118-static int stbi__compute_huffman_codes(stbi__zbuf *a)
8119-{
8120- static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
8121- stbi__zhuffman z_codelength;
8122- stbi_uc lencodes[286+32+137];//padding for maximum single op
8123- stbi_uc codelength_sizes[19];
8124- int i,n;
8125-
8126- int hlit = stbi__zreceive(a,5) + 257;
8127- int hdist = stbi__zreceive(a,5) + 1;
8128- int hclen = stbi__zreceive(a,4) + 4;
8129- int ntot = hlit + hdist;
8130-
8131- memset(codelength_sizes, 0, sizeof(codelength_sizes));
8132- for (i=0; i < hclen; ++i) {
8133- int s = stbi__zreceive(a,3);
8134- codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
8135- }
8136- if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
8137-
8138- n = 0;
8139- while (n < ntot) {
8140- int c = stbi__zhuffman_decode(a, &z_codelength);
8141- if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
8142- if (c < 16)
8143- lencodes[n++] = (stbi_uc) c;
8144- else {
8145- stbi_uc fill = 0;
8146- if (c == 16) {
8147- c = stbi__zreceive(a,2)+3;
8148- if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
8149- fill = lencodes[n-1];
8150- } else if (c == 17) {
8151- c = stbi__zreceive(a,3)+3;
8152- } else if (c == 18) {
8153- c = stbi__zreceive(a,7)+11;
8154- } else {
8155- return stbi__err("bad codelengths", "Corrupt PNG");
8156- }
8157- if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
8158- memset(lencodes+n, fill, c);
8159- n += c;
8160- }
8161- }
8162- if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
8163- if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
8164- if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
8165- return 1;
8166-}
8167-
8168-static int stbi__parse_uncompressed_block(stbi__zbuf *a)
8169-{
8170- stbi_uc header[4];
8171- int len,nlen,k;
8172- if (a->num_bits & 7)
8173- stbi__zreceive(a, a->num_bits & 7); // discard
8174- // drain the bit-packed data into header
8175- k = 0;
8176- while (a->num_bits > 0) {
8177- header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
8178- a->code_buffer >>= 8;
8179- a->num_bits -= 8;
8180- }
8181- if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
8182- // now fill header the normal way
8183- while (k < 4)
8184- header[k++] = stbi__zget8(a);
8185- len = header[1] * 256 + header[0];
8186- nlen = header[3] * 256 + header[2];
8187- if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
8188- if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
8189- if (a->zout + len > a->zout_end)
8190- if (!stbi__zexpand(a, a->zout, len)) return 0;
8191- memcpy(a->zout, a->zbuffer, len);
8192- a->zbuffer += len;
8193- a->zout += len;
8194- return 1;
8195-}
8196-
8197-static int stbi__parse_zlib_header(stbi__zbuf *a)
8198-{
8199- int cmf = stbi__zget8(a);
8200- int cm = cmf & 15;
8201- /* int cinfo = cmf >> 4; */
8202- int flg = stbi__zget8(a);
8203- if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
8204- if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
8205- if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
8206- if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
8207- // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
8208- return 1;
8209-}
8210-
8211-static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
8212-{
8213- 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8214- 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8215- 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8216- 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8217- 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
8218- 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
8219- 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
8220- 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
8221- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
8222-};
8223-static const stbi_uc stbi__zdefault_distance[32] =
8224-{
8225- 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
8226-};
8227+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
8228+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
8229+
8230+static const int stbi__zlength_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
8231+ 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4,
8232+ 4, 4, 5, 5, 5, 5, 0, 0, 0};
8233+
8234+static const int stbi__zdist_base[32] = {
8235+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33,
8236+ 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
8237+ 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0};
8238+
8239+static const int stbi__zdist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
8240+ 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
8241+ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
8242+
8243+static int
8244+stbi__parse_huffman_block(stbi__zbuf *a)
8245+{
8246+ char *zout = a->zout;
8247+ for (;;) {
8248+ int z = stbi__zhuffman_decode(a, &a->z_length);
8249+ if (z < 256) {
8250+ if (z < 0) {
8251+ return stbi__err("bad huffman code",
8252+ "Corrupt PNG"); // error in huffman codes
8253+ }
8254+ if (zout >= a->zout_end) {
8255+ if (!stbi__zexpand(a, zout, 1)) {
8256+ return 0;
8257+ }
8258+ zout = a->zout;
8259+ }
8260+ *zout++ = (char)z;
8261+ } else {
8262+ stbi_uc *p;
8263+ int len, dist;
8264+ if (z == 256) {
8265+ a->zout = zout;
8266+ if (a->hit_zeof_once && a->num_bits < 16) {
8267+ // The first time we hit zeof, we inserted 16 extra zero
8268+ // bits into our bit buffer so the decoder can just do its
8269+ // speculative decoding. But if we actually consumed any of
8270+ // those bits (which is the case when num_bits < 16), the
8271+ // stream actually read past the end so it is malformed.
8272+ return stbi__err("unexpected end", "Corrupt PNG");
8273+ }
8274+ return 1;
8275+ }
8276+ if (z >= 286) {
8277+ return stbi__err(
8278+ "bad huffman code",
8279+ "Corrupt PNG"); // per DEFLATE, length codes 286 and 287
8280+ // must not appear in compressed data
8281+ }
8282+ z -= 257;
8283+ len = stbi__zlength_base[z];
8284+ if (stbi__zlength_extra[z]) {
8285+ len += stbi__zreceive(a, stbi__zlength_extra[z]);
8286+ }
8287+ z = stbi__zhuffman_decode(a, &a->z_distance);
8288+ if (z < 0 || z >= 30) {
8289+ return stbi__err(
8290+ "bad huffman code",
8291+ "Corrupt PNG"); // per DEFLATE, distance codes 30 and 31
8292+ // must not appear in compressed data
8293+ }
8294+ dist = stbi__zdist_base[z];
8295+ if (stbi__zdist_extra[z]) {
8296+ dist += stbi__zreceive(a, stbi__zdist_extra[z]);
8297+ }
8298+ if (zout - a->zout_start < dist) {
8299+ return stbi__err("bad dist", "Corrupt PNG");
8300+ }
8301+ if (len > a->zout_end - zout) {
8302+ if (!stbi__zexpand(a, zout, len)) {
8303+ return 0;
8304+ }
8305+ zout = a->zout;
8306+ }
8307+ p = (stbi_uc *)(zout - dist);
8308+ if (dist == 1) { // run of one byte; common in images.
8309+ stbi_uc v = *p;
8310+ if (len) {
8311+ do {
8312+ *zout++ = v;
8313+ } while (--len);
8314+ }
8315+ } else {
8316+ if (len) {
8317+ do {
8318+ *zout++ = *p++;
8319+ } while (--len);
8320+ }
8321+ }
8322+ }
8323+ }
8324+}
8325+
8326+static int
8327+stbi__compute_huffman_codes(stbi__zbuf *a)
8328+{
8329+ static const stbi_uc length_dezigzag[19] = {
8330+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
8331+ stbi__zhuffman z_codelength;
8332+ stbi_uc lencodes[286 + 32 + 137]; // padding for maximum single op
8333+ stbi_uc codelength_sizes[19];
8334+ int i, n;
8335+
8336+ int hlit = stbi__zreceive(a, 5) + 257;
8337+ int hdist = stbi__zreceive(a, 5) + 1;
8338+ int hclen = stbi__zreceive(a, 4) + 4;
8339+ int ntot = hlit + hdist;
8340+
8341+ memset(codelength_sizes, 0, sizeof(codelength_sizes));
8342+ for (i = 0; i < hclen; ++i) {
8343+ int s = stbi__zreceive(a, 3);
8344+ codelength_sizes[length_dezigzag[i]] = (stbi_uc)s;
8345+ }
8346+ if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) {
8347+ return 0;
8348+ }
8349+
8350+ n = 0;
8351+ while (n < ntot) {
8352+ int c = stbi__zhuffman_decode(a, &z_codelength);
8353+ if (c < 0 || c >= 19) {
8354+ return stbi__err("bad codelengths", "Corrupt PNG");
8355+ }
8356+ if (c < 16) {
8357+ lencodes[n++] = (stbi_uc)c;
8358+ } else {
8359+ stbi_uc fill = 0;
8360+ if (c == 16) {
8361+ c = stbi__zreceive(a, 2) + 3;
8362+ if (n == 0) {
8363+ return stbi__err("bad codelengths", "Corrupt PNG");
8364+ }
8365+ fill = lencodes[n - 1];
8366+ } else if (c == 17) {
8367+ c = stbi__zreceive(a, 3) + 3;
8368+ } else if (c == 18) {
8369+ c = stbi__zreceive(a, 7) + 11;
8370+ } else {
8371+ return stbi__err("bad codelengths", "Corrupt PNG");
8372+ }
8373+ if (ntot - n < c) {
8374+ return stbi__err("bad codelengths", "Corrupt PNG");
8375+ }
8376+ memset(lencodes + n, fill, c);
8377+ n += c;
8378+ }
8379+ }
8380+ if (n != ntot) {
8381+ return stbi__err("bad codelengths", "Corrupt PNG");
8382+ }
8383+ if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) {
8384+ return 0;
8385+ }
8386+ if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist)) {
8387+ return 0;
8388+ }
8389+ return 1;
8390+}
8391+
8392+static int
8393+stbi__parse_uncompressed_block(stbi__zbuf *a)
8394+{
8395+ stbi_uc header[4];
8396+ int len, nlen, k;
8397+ if (a->num_bits & 7) {
8398+ stbi__zreceive(a, a->num_bits & 7); // discard
8399+ }
8400+ // drain the bit-packed data into header
8401+ k = 0;
8402+ while (a->num_bits > 0) {
8403+ header[k++] =
8404+ (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check
8405+ a->code_buffer >>= 8;
8406+ a->num_bits -= 8;
8407+ }
8408+ if (a->num_bits < 0) {
8409+ return stbi__err("zlib corrupt", "Corrupt PNG");
8410+ }
8411+ // now fill header the normal way
8412+ while (k < 4) {
8413+ header[k++] = stbi__zget8(a);
8414+ }
8415+ len = header[1] * 256 + header[0];
8416+ nlen = header[3] * 256 + header[2];
8417+ if (nlen != (len ^ 0xffff)) {
8418+ return stbi__err("zlib corrupt", "Corrupt PNG");
8419+ }
8420+ if (a->zbuffer + len > a->zbuffer_end) {
8421+ return stbi__err("read past buffer", "Corrupt PNG");
8422+ }
8423+ if (a->zout + len > a->zout_end) {
8424+ if (!stbi__zexpand(a, a->zout, len)) {
8425+ return 0;
8426+ }
8427+ }
8428+ memcpy(a->zout, a->zbuffer, len);
8429+ a->zbuffer += len;
8430+ a->zout += len;
8431+ return 1;
8432+}
8433+
8434+static int
8435+stbi__parse_zlib_header(stbi__zbuf *a)
8436+{
8437+ int cmf = stbi__zget8(a);
8438+ int cm = cmf & 15;
8439+ /* int cinfo = cmf >> 4; */
8440+ int flg = stbi__zget8(a);
8441+ if (stbi__zeof(a)) {
8442+ return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec
8443+ }
8444+ if ((cmf * 256 + flg) % 31 != 0) {
8445+ return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec
8446+ }
8447+ if (flg & 32) {
8448+ return stbi__err("no preset dict",
8449+ "Corrupt PNG"); // preset dictionary not allowed in png
8450+ }
8451+ if (cm != 8) {
8452+ return stbi__err("bad compression",
8453+ "Corrupt PNG"); // DEFLATE required for png
8454+ }
8455+ // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
8456+ return 1;
8457+}
8458+
8459+static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = {
8460+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8461+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8462+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8463+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8464+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8465+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8466+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
8467+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
8468+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
8469+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
8470+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7,
8471+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8};
8472+static const stbi_uc stbi__zdefault_distance[32] = {
8473+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
8474+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
8475 /*
8476 Init algorithm:
8477 {
8478@@ -4478,118 +5555,159 @@ Init algorithm:
8479 }
8480 */
8481
8482-static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
8483-{
8484- int final, type;
8485- if (parse_header)
8486- if (!stbi__parse_zlib_header(a)) return 0;
8487- a->num_bits = 0;
8488- a->code_buffer = 0;
8489- a->hit_zeof_once = 0;
8490- do {
8491- final = stbi__zreceive(a,1);
8492- type = stbi__zreceive(a,2);
8493- if (type == 0) {
8494- if (!stbi__parse_uncompressed_block(a)) return 0;
8495- } else if (type == 3) {
8496- return 0;
8497- } else {
8498- if (type == 1) {
8499- // use fixed code lengths
8500- if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0;
8501- if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
8502- } else {
8503- if (!stbi__compute_huffman_codes(a)) return 0;
8504- }
8505- if (!stbi__parse_huffman_block(a)) return 0;
8506- }
8507- } while (!final);
8508- return 1;
8509-}
8510-
8511-static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
8512-{
8513- a->zout_start = obuf;
8514- a->zout = obuf;
8515- a->zout_end = obuf + olen;
8516- a->z_expandable = exp;
8517-
8518- return stbi__parse_zlib(a, parse_header);
8519-}
8520-
8521-STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
8522-{
8523- stbi__zbuf a;
8524- char *p = (char *) stbi__malloc(initial_size);
8525- if (p == NULL) return NULL;
8526- a.zbuffer = (stbi_uc *) buffer;
8527- a.zbuffer_end = (stbi_uc *) buffer + len;
8528- if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
8529- if (outlen) *outlen = (int) (a.zout - a.zout_start);
8530- return a.zout_start;
8531- } else {
8532- STBI_FREE(a.zout_start);
8533- return NULL;
8534- }
8535-}
8536-
8537-STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
8538-{
8539- return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
8540-}
8541-
8542-STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
8543-{
8544- stbi__zbuf a;
8545- char *p = (char *) stbi__malloc(initial_size);
8546- if (p == NULL) return NULL;
8547- a.zbuffer = (stbi_uc *) buffer;
8548- a.zbuffer_end = (stbi_uc *) buffer + len;
8549- if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
8550- if (outlen) *outlen = (int) (a.zout - a.zout_start);
8551- return a.zout_start;
8552- } else {
8553- STBI_FREE(a.zout_start);
8554- return NULL;
8555- }
8556-}
8557-
8558-STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
8559-{
8560- stbi__zbuf a;
8561- a.zbuffer = (stbi_uc *) ibuffer;
8562- a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
8563- if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
8564- return (int) (a.zout - a.zout_start);
8565- else
8566- return -1;
8567-}
8568-
8569-STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
8570-{
8571- stbi__zbuf a;
8572- char *p = (char *) stbi__malloc(16384);
8573- if (p == NULL) return NULL;
8574- a.zbuffer = (stbi_uc *) buffer;
8575- a.zbuffer_end = (stbi_uc *) buffer+len;
8576- if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
8577- if (outlen) *outlen = (int) (a.zout - a.zout_start);
8578- return a.zout_start;
8579- } else {
8580- STBI_FREE(a.zout_start);
8581- return NULL;
8582- }
8583-}
8584-
8585-STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
8586-{
8587- stbi__zbuf a;
8588- a.zbuffer = (stbi_uc *) ibuffer;
8589- a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
8590- if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
8591- return (int) (a.zout - a.zout_start);
8592- else
8593- return -1;
8594+static int
8595+stbi__parse_zlib(stbi__zbuf *a, int parse_header)
8596+{
8597+ int final, type;
8598+ if (parse_header) {
8599+ if (!stbi__parse_zlib_header(a)) {
8600+ return 0;
8601+ }
8602+ }
8603+ a->num_bits = 0;
8604+ a->code_buffer = 0;
8605+ a->hit_zeof_once = 0;
8606+ do {
8607+ final = stbi__zreceive(a, 1);
8608+ type = stbi__zreceive(a, 2);
8609+ if (type == 0) {
8610+ if (!stbi__parse_uncompressed_block(a)) {
8611+ return 0;
8612+ }
8613+ } else if (type == 3) {
8614+ return 0;
8615+ } else {
8616+ if (type == 1) {
8617+ // use fixed code lengths
8618+ if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length,
8619+ STBI__ZNSYMS)) {
8620+ return 0;
8621+ }
8622+ if (!stbi__zbuild_huffman(&a->z_distance,
8623+ stbi__zdefault_distance, 32)) {
8624+ return 0;
8625+ }
8626+ } else {
8627+ if (!stbi__compute_huffman_codes(a)) {
8628+ return 0;
8629+ }
8630+ }
8631+ if (!stbi__parse_huffman_block(a)) {
8632+ return 0;
8633+ }
8634+ }
8635+ } while (!final);
8636+ return 1;
8637+}
8638+
8639+static int
8640+stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
8641+{
8642+ a->zout_start = obuf;
8643+ a->zout = obuf;
8644+ a->zout_end = obuf + olen;
8645+ a->z_expandable = exp;
8646+
8647+ return stbi__parse_zlib(a, parse_header);
8648+}
8649+
8650+STBIDEF char *
8651+stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size,
8652+ int *outlen)
8653+{
8654+ stbi__zbuf a;
8655+ char *p = (char *)stbi__malloc(initial_size);
8656+ if (p == NULL) {
8657+ return NULL;
8658+ }
8659+ a.zbuffer = (stbi_uc *)buffer;
8660+ a.zbuffer_end = (stbi_uc *)buffer + len;
8661+ if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
8662+ if (outlen) {
8663+ *outlen = (int)(a.zout - a.zout_start);
8664+ }
8665+ return a.zout_start;
8666+ } else {
8667+ STBI_FREE(a.zout_start);
8668+ return NULL;
8669+ }
8670+}
8671+
8672+STBIDEF char *
8673+stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
8674+{
8675+ return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
8676+}
8677+
8678+STBIDEF char *
8679+stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len,
8680+ int initial_size, int *outlen,
8681+ int parse_header)
8682+{
8683+ stbi__zbuf a;
8684+ char *p = (char *)stbi__malloc(initial_size);
8685+ if (p == NULL) {
8686+ return NULL;
8687+ }
8688+ a.zbuffer = (stbi_uc *)buffer;
8689+ a.zbuffer_end = (stbi_uc *)buffer + len;
8690+ if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
8691+ if (outlen) {
8692+ *outlen = (int)(a.zout - a.zout_start);
8693+ }
8694+ return a.zout_start;
8695+ } else {
8696+ STBI_FREE(a.zout_start);
8697+ return NULL;
8698+ }
8699+}
8700+
8701+STBIDEF int
8702+stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
8703+{
8704+ stbi__zbuf a;
8705+ a.zbuffer = (stbi_uc *)ibuffer;
8706+ a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
8707+ if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) {
8708+ return (int)(a.zout - a.zout_start);
8709+ } else {
8710+ return -1;
8711+ }
8712+}
8713+
8714+STBIDEF char *
8715+stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
8716+{
8717+ stbi__zbuf a;
8718+ char *p = (char *)stbi__malloc(16384);
8719+ if (p == NULL) {
8720+ return NULL;
8721+ }
8722+ a.zbuffer = (stbi_uc *)buffer;
8723+ a.zbuffer_end = (stbi_uc *)buffer + len;
8724+ if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
8725+ if (outlen) {
8726+ *outlen = (int)(a.zout - a.zout_start);
8727+ }
8728+ return a.zout_start;
8729+ } else {
8730+ STBI_FREE(a.zout_start);
8731+ return NULL;
8732+ }
8733+}
8734+
8735+STBIDEF int
8736+stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer,
8737+ int ilen)
8738+{
8739+ stbi__zbuf a;
8740+ a.zbuffer = (stbi_uc *)ibuffer;
8741+ a.zbuffer_end = (stbi_uc *)ibuffer + ilen;
8742+ if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) {
8743+ return (int)(a.zout - a.zout_start);
8744+ } else {
8745+ return -1;
8746+ }
8747 }
8748 #endif
8749
8750@@ -4604,1131 +5722,1498 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char
8751 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding
8752
8753 #ifndef STBI_NO_PNG
8754-typedef struct
8755-{
8756- stbi__uint32 length;
8757- stbi__uint32 type;
8758+typedef struct {
8759+ stbi__uint32 length;
8760+ stbi__uint32 type;
8761 } stbi__pngchunk;
8762
8763-static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
8764+static stbi__pngchunk
8765+stbi__get_chunk_header(stbi__context *s)
8766 {
8767- stbi__pngchunk c;
8768- c.length = stbi__get32be(s);
8769- c.type = stbi__get32be(s);
8770- return c;
8771+ stbi__pngchunk c;
8772+ c.length = stbi__get32be(s);
8773+ c.type = stbi__get32be(s);
8774+ return c;
8775 }
8776
8777-static int stbi__check_png_header(stbi__context *s)
8778+static int
8779+stbi__check_png_header(stbi__context *s)
8780 {
8781- static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
8782- int i;
8783- for (i=0; i < 8; ++i)
8784- if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
8785- return 1;
8786+ static const stbi_uc png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
8787+ int i;
8788+ for (i = 0; i < 8; ++i) {
8789+ if (stbi__get8(s) != png_sig[i]) {
8790+ return stbi__err("bad png sig", "Not a PNG");
8791+ }
8792+ }
8793+ return 1;
8794 }
8795
8796-typedef struct
8797-{
8798- stbi__context *s;
8799- stbi_uc *idata, *expanded, *out;
8800- int depth;
8801+typedef struct {
8802+ stbi__context *s;
8803+ stbi_uc *idata, *expanded, *out;
8804+ int depth;
8805 } stbi__png;
8806
8807-
8808 enum {
8809- STBI__F_none=0,
8810- STBI__F_sub=1,
8811- STBI__F_up=2,
8812- STBI__F_avg=3,
8813- STBI__F_paeth=4,
8814- // synthetic filter used for first scanline to avoid needing a dummy row of 0s
8815- STBI__F_avg_first
8816+ STBI__F_none = 0,
8817+ STBI__F_sub = 1,
8818+ STBI__F_up = 2,
8819+ STBI__F_avg = 3,
8820+ STBI__F_paeth = 4,
8821+ // synthetic filter used for first scanline to avoid needing a dummy row of
8822+ // 0s
8823+ STBI__F_avg_first
8824 };
8825
8826-static stbi_uc first_row_filter[5] =
8827-{
8828- STBI__F_none,
8829- STBI__F_sub,
8830- STBI__F_none,
8831- STBI__F_avg_first,
8832- STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub
8833+static stbi_uc first_row_filter[5] = {
8834+ STBI__F_none, STBI__F_sub, STBI__F_none, STBI__F_avg_first,
8835+ STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub
8836 };
8837
8838-static int stbi__paeth(int a, int b, int c)
8839+static int
8840+stbi__paeth(int a, int b, int c)
8841 {
8842- // This formulation looks very different from the reference in the PNG spec, but is
8843- // actually equivalent and has favorable data dependencies and admits straightforward
8844- // generation of branch-free code, which helps performance significantly.
8845- int thresh = c*3 - (a + b);
8846- int lo = a < b ? a : b;
8847- int hi = a < b ? b : a;
8848- int t0 = (hi <= thresh) ? lo : c;
8849- int t1 = (thresh <= lo) ? hi : t0;
8850- return t1;
8851+ // This formulation looks very different from the reference in the PNG spec,
8852+ // but is actually equivalent and has favorable data dependencies and admits
8853+ // straightforward generation of branch-free code, which helps performance
8854+ // significantly.
8855+ int thresh = c * 3 - (a + b);
8856+ int lo = a < b ? a : b;
8857+ int hi = a < b ? b : a;
8858+ int t0 = (hi <= thresh) ? lo : c;
8859+ int t1 = (thresh <= lo) ? hi : t0;
8860+ return t1;
8861 }
8862
8863-static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
8864+static const stbi_uc stbi__depth_scale_table[9] = {0, 0xff, 0x55, 0, 0x11,
8865+ 0, 0, 0, 0x01};
8866
8867 // adds an extra all-255 alpha channel
8868 // dest == src is legal
8869 // img_n must be 1 or 3
8870-static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n)
8871-{
8872- int i;
8873- // must process data backwards since we allow dest==src
8874- if (img_n == 1) {
8875- for (i=x-1; i >= 0; --i) {
8876- dest[i*2+1] = 255;
8877- dest[i*2+0] = src[i];
8878- }
8879- } else {
8880- STBI_ASSERT(img_n == 3);
8881- for (i=x-1; i >= 0; --i) {
8882- dest[i*4+3] = 255;
8883- dest[i*4+2] = src[i*3+2];
8884- dest[i*4+1] = src[i*3+1];
8885- dest[i*4+0] = src[i*3+0];
8886- }
8887- }
8888+static void
8889+stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x,
8890+ int img_n)
8891+{
8892+ int i;
8893+ // must process data backwards since we allow dest==src
8894+ if (img_n == 1) {
8895+ for (i = x - 1; i >= 0; --i) {
8896+ dest[i * 2 + 1] = 255;
8897+ dest[i * 2 + 0] = src[i];
8898+ }
8899+ } else {
8900+ STBI_ASSERT(img_n == 3);
8901+ for (i = x - 1; i >= 0; --i) {
8902+ dest[i * 4 + 3] = 255;
8903+ dest[i * 4 + 2] = src[i * 3 + 2];
8904+ dest[i * 4 + 1] = src[i * 3 + 1];
8905+ dest[i * 4 + 0] = src[i * 3 + 0];
8906+ }
8907+ }
8908 }
8909
8910 // create the png data from post-deflated data
8911-static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
8912-{
8913- int bytes = (depth == 16 ? 2 : 1);
8914- stbi__context *s = a->s;
8915- stbi__uint32 i,j,stride = x*out_n*bytes;
8916- stbi__uint32 img_len, img_width_bytes;
8917- stbi_uc *filter_buf;
8918- int all_ok = 1;
8919- int k;
8920- int img_n = s->img_n; // copy it into a local for later
8921-
8922- int output_bytes = out_n*bytes;
8923- int filter_bytes = img_n*bytes;
8924- int width = x;
8925-
8926- STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
8927- a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
8928- if (!a->out) return stbi__err("outofmem", "Out of memory");
8929-
8930- // note: error exits here don't need to clean up a->out individually,
8931- // stbi__do_png always does on error.
8932- if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
8933- img_width_bytes = (((img_n * x * depth) + 7) >> 3);
8934- if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG");
8935- img_len = (img_width_bytes + 1) * y;
8936-
8937- // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
8938- // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
8939- // so just check for raw_len < img_len always.
8940- if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
8941-
8942- // Allocate two scan lines worth of filter workspace buffer.
8943- filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0);
8944- if (!filter_buf) return stbi__err("outofmem", "Out of memory");
8945-
8946- // Filtering for low-bit-depth images
8947- if (depth < 8) {
8948- filter_bytes = 1;
8949- width = img_width_bytes;
8950- }
8951-
8952- for (j=0; j < y; ++j) {
8953- // cur/prior filter buffers alternate
8954- stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes;
8955- stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes;
8956- stbi_uc *dest = a->out + stride*j;
8957- int nk = width * filter_bytes;
8958- int filter = *raw++;
8959-
8960- // check filter type
8961- if (filter > 4) {
8962- all_ok = stbi__err("invalid filter","Corrupt PNG");
8963- break;
8964- }
8965-
8966- // if first row, use special filter that doesn't sample previous row
8967- if (j == 0) filter = first_row_filter[filter];
8968-
8969- // perform actual filtering
8970- switch (filter) {
8971- case STBI__F_none:
8972- memcpy(cur, raw, nk);
8973- break;
8974- case STBI__F_sub:
8975- memcpy(cur, raw, filter_bytes);
8976- for (k = filter_bytes; k < nk; ++k)
8977- cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]);
8978- break;
8979- case STBI__F_up:
8980- for (k = 0; k < nk; ++k)
8981- cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
8982- break;
8983- case STBI__F_avg:
8984- for (k = 0; k < filter_bytes; ++k)
8985- cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1));
8986- for (k = filter_bytes; k < nk; ++k)
8987- cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1));
8988- break;
8989- case STBI__F_paeth:
8990- for (k = 0; k < filter_bytes; ++k)
8991- cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0)
8992- for (k = filter_bytes; k < nk; ++k)
8993- cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes]));
8994- break;
8995- case STBI__F_avg_first:
8996- memcpy(cur, raw, filter_bytes);
8997- for (k = filter_bytes; k < nk; ++k)
8998- cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1));
8999- break;
9000- }
9001-
9002- raw += nk;
9003-
9004- // expand decoded bits in cur to dest, also adding an extra alpha channel if desired
9005- if (depth < 8) {
9006- stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
9007- stbi_uc *in = cur;
9008- stbi_uc *out = dest;
9009- stbi_uc inb = 0;
9010- stbi__uint32 nsmp = x*img_n;
9011-
9012- // expand bits to bytes first
9013- if (depth == 4) {
9014- for (i=0; i < nsmp; ++i) {
9015- if ((i & 1) == 0) inb = *in++;
9016- *out++ = scale * (inb >> 4);
9017- inb <<= 4;
9018- }
9019- } else if (depth == 2) {
9020- for (i=0; i < nsmp; ++i) {
9021- if ((i & 3) == 0) inb = *in++;
9022- *out++ = scale * (inb >> 6);
9023- inb <<= 2;
9024- }
9025- } else {
9026- STBI_ASSERT(depth == 1);
9027- for (i=0; i < nsmp; ++i) {
9028- if ((i & 7) == 0) inb = *in++;
9029- *out++ = scale * (inb >> 7);
9030- inb <<= 1;
9031- }
9032- }
9033-
9034- // insert alpha=255 values if desired
9035- if (img_n != out_n)
9036- stbi__create_png_alpha_expand8(dest, dest, x, img_n);
9037- } else if (depth == 8) {
9038- if (img_n == out_n)
9039- memcpy(dest, cur, x*img_n);
9040- else
9041- stbi__create_png_alpha_expand8(dest, cur, x, img_n);
9042- } else if (depth == 16) {
9043- // convert the image data from big-endian to platform-native
9044- stbi__uint16 *dest16 = (stbi__uint16*)dest;
9045- stbi__uint32 nsmp = x*img_n;
9046-
9047- if (img_n == out_n) {
9048- for (i = 0; i < nsmp; ++i, ++dest16, cur += 2)
9049- *dest16 = (cur[0] << 8) | cur[1];
9050- } else {
9051- STBI_ASSERT(img_n+1 == out_n);
9052- if (img_n == 1) {
9053- for (i = 0; i < x; ++i, dest16 += 2, cur += 2) {
9054- dest16[0] = (cur[0] << 8) | cur[1];
9055- dest16[1] = 0xffff;
9056- }
9057- } else {
9058- STBI_ASSERT(img_n == 3);
9059- for (i = 0; i < x; ++i, dest16 += 4, cur += 6) {
9060- dest16[0] = (cur[0] << 8) | cur[1];
9061- dest16[1] = (cur[2] << 8) | cur[3];
9062- dest16[2] = (cur[4] << 8) | cur[5];
9063- dest16[3] = 0xffff;
9064- }
9065- }
9066- }
9067- }
9068- }
9069-
9070- STBI_FREE(filter_buf);
9071- if (!all_ok) return 0;
9072-
9073- return 1;
9074-}
9075-
9076-static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
9077-{
9078- int bytes = (depth == 16 ? 2 : 1);
9079- int out_bytes = out_n * bytes;
9080- stbi_uc *final;
9081- int p;
9082- if (!interlaced)
9083- return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
9084-
9085- // de-interlacing
9086- final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
9087- if (!final) return stbi__err("outofmem", "Out of memory");
9088- for (p=0; p < 7; ++p) {
9089- int xorig[] = { 0,4,0,2,0,1,0 };
9090- int yorig[] = { 0,0,4,0,2,0,1 };
9091- int xspc[] = { 8,8,4,4,2,2,1 };
9092- int yspc[] = { 8,8,8,4,4,2,2 };
9093- int i,j,x,y;
9094- // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
9095- x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
9096- y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
9097- if (x && y) {
9098- stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
9099- if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
9100- STBI_FREE(final);
9101- return 0;
9102- }
9103- for (j=0; j < y; ++j) {
9104- for (i=0; i < x; ++i) {
9105- int out_y = j*yspc[p]+yorig[p];
9106- int out_x = i*xspc[p]+xorig[p];
9107- memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
9108- a->out + (j*x+i)*out_bytes, out_bytes);
9109- }
9110- }
9111- STBI_FREE(a->out);
9112- image_data += img_len;
9113- image_data_len -= img_len;
9114- }
9115- }
9116- a->out = final;
9117-
9118- return 1;
9119-}
9120-
9121-static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
9122-{
9123- stbi__context *s = z->s;
9124- stbi__uint32 i, pixel_count = s->img_x * s->img_y;
9125- stbi_uc *p = z->out;
9126-
9127- // compute color-based transparency, assuming we've
9128- // already got 255 as the alpha value in the output
9129- STBI_ASSERT(out_n == 2 || out_n == 4);
9130-
9131- if (out_n == 2) {
9132- for (i=0; i < pixel_count; ++i) {
9133- p[1] = (p[0] == tc[0] ? 0 : 255);
9134- p += 2;
9135- }
9136- } else {
9137- for (i=0; i < pixel_count; ++i) {
9138- if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
9139- p[3] = 0;
9140- p += 4;
9141- }
9142- }
9143- return 1;
9144-}
9145-
9146-static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
9147-{
9148- stbi__context *s = z->s;
9149- stbi__uint32 i, pixel_count = s->img_x * s->img_y;
9150- stbi__uint16 *p = (stbi__uint16*) z->out;
9151-
9152- // compute color-based transparency, assuming we've
9153- // already got 65535 as the alpha value in the output
9154- STBI_ASSERT(out_n == 2 || out_n == 4);
9155-
9156- if (out_n == 2) {
9157- for (i = 0; i < pixel_count; ++i) {
9158- p[1] = (p[0] == tc[0] ? 0 : 65535);
9159- p += 2;
9160- }
9161- } else {
9162- for (i = 0; i < pixel_count; ++i) {
9163- if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
9164- p[3] = 0;
9165- p += 4;
9166- }
9167- }
9168- return 1;
9169-}
9170-
9171-static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
9172-{
9173- stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
9174- stbi_uc *p, *temp_out, *orig = a->out;
9175-
9176- p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
9177- if (p == NULL) return stbi__err("outofmem", "Out of memory");
9178-
9179- // between here and free(out) below, exitting would leak
9180- temp_out = p;
9181-
9182- if (pal_img_n == 3) {
9183- for (i=0; i < pixel_count; ++i) {
9184- int n = orig[i]*4;
9185- p[0] = palette[n ];
9186- p[1] = palette[n+1];
9187- p[2] = palette[n+2];
9188- p += 3;
9189- }
9190- } else {
9191- for (i=0; i < pixel_count; ++i) {
9192- int n = orig[i]*4;
9193- p[0] = palette[n ];
9194- p[1] = palette[n+1];
9195- p[2] = palette[n+2];
9196- p[3] = palette[n+3];
9197- p += 4;
9198- }
9199- }
9200- STBI_FREE(a->out);
9201- a->out = temp_out;
9202-
9203- STBI_NOTUSED(len);
9204-
9205- return 1;
9206+static int
9207+stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len,
9208+ int out_n, stbi__uint32 x, stbi__uint32 y, int depth,
9209+ int color)
9210+{
9211+ int bytes = (depth == 16 ? 2 : 1);
9212+ stbi__context *s = a->s;
9213+ stbi__uint32 i, j, stride = x * out_n * bytes;
9214+ stbi__uint32 img_len, img_width_bytes;
9215+ stbi_uc *filter_buf;
9216+ int all_ok = 1;
9217+ int k;
9218+ int img_n = s->img_n; // copy it into a local for later
9219+
9220+ int output_bytes = out_n * bytes;
9221+ int filter_bytes = img_n * bytes;
9222+ int width = x;
9223+
9224+ STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
9225+ a->out = (stbi_uc *)stbi__malloc_mad3(
9226+ x, y, output_bytes, 0); // extra bytes to write off the end into
9227+ if (!a->out) {
9228+ return stbi__err("outofmem", "Out of memory");
9229+ }
9230+
9231+ // note: error exits here don't need to clean up a->out individually,
9232+ // stbi__do_png always does on error.
9233+ if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) {
9234+ return stbi__err("too large", "Corrupt PNG");
9235+ }
9236+ img_width_bytes = (((img_n * x * depth) + 7) >> 3);
9237+ if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) {
9238+ return stbi__err("too large", "Corrupt PNG");
9239+ }
9240+ img_len = (img_width_bytes + 1) * y;
9241+
9242+ // we used to check for exact match between raw_len and img_len on
9243+ // non-interlaced PNGs, but issue #276 reported a PNG in the wild that had
9244+ // extra data at the end (all zeros), so just check for raw_len < img_len
9245+ // always.
9246+ if (raw_len < img_len) {
9247+ return stbi__err("not enough pixels", "Corrupt PNG");
9248+ }
9249+
9250+ // Allocate two scan lines worth of filter workspace buffer.
9251+ filter_buf = (stbi_uc *)stbi__malloc_mad2(img_width_bytes, 2, 0);
9252+ if (!filter_buf) {
9253+ return stbi__err("outofmem", "Out of memory");
9254+ }
9255+
9256+ // Filtering for low-bit-depth images
9257+ if (depth < 8) {
9258+ filter_bytes = 1;
9259+ width = img_width_bytes;
9260+ }
9261+
9262+ for (j = 0; j < y; ++j) {
9263+ // cur/prior filter buffers alternate
9264+ stbi_uc *cur = filter_buf + (j & 1) * img_width_bytes;
9265+ stbi_uc *prior = filter_buf + (~j & 1) * img_width_bytes;
9266+ stbi_uc *dest = a->out + stride * j;
9267+ int nk = width * filter_bytes;
9268+ int filter = *raw++;
9269+
9270+ // check filter type
9271+ if (filter > 4) {
9272+ all_ok = stbi__err("invalid filter", "Corrupt PNG");
9273+ break;
9274+ }
9275+
9276+ // if first row, use special filter that doesn't sample previous row
9277+ if (j == 0) {
9278+ filter = first_row_filter[filter];
9279+ }
9280+
9281+ // perform actual filtering
9282+ switch (filter) {
9283+ case STBI__F_none:
9284+ memcpy(cur, raw, nk);
9285+ break;
9286+ case STBI__F_sub:
9287+ memcpy(cur, raw, filter_bytes);
9288+ for (k = filter_bytes; k < nk; ++k) {
9289+ cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]);
9290+ }
9291+ break;
9292+ case STBI__F_up:
9293+ for (k = 0; k < nk; ++k) {
9294+ cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
9295+ }
9296+ break;
9297+ case STBI__F_avg:
9298+ for (k = 0; k < filter_bytes; ++k) {
9299+ cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1));
9300+ }
9301+ for (k = filter_bytes; k < nk; ++k) {
9302+ cur[k] = STBI__BYTECAST(
9303+ raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1));
9304+ }
9305+ break;
9306+ case STBI__F_paeth:
9307+ for (k = 0; k < filter_bytes; ++k) {
9308+ cur[k] = STBI__BYTECAST(
9309+ raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0)
9310+ }
9311+ for (k = filter_bytes; k < nk; ++k) {
9312+ cur[k] = STBI__BYTECAST(
9313+ raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k],
9314+ prior[k - filter_bytes]));
9315+ }
9316+ break;
9317+ case STBI__F_avg_first:
9318+ memcpy(cur, raw, filter_bytes);
9319+ for (k = filter_bytes; k < nk; ++k) {
9320+ cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1));
9321+ }
9322+ break;
9323+ }
9324+
9325+ raw += nk;
9326+
9327+ // expand decoded bits in cur to dest, also adding an extra alpha
9328+ // channel if desired
9329+ if (depth < 8) {
9330+ stbi_uc scale = (color == 0)
9331+ ? stbi__depth_scale_table[depth]
9332+ : 1; // scale grayscale values to 0..255 range
9333+ stbi_uc *in = cur;
9334+ stbi_uc *out = dest;
9335+ stbi_uc inb = 0;
9336+ stbi__uint32 nsmp = x * img_n;
9337+
9338+ // expand bits to bytes first
9339+ if (depth == 4) {
9340+ for (i = 0; i < nsmp; ++i) {
9341+ if ((i & 1) == 0) {
9342+ inb = *in++;
9343+ }
9344+ *out++ = scale * (inb >> 4);
9345+ inb <<= 4;
9346+ }
9347+ } else if (depth == 2) {
9348+ for (i = 0; i < nsmp; ++i) {
9349+ if ((i & 3) == 0) {
9350+ inb = *in++;
9351+ }
9352+ *out++ = scale * (inb >> 6);
9353+ inb <<= 2;
9354+ }
9355+ } else {
9356+ STBI_ASSERT(depth == 1);
9357+ for (i = 0; i < nsmp; ++i) {
9358+ if ((i & 7) == 0) {
9359+ inb = *in++;
9360+ }
9361+ *out++ = scale * (inb >> 7);
9362+ inb <<= 1;
9363+ }
9364+ }
9365+
9366+ // insert alpha=255 values if desired
9367+ if (img_n != out_n) {
9368+ stbi__create_png_alpha_expand8(dest, dest, x, img_n);
9369+ }
9370+ } else if (depth == 8) {
9371+ if (img_n == out_n) {
9372+ memcpy(dest, cur, x * img_n);
9373+ } else {
9374+ stbi__create_png_alpha_expand8(dest, cur, x, img_n);
9375+ }
9376+ } else if (depth == 16) {
9377+ // convert the image data from big-endian to platform-native
9378+ stbi__uint16 *dest16 = (stbi__uint16 *)dest;
9379+ stbi__uint32 nsmp = x * img_n;
9380+
9381+ if (img_n == out_n) {
9382+ for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) {
9383+ *dest16 = (cur[0] << 8) | cur[1];
9384+ }
9385+ } else {
9386+ STBI_ASSERT(img_n + 1 == out_n);
9387+ if (img_n == 1) {
9388+ for (i = 0; i < x; ++i, dest16 += 2, cur += 2) {
9389+ dest16[0] = (cur[0] << 8) | cur[1];
9390+ dest16[1] = 0xffff;
9391+ }
9392+ } else {
9393+ STBI_ASSERT(img_n == 3);
9394+ for (i = 0; i < x; ++i, dest16 += 4, cur += 6) {
9395+ dest16[0] = (cur[0] << 8) | cur[1];
9396+ dest16[1] = (cur[2] << 8) | cur[3];
9397+ dest16[2] = (cur[4] << 8) | cur[5];
9398+ dest16[3] = 0xffff;
9399+ }
9400+ }
9401+ }
9402+ }
9403+ }
9404+
9405+ STBI_FREE(filter_buf);
9406+ if (!all_ok) {
9407+ return 0;
9408+ }
9409+
9410+ return 1;
9411+}
9412+
9413+static int
9414+stbi__create_png_image(stbi__png *a, stbi_uc *image_data,
9415+ stbi__uint32 image_data_len, int out_n, int depth,
9416+ int color, int interlaced)
9417+{
9418+ int bytes = (depth == 16 ? 2 : 1);
9419+ int out_bytes = out_n * bytes;
9420+ stbi_uc *final;
9421+ int p;
9422+ if (!interlaced) {
9423+ return stbi__create_png_image_raw(a, image_data, image_data_len, out_n,
9424+ a->s->img_x, a->s->img_y, depth,
9425+ color);
9426+ }
9427+
9428+ // de-interlacing
9429+ final =
9430+ (stbi_uc *)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
9431+ if (!final) {
9432+ return stbi__err("outofmem", "Out of memory");
9433+ }
9434+ for (p = 0; p < 7; ++p) {
9435+ int xorig[] = {0, 4, 0, 2, 0, 1, 0};
9436+ int yorig[] = {0, 0, 4, 0, 2, 0, 1};
9437+ int xspc[] = {8, 8, 4, 4, 2, 2, 1};
9438+ int yspc[] = {8, 8, 8, 4, 4, 2, 2};
9439+ int i, j, x, y;
9440+ // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
9441+ x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
9442+ y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
9443+ if (x && y) {
9444+ stbi__uint32 img_len =
9445+ ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
9446+ if (!stbi__create_png_image_raw(a, image_data, image_data_len,
9447+ out_n, x, y, depth, color)) {
9448+ STBI_FREE(final);
9449+ return 0;
9450+ }
9451+ for (j = 0; j < y; ++j) {
9452+ for (i = 0; i < x; ++i) {
9453+ int out_y = j * yspc[p] + yorig[p];
9454+ int out_x = i * xspc[p] + xorig[p];
9455+ memcpy(final + out_y * a->s->img_x * out_bytes +
9456+ out_x * out_bytes,
9457+ a->out + (j * x + i) * out_bytes, out_bytes);
9458+ }
9459+ }
9460+ STBI_FREE(a->out);
9461+ image_data += img_len;
9462+ image_data_len -= img_len;
9463+ }
9464+ }
9465+ a->out = final;
9466+
9467+ return 1;
9468+}
9469+
9470+static int
9471+stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
9472+{
9473+ stbi__context *s = z->s;
9474+ stbi__uint32 i, pixel_count = s->img_x * s->img_y;
9475+ stbi_uc *p = z->out;
9476+
9477+ // compute color-based transparency, assuming we've
9478+ // already got 255 as the alpha value in the output
9479+ STBI_ASSERT(out_n == 2 || out_n == 4);
9480+
9481+ if (out_n == 2) {
9482+ for (i = 0; i < pixel_count; ++i) {
9483+ p[1] = (p[0] == tc[0] ? 0 : 255);
9484+ p += 2;
9485+ }
9486+ } else {
9487+ for (i = 0; i < pixel_count; ++i) {
9488+ if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) {
9489+ p[3] = 0;
9490+ }
9491+ p += 4;
9492+ }
9493+ }
9494+ return 1;
9495+}
9496+
9497+static int
9498+stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
9499+{
9500+ stbi__context *s = z->s;
9501+ stbi__uint32 i, pixel_count = s->img_x * s->img_y;
9502+ stbi__uint16 *p = (stbi__uint16 *)z->out;
9503+
9504+ // compute color-based transparency, assuming we've
9505+ // already got 65535 as the alpha value in the output
9506+ STBI_ASSERT(out_n == 2 || out_n == 4);
9507+
9508+ if (out_n == 2) {
9509+ for (i = 0; i < pixel_count; ++i) {
9510+ p[1] = (p[0] == tc[0] ? 0 : 65535);
9511+ p += 2;
9512+ }
9513+ } else {
9514+ for (i = 0; i < pixel_count; ++i) {
9515+ if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) {
9516+ p[3] = 0;
9517+ }
9518+ p += 4;
9519+ }
9520+ }
9521+ return 1;
9522+}
9523+
9524+static int
9525+stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
9526+{
9527+ stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
9528+ stbi_uc *p, *temp_out, *orig = a->out;
9529+
9530+ p = (stbi_uc *)stbi__malloc_mad2(pixel_count, pal_img_n, 0);
9531+ if (p == NULL) {
9532+ return stbi__err("outofmem", "Out of memory");
9533+ }
9534+
9535+ // between here and free(out) below, exitting would leak
9536+ temp_out = p;
9537+
9538+ if (pal_img_n == 3) {
9539+ for (i = 0; i < pixel_count; ++i) {
9540+ int n = orig[i] * 4;
9541+ p[0] = palette[n];
9542+ p[1] = palette[n + 1];
9543+ p[2] = palette[n + 2];
9544+ p += 3;
9545+ }
9546+ } else {
9547+ for (i = 0; i < pixel_count; ++i) {
9548+ int n = orig[i] * 4;
9549+ p[0] = palette[n];
9550+ p[1] = palette[n + 1];
9551+ p[2] = palette[n + 2];
9552+ p[3] = palette[n + 3];
9553+ p += 4;
9554+ }
9555+ }
9556+ STBI_FREE(a->out);
9557+ a->out = temp_out;
9558+
9559+ STBI_NOTUSED(len);
9560+
9561+ return 1;
9562 }
9563
9564 static int stbi__unpremultiply_on_load_global = 0;
9565 static int stbi__de_iphone_flag_global = 0;
9566
9567-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
9568+STBIDEF void
9569+stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
9570 {
9571- stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
9572+ stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
9573 }
9574
9575-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
9576+STBIDEF void
9577+stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
9578 {
9579- stbi__de_iphone_flag_global = flag_true_if_should_convert;
9580+ stbi__de_iphone_flag_global = flag_true_if_should_convert;
9581 }
9582
9583 #ifndef STBI_THREAD_LOCAL
9584-#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global
9585-#define stbi__de_iphone_flag stbi__de_iphone_flag_global
9586+#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global
9587+#define stbi__de_iphone_flag stbi__de_iphone_flag_global
9588 #else
9589-static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
9590-static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
9591+static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local,
9592+ stbi__unpremultiply_on_load_set;
9593+static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local,
9594+ stbi__de_iphone_flag_set;
9595
9596-STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
9597+STBIDEF void
9598+stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
9599 {
9600- stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
9601- stbi__unpremultiply_on_load_set = 1;
9602+ stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
9603+ stbi__unpremultiply_on_load_set = 1;
9604 }
9605
9606-STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
9607+STBIDEF void
9608+stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
9609 {
9610- stbi__de_iphone_flag_local = flag_true_if_should_convert;
9611- stbi__de_iphone_flag_set = 1;
9612+ stbi__de_iphone_flag_local = flag_true_if_should_convert;
9613+ stbi__de_iphone_flag_set = 1;
9614 }
9615
9616-#define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \
9617- ? stbi__unpremultiply_on_load_local \
9618- : stbi__unpremultiply_on_load_global)
9619-#define stbi__de_iphone_flag (stbi__de_iphone_flag_set \
9620- ? stbi__de_iphone_flag_local \
9621- : stbi__de_iphone_flag_global)
9622+#define stbi__unpremultiply_on_load \
9623+ (stbi__unpremultiply_on_load_set ? stbi__unpremultiply_on_load_local \
9624+ : stbi__unpremultiply_on_load_global)
9625+#define stbi__de_iphone_flag \
9626+ (stbi__de_iphone_flag_set ? stbi__de_iphone_flag_local \
9627+ : stbi__de_iphone_flag_global)
9628 #endif // STBI_THREAD_LOCAL
9629
9630-static void stbi__de_iphone(stbi__png *z)
9631-{
9632- stbi__context *s = z->s;
9633- stbi__uint32 i, pixel_count = s->img_x * s->img_y;
9634- stbi_uc *p = z->out;
9635-
9636- if (s->img_out_n == 3) { // convert bgr to rgb
9637- for (i=0; i < pixel_count; ++i) {
9638- stbi_uc t = p[0];
9639- p[0] = p[2];
9640- p[2] = t;
9641- p += 3;
9642- }
9643- } else {
9644- STBI_ASSERT(s->img_out_n == 4);
9645- if (stbi__unpremultiply_on_load) {
9646- // convert bgr to rgb and unpremultiply
9647- for (i=0; i < pixel_count; ++i) {
9648- stbi_uc a = p[3];
9649- stbi_uc t = p[0];
9650- if (a) {
9651- stbi_uc half = a / 2;
9652- p[0] = (p[2] * 255 + half) / a;
9653- p[1] = (p[1] * 255 + half) / a;
9654- p[2] = ( t * 255 + half) / a;
9655- } else {
9656- p[0] = p[2];
9657- p[2] = t;
9658- }
9659- p += 4;
9660- }
9661- } else {
9662- // convert bgr to rgb
9663- for (i=0; i < pixel_count; ++i) {
9664- stbi_uc t = p[0];
9665- p[0] = p[2];
9666- p[2] = t;
9667- p += 4;
9668- }
9669- }
9670- }
9671-}
9672-
9673-#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
9674-
9675-static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
9676-{
9677- stbi_uc palette[1024], pal_img_n=0;
9678- stbi_uc has_trans=0, tc[3]={0};
9679- stbi__uint16 tc16[3];
9680- stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
9681- int first=1,k,interlace=0, color=0, is_iphone=0;
9682- stbi__context *s = z->s;
9683-
9684- z->expanded = NULL;
9685- z->idata = NULL;
9686- z->out = NULL;
9687-
9688- if (!stbi__check_png_header(s)) return 0;
9689-
9690- if (scan == STBI__SCAN_type) return 1;
9691-
9692- for (;;) {
9693- stbi__pngchunk c = stbi__get_chunk_header(s);
9694- switch (c.type) {
9695- case STBI__PNG_TYPE('C','g','B','I'):
9696- is_iphone = 1;
9697- stbi__skip(s, c.length);
9698- break;
9699- case STBI__PNG_TYPE('I','H','D','R'): {
9700- int comp,filter;
9701- if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
9702- first = 0;
9703- if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
9704- s->img_x = stbi__get32be(s);
9705- s->img_y = stbi__get32be(s);
9706- if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
9707- if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
9708- z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
9709- color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
9710- if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG");
9711- if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
9712- comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
9713- filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
9714- interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
9715- if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
9716- if (!pal_img_n) {
9717- s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
9718- if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
9719- } else {
9720- // if paletted, then pal_n is our final components, and
9721- // img_n is # components to decompress/filter.
9722- s->img_n = 1;
9723- if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
9724- }
9725- // even with SCAN_header, have to scan to see if we have a tRNS
9726- break;
9727- }
9728-
9729- case STBI__PNG_TYPE('P','L','T','E'): {
9730- if (first) return stbi__err("first not IHDR", "Corrupt PNG");
9731- if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
9732- pal_len = c.length / 3;
9733- if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
9734- for (i=0; i < pal_len; ++i) {
9735- palette[i*4+0] = stbi__get8(s);
9736- palette[i*4+1] = stbi__get8(s);
9737- palette[i*4+2] = stbi__get8(s);
9738- palette[i*4+3] = 255;
9739- }
9740- break;
9741- }
9742-
9743- case STBI__PNG_TYPE('t','R','N','S'): {
9744- if (first) return stbi__err("first not IHDR", "Corrupt PNG");
9745- if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
9746- if (pal_img_n) {
9747- if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
9748- if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
9749- if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
9750- pal_img_n = 4;
9751- for (i=0; i < c.length; ++i)
9752- palette[i*4+3] = stbi__get8(s);
9753- } else {
9754- if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
9755- if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
9756- has_trans = 1;
9757- // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
9758- if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
9759- if (z->depth == 16) {
9760- for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning
9761- tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
9762- } else {
9763- for (k = 0; k < s->img_n && k < 3; ++k)
9764- tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
9765- }
9766- }
9767- break;
9768- }
9769-
9770- case STBI__PNG_TYPE('I','D','A','T'): {
9771- if (first) return stbi__err("first not IHDR", "Corrupt PNG");
9772- if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
9773- if (scan == STBI__SCAN_header) {
9774- // header scan definitely stops at first IDAT
9775- if (pal_img_n)
9776- s->img_n = pal_img_n;
9777- return 1;
9778- }
9779- if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
9780- if ((int)(ioff + c.length) < (int)ioff) return 0;
9781- if (ioff + c.length > idata_limit) {
9782- stbi__uint32 idata_limit_old = idata_limit;
9783- stbi_uc *p;
9784- if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
9785- while (ioff + c.length > idata_limit)
9786- idata_limit *= 2;
9787- STBI_NOTUSED(idata_limit_old);
9788- p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
9789- z->idata = p;
9790- }
9791- if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
9792- ioff += c.length;
9793- break;
9794- }
9795-
9796- case STBI__PNG_TYPE('I','E','N','D'): {
9797- stbi__uint32 raw_len, bpl;
9798- if (first) return stbi__err("first not IHDR", "Corrupt PNG");
9799- if (scan != STBI__SCAN_load) return 1;
9800- if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
9801- // initial guess for decoded data size to avoid unnecessary reallocs
9802- bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
9803- raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
9804- z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
9805- if (z->expanded == NULL) return 0; // zlib should set error
9806- STBI_FREE(z->idata); z->idata = NULL;
9807- if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
9808- s->img_out_n = s->img_n+1;
9809- else
9810- s->img_out_n = s->img_n;
9811- if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
9812- if (has_trans) {
9813- if (z->depth == 16) {
9814- if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
9815- } else {
9816- if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
9817- }
9818- }
9819- if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
9820- stbi__de_iphone(z);
9821- if (pal_img_n) {
9822- // pal_img_n == 3 or 4
9823- s->img_n = pal_img_n; // record the actual colors we had
9824- s->img_out_n = pal_img_n;
9825- if (req_comp >= 3) s->img_out_n = req_comp;
9826- if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
9827- return 0;
9828- } else if (has_trans) {
9829- // non-paletted image with tRNS -> source image has (constant) alpha
9830- ++s->img_n;
9831- }
9832- STBI_FREE(z->expanded); z->expanded = NULL;
9833- // end of PNG chunk, read and skip CRC
9834- stbi__get32be(s);
9835- return 1;
9836- }
9837-
9838- default:
9839- // if critical, fail
9840- if (first) return stbi__err("first not IHDR", "Corrupt PNG");
9841- if ((c.type & (1 << 29)) == 0) {
9842- #ifndef STBI_NO_FAILURE_STRINGS
9843- // not threadsafe
9844- static char invalid_chunk[] = "XXXX PNG chunk not known";
9845- invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
9846- invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
9847- invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
9848- invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
9849- #endif
9850- return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
9851- }
9852- stbi__skip(s, c.length);
9853- break;
9854- }
9855- // end of PNG chunk, read and skip CRC
9856- stbi__get32be(s);
9857- }
9858-}
9859-
9860-static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
9861-{
9862- void *result=NULL;
9863- if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
9864- if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
9865- if (p->depth <= 8)
9866- ri->bits_per_channel = 8;
9867- else if (p->depth == 16)
9868- ri->bits_per_channel = 16;
9869- else
9870- return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
9871- result = p->out;
9872- p->out = NULL;
9873- if (req_comp && req_comp != p->s->img_out_n) {
9874- if (ri->bits_per_channel == 8)
9875- result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
9876- else
9877- result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
9878- p->s->img_out_n = req_comp;
9879- if (result == NULL) return result;
9880- }
9881- *x = p->s->img_x;
9882- *y = p->s->img_y;
9883- if (n) *n = p->s->img_n;
9884- }
9885- STBI_FREE(p->out); p->out = NULL;
9886- STBI_FREE(p->expanded); p->expanded = NULL;
9887- STBI_FREE(p->idata); p->idata = NULL;
9888-
9889- return result;
9890-}
9891-
9892-static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
9893-{
9894- stbi__png p;
9895- p.s = s;
9896- return stbi__do_png(&p, x,y,comp,req_comp, ri);
9897-}
9898-
9899-static int stbi__png_test(stbi__context *s)
9900-{
9901- int r;
9902- r = stbi__check_png_header(s);
9903- stbi__rewind(s);
9904- return r;
9905-}
9906-
9907-static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
9908-{
9909- if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
9910- stbi__rewind( p->s );
9911- return 0;
9912- }
9913- if (x) *x = p->s->img_x;
9914- if (y) *y = p->s->img_y;
9915- if (comp) *comp = p->s->img_n;
9916- return 1;
9917-}
9918-
9919-static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
9920-{
9921- stbi__png p;
9922- p.s = s;
9923- return stbi__png_info_raw(&p, x, y, comp);
9924-}
9925-
9926-static int stbi__png_is16(stbi__context *s)
9927-{
9928- stbi__png p;
9929- p.s = s;
9930- if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
9931- return 0;
9932- if (p.depth != 16) {
9933- stbi__rewind(p.s);
9934- return 0;
9935- }
9936- return 1;
9937+static void
9938+stbi__de_iphone(stbi__png *z)
9939+{
9940+ stbi__context *s = z->s;
9941+ stbi__uint32 i, pixel_count = s->img_x * s->img_y;
9942+ stbi_uc *p = z->out;
9943+
9944+ if (s->img_out_n == 3) { // convert bgr to rgb
9945+ for (i = 0; i < pixel_count; ++i) {
9946+ stbi_uc t = p[0];
9947+ p[0] = p[2];
9948+ p[2] = t;
9949+ p += 3;
9950+ }
9951+ } else {
9952+ STBI_ASSERT(s->img_out_n == 4);
9953+ if (stbi__unpremultiply_on_load) {
9954+ // convert bgr to rgb and unpremultiply
9955+ for (i = 0; i < pixel_count; ++i) {
9956+ stbi_uc a = p[3];
9957+ stbi_uc t = p[0];
9958+ if (a) {
9959+ stbi_uc half = a / 2;
9960+ p[0] = (p[2] * 255 + half) / a;
9961+ p[1] = (p[1] * 255 + half) / a;
9962+ p[2] = (t * 255 + half) / a;
9963+ } else {
9964+ p[0] = p[2];
9965+ p[2] = t;
9966+ }
9967+ p += 4;
9968+ }
9969+ } else {
9970+ // convert bgr to rgb
9971+ for (i = 0; i < pixel_count; ++i) {
9972+ stbi_uc t = p[0];
9973+ p[0] = p[2];
9974+ p[2] = t;
9975+ p += 4;
9976+ }
9977+ }
9978+ }
9979+}
9980+
9981+#define STBI__PNG_TYPE(a, b, c, d) \
9982+ (((unsigned)(a) << 24) + ((unsigned)(b) << 16) + ((unsigned)(c) << 8) + \
9983+ (unsigned)(d))
9984+
9985+static int
9986+stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
9987+{
9988+ stbi_uc palette[1024], pal_img_n = 0;
9989+ stbi_uc has_trans = 0, tc[3] = {0};
9990+ stbi__uint16 tc16[3];
9991+ stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
9992+ int first = 1, k, interlace = 0, color = 0, is_iphone = 0;
9993+ stbi__context *s = z->s;
9994+
9995+ z->expanded = NULL;
9996+ z->idata = NULL;
9997+ z->out = NULL;
9998+
9999+ if (!stbi__check_png_header(s)) {
10000+ return 0;
10001+ }
10002+
10003+ if (scan == STBI__SCAN_type) {
10004+ return 1;
10005+ }
10006+
10007+ for (;;) {
10008+ stbi__pngchunk c = stbi__get_chunk_header(s);
10009+ switch (c.type) {
10010+ case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
10011+ is_iphone = 1;
10012+ stbi__skip(s, c.length);
10013+ break;
10014+ case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
10015+ int comp, filter;
10016+ if (!first) {
10017+ return stbi__err("multiple IHDR", "Corrupt PNG");
10018+ }
10019+ first = 0;
10020+ if (c.length != 13) {
10021+ return stbi__err("bad IHDR len", "Corrupt PNG");
10022+ }
10023+ s->img_x = stbi__get32be(s);
10024+ s->img_y = stbi__get32be(s);
10025+ if (s->img_y > STBI_MAX_DIMENSIONS) {
10026+ return stbi__err("too large", "Very large image (corrupt?)");
10027+ }
10028+ if (s->img_x > STBI_MAX_DIMENSIONS) {
10029+ return stbi__err("too large", "Very large image (corrupt?)");
10030+ }
10031+ z->depth = stbi__get8(s);
10032+ if (z->depth != 1 && z->depth != 2 && z->depth != 4 &&
10033+ z->depth != 8 && z->depth != 16) {
10034+ return stbi__err("1/2/4/8/16-bit only",
10035+ "PNG not supported: 1/2/4/8/16-bit only");
10036+ }
10037+ color = stbi__get8(s);
10038+ if (color > 6) {
10039+ return stbi__err("bad ctype", "Corrupt PNG");
10040+ }
10041+ if (color == 3 && z->depth == 16) {
10042+ return stbi__err("bad ctype", "Corrupt PNG");
10043+ }
10044+ if (color == 3) {
10045+ pal_img_n = 3;
10046+ } else if (color & 1) {
10047+ return stbi__err("bad ctype", "Corrupt PNG");
10048+ }
10049+ comp = stbi__get8(s);
10050+ if (comp) {
10051+ return stbi__err("bad comp method", "Corrupt PNG");
10052+ }
10053+ filter = stbi__get8(s);
10054+ if (filter) {
10055+ return stbi__err("bad filter method", "Corrupt PNG");
10056+ }
10057+ interlace = stbi__get8(s);
10058+ if (interlace > 1) {
10059+ return stbi__err("bad interlace method", "Corrupt PNG");
10060+ }
10061+ if (!s->img_x || !s->img_y) {
10062+ return stbi__err("0-pixel image", "Corrupt PNG");
10063+ }
10064+ if (!pal_img_n) {
10065+ s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
10066+ if ((1 << 30) / s->img_x / s->img_n < s->img_y) {
10067+ return stbi__err("too large", "Image too large to decode");
10068+ }
10069+ } else {
10070+ // if paletted, then pal_n is our final components, and
10071+ // img_n is # components to decompress/filter.
10072+ s->img_n = 1;
10073+ if ((1 << 30) / s->img_x / 4 < s->img_y) {
10074+ return stbi__err("too large", "Corrupt PNG");
10075+ }
10076+ }
10077+ // even with SCAN_header, have to scan to see if we have a tRNS
10078+ break;
10079+ }
10080+
10081+ case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
10082+ if (first) {
10083+ return stbi__err("first not IHDR", "Corrupt PNG");
10084+ }
10085+ if (c.length > 256 * 3) {
10086+ return stbi__err("invalid PLTE", "Corrupt PNG");
10087+ }
10088+ pal_len = c.length / 3;
10089+ if (pal_len * 3 != c.length) {
10090+ return stbi__err("invalid PLTE", "Corrupt PNG");
10091+ }
10092+ for (i = 0; i < pal_len; ++i) {
10093+ palette[i * 4 + 0] = stbi__get8(s);
10094+ palette[i * 4 + 1] = stbi__get8(s);
10095+ palette[i * 4 + 2] = stbi__get8(s);
10096+ palette[i * 4 + 3] = 255;
10097+ }
10098+ break;
10099+ }
10100+
10101+ case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
10102+ if (first) {
10103+ return stbi__err("first not IHDR", "Corrupt PNG");
10104+ }
10105+ if (z->idata) {
10106+ return stbi__err("tRNS after IDAT", "Corrupt PNG");
10107+ }
10108+ if (pal_img_n) {
10109+ if (scan == STBI__SCAN_header) {
10110+ s->img_n = 4;
10111+ return 1;
10112+ }
10113+ if (pal_len == 0) {
10114+ return stbi__err("tRNS before PLTE", "Corrupt PNG");
10115+ }
10116+ if (c.length > pal_len) {
10117+ return stbi__err("bad tRNS len", "Corrupt PNG");
10118+ }
10119+ pal_img_n = 4;
10120+ for (i = 0; i < c.length; ++i) {
10121+ palette[i * 4 + 3] = stbi__get8(s);
10122+ }
10123+ } else {
10124+ if (!(s->img_n & 1)) {
10125+ return stbi__err("tRNS with alpha", "Corrupt PNG");
10126+ }
10127+ if (c.length != (stbi__uint32)s->img_n * 2) {
10128+ return stbi__err("bad tRNS len", "Corrupt PNG");
10129+ }
10130+ has_trans = 1;
10131+ // non-paletted with tRNS = constant alpha. if header-scanning,
10132+ // we can stop now.
10133+ if (scan == STBI__SCAN_header) {
10134+ ++s->img_n;
10135+ return 1;
10136+ }
10137+ if (z->depth == 16) {
10138+ for (k = 0; k < s->img_n && k < 3;
10139+ ++k) { // extra loop test to suppress false GCC warning
10140+ tc16[k] = (stbi__uint16)stbi__get16be(
10141+ s); // copy the values as-is
10142+ }
10143+ } else {
10144+ for (k = 0; k < s->img_n && k < 3; ++k) {
10145+ tc[k] =
10146+ (stbi_uc)(stbi__get16be(s) & 255) *
10147+ stbi__depth_scale_table
10148+ [z->depth]; // non 8-bit images will be larger
10149+ }
10150+ }
10151+ }
10152+ break;
10153+ }
10154+
10155+ case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
10156+ if (first) {
10157+ return stbi__err("first not IHDR", "Corrupt PNG");
10158+ }
10159+ if (pal_img_n && !pal_len) {
10160+ return stbi__err("no PLTE", "Corrupt PNG");
10161+ }
10162+ if (scan == STBI__SCAN_header) {
10163+ // header scan definitely stops at first IDAT
10164+ if (pal_img_n) {
10165+ s->img_n = pal_img_n;
10166+ }
10167+ return 1;
10168+ }
10169+ if (c.length > (1u << 30)) {
10170+ return stbi__err("IDAT size limit",
10171+ "IDAT section larger than 2^30 bytes");
10172+ }
10173+ if ((int)(ioff + c.length) < (int)ioff) {
10174+ return 0;
10175+ }
10176+ if (ioff + c.length > idata_limit) {
10177+ stbi__uint32 idata_limit_old = idata_limit;
10178+ stbi_uc *p;
10179+ if (idata_limit == 0) {
10180+ idata_limit = c.length > 4096 ? c.length : 4096;
10181+ }
10182+ while (ioff + c.length > idata_limit) {
10183+ idata_limit *= 2;
10184+ }
10185+ STBI_NOTUSED(idata_limit_old);
10186+ p = (stbi_uc *)STBI_REALLOC_SIZED(z->idata, idata_limit_old,
10187+ idata_limit);
10188+ if (p == NULL) {
10189+ return stbi__err("outofmem", "Out of memory");
10190+ }
10191+ z->idata = p;
10192+ }
10193+ if (!stbi__getn(s, z->idata + ioff, c.length)) {
10194+ return stbi__err("outofdata", "Corrupt PNG");
10195+ }
10196+ ioff += c.length;
10197+ break;
10198+ }
10199+
10200+ case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
10201+ stbi__uint32 raw_len, bpl;
10202+ if (first) {
10203+ return stbi__err("first not IHDR", "Corrupt PNG");
10204+ }
10205+ if (scan != STBI__SCAN_load) {
10206+ return 1;
10207+ }
10208+ if (z->idata == NULL) {
10209+ return stbi__err("no IDAT", "Corrupt PNG");
10210+ }
10211+ // initial guess for decoded data size to avoid unnecessary reallocs
10212+ bpl =
10213+ (s->img_x * z->depth + 7) / 8; // bytes per line, per component
10214+ raw_len = bpl * s->img_y * s->img_n /* pixels */ +
10215+ s->img_y /* filter mode per row */;
10216+ z->expanded =
10217+ (stbi_uc *)stbi_zlib_decode_malloc_guesssize_headerflag(
10218+ (char *)z->idata, ioff, raw_len, (int *)&raw_len,
10219+ !is_iphone);
10220+ if (z->expanded == NULL) {
10221+ return 0; // zlib should set error
10222+ }
10223+ STBI_FREE(z->idata);
10224+ z->idata = NULL;
10225+ if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) ||
10226+ has_trans) {
10227+ s->img_out_n = s->img_n + 1;
10228+ } else {
10229+ s->img_out_n = s->img_n;
10230+ }
10231+ if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n,
10232+ z->depth, color, interlace)) {
10233+ return 0;
10234+ }
10235+ if (has_trans) {
10236+ if (z->depth == 16) {
10237+ if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) {
10238+ return 0;
10239+ }
10240+ } else {
10241+ if (!stbi__compute_transparency(z, tc, s->img_out_n)) {
10242+ return 0;
10243+ }
10244+ }
10245+ }
10246+ if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) {
10247+ stbi__de_iphone(z);
10248+ }
10249+ if (pal_img_n) {
10250+ // pal_img_n == 3 or 4
10251+ s->img_n = pal_img_n; // record the actual colors we had
10252+ s->img_out_n = pal_img_n;
10253+ if (req_comp >= 3) {
10254+ s->img_out_n = req_comp;
10255+ }
10256+ if (!stbi__expand_png_palette(z, palette, pal_len,
10257+ s->img_out_n)) {
10258+ return 0;
10259+ }
10260+ } else if (has_trans) {
10261+ // non-paletted image with tRNS -> source image has (constant)
10262+ // alpha
10263+ ++s->img_n;
10264+ }
10265+ STBI_FREE(z->expanded);
10266+ z->expanded = NULL;
10267+ // end of PNG chunk, read and skip CRC
10268+ stbi__get32be(s);
10269+ return 1;
10270+ }
10271+
10272+ default:
10273+ // if critical, fail
10274+ if (first) {
10275+ return stbi__err("first not IHDR", "Corrupt PNG");
10276+ }
10277+ if ((c.type & (1 << 29)) == 0) {
10278+#ifndef STBI_NO_FAILURE_STRINGS
10279+ // not threadsafe
10280+ static char invalid_chunk[] = "XXXX PNG chunk not known";
10281+ invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
10282+ invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
10283+ invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
10284+ invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
10285+#endif
10286+ return stbi__err(invalid_chunk,
10287+ "PNG not supported: unknown PNG chunk type");
10288+ }
10289+ stbi__skip(s, c.length);
10290+ break;
10291+ }
10292+ // end of PNG chunk, read and skip CRC
10293+ stbi__get32be(s);
10294+ }
10295+}
10296+
10297+static void *
10298+stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp,
10299+ stbi__result_info *ri)
10300+{
10301+ void *result = NULL;
10302+ if (req_comp < 0 || req_comp > 4) {
10303+ return stbi__errpuc("bad req_comp", "Internal error");
10304+ }
10305+ if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
10306+ if (p->depth <= 8) {
10307+ ri->bits_per_channel = 8;
10308+ } else if (p->depth == 16) {
10309+ ri->bits_per_channel = 16;
10310+ } else {
10311+ return stbi__errpuc("bad bits_per_channel",
10312+ "PNG not supported: unsupported color depth");
10313+ }
10314+ result = p->out;
10315+ p->out = NULL;
10316+ if (req_comp && req_comp != p->s->img_out_n) {
10317+ if (ri->bits_per_channel == 8) {
10318+ result = stbi__convert_format((unsigned char *)result,
10319+ p->s->img_out_n, req_comp,
10320+ p->s->img_x, p->s->img_y);
10321+ } else {
10322+ result = stbi__convert_format16((stbi__uint16 *)result,
10323+ p->s->img_out_n, req_comp,
10324+ p->s->img_x, p->s->img_y);
10325+ }
10326+ p->s->img_out_n = req_comp;
10327+ if (result == NULL) {
10328+ return result;
10329+ }
10330+ }
10331+ *x = p->s->img_x;
10332+ *y = p->s->img_y;
10333+ if (n) {
10334+ *n = p->s->img_n;
10335+ }
10336+ }
10337+ STBI_FREE(p->out);
10338+ p->out = NULL;
10339+ STBI_FREE(p->expanded);
10340+ p->expanded = NULL;
10341+ STBI_FREE(p->idata);
10342+ p->idata = NULL;
10343+
10344+ return result;
10345+}
10346+
10347+static void *
10348+stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
10349+ stbi__result_info *ri)
10350+{
10351+ stbi__png p;
10352+ p.s = s;
10353+ return stbi__do_png(&p, x, y, comp, req_comp, ri);
10354+}
10355+
10356+static int
10357+stbi__png_test(stbi__context *s)
10358+{
10359+ int r;
10360+ r = stbi__check_png_header(s);
10361+ stbi__rewind(s);
10362+ return r;
10363+}
10364+
10365+static int
10366+stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
10367+{
10368+ if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
10369+ stbi__rewind(p->s);
10370+ return 0;
10371+ }
10372+ if (x) {
10373+ *x = p->s->img_x;
10374+ }
10375+ if (y) {
10376+ *y = p->s->img_y;
10377+ }
10378+ if (comp) {
10379+ *comp = p->s->img_n;
10380+ }
10381+ return 1;
10382+}
10383+
10384+static int
10385+stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
10386+{
10387+ stbi__png p;
10388+ p.s = s;
10389+ return stbi__png_info_raw(&p, x, y, comp);
10390+}
10391+
10392+static int
10393+stbi__png_is16(stbi__context *s)
10394+{
10395+ stbi__png p;
10396+ p.s = s;
10397+ if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) {
10398+ return 0;
10399+ }
10400+ if (p.depth != 16) {
10401+ stbi__rewind(p.s);
10402+ return 0;
10403+ }
10404+ return 1;
10405 }
10406 #endif
10407
10408 // Microsoft/Windows BMP image
10409
10410 #ifndef STBI_NO_BMP
10411-static int stbi__bmp_test_raw(stbi__context *s)
10412-{
10413- int r;
10414- int sz;
10415- if (stbi__get8(s) != 'B') return 0;
10416- if (stbi__get8(s) != 'M') return 0;
10417- stbi__get32le(s); // discard filesize
10418- stbi__get16le(s); // discard reserved
10419- stbi__get16le(s); // discard reserved
10420- stbi__get32le(s); // discard data offset
10421- sz = stbi__get32le(s);
10422- r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
10423- return r;
10424+static int
10425+stbi__bmp_test_raw(stbi__context *s)
10426+{
10427+ int r;
10428+ int sz;
10429+ if (stbi__get8(s) != 'B') {
10430+ return 0;
10431+ }
10432+ if (stbi__get8(s) != 'M') {
10433+ return 0;
10434+ }
10435+ stbi__get32le(s); // discard filesize
10436+ stbi__get16le(s); // discard reserved
10437+ stbi__get16le(s); // discard reserved
10438+ stbi__get32le(s); // discard data offset
10439+ sz = stbi__get32le(s);
10440+ r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
10441+ return r;
10442+}
10443+
10444+static int
10445+stbi__bmp_test(stbi__context *s)
10446+{
10447+ int r = stbi__bmp_test_raw(s);
10448+ stbi__rewind(s);
10449+ return r;
10450 }
10451
10452-static int stbi__bmp_test(stbi__context *s)
10453-{
10454- int r = stbi__bmp_test_raw(s);
10455- stbi__rewind(s);
10456- return r;
10457-}
10458-
10459-
10460 // returns 0..31 for the highest set bit
10461-static int stbi__high_bit(unsigned int z)
10462-{
10463- int n=0;
10464- if (z == 0) return -1;
10465- if (z >= 0x10000) { n += 16; z >>= 16; }
10466- if (z >= 0x00100) { n += 8; z >>= 8; }
10467- if (z >= 0x00010) { n += 4; z >>= 4; }
10468- if (z >= 0x00004) { n += 2; z >>= 2; }
10469- if (z >= 0x00002) { n += 1;/* >>= 1;*/ }
10470- return n;
10471-}
10472-
10473-static int stbi__bitcount(unsigned int a)
10474-{
10475- a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2
10476- a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4
10477- a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
10478- a = (a + (a >> 8)); // max 16 per 8 bits
10479- a = (a + (a >> 16)); // max 32 per 8 bits
10480- return a & 0xff;
10481+static int
10482+stbi__high_bit(unsigned int z)
10483+{
10484+ int n = 0;
10485+ if (z == 0) {
10486+ return -1;
10487+ }
10488+ if (z >= 0x10000) {
10489+ n += 16;
10490+ z >>= 16;
10491+ }
10492+ if (z >= 0x00100) {
10493+ n += 8;
10494+ z >>= 8;
10495+ }
10496+ if (z >= 0x00010) {
10497+ n += 4;
10498+ z >>= 4;
10499+ }
10500+ if (z >= 0x00004) {
10501+ n += 2;
10502+ z >>= 2;
10503+ }
10504+ if (z >= 0x00002) {
10505+ n += 1; /* >>= 1;*/
10506+ }
10507+ return n;
10508+}
10509+
10510+static int
10511+stbi__bitcount(unsigned int a)
10512+{
10513+ a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2
10514+ a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4
10515+ a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
10516+ a = (a + (a >> 8)); // max 16 per 8 bits
10517+ a = (a + (a >> 16)); // max 32 per 8 bits
10518+ return a & 0xff;
10519 }
10520
10521 // extract an arbitrarily-aligned N-bit value (N=bits)
10522 // from v, and then make it 8-bits long and fractionally
10523 // extend it to full full range.
10524-static int stbi__shiftsigned(unsigned int v, int shift, int bits)
10525-{
10526- static unsigned int mul_table[9] = {
10527- 0,
10528- 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
10529- 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
10530- };
10531- static unsigned int shift_table[9] = {
10532- 0, 0,0,1,0,2,4,6,0,
10533- };
10534- if (shift < 0)
10535- v <<= -shift;
10536- else
10537- v >>= shift;
10538- STBI_ASSERT(v < 256);
10539- v >>= (8-bits);
10540- STBI_ASSERT(bits >= 0 && bits <= 8);
10541- return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
10542-}
10543-
10544-typedef struct
10545-{
10546- int bpp, offset, hsz;
10547- unsigned int mr,mg,mb,ma, all_a;
10548- int extra_read;
10549+static int
10550+stbi__shiftsigned(unsigned int v, int shift, int bits)
10551+{
10552+ static unsigned int mul_table[9] = {
10553+ 0,
10554+ 0xff /*0b11111111*/,
10555+ 0x55 /*0b01010101*/,
10556+ 0x49 /*0b01001001*/,
10557+ 0x11 /*0b00010001*/,
10558+ 0x21 /*0b00100001*/,
10559+ 0x41 /*0b01000001*/,
10560+ 0x81 /*0b10000001*/,
10561+ 0x01 /*0b00000001*/,
10562+ };
10563+ static unsigned int shift_table[9] = {
10564+ 0, 0, 0, 1, 0, 2, 4, 6, 0,
10565+ };
10566+ if (shift < 0) {
10567+ v <<= -shift;
10568+ } else {
10569+ v >>= shift;
10570+ }
10571+ STBI_ASSERT(v < 256);
10572+ v >>= (8 - bits);
10573+ STBI_ASSERT(bits >= 0 && bits <= 8);
10574+ return (int)((unsigned)v * mul_table[bits]) >> shift_table[bits];
10575+}
10576+
10577+typedef struct {
10578+ int bpp, offset, hsz;
10579+ unsigned int mr, mg, mb, ma, all_a;
10580+ int extra_read;
10581 } stbi__bmp_data;
10582
10583-static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
10584-{
10585- // BI_BITFIELDS specifies masks explicitly, don't override
10586- if (compress == 3)
10587- return 1;
10588-
10589- if (compress == 0) {
10590- if (info->bpp == 16) {
10591- info->mr = 31u << 10;
10592- info->mg = 31u << 5;
10593- info->mb = 31u << 0;
10594- } else if (info->bpp == 32) {
10595- info->mr = 0xffu << 16;
10596- info->mg = 0xffu << 8;
10597- info->mb = 0xffu << 0;
10598- info->ma = 0xffu << 24;
10599- info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
10600- } else {
10601- // otherwise, use defaults, which is all-0
10602- info->mr = info->mg = info->mb = info->ma = 0;
10603- }
10604- return 1;
10605- }
10606- return 0; // error
10607-}
10608-
10609-static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
10610-{
10611- int hsz;
10612- if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
10613- stbi__get32le(s); // discard filesize
10614- stbi__get16le(s); // discard reserved
10615- stbi__get16le(s); // discard reserved
10616- info->offset = stbi__get32le(s);
10617- info->hsz = hsz = stbi__get32le(s);
10618- info->mr = info->mg = info->mb = info->ma = 0;
10619- info->extra_read = 14;
10620-
10621- if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP");
10622-
10623- if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
10624- if (hsz == 12) {
10625- s->img_x = stbi__get16le(s);
10626- s->img_y = stbi__get16le(s);
10627- } else {
10628- s->img_x = stbi__get32le(s);
10629- s->img_y = stbi__get32le(s);
10630- }
10631- if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
10632- info->bpp = stbi__get16le(s);
10633- if (hsz != 12) {
10634- int compress = stbi__get32le(s);
10635- if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
10636- if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
10637- if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
10638- stbi__get32le(s); // discard sizeof
10639- stbi__get32le(s); // discard hres
10640- stbi__get32le(s); // discard vres
10641- stbi__get32le(s); // discard colorsused
10642- stbi__get32le(s); // discard max important
10643- if (hsz == 40 || hsz == 56) {
10644- if (hsz == 56) {
10645- stbi__get32le(s);
10646- stbi__get32le(s);
10647- stbi__get32le(s);
10648- stbi__get32le(s);
10649- }
10650- if (info->bpp == 16 || info->bpp == 32) {
10651- if (compress == 0) {
10652- stbi__bmp_set_mask_defaults(info, compress);
10653- } else if (compress == 3) {
10654- info->mr = stbi__get32le(s);
10655- info->mg = stbi__get32le(s);
10656- info->mb = stbi__get32le(s);
10657- info->extra_read += 12;
10658- // not documented, but generated by photoshop and handled by mspaint
10659- if (info->mr == info->mg && info->mg == info->mb) {
10660- // ?!?!?
10661- return stbi__errpuc("bad BMP", "bad BMP");
10662- }
10663- } else
10664- return stbi__errpuc("bad BMP", "bad BMP");
10665- }
10666- } else {
10667- // V4/V5 header
10668- int i;
10669- if (hsz != 108 && hsz != 124)
10670- return stbi__errpuc("bad BMP", "bad BMP");
10671- info->mr = stbi__get32le(s);
10672- info->mg = stbi__get32le(s);
10673- info->mb = stbi__get32le(s);
10674- info->ma = stbi__get32le(s);
10675- if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
10676- stbi__bmp_set_mask_defaults(info, compress);
10677- stbi__get32le(s); // discard color space
10678- for (i=0; i < 12; ++i)
10679- stbi__get32le(s); // discard color space parameters
10680- if (hsz == 124) {
10681- stbi__get32le(s); // discard rendering intent
10682- stbi__get32le(s); // discard offset of profile data
10683- stbi__get32le(s); // discard size of profile data
10684- stbi__get32le(s); // discard reserved
10685- }
10686- }
10687- }
10688- return (void *) 1;
10689-}
10690-
10691-
10692-static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
10693-{
10694- stbi_uc *out;
10695- unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
10696- stbi_uc pal[256][4];
10697- int psize=0,i,j,width;
10698- int flip_vertically, pad, target;
10699- stbi__bmp_data info;
10700- STBI_NOTUSED(ri);
10701-
10702- info.all_a = 255;
10703- if (stbi__bmp_parse_header(s, &info) == NULL)
10704- return NULL; // error code already set
10705-
10706- flip_vertically = ((int) s->img_y) > 0;
10707- s->img_y = abs((int) s->img_y);
10708-
10709- if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
10710- if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
10711-
10712- mr = info.mr;
10713- mg = info.mg;
10714- mb = info.mb;
10715- ma = info.ma;
10716- all_a = info.all_a;
10717-
10718- if (info.hsz == 12) {
10719- if (info.bpp < 24)
10720- psize = (info.offset - info.extra_read - 24) / 3;
10721- } else {
10722- if (info.bpp < 16)
10723- psize = (info.offset - info.extra_read - info.hsz) >> 2;
10724- }
10725- if (psize == 0) {
10726- // accept some number of extra bytes after the header, but if the offset points either to before
10727- // the header ends or implies a large amount of extra data, reject the file as malformed
10728- int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
10729- int header_limit = 1024; // max we actually read is below 256 bytes currently.
10730- int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
10731- if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
10732- return stbi__errpuc("bad header", "Corrupt BMP");
10733- }
10734- // we established that bytes_read_so_far is positive and sensible.
10735- // the first half of this test rejects offsets that are either too small positives, or
10736- // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
10737- // ensures the number computed in the second half of the test can't overflow.
10738- if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
10739- return stbi__errpuc("bad offset", "Corrupt BMP");
10740- } else {
10741- stbi__skip(s, info.offset - bytes_read_so_far);
10742- }
10743- }
10744-
10745- if (info.bpp == 24 && ma == 0xff000000)
10746- s->img_n = 3;
10747- else
10748- s->img_n = ma ? 4 : 3;
10749- if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
10750- target = req_comp;
10751- else
10752- target = s->img_n; // if they want monochrome, we'll post-convert
10753-
10754- // sanity-check size
10755- if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
10756- return stbi__errpuc("too large", "Corrupt BMP");
10757-
10758- out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
10759- if (!out) return stbi__errpuc("outofmem", "Out of memory");
10760- if (info.bpp < 16) {
10761- int z=0;
10762- if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
10763- for (i=0; i < psize; ++i) {
10764- pal[i][2] = stbi__get8(s);
10765- pal[i][1] = stbi__get8(s);
10766- pal[i][0] = stbi__get8(s);
10767- if (info.hsz != 12) stbi__get8(s);
10768- pal[i][3] = 255;
10769- }
10770- stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
10771- if (info.bpp == 1) width = (s->img_x + 7) >> 3;
10772- else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
10773- else if (info.bpp == 8) width = s->img_x;
10774- else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
10775- pad = (-width)&3;
10776- if (info.bpp == 1) {
10777- for (j=0; j < (int) s->img_y; ++j) {
10778- int bit_offset = 7, v = stbi__get8(s);
10779- for (i=0; i < (int) s->img_x; ++i) {
10780- int color = (v>>bit_offset)&0x1;
10781- out[z++] = pal[color][0];
10782- out[z++] = pal[color][1];
10783- out[z++] = pal[color][2];
10784- if (target == 4) out[z++] = 255;
10785- if (i+1 == (int) s->img_x) break;
10786- if((--bit_offset) < 0) {
10787- bit_offset = 7;
10788- v = stbi__get8(s);
10789- }
10790- }
10791- stbi__skip(s, pad);
10792- }
10793- } else {
10794- for (j=0; j < (int) s->img_y; ++j) {
10795- for (i=0; i < (int) s->img_x; i += 2) {
10796- int v=stbi__get8(s),v2=0;
10797- if (info.bpp == 4) {
10798- v2 = v & 15;
10799- v >>= 4;
10800- }
10801- out[z++] = pal[v][0];
10802- out[z++] = pal[v][1];
10803- out[z++] = pal[v][2];
10804- if (target == 4) out[z++] = 255;
10805- if (i+1 == (int) s->img_x) break;
10806- v = (info.bpp == 8) ? stbi__get8(s) : v2;
10807- out[z++] = pal[v][0];
10808- out[z++] = pal[v][1];
10809- out[z++] = pal[v][2];
10810- if (target == 4) out[z++] = 255;
10811- }
10812- stbi__skip(s, pad);
10813- }
10814- }
10815- } else {
10816- int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
10817- int z = 0;
10818- int easy=0;
10819- stbi__skip(s, info.offset - info.extra_read - info.hsz);
10820- if (info.bpp == 24) width = 3 * s->img_x;
10821- else if (info.bpp == 16) width = 2*s->img_x;
10822- else /* bpp = 32 and pad = 0 */ width=0;
10823- pad = (-width) & 3;
10824- if (info.bpp == 24) {
10825- easy = 1;
10826- } else if (info.bpp == 32) {
10827- if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
10828- easy = 2;
10829- }
10830- if (!easy) {
10831- if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
10832- // right shift amt to put high bit in position #7
10833- rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
10834- gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
10835- bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
10836- ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
10837- if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
10838- }
10839- for (j=0; j < (int) s->img_y; ++j) {
10840- if (easy) {
10841- for (i=0; i < (int) s->img_x; ++i) {
10842- unsigned char a;
10843- out[z+2] = stbi__get8(s);
10844- out[z+1] = stbi__get8(s);
10845- out[z+0] = stbi__get8(s);
10846- z += 3;
10847- a = (easy == 2 ? stbi__get8(s) : 255);
10848- all_a |= a;
10849- if (target == 4) out[z++] = a;
10850- }
10851- } else {
10852- int bpp = info.bpp;
10853- for (i=0; i < (int) s->img_x; ++i) {
10854- stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
10855- unsigned int a;
10856- out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
10857- out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
10858- out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
10859- a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
10860- all_a |= a;
10861- if (target == 4) out[z++] = STBI__BYTECAST(a);
10862- }
10863- }
10864- stbi__skip(s, pad);
10865- }
10866- }
10867-
10868- // if alpha channel is all 0s, replace with all 255s
10869- if (target == 4 && all_a == 0)
10870- for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
10871- out[i] = 255;
10872-
10873- if (flip_vertically) {
10874- stbi_uc t;
10875- for (j=0; j < (int) s->img_y>>1; ++j) {
10876- stbi_uc *p1 = out + j *s->img_x*target;
10877- stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
10878- for (i=0; i < (int) s->img_x*target; ++i) {
10879- t = p1[i]; p1[i] = p2[i]; p2[i] = t;
10880- }
10881- }
10882- }
10883-
10884- if (req_comp && req_comp != target) {
10885- out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
10886- if (out == NULL) return out; // stbi__convert_format frees input on failure
10887- }
10888-
10889- *x = s->img_x;
10890- *y = s->img_y;
10891- if (comp) *comp = s->img_n;
10892- return out;
10893+static int
10894+stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
10895+{
10896+ // BI_BITFIELDS specifies masks explicitly, don't override
10897+ if (compress == 3) {
10898+ return 1;
10899+ }
10900+
10901+ if (compress == 0) {
10902+ if (info->bpp == 16) {
10903+ info->mr = 31u << 10;
10904+ info->mg = 31u << 5;
10905+ info->mb = 31u << 0;
10906+ } else if (info->bpp == 32) {
10907+ info->mr = 0xffu << 16;
10908+ info->mg = 0xffu << 8;
10909+ info->mb = 0xffu << 0;
10910+ info->ma = 0xffu << 24;
10911+ info->all_a = 0; // if all_a is 0 at end, then we loaded alpha
10912+ // channel but it was all 0
10913+ } else {
10914+ // otherwise, use defaults, which is all-0
10915+ info->mr = info->mg = info->mb = info->ma = 0;
10916+ }
10917+ return 1;
10918+ }
10919+ return 0; // error
10920+}
10921+
10922+static void *
10923+stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
10924+{
10925+ int hsz;
10926+ if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') {
10927+ return stbi__errpuc("not BMP", "Corrupt BMP");
10928+ }
10929+ stbi__get32le(s); // discard filesize
10930+ stbi__get16le(s); // discard reserved
10931+ stbi__get16le(s); // discard reserved
10932+ info->offset = stbi__get32le(s);
10933+ info->hsz = hsz = stbi__get32le(s);
10934+ info->mr = info->mg = info->mb = info->ma = 0;
10935+ info->extra_read = 14;
10936+
10937+ if (info->offset < 0) {
10938+ return stbi__errpuc("bad BMP", "bad BMP");
10939+ }
10940+
10941+ if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) {
10942+ return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
10943+ }
10944+ if (hsz == 12) {
10945+ s->img_x = stbi__get16le(s);
10946+ s->img_y = stbi__get16le(s);
10947+ } else {
10948+ s->img_x = stbi__get32le(s);
10949+ s->img_y = stbi__get32le(s);
10950+ }
10951+ if (stbi__get16le(s) != 1) {
10952+ return stbi__errpuc("bad BMP", "bad BMP");
10953+ }
10954+ info->bpp = stbi__get16le(s);
10955+ if (hsz != 12) {
10956+ int compress = stbi__get32le(s);
10957+ if (compress == 1 || compress == 2) {
10958+ return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
10959+ }
10960+ if (compress >= 4) {
10961+ return stbi__errpuc(
10962+ "BMP JPEG/PNG",
10963+ "BMP type not supported: unsupported compression"); // this
10964+ // includes
10965+ // PNG/JPEG
10966+ // modes
10967+ }
10968+ if (compress == 3 && info->bpp != 16 && info->bpp != 32) {
10969+ return stbi__errpuc(
10970+ "bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
10971+ }
10972+ stbi__get32le(s); // discard sizeof
10973+ stbi__get32le(s); // discard hres
10974+ stbi__get32le(s); // discard vres
10975+ stbi__get32le(s); // discard colorsused
10976+ stbi__get32le(s); // discard max important
10977+ if (hsz == 40 || hsz == 56) {
10978+ if (hsz == 56) {
10979+ stbi__get32le(s);
10980+ stbi__get32le(s);
10981+ stbi__get32le(s);
10982+ stbi__get32le(s);
10983+ }
10984+ if (info->bpp == 16 || info->bpp == 32) {
10985+ if (compress == 0) {
10986+ stbi__bmp_set_mask_defaults(info, compress);
10987+ } else if (compress == 3) {
10988+ info->mr = stbi__get32le(s);
10989+ info->mg = stbi__get32le(s);
10990+ info->mb = stbi__get32le(s);
10991+ info->extra_read += 12;
10992+ // not documented, but generated by photoshop and handled by
10993+ // mspaint
10994+ if (info->mr == info->mg && info->mg == info->mb) {
10995+ // ?!?!?
10996+ return stbi__errpuc("bad BMP", "bad BMP");
10997+ }
10998+ } else {
10999+ return stbi__errpuc("bad BMP", "bad BMP");
11000+ }
11001+ }
11002+ } else {
11003+ // V4/V5 header
11004+ int i;
11005+ if (hsz != 108 && hsz != 124) {
11006+ return stbi__errpuc("bad BMP", "bad BMP");
11007+ }
11008+ info->mr = stbi__get32le(s);
11009+ info->mg = stbi__get32le(s);
11010+ info->mb = stbi__get32le(s);
11011+ info->ma = stbi__get32le(s);
11012+ if (compress != 3) { // override mr/mg/mb unless in BI_BITFIELDS
11013+ // mode, as per docs
11014+ stbi__bmp_set_mask_defaults(info, compress);
11015+ }
11016+ stbi__get32le(s); // discard color space
11017+ for (i = 0; i < 12; ++i) {
11018+ stbi__get32le(s); // discard color space parameters
11019+ }
11020+ if (hsz == 124) {
11021+ stbi__get32le(s); // discard rendering intent
11022+ stbi__get32le(s); // discard offset of profile data
11023+ stbi__get32le(s); // discard size of profile data
11024+ stbi__get32le(s); // discard reserved
11025+ }
11026+ }
11027+ }
11028+ return (void *)1;
11029+}
11030+
11031+static void *
11032+stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
11033+ stbi__result_info *ri)
11034+{
11035+ stbi_uc *out;
11036+ unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
11037+ stbi_uc pal[256][4];
11038+ int psize = 0, i, j, width;
11039+ int flip_vertically, pad, target;
11040+ stbi__bmp_data info;
11041+ STBI_NOTUSED(ri);
11042+
11043+ info.all_a = 255;
11044+ if (stbi__bmp_parse_header(s, &info) == NULL) {
11045+ return NULL; // error code already set
11046+ }
11047+
11048+ flip_vertically = ((int)s->img_y) > 0;
11049+ s->img_y = abs((int)s->img_y);
11050+
11051+ if (s->img_y > STBI_MAX_DIMENSIONS) {
11052+ return stbi__errpuc("too large", "Very large image (corrupt?)");
11053+ }
11054+ if (s->img_x > STBI_MAX_DIMENSIONS) {
11055+ return stbi__errpuc("too large", "Very large image (corrupt?)");
11056+ }
11057+
11058+ mr = info.mr;
11059+ mg = info.mg;
11060+ mb = info.mb;
11061+ ma = info.ma;
11062+ all_a = info.all_a;
11063+
11064+ if (info.hsz == 12) {
11065+ if (info.bpp < 24) {
11066+ psize = (info.offset - info.extra_read - 24) / 3;
11067+ }
11068+ } else {
11069+ if (info.bpp < 16) {
11070+ psize = (info.offset - info.extra_read - info.hsz) >> 2;
11071+ }
11072+ }
11073+ if (psize == 0) {
11074+ // accept some number of extra bytes after the header, but if the offset
11075+ // points either to before the header ends or implies a large amount of
11076+ // extra data, reject the file as malformed
11077+ int bytes_read_so_far = s->callback_already_read +
11078+ (int)(s->img_buffer - s->img_buffer_original);
11079+ int header_limit =
11080+ 1024; // max we actually read is below 256 bytes currently.
11081+ int extra_data_limit =
11082+ 256 * 4; // what ordinarily goes here is a palette; 256 entries*4
11083+ // bytes is its max size.
11084+ if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
11085+ return stbi__errpuc("bad header", "Corrupt BMP");
11086+ }
11087+ // we established that bytes_read_so_far is positive and sensible.
11088+ // the first half of this test rejects offsets that are either too small
11089+ // positives, or negative, and guarantees that info.offset >=
11090+ // bytes_read_so_far > 0. this in turn ensures the number computed in
11091+ // the second half of the test can't overflow.
11092+ if (info.offset < bytes_read_so_far ||
11093+ info.offset - bytes_read_so_far > extra_data_limit) {
11094+ return stbi__errpuc("bad offset", "Corrupt BMP");
11095+ } else {
11096+ stbi__skip(s, info.offset - bytes_read_so_far);
11097+ }
11098+ }
11099+
11100+ if (info.bpp == 24 && ma == 0xff000000) {
11101+ s->img_n = 3;
11102+ } else {
11103+ s->img_n = ma ? 4 : 3;
11104+ }
11105+ if (req_comp && req_comp >= 3) { // we can directly decode 3 or 4
11106+ target = req_comp;
11107+ } else {
11108+ target = s->img_n; // if they want monochrome, we'll post-convert
11109+ }
11110+
11111+ // sanity-check size
11112+ if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) {
11113+ return stbi__errpuc("too large", "Corrupt BMP");
11114+ }
11115+
11116+ out = (stbi_uc *)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
11117+ if (!out) {
11118+ return stbi__errpuc("outofmem", "Out of memory");
11119+ }
11120+ if (info.bpp < 16) {
11121+ int z = 0;
11122+ if (psize == 0 || psize > 256) {
11123+ STBI_FREE(out);
11124+ return stbi__errpuc("invalid", "Corrupt BMP");
11125+ }
11126+ for (i = 0; i < psize; ++i) {
11127+ pal[i][2] = stbi__get8(s);
11128+ pal[i][1] = stbi__get8(s);
11129+ pal[i][0] = stbi__get8(s);
11130+ if (info.hsz != 12) {
11131+ stbi__get8(s);
11132+ }
11133+ pal[i][3] = 255;
11134+ }
11135+ stbi__skip(s, info.offset - info.extra_read - info.hsz -
11136+ psize * (info.hsz == 12 ? 3 : 4));
11137+ if (info.bpp == 1) {
11138+ width = (s->img_x + 7) >> 3;
11139+ } else if (info.bpp == 4) {
11140+ width = (s->img_x + 1) >> 1;
11141+ } else if (info.bpp == 8) {
11142+ width = s->img_x;
11143+ } else {
11144+ STBI_FREE(out);
11145+ return stbi__errpuc("bad bpp", "Corrupt BMP");
11146+ }
11147+ pad = (-width) & 3;
11148+ if (info.bpp == 1) {
11149+ for (j = 0; j < (int)s->img_y; ++j) {
11150+ int bit_offset = 7, v = stbi__get8(s);
11151+ for (i = 0; i < (int)s->img_x; ++i) {
11152+ int color = (v >> bit_offset) & 0x1;
11153+ out[z++] = pal[color][0];
11154+ out[z++] = pal[color][1];
11155+ out[z++] = pal[color][2];
11156+ if (target == 4) {
11157+ out[z++] = 255;
11158+ }
11159+ if (i + 1 == (int)s->img_x) {
11160+ break;
11161+ }
11162+ if ((--bit_offset) < 0) {
11163+ bit_offset = 7;
11164+ v = stbi__get8(s);
11165+ }
11166+ }
11167+ stbi__skip(s, pad);
11168+ }
11169+ } else {
11170+ for (j = 0; j < (int)s->img_y; ++j) {
11171+ for (i = 0; i < (int)s->img_x; i += 2) {
11172+ int v = stbi__get8(s), v2 = 0;
11173+ if (info.bpp == 4) {
11174+ v2 = v & 15;
11175+ v >>= 4;
11176+ }
11177+ out[z++] = pal[v][0];
11178+ out[z++] = pal[v][1];
11179+ out[z++] = pal[v][2];
11180+ if (target == 4) {
11181+ out[z++] = 255;
11182+ }
11183+ if (i + 1 == (int)s->img_x) {
11184+ break;
11185+ }
11186+ v = (info.bpp == 8) ? stbi__get8(s) : v2;
11187+ out[z++] = pal[v][0];
11188+ out[z++] = pal[v][1];
11189+ out[z++] = pal[v][2];
11190+ if (target == 4) {
11191+ out[z++] = 255;
11192+ }
11193+ }
11194+ stbi__skip(s, pad);
11195+ }
11196+ }
11197+ } else {
11198+ int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0,
11199+ gcount = 0, bcount = 0, acount = 0;
11200+ int z = 0;
11201+ int easy = 0;
11202+ stbi__skip(s, info.offset - info.extra_read - info.hsz);
11203+ if (info.bpp == 24) {
11204+ width = 3 * s->img_x;
11205+ } else if (info.bpp == 16) {
11206+ width = 2 * s->img_x;
11207+ } else { /* bpp = 32 and pad = 0 */
11208+ width = 0;
11209+ }
11210+ pad = (-width) & 3;
11211+ if (info.bpp == 24) {
11212+ easy = 1;
11213+ } else if (info.bpp == 32) {
11214+ if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 &&
11215+ ma == 0xff000000) {
11216+ easy = 2;
11217+ }
11218+ }
11219+ if (!easy) {
11220+ if (!mr || !mg || !mb) {
11221+ STBI_FREE(out);
11222+ return stbi__errpuc("bad masks", "Corrupt BMP");
11223+ }
11224+ // right shift amt to put high bit in position #7
11225+ rshift = stbi__high_bit(mr) - 7;
11226+ rcount = stbi__bitcount(mr);
11227+ gshift = stbi__high_bit(mg) - 7;
11228+ gcount = stbi__bitcount(mg);
11229+ bshift = stbi__high_bit(mb) - 7;
11230+ bcount = stbi__bitcount(mb);
11231+ ashift = stbi__high_bit(ma) - 7;
11232+ acount = stbi__bitcount(ma);
11233+ if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) {
11234+ STBI_FREE(out);
11235+ return stbi__errpuc("bad masks", "Corrupt BMP");
11236+ }
11237+ }
11238+ for (j = 0; j < (int)s->img_y; ++j) {
11239+ if (easy) {
11240+ for (i = 0; i < (int)s->img_x; ++i) {
11241+ unsigned char a;
11242+ out[z + 2] = stbi__get8(s);
11243+ out[z + 1] = stbi__get8(s);
11244+ out[z + 0] = stbi__get8(s);
11245+ z += 3;
11246+ a = (easy == 2 ? stbi__get8(s) : 255);
11247+ all_a |= a;
11248+ if (target == 4) {
11249+ out[z++] = a;
11250+ }
11251+ }
11252+ } else {
11253+ int bpp = info.bpp;
11254+ for (i = 0; i < (int)s->img_x; ++i) {
11255+ stbi__uint32 v = (bpp == 16 ? (stbi__uint32)stbi__get16le(s)
11256+ : stbi__get32le(s));
11257+ unsigned int a;
11258+ out[z++] = STBI__BYTECAST(
11259+ stbi__shiftsigned(v & mr, rshift, rcount));
11260+ out[z++] = STBI__BYTECAST(
11261+ stbi__shiftsigned(v & mg, gshift, gcount));
11262+ out[z++] = STBI__BYTECAST(
11263+ stbi__shiftsigned(v & mb, bshift, bcount));
11264+ a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
11265+ all_a |= a;
11266+ if (target == 4) {
11267+ out[z++] = STBI__BYTECAST(a);
11268+ }
11269+ }
11270+ }
11271+ stbi__skip(s, pad);
11272+ }
11273+ }
11274+
11275+ // if alpha channel is all 0s, replace with all 255s
11276+ if (target == 4 && all_a == 0) {
11277+ for (i = 4 * s->img_x * s->img_y - 1; i >= 0; i -= 4) {
11278+ out[i] = 255;
11279+ }
11280+ }
11281+
11282+ if (flip_vertically) {
11283+ stbi_uc t;
11284+ for (j = 0; j < (int)s->img_y >> 1; ++j) {
11285+ stbi_uc *p1 = out + j * s->img_x * target;
11286+ stbi_uc *p2 = out + (s->img_y - 1 - j) * s->img_x * target;
11287+ for (i = 0; i < (int)s->img_x * target; ++i) {
11288+ t = p1[i];
11289+ p1[i] = p2[i];
11290+ p2[i] = t;
11291+ }
11292+ }
11293+ }
11294+
11295+ if (req_comp && req_comp != target) {
11296+ out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
11297+ if (out == NULL) {
11298+ return out; // stbi__convert_format frees input on failure
11299+ }
11300+ }
11301+
11302+ *x = s->img_x;
11303+ *y = s->img_y;
11304+ if (comp) {
11305+ *comp = s->img_n;
11306+ }
11307+ return out;
11308 }
11309 #endif
11310
11311@@ -5736,592 +7221,690 @@ static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req
11312 // by Jonathan Dummer
11313 #ifndef STBI_NO_TGA
11314 // returns STBI_rgb or whatever, 0 on error
11315-static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
11316-{
11317- // only RGB or RGBA (incl. 16bit) or grey allowed
11318- if (is_rgb16) *is_rgb16 = 0;
11319- switch(bits_per_pixel) {
11320- case 8: return STBI_grey;
11321- case 16: if(is_grey) return STBI_grey_alpha;
11322- // fallthrough
11323- case 15: if(is_rgb16) *is_rgb16 = 1;
11324- return STBI_rgb;
11325- case 24: // fallthrough
11326- case 32: return bits_per_pixel/8;
11327- default: return 0;
11328- }
11329-}
11330-
11331-static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
11332-{
11333- int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
11334- int sz, tga_colormap_type;
11335- stbi__get8(s); // discard Offset
11336- tga_colormap_type = stbi__get8(s); // colormap type
11337- if( tga_colormap_type > 1 ) {
11338- stbi__rewind(s);
11339- return 0; // only RGB or indexed allowed
11340- }
11341- tga_image_type = stbi__get8(s); // image type
11342- if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
11343- if (tga_image_type != 1 && tga_image_type != 9) {
11344- stbi__rewind(s);
11345- return 0;
11346- }
11347- stbi__skip(s,4); // skip index of first colormap entry and number of entries
11348- sz = stbi__get8(s); // check bits per palette color entry
11349- if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
11350- stbi__rewind(s);
11351- return 0;
11352- }
11353- stbi__skip(s,4); // skip image x and y origin
11354- tga_colormap_bpp = sz;
11355- } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
11356- if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
11357- stbi__rewind(s);
11358- return 0; // only RGB or grey allowed, +/- RLE
11359- }
11360- stbi__skip(s,9); // skip colormap specification and image x/y origin
11361- tga_colormap_bpp = 0;
11362- }
11363- tga_w = stbi__get16le(s);
11364- if( tga_w < 1 ) {
11365- stbi__rewind(s);
11366- return 0; // test width
11367- }
11368- tga_h = stbi__get16le(s);
11369- if( tga_h < 1 ) {
11370- stbi__rewind(s);
11371- return 0; // test height
11372- }
11373- tga_bits_per_pixel = stbi__get8(s); // bits per pixel
11374- stbi__get8(s); // ignore alpha bits
11375- if (tga_colormap_bpp != 0) {
11376- if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
11377- // when using a colormap, tga_bits_per_pixel is the size of the indexes
11378- // I don't think anything but 8 or 16bit indexes makes sense
11379- stbi__rewind(s);
11380- return 0;
11381- }
11382- tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
11383- } else {
11384- tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
11385- }
11386- if(!tga_comp) {
11387- stbi__rewind(s);
11388- return 0;
11389- }
11390- if (x) *x = tga_w;
11391- if (y) *y = tga_h;
11392- if (comp) *comp = tga_comp;
11393- return 1; // seems to have passed everything
11394-}
11395-
11396-static int stbi__tga_test(stbi__context *s)
11397-{
11398- int res = 0;
11399- int sz, tga_color_type;
11400- stbi__get8(s); // discard Offset
11401- tga_color_type = stbi__get8(s); // color type
11402- if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed
11403- sz = stbi__get8(s); // image type
11404- if ( tga_color_type == 1 ) { // colormapped (paletted) image
11405- if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
11406- stbi__skip(s,4); // skip index of first colormap entry and number of entries
11407- sz = stbi__get8(s); // check bits per palette color entry
11408- if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
11409- stbi__skip(s,4); // skip image x and y origin
11410- } else { // "normal" image w/o colormap
11411- if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
11412- stbi__skip(s,9); // skip colormap specification and image x/y origin
11413- }
11414- if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width
11415- if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height
11416- sz = stbi__get8(s); // bits per pixel
11417- if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
11418- if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
11419-
11420- res = 1; // if we got this far, everything's good and we can return 1 instead of 0
11421+static int
11422+stbi__tga_get_comp(int bits_per_pixel, int is_grey, int *is_rgb16)
11423+{
11424+ // only RGB or RGBA (incl. 16bit) or grey allowed
11425+ if (is_rgb16) {
11426+ *is_rgb16 = 0;
11427+ }
11428+ switch (bits_per_pixel) {
11429+ case 8:
11430+ return STBI_grey;
11431+ case 16:
11432+ if (is_grey) {
11433+ return STBI_grey_alpha;
11434+ }
11435+ // fallthrough
11436+ case 15:
11437+ if (is_rgb16) {
11438+ *is_rgb16 = 1;
11439+ }
11440+ return STBI_rgb;
11441+ case 24: // fallthrough
11442+ case 32:
11443+ return bits_per_pixel / 8;
11444+ default:
11445+ return 0;
11446+ }
11447+}
11448+
11449+static int
11450+stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
11451+{
11452+ int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel,
11453+ tga_colormap_bpp;
11454+ int sz, tga_colormap_type;
11455+ stbi__get8(s); // discard Offset
11456+ tga_colormap_type = stbi__get8(s); // colormap type
11457+ if (tga_colormap_type > 1) {
11458+ stbi__rewind(s);
11459+ return 0; // only RGB or indexed allowed
11460+ }
11461+ tga_image_type = stbi__get8(s); // image type
11462+ if (tga_colormap_type == 1) { // colormapped (paletted) image
11463+ if (tga_image_type != 1 && tga_image_type != 9) {
11464+ stbi__rewind(s);
11465+ return 0;
11466+ }
11467+ stbi__skip(
11468+ s, 4); // skip index of first colormap entry and number of entries
11469+ sz = stbi__get8(s); // check bits per palette color entry
11470+ if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) {
11471+ stbi__rewind(s);
11472+ return 0;
11473+ }
11474+ stbi__skip(s, 4); // skip image x and y origin
11475+ tga_colormap_bpp = sz;
11476+ } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
11477+ if ((tga_image_type != 2) && (tga_image_type != 3) &&
11478+ (tga_image_type != 10) && (tga_image_type != 11)) {
11479+ stbi__rewind(s);
11480+ return 0; // only RGB or grey allowed, +/- RLE
11481+ }
11482+ stbi__skip(s, 9); // skip colormap specification and image x/y origin
11483+ tga_colormap_bpp = 0;
11484+ }
11485+ tga_w = stbi__get16le(s);
11486+ if (tga_w < 1) {
11487+ stbi__rewind(s);
11488+ return 0; // test width
11489+ }
11490+ tga_h = stbi__get16le(s);
11491+ if (tga_h < 1) {
11492+ stbi__rewind(s);
11493+ return 0; // test height
11494+ }
11495+ tga_bits_per_pixel = stbi__get8(s); // bits per pixel
11496+ stbi__get8(s); // ignore alpha bits
11497+ if (tga_colormap_bpp != 0) {
11498+ if ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
11499+ // when using a colormap, tga_bits_per_pixel is the size of the
11500+ // indexes I don't think anything but 8 or 16bit indexes makes sense
11501+ stbi__rewind(s);
11502+ return 0;
11503+ }
11504+ tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
11505+ } else {
11506+ tga_comp = stbi__tga_get_comp(
11507+ tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11),
11508+ NULL);
11509+ }
11510+ if (!tga_comp) {
11511+ stbi__rewind(s);
11512+ return 0;
11513+ }
11514+ if (x) {
11515+ *x = tga_w;
11516+ }
11517+ if (y) {
11518+ *y = tga_h;
11519+ }
11520+ if (comp) {
11521+ *comp = tga_comp;
11522+ }
11523+ return 1; // seems to have passed everything
11524+}
11525+
11526+static int
11527+stbi__tga_test(stbi__context *s)
11528+{
11529+ int res = 0;
11530+ int sz, tga_color_type;
11531+ stbi__get8(s); // discard Offset
11532+ tga_color_type = stbi__get8(s); // color type
11533+ if (tga_color_type > 1) {
11534+ goto errorEnd; // only RGB or indexed allowed
11535+ }
11536+ sz = stbi__get8(s); // image type
11537+ if (tga_color_type == 1) { // colormapped (paletted) image
11538+ if (sz != 1 && sz != 9) {
11539+ goto errorEnd; // colortype 1 demands image type 1 or 9
11540+ }
11541+ stbi__skip(
11542+ s, 4); // skip index of first colormap entry and number of entries
11543+ sz = stbi__get8(s); // check bits per palette color entry
11544+ if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) {
11545+ goto errorEnd;
11546+ }
11547+ stbi__skip(s, 4); // skip image x and y origin
11548+ } else { // "normal" image w/o colormap
11549+ if ((sz != 2) && (sz != 3) && (sz != 10) && (sz != 11)) {
11550+ goto errorEnd; // only RGB or grey allowed, +/- RLE
11551+ }
11552+ stbi__skip(s, 9); // skip colormap specification and image x/y origin
11553+ }
11554+ if (stbi__get16le(s) < 1) {
11555+ goto errorEnd; // test width
11556+ }
11557+ if (stbi__get16le(s) < 1) {
11558+ goto errorEnd; // test height
11559+ }
11560+ sz = stbi__get8(s); // bits per pixel
11561+ if ((tga_color_type == 1) && (sz != 8) && (sz != 16)) {
11562+ goto errorEnd; // for colormapped images, bpp is size of an index
11563+ }
11564+ if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) {
11565+ goto errorEnd;
11566+ }
11567+
11568+ res = 1; // if we got this far, everything's good and we can return 1
11569+ // instead of 0
11570
11571 errorEnd:
11572- stbi__rewind(s);
11573- return res;
11574+ stbi__rewind(s);
11575+ return res;
11576 }
11577
11578 // read 16bit value and convert to 24bit RGB
11579-static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
11580-{
11581- stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
11582- stbi__uint16 fiveBitMask = 31;
11583- // we have 3 channels with 5bits each
11584- int r = (px >> 10) & fiveBitMask;
11585- int g = (px >> 5) & fiveBitMask;
11586- int b = px & fiveBitMask;
11587- // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
11588- out[0] = (stbi_uc)((r * 255)/31);
11589- out[1] = (stbi_uc)((g * 255)/31);
11590- out[2] = (stbi_uc)((b * 255)/31);
11591-
11592- // some people claim that the most significant bit might be used for alpha
11593- // (possibly if an alpha-bit is set in the "image descriptor byte")
11594- // but that only made 16bit test images completely translucent..
11595- // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
11596-}
11597-
11598-static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
11599-{
11600- // read in the TGA header stuff
11601- int tga_offset = stbi__get8(s);
11602- int tga_indexed = stbi__get8(s);
11603- int tga_image_type = stbi__get8(s);
11604- int tga_is_RLE = 0;
11605- int tga_palette_start = stbi__get16le(s);
11606- int tga_palette_len = stbi__get16le(s);
11607- int tga_palette_bits = stbi__get8(s);
11608- int tga_x_origin = stbi__get16le(s);
11609- int tga_y_origin = stbi__get16le(s);
11610- int tga_width = stbi__get16le(s);
11611- int tga_height = stbi__get16le(s);
11612- int tga_bits_per_pixel = stbi__get8(s);
11613- int tga_comp, tga_rgb16=0;
11614- int tga_inverted = stbi__get8(s);
11615- // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
11616- // image data
11617- unsigned char *tga_data;
11618- unsigned char *tga_palette = NULL;
11619- int i, j;
11620- unsigned char raw_data[4] = {0};
11621- int RLE_count = 0;
11622- int RLE_repeating = 0;
11623- int read_next_pixel = 1;
11624- STBI_NOTUSED(ri);
11625- STBI_NOTUSED(tga_x_origin); // @TODO
11626- STBI_NOTUSED(tga_y_origin); // @TODO
11627-
11628- if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
11629- if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
11630-
11631- // do a tiny bit of precessing
11632- if ( tga_image_type >= 8 )
11633- {
11634- tga_image_type -= 8;
11635- tga_is_RLE = 1;
11636- }
11637- tga_inverted = 1 - ((tga_inverted >> 5) & 1);
11638-
11639- // If I'm paletted, then I'll use the number of bits from the palette
11640- if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
11641- else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
11642-
11643- if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
11644- return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
11645-
11646- // tga info
11647- *x = tga_width;
11648- *y = tga_height;
11649- if (comp) *comp = tga_comp;
11650-
11651- if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
11652- return stbi__errpuc("too large", "Corrupt TGA");
11653-
11654- tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
11655- if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
11656-
11657- // skip to the data's starting position (offset usually = 0)
11658- stbi__skip(s, tga_offset );
11659-
11660- if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
11661- for (i=0; i < tga_height; ++i) {
11662- int row = tga_inverted ? tga_height -i - 1 : i;
11663- stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
11664- stbi__getn(s, tga_row, tga_width * tga_comp);
11665- }
11666- } else {
11667- // do I need to load a palette?
11668- if ( tga_indexed)
11669- {
11670- if (tga_palette_len == 0) { /* you have to have at least one entry! */
11671- STBI_FREE(tga_data);
11672- return stbi__errpuc("bad palette", "Corrupt TGA");
11673- }
11674-
11675- // any data to skip? (offset usually = 0)
11676- stbi__skip(s, tga_palette_start );
11677- // load the palette
11678- tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
11679- if (!tga_palette) {
11680- STBI_FREE(tga_data);
11681- return stbi__errpuc("outofmem", "Out of memory");
11682- }
11683- if (tga_rgb16) {
11684- stbi_uc *pal_entry = tga_palette;
11685- STBI_ASSERT(tga_comp == STBI_rgb);
11686- for (i=0; i < tga_palette_len; ++i) {
11687- stbi__tga_read_rgb16(s, pal_entry);
11688- pal_entry += tga_comp;
11689- }
11690- } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
11691- STBI_FREE(tga_data);
11692- STBI_FREE(tga_palette);
11693- return stbi__errpuc("bad palette", "Corrupt TGA");
11694- }
11695- }
11696- // load the data
11697- for (i=0; i < tga_width * tga_height; ++i)
11698- {
11699- // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
11700- if ( tga_is_RLE )
11701- {
11702- if ( RLE_count == 0 )
11703- {
11704- // yep, get the next byte as a RLE command
11705- int RLE_cmd = stbi__get8(s);
11706- RLE_count = 1 + (RLE_cmd & 127);
11707- RLE_repeating = RLE_cmd >> 7;
11708- read_next_pixel = 1;
11709- } else if ( !RLE_repeating )
11710- {
11711- read_next_pixel = 1;
11712- }
11713- } else
11714- {
11715- read_next_pixel = 1;
11716- }
11717- // OK, if I need to read a pixel, do it now
11718- if ( read_next_pixel )
11719- {
11720- // load however much data we did have
11721- if ( tga_indexed )
11722- {
11723- // read in index, then perform the lookup
11724- int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
11725- if ( pal_idx >= tga_palette_len ) {
11726- // invalid index
11727- pal_idx = 0;
11728- }
11729- pal_idx *= tga_comp;
11730- for (j = 0; j < tga_comp; ++j) {
11731- raw_data[j] = tga_palette[pal_idx+j];
11732- }
11733- } else if(tga_rgb16) {
11734- STBI_ASSERT(tga_comp == STBI_rgb);
11735- stbi__tga_read_rgb16(s, raw_data);
11736- } else {
11737- // read in the data raw
11738- for (j = 0; j < tga_comp; ++j) {
11739- raw_data[j] = stbi__get8(s);
11740- }
11741- }
11742- // clear the reading flag for the next pixel
11743- read_next_pixel = 0;
11744- } // end of reading a pixel
11745-
11746- // copy data
11747- for (j = 0; j < tga_comp; ++j)
11748- tga_data[i*tga_comp+j] = raw_data[j];
11749-
11750- // in case we're in RLE mode, keep counting down
11751- --RLE_count;
11752- }
11753- // do I need to invert the image?
11754- if ( tga_inverted )
11755- {
11756- for (j = 0; j*2 < tga_height; ++j)
11757- {
11758- int index1 = j * tga_width * tga_comp;
11759- int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
11760- for (i = tga_width * tga_comp; i > 0; --i)
11761- {
11762- unsigned char temp = tga_data[index1];
11763- tga_data[index1] = tga_data[index2];
11764- tga_data[index2] = temp;
11765- ++index1;
11766- ++index2;
11767- }
11768- }
11769- }
11770- // clear my palette, if I had one
11771- if ( tga_palette != NULL )
11772- {
11773- STBI_FREE( tga_palette );
11774- }
11775- }
11776-
11777- // swap RGB - if the source data was RGB16, it already is in the right order
11778- if (tga_comp >= 3 && !tga_rgb16)
11779- {
11780- unsigned char* tga_pixel = tga_data;
11781- for (i=0; i < tga_width * tga_height; ++i)
11782- {
11783- unsigned char temp = tga_pixel[0];
11784- tga_pixel[0] = tga_pixel[2];
11785- tga_pixel[2] = temp;
11786- tga_pixel += tga_comp;
11787- }
11788- }
11789-
11790- // convert to target component count
11791- if (req_comp && req_comp != tga_comp)
11792- tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
11793-
11794- // the things I do to get rid of an error message, and yet keep
11795- // Microsoft's C compilers happy... [8^(
11796- tga_palette_start = tga_palette_len = tga_palette_bits =
11797- tga_x_origin = tga_y_origin = 0;
11798- STBI_NOTUSED(tga_palette_start);
11799- // OK, done
11800- return tga_data;
11801+static void
11802+stbi__tga_read_rgb16(stbi__context *s, stbi_uc *out)
11803+{
11804+ stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
11805+ stbi__uint16 fiveBitMask = 31;
11806+ // we have 3 channels with 5bits each
11807+ int r = (px >> 10) & fiveBitMask;
11808+ int g = (px >> 5) & fiveBitMask;
11809+ int b = px & fiveBitMask;
11810+ // Note that this saves the data in RGB(A) order, so it doesn't need to be
11811+ // swapped later
11812+ out[0] = (stbi_uc)((r * 255) / 31);
11813+ out[1] = (stbi_uc)((g * 255) / 31);
11814+ out[2] = (stbi_uc)((b * 255) / 31);
11815+
11816+ // some people claim that the most significant bit might be used for alpha
11817+ // (possibly if an alpha-bit is set in the "image descriptor byte")
11818+ // but that only made 16bit test images completely translucent..
11819+ // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
11820+}
11821+
11822+static void *
11823+stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
11824+ stbi__result_info *ri)
11825+{
11826+ // read in the TGA header stuff
11827+ int tga_offset = stbi__get8(s);
11828+ int tga_indexed = stbi__get8(s);
11829+ int tga_image_type = stbi__get8(s);
11830+ int tga_is_RLE = 0;
11831+ int tga_palette_start = stbi__get16le(s);
11832+ int tga_palette_len = stbi__get16le(s);
11833+ int tga_palette_bits = stbi__get8(s);
11834+ int tga_x_origin = stbi__get16le(s);
11835+ int tga_y_origin = stbi__get16le(s);
11836+ int tga_width = stbi__get16le(s);
11837+ int tga_height = stbi__get16le(s);
11838+ int tga_bits_per_pixel = stbi__get8(s);
11839+ int tga_comp, tga_rgb16 = 0;
11840+ int tga_inverted = stbi__get8(s);
11841+ // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused
11842+ // (useless?)
11843+ // image data
11844+ unsigned char *tga_data;
11845+ unsigned char *tga_palette = NULL;
11846+ int i, j;
11847+ unsigned char raw_data[4] = {0};
11848+ int RLE_count = 0;
11849+ int RLE_repeating = 0;
11850+ int read_next_pixel = 1;
11851+ STBI_NOTUSED(ri);
11852+ STBI_NOTUSED(tga_x_origin); // @TODO
11853+ STBI_NOTUSED(tga_y_origin); // @TODO
11854+
11855+ if (tga_height > STBI_MAX_DIMENSIONS) {
11856+ return stbi__errpuc("too large", "Very large image (corrupt?)");
11857+ }
11858+ if (tga_width > STBI_MAX_DIMENSIONS) {
11859+ return stbi__errpuc("too large", "Very large image (corrupt?)");
11860+ }
11861+
11862+ // do a tiny bit of precessing
11863+ if (tga_image_type >= 8) {
11864+ tga_image_type -= 8;
11865+ tga_is_RLE = 1;
11866+ }
11867+ tga_inverted = 1 - ((tga_inverted >> 5) & 1);
11868+
11869+ // If I'm paletted, then I'll use the number of bits from the palette
11870+ if (tga_indexed) {
11871+ tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
11872+ } else {
11873+ tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3),
11874+ &tga_rgb16);
11875+ }
11876+
11877+ if (!tga_comp) { // shouldn't really happen, stbi__tga_test() should have
11878+ // ensured basic consistency
11879+ return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
11880+ }
11881+
11882+ // tga info
11883+ *x = tga_width;
11884+ *y = tga_height;
11885+ if (comp) {
11886+ *comp = tga_comp;
11887+ }
11888+
11889+ if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) {
11890+ return stbi__errpuc("too large", "Corrupt TGA");
11891+ }
11892+
11893+ tga_data =
11894+ (unsigned char *)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
11895+ if (!tga_data) {
11896+ return stbi__errpuc("outofmem", "Out of memory");
11897+ }
11898+
11899+ // skip to the data's starting position (offset usually = 0)
11900+ stbi__skip(s, tga_offset);
11901+
11902+ if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
11903+ for (i = 0; i < tga_height; ++i) {
11904+ int row = tga_inverted ? tga_height - i - 1 : i;
11905+ stbi_uc *tga_row = tga_data + row * tga_width * tga_comp;
11906+ stbi__getn(s, tga_row, tga_width * tga_comp);
11907+ }
11908+ } else {
11909+ // do I need to load a palette?
11910+ if (tga_indexed) {
11911+ if (tga_palette_len ==
11912+ 0) { /* you have to have at least one entry! */
11913+ STBI_FREE(tga_data);
11914+ return stbi__errpuc("bad palette", "Corrupt TGA");
11915+ }
11916+
11917+ // any data to skip? (offset usually = 0)
11918+ stbi__skip(s, tga_palette_start);
11919+ // load the palette
11920+ tga_palette = (unsigned char *)stbi__malloc_mad2(tga_palette_len,
11921+ tga_comp, 0);
11922+ if (!tga_palette) {
11923+ STBI_FREE(tga_data);
11924+ return stbi__errpuc("outofmem", "Out of memory");
11925+ }
11926+ if (tga_rgb16) {
11927+ stbi_uc *pal_entry = tga_palette;
11928+ STBI_ASSERT(tga_comp == STBI_rgb);
11929+ for (i = 0; i < tga_palette_len; ++i) {
11930+ stbi__tga_read_rgb16(s, pal_entry);
11931+ pal_entry += tga_comp;
11932+ }
11933+ } else if (!stbi__getn(s, tga_palette,
11934+ tga_palette_len * tga_comp)) {
11935+ STBI_FREE(tga_data);
11936+ STBI_FREE(tga_palette);
11937+ return stbi__errpuc("bad palette", "Corrupt TGA");
11938+ }
11939+ }
11940+ // load the data
11941+ for (i = 0; i < tga_width * tga_height; ++i) {
11942+ // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
11943+ if (tga_is_RLE) {
11944+ if (RLE_count == 0) {
11945+ // yep, get the next byte as a RLE command
11946+ int RLE_cmd = stbi__get8(s);
11947+ RLE_count = 1 + (RLE_cmd & 127);
11948+ RLE_repeating = RLE_cmd >> 7;
11949+ read_next_pixel = 1;
11950+ } else if (!RLE_repeating) {
11951+ read_next_pixel = 1;
11952+ }
11953+ } else {
11954+ read_next_pixel = 1;
11955+ }
11956+ // OK, if I need to read a pixel, do it now
11957+ if (read_next_pixel) {
11958+ // load however much data we did have
11959+ if (tga_indexed) {
11960+ // read in index, then perform the lookup
11961+ int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s)
11962+ : stbi__get16le(s);
11963+ if (pal_idx >= tga_palette_len) {
11964+ // invalid index
11965+ pal_idx = 0;
11966+ }
11967+ pal_idx *= tga_comp;
11968+ for (j = 0; j < tga_comp; ++j) {
11969+ raw_data[j] = tga_palette[pal_idx + j];
11970+ }
11971+ } else if (tga_rgb16) {
11972+ STBI_ASSERT(tga_comp == STBI_rgb);
11973+ stbi__tga_read_rgb16(s, raw_data);
11974+ } else {
11975+ // read in the data raw
11976+ for (j = 0; j < tga_comp; ++j) {
11977+ raw_data[j] = stbi__get8(s);
11978+ }
11979+ }
11980+ // clear the reading flag for the next pixel
11981+ read_next_pixel = 0;
11982+ } // end of reading a pixel
11983+
11984+ // copy data
11985+ for (j = 0; j < tga_comp; ++j) {
11986+ tga_data[i * tga_comp + j] = raw_data[j];
11987+ }
11988+
11989+ // in case we're in RLE mode, keep counting down
11990+ --RLE_count;
11991+ }
11992+ // do I need to invert the image?
11993+ if (tga_inverted) {
11994+ for (j = 0; j * 2 < tga_height; ++j) {
11995+ int index1 = j * tga_width * tga_comp;
11996+ int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
11997+ for (i = tga_width * tga_comp; i > 0; --i) {
11998+ unsigned char temp = tga_data[index1];
11999+ tga_data[index1] = tga_data[index2];
12000+ tga_data[index2] = temp;
12001+ ++index1;
12002+ ++index2;
12003+ }
12004+ }
12005+ }
12006+ // clear my palette, if I had one
12007+ if (tga_palette != NULL) {
12008+ STBI_FREE(tga_palette);
12009+ }
12010+ }
12011+
12012+ // swap RGB - if the source data was RGB16, it already is in the right order
12013+ if (tga_comp >= 3 && !tga_rgb16) {
12014+ unsigned char *tga_pixel = tga_data;
12015+ for (i = 0; i < tga_width * tga_height; ++i) {
12016+ unsigned char temp = tga_pixel[0];
12017+ tga_pixel[0] = tga_pixel[2];
12018+ tga_pixel[2] = temp;
12019+ tga_pixel += tga_comp;
12020+ }
12021+ }
12022+
12023+ // convert to target component count
12024+ if (req_comp && req_comp != tga_comp) {
12025+ tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width,
12026+ tga_height);
12027+ }
12028+
12029+ // the things I do to get rid of an error message, and yet keep
12030+ // Microsoft's C compilers happy... [8^(
12031+ tga_palette_start = tga_palette_len = tga_palette_bits = tga_x_origin =
12032+ tga_y_origin = 0;
12033+ STBI_NOTUSED(tga_palette_start);
12034+ // OK, done
12035+ return tga_data;
12036 }
12037 #endif
12038
12039 // *************************************************************************************************
12040-// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
12041+// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz,
12042+// tweaked by STB
12043
12044 #ifndef STBI_NO_PSD
12045-static int stbi__psd_test(stbi__context *s)
12046-{
12047- int r = (stbi__get32be(s) == 0x38425053);
12048- stbi__rewind(s);
12049- return r;
12050-}
12051-
12052-static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
12053-{
12054- int count, nleft, len;
12055-
12056- count = 0;
12057- while ((nleft = pixelCount - count) > 0) {
12058- len = stbi__get8(s);
12059- if (len == 128) {
12060- // No-op.
12061- } else if (len < 128) {
12062- // Copy next len+1 bytes literally.
12063- len++;
12064- if (len > nleft) return 0; // corrupt data
12065- count += len;
12066- while (len) {
12067- *p = stbi__get8(s);
12068- p += 4;
12069- len--;
12070- }
12071- } else if (len > 128) {
12072- stbi_uc val;
12073- // Next -len+1 bytes in the dest are replicated from next source byte.
12074- // (Interpret len as a negative 8-bit int.)
12075- len = 257 - len;
12076- if (len > nleft) return 0; // corrupt data
12077- val = stbi__get8(s);
12078- count += len;
12079- while (len) {
12080- *p = val;
12081- p += 4;
12082- len--;
12083- }
12084- }
12085- }
12086-
12087- return 1;
12088-}
12089-
12090-static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
12091-{
12092- int pixelCount;
12093- int channelCount, compression;
12094- int channel, i;
12095- int bitdepth;
12096- int w,h;
12097- stbi_uc *out;
12098- STBI_NOTUSED(ri);
12099-
12100- // Check identifier
12101- if (stbi__get32be(s) != 0x38425053) // "8BPS"
12102- return stbi__errpuc("not PSD", "Corrupt PSD image");
12103-
12104- // Check file type version.
12105- if (stbi__get16be(s) != 1)
12106- return stbi__errpuc("wrong version", "Unsupported version of PSD image");
12107-
12108- // Skip 6 reserved bytes.
12109- stbi__skip(s, 6 );
12110-
12111- // Read the number of channels (R, G, B, A, etc).
12112- channelCount = stbi__get16be(s);
12113- if (channelCount < 0 || channelCount > 16)
12114- return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
12115-
12116- // Read the rows and columns of the image.
12117- h = stbi__get32be(s);
12118- w = stbi__get32be(s);
12119-
12120- if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
12121- if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
12122-
12123- // Make sure the depth is 8 bits.
12124- bitdepth = stbi__get16be(s);
12125- if (bitdepth != 8 && bitdepth != 16)
12126- return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
12127-
12128- // Make sure the color mode is RGB.
12129- // Valid options are:
12130- // 0: Bitmap
12131- // 1: Grayscale
12132- // 2: Indexed color
12133- // 3: RGB color
12134- // 4: CMYK color
12135- // 7: Multichannel
12136- // 8: Duotone
12137- // 9: Lab color
12138- if (stbi__get16be(s) != 3)
12139- return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
12140-
12141- // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
12142- stbi__skip(s,stbi__get32be(s) );
12143-
12144- // Skip the image resources. (resolution, pen tool paths, etc)
12145- stbi__skip(s, stbi__get32be(s) );
12146-
12147- // Skip the reserved data.
12148- stbi__skip(s, stbi__get32be(s) );
12149-
12150- // Find out if the data is compressed.
12151- // Known values:
12152- // 0: no compression
12153- // 1: RLE compressed
12154- compression = stbi__get16be(s);
12155- if (compression > 1)
12156- return stbi__errpuc("bad compression", "PSD has an unknown compression format");
12157-
12158- // Check size
12159- if (!stbi__mad3sizes_valid(4, w, h, 0))
12160- return stbi__errpuc("too large", "Corrupt PSD");
12161-
12162- // Create the destination image.
12163-
12164- if (!compression && bitdepth == 16 && bpc == 16) {
12165- out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
12166- ri->bits_per_channel = 16;
12167- } else
12168- out = (stbi_uc *) stbi__malloc(4 * w*h);
12169-
12170- if (!out) return stbi__errpuc("outofmem", "Out of memory");
12171- pixelCount = w*h;
12172-
12173- // Initialize the data to zero.
12174- //memset( out, 0, pixelCount * 4 );
12175-
12176- // Finally, the image data.
12177- if (compression) {
12178- // RLE as used by .PSD and .TIFF
12179- // Loop until you get the number of unpacked bytes you are expecting:
12180- // Read the next source byte into n.
12181- // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
12182- // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
12183- // Else if n is 128, noop.
12184- // Endloop
12185-
12186- // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
12187- // which we're going to just skip.
12188- stbi__skip(s, h * channelCount * 2 );
12189-
12190- // Read the RLE data by channel.
12191- for (channel = 0; channel < 4; channel++) {
12192- stbi_uc *p;
12193-
12194- p = out+channel;
12195- if (channel >= channelCount) {
12196- // Fill this channel with default data.
12197- for (i = 0; i < pixelCount; i++, p += 4)
12198- *p = (channel == 3 ? 255 : 0);
12199- } else {
12200- // Read the RLE data.
12201- if (!stbi__psd_decode_rle(s, p, pixelCount)) {
12202- STBI_FREE(out);
12203- return stbi__errpuc("corrupt", "bad RLE data");
12204- }
12205- }
12206- }
12207-
12208- } else {
12209- // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
12210- // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
12211-
12212- // Read the data by channel.
12213- for (channel = 0; channel < 4; channel++) {
12214- if (channel >= channelCount) {
12215- // Fill this channel with default data.
12216- if (bitdepth == 16 && bpc == 16) {
12217- stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
12218- stbi__uint16 val = channel == 3 ? 65535 : 0;
12219- for (i = 0; i < pixelCount; i++, q += 4)
12220- *q = val;
12221- } else {
12222- stbi_uc *p = out+channel;
12223- stbi_uc val = channel == 3 ? 255 : 0;
12224- for (i = 0; i < pixelCount; i++, p += 4)
12225- *p = val;
12226- }
12227- } else {
12228- if (ri->bits_per_channel == 16) { // output bpc
12229- stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
12230- for (i = 0; i < pixelCount; i++, q += 4)
12231- *q = (stbi__uint16) stbi__get16be(s);
12232- } else {
12233- stbi_uc *p = out+channel;
12234- if (bitdepth == 16) { // input bpc
12235- for (i = 0; i < pixelCount; i++, p += 4)
12236- *p = (stbi_uc) (stbi__get16be(s) >> 8);
12237- } else {
12238- for (i = 0; i < pixelCount; i++, p += 4)
12239- *p = stbi__get8(s);
12240- }
12241- }
12242- }
12243- }
12244- }
12245-
12246- // remove weird white matte from PSD
12247- if (channelCount >= 4) {
12248- if (ri->bits_per_channel == 16) {
12249- for (i=0; i < w*h; ++i) {
12250- stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
12251- if (pixel[3] != 0 && pixel[3] != 65535) {
12252- float a = pixel[3] / 65535.0f;
12253- float ra = 1.0f / a;
12254- float inv_a = 65535.0f * (1 - ra);
12255- pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
12256- pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
12257- pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
12258- }
12259- }
12260- } else {
12261- for (i=0; i < w*h; ++i) {
12262- unsigned char *pixel = out + 4*i;
12263- if (pixel[3] != 0 && pixel[3] != 255) {
12264- float a = pixel[3] / 255.0f;
12265- float ra = 1.0f / a;
12266- float inv_a = 255.0f * (1 - ra);
12267- pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
12268- pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
12269- pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
12270- }
12271- }
12272- }
12273- }
12274-
12275- // convert to desired output format
12276- if (req_comp && req_comp != 4) {
12277- if (ri->bits_per_channel == 16)
12278- out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
12279- else
12280- out = stbi__convert_format(out, 4, req_comp, w, h);
12281- if (out == NULL) return out; // stbi__convert_format frees input on failure
12282- }
12283-
12284- if (comp) *comp = 4;
12285- *y = h;
12286- *x = w;
12287-
12288- return out;
12289+static int
12290+stbi__psd_test(stbi__context *s)
12291+{
12292+ int r = (stbi__get32be(s) == 0x38425053);
12293+ stbi__rewind(s);
12294+ return r;
12295+}
12296+
12297+static int
12298+stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
12299+{
12300+ int count, nleft, len;
12301+
12302+ count = 0;
12303+ while ((nleft = pixelCount - count) > 0) {
12304+ len = stbi__get8(s);
12305+ if (len == 128) {
12306+ // No-op.
12307+ } else if (len < 128) {
12308+ // Copy next len+1 bytes literally.
12309+ len++;
12310+ if (len > nleft) {
12311+ return 0; // corrupt data
12312+ }
12313+ count += len;
12314+ while (len) {
12315+ *p = stbi__get8(s);
12316+ p += 4;
12317+ len--;
12318+ }
12319+ } else if (len > 128) {
12320+ stbi_uc val;
12321+ // Next -len+1 bytes in the dest are replicated from next source
12322+ // byte. (Interpret len as a negative 8-bit int.)
12323+ len = 257 - len;
12324+ if (len > nleft) {
12325+ return 0; // corrupt data
12326+ }
12327+ val = stbi__get8(s);
12328+ count += len;
12329+ while (len) {
12330+ *p = val;
12331+ p += 4;
12332+ len--;
12333+ }
12334+ }
12335+ }
12336+
12337+ return 1;
12338+}
12339+
12340+static void *
12341+stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
12342+ stbi__result_info *ri, int bpc)
12343+{
12344+ int pixelCount;
12345+ int channelCount, compression;
12346+ int channel, i;
12347+ int bitdepth;
12348+ int w, h;
12349+ stbi_uc *out;
12350+ STBI_NOTUSED(ri);
12351+
12352+ // Check identifier
12353+ if (stbi__get32be(s) != 0x38425053) { // "8BPS"
12354+ return stbi__errpuc("not PSD", "Corrupt PSD image");
12355+ }
12356+
12357+ // Check file type version.
12358+ if (stbi__get16be(s) != 1) {
12359+ return stbi__errpuc("wrong version",
12360+ "Unsupported version of PSD image");
12361+ }
12362+
12363+ // Skip 6 reserved bytes.
12364+ stbi__skip(s, 6);
12365+
12366+ // Read the number of channels (R, G, B, A, etc).
12367+ channelCount = stbi__get16be(s);
12368+ if (channelCount < 0 || channelCount > 16) {
12369+ return stbi__errpuc("wrong channel count",
12370+ "Unsupported number of channels in PSD image");
12371+ }
12372+
12373+ // Read the rows and columns of the image.
12374+ h = stbi__get32be(s);
12375+ w = stbi__get32be(s);
12376+
12377+ if (h > STBI_MAX_DIMENSIONS) {
12378+ return stbi__errpuc("too large", "Very large image (corrupt?)");
12379+ }
12380+ if (w > STBI_MAX_DIMENSIONS) {
12381+ return stbi__errpuc("too large", "Very large image (corrupt?)");
12382+ }
12383+
12384+ // Make sure the depth is 8 bits.
12385+ bitdepth = stbi__get16be(s);
12386+ if (bitdepth != 8 && bitdepth != 16) {
12387+ return stbi__errpuc("unsupported bit depth",
12388+ "PSD bit depth is not 8 or 16 bit");
12389+ }
12390+
12391+ // Make sure the color mode is RGB.
12392+ // Valid options are:
12393+ // 0: Bitmap
12394+ // 1: Grayscale
12395+ // 2: Indexed color
12396+ // 3: RGB color
12397+ // 4: CMYK color
12398+ // 7: Multichannel
12399+ // 8: Duotone
12400+ // 9: Lab color
12401+ if (stbi__get16be(s) != 3) {
12402+ return stbi__errpuc("wrong color format",
12403+ "PSD is not in RGB color format");
12404+ }
12405+
12406+ // Skip the Mode Data. (It's the palette for indexed color; other info for
12407+ // other modes.)
12408+ stbi__skip(s, stbi__get32be(s));
12409+
12410+ // Skip the image resources. (resolution, pen tool paths, etc)
12411+ stbi__skip(s, stbi__get32be(s));
12412+
12413+ // Skip the reserved data.
12414+ stbi__skip(s, stbi__get32be(s));
12415+
12416+ // Find out if the data is compressed.
12417+ // Known values:
12418+ // 0: no compression
12419+ // 1: RLE compressed
12420+ compression = stbi__get16be(s);
12421+ if (compression > 1) {
12422+ return stbi__errpuc("bad compression",
12423+ "PSD has an unknown compression format");
12424+ }
12425+
12426+ // Check size
12427+ if (!stbi__mad3sizes_valid(4, w, h, 0)) {
12428+ return stbi__errpuc("too large", "Corrupt PSD");
12429+ }
12430+
12431+ // Create the destination image.
12432+
12433+ if (!compression && bitdepth == 16 && bpc == 16) {
12434+ out = (stbi_uc *)stbi__malloc_mad3(8, w, h, 0);
12435+ ri->bits_per_channel = 16;
12436+ } else {
12437+ out = (stbi_uc *)stbi__malloc(4 * w * h);
12438+ }
12439+
12440+ if (!out) {
12441+ return stbi__errpuc("outofmem", "Out of memory");
12442+ }
12443+ pixelCount = w * h;
12444+
12445+ // Initialize the data to zero.
12446+ // memset( out, 0, pixelCount * 4 );
12447+
12448+ // Finally, the image data.
12449+ if (compression) {
12450+ // RLE as used by .PSD and .TIFF
12451+ // Loop until you get the number of unpacked bytes you are expecting:
12452+ // Read the next source byte into n.
12453+ // If n is between 0 and 127 inclusive, copy the next n+1 bytes
12454+ // literally. Else if n is between -127 and -1 inclusive, copy the
12455+ // next byte -n+1 times. Else if n is 128, noop.
12456+ // Endloop
12457+
12458+ // The RLE-compressed data is preceded by a 2-byte data count for each
12459+ // row in the data, which we're going to just skip.
12460+ stbi__skip(s, h * channelCount * 2);
12461+
12462+ // Read the RLE data by channel.
12463+ for (channel = 0; channel < 4; channel++) {
12464+ stbi_uc *p;
12465+
12466+ p = out + channel;
12467+ if (channel >= channelCount) {
12468+ // Fill this channel with default data.
12469+ for (i = 0; i < pixelCount; i++, p += 4) {
12470+ *p = (channel == 3 ? 255 : 0);
12471+ }
12472+ } else {
12473+ // Read the RLE data.
12474+ if (!stbi__psd_decode_rle(s, p, pixelCount)) {
12475+ STBI_FREE(out);
12476+ return stbi__errpuc("corrupt", "bad RLE data");
12477+ }
12478+ }
12479+ }
12480+
12481+ } else {
12482+ // We're at the raw image data. It's each channel in order (Red, Green,
12483+ // Blue, Alpha, ...) where each channel consists of an 8-bit (or 16-bit)
12484+ // value for each pixel in the image.
12485+
12486+ // Read the data by channel.
12487+ for (channel = 0; channel < 4; channel++) {
12488+ if (channel >= channelCount) {
12489+ // Fill this channel with default data.
12490+ if (bitdepth == 16 && bpc == 16) {
12491+ stbi__uint16 *q = ((stbi__uint16 *)out) + channel;
12492+ stbi__uint16 val = channel == 3 ? 65535 : 0;
12493+ for (i = 0; i < pixelCount; i++, q += 4) {
12494+ *q = val;
12495+ }
12496+ } else {
12497+ stbi_uc *p = out + channel;
12498+ stbi_uc val = channel == 3 ? 255 : 0;
12499+ for (i = 0; i < pixelCount; i++, p += 4) {
12500+ *p = val;
12501+ }
12502+ }
12503+ } else {
12504+ if (ri->bits_per_channel == 16) { // output bpc
12505+ stbi__uint16 *q = ((stbi__uint16 *)out) + channel;
12506+ for (i = 0; i < pixelCount; i++, q += 4) {
12507+ *q = (stbi__uint16)stbi__get16be(s);
12508+ }
12509+ } else {
12510+ stbi_uc *p = out + channel;
12511+ if (bitdepth == 16) { // input bpc
12512+ for (i = 0; i < pixelCount; i++, p += 4) {
12513+ *p = (stbi_uc)(stbi__get16be(s) >> 8);
12514+ }
12515+ } else {
12516+ for (i = 0; i < pixelCount; i++, p += 4) {
12517+ *p = stbi__get8(s);
12518+ }
12519+ }
12520+ }
12521+ }
12522+ }
12523+ }
12524+
12525+ // remove weird white matte from PSD
12526+ if (channelCount >= 4) {
12527+ if (ri->bits_per_channel == 16) {
12528+ for (i = 0; i < w * h; ++i) {
12529+ stbi__uint16 *pixel = (stbi__uint16 *)out + 4 * i;
12530+ if (pixel[3] != 0 && pixel[3] != 65535) {
12531+ float a = pixel[3] / 65535.0f;
12532+ float ra = 1.0f / a;
12533+ float inv_a = 65535.0f * (1 - ra);
12534+ pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);
12535+ pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);
12536+ pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);
12537+ }
12538+ }
12539+ } else {
12540+ for (i = 0; i < w * h; ++i) {
12541+ unsigned char *pixel = out + 4 * i;
12542+ if (pixel[3] != 0 && pixel[3] != 255) {
12543+ float a = pixel[3] / 255.0f;
12544+ float ra = 1.0f / a;
12545+ float inv_a = 255.0f * (1 - ra);
12546+ pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);
12547+ pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);
12548+ pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);
12549+ }
12550+ }
12551+ }
12552+ }
12553+
12554+ // convert to desired output format
12555+ if (req_comp && req_comp != 4) {
12556+ if (ri->bits_per_channel == 16) {
12557+ out = (stbi_uc *)stbi__convert_format16((stbi__uint16 *)out, 4,
12558+ req_comp, w, h);
12559+ } else {
12560+ out = stbi__convert_format(out, 4, req_comp, w, h);
12561+ }
12562+ if (out == NULL) {
12563+ return out; // stbi__convert_format frees input on failure
12564+ }
12565+ }
12566+
12567+ if (comp) {
12568+ *comp = 4;
12569+ }
12570+ *y = h;
12571+ *x = w;
12572+
12573+ return out;
12574 }
12575 #endif
12576
12577@@ -6333,216 +7916,273 @@ static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req
12578 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
12579
12580 #ifndef STBI_NO_PIC
12581-static int stbi__pic_is4(stbi__context *s,const char *str)
12582+static int
12583+stbi__pic_is4(stbi__context *s, const char *str)
12584 {
12585- int i;
12586- for (i=0; i<4; ++i)
12587- if (stbi__get8(s) != (stbi_uc)str[i])
12588- return 0;
12589+ int i;
12590+ for (i = 0; i < 4; ++i) {
12591+ if (stbi__get8(s) != (stbi_uc)str[i]) {
12592+ return 0;
12593+ }
12594+ }
12595
12596- return 1;
12597+ return 1;
12598 }
12599
12600-static int stbi__pic_test_core(stbi__context *s)
12601+static int
12602+stbi__pic_test_core(stbi__context *s)
12603 {
12604- int i;
12605+ int i;
12606
12607- if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
12608- return 0;
12609+ if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
12610+ return 0;
12611+ }
12612
12613- for(i=0;i<84;++i)
12614- stbi__get8(s);
12615+ for (i = 0; i < 84; ++i) {
12616+ stbi__get8(s);
12617+ }
12618
12619- if (!stbi__pic_is4(s,"PICT"))
12620- return 0;
12621+ if (!stbi__pic_is4(s, "PICT")) {
12622+ return 0;
12623+ }
12624
12625- return 1;
12626+ return 1;
12627 }
12628
12629-typedef struct
12630-{
12631- stbi_uc size,type,channel;
12632+typedef struct {
12633+ stbi_uc size, type, channel;
12634 } stbi__pic_packet;
12635
12636-static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
12637+static stbi_uc *
12638+stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
12639 {
12640- int mask=0x80, i;
12641+ int mask = 0x80, i;
12642
12643- for (i=0; i<4; ++i, mask>>=1) {
12644- if (channel & mask) {
12645- if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
12646- dest[i]=stbi__get8(s);
12647- }
12648- }
12649+ for (i = 0; i < 4; ++i, mask >>= 1) {
12650+ if (channel & mask) {
12651+ if (stbi__at_eof(s)) {
12652+ return stbi__errpuc("bad file", "PIC file too short");
12653+ }
12654+ dest[i] = stbi__get8(s);
12655+ }
12656+ }
12657
12658- return dest;
12659+ return dest;
12660 }
12661
12662-static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
12663+static void
12664+stbi__copyval(int channel, stbi_uc *dest, const stbi_uc *src)
12665 {
12666- int mask=0x80,i;
12667+ int mask = 0x80, i;
12668
12669- for (i=0;i<4; ++i, mask>>=1)
12670- if (channel&mask)
12671- dest[i]=src[i];
12672+ for (i = 0; i < 4; ++i, mask >>= 1) {
12673+ if (channel & mask) {
12674+ dest[i] = src[i];
12675+ }
12676+ }
12677 }
12678
12679-static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
12680-{
12681- int act_comp=0,num_packets=0,y,chained;
12682- stbi__pic_packet packets[10];
12683-
12684- // this will (should...) cater for even some bizarre stuff like having data
12685- // for the same channel in multiple packets.
12686- do {
12687- stbi__pic_packet *packet;
12688-
12689- if (num_packets==sizeof(packets)/sizeof(packets[0]))
12690- return stbi__errpuc("bad format","too many packets");
12691-
12692- packet = &packets[num_packets++];
12693-
12694- chained = stbi__get8(s);
12695- packet->size = stbi__get8(s);
12696- packet->type = stbi__get8(s);
12697- packet->channel = stbi__get8(s);
12698-
12699- act_comp |= packet->channel;
12700-
12701- if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)");
12702- if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp");
12703- } while (chained);
12704-
12705- *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
12706-
12707- for(y=0; y<height; ++y) {
12708- int packet_idx;
12709-
12710- for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
12711- stbi__pic_packet *packet = &packets[packet_idx];
12712- stbi_uc *dest = result+y*width*4;
12713-
12714- switch (packet->type) {
12715- default:
12716- return stbi__errpuc("bad format","packet has bad compression type");
12717-
12718- case 0: {//uncompressed
12719- int x;
12720-
12721- for(x=0;x<width;++x, dest+=4)
12722- if (!stbi__readval(s,packet->channel,dest))
12723- return 0;
12724- break;
12725- }
12726-
12727- case 1://Pure RLE
12728- {
12729- int left=width, i;
12730-
12731- while (left>0) {
12732- stbi_uc count,value[4];
12733-
12734- count=stbi__get8(s);
12735- if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)");
12736-
12737- if (count > left)
12738- count = (stbi_uc) left;
12739-
12740- if (!stbi__readval(s,packet->channel,value)) return 0;
12741-
12742- for(i=0; i<count; ++i,dest+=4)
12743- stbi__copyval(packet->channel,dest,value);
12744- left -= count;
12745- }
12746- }
12747- break;
12748-
12749- case 2: {//Mixed RLE
12750- int left=width;
12751- while (left>0) {
12752- int count = stbi__get8(s), i;
12753- if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)");
12754-
12755- if (count >= 128) { // Repeated
12756- stbi_uc value[4];
12757-
12758- if (count==128)
12759- count = stbi__get16be(s);
12760- else
12761- count -= 127;
12762- if (count > left)
12763- return stbi__errpuc("bad file","scanline overrun");
12764-
12765- if (!stbi__readval(s,packet->channel,value))
12766- return 0;
12767-
12768- for(i=0;i<count;++i, dest += 4)
12769- stbi__copyval(packet->channel,dest,value);
12770- } else { // Raw
12771- ++count;
12772- if (count>left) return stbi__errpuc("bad file","scanline overrun");
12773-
12774- for(i=0;i<count;++i, dest+=4)
12775- if (!stbi__readval(s,packet->channel,dest))
12776- return 0;
12777- }
12778- left-=count;
12779- }
12780- break;
12781- }
12782- }
12783- }
12784- }
12785-
12786- return result;
12787-}
12788-
12789-static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
12790-{
12791- stbi_uc *result;
12792- int i, x,y, internal_comp;
12793- STBI_NOTUSED(ri);
12794-
12795- if (!comp) comp = &internal_comp;
12796-
12797- for (i=0; i<92; ++i)
12798- stbi__get8(s);
12799-
12800- x = stbi__get16be(s);
12801- y = stbi__get16be(s);
12802-
12803- if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
12804- if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
12805-
12806- if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
12807- if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
12808-
12809- stbi__get32be(s); //skip `ratio'
12810- stbi__get16be(s); //skip `fields'
12811- stbi__get16be(s); //skip `pad'
12812-
12813- // intermediate buffer is RGBA
12814- result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
12815- if (!result) return stbi__errpuc("outofmem", "Out of memory");
12816- memset(result, 0xff, x*y*4);
12817-
12818- if (!stbi__pic_load_core(s,x,y,comp, result)) {
12819- STBI_FREE(result);
12820- result=0;
12821- }
12822- *px = x;
12823- *py = y;
12824- if (req_comp == 0) req_comp = *comp;
12825- result=stbi__convert_format(result,4,req_comp,x,y);
12826-
12827- return result;
12828-}
12829-
12830-static int stbi__pic_test(stbi__context *s)
12831-{
12832- int r = stbi__pic_test_core(s);
12833- stbi__rewind(s);
12834- return r;
12835+static stbi_uc *
12836+stbi__pic_load_core(stbi__context *s, int width, int height, int *comp,
12837+ stbi_uc *result)
12838+{
12839+ int act_comp = 0, num_packets = 0, y, chained;
12840+ stbi__pic_packet packets[10];
12841+
12842+ // this will (should...) cater for even some bizarre stuff like having data
12843+ // for the same channel in multiple packets.
12844+ do {
12845+ stbi__pic_packet *packet;
12846+
12847+ if (num_packets == sizeof(packets) / sizeof(packets[0])) {
12848+ return stbi__errpuc("bad format", "too many packets");
12849+ }
12850+
12851+ packet = &packets[num_packets++];
12852+
12853+ chained = stbi__get8(s);
12854+ packet->size = stbi__get8(s);
12855+ packet->type = stbi__get8(s);
12856+ packet->channel = stbi__get8(s);
12857+
12858+ act_comp |= packet->channel;
12859+
12860+ if (stbi__at_eof(s)) {
12861+ return stbi__errpuc("bad file", "file too short (reading packets)");
12862+ }
12863+ if (packet->size != 8) {
12864+ return stbi__errpuc("bad format", "packet isn't 8bpp");
12865+ }
12866+ } while (chained);
12867+
12868+ *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
12869+
12870+ for (y = 0; y < height; ++y) {
12871+ int packet_idx;
12872+
12873+ for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {
12874+ stbi__pic_packet *packet = &packets[packet_idx];
12875+ stbi_uc *dest = result + y * width * 4;
12876+
12877+ switch (packet->type) {
12878+ default:
12879+ return stbi__errpuc("bad format",
12880+ "packet has bad compression type");
12881+
12882+ case 0: { // uncompressed
12883+ int x;
12884+
12885+ for (x = 0; x < width; ++x, dest += 4) {
12886+ if (!stbi__readval(s, packet->channel, dest)) {
12887+ return 0;
12888+ }
12889+ }
12890+ break;
12891+ }
12892+
12893+ case 1: // Pure RLE
12894+ {
12895+ int left = width, i;
12896+
12897+ while (left > 0) {
12898+ stbi_uc count, value[4];
12899+
12900+ count = stbi__get8(s);
12901+ if (stbi__at_eof(s)) {
12902+ return stbi__errpuc("bad file",
12903+ "file too short (pure read count)");
12904+ }
12905+
12906+ if (count > left) {
12907+ count = (stbi_uc)left;
12908+ }
12909+
12910+ if (!stbi__readval(s, packet->channel, value)) {
12911+ return 0;
12912+ }
12913+
12914+ for (i = 0; i < count; ++i, dest += 4) {
12915+ stbi__copyval(packet->channel, dest, value);
12916+ }
12917+ left -= count;
12918+ }
12919+ } break;
12920+
12921+ case 2: { // Mixed RLE
12922+ int left = width;
12923+ while (left > 0) {
12924+ int count = stbi__get8(s), i;
12925+ if (stbi__at_eof(s)) {
12926+ return stbi__errpuc(
12927+ "bad file", "file too short (mixed read count)");
12928+ }
12929+
12930+ if (count >= 128) { // Repeated
12931+ stbi_uc value[4];
12932+
12933+ if (count == 128) {
12934+ count = stbi__get16be(s);
12935+ } else {
12936+ count -= 127;
12937+ }
12938+ if (count > left) {
12939+ return stbi__errpuc("bad file", "scanline overrun");
12940+ }
12941+
12942+ if (!stbi__readval(s, packet->channel, value)) {
12943+ return 0;
12944+ }
12945+
12946+ for (i = 0; i < count; ++i, dest += 4) {
12947+ stbi__copyval(packet->channel, dest, value);
12948+ }
12949+ } else { // Raw
12950+ ++count;
12951+ if (count > left) {
12952+ return stbi__errpuc("bad file", "scanline overrun");
12953+ }
12954+
12955+ for (i = 0; i < count; ++i, dest += 4) {
12956+ if (!stbi__readval(s, packet->channel, dest)) {
12957+ return 0;
12958+ }
12959+ }
12960+ }
12961+ left -= count;
12962+ }
12963+ break;
12964+ }
12965+ }
12966+ }
12967+ }
12968+
12969+ return result;
12970+}
12971+
12972+static void *
12973+stbi__pic_load(stbi__context *s, int *px, int *py, int *comp, int req_comp,
12974+ stbi__result_info *ri)
12975+{
12976+ stbi_uc *result;
12977+ int i, x, y, internal_comp;
12978+ STBI_NOTUSED(ri);
12979+
12980+ if (!comp) {
12981+ comp = &internal_comp;
12982+ }
12983+
12984+ for (i = 0; i < 92; ++i) {
12985+ stbi__get8(s);
12986+ }
12987+
12988+ x = stbi__get16be(s);
12989+ y = stbi__get16be(s);
12990+
12991+ if (y > STBI_MAX_DIMENSIONS) {
12992+ return stbi__errpuc("too large", "Very large image (corrupt?)");
12993+ }
12994+ if (x > STBI_MAX_DIMENSIONS) {
12995+ return stbi__errpuc("too large", "Very large image (corrupt?)");
12996+ }
12997+
12998+ if (stbi__at_eof(s)) {
12999+ return stbi__errpuc("bad file", "file too short (pic header)");
13000+ }
13001+ if (!stbi__mad3sizes_valid(x, y, 4, 0)) {
13002+ return stbi__errpuc("too large", "PIC image too large to decode");
13003+ }
13004+
13005+ stbi__get32be(s); // skip `ratio'
13006+ stbi__get16be(s); // skip `fields'
13007+ stbi__get16be(s); // skip `pad'
13008+
13009+ // intermediate buffer is RGBA
13010+ result = (stbi_uc *)stbi__malloc_mad3(x, y, 4, 0);
13011+ if (!result) {
13012+ return stbi__errpuc("outofmem", "Out of memory");
13013+ }
13014+ memset(result, 0xff, x * y * 4);
13015+
13016+ if (!stbi__pic_load_core(s, x, y, comp, result)) {
13017+ STBI_FREE(result);
13018+ result = 0;
13019+ }
13020+ *px = x;
13021+ *py = y;
13022+ if (req_comp == 0) {
13023+ req_comp = *comp;
13024+ }
13025+ result = stbi__convert_format(result, 4, req_comp, x, y);
13026+
13027+ return result;
13028+}
13029+
13030+static int
13031+stbi__pic_test(stbi__context *s)
13032+{
13033+ int r = stbi__pic_test_core(s);
13034+ stbi__rewind(s);
13035+ return r;
13036 }
13037 #endif
13038
13039@@ -6550,533 +8190,630 @@ static int stbi__pic_test(stbi__context *s)
13040 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
13041
13042 #ifndef STBI_NO_GIF
13043-typedef struct
13044-{
13045- stbi__int16 prefix;
13046- stbi_uc first;
13047- stbi_uc suffix;
13048+typedef struct {
13049+ stbi__int16 prefix;
13050+ stbi_uc first;
13051+ stbi_uc suffix;
13052 } stbi__gif_lzw;
13053
13054-typedef struct
13055-{
13056- int w,h;
13057- stbi_uc *out; // output buffer (always 4 components)
13058- stbi_uc *background; // The current "background" as far as a gif is concerned
13059- stbi_uc *history;
13060- int flags, bgindex, ratio, transparent, eflags;
13061- stbi_uc pal[256][4];
13062- stbi_uc lpal[256][4];
13063- stbi__gif_lzw codes[8192];
13064- stbi_uc *color_table;
13065- int parse, step;
13066- int lflags;
13067- int start_x, start_y;
13068- int max_x, max_y;
13069- int cur_x, cur_y;
13070- int line_size;
13071- int delay;
13072+typedef struct {
13073+ int w, h;
13074+ stbi_uc *out; // output buffer (always 4 components)
13075+ stbi_uc
13076+ *background; // The current "background" as far as a gif is concerned
13077+ stbi_uc *history;
13078+ int flags, bgindex, ratio, transparent, eflags;
13079+ stbi_uc pal[256][4];
13080+ stbi_uc lpal[256][4];
13081+ stbi__gif_lzw codes[8192];
13082+ stbi_uc *color_table;
13083+ int parse, step;
13084+ int lflags;
13085+ int start_x, start_y;
13086+ int max_x, max_y;
13087+ int cur_x, cur_y;
13088+ int line_size;
13089+ int delay;
13090 } stbi__gif;
13091
13092-static int stbi__gif_test_raw(stbi__context *s)
13093-{
13094- int sz;
13095- if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
13096- sz = stbi__get8(s);
13097- if (sz != '9' && sz != '7') return 0;
13098- if (stbi__get8(s) != 'a') return 0;
13099- return 1;
13100-}
13101-
13102-static int stbi__gif_test(stbi__context *s)
13103-{
13104- int r = stbi__gif_test_raw(s);
13105- stbi__rewind(s);
13106- return r;
13107-}
13108-
13109-static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
13110-{
13111- int i;
13112- for (i=0; i < num_entries; ++i) {
13113- pal[i][2] = stbi__get8(s);
13114- pal[i][1] = stbi__get8(s);
13115- pal[i][0] = stbi__get8(s);
13116- pal[i][3] = transp == i ? 0 : 255;
13117- }
13118-}
13119-
13120-static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
13121-{
13122- stbi_uc version;
13123- if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
13124- return stbi__err("not GIF", "Corrupt GIF");
13125-
13126- version = stbi__get8(s);
13127- if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
13128- if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
13129-
13130- stbi__g_failure_reason = "";
13131- g->w = stbi__get16le(s);
13132- g->h = stbi__get16le(s);
13133- g->flags = stbi__get8(s);
13134- g->bgindex = stbi__get8(s);
13135- g->ratio = stbi__get8(s);
13136- g->transparent = -1;
13137-
13138- if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
13139- if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
13140-
13141- if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
13142-
13143- if (is_info) return 1;
13144-
13145- if (g->flags & 0x80)
13146- stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
13147-
13148- return 1;
13149-}
13150-
13151-static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
13152-{
13153- stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
13154- if (!g) return stbi__err("outofmem", "Out of memory");
13155- if (!stbi__gif_header(s, g, comp, 1)) {
13156- STBI_FREE(g);
13157- stbi__rewind( s );
13158- return 0;
13159- }
13160- if (x) *x = g->w;
13161- if (y) *y = g->h;
13162- STBI_FREE(g);
13163- return 1;
13164-}
13165-
13166-static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
13167-{
13168- stbi_uc *p, *c;
13169- int idx;
13170-
13171- // recurse to decode the prefixes, since the linked-list is backwards,
13172- // and working backwards through an interleaved image would be nasty
13173- if (g->codes[code].prefix >= 0)
13174- stbi__out_gif_code(g, g->codes[code].prefix);
13175-
13176- if (g->cur_y >= g->max_y) return;
13177-
13178- idx = g->cur_x + g->cur_y;
13179- p = &g->out[idx];
13180- g->history[idx / 4] = 1;
13181-
13182- c = &g->color_table[g->codes[code].suffix * 4];
13183- if (c[3] > 128) { // don't render transparent pixels;
13184- p[0] = c[2];
13185- p[1] = c[1];
13186- p[2] = c[0];
13187- p[3] = c[3];
13188- }
13189- g->cur_x += 4;
13190-
13191- if (g->cur_x >= g->max_x) {
13192- g->cur_x = g->start_x;
13193- g->cur_y += g->step;
13194-
13195- while (g->cur_y >= g->max_y && g->parse > 0) {
13196- g->step = (1 << g->parse) * g->line_size;
13197- g->cur_y = g->start_y + (g->step >> 1);
13198- --g->parse;
13199- }
13200- }
13201-}
13202-
13203-static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
13204-{
13205- stbi_uc lzw_cs;
13206- stbi__int32 len, init_code;
13207- stbi__uint32 first;
13208- stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
13209- stbi__gif_lzw *p;
13210-
13211- lzw_cs = stbi__get8(s);
13212- if (lzw_cs > 12) return NULL;
13213- clear = 1 << lzw_cs;
13214- first = 1;
13215- codesize = lzw_cs + 1;
13216- codemask = (1 << codesize) - 1;
13217- bits = 0;
13218- valid_bits = 0;
13219- for (init_code = 0; init_code < clear; init_code++) {
13220- g->codes[init_code].prefix = -1;
13221- g->codes[init_code].first = (stbi_uc) init_code;
13222- g->codes[init_code].suffix = (stbi_uc) init_code;
13223- }
13224-
13225- // support no starting clear code
13226- avail = clear+2;
13227- oldcode = -1;
13228-
13229- len = 0;
13230- for(;;) {
13231- if (valid_bits < codesize) {
13232- if (len == 0) {
13233- len = stbi__get8(s); // start new block
13234- if (len == 0)
13235- return g->out;
13236- }
13237- --len;
13238- bits |= (stbi__int32) stbi__get8(s) << valid_bits;
13239- valid_bits += 8;
13240- } else {
13241- stbi__int32 code = bits & codemask;
13242- bits >>= codesize;
13243- valid_bits -= codesize;
13244- // @OPTIMIZE: is there some way we can accelerate the non-clear path?
13245- if (code == clear) { // clear code
13246- codesize = lzw_cs + 1;
13247- codemask = (1 << codesize) - 1;
13248- avail = clear + 2;
13249- oldcode = -1;
13250- first = 0;
13251- } else if (code == clear + 1) { // end of stream code
13252- stbi__skip(s, len);
13253- while ((len = stbi__get8(s)) > 0)
13254- stbi__skip(s,len);
13255- return g->out;
13256- } else if (code <= avail) {
13257- if (first) {
13258- return stbi__errpuc("no clear code", "Corrupt GIF");
13259- }
13260-
13261- if (oldcode >= 0) {
13262- p = &g->codes[avail++];
13263- if (avail > 8192) {
13264- return stbi__errpuc("too many codes", "Corrupt GIF");
13265- }
13266-
13267- p->prefix = (stbi__int16) oldcode;
13268- p->first = g->codes[oldcode].first;
13269- p->suffix = (code == avail) ? p->first : g->codes[code].first;
13270- } else if (code == avail)
13271- return stbi__errpuc("illegal code in raster", "Corrupt GIF");
13272-
13273- stbi__out_gif_code(g, (stbi__uint16) code);
13274-
13275- if ((avail & codemask) == 0 && avail <= 0x0FFF) {
13276- codesize++;
13277- codemask = (1 << codesize) - 1;
13278- }
13279-
13280- oldcode = code;
13281- } else {
13282- return stbi__errpuc("illegal code in raster", "Corrupt GIF");
13283- }
13284- }
13285- }
13286-}
13287-
13288-// this function is designed to support animated gifs, although stb_image doesn't support it
13289-// two back is the image from two frames ago, used for a very specific disposal format
13290-static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
13291-{
13292- int dispose;
13293- int first_frame;
13294- int pi;
13295- int pcount;
13296- STBI_NOTUSED(req_comp);
13297-
13298- // on first frame, any non-written pixels get the background colour (non-transparent)
13299- first_frame = 0;
13300- if (g->out == 0) {
13301- if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
13302- if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
13303- return stbi__errpuc("too large", "GIF image is too large");
13304- pcount = g->w * g->h;
13305- g->out = (stbi_uc *) stbi__malloc(4 * pcount);
13306- g->background = (stbi_uc *) stbi__malloc(4 * pcount);
13307- g->history = (stbi_uc *) stbi__malloc(pcount);
13308- if (!g->out || !g->background || !g->history)
13309- return stbi__errpuc("outofmem", "Out of memory");
13310-
13311- // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
13312- // background colour is only used for pixels that are not rendered first frame, after that "background"
13313- // color refers to the color that was there the previous frame.
13314- memset(g->out, 0x00, 4 * pcount);
13315- memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
13316- memset(g->history, 0x00, pcount); // pixels that were affected previous frame
13317- first_frame = 1;
13318- } else {
13319- // second frame - how do we dispose of the previous one?
13320- dispose = (g->eflags & 0x1C) >> 2;
13321- pcount = g->w * g->h;
13322-
13323- if ((dispose == 3) && (two_back == 0)) {
13324- dispose = 2; // if I don't have an image to revert back to, default to the old background
13325- }
13326-
13327- if (dispose == 3) { // use previous graphic
13328- for (pi = 0; pi < pcount; ++pi) {
13329- if (g->history[pi]) {
13330- memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
13331- }
13332- }
13333- } else if (dispose == 2) {
13334- // restore what was changed last frame to background before that frame;
13335- for (pi = 0; pi < pcount; ++pi) {
13336- if (g->history[pi]) {
13337- memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
13338- }
13339- }
13340- } else {
13341- // This is a non-disposal case eithe way, so just
13342- // leave the pixels as is, and they will become the new background
13343- // 1: do not dispose
13344- // 0: not specified.
13345- }
13346-
13347- // background is what out is after the undoing of the previou frame;
13348- memcpy( g->background, g->out, 4 * g->w * g->h );
13349- }
13350-
13351- // clear my history;
13352- memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame
13353-
13354- for (;;) {
13355- int tag = stbi__get8(s);
13356- switch (tag) {
13357- case 0x2C: /* Image Descriptor */
13358- {
13359- stbi__int32 x, y, w, h;
13360- stbi_uc *o;
13361-
13362- x = stbi__get16le(s);
13363- y = stbi__get16le(s);
13364- w = stbi__get16le(s);
13365- h = stbi__get16le(s);
13366- if (((x + w) > (g->w)) || ((y + h) > (g->h)))
13367- return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
13368-
13369- g->line_size = g->w * 4;
13370- g->start_x = x * 4;
13371- g->start_y = y * g->line_size;
13372- g->max_x = g->start_x + w * 4;
13373- g->max_y = g->start_y + h * g->line_size;
13374- g->cur_x = g->start_x;
13375- g->cur_y = g->start_y;
13376-
13377- // if the width of the specified rectangle is 0, that means
13378- // we may not see *any* pixels or the image is malformed;
13379- // to make sure this is caught, move the current y down to
13380- // max_y (which is what out_gif_code checks).
13381- if (w == 0)
13382- g->cur_y = g->max_y;
13383-
13384- g->lflags = stbi__get8(s);
13385-
13386- if (g->lflags & 0x40) {
13387- g->step = 8 * g->line_size; // first interlaced spacing
13388- g->parse = 3;
13389- } else {
13390- g->step = g->line_size;
13391- g->parse = 0;
13392- }
13393-
13394- if (g->lflags & 0x80) {
13395- stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
13396- g->color_table = (stbi_uc *) g->lpal;
13397- } else if (g->flags & 0x80) {
13398- g->color_table = (stbi_uc *) g->pal;
13399- } else
13400- return stbi__errpuc("missing color table", "Corrupt GIF");
13401-
13402- o = stbi__process_gif_raster(s, g);
13403- if (!o) return NULL;
13404-
13405- // if this was the first frame,
13406- pcount = g->w * g->h;
13407- if (first_frame && (g->bgindex > 0)) {
13408- // if first frame, any pixel not drawn to gets the background color
13409- for (pi = 0; pi < pcount; ++pi) {
13410- if (g->history[pi] == 0) {
13411- g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
13412- memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
13413- }
13414- }
13415- }
13416-
13417- return o;
13418- }
13419-
13420- case 0x21: // Comment Extension.
13421- {
13422- int len;
13423- int ext = stbi__get8(s);
13424- if (ext == 0xF9) { // Graphic Control Extension.
13425- len = stbi__get8(s);
13426- if (len == 4) {
13427- g->eflags = stbi__get8(s);
13428- g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
13429-
13430- // unset old transparent
13431- if (g->transparent >= 0) {
13432- g->pal[g->transparent][3] = 255;
13433- }
13434- if (g->eflags & 0x01) {
13435- g->transparent = stbi__get8(s);
13436- if (g->transparent >= 0) {
13437- g->pal[g->transparent][3] = 0;
13438- }
13439- } else {
13440- // don't need transparent
13441- stbi__skip(s, 1);
13442- g->transparent = -1;
13443- }
13444- } else {
13445- stbi__skip(s, len);
13446- break;
13447- }
13448- }
13449- while ((len = stbi__get8(s)) != 0) {
13450- stbi__skip(s, len);
13451- }
13452- break;
13453- }
13454-
13455- case 0x3B: // gif stream termination code
13456- return (stbi_uc *) s; // using '1' causes warning on some compilers
13457-
13458- default:
13459- return stbi__errpuc("unknown code", "Corrupt GIF");
13460- }
13461- }
13462-}
13463-
13464-static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
13465-{
13466- STBI_FREE(g->out);
13467- STBI_FREE(g->history);
13468- STBI_FREE(g->background);
13469-
13470- if (out) STBI_FREE(out);
13471- if (delays && *delays) STBI_FREE(*delays);
13472- return stbi__errpuc("outofmem", "Out of memory");
13473-}
13474-
13475-static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
13476-{
13477- if (stbi__gif_test(s)) {
13478- int layers = 0;
13479- stbi_uc *u = 0;
13480- stbi_uc *out = 0;
13481- stbi_uc *two_back = 0;
13482- stbi__gif g;
13483- int stride;
13484- int out_size = 0;
13485- int delays_size = 0;
13486-
13487- STBI_NOTUSED(out_size);
13488- STBI_NOTUSED(delays_size);
13489-
13490- memset(&g, 0, sizeof(g));
13491- if (delays) {
13492- *delays = 0;
13493- }
13494-
13495- do {
13496- u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
13497- if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
13498-
13499- if (u) {
13500- *x = g.w;
13501- *y = g.h;
13502- ++layers;
13503- stride = g.w * g.h * 4;
13504-
13505- if (out) {
13506- void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
13507- if (!tmp)
13508- return stbi__load_gif_main_outofmem(&g, out, delays);
13509- else {
13510- out = (stbi_uc*) tmp;
13511- out_size = layers * stride;
13512- }
13513-
13514- if (delays) {
13515- int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
13516- if (!new_delays)
13517- return stbi__load_gif_main_outofmem(&g, out, delays);
13518- *delays = new_delays;
13519- delays_size = layers * sizeof(int);
13520- }
13521- } else {
13522- out = (stbi_uc*)stbi__malloc( layers * stride );
13523- if (!out)
13524- return stbi__load_gif_main_outofmem(&g, out, delays);
13525- out_size = layers * stride;
13526- if (delays) {
13527- *delays = (int*) stbi__malloc( layers * sizeof(int) );
13528- if (!*delays)
13529- return stbi__load_gif_main_outofmem(&g, out, delays);
13530- delays_size = layers * sizeof(int);
13531- }
13532- }
13533- memcpy( out + ((layers - 1) * stride), u, stride );
13534- if (layers >= 2) {
13535- two_back = out - 2 * stride;
13536- }
13537-
13538- if (delays) {
13539- (*delays)[layers - 1U] = g.delay;
13540- }
13541- }
13542- } while (u != 0);
13543-
13544- // free temp buffer;
13545- STBI_FREE(g.out);
13546- STBI_FREE(g.history);
13547- STBI_FREE(g.background);
13548-
13549- // do the final conversion after loading everything;
13550- if (req_comp && req_comp != 4)
13551- out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
13552-
13553- *z = layers;
13554- return out;
13555- } else {
13556- return stbi__errpuc("not GIF", "Image was not as a gif type.");
13557- }
13558-}
13559-
13560-static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
13561-{
13562- stbi_uc *u = 0;
13563- stbi__gif g;
13564- memset(&g, 0, sizeof(g));
13565- STBI_NOTUSED(ri);
13566-
13567- u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
13568- if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
13569- if (u) {
13570- *x = g.w;
13571- *y = g.h;
13572-
13573- // moved conversion to after successful load so that the same
13574- // can be done for multiple frames.
13575- if (req_comp && req_comp != 4)
13576- u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
13577- } else if (g.out) {
13578- // if there was an error and we allocated an image buffer, free it!
13579- STBI_FREE(g.out);
13580- }
13581-
13582- // free buffers needed for multiple frame loading;
13583- STBI_FREE(g.history);
13584- STBI_FREE(g.background);
13585-
13586- return u;
13587-}
13588-
13589-static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
13590-{
13591- return stbi__gif_info_raw(s,x,y,comp);
13592+static int
13593+stbi__gif_test_raw(stbi__context *s)
13594+{
13595+ int sz;
13596+ if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' ||
13597+ stbi__get8(s) != '8') {
13598+ return 0;
13599+ }
13600+ sz = stbi__get8(s);
13601+ if (sz != '9' && sz != '7') {
13602+ return 0;
13603+ }
13604+ if (stbi__get8(s) != 'a') {
13605+ return 0;
13606+ }
13607+ return 1;
13608+}
13609+
13610+static int
13611+stbi__gif_test(stbi__context *s)
13612+{
13613+ int r = stbi__gif_test_raw(s);
13614+ stbi__rewind(s);
13615+ return r;
13616+}
13617+
13618+static void
13619+stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4],
13620+ int num_entries, int transp)
13621+{
13622+ int i;
13623+ for (i = 0; i < num_entries; ++i) {
13624+ pal[i][2] = stbi__get8(s);
13625+ pal[i][1] = stbi__get8(s);
13626+ pal[i][0] = stbi__get8(s);
13627+ pal[i][3] = transp == i ? 0 : 255;
13628+ }
13629+}
13630+
13631+static int
13632+stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
13633+{
13634+ stbi_uc version;
13635+ if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' ||
13636+ stbi__get8(s) != '8') {
13637+ return stbi__err("not GIF", "Corrupt GIF");
13638+ }
13639+
13640+ version = stbi__get8(s);
13641+ if (version != '7' && version != '9') {
13642+ return stbi__err("not GIF", "Corrupt GIF");
13643+ }
13644+ if (stbi__get8(s) != 'a') {
13645+ return stbi__err("not GIF", "Corrupt GIF");
13646+ }
13647+
13648+ stbi__g_failure_reason = "";
13649+ g->w = stbi__get16le(s);
13650+ g->h = stbi__get16le(s);
13651+ g->flags = stbi__get8(s);
13652+ g->bgindex = stbi__get8(s);
13653+ g->ratio = stbi__get8(s);
13654+ g->transparent = -1;
13655+
13656+ if (g->w > STBI_MAX_DIMENSIONS) {
13657+ return stbi__err("too large", "Very large image (corrupt?)");
13658+ }
13659+ if (g->h > STBI_MAX_DIMENSIONS) {
13660+ return stbi__err("too large", "Very large image (corrupt?)");
13661+ }
13662+
13663+ if (comp != 0) {
13664+ *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the
13665+ // comments
13666+ }
13667+
13668+ if (is_info) {
13669+ return 1;
13670+ }
13671+
13672+ if (g->flags & 0x80) {
13673+ stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
13674+ }
13675+
13676+ return 1;
13677+}
13678+
13679+static int
13680+stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
13681+{
13682+ stbi__gif *g = (stbi__gif *)stbi__malloc(sizeof(stbi__gif));
13683+ if (!g) {
13684+ return stbi__err("outofmem", "Out of memory");
13685+ }
13686+ if (!stbi__gif_header(s, g, comp, 1)) {
13687+ STBI_FREE(g);
13688+ stbi__rewind(s);
13689+ return 0;
13690+ }
13691+ if (x) {
13692+ *x = g->w;
13693+ }
13694+ if (y) {
13695+ *y = g->h;
13696+ }
13697+ STBI_FREE(g);
13698+ return 1;
13699+}
13700+
13701+static void
13702+stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
13703+{
13704+ stbi_uc *p, *c;
13705+ int idx;
13706+
13707+ // recurse to decode the prefixes, since the linked-list is backwards,
13708+ // and working backwards through an interleaved image would be nasty
13709+ if (g->codes[code].prefix >= 0) {
13710+ stbi__out_gif_code(g, g->codes[code].prefix);
13711+ }
13712+
13713+ if (g->cur_y >= g->max_y) {
13714+ return;
13715+ }
13716+
13717+ idx = g->cur_x + g->cur_y;
13718+ p = &g->out[idx];
13719+ g->history[idx / 4] = 1;
13720+
13721+ c = &g->color_table[g->codes[code].suffix * 4];
13722+ if (c[3] > 128) { // don't render transparent pixels;
13723+ p[0] = c[2];
13724+ p[1] = c[1];
13725+ p[2] = c[0];
13726+ p[3] = c[3];
13727+ }
13728+ g->cur_x += 4;
13729+
13730+ if (g->cur_x >= g->max_x) {
13731+ g->cur_x = g->start_x;
13732+ g->cur_y += g->step;
13733+
13734+ while (g->cur_y >= g->max_y && g->parse > 0) {
13735+ g->step = (1 << g->parse) * g->line_size;
13736+ g->cur_y = g->start_y + (g->step >> 1);
13737+ --g->parse;
13738+ }
13739+ }
13740+}
13741+
13742+static stbi_uc *
13743+stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
13744+{
13745+ stbi_uc lzw_cs;
13746+ stbi__int32 len, init_code;
13747+ stbi__uint32 first;
13748+ stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
13749+ stbi__gif_lzw *p;
13750+
13751+ lzw_cs = stbi__get8(s);
13752+ if (lzw_cs > 12) {
13753+ return NULL;
13754+ }
13755+ clear = 1 << lzw_cs;
13756+ first = 1;
13757+ codesize = lzw_cs + 1;
13758+ codemask = (1 << codesize) - 1;
13759+ bits = 0;
13760+ valid_bits = 0;
13761+ for (init_code = 0; init_code < clear; init_code++) {
13762+ g->codes[init_code].prefix = -1;
13763+ g->codes[init_code].first = (stbi_uc)init_code;
13764+ g->codes[init_code].suffix = (stbi_uc)init_code;
13765+ }
13766+
13767+ // support no starting clear code
13768+ avail = clear + 2;
13769+ oldcode = -1;
13770+
13771+ len = 0;
13772+ for (;;) {
13773+ if (valid_bits < codesize) {
13774+ if (len == 0) {
13775+ len = stbi__get8(s); // start new block
13776+ if (len == 0) {
13777+ return g->out;
13778+ }
13779+ }
13780+ --len;
13781+ bits |= (stbi__int32)stbi__get8(s) << valid_bits;
13782+ valid_bits += 8;
13783+ } else {
13784+ stbi__int32 code = bits & codemask;
13785+ bits >>= codesize;
13786+ valid_bits -= codesize;
13787+ // @OPTIMIZE: is there some way we can accelerate the non-clear
13788+ // path?
13789+ if (code == clear) { // clear code
13790+ codesize = lzw_cs + 1;
13791+ codemask = (1 << codesize) - 1;
13792+ avail = clear + 2;
13793+ oldcode = -1;
13794+ first = 0;
13795+ } else if (code == clear + 1) { // end of stream code
13796+ stbi__skip(s, len);
13797+ while ((len = stbi__get8(s)) > 0) {
13798+ stbi__skip(s, len);
13799+ }
13800+ return g->out;
13801+ } else if (code <= avail) {
13802+ if (first) {
13803+ return stbi__errpuc("no clear code", "Corrupt GIF");
13804+ }
13805+
13806+ if (oldcode >= 0) {
13807+ p = &g->codes[avail++];
13808+ if (avail > 8192) {
13809+ return stbi__errpuc("too many codes", "Corrupt GIF");
13810+ }
13811+
13812+ p->prefix = (stbi__int16)oldcode;
13813+ p->first = g->codes[oldcode].first;
13814+ p->suffix =
13815+ (code == avail) ? p->first : g->codes[code].first;
13816+ } else if (code == avail) {
13817+ return stbi__errpuc("illegal code in raster",
13818+ "Corrupt GIF");
13819+ }
13820+
13821+ stbi__out_gif_code(g, (stbi__uint16)code);
13822+
13823+ if ((avail & codemask) == 0 && avail <= 0x0FFF) {
13824+ codesize++;
13825+ codemask = (1 << codesize) - 1;
13826+ }
13827+
13828+ oldcode = code;
13829+ } else {
13830+ return stbi__errpuc("illegal code in raster", "Corrupt GIF");
13831+ }
13832+ }
13833+ }
13834+}
13835+
13836+// this function is designed to support animated gifs, although stb_image
13837+// doesn't support it two back is the image from two frames ago, used for a very
13838+// specific disposal format
13839+static stbi_uc *
13840+stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp,
13841+ stbi_uc *two_back)
13842+{
13843+ int dispose;
13844+ int first_frame;
13845+ int pi;
13846+ int pcount;
13847+ STBI_NOTUSED(req_comp);
13848+
13849+ // on first frame, any non-written pixels get the background colour
13850+ // (non-transparent)
13851+ first_frame = 0;
13852+ if (g->out == 0) {
13853+ if (!stbi__gif_header(s, g, comp, 0)) {
13854+ return 0; // stbi__g_failure_reason set by stbi__gif_header
13855+ }
13856+ if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) {
13857+ return stbi__errpuc("too large", "GIF image is too large");
13858+ }
13859+ pcount = g->w * g->h;
13860+ g->out = (stbi_uc *)stbi__malloc(4 * pcount);
13861+ g->background = (stbi_uc *)stbi__malloc(4 * pcount);
13862+ g->history = (stbi_uc *)stbi__malloc(pcount);
13863+ if (!g->out || !g->background || !g->history) {
13864+ return stbi__errpuc("outofmem", "Out of memory");
13865+ }
13866+
13867+ // image is treated as "transparent" at the start - ie, nothing
13868+ // overwrites the current background; background colour is only used for
13869+ // pixels that are not rendered first frame, after that "background"
13870+ // color refers to the color that was there the previous frame.
13871+ memset(g->out, 0x00, 4 * pcount);
13872+ memset(g->background, 0x00,
13873+ 4 * pcount); // state of the background (starts transparent)
13874+ memset(g->history, 0x00,
13875+ pcount); // pixels that were affected previous frame
13876+ first_frame = 1;
13877+ } else {
13878+ // second frame - how do we dispose of the previous one?
13879+ dispose = (g->eflags & 0x1C) >> 2;
13880+ pcount = g->w * g->h;
13881+
13882+ if ((dispose == 3) && (two_back == 0)) {
13883+ dispose = 2; // if I don't have an image to revert back to, default
13884+ // to the old background
13885+ }
13886+
13887+ if (dispose == 3) { // use previous graphic
13888+ for (pi = 0; pi < pcount; ++pi) {
13889+ if (g->history[pi]) {
13890+ memcpy(&g->out[pi * 4], &two_back[pi * 4], 4);
13891+ }
13892+ }
13893+ } else if (dispose == 2) {
13894+ // restore what was changed last frame to background before that
13895+ // frame;
13896+ for (pi = 0; pi < pcount; ++pi) {
13897+ if (g->history[pi]) {
13898+ memcpy(&g->out[pi * 4], &g->background[pi * 4], 4);
13899+ }
13900+ }
13901+ } else {
13902+ // This is a non-disposal case eithe way, so just
13903+ // leave the pixels as is, and they will become the new background
13904+ // 1: do not dispose
13905+ // 0: not specified.
13906+ }
13907+
13908+ // background is what out is after the undoing of the previou frame;
13909+ memcpy(g->background, g->out, 4 * g->w * g->h);
13910+ }
13911+
13912+ // clear my history;
13913+ memset(g->history, 0x00,
13914+ g->w * g->h); // pixels that were affected previous frame
13915+
13916+ for (;;) {
13917+ int tag = stbi__get8(s);
13918+ switch (tag) {
13919+ case 0x2C: /* Image Descriptor */
13920+ {
13921+ stbi__int32 x, y, w, h;
13922+ stbi_uc *o;
13923+
13924+ x = stbi__get16le(s);
13925+ y = stbi__get16le(s);
13926+ w = stbi__get16le(s);
13927+ h = stbi__get16le(s);
13928+ if (((x + w) > (g->w)) || ((y + h) > (g->h))) {
13929+ return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
13930+ }
13931+
13932+ g->line_size = g->w * 4;
13933+ g->start_x = x * 4;
13934+ g->start_y = y * g->line_size;
13935+ g->max_x = g->start_x + w * 4;
13936+ g->max_y = g->start_y + h * g->line_size;
13937+ g->cur_x = g->start_x;
13938+ g->cur_y = g->start_y;
13939+
13940+ // if the width of the specified rectangle is 0, that means
13941+ // we may not see *any* pixels or the image is malformed;
13942+ // to make sure this is caught, move the current y down to
13943+ // max_y (which is what out_gif_code checks).
13944+ if (w == 0) {
13945+ g->cur_y = g->max_y;
13946+ }
13947+
13948+ g->lflags = stbi__get8(s);
13949+
13950+ if (g->lflags & 0x40) {
13951+ g->step = 8 * g->line_size; // first interlaced spacing
13952+ g->parse = 3;
13953+ } else {
13954+ g->step = g->line_size;
13955+ g->parse = 0;
13956+ }
13957+
13958+ if (g->lflags & 0x80) {
13959+ stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7),
13960+ g->eflags & 0x01 ? g->transparent
13961+ : -1);
13962+ g->color_table = (stbi_uc *)g->lpal;
13963+ } else if (g->flags & 0x80) {
13964+ g->color_table = (stbi_uc *)g->pal;
13965+ } else {
13966+ return stbi__errpuc("missing color table", "Corrupt GIF");
13967+ }
13968+
13969+ o = stbi__process_gif_raster(s, g);
13970+ if (!o) {
13971+ return NULL;
13972+ }
13973+
13974+ // if this was the first frame,
13975+ pcount = g->w * g->h;
13976+ if (first_frame && (g->bgindex > 0)) {
13977+ // if first frame, any pixel not drawn to gets the background
13978+ // color
13979+ for (pi = 0; pi < pcount; ++pi) {
13980+ if (g->history[pi] == 0) {
13981+ g->pal[g->bgindex][3] =
13982+ 255; // just in case it was made transparent, undo
13983+ // that; It will be reset next frame if need
13984+ // be;
13985+ memcpy(&g->out[pi * 4], &g->pal[g->bgindex], 4);
13986+ }
13987+ }
13988+ }
13989+
13990+ return o;
13991+ }
13992+
13993+ case 0x21: // Comment Extension.
13994+ {
13995+ int len;
13996+ int ext = stbi__get8(s);
13997+ if (ext == 0xF9) { // Graphic Control Extension.
13998+ len = stbi__get8(s);
13999+ if (len == 4) {
14000+ g->eflags = stbi__get8(s);
14001+ g->delay =
14002+ 10 * stbi__get16le(s); // delay - 1/100th of a second,
14003+ // saving as 1/1000ths.
14004+
14005+ // unset old transparent
14006+ if (g->transparent >= 0) {
14007+ g->pal[g->transparent][3] = 255;
14008+ }
14009+ if (g->eflags & 0x01) {
14010+ g->transparent = stbi__get8(s);
14011+ if (g->transparent >= 0) {
14012+ g->pal[g->transparent][3] = 0;
14013+ }
14014+ } else {
14015+ // don't need transparent
14016+ stbi__skip(s, 1);
14017+ g->transparent = -1;
14018+ }
14019+ } else {
14020+ stbi__skip(s, len);
14021+ break;
14022+ }
14023+ }
14024+ while ((len = stbi__get8(s)) != 0) {
14025+ stbi__skip(s, len);
14026+ }
14027+ break;
14028+ }
14029+
14030+ case 0x3B: // gif stream termination code
14031+ return (stbi_uc *)s; // using '1' causes warning on some compilers
14032+
14033+ default:
14034+ return stbi__errpuc("unknown code", "Corrupt GIF");
14035+ }
14036+ }
14037+}
14038+
14039+static void *
14040+stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
14041+{
14042+ STBI_FREE(g->out);
14043+ STBI_FREE(g->history);
14044+ STBI_FREE(g->background);
14045+
14046+ if (out) {
14047+ STBI_FREE(out);
14048+ }
14049+ if (delays && *delays) {
14050+ STBI_FREE(*delays);
14051+ }
14052+ return stbi__errpuc("outofmem", "Out of memory");
14053+}
14054+
14055+static void *
14056+stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z,
14057+ int *comp, int req_comp)
14058+{
14059+ if (stbi__gif_test(s)) {
14060+ int layers = 0;
14061+ stbi_uc *u = 0;
14062+ stbi_uc *out = 0;
14063+ stbi_uc *two_back = 0;
14064+ stbi__gif g;
14065+ int stride;
14066+ int out_size = 0;
14067+ int delays_size = 0;
14068+
14069+ STBI_NOTUSED(out_size);
14070+ STBI_NOTUSED(delays_size);
14071+
14072+ memset(&g, 0, sizeof(g));
14073+ if (delays) {
14074+ *delays = 0;
14075+ }
14076+
14077+ do {
14078+ u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
14079+ if (u == (stbi_uc *)s) {
14080+ u = 0; // end of animated gif marker
14081+ }
14082+
14083+ if (u) {
14084+ *x = g.w;
14085+ *y = g.h;
14086+ ++layers;
14087+ stride = g.w * g.h * 4;
14088+
14089+ if (out) {
14090+ void *tmp = (stbi_uc *)STBI_REALLOC_SIZED(out, out_size,
14091+ layers * stride);
14092+ if (!tmp) {
14093+ return stbi__load_gif_main_outofmem(&g, out, delays);
14094+ } else {
14095+ out = (stbi_uc *)tmp;
14096+ out_size = layers * stride;
14097+ }
14098+
14099+ if (delays) {
14100+ int *new_delays = (int *)STBI_REALLOC_SIZED(
14101+ *delays, delays_size, sizeof(int) * layers);
14102+ if (!new_delays) {
14103+ return stbi__load_gif_main_outofmem(&g, out,
14104+ delays);
14105+ }
14106+ *delays = new_delays;
14107+ delays_size = layers * sizeof(int);
14108+ }
14109+ } else {
14110+ out = (stbi_uc *)stbi__malloc(layers * stride);
14111+ if (!out) {
14112+ return stbi__load_gif_main_outofmem(&g, out, delays);
14113+ }
14114+ out_size = layers * stride;
14115+ if (delays) {
14116+ *delays = (int *)stbi__malloc(layers * sizeof(int));
14117+ if (!*delays) {
14118+ return stbi__load_gif_main_outofmem(&g, out,
14119+ delays);
14120+ }
14121+ delays_size = layers * sizeof(int);
14122+ }
14123+ }
14124+ memcpy(out + ((layers - 1) * stride), u, stride);
14125+ if (layers >= 2) {
14126+ two_back = out - 2 * stride;
14127+ }
14128+
14129+ if (delays) {
14130+ (*delays)[layers - 1U] = g.delay;
14131+ }
14132+ }
14133+ } while (u != 0);
14134+
14135+ // free temp buffer;
14136+ STBI_FREE(g.out);
14137+ STBI_FREE(g.history);
14138+ STBI_FREE(g.background);
14139+
14140+ // do the final conversion after loading everything;
14141+ if (req_comp && req_comp != 4) {
14142+ out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
14143+ }
14144+
14145+ *z = layers;
14146+ return out;
14147+ } else {
14148+ return stbi__errpuc("not GIF", "Image was not as a gif type.");
14149+ }
14150+}
14151+
14152+static void *
14153+stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
14154+ stbi__result_info *ri)
14155+{
14156+ stbi_uc *u = 0;
14157+ stbi__gif g;
14158+ memset(&g, 0, sizeof(g));
14159+ STBI_NOTUSED(ri);
14160+
14161+ u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
14162+ if (u == (stbi_uc *)s) {
14163+ u = 0; // end of animated gif marker
14164+ }
14165+ if (u) {
14166+ *x = g.w;
14167+ *y = g.h;
14168+
14169+ // moved conversion to after successful load so that the same
14170+ // can be done for multiple frames.
14171+ if (req_comp && req_comp != 4) {
14172+ u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
14173+ }
14174+ } else if (g.out) {
14175+ // if there was an error and we allocated an image buffer, free it!
14176+ STBI_FREE(g.out);
14177+ }
14178+
14179+ // free buffers needed for multiple frame loading;
14180+ STBI_FREE(g.history);
14181+ STBI_FREE(g.background);
14182+
14183+ return u;
14184+}
14185+
14186+static int
14187+stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
14188+{
14189+ return stbi__gif_info_raw(s, x, y, comp);
14190 }
14191 #endif
14192
14193@@ -7084,397 +8821,496 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
14194 // Radiance RGBE HDR loader
14195 // originally by Nicolas Schulz
14196 #ifndef STBI_NO_HDR
14197-static int stbi__hdr_test_core(stbi__context *s, const char *signature)
14198-{
14199- int i;
14200- for (i=0; signature[i]; ++i)
14201- if (stbi__get8(s) != signature[i])
14202- return 0;
14203- stbi__rewind(s);
14204- return 1;
14205-}
14206-
14207-static int stbi__hdr_test(stbi__context* s)
14208-{
14209- int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
14210- stbi__rewind(s);
14211- if(!r) {
14212- r = stbi__hdr_test_core(s, "#?RGBE\n");
14213- stbi__rewind(s);
14214- }
14215- return r;
14216-}
14217-
14218-#define STBI__HDR_BUFLEN 1024
14219-static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
14220-{
14221- int len=0;
14222- char c = '\0';
14223-
14224- c = (char) stbi__get8(z);
14225-
14226- while (!stbi__at_eof(z) && c != '\n') {
14227- buffer[len++] = c;
14228- if (len == STBI__HDR_BUFLEN-1) {
14229- // flush to end of line
14230- while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
14231- ;
14232- break;
14233- }
14234- c = (char) stbi__get8(z);
14235- }
14236-
14237- buffer[len] = 0;
14238- return buffer;
14239-}
14240-
14241-static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
14242-{
14243- if ( input[3] != 0 ) {
14244- float f1;
14245- // Exponent
14246- f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
14247- if (req_comp <= 2)
14248- output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
14249- else {
14250- output[0] = input[0] * f1;
14251- output[1] = input[1] * f1;
14252- output[2] = input[2] * f1;
14253- }
14254- if (req_comp == 2) output[1] = 1;
14255- if (req_comp == 4) output[3] = 1;
14256- } else {
14257- switch (req_comp) {
14258- case 4: output[3] = 1; /* fallthrough */
14259- case 3: output[0] = output[1] = output[2] = 0;
14260- break;
14261- case 2: output[1] = 1; /* fallthrough */
14262- case 1: output[0] = 0;
14263- break;
14264- }
14265- }
14266-}
14267-
14268-static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
14269-{
14270- char buffer[STBI__HDR_BUFLEN];
14271- char *token;
14272- int valid = 0;
14273- int width, height;
14274- stbi_uc *scanline;
14275- float *hdr_data;
14276- int len;
14277- unsigned char count, value;
14278- int i, j, k, c1,c2, z;
14279- const char *headerToken;
14280- STBI_NOTUSED(ri);
14281-
14282- // Check identifier
14283- headerToken = stbi__hdr_gettoken(s,buffer);
14284- if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
14285- return stbi__errpf("not HDR", "Corrupt HDR image");
14286-
14287- // Parse header
14288- for(;;) {
14289- token = stbi__hdr_gettoken(s,buffer);
14290- if (token[0] == 0) break;
14291- if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
14292- }
14293-
14294- if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
14295-
14296- // Parse width and height
14297- // can't use sscanf() if we're not using stdio!
14298- token = stbi__hdr_gettoken(s,buffer);
14299- if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
14300- token += 3;
14301- height = (int) strtol(token, &token, 10);
14302- while (*token == ' ') ++token;
14303- if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
14304- token += 3;
14305- width = (int) strtol(token, NULL, 10);
14306-
14307- if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
14308- if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
14309-
14310- *x = width;
14311- *y = height;
14312-
14313- if (comp) *comp = 3;
14314- if (req_comp == 0) req_comp = 3;
14315-
14316- if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
14317- return stbi__errpf("too large", "HDR image is too large");
14318-
14319- // Read data
14320- hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
14321- if (!hdr_data)
14322- return stbi__errpf("outofmem", "Out of memory");
14323-
14324- // Load image data
14325- // image data is stored as some number of sca
14326- if ( width < 8 || width >= 32768) {
14327- // Read flat data
14328- for (j=0; j < height; ++j) {
14329- for (i=0; i < width; ++i) {
14330- stbi_uc rgbe[4];
14331- main_decode_loop:
14332- stbi__getn(s, rgbe, 4);
14333- stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
14334- }
14335- }
14336- } else {
14337- // Read RLE-encoded data
14338- scanline = NULL;
14339-
14340- for (j = 0; j < height; ++j) {
14341- c1 = stbi__get8(s);
14342- c2 = stbi__get8(s);
14343- len = stbi__get8(s);
14344- if (c1 != 2 || c2 != 2 || (len & 0x80)) {
14345- // not run-length encoded, so we have to actually use THIS data as a decoded
14346- // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
14347- stbi_uc rgbe[4];
14348- rgbe[0] = (stbi_uc) c1;
14349- rgbe[1] = (stbi_uc) c2;
14350- rgbe[2] = (stbi_uc) len;
14351- rgbe[3] = (stbi_uc) stbi__get8(s);
14352- stbi__hdr_convert(hdr_data, rgbe, req_comp);
14353- i = 1;
14354- j = 0;
14355- STBI_FREE(scanline);
14356- goto main_decode_loop; // yes, this makes no sense
14357- }
14358- len <<= 8;
14359- len |= stbi__get8(s);
14360- if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
14361- if (scanline == NULL) {
14362- scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
14363- if (!scanline) {
14364- STBI_FREE(hdr_data);
14365- return stbi__errpf("outofmem", "Out of memory");
14366- }
14367- }
14368-
14369- for (k = 0; k < 4; ++k) {
14370- int nleft;
14371- i = 0;
14372- while ((nleft = width - i) > 0) {
14373- count = stbi__get8(s);
14374- if (count > 128) {
14375- // Run
14376- value = stbi__get8(s);
14377- count -= 128;
14378- if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
14379- for (z = 0; z < count; ++z)
14380- scanline[i++ * 4 + k] = value;
14381- } else {
14382- // Dump
14383- if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
14384- for (z = 0; z < count; ++z)
14385- scanline[i++ * 4 + k] = stbi__get8(s);
14386- }
14387- }
14388- }
14389- for (i=0; i < width; ++i)
14390- stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
14391- }
14392- if (scanline)
14393- STBI_FREE(scanline);
14394- }
14395-
14396- return hdr_data;
14397-}
14398-
14399-static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
14400-{
14401- char buffer[STBI__HDR_BUFLEN];
14402- char *token;
14403- int valid = 0;
14404- int dummy;
14405-
14406- if (!x) x = &dummy;
14407- if (!y) y = &dummy;
14408- if (!comp) comp = &dummy;
14409-
14410- if (stbi__hdr_test(s) == 0) {
14411- stbi__rewind( s );
14412- return 0;
14413- }
14414-
14415- for(;;) {
14416- token = stbi__hdr_gettoken(s,buffer);
14417- if (token[0] == 0) break;
14418- if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
14419- }
14420-
14421- if (!valid) {
14422- stbi__rewind( s );
14423- return 0;
14424- }
14425- token = stbi__hdr_gettoken(s,buffer);
14426- if (strncmp(token, "-Y ", 3)) {
14427- stbi__rewind( s );
14428- return 0;
14429- }
14430- token += 3;
14431- *y = (int) strtol(token, &token, 10);
14432- while (*token == ' ') ++token;
14433- if (strncmp(token, "+X ", 3)) {
14434- stbi__rewind( s );
14435- return 0;
14436- }
14437- token += 3;
14438- *x = (int) strtol(token, NULL, 10);
14439- *comp = 3;
14440- return 1;
14441+static int
14442+stbi__hdr_test_core(stbi__context *s, const char *signature)
14443+{
14444+ int i;
14445+ for (i = 0; signature[i]; ++i) {
14446+ if (stbi__get8(s) != signature[i]) {
14447+ return 0;
14448+ }
14449+ }
14450+ stbi__rewind(s);
14451+ return 1;
14452+}
14453+
14454+static int
14455+stbi__hdr_test(stbi__context *s)
14456+{
14457+ int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
14458+ stbi__rewind(s);
14459+ if (!r) {
14460+ r = stbi__hdr_test_core(s, "#?RGBE\n");
14461+ stbi__rewind(s);
14462+ }
14463+ return r;
14464+}
14465+
14466+#define STBI__HDR_BUFLEN 1024
14467+static char *
14468+stbi__hdr_gettoken(stbi__context *z, char *buffer)
14469+{
14470+ int len = 0;
14471+ char c = '\0';
14472+
14473+ c = (char)stbi__get8(z);
14474+
14475+ while (!stbi__at_eof(z) && c != '\n') {
14476+ buffer[len++] = c;
14477+ if (len == STBI__HDR_BUFLEN - 1) {
14478+ // flush to end of line
14479+ while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
14480+ ;
14481+ break;
14482+ }
14483+ c = (char)stbi__get8(z);
14484+ }
14485+
14486+ buffer[len] = 0;
14487+ return buffer;
14488+}
14489+
14490+static void
14491+stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
14492+{
14493+ if (input[3] != 0) {
14494+ float f1;
14495+ // Exponent
14496+ f1 = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
14497+ if (req_comp <= 2) {
14498+ output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
14499+ } else {
14500+ output[0] = input[0] * f1;
14501+ output[1] = input[1] * f1;
14502+ output[2] = input[2] * f1;
14503+ }
14504+ if (req_comp == 2) {
14505+ output[1] = 1;
14506+ }
14507+ if (req_comp == 4) {
14508+ output[3] = 1;
14509+ }
14510+ } else {
14511+ switch (req_comp) {
14512+ case 4:
14513+ output[3] = 1; /* fallthrough */
14514+ case 3:
14515+ output[0] = output[1] = output[2] = 0;
14516+ break;
14517+ case 2:
14518+ output[1] = 1; /* fallthrough */
14519+ case 1:
14520+ output[0] = 0;
14521+ break;
14522+ }
14523+ }
14524+}
14525+
14526+static float *
14527+stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
14528+ stbi__result_info *ri)
14529+{
14530+ char buffer[STBI__HDR_BUFLEN];
14531+ char *token;
14532+ int valid = 0;
14533+ int width, height;
14534+ stbi_uc *scanline;
14535+ float *hdr_data;
14536+ int len;
14537+ unsigned char count, value;
14538+ int i, j, k, c1, c2, z;
14539+ const char *headerToken;
14540+ STBI_NOTUSED(ri);
14541+
14542+ // Check identifier
14543+ headerToken = stbi__hdr_gettoken(s, buffer);
14544+ if (strcmp(headerToken, "#?RADIANCE") != 0 &&
14545+ strcmp(headerToken, "#?RGBE") != 0) {
14546+ return stbi__errpf("not HDR", "Corrupt HDR image");
14547+ }
14548+
14549+ // Parse header
14550+ for (;;) {
14551+ token = stbi__hdr_gettoken(s, buffer);
14552+ if (token[0] == 0) {
14553+ break;
14554+ }
14555+ if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) {
14556+ valid = 1;
14557+ }
14558+ }
14559+
14560+ if (!valid) {
14561+ return stbi__errpf("unsupported format", "Unsupported HDR format");
14562+ }
14563+
14564+ // Parse width and height
14565+ // can't use sscanf() if we're not using stdio!
14566+ token = stbi__hdr_gettoken(s, buffer);
14567+ if (strncmp(token, "-Y ", 3)) {
14568+ return stbi__errpf("unsupported data layout", "Unsupported HDR format");
14569+ }
14570+ token += 3;
14571+ height = (int)strtol(token, &token, 10);
14572+ while (*token == ' ') {
14573+ ++token;
14574+ }
14575+ if (strncmp(token, "+X ", 3)) {
14576+ return stbi__errpf("unsupported data layout", "Unsupported HDR format");
14577+ }
14578+ token += 3;
14579+ width = (int)strtol(token, NULL, 10);
14580+
14581+ if (height > STBI_MAX_DIMENSIONS) {
14582+ return stbi__errpf("too large", "Very large image (corrupt?)");
14583+ }
14584+ if (width > STBI_MAX_DIMENSIONS) {
14585+ return stbi__errpf("too large", "Very large image (corrupt?)");
14586+ }
14587+
14588+ *x = width;
14589+ *y = height;
14590+
14591+ if (comp) {
14592+ *comp = 3;
14593+ }
14594+ if (req_comp == 0) {
14595+ req_comp = 3;
14596+ }
14597+
14598+ if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) {
14599+ return stbi__errpf("too large", "HDR image is too large");
14600+ }
14601+
14602+ // Read data
14603+ hdr_data =
14604+ (float *)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
14605+ if (!hdr_data) {
14606+ return stbi__errpf("outofmem", "Out of memory");
14607+ }
14608+
14609+ // Load image data
14610+ // image data is stored as some number of sca
14611+ if (width < 8 || width >= 32768) {
14612+ // Read flat data
14613+ for (j = 0; j < height; ++j) {
14614+ for (i = 0; i < width; ++i) {
14615+ stbi_uc rgbe[4];
14616+ main_decode_loop:
14617+ stbi__getn(s, rgbe, 4);
14618+ stbi__hdr_convert(hdr_data + j * width * req_comp +
14619+ i * req_comp,
14620+ rgbe, req_comp);
14621+ }
14622+ }
14623+ } else {
14624+ // Read RLE-encoded data
14625+ scanline = NULL;
14626+
14627+ for (j = 0; j < height; ++j) {
14628+ c1 = stbi__get8(s);
14629+ c2 = stbi__get8(s);
14630+ len = stbi__get8(s);
14631+ if (c1 != 2 || c2 != 2 || (len & 0x80)) {
14632+ // not run-length encoded, so we have to actually use THIS data
14633+ // as a decoded pixel (note this can't be a valid pixel--one of
14634+ // RGB must be >= 128)
14635+ stbi_uc rgbe[4];
14636+ rgbe[0] = (stbi_uc)c1;
14637+ rgbe[1] = (stbi_uc)c2;
14638+ rgbe[2] = (stbi_uc)len;
14639+ rgbe[3] = (stbi_uc)stbi__get8(s);
14640+ stbi__hdr_convert(hdr_data, rgbe, req_comp);
14641+ i = 1;
14642+ j = 0;
14643+ STBI_FREE(scanline);
14644+ goto main_decode_loop; // yes, this makes no sense
14645+ }
14646+ len <<= 8;
14647+ len |= stbi__get8(s);
14648+ if (len != width) {
14649+ STBI_FREE(hdr_data);
14650+ STBI_FREE(scanline);
14651+ return stbi__errpf("invalid decoded scanline length",
14652+ "corrupt HDR");
14653+ }
14654+ if (scanline == NULL) {
14655+ scanline = (stbi_uc *)stbi__malloc_mad2(width, 4, 0);
14656+ if (!scanline) {
14657+ STBI_FREE(hdr_data);
14658+ return stbi__errpf("outofmem", "Out of memory");
14659+ }
14660+ }
14661+
14662+ for (k = 0; k < 4; ++k) {
14663+ int nleft;
14664+ i = 0;
14665+ while ((nleft = width - i) > 0) {
14666+ count = stbi__get8(s);
14667+ if (count > 128) {
14668+ // Run
14669+ value = stbi__get8(s);
14670+ count -= 128;
14671+ if ((count == 0) || (count > nleft)) {
14672+ STBI_FREE(hdr_data);
14673+ STBI_FREE(scanline);
14674+ return stbi__errpf("corrupt",
14675+ "bad RLE data in HDR");
14676+ }
14677+ for (z = 0; z < count; ++z) {
14678+ scanline[i++ * 4 + k] = value;
14679+ }
14680+ } else {
14681+ // Dump
14682+ if ((count == 0) || (count > nleft)) {
14683+ STBI_FREE(hdr_data);
14684+ STBI_FREE(scanline);
14685+ return stbi__errpf("corrupt",
14686+ "bad RLE data in HDR");
14687+ }
14688+ for (z = 0; z < count; ++z) {
14689+ scanline[i++ * 4 + k] = stbi__get8(s);
14690+ }
14691+ }
14692+ }
14693+ }
14694+ for (i = 0; i < width; ++i) {
14695+ stbi__hdr_convert(hdr_data + (j * width + i) * req_comp,
14696+ scanline + i * 4, req_comp);
14697+ }
14698+ }
14699+ if (scanline) {
14700+ STBI_FREE(scanline);
14701+ }
14702+ }
14703+
14704+ return hdr_data;
14705+}
14706+
14707+static int
14708+stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
14709+{
14710+ char buffer[STBI__HDR_BUFLEN];
14711+ char *token;
14712+ int valid = 0;
14713+ int dummy;
14714+
14715+ if (!x) {
14716+ x = &dummy;
14717+ }
14718+ if (!y) {
14719+ y = &dummy;
14720+ }
14721+ if (!comp) {
14722+ comp = &dummy;
14723+ }
14724+
14725+ if (stbi__hdr_test(s) == 0) {
14726+ stbi__rewind(s);
14727+ return 0;
14728+ }
14729+
14730+ for (;;) {
14731+ token = stbi__hdr_gettoken(s, buffer);
14732+ if (token[0] == 0) {
14733+ break;
14734+ }
14735+ if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) {
14736+ valid = 1;
14737+ }
14738+ }
14739+
14740+ if (!valid) {
14741+ stbi__rewind(s);
14742+ return 0;
14743+ }
14744+ token = stbi__hdr_gettoken(s, buffer);
14745+ if (strncmp(token, "-Y ", 3)) {
14746+ stbi__rewind(s);
14747+ return 0;
14748+ }
14749+ token += 3;
14750+ *y = (int)strtol(token, &token, 10);
14751+ while (*token == ' ') {
14752+ ++token;
14753+ }
14754+ if (strncmp(token, "+X ", 3)) {
14755+ stbi__rewind(s);
14756+ return 0;
14757+ }
14758+ token += 3;
14759+ *x = (int)strtol(token, NULL, 10);
14760+ *comp = 3;
14761+ return 1;
14762 }
14763 #endif // STBI_NO_HDR
14764
14765 #ifndef STBI_NO_BMP
14766-static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
14767-{
14768- void *p;
14769- stbi__bmp_data info;
14770-
14771- info.all_a = 255;
14772- p = stbi__bmp_parse_header(s, &info);
14773- if (p == NULL) {
14774- stbi__rewind( s );
14775- return 0;
14776- }
14777- if (x) *x = s->img_x;
14778- if (y) *y = s->img_y;
14779- if (comp) {
14780- if (info.bpp == 24 && info.ma == 0xff000000)
14781- *comp = 3;
14782- else
14783- *comp = info.ma ? 4 : 3;
14784- }
14785- return 1;
14786+static int
14787+stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
14788+{
14789+ void *p;
14790+ stbi__bmp_data info;
14791+
14792+ info.all_a = 255;
14793+ p = stbi__bmp_parse_header(s, &info);
14794+ if (p == NULL) {
14795+ stbi__rewind(s);
14796+ return 0;
14797+ }
14798+ if (x) {
14799+ *x = s->img_x;
14800+ }
14801+ if (y) {
14802+ *y = s->img_y;
14803+ }
14804+ if (comp) {
14805+ if (info.bpp == 24 && info.ma == 0xff000000) {
14806+ *comp = 3;
14807+ } else {
14808+ *comp = info.ma ? 4 : 3;
14809+ }
14810+ }
14811+ return 1;
14812 }
14813 #endif
14814
14815 #ifndef STBI_NO_PSD
14816-static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
14817-{
14818- int channelCount, dummy, depth;
14819- if (!x) x = &dummy;
14820- if (!y) y = &dummy;
14821- if (!comp) comp = &dummy;
14822- if (stbi__get32be(s) != 0x38425053) {
14823- stbi__rewind( s );
14824- return 0;
14825- }
14826- if (stbi__get16be(s) != 1) {
14827- stbi__rewind( s );
14828- return 0;
14829- }
14830- stbi__skip(s, 6);
14831- channelCount = stbi__get16be(s);
14832- if (channelCount < 0 || channelCount > 16) {
14833- stbi__rewind( s );
14834- return 0;
14835- }
14836- *y = stbi__get32be(s);
14837- *x = stbi__get32be(s);
14838- depth = stbi__get16be(s);
14839- if (depth != 8 && depth != 16) {
14840- stbi__rewind( s );
14841- return 0;
14842- }
14843- if (stbi__get16be(s) != 3) {
14844- stbi__rewind( s );
14845- return 0;
14846- }
14847- *comp = 4;
14848- return 1;
14849-}
14850-
14851-static int stbi__psd_is16(stbi__context *s)
14852-{
14853- int channelCount, depth;
14854- if (stbi__get32be(s) != 0x38425053) {
14855- stbi__rewind( s );
14856- return 0;
14857- }
14858- if (stbi__get16be(s) != 1) {
14859- stbi__rewind( s );
14860- return 0;
14861- }
14862- stbi__skip(s, 6);
14863- channelCount = stbi__get16be(s);
14864- if (channelCount < 0 || channelCount > 16) {
14865- stbi__rewind( s );
14866- return 0;
14867- }
14868- STBI_NOTUSED(stbi__get32be(s));
14869- STBI_NOTUSED(stbi__get32be(s));
14870- depth = stbi__get16be(s);
14871- if (depth != 16) {
14872- stbi__rewind( s );
14873- return 0;
14874- }
14875- return 1;
14876+static int
14877+stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
14878+{
14879+ int channelCount, dummy, depth;
14880+ if (!x) {
14881+ x = &dummy;
14882+ }
14883+ if (!y) {
14884+ y = &dummy;
14885+ }
14886+ if (!comp) {
14887+ comp = &dummy;
14888+ }
14889+ if (stbi__get32be(s) != 0x38425053) {
14890+ stbi__rewind(s);
14891+ return 0;
14892+ }
14893+ if (stbi__get16be(s) != 1) {
14894+ stbi__rewind(s);
14895+ return 0;
14896+ }
14897+ stbi__skip(s, 6);
14898+ channelCount = stbi__get16be(s);
14899+ if (channelCount < 0 || channelCount > 16) {
14900+ stbi__rewind(s);
14901+ return 0;
14902+ }
14903+ *y = stbi__get32be(s);
14904+ *x = stbi__get32be(s);
14905+ depth = stbi__get16be(s);
14906+ if (depth != 8 && depth != 16) {
14907+ stbi__rewind(s);
14908+ return 0;
14909+ }
14910+ if (stbi__get16be(s) != 3) {
14911+ stbi__rewind(s);
14912+ return 0;
14913+ }
14914+ *comp = 4;
14915+ return 1;
14916+}
14917+
14918+static int
14919+stbi__psd_is16(stbi__context *s)
14920+{
14921+ int channelCount, depth;
14922+ if (stbi__get32be(s) != 0x38425053) {
14923+ stbi__rewind(s);
14924+ return 0;
14925+ }
14926+ if (stbi__get16be(s) != 1) {
14927+ stbi__rewind(s);
14928+ return 0;
14929+ }
14930+ stbi__skip(s, 6);
14931+ channelCount = stbi__get16be(s);
14932+ if (channelCount < 0 || channelCount > 16) {
14933+ stbi__rewind(s);
14934+ return 0;
14935+ }
14936+ STBI_NOTUSED(stbi__get32be(s));
14937+ STBI_NOTUSED(stbi__get32be(s));
14938+ depth = stbi__get16be(s);
14939+ if (depth != 16) {
14940+ stbi__rewind(s);
14941+ return 0;
14942+ }
14943+ return 1;
14944 }
14945 #endif
14946
14947 #ifndef STBI_NO_PIC
14948-static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
14949-{
14950- int act_comp=0,num_packets=0,chained,dummy;
14951- stbi__pic_packet packets[10];
14952-
14953- if (!x) x = &dummy;
14954- if (!y) y = &dummy;
14955- if (!comp) comp = &dummy;
14956-
14957- if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
14958- stbi__rewind(s);
14959- return 0;
14960- }
14961-
14962- stbi__skip(s, 88);
14963-
14964- *x = stbi__get16be(s);
14965- *y = stbi__get16be(s);
14966- if (stbi__at_eof(s)) {
14967- stbi__rewind( s);
14968- return 0;
14969- }
14970- if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
14971- stbi__rewind( s );
14972- return 0;
14973- }
14974-
14975- stbi__skip(s, 8);
14976-
14977- do {
14978- stbi__pic_packet *packet;
14979-
14980- if (num_packets==sizeof(packets)/sizeof(packets[0]))
14981- return 0;
14982-
14983- packet = &packets[num_packets++];
14984- chained = stbi__get8(s);
14985- packet->size = stbi__get8(s);
14986- packet->type = stbi__get8(s);
14987- packet->channel = stbi__get8(s);
14988- act_comp |= packet->channel;
14989-
14990- if (stbi__at_eof(s)) {
14991- stbi__rewind( s );
14992- return 0;
14993- }
14994- if (packet->size != 8) {
14995- stbi__rewind( s );
14996- return 0;
14997- }
14998- } while (chained);
14999-
15000- *comp = (act_comp & 0x10 ? 4 : 3);
15001-
15002- return 1;
15003+static int
15004+stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
15005+{
15006+ int act_comp = 0, num_packets = 0, chained, dummy;
15007+ stbi__pic_packet packets[10];
15008+
15009+ if (!x) {
15010+ x = &dummy;
15011+ }
15012+ if (!y) {
15013+ y = &dummy;
15014+ }
15015+ if (!comp) {
15016+ comp = &dummy;
15017+ }
15018+
15019+ if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
15020+ stbi__rewind(s);
15021+ return 0;
15022+ }
15023+
15024+ stbi__skip(s, 88);
15025+
15026+ *x = stbi__get16be(s);
15027+ *y = stbi__get16be(s);
15028+ if (stbi__at_eof(s)) {
15029+ stbi__rewind(s);
15030+ return 0;
15031+ }
15032+ if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {
15033+ stbi__rewind(s);
15034+ return 0;
15035+ }
15036+
15037+ stbi__skip(s, 8);
15038+
15039+ do {
15040+ stbi__pic_packet *packet;
15041+
15042+ if (num_packets == sizeof(packets) / sizeof(packets[0])) {
15043+ return 0;
15044+ }
15045+
15046+ packet = &packets[num_packets++];
15047+ chained = stbi__get8(s);
15048+ packet->size = stbi__get8(s);
15049+ packet->type = stbi__get8(s);
15050+ packet->channel = stbi__get8(s);
15051+ act_comp |= packet->channel;
15052+
15053+ if (stbi__at_eof(s)) {
15054+ stbi__rewind(s);
15055+ return 0;
15056+ }
15057+ if (packet->size != 8) {
15058+ stbi__rewind(s);
15059+ return 0;
15060+ }
15061+ } while (chained);
15062+
15063+ *comp = (act_comp & 0x10 ? 4 : 3);
15064+
15065+ return 1;
15066 }
15067 #endif
15068
15069@@ -7491,282 +9327,369 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
15070
15071 #ifndef STBI_NO_PNM
15072
15073-static int stbi__pnm_test(stbi__context *s)
15074-{
15075- char p, t;
15076- p = (char) stbi__get8(s);
15077- t = (char) stbi__get8(s);
15078- if (p != 'P' || (t != '5' && t != '6')) {
15079- stbi__rewind( s );
15080- return 0;
15081- }
15082- return 1;
15083-}
15084-
15085-static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
15086-{
15087- stbi_uc *out;
15088- STBI_NOTUSED(ri);
15089-
15090- ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
15091- if (ri->bits_per_channel == 0)
15092- return 0;
15093-
15094- if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
15095- if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
15096-
15097- *x = s->img_x;
15098- *y = s->img_y;
15099- if (comp) *comp = s->img_n;
15100-
15101- if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
15102- return stbi__errpuc("too large", "PNM too large");
15103-
15104- out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
15105- if (!out) return stbi__errpuc("outofmem", "Out of memory");
15106- if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) {
15107- STBI_FREE(out);
15108- return stbi__errpuc("bad PNM", "PNM file truncated");
15109- }
15110-
15111- if (req_comp && req_comp != s->img_n) {
15112- if (ri->bits_per_channel == 16) {
15113- out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y);
15114- } else {
15115- out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
15116- }
15117- if (out == NULL) return out; // stbi__convert_format frees input on failure
15118- }
15119- return out;
15120-}
15121-
15122-static int stbi__pnm_isspace(char c)
15123-{
15124- return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
15125-}
15126-
15127-static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
15128-{
15129- for (;;) {
15130- while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
15131- *c = (char) stbi__get8(s);
15132-
15133- if (stbi__at_eof(s) || *c != '#')
15134- break;
15135-
15136- while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
15137- *c = (char) stbi__get8(s);
15138- }
15139-}
15140-
15141-static int stbi__pnm_isdigit(char c)
15142-{
15143- return c >= '0' && c <= '9';
15144-}
15145-
15146-static int stbi__pnm_getinteger(stbi__context *s, char *c)
15147-{
15148- int value = 0;
15149-
15150- while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
15151- value = value*10 + (*c - '0');
15152- *c = (char) stbi__get8(s);
15153- if((value > 214748364) || (value == 214748364 && *c > '7'))
15154- return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
15155- }
15156-
15157- return value;
15158-}
15159-
15160-static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
15161-{
15162- int maxv, dummy;
15163- char c, p, t;
15164-
15165- if (!x) x = &dummy;
15166- if (!y) y = &dummy;
15167- if (!comp) comp = &dummy;
15168+static int
15169+stbi__pnm_test(stbi__context *s)
15170+{
15171+ char p, t;
15172+ p = (char)stbi__get8(s);
15173+ t = (char)stbi__get8(s);
15174+ if (p != 'P' || (t != '5' && t != '6')) {
15175+ stbi__rewind(s);
15176+ return 0;
15177+ }
15178+ return 1;
15179+}
15180+
15181+static void *
15182+stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp,
15183+ stbi__result_info *ri)
15184+{
15185+ stbi_uc *out;
15186+ STBI_NOTUSED(ri);
15187+
15188+ ri->bits_per_channel =
15189+ stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
15190+ if (ri->bits_per_channel == 0) {
15191+ return 0;
15192+ }
15193
15194- stbi__rewind(s);
15195+ if (s->img_y > STBI_MAX_DIMENSIONS) {
15196+ return stbi__errpuc("too large", "Very large image (corrupt?)");
15197+ }
15198+ if (s->img_x > STBI_MAX_DIMENSIONS) {
15199+ return stbi__errpuc("too large", "Very large image (corrupt?)");
15200+ }
15201+
15202+ *x = s->img_x;
15203+ *y = s->img_y;
15204+ if (comp) {
15205+ *comp = s->img_n;
15206+ }
15207+
15208+ if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y,
15209+ ri->bits_per_channel / 8, 0)) {
15210+ return stbi__errpuc("too large", "PNM too large");
15211+ }
15212+
15213+ out = (stbi_uc *)stbi__malloc_mad4(s->img_n, s->img_x, s->img_y,
15214+ ri->bits_per_channel / 8, 0);
15215+ if (!out) {
15216+ return stbi__errpuc("outofmem", "Out of memory");
15217+ }
15218+ if (!stbi__getn(s, out,
15219+ s->img_n * s->img_x * s->img_y *
15220+ (ri->bits_per_channel / 8))) {
15221+ STBI_FREE(out);
15222+ return stbi__errpuc("bad PNM", "PNM file truncated");
15223+ }
15224+
15225+ if (req_comp && req_comp != s->img_n) {
15226+ if (ri->bits_per_channel == 16) {
15227+ out = (stbi_uc *)stbi__convert_format16(
15228+ (stbi__uint16 *)out, s->img_n, req_comp, s->img_x, s->img_y);
15229+ } else {
15230+ out = stbi__convert_format(out, s->img_n, req_comp, s->img_x,
15231+ s->img_y);
15232+ }
15233+ if (out == NULL) {
15234+ return out; // stbi__convert_format frees input on failure
15235+ }
15236+ }
15237+ return out;
15238+}
15239+
15240+static int
15241+stbi__pnm_isspace(char c)
15242+{
15243+ return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' ||
15244+ c == '\r';
15245+}
15246+
15247+static void
15248+stbi__pnm_skip_whitespace(stbi__context *s, char *c)
15249+{
15250+ for (;;) {
15251+ while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) {
15252+ *c = (char)stbi__get8(s);
15253+ }
15254+
15255+ if (stbi__at_eof(s) || *c != '#') {
15256+ break;
15257+ }
15258+
15259+ while (!stbi__at_eof(s) && *c != '\n' && *c != '\r') {
15260+ *c = (char)stbi__get8(s);
15261+ }
15262+ }
15263+}
15264+
15265+static int
15266+stbi__pnm_isdigit(char c)
15267+{
15268+ return c >= '0' && c <= '9';
15269+}
15270+
15271+static int
15272+stbi__pnm_getinteger(stbi__context *s, char *c)
15273+{
15274+ int value = 0;
15275+
15276+ while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
15277+ value = value * 10 + (*c - '0');
15278+ *c = (char)stbi__get8(s);
15279+ if ((value > 214748364) || (value == 214748364 && *c > '7')) {
15280+ return stbi__err(
15281+ "integer parse overflow",
15282+ "Parsing an integer in the PPM header overflowed a 32-bit int");
15283+ }
15284+ }
15285+
15286+ return value;
15287+}
15288+
15289+static int
15290+stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
15291+{
15292+ int maxv, dummy;
15293+ char c, p, t;
15294+
15295+ if (!x) {
15296+ x = &dummy;
15297+ }
15298+ if (!y) {
15299+ y = &dummy;
15300+ }
15301+ if (!comp) {
15302+ comp = &dummy;
15303+ }
15304+
15305+ stbi__rewind(s);
15306+
15307+ // Get identifier
15308+ p = (char)stbi__get8(s);
15309+ t = (char)stbi__get8(s);
15310+ if (p != 'P' || (t != '5' && t != '6')) {
15311+ stbi__rewind(s);
15312+ return 0;
15313+ }
15314+
15315+ *comp =
15316+ (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
15317+
15318+ c = (char)stbi__get8(s);
15319+ stbi__pnm_skip_whitespace(s, &c);
15320+
15321+ *x = stbi__pnm_getinteger(s, &c); // read width
15322+ if (*x == 0) {
15323+ return stbi__err("invalid width",
15324+ "PPM image header had zero or overflowing width");
15325+ }
15326+ stbi__pnm_skip_whitespace(s, &c);
15327
15328- // Get identifier
15329- p = (char) stbi__get8(s);
15330- t = (char) stbi__get8(s);
15331- if (p != 'P' || (t != '5' && t != '6')) {
15332- stbi__rewind(s);
15333- return 0;
15334- }
15335+ *y = stbi__pnm_getinteger(s, &c); // read height
15336+ if (*y == 0) {
15337+ return stbi__err("invalid width",
15338+ "PPM image header had zero or overflowing width");
15339+ }
15340+ stbi__pnm_skip_whitespace(s, &c);
15341
15342- *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
15343-
15344- c = (char) stbi__get8(s);
15345- stbi__pnm_skip_whitespace(s, &c);
15346-
15347- *x = stbi__pnm_getinteger(s, &c); // read width
15348- if(*x == 0)
15349- return stbi__err("invalid width", "PPM image header had zero or overflowing width");
15350- stbi__pnm_skip_whitespace(s, &c);
15351-
15352- *y = stbi__pnm_getinteger(s, &c); // read height
15353- if (*y == 0)
15354- return stbi__err("invalid width", "PPM image header had zero or overflowing width");
15355- stbi__pnm_skip_whitespace(s, &c);
15356-
15357- maxv = stbi__pnm_getinteger(s, &c); // read max value
15358- if (maxv > 65535)
15359- return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
15360- else if (maxv > 255)
15361- return 16;
15362- else
15363- return 8;
15364+ maxv = stbi__pnm_getinteger(s, &c); // read max value
15365+ if (maxv > 65535) {
15366+ return stbi__err("max value > 65535",
15367+ "PPM image supports only 8-bit and 16-bit images");
15368+ } else if (maxv > 255) {
15369+ return 16;
15370+ } else {
15371+ return 8;
15372+ }
15373 }
15374
15375-static int stbi__pnm_is16(stbi__context *s)
15376+static int
15377+stbi__pnm_is16(stbi__context *s)
15378 {
15379- if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
15380- return 1;
15381- return 0;
15382+ if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) {
15383+ return 1;
15384+ }
15385+ return 0;
15386 }
15387 #endif
15388
15389-static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
15390+static int
15391+stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
15392 {
15393- #ifndef STBI_NO_JPEG
15394- if (stbi__jpeg_info(s, x, y, comp)) return 1;
15395- #endif
15396+#ifndef STBI_NO_JPEG
15397+ if (stbi__jpeg_info(s, x, y, comp)) {
15398+ return 1;
15399+ }
15400+#endif
15401
15402- #ifndef STBI_NO_PNG
15403- if (stbi__png_info(s, x, y, comp)) return 1;
15404- #endif
15405+#ifndef STBI_NO_PNG
15406+ if (stbi__png_info(s, x, y, comp)) {
15407+ return 1;
15408+ }
15409+#endif
15410
15411- #ifndef STBI_NO_GIF
15412- if (stbi__gif_info(s, x, y, comp)) return 1;
15413- #endif
15414+#ifndef STBI_NO_GIF
15415+ if (stbi__gif_info(s, x, y, comp)) {
15416+ return 1;
15417+ }
15418+#endif
15419
15420- #ifndef STBI_NO_BMP
15421- if (stbi__bmp_info(s, x, y, comp)) return 1;
15422- #endif
15423+#ifndef STBI_NO_BMP
15424+ if (stbi__bmp_info(s, x, y, comp)) {
15425+ return 1;
15426+ }
15427+#endif
15428
15429- #ifndef STBI_NO_PSD
15430- if (stbi__psd_info(s, x, y, comp)) return 1;
15431- #endif
15432+#ifndef STBI_NO_PSD
15433+ if (stbi__psd_info(s, x, y, comp)) {
15434+ return 1;
15435+ }
15436+#endif
15437
15438- #ifndef STBI_NO_PIC
15439- if (stbi__pic_info(s, x, y, comp)) return 1;
15440- #endif
15441+#ifndef STBI_NO_PIC
15442+ if (stbi__pic_info(s, x, y, comp)) {
15443+ return 1;
15444+ }
15445+#endif
15446
15447- #ifndef STBI_NO_PNM
15448- if (stbi__pnm_info(s, x, y, comp)) return 1;
15449- #endif
15450+#ifndef STBI_NO_PNM
15451+ if (stbi__pnm_info(s, x, y, comp)) {
15452+ return 1;
15453+ }
15454+#endif
15455
15456- #ifndef STBI_NO_HDR
15457- if (stbi__hdr_info(s, x, y, comp)) return 1;
15458- #endif
15459+#ifndef STBI_NO_HDR
15460+ if (stbi__hdr_info(s, x, y, comp)) {
15461+ return 1;
15462+ }
15463+#endif
15464
15465- // test tga last because it's a crappy test!
15466- #ifndef STBI_NO_TGA
15467- if (stbi__tga_info(s, x, y, comp))
15468- return 1;
15469- #endif
15470- return stbi__err("unknown image type", "Image not of any known type, or corrupt");
15471+// test tga last because it's a crappy test!
15472+#ifndef STBI_NO_TGA
15473+ if (stbi__tga_info(s, x, y, comp)) {
15474+ return 1;
15475+ }
15476+#endif
15477+ return stbi__err("unknown image type",
15478+ "Image not of any known type, or corrupt");
15479 }
15480
15481-static int stbi__is_16_main(stbi__context *s)
15482+static int
15483+stbi__is_16_main(stbi__context *s)
15484 {
15485- #ifndef STBI_NO_PNG
15486- if (stbi__png_is16(s)) return 1;
15487- #endif
15488+#ifndef STBI_NO_PNG
15489+ if (stbi__png_is16(s)) {
15490+ return 1;
15491+ }
15492+#endif
15493
15494- #ifndef STBI_NO_PSD
15495- if (stbi__psd_is16(s)) return 1;
15496- #endif
15497+#ifndef STBI_NO_PSD
15498+ if (stbi__psd_is16(s)) {
15499+ return 1;
15500+ }
15501+#endif
15502
15503- #ifndef STBI_NO_PNM
15504- if (stbi__pnm_is16(s)) return 1;
15505- #endif
15506- return 0;
15507+#ifndef STBI_NO_PNM
15508+ if (stbi__pnm_is16(s)) {
15509+ return 1;
15510+ }
15511+#endif
15512+ return 0;
15513 }
15514
15515 #ifndef STBI_NO_STDIO
15516-STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
15517-{
15518- FILE *f = stbi__fopen(filename, "rb");
15519- int result;
15520- if (!f) return stbi__err("can't fopen", "Unable to open file");
15521- result = stbi_info_from_file(f, x, y, comp);
15522- fclose(f);
15523- return result;
15524-}
15525-
15526-STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
15527-{
15528- int r;
15529- stbi__context s;
15530- long pos = ftell(f);
15531- stbi__start_file(&s, f);
15532- r = stbi__info_main(&s,x,y,comp);
15533- fseek(f,pos,SEEK_SET);
15534- return r;
15535-}
15536-
15537-STBIDEF int stbi_is_16_bit(char const *filename)
15538-{
15539- FILE *f = stbi__fopen(filename, "rb");
15540- int result;
15541- if (!f) return stbi__err("can't fopen", "Unable to open file");
15542- result = stbi_is_16_bit_from_file(f);
15543- fclose(f);
15544- return result;
15545-}
15546-
15547-STBIDEF int stbi_is_16_bit_from_file(FILE *f)
15548-{
15549- int r;
15550- stbi__context s;
15551- long pos = ftell(f);
15552- stbi__start_file(&s, f);
15553- r = stbi__is_16_main(&s);
15554- fseek(f,pos,SEEK_SET);
15555- return r;
15556+STBIDEF int
15557+stbi_info(char const *filename, int *x, int *y, int *comp)
15558+{
15559+ FILE *f = stbi__fopen(filename, "rb");
15560+ int result;
15561+ if (!f) {
15562+ return stbi__err("can't fopen", "Unable to open file");
15563+ }
15564+ result = stbi_info_from_file(f, x, y, comp);
15565+ fclose(f);
15566+ return result;
15567+}
15568+
15569+STBIDEF int
15570+stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
15571+{
15572+ int r;
15573+ stbi__context s;
15574+ long pos = ftell(f);
15575+ stbi__start_file(&s, f);
15576+ r = stbi__info_main(&s, x, y, comp);
15577+ fseek(f, pos, SEEK_SET);
15578+ return r;
15579+}
15580+
15581+STBIDEF int
15582+stbi_is_16_bit(char const *filename)
15583+{
15584+ FILE *f = stbi__fopen(filename, "rb");
15585+ int result;
15586+ if (!f) {
15587+ return stbi__err("can't fopen", "Unable to open file");
15588+ }
15589+ result = stbi_is_16_bit_from_file(f);
15590+ fclose(f);
15591+ return result;
15592+}
15593+
15594+STBIDEF int
15595+stbi_is_16_bit_from_file(FILE *f)
15596+{
15597+ int r;
15598+ stbi__context s;
15599+ long pos = ftell(f);
15600+ stbi__start_file(&s, f);
15601+ r = stbi__is_16_main(&s);
15602+ fseek(f, pos, SEEK_SET);
15603+ return r;
15604 }
15605 #endif // !STBI_NO_STDIO
15606
15607-STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
15608+STBIDEF int
15609+stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
15610 {
15611- stbi__context s;
15612- stbi__start_mem(&s,buffer,len);
15613- return stbi__info_main(&s,x,y,comp);
15614+ stbi__context s;
15615+ stbi__start_mem(&s, buffer, len);
15616+ return stbi__info_main(&s, x, y, comp);
15617 }
15618
15619-STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
15620+STBIDEF int
15621+stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y,
15622+ int *comp)
15623 {
15624- stbi__context s;
15625- stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
15626- return stbi__info_main(&s,x,y,comp);
15627+ stbi__context s;
15628+ stbi__start_callbacks(&s, (stbi_io_callbacks *)c, user);
15629+ return stbi__info_main(&s, x, y, comp);
15630 }
15631
15632-STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
15633+STBIDEF int
15634+stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
15635 {
15636- stbi__context s;
15637- stbi__start_mem(&s,buffer,len);
15638- return stbi__is_16_main(&s);
15639+ stbi__context s;
15640+ stbi__start_mem(&s, buffer, len);
15641+ return stbi__is_16_main(&s);
15642 }
15643
15644-STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
15645+STBIDEF int
15646+stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
15647 {
15648- stbi__context s;
15649- stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
15650- return stbi__is_16_main(&s);
15651+ stbi__context s;
15652+ stbi__start_callbacks(&s, (stbi_io_callbacks *)c, user);
15653+ return stbi__is_16_main(&s);
15654 }
15655
15656 #endif // STB_IMAGE_IMPLEMENTATION
15657
15658 /*
15659 revision history:
15660- 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
15661- 2.19 (2018-02-11) fix warning
15662- 2.18 (2018-01-30) fix warnings
15663- 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug
15664+ 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and
15665+ platform ifdefs 2.19 (2018-02-11) fix warning 2.18 (2018-01-30) fix
15666+ warnings 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug
15667 1-bit BMP
15668 *_is_16_bit api
15669 avoid warnings
15670@@ -7781,13 +9704,11 @@ STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user
15671 warning fixes; disable run-time SSE detection on gcc;
15672 uniform handling of optional "return" values;
15673 thread-safe initialization of zlib tables
15674- 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
15675- 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now
15676- 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
15677- 2.11 (2016-04-02) allocate large structures on the stack
15678- remove white matting for transparent PSD
15679- fix reported channel count for PNG & BMP
15680- re-enable SSE2 in non-gcc 64-bit
15681+ 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet
15682+ JPGs 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now 2.12
15683+ (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02)
15684+ allocate large structures on the stack remove white matting for transparent
15685+ PSD fix reported channel count for PNG & BMP re-enable SSE2 in non-gcc 64-bit
15686 support RGB-formatted JPEG
15687 read 16-bit PNGs (only as 8-bit)
15688 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
15689@@ -7795,11 +9716,9 @@ STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user
15690 16-bit-per-pixel TGA (not bit-per-component)
15691 info() for TGA could break due to .hdr handling
15692 info() for BMP shares code instead of sloppy parse
15693- can use STBI_REALLOC_SIZED if allocator doesn't support realloc
15694- code cleanup
15695- 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
15696- 2.07 (2015-09-13) fix compiler warnings
15697- partial animated GIF support
15698+ can use STBI_REALLOC_SIZED if allocator doesn't support
15699+ realloc code cleanup 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD
15700+ as RGBA 2.07 (2015-09-13) fix compiler warnings partial animated GIF support
15701 limited 16-bpc PSD support
15702 #ifdef unused functions
15703 bug with < 92 byte PIC,PNM,HDR,TGA
15704@@ -7810,23 +9729,18 @@ STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user
15705 stbi_set_flip_vertically_on_load (nguillemot)
15706 fix NEON support; fix mingw support
15707 2.02 (2015-01-19) fix incorrect assert, fix warning
15708- 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
15709- 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
15710- 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
15711- progressive JPEG (stb)
15712- PGM/PPM support (Ken Miller)
15713- STBI_MALLOC,STBI_REALLOC,STBI_FREE
15714+ 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit
15715+ without -msse2 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG 2.00
15716+ (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) progressive
15717+ JPEG (stb) PGM/PPM support (Ken Miller) STBI_MALLOC,STBI_REALLOC,STBI_FREE
15718 GIF bugfix -- seemingly never worked
15719 STBI_NO_*, STBI_ONLY_*
15720 1.48 (2014-12-14) fix incorrectly-named assert()
15721- 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
15722- optimize PNG (ryg)
15723- fix bug in interlaced PNG with user-specified channel count (stb)
15724- 1.46 (2014-08-26)
15725- fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
15726- 1.45 (2014-08-16)
15727- fix MSVC-ARM internal compiler error by wrapping malloc
15728- 1.44 (2014-08-07)
15729+ 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar
15730+ Cornut & stb) optimize PNG (ryg) fix bug in interlaced PNG with
15731+ user-specified channel count (stb) 1.46 (2014-08-26) fix broken tRNS chunk
15732+ (colorkey-style transparency) in non-paletted PNG 1.45 (2014-08-16) fix
15733+ MSVC-ARM internal compiler error by wrapping malloc 1.44 (2014-08-07)
15734 various warning fixes from Ronny Chevalier
15735 1.43 (2014-07-15)
15736 fix MSVC-only compiler problem in code changed in 1.42
15737@@ -7835,73 +9749,48 @@ STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user
15738 fixes to stbi__cleanup_jpeg path
15739 added STBI_ASSERT to avoid requiring assert.h
15740 1.41 (2014-06-25)
15741- fix search&replace from 1.36 that messed up comments/error messages
15742- 1.40 (2014-06-22)
15743- fix gcc struct-initialization warning
15744- 1.39 (2014-06-15)
15745- fix to TGA optimization when req_comp != number of components in TGA;
15746- fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
15747- add support for BMP version 5 (more ignored fields)
15748- 1.38 (2014-06-06)
15749- suppress MSVC warnings on integer casts truncating values
15750- fix accidental rename of 'skip' field of I/O
15751- 1.37 (2014-06-04)
15752- remove duplicate typedef
15753- 1.36 (2014-06-03)
15754- convert to header file single-file library
15755- if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
15756- 1.35 (2014-05-27)
15757- various warnings
15758- fix broken STBI_SIMD path
15759- fix bug where stbi_load_from_file no longer left file pointer in correct place
15760- fix broken non-easy path for 32-bit BMP (possibly never used)
15761- TGA optimization by Arseny Kapoulkine
15762- 1.34 (unknown)
15763- use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
15764- 1.33 (2011-07-14)
15765- make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
15766- 1.32 (2011-07-13)
15767- support for "info" function for all supported filetypes (SpartanJ)
15768- 1.31 (2011-06-20)
15769- a few more leak fixes, bug in PNG handling (SpartanJ)
15770- 1.30 (2011-06-11)
15771- added ability to load files via callbacks to accomidate custom input streams (Ben Wenger)
15772+ fix search&replace from 1.36 that messed up comments/error
15773+ messages 1.40 (2014-06-22) fix gcc struct-initialization warning 1.39
15774+ (2014-06-15) fix to TGA optimization when req_comp != number of components in
15775+ TGA; fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my
15776+ test suite) add support for BMP version 5 (more ignored fields) 1.38
15777+ (2014-06-06) suppress MSVC warnings on integer casts truncating values fix
15778+ accidental rename of 'skip' field of I/O 1.37 (2014-06-04) remove duplicate
15779+ typedef 1.36 (2014-06-03) convert to header file single-file library if
15780+ de-iphone isn't set, load iphone images color-swapped instead of returning
15781+ NULL 1.35 (2014-05-27) various warnings fix broken STBI_SIMD path fix bug
15782+ where stbi_load_from_file no longer left file pointer in correct place fix
15783+ broken non-easy path for 32-bit BMP (possibly never used) TGA optimization by
15784+ Arseny Kapoulkine 1.34 (unknown) use STBI_NOTUSED in
15785+ stbi__resample_row_generic(), fix one more leak in tga failure case 1.33
15786+ (2011-07-14) make stbi_is_hdr work in STBI_NO_HDR (as specified), minor
15787+ compiler-friendly improvements 1.32 (2011-07-13) support for "info" function
15788+ for all supported filetypes (SpartanJ) 1.31 (2011-06-20) a few more leak
15789+ fixes, bug in PNG handling (SpartanJ) 1.30 (2011-06-11) added ability to
15790+ load files via callbacks to accommodate custom input streams (Ben Wenger)
15791 removed deprecated format-specific test/load functions
15792- removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
15793- error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
15794- fix inefficiency in decoding 32-bit BMP (David Woo)
15795- 1.29 (2010-08-16)
15796- various warning fixes from Aurelien Pocheville
15797- 1.28 (2010-08-01)
15798- fix bug in GIF palette transparency (SpartanJ)
15799- 1.27 (2010-08-01)
15800- cast-to-stbi_uc to fix warnings
15801- 1.26 (2010-07-24)
15802- fix bug in file buffering for PNG reported by SpartanJ
15803- 1.25 (2010-07-17)
15804- refix trans_data warning (Won Chun)
15805- 1.24 (2010-07-12)
15806- perf improvements reading from files on platforms with lock-heavy fgetc()
15807- minor perf improvements for jpeg
15808- deprecated type-specific functions so we'll get feedback if they're needed
15809- attempt to fix trans_data warning (Won Chun)
15810- 1.23 fixed bug in iPhone support
15811- 1.22 (2010-07-10)
15812- removed image *writing* support
15813- stbi_info support from Jetro Lauha
15814- GIF support from Jean-Marc Lienher
15815+ removed support for installable file formats (stbi_loader) --
15816+ would have been broken for IO callbacks anyway error cases in bmp and tga
15817+ give messages and don't leak (Raymond Barbiero, grisha) fix inefficiency in
15818+ decoding 32-bit BMP (David Woo) 1.29 (2010-08-16) various warning fixes from
15819+ Aurelien Pocheville 1.28 (2010-08-01) fix bug in GIF palette transparency
15820+ (SpartanJ) 1.27 (2010-08-01) cast-to-stbi_uc to fix warnings 1.26
15821+ (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ 1.25
15822+ (2010-07-17) refix trans_data warning (Won Chun) 1.24 (2010-07-12) perf
15823+ improvements reading from files on platforms with lock-heavy fgetc() minor
15824+ perf improvements for jpeg deprecated type-specific functions so we'll get
15825+ feedback if they're needed attempt to fix trans_data warning (Won Chun) 1.23
15826+ fixed bug in iPhone support 1.22 (2010-07-10) removed image *writing*
15827+ support stbi_info support from Jetro Lauha GIF support from Jean-Marc Lienher
15828 iPhone PNG-extensions from James Brown
15829- warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva)
15830- 1.21 fix use of 'stbi_uc' in header (reported by jon blow)
15831- 1.20 added support for Softimage PIC, by Tom Seddon
15832- 1.19 bug in interlaced PNG corruption check (found by ryg)
15833- 1.18 (2008-08-02)
15834- fix a threading bug (local mutable static)
15835- 1.17 support interlaced PNG
15836- 1.16 major bugfix - stbi__convert_format converted one too many pixels
15837- 1.15 initialize some fields for thread safety
15838- 1.14 fix threadsafe conversion bug
15839- header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
15840+ warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err.
15841+ Janez (U+017D)emva) 1.21 fix use of 'stbi_uc' in header (reported by jon
15842+ blow) 1.20 added support for Softimage PIC, by Tom Seddon 1.19 bug in
15843+ interlaced PNG corruption check (found by ryg) 1.18 (2008-08-02) fix a
15844+ threading bug (local mutable static) 1.17 support interlaced PNG 1.16
15845+ major bugfix - stbi__convert_format converted one too many pixels 1.15
15846+ initialize some fields for thread safety 1.14 fix threadsafe conversion
15847+ bug header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
15848 1.13 threadsafe
15849 1.12 const qualifiers in the API
15850 1.11 Support installable IDCT, colorspace conversion routines
15851@@ -7911,15 +9800,14 @@ STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user
15852 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
15853 1.07 attempt to fix C++ warning/errors again
15854 1.06 attempt to fix C++ warning/errors again
15855- 1.05 fix TGA loading to return correct *comp and use good luminance calc
15856- 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
15857- 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
15858- 1.02 support for (subset of) HDR files, float interface for preferred access to them
15859- 1.01 fix bug: possible bug in handling right-side up bmps... not sure
15860- fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
15861- 1.00 interface to zlib that skips zlib header
15862- 0.99 correct handling of alpha in palette
15863- 0.98 TGA loader by lonesock; dynamically add loaders (untested)
15864+ 1.05 fix TGA loading to return correct *comp and use good luminance
15865+ calc 1.04 default float alpha is 1, not 255; use 'void *' for
15866+ stbi_image_free 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR 1.02 support
15867+ for (subset of) HDR files, float interface for preferred access to them 1.01
15868+ fix bug: possible bug in handling right-side up bmps... not sure fix bug: the
15869+ stbi__bmp_load() and stbi__tga_load() functions didn't work at all 1.00
15870+ interface to zlib that skips zlib header 0.99 correct handling of alpha in
15871+ palette 0.98 TGA loader by lonesock; dynamically add loaders (untested)
15872 0.97 jpeg errors on too large a file; also catch another malloc failure
15873 0.96 fix detection of invalid v value - particleman@mollyrocket forum
15874 0.95 during header scan, seek to markers in case of padding
15875@@ -7932,8 +9820,8 @@ STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user
15876 0.60 fix compiling as c++
15877 0.59 fix warnings: merge Dave Moore's -Wall fixes
15878 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
15879- 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
15880- 0.56 fix bug: zlib uncompressed mode len vs. nlen
15881+ 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but
15882+ less than 16 available 0.56 fix bug: zlib uncompressed mode len vs. nlen
15883 0.55 fix bug: restart_interval not initialized to 0
15884 0.54 allow NULL for 'int *comp'
15885 0.53 fix bug in png 3->4; speedup png decoding
15886@@ -7944,7 +9832,6 @@ STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user
15887 first released version
15888 */
15889
15890-
15891 /*
15892 ------------------------------------------------------------------------------
15893 This software is available under 2 licenses -- choose whichever you prefer.
+11837,
-9229
1@@ -3,8 +3,8 @@
2 by Jeff Roberts (v2) and Jorge L Rodriguez
3 http://github.com/nothings/stb
4
5- Can be threaded with the extended API. SSE2, AVX, Neon and WASM SIMD support. Only
6- scaling and translation is supported, no rotations or shears.
7+ Can be threaded with the extended API. SSE2, AVX, Neon and WASM SIMD support.
8+ Only scaling and translation is supported, no rotations or shears.
9
10 COMPILING & LINKING
11 In one C/C++ file that #includes this file, do this:
12@@ -12,34 +12,37 @@
13 before the #include. That will create the implementation in that file.
14
15 EASY API CALLS:
16- Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation, clamps to edge.
17+ Easy API downsamples w/Mitchell filter, upsamples w/cubic interpolation,
18+ clamps to edge.
19
20- stbir_resize_uint8_srgb( input_pixels, input_w, input_h, input_stride_in_bytes,
21- output_pixels, output_w, output_h, output_stride_in_bytes,
22- pixel_layout_enum )
23+ stbir_resize_uint8_srgb( input_pixels, input_w, input_h,
24+ input_stride_in_bytes, output_pixels, output_w, output_h,
25+ output_stride_in_bytes, pixel_layout_enum )
26
27- stbir_resize_uint8_linear( input_pixels, input_w, input_h, input_stride_in_bytes,
28- output_pixels, output_w, output_h, output_stride_in_bytes,
29- pixel_layout_enum )
30+ stbir_resize_uint8_linear( input_pixels, input_w, input_h,
31+ input_stride_in_bytes, output_pixels, output_w, output_h,
32+ output_stride_in_bytes, pixel_layout_enum )
33
34- stbir_resize_float_linear( input_pixels, input_w, input_h, input_stride_in_bytes,
35- output_pixels, output_w, output_h, output_stride_in_bytes,
36- pixel_layout_enum )
37+ stbir_resize_float_linear( input_pixels, input_w, input_h,
38+ input_stride_in_bytes, output_pixels, output_w, output_h,
39+ output_stride_in_bytes, pixel_layout_enum )
40
41- If you pass NULL or zero for the output_pixels, we will allocate the output buffer
42- for you and return it from the function (free with free() or STBIR_FREE).
43- As a special case, XX_stride_in_bytes of 0 means packed continuously in memory.
44+ If you pass NULL or zero for the output_pixels, we will allocate the output
45+ buffer for you and return it from the function (free with free() or
46+ STBIR_FREE). As a special case, XX_stride_in_bytes of 0 means packed
47+ continuously in memory.
48
49 API LEVELS
50- There are three levels of API - easy-to-use, medium-complexity and extended-complexity.
51+ There are three levels of API - easy-to-use, medium-complexity and
52+ extended-complexity.
53
54 See the "header file" section of the source for API documentation.
55
56 ADDITIONAL DOCUMENTATION
57
58 MEMORY ALLOCATION
59- By default, we use malloc and free for memory allocation. To override the
60- memory allocation, before the implementation #include, add a:
61+ By default, we use malloc and free for memory allocation. To override
62+ the memory allocation, before the implementation #include, add a:
63
64 #define STBIR_MALLOC(size,user_data) ...
65 #define STBIR_FREE(ptr,user_data) ...
66@@ -51,79 +54,81 @@
67 PERFORMANCE
68 This library was written with an emphasis on performance. When testing
69 stb_image_resize with RGBA, the fastest mode is STBIR_4CHANNEL with
70- STBIR_TYPE_UINT8 pixels and CLAMPed edges (which is what many other resize
71- libs do by default). Also, make sure SIMD is turned on of course (default
72- for 64-bit targets). Avoid WRAP edge mode if you want the fastest speed.
73+ STBIR_TYPE_UINT8 pixels and CLAMPed edges (which is what many other
74+ resize libs do by default). Also, make sure SIMD is turned on of course
75+ (default for 64-bit targets). Avoid WRAP edge mode if you want the fastest
76+ speed.
77
78- This library also comes with profiling built-in. If you define STBIR_PROFILE,
79- you can use the advanced API and get low-level profiling information by
80- calling stbir_resize_extended_profile_info() or stbir_resize_split_profile_info()
81- after a resize.
82+ This library also comes with profiling built-in. If you define
83+ STBIR_PROFILE, you can use the advanced API and get low-level profiling
84+ information by calling stbir_resize_extended_profile_info() or
85+ stbir_resize_split_profile_info() after a resize.
86
87 SIMD
88 Most of the routines have optimized SSE2, AVX, NEON and WASM versions.
89
90- On Microsoft compilers, we automatically turn on SIMD for 64-bit x64 and
91- ARM; for 32-bit x86 and ARM, you select SIMD mode by defining STBIR_SSE2 or
92- STBIR_NEON. For AVX and AVX2, we auto-select it by detecting the /arch:AVX
93- or /arch:AVX2 switches. You can also always manually turn SSE2, AVX or AVX2
94- support on by defining STBIR_SSE2, STBIR_AVX or STBIR_AVX2.
95-
96- On Linux, SSE2 and Neon is on by default for 64-bit x64 or ARM64. For 32-bit,
97- we select x86 SIMD mode by whether you have -msse2, -mavx or -mavx2 enabled
98- on the command line. For 32-bit ARM, you must pass -mfpu=neon-vfpv4 for both
99- clang and GCC, but GCC also requires an additional -mfp16-format=ieee to
100- automatically enable NEON.
101-
102- On x86 platforms, you can also define STBIR_FP16C to turn on FP16C instructions
103- for converting back and forth to half-floats. This is autoselected when we
104- are using AVX2. Clang and GCC also require the -mf16c switch. ARM always uses
105- the built-in half float hardware NEON instructions.
106-
107- You can also tell us to use multiply-add instructions with STBIR_USE_FMA.
108- Because x86 doesn't always have fma, we turn it off by default to maintain
109- determinism across all platforms. If you don't care about non-FMA determinism
110- and are willing to restrict yourself to more recent x86 CPUs (around the AVX
111- timeframe), then fma will give you around a 15% speedup.
112-
113- You can force off SIMD in all cases by defining STBIR_NO_SIMD. You can turn
114- off AVX or AVX2 specifically with STBIR_NO_AVX or STBIR_NO_AVX2. AVX is 10%
115- to 40% faster, and AVX2 is generally another 12%.
116+ On Microsoft compilers, we automatically turn on SIMD for 64-bit x64
117+ and ARM; for 32-bit x86 and ARM, you select SIMD mode by defining STBIR_SSE2
118+ or STBIR_NEON. For AVX and AVX2, we auto-select it by detecting the /arch:AVX
119+ or /arch:AVX2 switches. You can also always manually turn SSE2, AVX or
120+ AVX2 support on by defining STBIR_SSE2, STBIR_AVX or STBIR_AVX2.
121+
122+ On Linux, SSE2 and Neon is on by default for 64-bit x64 or ARM64. For
123+ 32-bit, we select x86 SIMD mode by whether you have -msse2, -mavx or -mavx2
124+ enabled on the command line. For 32-bit ARM, you must pass -mfpu=neon-vfpv4
125+ for both clang and GCC, but GCC also requires an additional
126+ -mfp16-format=ieee to automatically enable NEON.
127+
128+ On x86 platforms, you can also define STBIR_FP16C to turn on FP16C
129+ instructions for converting back and forth to half-floats. This is
130+ autoselected when we are using AVX2. Clang and GCC also require the -mf16c
131+ switch. ARM always uses the built-in half float hardware NEON instructions.
132+
133+ You can also tell us to use multiply-add instructions with
134+ STBIR_USE_FMA. Because x86 doesn't always have fma, we turn it off by default
135+ to maintain determinism across all platforms. If you don't care about non-FMA
136+ determinism and are willing to restrict yourself to more recent x86 CPUs
137+ (around the AVX timeframe), then fma will give you around a 15% speedup.
138+
139+ You can force off SIMD in all cases by defining STBIR_NO_SIMD. You can
140+ turn off AVX or AVX2 specifically with STBIR_NO_AVX or STBIR_NO_AVX2. AVX is
141+ 10% to 40% faster, and AVX2 is generally another 12%.
142
143 ALPHA CHANNEL
144- Most of the resizing functions provide the ability to control how the alpha
145- channel of an image is processed.
146+ Most of the resizing functions provide the ability to control how the
147+ alpha channel of an image is processed.
148
149 When alpha represents transparency, it is important that when combining
150 colors with filtering, the pixels should not be treated equally; they
151 should use a weighted average based on their alpha values. For example,
152 if a pixel is 1% opaque bright green and another pixel is 99% opaque
153 black and you average them, the average will be 50% opaque, but the
154- unweighted average and will be a middling green color, while the weighted
155- average will be nearly black. This means the unweighted version introduced
156- green energy that didn't exist in the source image.
157+ unweighted average will be a middling green color, while the
158+ weighted average will be nearly black. This means the unweighted version
159+ introduced green energy that didn't exist in the source image.
160
161- (If you want to know why this makes sense, you can work out the math for
162- the following: consider what happens if you alpha composite a source image
163- over a fixed color and then average the output, vs. if you average the
164+ (If you want to know why this makes sense, you can work out the math
165+ for the following: consider what happens if you alpha composite a source
166+ image over a fixed color and then average the output, vs. if you average the
167 source image pixels and then composite that over the same fixed color.
168 Only the weighted average produces the same result as the ground truth
169 composite-then-average result.)
170
171- Therefore, it is in general best to "alpha weight" the pixels when applying
172- filters to them. This essentially means multiplying the colors by the alpha
173- values before combining them, and then dividing by the alpha value at the
174- end.
175-
176- The computer graphics industry introduced a technique called "premultiplied
177- alpha" or "associated alpha" in which image colors are stored in image files
178- already multiplied by their alpha. This saves some math when compositing,
179- and also avoids the need to divide by the alpha at the end (which is quite
180- inefficient). However, while premultiplied alpha is common in the movie CGI
181- industry, it is not commonplace in other industries like videogames, and most
182- consumer file formats are generally expected to contain not-premultiplied
183- colors. For example, Photoshop saves PNG files "unpremultiplied", and web
184- browsers like Chrome and Firefox expect PNG images to be unpremultiplied.
185+ Therefore, it is in general best to "alpha weight" the pixels when
186+ applying filters to them. This essentially means multiplying the colors by
187+ the alpha values before combining them, and then dividing by the alpha value
188+ at the end.
189+
190+ The computer graphics industry introduced a technique called
191+ "premultiplied alpha" or "associated alpha" in which image colors are stored
192+ in image files already multiplied by their alpha. This saves some math when
193+ compositing, and also avoids the need to divide by the alpha at the end
194+ (which is quite inefficient). However, while premultiplied alpha is common in
195+ the movie CGI industry, it is not commonplace in other industries like
196+ videogames, and most consumer file formats are generally expected to contain
197+ not-premultiplied colors. For example, Photoshop saves PNG files
198+ "unpremultiplied", and web browsers like Chrome and Firefox expect PNG images
199+ to be unpremultiplied.
200
201 Note that there are three possibilities that might describe your image
202 and resize expectation:
203@@ -132,100 +137,101 @@
204 2. images are not premultiplied, alpha weighting is not desired
205 3. images are premultiplied
206
207- Both case #2 and case #3 require the exact same math: no alpha weighting
208- should be applied or removed. Only case 1 requires extra math operations;
209- the other two cases can be handled identically.
210+ Both case #2 and case #3 require the exact same math: no alpha
211+ weighting should be applied or removed. Only case 1 requires extra math
212+ operations; the other two cases can be handled identically.
213
214- stb_image_resize expects case #1 by default, applying alpha weighting to
215- images, expecting the input images to be unpremultiplied. This is what the
216+ stb_image_resize expects case #1 by default, applying alpha weighting
217+ to images, expecting the input images to be unpremultiplied. This is what the
218 COLOR+ALPHA buffer types tell the resizer to do.
219
220 When you use the pixel layouts STBIR_RGBA, STBIR_BGRA, STBIR_ARGB,
221- STBIR_ABGR, STBIR_RX, or STBIR_XR you are telling us that the pixels are
222- non-premultiplied. In these cases, the resizer will alpha weight the colors
223- (effectively creating the premultiplied image), do the filtering, and then
224- convert back to non-premult on exit.
225-
226- When you use the pixel layouts STBIR_RGBA_PM, STBIR_RGBA_PM, STBIR_RGBA_PM,
227- STBIR_RGBA_PM, STBIR_RX_PM or STBIR_XR_PM, you are telling that the pixels
228- ARE premultiplied. In this case, the resizer doesn't have to do the
229- premultipling - it can filter directly on the input. This about twice as
230- fast as the non-premultiplied case, so it's the right option if your data is
231- already setup correctly.
232+ STBIR_ABGR, STBIR_RX, or STBIR_XR you are telling us that the pixels
233+ are non-premultiplied. In these cases, the resizer will alpha weight the
234+ colors (effectively creating the premultiplied image), do the filtering, and
235+ then convert back to non-premult on exit.
236+
237+ When you use the pixel layouts STBIR_RGBA_PM, STBIR_BGRA_PM,
238+ STBIR_ARGB_PM, STBIR_ABGR_PM, STBIR_RX_PM or STBIR_XR_PM, you are telling
239+ that the pixels ARE premultiplied. In this case, the resizer doesn't have to
240+ do the premultiplying - it can filter directly on the input. This is about twice
241+ as fast as the non-premultiplied case, so it's the right option if your data
242+ is already setup correctly.
243
244 When you use the pixel layout STBIR_4CHANNEL or STBIR_2CHANNEL, you are
245- telling us that there is no channel that represents transparency; it may be
246- RGB and some unrelated fourth channel that has been stored in the alpha
247- channel, but it is actually not alpha. No special processing will be
248+ telling us that there is no channel that represents transparency; it
249+ may be RGB and some unrelated fourth channel that has been stored in the
250+ alpha channel, but it is actually not alpha. No special processing will be
251 performed.
252
253 The difference between the generic 4 or 2 channel layouts, and the
254- specialized _PM versions is with the _PM versions you are telling us that
255- the data *is* alpha, just don't premultiply it. That's important when
256+ specialized _PM versions is with the _PM versions you are telling us
257+ that the data *is* alpha, just don't premultiply it. That's important when
258 using SRGB pixel formats, we need to know where the alpha is, because
259 it is converted linearly (rather than with the SRGB converters).
260
261 Because alpha weighting produces the same effect as premultiplying, you
262 even have the option with non-premultiplied inputs to let the resizer
263- produce a premultiplied output. Because the intially computed alpha-weighted
264- output image is effectively premultiplied, this is actually more performant
265- than the normal path which un-premultiplies the output image as a final step.
266+ produce a premultiplied output. Because the initially computed
267+ alpha-weighted output image is effectively premultiplied, this is actually
268+ more performant than the normal path which un-premultiplies the output image
269+ as a final step.
270
271- Finally, when converting both in and out of non-premulitplied space (for
272- example, when using STBIR_RGBA), we go to somewhat heroic measures to
273+ Finally, when converting both in and out of non-premultiplied space
274+ (for example, when using STBIR_RGBA), we go to somewhat heroic measures to
275 ensure that areas with zero alpha value pixels get something reasonable
276 in the RGB values. If you don't care about the RGB values of zero alpha
277 pixels, you can call the stbir_set_non_pm_alpha_speed_over_quality()
278- function - this runs a premultiplied resize about 25% faster. That said,
279- when you really care about speed, using premultiplied pixels for both in
280- and out (STBIR_RGBA_PM, etc) much faster than both of these premultiplied
281+ function - this runs a premultiplied resize about 25% faster. That
282+ said, when you really care about speed, using premultiplied pixels for both
283+ in and out (STBIR_RGBA_PM, etc) is much faster than both of these premultiplied
284 options.
285
286 PIXEL LAYOUT CONVERSION
287- The resizer can convert from some pixel layouts to others. When using the
288- stbir_set_pixel_layouts(), you can, for example, specify STBIR_RGBA
289- on input, and STBIR_ARGB on output, and it will re-organize the channels
290- during the resize. Currently, you can only convert between two pixel
291- layouts with the same number of channels.
292+ The resizer can convert from some pixel layouts to others. When using
293+ the stbir_set_pixel_layouts(), you can, for example, specify STBIR_RGBA on
294+ input, and STBIR_ARGB on output, and it will re-organize the channels during
295+ the resize. Currently, you can only convert between two pixel layouts with
296+ the same number of channels.
297
298 DETERMINISM
299- We commit to being deterministic (from x64 to ARM to scalar to SIMD, etc).
300- This requires compiling with fast-math off (using at least /fp:precise).
301- Also, you must turn off fp-contracting (which turns mult+adds into fmas)!
302- We attempt to do this with pragmas, but with Clang, you usually want to add
303- -ffp-contract=off to the command line as well.
304-
305- For 32-bit x86, you must use SSE and SSE2 codegen for determinism. That is,
306- if the scalar x87 unit gets used at all, we immediately lose determinism.
307- On Microsoft Visual Studio 2008 and earlier, from what we can tell there is
308- no way to be deterministic in 32-bit x86 (some x87 always leaks in, even
309- with fp:strict). On 32-bit x86 GCC, determinism requires both -msse2 and
310+ We commit to being deterministic (from x64 to ARM to scalar to SIMD,
311+ etc). This requires compiling with fast-math off (using at least
312+ /fp:precise). Also, you must turn off fp-contracting (which turns mult+adds
313+ into fmas)! We attempt to do this with pragmas, but with Clang, you usually
314+ want to add -ffp-contract=off to the command line as well.
315+
316+ For 32-bit x86, you must use SSE and SSE2 codegen for determinism. That
317+ is, if the scalar x87 unit gets used at all, we immediately lose determinism.
318+ On Microsoft Visual Studio 2008 and earlier, from what we can tell
319+ there is no way to be deterministic in 32-bit x86 (some x87 always leaks in,
320+ even with fp:strict). On 32-bit x86 GCC, determinism requires both -msse2 and
321 -fpmath=sse.
322
323- Note that we will not be deterministic with float data containing NaNs -
324- the NaNs will propagate differently on different SIMD and platforms.
325+ Note that we will not be deterministic with float data containing NaNs
326+ - the NaNs will propagate differently on different SIMD and platforms.
327
328 If you turn on STBIR_USE_FMA, then we will be deterministic with other
329- fma targets, but we will differ from non-fma targets (this is unavoidable,
330- because a fma isn't simply an add with a mult - it also introduces a
331- rounding difference compared to non-fma instruction sequences.
332+ fma targets, but we will differ from non-fma targets (this is
333+ unavoidable, because a fma isn't simply an add with a mult - it also
334+ introduces a rounding difference compared to non-fma instruction sequences).
335
336 FLOAT PIXEL FORMAT RANGE
337- Any range of values can be used for the non-alpha float data that you pass
338- in (0 to 1, -1 to 1, whatever). However, if you are inputting float values
339- but *outputting* bytes or shorts, you must use a range of 0 to 1 so that we
340- scale back properly. The alpha channel must also be 0 to 1 for any format
341- that does premultiplication prior to resizing.
342+ Any range of values can be used for the non-alpha float data that you
343+ pass in (0 to 1, -1 to 1, whatever). However, if you are inputting float
344+ values but *outputting* bytes or shorts, you must use a range of 0 to 1 so
345+ that we scale back properly. The alpha channel must also be 0 to 1 for any
346+ format that does premultiplication prior to resizing.
347
348- Note also that with float output, using filters with negative lobes, the
349- output filtered values might go slightly out of range. You can define
350- STBIR_FLOAT_LOW_CLAMP and/or STBIR_FLOAT_HIGH_CLAMP to specify the range
351- to clamp to on output, if that's important.
352+ Note also that with float output, using filters with negative lobes,
353+ the output filtered values might go slightly out of range. You can define
354+ STBIR_FLOAT_LOW_CLAMP and/or STBIR_FLOAT_HIGH_CLAMP to specify the
355+ range to clamp to on output, if that's important.
356
357 MAX/MIN SCALE FACTORS
358- The input pixel resolutions are in integers, and we do the internal pointer
359- resolution in size_t sized integers. However, the scale ratio from input
360- resolution to output resolution is calculated in float form. This means
361+ The input pixel resolutions are in integers, and we do the internal
362+ pointer resolution in size_t sized integers. However, the scale ratio from
363+ input resolution to output resolution is calculated in float form. This means
364 the effective possible scale ratio is limited to 24 bits (or 16 million
365 to 1). As you get close to the size of the float resolution (again, 16
366 million pixels wide or high), you might start seeing float inaccuracy
367@@ -234,10 +240,10 @@
368 buffers).
369
370 FLIPPED IMAGES
371- Stride is just the delta from one scanline to the next. This means you can
372- use a negative stride to handle inverted images (point to the final
373- scanline and use a negative stride). You can invert the input or output,
374- using negative strides.
375+ Stride is just the delta from one scanline to the next. This means you
376+ can use a negative stride to handle inverted images (point to the final
377+ scanline and use a negative stride). You can invert the input or
378+ output, using negative strides.
379
380 DEFAULT FILTERS
381 For functions which don't provide explicit control over what filters to
382@@ -254,37 +260,41 @@
383 using the stbir_set_filter_callbacks function.
384
385 PROGRESS
386- For interactive use with slow resize operations, you can use the
387- scanline callbacks in the extended API. It would have to be a *very* large
388- image resample to need progress though - we're very fast.
389+ For interactive use with slow resize operations, you can use the
390+ scanline callbacks in the extended API. It would have to be a *very*
391+ large image resample to need progress though - we're very fast.
392
393 CEIL and FLOOR
394- In scalar mode, the only functions we use from math.h are ceilf and floorf,
395- but if you have your own versions, you can define the STBIR_CEILF(v) and
396- STBIR_FLOORF(v) macros and we'll use them instead. In SIMD, we just use
397+ In scalar mode, the only functions we use from math.h are ceilf and
398+ floorf, but if you have your own versions, you can define the STBIR_CEILF(v)
399+ and STBIR_FLOORF(v) macros and we'll use them instead. In SIMD, we just use
400 our own versions.
401
402 ASSERT
403 Define STBIR_ASSERT(boolval) to override assert() and not use assert.h
404
405 PORTING FROM VERSION 1
406- The API has changed. You can continue to use the old version of stb_image_resize.h,
407- which is available in the "deprecated/" directory.
408+ The API has changed. You can continue to use the old version of
409+ stb_image_resize.h, which is available in the "deprecated/" directory.
410
411 If you're using the old simple-to-use API, porting is straightforward.
412 (For more advanced APIs, read the documentation.)
413
414 stbir_resize_uint8():
415- - call `stbir_resize_uint8_linear`, cast channel count to `stbir_pixel_layout`
416+ - call `stbir_resize_uint8_linear`, cast channel count to
417+ `stbir_pixel_layout`
418
419 stbir_resize_float():
420- - call `stbir_resize_float_linear`, cast channel count to `stbir_pixel_layout`
421+ - call `stbir_resize_float_linear`, cast channel count to
422+ `stbir_pixel_layout`
423
424 stbir_resize_uint8_srgb():
425 - function name is unchanged
426 - cast channel count to `stbir_pixel_layout`
427- - above is sufficient unless your image has alpha and it's not RGBA/BGRA
428- - in that case, follow the below instructions for stbir_resize_uint8_srgb_edgemode
429+ - above is sufficient unless your image has alpha and it's not
430+ RGBA/BGRA
431+ - in that case, follow the below instructions for
432+ stbir_resize_uint8_srgb_edgemode
433
434 stbir_resize_uint8_srgb_edgemode()
435 - switch to the "medium complexity" API
436@@ -293,7 +303,8 @@
437 - data_type: STBIR_TYPE_UINT8_SRGB
438 - edge: unchanged (STBIR_EDGE_WRAP, etc.)
439 - filter: STBIR_FILTER_DEFAULT
440- - which channel is alpha is specified in stbir_pixel_layout, see enum for details
441+ - which channel is alpha is specified in stbir_pixel_layout, see
442+ enum for details
443
444 FUTURE TODOS
445 * For polyphase integral filters, we just memcpy the coeffs to dupe
446@@ -302,20 +313,20 @@
447 (maybe, 1->3/4, 3->4, 4->1, 3->1).
448 * For SIMD encode and decode scanline routines, do any pre-aligning
449 for bad input/output buffer alignments and pitch?
450- * For very wide scanlines, we should we do vertical strips to stay within
451- L2 cache. Maybe do chunks of 1K pixels at a time. There would be
452- some pixel reconversion, but probably dwarfed by things falling out
453- of cache. Probably also something possible with alternating between
454- scattering and gathering at high resize scales?
455+ * For very wide scanlines, we should do vertical strips to stay
456+ within L2 cache. Maybe do chunks of 1K pixels at a time. There would be some
457+ pixel reconversion, but probably dwarfed by things falling out of cache.
458+ Probably also something possible with alternating between scattering and
459+ gathering at high resize scales?
460 * Should we have a multiple MIPs at the same time function (could keep
461 more memory in cache during multiple resizes)?
462 * Rewrite the coefficient generator to do many at once.
463 * AVX-512 vertical kernels - worried about downclocking here.
464 * Convert the reincludes to macros when we know they aren't changing.
465 * Experiment with pivoting the horizontal and always using the
466- vertical filters (which are faster, but perhaps not enough to overcome
467- the pivot cost and the extra memory touches). Need to buffer the whole
468- image so have to balance memory use.
469+ vertical filters (which are faster, but perhaps not enough to
470+ overcome the pivot cost and the extra memory touches). Need to buffer the
471+ whole image so have to balance memory use.
472 * Most of our code is internally function pointers, should we compile
473 all the SIMD stuff always and dynamically dispatch?
474
475@@ -330,69 +341,63 @@
476
477 REVISIONS
478 2.17 (2025-10-25) silly format bug in easy-to-use APIs.
479- 2.16 (2025-10-21) fixed the easy-to-use APIs to allow inverted bitmaps (negative
480- strides), fix vertical filter kernel callback, fix threaded
481- gather buffer priming (and assert).
482- (thanks adipose, TainZerL, and Harrison Green)
483- 2.15 (2025-07-17) fixed an assert in debug mode when using floats with input
484- callbacks, work around GCC warning when adding to null ptr
485- (thanks Johannes Spohr and Pyry Kovanen).
486- 2.14 (2025-05-09) fixed a bug using downsampling gather horizontal first, and
487- scatter with vertical first.
488- 2.13 (2025-02-27) fixed a bug when using input callbacks, turned off simd for
489- tiny-c, fixed some variables that should have been static,
490- fixes a bug when calculating temp memory with resizes that
491- exceed 2GB of temp memory (very large resizes).
492- 2.12 (2024-10-18) fix incorrect use of user_data with STBIR_FREE
493- 2.11 (2024-09-08) fix harmless asan warnings in 2-channel and 3-channel mode
494- with AVX-2, fix some weird scaling edge conditions with
495- point sample mode.
496- 2.10 (2024-07-27) fix the defines GCC and mingw for loop unroll control,
497- fix MSVC 32-bit arm half float routines.
498- 2.09 (2024-06-19) fix the defines for 32-bit ARM GCC builds (was selecting
499+ 2.16 (2025-10-21) fixed the easy-to-use APIs to allow inverted bitmaps
500+ (negative strides), fix vertical filter kernel callback, fix threaded gather
501+ buffer priming (and assert). (thanks adipose, TainZerL, and Harrison Green)
502+ 2.15 (2025-07-17) fixed an assert in debug mode when using floats with
503+ input callbacks, work around GCC warning when adding to null ptr (thanks
504+ Johannes Spohr and Pyry Kovanen). 2.14 (2025-05-09) fixed a bug using
505+ downsampling gather horizontal first, and scatter with vertical first. 2.13
506+ (2025-02-27) fixed a bug when using input callbacks, turned off simd for
507+ tiny-c, fixed some variables that should have been
508+ static, fixes a bug when calculating temp memory with resizes that exceed 2GB
509+ of temp memory (very large resizes). 2.12 (2024-10-18) fix incorrect use of
510+ user_data with STBIR_FREE 2.11 (2024-09-08) fix harmless asan warnings in
511+ 2-channel and 3-channel mode with AVX-2, fix some weird scaling edge
512+ conditions with point sample mode. 2.10 (2024-07-27) fix the defines GCC and
513+ mingw for loop unroll control, fix MSVC 32-bit arm half float routines. 2.09
514+ (2024-06-19) fix the defines for 32-bit ARM GCC builds (was selecting
515 hardware half floats).
516- 2.08 (2024-06-10) fix for RGB->BGR three channel flips and add SIMD (thanks
517- to Ryan Salsbury), fix for sub-rect resizes, use the
518- pragmas to control unrolling when they are available.
519- 2.07 (2024-05-24) fix for slow final split during threaded conversions of very
520- wide scanlines when downsampling (caused by extra input
521- converting), fix for wide scanline resamples with many
522- splits (int overflow), fix GCC warning.
523- 2.06 (2024-02-10) fix for identical width/height 3x or more down-scaling
524- undersampling a single row on rare resize ratios (about 1%).
525- 2.05 (2024-02-07) fix for 2 pixel to 1 pixel resizes with wrap (thanks Aras),
526- fix for output callback (thanks Julien Koenen).
527- 2.04 (2023-11-17) fix for rare AVX bug, shadowed symbol (thanks Nikola Smiljanic).
528- 2.03 (2023-11-01) ASAN and TSAN warnings fixed, minor tweaks.
529- 2.00 (2023-10-10) mostly new source: new api, optimizations, simd, vertical-first, etc
530- 2x-5x faster without simd, 4x-12x faster with simd,
531- in some cases, 20x to 40x faster esp resizing large to very small.
532- 0.96 (2019-03-04) fixed warnings
533- 0.95 (2017-07-23) fixed warnings
534- 0.94 (2017-03-18) fixed warnings
535- 0.93 (2017-03-03) fixed bug with certain combinations of heights
536- 0.92 (2017-01-02) fix integer overflow on large (>2GB) images
537- 0.91 (2016-04-02) fix warnings; fix handling of subpixel regions
538- 0.90 (2014-09-17) first released version
539+ 2.08 (2024-06-10) fix for RGB->BGR three channel flips and add SIMD
540+ (thanks to Ryan Salsbury), fix for sub-rect resizes, use the pragmas to
541+ control unrolling when they are available. 2.07 (2024-05-24) fix for slow
542+ final split during threaded conversions of very wide scanlines when
543+ downsampling (caused by extra input converting), fix for wide scanline
544+ resamples with many splits (int overflow), fix GCC warning. 2.06 (2024-02-10)
545+ fix for identical width/height 3x or more down-scaling undersampling a single
546+ row on rare resize ratios (about 1%). 2.05 (2024-02-07) fix for 2 pixel to 1
547+ pixel resizes with wrap (thanks Aras), fix for output callback (thanks Julien
548+ Koenen). 2.04 (2023-11-17) fix for rare AVX bug, shadowed symbol (thanks
549+ Nikola Smiljanic). 2.03 (2023-11-01) ASAN and TSAN warnings fixed, minor
550+ tweaks. 2.00 (2023-10-10) mostly new source: new api, optimizations, simd,
551+ vertical-first, etc 2x-5x faster without simd, 4x-12x faster with simd, in
552+ some cases, 20x to 40x faster esp resizing large to very small. 0.96
553+ (2019-03-04) fixed warnings 0.95 (2017-07-23) fixed warnings 0.94
554+ (2017-03-18) fixed warnings 0.93 (2017-03-03) fixed bug with certain
555+ combinations of heights 0.92 (2017-01-02) fix integer overflow on large
556+ (>2GB) images 0.91 (2016-04-02) fix warnings; fix handling of subpixel
557+ regions 0.90 (2014-09-17) first released version
558
559 LICENSE
560 See end of file for license information.
561 */
562
563-#if !defined(STB_IMAGE_RESIZE_DO_HORIZONTALS) && !defined(STB_IMAGE_RESIZE_DO_VERTICALS) && !defined(STB_IMAGE_RESIZE_DO_CODERS) // for internal re-includes
564+#if !defined(STB_IMAGE_RESIZE_DO_HORIZONTALS) && \
565+ !defined(STB_IMAGE_RESIZE_DO_VERTICALS) && \
566+ !defined(STB_IMAGE_RESIZE_DO_CODERS) // for internal re-includes
567
568 #ifndef STBIR_INCLUDE_STB_IMAGE_RESIZE2_H
569 #define STBIR_INCLUDE_STB_IMAGE_RESIZE2_H
570
571 #include <stddef.h>
572 #ifdef _MSC_VER
573-typedef unsigned char stbir_uint8;
574-typedef unsigned short stbir_uint16;
575-typedef unsigned int stbir_uint32;
576+typedef unsigned char stbir_uint8;
577+typedef unsigned short stbir_uint16;
578+typedef unsigned int stbir_uint32;
579 typedef unsigned __int64 stbir_uint64;
580 #else
581 #include <stdint.h>
582-typedef uint8_t stbir_uint8;
583+typedef uint8_t stbir_uint8;
584 typedef uint16_t stbir_uint16;
585 typedef uint32_t stbir_uint32;
586 typedef uint64_t stbir_uint64;
587@@ -422,60 +427,74 @@ typedef uint64_t stbir_uint64;
588 // * Uses edge mode clamped
589 // * returned result is 1 for success or 0 in case of an error.
590
591-
592 // stbir_pixel_layout specifies:
593 // number of channels
594 // order of channels
595 // whether color is premultiplied by alpha
596-// for back compatibility, you can cast the old channel count to an stbir_pixel_layout
597-typedef enum
598-{
599- STBIR_1CHANNEL = 1,
600- STBIR_2CHANNEL = 2,
601- STBIR_RGB = 3, // 3-chan, with order specified (for channel flipping)
602- STBIR_BGR = 0, // 3-chan, with order specified (for channel flipping)
603- STBIR_4CHANNEL = 5,
604-
605- STBIR_RGBA = 4, // alpha formats, where alpha is NOT premultiplied into color channels
606- STBIR_BGRA = 6,
607- STBIR_ARGB = 7,
608- STBIR_ABGR = 8,
609- STBIR_RA = 9,
610- STBIR_AR = 10,
611-
612- STBIR_RGBA_PM = 11, // alpha formats, where alpha is premultiplied into color channels
613- STBIR_BGRA_PM = 12,
614- STBIR_ARGB_PM = 13,
615- STBIR_ABGR_PM = 14,
616- STBIR_RA_PM = 15,
617- STBIR_AR_PM = 16,
618-
619- STBIR_RGBA_NO_AW = 11, // alpha formats, where NO alpha weighting is applied at all!
620- STBIR_BGRA_NO_AW = 12, // these are just synonyms for the _PM flags (which also do
621- STBIR_ARGB_NO_AW = 13, // no alpha weighting). These names just make it more clear
622- STBIR_ABGR_NO_AW = 14, // for some folks).
623- STBIR_RA_NO_AW = 15,
624- STBIR_AR_NO_AW = 16,
625+// for back compatibility, you can cast the old channel count to an
626+// stbir_pixel_layout
627+typedef enum {
628+ STBIR_1CHANNEL = 1,
629+ STBIR_2CHANNEL = 2,
630+ STBIR_RGB = 3, // 3-chan, with order specified (for channel flipping)
631+ STBIR_BGR = 0, // 3-chan, with order specified (for channel flipping)
632+ STBIR_4CHANNEL = 5,
633+
634+ STBIR_RGBA = 4, // alpha formats, where alpha is NOT premultiplied into
635+ // color channels
636+ STBIR_BGRA = 6,
637+ STBIR_ARGB = 7,
638+ STBIR_ABGR = 8,
639+ STBIR_RA = 9,
640+ STBIR_AR = 10,
641+
642+ STBIR_RGBA_PM =
643+ 11, // alpha formats, where alpha is premultiplied into color channels
644+ STBIR_BGRA_PM = 12,
645+ STBIR_ARGB_PM = 13,
646+ STBIR_ABGR_PM = 14,
647+ STBIR_RA_PM = 15,
648+ STBIR_AR_PM = 16,
649+
650+ STBIR_RGBA_NO_AW =
651+ 11, // alpha formats, where NO alpha weighting is applied at all!
652+ STBIR_BGRA_NO_AW =
653+ 12, // these are just synonyms for the _PM flags (which also do
654+ STBIR_ARGB_NO_AW =
655+ 13, // no alpha weighting). These names just make it more clear
656+ STBIR_ABGR_NO_AW = 14, // for some folks).
657+ STBIR_RA_NO_AW = 15,
658+ STBIR_AR_NO_AW = 16,
659
660 } stbir_pixel_layout;
661
662 //===============================================================
663 // Simple-complexity API
664 //
665-// If output_pixels is NULL (0), then we will allocate the buffer and return it to you.
666+// If output_pixels is NULL (0), then we will allocate the buffer and return
667+// it to you.
668 //--------------------------------
669
670-STBIRDEF unsigned char * stbir_resize_uint8_srgb( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
671- unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
672- stbir_pixel_layout pixel_type );
673-
674-STBIRDEF unsigned char * stbir_resize_uint8_linear( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
675- unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
676- stbir_pixel_layout pixel_type );
677-
678-STBIRDEF float * stbir_resize_float_linear( const float *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
679- float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
680- stbir_pixel_layout pixel_type );
681+STBIRDEF unsigned char *
682+stbir_resize_uint8_srgb(const unsigned char *input_pixels, int input_w,
683+ int input_h, int input_stride_in_bytes,
684+ unsigned char *output_pixels, int output_w,
685+ int output_h, int output_stride_in_bytes,
686+ stbir_pixel_layout pixel_type);
687+
688+STBIRDEF unsigned char *
689+stbir_resize_uint8_linear(const unsigned char *input_pixels, int input_w,
690+ int input_h, int input_stride_in_bytes,
691+ unsigned char *output_pixels, int output_w,
692+ int output_h, int output_stride_in_bytes,
693+ stbir_pixel_layout pixel_type);
694+
695+STBIRDEF float *
696+stbir_resize_float_linear(const float *input_pixels, int input_w, int input_h,
697+ int input_stride_in_bytes, float *output_pixels,
698+ int output_w, int output_h,
699+ int output_stride_in_bytes,
700+ stbir_pixel_layout pixel_type);
701 //===============================================================
702
703 //===============================================================
704@@ -488,45 +507,48 @@ STBIRDEF float * stbir_resize_float_linear( const float *input_pixels , int inpu
705 // * Filter can be selected explicitly
706 //--------------------------------
707
708-typedef enum
709-{
710- STBIR_EDGE_CLAMP = 0,
711- STBIR_EDGE_REFLECT = 1,
712- STBIR_EDGE_WRAP = 2, // this edge mode is slower and uses more memory
713- STBIR_EDGE_ZERO = 3,
714+typedef enum {
715+ STBIR_EDGE_CLAMP = 0,
716+ STBIR_EDGE_REFLECT = 1,
717+ STBIR_EDGE_WRAP = 2, // this edge mode is slower and uses more memory
718+ STBIR_EDGE_ZERO = 3,
719 } stbir_edge;
720
721-typedef enum
722-{
723- STBIR_FILTER_DEFAULT = 0, // use same filter type that easy-to-use API chooses
724- STBIR_FILTER_BOX = 1, // A trapezoid w/1-pixel wide ramps, same result as box for integer scale ratios
725- STBIR_FILTER_TRIANGLE = 2, // On upsampling, produces same results as bilinear texture filtering
726- STBIR_FILTER_CUBICBSPLINE = 3, // The cubic b-spline (aka Mitchell-Netrevalli with B=1,C=0), gaussian-esque
727- STBIR_FILTER_CATMULLROM = 4, // An interpolating cubic spline
728- STBIR_FILTER_MITCHELL = 5, // Mitchell-Netrevalli filter with B=1/3, C=1/3
729- STBIR_FILTER_POINT_SAMPLE = 6, // Simple point sampling
730- STBIR_FILTER_OTHER = 7, // User callback specified
731+typedef enum {
732+ STBIR_FILTER_DEFAULT =
733+ 0, // use same filter type that easy-to-use API chooses
734+ STBIR_FILTER_BOX = 1, // A trapezoid w/1-pixel wide ramps, same result as
735+ // box for integer scale ratios
736+ STBIR_FILTER_TRIANGLE =
737+ 2, // On upsampling, produces same results as bilinear texture filtering
738+ STBIR_FILTER_CUBICBSPLINE =
739+ 3, // The cubic b-spline (aka Mitchell-Netravali with B=1,C=0),
740+ // gaussian-esque
741+ STBIR_FILTER_CATMULLROM = 4, // An interpolating cubic spline
742+ STBIR_FILTER_MITCHELL = 5, // Mitchell-Netravali filter with B=1/3, C=1/3
743+ STBIR_FILTER_POINT_SAMPLE = 6, // Simple point sampling
744+ STBIR_FILTER_OTHER = 7, // User callback specified
745 } stbir_filter;
746
747-typedef enum
748-{
749- STBIR_TYPE_UINT8 = 0,
750- STBIR_TYPE_UINT8_SRGB = 1,
751- STBIR_TYPE_UINT8_SRGB_ALPHA = 2, // alpha channel, when present, should also be SRGB (this is very unusual)
752- STBIR_TYPE_UINT16 = 3,
753- STBIR_TYPE_FLOAT = 4,
754- STBIR_TYPE_HALF_FLOAT = 5
755+typedef enum {
756+ STBIR_TYPE_UINT8 = 0,
757+ STBIR_TYPE_UINT8_SRGB = 1,
758+ STBIR_TYPE_UINT8_SRGB_ALPHA = 2, // alpha channel, when present, should also
759+ // be SRGB (this is very unusual)
760+ STBIR_TYPE_UINT16 = 3,
761+ STBIR_TYPE_FLOAT = 4,
762+ STBIR_TYPE_HALF_FLOAT = 5
763 } stbir_datatype;
764
765 // medium api
766-STBIRDEF void * stbir_resize( const void *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
767- void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
768- stbir_pixel_layout pixel_layout, stbir_datatype data_type,
769- stbir_edge edge, stbir_filter filter );
770+STBIRDEF void *
771+stbir_resize(const void *input_pixels, int input_w, int input_h,
772+ int input_stride_in_bytes, void *output_pixels, int output_w,
773+ int output_h, int output_stride_in_bytes,
774+ stbir_pixel_layout pixel_layout, stbir_datatype data_type,
775+ stbir_edge edge, stbir_filter filter);
776 //===============================================================
777
778-
779-
780 //===============================================================
781 // Extended-complexity API
782 //
783@@ -540,122 +562,175 @@ STBIRDEF void * stbir_resize( const void *input_pixels , int input_w , int inpu
784 // * Can specify a memory callback
785 // * Can specify a callback data type for pixel input and output
786 // * Can be threaded for a single resize
787-// * Can be used to resize many frames without recalculating the sampler info
788+// * Can be used to resize many frames without recalculating the sampler
789+// info
790 //
791 // Use this API as follows:
792 // 1) Call the stbir_resize_init function on a local STBIR_RESIZE structure
793 // 2) Call any of the stbir_set functions
794-// 3) Optionally call stbir_build_samplers() if you are going to resample multiple times
795+// 3) Optionally call stbir_build_samplers() if you are going to resample
796+// multiple times
797 // with the same input and output dimensions (like resizing video frames)
798 // 4) Resample by calling stbir_resize_extended().
799 // 5) Call stbir_free_samplers() if you called stbir_build_samplers()
800 //--------------------------------
801
802-
803 // Types:
804
805 // INPUT CALLBACK: this callback is used for input scanlines
806-typedef void const * stbir_input_callback( void * optional_output, void const * input_ptr, int num_pixels, int x, int y, void * context );
807+typedef void const *
808+stbir_input_callback(void *optional_output, void const *input_ptr,
809+ int num_pixels, int x, int y, void *context);
810
811 // OUTPUT CALLBACK: this callback is used for output scanlines
812-typedef void stbir_output_callback( void const * output_ptr, int num_pixels, int y, void * context );
813+typedef void
814+stbir_output_callback(void const *output_ptr, int num_pixels, int y,
815+ void *context);
816
817 // callbacks for user installed filters
818-typedef float stbir__kernel_callback( float x, float scale, void * user_data ); // centered at zero
819-typedef float stbir__support_callback( float scale, void * user_data );
820+typedef float
821+stbir__kernel_callback(float x, float scale,
822+ void *user_data); // centered at zero
823+typedef float
824+stbir__support_callback(float scale, void *user_data);
825
826 // internal structure with precomputed scaling
827 typedef struct stbir__info stbir__info;
828
829-typedef struct STBIR_RESIZE // use the stbir_resize_init and stbir_override functions to set these values for future compatibility
830-{
831- void * user_data;
832- void const * input_pixels;
833- int input_w, input_h;
834- double input_s0, input_t0, input_s1, input_t1;
835- stbir_input_callback * input_cb;
836- void * output_pixels;
837- int output_w, output_h;
838- int output_subx, output_suby, output_subw, output_subh;
839- stbir_output_callback * output_cb;
840- int input_stride_in_bytes;
841- int output_stride_in_bytes;
842- int splits;
843- int fast_alpha;
844- int needs_rebuild;
845- int called_alloc;
846- stbir_pixel_layout input_pixel_layout_public;
847- stbir_pixel_layout output_pixel_layout_public;
848- stbir_datatype input_data_type;
849- stbir_datatype output_data_type;
850- stbir_filter horizontal_filter, vertical_filter;
851- stbir_edge horizontal_edge, vertical_edge;
852- stbir__kernel_callback * horizontal_filter_kernel; stbir__support_callback * horizontal_filter_support;
853- stbir__kernel_callback * vertical_filter_kernel; stbir__support_callback * vertical_filter_support;
854- stbir__info * samplers;
855+typedef struct STBIR_RESIZE // use the stbir_resize_init and stbir_override
856+ // functions to set these values for future
857+ // compatibility
858+{
859+ void *user_data;
860+ void const *input_pixels;
861+ int input_w, input_h;
862+ double input_s0, input_t0, input_s1, input_t1;
863+ stbir_input_callback *input_cb;
864+ void *output_pixels;
865+ int output_w, output_h;
866+ int output_subx, output_suby, output_subw, output_subh;
867+ stbir_output_callback *output_cb;
868+ int input_stride_in_bytes;
869+ int output_stride_in_bytes;
870+ int splits;
871+ int fast_alpha;
872+ int needs_rebuild;
873+ int called_alloc;
874+ stbir_pixel_layout input_pixel_layout_public;
875+ stbir_pixel_layout output_pixel_layout_public;
876+ stbir_datatype input_data_type;
877+ stbir_datatype output_data_type;
878+ stbir_filter horizontal_filter, vertical_filter;
879+ stbir_edge horizontal_edge, vertical_edge;
880+ stbir__kernel_callback *horizontal_filter_kernel;
881+ stbir__support_callback *horizontal_filter_support;
882+ stbir__kernel_callback *vertical_filter_kernel;
883+ stbir__support_callback *vertical_filter_support;
884+ stbir__info *samplers;
885 } STBIR_RESIZE;
886
887 // extended complexity api
888
889-
890-// First off, you must ALWAYS call stbir_resize_init on your resize structure before any of the other calls!
891-STBIRDEF void stbir_resize_init( STBIR_RESIZE * resize,
892- const void *input_pixels, int input_w, int input_h, int input_stride_in_bytes, // stride can be zero
893- void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, // stride can be zero
894- stbir_pixel_layout pixel_layout, stbir_datatype data_type );
895+// First off, you must ALWAYS call stbir_resize_init on your resize structure
896+// before any of the other calls!
897+STBIRDEF void
898+stbir_resize_init(STBIR_RESIZE *resize, const void *input_pixels, int input_w,
899+ int input_h, int input_stride_in_bytes, // stride can be zero
900+ void *output_pixels, int output_w, int output_h,
901+ int output_stride_in_bytes, // stride can be zero
902+ stbir_pixel_layout pixel_layout, stbir_datatype data_type);
903
904 //===============================================================
905-// You can update these parameters any time after resize_init and there is no cost
906+// You can update these parameters any time after resize_init and there is no
907+// cost
908 //--------------------------------
909
910-STBIRDEF void stbir_set_datatypes( STBIR_RESIZE * resize, stbir_datatype input_type, stbir_datatype output_type );
911-STBIRDEF void stbir_set_pixel_callbacks( STBIR_RESIZE * resize, stbir_input_callback * input_cb, stbir_output_callback * output_cb ); // no callbacks by default
912-STBIRDEF void stbir_set_user_data( STBIR_RESIZE * resize, void * user_data ); // pass back STBIR_RESIZE* by default
913-STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_pixels, int input_stride_in_bytes, void * output_pixels, int output_stride_in_bytes );
914+STBIRDEF void
915+stbir_set_datatypes(STBIR_RESIZE *resize, stbir_datatype input_type,
916+ stbir_datatype output_type);
917+STBIRDEF void
918+stbir_set_pixel_callbacks(
919+ STBIR_RESIZE *resize, stbir_input_callback *input_cb,
920+ stbir_output_callback *output_cb); // no callbacks by default
921+STBIRDEF void
922+stbir_set_user_data(STBIR_RESIZE *resize,
923+ void *user_data); // pass back STBIR_RESIZE* by default
924+STBIRDEF void
925+stbir_set_buffer_ptrs(STBIR_RESIZE *resize, const void *input_pixels,
926+ int input_stride_in_bytes, void *output_pixels,
927+ int output_stride_in_bytes);
928
929 //===============================================================
930
931-
932 //===============================================================
933 // If you call any of these functions, you will trigger a sampler rebuild!
934 //--------------------------------
935
936-STBIRDEF int stbir_set_pixel_layouts( STBIR_RESIZE * resize, stbir_pixel_layout input_pixel_layout, stbir_pixel_layout output_pixel_layout ); // sets new buffer layouts
937-STBIRDEF int stbir_set_edgemodes( STBIR_RESIZE * resize, stbir_edge horizontal_edge, stbir_edge vertical_edge ); // CLAMP by default
938-
939-STBIRDEF int stbir_set_filters( STBIR_RESIZE * resize, stbir_filter horizontal_filter, stbir_filter vertical_filter ); // STBIR_DEFAULT_FILTER_UPSAMPLE/DOWNSAMPLE by default
940-STBIRDEF int stbir_set_filter_callbacks( STBIR_RESIZE * resize, stbir__kernel_callback * horizontal_filter, stbir__support_callback * horizontal_support, stbir__kernel_callback * vertical_filter, stbir__support_callback * vertical_support );
941-
942-STBIRDEF int stbir_set_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ); // sets both sub-regions (full regions by default)
943-STBIRDEF int stbir_set_input_subrect( STBIR_RESIZE * resize, double s0, double t0, double s1, double t1 ); // sets input sub-region (full region by default)
944-STBIRDEF int stbir_set_output_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ); // sets output sub-region (full region by default)
945-
946-// when inputting AND outputting non-premultiplied alpha pixels, we use a slower but higher quality technique
947-// that fills the zero alpha pixel's RGB values with something plausible. If you don't care about areas of
948-// zero alpha, you can call this function to get about a 25% speed improvement for STBIR_RGBA to STBIR_RGBA
949-// types of resizes.
950-STBIRDEF int stbir_set_non_pm_alpha_speed_over_quality( STBIR_RESIZE * resize, int non_pma_alpha_speed_over_quality );
951+STBIRDEF int
952+stbir_set_pixel_layouts(
953+ STBIR_RESIZE *resize, stbir_pixel_layout input_pixel_layout,
954+ stbir_pixel_layout output_pixel_layout); // sets new buffer layouts
955+STBIRDEF int
956+stbir_set_edgemodes(STBIR_RESIZE *resize, stbir_edge horizontal_edge,
957+ stbir_edge vertical_edge); // CLAMP by default
958+
959+STBIRDEF int
960+stbir_set_filters(STBIR_RESIZE *resize, stbir_filter horizontal_filter,
961+ stbir_filter vertical_filter); // STBIR_DEFAULT_FILTER_UPSAMPLE/DOWNSAMPLE
962+ // by default
963+STBIRDEF int
964+stbir_set_filter_callbacks(STBIR_RESIZE *resize,
965+ stbir__kernel_callback *horizontal_filter,
966+ stbir__support_callback *horizontal_support,
967+ stbir__kernel_callback *vertical_filter,
968+ stbir__support_callback *vertical_support);
969+
970+STBIRDEF int
971+stbir_set_pixel_subrect(
972+ STBIR_RESIZE *resize, int subx, int suby, int subw,
973+ int subh); // sets both sub-regions (full regions by default)
974+STBIRDEF int
975+stbir_set_input_subrect(
976+ STBIR_RESIZE *resize, double s0, double t0, double s1,
977+ double t1); // sets input sub-region (full region by default)
978+STBIRDEF int
979+stbir_set_output_pixel_subrect(
980+ STBIR_RESIZE *resize, int subx, int suby, int subw,
981+ int subh); // sets output sub-region (full region by default)
982+
983+// when inputting AND outputting non-premultiplied alpha pixels, we use a slower
984+// but higher quality technique
985+// that fills the zero alpha pixel's RGB values with something plausible. If
986+// you don't care about areas of zero alpha, you can call this function to get
987+// about a 25% speed improvement for STBIR_RGBA to STBIR_RGBA types of
988+// resizes.
989+STBIRDEF int
990+stbir_set_non_pm_alpha_speed_over_quality(STBIR_RESIZE *resize,
991+ int non_pma_alpha_speed_over_quality);
992 //===============================================================
993
994-
995 //===============================================================
996-// You can call build_samplers to prebuild all the internal data we need to resample.
997-// Then, if you call resize_extended many times with the same resize, you only pay the
998-// cost once.
999+// You can call build_samplers to prebuild all the internal data we need to
1000+// resample.
1001+// Then, if you call resize_extended many times with the same resize, you only
1002+// pay the cost once.
1003 // If you do call build_samplers, you MUST call free_samplers eventually.
1004 //--------------------------------
1005
1006 // This builds the samplers and does one allocation
1007-STBIRDEF int stbir_build_samplers( STBIR_RESIZE * resize );
1008+STBIRDEF int
1009+stbir_build_samplers(STBIR_RESIZE *resize);
1010
1011-// You MUST call this, if you call stbir_build_samplers or stbir_build_samplers_with_splits
1012-STBIRDEF void stbir_free_samplers( STBIR_RESIZE * resize );
1013+// You MUST call this, if you call stbir_build_samplers or
1014+// stbir_build_samplers_with_splits
1015+STBIRDEF void
1016+stbir_free_samplers(STBIR_RESIZE *resize);
1017 //===============================================================
1018
1019-
1020-// And this is the main function to perform the resize synchronously on one thread.
1021-STBIRDEF int stbir_resize_extended( STBIR_RESIZE * resize );
1022-
1023+// And this is the main function to perform the resize synchronously on one
1024+// thread.
1025+STBIRDEF int
1026+stbir_resize_extended(STBIR_RESIZE *resize);
1027
1028 //===============================================================
1029 // Use these functions for multithreading.
1030@@ -669,23 +744,30 @@ STBIRDEF int stbir_resize_extended( STBIR_RESIZE * resize );
1031 // It returns the number of splits (threads) that you can call it with.
1032 /// It might be less if the image resize can't be split up that many ways.
1033
1034-STBIRDEF int stbir_build_samplers_with_splits( STBIR_RESIZE * resize, int try_splits );
1035+STBIRDEF int
1036+stbir_build_samplers_with_splits(STBIR_RESIZE *resize, int try_splits);
1037
1038 // This function does a split of the resizing (you call this function for each
1039-// split, on multiple threads). A split is a piece of the output resize pixel space.
1040+// split, on multiple threads). A split is a piece of the output resize pixel
1041+// space.
1042
1043-// Note that you MUST call stbir_build_samplers_with_splits before stbir_resize_extended_split!
1044+// Note that you MUST call stbir_build_samplers_with_splits before
1045+// stbir_resize_extended_split!
1046
1047-// Usually, you will always call stbir_resize_split with split_start as the thread_index
1048+// Usually, you will always call stbir_resize_extended_split with split_start
1049+// as the thread_index
1050 // and "1" for the split_count.
1051-// But, if you have a weird situation where you MIGHT want 8 threads, but sometimes
1052-// only 4 threads, you can use 0,2,4,6 for the split_start's and use "2" for the
1053-// split_count each time to turn in into a 4 thread resize. (This is unusual).
1054-
1055-STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start, int split_count );
1056+// But, if you have a weird situation where you MIGHT want 8 threads, but
1057+// sometimes
1058+// only 4 threads, you can use 0,2,4,6 for the split_start's and use "2" for
1059+// the split_count each time to turn it into a 4 thread resize. (This is
1060+// unusual).
1061+
1062+STBIRDEF int
1063+stbir_resize_extended_split(STBIR_RESIZE *resize, int split_start,
1064+ int split_count);
1065 //===============================================================
1066
1067-
1068 //===============================================================
1069 // Pixel Callbacks info:
1070 //--------------------------------
1071@@ -700,83 +782,98 @@ STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start
1072 // calculate your own input_ptr based on the size of each non-supported pixel.
1073 // (Something like the third example below.)
1074 //
1075-// You can also install just an input or just an output callback by setting the
1076-// callback that you don't want to zero.
1077+// You can also install just an input or just an output callback by setting
1078+// the callback that you don't want to zero.
1079 //
1080-// First example, progress: (getting a callback that you can monitor the progress):
1081-// void const * my_callback( void * optional_output, void const * input_ptr, int num_pixels, int x, int y, void * context )
1082+// First example, progress: (getting a callback that you can monitor the
1083+// progress):
1084+// void const * my_callback( void * optional_output, void const *
1085+// input_ptr, int num_pixels, int x, int y, void * context )
1086 // {
1087 // percentage_done = y / input_height;
1088 // return input_ptr; // use buffer from call
1089 // }
1090 //
1091 // Next example, copying: (copy from some other buffer or stream):
1092-// void const * my_callback( void * optional_output, void const * input_ptr, int num_pixels, int x, int y, void * context )
1093+// void const * my_callback( void * optional_output, void const *
1094+// input_ptr, int num_pixels, int x, int y, void * context )
1095 // {
1096-// CopyOrStreamData( optional_output, other_data_src, num_pixels * pixel_width_in_bytes );
1097-// return optional_output; // return the optional buffer that we filled
1098+// CopyOrStreamData( optional_output, other_data_src,
1099+// num_pixels * pixel_width_in_bytes );
1100+// return optional_output; // return the optional buffer that we filled
1101 // }
1102 //
1103-// Third example, input another buffer without copying: (zero-copy from other buffer):
1104-// void const * my_callback( void * optional_output, void const * input_ptr, int num_pixels, int x, int y, void * context )
1105+// Third example, input another buffer without copying: (zero-copy from
1106+// other buffer):
1107+// void const * my_callback( void * optional_output, void const *
1108+// input_ptr, int num_pixels, int x, int y, void * context )
1109 // {
1110-// void * pixels = ( (char*) other_image_base ) + ( y * other_image_stride ) + ( x * other_pixel_width_in_bytes );
1111-// return pixels; // return pointer to your data without copying
1112+// void * pixels = ( (char*) other_image_base ) +
1113+// ( y * other_image_stride ) + ( x * other_pixel_width_in_bytes );
1114+// return pixels; // return pointer to your data without copying
1115 // }
1116 //
1117 //
1118-// The output callback is considerably simpler - it just calls you so that you can dump
1119-// out each scanline. You could even directly copy out to disk if you have a simple format
1120-// like TGA or BMP. You can also convert to other output types here if you want.
1121+// The output callback is considerably simpler - it just calls you so that you
1122+// can dump out each scanline. You could even directly copy out to disk if you
1123+// have a simple format like TGA or BMP. You can also convert to other output
1124+// types here if you want.
1125 //
1126 // Simple example:
1127-// void const * my_output( void * output_ptr, int num_pixels, int y, void * context )
1128+// void const * my_output( void * output_ptr, int num_pixels, int y, void
1129+// * context )
1130 // {
1131 // percentage_done = y / output_height;
1132-// fwrite( output_ptr, pixel_width_in_bytes, num_pixels, output_file );
1133+// fwrite( output_ptr, pixel_width_in_bytes, num_pixels, output_file
1134+// );
1135 // }
1136 //===============================================================
1137
1138-
1139-
1140-
1141 //===============================================================
1142 // optional built-in profiling API
1143 //--------------------------------
1144
1145 #ifdef STBIR_PROFILE
1146
1147-typedef struct STBIR_PROFILE_INFO
1148-{
1149- stbir_uint64 total_clocks;
1150+typedef struct STBIR_PROFILE_INFO {
1151+ stbir_uint64 total_clocks;
1152
1153- // how many clocks spent (of total_clocks) in the various resize routines, along with a string description
1154- // there are "resize_count" number of zones
1155- stbir_uint64 clocks[ 8 ];
1156- char const ** descriptions;
1157+ // how many clocks spent (of total_clocks) in the various resize routines,
1158+ // along with a string description
1159+ // there are "count" number of zones
1160+ stbir_uint64 clocks[8];
1161+ char const **descriptions;
1162
1163- // count of clocks and descriptions
1164- stbir_uint32 count;
1165+ // count of clocks and descriptions
1166+ stbir_uint32 count;
1167 } STBIR_PROFILE_INFO;
1168
1169-// use after calling stbir_resize_extended (or stbir_build_samplers or stbir_build_samplers_with_splits)
1170-STBIRDEF void stbir_resize_build_profile_info( STBIR_PROFILE_INFO * out_info, STBIR_RESIZE const * resize );
1171+// use after calling stbir_resize_extended (or stbir_build_samplers or
1172+// stbir_build_samplers_with_splits)
1173+STBIRDEF void
1174+stbir_resize_build_profile_info(STBIR_PROFILE_INFO *out_info,
1175+ STBIR_RESIZE const *resize);
1176
1177 // use after calling stbir_resize_extended
1178-STBIRDEF void stbir_resize_extended_profile_info( STBIR_PROFILE_INFO * out_info, STBIR_RESIZE const * resize );
1179+STBIRDEF void
1180+stbir_resize_extended_profile_info(STBIR_PROFILE_INFO *out_info,
1181+ STBIR_RESIZE const *resize);
1182
1183 // use after calling stbir_resize_extended_split
1184-STBIRDEF void stbir_resize_split_profile_info( STBIR_PROFILE_INFO * out_info, STBIR_RESIZE const * resize, int split_start, int split_num );
1185+STBIRDEF void
1186+stbir_resize_split_profile_info(STBIR_PROFILE_INFO *out_info,
1187+ STBIR_RESIZE const *resize, int split_start,
1188+ int split_num);
1189
1190 //===============================================================
1191
1192 #endif
1193
1194-
1195 //// end header file /////////////////////////////////////////////////////
1196 #endif // STBIR_INCLUDE_STB_IMAGE_RESIZE2_H
1197
1198-#if defined(STB_IMAGE_RESIZE_IMPLEMENTATION) || defined(STB_IMAGE_RESIZE2_IMPLEMENTATION)
1199+#if defined(STB_IMAGE_RESIZE_IMPLEMENTATION) || \
1200+ defined(STB_IMAGE_RESIZE2_IMPLEMENTATION)
1201
1202 #ifndef STBIR_ASSERT
1203 #include <assert.h>
1204@@ -785,9 +882,10 @@ STBIRDEF void stbir_resize_split_profile_info( STBIR_PROFILE_INFO * out_info, ST
1205
1206 #ifndef STBIR_MALLOC
1207 #include <stdlib.h>
1208-#define STBIR_MALLOC(size,user_data) ((void)(user_data), malloc(size))
1209-#define STBIR_FREE(ptr,user_data) ((void)(user_data), free(ptr))
1210-// (we used the comma operator to evaluate user_data, to avoid "unused parameter" warnings)
1211+#define STBIR_MALLOC(size, user_data) ((void)(user_data), malloc(size))
1212+#define STBIR_FREE(ptr, user_data) ((void)(user_data), free(ptr))
1213+// (we used the comma operator to evaluate user_data, to avoid "unused
1214+// parameter" warnings)
1215 #endif
1216
1217 #ifdef _MSC_VER
1218@@ -800,30 +898,31 @@ STBIRDEF void stbir_resize_split_profile_info( STBIR_PROFILE_INFO * out_info, ST
1219
1220 // Clang address sanitizer
1221 #if defined(__has_feature)
1222- #if __has_feature(address_sanitizer) || __has_feature(memory_sanitizer)
1223- #ifndef STBIR__SEPARATE_ALLOCATIONS
1224- #define STBIR__SEPARATE_ALLOCATIONS
1225- #endif
1226- #endif
1227+#if __has_feature(address_sanitizer) || __has_feature(memory_sanitizer)
1228+#ifndef STBIR__SEPARATE_ALLOCATIONS
1229+#define STBIR__SEPARATE_ALLOCATIONS
1230+#endif
1231+#endif
1232 #endif
1233
1234 #endif
1235
1236 // GCC and MSVC
1237 #if defined(__SANITIZE_ADDRESS__)
1238- #ifndef STBIR__SEPARATE_ALLOCATIONS
1239- #define STBIR__SEPARATE_ALLOCATIONS
1240- #endif
1241+#ifndef STBIR__SEPARATE_ALLOCATIONS
1242+#define STBIR__SEPARATE_ALLOCATIONS
1243+#endif
1244 #endif
1245
1246 // Always turn off automatic FMA use - use STBIR_USE_FMA if you want.
1247 // Otherwise, this is a determinism disaster.
1248-#ifndef STBIR_DONT_CHANGE_FP_CONTRACT // override in case you don't want this behavior
1249+#ifndef STBIR_DONT_CHANGE_FP_CONTRACT // override in case you don't want this
1250+ // behavior
1251 #if defined(_MSC_VER) && !defined(__clang__)
1252 #if _MSC_VER > 1200
1253 #pragma fp_contract(off)
1254 #endif
1255-#elif defined(__GNUC__) && !defined(__clang__)
1256+#elif defined(__GNUC__) && !defined(__clang__)
1257 #pragma GCC optimize("fp-contract=off")
1258 #else
1259 #pragma STDC FP_CONTRACT OFF
1260@@ -831,53 +930,53 @@ STBIRDEF void stbir_resize_split_profile_info( STBIR_PROFILE_INFO * out_info, ST
1261 #endif
1262
1263 #ifdef _MSC_VER
1264-#define STBIR__UNUSED(v) (void)(v)
1265+#define STBIR__UNUSED(v) (void)(v)
1266 #else
1267-#define STBIR__UNUSED(v) (void)sizeof(v)
1268+#define STBIR__UNUSED(v) (void)sizeof(v)
1269 #endif
1270
1271-#define STBIR__ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0]))
1272-
1273+#define STBIR__ARRAY_SIZE(a) (sizeof((a)) / sizeof((a)[0]))
1274
1275 #ifndef STBIR_DEFAULT_FILTER_UPSAMPLE
1276-#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_CATMULLROM
1277+#define STBIR_DEFAULT_FILTER_UPSAMPLE STBIR_FILTER_CATMULLROM
1278 #endif
1279
1280 #ifndef STBIR_DEFAULT_FILTER_DOWNSAMPLE
1281-#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_MITCHELL
1282+#define STBIR_DEFAULT_FILTER_DOWNSAMPLE STBIR_FILTER_MITCHELL
1283 #endif
1284
1285-
1286 #ifndef STBIR__HEADER_FILENAME
1287 #define STBIR__HEADER_FILENAME "stb_image_resize2.h"
1288 #endif
1289
1290-// the internal pixel layout enums are in a different order, so we can easily do range comparisons of types
1291-// the public pixel layout is ordered in a way that if you cast num_channels (1-4) to the enum, you get something sensible
1292-typedef enum
1293-{
1294- STBIRI_1CHANNEL = 0,
1295- STBIRI_2CHANNEL = 1,
1296- STBIRI_RGB = 2,
1297- STBIRI_BGR = 3,
1298- STBIRI_4CHANNEL = 4,
1299-
1300- STBIRI_RGBA = 5,
1301- STBIRI_BGRA = 6,
1302- STBIRI_ARGB = 7,
1303- STBIRI_ABGR = 8,
1304- STBIRI_RA = 9,
1305- STBIRI_AR = 10,
1306-
1307- STBIRI_RGBA_PM = 11,
1308- STBIRI_BGRA_PM = 12,
1309- STBIRI_ARGB_PM = 13,
1310- STBIRI_ABGR_PM = 14,
1311- STBIRI_RA_PM = 15,
1312- STBIRI_AR_PM = 16,
1313+// the internal pixel layout enums are in a different order, so we can easily do
1314+// range comparisons of types
1315+// the public pixel layout is ordered in a way that if you cast num_channels
1316+// (1-4) to the enum, you get something sensible
1317+typedef enum {
1318+ STBIRI_1CHANNEL = 0,
1319+ STBIRI_2CHANNEL = 1,
1320+ STBIRI_RGB = 2,
1321+ STBIRI_BGR = 3,
1322+ STBIRI_4CHANNEL = 4,
1323+
1324+ STBIRI_RGBA = 5,
1325+ STBIRI_BGRA = 6,
1326+ STBIRI_ARGB = 7,
1327+ STBIRI_ABGR = 8,
1328+ STBIRI_RA = 9,
1329+ STBIRI_AR = 10,
1330+
1331+ STBIRI_RGBA_PM = 11,
1332+ STBIRI_BGRA_PM = 12,
1333+ STBIRI_ARGB_PM = 13,
1334+ STBIRI_ABGR_PM = 14,
1335+ STBIRI_RA_PM = 15,
1336+ STBIRI_AR_PM = 16,
1337 } stbir_internal_pixel_layout;
1338
1339-// define the public pixel layouts to not compile inside the implementation (to avoid accidental use)
1340+// define the public pixel layouts to not compile inside the implementation (to
1341+// avoid accidental use)
1342 #define STBIR_BGR bad_dont_use_in_implementation
1343 #define STBIR_1CHANNEL STBIR_BGR
1344 #define STBIR_2CHANNEL STBIR_BGR
1345@@ -898,277 +997,324 @@ typedef enum
1346
1347 // must match stbir_datatype
1348 static unsigned char stbir__type_size[] = {
1349- 1,1,1,2,4,2 // STBIR_TYPE_UINT8,STBIR_TYPE_UINT8_SRGB,STBIR_TYPE_UINT8_SRGB_ALPHA,STBIR_TYPE_UINT16,STBIR_TYPE_FLOAT,STBIR_TYPE_HALF_FLOAT
1350+ 1, 1, 1, 2,
1351+ 4, 2 // STBIR_TYPE_UINT8,STBIR_TYPE_UINT8_SRGB,STBIR_TYPE_UINT8_SRGB_ALPHA,STBIR_TYPE_UINT16,STBIR_TYPE_FLOAT,STBIR_TYPE_HALF_FLOAT
1352 };
1353
1354 // When gathering, the contributors are which source pixels contribute.
1355-// When scattering, the contributors are which destination pixels are contributed to.
1356-typedef struct
1357-{
1358- int n0; // First contributing pixel
1359- int n1; // Last contributing pixel
1360+// When scattering, the contributors are which destination pixels are
1361+// contributed to.
1362+typedef struct {
1363+ int n0; // First contributing pixel
1364+ int n1; // Last contributing pixel
1365 } stbir__contributors;
1366
1367-typedef struct
1368-{
1369- int lowest; // First sample index for whole filter
1370- int highest; // Last sample index for whole filter
1371- int widest; // widest single set of samples for an output
1372+typedef struct {
1373+ int lowest; // First sample index for whole filter
1374+ int highest; // Last sample index for whole filter
1375+ int widest; // widest single set of samples for an output
1376 } stbir__filter_extent_info;
1377
1378-typedef struct
1379-{
1380- int n0; // First pixel of decode buffer to write to
1381- int n1; // Last pixel of decode that will be written to
1382- int pixel_offset_for_input; // Pixel offset into input_scanline
1383+typedef struct {
1384+ int n0; // First pixel of decode buffer to write to
1385+ int n1; // Last pixel of decode that will be written to
1386+ int pixel_offset_for_input; // Pixel offset into input_scanline
1387 } stbir__span;
1388
1389-typedef struct stbir__scale_info
1390-{
1391- int input_full_size;
1392- int output_sub_size;
1393- float scale;
1394- float inv_scale;
1395- float pixel_shift; // starting shift in output pixel space (in pixels)
1396- int scale_is_rational;
1397- stbir_uint32 scale_numerator, scale_denominator;
1398+typedef struct stbir__scale_info {
1399+ int input_full_size;
1400+ int output_sub_size;
1401+ float scale;
1402+ float inv_scale;
1403+ float pixel_shift; // starting shift in output pixel space (in pixels)
1404+ int scale_is_rational;
1405+ stbir_uint32 scale_numerator, scale_denominator;
1406 } stbir__scale_info;
1407
1408-typedef struct
1409-{
1410- stbir__contributors * contributors;
1411- float* coefficients;
1412- stbir__contributors * gather_prescatter_contributors;
1413- float * gather_prescatter_coefficients;
1414- stbir__scale_info scale_info;
1415- float support;
1416- stbir_filter filter_enum;
1417- stbir__kernel_callback * filter_kernel;
1418- stbir__support_callback * filter_support;
1419- stbir_edge edge;
1420- int coefficient_width;
1421- int filter_pixel_width;
1422- int filter_pixel_margin;
1423- int num_contributors;
1424- int contributors_size;
1425- int coefficients_size;
1426- stbir__filter_extent_info extent_info;
1427- int is_gather; // 0 = scatter, 1 = gather with scale >= 1, 2 = gather with scale < 1
1428- int gather_prescatter_num_contributors;
1429- int gather_prescatter_coefficient_width;
1430- int gather_prescatter_contributors_size;
1431- int gather_prescatter_coefficients_size;
1432+typedef struct {
1433+ stbir__contributors *contributors;
1434+ float *coefficients;
1435+ stbir__contributors *gather_prescatter_contributors;
1436+ float *gather_prescatter_coefficients;
1437+ stbir__scale_info scale_info;
1438+ float support;
1439+ stbir_filter filter_enum;
1440+ stbir__kernel_callback *filter_kernel;
1441+ stbir__support_callback *filter_support;
1442+ stbir_edge edge;
1443+ int coefficient_width;
1444+ int filter_pixel_width;
1445+ int filter_pixel_margin;
1446+ int num_contributors;
1447+ int contributors_size;
1448+ int coefficients_size;
1449+ stbir__filter_extent_info extent_info;
1450+ int is_gather; // 0 = scatter, 1 = gather with scale >= 1, 2 = gather with
1451+ // scale < 1
1452+ int gather_prescatter_num_contributors;
1453+ int gather_prescatter_coefficient_width;
1454+ int gather_prescatter_contributors_size;
1455+ int gather_prescatter_coefficients_size;
1456 } stbir__sampler;
1457
1458-typedef struct
1459-{
1460- stbir__contributors conservative;
1461- int edge_sizes[2]; // this can be less than filter_pixel_margin, if the filter and scaling falls off
1462- stbir__span spans[2]; // can be two spans, if doing input subrect with clamp mode WRAP
1463+typedef struct {
1464+ stbir__contributors conservative;
1465+ int edge_sizes[2]; // this can be less than filter_pixel_margin, if the
1466+ // filter and scaling falls off
1467+ stbir__span spans[2]; // can be two spans, if doing input subrect with clamp
1468+ // mode WRAP
1469 } stbir__extents;
1470
1471-typedef struct
1472-{
1473+typedef struct {
1474 #ifdef STBIR_PROFILE
1475- union
1476- {
1477- struct { stbir_uint64 total, looping, vertical, horizontal, decode, encode, alpha, unalpha; } named;
1478- stbir_uint64 array[8];
1479- } profile;
1480- stbir_uint64 * current_zone_excluded_ptr;
1481-#endif
1482- float* decode_buffer;
1483-
1484- int ring_buffer_first_scanline;
1485- int ring_buffer_last_scanline;
1486- int ring_buffer_begin_index; // first_scanline is at this index in the ring buffer
1487- int start_output_y, end_output_y;
1488- int start_input_y, end_input_y; // used in scatter only
1489-
1490- #ifdef STBIR__SEPARATE_ALLOCATIONS
1491- float** ring_buffers; // one pointer for each ring buffer
1492- #else
1493- float* ring_buffer; // one big buffer that we index into
1494- #endif
1495-
1496- float* vertical_buffer;
1497-
1498- char no_cache_straddle[64];
1499-} stbir__per_split_info;
1500+ union {
1501+ struct {
1502+ stbir_uint64 total, looping, vertical, horizontal, decode, encode,
1503+ alpha, unalpha;
1504+ } named;
1505+ stbir_uint64 array[8];
1506+ } profile;
1507+ stbir_uint64 *current_zone_excluded_ptr;
1508+#endif
1509+ float *decode_buffer;
1510
1511-typedef float * stbir__decode_pixels_func( float * decode, int width_times_channels, void const * input );
1512-typedef void stbir__alpha_weight_func( float * decode_buffer, int width_times_channels );
1513-typedef void stbir__horizontal_gather_channels_func( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer,
1514- stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width );
1515-typedef void stbir__alpha_unweight_func(float * encode_buffer, int width_times_channels );
1516-typedef void stbir__encode_pixels_func( void * output, int width_times_channels, float const * encode );
1517+ int ring_buffer_first_scanline;
1518+ int ring_buffer_last_scanline;
1519+ int ring_buffer_begin_index; // first_scanline is at this index in the ring
1520+ // buffer
1521+ int start_output_y, end_output_y;
1522+ int start_input_y, end_input_y; // used in scatter only
1523
1524-struct stbir__info
1525-{
1526+#ifdef STBIR__SEPARATE_ALLOCATIONS
1527+ float **ring_buffers; // one pointer for each ring buffer
1528+#else
1529+ float *ring_buffer; // one big buffer that we index into
1530+#endif
1531+
1532+ float *vertical_buffer;
1533+
1534+ char no_cache_straddle[64];
1535+} stbir__per_split_info;
1536+
1537+typedef float *
1538+stbir__decode_pixels_func(float *decode, int width_times_channels,
1539+ void const *input);
1540+typedef void
1541+stbir__alpha_weight_func(float *decode_buffer, int width_times_channels);
1542+typedef void
1543+stbir__horizontal_gather_channels_func(
1544+ float *output_buffer, unsigned int output_sub_size,
1545+ float const *decode_buffer,
1546+ stbir__contributors const *horizontal_contributors,
1547+ float const *horizontal_coefficients, int coefficient_width);
1548+typedef void
1549+stbir__alpha_unweight_func(float *encode_buffer, int width_times_channels);
1550+typedef void
1551+stbir__encode_pixels_func(void *output, int width_times_channels,
1552+ float const *encode);
1553+
1554+struct stbir__info {
1555 #ifdef STBIR_PROFILE
1556- union
1557- {
1558- struct { stbir_uint64 total, build, alloc, horizontal, vertical, cleanup, pivot; } named;
1559- stbir_uint64 array[7];
1560- } profile;
1561- stbir_uint64 * current_zone_excluded_ptr;
1562-#endif
1563- stbir__sampler horizontal;
1564- stbir__sampler vertical;
1565-
1566- void const * input_data;
1567- void * output_data;
1568-
1569- int input_stride_bytes;
1570- int output_stride_bytes;
1571- int ring_buffer_length_bytes; // The length of an individual entry in the ring buffer. The total number of ring buffers is stbir__get_filter_pixel_width(filter)
1572- int ring_buffer_num_entries; // Total number of entries in the ring buffer.
1573-
1574- stbir_datatype input_type;
1575- stbir_datatype output_type;
1576-
1577- stbir_input_callback * in_pixels_cb;
1578- void * user_data;
1579- stbir_output_callback * out_pixels_cb;
1580-
1581- stbir__extents scanline_extents;
1582-
1583- void * alloced_mem;
1584- stbir__per_split_info * split_info; // by default 1, but there will be N of these allocated based on the thread init you did
1585-
1586- stbir__decode_pixels_func * decode_pixels;
1587- stbir__alpha_weight_func * alpha_weight;
1588- stbir__horizontal_gather_channels_func * horizontal_gather_channels;
1589- stbir__alpha_unweight_func * alpha_unweight;
1590- stbir__encode_pixels_func * encode_pixels;
1591-
1592- int alloc_ring_buffer_num_entries; // Number of entries in the ring buffer that will be allocated
1593- int splits; // count of splits
1594-
1595- stbir_internal_pixel_layout input_pixel_layout_internal;
1596- stbir_internal_pixel_layout output_pixel_layout_internal;
1597-
1598- int input_color_and_type;
1599- int offset_x, offset_y; // offset within output_data
1600- int vertical_first;
1601- int channels;
1602- int effective_channels; // same as channels, except on RGBA/ARGB (7), or XA/AX (3)
1603- size_t alloced_total;
1604+ union {
1605+ struct {
1606+ stbir_uint64 total, build, alloc, horizontal, vertical, cleanup,
1607+ pivot;
1608+ } named;
1609+ stbir_uint64 array[7];
1610+ } profile;
1611+ stbir_uint64 *current_zone_excluded_ptr;
1612+#endif
1613+ stbir__sampler horizontal;
1614+ stbir__sampler vertical;
1615+
1616+ void const *input_data;
1617+ void *output_data;
1618+
1619+ int input_stride_bytes;
1620+ int output_stride_bytes;
1621+ int ring_buffer_length_bytes; // The length of an individual entry in the
1622+ // ring buffer. The total number of ring
1623+ // buffers is
1624+ // stbir__get_filter_pixel_width(filter)
1625+ int ring_buffer_num_entries; // Total number of entries in the ring buffer.
1626+
1627+ stbir_datatype input_type;
1628+ stbir_datatype output_type;
1629+
1630+ stbir_input_callback *in_pixels_cb;
1631+ void *user_data;
1632+ stbir_output_callback *out_pixels_cb;
1633+
1634+ stbir__extents scanline_extents;
1635+
1636+ void *alloced_mem;
1637+ stbir__per_split_info
1638+ *split_info; // by default 1, but there will be N of these allocated
1639+ // based on the thread init you did
1640+
1641+ stbir__decode_pixels_func *decode_pixels;
1642+ stbir__alpha_weight_func *alpha_weight;
1643+ stbir__horizontal_gather_channels_func *horizontal_gather_channels;
1644+ stbir__alpha_unweight_func *alpha_unweight;
1645+ stbir__encode_pixels_func *encode_pixels;
1646+
1647+ int alloc_ring_buffer_num_entries; // Number of entries in the ring buffer
1648+ // that will be allocated
1649+ int splits; // count of splits
1650+
1651+ stbir_internal_pixel_layout input_pixel_layout_internal;
1652+ stbir_internal_pixel_layout output_pixel_layout_internal;
1653+
1654+ int input_color_and_type;
1655+ int offset_x, offset_y; // offset within output_data
1656+ int vertical_first;
1657+ int channels;
1658+ int effective_channels; // same as channels, except on RGBA/ARGB (7), or
1659+ // XA/AX (3)
1660+ size_t alloced_total;
1661 };
1662
1663-
1664-#define stbir__max_uint8_as_float 255.0f
1665-#define stbir__max_uint16_as_float 65535.0f
1666-#define stbir__max_uint8_as_float_inverted 3.9215689e-03f // (1.0f/255.0f)
1667-#define stbir__max_uint16_as_float_inverted 1.5259022e-05f // (1.0f/65535.0f)
1668-#define stbir__small_float ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
1669+#define stbir__max_uint8_as_float 255.0f
1670+#define stbir__max_uint16_as_float 65535.0f
1671+#define stbir__max_uint8_as_float_inverted 3.9215689e-03f // (1.0f/255.0f)
1672+#define stbir__max_uint16_as_float_inverted 1.5259022e-05f // (1.0f/65535.0f)
1673+#define stbir__small_float \
1674+ ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / \
1675+ (1 << 20))
1676
1677 // min/max friendly
1678-#define STBIR_CLAMP(x, xmin, xmax) for(;;) { \
1679- if ( (x) < (xmin) ) (x) = (xmin); \
1680- if ( (x) > (xmax) ) (x) = (xmax); \
1681- break; \
1682-}
1683+#define STBIR_CLAMP(x, xmin, xmax) \
1684+ for (;;) { \
1685+ if ((x) < (xmin)) \
1686+ (x) = (xmin); \
1687+ if ((x) > (xmax)) \
1688+ (x) = (xmax); \
1689+ break; \
1690+ }
1691
1692-static stbir__inline int stbir__min(int a, int b)
1693+static stbir__inline int
1694+stbir__min(int a, int b)
1695 {
1696- return a < b ? a : b;
1697+ return a < b ? a : b;
1698 }
1699
1700-static stbir__inline int stbir__max(int a, int b)
1701+static stbir__inline int
1702+stbir__max(int a, int b)
1703 {
1704- return a > b ? a : b;
1705+ return a > b ? a : b;
1706 }
1707
1708 static float stbir__srgb_uchar_to_linear_float[256] = {
1709- 0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f, 0.002125f, 0.002428f, 0.002732f, 0.003035f,
1710- 0.003347f, 0.003677f, 0.004025f, 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f, 0.007499f,
1711- 0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f, 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f,
1712- 0.015209f, 0.015996f, 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f, 0.023153f, 0.024158f,
1713- 0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f, 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f,
1714- 0.038204f, 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f, 0.049707f, 0.051269f, 0.052861f,
1715- 0.054480f, 0.056128f, 0.057805f, 0.059511f, 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f,
1716- 0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f, 0.088656f, 0.090842f, 0.093059f, 0.095307f,
1717- 0.097587f, 0.099899f, 0.102242f, 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f, 0.122139f,
1718- 0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f, 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f,
1719- 0.155926f, 0.158961f, 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f, 0.184475f, 0.187821f,
1720- 0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f, 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f,
1721- 0.230740f, 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f, 0.262251f, 0.266356f, 0.270498f,
1722- 0.274677f, 0.278894f, 0.283149f, 0.287441f, 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f,
1723- 0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f, 0.356400f, 0.361307f, 0.366253f, 0.371238f,
1724- 0.376262f, 0.381326f, 0.386430f, 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f, 0.428691f,
1725- 0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f, 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f,
1726- 0.496933f, 0.502887f, 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f, 0.552011f, 0.558340f,
1727- 0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f, 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f,
1728- 0.637597f, 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f, 0.693872f, 0.701102f, 0.708376f,
1729- 0.715694f, 0.723055f, 0.730461f, 0.737911f, 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f,
1730- 0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f, 0.854993f, 0.863157f, 0.871367f, 0.879622f,
1731- 0.887923f, 0.896269f, 0.904661f, 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f, 0.973445f,
1732- 0.982251f, 0.991102f, 1.0f
1733-};
1734-
1735-typedef union
1736-{
1737- unsigned int u;
1738- float f;
1739+ 0.000000f, 0.000304f, 0.000607f, 0.000911f, 0.001214f, 0.001518f, 0.001821f,
1740+ 0.002125f, 0.002428f, 0.002732f, 0.003035f, 0.003347f, 0.003677f, 0.004025f,
1741+ 0.004391f, 0.004777f, 0.005182f, 0.005605f, 0.006049f, 0.006512f, 0.006995f,
1742+ 0.007499f, 0.008023f, 0.008568f, 0.009134f, 0.009721f, 0.010330f, 0.010960f,
1743+ 0.011612f, 0.012286f, 0.012983f, 0.013702f, 0.014444f, 0.015209f, 0.015996f,
1744+ 0.016807f, 0.017642f, 0.018500f, 0.019382f, 0.020289f, 0.021219f, 0.022174f,
1745+ 0.023153f, 0.024158f, 0.025187f, 0.026241f, 0.027321f, 0.028426f, 0.029557f,
1746+ 0.030713f, 0.031896f, 0.033105f, 0.034340f, 0.035601f, 0.036889f, 0.038204f,
1747+ 0.039546f, 0.040915f, 0.042311f, 0.043735f, 0.045186f, 0.046665f, 0.048172f,
1748+ 0.049707f, 0.051269f, 0.052861f, 0.054480f, 0.056128f, 0.057805f, 0.059511f,
1749+ 0.061246f, 0.063010f, 0.064803f, 0.066626f, 0.068478f, 0.070360f, 0.072272f,
1750+ 0.074214f, 0.076185f, 0.078187f, 0.080220f, 0.082283f, 0.084376f, 0.086500f,
1751+ 0.088656f, 0.090842f, 0.093059f, 0.095307f, 0.097587f, 0.099899f, 0.102242f,
1752+ 0.104616f, 0.107023f, 0.109462f, 0.111932f, 0.114435f, 0.116971f, 0.119538f,
1753+ 0.122139f, 0.124772f, 0.127438f, 0.130136f, 0.132868f, 0.135633f, 0.138432f,
1754+ 0.141263f, 0.144128f, 0.147027f, 0.149960f, 0.152926f, 0.155926f, 0.158961f,
1755+ 0.162029f, 0.165132f, 0.168269f, 0.171441f, 0.174647f, 0.177888f, 0.181164f,
1756+ 0.184475f, 0.187821f, 0.191202f, 0.194618f, 0.198069f, 0.201556f, 0.205079f,
1757+ 0.208637f, 0.212231f, 0.215861f, 0.219526f, 0.223228f, 0.226966f, 0.230740f,
1758+ 0.234551f, 0.238398f, 0.242281f, 0.246201f, 0.250158f, 0.254152f, 0.258183f,
1759+ 0.262251f, 0.266356f, 0.270498f, 0.274677f, 0.278894f, 0.283149f, 0.287441f,
1760+ 0.291771f, 0.296138f, 0.300544f, 0.304987f, 0.309469f, 0.313989f, 0.318547f,
1761+ 0.323143f, 0.327778f, 0.332452f, 0.337164f, 0.341914f, 0.346704f, 0.351533f,
1762+ 0.356400f, 0.361307f, 0.366253f, 0.371238f, 0.376262f, 0.381326f, 0.386430f,
1763+ 0.391573f, 0.396755f, 0.401978f, 0.407240f, 0.412543f, 0.417885f, 0.423268f,
1764+ 0.428691f, 0.434154f, 0.439657f, 0.445201f, 0.450786f, 0.456411f, 0.462077f,
1765+ 0.467784f, 0.473532f, 0.479320f, 0.485150f, 0.491021f, 0.496933f, 0.502887f,
1766+ 0.508881f, 0.514918f, 0.520996f, 0.527115f, 0.533276f, 0.539480f, 0.545725f,
1767+ 0.552011f, 0.558340f, 0.564712f, 0.571125f, 0.577581f, 0.584078f, 0.590619f,
1768+ 0.597202f, 0.603827f, 0.610496f, 0.617207f, 0.623960f, 0.630757f, 0.637597f,
1769+ 0.644480f, 0.651406f, 0.658375f, 0.665387f, 0.672443f, 0.679543f, 0.686685f,
1770+ 0.693872f, 0.701102f, 0.708376f, 0.715694f, 0.723055f, 0.730461f, 0.737911f,
1771+ 0.745404f, 0.752942f, 0.760525f, 0.768151f, 0.775822f, 0.783538f, 0.791298f,
1772+ 0.799103f, 0.806952f, 0.814847f, 0.822786f, 0.830770f, 0.838799f, 0.846873f,
1773+ 0.854993f, 0.863157f, 0.871367f, 0.879622f, 0.887923f, 0.896269f, 0.904661f,
1774+ 0.913099f, 0.921582f, 0.930111f, 0.938686f, 0.947307f, 0.955974f, 0.964686f,
1775+ 0.973445f, 0.982251f, 0.991102f, 1.0f};
1776+
1777+typedef union {
1778+ unsigned int u;
1779+ float f;
1780 } stbir__FP32;
1781
1782 // From https://gist.github.com/rygorous/2203834
1783
1784 static const stbir_uint32 fp32_to_srgb8_tab4[104] = {
1785- 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
1786- 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a, 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
1787- 0x010e0033, 0x01280033, 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
1788- 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
1789- 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce, 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
1790- 0x06970158, 0x07420142, 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
1791- 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
1792- 0x11070264, 0x1238023e, 0x1357021d, 0x14660201, 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
1793- 0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
1794- 0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
1795- 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5, 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
1796- 0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
1797- 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
1798+ 0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d, 0x008d000d, 0x0094000d,
1799+ 0x009a000d, 0x00a1000d, 0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a,
1800+ 0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a, 0x010e0033, 0x01280033,
1801+ 0x01410033, 0x015b0033, 0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
1802+ 0x01dc0067, 0x020f0067, 0x02430067, 0x02760067, 0x02aa0067, 0x02dd0067,
1803+ 0x03110067, 0x03440067, 0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce,
1804+ 0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5, 0x06970158, 0x07420142,
1805+ 0x07e30130, 0x087b0120, 0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
1806+ 0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180, 0x0e56016e, 0x0f0d015e,
1807+ 0x0fbc0150, 0x10630143, 0x11070264, 0x1238023e, 0x1357021d, 0x14660201,
1808+ 0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af, 0x18fe0331, 0x1a9602fe,
1809+ 0x1c1502d2, 0x1d7e02ad, 0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
1810+ 0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392, 0x2b6a0367, 0x2d1d0341,
1811+ 0x2ebe031f, 0x304d0300, 0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5,
1812+ 0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401, 0x44c20798, 0x488e071e,
1813+ 0x4c1c06b6, 0x4f76065d, 0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
1814+ 0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f, 0x70940818, 0x74a007bd,
1815+ 0x787d076c, 0x7c330723,
1816 };
1817
1818-static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
1819+static stbir__inline stbir_uint8
1820+stbir__linear_to_srgb_uchar(float in)
1821 {
1822- static const stbir__FP32 almostone = { 0x3f7fffff }; // 1-eps
1823- static const stbir__FP32 minval = { (127-13) << 23 };
1824- stbir_uint32 tab,bias,scale,t;
1825- stbir__FP32 f;
1826+ static const stbir__FP32 almostone = {0x3f7fffff}; // 1-eps
1827+ static const stbir__FP32 minval = {(127 - 13) << 23};
1828+ stbir_uint32 tab, bias, scale, t;
1829+ stbir__FP32 f;
1830
1831- // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
1832- // The tests are carefully written so that NaNs map to 0, same as in the reference
1833- // implementation.
1834- if (!(in > minval.f)) // written this way to catch NaNs
1835- return 0;
1836- if (in > almostone.f)
1837- return 255;
1838+ // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
1839+ // The tests are carefully written so that NaNs map to 0, same as in the
1840+ // reference implementation.
1841+ if (!(in > minval.f)) { // written this way to catch NaNs
1842+ return 0;
1843+ }
1844+ if (in > almostone.f) {
1845+ return 255;
1846+ }
1847
1848- // Do the table lookup and unpack bias, scale
1849- f.f = in;
1850- tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20];
1851- bias = (tab >> 16) << 9;
1852- scale = tab & 0xffff;
1853+ // Do the table lookup and unpack bias, scale
1854+ f.f = in;
1855+ tab = fp32_to_srgb8_tab4[(f.u - minval.u) >> 20];
1856+ bias = (tab >> 16) << 9;
1857+ scale = tab & 0xffff;
1858
1859- // Grab next-highest mantissa bits and perform linear interpolation
1860- t = (f.u >> 12) & 0xff;
1861- return (unsigned char) ((bias + scale*t) >> 16);
1862+ // Grab next-highest mantissa bits and perform linear interpolation
1863+ t = (f.u >> 12) & 0xff;
1864+ return (unsigned char)((bias + scale * t) >> 16);
1865 }
1866
1867 #ifndef STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT
1868-#define STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT 32 // when downsampling and <= 32 scanlines of buffering, use gather. gather used down to 1/8th scaling for 25% win.
1869+#define STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT \
1870+ 32 // when downsampling and <= 32 scanlines of buffering, use gather. gather
1871+ // used down to 1/8th scaling for 25% win.
1872 #endif
1873
1874 #ifndef STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS
1875-#define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split?
1876+#define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS \
1877+ 4 // when threading, what is the minimum number of scanlines for a split?
1878 #endif
1879
1880 #define STBIR_INPUT_CALLBACK_PADDING 3
1881
1882 #ifdef _M_IX86_FP
1883-#if ( _M_IX86_FP >= 1 )
1884+#if (_M_IX86_FP >= 1)
1885 #ifndef STBIR_SSE
1886 #define STBIR_SSE
1887 #endif
1888@@ -1176,41 +1322,47 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
1889 #endif
1890
1891 #ifdef __TINYC__
1892- // tiny c has no intrinsics yet - this can become a version check if they add them
1893- #define STBIR_NO_SIMD
1894-#endif
1895-
1896-#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(_M_AMD64) || defined(__SSE2__) || defined(STBIR_SSE) || defined(STBIR_SSE2)
1897- #ifndef STBIR_SSE2
1898- #define STBIR_SSE2
1899- #endif
1900- #if defined(__AVX__) || defined(STBIR_AVX2)
1901- #ifndef STBIR_AVX
1902- #ifndef STBIR_NO_AVX
1903- #define STBIR_AVX
1904- #endif
1905- #endif
1906- #endif
1907- #if defined(__AVX2__) || defined(STBIR_AVX2)
1908- #ifndef STBIR_NO_AVX2
1909- #ifndef STBIR_AVX2
1910- #define STBIR_AVX2
1911- #endif
1912- #if defined( _MSC_VER ) && !defined(__clang__)
1913- #ifndef STBIR_FP16C // FP16C instructions are on all AVX2 cpus, so we can autoselect it here on microsoft - clang needs -m16c
1914- #define STBIR_FP16C
1915- #endif
1916- #endif
1917- #endif
1918- #endif
1919- #ifdef __F16C__
1920- #ifndef STBIR_FP16C // turn on FP16C instructions if the define is set (for clang and gcc)
1921- #define STBIR_FP16C
1922- #endif
1923- #endif
1924-#endif
1925-
1926-#if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || ((__ARM_NEON_FP & 4) != 0) || defined(__ARM_NEON__)
1927+// tiny c has no intrinsics yet - this can become a version check if they add
1928+// them
1929+#define STBIR_NO_SIMD
1930+#endif
1931+
1932+#if defined(_x86_64) || defined(__x86_64__) || defined(_M_X64) || \
1933+ defined(__x86_64) || defined(_M_AMD64) || defined(__SSE2__) || \
1934+ defined(STBIR_SSE) || defined(STBIR_SSE2)
1935+#ifndef STBIR_SSE2
1936+#define STBIR_SSE2
1937+#endif
1938+#if defined(__AVX__) || defined(STBIR_AVX2)
1939+#ifndef STBIR_AVX
1940+#ifndef STBIR_NO_AVX
1941+#define STBIR_AVX
1942+#endif
1943+#endif
1944+#endif
1945+#if defined(__AVX2__) || defined(STBIR_AVX2)
1946+#ifndef STBIR_NO_AVX2
1947+#ifndef STBIR_AVX2
1948+#define STBIR_AVX2
1949+#endif
1950+#if defined(_MSC_VER) && !defined(__clang__)
1951+#ifndef STBIR_FP16C // FP16C instructions are on all AVX2 cpus, so we can
1952+ // autoselect it here on microsoft - clang needs -m16c
1953+#define STBIR_FP16C
1954+#endif
1955+#endif
1956+#endif
1957+#endif
1958+#ifdef __F16C__
1959+#ifndef STBIR_FP16C // turn on FP16C instructions if the define is set (for
1960+ // clang and gcc)
1961+#define STBIR_FP16C
1962+#endif
1963+#endif
1964+#endif
1965+
1966+#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) || \
1967+ ((__ARM_NEON_FP & 4) != 0) || defined(__ARM_NEON__)
1968 #ifndef STBIR_NEON
1969 #define STBIR_NEON
1970 #endif
1971@@ -1229,35 +1381,39 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
1972 #endif
1973
1974 // restrict pointers for the output pointers, other loop and unroll control
1975-#if defined( _MSC_VER ) && !defined(__clang__)
1976- #define STBIR_STREAMOUT_PTR( star ) star __restrict
1977- #define STBIR_NO_UNROLL( ptr ) __assume(ptr) // this oddly keeps msvc from unrolling a loop
1978- #if _MSC_VER >= 1900
1979- #define STBIR_NO_UNROLL_LOOP_START __pragma(loop( no_vector ))
1980- #else
1981- #define STBIR_NO_UNROLL_LOOP_START
1982- #endif
1983-#elif defined( __clang__ )
1984- #define STBIR_STREAMOUT_PTR( star ) star __restrict__
1985- #define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr))
1986- #if ( __clang_major__ >= 4 ) || ( ( __clang_major__ >= 3 ) && ( __clang_minor__ >= 5 ) )
1987- #define STBIR_NO_UNROLL_LOOP_START _Pragma("clang loop unroll(disable)") _Pragma("clang loop vectorize(disable)")
1988- #else
1989- #define STBIR_NO_UNROLL_LOOP_START
1990- #endif
1991-#elif defined( __GNUC__ )
1992- #define STBIR_STREAMOUT_PTR( star ) star __restrict__
1993- #define STBIR_NO_UNROLL( ptr ) __asm__ (""::"r"(ptr))
1994- #if __GNUC__ >= 14
1995- #define STBIR_NO_UNROLL_LOOP_START _Pragma("GCC unroll 0") _Pragma("GCC novector")
1996- #else
1997- #define STBIR_NO_UNROLL_LOOP_START
1998- #endif
1999- #define STBIR_NO_UNROLL_LOOP_START_INF_FOR
2000+#if defined(_MSC_VER) && !defined(__clang__)
2001+#define STBIR_STREAMOUT_PTR(star) star __restrict
2002+#define STBIR_NO_UNROLL(ptr) \
2003+ __assume(ptr) // this oddly keeps msvc from unrolling a loop
2004+#if _MSC_VER >= 1900
2005+#define STBIR_NO_UNROLL_LOOP_START __pragma(loop(no_vector))
2006+#else
2007+#define STBIR_NO_UNROLL_LOOP_START
2008+#endif
2009+#elif defined(__clang__)
2010+#define STBIR_STREAMOUT_PTR(star) star __restrict__
2011+#define STBIR_NO_UNROLL(ptr) __asm__("" ::"r"(ptr))
2012+#if (__clang_major__ >= 4) || ((__clang_major__ >= 3) && (__clang_minor__ >= 5))
2013+#define STBIR_NO_UNROLL_LOOP_START \
2014+ _Pragma("clang loop unroll(disable)") \
2015+ _Pragma("clang loop vectorize(disable)")
2016+#else
2017+#define STBIR_NO_UNROLL_LOOP_START
2018+#endif
2019+#elif defined(__GNUC__)
2020+#define STBIR_STREAMOUT_PTR(star) star __restrict__
2021+#define STBIR_NO_UNROLL(ptr) __asm__("" ::"r"(ptr))
2022+#if __GNUC__ >= 14
2023+#define STBIR_NO_UNROLL_LOOP_START \
2024+ _Pragma("GCC unroll 0") _Pragma("GCC novector")
2025 #else
2026- #define STBIR_STREAMOUT_PTR( star ) star
2027- #define STBIR_NO_UNROLL( ptr )
2028- #define STBIR_NO_UNROLL_LOOP_START
2029+#define STBIR_NO_UNROLL_LOOP_START
2030+#endif
2031+#define STBIR_NO_UNROLL_LOOP_START_INF_FOR
2032+#else
2033+#define STBIR_STREAMOUT_PTR(star) star
2034+#define STBIR_NO_UNROLL(ptr)
2035+#define STBIR_NO_UNROLL_LOOP_START
2036 #endif
2037
2038 #ifndef STBIR_NO_UNROLL_LOOP_START_INF_FOR
2039@@ -1299,1463 +1455,1914 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
2040 #else // STBIR_SIMD
2041
2042 #ifdef STBIR_SSE2
2043- #include <emmintrin.h>
2044-
2045- #define stbir__simdf __m128
2046- #define stbir__simdi __m128i
2047-
2048- #define stbir_simdi_castf( reg ) _mm_castps_si128(reg)
2049- #define stbir_simdf_casti( reg ) _mm_castsi128_ps(reg)
2050-
2051- #define stbir__simdf_load( reg, ptr ) (reg) = _mm_loadu_ps( (float const*)(ptr) )
2052- #define stbir__simdi_load( reg, ptr ) (reg) = _mm_loadu_si128 ( (stbir__simdi const*)(ptr) )
2053- #define stbir__simdf_load1( out, ptr ) (out) = _mm_load_ss( (float const*)(ptr) ) // top values can be random (not denormal or nan for perf)
2054- #define stbir__simdi_load1( out, ptr ) (out) = _mm_castps_si128( _mm_load_ss( (float const*)(ptr) ))
2055- #define stbir__simdf_load1z( out, ptr ) (out) = _mm_load_ss( (float const*)(ptr) ) // top values must be zero
2056- #define stbir__simdf_frep4( fvar ) _mm_set_ps1( fvar )
2057- #define stbir__simdf_load1frep4( out, fvar ) (out) = _mm_set_ps1( fvar )
2058- #define stbir__simdf_load2( out, ptr ) (out) = _mm_castsi128_ps( _mm_loadl_epi64( (__m128i*)(ptr)) ) // top values can be random (not denormal or nan for perf)
2059- #define stbir__simdf_load2z( out, ptr ) (out) = _mm_castsi128_ps( _mm_loadl_epi64( (__m128i*)(ptr)) ) // top values must be zero
2060- #define stbir__simdf_load2hmerge( out, reg, ptr ) (out) = _mm_castpd_ps(_mm_loadh_pd( _mm_castps_pd(reg), (double*)(ptr) ))
2061-
2062- #define stbir__simdf_zeroP() _mm_setzero_ps()
2063- #define stbir__simdf_zero( reg ) (reg) = _mm_setzero_ps()
2064-
2065- #define stbir__simdf_store( ptr, reg ) _mm_storeu_ps( (float*)(ptr), reg )
2066- #define stbir__simdf_store1( ptr, reg ) _mm_store_ss( (float*)(ptr), reg )
2067- #define stbir__simdf_store2( ptr, reg ) _mm_storel_epi64( (__m128i*)(ptr), _mm_castps_si128(reg) )
2068- #define stbir__simdf_store2h( ptr, reg ) _mm_storeh_pd( (double*)(ptr), _mm_castps_pd(reg) )
2069-
2070- #define stbir__simdi_store( ptr, reg ) _mm_storeu_si128( (__m128i*)(ptr), reg )
2071- #define stbir__simdi_store1( ptr, reg ) _mm_store_ss( (float*)(ptr), _mm_castsi128_ps(reg) )
2072- #define stbir__simdi_store2( ptr, reg ) _mm_storel_epi64( (__m128i*)(ptr), (reg) )
2073-
2074- #define stbir__prefetch( ptr ) _mm_prefetch((char*)(ptr), _MM_HINT_T0 )
2075-
2076- #define stbir__simdi_expand_u8_to_u32(out0,out1,out2,out3,ireg) \
2077- { \
2078- stbir__simdi zero = _mm_setzero_si128(); \
2079- out2 = _mm_unpacklo_epi8( ireg, zero ); \
2080- out3 = _mm_unpackhi_epi8( ireg, zero ); \
2081- out0 = _mm_unpacklo_epi16( out2, zero ); \
2082- out1 = _mm_unpackhi_epi16( out2, zero ); \
2083- out2 = _mm_unpacklo_epi16( out3, zero ); \
2084- out3 = _mm_unpackhi_epi16( out3, zero ); \
2085- }
2086-
2087-#define stbir__simdi_expand_u8_to_1u32(out,ireg) \
2088- { \
2089- stbir__simdi zero = _mm_setzero_si128(); \
2090- out = _mm_unpacklo_epi8( ireg, zero ); \
2091- out = _mm_unpacklo_epi16( out, zero ); \
2092- }
2093-
2094- #define stbir__simdi_expand_u16_to_u32(out0,out1,ireg) \
2095- { \
2096- stbir__simdi zero = _mm_setzero_si128(); \
2097- out0 = _mm_unpacklo_epi16( ireg, zero ); \
2098- out1 = _mm_unpackhi_epi16( ireg, zero ); \
2099- }
2100-
2101- #define stbir__simdf_convert_float_to_i32( i, f ) (i) = _mm_cvttps_epi32(f)
2102- #define stbir__simdf_convert_float_to_int( f ) _mm_cvtt_ss2si(f)
2103- #define stbir__simdf_convert_float_to_uint8( f ) ((unsigned char)_mm_cvtsi128_si32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(f,STBIR__CONSTF(STBIR_max_uint8_as_float)),_mm_setzero_ps()))))
2104- #define stbir__simdf_convert_float_to_short( f ) ((unsigned short)_mm_cvtsi128_si32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(f,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps()))))
2105-
2106- #define stbir__simdi_to_int( i ) _mm_cvtsi128_si32(i)
2107- #define stbir__simdi_convert_i32_to_float(out, ireg) (out) = _mm_cvtepi32_ps( ireg )
2108- #define stbir__simdf_add( out, reg0, reg1 ) (out) = _mm_add_ps( reg0, reg1 )
2109- #define stbir__simdf_mult( out, reg0, reg1 ) (out) = _mm_mul_ps( reg0, reg1 )
2110- #define stbir__simdf_mult_mem( out, reg, ptr ) (out) = _mm_mul_ps( reg, _mm_loadu_ps( (float const*)(ptr) ) )
2111- #define stbir__simdf_mult1_mem( out, reg, ptr ) (out) = _mm_mul_ss( reg, _mm_load_ss( (float const*)(ptr) ) )
2112- #define stbir__simdf_add_mem( out, reg, ptr ) (out) = _mm_add_ps( reg, _mm_loadu_ps( (float const*)(ptr) ) )
2113- #define stbir__simdf_add1_mem( out, reg, ptr ) (out) = _mm_add_ss( reg, _mm_load_ss( (float const*)(ptr) ) )
2114-
2115- #ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to non-simd
2116- #include <immintrin.h>
2117- #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = _mm_fmadd_ps( mul1, mul2, add )
2118- #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = _mm_fmadd_ss( mul1, mul2, add )
2119- #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = _mm_fmadd_ps( mul, _mm_loadu_ps( (float const*)(ptr) ), add )
2120- #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = _mm_fmadd_ss( mul, _mm_load_ss( (float const*)(ptr) ), add )
2121- #else
2122- #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = _mm_add_ps( add, _mm_mul_ps( mul1, mul2 ) )
2123- #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = _mm_add_ss( add, _mm_mul_ss( mul1, mul2 ) )
2124- #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = _mm_add_ps( add, _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ) )
2125- #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = _mm_add_ss( add, _mm_mul_ss( mul, _mm_load_ss( (float const*)(ptr) ) ) )
2126- #endif
2127-
2128- #define stbir__simdf_add1( out, reg0, reg1 ) (out) = _mm_add_ss( reg0, reg1 )
2129- #define stbir__simdf_mult1( out, reg0, reg1 ) (out) = _mm_mul_ss( reg0, reg1 )
2130-
2131- #define stbir__simdf_and( out, reg0, reg1 ) (out) = _mm_and_ps( reg0, reg1 )
2132- #define stbir__simdf_or( out, reg0, reg1 ) (out) = _mm_or_ps( reg0, reg1 )
2133-
2134- #define stbir__simdf_min( out, reg0, reg1 ) (out) = _mm_min_ps( reg0, reg1 )
2135- #define stbir__simdf_max( out, reg0, reg1 ) (out) = _mm_max_ps( reg0, reg1 )
2136- #define stbir__simdf_min1( out, reg0, reg1 ) (out) = _mm_min_ss( reg0, reg1 )
2137- #define stbir__simdf_max1( out, reg0, reg1 ) (out) = _mm_max_ss( reg0, reg1 )
2138-
2139- #define stbir__simdf_0123ABCDto3ABx( out, reg0, reg1 ) (out)=_mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( _mm_shuffle_ps( reg1,reg0, (0<<0) + (1<<2) + (2<<4) + (3<<6) )), (3<<0) + (0<<2) + (1<<4) + (2<<6) ) )
2140- #define stbir__simdf_0123ABCDto23Ax( out, reg0, reg1 ) (out)=_mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( _mm_shuffle_ps( reg1,reg0, (0<<0) + (1<<2) + (2<<4) + (3<<6) )), (2<<0) + (3<<2) + (0<<4) + (1<<6) ) )
2141-
2142- static const stbir__simdf STBIR_zeroones = { 0.0f,1.0f,0.0f,1.0f };
2143- static const stbir__simdf STBIR_onezeros = { 1.0f,0.0f,1.0f,0.0f };
2144- #define stbir__simdf_aaa1( out, alp, ones ) (out)=_mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( _mm_movehl_ps( ones, alp ) ), (1<<0) + (1<<2) + (1<<4) + (2<<6) ) )
2145- #define stbir__simdf_1aaa( out, alp, ones ) (out)=_mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( _mm_movelh_ps( ones, alp ) ), (0<<0) + (2<<2) + (2<<4) + (2<<6) ) )
2146- #define stbir__simdf_a1a1( out, alp, ones) (out) = _mm_or_ps( _mm_castsi128_ps( _mm_srli_epi64( _mm_castps_si128(alp), 32 ) ), STBIR_zeroones )
2147- #define stbir__simdf_1a1a( out, alp, ones) (out) = _mm_or_ps( _mm_castsi128_ps( _mm_slli_epi64( _mm_castps_si128(alp), 32 ) ), STBIR_onezeros )
2148-
2149- #define stbir__simdf_swiz( reg, one, two, three, four ) _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( reg ), (one<<0) + (two<<2) + (three<<4) + (four<<6) ) )
2150-
2151- #define stbir__simdi_and( out, reg0, reg1 ) (out) = _mm_and_si128( reg0, reg1 )
2152- #define stbir__simdi_or( out, reg0, reg1 ) (out) = _mm_or_si128( reg0, reg1 )
2153- #define stbir__simdi_16madd( out, reg0, reg1 ) (out) = _mm_madd_epi16( reg0, reg1 )
2154-
2155- #define stbir__simdf_pack_to_8bytes(out,aa,bb) \
2156- { \
2157- stbir__simdf af,bf; \
2158- stbir__simdi a,b; \
2159- af = _mm_min_ps( aa, STBIR_max_uint8_as_float ); \
2160- bf = _mm_min_ps( bb, STBIR_max_uint8_as_float ); \
2161- af = _mm_max_ps( af, _mm_setzero_ps() ); \
2162- bf = _mm_max_ps( bf, _mm_setzero_ps() ); \
2163- a = _mm_cvttps_epi32( af ); \
2164- b = _mm_cvttps_epi32( bf ); \
2165- a = _mm_packs_epi32( a, b ); \
2166- out = _mm_packus_epi16( a, a ); \
2167- }
2168-
2169- #define stbir__simdf_load4_transposed( o0, o1, o2, o3, ptr ) \
2170- stbir__simdf_load( o0, (ptr) ); \
2171- stbir__simdf_load( o1, (ptr)+4 ); \
2172- stbir__simdf_load( o2, (ptr)+8 ); \
2173- stbir__simdf_load( o3, (ptr)+12 ); \
2174- { \
2175- __m128 tmp0, tmp1, tmp2, tmp3; \
2176- tmp0 = _mm_unpacklo_ps(o0, o1); \
2177- tmp2 = _mm_unpacklo_ps(o2, o3); \
2178- tmp1 = _mm_unpackhi_ps(o0, o1); \
2179- tmp3 = _mm_unpackhi_ps(o2, o3); \
2180- o0 = _mm_movelh_ps(tmp0, tmp2); \
2181- o1 = _mm_movehl_ps(tmp2, tmp0); \
2182- o2 = _mm_movelh_ps(tmp1, tmp3); \
2183- o3 = _mm_movehl_ps(tmp3, tmp1); \
2184- }
2185-
2186- #define stbir__interleave_pack_and_store_16_u8( ptr, r0, r1, r2, r3 ) \
2187- r0 = _mm_packs_epi32( r0, r1 ); \
2188- r2 = _mm_packs_epi32( r2, r3 ); \
2189- r1 = _mm_unpacklo_epi16( r0, r2 ); \
2190- r3 = _mm_unpackhi_epi16( r0, r2 ); \
2191- r0 = _mm_unpacklo_epi16( r1, r3 ); \
2192- r2 = _mm_unpackhi_epi16( r1, r3 ); \
2193- r0 = _mm_packus_epi16( r0, r2 ); \
2194- stbir__simdi_store( ptr, r0 ); \
2195-
2196- #define stbir__simdi_32shr( out, reg, imm ) out = _mm_srli_epi32( reg, imm )
2197-
2198- #if defined(_MSC_VER) && !defined(__clang__)
2199- // msvc inits with 8 bytes
2200- #define STBIR__CONST_32_TO_8( v ) (char)(unsigned char)((v)&255),(char)(unsigned char)(((v)>>8)&255),(char)(unsigned char)(((v)>>16)&255),(char)(unsigned char)(((v)>>24)&255)
2201- #define STBIR__CONST_4_32i( v ) STBIR__CONST_32_TO_8( v ), STBIR__CONST_32_TO_8( v ), STBIR__CONST_32_TO_8( v ), STBIR__CONST_32_TO_8( v )
2202- #define STBIR__CONST_4d_32i( v0, v1, v2, v3 ) STBIR__CONST_32_TO_8( v0 ), STBIR__CONST_32_TO_8( v1 ), STBIR__CONST_32_TO_8( v2 ), STBIR__CONST_32_TO_8( v3 )
2203- #else
2204- // everything else inits with long long's
2205- #define STBIR__CONST_4_32i( v ) (long long)((((stbir_uint64)(stbir_uint32)(v))<<32)|((stbir_uint64)(stbir_uint32)(v))),(long long)((((stbir_uint64)(stbir_uint32)(v))<<32)|((stbir_uint64)(stbir_uint32)(v)))
2206- #define STBIR__CONST_4d_32i( v0, v1, v2, v3 ) (long long)((((stbir_uint64)(stbir_uint32)(v1))<<32)|((stbir_uint64)(stbir_uint32)(v0))),(long long)((((stbir_uint64)(stbir_uint32)(v3))<<32)|((stbir_uint64)(stbir_uint32)(v2)))
2207- #endif
2208-
2209- #define STBIR__SIMDF_CONST(var, x) stbir__simdf var = { x, x, x, x }
2210- #define STBIR__SIMDI_CONST(var, x) stbir__simdi var = { STBIR__CONST_4_32i(x) }
2211- #define STBIR__CONSTF(var) (var)
2212- #define STBIR__CONSTI(var) (var)
2213-
2214- #if defined(STBIR_AVX) || defined(__SSE4_1__)
2215- #include <smmintrin.h>
2216- #define stbir__simdf_pack_to_8words(out,reg0,reg1) out = _mm_packus_epi32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg0,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())), _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg1,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())))
2217- #else
2218- static STBIR__SIMDI_CONST(stbir__s32_32768, 32768);
2219- static STBIR__SIMDI_CONST(stbir__s16_32768, ((32768<<16)|32768));
2220-
2221- #define stbir__simdf_pack_to_8words(out,reg0,reg1) \
2222- { \
2223- stbir__simdi tmp0,tmp1; \
2224- tmp0 = _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg0,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())); \
2225- tmp1 = _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg1,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())); \
2226- tmp0 = _mm_sub_epi32( tmp0, stbir__s32_32768 ); \
2227- tmp1 = _mm_sub_epi32( tmp1, stbir__s32_32768 ); \
2228- out = _mm_packs_epi32( tmp0, tmp1 ); \
2229- out = _mm_sub_epi16( out, stbir__s16_32768 ); \
2230- }
2231-
2232- #endif
2233-
2234- #define STBIR_SIMD
2235-
2236- // if we detect AVX, set the simd8 defines
2237- #ifdef STBIR_AVX
2238- #include <immintrin.h>
2239- #define STBIR_SIMD8
2240- #define stbir__simdf8 __m256
2241- #define stbir__simdi8 __m256i
2242- #define stbir__simdf8_load( out, ptr ) (out) = _mm256_loadu_ps( (float const *)(ptr) )
2243- #define stbir__simdi8_load( out, ptr ) (out) = _mm256_loadu_si256( (__m256i const *)(ptr) )
2244- #define stbir__simdf8_mult( out, a, b ) (out) = _mm256_mul_ps( (a), (b) )
2245- #define stbir__simdf8_store( ptr, out ) _mm256_storeu_ps( (float*)(ptr), out )
2246- #define stbir__simdi8_store( ptr, reg ) _mm256_storeu_si256( (__m256i*)(ptr), reg )
2247- #define stbir__simdf8_frep8( fval ) _mm256_set1_ps( fval )
2248-
2249- #define stbir__simdf8_min( out, reg0, reg1 ) (out) = _mm256_min_ps( reg0, reg1 )
2250- #define stbir__simdf8_max( out, reg0, reg1 ) (out) = _mm256_max_ps( reg0, reg1 )
2251-
2252- #define stbir__simdf8_add4halves( out, bot4, top8 ) (out) = _mm_add_ps( bot4, _mm256_extractf128_ps( top8, 1 ) )
2253- #define stbir__simdf8_mult_mem( out, reg, ptr ) (out) = _mm256_mul_ps( reg, _mm256_loadu_ps( (float const*)(ptr) ) )
2254- #define stbir__simdf8_add_mem( out, reg, ptr ) (out) = _mm256_add_ps( reg, _mm256_loadu_ps( (float const*)(ptr) ) )
2255- #define stbir__simdf8_add( out, a, b ) (out) = _mm256_add_ps( a, b )
2256- #define stbir__simdf8_load1b( out, ptr ) (out) = _mm256_broadcast_ss( ptr )
2257- #define stbir__simdf_load1rep4( out, ptr ) (out) = _mm_broadcast_ss( ptr ) // avx load instruction
2258-
2259- #define stbir__simdi8_convert_i32_to_float(out, ireg) (out) = _mm256_cvtepi32_ps( ireg )
2260- #define stbir__simdf8_convert_float_to_i32( i, f ) (i) = _mm256_cvttps_epi32(f)
2261-
2262- #define stbir__simdf8_bot4s( out, a, b ) (out) = _mm256_permute2f128_ps(a,b, (0<<0)+(2<<4) )
2263- #define stbir__simdf8_top4s( out, a, b ) (out) = _mm256_permute2f128_ps(a,b, (1<<0)+(3<<4) )
2264-
2265- #define stbir__simdf8_gettop4( reg ) _mm256_extractf128_ps(reg,1)
2266-
2267- #ifdef STBIR_AVX2
2268-
2269- #define stbir__simdi8_expand_u8_to_u32(out0,out1,ireg) \
2270- { \
2271- stbir__simdi8 a, zero =_mm256_setzero_si256();\
2272- a = _mm256_permute4x64_epi64( _mm256_unpacklo_epi8( _mm256_permute4x64_epi64(_mm256_castsi128_si256(ireg),(0<<0)+(2<<2)+(1<<4)+(3<<6)), zero ),(0<<0)+(2<<2)+(1<<4)+(3<<6)); \
2273- out0 = _mm256_unpacklo_epi16( a, zero ); \
2274- out1 = _mm256_unpackhi_epi16( a, zero ); \
2275- }
2276-
2277- #define stbir__simdf8_pack_to_16bytes(out,aa,bb) \
2278- { \
2279- stbir__simdi8 t; \
2280- stbir__simdf8 af,bf; \
2281- stbir__simdi8 a,b; \
2282- af = _mm256_min_ps( aa, STBIR_max_uint8_as_floatX ); \
2283- bf = _mm256_min_ps( bb, STBIR_max_uint8_as_floatX ); \
2284- af = _mm256_max_ps( af, _mm256_setzero_ps() ); \
2285- bf = _mm256_max_ps( bf, _mm256_setzero_ps() ); \
2286- a = _mm256_cvttps_epi32( af ); \
2287- b = _mm256_cvttps_epi32( bf ); \
2288- t = _mm256_permute4x64_epi64( _mm256_packs_epi32( a, b ), (0<<0)+(2<<2)+(1<<4)+(3<<6) ); \
2289- out = _mm256_castsi256_si128( _mm256_permute4x64_epi64( _mm256_packus_epi16( t, t ), (0<<0)+(2<<2)+(1<<4)+(3<<6) ) ); \
2290- }
2291-
2292- #define stbir__simdi8_expand_u16_to_u32(out,ireg) out = _mm256_unpacklo_epi16( _mm256_permute4x64_epi64(_mm256_castsi128_si256(ireg),(0<<0)+(2<<2)+(1<<4)+(3<<6)), _mm256_setzero_si256() );
2293-
2294- #define stbir__simdf8_pack_to_16words(out,aa,bb) \
2295- { \
2296- stbir__simdf8 af,bf; \
2297- stbir__simdi8 a,b; \
2298- af = _mm256_min_ps( aa, STBIR_max_uint16_as_floatX ); \
2299- bf = _mm256_min_ps( bb, STBIR_max_uint16_as_floatX ); \
2300- af = _mm256_max_ps( af, _mm256_setzero_ps() ); \
2301- bf = _mm256_max_ps( bf, _mm256_setzero_ps() ); \
2302- a = _mm256_cvttps_epi32( af ); \
2303- b = _mm256_cvttps_epi32( bf ); \
2304- (out) = _mm256_permute4x64_epi64( _mm256_packus_epi32(a, b), (0<<0)+(2<<2)+(1<<4)+(3<<6) ); \
2305- }
2306-
2307- #else
2308-
2309- #define stbir__simdi8_expand_u8_to_u32(out0,out1,ireg) \
2310- { \
2311- stbir__simdi a,zero = _mm_setzero_si128(); \
2312- a = _mm_unpacklo_epi8( ireg, zero ); \
2313- out0 = _mm256_setr_m128i( _mm_unpacklo_epi16( a, zero ), _mm_unpackhi_epi16( a, zero ) ); \
2314- a = _mm_unpackhi_epi8( ireg, zero ); \
2315- out1 = _mm256_setr_m128i( _mm_unpacklo_epi16( a, zero ), _mm_unpackhi_epi16( a, zero ) ); \
2316- }
2317-
2318- #define stbir__simdf8_pack_to_16bytes(out,aa,bb) \
2319- { \
2320- stbir__simdi t; \
2321- stbir__simdf8 af,bf; \
2322- stbir__simdi8 a,b; \
2323- af = _mm256_min_ps( aa, STBIR_max_uint8_as_floatX ); \
2324- bf = _mm256_min_ps( bb, STBIR_max_uint8_as_floatX ); \
2325- af = _mm256_max_ps( af, _mm256_setzero_ps() ); \
2326- bf = _mm256_max_ps( bf, _mm256_setzero_ps() ); \
2327- a = _mm256_cvttps_epi32( af ); \
2328- b = _mm256_cvttps_epi32( bf ); \
2329- out = _mm_packs_epi32( _mm256_castsi256_si128(a), _mm256_extractf128_si256( a, 1 ) ); \
2330- out = _mm_packus_epi16( out, out ); \
2331- t = _mm_packs_epi32( _mm256_castsi256_si128(b), _mm256_extractf128_si256( b, 1 ) ); \
2332- t = _mm_packus_epi16( t, t ); \
2333- out = _mm_castps_si128( _mm_shuffle_ps( _mm_castsi128_ps(out), _mm_castsi128_ps(t), (0<<0)+(1<<2)+(0<<4)+(1<<6) ) ); \
2334- }
2335-
2336- #define stbir__simdi8_expand_u16_to_u32(out,ireg) \
2337- { \
2338- stbir__simdi a,b,zero = _mm_setzero_si128(); \
2339- a = _mm_unpacklo_epi16( ireg, zero ); \
2340- b = _mm_unpackhi_epi16( ireg, zero ); \
2341- out = _mm256_insertf128_si256( _mm256_castsi128_si256( a ), b, 1 ); \
2342- }
2343-
2344- #define stbir__simdf8_pack_to_16words(out,aa,bb) \
2345- { \
2346- stbir__simdi t0,t1; \
2347- stbir__simdf8 af,bf; \
2348- stbir__simdi8 a,b; \
2349- af = _mm256_min_ps( aa, STBIR_max_uint16_as_floatX ); \
2350- bf = _mm256_min_ps( bb, STBIR_max_uint16_as_floatX ); \
2351- af = _mm256_max_ps( af, _mm256_setzero_ps() ); \
2352- bf = _mm256_max_ps( bf, _mm256_setzero_ps() ); \
2353- a = _mm256_cvttps_epi32( af ); \
2354- b = _mm256_cvttps_epi32( bf ); \
2355- t0 = _mm_packus_epi32( _mm256_castsi256_si128(a), _mm256_extractf128_si256( a, 1 ) ); \
2356- t1 = _mm_packus_epi32( _mm256_castsi256_si128(b), _mm256_extractf128_si256( b, 1 ) ); \
2357- out = _mm256_setr_m128i( t0, t1 ); \
2358- }
2359-
2360- #endif
2361-
2362- static __m256i stbir_00001111 = { STBIR__CONST_4d_32i( 0, 0, 0, 0 ), STBIR__CONST_4d_32i( 1, 1, 1, 1 ) };
2363- #define stbir__simdf8_0123to00001111( out, in ) (out) = _mm256_permutevar_ps ( in, stbir_00001111 )
2364-
2365- static __m256i stbir_22223333 = { STBIR__CONST_4d_32i( 2, 2, 2, 2 ), STBIR__CONST_4d_32i( 3, 3, 3, 3 ) };
2366- #define stbir__simdf8_0123to22223333( out, in ) (out) = _mm256_permutevar_ps ( in, stbir_22223333 )
2367-
2368- #define stbir__simdf8_0123to2222( out, in ) (out) = stbir__simdf_swiz(_mm256_castps256_ps128(in), 2,2,2,2 )
2369-
2370- #define stbir__simdf8_load4b( out, ptr ) (out) = _mm256_broadcast_ps( (__m128 const *)(ptr) )
2371-
2372- static __m256i stbir_00112233 = { STBIR__CONST_4d_32i( 0, 0, 1, 1 ), STBIR__CONST_4d_32i( 2, 2, 3, 3 ) };
2373- #define stbir__simdf8_0123to00112233( out, in ) (out) = _mm256_permutevar_ps ( in, stbir_00112233 )
2374- #define stbir__simdf8_add4( out, a8, b ) (out) = _mm256_add_ps( a8, _mm256_castps128_ps256( b ) )
2375-
2376- static __m256i stbir_load6 = { STBIR__CONST_4_32i( 0x80000000 ), STBIR__CONST_4d_32i( 0x80000000, 0x80000000, 0, 0 ) };
2377- #define stbir__simdf8_load6z( out, ptr ) (out) = _mm256_maskload_ps( ptr, stbir_load6 )
2378-
2379- #define stbir__simdf8_0123to00000000( out, in ) (out) = _mm256_shuffle_ps ( in, in, (0<<0)+(0<<2)+(0<<4)+(0<<6) )
2380- #define stbir__simdf8_0123to11111111( out, in ) (out) = _mm256_shuffle_ps ( in, in, (1<<0)+(1<<2)+(1<<4)+(1<<6) )
2381- #define stbir__simdf8_0123to22222222( out, in ) (out) = _mm256_shuffle_ps ( in, in, (2<<0)+(2<<2)+(2<<4)+(2<<6) )
2382- #define stbir__simdf8_0123to33333333( out, in ) (out) = _mm256_shuffle_ps ( in, in, (3<<0)+(3<<2)+(3<<4)+(3<<6) )
2383- #define stbir__simdf8_0123to21032103( out, in ) (out) = _mm256_shuffle_ps ( in, in, (2<<0)+(1<<2)+(0<<4)+(3<<6) )
2384- #define stbir__simdf8_0123to32103210( out, in ) (out) = _mm256_shuffle_ps ( in, in, (3<<0)+(2<<2)+(1<<4)+(0<<6) )
2385- #define stbir__simdf8_0123to12301230( out, in ) (out) = _mm256_shuffle_ps ( in, in, (1<<0)+(2<<2)+(3<<4)+(0<<6) )
2386- #define stbir__simdf8_0123to10321032( out, in ) (out) = _mm256_shuffle_ps ( in, in, (1<<0)+(0<<2)+(3<<4)+(2<<6) )
2387- #define stbir__simdf8_0123to30123012( out, in ) (out) = _mm256_shuffle_ps ( in, in, (3<<0)+(0<<2)+(1<<4)+(2<<6) )
2388-
2389- #define stbir__simdf8_0123to11331133( out, in ) (out) = _mm256_shuffle_ps ( in, in, (1<<0)+(1<<2)+(3<<4)+(3<<6) )
2390- #define stbir__simdf8_0123to00220022( out, in ) (out) = _mm256_shuffle_ps ( in, in, (0<<0)+(0<<2)+(2<<4)+(2<<6) )
2391-
2392- #define stbir__simdf8_aaa1( out, alp, ones ) (out) = _mm256_blend_ps( alp, ones, (1<<0)+(1<<1)+(1<<2)+(0<<3)+(1<<4)+(1<<5)+(1<<6)+(0<<7)); (out)=_mm256_shuffle_ps( out,out, (3<<0) + (3<<2) + (3<<4) + (0<<6) )
2393- #define stbir__simdf8_1aaa( out, alp, ones ) (out) = _mm256_blend_ps( alp, ones, (0<<0)+(1<<1)+(1<<2)+(1<<3)+(0<<4)+(1<<5)+(1<<6)+(1<<7)); (out)=_mm256_shuffle_ps( out,out, (1<<0) + (0<<2) + (0<<4) + (0<<6) )
2394- #define stbir__simdf8_a1a1( out, alp, ones) (out) = _mm256_blend_ps( alp, ones, (1<<0)+(0<<1)+(1<<2)+(0<<3)+(1<<4)+(0<<5)+(1<<6)+(0<<7)); (out)=_mm256_shuffle_ps( out,out, (1<<0) + (0<<2) + (3<<4) + (2<<6) )
2395- #define stbir__simdf8_1a1a( out, alp, ones) (out) = _mm256_blend_ps( alp, ones, (0<<0)+(1<<1)+(0<<2)+(1<<3)+(0<<4)+(1<<5)+(0<<6)+(1<<7)); (out)=_mm256_shuffle_ps( out,out, (1<<0) + (0<<2) + (3<<4) + (2<<6) )
2396-
2397- #define stbir__simdf8_zero( reg ) (reg) = _mm256_setzero_ps()
2398-
2399- #ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to non-simd
2400- #define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_fmadd_ps( mul1, mul2, add )
2401- #define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_fmadd_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ), add )
2402- #define stbir__simdf8_madd_mem4( out, add, mul, ptr )(out) = _mm256_fmadd_ps( _mm256_setr_m128( mul, _mm_setzero_ps() ), _mm256_setr_m128( _mm_loadu_ps( (float const*)(ptr) ), _mm_setzero_ps() ), add )
2403- #else
2404- #define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul1, mul2 ) )
2405- #define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ) ) )
2406- #define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_setr_m128( _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ), _mm_setzero_ps() ) )
2407- #endif
2408- #define stbir__if_simdf8_cast_to_simdf4( val ) _mm256_castps256_ps128( val )
2409-
2410- #endif
2411-
2412- #ifdef STBIR_FLOORF
2413- #undef STBIR_FLOORF
2414- #endif
2415- #define STBIR_FLOORF stbir_simd_floorf
// Scalar floorf replacement built from SSE intrinsics.
// When SSE4.1 is available (STBIR_AVX, __SSE4_1__ or STBIR_SSE41) this is a
// single _mm_floor_ss. Otherwise x is truncated toward zero by converting to a
// 32-bit integer and back, and 1.0 is subtracted when x compares below the
// truncated value (a negative input with a fractional part).
// NOTE(review): the fallback round-trips through int32, so it presumably relies
// on callers only passing values that fit in a 32-bit integer - confirm.
2416- static stbir__inline float stbir_simd_floorf(float x) // martins floorf
2417- {
2418- #if defined(STBIR_AVX) || defined(__SSE4_1__) || defined(STBIR_SSE41)
2419- __m128 t = _mm_set_ss(x);
2420- return _mm_cvtss_f32( _mm_floor_ss(t, t) );
2421- #else
2422- __m128 f = _mm_set_ss(x);
2423- __m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f));
2424- __m128 r = _mm_add_ss(t, _mm_and_ps(_mm_cmplt_ss(f, t), _mm_set_ss(-1.0f)));
2425- return _mm_cvtss_f32(r);
2426- #endif
2427- }
2428-
2429- #ifdef STBIR_CEILF
2430- #undef STBIR_CEILF
2431- #endif
2432- #define STBIR_CEILF stbir_simd_ceilf
// Scalar ceilf replacement built from SSE intrinsics.
// When SSE4.1 is available (STBIR_AVX, __SSE4_1__ or STBIR_SSE41) this is a
// single _mm_ceil_ss. Otherwise x is truncated toward zero by converting to a
// 32-bit integer and back, and 1.0 is added when the truncated value compares
// below x (a positive input with a fractional part).
// NOTE(review): the fallback round-trips through int32, so it presumably relies
// on callers only passing values that fit in a 32-bit integer - confirm.
2433- static stbir__inline float stbir_simd_ceilf(float x) // martins ceilf
2434- {
2435- #if defined(STBIR_AVX) || defined(__SSE4_1__) || defined(STBIR_SSE41)
2436- __m128 t = _mm_set_ss(x);
2437- return _mm_cvtss_f32( _mm_ceil_ss(t, t) );
2438- #else
2439- __m128 f = _mm_set_ss(x);
2440- __m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f));
2441- __m128 r = _mm_add_ss(t, _mm_and_ps(_mm_cmplt_ss(t, f), _mm_set_ss(1.0f)));
2442- return _mm_cvtss_f32(r);
2443- #endif
2444- }
2445+#include <emmintrin.h>
2446+
2447+#define stbir__simdf __m128
2448+#define stbir__simdi __m128i
2449+
2450+#define stbir_simdi_castf(reg) _mm_castps_si128(reg)
2451+#define stbir_simdf_casti(reg) _mm_castsi128_ps(reg)
2452+
2453+#define stbir__simdf_load(reg, ptr) (reg) = _mm_loadu_ps((float const *)(ptr))
2454+#define stbir__simdi_load(reg, ptr) \
2455+ (reg) = _mm_loadu_si128((stbir__simdi const *)(ptr))
2456+#define stbir__simdf_load1(out, ptr) \
2457+ (out) = _mm_load_ss((float const *)(ptr)) // top values can be random (not
2458+ // denormal or nan for perf)
2459+#define stbir__simdi_load1(out, ptr) \
2460+ (out) = _mm_castps_si128(_mm_load_ss((float const *)(ptr)))
2461+#define stbir__simdf_load1z(out, ptr) \
2462+ (out) = _mm_load_ss((float const *)(ptr)) // top values must be zero
2463+#define stbir__simdf_frep4(fvar) _mm_set_ps1(fvar)
2464+#define stbir__simdf_load1frep4(out, fvar) (out) = _mm_set_ps1(fvar)
2465+#define stbir__simdf_load2(out, ptr) \
2466+ (out) = _mm_castsi128_ps( \
2467+ _mm_loadl_epi64((__m128i *)(ptr))) // top values can be random (not
2468+ // denormal or nan for perf)
2469+#define stbir__simdf_load2z(out, ptr) \
2470+ (out) = _mm_castsi128_ps( \
2471+ _mm_loadl_epi64((__m128i *)(ptr))) // top values must be zero
2472+#define stbir__simdf_load2hmerge(out, reg, ptr) \
2473+ (out) = _mm_castpd_ps(_mm_loadh_pd(_mm_castps_pd(reg), (double *)(ptr)))
2474+
2475+#define stbir__simdf_zeroP() _mm_setzero_ps()
2476+#define stbir__simdf_zero(reg) (reg) = _mm_setzero_ps()
2477+
2478+#define stbir__simdf_store(ptr, reg) _mm_storeu_ps((float *)(ptr), reg)
2479+#define stbir__simdf_store1(ptr, reg) _mm_store_ss((float *)(ptr), reg)
2480+#define stbir__simdf_store2(ptr, reg) \
2481+ _mm_storel_epi64((__m128i *)(ptr), _mm_castps_si128(reg))
2482+#define stbir__simdf_store2h(ptr, reg) \
2483+ _mm_storeh_pd((double *)(ptr), _mm_castps_pd(reg))
2484+
2485+#define stbir__simdi_store(ptr, reg) _mm_storeu_si128((__m128i *)(ptr), reg)
2486+#define stbir__simdi_store1(ptr, reg) \
2487+ _mm_store_ss((float *)(ptr), _mm_castsi128_ps(reg))
2488+#define stbir__simdi_store2(ptr, reg) _mm_storel_epi64((__m128i *)(ptr), (reg))
2489+
2490+#define stbir__prefetch(ptr) _mm_prefetch((char *)(ptr), _MM_HINT_T0)
2491+
// Zero-extends the 16 bytes of ireg into four vectors of 32-bit lanes:
// out0 receives bytes 0-3, out1 bytes 4-7, out2 bytes 8-11, out3 bytes 12-15.
// out2 and out3 are used as scratch for the intermediate 16-bit expansion, and
// out2 is written before ireg is read for the second time, so ireg must not be
// the same variable as out2.
2492+#define stbir__simdi_expand_u8_to_u32(out0, out1, out2, out3, ireg) \
2493+ { \
2494+ stbir__simdi zero = _mm_setzero_si128(); \
2495+ out2 = _mm_unpacklo_epi8(ireg, zero); \
2496+ out3 = _mm_unpackhi_epi8(ireg, zero); \
2497+ out0 = _mm_unpacklo_epi16(out2, zero); \
2498+ out1 = _mm_unpackhi_epi16(out2, zero); \
2499+ out2 = _mm_unpacklo_epi16(out3, zero); \
2500+ out3 = _mm_unpackhi_epi16(out3, zero); \
2501+ }
2502+
2503+#define stbir__simdi_expand_u8_to_1u32(out, ireg) \
2504+ { \
2505+ stbir__simdi zero = _mm_setzero_si128(); \
2506+ out = _mm_unpacklo_epi8(ireg, zero); \
2507+ out = _mm_unpacklo_epi16(out, zero); \
2508+ }
2509+
2510+#define stbir__simdi_expand_u16_to_u32(out0, out1, ireg) \
2511+ { \
2512+ stbir__simdi zero = _mm_setzero_si128(); \
2513+ out0 = _mm_unpacklo_epi16(ireg, zero); \
2514+ out1 = _mm_unpackhi_epi16(ireg, zero); \
2515+ }
2516+
2517+#define stbir__simdf_convert_float_to_i32(i, f) (i) = _mm_cvttps_epi32(f)
2518+#define stbir__simdf_convert_float_to_int(f) _mm_cvtt_ss2si(f)
2519+#define stbir__simdf_convert_float_to_uint8(f) \
2520+ ((unsigned char)_mm_cvtsi128_si32(_mm_cvttps_epi32( \
2521+ _mm_max_ps(_mm_min_ps(f, STBIR__CONSTF(STBIR_max_uint8_as_float)), \
2522+ _mm_setzero_ps()))))
2523+#define stbir__simdf_convert_float_to_short(f) \
2524+ ((unsigned short)_mm_cvtsi128_si32(_mm_cvttps_epi32( \
2525+ _mm_max_ps(_mm_min_ps(f, STBIR__CONSTF(STBIR_max_uint16_as_float)), \
2526+ _mm_setzero_ps()))))
2527+
2528+#define stbir__simdi_to_int(i) _mm_cvtsi128_si32(i)
2529+#define stbir__simdi_convert_i32_to_float(out, ireg) \
2530+ (out) = _mm_cvtepi32_ps(ireg)
2531+#define stbir__simdf_add(out, reg0, reg1) (out) = _mm_add_ps(reg0, reg1)
2532+#define stbir__simdf_mult(out, reg0, reg1) (out) = _mm_mul_ps(reg0, reg1)
2533+#define stbir__simdf_mult_mem(out, reg, ptr) \
2534+ (out) = _mm_mul_ps(reg, _mm_loadu_ps((float const *)(ptr)))
2535+#define stbir__simdf_mult1_mem(out, reg, ptr) \
2536+ (out) = _mm_mul_ss(reg, _mm_load_ss((float const *)(ptr)))
2537+#define stbir__simdf_add_mem(out, reg, ptr) \
2538+ (out) = _mm_add_ps(reg, _mm_loadu_ps((float const *)(ptr)))
2539+#define stbir__simdf_add1_mem(out, reg, ptr) \
2540+ (out) = _mm_add_ss(reg, _mm_load_ss((float const *)(ptr)))
2541+
2542+#ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to
2543+ // non-simd
2544+#include <immintrin.h>
2545+#define stbir__simdf_madd(out, add, mul1, mul2) \
2546+ (out) = _mm_fmadd_ps(mul1, mul2, add)
2547+#define stbir__simdf_madd1(out, add, mul1, mul2) \
2548+ (out) = _mm_fmadd_ss(mul1, mul2, add)
2549+#define stbir__simdf_madd_mem(out, add, mul, ptr) \
2550+ (out) = _mm_fmadd_ps(mul, _mm_loadu_ps((float const *)(ptr)), add)
2551+#define stbir__simdf_madd1_mem(out, add, mul, ptr) \
2552+ (out) = _mm_fmadd_ss(mul, _mm_load_ss((float const *)(ptr)), add)
2553+#else
2554+#define stbir__simdf_madd(out, add, mul1, mul2) \
2555+ (out) = _mm_add_ps(add, _mm_mul_ps(mul1, mul2))
2556+#define stbir__simdf_madd1(out, add, mul1, mul2) \
2557+ (out) = _mm_add_ss(add, _mm_mul_ss(mul1, mul2))
2558+#define stbir__simdf_madd_mem(out, add, mul, ptr) \
2559+ (out) = _mm_add_ps(add, _mm_mul_ps(mul, _mm_loadu_ps((float const *)(ptr))))
2560+#define stbir__simdf_madd1_mem(out, add, mul, ptr) \
2561+ (out) = _mm_add_ss(add, _mm_mul_ss(mul, _mm_load_ss((float const *)(ptr))))
2562+#endif
2563+
2564+#define stbir__simdf_add1(out, reg0, reg1) (out) = _mm_add_ss(reg0, reg1)
2565+#define stbir__simdf_mult1(out, reg0, reg1) (out) = _mm_mul_ss(reg0, reg1)
2566+
2567+#define stbir__simdf_and(out, reg0, reg1) (out) = _mm_and_ps(reg0, reg1)
2568+#define stbir__simdf_or(out, reg0, reg1) (out) = _mm_or_ps(reg0, reg1)
2569+
2570+#define stbir__simdf_min(out, reg0, reg1) (out) = _mm_min_ps(reg0, reg1)
2571+#define stbir__simdf_max(out, reg0, reg1) (out) = _mm_max_ps(reg0, reg1)
2572+#define stbir__simdf_min1(out, reg0, reg1) (out) = _mm_min_ss(reg0, reg1)
2573+#define stbir__simdf_max1(out, reg0, reg1) (out) = _mm_max_ss(reg0, reg1)
2574+
2575+#define stbir__simdf_0123ABCDto3ABx(out, reg0, reg1) \
2576+ (out) = _mm_castsi128_ps(_mm_shuffle_epi32( \
2577+ _mm_castps_si128(_mm_shuffle_ps( \
2578+ reg1, reg0, (0 << 0) + (1 << 2) + (2 << 4) + (3 << 6))), \
2579+ (3 << 0) + (0 << 2) + (1 << 4) + (2 << 6)))
2580+#define stbir__simdf_0123ABCDto23Ax(out, reg0, reg1) \
2581+ (out) = _mm_castsi128_ps(_mm_shuffle_epi32( \
2582+ _mm_castps_si128(_mm_shuffle_ps( \
2583+ reg1, reg0, (0 << 0) + (1 << 2) + (2 << 4) + (3 << 6))), \
2584+ (2 << 0) + (3 << 2) + (0 << 4) + (1 << 6)))
2585+
// Constant lane patterns OR-ed into the shifted input by the a1a1 / 1a1a
// macros further down in this group.
2586+static const stbir__simdf STBIR_zeroones = {0.0f, 1.0f, 0.0f, 1.0f};
2587+static const stbir__simdf STBIR_onezeros = {1.0f, 0.0f, 1.0f, 0.0f};
// out = { alp[3], alp[3], alp[3], ones[2] }: movehl places alp's upper pair in
// lanes 0-1 and ones' upper pair in lanes 2-3, then the shuffle selects
// lanes 1,1,1,2 of that intermediate.
2588+#define stbir__simdf_aaa1(out, alp, ones) \
2589+ (out) = _mm_castsi128_ps( \
2590+ _mm_shuffle_epi32(_mm_castps_si128(_mm_movehl_ps(ones, alp)), \
2591+ (1 << 0) + (1 << 2) + (1 << 4) + (2 << 6)))
// out = { ones[0], alp[0], alp[0], alp[0] }: movelh places ones' lower pair in
// lanes 0-1 and alp's lower pair in lanes 2-3, then the shuffle selects
// lanes 0,2,2,2 of that intermediate.
2592+#define stbir__simdf_1aaa(out, alp, ones) \
2593+ (out) = _mm_castsi128_ps( \
2594+ _mm_shuffle_epi32(_mm_castps_si128(_mm_movelh_ps(ones, alp)), \
2595+ (0 << 0) + (2 << 2) + (2 << 4) + (2 << 6)))
// out = { alp[1], 1, alp[3], 1 }: each 64-bit half is shifted right by 32 bits,
// moving the odd lane into the even slot and zeroing the odd slot, which is
// then filled with 1.0f by the OR. The `ones` argument is not used here.
2596+#define stbir__simdf_a1a1(out, alp, ones) \
2597+ (out) = \
2598+ _mm_or_ps(_mm_castsi128_ps(_mm_srli_epi64(_mm_castps_si128(alp), 32)), \
2599+ STBIR_zeroones)
// out = { 1, alp[0], 1, alp[2] }: each 64-bit half is shifted left by 32 bits,
// moving the even lane into the odd slot and zeroing the even slot, which is
// then filled with 1.0f by the OR. The `ones` argument is not used here.
2600+#define stbir__simdf_1a1a(out, alp, ones) \
2601+ (out) = \
2602+ _mm_or_ps(_mm_castsi128_ps(_mm_slli_epi64(_mm_castps_si128(alp), 32)), \
2603+ STBIR_onezeros)
2604+
// Evaluates to a copy of reg with its four 32-bit lanes rearranged:
// result lane 0 = reg[one], lane 1 = reg[two], lane 2 = reg[three],
// lane 3 = reg[four]. The selectors are pasted into the shift expressions
// without parentheses, so pass plain integer constants in the range 0..3.
2605+#define stbir__simdf_swiz(reg, one, two, three, four) \
2606+ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(reg), \
2607+ (one << 0) + (two << 2) + \
2608+ (three << 4) + (four << 6)))
2609+
2610+#define stbir__simdi_and(out, reg0, reg1) (out) = _mm_and_si128(reg0, reg1)
2611+#define stbir__simdi_or(out, reg0, reg1) (out) = _mm_or_si128(reg0, reg1)
2612+#define stbir__simdi_16madd(out, reg0, reg1) (out) = _mm_madd_epi16(reg0, reg1)
2613+
// Converts the eight floats in aa and bb to eight unsigned bytes.
// Each float is clamped to [0, STBIR_max_uint8_as_float], truncated toward zero
// to an integer, narrowed to 16 bits and then to 8 bits with saturation.
// The eight result bytes (aa's four first, then bb's four) land in the low
// 8 bytes of out; the same register is used for both operands of the final
// pack, so the upper 8 bytes of out are a copy of the lower 8.
2614+#define stbir__simdf_pack_to_8bytes(out, aa, bb) \
2615+ { \
2616+ stbir__simdf af, bf; \
2617+ stbir__simdi a, b; \
2618+ af = _mm_min_ps(aa, STBIR_max_uint8_as_float); \
2619+ bf = _mm_min_ps(bb, STBIR_max_uint8_as_float); \
2620+ af = _mm_max_ps(af, _mm_setzero_ps()); \
2621+ bf = _mm_max_ps(bf, _mm_setzero_ps()); \
2622+ a = _mm_cvttps_epi32(af); \
2623+ b = _mm_cvttps_epi32(bf); \
2624+ a = _mm_packs_epi32(a, b); \
2625+ out = _mm_packus_epi16(a, a); \
2626+ }
2627+
// Loads four consecutive 4-element vectors starting at ptr (unaligned loads at
// ptr, ptr+4, ptr+8 and ptr+12) and transposes them as a 4x4 matrix, so that
// afterwards o0 = {p[0], p[4], p[8], p[12]}, o1 = {p[1], p[5], p[9], p[13]},
// o2 = {p[2], p[6], p[10], p[14]} and o3 = {p[3], p[7], p[11], p[15]}.
// The offsets are added to ptr before the cast to float const *, so ptr is
// expected to point at float-sized elements.
// The expansion is four statements followed by a brace block, not a single
// statement, so it must not be used as the unbraced body of an if/else/loop.
2628+#define stbir__simdf_load4_transposed(o0, o1, o2, o3, ptr) \
2629+ stbir__simdf_load(o0, (ptr)); \
2630+ stbir__simdf_load(o1, (ptr) + 4); \
2631+ stbir__simdf_load(o2, (ptr) + 8); \
2632+ stbir__simdf_load(o3, (ptr) + 12); \
2633+ { \
2634+ __m128 tmp0, tmp1, tmp2, tmp3; \
2635+ tmp0 = _mm_unpacklo_ps(o0, o1); \
2636+ tmp2 = _mm_unpacklo_ps(o2, o3); \
2637+ tmp1 = _mm_unpackhi_ps(o0, o1); \
2638+ tmp3 = _mm_unpackhi_ps(o2, o3); \
2639+ o0 = _mm_movelh_ps(tmp0, tmp2); \
2640+ o1 = _mm_movehl_ps(tmp2, tmp0); \
2641+ o2 = _mm_movelh_ps(tmp1, tmp3); \
2642+ o3 = _mm_movehl_ps(tmp3, tmp1); \
2643+ }
2644+
// Narrows four vectors of 32-bit integers to bytes and stores them interleaved:
// the 16 bytes written to ptr are r0[0], r1[0], r2[0], r3[0], r0[1], r1[1],
// r2[1], r3[1], ... up to r3[3]. Values are first saturated to signed 16-bit
// and then to unsigned 8-bit (0..255).
// All four of r0..r3 are overwritten with intermediate results.
// The expansion is several statements and already ends in a semicolon, so it
// must not be used as the unbraced body of an if/else/loop.
2645+#define stbir__interleave_pack_and_store_16_u8(ptr, r0, r1, r2, r3) \
2646+ r0 = _mm_packs_epi32(r0, r1); \
2647+ r2 = _mm_packs_epi32(r2, r3); \
2648+ r1 = _mm_unpacklo_epi16(r0, r2); \
2649+ r3 = _mm_unpackhi_epi16(r0, r2); \
2650+ r0 = _mm_unpacklo_epi16(r1, r3); \
2651+ r2 = _mm_unpackhi_epi16(r1, r3); \
2652+ r0 = _mm_packus_epi16(r0, r2); \
2653+ stbir__simdi_store(ptr, r0);
2654+
2655+#define stbir__simdi_32shr(out, reg, imm) out = _mm_srli_epi32(reg, imm)
2656+
2657+#if defined(_MSC_VER) && !defined(__clang__)
2658+// msvc inits with 8 bytes
2659+#define STBIR__CONST_32_TO_8(v) \
2660+ (char)(unsigned char)((v) & 255), (char)(unsigned char)(((v) >> 8) & 255), \
2661+ (char)(unsigned char)(((v) >> 16) & 255), \
2662+ (char)(unsigned char)(((v) >> 24) & 255)
2663+#define STBIR__CONST_4_32i(v) \
2664+ STBIR__CONST_32_TO_8(v), STBIR__CONST_32_TO_8(v), STBIR__CONST_32_TO_8(v), \
2665+ STBIR__CONST_32_TO_8(v)
2666+#define STBIR__CONST_4d_32i(v0, v1, v2, v3) \
2667+ STBIR__CONST_32_TO_8(v0), STBIR__CONST_32_TO_8(v1), \
2668+ STBIR__CONST_32_TO_8(v2), STBIR__CONST_32_TO_8(v3)
2669+#else
2670+// everything else inits with long long's
2671+#define STBIR__CONST_4_32i(v) \
2672+ (long long)((((stbir_uint64)(stbir_uint32)(v)) << 32) | \
2673+ ((stbir_uint64)(stbir_uint32)(v))), \
2674+ (long long)((((stbir_uint64)(stbir_uint32)(v)) << 32) | \
2675+ ((stbir_uint64)(stbir_uint32)(v)))
2676+#define STBIR__CONST_4d_32i(v0, v1, v2, v3) \
2677+ (long long)((((stbir_uint64)(stbir_uint32)(v1)) << 32) | \
2678+ ((stbir_uint64)(stbir_uint32)(v0))), \
2679+ (long long)((((stbir_uint64)(stbir_uint32)(v3)) << 32) | \
2680+ ((stbir_uint64)(stbir_uint32)(v2)))
2681+#endif
2682+
2683+#define STBIR__SIMDF_CONST(var, x) stbir__simdf var = {x, x, x, x}
2684+#define STBIR__SIMDI_CONST(var, x) stbir__simdi var = {STBIR__CONST_4_32i(x)}
2685+#define STBIR__CONSTF(var) (var)
2686+#define STBIR__CONSTI(var) (var)
2687+
2688+#if defined(STBIR_AVX) || defined(__SSE4_1__)
2689+#include <smmintrin.h>
2690+#define stbir__simdf_pack_to_8words(out, reg0, reg1) \
2691+ out = _mm_packus_epi32( \
2692+ _mm_cvttps_epi32(_mm_max_ps( \
2693+ _mm_min_ps(reg0, STBIR__CONSTF(STBIR_max_uint16_as_float)), \
2694+ _mm_setzero_ps())), \
2695+ _mm_cvttps_epi32(_mm_max_ps( \
2696+ _mm_min_ps(reg1, STBIR__CONSTF(STBIR_max_uint16_as_float)), \
2697+ _mm_setzero_ps())))
2698+#else
// Bias constants for the SSE2-only 16-bit pack below (this branch is used when
// neither STBIR_AVX nor __SSE4_1__ is defined, so _mm_packus_epi32 is not
// available). stbir__s32_32768 holds 32768 in every 32-bit lane;
// stbir__s16_32768 holds 0x8000 in every 16-bit lane.
2699+static STBIR__SIMDI_CONST(stbir__s32_32768, 32768);
// Unsigned literals are required here: 32768 << 16 does not fit in a signed
// 32-bit int, and left-shifting a signed value out of range is undefined
// behaviour in C. The resulting bit pattern (0x80008000) is unchanged.
2700+static STBIR__SIMDI_CONST(stbir__s16_32768, ((32768u << 16) | 32768u));
2701+
// Converts the eight floats in reg0 and reg1 to eight unsigned 16-bit values
// in out (reg0's four first, then reg1's four).
// Each float is clamped to [0, STBIR_max_uint16_as_float] and truncated toward
// zero. Because SSE2 only has a signed saturating 32->16 pack, 32768 is
// subtracted from every 32-bit lane first so the values fit the signed 16-bit
// range, and after packing 0x8000 is subtracted from every 16-bit lane, which
// wraps around and restores the original unsigned value.
2702+#define stbir__simdf_pack_to_8words(out, reg0, reg1) \
2703+ { \
2704+ stbir__simdi tmp0, tmp1; \
2705+ tmp0 = _mm_cvttps_epi32(_mm_max_ps( \
2706+ _mm_min_ps(reg0, STBIR__CONSTF(STBIR_max_uint16_as_float)), \
2707+ _mm_setzero_ps())); \
2708+ tmp1 = _mm_cvttps_epi32(_mm_max_ps( \
2709+ _mm_min_ps(reg1, STBIR__CONSTF(STBIR_max_uint16_as_float)), \
2710+ _mm_setzero_ps())); \
2711+ tmp0 = _mm_sub_epi32(tmp0, stbir__s32_32768); \
2712+ tmp1 = _mm_sub_epi32(tmp1, stbir__s32_32768); \
2713+ out = _mm_packs_epi32(tmp0, tmp1); \
2714+ out = _mm_sub_epi16(out, stbir__s16_32768); \
2715+ }
2716+
2717+#endif
2718+
2719+#define STBIR_SIMD
2720+
2721+// if we detect AVX, set the simd8 defines
2722+#ifdef STBIR_AVX
2723+#include <immintrin.h>
2724+#define STBIR_SIMD8
2725+#define stbir__simdf8 __m256
2726+#define stbir__simdi8 __m256i
2727+#define stbir__simdf8_load(out, ptr) \
2728+ (out) = _mm256_loadu_ps((float const *)(ptr))
2729+#define stbir__simdi8_load(out, ptr) \
2730+ (out) = _mm256_loadu_si256((__m256i const *)(ptr))
2731+#define stbir__simdf8_mult(out, a, b) (out) = _mm256_mul_ps((a), (b))
2732+#define stbir__simdf8_store(ptr, out) _mm256_storeu_ps((float *)(ptr), out)
2733+#define stbir__simdi8_store(ptr, reg) _mm256_storeu_si256((__m256i *)(ptr), reg)
2734+#define stbir__simdf8_frep8(fval) _mm256_set1_ps(fval)
2735+
2736+#define stbir__simdf8_min(out, reg0, reg1) (out) = _mm256_min_ps(reg0, reg1)
2737+#define stbir__simdf8_max(out, reg0, reg1) (out) = _mm256_max_ps(reg0, reg1)
2738+
2739+#define stbir__simdf8_add4halves(out, bot4, top8) \
2740+ (out) = _mm_add_ps(bot4, _mm256_extractf128_ps(top8, 1))
2741+#define stbir__simdf8_mult_mem(out, reg, ptr) \
2742+ (out) = _mm256_mul_ps(reg, _mm256_loadu_ps((float const *)(ptr)))
2743+#define stbir__simdf8_add_mem(out, reg, ptr) \
2744+ (out) = _mm256_add_ps(reg, _mm256_loadu_ps((float const *)(ptr)))
2745+#define stbir__simdf8_add(out, a, b) (out) = _mm256_add_ps(a, b)
2746+#define stbir__simdf8_load1b(out, ptr) (out) = _mm256_broadcast_ss(ptr)
2747+#define stbir__simdf_load1rep4(out, ptr) \
2748+ (out) = _mm_broadcast_ss(ptr) // avx load instruction
2749+
2750+#define stbir__simdi8_convert_i32_to_float(out, ireg) \
2751+ (out) = _mm256_cvtepi32_ps(ireg)
2752+#define stbir__simdf8_convert_float_to_i32(i, f) (i) = _mm256_cvttps_epi32(f)
2753+
2754+#define stbir__simdf8_bot4s(out, a, b) \
2755+ (out) = _mm256_permute2f128_ps(a, b, (0 << 0) + (2 << 4))
2756+#define stbir__simdf8_top4s(out, a, b) \
2757+ (out) = _mm256_permute2f128_ps(a, b, (1 << 0) + (3 << 4))
2758+
2759+#define stbir__simdf8_gettop4(reg) _mm256_extractf128_ps(reg, 1)
2760+
2761+#ifdef STBIR_AVX2
2762+
2763+#define stbir__simdi8_expand_u8_to_u32(out0, out1, ireg) \
2764+ { \
2765+ stbir__simdi8 a, zero = _mm256_setzero_si256(); \
2766+ a = _mm256_permute4x64_epi64( \
2767+ _mm256_unpacklo_epi8( \
2768+ _mm256_permute4x64_epi64(_mm256_castsi128_si256(ireg), \
2769+ (0 << 0) + (2 << 2) + (1 << 4) + \
2770+ (3 << 6)), \
2771+ zero), \
2772+ (0 << 0) + (2 << 2) + (1 << 4) + (3 << 6)); \
2773+ out0 = _mm256_unpacklo_epi16(a, zero); \
2774+ out1 = _mm256_unpackhi_epi16(a, zero); \
2775+ }
2776+
2777+#define stbir__simdf8_pack_to_16bytes(out, aa, bb) \
2778+ { \
2779+ stbir__simdi8 t; \
2780+ stbir__simdf8 af, bf; \
2781+ stbir__simdi8 a, b; \
2782+ af = _mm256_min_ps(aa, STBIR_max_uint8_as_floatX); \
2783+ bf = _mm256_min_ps(bb, STBIR_max_uint8_as_floatX); \
2784+ af = _mm256_max_ps(af, _mm256_setzero_ps()); \
2785+ bf = _mm256_max_ps(bf, _mm256_setzero_ps()); \
2786+ a = _mm256_cvttps_epi32(af); \
2787+ b = _mm256_cvttps_epi32(bf); \
2788+ t = _mm256_permute4x64_epi64(_mm256_packs_epi32(a, b), \
2789+ (0 << 0) + (2 << 2) + (1 << 4) + \
2790+ (3 << 6)); \
2791+ out = _mm256_castsi256_si128(_mm256_permute4x64_epi64( \
2792+ _mm256_packus_epi16(t, t), \
2793+ (0 << 0) + (2 << 2) + (1 << 4) + (3 << 6))); \
2794+ }
2795+
2796+#define stbir__simdi8_expand_u16_to_u32(out, ireg) \
2797+ out = _mm256_unpacklo_epi16( \
2798+ _mm256_permute4x64_epi64(_mm256_castsi128_si256(ireg), \
2799+ (0 << 0) + (2 << 2) + (1 << 4) + (3 << 6)), \
2800+ _mm256_setzero_si256());
2801+
2802+#define stbir__simdf8_pack_to_16words(out, aa, bb) \
2803+ { \
2804+ stbir__simdf8 af, bf; \
2805+ stbir__simdi8 a, b; \
2806+ af = _mm256_min_ps(aa, STBIR_max_uint16_as_floatX); \
2807+ bf = _mm256_min_ps(bb, STBIR_max_uint16_as_floatX); \
2808+ af = _mm256_max_ps(af, _mm256_setzero_ps()); \
2809+ bf = _mm256_max_ps(bf, _mm256_setzero_ps()); \
2810+ a = _mm256_cvttps_epi32(af); \
2811+ b = _mm256_cvttps_epi32(bf); \
2812+ (out) = _mm256_permute4x64_epi64(_mm256_packus_epi32(a, b), \
2813+ (0 << 0) + (2 << 2) + (1 << 4) + \
2814+ (3 << 6)); \
2815+ }
2816+
2817+#else
2818+
2819+#define stbir__simdi8_expand_u8_to_u32(out0, out1, ireg) \
2820+ { \
2821+ stbir__simdi a, zero = _mm_setzero_si128(); \
2822+ a = _mm_unpacklo_epi8(ireg, zero); \
2823+ out0 = _mm256_setr_m128i(_mm_unpacklo_epi16(a, zero), \
2824+ _mm_unpackhi_epi16(a, zero)); \
2825+ a = _mm_unpackhi_epi8(ireg, zero); \
2826+ out1 = _mm256_setr_m128i(_mm_unpacklo_epi16(a, zero), \
2827+ _mm_unpackhi_epi16(a, zero)); \
2828+ }
2829+
2830+#define stbir__simdf8_pack_to_16bytes(out, aa, bb) \
2831+ { \
2832+ stbir__simdi t; \
2833+ stbir__simdf8 af, bf; \
2834+ stbir__simdi8 a, b; \
2835+ af = _mm256_min_ps(aa, STBIR_max_uint8_as_floatX); \
2836+ bf = _mm256_min_ps(bb, STBIR_max_uint8_as_floatX); \
2837+ af = _mm256_max_ps(af, _mm256_setzero_ps()); \
2838+ bf = _mm256_max_ps(bf, _mm256_setzero_ps()); \
2839+ a = _mm256_cvttps_epi32(af); \
2840+ b = _mm256_cvttps_epi32(bf); \
2841+ out = _mm_packs_epi32(_mm256_castsi256_si128(a), \
2842+ _mm256_extractf128_si256(a, 1)); \
2843+ out = _mm_packus_epi16(out, out); \
2844+ t = _mm_packs_epi32(_mm256_castsi256_si128(b), \
2845+ _mm256_extractf128_si256(b, 1)); \
2846+ t = _mm_packus_epi16(t, t); \
2847+ out = _mm_castps_si128( \
2848+ _mm_shuffle_ps(_mm_castsi128_ps(out), _mm_castsi128_ps(t), \
2849+ (0 << 0) + (1 << 2) + (0 << 4) + (1 << 6))); \
2850+ }
2851+
2852+#define stbir__simdi8_expand_u16_to_u32(out, ireg) \
2853+ { \
2854+ stbir__simdi a, b, zero = _mm_setzero_si128(); \
2855+ a = _mm_unpacklo_epi16(ireg, zero); \
2856+ b = _mm_unpackhi_epi16(ireg, zero); \
2857+ out = _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1); \
2858+ }
2859+
2860+#define stbir__simdf8_pack_to_16words(out, aa, bb) \
2861+ { \
2862+ stbir__simdi t0, t1; \
2863+ stbir__simdf8 af, bf; \
2864+ stbir__simdi8 a, b; \
2865+ af = _mm256_min_ps(aa, STBIR_max_uint16_as_floatX); \
2866+ bf = _mm256_min_ps(bb, STBIR_max_uint16_as_floatX); \
2867+ af = _mm256_max_ps(af, _mm256_setzero_ps()); \
2868+ bf = _mm256_max_ps(bf, _mm256_setzero_ps()); \
2869+ a = _mm256_cvttps_epi32(af); \
2870+ b = _mm256_cvttps_epi32(bf); \
2871+ t0 = _mm_packus_epi32(_mm256_castsi256_si128(a), \
2872+ _mm256_extractf128_si256(a, 1)); \
2873+ t1 = _mm_packus_epi32(_mm256_castsi256_si128(b), \
2874+ _mm256_extractf128_si256(b, 1)); \
2875+ out = _mm256_setr_m128i(t0, t1); \
2876+ }
2877+
2878+#endif
2879+
// Index vector for _mm256_permutevar_ps: selects element 0 in every slot of the
// low 128-bit half and element 1 in every slot of the high 128-bit half.
// _mm256_permutevar_ps picks within each 128-bit half independently, so the
// high half of the result comes from the high half of `in`.
// NOTE(review): the "0123to..." naming suggests `in` is expected to hold the
// same four values in both halves (e.g. after a 128-bit broadcast) - confirm.
2880+static __m256i stbir_00001111 = {STBIR__CONST_4d_32i(0, 0, 0, 0),
2881+ STBIR__CONST_4d_32i(1, 1, 1, 1)};
2882+#define stbir__simdf8_0123to00001111(out, in) \
2883+ (out) = _mm256_permutevar_ps(in, stbir_00001111)
2884+
// Same scheme: element 2 across the low half, element 3 across the high half.
2885+static __m256i stbir_22223333 = {STBIR__CONST_4d_32i(2, 2, 2, 2),
2886+ STBIR__CONST_4d_32i(3, 3, 3, 3)};
2887+#define stbir__simdf8_0123to22223333(out, in) \
2888+ (out) = _mm256_permutevar_ps(in, stbir_22223333)
2889+
// Produces a 128-bit vector: element 2 of the low half of `in`, replicated
// into all four lanes.
2890+#define stbir__simdf8_0123to2222(out, in) \
2891+ (out) = stbir__simdf_swiz(_mm256_castps256_ps128(in), 2, 2, 2, 2)
2892+
// Loads four floats from ptr and duplicates them into both 128-bit halves.
2893+#define stbir__simdf8_load4b(out, ptr) \
2894+ (out) = _mm256_broadcast_ps((__m128 const *)(ptr))
2895+
// Index vector selecting elements 0,0,1,1 in the low half and 2,2,3,3 in the
// high half (again relative to each 128-bit half of `in`).
2896+static __m256i stbir_00112233 = {STBIR__CONST_4d_32i(0, 0, 1, 1),
2897+ STBIR__CONST_4d_32i(2, 2, 3, 3)};
2898+#define stbir__simdf8_0123to00112233(out, in) \
2899+ (out) = _mm256_permutevar_ps(in, stbir_00112233)
// Adds the 128-bit vector b to the low four lanes of a8.
// NOTE(review): _mm256_castps128_ps256 leaves the upper half unspecified, so
// the upper four lanes of out are presumably ignored by callers - confirm.
2900+#define stbir__simdf8_add4(out, a8, b) \
2901+ (out) = _mm256_add_ps(a8, _mm256_castps128_ps256(b))
2902+
// Load mask with the sign bit set in the first six 32-bit lanes and clear in
// the last two: _mm256_maskload_ps reads six floats from ptr and zeroes
// lanes 6 and 7.
2903+static __m256i stbir_load6 = {
2904+ STBIR__CONST_4_32i(0x80000000),
2905+ STBIR__CONST_4d_32i(0x80000000, 0x80000000, 0, 0)};
2906+#define stbir__simdf8_load6z(out, ptr) \
2907+ (out) = _mm256_maskload_ps(ptr, stbir_load6)
2908+
2909+#define stbir__simdf8_0123to00000000(out, in) \
2910+ (out) = _mm256_shuffle_ps(in, in, (0 << 0) + (0 << 2) + (0 << 4) + (0 << 6))
2911+#define stbir__simdf8_0123to11111111(out, in) \
2912+ (out) = _mm256_shuffle_ps(in, in, (1 << 0) + (1 << 2) + (1 << 4) + (1 << 6))
2913+#define stbir__simdf8_0123to22222222(out, in) \
2914+ (out) = _mm256_shuffle_ps(in, in, (2 << 0) + (2 << 2) + (2 << 4) + (2 << 6))
2915+#define stbir__simdf8_0123to33333333(out, in) \
2916+ (out) = _mm256_shuffle_ps(in, in, (3 << 0) + (3 << 2) + (3 << 4) + (3 << 6))
2917+#define stbir__simdf8_0123to21032103(out, in) \
2918+ (out) = _mm256_shuffle_ps(in, in, (2 << 0) + (1 << 2) + (0 << 4) + (3 << 6))
2919+#define stbir__simdf8_0123to32103210(out, in) \
2920+ (out) = _mm256_shuffle_ps(in, in, (3 << 0) + (2 << 2) + (1 << 4) + (0 << 6))
2921+#define stbir__simdf8_0123to12301230(out, in) \
2922+ (out) = _mm256_shuffle_ps(in, in, (1 << 0) + (2 << 2) + (3 << 4) + (0 << 6))
2923+#define stbir__simdf8_0123to10321032(out, in) \
2924+ (out) = _mm256_shuffle_ps(in, in, (1 << 0) + (0 << 2) + (3 << 4) + (2 << 6))
2925+#define stbir__simdf8_0123to30123012(out, in) \
2926+ (out) = _mm256_shuffle_ps(in, in, (3 << 0) + (0 << 2) + (1 << 4) + (2 << 6))
2927+
2928+#define stbir__simdf8_0123to11331133(out, in) \
2929+ (out) = _mm256_shuffle_ps(in, in, (1 << 0) + (1 << 2) + (3 << 4) + (3 << 6))
2930+#define stbir__simdf8_0123to00220022(out, in) \
2931+ (out) = _mm256_shuffle_ps(in, in, (0 << 0) + (0 << 2) + (2 << 4) + (2 << 6))
2932+
// The four macros below build alpha/one patterns independently in each 128-bit
// half of the 256-bit register: a blend first replaces selected lanes of alp
// with the corresponding lanes of ones, then an in-half shuffle moves the
// lanes into place. Lane numbers in the comments are relative to one half.
// Each macro expands to TWO statements separated by ';', so none of them may
// be used as the unbraced body of an if/else/loop.
//
// aaa1: blend gives {1, 1, 1, alp[3]}; shuffle gives {alp[3], alp[3], alp[3], 1}.
2933+#define stbir__simdf8_aaa1(out, alp, ones) \
2934+ (out) = _mm256_blend_ps(alp, ones, \
2935+ (1 << 0) + (1 << 1) + (1 << 2) + (0 << 3) + \
2936+ (1 << 4) + (1 << 5) + (1 << 6) + (0 << 7)); \
2937+ (out) = \
2938+ _mm256_shuffle_ps(out, out, (3 << 0) + (3 << 2) + (3 << 4) + (0 << 6))
// 1aaa: blend gives {alp[0], 1, 1, 1}; shuffle gives {1, alp[0], alp[0], alp[0]}.
2939+#define stbir__simdf8_1aaa(out, alp, ones) \
2940+ (out) = _mm256_blend_ps(alp, ones, \
2941+ (0 << 0) + (1 << 1) + (1 << 2) + (1 << 3) + \
2942+ (0 << 4) + (1 << 5) + (1 << 6) + (1 << 7)); \
2943+ (out) = \
2944+ _mm256_shuffle_ps(out, out, (1 << 0) + (0 << 2) + (0 << 4) + (0 << 6))
// a1a1: blend gives {1, alp[1], 1, alp[3]}; swapping neighbouring lanes gives
// {alp[1], 1, alp[3], 1}.
2945+#define stbir__simdf8_a1a1(out, alp, ones) \
2946+ (out) = _mm256_blend_ps(alp, ones, \
2947+ (1 << 0) + (0 << 1) + (1 << 2) + (0 << 3) + \
2948+ (1 << 4) + (0 << 5) + (1 << 6) + (0 << 7)); \
2949+ (out) = \
2950+ _mm256_shuffle_ps(out, out, (1 << 0) + (0 << 2) + (3 << 4) + (2 << 6))
// 1a1a: blend gives {alp[0], 1, alp[2], 1}; swapping neighbouring lanes gives
// {1, alp[0], 1, alp[2]}.
2951+#define stbir__simdf8_1a1a(out, alp, ones) \
2952+ (out) = _mm256_blend_ps(alp, ones, \
2953+ (0 << 0) + (1 << 1) + (0 << 2) + (1 << 3) + \
2954+ (0 << 4) + (1 << 5) + (0 << 6) + (1 << 7)); \
2955+ (out) = \
2956+ _mm256_shuffle_ps(out, out, (1 << 0) + (0 << 2) + (3 << 4) + (2 << 6))
2957+
2958+#define stbir__simdf8_zero(reg) (reg) = _mm256_setzero_ps()
2959+
2960+#ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to
2961+ // non-simd
2962+#define stbir__simdf8_madd(out, add, mul1, mul2) \
2963+ (out) = _mm256_fmadd_ps(mul1, mul2, add)
2964+#define stbir__simdf8_madd_mem(out, add, mul, ptr) \
2965+ (out) = _mm256_fmadd_ps(mul, _mm256_loadu_ps((float const *)(ptr)), add)
2966+#define stbir__simdf8_madd_mem4(out, add, mul, ptr) \
2967+ (out) = \
2968+ _mm256_fmadd_ps(_mm256_setr_m128(mul, _mm_setzero_ps()), \
2969+ _mm256_setr_m128(_mm_loadu_ps((float const *)(ptr)), \
2970+ _mm_setzero_ps()), \
2971+ add)
2972+#else
2973+#define stbir__simdf8_madd(out, add, mul1, mul2) \
2974+ (out) = _mm256_add_ps(add, _mm256_mul_ps(mul1, mul2))
2975+#define stbir__simdf8_madd_mem(out, add, mul, ptr) \
2976+ (out) = _mm256_add_ps( \
2977+ add, _mm256_mul_ps(mul, _mm256_loadu_ps((float const *)(ptr))))
2978+#define stbir__simdf8_madd_mem4(out, add, mul, ptr) \
2979+ (out) = _mm256_add_ps( \
2980+ add, \
2981+ _mm256_setr_m128(_mm_mul_ps(mul, _mm_loadu_ps((float const *)(ptr))), \
2982+ _mm_setzero_ps()))
2983+#endif
2984+#define stbir__if_simdf8_cast_to_simdf4(val) _mm256_castps256_ps128(val)
2985+
2986+#endif
2987+
2988+#ifdef STBIR_FLOORF
2989+#undef STBIR_FLOORF
2990+#endif
2991+#define STBIR_FLOORF stbir_simd_floorf
2992+static stbir__inline float
2993+stbir_simd_floorf(float x) // martins floorf
2994+{
2995+#if defined(STBIR_AVX) || defined(__SSE4_1__) || defined(STBIR_SSE41)
2996+ __m128 t = _mm_set_ss(x);
2997+ return _mm_cvtss_f32(_mm_floor_ss(t, t));
2998+#else
2999+ __m128 f = _mm_set_ss(x);
3000+ __m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f));
3001+ __m128 r = _mm_add_ss(t, _mm_and_ps(_mm_cmplt_ss(f, t), _mm_set_ss(-1.0f)));
3002+ return _mm_cvtss_f32(r);
3003+#endif
3004+}
3005+
3006+#ifdef STBIR_CEILF
3007+#undef STBIR_CEILF
3008+#endif
3009+#define STBIR_CEILF stbir_simd_ceilf
3010+static stbir__inline float
3011+stbir_simd_ceilf(float x) // martins ceilf
3012+{
3013+#if defined(STBIR_AVX) || defined(__SSE4_1__) || defined(STBIR_SSE41)
3014+ __m128 t = _mm_set_ss(x);
3015+ return _mm_cvtss_f32(_mm_ceil_ss(t, t));
3016+#else
3017+ __m128 f = _mm_set_ss(x);
3018+ __m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f));
3019+ __m128 r = _mm_add_ss(t, _mm_and_ps(_mm_cmplt_ss(t, f), _mm_set_ss(1.0f)));
3020+ return _mm_cvtss_f32(r);
3021+#endif
3022+}
3023
3024 #elif defined(STBIR_NEON)
3025
3026- #include <arm_neon.h>
3027-
3028- #define stbir__simdf float32x4_t
3029- #define stbir__simdi uint32x4_t
3030-
3031- #define stbir_simdi_castf( reg ) vreinterpretq_u32_f32(reg)
3032- #define stbir_simdf_casti( reg ) vreinterpretq_f32_u32(reg)
3033-
3034- #define stbir__simdf_load( reg, ptr ) (reg) = vld1q_f32( (float const*)(ptr) )
3035- #define stbir__simdi_load( reg, ptr ) (reg) = vld1q_u32( (uint32_t const*)(ptr) )
3036- #define stbir__simdf_load1( out, ptr ) (out) = vld1q_dup_f32( (float const*)(ptr) ) // top values can be random (not denormal or nan for perf)
3037- #define stbir__simdi_load1( out, ptr ) (out) = vld1q_dup_u32( (uint32_t const*)(ptr) )
3038- #define stbir__simdf_load1z( out, ptr ) (out) = vld1q_lane_f32( (float const*)(ptr), vdupq_n_f32(0), 0 ) // top values must be zero
3039- #define stbir__simdf_frep4( fvar ) vdupq_n_f32( fvar )
3040- #define stbir__simdf_load1frep4( out, fvar ) (out) = vdupq_n_f32( fvar )
3041- #define stbir__simdf_load2( out, ptr ) (out) = vcombine_f32( vld1_f32( (float const*)(ptr) ), vcreate_f32(0) ) // top values can be random (not denormal or nan for perf)
3042- #define stbir__simdf_load2z( out, ptr ) (out) = vcombine_f32( vld1_f32( (float const*)(ptr) ), vcreate_f32(0) ) // top values must be zero
3043- #define stbir__simdf_load2hmerge( out, reg, ptr ) (out) = vcombine_f32( vget_low_f32(reg), vld1_f32( (float const*)(ptr) ) )
3044-
3045- #define stbir__simdf_zeroP() vdupq_n_f32(0)
3046- #define stbir__simdf_zero( reg ) (reg) = vdupq_n_f32(0)
3047-
3048- #define stbir__simdf_store( ptr, reg ) vst1q_f32( (float*)(ptr), reg )
3049- #define stbir__simdf_store1( ptr, reg ) vst1q_lane_f32( (float*)(ptr), reg, 0)
3050- #define stbir__simdf_store2( ptr, reg ) vst1_f32( (float*)(ptr), vget_low_f32(reg) )
3051- #define stbir__simdf_store2h( ptr, reg ) vst1_f32( (float*)(ptr), vget_high_f32(reg) )
3052-
3053- #define stbir__simdi_store( ptr, reg ) vst1q_u32( (uint32_t*)(ptr), reg )
3054- #define stbir__simdi_store1( ptr, reg ) vst1q_lane_u32( (uint32_t*)(ptr), reg, 0 )
3055- #define stbir__simdi_store2( ptr, reg ) vst1_u32( (uint32_t*)(ptr), vget_low_u32(reg) )
3056-
3057- #define stbir__prefetch( ptr )
3058-
3059- #define stbir__simdi_expand_u8_to_u32(out0,out1,out2,out3,ireg) \
3060- { \
3061- uint16x8_t l = vmovl_u8( vget_low_u8 ( vreinterpretq_u8_u32(ireg) ) ); \
3062- uint16x8_t h = vmovl_u8( vget_high_u8( vreinterpretq_u8_u32(ireg) ) ); \
3063- out0 = vmovl_u16( vget_low_u16 ( l ) ); \
3064- out1 = vmovl_u16( vget_high_u16( l ) ); \
3065- out2 = vmovl_u16( vget_low_u16 ( h ) ); \
3066- out3 = vmovl_u16( vget_high_u16( h ) ); \
3067- }
3068-
3069- #define stbir__simdi_expand_u8_to_1u32(out,ireg) \
3070- { \
3071- uint16x8_t tmp = vmovl_u8( vget_low_u8( vreinterpretq_u8_u32(ireg) ) ); \
3072- out = vmovl_u16( vget_low_u16( tmp ) ); \
3073- }
3074-
3075- #define stbir__simdi_expand_u16_to_u32(out0,out1,ireg) \
3076- { \
3077- uint16x8_t tmp = vreinterpretq_u16_u32(ireg); \
3078- out0 = vmovl_u16( vget_low_u16 ( tmp ) ); \
3079- out1 = vmovl_u16( vget_high_u16( tmp ) ); \
3080- }
3081-
3082- #define stbir__simdf_convert_float_to_i32( i, f ) (i) = vreinterpretq_u32_s32( vcvtq_s32_f32(f) )
3083- #define stbir__simdf_convert_float_to_int( f ) vgetq_lane_s32(vcvtq_s32_f32(f), 0)
3084- #define stbir__simdi_to_int( i ) (int)vgetq_lane_u32(i, 0)
3085- #define stbir__simdf_convert_float_to_uint8( f ) ((unsigned char)vgetq_lane_s32(vcvtq_s32_f32(vmaxq_f32(vminq_f32(f,STBIR__CONSTF(STBIR_max_uint8_as_float)),vdupq_n_f32(0))), 0))
3086- #define stbir__simdf_convert_float_to_short( f ) ((unsigned short)vgetq_lane_s32(vcvtq_s32_f32(vmaxq_f32(vminq_f32(f,STBIR__CONSTF(STBIR_max_uint16_as_float)),vdupq_n_f32(0))), 0))
3087- #define stbir__simdi_convert_i32_to_float(out, ireg) (out) = vcvtq_f32_s32( vreinterpretq_s32_u32(ireg) )
3088- #define stbir__simdf_add( out, reg0, reg1 ) (out) = vaddq_f32( reg0, reg1 )
3089- #define stbir__simdf_mult( out, reg0, reg1 ) (out) = vmulq_f32( reg0, reg1 )
3090- #define stbir__simdf_mult_mem( out, reg, ptr ) (out) = vmulq_f32( reg, vld1q_f32( (float const*)(ptr) ) )
3091- #define stbir__simdf_mult1_mem( out, reg, ptr ) (out) = vmulq_f32( reg, vld1q_dup_f32( (float const*)(ptr) ) )
3092- #define stbir__simdf_add_mem( out, reg, ptr ) (out) = vaddq_f32( reg, vld1q_f32( (float const*)(ptr) ) )
3093- #define stbir__simdf_add1_mem( out, reg, ptr ) (out) = vaddq_f32( reg, vld1q_dup_f32( (float const*)(ptr) ) )
3094-
3095- #ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to non-simd (and also x64 no madd to arm madd)
3096- #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = vfmaq_f32( add, mul1, mul2 )
3097- #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = vfmaq_f32( add, mul1, mul2 )
3098- #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = vfmaq_f32( add, mul, vld1q_f32( (float const*)(ptr) ) )
3099- #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = vfmaq_f32( add, mul, vld1q_dup_f32( (float const*)(ptr) ) )
3100- #else
3101- #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = vaddq_f32( add, vmulq_f32( mul1, mul2 ) )
3102- #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = vaddq_f32( add, vmulq_f32( mul1, mul2 ) )
3103- #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = vaddq_f32( add, vmulq_f32( mul, vld1q_f32( (float const*)(ptr) ) ) )
3104- #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = vaddq_f32( add, vmulq_f32( mul, vld1q_dup_f32( (float const*)(ptr) ) ) )
3105- #endif
3106-
3107- #define stbir__simdf_add1( out, reg0, reg1 ) (out) = vaddq_f32( reg0, reg1 )
3108- #define stbir__simdf_mult1( out, reg0, reg1 ) (out) = vmulq_f32( reg0, reg1 )
3109-
3110- #define stbir__simdf_and( out, reg0, reg1 ) (out) = vreinterpretq_f32_u32( vandq_u32( vreinterpretq_u32_f32(reg0), vreinterpretq_u32_f32(reg1) ) )
3111- #define stbir__simdf_or( out, reg0, reg1 ) (out) = vreinterpretq_f32_u32( vorrq_u32( vreinterpretq_u32_f32(reg0), vreinterpretq_u32_f32(reg1) ) )
3112-
3113- #define stbir__simdf_min( out, reg0, reg1 ) (out) = vminq_f32( reg0, reg1 )
3114- #define stbir__simdf_max( out, reg0, reg1 ) (out) = vmaxq_f32( reg0, reg1 )
3115- #define stbir__simdf_min1( out, reg0, reg1 ) (out) = vminq_f32( reg0, reg1 )
3116- #define stbir__simdf_max1( out, reg0, reg1 ) (out) = vmaxq_f32( reg0, reg1 )
3117-
3118- #define stbir__simdf_0123ABCDto3ABx( out, reg0, reg1 ) (out) = vextq_f32( reg0, reg1, 3 )
3119- #define stbir__simdf_0123ABCDto23Ax( out, reg0, reg1 ) (out) = vextq_f32( reg0, reg1, 2 )
3120-
3121- #define stbir__simdf_a1a1( out, alp, ones ) (out) = vzipq_f32(vuzpq_f32(alp, alp).val[1], ones).val[0]
3122- #define stbir__simdf_1a1a( out, alp, ones ) (out) = vzipq_f32(ones, vuzpq_f32(alp, alp).val[0]).val[0]
3123-
3124- #if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ )
3125-
3126- #define stbir__simdf_aaa1( out, alp, ones ) (out) = vcopyq_laneq_f32(vdupq_n_f32(vgetq_lane_f32(alp, 3)), 3, ones, 3)
3127- #define stbir__simdf_1aaa( out, alp, ones ) (out) = vcopyq_laneq_f32(vdupq_n_f32(vgetq_lane_f32(alp, 0)), 0, ones, 0)
3128-
3129- #if defined( _MSC_VER ) && !defined(__clang__)
3130- #define stbir_make16(a,b,c,d) vcombine_u8( \
3131- vcreate_u8( (4*a+0) | ((4*a+1)<<8) | ((4*a+2)<<16) | ((4*a+3)<<24) | \
3132- ((stbir_uint64)(4*b+0)<<32) | ((stbir_uint64)(4*b+1)<<40) | ((stbir_uint64)(4*b+2)<<48) | ((stbir_uint64)(4*b+3)<<56)), \
3133- vcreate_u8( (4*c+0) | ((4*c+1)<<8) | ((4*c+2)<<16) | ((4*c+3)<<24) | \
3134- ((stbir_uint64)(4*d+0)<<32) | ((stbir_uint64)(4*d+1)<<40) | ((stbir_uint64)(4*d+2)<<48) | ((stbir_uint64)(4*d+3)<<56) ) )
3135-
3136- static stbir__inline uint8x16x2_t stbir_make16x2(float32x4_t rega,float32x4_t regb)
3137- {
3138- uint8x16x2_t r = { vreinterpretq_u8_f32(rega), vreinterpretq_u8_f32(regb) };
3139- return r;
3140- }
3141- #else
3142- #define stbir_make16(a,b,c,d) (uint8x16_t){4*a+0,4*a+1,4*a+2,4*a+3,4*b+0,4*b+1,4*b+2,4*b+3,4*c+0,4*c+1,4*c+2,4*c+3,4*d+0,4*d+1,4*d+2,4*d+3}
3143- #define stbir_make16x2(a,b) (uint8x16x2_t){{vreinterpretq_u8_f32(a),vreinterpretq_u8_f32(b)}}
3144- #endif
3145-
3146- #define stbir__simdf_swiz( reg, one, two, three, four ) vreinterpretq_f32_u8( vqtbl1q_u8( vreinterpretq_u8_f32(reg), stbir_make16(one, two, three, four) ) )
3147- #define stbir__simdf_swiz2( rega, regb, one, two, three, four ) vreinterpretq_f32_u8( vqtbl2q_u8( stbir_make16x2(rega,regb), stbir_make16(one, two, three, four) ) )
3148-
3149- #define stbir__simdi_16madd( out, reg0, reg1 ) \
3150- { \
3151- int16x8_t r0 = vreinterpretq_s16_u32(reg0); \
3152- int16x8_t r1 = vreinterpretq_s16_u32(reg1); \
3153- int32x4_t tmp0 = vmull_s16( vget_low_s16(r0), vget_low_s16(r1) ); \
3154- int32x4_t tmp1 = vmull_s16( vget_high_s16(r0), vget_high_s16(r1) ); \
3155- (out) = vreinterpretq_u32_s32( vpaddq_s32(tmp0, tmp1) ); \
3156- }
3157-
3158- #else
3159-
3160- #define stbir__simdf_aaa1( out, alp, ones ) (out) = vsetq_lane_f32(1.0f, vdupq_n_f32(vgetq_lane_f32(alp, 3)), 3)
3161- #define stbir__simdf_1aaa( out, alp, ones ) (out) = vsetq_lane_f32(1.0f, vdupq_n_f32(vgetq_lane_f32(alp, 0)), 0)
3162-
3163- #if defined( _MSC_VER ) && !defined(__clang__)
3164- static stbir__inline uint8x8x2_t stbir_make8x2(float32x4_t reg)
3165- {
3166- uint8x8x2_t r = { { vget_low_u8(vreinterpretq_u8_f32(reg)), vget_high_u8(vreinterpretq_u8_f32(reg)) } };
3167- return r;
3168- }
3169- #define stbir_make8(a,b) vcreate_u8( \
3170- (4*a+0) | ((4*a+1)<<8) | ((4*a+2)<<16) | ((4*a+3)<<24) | \
3171- ((stbir_uint64)(4*b+0)<<32) | ((stbir_uint64)(4*b+1)<<40) | ((stbir_uint64)(4*b+2)<<48) | ((stbir_uint64)(4*b+3)<<56) )
3172- #else
3173- #define stbir_make8x2(reg) (uint8x8x2_t){ { vget_low_u8(vreinterpretq_u8_f32(reg)), vget_high_u8(vreinterpretq_u8_f32(reg)) } }
3174- #define stbir_make8(a,b) (uint8x8_t){4*a+0,4*a+1,4*a+2,4*a+3,4*b+0,4*b+1,4*b+2,4*b+3}
3175- #endif
3176-
3177- #define stbir__simdf_swiz( reg, one, two, three, four ) vreinterpretq_f32_u8( vcombine_u8( \
3178- vtbl2_u8( stbir_make8x2( reg ), stbir_make8( one, two ) ), \
3179- vtbl2_u8( stbir_make8x2( reg ), stbir_make8( three, four ) ) ) )
3180-
3181- #define stbir__simdi_16madd( out, reg0, reg1 ) \
3182- { \
3183- int16x8_t r0 = vreinterpretq_s16_u32(reg0); \
3184- int16x8_t r1 = vreinterpretq_s16_u32(reg1); \
3185- int32x4_t tmp0 = vmull_s16( vget_low_s16(r0), vget_low_s16(r1) ); \
3186- int32x4_t tmp1 = vmull_s16( vget_high_s16(r0), vget_high_s16(r1) ); \
3187- int32x2_t out0 = vpadd_s32( vget_low_s32(tmp0), vget_high_s32(tmp0) ); \
3188- int32x2_t out1 = vpadd_s32( vget_low_s32(tmp1), vget_high_s32(tmp1) ); \
3189- (out) = vreinterpretq_u32_s32( vcombine_s32(out0, out1) ); \
3190- }
3191-
3192- #endif
3193-
3194- #define stbir__simdi_and( out, reg0, reg1 ) (out) = vandq_u32( reg0, reg1 )
3195- #define stbir__simdi_or( out, reg0, reg1 ) (out) = vorrq_u32( reg0, reg1 )
3196-
3197- #define stbir__simdf_pack_to_8bytes(out,aa,bb) \
3198- { \
3199- float32x4_t af = vmaxq_f32( vminq_f32(aa,STBIR__CONSTF(STBIR_max_uint8_as_float) ), vdupq_n_f32(0) ); \
3200- float32x4_t bf = vmaxq_f32( vminq_f32(bb,STBIR__CONSTF(STBIR_max_uint8_as_float) ), vdupq_n_f32(0) ); \
3201- int16x4_t ai = vqmovn_s32( vcvtq_s32_f32( af ) ); \
3202- int16x4_t bi = vqmovn_s32( vcvtq_s32_f32( bf ) ); \
3203- uint8x8_t out8 = vqmovun_s16( vcombine_s16(ai, bi) ); \
3204- out = vreinterpretq_u32_u8( vcombine_u8(out8, out8) ); \
3205- }
3206-
3207- #define stbir__simdf_pack_to_8words(out,aa,bb) \
3208- { \
3209- float32x4_t af = vmaxq_f32( vminq_f32(aa,STBIR__CONSTF(STBIR_max_uint16_as_float) ), vdupq_n_f32(0) ); \
3210- float32x4_t bf = vmaxq_f32( vminq_f32(bb,STBIR__CONSTF(STBIR_max_uint16_as_float) ), vdupq_n_f32(0) ); \
3211- int32x4_t ai = vcvtq_s32_f32( af ); \
3212- int32x4_t bi = vcvtq_s32_f32( bf ); \
3213- out = vreinterpretq_u32_u16( vcombine_u16(vqmovun_s32(ai), vqmovun_s32(bi)) ); \
3214- }
3215-
3216- #define stbir__interleave_pack_and_store_16_u8( ptr, r0, r1, r2, r3 ) \
3217- { \
3218- int16x4x2_t tmp0 = vzip_s16( vqmovn_s32(vreinterpretq_s32_u32(r0)), vqmovn_s32(vreinterpretq_s32_u32(r2)) ); \
3219- int16x4x2_t tmp1 = vzip_s16( vqmovn_s32(vreinterpretq_s32_u32(r1)), vqmovn_s32(vreinterpretq_s32_u32(r3)) ); \
3220- uint8x8x2_t out = \
3221- { { \
3222- vqmovun_s16( vcombine_s16(tmp0.val[0], tmp0.val[1]) ), \
3223- vqmovun_s16( vcombine_s16(tmp1.val[0], tmp1.val[1]) ), \
3224- } }; \
3225- vst2_u8(ptr, out); \
3226- }
3227-
3228- #define stbir__simdf_load4_transposed( o0, o1, o2, o3, ptr ) \
3229- { \
3230- float32x4x4_t tmp = vld4q_f32(ptr); \
3231- o0 = tmp.val[0]; \
3232- o1 = tmp.val[1]; \
3233- o2 = tmp.val[2]; \
3234- o3 = tmp.val[3]; \
3235- }
3236-
3237- #define stbir__simdi_32shr( out, reg, imm ) out = vshrq_n_u32( reg, imm )
3238-
3239- #if defined( _MSC_VER ) && !defined(__clang__)
3240- #define STBIR__SIMDF_CONST(var, x) __declspec(align(8)) float var[] = { x, x, x, x }
3241- #define STBIR__SIMDI_CONST(var, x) __declspec(align(8)) uint32_t var[] = { x, x, x, x }
3242- #define STBIR__CONSTF(var) (*(const float32x4_t*)var)
3243- #define STBIR__CONSTI(var) (*(const uint32x4_t*)var)
3244- #else
3245- #define STBIR__SIMDF_CONST(var, x) stbir__simdf var = { x, x, x, x }
3246- #define STBIR__SIMDI_CONST(var, x) stbir__simdi var = { x, x, x, x }
3247- #define STBIR__CONSTF(var) (var)
3248- #define STBIR__CONSTI(var) (var)
3249- #endif
3250-
3251- #ifdef STBIR_FLOORF
3252- #undef STBIR_FLOORF
3253- #endif
3254- #define STBIR_FLOORF stbir_simd_floorf
3255- static stbir__inline float stbir_simd_floorf(float x)
3256- {
3257- #if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ )
3258- return vget_lane_f32( vrndm_f32( vdup_n_f32(x) ), 0);
3259- #else
3260- float32x2_t f = vdup_n_f32(x);
3261- float32x2_t t = vcvt_f32_s32(vcvt_s32_f32(f));
3262- uint32x2_t a = vclt_f32(f, t);
3263- uint32x2_t b = vreinterpret_u32_f32(vdup_n_f32(-1.0f));
3264- float32x2_t r = vadd_f32(t, vreinterpret_f32_u32(vand_u32(a, b)));
3265- return vget_lane_f32(r, 0);
3266- #endif
3267- }
3268-
3269- #ifdef STBIR_CEILF
3270- #undef STBIR_CEILF
3271- #endif
3272- #define STBIR_CEILF stbir_simd_ceilf
3273- static stbir__inline float stbir_simd_ceilf(float x)
3274- {
3275- #if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ )
3276- return vget_lane_f32( vrndp_f32( vdup_n_f32(x) ), 0);
3277- #else
3278- float32x2_t f = vdup_n_f32(x);
3279- float32x2_t t = vcvt_f32_s32(vcvt_s32_f32(f));
3280- uint32x2_t a = vclt_f32(t, f);
3281- uint32x2_t b = vreinterpret_u32_f32(vdup_n_f32(1.0f));
3282- float32x2_t r = vadd_f32(t, vreinterpret_f32_u32(vand_u32(a, b)));
3283- return vget_lane_f32(r, 0);
3284- #endif
3285- }
3286-
3287- #define STBIR_SIMD
3288+#include <arm_neon.h>
3289+
3290+#define stbir__simdf float32x4_t
3291+#define stbir__simdi uint32x4_t
3292+
3293+#define stbir_simdi_castf(reg) vreinterpretq_u32_f32(reg)
3294+#define stbir_simdf_casti(reg) vreinterpretq_f32_u32(reg)
3295+
3296+#define stbir__simdf_load(reg, ptr) (reg) = vld1q_f32((float const *)(ptr))
3297+#define stbir__simdi_load(reg, ptr) (reg) = vld1q_u32((uint32_t const *)(ptr))
3298+#define stbir__simdf_load1(out, ptr) \
3299+ (out) = vld1q_dup_f32((float const *)(ptr)) // top values can be random (not
3300+ // denormal or nan for perf)
3301+#define stbir__simdi_load1(out, ptr) \
3302+ (out) = vld1q_dup_u32((uint32_t const *)(ptr))
3303+#define stbir__simdf_load1z(out, ptr) \
3304+ (out) = vld1q_lane_f32((float const *)(ptr), vdupq_n_f32(0), \
3305+ 0) // top values must be zero
3306+#define stbir__simdf_frep4(fvar) vdupq_n_f32(fvar)
3307+#define stbir__simdf_load1frep4(out, fvar) (out) = vdupq_n_f32(fvar)
3308+#define stbir__simdf_load2(out, ptr) \
3309+ (out) = vcombine_f32( \
3310+ vld1_f32((float const *)(ptr)), \
3311+ vcreate_f32( \
3312+ 0)) // top values can be random (not denormal or nan for perf)
3313+#define stbir__simdf_load2z(out, ptr) \
3314+ (out) = vcombine_f32(vld1_f32((float const *)(ptr)), \
3315+ vcreate_f32(0)) // top values must be zero
3316+#define stbir__simdf_load2hmerge(out, reg, ptr) \
3317+ (out) = vcombine_f32(vget_low_f32(reg), vld1_f32((float const *)(ptr)))
3318+
3319+#define stbir__simdf_zeroP() vdupq_n_f32(0)
3320+#define stbir__simdf_zero(reg) (reg) = vdupq_n_f32(0)
3321+
3322+#define stbir__simdf_store(ptr, reg) vst1q_f32((float *)(ptr), reg)
3323+#define stbir__simdf_store1(ptr, reg) vst1q_lane_f32((float *)(ptr), reg, 0)
3324+#define stbir__simdf_store2(ptr, reg) \
3325+ vst1_f32((float *)(ptr), vget_low_f32(reg))
3326+#define stbir__simdf_store2h(ptr, reg) \
3327+ vst1_f32((float *)(ptr), vget_high_f32(reg))
3328+
3329+#define stbir__simdi_store(ptr, reg) vst1q_u32((uint32_t *)(ptr), reg)
3330+#define stbir__simdi_store1(ptr, reg) vst1q_lane_u32((uint32_t *)(ptr), reg, 0)
3331+#define stbir__simdi_store2(ptr, reg) \
3332+ vst1_u32((uint32_t *)(ptr), vget_low_u32(reg))
3333+
3334+#define stbir__prefetch(ptr)
3335+
3336+#define stbir__simdi_expand_u8_to_u32(out0, out1, out2, out3, ireg) \
3337+ { \
3338+ uint16x8_t l = vmovl_u8(vget_low_u8(vreinterpretq_u8_u32(ireg))); \
3339+ uint16x8_t h = vmovl_u8(vget_high_u8(vreinterpretq_u8_u32(ireg))); \
3340+ out0 = vmovl_u16(vget_low_u16(l)); \
3341+ out1 = vmovl_u16(vget_high_u16(l)); \
3342+ out2 = vmovl_u16(vget_low_u16(h)); \
3343+ out3 = vmovl_u16(vget_high_u16(h)); \
3344+ }
3345+
3346+#define stbir__simdi_expand_u8_to_1u32(out, ireg) \
3347+ { \
3348+ uint16x8_t tmp = vmovl_u8(vget_low_u8(vreinterpretq_u8_u32(ireg))); \
3349+ out = vmovl_u16(vget_low_u16(tmp)); \
3350+ }
3351+
3352+#define stbir__simdi_expand_u16_to_u32(out0, out1, ireg) \
3353+ { \
3354+ uint16x8_t tmp = vreinterpretq_u16_u32(ireg); \
3355+ out0 = vmovl_u16(vget_low_u16(tmp)); \
3356+ out1 = vmovl_u16(vget_high_u16(tmp)); \
3357+ }
3358+
3359+#define stbir__simdf_convert_float_to_i32(i, f) \
3360+ (i) = vreinterpretq_u32_s32(vcvtq_s32_f32(f))
3361+#define stbir__simdf_convert_float_to_int(f) vgetq_lane_s32(vcvtq_s32_f32(f), 0)
3362+#define stbir__simdi_to_int(i) (int)vgetq_lane_u32(i, 0)
3363+#define stbir__simdf_convert_float_to_uint8(f) \
3364+ ((unsigned char)vgetq_lane_s32( \
3365+ vcvtq_s32_f32( \
3366+ vmaxq_f32(vminq_f32(f, STBIR__CONSTF(STBIR_max_uint8_as_float)), \
3367+ vdupq_n_f32(0))), \
3368+ 0))
3369+#define stbir__simdf_convert_float_to_short(f) \
3370+ ((unsigned short)vgetq_lane_s32( \
3371+ vcvtq_s32_f32( \
3372+ vmaxq_f32(vminq_f32(f, STBIR__CONSTF(STBIR_max_uint16_as_float)), \
3373+ vdupq_n_f32(0))), \
3374+ 0))
3375+#define stbir__simdi_convert_i32_to_float(out, ireg) \
3376+ (out) = vcvtq_f32_s32(vreinterpretq_s32_u32(ireg))
3377+#define stbir__simdf_add(out, reg0, reg1) (out) = vaddq_f32(reg0, reg1)
3378+#define stbir__simdf_mult(out, reg0, reg1) (out) = vmulq_f32(reg0, reg1)
3379+#define stbir__simdf_mult_mem(out, reg, ptr) \
3380+ (out) = vmulq_f32(reg, vld1q_f32((float const *)(ptr)))
3381+#define stbir__simdf_mult1_mem(out, reg, ptr) \
3382+ (out) = vmulq_f32(reg, vld1q_dup_f32((float const *)(ptr)))
3383+#define stbir__simdf_add_mem(out, reg, ptr) \
3384+ (out) = vaddq_f32(reg, vld1q_f32((float const *)(ptr)))
3385+#define stbir__simdf_add1_mem(out, reg, ptr) \
3386+ (out) = vaddq_f32(reg, vld1q_dup_f32((float const *)(ptr)))
3387+
3388+#ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to
3389+ // non-simd (and also x64 no madd to arm madd)
3390+#define stbir__simdf_madd(out, add, mul1, mul2) \
3391+ (out) = vfmaq_f32(add, mul1, mul2)
3392+#define stbir__simdf_madd1(out, add, mul1, mul2) \
3393+ (out) = vfmaq_f32(add, mul1, mul2)
3394+#define stbir__simdf_madd_mem(out, add, mul, ptr) \
3395+ (out) = vfmaq_f32(add, mul, vld1q_f32((float const *)(ptr)))
3396+#define stbir__simdf_madd1_mem(out, add, mul, ptr) \
3397+ (out) = vfmaq_f32(add, mul, vld1q_dup_f32((float const *)(ptr)))
3398+#else
3399+#define stbir__simdf_madd(out, add, mul1, mul2) \
3400+ (out) = vaddq_f32(add, vmulq_f32(mul1, mul2))
3401+#define stbir__simdf_madd1(out, add, mul1, mul2) \
3402+ (out) = vaddq_f32(add, vmulq_f32(mul1, mul2))
3403+#define stbir__simdf_madd_mem(out, add, mul, ptr) \
3404+ (out) = vaddq_f32(add, vmulq_f32(mul, vld1q_f32((float const *)(ptr))))
3405+#define stbir__simdf_madd1_mem(out, add, mul, ptr) \
3406+ (out) = vaddq_f32(add, vmulq_f32(mul, vld1q_dup_f32((float const *)(ptr))))
3407+#endif
3408+
3409+#define stbir__simdf_add1(out, reg0, reg1) (out) = vaddq_f32(reg0, reg1)
3410+#define stbir__simdf_mult1(out, reg0, reg1) (out) = vmulq_f32(reg0, reg1)
3411+
3412+#define stbir__simdf_and(out, reg0, reg1) \
3413+ (out) = vreinterpretq_f32_u32( \
3414+ vandq_u32(vreinterpretq_u32_f32(reg0), vreinterpretq_u32_f32(reg1)))
3415+#define stbir__simdf_or(out, reg0, reg1) \
3416+ (out) = vreinterpretq_f32_u32( \
3417+ vorrq_u32(vreinterpretq_u32_f32(reg0), vreinterpretq_u32_f32(reg1)))
3418+
3419+#define stbir__simdf_min(out, reg0, reg1) (out) = vminq_f32(reg0, reg1)
3420+#define stbir__simdf_max(out, reg0, reg1) (out) = vmaxq_f32(reg0, reg1)
3421+#define stbir__simdf_min1(out, reg0, reg1) (out) = vminq_f32(reg0, reg1)
3422+#define stbir__simdf_max1(out, reg0, reg1) (out) = vmaxq_f32(reg0, reg1)
3423+
3424+#define stbir__simdf_0123ABCDto3ABx(out, reg0, reg1) \
3425+ (out) = vextq_f32(reg0, reg1, 3)
3426+#define stbir__simdf_0123ABCDto23Ax(out, reg0, reg1) \
3427+ (out) = vextq_f32(reg0, reg1, 2)
3428+
3429+#define stbir__simdf_a1a1(out, alp, ones) \
3430+ (out) = vzipq_f32(vuzpq_f32(alp, alp).val[1], ones).val[0]
3431+#define stbir__simdf_1a1a(out, alp, ones) \
3432+ (out) = vzipq_f32(ones, vuzpq_f32(alp, alp).val[0]).val[0]
3433+
3434+#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__)
3435+
3436+#define stbir__simdf_aaa1(out, alp, ones) \
3437+ (out) = vcopyq_laneq_f32(vdupq_n_f32(vgetq_lane_f32(alp, 3)), 3, ones, 3)
3438+#define stbir__simdf_1aaa(out, alp, ones) \
3439+ (out) = vcopyq_laneq_f32(vdupq_n_f32(vgetq_lane_f32(alp, 0)), 0, ones, 0)
3440+
3441+#if defined(_MSC_VER) && !defined(__clang__)
3442+#define stbir_make16(a, b, c, d) \
3443+ vcombine_u8( \
3444+ vcreate_u8((4 * a + 0) | ((4 * a + 1) << 8) | ((4 * a + 2) << 16) | \
3445+ ((4 * a + 3) << 24) | ((stbir_uint64)(4 * b + 0) << 32) | \
3446+ ((stbir_uint64)(4 * b + 1) << 40) | \
3447+ ((stbir_uint64)(4 * b + 2) << 48) | \
3448+ ((stbir_uint64)(4 * b + 3) << 56)), \
3449+ vcreate_u8((4 * c + 0) | ((4 * c + 1) << 8) | ((4 * c + 2) << 16) | \
3450+ ((4 * c + 3) << 24) | ((stbir_uint64)(4 * d + 0) << 32) | \
3451+ ((stbir_uint64)(4 * d + 1) << 40) | \
3452+ ((stbir_uint64)(4 * d + 2) << 48) | \
3453+ ((stbir_uint64)(4 * d + 3) << 56)))
3454+
3455+static stbir__inline uint8x16x2_t
3456+stbir_make16x2(float32x4_t rega, float32x4_t regb)
3457+{
3458+ uint8x16x2_t r = {vreinterpretq_u8_f32(rega), vreinterpretq_u8_f32(regb)};
3459+ return r;
3460+}
3461+#else
3462+#define stbir_make16(a, b, c, d) \
3463+ (uint8x16_t){4 * a + 0, 4 * a + 1, 4 * a + 2, 4 * a + 3, \
3464+ 4 * b + 0, 4 * b + 1, 4 * b + 2, 4 * b + 3, \
3465+ 4 * c + 0, 4 * c + 1, 4 * c + 2, 4 * c + 3, \
3466+ 4 * d + 0, 4 * d + 1, 4 * d + 2, 4 * d + 3}
3467+#define stbir_make16x2(a, b) \
3468+ (uint8x16x2_t) \
3469+ { \
3470+ { \
3471+ vreinterpretq_u8_f32(a), vreinterpretq_u8_f32(b) \
3472+ } \
3473+ }
3474+#endif
3475+
3476+#define stbir__simdf_swiz(reg, one, two, three, four) \
3477+ vreinterpretq_f32_u8(vqtbl1q_u8(vreinterpretq_u8_f32(reg), \
3478+ stbir_make16(one, two, three, four)))
3479+#define stbir__simdf_swiz2(rega, regb, one, two, three, four) \
3480+ vreinterpretq_f32_u8(vqtbl2q_u8(stbir_make16x2(rega, regb), \
3481+ stbir_make16(one, two, three, four)))
3482+
3483+#define stbir__simdi_16madd(out, reg0, reg1) \
3484+ { \
3485+ int16x8_t r0 = vreinterpretq_s16_u32(reg0); \
3486+ int16x8_t r1 = vreinterpretq_s16_u32(reg1); \
3487+ int32x4_t tmp0 = vmull_s16(vget_low_s16(r0), vget_low_s16(r1)); \
3488+ int32x4_t tmp1 = vmull_s16(vget_high_s16(r0), vget_high_s16(r1)); \
3489+ (out) = vreinterpretq_u32_s32(vpaddq_s32(tmp0, tmp1)); \
3490+ }
3491+
3492+#else
3493+
3494+#define stbir__simdf_aaa1(out, alp, ones) \
3495+ (out) = vsetq_lane_f32(1.0f, vdupq_n_f32(vgetq_lane_f32(alp, 3)), 3)
3496+#define stbir__simdf_1aaa(out, alp, ones) \
3497+ (out) = vsetq_lane_f32(1.0f, vdupq_n_f32(vgetq_lane_f32(alp, 0)), 0)
3498+
3499+#if defined(_MSC_VER) && !defined(__clang__)
3500+static stbir__inline uint8x8x2_t
3501+stbir_make8x2(float32x4_t reg)
3502+{
3503+ uint8x8x2_t r = {{vget_low_u8(vreinterpretq_u8_f32(reg)),
3504+ vget_high_u8(vreinterpretq_u8_f32(reg))}};
3505+ return r;
3506+}
3507+#define stbir_make8(a, b) \
3508+ vcreate_u8((4 * a + 0) | ((4 * a + 1) << 8) | ((4 * a + 2) << 16) | \
3509+ ((4 * a + 3) << 24) | ((stbir_uint64)(4 * b + 0) << 32) | \
3510+ ((stbir_uint64)(4 * b + 1) << 40) | \
3511+ ((stbir_uint64)(4 * b + 2) << 48) | \
3512+ ((stbir_uint64)(4 * b + 3) << 56))
3513+#else
3514+#define stbir_make8x2(reg) \
3515+ (uint8x8x2_t) \
3516+ { \
3517+ { \
3518+ vget_low_u8(vreinterpretq_u8_f32(reg)), \
3519+ vget_high_u8(vreinterpretq_u8_f32(reg)) \
3520+ } \
3521+ }
3522+#define stbir_make8(a, b) \
3523+ (uint8x8_t){4 * a + 0, 4 * a + 1, 4 * a + 2, 4 * a + 3, \
3524+ 4 * b + 0, 4 * b + 1, 4 * b + 2, 4 * b + 3}
3525+#endif
3526+
3527+#define stbir__simdf_swiz(reg, one, two, three, four) \
3528+ vreinterpretq_f32_u8( \
3529+ vcombine_u8(vtbl2_u8(stbir_make8x2(reg), stbir_make8(one, two)), \
3530+ vtbl2_u8(stbir_make8x2(reg), stbir_make8(three, four))))
3531+
3532+#define stbir__simdi_16madd(out, reg0, reg1) \
3533+ { \
3534+ int16x8_t r0 = vreinterpretq_s16_u32(reg0); \
3535+ int16x8_t r1 = vreinterpretq_s16_u32(reg1); \
3536+ int32x4_t tmp0 = vmull_s16(vget_low_s16(r0), vget_low_s16(r1)); \
3537+ int32x4_t tmp1 = vmull_s16(vget_high_s16(r0), vget_high_s16(r1)); \
3538+ int32x2_t out0 = vpadd_s32(vget_low_s32(tmp0), vget_high_s32(tmp0)); \
3539+ int32x2_t out1 = vpadd_s32(vget_low_s32(tmp1), vget_high_s32(tmp1)); \
3540+ (out) = vreinterpretq_u32_s32(vcombine_s32(out0, out1)); \
3541+ }
3542+
3543+#endif
3544+
3545+#define stbir__simdi_and(out, reg0, reg1) (out) = vandq_u32(reg0, reg1)
3546+#define stbir__simdi_or(out, reg0, reg1) (out) = vorrq_u32(reg0, reg1)
3547+
3548+#define stbir__simdf_pack_to_8bytes(out, aa, bb) \
3549+ { \
3550+ float32x4_t af = \
3551+ vmaxq_f32(vminq_f32(aa, STBIR__CONSTF(STBIR_max_uint8_as_float)), \
3552+ vdupq_n_f32(0)); \
3553+ float32x4_t bf = \
3554+ vmaxq_f32(vminq_f32(bb, STBIR__CONSTF(STBIR_max_uint8_as_float)), \
3555+ vdupq_n_f32(0)); \
3556+ int16x4_t ai = vqmovn_s32(vcvtq_s32_f32(af)); \
3557+ int16x4_t bi = vqmovn_s32(vcvtq_s32_f32(bf)); \
3558+ uint8x8_t out8 = vqmovun_s16(vcombine_s16(ai, bi)); \
3559+ out = vreinterpretq_u32_u8(vcombine_u8(out8, out8)); \
3560+ }
3561+
3562+#define stbir__simdf_pack_to_8words(out, aa, bb) \
3563+ { \
3564+ float32x4_t af = \
3565+ vmaxq_f32(vminq_f32(aa, STBIR__CONSTF(STBIR_max_uint16_as_float)), \
3566+ vdupq_n_f32(0)); \
3567+ float32x4_t bf = \
3568+ vmaxq_f32(vminq_f32(bb, STBIR__CONSTF(STBIR_max_uint16_as_float)), \
3569+ vdupq_n_f32(0)); \
3570+ int32x4_t ai = vcvtq_s32_f32(af); \
3571+ int32x4_t bi = vcvtq_s32_f32(bf); \
3572+ out = vreinterpretq_u32_u16( \
3573+ vcombine_u16(vqmovun_s32(ai), vqmovun_s32(bi))); \
3574+ }
3575+
3576+#define stbir__interleave_pack_and_store_16_u8(ptr, r0, r1, r2, r3) \
3577+ { \
3578+ int16x4x2_t tmp0 = vzip_s16(vqmovn_s32(vreinterpretq_s32_u32(r0)), \
3579+ vqmovn_s32(vreinterpretq_s32_u32(r2))); \
3580+ int16x4x2_t tmp1 = vzip_s16(vqmovn_s32(vreinterpretq_s32_u32(r1)), \
3581+ vqmovn_s32(vreinterpretq_s32_u32(r3))); \
3582+ uint8x8x2_t out = {{ \
3583+ vqmovun_s16(vcombine_s16(tmp0.val[0], tmp0.val[1])), \
3584+ vqmovun_s16(vcombine_s16(tmp1.val[0], tmp1.val[1])), \
3585+ }}; \
3586+ vst2_u8(ptr, out); \
3587+ }
3588+
3589+#define stbir__simdf_load4_transposed(o0, o1, o2, o3, ptr) \
3590+ { \
3591+ float32x4x4_t tmp = vld4q_f32(ptr); \
3592+ o0 = tmp.val[0]; \
3593+ o1 = tmp.val[1]; \
3594+ o2 = tmp.val[2]; \
3595+ o3 = tmp.val[3]; \
3596+ }
3597+
3598+#define stbir__simdi_32shr(out, reg, imm) out = vshrq_n_u32(reg, imm)
3599+
3600+#if defined(_MSC_VER) && !defined(__clang__)
3601+#define STBIR__SIMDF_CONST(var, x) \
3602+ __declspec(align(8)) float var[] = {x, x, x, x}
3603+#define STBIR__SIMDI_CONST(var, x) \
3604+ __declspec(align(8)) uint32_t var[] = {x, x, x, x}
3605+#define STBIR__CONSTF(var) (*(const float32x4_t *)var)
3606+#define STBIR__CONSTI(var) (*(const uint32x4_t *)var)
3607+#else
3608+#define STBIR__SIMDF_CONST(var, x) stbir__simdf var = {x, x, x, x}
3609+#define STBIR__SIMDI_CONST(var, x) stbir__simdi var = {x, x, x, x}
3610+#define STBIR__CONSTF(var) (var)
3611+#define STBIR__CONSTI(var) (var)
3612+#endif
3613+
3614+#ifdef STBIR_FLOORF
3615+#undef STBIR_FLOORF
3616+#endif
3617+#define STBIR_FLOORF stbir_simd_floorf
3618+static stbir__inline float // floorf() stand-in built on NEON intrinsics; installed as STBIR_FLOORF just above
3619+stbir_simd_floorf(float x) // x: scalar input; returns lane 0 of the rounded vector
3620+{
3621+#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__)
3622+ return vget_lane_f32(vrndm_f32(vdup_n_f32(x)), 0); // vrndm: round toward -infinity, then read lane 0
3623+#else
3624+ float32x2_t f = vdup_n_f32(x); // broadcast x into both lanes
3625+ float32x2_t t = vcvt_f32_s32(vcvt_s32_f32(f)); // float -> int32 -> float round trip; NOTE(review): presumably only exact for |x| within int32 range - confirm
3626+ uint32x2_t a = vclt_f32(f, t); // lane mask set where x < t, i.e. the round trip landed above x
3627+ uint32x2_t b = vreinterpret_u32_f32(vdup_n_f32(-1.0f)); // bit pattern of -1.0f
3628+ float32x2_t r = vadd_f32(t, vreinterpret_f32_u32(vand_u32(a, b))); // t + (-1.0f where masked, else +0.0f)
3629+ return vget_lane_f32(r, 0); // both lanes are identical; return lane 0
3630+#endif
3631+}
3632+
3633+#ifdef STBIR_CEILF
3634+#undef STBIR_CEILF
3635+#endif
3636+#define STBIR_CEILF stbir_simd_ceilf
3637+static stbir__inline float // ceilf() stand-in built on NEON intrinsics; installed as STBIR_CEILF just above
3638+stbir_simd_ceilf(float x) // x: scalar input; returns lane 0 of the rounded vector
3639+{
3640+#if defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__)
3641+ return vget_lane_f32(vrndp_f32(vdup_n_f32(x)), 0); // vrndp: round toward +infinity, then read lane 0
3642+#else
3643+ float32x2_t f = vdup_n_f32(x); // broadcast x into both lanes
3644+ float32x2_t t = vcvt_f32_s32(vcvt_s32_f32(f)); // float -> int32 -> float round trip; NOTE(review): presumably only exact for |x| within int32 range - confirm
3645+ uint32x2_t a = vclt_f32(t, f); // lane mask set where t < x, i.e. the round trip landed below x
3646+ uint32x2_t b = vreinterpret_u32_f32(vdup_n_f32(1.0f)); // bit pattern of +1.0f
3647+ float32x2_t r = vadd_f32(t, vreinterpret_f32_u32(vand_u32(a, b))); // t + (+1.0f where masked, else +0.0f)
3648+ return vget_lane_f32(r, 0); // both lanes are identical; return lane 0
3649+#endif
3650+}
3651+
3652+#define STBIR_SIMD
3653
3654 #elif defined(STBIR_WASM)
3655
3656- #include <wasm_simd128.h>
3657-
3658- #define stbir__simdf v128_t
3659- #define stbir__simdi v128_t
3660-
3661- #define stbir_simdi_castf( reg ) (reg)
3662- #define stbir_simdf_casti( reg ) (reg)
3663-
3664- #define stbir__simdf_load( reg, ptr ) (reg) = wasm_v128_load( (void const*)(ptr) )
3665- #define stbir__simdi_load( reg, ptr ) (reg) = wasm_v128_load( (void const*)(ptr) )
3666- #define stbir__simdf_load1( out, ptr ) (out) = wasm_v128_load32_splat( (void const*)(ptr) ) // top values can be random (not denormal or nan for perf)
3667- #define stbir__simdi_load1( out, ptr ) (out) = wasm_v128_load32_splat( (void const*)(ptr) )
3668- #define stbir__simdf_load1z( out, ptr ) (out) = wasm_v128_load32_zero( (void const*)(ptr) ) // top values must be zero
3669- #define stbir__simdf_frep4( fvar ) wasm_f32x4_splat( fvar )
3670- #define stbir__simdf_load1frep4( out, fvar ) (out) = wasm_f32x4_splat( fvar )
3671- #define stbir__simdf_load2( out, ptr ) (out) = wasm_v128_load64_splat( (void const*)(ptr) ) // top values can be random (not denormal or nan for perf)
3672- #define stbir__simdf_load2z( out, ptr ) (out) = wasm_v128_load64_zero( (void const*)(ptr) ) // top values must be zero
3673- #define stbir__simdf_load2hmerge( out, reg, ptr ) (out) = wasm_v128_load64_lane( (void const*)(ptr), reg, 1 )
3674-
3675- #define stbir__simdf_zeroP() wasm_f32x4_const_splat(0)
3676- #define stbir__simdf_zero( reg ) (reg) = wasm_f32x4_const_splat(0)
3677-
3678- #define stbir__simdf_store( ptr, reg ) wasm_v128_store( (void*)(ptr), reg )
3679- #define stbir__simdf_store1( ptr, reg ) wasm_v128_store32_lane( (void*)(ptr), reg, 0 )
3680- #define stbir__simdf_store2( ptr, reg ) wasm_v128_store64_lane( (void*)(ptr), reg, 0 )
3681- #define stbir__simdf_store2h( ptr, reg ) wasm_v128_store64_lane( (void*)(ptr), reg, 1 )
3682-
3683- #define stbir__simdi_store( ptr, reg ) wasm_v128_store( (void*)(ptr), reg )
3684- #define stbir__simdi_store1( ptr, reg ) wasm_v128_store32_lane( (void*)(ptr), reg, 0 )
3685- #define stbir__simdi_store2( ptr, reg ) wasm_v128_store64_lane( (void*)(ptr), reg, 0 )
3686-
3687- #define stbir__prefetch( ptr )
3688-
3689- #define stbir__simdi_expand_u8_to_u32(out0,out1,out2,out3,ireg) \
3690- { \
3691- v128_t l = wasm_u16x8_extend_low_u8x16 ( ireg ); \
3692- v128_t h = wasm_u16x8_extend_high_u8x16( ireg ); \
3693- out0 = wasm_u32x4_extend_low_u16x8 ( l ); \
3694- out1 = wasm_u32x4_extend_high_u16x8( l ); \
3695- out2 = wasm_u32x4_extend_low_u16x8 ( h ); \
3696- out3 = wasm_u32x4_extend_high_u16x8( h ); \
3697- }
3698-
3699- #define stbir__simdi_expand_u8_to_1u32(out,ireg) \
3700- { \
3701- v128_t tmp = wasm_u16x8_extend_low_u8x16(ireg); \
3702- out = wasm_u32x4_extend_low_u16x8(tmp); \
3703- }
3704-
3705- #define stbir__simdi_expand_u16_to_u32(out0,out1,ireg) \
3706- { \
3707- out0 = wasm_u32x4_extend_low_u16x8 ( ireg ); \
3708- out1 = wasm_u32x4_extend_high_u16x8( ireg ); \
3709- }
3710-
3711- #define stbir__simdf_convert_float_to_i32( i, f ) (i) = wasm_i32x4_trunc_sat_f32x4(f)
3712- #define stbir__simdf_convert_float_to_int( f ) wasm_i32x4_extract_lane(wasm_i32x4_trunc_sat_f32x4(f), 0)
3713- #define stbir__simdi_to_int( i ) wasm_i32x4_extract_lane(i, 0)
3714- #define stbir__simdf_convert_float_to_uint8( f ) ((unsigned char)wasm_i32x4_extract_lane(wasm_i32x4_trunc_sat_f32x4(wasm_f32x4_max(wasm_f32x4_min(f,STBIR_max_uint8_as_float),wasm_f32x4_const_splat(0))), 0))
3715- #define stbir__simdf_convert_float_to_short( f ) ((unsigned short)wasm_i32x4_extract_lane(wasm_i32x4_trunc_sat_f32x4(wasm_f32x4_max(wasm_f32x4_min(f,STBIR_max_uint16_as_float),wasm_f32x4_const_splat(0))), 0))
3716- #define stbir__simdi_convert_i32_to_float(out, ireg) (out) = wasm_f32x4_convert_i32x4(ireg)
3717- #define stbir__simdf_add( out, reg0, reg1 ) (out) = wasm_f32x4_add( reg0, reg1 )
3718- #define stbir__simdf_mult( out, reg0, reg1 ) (out) = wasm_f32x4_mul( reg0, reg1 )
3719- #define stbir__simdf_mult_mem( out, reg, ptr ) (out) = wasm_f32x4_mul( reg, wasm_v128_load( (void const*)(ptr) ) )
3720- #define stbir__simdf_mult1_mem( out, reg, ptr ) (out) = wasm_f32x4_mul( reg, wasm_v128_load32_splat( (void const*)(ptr) ) )
3721- #define stbir__simdf_add_mem( out, reg, ptr ) (out) = wasm_f32x4_add( reg, wasm_v128_load( (void const*)(ptr) ) )
3722- #define stbir__simdf_add1_mem( out, reg, ptr ) (out) = wasm_f32x4_add( reg, wasm_v128_load32_splat( (void const*)(ptr) ) )
3723-
3724- #define stbir__simdf_madd( out, add, mul1, mul2 ) (out) = wasm_f32x4_add( add, wasm_f32x4_mul( mul1, mul2 ) )
3725- #define stbir__simdf_madd1( out, add, mul1, mul2 ) (out) = wasm_f32x4_add( add, wasm_f32x4_mul( mul1, mul2 ) )
3726- #define stbir__simdf_madd_mem( out, add, mul, ptr ) (out) = wasm_f32x4_add( add, wasm_f32x4_mul( mul, wasm_v128_load( (void const*)(ptr) ) ) )
3727- #define stbir__simdf_madd1_mem( out, add, mul, ptr ) (out) = wasm_f32x4_add( add, wasm_f32x4_mul( mul, wasm_v128_load32_splat( (void const*)(ptr) ) ) )
3728-
3729- #define stbir__simdf_add1( out, reg0, reg1 ) (out) = wasm_f32x4_add( reg0, reg1 )
3730- #define stbir__simdf_mult1( out, reg0, reg1 ) (out) = wasm_f32x4_mul( reg0, reg1 )
3731-
3732- #define stbir__simdf_and( out, reg0, reg1 ) (out) = wasm_v128_and( reg0, reg1 )
3733- #define stbir__simdf_or( out, reg0, reg1 ) (out) = wasm_v128_or( reg0, reg1 )
3734-
3735- #define stbir__simdf_min( out, reg0, reg1 ) (out) = wasm_f32x4_min( reg0, reg1 )
3736- #define stbir__simdf_max( out, reg0, reg1 ) (out) = wasm_f32x4_max( reg0, reg1 )
3737- #define stbir__simdf_min1( out, reg0, reg1 ) (out) = wasm_f32x4_min( reg0, reg1 )
3738- #define stbir__simdf_max1( out, reg0, reg1 ) (out) = wasm_f32x4_max( reg0, reg1 )
3739-
3740- #define stbir__simdf_0123ABCDto3ABx( out, reg0, reg1 ) (out) = wasm_i32x4_shuffle( reg0, reg1, 3, 4, 5, -1 )
3741- #define stbir__simdf_0123ABCDto23Ax( out, reg0, reg1 ) (out) = wasm_i32x4_shuffle( reg0, reg1, 2, 3, 4, -1 )
3742-
3743- #define stbir__simdf_aaa1(out,alp,ones) (out) = wasm_i32x4_shuffle(alp, ones, 3, 3, 3, 4)
3744- #define stbir__simdf_1aaa(out,alp,ones) (out) = wasm_i32x4_shuffle(alp, ones, 4, 0, 0, 0)
3745- #define stbir__simdf_a1a1(out,alp,ones) (out) = wasm_i32x4_shuffle(alp, ones, 1, 4, 3, 4)
3746- #define stbir__simdf_1a1a(out,alp,ones) (out) = wasm_i32x4_shuffle(alp, ones, 4, 0, 4, 2)
3747-
3748- #define stbir__simdf_swiz( reg, one, two, three, four ) wasm_i32x4_shuffle(reg, reg, one, two, three, four)
3749-
3750- #define stbir__simdi_and( out, reg0, reg1 ) (out) = wasm_v128_and( reg0, reg1 )
3751- #define stbir__simdi_or( out, reg0, reg1 ) (out) = wasm_v128_or( reg0, reg1 )
3752- #define stbir__simdi_16madd( out, reg0, reg1 ) (out) = wasm_i32x4_dot_i16x8( reg0, reg1 )
3753-
3754- #define stbir__simdf_pack_to_8bytes(out,aa,bb) \
3755- { \
3756- v128_t af = wasm_f32x4_max( wasm_f32x4_min(aa, STBIR_max_uint8_as_float), wasm_f32x4_const_splat(0) ); \
3757- v128_t bf = wasm_f32x4_max( wasm_f32x4_min(bb, STBIR_max_uint8_as_float), wasm_f32x4_const_splat(0) ); \
3758- v128_t ai = wasm_i32x4_trunc_sat_f32x4( af ); \
3759- v128_t bi = wasm_i32x4_trunc_sat_f32x4( bf ); \
3760- v128_t out16 = wasm_i16x8_narrow_i32x4( ai, bi ); \
3761- out = wasm_u8x16_narrow_i16x8( out16, out16 ); \
3762- }
3763-
3764- #define stbir__simdf_pack_to_8words(out,aa,bb) \
3765- { \
3766- v128_t af = wasm_f32x4_max( wasm_f32x4_min(aa, STBIR_max_uint16_as_float), wasm_f32x4_const_splat(0)); \
3767- v128_t bf = wasm_f32x4_max( wasm_f32x4_min(bb, STBIR_max_uint16_as_float), wasm_f32x4_const_splat(0)); \
3768- v128_t ai = wasm_i32x4_trunc_sat_f32x4( af ); \
3769- v128_t bi = wasm_i32x4_trunc_sat_f32x4( bf ); \
3770- out = wasm_u16x8_narrow_i32x4( ai, bi ); \
3771- }
3772-
3773- #define stbir__interleave_pack_and_store_16_u8( ptr, r0, r1, r2, r3 ) \
3774- { \
3775- v128_t tmp0 = wasm_i16x8_narrow_i32x4(r0, r1); \
3776- v128_t tmp1 = wasm_i16x8_narrow_i32x4(r2, r3); \
3777- v128_t tmp = wasm_u8x16_narrow_i16x8(tmp0, tmp1); \
3778- tmp = wasm_i8x16_shuffle(tmp, tmp, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); \
3779- wasm_v128_store( (void*)(ptr), tmp); \
3780- }
3781-
3782- #define stbir__simdf_load4_transposed( o0, o1, o2, o3, ptr ) \
3783- { \
3784- v128_t t0 = wasm_v128_load( ptr ); \
3785- v128_t t1 = wasm_v128_load( ptr+4 ); \
3786- v128_t t2 = wasm_v128_load( ptr+8 ); \
3787- v128_t t3 = wasm_v128_load( ptr+12 ); \
3788- v128_t s0 = wasm_i32x4_shuffle(t0, t1, 0, 4, 2, 6); \
3789- v128_t s1 = wasm_i32x4_shuffle(t0, t1, 1, 5, 3, 7); \
3790- v128_t s2 = wasm_i32x4_shuffle(t2, t3, 0, 4, 2, 6); \
3791- v128_t s3 = wasm_i32x4_shuffle(t2, t3, 1, 5, 3, 7); \
3792- o0 = wasm_i32x4_shuffle(s0, s2, 0, 1, 4, 5); \
3793- o1 = wasm_i32x4_shuffle(s1, s3, 0, 1, 4, 5); \
3794- o2 = wasm_i32x4_shuffle(s0, s2, 2, 3, 6, 7); \
3795- o3 = wasm_i32x4_shuffle(s1, s3, 2, 3, 6, 7); \
3796- }
3797-
3798- #define stbir__simdi_32shr( out, reg, imm ) out = wasm_u32x4_shr( reg, imm )
3799-
3800- typedef float stbir__f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
3801- #define STBIR__SIMDF_CONST(var, x) stbir__simdf var = (v128_t)(stbir__f32x4){ x, x, x, x }
3802- #define STBIR__SIMDI_CONST(var, x) stbir__simdi var = { x, x, x, x }
3803- #define STBIR__CONSTF(var) (var)
3804- #define STBIR__CONSTI(var) (var)
3805-
3806- #ifdef STBIR_FLOORF
3807- #undef STBIR_FLOORF
3808- #endif
3809- #define STBIR_FLOORF stbir_simd_floorf
3810- static stbir__inline float stbir_simd_floorf(float x)
3811- {
3812- return wasm_f32x4_extract_lane( wasm_f32x4_floor( wasm_f32x4_splat(x) ), 0);
3813- }
3814-
3815- #ifdef STBIR_CEILF
3816- #undef STBIR_CEILF
3817- #endif
3818- #define STBIR_CEILF stbir_simd_ceilf
3819- static stbir__inline float stbir_simd_ceilf(float x)
3820- {
3821- return wasm_f32x4_extract_lane( wasm_f32x4_ceil( wasm_f32x4_splat(x) ), 0);
3822- }
3823-
3824- #define STBIR_SIMD
3825-
3826-#endif // SSE2/NEON/WASM
3827+#include <wasm_simd128.h>
3828+
3829+#define stbir__simdf v128_t
3830+#define stbir__simdi v128_t
3831+
3832+#define stbir_simdi_castf(reg) (reg)
3833+#define stbir_simdf_casti(reg) (reg)
3834+
3835+#define stbir__simdf_load(reg, ptr) (reg) = wasm_v128_load((void const *)(ptr))
3836+#define stbir__simdi_load(reg, ptr) (reg) = wasm_v128_load((void const *)(ptr))
3837+#define stbir__simdf_load1(out, ptr) \
3838+ (out) = wasm_v128_load32_splat( \
3839+ (void const *)(ptr)) // top values can be random (not denormal or nan
3840+ // for perf)
3841+#define stbir__simdi_load1(out, ptr) \
3842+ (out) = wasm_v128_load32_splat((void const *)(ptr))
3843+#define stbir__simdf_load1z(out, ptr) \
3844+ (out) = \
3845+ wasm_v128_load32_zero((void const *)(ptr)) // top values must be zero
3846+#define stbir__simdf_frep4(fvar) wasm_f32x4_splat(fvar)
3847+#define stbir__simdf_load1frep4(out, fvar) (out) = wasm_f32x4_splat(fvar)
3848+#define stbir__simdf_load2(out, ptr) \
3849+ (out) = wasm_v128_load64_splat( \
3850+ (void const *)(ptr)) // top values can be random (not denormal or nan
3851+ // for perf)
3852+#define stbir__simdf_load2z(out, ptr) \
3853+ (out) = \
3854+ wasm_v128_load64_zero((void const *)(ptr)) // top values must be zero
3855+#define stbir__simdf_load2hmerge(out, reg, ptr) \
3856+ (out) = wasm_v128_load64_lane((void const *)(ptr), reg, 1)
3857+
3858+#define stbir__simdf_zeroP() wasm_f32x4_const_splat(0)
3859+#define stbir__simdf_zero(reg) (reg) = wasm_f32x4_const_splat(0)
3860+
3861+#define stbir__simdf_store(ptr, reg) wasm_v128_store((void *)(ptr), reg)
3862+#define stbir__simdf_store1(ptr, reg) \
3863+ wasm_v128_store32_lane((void *)(ptr), reg, 0)
3864+#define stbir__simdf_store2(ptr, reg) \
3865+ wasm_v128_store64_lane((void *)(ptr), reg, 0)
3866+#define stbir__simdf_store2h(ptr, reg) \
3867+ wasm_v128_store64_lane((void *)(ptr), reg, 1)
3868+
3869+#define stbir__simdi_store(ptr, reg) wasm_v128_store((void *)(ptr), reg)
3870+#define stbir__simdi_store1(ptr, reg) \
3871+ wasm_v128_store32_lane((void *)(ptr), reg, 0)
3872+#define stbir__simdi_store2(ptr, reg) \
3873+ wasm_v128_store64_lane((void *)(ptr), reg, 0)
3874+
3875+#define stbir__prefetch(ptr)
3876+
3877+#define stbir__simdi_expand_u8_to_u32(out0, out1, out2, out3, ireg) \
3878+ { \
3879+ v128_t l = wasm_u16x8_extend_low_u8x16(ireg); \
3880+ v128_t h = wasm_u16x8_extend_high_u8x16(ireg); \
3881+ out0 = wasm_u32x4_extend_low_u16x8(l); \
3882+ out1 = wasm_u32x4_extend_high_u16x8(l); \
3883+ out2 = wasm_u32x4_extend_low_u16x8(h); \
3884+ out3 = wasm_u32x4_extend_high_u16x8(h); \
3885+ }
3886+
3887+#define stbir__simdi_expand_u8_to_1u32(out, ireg) \
3888+ { \
3889+ v128_t tmp = wasm_u16x8_extend_low_u8x16(ireg); \
3890+ out = wasm_u32x4_extend_low_u16x8(tmp); \
3891+ }
3892+
3893+#define stbir__simdi_expand_u16_to_u32(out0, out1, ireg) \
3894+ { \
3895+ out0 = wasm_u32x4_extend_low_u16x8(ireg); \
3896+ out1 = wasm_u32x4_extend_high_u16x8(ireg); \
3897+ }
3898+
3899+#define stbir__simdf_convert_float_to_i32(i, f) \
3900+ (i) = wasm_i32x4_trunc_sat_f32x4(f)
3901+#define stbir__simdf_convert_float_to_int(f) \
3902+ wasm_i32x4_extract_lane(wasm_i32x4_trunc_sat_f32x4(f), 0)
3903+#define stbir__simdi_to_int(i) wasm_i32x4_extract_lane(i, 0)
3904+#define stbir__simdf_convert_float_to_uint8(f) \
3905+ ((unsigned char)wasm_i32x4_extract_lane( \
3906+ wasm_i32x4_trunc_sat_f32x4( \
3907+ wasm_f32x4_max(wasm_f32x4_min(f, STBIR_max_uint8_as_float), \
3908+ wasm_f32x4_const_splat(0))), \
3909+ 0))
3910+#define stbir__simdf_convert_float_to_short(f) \
3911+ ((unsigned short)wasm_i32x4_extract_lane( \
3912+ wasm_i32x4_trunc_sat_f32x4( \
3913+ wasm_f32x4_max(wasm_f32x4_min(f, STBIR_max_uint16_as_float), \
3914+ wasm_f32x4_const_splat(0))), \
3915+ 0))
3916+#define stbir__simdi_convert_i32_to_float(out, ireg) \
3917+ (out) = wasm_f32x4_convert_i32x4(ireg)
3918+#define stbir__simdf_add(out, reg0, reg1) (out) = wasm_f32x4_add(reg0, reg1)
3919+#define stbir__simdf_mult(out, reg0, reg1) (out) = wasm_f32x4_mul(reg0, reg1)
3920+#define stbir__simdf_mult_mem(out, reg, ptr) \
3921+ (out) = wasm_f32x4_mul(reg, wasm_v128_load((void const *)(ptr)))
3922+#define stbir__simdf_mult1_mem(out, reg, ptr) \
3923+ (out) = wasm_f32x4_mul(reg, wasm_v128_load32_splat((void const *)(ptr)))
3924+#define stbir__simdf_add_mem(out, reg, ptr) \
3925+ (out) = wasm_f32x4_add(reg, wasm_v128_load((void const *)(ptr)))
3926+#define stbir__simdf_add1_mem(out, reg, ptr) \
3927+ (out) = wasm_f32x4_add(reg, wasm_v128_load32_splat((void const *)(ptr)))
3928+
3929+#define stbir__simdf_madd(out, add, mul1, mul2) \
3930+ (out) = wasm_f32x4_add(add, wasm_f32x4_mul(mul1, mul2))
3931+#define stbir__simdf_madd1(out, add, mul1, mul2) \
3932+ (out) = wasm_f32x4_add(add, wasm_f32x4_mul(mul1, mul2))
3933+#define stbir__simdf_madd_mem(out, add, mul, ptr) \
3934+ (out) = wasm_f32x4_add( \
3935+ add, wasm_f32x4_mul(mul, wasm_v128_load((void const *)(ptr))))
3936+#define stbir__simdf_madd1_mem(out, add, mul, ptr) \
3937+ (out) = wasm_f32x4_add( \
3938+ add, wasm_f32x4_mul(mul, wasm_v128_load32_splat((void const *)(ptr))))
3939+
3940+#define stbir__simdf_add1(out, reg0, reg1) (out) = wasm_f32x4_add(reg0, reg1)
3941+#define stbir__simdf_mult1(out, reg0, reg1) (out) = wasm_f32x4_mul(reg0, reg1)
3942+
3943+#define stbir__simdf_and(out, reg0, reg1) (out) = wasm_v128_and(reg0, reg1)
3944+#define stbir__simdf_or(out, reg0, reg1) (out) = wasm_v128_or(reg0, reg1)
3945+
3946+#define stbir__simdf_min(out, reg0, reg1) (out) = wasm_f32x4_min(reg0, reg1)
3947+#define stbir__simdf_max(out, reg0, reg1) (out) = wasm_f32x4_max(reg0, reg1)
3948+#define stbir__simdf_min1(out, reg0, reg1) (out) = wasm_f32x4_min(reg0, reg1)
3949+#define stbir__simdf_max1(out, reg0, reg1) (out) = wasm_f32x4_max(reg0, reg1)
3950+
3951+#define stbir__simdf_0123ABCDto3ABx(out, reg0, reg1) \
3952+ (out) = wasm_i32x4_shuffle(reg0, reg1, 3, 4, 5, -1)
3953+#define stbir__simdf_0123ABCDto23Ax(out, reg0, reg1) \
3954+ (out) = wasm_i32x4_shuffle(reg0, reg1, 2, 3, 4, -1)
3955+
3956+#define stbir__simdf_aaa1(out, alp, ones) \
3957+ (out) = wasm_i32x4_shuffle(alp, ones, 3, 3, 3, 4)
3958+#define stbir__simdf_1aaa(out, alp, ones) \
3959+ (out) = wasm_i32x4_shuffle(alp, ones, 4, 0, 0, 0)
3960+#define stbir__simdf_a1a1(out, alp, ones) \
3961+ (out) = wasm_i32x4_shuffle(alp, ones, 1, 4, 3, 4)
3962+#define stbir__simdf_1a1a(out, alp, ones) \
3963+ (out) = wasm_i32x4_shuffle(alp, ones, 4, 0, 4, 2)
3964+
3965+#define stbir__simdf_swiz(reg, one, two, three, four) \
3966+ wasm_i32x4_shuffle(reg, reg, one, two, three, four)
3967+
3968+#define stbir__simdi_and(out, reg0, reg1) (out) = wasm_v128_and(reg0, reg1)
3969+#define stbir__simdi_or(out, reg0, reg1) (out) = wasm_v128_or(reg0, reg1)
3970+#define stbir__simdi_16madd(out, reg0, reg1) \
3971+ (out) = wasm_i32x4_dot_i16x8(reg0, reg1)
3972+
3973+#define stbir__simdf_pack_to_8bytes(out, aa, bb) \
3974+ { \
3975+ v128_t af = \
3976+ wasm_f32x4_max(wasm_f32x4_min(aa, STBIR_max_uint8_as_float), \
3977+ wasm_f32x4_const_splat(0)); \
3978+ v128_t bf = \
3979+ wasm_f32x4_max(wasm_f32x4_min(bb, STBIR_max_uint8_as_float), \
3980+ wasm_f32x4_const_splat(0)); \
3981+ v128_t ai = wasm_i32x4_trunc_sat_f32x4(af); \
3982+ v128_t bi = wasm_i32x4_trunc_sat_f32x4(bf); \
3983+ v128_t out16 = wasm_i16x8_narrow_i32x4(ai, bi); \
3984+ out = wasm_u8x16_narrow_i16x8(out16, out16); \
3985+ }
3986+
3987+#define stbir__simdf_pack_to_8words(out, aa, bb) \
3988+ { \
3989+ v128_t af = \
3990+ wasm_f32x4_max(wasm_f32x4_min(aa, STBIR_max_uint16_as_float), \
3991+ wasm_f32x4_const_splat(0)); \
3992+ v128_t bf = \
3993+ wasm_f32x4_max(wasm_f32x4_min(bb, STBIR_max_uint16_as_float), \
3994+ wasm_f32x4_const_splat(0)); \
3995+ v128_t ai = wasm_i32x4_trunc_sat_f32x4(af); \
3996+ v128_t bi = wasm_i32x4_trunc_sat_f32x4(bf); \
3997+ out = wasm_u16x8_narrow_i32x4(ai, bi); \
3998+ }
3999+
4000+#define stbir__interleave_pack_and_store_16_u8(ptr, r0, r1, r2, r3) \
4001+ { \
4002+ v128_t tmp0 = wasm_i16x8_narrow_i32x4(r0, r1); \
4003+ v128_t tmp1 = wasm_i16x8_narrow_i32x4(r2, r3); \
4004+ v128_t tmp = wasm_u8x16_narrow_i16x8(tmp0, tmp1); \
4005+ tmp = wasm_i8x16_shuffle(tmp, tmp, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, \
4006+ 14, 3, 7, 11, 15); \
4007+ wasm_v128_store((void *)(ptr), tmp); \
4008+ }
4009+
4010+#define stbir__simdf_load4_transposed(o0, o1, o2, o3, ptr) \
4011+ { \
4012+ v128_t t0 = wasm_v128_load(ptr); \
4013+ v128_t t1 = wasm_v128_load(ptr + 4); \
4014+ v128_t t2 = wasm_v128_load(ptr + 8); \
4015+ v128_t t3 = wasm_v128_load(ptr + 12); \
4016+ v128_t s0 = wasm_i32x4_shuffle(t0, t1, 0, 4, 2, 6); \
4017+ v128_t s1 = wasm_i32x4_shuffle(t0, t1, 1, 5, 3, 7); \
4018+ v128_t s2 = wasm_i32x4_shuffle(t2, t3, 0, 4, 2, 6); \
4019+ v128_t s3 = wasm_i32x4_shuffle(t2, t3, 1, 5, 3, 7); \
4020+ o0 = wasm_i32x4_shuffle(s0, s2, 0, 1, 4, 5); \
4021+ o1 = wasm_i32x4_shuffle(s1, s3, 0, 1, 4, 5); \
4022+ o2 = wasm_i32x4_shuffle(s0, s2, 2, 3, 6, 7); \
4023+ o3 = wasm_i32x4_shuffle(s1, s3, 2, 3, 6, 7); \
4024+ }
4025+
4026+#define stbir__simdi_32shr(out, reg, imm) out = wasm_u32x4_shr(reg, imm)
4027+
4028+typedef float stbir__f32x4
4029+ __attribute__((__vector_size__(16), __aligned__(16)));
4030+#define STBIR__SIMDF_CONST(var, x) \
4031+ stbir__simdf var = (v128_t)(stbir__f32x4) { x, x, x, x }
4032+#define STBIR__SIMDI_CONST(var, x) stbir__simdi var = {x, x, x, x}
4033+#define STBIR__CONSTF(var) (var)
4034+#define STBIR__CONSTI(var) (var)
4035+
4036+#ifdef STBIR_FLOORF
4037+#undef STBIR_FLOORF
4038+#endif
4039+#define STBIR_FLOORF stbir_simd_floorf
4040+static stbir__inline float // floorf() stand-in for the WASM SIMD path; installed as STBIR_FLOORF just above
4041+stbir_simd_floorf(float x) // x: scalar input
4042+{ // splat x across the vector, floor every lane, hand back lane 0
4043+ return wasm_f32x4_extract_lane(wasm_f32x4_floor(wasm_f32x4_splat(x)), 0);
4044+}
4045+
4046+#ifdef STBIR_CEILF
4047+#undef STBIR_CEILF
4048+#endif
4049+#define STBIR_CEILF stbir_simd_ceilf
4050+static stbir__inline float // ceilf() stand-in for the WASM SIMD path; installed as STBIR_CEILF just above
4051+stbir_simd_ceilf(float x) // x: scalar input
4052+{ // splat x across the vector, ceil every lane, hand back lane 0
4053+ return wasm_f32x4_extract_lane(wasm_f32x4_ceil(wasm_f32x4_splat(x)), 0);
4054+}
4055+
4056+#define STBIR_SIMD
4057+
4058+#endif // SSE2/NEON/WASM
4059
4060 #endif // NO SIMD
4061
4062 #ifdef STBIR_SIMD8
4063- #define stbir__simdfX stbir__simdf8
4064- #define stbir__simdiX stbir__simdi8
4065- #define stbir__simdfX_load stbir__simdf8_load
4066- #define stbir__simdiX_load stbir__simdi8_load
4067- #define stbir__simdfX_mult stbir__simdf8_mult
4068- #define stbir__simdfX_add_mem stbir__simdf8_add_mem
4069- #define stbir__simdfX_madd_mem stbir__simdf8_madd_mem
4070- #define stbir__simdfX_store stbir__simdf8_store
4071- #define stbir__simdiX_store stbir__simdi8_store
4072- #define stbir__simdf_frepX stbir__simdf8_frep8
4073- #define stbir__simdfX_madd stbir__simdf8_madd
4074- #define stbir__simdfX_min stbir__simdf8_min
4075- #define stbir__simdfX_max stbir__simdf8_max
4076- #define stbir__simdfX_aaa1 stbir__simdf8_aaa1
4077- #define stbir__simdfX_1aaa stbir__simdf8_1aaa
4078- #define stbir__simdfX_a1a1 stbir__simdf8_a1a1
4079- #define stbir__simdfX_1a1a stbir__simdf8_1a1a
4080- #define stbir__simdfX_convert_float_to_i32 stbir__simdf8_convert_float_to_i32
4081- #define stbir__simdfX_pack_to_words stbir__simdf8_pack_to_16words
4082- #define stbir__simdfX_zero stbir__simdf8_zero
4083- #define STBIR_onesX STBIR_ones8
4084- #define STBIR_max_uint8_as_floatX STBIR_max_uint8_as_float8
4085- #define STBIR_max_uint16_as_floatX STBIR_max_uint16_as_float8
4086- #define STBIR_simd_point5X STBIR_simd_point58
4087- #define stbir__simdfX_float_count 8
4088- #define stbir__simdfX_0123to1230 stbir__simdf8_0123to12301230
4089- #define stbir__simdfX_0123to2103 stbir__simdf8_0123to21032103
4090- static const stbir__simdf8 STBIR_max_uint16_as_float_inverted8 = { stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted,stbir__max_uint16_as_float_inverted };
4091- static const stbir__simdf8 STBIR_max_uint8_as_float_inverted8 = { stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted,stbir__max_uint8_as_float_inverted };
4092- static const stbir__simdf8 STBIR_ones8 = { 1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0 };
4093- static const stbir__simdf8 STBIR_simd_point58 = { 0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5 };
4094- static const stbir__simdf8 STBIR_max_uint8_as_float8 = { stbir__max_uint8_as_float,stbir__max_uint8_as_float,stbir__max_uint8_as_float,stbir__max_uint8_as_float, stbir__max_uint8_as_float,stbir__max_uint8_as_float,stbir__max_uint8_as_float,stbir__max_uint8_as_float };
4095- static const stbir__simdf8 STBIR_max_uint16_as_float8 = { stbir__max_uint16_as_float,stbir__max_uint16_as_float,stbir__max_uint16_as_float,stbir__max_uint16_as_float, stbir__max_uint16_as_float,stbir__max_uint16_as_float,stbir__max_uint16_as_float,stbir__max_uint16_as_float };
4096+#define stbir__simdfX stbir__simdf8
4097+#define stbir__simdiX stbir__simdi8
4098+#define stbir__simdfX_load stbir__simdf8_load
4099+#define stbir__simdiX_load stbir__simdi8_load
4100+#define stbir__simdfX_mult stbir__simdf8_mult
4101+#define stbir__simdfX_add_mem stbir__simdf8_add_mem
4102+#define stbir__simdfX_madd_mem stbir__simdf8_madd_mem
4103+#define stbir__simdfX_store stbir__simdf8_store
4104+#define stbir__simdiX_store stbir__simdi8_store
4105+#define stbir__simdf_frepX stbir__simdf8_frep8
4106+#define stbir__simdfX_madd stbir__simdf8_madd
4107+#define stbir__simdfX_min stbir__simdf8_min
4108+#define stbir__simdfX_max stbir__simdf8_max
4109+#define stbir__simdfX_aaa1 stbir__simdf8_aaa1
4110+#define stbir__simdfX_1aaa stbir__simdf8_1aaa
4111+#define stbir__simdfX_a1a1 stbir__simdf8_a1a1
4112+#define stbir__simdfX_1a1a stbir__simdf8_1a1a
4113+#define stbir__simdfX_convert_float_to_i32 stbir__simdf8_convert_float_to_i32
4114+#define stbir__simdfX_pack_to_words stbir__simdf8_pack_to_16words
4115+#define stbir__simdfX_zero stbir__simdf8_zero
4116+#define STBIR_onesX STBIR_ones8
4117+#define STBIR_max_uint8_as_floatX STBIR_max_uint8_as_float8
4118+#define STBIR_max_uint16_as_floatX STBIR_max_uint16_as_float8
4119+#define STBIR_simd_point5X STBIR_simd_point58
4120+#define stbir__simdfX_float_count 8
4121+#define stbir__simdfX_0123to1230 stbir__simdf8_0123to12301230
4122+#define stbir__simdfX_0123to2103 stbir__simdf8_0123to21032103
4123+static const stbir__simdf8 STBIR_max_uint16_as_float_inverted8 = {
4124+ stbir__max_uint16_as_float_inverted, stbir__max_uint16_as_float_inverted,
4125+ stbir__max_uint16_as_float_inverted, stbir__max_uint16_as_float_inverted,
4126+ stbir__max_uint16_as_float_inverted, stbir__max_uint16_as_float_inverted,
4127+ stbir__max_uint16_as_float_inverted, stbir__max_uint16_as_float_inverted};
4128+static const stbir__simdf8 STBIR_max_uint8_as_float_inverted8 = {
4129+ stbir__max_uint8_as_float_inverted, stbir__max_uint8_as_float_inverted,
4130+ stbir__max_uint8_as_float_inverted, stbir__max_uint8_as_float_inverted,
4131+ stbir__max_uint8_as_float_inverted, stbir__max_uint8_as_float_inverted,
4132+ stbir__max_uint8_as_float_inverted, stbir__max_uint8_as_float_inverted};
4133+static const stbir__simdf8 STBIR_ones8 = {1.0, 1.0, 1.0, 1.0,
4134+ 1.0, 1.0, 1.0, 1.0};
4135+static const stbir__simdf8 STBIR_simd_point58 = {0.5, 0.5, 0.5, 0.5,
4136+ 0.5, 0.5, 0.5, 0.5};
4137+static const stbir__simdf8 STBIR_max_uint8_as_float8 = {
4138+ stbir__max_uint8_as_float, stbir__max_uint8_as_float,
4139+ stbir__max_uint8_as_float, stbir__max_uint8_as_float,
4140+ stbir__max_uint8_as_float, stbir__max_uint8_as_float,
4141+ stbir__max_uint8_as_float, stbir__max_uint8_as_float};
4142+static const stbir__simdf8 STBIR_max_uint16_as_float8 = {
4143+ stbir__max_uint16_as_float, stbir__max_uint16_as_float,
4144+ stbir__max_uint16_as_float, stbir__max_uint16_as_float,
4145+ stbir__max_uint16_as_float, stbir__max_uint16_as_float,
4146+ stbir__max_uint16_as_float, stbir__max_uint16_as_float};
4147 #else
4148- #define stbir__simdfX stbir__simdf
4149- #define stbir__simdiX stbir__simdi
4150- #define stbir__simdfX_load stbir__simdf_load
4151- #define stbir__simdiX_load stbir__simdi_load
4152- #define stbir__simdfX_mult stbir__simdf_mult
4153- #define stbir__simdfX_add_mem stbir__simdf_add_mem
4154- #define stbir__simdfX_madd_mem stbir__simdf_madd_mem
4155- #define stbir__simdfX_store stbir__simdf_store
4156- #define stbir__simdiX_store stbir__simdi_store
4157- #define stbir__simdf_frepX stbir__simdf_frep4
4158- #define stbir__simdfX_madd stbir__simdf_madd
4159- #define stbir__simdfX_min stbir__simdf_min
4160- #define stbir__simdfX_max stbir__simdf_max
4161- #define stbir__simdfX_aaa1 stbir__simdf_aaa1
4162- #define stbir__simdfX_1aaa stbir__simdf_1aaa
4163- #define stbir__simdfX_a1a1 stbir__simdf_a1a1
4164- #define stbir__simdfX_1a1a stbir__simdf_1a1a
4165- #define stbir__simdfX_convert_float_to_i32 stbir__simdf_convert_float_to_i32
4166- #define stbir__simdfX_pack_to_words stbir__simdf_pack_to_8words
4167- #define stbir__simdfX_zero stbir__simdf_zero
4168- #define STBIR_onesX STBIR__CONSTF(STBIR_ones)
4169- #define STBIR_simd_point5X STBIR__CONSTF(STBIR_simd_point5)
4170- #define STBIR_max_uint8_as_floatX STBIR__CONSTF(STBIR_max_uint8_as_float)
4171- #define STBIR_max_uint16_as_floatX STBIR__CONSTF(STBIR_max_uint16_as_float)
4172- #define stbir__simdfX_float_count 4
4173- #define stbir__if_simdf8_cast_to_simdf4( val ) ( val )
4174- #define stbir__simdfX_0123to1230 stbir__simdf_0123to1230
4175- #define stbir__simdfX_0123to2103 stbir__simdf_0123to2103
4176+#define stbir__simdfX stbir__simdf
4177+#define stbir__simdiX stbir__simdi
4178+#define stbir__simdfX_load stbir__simdf_load
4179+#define stbir__simdiX_load stbir__simdi_load
4180+#define stbir__simdfX_mult stbir__simdf_mult
4181+#define stbir__simdfX_add_mem stbir__simdf_add_mem
4182+#define stbir__simdfX_madd_mem stbir__simdf_madd_mem
4183+#define stbir__simdfX_store stbir__simdf_store
4184+#define stbir__simdiX_store stbir__simdi_store
4185+#define stbir__simdf_frepX stbir__simdf_frep4
4186+#define stbir__simdfX_madd stbir__simdf_madd
4187+#define stbir__simdfX_min stbir__simdf_min
4188+#define stbir__simdfX_max stbir__simdf_max
4189+#define stbir__simdfX_aaa1 stbir__simdf_aaa1
4190+#define stbir__simdfX_1aaa stbir__simdf_1aaa
4191+#define stbir__simdfX_a1a1 stbir__simdf_a1a1
4192+#define stbir__simdfX_1a1a stbir__simdf_1a1a
4193+#define stbir__simdfX_convert_float_to_i32 stbir__simdf_convert_float_to_i32
4194+#define stbir__simdfX_pack_to_words stbir__simdf_pack_to_8words
4195+#define stbir__simdfX_zero stbir__simdf_zero
4196+#define STBIR_onesX STBIR__CONSTF(STBIR_ones)
4197+#define STBIR_simd_point5X STBIR__CONSTF(STBIR_simd_point5)
4198+#define STBIR_max_uint8_as_floatX STBIR__CONSTF(STBIR_max_uint8_as_float)
4199+#define STBIR_max_uint16_as_floatX STBIR__CONSTF(STBIR_max_uint16_as_float)
4200+#define stbir__simdfX_float_count 4
4201+#define stbir__if_simdf8_cast_to_simdf4(val) (val)
4202+#define stbir__simdfX_0123to1230 stbir__simdf_0123to1230
4203+#define stbir__simdfX_0123to2103 stbir__simdf_0123to2103
4204 #endif
4205
4206-
4207 #if defined(STBIR_NEON) && !defined(_M_ARM) && !defined(__arm__)
4208
4209- #if defined( _MSC_VER ) && !defined(__clang__)
4210- typedef __int16 stbir__FP16;
4211- #else
4212- typedef float16_t stbir__FP16;
4213- #endif
4214+#if defined(_MSC_VER) && !defined(__clang__)
4215+typedef __int16 stbir__FP16;
4216+#else
4217+typedef float16_t stbir__FP16;
4218+#endif
4219
4220 #else // no NEON, or 32-bit ARM for MSVC
4221
4222- typedef union stbir__FP16
4223- {
4224- unsigned short u;
4225- } stbir__FP16;
4226-
4227-#endif
4228-
4229-#if (!defined(STBIR_NEON) && !defined(STBIR_FP16C)) || (defined(STBIR_NEON) && defined(_M_ARM)) || (defined(STBIR_NEON) && defined(__arm__))
4230-
4231- // Fabian's half float routines, see: https://gist.github.com/rygorous/2156668
4232-
4233- static stbir__inline float stbir__half_to_float( stbir__FP16 h )
4234- {
4235- static const stbir__FP32 magic = { (254 - 15) << 23 };
4236- static const stbir__FP32 was_infnan = { (127 + 16) << 23 };
4237- stbir__FP32 o;
4238-
4239- o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits
4240- o.f *= magic.f; // exponent adjust
4241- if (o.f >= was_infnan.f) // make sure Inf/NaN survive
4242- o.u |= 255 << 23;
4243- o.u |= (h.u & 0x8000) << 16; // sign bit
4244- return o.f;
4245- }
4246-
4247- static stbir__inline stbir__FP16 stbir__float_to_half(float val)
4248- {
4249- stbir__FP32 f32infty = { 255 << 23 };
4250- stbir__FP32 f16max = { (127 + 16) << 23 };
4251- stbir__FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
4252- unsigned int sign_mask = 0x80000000u;
4253- stbir__FP16 o = { 0 };
4254- stbir__FP32 f;
4255- unsigned int sign;
4256-
4257- f.f = val;
4258- sign = f.u & sign_mask;
4259- f.u ^= sign;
4260-
4261- if (f.u >= f16max.u) // result is Inf or NaN (all exponent bits set)
4262- o.u = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
4263- else // (De)normalized number or zero
4264- {
4265- if (f.u < (113 << 23)) // resulting FP16 is subnormal or zero
4266- {
4267- // use a magic value to align our 10 mantissa bits at the bottom of
4268- // the float. as long as FP addition is round-to-nearest-even this
4269- // just works.
4270- f.f += denorm_magic.f;
4271- // and one integer subtract of the bias later, we have our final float!
4272- o.u = (unsigned short) ( f.u - denorm_magic.u );
4273- }
4274- else
4275- {
4276- unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
4277- // update exponent, rounding bias part 1
4278- f.u = f.u + ((15u - 127) << 23) + 0xfff;
4279- // rounding bias part 2
4280- f.u += mant_odd;
4281- // take the bits!
4282- o.u = (unsigned short) ( f.u >> 13 );
4283- }
4284- }
4285-
4286- o.u |= sign >> 16;
4287- return o;
4288- }
4289+typedef union stbir__FP16 {
4290+ unsigned short u;
4291+} stbir__FP16;
4292
4293 #endif
4294
4295+#if (!defined(STBIR_NEON) && !defined(STBIR_FP16C)) || \
4296+ (defined(STBIR_NEON) && defined(_M_ARM)) || \
4297+ (defined(STBIR_NEON) && defined(__arm__))
4298+
4299+// Fabian's half float routines, see: https://gist.github.com/rygorous/2156668
4300+
4301+static stbir__inline float
4302+stbir__half_to_float(stbir__FP16 h)
4303+{
4304+ static const stbir__FP32 magic = {(254 - 15) << 23};
4305+ static const stbir__FP32 was_infnan = {(127 + 16) << 23};
4306+ stbir__FP32 o;
4307+
4308+ o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits
4309+ o.f *= magic.f; // exponent adjust
4310+ if (o.f >= was_infnan.f) { // make sure Inf/NaN survive
4311+ o.u |= 255 << 23;
4312+ }
4313+ o.u |= (h.u & 0x8000) << 16; // sign bit
4314+ return o.f;
4315+}
4316+
4317+static stbir__inline stbir__FP16
4318+stbir__float_to_half(float val)
4319+{
4320+ stbir__FP32 f32infty = {255 << 23};
4321+ stbir__FP32 f16max = {(127 + 16) << 23};
4322+ stbir__FP32 denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};
4323+ unsigned int sign_mask = 0x80000000u;
4324+ stbir__FP16 o = {0};
4325+ stbir__FP32 f;
4326+ unsigned int sign;
4327+
4328+ f.f = val;
4329+ sign = f.u & sign_mask;
4330+ f.u ^= sign;
4331+
4332+ if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
4333+ o.u = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
4334+ } else // (De)normalized number or zero
4335+ {
4336+ if (f.u < (113 << 23)) // resulting FP16 is subnormal or zero
4337+ {
4338+ // use a magic value to align our 10 mantissa bits at the bottom of
4339+ // the float. as long as FP addition is round-to-nearest-even this
4340+ // just works.
4341+ f.f += denorm_magic.f;
4342+ // and one integer subtract of the bias later, we have our final
4343+ // float!
4344+ o.u = (unsigned short)(f.u - denorm_magic.u);
4345+ } else {
4346+ unsigned int mant_odd =
4347+ (f.u >> 13) & 1; // resulting mantissa is odd
4348+ // update exponent, rounding bias part 1
4349+ f.u = f.u + ((15u - 127) << 23) + 0xfff;
4350+ // rounding bias part 2
4351+ f.u += mant_odd;
4352+ // take the bits!
4353+ o.u = (unsigned short)(f.u >> 13);
4354+ }
4355+ }
4356+
4357+ o.u |= sign >> 16;
4358+ return o;
4359+}
4360+
4361+#endif
4362
4363 #if defined(STBIR_FP16C)
4364
4365- #include <immintrin.h>
4366+#include <immintrin.h>
4367
4368- static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input)
4369- {
4370- _mm256_storeu_ps( (float*)output, _mm256_cvtph_ps( _mm_loadu_si128( (__m128i const* )input ) ) );
4371- }
4372+static stbir__inline void
4373+stbir__half_to_float_SIMD(float *output, stbir__FP16 const *input)
4374+{
4375+ _mm256_storeu_ps((float *)output,
4376+ _mm256_cvtph_ps(_mm_loadu_si128((__m128i const *)input)));
4377+}
4378
4379- static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input)
4380- {
4381- _mm_storeu_si128( (__m128i*)output, _mm256_cvtps_ph( _mm256_loadu_ps( input ), 0 ) );
4382- }
4383+static stbir__inline void
4384+stbir__float_to_half_SIMD(stbir__FP16 *output, float const *input)
4385+{
4386+ _mm_storeu_si128((__m128i *)output,
4387+ _mm256_cvtps_ph(_mm256_loadu_ps(input), 0));
4388+}
4389
4390- static stbir__inline float stbir__half_to_float( stbir__FP16 h )
4391- {
4392- return _mm_cvtss_f32( _mm_cvtph_ps( _mm_cvtsi32_si128( (int)h.u ) ) );
4393- }
4394+static stbir__inline float
4395+stbir__half_to_float(stbir__FP16 h)
4396+{
4397+ return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128((int)h.u)));
4398+}
4399
4400- static stbir__inline stbir__FP16 stbir__float_to_half( float f )
4401- {
4402- stbir__FP16 h;
4403- h.u = (unsigned short) _mm_cvtsi128_si32( _mm_cvtps_ph( _mm_set_ss( f ), 0 ) );
4404- return h;
4405- }
4406+static stbir__inline stbir__FP16
4407+stbir__float_to_half(float f)
4408+{
4409+ stbir__FP16 h;
4410+ h.u = (unsigned short)_mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f), 0));
4411+ return h;
4412+}
4413
4414 #elif defined(STBIR_SSE2)
4415
4416- // Fabian's half float routines, see: https://gist.github.com/rygorous/2156668
4417- stbir__inline static void stbir__half_to_float_SIMD(float * output, void const * input)
4418- {
4419- static const STBIR__SIMDI_CONST(mask_nosign, 0x7fff);
4420- static const STBIR__SIMDI_CONST(smallest_normal, 0x0400);
4421- static const STBIR__SIMDI_CONST(infinity, 0x7c00);
4422- static const STBIR__SIMDI_CONST(expadjust_normal, (127 - 15) << 23);
4423- static const STBIR__SIMDI_CONST(magic_denorm, 113 << 23);
4424-
4425- __m128i i = _mm_loadu_si128 ( (__m128i const*)(input) );
4426- __m128i h = _mm_unpacklo_epi16 ( i, _mm_setzero_si128() );
4427- __m128i mnosign = STBIR__CONSTI(mask_nosign);
4428- __m128i eadjust = STBIR__CONSTI(expadjust_normal);
4429- __m128i smallest = STBIR__CONSTI(smallest_normal);
4430- __m128i infty = STBIR__CONSTI(infinity);
4431- __m128i expmant = _mm_and_si128(mnosign, h);
4432- __m128i justsign = _mm_xor_si128(h, expmant);
4433- __m128i b_notinfnan = _mm_cmpgt_epi32(infty, expmant);
4434- __m128i b_isdenorm = _mm_cmpgt_epi32(smallest, expmant);
4435- __m128i shifted = _mm_slli_epi32(expmant, 13);
4436- __m128i adj_infnan = _mm_andnot_si128(b_notinfnan, eadjust);
4437- __m128i adjusted = _mm_add_epi32(eadjust, shifted);
4438- __m128i den1 = _mm_add_epi32(shifted, STBIR__CONSTI(magic_denorm));
4439- __m128i adjusted2 = _mm_add_epi32(adjusted, adj_infnan);
4440- __m128 den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm);
4441- __m128 adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm));
4442- __m128 adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2));
4443- __m128 adjusted5 = _mm_or_ps(adjusted3, adjusted4);
4444- __m128i sign = _mm_slli_epi32(justsign, 16);
4445- __m128 final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign));
4446- stbir__simdf_store( output + 0, final );
4447-
4448- h = _mm_unpackhi_epi16 ( i, _mm_setzero_si128() );
4449- expmant = _mm_and_si128(mnosign, h);
4450- justsign = _mm_xor_si128(h, expmant);
4451- b_notinfnan = _mm_cmpgt_epi32(infty, expmant);
4452- b_isdenorm = _mm_cmpgt_epi32(smallest, expmant);
4453- shifted = _mm_slli_epi32(expmant, 13);
4454- adj_infnan = _mm_andnot_si128(b_notinfnan, eadjust);
4455- adjusted = _mm_add_epi32(eadjust, shifted);
4456- den1 = _mm_add_epi32(shifted, STBIR__CONSTI(magic_denorm));
4457- adjusted2 = _mm_add_epi32(adjusted, adj_infnan);
4458- den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm);
4459- adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm));
4460- adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2));
4461- adjusted5 = _mm_or_ps(adjusted3, adjusted4);
4462- sign = _mm_slli_epi32(justsign, 16);
4463- final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign));
4464- stbir__simdf_store( output + 4, final );
4465-
4466- // ~38 SSE2 ops for 8 values
4467- }
4468-
4469- // Fabian's round-to-nearest-even float to half
4470- // ~48 SSE2 ops for 8 output
4471- stbir__inline static void stbir__float_to_half_SIMD(void * output, float const * input)
4472- {
4473- static const STBIR__SIMDI_CONST(mask_sign, 0x80000000u);
4474- static const STBIR__SIMDI_CONST(c_f16max, (127 + 16) << 23); // all FP32 values >=this round to +inf
4475- static const STBIR__SIMDI_CONST(c_nanbit, 0x200);
4476- static const STBIR__SIMDI_CONST(c_infty_as_fp16, 0x7c00);
4477- static const STBIR__SIMDI_CONST(c_min_normal, (127 - 14) << 23); // smallest FP32 that yields a normalized FP16
4478- static const STBIR__SIMDI_CONST(c_subnorm_magic, ((127 - 15) + (23 - 10) + 1) << 23);
4479- static const STBIR__SIMDI_CONST(c_normal_bias, 0xfff - ((127 - 15) << 23)); // adjust exponent and add mantissa rounding
4480-
4481- __m128 f = _mm_loadu_ps(input);
4482- __m128 msign = _mm_castsi128_ps(STBIR__CONSTI(mask_sign));
4483- __m128 justsign = _mm_and_ps(msign, f);
4484- __m128 absf = _mm_xor_ps(f, justsign);
4485- __m128i absf_int = _mm_castps_si128(absf); // the cast is "free" (extra bypass latency, but no thruput hit)
4486- __m128i f16max = STBIR__CONSTI(c_f16max);
4487- __m128 b_isnan = _mm_cmpunord_ps(absf, absf); // is this a NaN?
4488- __m128i b_isregular = _mm_cmpgt_epi32(f16max, absf_int); // (sub)normalized or special?
4489- __m128i nanbit = _mm_and_si128(_mm_castps_si128(b_isnan), STBIR__CONSTI(c_nanbit));
4490- __m128i inf_or_nan = _mm_or_si128(nanbit, STBIR__CONSTI(c_infty_as_fp16)); // output for specials
4491-
4492- __m128i min_normal = STBIR__CONSTI(c_min_normal);
4493- __m128i b_issub = _mm_cmpgt_epi32(min_normal, absf_int);
4494-
4495- // "result is subnormal" path
4496- __m128 subnorm1 = _mm_add_ps(absf, _mm_castsi128_ps(STBIR__CONSTI(c_subnorm_magic))); // magic value to round output mantissa
4497- __m128i subnorm2 = _mm_sub_epi32(_mm_castps_si128(subnorm1), STBIR__CONSTI(c_subnorm_magic)); // subtract out bias
4498-
4499- // "result is normal" path
4500- __m128i mantoddbit = _mm_slli_epi32(absf_int, 31 - 13); // shift bit 13 (mantissa LSB) to sign
4501- __m128i mantodd = _mm_srai_epi32(mantoddbit, 31); // -1 if FP16 mantissa odd, else 0
4502-
4503- __m128i round1 = _mm_add_epi32(absf_int, STBIR__CONSTI(c_normal_bias));
4504- __m128i round2 = _mm_sub_epi32(round1, mantodd); // if mantissa LSB odd, bias towards rounding up (RTNE)
4505- __m128i normal = _mm_srli_epi32(round2, 13); // rounded result
4506-
4507- // combine the two non-specials
4508- __m128i nonspecial = _mm_or_si128(_mm_and_si128(subnorm2, b_issub), _mm_andnot_si128(b_issub, normal));
4509-
4510- // merge in specials as well
4511- __m128i joined = _mm_or_si128(_mm_and_si128(nonspecial, b_isregular), _mm_andnot_si128(b_isregular, inf_or_nan));
4512-
4513- __m128i sign_shift = _mm_srai_epi32(_mm_castps_si128(justsign), 16);
4514- __m128i final2, final= _mm_or_si128(joined, sign_shift);
4515-
4516- f = _mm_loadu_ps(input+4);
4517- justsign = _mm_and_ps(msign, f);
4518- absf = _mm_xor_ps(f, justsign);
4519- absf_int = _mm_castps_si128(absf); // the cast is "free" (extra bypass latency, but no thruput hit)
4520- b_isnan = _mm_cmpunord_ps(absf, absf); // is this a NaN?
4521- b_isregular = _mm_cmpgt_epi32(f16max, absf_int); // (sub)normalized or special?
4522- nanbit = _mm_and_si128(_mm_castps_si128(b_isnan), c_nanbit);
4523- inf_or_nan = _mm_or_si128(nanbit, STBIR__CONSTI(c_infty_as_fp16)); // output for specials
4524-
4525- b_issub = _mm_cmpgt_epi32(min_normal, absf_int);
4526-
4527- // "result is subnormal" path
4528- subnorm1 = _mm_add_ps(absf, _mm_castsi128_ps(STBIR__CONSTI(c_subnorm_magic))); // magic value to round output mantissa
4529- subnorm2 = _mm_sub_epi32(_mm_castps_si128(subnorm1), STBIR__CONSTI(c_subnorm_magic)); // subtract out bias
4530-
4531- // "result is normal" path
4532- mantoddbit = _mm_slli_epi32(absf_int, 31 - 13); // shift bit 13 (mantissa LSB) to sign
4533- mantodd = _mm_srai_epi32(mantoddbit, 31); // -1 if FP16 mantissa odd, else 0
4534-
4535- round1 = _mm_add_epi32(absf_int, STBIR__CONSTI(c_normal_bias));
4536- round2 = _mm_sub_epi32(round1, mantodd); // if mantissa LSB odd, bias towards rounding up (RTNE)
4537- normal = _mm_srli_epi32(round2, 13); // rounded result
4538-
4539- // combine the two non-specials
4540- nonspecial = _mm_or_si128(_mm_and_si128(subnorm2, b_issub), _mm_andnot_si128(b_issub, normal));
4541-
4542- // merge in specials as well
4543- joined = _mm_or_si128(_mm_and_si128(nonspecial, b_isregular), _mm_andnot_si128(b_isregular, inf_or_nan));
4544-
4545- sign_shift = _mm_srai_epi32(_mm_castps_si128(justsign), 16);
4546- final2 = _mm_or_si128(joined, sign_shift);
4547- final = _mm_packs_epi32(final, final2);
4548- stbir__simdi_store( output,final );
4549- }
4550-
4551-#elif defined(STBIR_NEON) && defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__) // 64-bit ARM on MSVC (not clang)
4552-
4553- static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input)
4554- {
4555- float16x4_t in0 = vld1_f16(input + 0);
4556- float16x4_t in1 = vld1_f16(input + 4);
4557- vst1q_f32(output + 0, vcvt_f32_f16(in0));
4558- vst1q_f32(output + 4, vcvt_f32_f16(in1));
4559- }
4560-
4561- static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input)
4562- {
4563- float16x4_t out0 = vcvt_f16_f32(vld1q_f32(input + 0));
4564- float16x4_t out1 = vcvt_f16_f32(vld1q_f32(input + 4));
4565- vst1_f16(output+0, out0);
4566- vst1_f16(output+4, out1);
4567- }
4568-
4569- static stbir__inline float stbir__half_to_float( stbir__FP16 h )
4570- {
4571- return vgetq_lane_f32(vcvt_f32_f16(vld1_dup_f16(&h)), 0);
4572- }
4573-
4574- static stbir__inline stbir__FP16 stbir__float_to_half( float f )
4575- {
4576- return vget_lane_f16(vcvt_f16_f32(vdupq_n_f32(f)), 0).n16_u16[0];
4577- }
4578-
4579-#elif defined(STBIR_NEON) && ( defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) ) // 64-bit ARM
4580-
4581- static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input)
4582- {
4583- float16x8_t in = vld1q_f16(input);
4584- vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(in)));
4585- vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(in)));
4586- }
4587-
4588- static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input)
4589- {
4590- float16x4_t out0 = vcvt_f16_f32(vld1q_f32(input + 0));
4591- float16x4_t out1 = vcvt_f16_f32(vld1q_f32(input + 4));
4592- vst1q_f16(output, vcombine_f16(out0, out1));
4593- }
4594-
4595- static stbir__inline float stbir__half_to_float( stbir__FP16 h )
4596- {
4597- return vgetq_lane_f32(vcvt_f32_f16(vdup_n_f16(h)), 0);
4598- }
4599-
4600- static stbir__inline stbir__FP16 stbir__float_to_half( float f )
4601- {
4602- return vget_lane_f16(vcvt_f16_f32(vdupq_n_f32(f)), 0);
4603- }
4604-
4605-#elif defined(STBIR_WASM) || (defined(STBIR_NEON) && (defined(_MSC_VER) || defined(_M_ARM) || defined(__arm__))) // WASM or 32-bit ARM on MSVC/clang
4606-
4607- static stbir__inline void stbir__half_to_float_SIMD(float * output, stbir__FP16 const * input)
4608- {
4609- for (int i=0; i<8; i++)
4610- {
4611- output[i] = stbir__half_to_float(input[i]);
4612- }
4613- }
4614- static stbir__inline void stbir__float_to_half_SIMD(stbir__FP16 * output, float const * input)
4615- {
4616- for (int i=0; i<8; i++)
4617- {
4618- output[i] = stbir__float_to_half(input[i]);
4619- }
4620- }
4621+// Fabian's half float routines, see: https://gist.github.com/rygorous/2156668
4622+stbir__inline static void
4623+stbir__half_to_float_SIMD(float *output, void const *input)
4624+{
4625+ static const STBIR__SIMDI_CONST(mask_nosign, 0x7fff);
4626+ static const STBIR__SIMDI_CONST(smallest_normal, 0x0400);
4627+ static const STBIR__SIMDI_CONST(infinity, 0x7c00);
4628+ static const STBIR__SIMDI_CONST(expadjust_normal, (127 - 15) << 23);
4629+ static const STBIR__SIMDI_CONST(magic_denorm, 113 << 23);
4630+
4631+ __m128i i = _mm_loadu_si128((__m128i const *)(input));
4632+ __m128i h = _mm_unpacklo_epi16(i, _mm_setzero_si128());
4633+ __m128i mnosign = STBIR__CONSTI(mask_nosign);
4634+ __m128i eadjust = STBIR__CONSTI(expadjust_normal);
4635+ __m128i smallest = STBIR__CONSTI(smallest_normal);
4636+ __m128i infty = STBIR__CONSTI(infinity);
4637+ __m128i expmant = _mm_and_si128(mnosign, h);
4638+ __m128i justsign = _mm_xor_si128(h, expmant);
4639+ __m128i b_notinfnan = _mm_cmpgt_epi32(infty, expmant);
4640+ __m128i b_isdenorm = _mm_cmpgt_epi32(smallest, expmant);
4641+ __m128i shifted = _mm_slli_epi32(expmant, 13);
4642+ __m128i adj_infnan = _mm_andnot_si128(b_notinfnan, eadjust);
4643+ __m128i adjusted = _mm_add_epi32(eadjust, shifted);
4644+ __m128i den1 = _mm_add_epi32(shifted, STBIR__CONSTI(magic_denorm));
4645+ __m128i adjusted2 = _mm_add_epi32(adjusted, adj_infnan);
4646+ __m128 den2 =
4647+ _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm);
4648+ __m128 adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm));
4649+ __m128 adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm),
4650+ _mm_castsi128_ps(adjusted2));
4651+ __m128 adjusted5 = _mm_or_ps(adjusted3, adjusted4);
4652+ __m128i sign = _mm_slli_epi32(justsign, 16);
4653+ __m128 final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign));
4654+ stbir__simdf_store(output + 0, final);
4655+
4656+ h = _mm_unpackhi_epi16(i, _mm_setzero_si128());
4657+ expmant = _mm_and_si128(mnosign, h);
4658+ justsign = _mm_xor_si128(h, expmant);
4659+ b_notinfnan = _mm_cmpgt_epi32(infty, expmant);
4660+ b_isdenorm = _mm_cmpgt_epi32(smallest, expmant);
4661+ shifted = _mm_slli_epi32(expmant, 13);
4662+ adj_infnan = _mm_andnot_si128(b_notinfnan, eadjust);
4663+ adjusted = _mm_add_epi32(eadjust, shifted);
4664+ den1 = _mm_add_epi32(shifted, STBIR__CONSTI(magic_denorm));
4665+ adjusted2 = _mm_add_epi32(adjusted, adj_infnan);
4666+ den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm);
4667+ adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm));
4668+ adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm),
4669+ _mm_castsi128_ps(adjusted2));
4670+ adjusted5 = _mm_or_ps(adjusted3, adjusted4);
4671+ sign = _mm_slli_epi32(justsign, 16);
4672+ final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign));
4673+ stbir__simdf_store(output + 4, final);
4674+
4675+ // ~38 SSE2 ops for 8 values
4676+}
4677+
4678+// Fabian's round-to-nearest-even float to half
4679+// ~48 SSE2 ops for 8 output
4680+stbir__inline static void
4681+stbir__float_to_half_SIMD(void *output, float const *input)
4682+{
4683+ static const STBIR__SIMDI_CONST(mask_sign, 0x80000000u);
4684+ static const STBIR__SIMDI_CONST(
4685+ c_f16max, (127 + 16) << 23); // all FP32 values >=this round to +inf
4686+ static const STBIR__SIMDI_CONST(c_nanbit, 0x200);
4687+ static const STBIR__SIMDI_CONST(c_infty_as_fp16, 0x7c00);
4688+ static const STBIR__SIMDI_CONST(
4689+ c_min_normal, (127 - 14)
4690+ << 23); // smallest FP32 that yields a normalized FP16
4691+ static const STBIR__SIMDI_CONST(c_subnorm_magic,
4692+ ((127 - 15) + (23 - 10) + 1) << 23);
4693+ static const STBIR__SIMDI_CONST(
4694+ c_normal_bias,
4695+ 0xfff -
4696+ ((127 - 15) << 23)); // adjust exponent and add mantissa rounding
4697+
4698+ __m128 f = _mm_loadu_ps(input);
4699+ __m128 msign = _mm_castsi128_ps(STBIR__CONSTI(mask_sign));
4700+ __m128 justsign = _mm_and_ps(msign, f);
4701+ __m128 absf = _mm_xor_ps(f, justsign);
4702+ __m128i absf_int = _mm_castps_si128(
4703+ absf); // the cast is "free" (extra bypass latency, but no thruput hit)
4704+ __m128i f16max = STBIR__CONSTI(c_f16max);
4705+ __m128 b_isnan = _mm_cmpunord_ps(absf, absf); // is this a NaN?
4706+ __m128i b_isregular =
4707+ _mm_cmpgt_epi32(f16max, absf_int); // (sub)normalized or special?
4708+ __m128i nanbit =
4709+ _mm_and_si128(_mm_castps_si128(b_isnan), STBIR__CONSTI(c_nanbit));
4710+ __m128i inf_or_nan = _mm_or_si128(
4711+ nanbit, STBIR__CONSTI(c_infty_as_fp16)); // output for specials
4712+
4713+ __m128i min_normal = STBIR__CONSTI(c_min_normal);
4714+ __m128i b_issub = _mm_cmpgt_epi32(min_normal, absf_int);
4715+
4716+ // "result is subnormal" path
4717+ __m128 subnorm1 = _mm_add_ps(
4718+ absf, _mm_castsi128_ps(STBIR__CONSTI(
4719+ c_subnorm_magic))); // magic value to round output mantissa
4720+ __m128i subnorm2 =
4721+ _mm_sub_epi32(_mm_castps_si128(subnorm1),
4722+ STBIR__CONSTI(c_subnorm_magic)); // subtract out bias
4723+
4724+ // "result is normal" path
4725+ __m128i mantoddbit = _mm_slli_epi32(
4726+ absf_int, 31 - 13); // shift bit 13 (mantissa LSB) to sign
4727+ __m128i mantodd =
4728+ _mm_srai_epi32(mantoddbit, 31); // -1 if FP16 mantissa odd, else 0
4729+
4730+ __m128i round1 = _mm_add_epi32(absf_int, STBIR__CONSTI(c_normal_bias));
4731+ __m128i round2 = _mm_sub_epi32(
4732+ round1,
4733+ mantodd); // if mantissa LSB odd, bias towards rounding up (RTNE)
4734+ __m128i normal = _mm_srli_epi32(round2, 13); // rounded result
4735+
4736+ // combine the two non-specials
4737+ __m128i nonspecial = _mm_or_si128(_mm_and_si128(subnorm2, b_issub),
4738+ _mm_andnot_si128(b_issub, normal));
4739+
4740+ // merge in specials as well
4741+ __m128i joined = _mm_or_si128(_mm_and_si128(nonspecial, b_isregular),
4742+ _mm_andnot_si128(b_isregular, inf_or_nan));
4743+
4744+ __m128i sign_shift = _mm_srai_epi32(_mm_castps_si128(justsign), 16);
4745+ __m128i final2, final = _mm_or_si128(joined, sign_shift);
4746+
4747+ f = _mm_loadu_ps(input + 4);
4748+ justsign = _mm_and_ps(msign, f);
4749+ absf = _mm_xor_ps(f, justsign);
4750+ absf_int = _mm_castps_si128(
4751+ absf); // the cast is "free" (extra bypass latency, but no thruput hit)
4752+ b_isnan = _mm_cmpunord_ps(absf, absf); // is this a NaN?
4753+ b_isregular =
4754+ _mm_cmpgt_epi32(f16max, absf_int); // (sub)normalized or special?
4755+ nanbit = _mm_and_si128(_mm_castps_si128(b_isnan), c_nanbit);
4756+ inf_or_nan = _mm_or_si128(
4757+ nanbit, STBIR__CONSTI(c_infty_as_fp16)); // output for specials
4758+
4759+ b_issub = _mm_cmpgt_epi32(min_normal, absf_int);
4760+
4761+ // "result is subnormal" path
4762+ subnorm1 = _mm_add_ps(
4763+ absf, _mm_castsi128_ps(STBIR__CONSTI(
4764+ c_subnorm_magic))); // magic value to round output mantissa
4765+ subnorm2 =
4766+ _mm_sub_epi32(_mm_castps_si128(subnorm1),
4767+ STBIR__CONSTI(c_subnorm_magic)); // subtract out bias
4768
4769-#endif
4770+ // "result is normal" path
4771+ mantoddbit = _mm_slli_epi32(absf_int,
4772+ 31 - 13); // shift bit 13 (mantissa LSB) to sign
4773+ mantodd = _mm_srai_epi32(mantoddbit, 31); // -1 if FP16 mantissa odd, else 0
4774
4775+ round1 = _mm_add_epi32(absf_int, STBIR__CONSTI(c_normal_bias));
4776+ round2 = _mm_sub_epi32(
4777+ round1,
4778+ mantodd); // if mantissa LSB odd, bias towards rounding up (RTNE)
4779+ normal = _mm_srli_epi32(round2, 13); // rounded result
4780
4781-#ifdef STBIR_SIMD
4782+ // combine the two non-specials
4783+ nonspecial = _mm_or_si128(_mm_and_si128(subnorm2, b_issub),
4784+ _mm_andnot_si128(b_issub, normal));
4785
4786-#define stbir__simdf_0123to3333( out, reg ) (out) = stbir__simdf_swiz( reg, 3,3,3,3 )
4787-#define stbir__simdf_0123to2222( out, reg ) (out) = stbir__simdf_swiz( reg, 2,2,2,2 )
4788-#define stbir__simdf_0123to1111( out, reg ) (out) = stbir__simdf_swiz( reg, 1,1,1,1 )
4789-#define stbir__simdf_0123to0000( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,0,0 )
4790-#define stbir__simdf_0123to0003( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,0,3 )
4791-#define stbir__simdf_0123to0001( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,0,1 )
4792-#define stbir__simdf_0123to1122( out, reg ) (out) = stbir__simdf_swiz( reg, 1,1,2,2 )
4793-#define stbir__simdf_0123to2333( out, reg ) (out) = stbir__simdf_swiz( reg, 2,3,3,3 )
4794-#define stbir__simdf_0123to0023( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,2,3 )
4795-#define stbir__simdf_0123to1230( out, reg ) (out) = stbir__simdf_swiz( reg, 1,2,3,0 )
4796-#define stbir__simdf_0123to2103( out, reg ) (out) = stbir__simdf_swiz( reg, 2,1,0,3 )
4797-#define stbir__simdf_0123to3210( out, reg ) (out) = stbir__simdf_swiz( reg, 3,2,1,0 )
4798-#define stbir__simdf_0123to2301( out, reg ) (out) = stbir__simdf_swiz( reg, 2,3,0,1 )
4799-#define stbir__simdf_0123to3012( out, reg ) (out) = stbir__simdf_swiz( reg, 3,0,1,2 )
4800-#define stbir__simdf_0123to0011( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,1,1 )
4801-#define stbir__simdf_0123to1100( out, reg ) (out) = stbir__simdf_swiz( reg, 1,1,0,0 )
4802-#define stbir__simdf_0123to2233( out, reg ) (out) = stbir__simdf_swiz( reg, 2,2,3,3 )
4803-#define stbir__simdf_0123to1133( out, reg ) (out) = stbir__simdf_swiz( reg, 1,1,3,3 )
4804-#define stbir__simdf_0123to0022( out, reg ) (out) = stbir__simdf_swiz( reg, 0,0,2,2 )
4805-#define stbir__simdf_0123to1032( out, reg ) (out) = stbir__simdf_swiz( reg, 1,0,3,2 )
4806-
4807-typedef union stbir__simdi_u32
4808-{
4809- stbir_uint32 m128i_u32[4];
4810- int m128i_i32[4];
4811- stbir__simdi m128i_i128;
4812-} stbir__simdi_u32;
4813+ // merge in specials as well
4814+ joined = _mm_or_si128(_mm_and_si128(nonspecial, b_isregular),
4815+ _mm_andnot_si128(b_isregular, inf_or_nan));
4816+
4817+ sign_shift = _mm_srai_epi32(_mm_castps_si128(justsign), 16);
4818+ final2 = _mm_or_si128(joined, sign_shift);
4819+ final = _mm_packs_epi32(final, final2);
4820+ stbir__simdi_store(output, final);
4821+}
4822+
4823+#elif defined(STBIR_NEON) && defined(_MSC_VER) && defined(_M_ARM64) && \
4824+ !defined(__clang__) // 64-bit ARM on MSVC (not clang)
4825+
4826+static stbir__inline void
4827+stbir__half_to_float_SIMD(float *output, stbir__FP16 const *input)
4828+{
4829+ float16x4_t in0 = vld1_f16(input + 0);
4830+ float16x4_t in1 = vld1_f16(input + 4);
4831+ vst1q_f32(output + 0, vcvt_f32_f16(in0));
4832+ vst1q_f32(output + 4, vcvt_f32_f16(in1));
4833+}
4834+
4835+static stbir__inline void
4836+stbir__float_to_half_SIMD(stbir__FP16 *output, float const *input)
4837+{
4838+ float16x4_t out0 = vcvt_f16_f32(vld1q_f32(input + 0));
4839+ float16x4_t out1 = vcvt_f16_f32(vld1q_f32(input + 4));
4840+ vst1_f16(output + 0, out0);
4841+ vst1_f16(output + 4, out1);
4842+}
4843
4844-static const int STBIR_mask[9] = { 0,0,0,-1,-1,-1,0,0,0 };
4845+static stbir__inline float
4846+stbir__half_to_float(stbir__FP16 h)
4847+{
4848+ return vgetq_lane_f32(vcvt_f32_f16(vld1_dup_f16(&h)), 0);
4849+}
4850+
4851+static stbir__inline stbir__FP16
4852+stbir__float_to_half(float f)
4853+{
4854+ return vget_lane_f16(vcvt_f16_f32(vdupq_n_f32(f)), 0).n16_u16[0];
4855+}
4856+
4857+#elif defined(STBIR_NEON) && (defined(_M_ARM64) || defined(__aarch64__) || \
4858+ defined(__arm64__)) // 64-bit ARM
4859+
4860+static stbir__inline void
4861+stbir__half_to_float_SIMD(float *output, stbir__FP16 const *input)
4862+{
4863+ float16x8_t in = vld1q_f16(input);
4864+ vst1q_f32(output + 0, vcvt_f32_f16(vget_low_f16(in)));
4865+ vst1q_f32(output + 4, vcvt_f32_f16(vget_high_f16(in)));
4866+}
4867+
4868+static stbir__inline void
4869+stbir__float_to_half_SIMD(stbir__FP16 *output, float const *input)
4870+{
4871+ float16x4_t out0 = vcvt_f16_f32(vld1q_f32(input + 0));
4872+ float16x4_t out1 = vcvt_f16_f32(vld1q_f32(input + 4));
4873+ vst1q_f16(output, vcombine_f16(out0, out1));
4874+}
4875+
4876+static stbir__inline float
4877+stbir__half_to_float(stbir__FP16 h)
4878+{
4879+ return vgetq_lane_f32(vcvt_f32_f16(vdup_n_f16(h)), 0);
4880+}
4881+
4882+static stbir__inline stbir__FP16
4883+stbir__float_to_half(float f)
4884+{
4885+ return vget_lane_f16(vcvt_f16_f32(vdupq_n_f32(f)), 0);
4886+}
4887+
4888+#elif defined(STBIR_WASM) || \
4889+ (defined(STBIR_NEON) && \
4890+ (defined(_MSC_VER) || defined(_M_ARM) || \
4891+ defined(__arm__))) // WASM or 32-bit ARM on MSVC/clang
4892+
4893+static stbir__inline void
4894+stbir__half_to_float_SIMD(float *output, stbir__FP16 const *input)
4895+{
4896+ for (int i = 0; i < 8; i++) {
4897+ output[i] = stbir__half_to_float(input[i]);
4898+ }
4899+}
4900+static stbir__inline void
4901+stbir__float_to_half_SIMD(stbir__FP16 *output, float const *input)
4902+{
4903+ for (int i = 0; i < 8; i++) {
4904+ output[i] = stbir__float_to_half(input[i]);
4905+ }
4906+}
4907
4908-static const STBIR__SIMDF_CONST(STBIR_max_uint8_as_float, stbir__max_uint8_as_float);
4909-static const STBIR__SIMDF_CONST(STBIR_max_uint16_as_float, stbir__max_uint16_as_float);
4910-static const STBIR__SIMDF_CONST(STBIR_max_uint8_as_float_inverted, stbir__max_uint8_as_float_inverted);
4911-static const STBIR__SIMDF_CONST(STBIR_max_uint16_as_float_inverted, stbir__max_uint16_as_float_inverted);
4912+#endif
4913+
4914+#ifdef STBIR_SIMD
4915+
4916+#define stbir__simdf_0123to3333(out, reg) \
4917+ (out) = stbir__simdf_swiz(reg, 3, 3, 3, 3)
4918+#define stbir__simdf_0123to2222(out, reg) \
4919+ (out) = stbir__simdf_swiz(reg, 2, 2, 2, 2)
4920+#define stbir__simdf_0123to1111(out, reg) \
4921+ (out) = stbir__simdf_swiz(reg, 1, 1, 1, 1)
4922+#define stbir__simdf_0123to0000(out, reg) \
4923+ (out) = stbir__simdf_swiz(reg, 0, 0, 0, 0)
4924+#define stbir__simdf_0123to0003(out, reg) \
4925+ (out) = stbir__simdf_swiz(reg, 0, 0, 0, 3)
4926+#define stbir__simdf_0123to0001(out, reg) \
4927+ (out) = stbir__simdf_swiz(reg, 0, 0, 0, 1)
4928+#define stbir__simdf_0123to1122(out, reg) \
4929+ (out) = stbir__simdf_swiz(reg, 1, 1, 2, 2)
4930+#define stbir__simdf_0123to2333(out, reg) \
4931+ (out) = stbir__simdf_swiz(reg, 2, 3, 3, 3)
4932+#define stbir__simdf_0123to0023(out, reg) \
4933+ (out) = stbir__simdf_swiz(reg, 0, 0, 2, 3)
4934+#define stbir__simdf_0123to1230(out, reg) \
4935+ (out) = stbir__simdf_swiz(reg, 1, 2, 3, 0)
4936+#define stbir__simdf_0123to2103(out, reg) \
4937+ (out) = stbir__simdf_swiz(reg, 2, 1, 0, 3)
4938+#define stbir__simdf_0123to3210(out, reg) \
4939+ (out) = stbir__simdf_swiz(reg, 3, 2, 1, 0)
4940+#define stbir__simdf_0123to2301(out, reg) \
4941+ (out) = stbir__simdf_swiz(reg, 2, 3, 0, 1)
4942+#define stbir__simdf_0123to3012(out, reg) \
4943+ (out) = stbir__simdf_swiz(reg, 3, 0, 1, 2)
4944+#define stbir__simdf_0123to0011(out, reg) \
4945+ (out) = stbir__simdf_swiz(reg, 0, 0, 1, 1)
4946+#define stbir__simdf_0123to1100(out, reg) \
4947+ (out) = stbir__simdf_swiz(reg, 1, 1, 0, 0)
4948+#define stbir__simdf_0123to2233(out, reg) \
4949+ (out) = stbir__simdf_swiz(reg, 2, 2, 3, 3)
4950+#define stbir__simdf_0123to1133(out, reg) \
4951+ (out) = stbir__simdf_swiz(reg, 1, 1, 3, 3)
4952+#define stbir__simdf_0123to0022(out, reg) \
4953+ (out) = stbir__simdf_swiz(reg, 0, 0, 2, 2)
4954+#define stbir__simdf_0123to1032(out, reg) \
4955+ (out) = stbir__simdf_swiz(reg, 1, 0, 3, 2)
4956+
4957+typedef union stbir__simdi_u32 {
4958+ stbir_uint32 m128i_u32[4];
4959+ int m128i_i32[4];
4960+ stbir__simdi m128i_i128;
4961+} stbir__simdi_u32;
4962
4963-static const STBIR__SIMDF_CONST(STBIR_simd_point5, 0.5f);
4964-static const STBIR__SIMDF_CONST(STBIR_ones, 1.0f);
4965-static const STBIR__SIMDI_CONST(STBIR_almost_zero, (127 - 13) << 23);
4966-static const STBIR__SIMDI_CONST(STBIR_almost_one, 0x3f7fffff);
4967+static const int STBIR_mask[9] = {0, 0, 0, -1, -1, -1, 0, 0, 0};
4968+
4969+static const STBIR__SIMDF_CONST(STBIR_max_uint8_as_float,
4970+ stbir__max_uint8_as_float);
4971+static const STBIR__SIMDF_CONST(STBIR_max_uint16_as_float,
4972+ stbir__max_uint16_as_float);
4973+static const STBIR__SIMDF_CONST(STBIR_max_uint8_as_float_inverted,
4974+ stbir__max_uint8_as_float_inverted);
4975+static const STBIR__SIMDF_CONST(STBIR_max_uint16_as_float_inverted,
4976+ stbir__max_uint16_as_float_inverted);
4977+
4978+static const STBIR__SIMDF_CONST(STBIR_simd_point5, 0.5f);
4979+static const STBIR__SIMDF_CONST(STBIR_ones, 1.0f);
4980+static const STBIR__SIMDI_CONST(STBIR_almost_zero, (127 - 13) << 23);
4981+static const STBIR__SIMDI_CONST(STBIR_almost_one, 0x3f7fffff);
4982 static const STBIR__SIMDI_CONST(STBIR_mastissa_mask, 0xff);
4983-static const STBIR__SIMDI_CONST(STBIR_topscale, 0x02000000);
4984+static const STBIR__SIMDI_CONST(STBIR_topscale, 0x02000000);
4985
4986 // Basically, in simd mode, we unroll the proper amount, and we don't want
4987 // the non-simd remnant loops to be unroll because they only run a few times
4988 // Adding this switch saves about 5K on clang which is Captain Unroll the 3rd.
4989-#define STBIR_SIMD_STREAMOUT_PTR( star ) STBIR_STREAMOUT_PTR( star )
4990+#define STBIR_SIMD_STREAMOUT_PTR(star) STBIR_STREAMOUT_PTR(star)
4991 #define STBIR_SIMD_NO_UNROLL(ptr) STBIR_NO_UNROLL(ptr)
4992 #define STBIR_SIMD_NO_UNROLL_LOOP_START STBIR_NO_UNROLL_LOOP_START
4993-#define STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR STBIR_NO_UNROLL_LOOP_START_INF_FOR
4994+#define STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR \
4995+ STBIR_NO_UNROLL_LOOP_START_INF_FOR
4996
4997 #ifdef STBIR_MEMCPY
4998 #undef STBIR_MEMCPY
4999 #endif
5000 #define STBIR_MEMCPY stbir_simd_memcpy
5001
5002-// override normal use of memcpy with much simpler copy (faster and smaller with our sized copies)
5003-static void stbir_simd_memcpy( void * dest, void const * src, size_t bytes )
5004-{
5005- char STBIR_SIMD_STREAMOUT_PTR (*) d = (char*) dest;
5006- char STBIR_SIMD_STREAMOUT_PTR( * ) d_end = ((char*) dest) + bytes;
5007- ptrdiff_t ofs_to_src = (char*)src - (char*)dest;
5008-
5009- // check overlaps
5010- STBIR_ASSERT( ( ( d >= ( (char*)src) + bytes ) ) || ( ( d + bytes ) <= (char*)src ) );
5011-
5012- if ( bytes < (16*stbir__simdfX_float_count) )
5013- {
5014- if ( bytes < 16 )
5015- {
5016- if ( bytes )
5017- {
5018- STBIR_SIMD_NO_UNROLL_LOOP_START
5019- do
5020- {
5021- STBIR_SIMD_NO_UNROLL(d);
5022- d[ 0 ] = d[ ofs_to_src ];
5023- ++d;
5024- } while ( d < d_end );
5025- }
5026- }
5027- else
5028- {
5029- stbir__simdf x;
5030- // do one unaligned to get us aligned for the stream out below
5031- stbir__simdf_load( x, ( d + ofs_to_src ) );
5032- stbir__simdf_store( d, x );
5033- d = (char*)( ( ( (size_t)d ) + 16 ) & ~15 );
5034-
5035- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
5036- for(;;)
5037- {
5038- STBIR_SIMD_NO_UNROLL(d);
5039-
5040- if ( d > ( d_end - 16 ) )
5041- {
5042- if ( d == d_end )
5043- return;
5044- d = d_end - 16;
5045- }
5046-
5047- stbir__simdf_load( x, ( d + ofs_to_src ) );
5048- stbir__simdf_store( d, x );
5049- d += 16;
5050- }
5051- }
5052- }
5053- else
5054- {
5055- stbir__simdfX x0,x1,x2,x3;
5056-
5057- // do one unaligned to get us aligned for the stream out below
5058- stbir__simdfX_load( x0, ( d + ofs_to_src ) + 0*stbir__simdfX_float_count );
5059- stbir__simdfX_load( x1, ( d + ofs_to_src ) + 4*stbir__simdfX_float_count );
5060- stbir__simdfX_load( x2, ( d + ofs_to_src ) + 8*stbir__simdfX_float_count );
5061- stbir__simdfX_load( x3, ( d + ofs_to_src ) + 12*stbir__simdfX_float_count );
5062- stbir__simdfX_store( d + 0*stbir__simdfX_float_count, x0 );
5063- stbir__simdfX_store( d + 4*stbir__simdfX_float_count, x1 );
5064- stbir__simdfX_store( d + 8*stbir__simdfX_float_count, x2 );
5065- stbir__simdfX_store( d + 12*stbir__simdfX_float_count, x3 );
5066- d = (char*)( ( ( (size_t)d ) + (16*stbir__simdfX_float_count) ) & ~((16*stbir__simdfX_float_count)-1) );
5067-
5068- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
5069- for(;;)
5070- {
5071- STBIR_SIMD_NO_UNROLL(d);
5072-
5073- if ( d > ( d_end - (16*stbir__simdfX_float_count) ) )
5074- {
5075- if ( d == d_end )
5076- return;
5077- d = d_end - (16*stbir__simdfX_float_count);
5078- }
5079-
5080- stbir__simdfX_load( x0, ( d + ofs_to_src ) + 0*stbir__simdfX_float_count );
5081- stbir__simdfX_load( x1, ( d + ofs_to_src ) + 4*stbir__simdfX_float_count );
5082- stbir__simdfX_load( x2, ( d + ofs_to_src ) + 8*stbir__simdfX_float_count );
5083- stbir__simdfX_load( x3, ( d + ofs_to_src ) + 12*stbir__simdfX_float_count );
5084- stbir__simdfX_store( d + 0*stbir__simdfX_float_count, x0 );
5085- stbir__simdfX_store( d + 4*stbir__simdfX_float_count, x1 );
5086- stbir__simdfX_store( d + 8*stbir__simdfX_float_count, x2 );
5087- stbir__simdfX_store( d + 12*stbir__simdfX_float_count, x3 );
5088- d += (16*stbir__simdfX_float_count);
5089- }
5090- }
5091-}
5092-
5093-// memcpy that is specically intentionally overlapping (src is smaller then dest, so can be
5094-// a normal forward copy, bytes is divisible by 4 and bytes is greater than or equal to
5095-// the diff between dest and src)
5096-static void stbir_overlapping_memcpy( void * dest, void const * src, size_t bytes )
5097-{
5098- char STBIR_SIMD_STREAMOUT_PTR (*) sd = (char*) src;
5099- char STBIR_SIMD_STREAMOUT_PTR( * ) s_end = ((char*) src) + bytes;
5100- ptrdiff_t ofs_to_dest = (char*)dest - (char*)src;
5101-
5102- if ( ofs_to_dest >= 16 ) // is the overlap more than 16 away?
5103- {
5104- char STBIR_SIMD_STREAMOUT_PTR( * ) s_end16 = ((char*) src) + (bytes&~15);
5105- STBIR_SIMD_NO_UNROLL_LOOP_START
5106- do
5107- {
5108- stbir__simdf x;
5109- STBIR_SIMD_NO_UNROLL(sd);
5110- stbir__simdf_load( x, sd );
5111- stbir__simdf_store( ( sd + ofs_to_dest ), x );
5112- sd += 16;
5113- } while ( sd < s_end16 );
5114-
5115- if ( sd == s_end )
5116- return;
5117- }
5118-
5119- do
5120- {
5121- STBIR_SIMD_NO_UNROLL(sd);
5122- *(int*)( sd + ofs_to_dest ) = *(int*) sd;
5123- sd += 4;
5124- } while ( sd < s_end );
5125+// override normal use of memcpy with much simpler copy (faster and smaller with
5126+// our sized copies)
5127+static void
5128+stbir_simd_memcpy(void *dest, void const *src, size_t bytes)
5129+{
5130+ char STBIR_SIMD_STREAMOUT_PTR(*) d = (char *)dest;
5131+ char STBIR_SIMD_STREAMOUT_PTR(*) d_end = ((char *)dest) + bytes;
5132+ ptrdiff_t ofs_to_src = (char *)src - (char *)dest;
5133+
5134+ // check overlaps
5135+ STBIR_ASSERT(((d >= ((char *)src) + bytes)) ||
5136+ ((d + bytes) <= (char *)src));
5137+
5138+ if (bytes < (16 * stbir__simdfX_float_count)) {
5139+ if (bytes < 16) {
5140+ if (bytes) {
5141+ STBIR_SIMD_NO_UNROLL_LOOP_START
5142+ do {
5143+ STBIR_SIMD_NO_UNROLL(d);
5144+ d[0] = d[ofs_to_src];
5145+ ++d;
5146+ } while (d < d_end);
5147+ }
5148+ } else {
5149+ stbir__simdf x;
5150+ // do one unaligned to get us aligned for the stream out below
5151+ stbir__simdf_load(x, (d + ofs_to_src));
5152+ stbir__simdf_store(d, x);
5153+ d = (char *)((((size_t)d) + 16) & ~15);
5154+
5155+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
5156+ for (;;) {
5157+ STBIR_SIMD_NO_UNROLL(d);
5158+
5159+ if (d > (d_end - 16)) {
5160+ if (d == d_end) {
5161+ return;
5162+ }
5163+ d = d_end - 16;
5164+ }
5165+
5166+ stbir__simdf_load(x, (d + ofs_to_src));
5167+ stbir__simdf_store(d, x);
5168+ d += 16;
5169+ }
5170+ }
5171+ } else {
5172+ stbir__simdfX x0, x1, x2, x3;
5173+
5174+ // do one unaligned to get us aligned for the stream out below
5175+ stbir__simdfX_load(x0,
5176+ (d + ofs_to_src) + 0 * stbir__simdfX_float_count);
5177+ stbir__simdfX_load(x1,
5178+ (d + ofs_to_src) + 4 * stbir__simdfX_float_count);
5179+ stbir__simdfX_load(x2,
5180+ (d + ofs_to_src) + 8 * stbir__simdfX_float_count);
5181+ stbir__simdfX_load(x3,
5182+ (d + ofs_to_src) + 12 * stbir__simdfX_float_count);
5183+ stbir__simdfX_store(d + 0 * stbir__simdfX_float_count, x0);
5184+ stbir__simdfX_store(d + 4 * stbir__simdfX_float_count, x1);
5185+ stbir__simdfX_store(d + 8 * stbir__simdfX_float_count, x2);
5186+ stbir__simdfX_store(d + 12 * stbir__simdfX_float_count, x3);
5187+ d = (char *)((((size_t)d) + (16 * stbir__simdfX_float_count)) &
5188+ ~((16 * stbir__simdfX_float_count) - 1));
5189+
5190+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
5191+ for (;;) {
5192+ STBIR_SIMD_NO_UNROLL(d);
5193+
5194+ if (d > (d_end - (16 * stbir__simdfX_float_count))) {
5195+ if (d == d_end) {
5196+ return;
5197+ }
5198+ d = d_end - (16 * stbir__simdfX_float_count);
5199+ }
5200+
5201+ stbir__simdfX_load(x0, (d + ofs_to_src) +
5202+ 0 * stbir__simdfX_float_count);
5203+ stbir__simdfX_load(x1, (d + ofs_to_src) +
5204+ 4 * stbir__simdfX_float_count);
5205+ stbir__simdfX_load(x2, (d + ofs_to_src) +
5206+ 8 * stbir__simdfX_float_count);
5207+ stbir__simdfX_load(x3, (d + ofs_to_src) +
5208+ 12 * stbir__simdfX_float_count);
5209+ stbir__simdfX_store(d + 0 * stbir__simdfX_float_count, x0);
5210+ stbir__simdfX_store(d + 4 * stbir__simdfX_float_count, x1);
5211+ stbir__simdfX_store(d + 8 * stbir__simdfX_float_count, x2);
5212+ stbir__simdfX_store(d + 12 * stbir__simdfX_float_count, x3);
5213+ d += (16 * stbir__simdfX_float_count);
5214+ }
5215+ }
5216+}
5217+
5218+// memcpy that is specically intentionally overlapping (src is smaller then
5219+// dest, so can be
5220+// a normal forward copy, bytes is divisible by 4 and bytes is greater than or
5221+// equal to the diff between dest and src)
5222+static void
5223+stbir_overlapping_memcpy(void *dest, void const *src, size_t bytes)
5224+{
5225+ char STBIR_SIMD_STREAMOUT_PTR(*) sd = (char *)src;
5226+ char STBIR_SIMD_STREAMOUT_PTR(*) s_end = ((char *)src) + bytes;
5227+ ptrdiff_t ofs_to_dest = (char *)dest - (char *)src;
5228+
5229+ if (ofs_to_dest >= 16) // is the overlap more than 16 away?
5230+ {
5231+ char STBIR_SIMD_STREAMOUT_PTR(*) s_end16 =
5232+ ((char *)src) + (bytes & ~15);
5233+ STBIR_SIMD_NO_UNROLL_LOOP_START
5234+ do {
5235+ stbir__simdf x;
5236+ STBIR_SIMD_NO_UNROLL(sd);
5237+ stbir__simdf_load(x, sd);
5238+ stbir__simdf_store((sd + ofs_to_dest), x);
5239+ sd += 16;
5240+ } while (sd < s_end16);
5241+
5242+ if (sd == s_end) {
5243+ return;
5244+ }
5245+ }
5246+
5247+ do {
5248+ STBIR_SIMD_NO_UNROLL(sd);
5249+ *(int *)(sd + ofs_to_dest) = *(int *)sd;
5250+ sd += 4;
5251+ } while (sd < s_end);
5252 }
5253
5254 #else // no SSE2
5255
5256-// when in scalar mode, we let unrolling happen, so this macro just does the __restrict
5257-#define STBIR_SIMD_STREAMOUT_PTR( star ) STBIR_STREAMOUT_PTR( star )
5258+// when in scalar mode, we let unrolling happen, so this macro just does the
5259+// __restrict
5260+#define STBIR_SIMD_STREAMOUT_PTR(star) STBIR_STREAMOUT_PTR(star)
5261 #define STBIR_SIMD_NO_UNROLL(ptr)
5262 #define STBIR_SIMD_NO_UNROLL_LOOP_START
5263 #define STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
5264
5265 #endif // SSE2
5266
5267-
5268 #ifdef STBIR_PROFILE
5269
5270 #ifndef STBIR_PROFILE_FUNC
5271
5272-#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(__SSE2__) || defined(STBIR_SSE) || defined( _M_IX86_FP ) || defined(__i386) || defined( __i386__ ) || defined( _M_IX86 ) || defined( _X86_ )
5273+#if defined(_x86_64) || defined(__x86_64__) || defined(_M_X64) || \
5274+ defined(__x86_64) || defined(__SSE2__) || defined(STBIR_SSE) || \
5275+ defined(_M_IX86_FP) || defined(__i386) || defined(__i386__) || \
5276+ defined(_M_IX86) || defined(_X86_)
5277
5278 #ifdef _MSC_VER
5279
5280- STBIRDEF stbir_uint64 __rdtsc();
5281- #define STBIR_PROFILE_FUNC() __rdtsc()
5282+STBIRDEF stbir_uint64
5283+__rdtsc();
5284+#define STBIR_PROFILE_FUNC() __rdtsc()
5285
5286 #else // non msvc
5287
5288- static stbir__inline stbir_uint64 STBIR_PROFILE_FUNC()
5289- {
5290- stbir_uint32 lo, hi;
5291- asm volatile ("rdtsc" : "=a" (lo), "=d" (hi) );
5292- return ( ( (stbir_uint64) hi ) << 32 ) | ( (stbir_uint64) lo );
5293- }
5294+static stbir__inline stbir_uint64
5295+STBIR_PROFILE_FUNC()
5296+{
5297+ stbir_uint32 lo, hi;
5298+ asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
5299+ return (((stbir_uint64)hi) << 32) | ((stbir_uint64)lo);
5300+}
5301
5302-#endif // msvc
5303+#endif // msvc
5304
5305-#elif defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || defined(__ARM_NEON__)
5306+#elif defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__) || \
5307+ defined(__ARM_NEON__)
5308
5309-#if defined( _MSC_VER ) && !defined(__clang__)
5310+#if defined(_MSC_VER) && !defined(__clang__)
5311
5312- #define STBIR_PROFILE_FUNC() _ReadStatusReg(ARM64_CNTVCT)
5313+#define STBIR_PROFILE_FUNC() _ReadStatusReg(ARM64_CNTVCT)
5314
5315 #else
5316
5317- static stbir__inline stbir_uint64 STBIR_PROFILE_FUNC()
5318- {
5319- stbir_uint64 tsc;
5320- asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
5321- return tsc;
5322- }
5323+static stbir__inline stbir_uint64
5324+STBIR_PROFILE_FUNC()
5325+{
5326+ stbir_uint64 tsc;
5327+ asm volatile("mrs %0, cntvct_el0" : "=r"(tsc));
5328+ return tsc;
5329+}
5330
5331 #endif
5332
5333@@ -2763,35 +3370,69 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte
5334
5335 #error Unknown platform for profiling.
5336
5337-#endif // x64, arm
5338+#endif // x64, arm
5339
5340 #endif // STBIR_PROFILE_FUNC
5341
5342-#define STBIR_ONLY_PROFILE_GET_SPLIT_INFO ,stbir__per_split_info * split_info
5343-#define STBIR_ONLY_PROFILE_SET_SPLIT_INFO ,split_info
5344+#define STBIR_ONLY_PROFILE_GET_SPLIT_INFO , stbir__per_split_info *split_info
5345+#define STBIR_ONLY_PROFILE_SET_SPLIT_INFO , split_info
5346
5347-#define STBIR_ONLY_PROFILE_BUILD_GET_INFO ,stbir__info * profile_info
5348-#define STBIR_ONLY_PROFILE_BUILD_SET_INFO ,profile_info
5349+#define STBIR_ONLY_PROFILE_BUILD_GET_INFO , stbir__info *profile_info
5350+#define STBIR_ONLY_PROFILE_BUILD_SET_INFO , profile_info
5351
5352 // super light-weight micro profiler
5353-#define STBIR_PROFILE_START_ll( info, wh ) { stbir_uint64 wh##thiszonetime = STBIR_PROFILE_FUNC(); stbir_uint64 * wh##save_parent_excluded_ptr = info->current_zone_excluded_ptr; stbir_uint64 wh##current_zone_excluded = 0; info->current_zone_excluded_ptr = &wh##current_zone_excluded;
5354-#define STBIR_PROFILE_END_ll( info, wh ) wh##thiszonetime = STBIR_PROFILE_FUNC() - wh##thiszonetime; info->profile.named.wh += wh##thiszonetime - wh##current_zone_excluded; *wh##save_parent_excluded_ptr += wh##thiszonetime; info->current_zone_excluded_ptr = wh##save_parent_excluded_ptr; }
5355-#define STBIR_PROFILE_FIRST_START_ll( info, wh ) { int i; info->current_zone_excluded_ptr = &info->profile.named.total; for(i=0;i<STBIR__ARRAY_SIZE(info->profile.array);i++) info->profile.array[i]=0; } STBIR_PROFILE_START_ll( info, wh );
5356-#define STBIR_PROFILE_CLEAR_EXTRAS_ll( info, num ) { int extra; for(extra=1;extra<(num);extra++) { int i; for(i=0;i<STBIR__ARRAY_SIZE((info)->profile.array);i++) (info)[extra].profile.array[i]=0; } }
5357+#define STBIR_PROFILE_START_ll(info, wh) \
5358+ { \
5359+ stbir_uint64 wh##thiszonetime = STBIR_PROFILE_FUNC(); \
5360+ stbir_uint64 *wh##save_parent_excluded_ptr = \
5361+ info->current_zone_excluded_ptr; \
5362+ stbir_uint64 wh##current_zone_excluded = 0; \
5363+ info->current_zone_excluded_ptr = &wh##current_zone_excluded;
5364+#define STBIR_PROFILE_END_ll(info, wh) \
5365+ wh##thiszonetime = STBIR_PROFILE_FUNC() - wh##thiszonetime; \
5366+ info->profile.named.wh += wh##thiszonetime - wh##current_zone_excluded; \
5367+ *wh##save_parent_excluded_ptr += wh##thiszonetime; \
5368+ info->current_zone_excluded_ptr = wh##save_parent_excluded_ptr; \
5369+ }
5370+#define STBIR_PROFILE_FIRST_START_ll(info, wh) \
5371+ { \
5372+ int i; \
5373+ info->current_zone_excluded_ptr = &info->profile.named.total; \
5374+ for (i = 0; i < STBIR__ARRAY_SIZE(info->profile.array); i++) \
5375+ info->profile.array[i] = 0; \
5376+ } \
5377+ STBIR_PROFILE_START_ll(info, wh);
5378+#define STBIR_PROFILE_CLEAR_EXTRAS_ll(info, num) \
5379+ { \
5380+ int extra; \
5381+ for (extra = 1; extra < (num); extra++) { \
5382+ int i; \
5383+ for (i = 0; i < STBIR__ARRAY_SIZE((info)->profile.array); i++) \
5384+ (info)[extra].profile.array[i] = 0; \
5385+ } \
5386+ }
5387
5388 // for thread data
5389-#define STBIR_PROFILE_START( wh ) STBIR_PROFILE_START_ll( split_info, wh )
5390-#define STBIR_PROFILE_END( wh ) STBIR_PROFILE_END_ll( split_info, wh )
5391-#define STBIR_PROFILE_FIRST_START( wh ) STBIR_PROFILE_FIRST_START_ll( split_info, wh )
5392-#define STBIR_PROFILE_CLEAR_EXTRAS() STBIR_PROFILE_CLEAR_EXTRAS_ll( split_info, split_count )
5393+#define STBIR_PROFILE_START(wh) STBIR_PROFILE_START_ll(split_info, wh)
5394+#define STBIR_PROFILE_END(wh) STBIR_PROFILE_END_ll(split_info, wh)
5395+#define STBIR_PROFILE_FIRST_START(wh) \
5396+ STBIR_PROFILE_FIRST_START_ll(split_info, wh)
5397+#define STBIR_PROFILE_CLEAR_EXTRAS() \
5398+ STBIR_PROFILE_CLEAR_EXTRAS_ll(split_info, split_count)
5399
5400 // for build data
5401-#define STBIR_PROFILE_BUILD_START( wh ) STBIR_PROFILE_START_ll( profile_info, wh )
5402-#define STBIR_PROFILE_BUILD_END( wh ) STBIR_PROFILE_END_ll( profile_info, wh )
5403-#define STBIR_PROFILE_BUILD_FIRST_START( wh ) STBIR_PROFILE_FIRST_START_ll( profile_info, wh )
5404-#define STBIR_PROFILE_BUILD_CLEAR( info ) { int i; for(i=0;i<STBIR__ARRAY_SIZE(info->profile.array);i++) info->profile.array[i]=0; }
5405-
5406-#else // no profile
5407+#define STBIR_PROFILE_BUILD_START(wh) STBIR_PROFILE_START_ll(profile_info, wh)
5408+#define STBIR_PROFILE_BUILD_END(wh) STBIR_PROFILE_END_ll(profile_info, wh)
5409+#define STBIR_PROFILE_BUILD_FIRST_START(wh) \
5410+ STBIR_PROFILE_FIRST_START_ll(profile_info, wh)
5411+#define STBIR_PROFILE_BUILD_CLEAR(info) \
5412+ { \
5413+ int i; \
5414+ for (i = 0; i < STBIR__ARRAY_SIZE(info->profile.array); i++) \
5415+ info->profile.array[i] = 0; \
5416+ }
5417+
5418+#else // no profile
5419
5420 #define STBIR_ONLY_PROFILE_GET_SPLIT_INFO
5421 #define STBIR_ONLY_PROFILE_SET_SPLIT_INFO
5422@@ -2799,17 +3440,17 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte
5423 #define STBIR_ONLY_PROFILE_BUILD_GET_INFO
5424 #define STBIR_ONLY_PROFILE_BUILD_SET_INFO
5425
5426-#define STBIR_PROFILE_START( wh )
5427-#define STBIR_PROFILE_END( wh )
5428-#define STBIR_PROFILE_FIRST_START( wh )
5429-#define STBIR_PROFILE_CLEAR_EXTRAS( )
5430+#define STBIR_PROFILE_START(wh)
5431+#define STBIR_PROFILE_END(wh)
5432+#define STBIR_PROFILE_FIRST_START(wh)
5433+#define STBIR_PROFILE_CLEAR_EXTRAS()
5434
5435-#define STBIR_PROFILE_BUILD_START( wh )
5436-#define STBIR_PROFILE_BUILD_END( wh )
5437-#define STBIR_PROFILE_BUILD_FIRST_START( wh )
5438-#define STBIR_PROFILE_BUILD_CLEAR( info )
5439+#define STBIR_PROFILE_BUILD_START(wh)
5440+#define STBIR_PROFILE_BUILD_END(wh)
5441+#define STBIR_PROFILE_BUILD_FIRST_START(wh)
5442+#define STBIR_PROFILE_BUILD_CLEAR(info)
5443
5444-#endif // stbir_profile
5445+#endif // stbir_profile
5446
5447 #ifndef STBIR_CEILF
5448 #include <math.h>
5449@@ -2825,665 +3466,763 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte
5450 #ifndef STBIR_MEMCPY
5451 // For memcpy
5452 #include <string.h>
5453-#define STBIR_MEMCPY( dest, src, len ) memcpy( dest, src, len )
5454+#define STBIR_MEMCPY(dest, src, len) memcpy(dest, src, len)
5455 #endif
5456
5457 #ifndef STBIR_SIMD
5458
5459-// memcpy that is specifically intentionally overlapping (src is smaller then dest, so can be
5460-// a normal forward copy, bytes is divisible by 4 and bytes is greater than or equal to
5461-// the diff between dest and src)
5462-static void stbir_overlapping_memcpy( void * dest, void const * src, size_t bytes )
5463-{
5464- char STBIR_SIMD_STREAMOUT_PTR (*) sd = (char*) src;
5465- char STBIR_SIMD_STREAMOUT_PTR( * ) s_end = ((char*) src) + bytes;
5466- ptrdiff_t ofs_to_dest = (char*)dest - (char*)src;
5467-
5468- if ( ofs_to_dest >= 8 ) // is the overlap more than 8 away?
5469- {
5470- char STBIR_SIMD_STREAMOUT_PTR( * ) s_end8 = ((char*) src) + (bytes&~7);
5471- STBIR_NO_UNROLL_LOOP_START
5472- do
5473- {
5474- STBIR_NO_UNROLL(sd);
5475- *(stbir_uint64*)( sd + ofs_to_dest ) = *(stbir_uint64*) sd;
5476- sd += 8;
5477- } while ( sd < s_end8 );
5478-
5479- if ( sd == s_end )
5480- return;
5481- }
5482-
5483- STBIR_NO_UNROLL_LOOP_START
5484- do
5485- {
5486- STBIR_NO_UNROLL(sd);
5487- *(int*)( sd + ofs_to_dest ) = *(int*) sd;
5488- sd += 4;
5489- } while ( sd < s_end );
5490+// memcpy that is specifically intentionally overlapping (src is smaller then
5491+// dest, so can be
5492+// a normal forward copy, bytes is divisible by 4 and bytes is greater than or
5493+// equal to the diff between dest and src)
5494+static void
5495+stbir_overlapping_memcpy(void *dest, void const *src, size_t bytes)
5496+{
5497+ char STBIR_SIMD_STREAMOUT_PTR(*) sd = (char *)src;
5498+ char STBIR_SIMD_STREAMOUT_PTR(*) s_end = ((char *)src) + bytes;
5499+ ptrdiff_t ofs_to_dest = (char *)dest - (char *)src;
5500+
5501+ if (ofs_to_dest >= 8) // is the overlap more than 8 away?
5502+ {
5503+ char STBIR_SIMD_STREAMOUT_PTR(*) s_end8 = ((char *)src) + (bytes & ~7);
5504+ STBIR_NO_UNROLL_LOOP_START
5505+ do {
5506+ STBIR_NO_UNROLL(sd);
5507+ *(stbir_uint64 *)(sd + ofs_to_dest) = *(stbir_uint64 *)sd;
5508+ sd += 8;
5509+ } while (sd < s_end8);
5510+
5511+ if (sd == s_end) {
5512+ return;
5513+ }
5514+ }
5515+
5516+ STBIR_NO_UNROLL_LOOP_START
5517+ do {
5518+ STBIR_NO_UNROLL(sd);
5519+ *(int *)(sd + ofs_to_dest) = *(int *)sd;
5520+ sd += 4;
5521+ } while (sd < s_end);
5522 }
5523
5524 #endif
5525
5526-static float stbir__filter_trapezoid(float x, float scale, void * user_data)
5527+static float
5528+stbir__filter_trapezoid(float x, float scale, void *user_data)
5529 {
5530- float halfscale = scale / 2;
5531- float t = 0.5f + halfscale;
5532- STBIR_ASSERT(scale <= 1);
5533- STBIR__UNUSED(user_data);
5534+ float halfscale = scale / 2;
5535+ float t = 0.5f + halfscale;
5536+ STBIR_ASSERT(scale <= 1);
5537+ STBIR__UNUSED(user_data);
5538
5539- if ( x < 0.0f ) x = -x;
5540+ if (x < 0.0f) {
5541+ x = -x;
5542+ }
5543
5544- if (x >= t)
5545- return 0.0f;
5546- else
5547- {
5548- float r = 0.5f - halfscale;
5549- if (x <= r)
5550- return 1.0f;
5551- else
5552- return (t - x) / scale;
5553- }
5554+ if (x >= t) {
5555+ return 0.0f;
5556+ } else {
5557+ float r = 0.5f - halfscale;
5558+ if (x <= r) {
5559+ return 1.0f;
5560+ } else {
5561+ return (t - x) / scale;
5562+ }
5563+ }
5564 }
5565
5566-static float stbir__support_trapezoid(float scale, void * user_data)
5567+static float
5568+stbir__support_trapezoid(float scale, void *user_data)
5569 {
5570- STBIR__UNUSED(user_data);
5571- return 0.5f + scale / 2.0f;
5572+ STBIR__UNUSED(user_data);
5573+ return 0.5f + scale / 2.0f;
5574 }
5575
5576-static float stbir__filter_triangle(float x, float s, void * user_data)
5577+static float
5578+stbir__filter_triangle(float x, float s, void *user_data)
5579 {
5580- STBIR__UNUSED(s);
5581- STBIR__UNUSED(user_data);
5582+ STBIR__UNUSED(s);
5583+ STBIR__UNUSED(user_data);
5584
5585- if ( x < 0.0f ) x = -x;
5586+ if (x < 0.0f) {
5587+ x = -x;
5588+ }
5589
5590- if (x <= 1.0f)
5591- return 1.0f - x;
5592- else
5593- return 0.0f;
5594+ if (x <= 1.0f) {
5595+ return 1.0f - x;
5596+ } else {
5597+ return 0.0f;
5598+ }
5599 }
5600
5601-static float stbir__filter_point(float x, float s, void * user_data)
5602+static float
5603+stbir__filter_point(float x, float s, void *user_data)
5604 {
5605- STBIR__UNUSED(x);
5606- STBIR__UNUSED(s);
5607- STBIR__UNUSED(user_data);
5608+ STBIR__UNUSED(x);
5609+ STBIR__UNUSED(s);
5610+ STBIR__UNUSED(user_data);
5611
5612- return 1.0f;
5613+ return 1.0f;
5614 }
5615
5616-static float stbir__filter_cubic(float x, float s, void * user_data)
5617+static float
5618+stbir__filter_cubic(float x, float s, void *user_data)
5619 {
5620- STBIR__UNUSED(s);
5621- STBIR__UNUSED(user_data);
5622+ STBIR__UNUSED(s);
5623+ STBIR__UNUSED(user_data);
5624
5625- if ( x < 0.0f ) x = -x;
5626+ if (x < 0.0f) {
5627+ x = -x;
5628+ }
5629
5630- if (x < 1.0f)
5631- return (4.0f + x*x*(3.0f*x - 6.0f))/6.0f;
5632- else if (x < 2.0f)
5633- return (8.0f + x*(-12.0f + x*(6.0f - x)))/6.0f;
5634+ if (x < 1.0f) {
5635+ return (4.0f + x * x * (3.0f * x - 6.0f)) / 6.0f;
5636+ } else if (x < 2.0f) {
5637+ return (8.0f + x * (-12.0f + x * (6.0f - x))) / 6.0f;
5638+ }
5639
5640- return (0.0f);
5641+ return (0.0f);
5642 }
5643
5644-static float stbir__filter_catmullrom(float x, float s, void * user_data)
5645+static float
5646+stbir__filter_catmullrom(float x, float s, void *user_data)
5647 {
5648- STBIR__UNUSED(s);
5649- STBIR__UNUSED(user_data);
5650+ STBIR__UNUSED(s);
5651+ STBIR__UNUSED(user_data);
5652
5653- if ( x < 0.0f ) x = -x;
5654+ if (x < 0.0f) {
5655+ x = -x;
5656+ }
5657
5658- if (x < 1.0f)
5659- return 1.0f - x*x*(2.5f - 1.5f*x);
5660- else if (x < 2.0f)
5661- return 2.0f - x*(4.0f + x*(0.5f*x - 2.5f));
5662+ if (x < 1.0f) {
5663+ return 1.0f - x * x * (2.5f - 1.5f * x);
5664+ } else if (x < 2.0f) {
5665+ return 2.0f - x * (4.0f + x * (0.5f * x - 2.5f));
5666+ }
5667
5668- return (0.0f);
5669+ return (0.0f);
5670 }
5671
5672-static float stbir__filter_mitchell(float x, float s, void * user_data)
5673+static float
5674+stbir__filter_mitchell(float x, float s, void *user_data)
5675 {
5676- STBIR__UNUSED(s);
5677- STBIR__UNUSED(user_data);
5678+ STBIR__UNUSED(s);
5679+ STBIR__UNUSED(user_data);
5680
5681- if ( x < 0.0f ) x = -x;
5682+ if (x < 0.0f) {
5683+ x = -x;
5684+ }
5685
5686- if (x < 1.0f)
5687- return (16.0f + x*x*(21.0f * x - 36.0f))/18.0f;
5688- else if (x < 2.0f)
5689- return (32.0f + x*(-60.0f + x*(36.0f - 7.0f*x)))/18.0f;
5690+ if (x < 1.0f) {
5691+ return (16.0f + x * x * (21.0f * x - 36.0f)) / 18.0f;
5692+ } else if (x < 2.0f) {
5693+ return (32.0f + x * (-60.0f + x * (36.0f - 7.0f * x))) / 18.0f;
5694+ }
5695
5696- return (0.0f);
5697+ return (0.0f);
5698 }
5699
5700-static float stbir__support_zeropoint5(float s, void * user_data)
5701+static float
5702+stbir__support_zeropoint5(float s, void *user_data)
5703 {
5704- STBIR__UNUSED(s);
5705- STBIR__UNUSED(user_data);
5706- return 0.5f;
5707+ STBIR__UNUSED(s);
5708+ STBIR__UNUSED(user_data);
5709+ return 0.5f;
5710 }
5711
5712-static float stbir__support_one(float s, void * user_data)
5713+static float
5714+stbir__support_one(float s, void *user_data)
5715 {
5716- STBIR__UNUSED(s);
5717- STBIR__UNUSED(user_data);
5718- return 1;
5719+ STBIR__UNUSED(s);
5720+ STBIR__UNUSED(user_data);
5721+ return 1;
5722 }
5723
5724-static float stbir__support_two(float s, void * user_data)
5725+static float
5726+stbir__support_two(float s, void *user_data)
5727 {
5728- STBIR__UNUSED(s);
5729- STBIR__UNUSED(user_data);
5730- return 2;
5731+ STBIR__UNUSED(s);
5732+ STBIR__UNUSED(user_data);
5733+ return 2;
5734 }
5735
5736 // This is the maximum number of input samples that can affect an output sample
5737 // with the given filter from the output pixel's perspective
5738-static int stbir__get_filter_pixel_width(stbir__support_callback * support, float scale, void * user_data)
5739+static int
5740+stbir__get_filter_pixel_width(stbir__support_callback *support, float scale,
5741+ void *user_data)
5742 {
5743- STBIR_ASSERT(support != 0);
5744+ STBIR_ASSERT(support != 0);
5745
5746- if ( scale >= ( 1.0f-stbir__small_float ) ) // upscale
5747- return (int)STBIR_CEILF(support(1.0f/scale,user_data) * 2.0f);
5748- else
5749- return (int)STBIR_CEILF(support(scale,user_data) * 2.0f / scale);
5750+ if (scale >= (1.0f - stbir__small_float)) { // upscale
5751+ return (int)STBIR_CEILF(support(1.0f / scale, user_data) * 2.0f);
5752+ } else {
5753+ return (int)STBIR_CEILF(support(scale, user_data) * 2.0f / scale);
5754+ }
5755 }
5756
5757 // this is how many coefficents per run of the filter (which is different
5758 // from the filter_pixel_width depending on if we are scattering or gathering)
5759-static int stbir__get_coefficient_width(stbir__sampler * samp, int is_gather, void * user_data)
5760+static int
5761+stbir__get_coefficient_width(stbir__sampler *samp, int is_gather,
5762+ void *user_data)
5763 {
5764- float scale = samp->scale_info.scale;
5765- stbir__support_callback * support = samp->filter_support;
5766+ float scale = samp->scale_info.scale;
5767+ stbir__support_callback *support = samp->filter_support;
5768
5769- switch( is_gather )
5770- {
5771- case 1:
5772- return (int)STBIR_CEILF(support(1.0f / scale, user_data) * 2.0f);
5773- case 2:
5774- return (int)STBIR_CEILF(support(scale, user_data) * 2.0f / scale);
5775- case 0:
5776- return (int)STBIR_CEILF(support(scale, user_data) * 2.0f);
5777- default:
5778- STBIR_ASSERT( (is_gather >= 0 ) && (is_gather <= 2 ) );
5779- return 0;
5780- }
5781+ switch (is_gather) {
5782+ case 1:
5783+ return (int)STBIR_CEILF(support(1.0f / scale, user_data) * 2.0f);
5784+ case 2:
5785+ return (int)STBIR_CEILF(support(scale, user_data) * 2.0f / scale);
5786+ case 0:
5787+ return (int)STBIR_CEILF(support(scale, user_data) * 2.0f);
5788+ default:
5789+ STBIR_ASSERT((is_gather >= 0) && (is_gather <= 2));
5790+ return 0;
5791+ }
5792 }
5793
5794-static int stbir__get_contributors(stbir__sampler * samp, int is_gather)
5795+static int
5796+stbir__get_contributors(stbir__sampler *samp, int is_gather)
5797 {
5798- if (is_gather)
5799- return samp->scale_info.output_sub_size;
5800- else
5801- return (samp->scale_info.input_full_size + samp->filter_pixel_margin * 2);
5802+ if (is_gather) {
5803+ return samp->scale_info.output_sub_size;
5804+ } else {
5805+ return (samp->scale_info.input_full_size +
5806+ samp->filter_pixel_margin * 2);
5807+ }
5808 }
5809
5810-static int stbir__edge_zero_full( int n, int max )
5811+static int
5812+stbir__edge_zero_full(int n, int max)
5813 {
5814- STBIR__UNUSED(n);
5815- STBIR__UNUSED(max);
5816- return 0; // NOTREACHED
5817+ STBIR__UNUSED(n);
5818+ STBIR__UNUSED(max);
5819+ return 0; // NOTREACHED
5820 }
5821
5822-static int stbir__edge_clamp_full( int n, int max )
5823+static int
5824+stbir__edge_clamp_full(int n, int max)
5825 {
5826- if (n < 0)
5827- return 0;
5828+ if (n < 0) {
5829+ return 0;
5830+ }
5831
5832- if (n >= max)
5833- return max - 1;
5834+ if (n >= max) {
5835+ return max - 1;
5836+ }
5837
5838- return n; // NOTREACHED
5839+ return n; // NOTREACHED
5840 }
5841
5842-static int stbir__edge_reflect_full( int n, int max )
5843+static int
5844+stbir__edge_reflect_full(int n, int max)
5845 {
5846- if (n < 0)
5847- {
5848- if (n > -max)
5849- return -n;
5850- else
5851- return max - 1;
5852- }
5853+ if (n < 0) {
5854+ if (n > -max) {
5855+ return -n;
5856+ } else {
5857+ return max - 1;
5858+ }
5859+ }
5860
5861- if (n >= max)
5862- {
5863- int max2 = max * 2;
5864- if (n >= max2)
5865- return 0;
5866- else
5867- return max2 - n - 1;
5868- }
5869+ if (n >= max) {
5870+ int max2 = max * 2;
5871+ if (n >= max2) {
5872+ return 0;
5873+ } else {
5874+ return max2 - n - 1;
5875+ }
5876+ }
5877
5878- return n; // NOTREACHED
5879+ return n; // NOTREACHED
5880 }
5881
5882-static int stbir__edge_wrap_full( int n, int max )
5883+static int
5884+stbir__edge_wrap_full(int n, int max)
5885 {
5886- if (n >= 0)
5887- return (n % max);
5888- else
5889- {
5890- int m = (-n) % max;
5891+ if (n >= 0) {
5892+ return (n % max);
5893+ } else {
5894+ int m = (-n) % max;
5895
5896- if (m != 0)
5897- m = max - m;
5898+ if (m != 0) {
5899+ m = max - m;
5900+ }
5901
5902- return (m);
5903- }
5904+ return (m);
5905+ }
5906 }
5907
5908-typedef int stbir__edge_wrap_func( int n, int max );
5909-static stbir__edge_wrap_func * stbir__edge_wrap_slow[] =
5910-{
5911- stbir__edge_clamp_full, // STBIR_EDGE_CLAMP
5912- stbir__edge_reflect_full, // STBIR_EDGE_REFLECT
5913- stbir__edge_wrap_full, // STBIR_EDGE_WRAP
5914- stbir__edge_zero_full, // STBIR_EDGE_ZERO
5915+typedef int
5916+stbir__edge_wrap_func(int n, int max);
5917+static stbir__edge_wrap_func *stbir__edge_wrap_slow[] = {
5918+ stbir__edge_clamp_full, // STBIR_EDGE_CLAMP
5919+ stbir__edge_reflect_full, // STBIR_EDGE_REFLECT
5920+ stbir__edge_wrap_full, // STBIR_EDGE_WRAP
5921+ stbir__edge_zero_full, // STBIR_EDGE_ZERO
5922 };
5923
5924-stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
5925+stbir__inline static int
5926+stbir__edge_wrap(stbir_edge edge, int n, int max)
5927 {
5928- // avoid per-pixel switch
5929- if (n >= 0 && n < max)
5930- return n;
5931- return stbir__edge_wrap_slow[edge]( n, max );
5932+ // avoid per-pixel switch
5933+ if (n >= 0 && n < max) {
5934+ return n;
5935+ }
5936+ return stbir__edge_wrap_slow[edge](n, max);
5937 }
5938
5939 #define STBIR__MERGE_RUNS_PIXEL_THRESHOLD 16
5940
5941 // get information on the extents of a sampler
5942-static void stbir__get_extents( stbir__sampler * samp, stbir__extents * scanline_extents )
5943-{
5944- int j, stop;
5945- int left_margin, right_margin;
5946- int min_n = 0x7fffffff, max_n = -0x7fffffff;
5947- int min_left = 0x7fffffff, max_left = -0x7fffffff;
5948- int min_right = 0x7fffffff, max_right = -0x7fffffff;
5949- stbir_edge edge = samp->edge;
5950- stbir__contributors* contributors = samp->contributors;
5951- int output_sub_size = samp->scale_info.output_sub_size;
5952- int input_full_size = samp->scale_info.input_full_size;
5953- int filter_pixel_margin = samp->filter_pixel_margin;
5954-
5955- STBIR_ASSERT( samp->is_gather );
5956-
5957- stop = output_sub_size;
5958- for (j = 0; j < stop; j++ )
5959- {
5960- STBIR_ASSERT( contributors[j].n1 >= contributors[j].n0 );
5961- if ( contributors[j].n0 < min_n )
5962- {
5963- min_n = contributors[j].n0;
5964- stop = j + filter_pixel_margin; // if we find a new min, only scan another filter width
5965- if ( stop > output_sub_size ) stop = output_sub_size;
5966- }
5967- }
5968-
5969- stop = 0;
5970- for (j = output_sub_size - 1; j >= stop; j-- )
5971- {
5972- STBIR_ASSERT( contributors[j].n1 >= contributors[j].n0 );
5973- if ( contributors[j].n1 > max_n )
5974- {
5975- max_n = contributors[j].n1;
5976- stop = j - filter_pixel_margin; // if we find a new max, only scan another filter width
5977- if (stop<0) stop = 0;
5978- }
5979- }
5980-
5981- STBIR_ASSERT( scanline_extents->conservative.n0 <= min_n );
5982- STBIR_ASSERT( scanline_extents->conservative.n1 >= max_n );
5983-
5984- // now calculate how much into the margins we really read
5985- left_margin = 0;
5986- if ( min_n < 0 )
5987- {
5988- left_margin = -min_n;
5989- min_n = 0;
5990- }
5991-
5992- right_margin = 0;
5993- if ( max_n >= input_full_size )
5994- {
5995- right_margin = max_n - input_full_size + 1;
5996- max_n = input_full_size - 1;
5997- }
5998-
5999- // index 1 is margin pixel extents (how many pixels we hang over the edge)
6000- scanline_extents->edge_sizes[0] = left_margin;
6001- scanline_extents->edge_sizes[1] = right_margin;
6002-
6003- // index 2 is pixels read from the input
6004- scanline_extents->spans[0].n0 = min_n;
6005- scanline_extents->spans[0].n1 = max_n;
6006- scanline_extents->spans[0].pixel_offset_for_input = min_n;
6007-
6008- // default to no other input range
6009- scanline_extents->spans[1].n0 = 0;
6010- scanline_extents->spans[1].n1 = -1;
6011- scanline_extents->spans[1].pixel_offset_for_input = 0;
6012-
6013- // don't have to do edge calc for zero clamp
6014- if ( edge == STBIR_EDGE_ZERO )
6015- return;
6016-
6017- // convert margin pixels to the pixels within the input (min and max)
6018- for( j = -left_margin ; j < 0 ; j++ )
6019- {
6020- int p = stbir__edge_wrap( edge, j, input_full_size );
6021- if ( p < min_left )
6022- min_left = p;
6023- if ( p > max_left )
6024- max_left = p;
6025- }
6026-
6027- for( j = input_full_size ; j < (input_full_size + right_margin) ; j++ )
6028- {
6029- int p = stbir__edge_wrap( edge, j, input_full_size );
6030- if ( p < min_right )
6031- min_right = p;
6032- if ( p > max_right )
6033- max_right = p;
6034- }
6035-
6036- // merge the left margin pixel region if it connects within 4 pixels of main pixel region
6037- if ( min_left != 0x7fffffff )
6038- {
6039- if ( ( ( min_left <= min_n ) && ( ( max_left + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= min_n ) ) ||
6040- ( ( min_n <= min_left ) && ( ( max_n + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= max_left ) ) )
6041- {
6042- scanline_extents->spans[0].n0 = min_n = stbir__min( min_n, min_left );
6043- scanline_extents->spans[0].n1 = max_n = stbir__max( max_n, max_left );
6044- scanline_extents->spans[0].pixel_offset_for_input = min_n;
6045- left_margin = 0;
6046- }
6047- }
6048-
6049- // merge the right margin pixel region if it connects within 4 pixels of main pixel region
6050- if ( min_right != 0x7fffffff )
6051- {
6052- if ( ( ( min_right <= min_n ) && ( ( max_right + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= min_n ) ) ||
6053- ( ( min_n <= min_right ) && ( ( max_n + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= max_right ) ) )
6054- {
6055- scanline_extents->spans[0].n0 = min_n = stbir__min( min_n, min_right );
6056- scanline_extents->spans[0].n1 = max_n = stbir__max( max_n, max_right );
6057- scanline_extents->spans[0].pixel_offset_for_input = min_n;
6058- right_margin = 0;
6059- }
6060- }
6061-
6062- STBIR_ASSERT( scanline_extents->conservative.n0 <= min_n );
6063- STBIR_ASSERT( scanline_extents->conservative.n1 >= max_n );
6064-
6065- // you get two ranges when you have the WRAP edge mode and you are doing just the a piece of the resize
6066- // so you need to get a second run of pixels from the opposite side of the scanline (which you
6067- // wouldn't need except for WRAP)
6068-
6069-
6070- // if we can't merge the min_left range, add it as a second range
6071- if ( ( left_margin ) && ( min_left != 0x7fffffff ) )
6072- {
6073- stbir__span * newspan = scanline_extents->spans + 1;
6074- STBIR_ASSERT( right_margin == 0 );
6075- if ( min_left < scanline_extents->spans[0].n0 )
6076- {
6077- scanline_extents->spans[1].pixel_offset_for_input = scanline_extents->spans[0].n0;
6078- scanline_extents->spans[1].n0 = scanline_extents->spans[0].n0;
6079- scanline_extents->spans[1].n1 = scanline_extents->spans[0].n1;
6080- --newspan;
6081- }
6082- newspan->pixel_offset_for_input = min_left;
6083- newspan->n0 = -left_margin;
6084- newspan->n1 = ( max_left - min_left ) - left_margin;
6085- scanline_extents->edge_sizes[0] = 0; // don't need to copy the left margin, since we are directly decoding into the margin
6086- }
6087- // if we can't merge the min_left range, add it as a second range
6088- else
6089- if ( ( right_margin ) && ( min_right != 0x7fffffff ) )
6090- {
6091- stbir__span * newspan = scanline_extents->spans + 1;
6092- if ( min_right < scanline_extents->spans[0].n0 )
6093- {
6094- scanline_extents->spans[1].pixel_offset_for_input = scanline_extents->spans[0].n0;
6095- scanline_extents->spans[1].n0 = scanline_extents->spans[0].n0;
6096- scanline_extents->spans[1].n1 = scanline_extents->spans[0].n1;
6097- --newspan;
6098- }
6099- newspan->pixel_offset_for_input = min_right;
6100- newspan->n0 = scanline_extents->spans[1].n1 + 1;
6101- newspan->n1 = scanline_extents->spans[1].n1 + 1 + ( max_right - min_right );
6102- scanline_extents->edge_sizes[1] = 0; // don't need to copy the right margin, since we are directly decoding into the margin
6103- }
6104-
6105- // sort the spans into write output order
6106- if ( ( scanline_extents->spans[1].n1 > scanline_extents->spans[1].n0 ) && ( scanline_extents->spans[0].n0 > scanline_extents->spans[1].n0 ) )
6107- {
6108- stbir__span tspan = scanline_extents->spans[0];
6109- scanline_extents->spans[0] = scanline_extents->spans[1];
6110- scanline_extents->spans[1] = tspan;
6111- }
6112-}
6113-
6114-static void stbir__calculate_in_pixel_range( int * first_pixel, int * last_pixel, float out_pixel_center, float out_filter_radius, float inv_scale, float out_shift, int input_size, stbir_edge edge )
6115-{
6116- int first, last;
6117- float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius;
6118- float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius;
6119-
6120- float in_pixel_influence_lowerbound = (out_pixel_influence_lowerbound + out_shift) * inv_scale;
6121- float in_pixel_influence_upperbound = (out_pixel_influence_upperbound + out_shift) * inv_scale;
6122-
6123- first = (int)(STBIR_FLOORF(in_pixel_influence_lowerbound + 0.5f));
6124- last = (int)(STBIR_FLOORF(in_pixel_influence_upperbound - 0.5f));
6125- if ( last < first ) last = first; // point sample mode can span a value *right* at 0.5, and cause these to cross
6126-
6127- if ( edge == STBIR_EDGE_WRAP )
6128- {
6129- if ( first < -input_size )
6130- first = -input_size;
6131- if ( last >= (input_size*2))
6132- last = (input_size*2) - 1;
6133- }
6134-
6135- *first_pixel = first;
6136- *last_pixel = last;
6137-}
6138-
6139-static void stbir__calculate_coefficients_for_gather_upsample( float out_filter_radius, stbir__kernel_callback * kernel, stbir__scale_info * scale_info, int num_contributors, stbir__contributors* contributors, float* coefficient_group, int coefficient_width, stbir_edge edge, void * user_data )
6140-{
6141- int n, end;
6142- float inv_scale = scale_info->inv_scale;
6143- float out_shift = scale_info->pixel_shift;
6144- int input_size = scale_info->input_full_size;
6145- int numerator = scale_info->scale_numerator;
6146- int polyphase = ( ( scale_info->scale_is_rational ) && ( numerator < num_contributors ) );
6147-
6148- // Looping through out pixels
6149- end = num_contributors; if ( polyphase ) end = numerator;
6150- for (n = 0; n < end; n++)
6151- {
6152- int i;
6153- int last_non_zero;
6154- float out_pixel_center = (float)n + 0.5f;
6155- float in_center_of_out = (out_pixel_center + out_shift) * inv_scale;
6156-
6157- int in_first_pixel, in_last_pixel;
6158-
6159- stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, out_pixel_center, out_filter_radius, inv_scale, out_shift, input_size, edge );
6160-
6161- // make sure we never generate a range larger than our precalculated coeff width
6162- // this only happens in point sample mode, but it's a good safe thing to do anyway
6163- if ( ( in_last_pixel - in_first_pixel + 1 ) > coefficient_width )
6164- in_last_pixel = in_first_pixel + coefficient_width - 1;
6165-
6166- last_non_zero = -1;
6167- for (i = 0; i <= in_last_pixel - in_first_pixel; i++)
6168- {
6169- float in_pixel_center = (float)(i + in_first_pixel) + 0.5f;
6170- float coeff = kernel(in_center_of_out - in_pixel_center, inv_scale, user_data);
6171-
6172- // kill denormals
6173- if ( ( ( coeff < stbir__small_float ) && ( coeff > -stbir__small_float ) ) )
6174- {
6175- if ( i == 0 ) // if we're at the front, just eat zero contributors
6176- {
6177- STBIR_ASSERT ( ( in_last_pixel - in_first_pixel ) != 0 ); // there should be at least one contrib
6178- ++in_first_pixel;
6179- i--;
6180- continue;
6181- }
6182- coeff = 0; // make sure is fully zero (should keep denormals away)
6183- }
6184- else
6185- last_non_zero = i;
6186-
6187- coefficient_group[i] = coeff;
6188- }
6189-
6190- in_last_pixel = last_non_zero+in_first_pixel; // kills trailing zeros
6191- contributors->n0 = in_first_pixel;
6192- contributors->n1 = in_last_pixel;
6193-
6194- STBIR_ASSERT(contributors->n1 >= contributors->n0);
6195-
6196- ++contributors;
6197- coefficient_group += coefficient_width;
6198- }
6199-}
6200-
6201-static void stbir__insert_coeff( stbir__contributors * contribs, float * coeffs, int new_pixel, float new_coeff, int max_width )
6202-{
6203- if ( new_pixel <= contribs->n1 ) // before the end
6204- {
6205- if ( new_pixel < contribs->n0 ) // before the front?
6206- {
6207- if ( ( contribs->n1 - new_pixel + 1 ) <= max_width )
6208- {
6209- int j, o = contribs->n0 - new_pixel;
6210- for ( j = contribs->n1 - contribs->n0 ; j <= 0 ; j-- )
6211- coeffs[ j + o ] = coeffs[ j ];
6212- for ( j = 1 ; j < o ; j-- )
6213- coeffs[ j ] = coeffs[ 0 ];
6214- coeffs[ 0 ] = new_coeff;
6215- contribs->n0 = new_pixel;
6216- }
6217- }
6218- else
6219- {
6220- coeffs[ new_pixel - contribs->n0 ] += new_coeff;
6221- }
6222- }
6223- else
6224- {
6225- if ( ( new_pixel - contribs->n0 + 1 ) <= max_width )
6226- {
6227- int j, e = new_pixel - contribs->n0;
6228- for( j = ( contribs->n1 - contribs->n0 ) + 1 ; j < e ; j++ ) // clear in-betweens coeffs if there are any
6229- coeffs[j] = 0;
6230-
6231- coeffs[ e ] = new_coeff;
6232- contribs->n1 = new_pixel;
6233- }
6234- }
6235-}
6236-
6237-static void stbir__calculate_out_pixel_range( int * first_pixel, int * last_pixel, float in_pixel_center, float in_pixels_radius, float scale, float out_shift, int out_size )
6238-{
6239- float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius;
6240- float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius;
6241- float out_pixel_influence_lowerbound = in_pixel_influence_lowerbound * scale - out_shift;
6242- float out_pixel_influence_upperbound = in_pixel_influence_upperbound * scale - out_shift;
6243- int out_first_pixel = (int)(STBIR_FLOORF(out_pixel_influence_lowerbound + 0.5f));
6244- int out_last_pixel = (int)(STBIR_FLOORF(out_pixel_influence_upperbound - 0.5f));
6245-
6246- if ( out_first_pixel < 0 )
6247- out_first_pixel = 0;
6248- if ( out_last_pixel >= out_size )
6249- out_last_pixel = out_size - 1;
6250- *first_pixel = out_first_pixel;
6251- *last_pixel = out_last_pixel;
6252-}
6253-
6254-static void stbir__calculate_coefficients_for_gather_downsample( int start, int end, float in_pixels_radius, stbir__kernel_callback * kernel, stbir__scale_info * scale_info, int coefficient_width, int num_contributors, stbir__contributors * contributors, float * coefficient_group, void * user_data )
6255-{
6256- int in_pixel;
6257- int i;
6258- int first_out_inited = -1;
6259- float scale = scale_info->scale;
6260- float out_shift = scale_info->pixel_shift;
6261- int out_size = scale_info->output_sub_size;
6262- int numerator = scale_info->scale_numerator;
6263- int polyphase = ( ( scale_info->scale_is_rational ) && ( numerator < out_size ) );
6264-
6265- STBIR__UNUSED(num_contributors);
6266-
6267- // Loop through the input pixels
6268- for (in_pixel = start; in_pixel < end; in_pixel++)
6269- {
6270- float in_pixel_center = (float)in_pixel + 0.5f;
6271- float out_center_of_in = in_pixel_center * scale - out_shift;
6272- int out_first_pixel, out_last_pixel;
6273-
6274- stbir__calculate_out_pixel_range( &out_first_pixel, &out_last_pixel, in_pixel_center, in_pixels_radius, scale, out_shift, out_size );
6275-
6276- if ( out_first_pixel > out_last_pixel )
6277- continue;
6278-
6279- // clamp or exit if we are using polyphase filtering, and the limit is up
6280- if ( polyphase )
6281- {
6282- // when polyphase, you only have to do coeffs up to the numerator count
6283- if ( out_first_pixel == numerator )
6284- break;
6285-
6286- // don't do any extra work, clamp last pixel at numerator too
6287- if ( out_last_pixel >= numerator )
6288- out_last_pixel = numerator - 1;
6289- }
6290-
6291- for (i = 0; i <= out_last_pixel - out_first_pixel; i++)
6292- {
6293- float out_pixel_center = (float)(i + out_first_pixel) + 0.5f;
6294- float x = out_pixel_center - out_center_of_in;
6295- float coeff = kernel(x, scale, user_data) * scale;
6296-
6297- // kill the coeff if it's too small (avoid denormals)
6298- if ( ( ( coeff < stbir__small_float ) && ( coeff > -stbir__small_float ) ) )
6299- coeff = 0.0f;
6300-
6301- {
6302- int out = i + out_first_pixel;
6303- float * coeffs = coefficient_group + out * coefficient_width;
6304- stbir__contributors * contribs = contributors + out;
6305-
6306- // is this the first time this output pixel has been seen? Init it.
6307- if ( out > first_out_inited )
6308- {
6309- STBIR_ASSERT( out == ( first_out_inited + 1 ) ); // ensure we have only advanced one at time
6310- first_out_inited = out;
6311- contribs->n0 = in_pixel;
6312- contribs->n1 = in_pixel;
6313- coeffs[0] = coeff;
6314- }
6315- else
6316- {
6317- // insert on end (always in order)
6318- if ( coeffs[0] == 0.0f ) // if the first coefficent is zero, then zap it for this coeffs
6319- {
6320- STBIR_ASSERT( ( in_pixel - contribs->n0 ) == 1 ); // ensure that when we zap, we're at the 2nd pos
6321- contribs->n0 = in_pixel;
6322- }
6323- contribs->n1 = in_pixel;
6324- STBIR_ASSERT( ( in_pixel - contribs->n0 ) < coefficient_width );
6325- coeffs[in_pixel - contribs->n0] = coeff;
6326- }
6327- }
6328- }
6329- }
6330+static void
6331+stbir__get_extents(stbir__sampler *samp, stbir__extents *scanline_extents)
6332+{
6333+ int j, stop;
6334+ int left_margin, right_margin;
6335+ int min_n = 0x7fffffff, max_n = -0x7fffffff;
6336+ int min_left = 0x7fffffff, max_left = -0x7fffffff;
6337+ int min_right = 0x7fffffff, max_right = -0x7fffffff;
6338+ stbir_edge edge = samp->edge;
6339+ stbir__contributors *contributors = samp->contributors;
6340+ int output_sub_size = samp->scale_info.output_sub_size;
6341+ int input_full_size = samp->scale_info.input_full_size;
6342+ int filter_pixel_margin = samp->filter_pixel_margin;
6343+
6344+ STBIR_ASSERT(samp->is_gather);
6345+
6346+ stop = output_sub_size;
6347+ for (j = 0; j < stop; j++) {
6348+ STBIR_ASSERT(contributors[j].n1 >= contributors[j].n0);
6349+ if (contributors[j].n0 < min_n) {
6350+ min_n = contributors[j].n0;
6351+ stop = j + filter_pixel_margin; // if we find a new min, only scan
6352+ // another filter width
6353+ if (stop > output_sub_size) {
6354+ stop = output_sub_size;
6355+ }
6356+ }
6357+ }
6358+
6359+ stop = 0;
6360+ for (j = output_sub_size - 1; j >= stop; j--) {
6361+ STBIR_ASSERT(contributors[j].n1 >= contributors[j].n0);
6362+ if (contributors[j].n1 > max_n) {
6363+ max_n = contributors[j].n1;
6364+ stop = j - filter_pixel_margin; // if we find a new max, only scan
6365+ // another filter width
6366+ if (stop < 0) {
6367+ stop = 0;
6368+ }
6369+ }
6370+ }
6371+
6372+ STBIR_ASSERT(scanline_extents->conservative.n0 <= min_n);
6373+ STBIR_ASSERT(scanline_extents->conservative.n1 >= max_n);
6374+
6375+ // now calculate how much into the margins we really read
6376+ left_margin = 0;
6377+ if (min_n < 0) {
6378+ left_margin = -min_n;
6379+ min_n = 0;
6380+ }
6381+
6382+ right_margin = 0;
6383+ if (max_n >= input_full_size) {
6384+ right_margin = max_n - input_full_size + 1;
6385+ max_n = input_full_size - 1;
6386+ }
6387+
6388+ // index 1 is margin pixel extents (how many pixels we hang over the edge)
6389+ scanline_extents->edge_sizes[0] = left_margin;
6390+ scanline_extents->edge_sizes[1] = right_margin;
6391+
6392+ // index 2 is pixels read from the input
6393+ scanline_extents->spans[0].n0 = min_n;
6394+ scanline_extents->spans[0].n1 = max_n;
6395+ scanline_extents->spans[0].pixel_offset_for_input = min_n;
6396+
6397+ // default to no other input range
6398+ scanline_extents->spans[1].n0 = 0;
6399+ scanline_extents->spans[1].n1 = -1;
6400+ scanline_extents->spans[1].pixel_offset_for_input = 0;
6401+
6402+ // don't have to do edge calc for zero clamp
6403+ if (edge == STBIR_EDGE_ZERO) {
6404+ return;
6405+ }
6406+
6407+ // convert margin pixels to the pixels within the input (min and max)
6408+ for (j = -left_margin; j < 0; j++) {
6409+ int p = stbir__edge_wrap(edge, j, input_full_size);
6410+ if (p < min_left) {
6411+ min_left = p;
6412+ }
6413+ if (p > max_left) {
6414+ max_left = p;
6415+ }
6416+ }
6417+
6418+ for (j = input_full_size; j < (input_full_size + right_margin); j++) {
6419+ int p = stbir__edge_wrap(edge, j, input_full_size);
6420+ if (p < min_right) {
6421+ min_right = p;
6422+ }
6423+ if (p > max_right) {
6424+ max_right = p;
6425+ }
6426+ }
6427+
6428+ // merge the left margin pixel region if it connects within 4 pixels of main
6429+ // pixel region
6430+ if (min_left != 0x7fffffff) {
6431+ if (((min_left <= min_n) &&
6432+ ((max_left + STBIR__MERGE_RUNS_PIXEL_THRESHOLD) >= min_n)) ||
6433+ ((min_n <= min_left) &&
6434+ ((max_n + STBIR__MERGE_RUNS_PIXEL_THRESHOLD) >= max_left))) {
6435+ scanline_extents->spans[0].n0 = min_n = stbir__min(min_n, min_left);
6436+ scanline_extents->spans[0].n1 = max_n = stbir__max(max_n, max_left);
6437+ scanline_extents->spans[0].pixel_offset_for_input = min_n;
6438+ left_margin = 0;
6439+ }
6440+ }
6441+
6442+ // merge the right margin pixel region if it connects within 4 pixels of
6443+ // main pixel region
6444+ if (min_right != 0x7fffffff) {
6445+ if (((min_right <= min_n) &&
6446+ ((max_right + STBIR__MERGE_RUNS_PIXEL_THRESHOLD) >= min_n)) ||
6447+ ((min_n <= min_right) &&
6448+ ((max_n + STBIR__MERGE_RUNS_PIXEL_THRESHOLD) >= max_right))) {
6449+ scanline_extents->spans[0].n0 = min_n =
6450+ stbir__min(min_n, min_right);
6451+ scanline_extents->spans[0].n1 = max_n =
6452+ stbir__max(max_n, max_right);
6453+ scanline_extents->spans[0].pixel_offset_for_input = min_n;
6454+ right_margin = 0;
6455+ }
6456+ }
6457+
6458+ STBIR_ASSERT(scanline_extents->conservative.n0 <= min_n);
6459+ STBIR_ASSERT(scanline_extents->conservative.n1 >= max_n);
6460+
6461+ // you get two ranges when you have the WRAP edge mode and you are doing
6462+ // just the a piece of the resize
6463+ // so you need to get a second run of pixels from the opposite side of the
6464+ // scanline (which you wouldn't need except for WRAP)
6465+
6466+ // if we can't merge the min_left range, add it as a second range
6467+ if ((left_margin) && (min_left != 0x7fffffff)) {
6468+ stbir__span *newspan = scanline_extents->spans + 1;
6469+ STBIR_ASSERT(right_margin == 0);
6470+ if (min_left < scanline_extents->spans[0].n0) {
6471+ scanline_extents->spans[1].pixel_offset_for_input =
6472+ scanline_extents->spans[0].n0;
6473+ scanline_extents->spans[1].n0 = scanline_extents->spans[0].n0;
6474+ scanline_extents->spans[1].n1 = scanline_extents->spans[0].n1;
6475+ --newspan;
6476+ }
6477+ newspan->pixel_offset_for_input = min_left;
6478+ newspan->n0 = -left_margin;
6479+ newspan->n1 = (max_left - min_left) - left_margin;
6480+ scanline_extents->edge_sizes[0] =
6481+ 0; // don't need to copy the left margin, since we are directly
6482+ // decoding into the margin
6483+ }
6484+ // if we can't merge the min_left range, add it as a second range
6485+ else if ((right_margin) && (min_right != 0x7fffffff)) {
6486+ stbir__span *newspan = scanline_extents->spans + 1;
6487+ if (min_right < scanline_extents->spans[0].n0) {
6488+ scanline_extents->spans[1].pixel_offset_for_input =
6489+ scanline_extents->spans[0].n0;
6490+ scanline_extents->spans[1].n0 = scanline_extents->spans[0].n0;
6491+ scanline_extents->spans[1].n1 = scanline_extents->spans[0].n1;
6492+ --newspan;
6493+ }
6494+ newspan->pixel_offset_for_input = min_right;
6495+ newspan->n0 = scanline_extents->spans[1].n1 + 1;
6496+ newspan->n1 =
6497+ scanline_extents->spans[1].n1 + 1 + (max_right - min_right);
6498+ scanline_extents->edge_sizes[1] =
6499+ 0; // don't need to copy the right margin, since we are directly
6500+ // decoding into the margin
6501+ }
6502+
6503+ // sort the spans into write output order
6504+ if ((scanline_extents->spans[1].n1 > scanline_extents->spans[1].n0) &&
6505+ (scanline_extents->spans[0].n0 > scanline_extents->spans[1].n0)) {
6506+ stbir__span tspan = scanline_extents->spans[0];
6507+ scanline_extents->spans[0] = scanline_extents->spans[1];
6508+ scanline_extents->spans[1] = tspan;
6509+ }
6510+}
6511+
6512+static void
6513+stbir__calculate_in_pixel_range(int *first_pixel, int *last_pixel,
6514+ float out_pixel_center, float out_filter_radius,
6515+ float inv_scale, float out_shift,
6516+ int input_size, stbir_edge edge)
6517+{
6518+ int first, last;
6519+ float out_pixel_influence_lowerbound = out_pixel_center - out_filter_radius;
6520+ float out_pixel_influence_upperbound = out_pixel_center + out_filter_radius;
6521+
6522+ float in_pixel_influence_lowerbound =
6523+ (out_pixel_influence_lowerbound + out_shift) * inv_scale;
6524+ float in_pixel_influence_upperbound =
6525+ (out_pixel_influence_upperbound + out_shift) * inv_scale;
6526+
6527+ first = (int)(STBIR_FLOORF(in_pixel_influence_lowerbound + 0.5f));
6528+ last = (int)(STBIR_FLOORF(in_pixel_influence_upperbound - 0.5f));
6529+ if (last < first) {
6530+ last = first; // point sample mode can span a value *right* at 0.5, and
6531+ // cause these to cross
6532+ }
6533+
6534+ if (edge == STBIR_EDGE_WRAP) {
6535+ if (first < -input_size) {
6536+ first = -input_size;
6537+ }
6538+ if (last >= (input_size * 2)) {
6539+ last = (input_size * 2) - 1;
6540+ }
6541+ }
6542+
6543+ *first_pixel = first;
6544+ *last_pixel = last;
6545+}
6546+
6547+static void
6548+stbir__calculate_coefficients_for_gather_upsample(
6549+ float out_filter_radius, stbir__kernel_callback *kernel,
6550+ stbir__scale_info *scale_info, int num_contributors,
6551+ stbir__contributors *contributors, float *coefficient_group,
6552+ int coefficient_width, stbir_edge edge, void *user_data)
6553+{
6554+ int n, end;
6555+ float inv_scale = scale_info->inv_scale;
6556+ float out_shift = scale_info->pixel_shift;
6557+ int input_size = scale_info->input_full_size;
6558+ int numerator = scale_info->scale_numerator;
6559+ int polyphase =
6560+ ((scale_info->scale_is_rational) && (numerator < num_contributors));
6561+
6562+ // Looping through out pixels
6563+ end = num_contributors;
6564+ if (polyphase) {
6565+ end = numerator;
6566+ }
6567+ for (n = 0; n < end; n++) {
6568+ int i;
6569+ int last_non_zero;
6570+ float out_pixel_center = (float)n + 0.5f;
6571+ float in_center_of_out = (out_pixel_center + out_shift) * inv_scale;
6572+
6573+ int in_first_pixel, in_last_pixel;
6574+
6575+ stbir__calculate_in_pixel_range(&in_first_pixel, &in_last_pixel,
6576+ out_pixel_center, out_filter_radius,
6577+ inv_scale, out_shift, input_size, edge);
6578+
6579+ // make sure we never generate a range larger than our precalculated
6580+ // coeff width
6581+ // this only happens in point sample mode, but it's a good safe thing
6582+ // to do anyway
6583+ if ((in_last_pixel - in_first_pixel + 1) > coefficient_width) {
6584+ in_last_pixel = in_first_pixel + coefficient_width - 1;
6585+ }
6586+
6587+ last_non_zero = -1;
6588+ for (i = 0; i <= in_last_pixel - in_first_pixel; i++) {
6589+ float in_pixel_center = (float)(i + in_first_pixel) + 0.5f;
6590+ float coeff = kernel(in_center_of_out - in_pixel_center, inv_scale,
6591+ user_data);
6592+
6593+ // kill denormals
6594+ if (((coeff < stbir__small_float) &&
6595+ (coeff > -stbir__small_float))) {
6596+ if (i == 0) // if we're at the front, just eat zero contributors
6597+ {
6598+ STBIR_ASSERT((in_last_pixel - in_first_pixel) !=
6599+ 0); // there should be at least one contrib
6600+ ++in_first_pixel;
6601+ i--;
6602+ continue;
6603+ }
6604+ coeff =
6605+ 0; // make sure is fully zero (should keep denormals away)
6606+ } else {
6607+ last_non_zero = i;
6608+ }
6609+
6610+ coefficient_group[i] = coeff;
6611+ }
6612+
6613+ in_last_pixel = last_non_zero + in_first_pixel; // kills trailing zeros
6614+ contributors->n0 = in_first_pixel;
6615+ contributors->n1 = in_last_pixel;
6616+
6617+ STBIR_ASSERT(contributors->n1 >= contributors->n0);
6618+
6619+ ++contributors;
6620+ coefficient_group += coefficient_width;
6621+ }
6622+}
6623+
6624+static void
6625+stbir__insert_coeff(stbir__contributors *contribs, float *coeffs, int new_pixel,
6626+ float new_coeff, int max_width)
6627+{
6628+ if (new_pixel <= contribs->n1) // before the end
6629+ {
6630+ if (new_pixel < contribs->n0) // before the front?
6631+ {
6632+ if ((contribs->n1 - new_pixel + 1) <= max_width) {
6633+ int j, o = contribs->n0 - new_pixel;
6634+ for (j = contribs->n1 - contribs->n0; j <= 0; j--) {
6635+ coeffs[j + o] = coeffs[j];
6636+ }
6637+ for (j = 1; j < o; j--) {
6638+ coeffs[j] = coeffs[0];
6639+ }
6640+ coeffs[0] = new_coeff;
6641+ contribs->n0 = new_pixel;
6642+ }
6643+ } else {
6644+ coeffs[new_pixel - contribs->n0] += new_coeff;
6645+ }
6646+ } else {
6647+ if ((new_pixel - contribs->n0 + 1) <= max_width) {
6648+ int j, e = new_pixel - contribs->n0;
6649+ for (j = (contribs->n1 - contribs->n0) + 1; j < e;
6650+ j++) { // clear in-betweens coeffs if there are any
6651+ coeffs[j] = 0;
6652+ }
6653+
6654+ coeffs[e] = new_coeff;
6655+ contribs->n1 = new_pixel;
6656+ }
6657+ }
6658+}
6659+
6660+static void
6661+stbir__calculate_out_pixel_range(int *first_pixel, int *last_pixel,
6662+ float in_pixel_center, float in_pixels_radius,
6663+ float scale, float out_shift, int out_size)
6664+{
6665+ float in_pixel_influence_lowerbound = in_pixel_center - in_pixels_radius;
6666+ float in_pixel_influence_upperbound = in_pixel_center + in_pixels_radius;
6667+ float out_pixel_influence_lowerbound =
6668+ in_pixel_influence_lowerbound * scale - out_shift;
6669+ float out_pixel_influence_upperbound =
6670+ in_pixel_influence_upperbound * scale - out_shift;
6671+ int out_first_pixel =
6672+ (int)(STBIR_FLOORF(out_pixel_influence_lowerbound + 0.5f));
6673+ int out_last_pixel =
6674+ (int)(STBIR_FLOORF(out_pixel_influence_upperbound - 0.5f));
6675+
6676+ if (out_first_pixel < 0) {
6677+ out_first_pixel = 0;
6678+ }
6679+ if (out_last_pixel >= out_size) {
6680+ out_last_pixel = out_size - 1;
6681+ }
6682+ *first_pixel = out_first_pixel;
6683+ *last_pixel = out_last_pixel;
6684+}
6685+
6686+static void
6687+stbir__calculate_coefficients_for_gather_downsample(
6688+ int start, int end, float in_pixels_radius, stbir__kernel_callback *kernel,
6689+ stbir__scale_info *scale_info, int coefficient_width, int num_contributors,
6690+ stbir__contributors *contributors, float *coefficient_group,
6691+ void *user_data)
6692+{
6693+ int in_pixel;
6694+ int i;
6695+ int first_out_inited = -1;
6696+ float scale = scale_info->scale;
6697+ float out_shift = scale_info->pixel_shift;
6698+ int out_size = scale_info->output_sub_size;
6699+ int numerator = scale_info->scale_numerator;
6700+ int polyphase = ((scale_info->scale_is_rational) && (numerator < out_size));
6701+
6702+ STBIR__UNUSED(num_contributors);
6703+
6704+ // Loop through the input pixels
6705+ for (in_pixel = start; in_pixel < end; in_pixel++) {
6706+ float in_pixel_center = (float)in_pixel + 0.5f;
6707+ float out_center_of_in = in_pixel_center * scale - out_shift;
6708+ int out_first_pixel, out_last_pixel;
6709+
6710+ stbir__calculate_out_pixel_range(&out_first_pixel, &out_last_pixel,
6711+ in_pixel_center, in_pixels_radius,
6712+ scale, out_shift, out_size);
6713+
6714+ if (out_first_pixel > out_last_pixel) {
6715+ continue;
6716+ }
6717+
6718+ // clamp or exit if we are using polyphase filtering, and the limit is
6719+ // up
6720+ if (polyphase) {
6721+ // when polyphase, you only have to do coeffs up to the numerator
6722+ // count
6723+ if (out_first_pixel == numerator) {
6724+ break;
6725+ }
6726+
6727+ // don't do any extra work, clamp last pixel at numerator too
6728+ if (out_last_pixel >= numerator) {
6729+ out_last_pixel = numerator - 1;
6730+ }
6731+ }
6732+
6733+ for (i = 0; i <= out_last_pixel - out_first_pixel; i++) {
6734+ float out_pixel_center = (float)(i + out_first_pixel) + 0.5f;
6735+ float x = out_pixel_center - out_center_of_in;
6736+ float coeff = kernel(x, scale, user_data) * scale;
6737+
6738+ // kill the coeff if it's too small (avoid denormals)
6739+ if (((coeff < stbir__small_float) &&
6740+ (coeff > -stbir__small_float))) {
6741+ coeff = 0.0f;
6742+ }
6743+
6744+ {
6745+ int out = i + out_first_pixel;
6746+ float *coeffs = coefficient_group + out * coefficient_width;
6747+ stbir__contributors *contribs = contributors + out;
6748+
6749+ // is this the first time this output pixel has been seen? Init
6750+ // it.
6751+ if (out > first_out_inited) {
6752+ STBIR_ASSERT(
6753+ out == (first_out_inited +
6754+ 1)); // ensure we have only advanced one at time
6755+ first_out_inited = out;
6756+ contribs->n0 = in_pixel;
6757+ contribs->n1 = in_pixel;
6758+ coeffs[0] = coeff;
6759+ } else {
6760+ // insert on end (always in order)
6761+ if (coeffs[0] == 0.0f) // if the first coefficent is zero,
6762+ // then zap it for this coeffs
6763+ {
6764+ STBIR_ASSERT(
6765+ (in_pixel - contribs->n0) ==
6766+ 1); // ensure that when we zap, we're at the 2nd pos
6767+ contribs->n0 = in_pixel;
6768+ }
6769+ contribs->n1 = in_pixel;
6770+ STBIR_ASSERT((in_pixel - contribs->n0) < coefficient_width);
6771+ coeffs[in_pixel - contribs->n0] = coeff;
6772+ }
6773+ }
6774+ }
6775+ }
6776 }
6777
6778 #ifdef STBIR_RENORMALIZE_IN_FLOAT
6779@@ -3492,555 +4231,647 @@ static void stbir__calculate_coefficients_for_gather_downsample( int start, int
6780 #define STBIR_RENORM_TYPE double
6781 #endif
6782
6783-static void stbir__cleanup_gathered_coefficients( stbir_edge edge, stbir__filter_extent_info* filter_info, stbir__scale_info * scale_info, int num_contributors, stbir__contributors* contributors, float * coefficient_group, int coefficient_width )
6784-{
6785- int input_size = scale_info->input_full_size;
6786- int input_last_n1 = input_size - 1;
6787- int n, end;
6788- int lowest = 0x7fffffff;
6789- int highest = -0x7fffffff;
6790- int widest = -1;
6791- int numerator = scale_info->scale_numerator;
6792- int denominator = scale_info->scale_denominator;
6793- int polyphase = ( ( scale_info->scale_is_rational ) && ( numerator < num_contributors ) );
6794- float * coeffs;
6795- stbir__contributors * contribs;
6796-
6797- // weight all the coeffs for each sample
6798- coeffs = coefficient_group;
6799- contribs = contributors;
6800- end = num_contributors; if ( polyphase ) end = numerator;
6801- for (n = 0; n < end; n++)
6802- {
6803- int i;
6804- STBIR_RENORM_TYPE filter_scale, total_filter = 0;
6805- int e;
6806-
6807- // add all contribs
6808- e = contribs->n1 - contribs->n0;
6809- for( i = 0 ; i <= e ; i++ )
6810- {
6811- total_filter += (STBIR_RENORM_TYPE) coeffs[i];
6812- STBIR_ASSERT( ( coeffs[i] >= -2.0f ) && ( coeffs[i] <= 2.0f ) ); // check for wonky weights
6813- }
6814-
6815- // rescale
6816- if ( ( total_filter < stbir__small_float ) && ( total_filter > -stbir__small_float ) )
6817- {
6818- // all coeffs are extremely small, just zero it
6819- contribs->n1 = contribs->n0;
6820- coeffs[0] = 0.0f;
6821- }
6822- else
6823- {
6824- // if the total isn't 1.0, rescale everything
6825- if ( ( total_filter < (1.0f-stbir__small_float) ) || ( total_filter > (1.0f+stbir__small_float) ) )
6826- {
6827- filter_scale = ((STBIR_RENORM_TYPE)1.0) / total_filter;
6828-
6829- // scale them all
6830- for (i = 0; i <= e; i++)
6831- coeffs[i] = (float) ( coeffs[i] * filter_scale );
6832- }
6833- }
6834- ++contribs;
6835- coeffs += coefficient_width;
6836- }
6837-
6838- // if we have a rational for the scale, we can exploit the polyphaseness to not calculate
6839- // most of the coefficients, so we copy them here
6840- if ( polyphase )
6841- {
6842- stbir__contributors * prev_contribs = contributors;
6843- stbir__contributors * cur_contribs = contributors + numerator;
6844-
6845- for( n = numerator ; n < num_contributors ; n++ )
6846- {
6847- cur_contribs->n0 = prev_contribs->n0 + denominator;
6848- cur_contribs->n1 = prev_contribs->n1 + denominator;
6849- ++cur_contribs;
6850- ++prev_contribs;
6851- }
6852- stbir_overlapping_memcpy( coefficient_group + numerator * coefficient_width, coefficient_group, ( num_contributors - numerator ) * coefficient_width * sizeof( coeffs[ 0 ] ) );
6853- }
6854-
6855- coeffs = coefficient_group;
6856- contribs = contributors;
6857-
6858- for (n = 0; n < num_contributors; n++)
6859- {
6860- int i;
6861-
6862- // in zero edge mode, just remove out of bounds contribs completely (since their weights are accounted for now)
6863- if ( edge == STBIR_EDGE_ZERO )
6864- {
6865- // shrink the right side if necessary
6866- if ( contribs->n1 > input_last_n1 )
6867- contribs->n1 = input_last_n1;
6868-
6869- // shrink the left side
6870- if ( contribs->n0 < 0 )
6871- {
6872- int j, left, skips = 0;
6873-
6874- skips = -contribs->n0;
6875- contribs->n0 = 0;
6876-
6877- // now move down the weights
6878- left = contribs->n1 - contribs->n0 + 1;
6879- if ( left > 0 )
6880- {
6881- for( j = 0 ; j < left ; j++ )
6882- coeffs[ j ] = coeffs[ j + skips ];
6883- }
6884- }
6885- }
6886- else if ( ( edge == STBIR_EDGE_CLAMP ) || ( edge == STBIR_EDGE_REFLECT ) )
6887- {
6888- // for clamp and reflect, calculate the true inbounds position (based on edge type) and just add that to the existing weight
6889-
6890- // right hand side first
6891- if ( contribs->n1 > input_last_n1 )
6892- {
6893- int start = contribs->n0;
6894- int endi = contribs->n1;
6895- contribs->n1 = input_last_n1;
6896- for( i = input_size; i <= endi; i++ )
6897- stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), coeffs[i-start], coefficient_width );
6898- }
6899-
6900- // now check left hand edge
6901- if ( contribs->n0 < 0 )
6902- {
6903- int save_n0;
6904- float save_n0_coeff;
6905- float * c = coeffs - ( contribs->n0 + 1 );
6906-
6907- // reinsert the coeffs with it reflected or clamped (insert accumulates, if the coeffs exist)
6908- for( i = -1 ; i > contribs->n0 ; i-- )
6909- stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( i, input_size ), *c--, coefficient_width );
6910- save_n0 = contribs->n0;
6911- save_n0_coeff = c[0]; // save it, since we didn't do the final one (i==n0), because there might be too many coeffs to hold (before we resize)!
6912-
6913- // now slide all the coeffs down (since we have accumulated them in the positive contribs) and reset the first contrib
6914- contribs->n0 = 0;
6915- for(i = 0 ; i <= contribs->n1 ; i++ )
6916- coeffs[i] = coeffs[i-save_n0];
6917-
6918- // now that we have shrunk down the contribs, we insert the first one safely
6919- stbir__insert_coeff( contribs, coeffs, stbir__edge_wrap_slow[edge]( save_n0, input_size ), save_n0_coeff, coefficient_width );
6920- }
6921- }
6922-
6923- if ( contribs->n0 <= contribs->n1 )
6924- {
6925- int diff = contribs->n1 - contribs->n0 + 1;
6926- while ( diff && ( coeffs[ diff-1 ] == 0.0f ) )
6927- --diff;
6928-
6929- contribs->n1 = contribs->n0 + diff - 1;
6930-
6931- if ( contribs->n0 <= contribs->n1 )
6932- {
6933- if ( contribs->n0 < lowest )
6934- lowest = contribs->n0;
6935- if ( contribs->n1 > highest )
6936- highest = contribs->n1;
6937- if ( diff > widest )
6938- widest = diff;
6939- }
6940-
6941- // re-zero out unused coefficients (if any)
6942- for( i = diff ; i < coefficient_width ; i++ )
6943- coeffs[i] = 0.0f;
6944- }
6945-
6946- ++contribs;
6947- coeffs += coefficient_width;
6948- }
6949- filter_info->lowest = lowest;
6950- filter_info->highest = highest;
6951- filter_info->widest = widest;
6952-}
6953-
6954-#undef STBIR_RENORM_TYPE
6955-
6956-static int stbir__pack_coefficients( int num_contributors, stbir__contributors* contributors, float * coefficents, int coefficient_width, int widest, int row0, int row1 )
6957-{
6958- #define STBIR_MOVE_1( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint32*)(dest))[0] = ((stbir_uint32*)(src))[0]; }
6959- #define STBIR_MOVE_2( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint64*)(dest))[0] = ((stbir_uint64*)(src))[0]; }
6960- #ifdef STBIR_SIMD
6961- #define STBIR_MOVE_4( dest, src ) { stbir__simdf t; STBIR_NO_UNROLL(dest); stbir__simdf_load( t, src ); stbir__simdf_store( dest, t ); }
6962- #else
6963- #define STBIR_MOVE_4( dest, src ) { STBIR_NO_UNROLL(dest); ((stbir_uint64*)(dest))[0] = ((stbir_uint64*)(src))[0]; ((stbir_uint64*)(dest))[1] = ((stbir_uint64*)(src))[1]; }
6964- #endif
6965-
6966- int row_end = row1 + 1;
6967- STBIR__UNUSED( row0 ); // only used in an assert
6968-
6969- if ( coefficient_width != widest )
6970- {
6971- float * pc = coefficents;
6972- float * coeffs = coefficents;
6973- float * pc_end = coefficents + num_contributors * widest;
6974- switch( widest )
6975- {
6976- case 1:
6977- STBIR_NO_UNROLL_LOOP_START
6978- do {
6979- STBIR_MOVE_1( pc, coeffs );
6980- ++pc;
6981- coeffs += coefficient_width;
6982- } while ( pc < pc_end );
6983- break;
6984- case 2:
6985- STBIR_NO_UNROLL_LOOP_START
6986- do {
6987- STBIR_MOVE_2( pc, coeffs );
6988- pc += 2;
6989- coeffs += coefficient_width;
6990- } while ( pc < pc_end );
6991- break;
6992- case 3:
6993- STBIR_NO_UNROLL_LOOP_START
6994- do {
6995- STBIR_MOVE_2( pc, coeffs );
6996- STBIR_MOVE_1( pc+2, coeffs+2 );
6997- pc += 3;
6998- coeffs += coefficient_width;
6999- } while ( pc < pc_end );
7000- break;
7001- case 4:
7002- STBIR_NO_UNROLL_LOOP_START
7003- do {
7004- STBIR_MOVE_4( pc, coeffs );
7005- pc += 4;
7006- coeffs += coefficient_width;
7007- } while ( pc < pc_end );
7008- break;
7009- case 5:
7010- STBIR_NO_UNROLL_LOOP_START
7011- do {
7012- STBIR_MOVE_4( pc, coeffs );
7013- STBIR_MOVE_1( pc+4, coeffs+4 );
7014- pc += 5;
7015- coeffs += coefficient_width;
7016- } while ( pc < pc_end );
7017- break;
7018- case 6:
7019- STBIR_NO_UNROLL_LOOP_START
7020- do {
7021- STBIR_MOVE_4( pc, coeffs );
7022- STBIR_MOVE_2( pc+4, coeffs+4 );
7023- pc += 6;
7024- coeffs += coefficient_width;
7025- } while ( pc < pc_end );
7026- break;
7027- case 7:
7028- STBIR_NO_UNROLL_LOOP_START
7029- do {
7030- STBIR_MOVE_4( pc, coeffs );
7031- STBIR_MOVE_2( pc+4, coeffs+4 );
7032- STBIR_MOVE_1( pc+6, coeffs+6 );
7033- pc += 7;
7034- coeffs += coefficient_width;
7035- } while ( pc < pc_end );
7036- break;
7037- case 8:
7038- STBIR_NO_UNROLL_LOOP_START
7039- do {
7040- STBIR_MOVE_4( pc, coeffs );
7041- STBIR_MOVE_4( pc+4, coeffs+4 );
7042- pc += 8;
7043- coeffs += coefficient_width;
7044- } while ( pc < pc_end );
7045- break;
7046- case 9:
7047- STBIR_NO_UNROLL_LOOP_START
7048- do {
7049- STBIR_MOVE_4( pc, coeffs );
7050- STBIR_MOVE_4( pc+4, coeffs+4 );
7051- STBIR_MOVE_1( pc+8, coeffs+8 );
7052- pc += 9;
7053- coeffs += coefficient_width;
7054- } while ( pc < pc_end );
7055- break;
7056- case 10:
7057- STBIR_NO_UNROLL_LOOP_START
7058- do {
7059- STBIR_MOVE_4( pc, coeffs );
7060- STBIR_MOVE_4( pc+4, coeffs+4 );
7061- STBIR_MOVE_2( pc+8, coeffs+8 );
7062- pc += 10;
7063- coeffs += coefficient_width;
7064- } while ( pc < pc_end );
7065- break;
7066- case 11:
7067- STBIR_NO_UNROLL_LOOP_START
7068- do {
7069- STBIR_MOVE_4( pc, coeffs );
7070- STBIR_MOVE_4( pc+4, coeffs+4 );
7071- STBIR_MOVE_2( pc+8, coeffs+8 );
7072- STBIR_MOVE_1( pc+10, coeffs+10 );
7073- pc += 11;
7074- coeffs += coefficient_width;
7075- } while ( pc < pc_end );
7076- break;
7077- case 12:
7078- STBIR_NO_UNROLL_LOOP_START
7079- do {
7080- STBIR_MOVE_4( pc, coeffs );
7081- STBIR_MOVE_4( pc+4, coeffs+4 );
7082- STBIR_MOVE_4( pc+8, coeffs+8 );
7083- pc += 12;
7084- coeffs += coefficient_width;
7085- } while ( pc < pc_end );
7086- break;
7087- default:
7088- STBIR_NO_UNROLL_LOOP_START
7089- do {
7090- float * copy_end = pc + widest - 4;
7091- float * c = coeffs;
7092- do {
7093- STBIR_NO_UNROLL( pc );
7094- STBIR_MOVE_4( pc, c );
7095- pc += 4;
7096- c += 4;
7097- } while ( pc <= copy_end );
7098- copy_end += 4;
7099- STBIR_NO_UNROLL_LOOP_START
7100- while ( pc < copy_end )
7101- {
7102- STBIR_MOVE_1( pc, c );
7103- ++pc; ++c;
7104- }
7105- coeffs += coefficient_width;
7106- } while ( pc < pc_end );
7107- break;
7108- }
7109- }
7110-
7111- // some horizontal routines read one float off the end (which is then masked off), so put in a sentinal so we don't read an snan or denormal
7112- coefficents[ widest * num_contributors ] = 8888.0f;
7113-
7114- // the minimum we might read for unrolled filters widths is 12. So, we need to
7115- // make sure we never read outside the decode buffer, by possibly moving
7116- // the sample area back into the scanline, and putting zeros weights first.
7117- // we start on the right edge and check until we're well past the possible
7118- // clip area (2*widest).
7119- {
7120- stbir__contributors * contribs = contributors + num_contributors - 1;
7121- float * coeffs = coefficents + widest * ( num_contributors - 1 );
7122-
7123- // go until no chance of clipping (this is usually less than 8 lops)
7124- while ( ( contribs >= contributors ) && ( ( contribs->n0 + widest*2 ) >= row_end ) )
7125- {
7126- // might we clip??
7127- if ( ( contribs->n0 + widest ) > row_end )
7128- {
7129- int stop_range = widest;
7130-
7131- // if range is larger than 12, it will be handled by generic loops that can terminate on the exact length
7132- // of this contrib n1, instead of a fixed widest amount - so calculate this
7133- if ( widest > 12 )
7134- {
7135- int mod;
7136-
7137- // how far will be read in the n_coeff loop (which depends on the widest count mod4);
7138- mod = widest & 3;
7139- stop_range = ( ( ( contribs->n1 - contribs->n0 + 1 ) - mod + 3 ) & ~3 ) + mod;
7140-
7141- // the n_coeff loops do a minimum amount of coeffs, so factor that in!
7142- if ( stop_range < ( 8 + mod ) ) stop_range = 8 + mod;
7143- }
7144-
7145- // now see if we still clip with the refined range
7146- if ( ( contribs->n0 + stop_range ) > row_end )
7147- {
7148- int new_n0 = row_end - stop_range;
7149- int num = contribs->n1 - contribs->n0 + 1;
7150- int backup = contribs->n0 - new_n0;
7151- float * from_co = coeffs + num - 1;
7152- float * to_co = from_co + backup;
7153-
7154- STBIR_ASSERT( ( new_n0 >= row0 ) && ( new_n0 < contribs->n0 ) );
7155-
7156- // move the coeffs over
7157- while( num )
7158- {
7159- *to_co-- = *from_co--;
7160- --num;
7161- }
7162- // zero new positions
7163- while ( to_co >= coeffs )
7164- *to_co-- = 0;
7165- // set new start point
7166- contribs->n0 = new_n0;
7167- if ( widest > 12 )
7168- {
7169- int mod;
7170-
7171- // how far will be read in the n_coeff loop (which depends on the widest count mod4);
7172- mod = widest & 3;
7173- stop_range = ( ( ( contribs->n1 - contribs->n0 + 1 ) - mod + 3 ) & ~3 ) + mod;
7174-
7175- // the n_coeff loops do a minimum amount of coeffs, so factor that in!
7176- if ( stop_range < ( 8 + mod ) ) stop_range = 8 + mod;
7177- }
7178- }
7179- }
7180- --contribs;
7181- coeffs -= widest;
7182- }
7183- }
7184-
7185- return widest;
7186- #undef STBIR_MOVE_1
7187- #undef STBIR_MOVE_2
7188- #undef STBIR_MOVE_4
7189-}
7190-
7191-static void stbir__calculate_filters( stbir__sampler * samp, stbir__sampler * other_axis_for_pivot, void * user_data STBIR_ONLY_PROFILE_BUILD_GET_INFO )
7192-{
7193- int n;
7194- float scale = samp->scale_info.scale;
7195- stbir__kernel_callback * kernel = samp->filter_kernel;
7196- stbir__support_callback * support = samp->filter_support;
7197- float inv_scale = samp->scale_info.inv_scale;
7198- int input_full_size = samp->scale_info.input_full_size;
7199- int gather_num_contributors = samp->num_contributors;
7200- stbir__contributors* gather_contributors = samp->contributors;
7201- float * gather_coeffs = samp->coefficients;
7202- int gather_coefficient_width = samp->coefficient_width;
7203-
7204- switch ( samp->is_gather )
7205- {
7206- case 1: // gather upsample
7207- {
7208- float out_pixels_radius = support(inv_scale,user_data) * scale;
7209-
7210- stbir__calculate_coefficients_for_gather_upsample( out_pixels_radius, kernel, &samp->scale_info, gather_num_contributors, gather_contributors, gather_coeffs, gather_coefficient_width, samp->edge, user_data );
7211-
7212- STBIR_PROFILE_BUILD_START( cleanup );
7213- stbir__cleanup_gathered_coefficients( samp->edge, &samp->extent_info, &samp->scale_info, gather_num_contributors, gather_contributors, gather_coeffs, gather_coefficient_width );
7214- STBIR_PROFILE_BUILD_END( cleanup );
7215- }
7216- break;
7217-
7218- case 0: // scatter downsample (only on vertical)
7219- case 2: // gather downsample
7220- {
7221- float in_pixels_radius = support(scale,user_data) * inv_scale;
7222- int filter_pixel_margin = samp->filter_pixel_margin;
7223- int input_end = input_full_size + filter_pixel_margin;
7224-
7225- // if this is a scatter, we do a downsample gather to get the coeffs, and then pivot after
7226- if ( !samp->is_gather )
7227- {
7228- // check if we are using the same gather downsample on the horizontal as this vertical,
7229- // if so, then we don't have to generate them, we can just pivot from the horizontal.
7230- if ( other_axis_for_pivot )
7231- {
7232- gather_contributors = other_axis_for_pivot->contributors;
7233- gather_coeffs = other_axis_for_pivot->coefficients;
7234- gather_coefficient_width = other_axis_for_pivot->coefficient_width;
7235- gather_num_contributors = other_axis_for_pivot->num_contributors;
7236- samp->extent_info.lowest = other_axis_for_pivot->extent_info.lowest;
7237- samp->extent_info.highest = other_axis_for_pivot->extent_info.highest;
7238- samp->extent_info.widest = other_axis_for_pivot->extent_info.widest;
7239- goto jump_right_to_pivot;
7240- }
7241-
7242- gather_contributors = samp->gather_prescatter_contributors;
7243- gather_coeffs = samp->gather_prescatter_coefficients;
7244- gather_coefficient_width = samp->gather_prescatter_coefficient_width;
7245- gather_num_contributors = samp->gather_prescatter_num_contributors;
7246- }
7247-
7248- stbir__calculate_coefficients_for_gather_downsample( -filter_pixel_margin, input_end, in_pixels_radius, kernel, &samp->scale_info, gather_coefficient_width, gather_num_contributors, gather_contributors, gather_coeffs, user_data );
7249-
7250- STBIR_PROFILE_BUILD_START( cleanup );
7251- stbir__cleanup_gathered_coefficients( samp->edge, &samp->extent_info, &samp->scale_info, gather_num_contributors, gather_contributors, gather_coeffs, gather_coefficient_width );
7252- STBIR_PROFILE_BUILD_END( cleanup );
7253-
7254- if ( !samp->is_gather )
7255- {
7256- // if this is a scatter (vertical only), then we need to pivot the coeffs
7257- stbir__contributors * scatter_contributors;
7258- int highest_set;
7259-
7260- jump_right_to_pivot:
7261-
7262- STBIR_PROFILE_BUILD_START( pivot );
7263-
7264- highest_set = (-filter_pixel_margin) - 1;
7265- for (n = 0; n < gather_num_contributors; n++)
7266- {
7267- int k;
7268- int gn0 = gather_contributors->n0, gn1 = gather_contributors->n1;
7269- int scatter_coefficient_width = samp->coefficient_width;
7270- float * scatter_coeffs = samp->coefficients + ( gn0 + filter_pixel_margin ) * scatter_coefficient_width;
7271- float * g_coeffs = gather_coeffs;
7272- scatter_contributors = samp->contributors + ( gn0 + filter_pixel_margin );
7273-
7274- for (k = gn0 ; k <= gn1 ; k++ )
7275- {
7276- float gc = *g_coeffs++;
7277-
7278- // skip zero and denormals - must skip zeros to avoid adding coeffs beyond scatter_coefficient_width
7279- // (which happens when pivoting from horizontal, which might have dummy zeros)
7280- if ( ( ( gc >= stbir__small_float ) || ( gc <= -stbir__small_float ) ) )
7281- {
7282- if ( ( k > highest_set ) || ( scatter_contributors->n0 > scatter_contributors->n1 ) )
7283- {
7284- {
7285- // if we are skipping over several contributors, we need to clear the skipped ones
7286- stbir__contributors * clear_contributors = samp->contributors + ( highest_set + filter_pixel_margin + 1);
7287- while ( clear_contributors < scatter_contributors )
7288- {
7289- clear_contributors->n0 = 0;
7290- clear_contributors->n1 = -1;
7291- ++clear_contributors;
7292- }
7293- }
7294- scatter_contributors->n0 = n;
7295- scatter_contributors->n1 = n;
7296- scatter_coeffs[0] = gc;
7297- highest_set = k;
7298- }
7299- else
7300- {
7301- stbir__insert_coeff( scatter_contributors, scatter_coeffs, n, gc, scatter_coefficient_width );
7302- }
7303- STBIR_ASSERT( ( scatter_contributors->n1 - scatter_contributors->n0 + 1 ) <= scatter_coefficient_width );
7304- }
7305- ++scatter_contributors;
7306- scatter_coeffs += scatter_coefficient_width;
7307- }
7308-
7309- ++gather_contributors;
7310- gather_coeffs += gather_coefficient_width;
7311- }
7312-
7313- // now clear any unset contribs
7314- {
7315- stbir__contributors * clear_contributors = samp->contributors + ( highest_set + filter_pixel_margin + 1);
7316- stbir__contributors * end_contributors = samp->contributors + samp->num_contributors;
7317- while ( clear_contributors < end_contributors )
7318- {
7319- clear_contributors->n0 = 0;
7320- clear_contributors->n1 = -1;
7321- ++clear_contributors;
7322- }
7323- }
7324-
7325- STBIR_PROFILE_BUILD_END( pivot );
7326- }
7327- }
7328- break;
7329- }
7330-}
7331+static void
7332+stbir__cleanup_gathered_coefficients(stbir_edge edge,
7333+ stbir__filter_extent_info *filter_info,
7334+ stbir__scale_info *scale_info,
7335+ int num_contributors,
7336+ stbir__contributors *contributors,
7337+ float *coefficient_group,
7338+ int coefficient_width)
7339+{
7340+ int input_size = scale_info->input_full_size;
7341+ int input_last_n1 = input_size - 1;
7342+ int n, end;
7343+ int lowest = 0x7fffffff;
7344+ int highest = -0x7fffffff;
7345+ int widest = -1;
7346+ int numerator = scale_info->scale_numerator;
7347+ int denominator = scale_info->scale_denominator;
7348+ int polyphase =
7349+ ((scale_info->scale_is_rational) && (numerator < num_contributors));
7350+ float *coeffs;
7351+ stbir__contributors *contribs;
7352+
7353+ // weight all the coeffs for each sample
7354+ coeffs = coefficient_group;
7355+ contribs = contributors;
7356+ end = num_contributors;
7357+ if (polyphase) {
7358+ end = numerator;
7359+ }
7360+ for (n = 0; n < end; n++) {
7361+ int i;
7362+ STBIR_RENORM_TYPE filter_scale, total_filter = 0;
7363+ int e;
7364+
7365+ // add all contribs
7366+ e = contribs->n1 - contribs->n0;
7367+ for (i = 0; i <= e; i++) {
7368+ total_filter += (STBIR_RENORM_TYPE)coeffs[i];
7369+ STBIR_ASSERT((coeffs[i] >= -2.0f) &&
7370+ (coeffs[i] <= 2.0f)); // check for wonky weights
7371+ }
7372+
7373+ // rescale
7374+ if ((total_filter < stbir__small_float) &&
7375+ (total_filter > -stbir__small_float)) {
7376+ // all coeffs are extremely small, just zero it
7377+ contribs->n1 = contribs->n0;
7378+ coeffs[0] = 0.0f;
7379+ } else {
7380+ // if the total isn't 1.0, rescale everything
7381+ if ((total_filter < (1.0f - stbir__small_float)) ||
7382+ (total_filter > (1.0f + stbir__small_float))) {
7383+ filter_scale = ((STBIR_RENORM_TYPE)1.0) / total_filter;
7384+
7385+ // scale them all
7386+ for (i = 0; i <= e; i++) {
7387+ coeffs[i] = (float)(coeffs[i] * filter_scale);
7388+ }
7389+ }
7390+ }
7391+ ++contribs;
7392+ coeffs += coefficient_width;
7393+ }
7394+
7395+ // if we have a rational for the scale, we can exploit the polyphaseness to
7396+ // not calculate
7397+ // most of the coefficients, so we copy them here
7398+ if (polyphase) {
7399+ stbir__contributors *prev_contribs = contributors;
7400+ stbir__contributors *cur_contribs = contributors + numerator;
7401+
7402+ for (n = numerator; n < num_contributors; n++) {
7403+ cur_contribs->n0 = prev_contribs->n0 + denominator;
7404+ cur_contribs->n1 = prev_contribs->n1 + denominator;
7405+ ++cur_contribs;
7406+ ++prev_contribs;
7407+ }
7408+ stbir_overlapping_memcpy(coefficient_group +
7409+ numerator * coefficient_width,
7410+ coefficient_group,
7411+ (num_contributors - numerator) *
7412+ coefficient_width * sizeof(coeffs[0]));
7413+ }
7414+
7415+ coeffs = coefficient_group;
7416+ contribs = contributors;
7417+
7418+ for (n = 0; n < num_contributors; n++) {
7419+ int i;
7420+
7421+ // in zero edge mode, just remove out of bounds contribs completely
7422+ // (since their weights are accounted for now)
7423+ if (edge == STBIR_EDGE_ZERO) {
7424+ // shrink the right side if necessary
7425+ if (contribs->n1 > input_last_n1) {
7426+ contribs->n1 = input_last_n1;
7427+ }
7428+
7429+ // shrink the left side
7430+ if (contribs->n0 < 0) {
7431+ int j, left, skips = 0;
7432+
7433+ skips = -contribs->n0;
7434+ contribs->n0 = 0;
7435+
7436+ // now move down the weights
7437+ left = contribs->n1 - contribs->n0 + 1;
7438+ if (left > 0) {
7439+ for (j = 0; j < left; j++) {
7440+ coeffs[j] = coeffs[j + skips];
7441+ }
7442+ }
7443+ }
7444+ } else if ((edge == STBIR_EDGE_CLAMP) || (edge == STBIR_EDGE_REFLECT)) {
7445+ // for clamp and reflect, calculate the true inbounds position
7446+ // (based on edge type) and just add that to the existing weight
7447+
7448+ // right hand side first
7449+ if (contribs->n1 > input_last_n1) {
7450+ int start = contribs->n0;
7451+ int endi = contribs->n1;
7452+ contribs->n1 = input_last_n1;
7453+ for (i = input_size; i <= endi; i++) {
7454+ stbir__insert_coeff(
7455+ contribs, coeffs,
7456+ stbir__edge_wrap_slow[edge](i, input_size),
7457+ coeffs[i - start], coefficient_width);
7458+ }
7459+ }
7460+
7461+ // now check left hand edge
7462+ if (contribs->n0 < 0) {
7463+ int save_n0;
7464+ float save_n0_coeff;
7465+ float *c = coeffs - (contribs->n0 + 1);
7466+
7467+ // reinsert the coeffs with it reflected or clamped (insert
7468+ // accumulates, if the coeffs exist)
7469+ for (i = -1; i > contribs->n0; i--) {
7470+ stbir__insert_coeff(
7471+ contribs, coeffs,
7472+ stbir__edge_wrap_slow[edge](i, input_size), *c--,
7473+ coefficient_width);
7474+ }
7475+ save_n0 = contribs->n0;
7476+ save_n0_coeff = c[0]; // save it, since we didn't do the final
7477+ // one (i==n0), because there might be too
7478+ // many coeffs to hold (before we resize)!
7479+
7480+ // now slide all the coeffs down (since we have accumulated them
7481+ // in the positive contribs) and reset the first contrib
7482+ contribs->n0 = 0;
7483+ for (i = 0; i <= contribs->n1; i++) {
7484+ coeffs[i] = coeffs[i - save_n0];
7485+ }
7486+
7487+ // now that we have shrunk down the contribs, we insert the
7488+ // first one safely
7489+ stbir__insert_coeff(
7490+ contribs, coeffs,
7491+ stbir__edge_wrap_slow[edge](save_n0, input_size),
7492+ save_n0_coeff, coefficient_width);
7493+ }
7494+ }
7495+
7496+ if (contribs->n0 <= contribs->n1) {
7497+ int diff = contribs->n1 - contribs->n0 + 1;
7498+ while (diff && (coeffs[diff - 1] == 0.0f)) {
7499+ --diff;
7500+ }
7501+
7502+ contribs->n1 = contribs->n0 + diff - 1;
7503+
7504+ if (contribs->n0 <= contribs->n1) {
7505+ if (contribs->n0 < lowest) {
7506+ lowest = contribs->n0;
7507+ }
7508+ if (contribs->n1 > highest) {
7509+ highest = contribs->n1;
7510+ }
7511+ if (diff > widest) {
7512+ widest = diff;
7513+ }
7514+ }
7515+
7516+ // re-zero out unused coefficients (if any)
7517+ for (i = diff; i < coefficient_width; i++) {
7518+ coeffs[i] = 0.0f;
7519+ }
7520+ }
7521+
7522+ ++contribs;
7523+ coeffs += coefficient_width;
7524+ }
7525+ filter_info->lowest = lowest;
7526+ filter_info->highest = highest;
7527+ filter_info->widest = widest;
7528+}
7529+
7530+#undef STBIR_RENORM_TYPE
7531+
7532+static int
7533+stbir__pack_coefficients(int num_contributors,
7534+ stbir__contributors *contributors, float *coefficents,
7535+ int coefficient_width, int widest, int row0, int row1)
7536+{
7537+#define STBIR_MOVE_1(dest, src) \
7538+ { \
7539+ STBIR_NO_UNROLL(dest); \
7540+ ((stbir_uint32 *)(dest))[0] = ((stbir_uint32 *)(src))[0]; \
7541+ }
7542+#define STBIR_MOVE_2(dest, src) \
7543+ { \
7544+ STBIR_NO_UNROLL(dest); \
7545+ ((stbir_uint64 *)(dest))[0] = ((stbir_uint64 *)(src))[0]; \
7546+ }
7547+#ifdef STBIR_SIMD
7548+#define STBIR_MOVE_4(dest, src) \
7549+ { \
7550+ stbir__simdf t; \
7551+ STBIR_NO_UNROLL(dest); \
7552+ stbir__simdf_load(t, src); \
7553+ stbir__simdf_store(dest, t); \
7554+ }
7555+#else
7556+#define STBIR_MOVE_4(dest, src) \
7557+ { \
7558+ STBIR_NO_UNROLL(dest); \
7559+ ((stbir_uint64 *)(dest))[0] = ((stbir_uint64 *)(src))[0]; \
7560+ ((stbir_uint64 *)(dest))[1] = ((stbir_uint64 *)(src))[1]; \
7561+ }
7562+#endif
7563
7564+ int row_end = row1 + 1;
7565+ STBIR__UNUSED(row0); // only used in an assert
7566+
7567+ if (coefficient_width != widest) {
7568+ float *pc = coefficents;
7569+ float *coeffs = coefficents;
7570+ float *pc_end = coefficents + num_contributors * widest;
7571+ switch (widest) {
7572+ case 1:
7573+ STBIR_NO_UNROLL_LOOP_START
7574+ do {
7575+ STBIR_MOVE_1(pc, coeffs);
7576+ ++pc;
7577+ coeffs += coefficient_width;
7578+ } while (pc < pc_end);
7579+ break;
7580+ case 2:
7581+ STBIR_NO_UNROLL_LOOP_START
7582+ do {
7583+ STBIR_MOVE_2(pc, coeffs);
7584+ pc += 2;
7585+ coeffs += coefficient_width;
7586+ } while (pc < pc_end);
7587+ break;
7588+ case 3:
7589+ STBIR_NO_UNROLL_LOOP_START
7590+ do {
7591+ STBIR_MOVE_2(pc, coeffs);
7592+ STBIR_MOVE_1(pc + 2, coeffs + 2);
7593+ pc += 3;
7594+ coeffs += coefficient_width;
7595+ } while (pc < pc_end);
7596+ break;
7597+ case 4:
7598+ STBIR_NO_UNROLL_LOOP_START
7599+ do {
7600+ STBIR_MOVE_4(pc, coeffs);
7601+ pc += 4;
7602+ coeffs += coefficient_width;
7603+ } while (pc < pc_end);
7604+ break;
7605+ case 5:
7606+ STBIR_NO_UNROLL_LOOP_START
7607+ do {
7608+ STBIR_MOVE_4(pc, coeffs);
7609+ STBIR_MOVE_1(pc + 4, coeffs + 4);
7610+ pc += 5;
7611+ coeffs += coefficient_width;
7612+ } while (pc < pc_end);
7613+ break;
7614+ case 6:
7615+ STBIR_NO_UNROLL_LOOP_START
7616+ do {
7617+ STBIR_MOVE_4(pc, coeffs);
7618+ STBIR_MOVE_2(pc + 4, coeffs + 4);
7619+ pc += 6;
7620+ coeffs += coefficient_width;
7621+ } while (pc < pc_end);
7622+ break;
7623+ case 7:
7624+ STBIR_NO_UNROLL_LOOP_START
7625+ do {
7626+ STBIR_MOVE_4(pc, coeffs);
7627+ STBIR_MOVE_2(pc + 4, coeffs + 4);
7628+ STBIR_MOVE_1(pc + 6, coeffs + 6);
7629+ pc += 7;
7630+ coeffs += coefficient_width;
7631+ } while (pc < pc_end);
7632+ break;
7633+ case 8:
7634+ STBIR_NO_UNROLL_LOOP_START
7635+ do {
7636+ STBIR_MOVE_4(pc, coeffs);
7637+ STBIR_MOVE_4(pc + 4, coeffs + 4);
7638+ pc += 8;
7639+ coeffs += coefficient_width;
7640+ } while (pc < pc_end);
7641+ break;
7642+ case 9:
7643+ STBIR_NO_UNROLL_LOOP_START
7644+ do {
7645+ STBIR_MOVE_4(pc, coeffs);
7646+ STBIR_MOVE_4(pc + 4, coeffs + 4);
7647+ STBIR_MOVE_1(pc + 8, coeffs + 8);
7648+ pc += 9;
7649+ coeffs += coefficient_width;
7650+ } while (pc < pc_end);
7651+ break;
7652+ case 10:
7653+ STBIR_NO_UNROLL_LOOP_START
7654+ do {
7655+ STBIR_MOVE_4(pc, coeffs);
7656+ STBIR_MOVE_4(pc + 4, coeffs + 4);
7657+ STBIR_MOVE_2(pc + 8, coeffs + 8);
7658+ pc += 10;
7659+ coeffs += coefficient_width;
7660+ } while (pc < pc_end);
7661+ break;
7662+ case 11:
7663+ STBIR_NO_UNROLL_LOOP_START
7664+ do {
7665+ STBIR_MOVE_4(pc, coeffs);
7666+ STBIR_MOVE_4(pc + 4, coeffs + 4);
7667+ STBIR_MOVE_2(pc + 8, coeffs + 8);
7668+ STBIR_MOVE_1(pc + 10, coeffs + 10);
7669+ pc += 11;
7670+ coeffs += coefficient_width;
7671+ } while (pc < pc_end);
7672+ break;
7673+ case 12:
7674+ STBIR_NO_UNROLL_LOOP_START
7675+ do {
7676+ STBIR_MOVE_4(pc, coeffs);
7677+ STBIR_MOVE_4(pc + 4, coeffs + 4);
7678+ STBIR_MOVE_4(pc + 8, coeffs + 8);
7679+ pc += 12;
7680+ coeffs += coefficient_width;
7681+ } while (pc < pc_end);
7682+ break;
7683+ default:
7684+ STBIR_NO_UNROLL_LOOP_START
7685+ do {
7686+ float *copy_end = pc + widest - 4;
7687+ float *c = coeffs;
7688+ do {
7689+ STBIR_NO_UNROLL(pc);
7690+ STBIR_MOVE_4(pc, c);
7691+ pc += 4;
7692+ c += 4;
7693+ } while (pc <= copy_end);
7694+ copy_end += 4;
7695+ STBIR_NO_UNROLL_LOOP_START
7696+ while (pc < copy_end) {
7697+ STBIR_MOVE_1(pc, c);
7698+ ++pc;
7699+ ++c;
7700+ }
7701+ coeffs += coefficient_width;
7702+ } while (pc < pc_end);
7703+ break;
7704+ }
7705+ }
7706+
7707+ // some horizontal routines read one float off the end (which is then masked
7708+ // off), so put in a sentinal so we don't read an snan or denormal
7709+ coefficents[widest * num_contributors] = 8888.0f;
7710+
7711+ // the minimum we might read for unrolled filters widths is 12. So, we need
7712+ // to
7713+ // make sure we never read outside the decode buffer, by possibly moving
7714+ // the sample area back into the scanline, and putting zeros weights
7715+ // first.
7716+ // we start on the right edge and check until we're well past the possible
7717+ // clip area (2*widest).
7718+ {
7719+ stbir__contributors *contribs = contributors + num_contributors - 1;
7720+ float *coeffs = coefficents + widest * (num_contributors - 1);
7721+
7722+ // go until no chance of clipping (this is usually less than 8 lops)
7723+ while ((contribs >= contributors) &&
7724+ ((contribs->n0 + widest * 2) >= row_end)) {
7725+ // might we clip??
7726+ if ((contribs->n0 + widest) > row_end) {
7727+ int stop_range = widest;
7728+
7729+ // if range is larger than 12, it will be handled by generic
7730+ // loops that can terminate on the exact length
7731+ // of this contrib n1, instead of a fixed widest amount - so
7732+ // calculate this
7733+ if (widest > 12) {
7734+ int mod;
7735+
7736+ // how far will be read in the n_coeff loop (which depends
7737+ // on the widest count mod4);
7738+ mod = widest & 3;
7739+ stop_range =
7740+ (((contribs->n1 - contribs->n0 + 1) - mod + 3) & ~3) +
7741+ mod;
7742+
7743+ // the n_coeff loops do a minimum amount of coeffs, so
7744+ // factor that in!
7745+ if (stop_range < (8 + mod)) {
7746+ stop_range = 8 + mod;
7747+ }
7748+ }
7749+
7750+ // now see if we still clip with the refined range
7751+ if ((contribs->n0 + stop_range) > row_end) {
7752+ int new_n0 = row_end - stop_range;
7753+ int num = contribs->n1 - contribs->n0 + 1;
7754+ int backup = contribs->n0 - new_n0;
7755+ float *from_co = coeffs + num - 1;
7756+ float *to_co = from_co + backup;
7757+
7758+ STBIR_ASSERT((new_n0 >= row0) && (new_n0 < contribs->n0));
7759+
7760+ // move the coeffs over
7761+ while (num) {
7762+ *to_co-- = *from_co--;
7763+ --num;
7764+ }
7765+ // zero new positions
7766+ while (to_co >= coeffs) {
7767+ *to_co-- = 0;
7768+ }
7769+ // set new start point
7770+ contribs->n0 = new_n0;
7771+ if (widest > 12) {
7772+ int mod;
7773+
7774+ // how far will be read in the n_coeff loop (which
7775+ // depends on the widest count mod4);
7776+ mod = widest & 3;
7777+ stop_range =
7778+ (((contribs->n1 - contribs->n0 + 1) - mod + 3) &
7779+ ~3) +
7780+ mod;
7781+
7782+ // the n_coeff loops do a minimum amount of coeffs, so
7783+ // factor that in!
7784+ if (stop_range < (8 + mod)) {
7785+ stop_range = 8 + mod;
7786+ }
7787+ }
7788+ }
7789+ }
7790+ --contribs;
7791+ coeffs -= widest;
7792+ }
7793+ }
7794+
7795+ return widest;
7796+#undef STBIR_MOVE_1
7797+#undef STBIR_MOVE_2
7798+#undef STBIR_MOVE_4
7799+}
7800+
7801+static void
7802+stbir__calculate_filters(stbir__sampler *samp,
7803+ stbir__sampler *other_axis_for_pivot,
7804+ void *user_data STBIR_ONLY_PROFILE_BUILD_GET_INFO)
7805+{
7806+ int n;
7807+ float scale = samp->scale_info.scale;
7808+ stbir__kernel_callback *kernel = samp->filter_kernel;
7809+ stbir__support_callback *support = samp->filter_support;
7810+ float inv_scale = samp->scale_info.inv_scale;
7811+ int input_full_size = samp->scale_info.input_full_size;
7812+ int gather_num_contributors = samp->num_contributors;
7813+ stbir__contributors *gather_contributors = samp->contributors;
7814+ float *gather_coeffs = samp->coefficients;
7815+ int gather_coefficient_width = samp->coefficient_width;
7816+
7817+ switch (samp->is_gather) {
7818+ case 1: // gather upsample
7819+ {
7820+ float out_pixels_radius = support(inv_scale, user_data) * scale;
7821+
7822+ stbir__calculate_coefficients_for_gather_upsample(
7823+ out_pixels_radius, kernel, &samp->scale_info,
7824+ gather_num_contributors, gather_contributors, gather_coeffs,
7825+ gather_coefficient_width, samp->edge, user_data);
7826+
7827+ STBIR_PROFILE_BUILD_START(cleanup);
7828+ stbir__cleanup_gathered_coefficients(
7829+ samp->edge, &samp->extent_info, &samp->scale_info,
7830+ gather_num_contributors, gather_contributors, gather_coeffs,
7831+ gather_coefficient_width);
7832+ STBIR_PROFILE_BUILD_END(cleanup);
7833+ } break;
7834+
7835+ case 0: // scatter downsample (only on vertical)
7836+ case 2: // gather downsample
7837+ {
7838+ float in_pixels_radius = support(scale, user_data) * inv_scale;
7839+ int filter_pixel_margin = samp->filter_pixel_margin;
7840+ int input_end = input_full_size + filter_pixel_margin;
7841+
7842+ // if this is a scatter, we do a downsample gather to get the coeffs,
7843+ // and then pivot after
7844+ if (!samp->is_gather) {
7845+ // check if we are using the same gather downsample on the
7846+ // horizontal as this vertical,
7847+ // if so, then we don't have to generate them, we can just pivot
7848+ // from the horizontal.
7849+ if (other_axis_for_pivot) {
7850+ gather_contributors = other_axis_for_pivot->contributors;
7851+ gather_coeffs = other_axis_for_pivot->coefficients;
7852+ gather_coefficient_width =
7853+ other_axis_for_pivot->coefficient_width;
7854+ gather_num_contributors =
7855+ other_axis_for_pivot->num_contributors;
7856+ samp->extent_info.lowest =
7857+ other_axis_for_pivot->extent_info.lowest;
7858+ samp->extent_info.highest =
7859+ other_axis_for_pivot->extent_info.highest;
7860+ samp->extent_info.widest =
7861+ other_axis_for_pivot->extent_info.widest;
7862+ goto jump_right_to_pivot;
7863+ }
7864+
7865+ gather_contributors = samp->gather_prescatter_contributors;
7866+ gather_coeffs = samp->gather_prescatter_coefficients;
7867+ gather_coefficient_width =
7868+ samp->gather_prescatter_coefficient_width;
7869+ gather_num_contributors = samp->gather_prescatter_num_contributors;
7870+ }
7871+
7872+ stbir__calculate_coefficients_for_gather_downsample(
7873+ -filter_pixel_margin, input_end, in_pixels_radius, kernel,
7874+ &samp->scale_info, gather_coefficient_width,
7875+ gather_num_contributors, gather_contributors, gather_coeffs,
7876+ user_data);
7877+
7878+ STBIR_PROFILE_BUILD_START(cleanup);
7879+ stbir__cleanup_gathered_coefficients(
7880+ samp->edge, &samp->extent_info, &samp->scale_info,
7881+ gather_num_contributors, gather_contributors, gather_coeffs,
7882+ gather_coefficient_width);
7883+ STBIR_PROFILE_BUILD_END(cleanup);
7884+
7885+ if (!samp->is_gather) {
7886+ // if this is a scatter (vertical only), then we need to pivot the
7887+ // coeffs
7888+ stbir__contributors *scatter_contributors;
7889+ int highest_set;
7890+
7891+ jump_right_to_pivot:
7892+
7893+ STBIR_PROFILE_BUILD_START(pivot);
7894+
7895+ highest_set = (-filter_pixel_margin) - 1;
7896+ for (n = 0; n < gather_num_contributors; n++) {
7897+ int k;
7898+ int gn0 = gather_contributors->n0,
7899+ gn1 = gather_contributors->n1;
7900+ int scatter_coefficient_width = samp->coefficient_width;
7901+ float *scatter_coeffs =
7902+ samp->coefficients +
7903+ (gn0 + filter_pixel_margin) * scatter_coefficient_width;
7904+ float *g_coeffs = gather_coeffs;
7905+ scatter_contributors =
7906+ samp->contributors + (gn0 + filter_pixel_margin);
7907+
7908+ for (k = gn0; k <= gn1; k++) {
7909+ float gc = *g_coeffs++;
7910+
7911+ // skip zero and denormals - must skip zeros to avoid adding
7912+ // coeffs beyond scatter_coefficient_width
7913+ // (which happens when pivoting from horizontal, which
7914+ // might have dummy zeros)
7915+ if (((gc >= stbir__small_float) ||
7916+ (gc <= -stbir__small_float))) {
7917+ if ((k > highest_set) || (scatter_contributors->n0 >
7918+ scatter_contributors->n1)) {
7919+ {
7920+ // if we are skipping over several contributors,
7921+ // we need to clear the skipped ones
7922+ stbir__contributors *clear_contributors =
7923+ samp->contributors +
7924+ (highest_set + filter_pixel_margin + 1);
7925+ while (clear_contributors <
7926+ scatter_contributors) {
7927+ clear_contributors->n0 = 0;
7928+ clear_contributors->n1 = -1;
7929+ ++clear_contributors;
7930+ }
7931+ }
7932+ scatter_contributors->n0 = n;
7933+ scatter_contributors->n1 = n;
7934+ scatter_coeffs[0] = gc;
7935+ highest_set = k;
7936+ } else {
7937+ stbir__insert_coeff(scatter_contributors,
7938+ scatter_coeffs, n, gc,
7939+ scatter_coefficient_width);
7940+ }
7941+ STBIR_ASSERT((scatter_contributors->n1 -
7942+ scatter_contributors->n0 + 1) <=
7943+ scatter_coefficient_width);
7944+ }
7945+ ++scatter_contributors;
7946+ scatter_coeffs += scatter_coefficient_width;
7947+ }
7948+
7949+ ++gather_contributors;
7950+ gather_coeffs += gather_coefficient_width;
7951+ }
7952+
7953+ // now clear any unset contribs
7954+ {
7955+ stbir__contributors *clear_contributors =
7956+ samp->contributors +
7957+ (highest_set + filter_pixel_margin + 1);
7958+ stbir__contributors *end_contributors =
7959+ samp->contributors + samp->num_contributors;
7960+ while (clear_contributors < end_contributors) {
7961+ clear_contributors->n0 = 0;
7962+ clear_contributors->n1 = -1;
7963+ ++clear_contributors;
7964+ }
7965+ }
7966+
7967+ STBIR_PROFILE_BUILD_END(pivot);
7968+ }
7969+ } break;
7970+ }
7971+}
7972
7973 //========================================================================================================
7974 // scanline decoders and encoders
7975@@ -4051,760 +4882,803 @@ static void stbir__calculate_filters( stbir__sampler * samp, stbir__sampler * ot
7976
7977 #define stbir__decode_suffix BGRA
7978 #define stbir__decode_swizzle
7979-#define stbir__decode_order0 2
7980-#define stbir__decode_order1 1
7981-#define stbir__decode_order2 0
7982-#define stbir__decode_order3 3
7983-#define stbir__encode_order0 2
7984-#define stbir__encode_order1 1
7985-#define stbir__encode_order2 0
7986-#define stbir__encode_order3 3
7987+#define stbir__decode_order0 2
7988+#define stbir__decode_order1 1
7989+#define stbir__decode_order2 0
7990+#define stbir__decode_order3 3
7991+#define stbir__encode_order0 2
7992+#define stbir__encode_order1 1
7993+#define stbir__encode_order2 0
7994+#define stbir__encode_order3 3
7995 #define stbir__coder_min_num 4
7996 #define STB_IMAGE_RESIZE_DO_CODERS
7997 #include STBIR__HEADER_FILENAME
7998
7999 #define stbir__decode_suffix ARGB
8000 #define stbir__decode_swizzle
8001-#define stbir__decode_order0 1
8002-#define stbir__decode_order1 2
8003-#define stbir__decode_order2 3
8004-#define stbir__decode_order3 0
8005-#define stbir__encode_order0 3
8006-#define stbir__encode_order1 0
8007-#define stbir__encode_order2 1
8008-#define stbir__encode_order3 2
8009+#define stbir__decode_order0 1
8010+#define stbir__decode_order1 2
8011+#define stbir__decode_order2 3
8012+#define stbir__decode_order3 0
8013+#define stbir__encode_order0 3
8014+#define stbir__encode_order1 0
8015+#define stbir__encode_order2 1
8016+#define stbir__encode_order3 2
8017 #define stbir__coder_min_num 4
8018 #define STB_IMAGE_RESIZE_DO_CODERS
8019 #include STBIR__HEADER_FILENAME
8020
8021 #define stbir__decode_suffix ABGR
8022 #define stbir__decode_swizzle
8023-#define stbir__decode_order0 3
8024-#define stbir__decode_order1 2
8025-#define stbir__decode_order2 1
8026-#define stbir__decode_order3 0
8027-#define stbir__encode_order0 3
8028-#define stbir__encode_order1 2
8029-#define stbir__encode_order2 1
8030-#define stbir__encode_order3 0
8031+#define stbir__decode_order0 3
8032+#define stbir__decode_order1 2
8033+#define stbir__decode_order2 1
8034+#define stbir__decode_order3 0
8035+#define stbir__encode_order0 3
8036+#define stbir__encode_order1 2
8037+#define stbir__encode_order2 1
8038+#define stbir__encode_order3 0
8039 #define stbir__coder_min_num 4
8040 #define STB_IMAGE_RESIZE_DO_CODERS
8041 #include STBIR__HEADER_FILENAME
8042
8043 #define stbir__decode_suffix AR
8044 #define stbir__decode_swizzle
8045-#define stbir__decode_order0 1
8046-#define stbir__decode_order1 0
8047-#define stbir__decode_order2 3
8048-#define stbir__decode_order3 2
8049-#define stbir__encode_order0 1
8050-#define stbir__encode_order1 0
8051-#define stbir__encode_order2 3
8052-#define stbir__encode_order3 2
8053+#define stbir__decode_order0 1
8054+#define stbir__decode_order1 0
8055+#define stbir__decode_order2 3
8056+#define stbir__decode_order3 2
8057+#define stbir__encode_order0 1
8058+#define stbir__encode_order1 0
8059+#define stbir__encode_order2 3
8060+#define stbir__encode_order3 2
8061 #define stbir__coder_min_num 2
8062 #define STB_IMAGE_RESIZE_DO_CODERS
8063 #include STBIR__HEADER_FILENAME
8064
8065+// fancy alpha means we expand to keep both premultipied and non-premultiplied
8066+// color channels
8067+static void
8068+stbir__fancy_alpha_weight_4ch(float *out_buffer, int width_times_channels)
8069+{
8070+ float STBIR_STREAMOUT_PTR(*) out = out_buffer;
8071+ float const *end_decode =
8072+ out_buffer + (width_times_channels / 4) *
8073+ 7; // decode buffer aligned to end of out_buffer
8074+ float STBIR_STREAMOUT_PTR(*) decode =
8075+ (float *)end_decode - width_times_channels;
8076+
8077+ // fancy alpha is stored internally as R G B A Rpm Gpm Bpm
8078
8079-// fancy alpha means we expand to keep both premultipied and non-premultiplied color channels
8080-static void stbir__fancy_alpha_weight_4ch( float * out_buffer, int width_times_channels )
8081-{
8082- float STBIR_STREAMOUT_PTR(*) out = out_buffer;
8083- float const * end_decode = out_buffer + ( width_times_channels / 4 ) * 7; // decode buffer aligned to end of out_buffer
8084- float STBIR_STREAMOUT_PTR(*) decode = (float*)end_decode - width_times_channels;
8085-
8086- // fancy alpha is stored internally as R G B A Rpm Gpm Bpm
8087-
8088- #ifdef STBIR_SIMD
8089-
8090- #ifdef STBIR_SIMD8
8091- decode += 16;
8092- STBIR_NO_UNROLL_LOOP_START
8093- while ( decode <= end_decode )
8094- {
8095- stbir__simdf8 d0,d1,a0,a1,p0,p1;
8096- STBIR_NO_UNROLL(decode);
8097- stbir__simdf8_load( d0, decode-16 );
8098- stbir__simdf8_load( d1, decode-16+8 );
8099- stbir__simdf8_0123to33333333( a0, d0 );
8100- stbir__simdf8_0123to33333333( a1, d1 );
8101- stbir__simdf8_mult( p0, a0, d0 );
8102- stbir__simdf8_mult( p1, a1, d1 );
8103- stbir__simdf8_bot4s( a0, d0, p0 );
8104- stbir__simdf8_bot4s( a1, d1, p1 );
8105- stbir__simdf8_top4s( d0, d0, p0 );
8106- stbir__simdf8_top4s( d1, d1, p1 );
8107- stbir__simdf8_store ( out, a0 );
8108- stbir__simdf8_store ( out+7, d0 );
8109- stbir__simdf8_store ( out+14, a1 );
8110- stbir__simdf8_store ( out+21, d1 );
8111- decode += 16;
8112- out += 28;
8113- }
8114- decode -= 16;
8115- #else
8116- decode += 8;
8117- STBIR_NO_UNROLL_LOOP_START
8118- while ( decode <= end_decode )
8119- {
8120- stbir__simdf d0,a0,d1,a1,p0,p1;
8121- STBIR_NO_UNROLL(decode);
8122- stbir__simdf_load( d0, decode-8 );
8123- stbir__simdf_load( d1, decode-8+4 );
8124- stbir__simdf_0123to3333( a0, d0 );
8125- stbir__simdf_0123to3333( a1, d1 );
8126- stbir__simdf_mult( p0, a0, d0 );
8127- stbir__simdf_mult( p1, a1, d1 );
8128- stbir__simdf_store ( out, d0 );
8129- stbir__simdf_store ( out+4, p0 );
8130- stbir__simdf_store ( out+7, d1 );
8131- stbir__simdf_store ( out+7+4, p1 );
8132- decode += 8;
8133- out += 14;
8134- }
8135- decode -= 8;
8136- #endif
8137-
8138- // might be one last odd pixel
8139- #ifdef STBIR_SIMD8
8140- STBIR_NO_UNROLL_LOOP_START
8141- while ( decode < end_decode )
8142- #else
8143- if ( decode < end_decode )
8144- #endif
8145- {
8146- stbir__simdf d,a,p;
8147- STBIR_NO_UNROLL(decode);
8148- stbir__simdf_load( d, decode );
8149- stbir__simdf_0123to3333( a, d );
8150- stbir__simdf_mult( p, a, d );
8151- stbir__simdf_store ( out, d );
8152- stbir__simdf_store ( out+4, p );
8153- decode += 4;
8154- out += 7;
8155- }
8156-
8157- #else
8158-
8159- while( decode < end_decode )
8160- {
8161- float r = decode[0], g = decode[1], b = decode[2], alpha = decode[3];
8162- out[0] = r;
8163- out[1] = g;
8164- out[2] = b;
8165- out[3] = alpha;
8166- out[4] = r * alpha;
8167- out[5] = g * alpha;
8168- out[6] = b * alpha;
8169- out += 7;
8170- decode += 4;
8171- }
8172-
8173- #endif
8174-}
8175-
8176-static void stbir__fancy_alpha_weight_2ch( float * out_buffer, int width_times_channels )
8177-{
8178- float STBIR_STREAMOUT_PTR(*) out = out_buffer;
8179- float const * end_decode = out_buffer + ( width_times_channels / 2 ) * 3;
8180- float STBIR_STREAMOUT_PTR(*) decode = (float*)end_decode - width_times_channels;
8181-
8182- // for fancy alpha, turns into: [X A Xpm][X A Xpm],etc
8183-
8184- #ifdef STBIR_SIMD
8185-
8186- decode += 8;
8187- if ( decode <= end_decode )
8188- {
8189- STBIR_NO_UNROLL_LOOP_START
8190- do {
8191- #ifdef STBIR_SIMD8
8192- stbir__simdf8 d0,a0,p0;
8193- STBIR_NO_UNROLL(decode);
8194- stbir__simdf8_load( d0, decode-8 );
8195- stbir__simdf8_0123to11331133( p0, d0 );
8196- stbir__simdf8_0123to00220022( a0, d0 );
8197- stbir__simdf8_mult( p0, p0, a0 );
8198-
8199- stbir__simdf_store2( out, stbir__if_simdf8_cast_to_simdf4( d0 ) );
8200- stbir__simdf_store( out+2, stbir__if_simdf8_cast_to_simdf4( p0 ) );
8201- stbir__simdf_store2h( out+3, stbir__if_simdf8_cast_to_simdf4( d0 ) );
8202-
8203- stbir__simdf_store2( out+6, stbir__simdf8_gettop4( d0 ) );
8204- stbir__simdf_store( out+8, stbir__simdf8_gettop4( p0 ) );
8205- stbir__simdf_store2h( out+9, stbir__simdf8_gettop4( d0 ) );
8206- #else
8207- stbir__simdf d0,a0,d1,a1,p0,p1;
8208- STBIR_NO_UNROLL(decode);
8209- stbir__simdf_load( d0, decode-8 );
8210- stbir__simdf_load( d1, decode-8+4 );
8211- stbir__simdf_0123to1133( p0, d0 );
8212- stbir__simdf_0123to1133( p1, d1 );
8213- stbir__simdf_0123to0022( a0, d0 );
8214- stbir__simdf_0123to0022( a1, d1 );
8215- stbir__simdf_mult( p0, p0, a0 );
8216- stbir__simdf_mult( p1, p1, a1 );
8217-
8218- stbir__simdf_store2( out, d0 );
8219- stbir__simdf_store( out+2, p0 );
8220- stbir__simdf_store2h( out+3, d0 );
8221-
8222- stbir__simdf_store2( out+6, d1 );
8223- stbir__simdf_store( out+8, p1 );
8224- stbir__simdf_store2h( out+9, d1 );
8225- #endif
8226- decode += 8;
8227- out += 12;
8228- } while ( decode <= end_decode );
8229- }
8230- decode -= 8;
8231- #endif
8232-
8233- STBIR_SIMD_NO_UNROLL_LOOP_START
8234- while( decode < end_decode )
8235- {
8236- float x = decode[0], y = decode[1];
8237- STBIR_SIMD_NO_UNROLL(decode);
8238- out[0] = x;
8239- out[1] = y;
8240- out[2] = x * y;
8241- out += 3;
8242- decode += 2;
8243- }
8244-}
8245-
8246-static void stbir__fancy_alpha_unweight_4ch( float * encode_buffer, int width_times_channels )
8247-{
8248- float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer;
8249- float STBIR_SIMD_STREAMOUT_PTR(*) input = encode_buffer;
8250- float const * end_output = encode_buffer + width_times_channels;
8251-
8252- // fancy RGBA is stored internally as R G B A Rpm Gpm Bpm
8253-
8254- STBIR_SIMD_NO_UNROLL_LOOP_START
8255- do {
8256- float alpha = input[3];
8257 #ifdef STBIR_SIMD
8258- stbir__simdf i,ia;
8259- STBIR_SIMD_NO_UNROLL(encode);
8260- if ( alpha < stbir__small_float )
8261- {
8262- stbir__simdf_load( i, input );
8263- stbir__simdf_store( encode, i );
8264- }
8265- else
8266- {
8267- stbir__simdf_load1frep4( ia, 1.0f / alpha );
8268- stbir__simdf_load( i, input+4 );
8269- stbir__simdf_mult( i, i, ia );
8270- stbir__simdf_store( encode, i );
8271- encode[3] = alpha;
8272- }
8273+
8274+#ifdef STBIR_SIMD8
8275+ decode += 16;
8276+ STBIR_NO_UNROLL_LOOP_START
8277+ while (decode <= end_decode) {
8278+ stbir__simdf8 d0, d1, a0, a1, p0, p1;
8279+ STBIR_NO_UNROLL(decode);
8280+ stbir__simdf8_load(d0, decode - 16);
8281+ stbir__simdf8_load(d1, decode - 16 + 8);
8282+ stbir__simdf8_0123to33333333(a0, d0);
8283+ stbir__simdf8_0123to33333333(a1, d1);
8284+ stbir__simdf8_mult(p0, a0, d0);
8285+ stbir__simdf8_mult(p1, a1, d1);
8286+ stbir__simdf8_bot4s(a0, d0, p0);
8287+ stbir__simdf8_bot4s(a1, d1, p1);
8288+ stbir__simdf8_top4s(d0, d0, p0);
8289+ stbir__simdf8_top4s(d1, d1, p1);
8290+ stbir__simdf8_store(out, a0);
8291+ stbir__simdf8_store(out + 7, d0);
8292+ stbir__simdf8_store(out + 14, a1);
8293+ stbir__simdf8_store(out + 21, d1);
8294+ decode += 16;
8295+ out += 28;
8296+ }
8297+ decode -= 16;
8298 #else
8299- if ( alpha < stbir__small_float )
8300- {
8301- encode[0] = input[0];
8302- encode[1] = input[1];
8303- encode[2] = input[2];
8304- }
8305- else
8306- {
8307- float ialpha = 1.0f / alpha;
8308- encode[0] = input[4] * ialpha;
8309- encode[1] = input[5] * ialpha;
8310- encode[2] = input[6] * ialpha;
8311- }
8312- encode[3] = alpha;
8313-#endif
8314-
8315- input += 7;
8316- encode += 4;
8317- } while ( encode < end_output );
8318-}
8319+ decode += 8;
8320+ STBIR_NO_UNROLL_LOOP_START
8321+ while (decode <= end_decode) {
8322+ stbir__simdf d0, a0, d1, a1, p0, p1;
8323+ STBIR_NO_UNROLL(decode);
8324+ stbir__simdf_load(d0, decode - 8);
8325+ stbir__simdf_load(d1, decode - 8 + 4);
8326+ stbir__simdf_0123to3333(a0, d0);
8327+ stbir__simdf_0123to3333(a1, d1);
8328+ stbir__simdf_mult(p0, a0, d0);
8329+ stbir__simdf_mult(p1, a1, d1);
8330+ stbir__simdf_store(out, d0);
8331+ stbir__simdf_store(out + 4, p0);
8332+ stbir__simdf_store(out + 7, d1);
8333+ stbir__simdf_store(out + 7 + 4, p1);
8334+ decode += 8;
8335+ out += 14;
8336+ }
8337+ decode -= 8;
8338+#endif
8339
8340-// format: [X A Xpm][X A Xpm] etc
8341-static void stbir__fancy_alpha_unweight_2ch( float * encode_buffer, int width_times_channels )
8342-{
8343- float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer;
8344- float STBIR_SIMD_STREAMOUT_PTR(*) input = encode_buffer;
8345- float const * end_output = encode_buffer + width_times_channels;
8346-
8347- do {
8348- float alpha = input[1];
8349- encode[0] = input[0];
8350- if ( alpha >= stbir__small_float )
8351- encode[0] = input[2] / alpha;
8352- encode[1] = alpha;
8353-
8354- input += 3;
8355- encode += 2;
8356- } while ( encode < end_output );
8357-}
8358-
8359-static void stbir__simple_alpha_weight_4ch( float * decode_buffer, int width_times_channels )
8360-{
8361- float STBIR_STREAMOUT_PTR(*) decode = decode_buffer;
8362- float const * end_decode = decode_buffer + width_times_channels;
8363-
8364- #ifdef STBIR_SIMD
8365- {
8366- decode += 2 * stbir__simdfX_float_count;
8367- STBIR_NO_UNROLL_LOOP_START
8368- while ( decode <= end_decode )
8369- {
8370- stbir__simdfX d0,a0,d1,a1;
8371- STBIR_NO_UNROLL(decode);
8372- stbir__simdfX_load( d0, decode-2*stbir__simdfX_float_count );
8373- stbir__simdfX_load( d1, decode-2*stbir__simdfX_float_count+stbir__simdfX_float_count );
8374- stbir__simdfX_aaa1( a0, d0, STBIR_onesX );
8375- stbir__simdfX_aaa1( a1, d1, STBIR_onesX );
8376- stbir__simdfX_mult( d0, d0, a0 );
8377- stbir__simdfX_mult( d1, d1, a1 );
8378- stbir__simdfX_store ( decode-2*stbir__simdfX_float_count, d0 );
8379- stbir__simdfX_store ( decode-2*stbir__simdfX_float_count+stbir__simdfX_float_count, d1 );
8380- decode += 2 * stbir__simdfX_float_count;
8381- }
8382- decode -= 2 * stbir__simdfX_float_count;
8383-
8384- // few last pixels remnants
8385- #ifdef STBIR_SIMD8
8386- STBIR_NO_UNROLL_LOOP_START
8387- while ( decode < end_decode )
8388- #else
8389- if ( decode < end_decode )
8390- #endif
8391- {
8392- stbir__simdf d,a;
8393- stbir__simdf_load( d, decode );
8394- stbir__simdf_aaa1( a, d, STBIR__CONSTF(STBIR_ones) );
8395- stbir__simdf_mult( d, d, a );
8396- stbir__simdf_store ( decode, d );
8397- decode += 4;
8398- }
8399- }
8400-
8401- #else
8402-
8403- while( decode < end_decode )
8404- {
8405- float alpha = decode[3];
8406- decode[0] *= alpha;
8407- decode[1] *= alpha;
8408- decode[2] *= alpha;
8409- decode += 4;
8410- }
8411-
8412- #endif
8413-}
8414-
8415-static void stbir__simple_alpha_weight_2ch( float * decode_buffer, int width_times_channels )
8416-{
8417- float STBIR_STREAMOUT_PTR(*) decode = decode_buffer;
8418- float const * end_decode = decode_buffer + width_times_channels;
8419-
8420- #ifdef STBIR_SIMD
8421- decode += 2 * stbir__simdfX_float_count;
8422- STBIR_NO_UNROLL_LOOP_START
8423- while ( decode <= end_decode )
8424- {
8425- stbir__simdfX d0,a0,d1,a1;
8426- STBIR_NO_UNROLL(decode);
8427- stbir__simdfX_load( d0, decode-2*stbir__simdfX_float_count );
8428- stbir__simdfX_load( d1, decode-2*stbir__simdfX_float_count+stbir__simdfX_float_count );
8429- stbir__simdfX_a1a1( a0, d0, STBIR_onesX );
8430- stbir__simdfX_a1a1( a1, d1, STBIR_onesX );
8431- stbir__simdfX_mult( d0, d0, a0 );
8432- stbir__simdfX_mult( d1, d1, a1 );
8433- stbir__simdfX_store ( decode-2*stbir__simdfX_float_count, d0 );
8434- stbir__simdfX_store ( decode-2*stbir__simdfX_float_count+stbir__simdfX_float_count, d1 );
8435- decode += 2 * stbir__simdfX_float_count;
8436- }
8437- decode -= 2 * stbir__simdfX_float_count;
8438- #endif
8439-
8440- STBIR_SIMD_NO_UNROLL_LOOP_START
8441- while( decode < end_decode )
8442- {
8443- float alpha = decode[1];
8444- STBIR_SIMD_NO_UNROLL(decode);
8445- decode[0] *= alpha;
8446- decode += 2;
8447- }
8448-}
8449-
8450-static void stbir__simple_alpha_unweight_4ch( float * encode_buffer, int width_times_channels )
8451-{
8452- float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer;
8453- float const * end_output = encode_buffer + width_times_channels;
8454-
8455- STBIR_SIMD_NO_UNROLL_LOOP_START
8456- do {
8457- float alpha = encode[3];
8458+// might be one last odd pixel
8459+#ifdef STBIR_SIMD8
8460+ STBIR_NO_UNROLL_LOOP_START
8461+ while (decode < end_decode)
8462+#else
8463+ if (decode < end_decode)
8464+#endif
8465+ {
8466+ stbir__simdf d, a, p;
8467+ STBIR_NO_UNROLL(decode);
8468+ stbir__simdf_load(d, decode);
8469+ stbir__simdf_0123to3333(a, d);
8470+ stbir__simdf_mult(p, a, d);
8471+ stbir__simdf_store(out, d);
8472+ stbir__simdf_store(out + 4, p);
8473+ decode += 4;
8474+ out += 7;
8475+ }
8476
8477-#ifdef STBIR_SIMD
8478- stbir__simdf i,ia;
8479- STBIR_SIMD_NO_UNROLL(encode);
8480- if ( alpha >= stbir__small_float )
8481- {
8482- stbir__simdf_load1frep4( ia, 1.0f / alpha );
8483- stbir__simdf_load( i, encode );
8484- stbir__simdf_mult( i, i, ia );
8485- stbir__simdf_store( encode, i );
8486- encode[3] = alpha;
8487- }
8488 #else
8489- if ( alpha >= stbir__small_float )
8490- {
8491- float ialpha = 1.0f / alpha;
8492- encode[0] *= ialpha;
8493- encode[1] *= ialpha;
8494- encode[2] *= ialpha;
8495- }
8496+
8497+ while (decode < end_decode) {
8498+ float r = decode[0], g = decode[1], b = decode[2], alpha = decode[3];
8499+ out[0] = r;
8500+ out[1] = g;
8501+ out[2] = b;
8502+ out[3] = alpha;
8503+ out[4] = r * alpha;
8504+ out[5] = g * alpha;
8505+ out[6] = b * alpha;
8506+ out += 7;
8507+ decode += 4;
8508+ }
8509+
8510 #endif
8511- encode += 4;
8512- } while ( encode < end_output );
8513 }
8514
8515-static void stbir__simple_alpha_unweight_2ch( float * encode_buffer, int width_times_channels )
8516+static void
8517+stbir__fancy_alpha_weight_2ch(float *out_buffer, int width_times_channels)
8518 {
8519- float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer;
8520- float const * end_output = encode_buffer + width_times_channels;
8521+ float STBIR_STREAMOUT_PTR(*) out = out_buffer;
8522+ float const *end_decode = out_buffer + (width_times_channels / 2) * 3;
8523+ float STBIR_STREAMOUT_PTR(*) decode =
8524+ (float *)end_decode - width_times_channels;
8525
8526- do {
8527- float alpha = encode[1];
8528- if ( alpha >= stbir__small_float )
8529- encode[0] /= alpha;
8530- encode += 2;
8531- } while ( encode < end_output );
8532-}
8533+ // for fancy alpha, turns into: [X A Xpm][X A Xpm],etc
8534
8535+#ifdef STBIR_SIMD
8536
8537-// only used in RGB->BGR or BGR->RGB
8538-static void stbir__simple_flip_3ch( float * decode_buffer, int width_times_channels )
8539+ decode += 8;
8540+ if (decode <= end_decode) {
8541+ STBIR_NO_UNROLL_LOOP_START
8542+ do {
8543+#ifdef STBIR_SIMD8
8544+ stbir__simdf8 d0, a0, p0;
8545+ STBIR_NO_UNROLL(decode);
8546+ stbir__simdf8_load(d0, decode - 8);
8547+ stbir__simdf8_0123to11331133(p0, d0);
8548+ stbir__simdf8_0123to00220022(a0, d0);
8549+ stbir__simdf8_mult(p0, p0, a0);
8550+
8551+ stbir__simdf_store2(out, stbir__if_simdf8_cast_to_simdf4(d0));
8552+ stbir__simdf_store(out + 2, stbir__if_simdf8_cast_to_simdf4(p0));
8553+ stbir__simdf_store2h(out + 3, stbir__if_simdf8_cast_to_simdf4(d0));
8554+
8555+ stbir__simdf_store2(out + 6, stbir__simdf8_gettop4(d0));
8556+ stbir__simdf_store(out + 8, stbir__simdf8_gettop4(p0));
8557+ stbir__simdf_store2h(out + 9, stbir__simdf8_gettop4(d0));
8558+#else
8559+ stbir__simdf d0, a0, d1, a1, p0, p1;
8560+ STBIR_NO_UNROLL(decode);
8561+ stbir__simdf_load(d0, decode - 8);
8562+ stbir__simdf_load(d1, decode - 8 + 4);
8563+ stbir__simdf_0123to1133(p0, d0);
8564+ stbir__simdf_0123to1133(p1, d1);
8565+ stbir__simdf_0123to0022(a0, d0);
8566+ stbir__simdf_0123to0022(a1, d1);
8567+ stbir__simdf_mult(p0, p0, a0);
8568+ stbir__simdf_mult(p1, p1, a1);
8569+
8570+ stbir__simdf_store2(out, d0);
8571+ stbir__simdf_store(out + 2, p0);
8572+ stbir__simdf_store2h(out + 3, d0);
8573+
8574+ stbir__simdf_store2(out + 6, d1);
8575+ stbir__simdf_store(out + 8, p1);
8576+ stbir__simdf_store2h(out + 9, d1);
8577+#endif
8578+ decode += 8;
8579+ out += 12;
8580+ } while (decode <= end_decode);
8581+ }
8582+ decode -= 8;
8583+#endif
8584+
8585+ STBIR_SIMD_NO_UNROLL_LOOP_START
8586+ while (decode < end_decode) {
8587+ float x = decode[0], y = decode[1];
8588+ STBIR_SIMD_NO_UNROLL(decode);
8589+ out[0] = x;
8590+ out[1] = y;
8591+ out[2] = x * y;
8592+ out += 3;
8593+ decode += 2;
8594+ }
8595+}
8596+
8597+static void
8598+stbir__fancy_alpha_unweight_4ch(float *encode_buffer, int width_times_channels)
8599 {
8600- float STBIR_STREAMOUT_PTR(*) decode = decode_buffer;
8601- float const * end_decode = decode_buffer + width_times_channels;
8602+ float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer;
8603+ float STBIR_SIMD_STREAMOUT_PTR(*) input = encode_buffer;
8604+ float const *end_output = encode_buffer + width_times_channels;
8605+
8606+ // fancy RGBA is stored internally as R G B A Rpm Gpm Bpm
8607
8608+ STBIR_SIMD_NO_UNROLL_LOOP_START
8609+ do {
8610+ float alpha = input[3];
8611 #ifdef STBIR_SIMD
8612- #ifdef stbir__simdf_swiz2 // do we have two argument swizzles?
8613- end_decode -= 12;
8614- STBIR_NO_UNROLL_LOOP_START
8615- while( decode <= end_decode )
8616- {
8617- // on arm64 8 instructions, no overlapping stores
8618- stbir__simdf a,b,c,na,nb;
8619- STBIR_SIMD_NO_UNROLL(decode);
8620- stbir__simdf_load( a, decode );
8621- stbir__simdf_load( b, decode+4 );
8622- stbir__simdf_load( c, decode+8 );
8623-
8624- na = stbir__simdf_swiz2( a, b, 2, 1, 0, 5 );
8625- b = stbir__simdf_swiz2( a, b, 4, 3, 6, 7 );
8626- nb = stbir__simdf_swiz2( b, c, 0, 1, 4, 3 );
8627- c = stbir__simdf_swiz2( b, c, 2, 7, 6, 5 );
8628-
8629- stbir__simdf_store( decode, na );
8630- stbir__simdf_store( decode+4, nb );
8631- stbir__simdf_store( decode+8, c );
8632- decode += 12;
8633- }
8634- end_decode += 12;
8635- #else
8636- end_decode -= 24;
8637- STBIR_NO_UNROLL_LOOP_START
8638- while( decode <= end_decode )
8639- {
8640- // 26 instructions on x64
8641- stbir__simdf a,b,c,d,e,f,g;
8642- float i21, i23;
8643- STBIR_SIMD_NO_UNROLL(decode);
8644- stbir__simdf_load( a, decode );
8645- stbir__simdf_load( b, decode+3 );
8646- stbir__simdf_load( c, decode+6 );
8647- stbir__simdf_load( d, decode+9 );
8648- stbir__simdf_load( e, decode+12 );
8649- stbir__simdf_load( f, decode+15 );
8650- stbir__simdf_load( g, decode+18 );
8651-
8652- a = stbir__simdf_swiz( a, 2, 1, 0, 3 );
8653- b = stbir__simdf_swiz( b, 2, 1, 0, 3 );
8654- c = stbir__simdf_swiz( c, 2, 1, 0, 3 );
8655- d = stbir__simdf_swiz( d, 2, 1, 0, 3 );
8656- e = stbir__simdf_swiz( e, 2, 1, 0, 3 );
8657- f = stbir__simdf_swiz( f, 2, 1, 0, 3 );
8658- g = stbir__simdf_swiz( g, 2, 1, 0, 3 );
8659-
8660- // stores overlap, need to be in order,
8661- stbir__simdf_store( decode, a );
8662- i21 = decode[21];
8663- stbir__simdf_store( decode+3, b );
8664- i23 = decode[23];
8665- stbir__simdf_store( decode+6, c );
8666- stbir__simdf_store( decode+9, d );
8667- stbir__simdf_store( decode+12, e );
8668- stbir__simdf_store( decode+15, f );
8669- stbir__simdf_store( decode+18, g );
8670- decode[21] = i23;
8671- decode[23] = i21;
8672- decode += 24;
8673- }
8674- end_decode += 24;
8675- #endif
8676+ stbir__simdf i, ia;
8677+ STBIR_SIMD_NO_UNROLL(encode);
8678+ if (alpha < stbir__small_float) {
8679+ stbir__simdf_load(i, input);
8680+ stbir__simdf_store(encode, i);
8681+ } else {
8682+ stbir__simdf_load1frep4(ia, 1.0f / alpha);
8683+ stbir__simdf_load(i, input + 4);
8684+ stbir__simdf_mult(i, i, ia);
8685+ stbir__simdf_store(encode, i);
8686+ encode[3] = alpha;
8687+ }
8688 #else
8689- end_decode -= 12;
8690- STBIR_NO_UNROLL_LOOP_START
8691- while( decode <= end_decode )
8692- {
8693- // 16 instructions
8694- float t0,t1,t2,t3;
8695- STBIR_NO_UNROLL(decode);
8696- t0 = decode[0]; t1 = decode[3]; t2 = decode[6]; t3 = decode[9];
8697- decode[0] = decode[2]; decode[3] = decode[5]; decode[6] = decode[8]; decode[9] = decode[11];
8698- decode[2] = t0; decode[5] = t1; decode[8] = t2; decode[11] = t3;
8699- decode += 12;
8700- }
8701- end_decode += 12;
8702-#endif
8703-
8704- STBIR_NO_UNROLL_LOOP_START
8705- while( decode < end_decode )
8706- {
8707- float t = decode[0];
8708- STBIR_NO_UNROLL(decode);
8709- decode[0] = decode[2];
8710- decode[2] = t;
8711- decode += 3;
8712- }
8713-}
8714-
8715-
8716-
8717-static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float * output_buffer STBIR_ONLY_PROFILE_GET_SPLIT_INFO )
8718-{
8719- int channels = stbir_info->channels;
8720- int effective_channels = stbir_info->effective_channels;
8721- int input_sample_in_bytes = stbir__type_size[stbir_info->input_type] * channels;
8722- stbir_edge edge_horizontal = stbir_info->horizontal.edge;
8723- stbir_edge edge_vertical = stbir_info->vertical.edge;
8724- int row = stbir__edge_wrap(edge_vertical, n, stbir_info->vertical.scale_info.input_full_size);
8725- const void* input_plane_data = ( (char *) stbir_info->input_data ) + (size_t)row * (size_t) stbir_info->input_stride_bytes;
8726- stbir__span const * spans = stbir_info->scanline_extents.spans;
8727- float * full_decode_buffer = output_buffer - stbir_info->scanline_extents.conservative.n0 * effective_channels;
8728- float * last_decoded = 0;
8729-
8730- // if we are on edge_zero, and we get in here with an out of bounds n, then the calculate filters has failed
8731- STBIR_ASSERT( !(edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->vertical.scale_info.input_full_size)) );
8732-
8733- do
8734- {
8735- float * decode_buffer;
8736- void const * input_data;
8737- float * end_decode;
8738- int width_times_channels;
8739- int width;
8740-
8741- if ( spans->n1 < spans->n0 )
8742- break;
8743-
8744- width = spans->n1 + 1 - spans->n0;
8745- decode_buffer = full_decode_buffer + spans->n0 * effective_channels;
8746- end_decode = full_decode_buffer + ( spans->n1 + 1 ) * effective_channels;
8747- width_times_channels = width * channels;
8748-
8749- // read directly out of input plane by default
8750- input_data = ( (char*)input_plane_data ) + spans->pixel_offset_for_input * input_sample_in_bytes;
8751-
8752- // if we have an input callback, call it to get the input data
8753- if ( stbir_info->in_pixels_cb )
8754- {
8755- // call the callback with a temp buffer (that they can choose to use or not). the temp is just right aligned memory in the decode_buffer itself
8756- input_data = stbir_info->in_pixels_cb( ( (char*) end_decode ) - ( width * input_sample_in_bytes ) + ( ( stbir_info->input_type != STBIR_TYPE_FLOAT ) ? ( sizeof(float)*STBIR_INPUT_CALLBACK_PADDING ) : 0 ), input_plane_data, width, spans->pixel_offset_for_input, row, stbir_info->user_data );
8757- }
8758-
8759- STBIR_PROFILE_START( decode );
8760- // convert the pixels info the float decode_buffer, (we index from end_decode, so that when channels<effective_channels, we are right justified in the buffer)
8761- last_decoded = stbir_info->decode_pixels( (float*)end_decode - width_times_channels, width_times_channels, input_data );
8762- STBIR_PROFILE_END( decode );
8763-
8764- if (stbir_info->alpha_weight)
8765- {
8766- STBIR_PROFILE_START( alpha );
8767- stbir_info->alpha_weight( decode_buffer, width_times_channels );
8768- STBIR_PROFILE_END( alpha );
8769- }
8770-
8771- ++spans;
8772- } while ( spans <= ( &stbir_info->scanline_extents.spans[1] ) );
8773-
8774- // handle the edge_wrap filter (all other types are handled back out at the calculate_filter stage)
8775- // basically the idea here is that if we have the whole scanline in memory, we don't redecode the
8776- // wrapped edge pixels, and instead just memcpy them from the scanline into the edge positions
8777- if ( ( edge_horizontal == STBIR_EDGE_WRAP ) && ( stbir_info->scanline_extents.edge_sizes[0] | stbir_info->scanline_extents.edge_sizes[1] ) )
8778- {
8779- // this code only runs if we're in edge_wrap, and we're doing the entire scanline
8780- int e, start_x[2];
8781- int input_full_size = stbir_info->horizontal.scale_info.input_full_size;
8782-
8783- start_x[0] = -stbir_info->scanline_extents.edge_sizes[0]; // left edge start x
8784- start_x[1] = input_full_size; // right edge
8785-
8786- for( e = 0; e < 2 ; e++ )
8787- {
8788- // do each margin
8789- int margin = stbir_info->scanline_extents.edge_sizes[e];
8790- if ( margin )
8791- {
8792- int x = start_x[e];
8793- float * marg = full_decode_buffer + x * effective_channels;
8794- float const * src = full_decode_buffer + stbir__edge_wrap(edge_horizontal, x, input_full_size) * effective_channels;
8795- STBIR_MEMCPY( marg, src, margin * effective_channels * sizeof(float) );
8796- if ( e == 1 ) last_decoded = marg + margin * effective_channels;
8797- }
8798- }
8799- }
8800-
8801- // some of the horizontal gathers read one float off the edge (which is masked out), but we force a zero here to make sure no NaNs leak in
8802- // (we can't pre-zero it, because the input callback can use that area as padding)
8803- last_decoded[0] = 0.0f;
8804-
8805- // we clear this extra float, because the final output pixel filter kernel might have used one less coeff than the max filter width
8806- // when this happens, we do read that pixel from the input, so it too could be Nan, so just zero an extra one.
8807- // this fits because each scanline is padded by three floats (STBIR_INPUT_CALLBACK_PADDING)
8808- last_decoded[1] = 0.0f;
8809+ if (alpha < stbir__small_float) {
8810+ encode[0] = input[0];
8811+ encode[1] = input[1];
8812+ encode[2] = input[2];
8813+ } else {
8814+ float ialpha = 1.0f / alpha;
8815+ encode[0] = input[4] * ialpha;
8816+ encode[1] = input[5] * ialpha;
8817+ encode[2] = input[6] * ialpha;
8818+ }
8819+ encode[3] = alpha;
8820+#endif
8821+
8822+ input += 7;
8823+ encode += 4;
8824+ } while (encode < end_output);
8825 }
8826
8827+// format: [X A Xpm][X A Xpm] etc
8828+static void
8829+stbir__fancy_alpha_unweight_2ch(float *encode_buffer, int width_times_channels)
8830+{
8831+ float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer;
8832+ float STBIR_SIMD_STREAMOUT_PTR(*) input = encode_buffer;
8833+ float const *end_output = encode_buffer + width_times_channels;
8834
8835-//=================
8836-// Do 1 channel horizontal routines
8837+ do {
8838+ float alpha = input[1];
8839+ encode[0] = input[0];
8840+ if (alpha >= stbir__small_float) {
8841+ encode[0] = input[2] / alpha;
8842+ }
8843+ encode[1] = alpha;
8844+
8845+ input += 3;
8846+ encode += 2;
8847+ } while (encode < end_output);
8848+}
8849+
8850+static void
8851+stbir__simple_alpha_weight_4ch(float *decode_buffer, int width_times_channels)
8852+{
8853+ float STBIR_STREAMOUT_PTR(*) decode = decode_buffer;
8854+ float const *end_decode = decode_buffer + width_times_channels;
8855+
8856+#ifdef STBIR_SIMD
8857+ {
8858+ decode += 2 * stbir__simdfX_float_count;
8859+ STBIR_NO_UNROLL_LOOP_START
8860+ while (decode <= end_decode) {
8861+ stbir__simdfX d0, a0, d1, a1;
8862+ STBIR_NO_UNROLL(decode);
8863+ stbir__simdfX_load(d0, decode - 2 * stbir__simdfX_float_count);
8864+ stbir__simdfX_load(d1, decode - 2 * stbir__simdfX_float_count +
8865+ stbir__simdfX_float_count);
8866+ stbir__simdfX_aaa1(a0, d0, STBIR_onesX);
8867+ stbir__simdfX_aaa1(a1, d1, STBIR_onesX);
8868+ stbir__simdfX_mult(d0, d0, a0);
8869+ stbir__simdfX_mult(d1, d1, a1);
8870+ stbir__simdfX_store(decode - 2 * stbir__simdfX_float_count, d0);
8871+ stbir__simdfX_store(decode - 2 * stbir__simdfX_float_count +
8872+ stbir__simdfX_float_count,
8873+ d1);
8874+ decode += 2 * stbir__simdfX_float_count;
8875+ }
8876+ decode -= 2 * stbir__simdfX_float_count;
8877+
8878+// few last pixels remnants
8879+#ifdef STBIR_SIMD8
8880+ STBIR_NO_UNROLL_LOOP_START
8881+ while (decode < end_decode)
8882+#else
8883+ if (decode < end_decode)
8884+#endif
8885+ {
8886+ stbir__simdf d, a;
8887+ stbir__simdf_load(d, decode);
8888+ stbir__simdf_aaa1(a, d, STBIR__CONSTF(STBIR_ones));
8889+ stbir__simdf_mult(d, d, a);
8890+ stbir__simdf_store(decode, d);
8891+ decode += 4;
8892+ }
8893+ }
8894+
8895+#else
8896+
8897+ while (decode < end_decode) {
8898+ float alpha = decode[3];
8899+ decode[0] *= alpha;
8900+ decode[1] *= alpha;
8901+ decode[2] *= alpha;
8902+ decode += 4;
8903+ }
8904+
8905+#endif
8906+}
8907+
8908+static void
8909+stbir__simple_alpha_weight_2ch(float *decode_buffer, int width_times_channels)
8910+{
8911+ float STBIR_STREAMOUT_PTR(*) decode = decode_buffer;
8912+ float const *end_decode = decode_buffer + width_times_channels;
8913
8914 #ifdef STBIR_SIMD
8915+ decode += 2 * stbir__simdfX_float_count;
8916+ STBIR_NO_UNROLL_LOOP_START
8917+ while (decode <= end_decode) {
8918+ stbir__simdfX d0, a0, d1, a1;
8919+ STBIR_NO_UNROLL(decode);
8920+ stbir__simdfX_load(d0, decode - 2 * stbir__simdfX_float_count);
8921+ stbir__simdfX_load(d1, decode - 2 * stbir__simdfX_float_count +
8922+ stbir__simdfX_float_count);
8923+ stbir__simdfX_a1a1(a0, d0, STBIR_onesX);
8924+ stbir__simdfX_a1a1(a1, d1, STBIR_onesX);
8925+ stbir__simdfX_mult(d0, d0, a0);
8926+ stbir__simdfX_mult(d1, d1, a1);
8927+ stbir__simdfX_store(decode - 2 * stbir__simdfX_float_count, d0);
8928+ stbir__simdfX_store(decode - 2 * stbir__simdfX_float_count +
8929+ stbir__simdfX_float_count,
8930+ d1);
8931+ decode += 2 * stbir__simdfX_float_count;
8932+ }
8933+ decode -= 2 * stbir__simdfX_float_count;
8934+#endif
8935+
8936+ STBIR_SIMD_NO_UNROLL_LOOP_START
8937+ while (decode < end_decode) {
8938+ float alpha = decode[1];
8939+ STBIR_SIMD_NO_UNROLL(decode);
8940+ decode[0] *= alpha;
8941+ decode += 2;
8942+ }
8943+}
8944
8945-#define stbir__1_coeff_only() \
8946- stbir__simdf tot,c; \
8947- STBIR_SIMD_NO_UNROLL(decode); \
8948- stbir__simdf_load1( c, hc ); \
8949- stbir__simdf_mult1_mem( tot, c, decode );
8950-
8951-#define stbir__2_coeff_only() \
8952- stbir__simdf tot,c,d; \
8953- STBIR_SIMD_NO_UNROLL(decode); \
8954- stbir__simdf_load2z( c, hc ); \
8955- stbir__simdf_load2( d, decode ); \
8956- stbir__simdf_mult( tot, c, d ); \
8957- stbir__simdf_0123to1230( c, tot ); \
8958- stbir__simdf_add1( tot, tot, c );
8959-
8960-#define stbir__3_coeff_only() \
8961- stbir__simdf tot,c,t; \
8962- STBIR_SIMD_NO_UNROLL(decode); \
8963- stbir__simdf_load( c, hc ); \
8964- stbir__simdf_mult_mem( tot, c, decode ); \
8965- stbir__simdf_0123to1230( c, tot ); \
8966- stbir__simdf_0123to2301( t, tot ); \
8967- stbir__simdf_add1( tot, tot, c ); \
8968- stbir__simdf_add1( tot, tot, t );
8969-
8970-#define stbir__store_output_tiny() \
8971- stbir__simdf_store1( output, tot ); \
8972- horizontal_coefficients += coefficient_width; \
8973- ++horizontal_contributors; \
8974- output += 1;
8975-
8976-#define stbir__4_coeff_start() \
8977- stbir__simdf tot,c; \
8978- STBIR_SIMD_NO_UNROLL(decode); \
8979- stbir__simdf_load( c, hc ); \
8980- stbir__simdf_mult_mem( tot, c, decode ); \
8981-
8982-#define stbir__4_coeff_continue_from_4( ofs ) \
8983- STBIR_SIMD_NO_UNROLL(decode); \
8984- stbir__simdf_load( c, hc + (ofs) ); \
8985- stbir__simdf_madd_mem( tot, tot, c, decode+(ofs) );
8986-
8987-#define stbir__1_coeff_remnant( ofs ) \
8988- { stbir__simdf d; \
8989- stbir__simdf_load1z( c, hc + (ofs) ); \
8990- stbir__simdf_load1( d, decode + (ofs) ); \
8991- stbir__simdf_madd( tot, tot, d, c ); }
8992-
8993-#define stbir__2_coeff_remnant( ofs ) \
8994- { stbir__simdf d; \
8995- stbir__simdf_load2z( c, hc+(ofs) ); \
8996- stbir__simdf_load2( d, decode+(ofs) ); \
8997- stbir__simdf_madd( tot, tot, d, c ); }
8998-
8999-#define stbir__3_coeff_setup() \
9000- stbir__simdf mask; \
9001- stbir__simdf_load( mask, STBIR_mask + 3 );
9002-
9003-#define stbir__3_coeff_remnant( ofs ) \
9004- stbir__simdf_load( c, hc+(ofs) ); \
9005- stbir__simdf_and( c, c, mask ); \
9006- stbir__simdf_madd_mem( tot, tot, c, decode+(ofs) );
9007-
9008-#define stbir__store_output() \
9009- stbir__simdf_0123to2301( c, tot ); \
9010- stbir__simdf_add( tot, tot, c ); \
9011- stbir__simdf_0123to1230( c, tot ); \
9012- stbir__simdf_add1( tot, tot, c ); \
9013- stbir__simdf_store1( output, tot ); \
9014- horizontal_coefficients += coefficient_width; \
9015- ++horizontal_contributors; \
9016- output += 1;
9017+static void
9018+stbir__simple_alpha_unweight_4ch(float *encode_buffer, int width_times_channels)
9019+{
9020+ float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer;
9021+ float const *end_output = encode_buffer + width_times_channels;
9022+
9023+ STBIR_SIMD_NO_UNROLL_LOOP_START
9024+ do {
9025+ float alpha = encode[3];
9026
9027+#ifdef STBIR_SIMD
9028+ stbir__simdf i, ia;
9029+ STBIR_SIMD_NO_UNROLL(encode);
9030+ if (alpha >= stbir__small_float) {
9031+ stbir__simdf_load1frep4(ia, 1.0f / alpha);
9032+ stbir__simdf_load(i, encode);
9033+ stbir__simdf_mult(i, i, ia);
9034+ stbir__simdf_store(encode, i);
9035+ encode[3] = alpha;
9036+ }
9037 #else
9038+ if (alpha >= stbir__small_float) {
9039+ float ialpha = 1.0f / alpha;
9040+ encode[0] *= ialpha;
9041+ encode[1] *= ialpha;
9042+ encode[2] *= ialpha;
9043+ }
9044+#endif
9045+ encode += 4;
9046+ } while (encode < end_output);
9047+}
9048
9049-#define stbir__1_coeff_only() \
9050- float tot; \
9051- tot = decode[0]*hc[0];
9052+static void
9053+stbir__simple_alpha_unweight_2ch(float *encode_buffer, int width_times_channels)
9054+{
9055+ float STBIR_SIMD_STREAMOUT_PTR(*) encode = encode_buffer;
9056+ float const *end_output = encode_buffer + width_times_channels;
9057
9058-#define stbir__2_coeff_only() \
9059- float tot; \
9060- tot = decode[0] * hc[0]; \
9061- tot += decode[1] * hc[1];
9062+ do {
9063+ float alpha = encode[1];
9064+ if (alpha >= stbir__small_float) {
9065+ encode[0] /= alpha;
9066+ }
9067+ encode += 2;
9068+ } while (encode < end_output);
9069+}
9070
9071-#define stbir__3_coeff_only() \
9072- float tot; \
9073- tot = decode[0] * hc[0]; \
9074- tot += decode[1] * hc[1]; \
9075- tot += decode[2] * hc[2];
9076+// only used in RGB->BGR or BGR->RGB
9077+static void
9078+stbir__simple_flip_3ch(float *decode_buffer, int width_times_channels)
9079+{
9080+ float STBIR_STREAMOUT_PTR(*) decode = decode_buffer;
9081+ float const *end_decode = decode_buffer + width_times_channels;
9082
9083-#define stbir__store_output_tiny() \
9084- output[0] = tot; \
9085- horizontal_coefficients += coefficient_width; \
9086- ++horizontal_contributors; \
9087- output += 1;
9088+#ifdef STBIR_SIMD
9089+#ifdef stbir__simdf_swiz2 // do we have two argument swizzles?
9090+ end_decode -= 12;
9091+ STBIR_NO_UNROLL_LOOP_START
9092+ while (decode <= end_decode) {
9093+ // on arm64 8 instructions, no overlapping stores
9094+ stbir__simdf a, b, c, na, nb;
9095+ STBIR_SIMD_NO_UNROLL(decode);
9096+ stbir__simdf_load(a, decode);
9097+ stbir__simdf_load(b, decode + 4);
9098+ stbir__simdf_load(c, decode + 8);
9099+
9100+ na = stbir__simdf_swiz2(a, b, 2, 1, 0, 5);
9101+ b = stbir__simdf_swiz2(a, b, 4, 3, 6, 7);
9102+ nb = stbir__simdf_swiz2(b, c, 0, 1, 4, 3);
9103+ c = stbir__simdf_swiz2(b, c, 2, 7, 6, 5);
9104+
9105+ stbir__simdf_store(decode, na);
9106+ stbir__simdf_store(decode + 4, nb);
9107+ stbir__simdf_store(decode + 8, c);
9108+ decode += 12;
9109+ }
9110+ end_decode += 12;
9111+#else
9112+ end_decode -= 24;
9113+ STBIR_NO_UNROLL_LOOP_START
9114+ while (decode <= end_decode) {
9115+ // 26 instructions on x64
9116+ stbir__simdf a, b, c, d, e, f, g;
9117+ float i21, i23;
9118+ STBIR_SIMD_NO_UNROLL(decode);
9119+ stbir__simdf_load(a, decode);
9120+ stbir__simdf_load(b, decode + 3);
9121+ stbir__simdf_load(c, decode + 6);
9122+ stbir__simdf_load(d, decode + 9);
9123+ stbir__simdf_load(e, decode + 12);
9124+ stbir__simdf_load(f, decode + 15);
9125+ stbir__simdf_load(g, decode + 18);
9126+
9127+ a = stbir__simdf_swiz(a, 2, 1, 0, 3);
9128+ b = stbir__simdf_swiz(b, 2, 1, 0, 3);
9129+ c = stbir__simdf_swiz(c, 2, 1, 0, 3);
9130+ d = stbir__simdf_swiz(d, 2, 1, 0, 3);
9131+ e = stbir__simdf_swiz(e, 2, 1, 0, 3);
9132+ f = stbir__simdf_swiz(f, 2, 1, 0, 3);
9133+ g = stbir__simdf_swiz(g, 2, 1, 0, 3);
9134+
9135+ // stores overlap, need to be in order,
9136+ stbir__simdf_store(decode, a);
9137+ i21 = decode[21];
9138+ stbir__simdf_store(decode + 3, b);
9139+ i23 = decode[23];
9140+ stbir__simdf_store(decode + 6, c);
9141+ stbir__simdf_store(decode + 9, d);
9142+ stbir__simdf_store(decode + 12, e);
9143+ stbir__simdf_store(decode + 15, f);
9144+ stbir__simdf_store(decode + 18, g);
9145+ decode[21] = i23;
9146+ decode[23] = i21;
9147+ decode += 24;
9148+ }
9149+ end_decode += 24;
9150+#endif
9151+#else
9152+ end_decode -= 12;
9153+ STBIR_NO_UNROLL_LOOP_START
9154+ while (decode <= end_decode) {
9155+ // 16 instructions
9156+ float t0, t1, t2, t3;
9157+ STBIR_NO_UNROLL(decode);
9158+ t0 = decode[0];
9159+ t1 = decode[3];
9160+ t2 = decode[6];
9161+ t3 = decode[9];
9162+ decode[0] = decode[2];
9163+ decode[3] = decode[5];
9164+ decode[6] = decode[8];
9165+ decode[9] = decode[11];
9166+ decode[2] = t0;
9167+ decode[5] = t1;
9168+ decode[8] = t2;
9169+ decode[11] = t3;
9170+ decode += 12;
9171+ }
9172+ end_decode += 12;
9173+#endif
9174
9175-#define stbir__4_coeff_start() \
9176- float tot0,tot1,tot2,tot3; \
9177- tot0 = decode[0] * hc[0]; \
9178- tot1 = decode[1] * hc[1]; \
9179- tot2 = decode[2] * hc[2]; \
9180- tot3 = decode[3] * hc[3];
9181+ STBIR_NO_UNROLL_LOOP_START
9182+ while (decode < end_decode) {
9183+ float t = decode[0];
9184+ STBIR_NO_UNROLL(decode);
9185+ decode[0] = decode[2];
9186+ decode[2] = t;
9187+ decode += 3;
9188+ }
9189+}
9190+
9191+static void
9192+stbir__decode_scanline(stbir__info const *stbir_info, int n,
9193+ float *output_buffer STBIR_ONLY_PROFILE_GET_SPLIT_INFO)
9194+{
9195+ int channels = stbir_info->channels;
9196+ int effective_channels = stbir_info->effective_channels;
9197+ int input_sample_in_bytes =
9198+ stbir__type_size[stbir_info->input_type] * channels;
9199+ stbir_edge edge_horizontal = stbir_info->horizontal.edge;
9200+ stbir_edge edge_vertical = stbir_info->vertical.edge;
9201+ int row = stbir__edge_wrap(edge_vertical, n,
9202+ stbir_info->vertical.scale_info.input_full_size);
9203+ const void *input_plane_data =
9204+ ((char *)stbir_info->input_data) +
9205+ (size_t)row * (size_t)stbir_info->input_stride_bytes;
9206+ stbir__span const *spans = stbir_info->scanline_extents.spans;
9207+ float *full_decode_buffer =
9208+ output_buffer -
9209+ stbir_info->scanline_extents.conservative.n0 * effective_channels;
9210+ float *last_decoded = 0;
9211+
9212+ // if we are on edge_zero, and we get in here with an out of bounds n, then
9213+ // the calculate filters has failed
9214+ STBIR_ASSERT(
9215+ !(edge_vertical == STBIR_EDGE_ZERO &&
9216+ (n < 0 || n >= stbir_info->vertical.scale_info.input_full_size)));
9217+
9218+ do {
9219+ float *decode_buffer;
9220+ void const *input_data;
9221+ float *end_decode;
9222+ int width_times_channels;
9223+ int width;
9224+
9225+ if (spans->n1 < spans->n0) {
9226+ break;
9227+ }
9228+
9229+ width = spans->n1 + 1 - spans->n0;
9230+ decode_buffer = full_decode_buffer + spans->n0 * effective_channels;
9231+ end_decode = full_decode_buffer + (spans->n1 + 1) * effective_channels;
9232+ width_times_channels = width * channels;
9233+
9234+ // read directly out of input plane by default
9235+ input_data = ((char *)input_plane_data) +
9236+ spans->pixel_offset_for_input * input_sample_in_bytes;
9237+
9238+ // if we have an input callback, call it to get the input data
9239+ if (stbir_info->in_pixels_cb) {
9240+ // call the callback with a temp buffer (that they can choose to use
9241+ // or not). the temp is just right aligned memory in the
9242+ // decode_buffer itself
9243+ input_data = stbir_info->in_pixels_cb(
9244+ ((char *)end_decode) - (width * input_sample_in_bytes) +
9245+ ((stbir_info->input_type != STBIR_TYPE_FLOAT)
9246+ ? (sizeof(float) * STBIR_INPUT_CALLBACK_PADDING)
9247+ : 0),
9248+ input_plane_data, width, spans->pixel_offset_for_input, row,
9249+ stbir_info->user_data);
9250+ }
9251+
9252+ STBIR_PROFILE_START(decode);
9253+ // convert the pixels into the float decode_buffer, (we index from
9254+ // end_decode, so that when channels<effective_channels, we are right
9255+ // justified in the buffer)
9256+ last_decoded = stbir_info->decode_pixels(
9257+ (float *)end_decode - width_times_channels, width_times_channels,
9258+ input_data);
9259+ STBIR_PROFILE_END(decode);
9260+
9261+ if (stbir_info->alpha_weight) {
9262+ STBIR_PROFILE_START(alpha);
9263+ stbir_info->alpha_weight(decode_buffer, width_times_channels);
9264+ STBIR_PROFILE_END(alpha);
9265+ }
9266+
9267+ ++spans;
9268+ } while (spans <= (&stbir_info->scanline_extents.spans[1]));
9269+
9270+ // handle the edge_wrap filter (all other types are handled back out at the
9271+ // calculate_filter stage). basically, the idea here is that if we have the
9272+ // whole scanline in memory, we don't redecode the
9273+ // wrapped edge pixels, and instead just memcpy them from the scanline
9274+ // into the edge positions
9275+ if ((edge_horizontal == STBIR_EDGE_WRAP) &&
9276+ (stbir_info->scanline_extents.edge_sizes[0] |
9277+ stbir_info->scanline_extents.edge_sizes[1])) {
9278+ // this code only runs if we're in edge_wrap, and we're doing the entire
9279+ // scanline
9280+ int e, start_x[2];
9281+ int input_full_size = stbir_info->horizontal.scale_info.input_full_size;
9282+
9283+ start_x[0] =
9284+ -stbir_info->scanline_extents.edge_sizes[0]; // left edge start x
9285+ start_x[1] = input_full_size; // right edge
9286+
9287+ for (e = 0; e < 2; e++) {
9288+ // do each margin
9289+ int margin = stbir_info->scanline_extents.edge_sizes[e];
9290+ if (margin) {
9291+ int x = start_x[e];
9292+ float *marg = full_decode_buffer + x * effective_channels;
9293+ float const *src =
9294+ full_decode_buffer +
9295+ stbir__edge_wrap(edge_horizontal, x, input_full_size) *
9296+ effective_channels;
9297+ STBIR_MEMCPY(marg, src,
9298+ margin * effective_channels * sizeof(float));
9299+ if (e == 1) {
9300+ last_decoded = marg + margin * effective_channels;
9301+ }
9302+ }
9303+ }
9304+ }
9305+
9306+ // some of the horizontal gathers read one float off the edge (which is
9307+ // masked out), but we force a zero here to make sure no NaNs leak in
9308+ // (we can't pre-zero it, because the input callback can use that area as
9309+ // padding)
9310+ last_decoded[0] = 0.0f;
9311+
9312+ // we clear this extra float, because the final output pixel filter kernel
9313+ // might have used one less coeff than the max filter width
9314+ // when this happens, we do read that pixel from the input, so it too
9315+ // could be NaN, so just zero an extra one. this fits because each
9316+ // scanline is padded by three floats (STBIR_INPUT_CALLBACK_PADDING)
9317+ last_decoded[1] = 0.0f;
9318+}
9319
9320-#define stbir__4_coeff_continue_from_4( ofs ) \
9321- tot0 += decode[0+(ofs)] * hc[0+(ofs)]; \
9322- tot1 += decode[1+(ofs)] * hc[1+(ofs)]; \
9323- tot2 += decode[2+(ofs)] * hc[2+(ofs)]; \
9324- tot3 += decode[3+(ofs)] * hc[3+(ofs)];
9325+//=================
9326+// Do 1 channel horizontal routines
9327
9328-#define stbir__1_coeff_remnant( ofs ) \
9329- tot0 += decode[0+(ofs)] * hc[0+(ofs)];
9330+#ifdef STBIR_SIMD
9331
9332-#define stbir__2_coeff_remnant( ofs ) \
9333- tot0 += decode[0+(ofs)] * hc[0+(ofs)]; \
9334- tot1 += decode[1+(ofs)] * hc[1+(ofs)]; \
9335+#define stbir__1_coeff_only() \
9336+ stbir__simdf tot, c; \
9337+ STBIR_SIMD_NO_UNROLL(decode); \
9338+ stbir__simdf_load1(c, hc); \
9339+ stbir__simdf_mult1_mem(tot, c, decode);
9340+
9341+#define stbir__2_coeff_only() \
9342+ stbir__simdf tot, c, d; \
9343+ STBIR_SIMD_NO_UNROLL(decode); \
9344+ stbir__simdf_load2z(c, hc); \
9345+ stbir__simdf_load2(d, decode); \
9346+ stbir__simdf_mult(tot, c, d); \
9347+ stbir__simdf_0123to1230(c, tot); \
9348+ stbir__simdf_add1(tot, tot, c);
9349+
9350+#define stbir__3_coeff_only() \
9351+ stbir__simdf tot, c, t; \
9352+ STBIR_SIMD_NO_UNROLL(decode); \
9353+ stbir__simdf_load(c, hc); \
9354+ stbir__simdf_mult_mem(tot, c, decode); \
9355+ stbir__simdf_0123to1230(c, tot); \
9356+ stbir__simdf_0123to2301(t, tot); \
9357+ stbir__simdf_add1(tot, tot, c); \
9358+ stbir__simdf_add1(tot, tot, t);
9359+
9360+#define stbir__store_output_tiny() \
9361+ stbir__simdf_store1(output, tot); \
9362+ horizontal_coefficients += coefficient_width; \
9363+ ++horizontal_contributors; \
9364+ output += 1;
9365+
9366+#define stbir__4_coeff_start() \
9367+ stbir__simdf tot, c; \
9368+ STBIR_SIMD_NO_UNROLL(decode); \
9369+ stbir__simdf_load(c, hc); \
9370+ stbir__simdf_mult_mem(tot, c, decode);
9371+
9372+#define stbir__4_coeff_continue_from_4(ofs) \
9373+ STBIR_SIMD_NO_UNROLL(decode); \
9374+ stbir__simdf_load(c, hc + (ofs)); \
9375+ stbir__simdf_madd_mem(tot, tot, c, decode + (ofs));
9376+
9377+#define stbir__1_coeff_remnant(ofs) \
9378+ { \
9379+ stbir__simdf d; \
9380+ stbir__simdf_load1z(c, hc + (ofs)); \
9381+ stbir__simdf_load1(d, decode + (ofs)); \
9382+ stbir__simdf_madd(tot, tot, d, c); \
9383+ }
9384+
9385+#define stbir__2_coeff_remnant(ofs) \
9386+ { \
9387+ stbir__simdf d; \
9388+ stbir__simdf_load2z(c, hc + (ofs)); \
9389+ stbir__simdf_load2(d, decode + (ofs)); \
9390+ stbir__simdf_madd(tot, tot, d, c); \
9391+ }
9392+
9393+#define stbir__3_coeff_setup() \
9394+ stbir__simdf mask; \
9395+ stbir__simdf_load(mask, STBIR_mask + 3);
9396+
9397+#define stbir__3_coeff_remnant(ofs) \
9398+ stbir__simdf_load(c, hc + (ofs)); \
9399+ stbir__simdf_and(c, c, mask); \
9400+ stbir__simdf_madd_mem(tot, tot, c, decode + (ofs));
9401+
9402+#define stbir__store_output() \
9403+ stbir__simdf_0123to2301(c, tot); \
9404+ stbir__simdf_add(tot, tot, c); \
9405+ stbir__simdf_0123to1230(c, tot); \
9406+ stbir__simdf_add1(tot, tot, c); \
9407+ stbir__simdf_store1(output, tot); \
9408+ horizontal_coefficients += coefficient_width; \
9409+ ++horizontal_contributors; \
9410+ output += 1;
9411
9412-#define stbir__3_coeff_remnant( ofs ) \
9413- tot0 += decode[0+(ofs)] * hc[0+(ofs)]; \
9414- tot1 += decode[1+(ofs)] * hc[1+(ofs)]; \
9415- tot2 += decode[2+(ofs)] * hc[2+(ofs)];
9416+#else
9417
9418-#define stbir__store_output() \
9419- output[0] = (tot0+tot2)+(tot1+tot3); \
9420- horizontal_coefficients += coefficient_width; \
9421- ++horizontal_contributors; \
9422- output += 1;
9423+#define stbir__1_coeff_only() \
9424+ float tot; \
9425+ tot = decode[0] * hc[0];
9426+
9427+#define stbir__2_coeff_only() \
9428+ float tot; \
9429+ tot = decode[0] * hc[0]; \
9430+ tot += decode[1] * hc[1];
9431+
9432+#define stbir__3_coeff_only() \
9433+ float tot; \
9434+ tot = decode[0] * hc[0]; \
9435+ tot += decode[1] * hc[1]; \
9436+ tot += decode[2] * hc[2];
9437+
9438+#define stbir__store_output_tiny() \
9439+ output[0] = tot; \
9440+ horizontal_coefficients += coefficient_width; \
9441+ ++horizontal_contributors; \
9442+ output += 1;
9443+
9444+#define stbir__4_coeff_start() \
9445+ float tot0, tot1, tot2, tot3; \
9446+ tot0 = decode[0] * hc[0]; \
9447+ tot1 = decode[1] * hc[1]; \
9448+ tot2 = decode[2] * hc[2]; \
9449+ tot3 = decode[3] * hc[3];
9450+
9451+#define stbir__4_coeff_continue_from_4(ofs) \
9452+ tot0 += decode[0 + (ofs)] * hc[0 + (ofs)]; \
9453+ tot1 += decode[1 + (ofs)] * hc[1 + (ofs)]; \
9454+ tot2 += decode[2 + (ofs)] * hc[2 + (ofs)]; \
9455+ tot3 += decode[3 + (ofs)] * hc[3 + (ofs)];
9456+
9457+#define stbir__1_coeff_remnant(ofs) tot0 += decode[0 + (ofs)] * hc[0 + (ofs)];
9458+
9459+#define stbir__2_coeff_remnant(ofs) \
9460+ tot0 += decode[0 + (ofs)] * hc[0 + (ofs)]; \
9461+ tot1 += decode[1 + (ofs)] * hc[1 + (ofs)];
9462+
9463+#define stbir__3_coeff_remnant(ofs) \
9464+ tot0 += decode[0 + (ofs)] * hc[0 + (ofs)]; \
9465+ tot1 += decode[1 + (ofs)] * hc[1 + (ofs)]; \
9466+ tot2 += decode[2 + (ofs)] * hc[2 + (ofs)];
9467+
9468+#define stbir__store_output() \
9469+ output[0] = (tot0 + tot2) + (tot1 + tot3); \
9470+ horizontal_coefficients += coefficient_width; \
9471+ ++horizontal_contributors; \
9472+ output += 1;
9473
9474 #endif
9475
9476@@ -4812,239 +5686,251 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
9477 #define STB_IMAGE_RESIZE_DO_HORIZONTALS
9478 #include STBIR__HEADER_FILENAME
9479
9480-
9481 //=================
9482 // Do 2 channel horizontal routines
9483
9484 #ifdef STBIR_SIMD
9485
9486-#define stbir__1_coeff_only() \
9487- stbir__simdf tot,c,d; \
9488- STBIR_SIMD_NO_UNROLL(decode); \
9489- stbir__simdf_load1z( c, hc ); \
9490- stbir__simdf_0123to0011( c, c ); \
9491- stbir__simdf_load2( d, decode ); \
9492- stbir__simdf_mult( tot, d, c );
9493-
9494-#define stbir__2_coeff_only() \
9495- stbir__simdf tot,c; \
9496- STBIR_SIMD_NO_UNROLL(decode); \
9497- stbir__simdf_load2( c, hc ); \
9498- stbir__simdf_0123to0011( c, c ); \
9499- stbir__simdf_mult_mem( tot, c, decode );
9500-
9501-#define stbir__3_coeff_only() \
9502- stbir__simdf tot,c,cs,d; \
9503- STBIR_SIMD_NO_UNROLL(decode); \
9504- stbir__simdf_load( cs, hc ); \
9505- stbir__simdf_0123to0011( c, cs ); \
9506- stbir__simdf_mult_mem( tot, c, decode ); \
9507- stbir__simdf_0123to2222( c, cs ); \
9508- stbir__simdf_load2z( d, decode+4 ); \
9509- stbir__simdf_madd( tot, tot, d, c );
9510-
9511-#define stbir__store_output_tiny() \
9512- stbir__simdf_0123to2301( c, tot ); \
9513- stbir__simdf_add( tot, tot, c ); \
9514- stbir__simdf_store2( output, tot ); \
9515- horizontal_coefficients += coefficient_width; \
9516- ++horizontal_contributors; \
9517- output += 2;
9518+#define stbir__1_coeff_only() \
9519+ stbir__simdf tot, c, d; \
9520+ STBIR_SIMD_NO_UNROLL(decode); \
9521+ stbir__simdf_load1z(c, hc); \
9522+ stbir__simdf_0123to0011(c, c); \
9523+ stbir__simdf_load2(d, decode); \
9524+ stbir__simdf_mult(tot, d, c);
9525+
9526+#define stbir__2_coeff_only() \
9527+ stbir__simdf tot, c; \
9528+ STBIR_SIMD_NO_UNROLL(decode); \
9529+ stbir__simdf_load2(c, hc); \
9530+ stbir__simdf_0123to0011(c, c); \
9531+ stbir__simdf_mult_mem(tot, c, decode);
9532+
9533+#define stbir__3_coeff_only() \
9534+ stbir__simdf tot, c, cs, d; \
9535+ STBIR_SIMD_NO_UNROLL(decode); \
9536+ stbir__simdf_load(cs, hc); \
9537+ stbir__simdf_0123to0011(c, cs); \
9538+ stbir__simdf_mult_mem(tot, c, decode); \
9539+ stbir__simdf_0123to2222(c, cs); \
9540+ stbir__simdf_load2z(d, decode + 4); \
9541+ stbir__simdf_madd(tot, tot, d, c);
9542+
9543+#define stbir__store_output_tiny() \
9544+ stbir__simdf_0123to2301(c, tot); \
9545+ stbir__simdf_add(tot, tot, c); \
9546+ stbir__simdf_store2(output, tot); \
9547+ horizontal_coefficients += coefficient_width; \
9548+ ++horizontal_contributors; \
9549+ output += 2;
9550
9551 #ifdef STBIR_SIMD8
9552
9553-#define stbir__4_coeff_start() \
9554- stbir__simdf8 tot0,c,cs; \
9555- STBIR_SIMD_NO_UNROLL(decode); \
9556- stbir__simdf8_load4b( cs, hc ); \
9557- stbir__simdf8_0123to00112233( c, cs ); \
9558- stbir__simdf8_mult_mem( tot0, c, decode );
9559-
9560-#define stbir__4_coeff_continue_from_4( ofs ) \
9561- STBIR_SIMD_NO_UNROLL(decode); \
9562- stbir__simdf8_load4b( cs, hc + (ofs) ); \
9563- stbir__simdf8_0123to00112233( c, cs ); \
9564- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*2 );
9565-
9566-#define stbir__1_coeff_remnant( ofs ) \
9567- { stbir__simdf t,d; \
9568- stbir__simdf_load1z( t, hc + (ofs) ); \
9569- stbir__simdf_load2( d, decode + (ofs) * 2 ); \
9570- stbir__simdf_0123to0011( t, t ); \
9571- stbir__simdf_mult( t, t, d ); \
9572- stbir__simdf8_add4( tot0, tot0, t ); }
9573-
9574-#define stbir__2_coeff_remnant( ofs ) \
9575- { stbir__simdf t; \
9576- stbir__simdf_load2( t, hc + (ofs) ); \
9577- stbir__simdf_0123to0011( t, t ); \
9578- stbir__simdf_mult_mem( t, t, decode+(ofs)*2 ); \
9579- stbir__simdf8_add4( tot0, tot0, t ); }
9580-
9581-#define stbir__3_coeff_remnant( ofs ) \
9582- { stbir__simdf8 d; \
9583- stbir__simdf8_load4b( cs, hc + (ofs) ); \
9584- stbir__simdf8_0123to00112233( c, cs ); \
9585- stbir__simdf8_load6z( d, decode+(ofs)*2 ); \
9586- stbir__simdf8_madd( tot0, tot0, c, d ); }
9587-
9588-#define stbir__store_output() \
9589- { stbir__simdf t,d; \
9590- stbir__simdf8_add4halves( t, stbir__if_simdf8_cast_to_simdf4(tot0), tot0 ); \
9591- stbir__simdf_0123to2301( d, t ); \
9592- stbir__simdf_add( t, t, d ); \
9593- stbir__simdf_store2( output, t ); \
9594- horizontal_coefficients += coefficient_width; \
9595- ++horizontal_contributors; \
9596- output += 2; }
9597+#define stbir__4_coeff_start() \
9598+ stbir__simdf8 tot0, c, cs; \
9599+ STBIR_SIMD_NO_UNROLL(decode); \
9600+ stbir__simdf8_load4b(cs, hc); \
9601+ stbir__simdf8_0123to00112233(c, cs); \
9602+ stbir__simdf8_mult_mem(tot0, c, decode);
9603+
9604+#define stbir__4_coeff_continue_from_4(ofs) \
9605+ STBIR_SIMD_NO_UNROLL(decode); \
9606+ stbir__simdf8_load4b(cs, hc + (ofs)); \
9607+ stbir__simdf8_0123to00112233(c, cs); \
9608+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 2);
9609+
9610+#define stbir__1_coeff_remnant(ofs) \
9611+ { \
9612+ stbir__simdf t, d; \
9613+ stbir__simdf_load1z(t, hc + (ofs)); \
9614+ stbir__simdf_load2(d, decode + (ofs) * 2); \
9615+ stbir__simdf_0123to0011(t, t); \
9616+ stbir__simdf_mult(t, t, d); \
9617+ stbir__simdf8_add4(tot0, tot0, t); \
9618+ }
9619+
9620+#define stbir__2_coeff_remnant(ofs) \
9621+ { \
9622+ stbir__simdf t; \
9623+ stbir__simdf_load2(t, hc + (ofs)); \
9624+ stbir__simdf_0123to0011(t, t); \
9625+ stbir__simdf_mult_mem(t, t, decode + (ofs) * 2); \
9626+ stbir__simdf8_add4(tot0, tot0, t); \
9627+ }
9628+
9629+#define stbir__3_coeff_remnant(ofs) \
9630+ { \
9631+ stbir__simdf8 d; \
9632+ stbir__simdf8_load4b(cs, hc + (ofs)); \
9633+ stbir__simdf8_0123to00112233(c, cs); \
9634+ stbir__simdf8_load6z(d, decode + (ofs) * 2); \
9635+ stbir__simdf8_madd(tot0, tot0, c, d); \
9636+ }
9637+
9638+#define stbir__store_output() \
9639+ { \
9640+ stbir__simdf t, d; \
9641+ stbir__simdf8_add4halves(t, stbir__if_simdf8_cast_to_simdf4(tot0), \
9642+ tot0); \
9643+ stbir__simdf_0123to2301(d, t); \
9644+ stbir__simdf_add(t, t, d); \
9645+ stbir__simdf_store2(output, t); \
9646+ horizontal_coefficients += coefficient_width; \
9647+ ++horizontal_contributors; \
9648+ output += 2; \
9649+ }
9650
9651 #else
9652
9653-#define stbir__4_coeff_start() \
9654- stbir__simdf tot0,tot1,c,cs; \
9655- STBIR_SIMD_NO_UNROLL(decode); \
9656- stbir__simdf_load( cs, hc ); \
9657- stbir__simdf_0123to0011( c, cs ); \
9658- stbir__simdf_mult_mem( tot0, c, decode ); \
9659- stbir__simdf_0123to2233( c, cs ); \
9660- stbir__simdf_mult_mem( tot1, c, decode+4 );
9661-
9662-#define stbir__4_coeff_continue_from_4( ofs ) \
9663- STBIR_SIMD_NO_UNROLL(decode); \
9664- stbir__simdf_load( cs, hc + (ofs) ); \
9665- stbir__simdf_0123to0011( c, cs ); \
9666- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*2 ); \
9667- stbir__simdf_0123to2233( c, cs ); \
9668- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*2+4 );
9669-
9670-#define stbir__1_coeff_remnant( ofs ) \
9671- { stbir__simdf d; \
9672- stbir__simdf_load1z( cs, hc + (ofs) ); \
9673- stbir__simdf_0123to0011( c, cs ); \
9674- stbir__simdf_load2( d, decode + (ofs) * 2 ); \
9675- stbir__simdf_madd( tot0, tot0, d, c ); }
9676-
9677-#define stbir__2_coeff_remnant( ofs ) \
9678- stbir__simdf_load2( cs, hc + (ofs) ); \
9679- stbir__simdf_0123to0011( c, cs ); \
9680- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*2 );
9681-
9682-#define stbir__3_coeff_remnant( ofs ) \
9683- { stbir__simdf d; \
9684- stbir__simdf_load( cs, hc + (ofs) ); \
9685- stbir__simdf_0123to0011( c, cs ); \
9686- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*2 ); \
9687- stbir__simdf_0123to2222( c, cs ); \
9688- stbir__simdf_load2z( d, decode + (ofs) * 2 + 4 ); \
9689- stbir__simdf_madd( tot1, tot1, d, c ); }
9690-
9691-#define stbir__store_output() \
9692- stbir__simdf_add( tot0, tot0, tot1 ); \
9693- stbir__simdf_0123to2301( c, tot0 ); \
9694- stbir__simdf_add( tot0, tot0, c ); \
9695- stbir__simdf_store2( output, tot0 ); \
9696- horizontal_coefficients += coefficient_width; \
9697- ++horizontal_contributors; \
9698- output += 2;
9699+#define stbir__4_coeff_start() \
9700+ stbir__simdf tot0, tot1, c, cs; \
9701+ STBIR_SIMD_NO_UNROLL(decode); \
9702+ stbir__simdf_load(cs, hc); \
9703+ stbir__simdf_0123to0011(c, cs); \
9704+ stbir__simdf_mult_mem(tot0, c, decode); \
9705+ stbir__simdf_0123to2233(c, cs); \
9706+ stbir__simdf_mult_mem(tot1, c, decode + 4);
9707+
9708+#define stbir__4_coeff_continue_from_4(ofs) \
9709+ STBIR_SIMD_NO_UNROLL(decode); \
9710+ stbir__simdf_load(cs, hc + (ofs)); \
9711+ stbir__simdf_0123to0011(c, cs); \
9712+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 2); \
9713+ stbir__simdf_0123to2233(c, cs); \
9714+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 2 + 4);
9715+
9716+#define stbir__1_coeff_remnant(ofs) \
9717+ { \
9718+ stbir__simdf d; \
9719+ stbir__simdf_load1z(cs, hc + (ofs)); \
9720+ stbir__simdf_0123to0011(c, cs); \
9721+ stbir__simdf_load2(d, decode + (ofs) * 2); \
9722+ stbir__simdf_madd(tot0, tot0, d, c); \
9723+ }
9724+
9725+#define stbir__2_coeff_remnant(ofs) \
9726+ stbir__simdf_load2(cs, hc + (ofs)); \
9727+ stbir__simdf_0123to0011(c, cs); \
9728+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 2);
9729+
9730+#define stbir__3_coeff_remnant(ofs) \
9731+ { \
9732+ stbir__simdf d; \
9733+ stbir__simdf_load(cs, hc + (ofs)); \
9734+ stbir__simdf_0123to0011(c, cs); \
9735+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 2); \
9736+ stbir__simdf_0123to2222(c, cs); \
9737+ stbir__simdf_load2z(d, decode + (ofs) * 2 + 4); \
9738+ stbir__simdf_madd(tot1, tot1, d, c); \
9739+ }
9740+
9741+#define stbir__store_output() \
9742+ stbir__simdf_add(tot0, tot0, tot1); \
9743+ stbir__simdf_0123to2301(c, tot0); \
9744+ stbir__simdf_add(tot0, tot0, c); \
9745+ stbir__simdf_store2(output, tot0); \
9746+ horizontal_coefficients += coefficient_width; \
9747+ ++horizontal_contributors; \
9748+ output += 2;
9749
9750 #endif
9751
9752 #else
9753
9754-#define stbir__1_coeff_only() \
9755- float tota,totb,c; \
9756- c = hc[0]; \
9757- tota = decode[0]*c; \
9758- totb = decode[1]*c;
9759-
9760-#define stbir__2_coeff_only() \
9761- float tota,totb,c; \
9762- c = hc[0]; \
9763- tota = decode[0]*c; \
9764- totb = decode[1]*c; \
9765- c = hc[1]; \
9766- tota += decode[2]*c; \
9767- totb += decode[3]*c;
9768+#define stbir__1_coeff_only() \
9769+ float tota, totb, c; \
9770+ c = hc[0]; \
9771+ tota = decode[0] * c; \
9772+ totb = decode[1] * c;
9773+
9774+#define stbir__2_coeff_only() \
9775+ float tota, totb, c; \
9776+ c = hc[0]; \
9777+ tota = decode[0] * c; \
9778+ totb = decode[1] * c; \
9779+ c = hc[1]; \
9780+ tota += decode[2] * c; \
9781+ totb += decode[3] * c;
9782
9783 // this weird order of add matches the simd
9784-#define stbir__3_coeff_only() \
9785- float tota,totb,c; \
9786- c = hc[0]; \
9787- tota = decode[0]*c; \
9788- totb = decode[1]*c; \
9789- c = hc[2]; \
9790- tota += decode[4]*c; \
9791- totb += decode[5]*c; \
9792- c = hc[1]; \
9793- tota += decode[2]*c; \
9794- totb += decode[3]*c;
9795-
9796-#define stbir__store_output_tiny() \
9797- output[0] = tota; \
9798- output[1] = totb; \
9799- horizontal_coefficients += coefficient_width; \
9800- ++horizontal_contributors; \
9801- output += 2;
9802-
9803-#define stbir__4_coeff_start() \
9804- float tota0,tota1,tota2,tota3,totb0,totb1,totb2,totb3,c; \
9805- c = hc[0]; \
9806- tota0 = decode[0]*c; \
9807- totb0 = decode[1]*c; \
9808- c = hc[1]; \
9809- tota1 = decode[2]*c; \
9810- totb1 = decode[3]*c; \
9811- c = hc[2]; \
9812- tota2 = decode[4]*c; \
9813- totb2 = decode[5]*c; \
9814- c = hc[3]; \
9815- tota3 = decode[6]*c; \
9816- totb3 = decode[7]*c;
9817-
9818-#define stbir__4_coeff_continue_from_4( ofs ) \
9819- c = hc[0+(ofs)]; \
9820- tota0 += decode[0+(ofs)*2]*c; \
9821- totb0 += decode[1+(ofs)*2]*c; \
9822- c = hc[1+(ofs)]; \
9823- tota1 += decode[2+(ofs)*2]*c; \
9824- totb1 += decode[3+(ofs)*2]*c; \
9825- c = hc[2+(ofs)]; \
9826- tota2 += decode[4+(ofs)*2]*c; \
9827- totb2 += decode[5+(ofs)*2]*c; \
9828- c = hc[3+(ofs)]; \
9829- tota3 += decode[6+(ofs)*2]*c; \
9830- totb3 += decode[7+(ofs)*2]*c;
9831-
9832-#define stbir__1_coeff_remnant( ofs ) \
9833- c = hc[0+(ofs)]; \
9834- tota0 += decode[0+(ofs)*2] * c; \
9835- totb0 += decode[1+(ofs)*2] * c;
9836-
9837-#define stbir__2_coeff_remnant( ofs ) \
9838- c = hc[0+(ofs)]; \
9839- tota0 += decode[0+(ofs)*2] * c; \
9840- totb0 += decode[1+(ofs)*2] * c; \
9841- c = hc[1+(ofs)]; \
9842- tota1 += decode[2+(ofs)*2] * c; \
9843- totb1 += decode[3+(ofs)*2] * c;
9844-
9845-#define stbir__3_coeff_remnant( ofs ) \
9846- c = hc[0+(ofs)]; \
9847- tota0 += decode[0+(ofs)*2] * c; \
9848- totb0 += decode[1+(ofs)*2] * c; \
9849- c = hc[1+(ofs)]; \
9850- tota1 += decode[2+(ofs)*2] * c; \
9851- totb1 += decode[3+(ofs)*2] * c; \
9852- c = hc[2+(ofs)]; \
9853- tota2 += decode[4+(ofs)*2] * c; \
9854- totb2 += decode[5+(ofs)*2] * c;
9855-
9856-#define stbir__store_output() \
9857- output[0] = (tota0+tota2)+(tota1+tota3); \
9858- output[1] = (totb0+totb2)+(totb1+totb3); \
9859- horizontal_coefficients += coefficient_width; \
9860- ++horizontal_contributors; \
9861- output += 2;
9862+#define stbir__3_coeff_only() \
9863+ float tota, totb, c; \
9864+ c = hc[0]; \
9865+ tota = decode[0] * c; \
9866+ totb = decode[1] * c; \
9867+ c = hc[2]; \
9868+ tota += decode[4] * c; \
9869+ totb += decode[5] * c; \
9870+ c = hc[1]; \
9871+ tota += decode[2] * c; \
9872+ totb += decode[3] * c;
9873+
9874+#define stbir__store_output_tiny() \
9875+ output[0] = tota; \
9876+ output[1] = totb; \
9877+ horizontal_coefficients += coefficient_width; \
9878+ ++horizontal_contributors; \
9879+ output += 2;
9880+
9881+#define stbir__4_coeff_start() \
9882+ float tota0, tota1, tota2, tota3, totb0, totb1, totb2, totb3, c; \
9883+ c = hc[0]; \
9884+ tota0 = decode[0] * c; \
9885+ totb0 = decode[1] * c; \
9886+ c = hc[1]; \
9887+ tota1 = decode[2] * c; \
9888+ totb1 = decode[3] * c; \
9889+ c = hc[2]; \
9890+ tota2 = decode[4] * c; \
9891+ totb2 = decode[5] * c; \
9892+ c = hc[3]; \
9893+ tota3 = decode[6] * c; \
9894+ totb3 = decode[7] * c;
9895+
9896+#define stbir__4_coeff_continue_from_4(ofs) \
9897+ c = hc[0 + (ofs)]; \
9898+ tota0 += decode[0 + (ofs) * 2] * c; \
9899+ totb0 += decode[1 + (ofs) * 2] * c; \
9900+ c = hc[1 + (ofs)]; \
9901+ tota1 += decode[2 + (ofs) * 2] * c; \
9902+ totb1 += decode[3 + (ofs) * 2] * c; \
9903+ c = hc[2 + (ofs)]; \
9904+ tota2 += decode[4 + (ofs) * 2] * c; \
9905+ totb2 += decode[5 + (ofs) * 2] * c; \
9906+ c = hc[3 + (ofs)]; \
9907+ tota3 += decode[6 + (ofs) * 2] * c; \
9908+ totb3 += decode[7 + (ofs) * 2] * c;
9909+
9910+#define stbir__1_coeff_remnant(ofs) \
9911+ c = hc[0 + (ofs)]; \
9912+ tota0 += decode[0 + (ofs) * 2] * c; \
9913+ totb0 += decode[1 + (ofs) * 2] * c;
9914+
9915+#define stbir__2_coeff_remnant(ofs) \
9916+ c = hc[0 + (ofs)]; \
9917+ tota0 += decode[0 + (ofs) * 2] * c; \
9918+ totb0 += decode[1 + (ofs) * 2] * c; \
9919+ c = hc[1 + (ofs)]; \
9920+ tota1 += decode[2 + (ofs) * 2] * c; \
9921+ totb1 += decode[3 + (ofs) * 2] * c;
9922+
9923+#define stbir__3_coeff_remnant(ofs) \
9924+ c = hc[0 + (ofs)]; \
9925+ tota0 += decode[0 + (ofs) * 2] * c; \
9926+ totb0 += decode[1 + (ofs) * 2] * c; \
9927+ c = hc[1 + (ofs)]; \
9928+ tota1 += decode[2 + (ofs) * 2] * c; \
9929+ totb1 += decode[3 + (ofs) * 2] * c; \
9930+ c = hc[2 + (ofs)]; \
9931+ tota2 += decode[4 + (ofs) * 2] * c; \
9932+ totb2 += decode[5 + (ofs) * 2] * c;
9933+
9934+#define stbir__store_output() \
9935+ output[0] = (tota0 + tota2) + (tota1 + tota3); \
9936+ output[1] = (totb0 + totb2) + (totb1 + totb3); \
9937+ horizontal_coefficients += coefficient_width; \
9938+ ++horizontal_contributors; \
9939+ output += 2;
9940
9941 #endif
9942
9943@@ -5052,300 +5938,306 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
9944 #define STB_IMAGE_RESIZE_DO_HORIZONTALS
9945 #include STBIR__HEADER_FILENAME
9946
9947-
9948 //=================
9949 // Do 3 channel horizontal routines
9950
9951 #ifdef STBIR_SIMD
9952
9953-#define stbir__1_coeff_only() \
9954- stbir__simdf tot,c,d; \
9955- STBIR_SIMD_NO_UNROLL(decode); \
9956- stbir__simdf_load1z( c, hc ); \
9957- stbir__simdf_0123to0001( c, c ); \
9958- stbir__simdf_load( d, decode ); \
9959- stbir__simdf_mult( tot, d, c );
9960-
9961-#define stbir__2_coeff_only() \
9962- stbir__simdf tot,c,cs,d; \
9963- STBIR_SIMD_NO_UNROLL(decode); \
9964- stbir__simdf_load2( cs, hc ); \
9965- stbir__simdf_0123to0000( c, cs ); \
9966- stbir__simdf_load( d, decode ); \
9967- stbir__simdf_mult( tot, d, c ); \
9968- stbir__simdf_0123to1111( c, cs ); \
9969- stbir__simdf_load( d, decode+3 ); \
9970- stbir__simdf_madd( tot, tot, d, c );
9971-
9972-#define stbir__3_coeff_only() \
9973- stbir__simdf tot,c,d,cs; \
9974- STBIR_SIMD_NO_UNROLL(decode); \
9975- stbir__simdf_load( cs, hc ); \
9976- stbir__simdf_0123to0000( c, cs ); \
9977- stbir__simdf_load( d, decode ); \
9978- stbir__simdf_mult( tot, d, c ); \
9979- stbir__simdf_0123to1111( c, cs ); \
9980- stbir__simdf_load( d, decode+3 ); \
9981- stbir__simdf_madd( tot, tot, d, c ); \
9982- stbir__simdf_0123to2222( c, cs ); \
9983- stbir__simdf_load( d, decode+6 ); \
9984- stbir__simdf_madd( tot, tot, d, c );
9985-
9986-#define stbir__store_output_tiny() \
9987- stbir__simdf_store2( output, tot ); \
9988- stbir__simdf_0123to2301( tot, tot ); \
9989- stbir__simdf_store1( output+2, tot ); \
9990- horizontal_coefficients += coefficient_width; \
9991- ++horizontal_contributors; \
9992- output += 3;
9993+#define stbir__1_coeff_only() \
9994+ stbir__simdf tot, c, d; \
9995+ STBIR_SIMD_NO_UNROLL(decode); \
9996+ stbir__simdf_load1z(c, hc); \
9997+ stbir__simdf_0123to0001(c, c); \
9998+ stbir__simdf_load(d, decode); \
9999+ stbir__simdf_mult(tot, d, c);
10000+
10001+#define stbir__2_coeff_only() \
10002+ stbir__simdf tot, c, cs, d; \
10003+ STBIR_SIMD_NO_UNROLL(decode); \
10004+ stbir__simdf_load2(cs, hc); \
10005+ stbir__simdf_0123to0000(c, cs); \
10006+ stbir__simdf_load(d, decode); \
10007+ stbir__simdf_mult(tot, d, c); \
10008+ stbir__simdf_0123to1111(c, cs); \
10009+ stbir__simdf_load(d, decode + 3); \
10010+ stbir__simdf_madd(tot, tot, d, c);
10011+
10012+#define stbir__3_coeff_only() \
10013+ stbir__simdf tot, c, d, cs; \
10014+ STBIR_SIMD_NO_UNROLL(decode); \
10015+ stbir__simdf_load(cs, hc); \
10016+ stbir__simdf_0123to0000(c, cs); \
10017+ stbir__simdf_load(d, decode); \
10018+ stbir__simdf_mult(tot, d, c); \
10019+ stbir__simdf_0123to1111(c, cs); \
10020+ stbir__simdf_load(d, decode + 3); \
10021+ stbir__simdf_madd(tot, tot, d, c); \
10022+ stbir__simdf_0123to2222(c, cs); \
10023+ stbir__simdf_load(d, decode + 6); \
10024+ stbir__simdf_madd(tot, tot, d, c);
10025+
10026+#define stbir__store_output_tiny() \
10027+ stbir__simdf_store2(output, tot); \
10028+ stbir__simdf_0123to2301(tot, tot); \
10029+ stbir__simdf_store1(output + 2, tot); \
10030+ horizontal_coefficients += coefficient_width; \
10031+ ++horizontal_contributors; \
10032+ output += 3;
10033
10034 #ifdef STBIR_SIMD8
10035
10036-// we're loading from the XXXYYY decode by -1 to get the XXXYYY into different halves of the AVX reg fyi
10037-#define stbir__4_coeff_start() \
10038- stbir__simdf8 tot0,tot1,c,cs; stbir__simdf t; \
10039- STBIR_SIMD_NO_UNROLL(decode); \
10040- stbir__simdf8_load4b( cs, hc ); \
10041- stbir__simdf8_0123to00001111( c, cs ); \
10042- stbir__simdf8_mult_mem( tot0, c, decode - 1 ); \
10043- stbir__simdf8_0123to22223333( c, cs ); \
10044- stbir__simdf8_mult_mem( tot1, c, decode+6 - 1 );
10045-
10046-#define stbir__4_coeff_continue_from_4( ofs ) \
10047- STBIR_SIMD_NO_UNROLL(decode); \
10048- stbir__simdf8_load4b( cs, hc + (ofs) ); \
10049- stbir__simdf8_0123to00001111( c, cs ); \
10050- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*3 - 1 ); \
10051- stbir__simdf8_0123to22223333( c, cs ); \
10052- stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*3 + 6 - 1 );
10053-
10054-#define stbir__1_coeff_remnant( ofs ) \
10055- STBIR_SIMD_NO_UNROLL(decode); \
10056- stbir__simdf_load1rep4( t, hc + (ofs) ); \
10057- stbir__simdf8_madd_mem4( tot0, tot0, t, decode+(ofs)*3 - 1 );
10058-
10059-#define stbir__2_coeff_remnant( ofs ) \
10060- STBIR_SIMD_NO_UNROLL(decode); \
10061- stbir__simdf8_load4b( cs, hc + (ofs) - 2 ); \
10062- stbir__simdf8_0123to22223333( c, cs ); \
10063- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*3 - 1 );
10064-
10065- #define stbir__3_coeff_remnant( ofs ) \
10066- STBIR_SIMD_NO_UNROLL(decode); \
10067- stbir__simdf8_load4b( cs, hc + (ofs) ); \
10068- stbir__simdf8_0123to00001111( c, cs ); \
10069- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*3 - 1 ); \
10070- stbir__simdf8_0123to2222( t, cs ); \
10071- stbir__simdf8_madd_mem4( tot1, tot1, t, decode+(ofs)*3 + 6 - 1 );
10072-
10073-#define stbir__store_output() \
10074- stbir__simdf8_add( tot0, tot0, tot1 ); \
10075- stbir__simdf_0123to1230( t, stbir__if_simdf8_cast_to_simdf4( tot0 ) ); \
10076- stbir__simdf8_add4halves( t, t, tot0 ); \
10077- horizontal_coefficients += coefficient_width; \
10078- ++horizontal_contributors; \
10079- output += 3; \
10080- if ( output < output_end ) \
10081- { \
10082- stbir__simdf_store( output-3, t ); \
10083- continue; \
10084- } \
10085- { stbir__simdf tt; stbir__simdf_0123to2301( tt, t ); \
10086- stbir__simdf_store2( output-3, t ); \
10087- stbir__simdf_store1( output+2-3, tt ); } \
10088- break;
10089-
10090+// we're loading from the XXXYYY decode by -1 to get the XXXYYY into different
10091+// halves of the AVX reg fyi
10092+#define stbir__4_coeff_start() \
10093+ stbir__simdf8 tot0, tot1, c, cs; \
10094+ stbir__simdf t; \
10095+ STBIR_SIMD_NO_UNROLL(decode); \
10096+ stbir__simdf8_load4b(cs, hc); \
10097+ stbir__simdf8_0123to00001111(c, cs); \
10098+ stbir__simdf8_mult_mem(tot0, c, decode - 1); \
10099+ stbir__simdf8_0123to22223333(c, cs); \
10100+ stbir__simdf8_mult_mem(tot1, c, decode + 6 - 1);
10101+
10102+#define stbir__4_coeff_continue_from_4(ofs) \
10103+ STBIR_SIMD_NO_UNROLL(decode); \
10104+ stbir__simdf8_load4b(cs, hc + (ofs)); \
10105+ stbir__simdf8_0123to00001111(c, cs); \
10106+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 3 - 1); \
10107+ stbir__simdf8_0123to22223333(c, cs); \
10108+ stbir__simdf8_madd_mem(tot1, tot1, c, decode + (ofs) * 3 + 6 - 1);
10109+
10110+#define stbir__1_coeff_remnant(ofs) \
10111+ STBIR_SIMD_NO_UNROLL(decode); \
10112+ stbir__simdf_load1rep4(t, hc + (ofs)); \
10113+ stbir__simdf8_madd_mem4(tot0, tot0, t, decode + (ofs) * 3 - 1);
10114+
10115+#define stbir__2_coeff_remnant(ofs) \
10116+ STBIR_SIMD_NO_UNROLL(decode); \
10117+ stbir__simdf8_load4b(cs, hc + (ofs) - 2); \
10118+ stbir__simdf8_0123to22223333(c, cs); \
10119+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 3 - 1);
10120+
10121+#define stbir__3_coeff_remnant(ofs) \
10122+ STBIR_SIMD_NO_UNROLL(decode); \
10123+ stbir__simdf8_load4b(cs, hc + (ofs)); \
10124+ stbir__simdf8_0123to00001111(c, cs); \
10125+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 3 - 1); \
10126+ stbir__simdf8_0123to2222(t, cs); \
10127+ stbir__simdf8_madd_mem4(tot1, tot1, t, decode + (ofs) * 3 + 6 - 1);
10128+
10129+#define stbir__store_output() \
10130+ stbir__simdf8_add(tot0, tot0, tot1); \
10131+ stbir__simdf_0123to1230(t, stbir__if_simdf8_cast_to_simdf4(tot0)); \
10132+ stbir__simdf8_add4halves(t, t, tot0); \
10133+ horizontal_coefficients += coefficient_width; \
10134+ ++horizontal_contributors; \
10135+ output += 3; \
10136+ if (output < output_end) { \
10137+ stbir__simdf_store(output - 3, t); \
10138+ continue; \
10139+ } \
10140+ { \
10141+ stbir__simdf tt; \
10142+ stbir__simdf_0123to2301(tt, t); \
10143+ stbir__simdf_store2(output - 3, t); \
10144+ stbir__simdf_store1(output + 2 - 3, tt); \
10145+ } \
10146+ break;
10147
10148 #else
10149
10150-#define stbir__4_coeff_start() \
10151- stbir__simdf tot0,tot1,tot2,c,cs; \
10152- STBIR_SIMD_NO_UNROLL(decode); \
10153- stbir__simdf_load( cs, hc ); \
10154- stbir__simdf_0123to0001( c, cs ); \
10155- stbir__simdf_mult_mem( tot0, c, decode ); \
10156- stbir__simdf_0123to1122( c, cs ); \
10157- stbir__simdf_mult_mem( tot1, c, decode+4 ); \
10158- stbir__simdf_0123to2333( c, cs ); \
10159- stbir__simdf_mult_mem( tot2, c, decode+8 );
10160-
10161-#define stbir__4_coeff_continue_from_4( ofs ) \
10162- STBIR_SIMD_NO_UNROLL(decode); \
10163- stbir__simdf_load( cs, hc + (ofs) ); \
10164- stbir__simdf_0123to0001( c, cs ); \
10165- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*3 ); \
10166- stbir__simdf_0123to1122( c, cs ); \
10167- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*3+4 ); \
10168- stbir__simdf_0123to2333( c, cs ); \
10169- stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*3+8 );
10170-
10171-#define stbir__1_coeff_remnant( ofs ) \
10172- STBIR_SIMD_NO_UNROLL(decode); \
10173- stbir__simdf_load1z( c, hc + (ofs) ); \
10174- stbir__simdf_0123to0001( c, c ); \
10175- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*3 );
10176-
10177-#define stbir__2_coeff_remnant( ofs ) \
10178- { stbir__simdf d; \
10179- STBIR_SIMD_NO_UNROLL(decode); \
10180- stbir__simdf_load2z( cs, hc + (ofs) ); \
10181- stbir__simdf_0123to0001( c, cs ); \
10182- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*3 ); \
10183- stbir__simdf_0123to1122( c, cs ); \
10184- stbir__simdf_load2z( d, decode+(ofs)*3+4 ); \
10185- stbir__simdf_madd( tot1, tot1, c, d ); }
10186-
10187-#define stbir__3_coeff_remnant( ofs ) \
10188- { stbir__simdf d; \
10189- STBIR_SIMD_NO_UNROLL(decode); \
10190- stbir__simdf_load( cs, hc + (ofs) ); \
10191- stbir__simdf_0123to0001( c, cs ); \
10192- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*3 ); \
10193- stbir__simdf_0123to1122( c, cs ); \
10194- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*3+4 ); \
10195- stbir__simdf_0123to2222( c, cs ); \
10196- stbir__simdf_load1z( d, decode+(ofs)*3+8 ); \
10197- stbir__simdf_madd( tot2, tot2, c, d ); }
10198-
10199-#define stbir__store_output() \
10200- stbir__simdf_0123ABCDto3ABx( c, tot0, tot1 ); \
10201- stbir__simdf_0123ABCDto23Ax( cs, tot1, tot2 ); \
10202- stbir__simdf_0123to1230( tot2, tot2 ); \
10203- stbir__simdf_add( tot0, tot0, cs ); \
10204- stbir__simdf_add( c, c, tot2 ); \
10205- stbir__simdf_add( tot0, tot0, c ); \
10206- horizontal_coefficients += coefficient_width; \
10207- ++horizontal_contributors; \
10208- output += 3; \
10209- if ( output < output_end ) \
10210- { \
10211- stbir__simdf_store( output-3, tot0 ); \
10212- continue; \
10213- } \
10214- stbir__simdf_0123to2301( tot1, tot0 ); \
10215- stbir__simdf_store2( output-3, tot0 ); \
10216- stbir__simdf_store1( output+2-3, tot1 ); \
10217- break;
10218+#define stbir__4_coeff_start() \
10219+ stbir__simdf tot0, tot1, tot2, c, cs; \
10220+ STBIR_SIMD_NO_UNROLL(decode); \
10221+ stbir__simdf_load(cs, hc); \
10222+ stbir__simdf_0123to0001(c, cs); \
10223+ stbir__simdf_mult_mem(tot0, c, decode); \
10224+ stbir__simdf_0123to1122(c, cs); \
10225+ stbir__simdf_mult_mem(tot1, c, decode + 4); \
10226+ stbir__simdf_0123to2333(c, cs); \
10227+ stbir__simdf_mult_mem(tot2, c, decode + 8);
10228+
10229+#define stbir__4_coeff_continue_from_4(ofs) \
10230+ STBIR_SIMD_NO_UNROLL(decode); \
10231+ stbir__simdf_load(cs, hc + (ofs)); \
10232+ stbir__simdf_0123to0001(c, cs); \
10233+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 3); \
10234+ stbir__simdf_0123to1122(c, cs); \
10235+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 3 + 4); \
10236+ stbir__simdf_0123to2333(c, cs); \
10237+ stbir__simdf_madd_mem(tot2, tot2, c, decode + (ofs) * 3 + 8);
10238+
10239+#define stbir__1_coeff_remnant(ofs) \
10240+ STBIR_SIMD_NO_UNROLL(decode); \
10241+ stbir__simdf_load1z(c, hc + (ofs)); \
10242+ stbir__simdf_0123to0001(c, c); \
10243+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 3);
10244+
10245+#define stbir__2_coeff_remnant(ofs) \
10246+ { \
10247+ stbir__simdf d; \
10248+ STBIR_SIMD_NO_UNROLL(decode); \
10249+ stbir__simdf_load2z(cs, hc + (ofs)); \
10250+ stbir__simdf_0123to0001(c, cs); \
10251+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 3); \
10252+ stbir__simdf_0123to1122(c, cs); \
10253+ stbir__simdf_load2z(d, decode + (ofs) * 3 + 4); \
10254+ stbir__simdf_madd(tot1, tot1, c, d); \
10255+ }
10256+
10257+#define stbir__3_coeff_remnant(ofs) \
10258+ { \
10259+ stbir__simdf d; \
10260+ STBIR_SIMD_NO_UNROLL(decode); \
10261+ stbir__simdf_load(cs, hc + (ofs)); \
10262+ stbir__simdf_0123to0001(c, cs); \
10263+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 3); \
10264+ stbir__simdf_0123to1122(c, cs); \
10265+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 3 + 4); \
10266+ stbir__simdf_0123to2222(c, cs); \
10267+ stbir__simdf_load1z(d, decode + (ofs) * 3 + 8); \
10268+ stbir__simdf_madd(tot2, tot2, c, d); \
10269+ }
10270+
10271+#define stbir__store_output() \
10272+ stbir__simdf_0123ABCDto3ABx(c, tot0, tot1); \
10273+ stbir__simdf_0123ABCDto23Ax(cs, tot1, tot2); \
10274+ stbir__simdf_0123to1230(tot2, tot2); \
10275+ stbir__simdf_add(tot0, tot0, cs); \
10276+ stbir__simdf_add(c, c, tot2); \
10277+ stbir__simdf_add(tot0, tot0, c); \
10278+ horizontal_coefficients += coefficient_width; \
10279+ ++horizontal_contributors; \
10280+ output += 3; \
10281+ if (output < output_end) { \
10282+ stbir__simdf_store(output - 3, tot0); \
10283+ continue; \
10284+ } \
10285+ stbir__simdf_0123to2301(tot1, tot0); \
10286+ stbir__simdf_store2(output - 3, tot0); \
10287+ stbir__simdf_store1(output + 2 - 3, tot1); \
10288+ break;
10289
10290 #endif
10291
10292 #else
10293
10294-#define stbir__1_coeff_only() \
10295- float tot0, tot1, tot2, c; \
10296- c = hc[0]; \
10297- tot0 = decode[0]*c; \
10298- tot1 = decode[1]*c; \
10299- tot2 = decode[2]*c;
10300-
10301-#define stbir__2_coeff_only() \
10302- float tot0, tot1, tot2, c; \
10303- c = hc[0]; \
10304- tot0 = decode[0]*c; \
10305- tot1 = decode[1]*c; \
10306- tot2 = decode[2]*c; \
10307- c = hc[1]; \
10308- tot0 += decode[3]*c; \
10309- tot1 += decode[4]*c; \
10310- tot2 += decode[5]*c;
10311-
10312-#define stbir__3_coeff_only() \
10313- float tot0, tot1, tot2, c; \
10314- c = hc[0]; \
10315- tot0 = decode[0]*c; \
10316- tot1 = decode[1]*c; \
10317- tot2 = decode[2]*c; \
10318- c = hc[1]; \
10319- tot0 += decode[3]*c; \
10320- tot1 += decode[4]*c; \
10321- tot2 += decode[5]*c; \
10322- c = hc[2]; \
10323- tot0 += decode[6]*c; \
10324- tot1 += decode[7]*c; \
10325- tot2 += decode[8]*c;
10326-
10327-#define stbir__store_output_tiny() \
10328- output[0] = tot0; \
10329- output[1] = tot1; \
10330- output[2] = tot2; \
10331- horizontal_coefficients += coefficient_width; \
10332- ++horizontal_contributors; \
10333- output += 3;
10334-
10335-#define stbir__4_coeff_start() \
10336- float tota0,tota1,tota2,totb0,totb1,totb2,totc0,totc1,totc2,totd0,totd1,totd2,c; \
10337- c = hc[0]; \
10338- tota0 = decode[0]*c; \
10339- tota1 = decode[1]*c; \
10340- tota2 = decode[2]*c; \
10341- c = hc[1]; \
10342- totb0 = decode[3]*c; \
10343- totb1 = decode[4]*c; \
10344- totb2 = decode[5]*c; \
10345- c = hc[2]; \
10346- totc0 = decode[6]*c; \
10347- totc1 = decode[7]*c; \
10348- totc2 = decode[8]*c; \
10349- c = hc[3]; \
10350- totd0 = decode[9]*c; \
10351- totd1 = decode[10]*c; \
10352- totd2 = decode[11]*c;
10353-
10354-#define stbir__4_coeff_continue_from_4( ofs ) \
10355- c = hc[0+(ofs)]; \
10356- tota0 += decode[0+(ofs)*3]*c; \
10357- tota1 += decode[1+(ofs)*3]*c; \
10358- tota2 += decode[2+(ofs)*3]*c; \
10359- c = hc[1+(ofs)]; \
10360- totb0 += decode[3+(ofs)*3]*c; \
10361- totb1 += decode[4+(ofs)*3]*c; \
10362- totb2 += decode[5+(ofs)*3]*c; \
10363- c = hc[2+(ofs)]; \
10364- totc0 += decode[6+(ofs)*3]*c; \
10365- totc1 += decode[7+(ofs)*3]*c; \
10366- totc2 += decode[8+(ofs)*3]*c; \
10367- c = hc[3+(ofs)]; \
10368- totd0 += decode[9+(ofs)*3]*c; \
10369- totd1 += decode[10+(ofs)*3]*c; \
10370- totd2 += decode[11+(ofs)*3]*c;
10371-
10372-#define stbir__1_coeff_remnant( ofs ) \
10373- c = hc[0+(ofs)]; \
10374- tota0 += decode[0+(ofs)*3]*c; \
10375- tota1 += decode[1+(ofs)*3]*c; \
10376- tota2 += decode[2+(ofs)*3]*c;
10377-
10378-#define stbir__2_coeff_remnant( ofs ) \
10379- c = hc[0+(ofs)]; \
10380- tota0 += decode[0+(ofs)*3]*c; \
10381- tota1 += decode[1+(ofs)*3]*c; \
10382- tota2 += decode[2+(ofs)*3]*c; \
10383- c = hc[1+(ofs)]; \
10384- totb0 += decode[3+(ofs)*3]*c; \
10385- totb1 += decode[4+(ofs)*3]*c; \
10386- totb2 += decode[5+(ofs)*3]*c; \
10387-
10388-#define stbir__3_coeff_remnant( ofs ) \
10389- c = hc[0+(ofs)]; \
10390- tota0 += decode[0+(ofs)*3]*c; \
10391- tota1 += decode[1+(ofs)*3]*c; \
10392- tota2 += decode[2+(ofs)*3]*c; \
10393- c = hc[1+(ofs)]; \
10394- totb0 += decode[3+(ofs)*3]*c; \
10395- totb1 += decode[4+(ofs)*3]*c; \
10396- totb2 += decode[5+(ofs)*3]*c; \
10397- c = hc[2+(ofs)]; \
10398- totc0 += decode[6+(ofs)*3]*c; \
10399- totc1 += decode[7+(ofs)*3]*c; \
10400- totc2 += decode[8+(ofs)*3]*c;
10401-
10402-#define stbir__store_output() \
10403- output[0] = (tota0+totc0)+(totb0+totd0); \
10404- output[1] = (tota1+totc1)+(totb1+totd1); \
10405- output[2] = (tota2+totc2)+(totb2+totd2); \
10406- horizontal_coefficients += coefficient_width; \
10407- ++horizontal_contributors; \
10408- output += 3;
10409+#define stbir__1_coeff_only() \
10410+ float tot0, tot1, tot2, c; \
10411+ c = hc[0]; \
10412+ tot0 = decode[0] * c; \
10413+ tot1 = decode[1] * c; \
10414+ tot2 = decode[2] * c;
10415+
10416+#define stbir__2_coeff_only() \
10417+ float tot0, tot1, tot2, c; \
10418+ c = hc[0]; \
10419+ tot0 = decode[0] * c; \
10420+ tot1 = decode[1] * c; \
10421+ tot2 = decode[2] * c; \
10422+ c = hc[1]; \
10423+ tot0 += decode[3] * c; \
10424+ tot1 += decode[4] * c; \
10425+ tot2 += decode[5] * c;
10426+
10427+#define stbir__3_coeff_only() \
10428+ float tot0, tot1, tot2, c; \
10429+ c = hc[0]; \
10430+ tot0 = decode[0] * c; \
10431+ tot1 = decode[1] * c; \
10432+ tot2 = decode[2] * c; \
10433+ c = hc[1]; \
10434+ tot0 += decode[3] * c; \
10435+ tot1 += decode[4] * c; \
10436+ tot2 += decode[5] * c; \
10437+ c = hc[2]; \
10438+ tot0 += decode[6] * c; \
10439+ tot1 += decode[7] * c; \
10440+ tot2 += decode[8] * c;
10441+
10442+#define stbir__store_output_tiny() \
10443+ output[0] = tot0; \
10444+ output[1] = tot1; \
10445+ output[2] = tot2; \
10446+ horizontal_coefficients += coefficient_width; \
10447+ ++horizontal_contributors; \
10448+ output += 3;
10449+
10450+#define stbir__4_coeff_start() \
10451+ float tota0, tota1, tota2, totb0, totb1, totb2, totc0, totc1, totc2, \
10452+ totd0, totd1, totd2, c; \
10453+ c = hc[0]; \
10454+ tota0 = decode[0] * c; \
10455+ tota1 = decode[1] * c; \
10456+ tota2 = decode[2] * c; \
10457+ c = hc[1]; \
10458+ totb0 = decode[3] * c; \
10459+ totb1 = decode[4] * c; \
10460+ totb2 = decode[5] * c; \
10461+ c = hc[2]; \
10462+ totc0 = decode[6] * c; \
10463+ totc1 = decode[7] * c; \
10464+ totc2 = decode[8] * c; \
10465+ c = hc[3]; \
10466+ totd0 = decode[9] * c; \
10467+ totd1 = decode[10] * c; \
10468+ totd2 = decode[11] * c;
10469+
10470+#define stbir__4_coeff_continue_from_4(ofs) \
10471+ c = hc[0 + (ofs)]; \
10472+ tota0 += decode[0 + (ofs) * 3] * c; \
10473+ tota1 += decode[1 + (ofs) * 3] * c; \
10474+ tota2 += decode[2 + (ofs) * 3] * c; \
10475+ c = hc[1 + (ofs)]; \
10476+ totb0 += decode[3 + (ofs) * 3] * c; \
10477+ totb1 += decode[4 + (ofs) * 3] * c; \
10478+ totb2 += decode[5 + (ofs) * 3] * c; \
10479+ c = hc[2 + (ofs)]; \
10480+ totc0 += decode[6 + (ofs) * 3] * c; \
10481+ totc1 += decode[7 + (ofs) * 3] * c; \
10482+ totc2 += decode[8 + (ofs) * 3] * c; \
10483+ c = hc[3 + (ofs)]; \
10484+ totd0 += decode[9 + (ofs) * 3] * c; \
10485+ totd1 += decode[10 + (ofs) * 3] * c; \
10486+ totd2 += decode[11 + (ofs) * 3] * c;
10487+
10488+#define stbir__1_coeff_remnant(ofs) \
10489+ c = hc[0 + (ofs)]; \
10490+ tota0 += decode[0 + (ofs) * 3] * c; \
10491+ tota1 += decode[1 + (ofs) * 3] * c; \
10492+ tota2 += decode[2 + (ofs) * 3] * c;
10493+
10494+#define stbir__2_coeff_remnant(ofs) \
10495+ c = hc[0 + (ofs)]; \
10496+ tota0 += decode[0 + (ofs) * 3] * c; \
10497+ tota1 += decode[1 + (ofs) * 3] * c; \
10498+ tota2 += decode[2 + (ofs) * 3] * c; \
10499+ c = hc[1 + (ofs)]; \
10500+ totb0 += decode[3 + (ofs) * 3] * c; \
10501+ totb1 += decode[4 + (ofs) * 3] * c; \
10502+ totb2 += decode[5 + (ofs) * 3] * c;
10503+
10504+#define stbir__3_coeff_remnant(ofs) \
10505+ c = hc[0 + (ofs)]; \
10506+ tota0 += decode[0 + (ofs) * 3] * c; \
10507+ tota1 += decode[1 + (ofs) * 3] * c; \
10508+ tota2 += decode[2 + (ofs) * 3] * c; \
10509+ c = hc[1 + (ofs)]; \
10510+ totb0 += decode[3 + (ofs) * 3] * c; \
10511+ totb1 += decode[4 + (ofs) * 3] * c; \
10512+ totb2 += decode[5 + (ofs) * 3] * c; \
10513+ c = hc[2 + (ofs)]; \
10514+ totc0 += decode[6 + (ofs) * 3] * c; \
10515+ totc1 += decode[7 + (ofs) * 3] * c; \
10516+ totc2 += decode[8 + (ofs) * 3] * c;
10517+
10518+#define stbir__store_output() \
10519+ output[0] = (tota0 + totc0) + (totb0 + totd0); \
10520+ output[1] = (tota1 + totc1) + (totb1 + totd1); \
10521+ output[2] = (tota2 + totc2) + (totb2 + totd2); \
10522+ horizontal_coefficients += coefficient_width; \
10523+ ++horizontal_contributors; \
10524+ output += 3;
10525
10526 #endif
10527
10528@@ -5358,291 +6250,292 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
10529
10530 #ifdef STBIR_SIMD
10531
10532-#define stbir__1_coeff_only() \
10533- stbir__simdf tot,c; \
10534- STBIR_SIMD_NO_UNROLL(decode); \
10535- stbir__simdf_load1( c, hc ); \
10536- stbir__simdf_0123to0000( c, c ); \
10537- stbir__simdf_mult_mem( tot, c, decode );
10538-
10539-#define stbir__2_coeff_only() \
10540- stbir__simdf tot,c,cs; \
10541- STBIR_SIMD_NO_UNROLL(decode); \
10542- stbir__simdf_load2( cs, hc ); \
10543- stbir__simdf_0123to0000( c, cs ); \
10544- stbir__simdf_mult_mem( tot, c, decode ); \
10545- stbir__simdf_0123to1111( c, cs ); \
10546- stbir__simdf_madd_mem( tot, tot, c, decode+4 );
10547-
10548-#define stbir__3_coeff_only() \
10549- stbir__simdf tot,c,cs; \
10550- STBIR_SIMD_NO_UNROLL(decode); \
10551- stbir__simdf_load( cs, hc ); \
10552- stbir__simdf_0123to0000( c, cs ); \
10553- stbir__simdf_mult_mem( tot, c, decode ); \
10554- stbir__simdf_0123to1111( c, cs ); \
10555- stbir__simdf_madd_mem( tot, tot, c, decode+4 ); \
10556- stbir__simdf_0123to2222( c, cs ); \
10557- stbir__simdf_madd_mem( tot, tot, c, decode+8 );
10558-
10559-#define stbir__store_output_tiny() \
10560- stbir__simdf_store( output, tot ); \
10561- horizontal_coefficients += coefficient_width; \
10562- ++horizontal_contributors; \
10563- output += 4;
10564+#define stbir__1_coeff_only() \
10565+ stbir__simdf tot, c; \
10566+ STBIR_SIMD_NO_UNROLL(decode); \
10567+ stbir__simdf_load1(c, hc); \
10568+ stbir__simdf_0123to0000(c, c); \
10569+ stbir__simdf_mult_mem(tot, c, decode);
10570+
10571+#define stbir__2_coeff_only() \
10572+ stbir__simdf tot, c, cs; \
10573+ STBIR_SIMD_NO_UNROLL(decode); \
10574+ stbir__simdf_load2(cs, hc); \
10575+ stbir__simdf_0123to0000(c, cs); \
10576+ stbir__simdf_mult_mem(tot, c, decode); \
10577+ stbir__simdf_0123to1111(c, cs); \
10578+ stbir__simdf_madd_mem(tot, tot, c, decode + 4);
10579+
10580+#define stbir__3_coeff_only() \
10581+ stbir__simdf tot, c, cs; \
10582+ STBIR_SIMD_NO_UNROLL(decode); \
10583+ stbir__simdf_load(cs, hc); \
10584+ stbir__simdf_0123to0000(c, cs); \
10585+ stbir__simdf_mult_mem(tot, c, decode); \
10586+ stbir__simdf_0123to1111(c, cs); \
10587+ stbir__simdf_madd_mem(tot, tot, c, decode + 4); \
10588+ stbir__simdf_0123to2222(c, cs); \
10589+ stbir__simdf_madd_mem(tot, tot, c, decode + 8);
10590+
10591+#define stbir__store_output_tiny() \
10592+ stbir__simdf_store(output, tot); \
10593+ horizontal_coefficients += coefficient_width; \
10594+ ++horizontal_contributors; \
10595+ output += 4;
10596
10597 #ifdef STBIR_SIMD8
10598
10599-#define stbir__4_coeff_start() \
10600- stbir__simdf8 tot0,c,cs; stbir__simdf t; \
10601- STBIR_SIMD_NO_UNROLL(decode); \
10602- stbir__simdf8_load4b( cs, hc ); \
10603- stbir__simdf8_0123to00001111( c, cs ); \
10604- stbir__simdf8_mult_mem( tot0, c, decode ); \
10605- stbir__simdf8_0123to22223333( c, cs ); \
10606- stbir__simdf8_madd_mem( tot0, tot0, c, decode+8 );
10607-
10608-#define stbir__4_coeff_continue_from_4( ofs ) \
10609- STBIR_SIMD_NO_UNROLL(decode); \
10610- stbir__simdf8_load4b( cs, hc + (ofs) ); \
10611- stbir__simdf8_0123to00001111( c, cs ); \
10612- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \
10613- stbir__simdf8_0123to22223333( c, cs ); \
10614- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*4+8 );
10615-
10616-#define stbir__1_coeff_remnant( ofs ) \
10617- STBIR_SIMD_NO_UNROLL(decode); \
10618- stbir__simdf_load1rep4( t, hc + (ofs) ); \
10619- stbir__simdf8_madd_mem4( tot0, tot0, t, decode+(ofs)*4 );
10620-
10621-#define stbir__2_coeff_remnant( ofs ) \
10622- STBIR_SIMD_NO_UNROLL(decode); \
10623- stbir__simdf8_load4b( cs, hc + (ofs) - 2 ); \
10624- stbir__simdf8_0123to22223333( c, cs ); \
10625- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*4 );
10626-
10627- #define stbir__3_coeff_remnant( ofs ) \
10628- STBIR_SIMD_NO_UNROLL(decode); \
10629- stbir__simdf8_load4b( cs, hc + (ofs) ); \
10630- stbir__simdf8_0123to00001111( c, cs ); \
10631- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \
10632- stbir__simdf8_0123to2222( t, cs ); \
10633- stbir__simdf8_madd_mem4( tot0, tot0, t, decode+(ofs)*4+8 );
10634-
10635-#define stbir__store_output() \
10636- stbir__simdf8_add4halves( t, stbir__if_simdf8_cast_to_simdf4(tot0), tot0 ); \
10637- stbir__simdf_store( output, t ); \
10638- horizontal_coefficients += coefficient_width; \
10639- ++horizontal_contributors; \
10640- output += 4;
10641+#define stbir__4_coeff_start() \
10642+ stbir__simdf8 tot0, c, cs; \
10643+ stbir__simdf t; \
10644+ STBIR_SIMD_NO_UNROLL(decode); \
10645+ stbir__simdf8_load4b(cs, hc); \
10646+ stbir__simdf8_0123to00001111(c, cs); \
10647+ stbir__simdf8_mult_mem(tot0, c, decode); \
10648+ stbir__simdf8_0123to22223333(c, cs); \
10649+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + 8);
10650+
10651+#define stbir__4_coeff_continue_from_4(ofs) \
10652+ STBIR_SIMD_NO_UNROLL(decode); \
10653+ stbir__simdf8_load4b(cs, hc + (ofs)); \
10654+ stbir__simdf8_0123to00001111(c, cs); \
10655+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 4); \
10656+ stbir__simdf8_0123to22223333(c, cs); \
10657+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 4 + 8);
10658+
10659+#define stbir__1_coeff_remnant(ofs) \
10660+ STBIR_SIMD_NO_UNROLL(decode); \
10661+ stbir__simdf_load1rep4(t, hc + (ofs)); \
10662+ stbir__simdf8_madd_mem4(tot0, tot0, t, decode + (ofs) * 4);
10663+
10664+#define stbir__2_coeff_remnant(ofs) \
10665+ STBIR_SIMD_NO_UNROLL(decode); \
10666+ stbir__simdf8_load4b(cs, hc + (ofs) - 2); \
10667+ stbir__simdf8_0123to22223333(c, cs); \
10668+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 4);
10669+
10670+#define stbir__3_coeff_remnant(ofs) \
10671+ STBIR_SIMD_NO_UNROLL(decode); \
10672+ stbir__simdf8_load4b(cs, hc + (ofs)); \
10673+ stbir__simdf8_0123to00001111(c, cs); \
10674+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 4); \
10675+ stbir__simdf8_0123to2222(t, cs); \
10676+ stbir__simdf8_madd_mem4(tot0, tot0, t, decode + (ofs) * 4 + 8);
10677+
10678+#define stbir__store_output() \
10679+ stbir__simdf8_add4halves(t, stbir__if_simdf8_cast_to_simdf4(tot0), tot0); \
10680+ stbir__simdf_store(output, t); \
10681+ horizontal_coefficients += coefficient_width; \
10682+ ++horizontal_contributors; \
10683+ output += 4;
10684
10685 #else
10686
10687-#define stbir__4_coeff_start() \
10688- stbir__simdf tot0,tot1,c,cs; \
10689- STBIR_SIMD_NO_UNROLL(decode); \
10690- stbir__simdf_load( cs, hc ); \
10691- stbir__simdf_0123to0000( c, cs ); \
10692- stbir__simdf_mult_mem( tot0, c, decode ); \
10693- stbir__simdf_0123to1111( c, cs ); \
10694- stbir__simdf_mult_mem( tot1, c, decode+4 ); \
10695- stbir__simdf_0123to2222( c, cs ); \
10696- stbir__simdf_madd_mem( tot0, tot0, c, decode+8 ); \
10697- stbir__simdf_0123to3333( c, cs ); \
10698- stbir__simdf_madd_mem( tot1, tot1, c, decode+12 );
10699-
10700-#define stbir__4_coeff_continue_from_4( ofs ) \
10701- STBIR_SIMD_NO_UNROLL(decode); \
10702- stbir__simdf_load( cs, hc + (ofs) ); \
10703- stbir__simdf_0123to0000( c, cs ); \
10704- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \
10705- stbir__simdf_0123to1111( c, cs ); \
10706- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*4+4 ); \
10707- stbir__simdf_0123to2222( c, cs ); \
10708- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4+8 ); \
10709- stbir__simdf_0123to3333( c, cs ); \
10710- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*4+12 );
10711-
10712-#define stbir__1_coeff_remnant( ofs ) \
10713- STBIR_SIMD_NO_UNROLL(decode); \
10714- stbir__simdf_load1( c, hc + (ofs) ); \
10715- stbir__simdf_0123to0000( c, c ); \
10716- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4 );
10717-
10718-#define stbir__2_coeff_remnant( ofs ) \
10719- STBIR_SIMD_NO_UNROLL(decode); \
10720- stbir__simdf_load2( cs, hc + (ofs) ); \
10721- stbir__simdf_0123to0000( c, cs ); \
10722- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \
10723- stbir__simdf_0123to1111( c, cs ); \
10724- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*4+4 );
10725-
10726-#define stbir__3_coeff_remnant( ofs ) \
10727- STBIR_SIMD_NO_UNROLL(decode); \
10728- stbir__simdf_load( cs, hc + (ofs) ); \
10729- stbir__simdf_0123to0000( c, cs ); \
10730- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4 ); \
10731- stbir__simdf_0123to1111( c, cs ); \
10732- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*4+4 ); \
10733- stbir__simdf_0123to2222( c, cs ); \
10734- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*4+8 );
10735-
10736-#define stbir__store_output() \
10737- stbir__simdf_add( tot0, tot0, tot1 ); \
10738- stbir__simdf_store( output, tot0 ); \
10739- horizontal_coefficients += coefficient_width; \
10740- ++horizontal_contributors; \
10741- output += 4;
10742+#define stbir__4_coeff_start() \
10743+ stbir__simdf tot0, tot1, c, cs; \
10744+ STBIR_SIMD_NO_UNROLL(decode); \
10745+ stbir__simdf_load(cs, hc); \
10746+ stbir__simdf_0123to0000(c, cs); \
10747+ stbir__simdf_mult_mem(tot0, c, decode); \
10748+ stbir__simdf_0123to1111(c, cs); \
10749+ stbir__simdf_mult_mem(tot1, c, decode + 4); \
10750+ stbir__simdf_0123to2222(c, cs); \
10751+ stbir__simdf_madd_mem(tot0, tot0, c, decode + 8); \
10752+ stbir__simdf_0123to3333(c, cs); \
10753+ stbir__simdf_madd_mem(tot1, tot1, c, decode + 12);
10754+
10755+#define stbir__4_coeff_continue_from_4(ofs) \
10756+ STBIR_SIMD_NO_UNROLL(decode); \
10757+ stbir__simdf_load(cs, hc + (ofs)); \
10758+ stbir__simdf_0123to0000(c, cs); \
10759+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 4); \
10760+ stbir__simdf_0123to1111(c, cs); \
10761+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 4 + 4); \
10762+ stbir__simdf_0123to2222(c, cs); \
10763+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 4 + 8); \
10764+ stbir__simdf_0123to3333(c, cs); \
10765+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 4 + 12);
10766+
10767+#define stbir__1_coeff_remnant(ofs) \
10768+ STBIR_SIMD_NO_UNROLL(decode); \
10769+ stbir__simdf_load1(c, hc + (ofs)); \
10770+ stbir__simdf_0123to0000(c, c); \
10771+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 4);
10772+
10773+#define stbir__2_coeff_remnant(ofs) \
10774+ STBIR_SIMD_NO_UNROLL(decode); \
10775+ stbir__simdf_load2(cs, hc + (ofs)); \
10776+ stbir__simdf_0123to0000(c, cs); \
10777+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 4); \
10778+ stbir__simdf_0123to1111(c, cs); \
10779+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 4 + 4);
10780+
10781+#define stbir__3_coeff_remnant(ofs) \
10782+ STBIR_SIMD_NO_UNROLL(decode); \
10783+ stbir__simdf_load(cs, hc + (ofs)); \
10784+ stbir__simdf_0123to0000(c, cs); \
10785+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 4); \
10786+ stbir__simdf_0123to1111(c, cs); \
10787+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 4 + 4); \
10788+ stbir__simdf_0123to2222(c, cs); \
10789+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 4 + 8);
10790+
10791+#define stbir__store_output() \
10792+ stbir__simdf_add(tot0, tot0, tot1); \
10793+ stbir__simdf_store(output, tot0); \
10794+ horizontal_coefficients += coefficient_width; \
10795+ ++horizontal_contributors; \
10796+ output += 4;
10797
10798 #endif
10799
10800 #else
10801
10802-#define stbir__1_coeff_only() \
10803- float p0,p1,p2,p3,c; \
10804- STBIR_SIMD_NO_UNROLL(decode); \
10805- c = hc[0]; \
10806- p0 = decode[0] * c; \
10807- p1 = decode[1] * c; \
10808- p2 = decode[2] * c; \
10809- p3 = decode[3] * c;
10810-
10811-#define stbir__2_coeff_only() \
10812- float p0,p1,p2,p3,c; \
10813- STBIR_SIMD_NO_UNROLL(decode); \
10814- c = hc[0]; \
10815- p0 = decode[0] * c; \
10816- p1 = decode[1] * c; \
10817- p2 = decode[2] * c; \
10818- p3 = decode[3] * c; \
10819- c = hc[1]; \
10820- p0 += decode[4] * c; \
10821- p1 += decode[5] * c; \
10822- p2 += decode[6] * c; \
10823- p3 += decode[7] * c;
10824-
10825-#define stbir__3_coeff_only() \
10826- float p0,p1,p2,p3,c; \
10827- STBIR_SIMD_NO_UNROLL(decode); \
10828- c = hc[0]; \
10829- p0 = decode[0] * c; \
10830- p1 = decode[1] * c; \
10831- p2 = decode[2] * c; \
10832- p3 = decode[3] * c; \
10833- c = hc[1]; \
10834- p0 += decode[4] * c; \
10835- p1 += decode[5] * c; \
10836- p2 += decode[6] * c; \
10837- p3 += decode[7] * c; \
10838- c = hc[2]; \
10839- p0 += decode[8] * c; \
10840- p1 += decode[9] * c; \
10841- p2 += decode[10] * c; \
10842- p3 += decode[11] * c;
10843-
10844-#define stbir__store_output_tiny() \
10845- output[0] = p0; \
10846- output[1] = p1; \
10847- output[2] = p2; \
10848- output[3] = p3; \
10849- horizontal_coefficients += coefficient_width; \
10850- ++horizontal_contributors; \
10851- output += 4;
10852-
10853-#define stbir__4_coeff_start() \
10854- float x0,x1,x2,x3,y0,y1,y2,y3,c; \
10855- STBIR_SIMD_NO_UNROLL(decode); \
10856- c = hc[0]; \
10857- x0 = decode[0] * c; \
10858- x1 = decode[1] * c; \
10859- x2 = decode[2] * c; \
10860- x3 = decode[3] * c; \
10861- c = hc[1]; \
10862- y0 = decode[4] * c; \
10863- y1 = decode[5] * c; \
10864- y2 = decode[6] * c; \
10865- y3 = decode[7] * c; \
10866- c = hc[2]; \
10867- x0 += decode[8] * c; \
10868- x1 += decode[9] * c; \
10869- x2 += decode[10] * c; \
10870- x3 += decode[11] * c; \
10871- c = hc[3]; \
10872- y0 += decode[12] * c; \
10873- y1 += decode[13] * c; \
10874- y2 += decode[14] * c; \
10875- y3 += decode[15] * c;
10876-
10877-#define stbir__4_coeff_continue_from_4( ofs ) \
10878- STBIR_SIMD_NO_UNROLL(decode); \
10879- c = hc[0+(ofs)]; \
10880- x0 += decode[0+(ofs)*4] * c; \
10881- x1 += decode[1+(ofs)*4] * c; \
10882- x2 += decode[2+(ofs)*4] * c; \
10883- x3 += decode[3+(ofs)*4] * c; \
10884- c = hc[1+(ofs)]; \
10885- y0 += decode[4+(ofs)*4] * c; \
10886- y1 += decode[5+(ofs)*4] * c; \
10887- y2 += decode[6+(ofs)*4] * c; \
10888- y3 += decode[7+(ofs)*4] * c; \
10889- c = hc[2+(ofs)]; \
10890- x0 += decode[8+(ofs)*4] * c; \
10891- x1 += decode[9+(ofs)*4] * c; \
10892- x2 += decode[10+(ofs)*4] * c; \
10893- x3 += decode[11+(ofs)*4] * c; \
10894- c = hc[3+(ofs)]; \
10895- y0 += decode[12+(ofs)*4] * c; \
10896- y1 += decode[13+(ofs)*4] * c; \
10897- y2 += decode[14+(ofs)*4] * c; \
10898- y3 += decode[15+(ofs)*4] * c;
10899-
10900-#define stbir__1_coeff_remnant( ofs ) \
10901- STBIR_SIMD_NO_UNROLL(decode); \
10902- c = hc[0+(ofs)]; \
10903- x0 += decode[0+(ofs)*4] * c; \
10904- x1 += decode[1+(ofs)*4] * c; \
10905- x2 += decode[2+(ofs)*4] * c; \
10906- x3 += decode[3+(ofs)*4] * c;
10907-
10908-#define stbir__2_coeff_remnant( ofs ) \
10909- STBIR_SIMD_NO_UNROLL(decode); \
10910- c = hc[0+(ofs)]; \
10911- x0 += decode[0+(ofs)*4] * c; \
10912- x1 += decode[1+(ofs)*4] * c; \
10913- x2 += decode[2+(ofs)*4] * c; \
10914- x3 += decode[3+(ofs)*4] * c; \
10915- c = hc[1+(ofs)]; \
10916- y0 += decode[4+(ofs)*4] * c; \
10917- y1 += decode[5+(ofs)*4] * c; \
10918- y2 += decode[6+(ofs)*4] * c; \
10919- y3 += decode[7+(ofs)*4] * c;
10920-
10921-#define stbir__3_coeff_remnant( ofs ) \
10922- STBIR_SIMD_NO_UNROLL(decode); \
10923- c = hc[0+(ofs)]; \
10924- x0 += decode[0+(ofs)*4] * c; \
10925- x1 += decode[1+(ofs)*4] * c; \
10926- x2 += decode[2+(ofs)*4] * c; \
10927- x3 += decode[3+(ofs)*4] * c; \
10928- c = hc[1+(ofs)]; \
10929- y0 += decode[4+(ofs)*4] * c; \
10930- y1 += decode[5+(ofs)*4] * c; \
10931- y2 += decode[6+(ofs)*4] * c; \
10932- y3 += decode[7+(ofs)*4] * c; \
10933- c = hc[2+(ofs)]; \
10934- x0 += decode[8+(ofs)*4] * c; \
10935- x1 += decode[9+(ofs)*4] * c; \
10936- x2 += decode[10+(ofs)*4] * c; \
10937- x3 += decode[11+(ofs)*4] * c;
10938-
10939-#define stbir__store_output() \
10940- output[0] = x0 + y0; \
10941- output[1] = x1 + y1; \
10942- output[2] = x2 + y2; \
10943- output[3] = x3 + y3; \
10944- horizontal_coefficients += coefficient_width; \
10945- ++horizontal_contributors; \
10946- output += 4;
10947+#define stbir__1_coeff_only() \
10948+ float p0, p1, p2, p3, c; \
10949+ STBIR_SIMD_NO_UNROLL(decode); \
10950+ c = hc[0]; \
10951+ p0 = decode[0] * c; \
10952+ p1 = decode[1] * c; \
10953+ p2 = decode[2] * c; \
10954+ p3 = decode[3] * c;
10955+
10956+#define stbir__2_coeff_only() \
10957+ float p0, p1, p2, p3, c; \
10958+ STBIR_SIMD_NO_UNROLL(decode); \
10959+ c = hc[0]; \
10960+ p0 = decode[0] * c; \
10961+ p1 = decode[1] * c; \
10962+ p2 = decode[2] * c; \
10963+ p3 = decode[3] * c; \
10964+ c = hc[1]; \
10965+ p0 += decode[4] * c; \
10966+ p1 += decode[5] * c; \
10967+ p2 += decode[6] * c; \
10968+ p3 += decode[7] * c;
10969+
10970+#define stbir__3_coeff_only() \
10971+ float p0, p1, p2, p3, c; \
10972+ STBIR_SIMD_NO_UNROLL(decode); \
10973+ c = hc[0]; \
10974+ p0 = decode[0] * c; \
10975+ p1 = decode[1] * c; \
10976+ p2 = decode[2] * c; \
10977+ p3 = decode[3] * c; \
10978+ c = hc[1]; \
10979+ p0 += decode[4] * c; \
10980+ p1 += decode[5] * c; \
10981+ p2 += decode[6] * c; \
10982+ p3 += decode[7] * c; \
10983+ c = hc[2]; \
10984+ p0 += decode[8] * c; \
10985+ p1 += decode[9] * c; \
10986+ p2 += decode[10] * c; \
10987+ p3 += decode[11] * c;
10988+
10989+#define stbir__store_output_tiny() \
10990+ output[0] = p0; \
10991+ output[1] = p1; \
10992+ output[2] = p2; \
10993+ output[3] = p3; \
10994+ horizontal_coefficients += coefficient_width; \
10995+ ++horizontal_contributors; \
10996+ output += 4;
10997+
10998+#define stbir__4_coeff_start() \
10999+ float x0, x1, x2, x3, y0, y1, y2, y3, c; \
11000+ STBIR_SIMD_NO_UNROLL(decode); \
11001+ c = hc[0]; \
11002+ x0 = decode[0] * c; \
11003+ x1 = decode[1] * c; \
11004+ x2 = decode[2] * c; \
11005+ x3 = decode[3] * c; \
11006+ c = hc[1]; \
11007+ y0 = decode[4] * c; \
11008+ y1 = decode[5] * c; \
11009+ y2 = decode[6] * c; \
11010+ y3 = decode[7] * c; \
11011+ c = hc[2]; \
11012+ x0 += decode[8] * c; \
11013+ x1 += decode[9] * c; \
11014+ x2 += decode[10] * c; \
11015+ x3 += decode[11] * c; \
11016+ c = hc[3]; \
11017+ y0 += decode[12] * c; \
11018+ y1 += decode[13] * c; \
11019+ y2 += decode[14] * c; \
11020+ y3 += decode[15] * c;
11021+
11022+#define stbir__4_coeff_continue_from_4(ofs) \
11023+ STBIR_SIMD_NO_UNROLL(decode); \
11024+ c = hc[0 + (ofs)]; \
11025+ x0 += decode[0 + (ofs) * 4] * c; \
11026+ x1 += decode[1 + (ofs) * 4] * c; \
11027+ x2 += decode[2 + (ofs) * 4] * c; \
11028+ x3 += decode[3 + (ofs) * 4] * c; \
11029+ c = hc[1 + (ofs)]; \
11030+ y0 += decode[4 + (ofs) * 4] * c; \
11031+ y1 += decode[5 + (ofs) * 4] * c; \
11032+ y2 += decode[6 + (ofs) * 4] * c; \
11033+ y3 += decode[7 + (ofs) * 4] * c; \
11034+ c = hc[2 + (ofs)]; \
11035+ x0 += decode[8 + (ofs) * 4] * c; \
11036+ x1 += decode[9 + (ofs) * 4] * c; \
11037+ x2 += decode[10 + (ofs) * 4] * c; \
11038+ x3 += decode[11 + (ofs) * 4] * c; \
11039+ c = hc[3 + (ofs)]; \
11040+ y0 += decode[12 + (ofs) * 4] * c; \
11041+ y1 += decode[13 + (ofs) * 4] * c; \
11042+ y2 += decode[14 + (ofs) * 4] * c; \
11043+ y3 += decode[15 + (ofs) * 4] * c;
11044+
11045+#define stbir__1_coeff_remnant(ofs) \
11046+ STBIR_SIMD_NO_UNROLL(decode); \
11047+ c = hc[0 + (ofs)]; \
11048+ x0 += decode[0 + (ofs) * 4] * c; \
11049+ x1 += decode[1 + (ofs) * 4] * c; \
11050+ x2 += decode[2 + (ofs) * 4] * c; \
11051+ x3 += decode[3 + (ofs) * 4] * c;
11052+
11053+#define stbir__2_coeff_remnant(ofs) \
11054+ STBIR_SIMD_NO_UNROLL(decode); \
11055+ c = hc[0 + (ofs)]; \
11056+ x0 += decode[0 + (ofs) * 4] * c; \
11057+ x1 += decode[1 + (ofs) * 4] * c; \
11058+ x2 += decode[2 + (ofs) * 4] * c; \
11059+ x3 += decode[3 + (ofs) * 4] * c; \
11060+ c = hc[1 + (ofs)]; \
11061+ y0 += decode[4 + (ofs) * 4] * c; \
11062+ y1 += decode[5 + (ofs) * 4] * c; \
11063+ y2 += decode[6 + (ofs) * 4] * c; \
11064+ y3 += decode[7 + (ofs) * 4] * c;
11065+
11066+#define stbir__3_coeff_remnant(ofs) \
11067+ STBIR_SIMD_NO_UNROLL(decode); \
11068+ c = hc[0 + (ofs)]; \
11069+ x0 += decode[0 + (ofs) * 4] * c; \
11070+ x1 += decode[1 + (ofs) * 4] * c; \
11071+ x2 += decode[2 + (ofs) * 4] * c; \
11072+ x3 += decode[3 + (ofs) * 4] * c; \
11073+ c = hc[1 + (ofs)]; \
11074+ y0 += decode[4 + (ofs) * 4] * c; \
11075+ y1 += decode[5 + (ofs) * 4] * c; \
11076+ y2 += decode[6 + (ofs) * 4] * c; \
11077+ y3 += decode[7 + (ofs) * 4] * c; \
11078+ c = hc[2 + (ofs)]; \
11079+ x0 += decode[8 + (ofs) * 4] * c; \
11080+ x1 += decode[9 + (ofs) * 4] * c; \
11081+ x2 += decode[10 + (ofs) * 4] * c; \
11082+ x3 += decode[11 + (ofs) * 4] * c;
11083+
11084+#define stbir__store_output() \
11085+ output[0] = x0 + y0; \
11086+ output[1] = x1 + y1; \
11087+ output[2] = x2 + y2; \
11088+ output[3] = x3 + y3; \
11089+ horizontal_coefficients += coefficient_width; \
11090+ ++horizontal_contributors; \
11091+ output += 4;
11092
11093 #endif
11094
11095@@ -5650,402 +6543,401 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
11096 #define STB_IMAGE_RESIZE_DO_HORIZONTALS
11097 #include STBIR__HEADER_FILENAME
11098
11099-
11100-
11101 //=================
11102 // Do 7 channel horizontal routines
11103
11104 #ifdef STBIR_SIMD
11105
11106-#define stbir__1_coeff_only() \
11107- stbir__simdf tot0,tot1,c; \
11108- STBIR_SIMD_NO_UNROLL(decode); \
11109- stbir__simdf_load1( c, hc ); \
11110- stbir__simdf_0123to0000( c, c ); \
11111- stbir__simdf_mult_mem( tot0, c, decode ); \
11112- stbir__simdf_mult_mem( tot1, c, decode+3 );
11113-
11114-#define stbir__2_coeff_only() \
11115- stbir__simdf tot0,tot1,c,cs; \
11116- STBIR_SIMD_NO_UNROLL(decode); \
11117- stbir__simdf_load2( cs, hc ); \
11118- stbir__simdf_0123to0000( c, cs ); \
11119- stbir__simdf_mult_mem( tot0, c, decode ); \
11120- stbir__simdf_mult_mem( tot1, c, decode+3 ); \
11121- stbir__simdf_0123to1111( c, cs ); \
11122- stbir__simdf_madd_mem( tot0, tot0, c, decode+7 ); \
11123- stbir__simdf_madd_mem( tot1, tot1, c,decode+10 );
11124-
11125-#define stbir__3_coeff_only() \
11126- stbir__simdf tot0,tot1,c,cs; \
11127- STBIR_SIMD_NO_UNROLL(decode); \
11128- stbir__simdf_load( cs, hc ); \
11129- stbir__simdf_0123to0000( c, cs ); \
11130- stbir__simdf_mult_mem( tot0, c, decode ); \
11131- stbir__simdf_mult_mem( tot1, c, decode+3 ); \
11132- stbir__simdf_0123to1111( c, cs ); \
11133- stbir__simdf_madd_mem( tot0, tot0, c, decode+7 ); \
11134- stbir__simdf_madd_mem( tot1, tot1, c, decode+10 ); \
11135- stbir__simdf_0123to2222( c, cs ); \
11136- stbir__simdf_madd_mem( tot0, tot0, c, decode+14 ); \
11137- stbir__simdf_madd_mem( tot1, tot1, c, decode+17 );
11138-
11139-#define stbir__store_output_tiny() \
11140- stbir__simdf_store( output+3, tot1 ); \
11141- stbir__simdf_store( output, tot0 ); \
11142- horizontal_coefficients += coefficient_width; \
11143- ++horizontal_contributors; \
11144- output += 7;
11145+#define stbir__1_coeff_only() \
11146+ stbir__simdf tot0, tot1, c; \
11147+ STBIR_SIMD_NO_UNROLL(decode); \
11148+ stbir__simdf_load1(c, hc); \
11149+ stbir__simdf_0123to0000(c, c); \
11150+ stbir__simdf_mult_mem(tot0, c, decode); \
11151+ stbir__simdf_mult_mem(tot1, c, decode + 3);
11152+
11153+#define stbir__2_coeff_only() \
11154+ stbir__simdf tot0, tot1, c, cs; \
11155+ STBIR_SIMD_NO_UNROLL(decode); \
11156+ stbir__simdf_load2(cs, hc); \
11157+ stbir__simdf_0123to0000(c, cs); \
11158+ stbir__simdf_mult_mem(tot0, c, decode); \
11159+ stbir__simdf_mult_mem(tot1, c, decode + 3); \
11160+ stbir__simdf_0123to1111(c, cs); \
11161+ stbir__simdf_madd_mem(tot0, tot0, c, decode + 7); \
11162+ stbir__simdf_madd_mem(tot1, tot1, c, decode + 10);
11163+
11164+#define stbir__3_coeff_only() \
11165+ stbir__simdf tot0, tot1, c, cs; \
11166+ STBIR_SIMD_NO_UNROLL(decode); \
11167+ stbir__simdf_load(cs, hc); \
11168+ stbir__simdf_0123to0000(c, cs); \
11169+ stbir__simdf_mult_mem(tot0, c, decode); \
11170+ stbir__simdf_mult_mem(tot1, c, decode + 3); \
11171+ stbir__simdf_0123to1111(c, cs); \
11172+ stbir__simdf_madd_mem(tot0, tot0, c, decode + 7); \
11173+ stbir__simdf_madd_mem(tot1, tot1, c, decode + 10); \
11174+ stbir__simdf_0123to2222(c, cs); \
11175+ stbir__simdf_madd_mem(tot0, tot0, c, decode + 14); \
11176+ stbir__simdf_madd_mem(tot1, tot1, c, decode + 17);
11177+
11178+#define stbir__store_output_tiny() \
11179+ stbir__simdf_store(output + 3, tot1); \
11180+ stbir__simdf_store(output, tot0); \
11181+ horizontal_coefficients += coefficient_width; \
11182+ ++horizontal_contributors; \
11183+ output += 7;
11184
11185 #ifdef STBIR_SIMD8
11186
11187-#define stbir__4_coeff_start() \
11188- stbir__simdf8 tot0,tot1,c,cs; \
11189- STBIR_SIMD_NO_UNROLL(decode); \
11190- stbir__simdf8_load4b( cs, hc ); \
11191- stbir__simdf8_0123to00000000( c, cs ); \
11192- stbir__simdf8_mult_mem( tot0, c, decode ); \
11193- stbir__simdf8_0123to11111111( c, cs ); \
11194- stbir__simdf8_mult_mem( tot1, c, decode+7 ); \
11195- stbir__simdf8_0123to22222222( c, cs ); \
11196- stbir__simdf8_madd_mem( tot0, tot0, c, decode+14 ); \
11197- stbir__simdf8_0123to33333333( c, cs ); \
11198- stbir__simdf8_madd_mem( tot1, tot1, c, decode+21 );
11199-
11200-#define stbir__4_coeff_continue_from_4( ofs ) \
11201- STBIR_SIMD_NO_UNROLL(decode); \
11202- stbir__simdf8_load4b( cs, hc + (ofs) ); \
11203- stbir__simdf8_0123to00000000( c, cs ); \
11204- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \
11205- stbir__simdf8_0123to11111111( c, cs ); \
11206- stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*7+7 ); \
11207- stbir__simdf8_0123to22222222( c, cs ); \
11208- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7+14 ); \
11209- stbir__simdf8_0123to33333333( c, cs ); \
11210- stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*7+21 );
11211-
11212-#define stbir__1_coeff_remnant( ofs ) \
11213- STBIR_SIMD_NO_UNROLL(decode); \
11214- stbir__simdf8_load1b( c, hc + (ofs) ); \
11215- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7 );
11216-
11217-#define stbir__2_coeff_remnant( ofs ) \
11218- STBIR_SIMD_NO_UNROLL(decode); \
11219- stbir__simdf8_load1b( c, hc + (ofs) ); \
11220- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \
11221- stbir__simdf8_load1b( c, hc + (ofs)+1 ); \
11222- stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*7+7 );
11223-
11224-#define stbir__3_coeff_remnant( ofs ) \
11225- STBIR_SIMD_NO_UNROLL(decode); \
11226- stbir__simdf8_load4b( cs, hc + (ofs) ); \
11227- stbir__simdf8_0123to00000000( c, cs ); \
11228- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \
11229- stbir__simdf8_0123to11111111( c, cs ); \
11230- stbir__simdf8_madd_mem( tot1, tot1, c, decode+(ofs)*7+7 ); \
11231- stbir__simdf8_0123to22222222( c, cs ); \
11232- stbir__simdf8_madd_mem( tot0, tot0, c, decode+(ofs)*7+14 );
11233-
11234-#define stbir__store_output() \
11235- stbir__simdf8_add( tot0, tot0, tot1 ); \
11236- horizontal_coefficients += coefficient_width; \
11237- ++horizontal_contributors; \
11238- output += 7; \
11239- if ( output < output_end ) \
11240- { \
11241- stbir__simdf8_store( output-7, tot0 ); \
11242- continue; \
11243- } \
11244- stbir__simdf_store( output-7+3, stbir__simdf_swiz(stbir__simdf8_gettop4(tot0),0,0,1,2) ); \
11245- stbir__simdf_store( output-7, stbir__if_simdf8_cast_to_simdf4(tot0) ); \
11246- break;
11247+#define stbir__4_coeff_start() \
11248+ stbir__simdf8 tot0, tot1, c, cs; \
11249+ STBIR_SIMD_NO_UNROLL(decode); \
11250+ stbir__simdf8_load4b(cs, hc); \
11251+ stbir__simdf8_0123to00000000(c, cs); \
11252+ stbir__simdf8_mult_mem(tot0, c, decode); \
11253+ stbir__simdf8_0123to11111111(c, cs); \
11254+ stbir__simdf8_mult_mem(tot1, c, decode + 7); \
11255+ stbir__simdf8_0123to22222222(c, cs); \
11256+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + 14); \
11257+ stbir__simdf8_0123to33333333(c, cs); \
11258+ stbir__simdf8_madd_mem(tot1, tot1, c, decode + 21);
11259+
11260+#define stbir__4_coeff_continue_from_4(ofs) \
11261+ STBIR_SIMD_NO_UNROLL(decode); \
11262+ stbir__simdf8_load4b(cs, hc + (ofs)); \
11263+ stbir__simdf8_0123to00000000(c, cs); \
11264+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 7); \
11265+ stbir__simdf8_0123to11111111(c, cs); \
11266+ stbir__simdf8_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 7); \
11267+ stbir__simdf8_0123to22222222(c, cs); \
11268+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 7 + 14); \
11269+ stbir__simdf8_0123to33333333(c, cs); \
11270+ stbir__simdf8_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 21);
11271+
11272+#define stbir__1_coeff_remnant(ofs) \
11273+ STBIR_SIMD_NO_UNROLL(decode); \
11274+ stbir__simdf8_load1b(c, hc + (ofs)); \
11275+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 7);
11276+
11277+#define stbir__2_coeff_remnant(ofs) \
11278+ STBIR_SIMD_NO_UNROLL(decode); \
11279+ stbir__simdf8_load1b(c, hc + (ofs)); \
11280+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 7); \
11281+ stbir__simdf8_load1b(c, hc + (ofs) + 1); \
11282+ stbir__simdf8_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 7);
11283+
11284+#define stbir__3_coeff_remnant(ofs) \
11285+ STBIR_SIMD_NO_UNROLL(decode); \
11286+ stbir__simdf8_load4b(cs, hc + (ofs)); \
11287+ stbir__simdf8_0123to00000000(c, cs); \
11288+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 7); \
11289+ stbir__simdf8_0123to11111111(c, cs); \
11290+ stbir__simdf8_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 7); \
11291+ stbir__simdf8_0123to22222222(c, cs); \
11292+ stbir__simdf8_madd_mem(tot0, tot0, c, decode + (ofs) * 7 + 14);
11293+
11294+#define stbir__store_output() \
11295+ stbir__simdf8_add(tot0, tot0, tot1); \
11296+ horizontal_coefficients += coefficient_width; \
11297+ ++horizontal_contributors; \
11298+ output += 7; \
11299+ if (output < output_end) { \
11300+ stbir__simdf8_store(output - 7, tot0); \
11301+ continue; \
11302+ } \
11303+ stbir__simdf_store( \
11304+ output - 7 + 3, \
11305+ stbir__simdf_swiz(stbir__simdf8_gettop4(tot0), 0, 0, 1, 2)); \
11306+ stbir__simdf_store(output - 7, stbir__if_simdf8_cast_to_simdf4(tot0)); \
11307+ break;
11308
11309 #else
11310
11311-#define stbir__4_coeff_start() \
11312- stbir__simdf tot0,tot1,tot2,tot3,c,cs; \
11313- STBIR_SIMD_NO_UNROLL(decode); \
11314- stbir__simdf_load( cs, hc ); \
11315- stbir__simdf_0123to0000( c, cs ); \
11316- stbir__simdf_mult_mem( tot0, c, decode ); \
11317- stbir__simdf_mult_mem( tot1, c, decode+3 ); \
11318- stbir__simdf_0123to1111( c, cs ); \
11319- stbir__simdf_mult_mem( tot2, c, decode+7 ); \
11320- stbir__simdf_mult_mem( tot3, c, decode+10 ); \
11321- stbir__simdf_0123to2222( c, cs ); \
11322- stbir__simdf_madd_mem( tot0, tot0, c, decode+14 ); \
11323- stbir__simdf_madd_mem( tot1, tot1, c, decode+17 ); \
11324- stbir__simdf_0123to3333( c, cs ); \
11325- stbir__simdf_madd_mem( tot2, tot2, c, decode+21 ); \
11326- stbir__simdf_madd_mem( tot3, tot3, c, decode+24 );
11327-
11328-#define stbir__4_coeff_continue_from_4( ofs ) \
11329- STBIR_SIMD_NO_UNROLL(decode); \
11330- stbir__simdf_load( cs, hc + (ofs) ); \
11331- stbir__simdf_0123to0000( c, cs ); \
11332- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \
11333- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+3 ); \
11334- stbir__simdf_0123to1111( c, cs ); \
11335- stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*7+7 ); \
11336- stbir__simdf_madd_mem( tot3, tot3, c, decode+(ofs)*7+10 ); \
11337- stbir__simdf_0123to2222( c, cs ); \
11338- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7+14 ); \
11339- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+17 ); \
11340- stbir__simdf_0123to3333( c, cs ); \
11341- stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*7+21 ); \
11342- stbir__simdf_madd_mem( tot3, tot3, c, decode+(ofs)*7+24 );
11343-
11344-#define stbir__1_coeff_remnant( ofs ) \
11345- STBIR_SIMD_NO_UNROLL(decode); \
11346- stbir__simdf_load1( c, hc + (ofs) ); \
11347- stbir__simdf_0123to0000( c, c ); \
11348- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \
11349- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+3 ); \
11350-
11351-#define stbir__2_coeff_remnant( ofs ) \
11352- STBIR_SIMD_NO_UNROLL(decode); \
11353- stbir__simdf_load2( cs, hc + (ofs) ); \
11354- stbir__simdf_0123to0000( c, cs ); \
11355- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \
11356- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+3 ); \
11357- stbir__simdf_0123to1111( c, cs ); \
11358- stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*7+7 ); \
11359- stbir__simdf_madd_mem( tot3, tot3, c, decode+(ofs)*7+10 );
11360-
11361-#define stbir__3_coeff_remnant( ofs ) \
11362- STBIR_SIMD_NO_UNROLL(decode); \
11363- stbir__simdf_load( cs, hc + (ofs) ); \
11364- stbir__simdf_0123to0000( c, cs ); \
11365- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7 ); \
11366- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+3 ); \
11367- stbir__simdf_0123to1111( c, cs ); \
11368- stbir__simdf_madd_mem( tot2, tot2, c, decode+(ofs)*7+7 ); \
11369- stbir__simdf_madd_mem( tot3, tot3, c, decode+(ofs)*7+10 ); \
11370- stbir__simdf_0123to2222( c, cs ); \
11371- stbir__simdf_madd_mem( tot0, tot0, c, decode+(ofs)*7+14 ); \
11372- stbir__simdf_madd_mem( tot1, tot1, c, decode+(ofs)*7+17 );
11373-
11374-#define stbir__store_output() \
11375- stbir__simdf_add( tot0, tot0, tot2 ); \
11376- stbir__simdf_add( tot1, tot1, tot3 ); \
11377- stbir__simdf_store( output+3, tot1 ); \
11378- stbir__simdf_store( output, tot0 ); \
11379- horizontal_coefficients += coefficient_width; \
11380- ++horizontal_contributors; \
11381- output += 7;
11382+#define stbir__4_coeff_start() \
11383+ stbir__simdf tot0, tot1, tot2, tot3, c, cs; \
11384+ STBIR_SIMD_NO_UNROLL(decode); \
11385+ stbir__simdf_load(cs, hc); \
11386+ stbir__simdf_0123to0000(c, cs); \
11387+ stbir__simdf_mult_mem(tot0, c, decode); \
11388+ stbir__simdf_mult_mem(tot1, c, decode + 3); \
11389+ stbir__simdf_0123to1111(c, cs); \
11390+ stbir__simdf_mult_mem(tot2, c, decode + 7); \
11391+ stbir__simdf_mult_mem(tot3, c, decode + 10); \
11392+ stbir__simdf_0123to2222(c, cs); \
11393+ stbir__simdf_madd_mem(tot0, tot0, c, decode + 14); \
11394+ stbir__simdf_madd_mem(tot1, tot1, c, decode + 17); \
11395+ stbir__simdf_0123to3333(c, cs); \
11396+ stbir__simdf_madd_mem(tot2, tot2, c, decode + 21); \
11397+ stbir__simdf_madd_mem(tot3, tot3, c, decode + 24);
11398+
11399+#define stbir__4_coeff_continue_from_4(ofs) \
11400+ STBIR_SIMD_NO_UNROLL(decode); \
11401+ stbir__simdf_load(cs, hc + (ofs)); \
11402+ stbir__simdf_0123to0000(c, cs); \
11403+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 7); \
11404+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 3); \
11405+ stbir__simdf_0123to1111(c, cs); \
11406+ stbir__simdf_madd_mem(tot2, tot2, c, decode + (ofs) * 7 + 7); \
11407+ stbir__simdf_madd_mem(tot3, tot3, c, decode + (ofs) * 7 + 10); \
11408+ stbir__simdf_0123to2222(c, cs); \
11409+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 7 + 14); \
11410+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 17); \
11411+ stbir__simdf_0123to3333(c, cs); \
11412+ stbir__simdf_madd_mem(tot2, tot2, c, decode + (ofs) * 7 + 21); \
11413+ stbir__simdf_madd_mem(tot3, tot3, c, decode + (ofs) * 7 + 24);
11414+
11415+#define stbir__1_coeff_remnant(ofs) \
11416+ STBIR_SIMD_NO_UNROLL(decode); \
11417+ stbir__simdf_load1(c, hc + (ofs)); \
11418+ stbir__simdf_0123to0000(c, c); \
11419+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 7); \
11420+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 3);
11421+
11422+#define stbir__2_coeff_remnant(ofs) \
11423+ STBIR_SIMD_NO_UNROLL(decode); \
11424+ stbir__simdf_load2(cs, hc + (ofs)); \
11425+ stbir__simdf_0123to0000(c, cs); \
11426+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 7); \
11427+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 3); \
11428+ stbir__simdf_0123to1111(c, cs); \
11429+ stbir__simdf_madd_mem(tot2, tot2, c, decode + (ofs) * 7 + 7); \
11430+ stbir__simdf_madd_mem(tot3, tot3, c, decode + (ofs) * 7 + 10);
11431+
11432+#define stbir__3_coeff_remnant(ofs) \
11433+ STBIR_SIMD_NO_UNROLL(decode); \
11434+ stbir__simdf_load(cs, hc + (ofs)); \
11435+ stbir__simdf_0123to0000(c, cs); \
11436+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 7); \
11437+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 3); \
11438+ stbir__simdf_0123to1111(c, cs); \
11439+ stbir__simdf_madd_mem(tot2, tot2, c, decode + (ofs) * 7 + 7); \
11440+ stbir__simdf_madd_mem(tot3, tot3, c, decode + (ofs) * 7 + 10); \
11441+ stbir__simdf_0123to2222(c, cs); \
11442+ stbir__simdf_madd_mem(tot0, tot0, c, decode + (ofs) * 7 + 14); \
11443+ stbir__simdf_madd_mem(tot1, tot1, c, decode + (ofs) * 7 + 17);
11444+
11445+#define stbir__store_output() \
11446+ stbir__simdf_add(tot0, tot0, tot2); \
11447+ stbir__simdf_add(tot1, tot1, tot3); \
11448+ stbir__simdf_store(output + 3, tot1); \
11449+ stbir__simdf_store(output, tot0); \
11450+ horizontal_coefficients += coefficient_width; \
11451+ ++horizontal_contributors; \
11452+ output += 7;
11453
11454 #endif
11455
11456 #else
11457
11458-#define stbir__1_coeff_only() \
11459- float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \
11460- c = hc[0]; \
11461- tot0 = decode[0]*c; \
11462- tot1 = decode[1]*c; \
11463- tot2 = decode[2]*c; \
11464- tot3 = decode[3]*c; \
11465- tot4 = decode[4]*c; \
11466- tot5 = decode[5]*c; \
11467- tot6 = decode[6]*c;
11468-
11469-#define stbir__2_coeff_only() \
11470- float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \
11471- c = hc[0]; \
11472- tot0 = decode[0]*c; \
11473- tot1 = decode[1]*c; \
11474- tot2 = decode[2]*c; \
11475- tot3 = decode[3]*c; \
11476- tot4 = decode[4]*c; \
11477- tot5 = decode[5]*c; \
11478- tot6 = decode[6]*c; \
11479- c = hc[1]; \
11480- tot0 += decode[7]*c; \
11481- tot1 += decode[8]*c; \
11482- tot2 += decode[9]*c; \
11483- tot3 += decode[10]*c; \
11484- tot4 += decode[11]*c; \
11485- tot5 += decode[12]*c; \
11486- tot6 += decode[13]*c; \
11487-
11488-#define stbir__3_coeff_only() \
11489- float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \
11490- c = hc[0]; \
11491- tot0 = decode[0]*c; \
11492- tot1 = decode[1]*c; \
11493- tot2 = decode[2]*c; \
11494- tot3 = decode[3]*c; \
11495- tot4 = decode[4]*c; \
11496- tot5 = decode[5]*c; \
11497- tot6 = decode[6]*c; \
11498- c = hc[1]; \
11499- tot0 += decode[7]*c; \
11500- tot1 += decode[8]*c; \
11501- tot2 += decode[9]*c; \
11502- tot3 += decode[10]*c; \
11503- tot4 += decode[11]*c; \
11504- tot5 += decode[12]*c; \
11505- tot6 += decode[13]*c; \
11506- c = hc[2]; \
11507- tot0 += decode[14]*c; \
11508- tot1 += decode[15]*c; \
11509- tot2 += decode[16]*c; \
11510- tot3 += decode[17]*c; \
11511- tot4 += decode[18]*c; \
11512- tot5 += decode[19]*c; \
11513- tot6 += decode[20]*c; \
11514-
11515-#define stbir__store_output_tiny() \
11516- output[0] = tot0; \
11517- output[1] = tot1; \
11518- output[2] = tot2; \
11519- output[3] = tot3; \
11520- output[4] = tot4; \
11521- output[5] = tot5; \
11522- output[6] = tot6; \
11523- horizontal_coefficients += coefficient_width; \
11524- ++horizontal_contributors; \
11525- output += 7;
11526-
11527-#define stbir__4_coeff_start() \
11528- float x0,x1,x2,x3,x4,x5,x6,y0,y1,y2,y3,y4,y5,y6,c; \
11529- STBIR_SIMD_NO_UNROLL(decode); \
11530- c = hc[0]; \
11531- x0 = decode[0] * c; \
11532- x1 = decode[1] * c; \
11533- x2 = decode[2] * c; \
11534- x3 = decode[3] * c; \
11535- x4 = decode[4] * c; \
11536- x5 = decode[5] * c; \
11537- x6 = decode[6] * c; \
11538- c = hc[1]; \
11539- y0 = decode[7] * c; \
11540- y1 = decode[8] * c; \
11541- y2 = decode[9] * c; \
11542- y3 = decode[10] * c; \
11543- y4 = decode[11] * c; \
11544- y5 = decode[12] * c; \
11545- y6 = decode[13] * c; \
11546- c = hc[2]; \
11547- x0 += decode[14] * c; \
11548- x1 += decode[15] * c; \
11549- x2 += decode[16] * c; \
11550- x3 += decode[17] * c; \
11551- x4 += decode[18] * c; \
11552- x5 += decode[19] * c; \
11553- x6 += decode[20] * c; \
11554- c = hc[3]; \
11555- y0 += decode[21] * c; \
11556- y1 += decode[22] * c; \
11557- y2 += decode[23] * c; \
11558- y3 += decode[24] * c; \
11559- y4 += decode[25] * c; \
11560- y5 += decode[26] * c; \
11561- y6 += decode[27] * c;
11562-
11563-#define stbir__4_coeff_continue_from_4( ofs ) \
11564- STBIR_SIMD_NO_UNROLL(decode); \
11565- c = hc[0+(ofs)]; \
11566- x0 += decode[0+(ofs)*7] * c; \
11567- x1 += decode[1+(ofs)*7] * c; \
11568- x2 += decode[2+(ofs)*7] * c; \
11569- x3 += decode[3+(ofs)*7] * c; \
11570- x4 += decode[4+(ofs)*7] * c; \
11571- x5 += decode[5+(ofs)*7] * c; \
11572- x6 += decode[6+(ofs)*7] * c; \
11573- c = hc[1+(ofs)]; \
11574- y0 += decode[7+(ofs)*7] * c; \
11575- y1 += decode[8+(ofs)*7] * c; \
11576- y2 += decode[9+(ofs)*7] * c; \
11577- y3 += decode[10+(ofs)*7] * c; \
11578- y4 += decode[11+(ofs)*7] * c; \
11579- y5 += decode[12+(ofs)*7] * c; \
11580- y6 += decode[13+(ofs)*7] * c; \
11581- c = hc[2+(ofs)]; \
11582- x0 += decode[14+(ofs)*7] * c; \
11583- x1 += decode[15+(ofs)*7] * c; \
11584- x2 += decode[16+(ofs)*7] * c; \
11585- x3 += decode[17+(ofs)*7] * c; \
11586- x4 += decode[18+(ofs)*7] * c; \
11587- x5 += decode[19+(ofs)*7] * c; \
11588- x6 += decode[20+(ofs)*7] * c; \
11589- c = hc[3+(ofs)]; \
11590- y0 += decode[21+(ofs)*7] * c; \
11591- y1 += decode[22+(ofs)*7] * c; \
11592- y2 += decode[23+(ofs)*7] * c; \
11593- y3 += decode[24+(ofs)*7] * c; \
11594- y4 += decode[25+(ofs)*7] * c; \
11595- y5 += decode[26+(ofs)*7] * c; \
11596- y6 += decode[27+(ofs)*7] * c;
11597-
11598-#define stbir__1_coeff_remnant( ofs ) \
11599- STBIR_SIMD_NO_UNROLL(decode); \
11600- c = hc[0+(ofs)]; \
11601- x0 += decode[0+(ofs)*7] * c; \
11602- x1 += decode[1+(ofs)*7] * c; \
11603- x2 += decode[2+(ofs)*7] * c; \
11604- x3 += decode[3+(ofs)*7] * c; \
11605- x4 += decode[4+(ofs)*7] * c; \
11606- x5 += decode[5+(ofs)*7] * c; \
11607- x6 += decode[6+(ofs)*7] * c; \
11608-
11609-#define stbir__2_coeff_remnant( ofs ) \
11610- STBIR_SIMD_NO_UNROLL(decode); \
11611- c = hc[0+(ofs)]; \
11612- x0 += decode[0+(ofs)*7] * c; \
11613- x1 += decode[1+(ofs)*7] * c; \
11614- x2 += decode[2+(ofs)*7] * c; \
11615- x3 += decode[3+(ofs)*7] * c; \
11616- x4 += decode[4+(ofs)*7] * c; \
11617- x5 += decode[5+(ofs)*7] * c; \
11618- x6 += decode[6+(ofs)*7] * c; \
11619- c = hc[1+(ofs)]; \
11620- y0 += decode[7+(ofs)*7] * c; \
11621- y1 += decode[8+(ofs)*7] * c; \
11622- y2 += decode[9+(ofs)*7] * c; \
11623- y3 += decode[10+(ofs)*7] * c; \
11624- y4 += decode[11+(ofs)*7] * c; \
11625- y5 += decode[12+(ofs)*7] * c; \
11626- y6 += decode[13+(ofs)*7] * c; \
11627-
11628-#define stbir__3_coeff_remnant( ofs ) \
11629- STBIR_SIMD_NO_UNROLL(decode); \
11630- c = hc[0+(ofs)]; \
11631- x0 += decode[0+(ofs)*7] * c; \
11632- x1 += decode[1+(ofs)*7] * c; \
11633- x2 += decode[2+(ofs)*7] * c; \
11634- x3 += decode[3+(ofs)*7] * c; \
11635- x4 += decode[4+(ofs)*7] * c; \
11636- x5 += decode[5+(ofs)*7] * c; \
11637- x6 += decode[6+(ofs)*7] * c; \
11638- c = hc[1+(ofs)]; \
11639- y0 += decode[7+(ofs)*7] * c; \
11640- y1 += decode[8+(ofs)*7] * c; \
11641- y2 += decode[9+(ofs)*7] * c; \
11642- y3 += decode[10+(ofs)*7] * c; \
11643- y4 += decode[11+(ofs)*7] * c; \
11644- y5 += decode[12+(ofs)*7] * c; \
11645- y6 += decode[13+(ofs)*7] * c; \
11646- c = hc[2+(ofs)]; \
11647- x0 += decode[14+(ofs)*7] * c; \
11648- x1 += decode[15+(ofs)*7] * c; \
11649- x2 += decode[16+(ofs)*7] * c; \
11650- x3 += decode[17+(ofs)*7] * c; \
11651- x4 += decode[18+(ofs)*7] * c; \
11652- x5 += decode[19+(ofs)*7] * c; \
11653- x6 += decode[20+(ofs)*7] * c; \
11654-
11655-#define stbir__store_output() \
11656- output[0] = x0 + y0; \
11657- output[1] = x1 + y1; \
11658- output[2] = x2 + y2; \
11659- output[3] = x3 + y3; \
11660- output[4] = x4 + y4; \
11661- output[5] = x5 + y5; \
11662- output[6] = x6 + y6; \
11663- horizontal_coefficients += coefficient_width; \
11664- ++horizontal_contributors; \
11665- output += 7;
11666+#define stbir__1_coeff_only() \
11667+ float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \
11668+ c = hc[0]; \
11669+ tot0 = decode[0] * c; \
11670+ tot1 = decode[1] * c; \
11671+ tot2 = decode[2] * c; \
11672+ tot3 = decode[3] * c; \
11673+ tot4 = decode[4] * c; \
11674+ tot5 = decode[5] * c; \
11675+ tot6 = decode[6] * c;
11676+
11677+#define stbir__2_coeff_only() \
11678+ float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \
11679+ c = hc[0]; \
11680+ tot0 = decode[0] * c; \
11681+ tot1 = decode[1] * c; \
11682+ tot2 = decode[2] * c; \
11683+ tot3 = decode[3] * c; \
11684+ tot4 = decode[4] * c; \
11685+ tot5 = decode[5] * c; \
11686+ tot6 = decode[6] * c; \
11687+ c = hc[1]; \
11688+ tot0 += decode[7] * c; \
11689+ tot1 += decode[8] * c; \
11690+ tot2 += decode[9] * c; \
11691+ tot3 += decode[10] * c; \
11692+ tot4 += decode[11] * c; \
11693+ tot5 += decode[12] * c; \
11694+ tot6 += decode[13] * c;
11695+
11696+#define stbir__3_coeff_only() \
11697+ float tot0, tot1, tot2, tot3, tot4, tot5, tot6, c; \
11698+ c = hc[0]; \
11699+ tot0 = decode[0] * c; \
11700+ tot1 = decode[1] * c; \
11701+ tot2 = decode[2] * c; \
11702+ tot3 = decode[3] * c; \
11703+ tot4 = decode[4] * c; \
11704+ tot5 = decode[5] * c; \
11705+ tot6 = decode[6] * c; \
11706+ c = hc[1]; \
11707+ tot0 += decode[7] * c; \
11708+ tot1 += decode[8] * c; \
11709+ tot2 += decode[9] * c; \
11710+ tot3 += decode[10] * c; \
11711+ tot4 += decode[11] * c; \
11712+ tot5 += decode[12] * c; \
11713+ tot6 += decode[13] * c; \
11714+ c = hc[2]; \
11715+ tot0 += decode[14] * c; \
11716+ tot1 += decode[15] * c; \
11717+ tot2 += decode[16] * c; \
11718+ tot3 += decode[17] * c; \
11719+ tot4 += decode[18] * c; \
11720+ tot5 += decode[19] * c; \
11721+ tot6 += decode[20] * c;
11722+
11723+#define stbir__store_output_tiny() \
11724+ output[0] = tot0; \
11725+ output[1] = tot1; \
11726+ output[2] = tot2; \
11727+ output[3] = tot3; \
11728+ output[4] = tot4; \
11729+ output[5] = tot5; \
11730+ output[6] = tot6; \
11731+ horizontal_coefficients += coefficient_width; \
11732+ ++horizontal_contributors; \
11733+ output += 7;
11734+
11735+#define stbir__4_coeff_start() \
11736+ float x0, x1, x2, x3, x4, x5, x6, y0, y1, y2, y3, y4, y5, y6, c; \
11737+ STBIR_SIMD_NO_UNROLL(decode); \
11738+ c = hc[0]; \
11739+ x0 = decode[0] * c; \
11740+ x1 = decode[1] * c; \
11741+ x2 = decode[2] * c; \
11742+ x3 = decode[3] * c; \
11743+ x4 = decode[4] * c; \
11744+ x5 = decode[5] * c; \
11745+ x6 = decode[6] * c; \
11746+ c = hc[1]; \
11747+ y0 = decode[7] * c; \
11748+ y1 = decode[8] * c; \
11749+ y2 = decode[9] * c; \
11750+ y3 = decode[10] * c; \
11751+ y4 = decode[11] * c; \
11752+ y5 = decode[12] * c; \
11753+ y6 = decode[13] * c; \
11754+ c = hc[2]; \
11755+ x0 += decode[14] * c; \
11756+ x1 += decode[15] * c; \
11757+ x2 += decode[16] * c; \
11758+ x3 += decode[17] * c; \
11759+ x4 += decode[18] * c; \
11760+ x5 += decode[19] * c; \
11761+ x6 += decode[20] * c; \
11762+ c = hc[3]; \
11763+ y0 += decode[21] * c; \
11764+ y1 += decode[22] * c; \
11765+ y2 += decode[23] * c; \
11766+ y3 += decode[24] * c; \
11767+ y4 += decode[25] * c; \
11768+ y5 += decode[26] * c; \
11769+ y6 += decode[27] * c;
11770+
11771+#define stbir__4_coeff_continue_from_4(ofs) \
11772+ STBIR_SIMD_NO_UNROLL(decode); \
11773+ c = hc[0 + (ofs)]; \
11774+ x0 += decode[0 + (ofs) * 7] * c; \
11775+ x1 += decode[1 + (ofs) * 7] * c; \
11776+ x2 += decode[2 + (ofs) * 7] * c; \
11777+ x3 += decode[3 + (ofs) * 7] * c; \
11778+ x4 += decode[4 + (ofs) * 7] * c; \
11779+ x5 += decode[5 + (ofs) * 7] * c; \
11780+ x6 += decode[6 + (ofs) * 7] * c; \
11781+ c = hc[1 + (ofs)]; \
11782+ y0 += decode[7 + (ofs) * 7] * c; \
11783+ y1 += decode[8 + (ofs) * 7] * c; \
11784+ y2 += decode[9 + (ofs) * 7] * c; \
11785+ y3 += decode[10 + (ofs) * 7] * c; \
11786+ y4 += decode[11 + (ofs) * 7] * c; \
11787+ y5 += decode[12 + (ofs) * 7] * c; \
11788+ y6 += decode[13 + (ofs) * 7] * c; \
11789+ c = hc[2 + (ofs)]; \
11790+ x0 += decode[14 + (ofs) * 7] * c; \
11791+ x1 += decode[15 + (ofs) * 7] * c; \
11792+ x2 += decode[16 + (ofs) * 7] * c; \
11793+ x3 += decode[17 + (ofs) * 7] * c; \
11794+ x4 += decode[18 + (ofs) * 7] * c; \
11795+ x5 += decode[19 + (ofs) * 7] * c; \
11796+ x6 += decode[20 + (ofs) * 7] * c; \
11797+ c = hc[3 + (ofs)]; \
11798+ y0 += decode[21 + (ofs) * 7] * c; \
11799+ y1 += decode[22 + (ofs) * 7] * c; \
11800+ y2 += decode[23 + (ofs) * 7] * c; \
11801+ y3 += decode[24 + (ofs) * 7] * c; \
11802+ y4 += decode[25 + (ofs) * 7] * c; \
11803+ y5 += decode[26 + (ofs) * 7] * c; \
11804+ y6 += decode[27 + (ofs) * 7] * c;
11805+
11806+#define stbir__1_coeff_remnant(ofs) \
11807+ STBIR_SIMD_NO_UNROLL(decode); \
11808+ c = hc[0 + (ofs)]; \
11809+ x0 += decode[0 + (ofs) * 7] * c; \
11810+ x1 += decode[1 + (ofs) * 7] * c; \
11811+ x2 += decode[2 + (ofs) * 7] * c; \
11812+ x3 += decode[3 + (ofs) * 7] * c; \
11813+ x4 += decode[4 + (ofs) * 7] * c; \
11814+ x5 += decode[5 + (ofs) * 7] * c; \
11815+ x6 += decode[6 + (ofs) * 7] * c;
11816+
11817+#define stbir__2_coeff_remnant(ofs) \
11818+ STBIR_SIMD_NO_UNROLL(decode); \
11819+ c = hc[0 + (ofs)]; \
11820+ x0 += decode[0 + (ofs) * 7] * c; \
11821+ x1 += decode[1 + (ofs) * 7] * c; \
11822+ x2 += decode[2 + (ofs) * 7] * c; \
11823+ x3 += decode[3 + (ofs) * 7] * c; \
11824+ x4 += decode[4 + (ofs) * 7] * c; \
11825+ x5 += decode[5 + (ofs) * 7] * c; \
11826+ x6 += decode[6 + (ofs) * 7] * c; \
11827+ c = hc[1 + (ofs)]; \
11828+ y0 += decode[7 + (ofs) * 7] * c; \
11829+ y1 += decode[8 + (ofs) * 7] * c; \
11830+ y2 += decode[9 + (ofs) * 7] * c; \
11831+ y3 += decode[10 + (ofs) * 7] * c; \
11832+ y4 += decode[11 + (ofs) * 7] * c; \
11833+ y5 += decode[12 + (ofs) * 7] * c; \
11834+ y6 += decode[13 + (ofs) * 7] * c;
11835+
11836+#define stbir__3_coeff_remnant(ofs) \
11837+ STBIR_SIMD_NO_UNROLL(decode); \
11838+ c = hc[0 + (ofs)]; \
11839+ x0 += decode[0 + (ofs) * 7] * c; \
11840+ x1 += decode[1 + (ofs) * 7] * c; \
11841+ x2 += decode[2 + (ofs) * 7] * c; \
11842+ x3 += decode[3 + (ofs) * 7] * c; \
11843+ x4 += decode[4 + (ofs) * 7] * c; \
11844+ x5 += decode[5 + (ofs) * 7] * c; \
11845+ x6 += decode[6 + (ofs) * 7] * c; \
11846+ c = hc[1 + (ofs)]; \
11847+ y0 += decode[7 + (ofs) * 7] * c; \
11848+ y1 += decode[8 + (ofs) * 7] * c; \
11849+ y2 += decode[9 + (ofs) * 7] * c; \
11850+ y3 += decode[10 + (ofs) * 7] * c; \
11851+ y4 += decode[11 + (ofs) * 7] * c; \
11852+ y5 += decode[12 + (ofs) * 7] * c; \
11853+ y6 += decode[13 + (ofs) * 7] * c; \
11854+ c = hc[2 + (ofs)]; \
11855+ x0 += decode[14 + (ofs) * 7] * c; \
11856+ x1 += decode[15 + (ofs) * 7] * c; \
11857+ x2 += decode[16 + (ofs) * 7] * c; \
11858+ x3 += decode[17 + (ofs) * 7] * c; \
11859+ x4 += decode[18 + (ofs) * 7] * c; \
11860+ x5 += decode[19 + (ofs) * 7] * c; \
11861+ x6 += decode[20 + (ofs) * 7] * c;
11862+
11863+#define stbir__store_output() \
11864+ output[0] = x0 + y0; \
11865+ output[1] = x1 + y1; \
11866+ output[2] = x2 + y2; \
11867+ output[3] = x3 + y3; \
11868+ output[4] = x4 + y4; \
11869+ output[5] = x5 + y5; \
11870+ output[6] = x6 + y6; \
11871+ horizontal_coefficients += coefficient_width; \
11872+ ++horizontal_contributors; \
11873+ output += 7;
11874
11875 #endif
11876
11877@@ -6053,7 +6945,6 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
11878 #define STB_IMAGE_RESIZE_DO_HORIZONTALS
11879 #include STBIR__HEADER_FILENAME
11880
11881-
11882 // include all of the vertical resamplers (both scatter and gather versions)
11883
11884 #define STBIR__vertical_channels 1
11885@@ -6128,801 +7019,1081 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
11886 #define STB_IMAGE_RESIZE_VERTICAL_CONTINUE
11887 #include STBIR__HEADER_FILENAME
11888
11889-typedef void STBIR_VERTICAL_GATHERFUNC( float * output, float const * coeffs, float const ** inputs, float const * input0_end );
11890-
11891-static STBIR_VERTICAL_GATHERFUNC * stbir__vertical_gathers[ 8 ] =
11892-{
11893- stbir__vertical_gather_with_1_coeffs,stbir__vertical_gather_with_2_coeffs,stbir__vertical_gather_with_3_coeffs,stbir__vertical_gather_with_4_coeffs,stbir__vertical_gather_with_5_coeffs,stbir__vertical_gather_with_6_coeffs,stbir__vertical_gather_with_7_coeffs,stbir__vertical_gather_with_8_coeffs
11894-};
11895-
11896-static STBIR_VERTICAL_GATHERFUNC * stbir__vertical_gathers_continues[ 8 ] =
11897-{
11898- stbir__vertical_gather_with_1_coeffs_cont,stbir__vertical_gather_with_2_coeffs_cont,stbir__vertical_gather_with_3_coeffs_cont,stbir__vertical_gather_with_4_coeffs_cont,stbir__vertical_gather_with_5_coeffs_cont,stbir__vertical_gather_with_6_coeffs_cont,stbir__vertical_gather_with_7_coeffs_cont,stbir__vertical_gather_with_8_coeffs_cont
11899-};
11900-
11901-typedef void STBIR_VERTICAL_SCATTERFUNC( float ** outputs, float const * coeffs, float const * input, float const * input_end );
11902-
11903-static STBIR_VERTICAL_SCATTERFUNC * stbir__vertical_scatter_sets[ 8 ] =
11904-{
11905- stbir__vertical_scatter_with_1_coeffs,stbir__vertical_scatter_with_2_coeffs,stbir__vertical_scatter_with_3_coeffs,stbir__vertical_scatter_with_4_coeffs,stbir__vertical_scatter_with_5_coeffs,stbir__vertical_scatter_with_6_coeffs,stbir__vertical_scatter_with_7_coeffs,stbir__vertical_scatter_with_8_coeffs
11906-};
11907-
11908-static STBIR_VERTICAL_SCATTERFUNC * stbir__vertical_scatter_blends[ 8 ] =
11909-{
11910- stbir__vertical_scatter_with_1_coeffs_cont,stbir__vertical_scatter_with_2_coeffs_cont,stbir__vertical_scatter_with_3_coeffs_cont,stbir__vertical_scatter_with_4_coeffs_cont,stbir__vertical_scatter_with_5_coeffs_cont,stbir__vertical_scatter_with_6_coeffs_cont,stbir__vertical_scatter_with_7_coeffs_cont,stbir__vertical_scatter_with_8_coeffs_cont
11911-};
11912-
11913-
11914-static void stbir__encode_scanline( stbir__info const * stbir_info, void *output_buffer_data, float * encode_buffer, int row STBIR_ONLY_PROFILE_GET_SPLIT_INFO )
11915-{
11916- int num_pixels = stbir_info->horizontal.scale_info.output_sub_size;
11917- int channels = stbir_info->channels;
11918- int width_times_channels = num_pixels * channels;
11919- void * output_buffer;
11920-
11921- // un-alpha weight if we need to
11922- if ( stbir_info->alpha_unweight )
11923- {
11924- STBIR_PROFILE_START( unalpha );
11925- stbir_info->alpha_unweight( encode_buffer, width_times_channels );
11926- STBIR_PROFILE_END( unalpha );
11927- }
11928-
11929- // write directly into output by default
11930- output_buffer = output_buffer_data;
11931-
11932- // if we have an output callback, we first convert the decode buffer in place (and then hand that to the callback)
11933- if ( stbir_info->out_pixels_cb )
11934- output_buffer = encode_buffer;
11935-
11936- STBIR_PROFILE_START( encode );
11937- // convert into the output buffer
11938- stbir_info->encode_pixels( output_buffer, width_times_channels, encode_buffer );
11939- STBIR_PROFILE_END( encode );
11940-
11941- // if we have an output callback, call it to send the data
11942- if ( stbir_info->out_pixels_cb )
11943- stbir_info->out_pixels_cb( output_buffer, num_pixels, row, stbir_info->user_data );
11944+typedef void
11945+STBIR_VERTICAL_GATHERFUNC(float *output, float const *coeffs,
11946+ float const **inputs, float const *input0_end);
11947+
11948+static STBIR_VERTICAL_GATHERFUNC *stbir__vertical_gathers[8] = {
11949+ stbir__vertical_gather_with_1_coeffs, stbir__vertical_gather_with_2_coeffs,
11950+ stbir__vertical_gather_with_3_coeffs, stbir__vertical_gather_with_4_coeffs,
11951+ stbir__vertical_gather_with_5_coeffs, stbir__vertical_gather_with_6_coeffs,
11952+ stbir__vertical_gather_with_7_coeffs, stbir__vertical_gather_with_8_coeffs};
11953+
11954+static STBIR_VERTICAL_GATHERFUNC *stbir__vertical_gathers_continues[8] = {
11955+ stbir__vertical_gather_with_1_coeffs_cont,
11956+ stbir__vertical_gather_with_2_coeffs_cont,
11957+ stbir__vertical_gather_with_3_coeffs_cont,
11958+ stbir__vertical_gather_with_4_coeffs_cont,
11959+ stbir__vertical_gather_with_5_coeffs_cont,
11960+ stbir__vertical_gather_with_6_coeffs_cont,
11961+ stbir__vertical_gather_with_7_coeffs_cont,
11962+ stbir__vertical_gather_with_8_coeffs_cont};
11963+
11964+typedef void
11965+STBIR_VERTICAL_SCATTERFUNC(float **outputs, float const *coeffs,
11966+ float const *input, float const *input_end);
11967+
11968+static STBIR_VERTICAL_SCATTERFUNC *stbir__vertical_scatter_sets[8] = {
11969+ stbir__vertical_scatter_with_1_coeffs,
11970+ stbir__vertical_scatter_with_2_coeffs,
11971+ stbir__vertical_scatter_with_3_coeffs,
11972+ stbir__vertical_scatter_with_4_coeffs,
11973+ stbir__vertical_scatter_with_5_coeffs,
11974+ stbir__vertical_scatter_with_6_coeffs,
11975+ stbir__vertical_scatter_with_7_coeffs,
11976+ stbir__vertical_scatter_with_8_coeffs};
11977+
11978+static STBIR_VERTICAL_SCATTERFUNC *stbir__vertical_scatter_blends[8] = {
11979+ stbir__vertical_scatter_with_1_coeffs_cont,
11980+ stbir__vertical_scatter_with_2_coeffs_cont,
11981+ stbir__vertical_scatter_with_3_coeffs_cont,
11982+ stbir__vertical_scatter_with_4_coeffs_cont,
11983+ stbir__vertical_scatter_with_5_coeffs_cont,
11984+ stbir__vertical_scatter_with_6_coeffs_cont,
11985+ stbir__vertical_scatter_with_7_coeffs_cont,
11986+ stbir__vertical_scatter_with_8_coeffs_cont};
11987+
11988+static void
11989+stbir__encode_scanline(stbir__info const *stbir_info, void *output_buffer_data,
11990+ float *encode_buffer,
11991+ int row STBIR_ONLY_PROFILE_GET_SPLIT_INFO)
11992+{
11993+ int num_pixels = stbir_info->horizontal.scale_info.output_sub_size;
11994+ int channels = stbir_info->channels;
11995+ int width_times_channels = num_pixels * channels;
11996+ void *output_buffer;
11997+
11998+ // un-alpha weight if we need to
11999+ if (stbir_info->alpha_unweight) {
12000+ STBIR_PROFILE_START(unalpha);
12001+ stbir_info->alpha_unweight(encode_buffer, width_times_channels);
12002+ STBIR_PROFILE_END(unalpha);
12003+ }
12004+
12005+ // write directly into output by default
12006+ output_buffer = output_buffer_data;
12007+
12008+ // if we have an output callback, we first convert the decode buffer in
12009+ // place (and then hand that to the callback)
12010+ if (stbir_info->out_pixels_cb) {
12011+ output_buffer = encode_buffer;
12012+ }
12013+
12014+ STBIR_PROFILE_START(encode);
12015+ // convert into the output buffer
12016+ stbir_info->encode_pixels(output_buffer, width_times_channels,
12017+ encode_buffer);
12018+ STBIR_PROFILE_END(encode);
12019+
12020+ // if we have an output callback, call it to send the data
12021+ if (stbir_info->out_pixels_cb) {
12022+ stbir_info->out_pixels_cb(output_buffer, num_pixels, row,
12023+ stbir_info->user_data);
12024+ }
12025 }
12026
12027-
12028 // Get the ring buffer pointer for an index
12029-static float* stbir__get_ring_buffer_entry(stbir__info const * stbir_info, stbir__per_split_info const * split_info, int index )
12030+static float *
12031+stbir__get_ring_buffer_entry(stbir__info const *stbir_info,
12032+ stbir__per_split_info const *split_info, int index)
12033 {
12034- STBIR_ASSERT( index < stbir_info->ring_buffer_num_entries );
12035+ STBIR_ASSERT(index < stbir_info->ring_buffer_num_entries);
12036
12037- #ifdef STBIR__SEPARATE_ALLOCATIONS
12038- return split_info->ring_buffers[ index ];
12039- #else
12040- return (float*) ( ( (char*) split_info->ring_buffer ) + ( index * stbir_info->ring_buffer_length_bytes ) );
12041- #endif
12042+#ifdef STBIR__SEPARATE_ALLOCATIONS
12043+ return split_info->ring_buffers[index];
12044+#else
12045+ return (float *)(((char *)split_info->ring_buffer) +
12046+ (index * stbir_info->ring_buffer_length_bytes));
12047+#endif
12048 }
12049
12050 // Get the specified scan line from the ring buffer
12051-static float* stbir__get_ring_buffer_scanline(stbir__info const * stbir_info, stbir__per_split_info const * split_info, int get_scanline)
12052-{
12053- int ring_buffer_index = (split_info->ring_buffer_begin_index + (get_scanline - split_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries;
12054- return stbir__get_ring_buffer_entry( stbir_info, split_info, ring_buffer_index );
12055-}
12056-
12057-static void stbir__resample_horizontal_gather(stbir__info const * stbir_info, float* output_buffer, float const * input_buffer STBIR_ONLY_PROFILE_GET_SPLIT_INFO )
12058-{
12059- float const * decode_buffer = input_buffer - ( stbir_info->scanline_extents.conservative.n0 * stbir_info->effective_channels );
12060-
12061- STBIR_PROFILE_START( horizontal );
12062- if ( ( stbir_info->horizontal.filter_enum == STBIR_FILTER_POINT_SAMPLE ) && ( stbir_info->horizontal.scale_info.scale == 1.0f ) )
12063- STBIR_MEMCPY( output_buffer, input_buffer, stbir_info->horizontal.scale_info.output_sub_size * sizeof( float ) * stbir_info->effective_channels );
12064- else
12065- stbir_info->horizontal_gather_channels( output_buffer, stbir_info->horizontal.scale_info.output_sub_size, decode_buffer, stbir_info->horizontal.contributors, stbir_info->horizontal.coefficients, stbir_info->horizontal.coefficient_width );
12066- STBIR_PROFILE_END( horizontal );
12067-}
12068-
12069-static void stbir__resample_vertical_gather(stbir__info const * stbir_info, stbir__per_split_info* split_info, int n, int contrib_n0, int contrib_n1, float const * vertical_coefficients )
12070-{
12071- float* encode_buffer = split_info->vertical_buffer;
12072- float* decode_buffer = split_info->decode_buffer;
12073- int vertical_first = stbir_info->vertical_first;
12074- int width = (vertical_first) ? ( stbir_info->scanline_extents.conservative.n1-stbir_info->scanline_extents.conservative.n0+1 ) : stbir_info->horizontal.scale_info.output_sub_size;
12075- int width_times_channels = stbir_info->effective_channels * width;
12076-
12077- STBIR_ASSERT( stbir_info->vertical.is_gather );
12078-
12079- // loop over the contributing scanlines and scale into the buffer
12080- STBIR_PROFILE_START( vertical );
12081- {
12082- int k = 0, total = contrib_n1 - contrib_n0 + 1;
12083- STBIR_ASSERT( total > 0 );
12084- do {
12085- float const * inputs[8];
12086- int i, cnt = total; if ( cnt > 8 ) cnt = 8;
12087- for( i = 0 ; i < cnt ; i++ )
12088- inputs[ i ] = stbir__get_ring_buffer_scanline(stbir_info, split_info, k+i+contrib_n0 );
12089-
12090- // call the N scanlines at a time function (up to 8 scanlines of blending at once)
12091- ((k==0)?stbir__vertical_gathers:stbir__vertical_gathers_continues)[cnt-1]( (vertical_first) ? decode_buffer : encode_buffer, vertical_coefficients + k, inputs, inputs[0] + width_times_channels );
12092- k += cnt;
12093- total -= cnt;
12094- } while ( total );
12095- }
12096- STBIR_PROFILE_END( vertical );
12097-
12098- if ( vertical_first )
12099- {
12100- // Now resample the gathered vertical data in the horizontal axis into the encode buffer
12101- decode_buffer[ width_times_channels ] = 0.0f; // clear two over for horizontals with a remnant of 3
12102- decode_buffer[ width_times_channels+1 ] = 0.0f;
12103- stbir__resample_horizontal_gather(stbir_info, encode_buffer, decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12104- }
12105-
12106- stbir__encode_scanline( stbir_info, ( (char *) stbir_info->output_data ) + ((size_t)n * (size_t)stbir_info->output_stride_bytes),
12107- encode_buffer, n STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12108-}
12109-
12110-static void stbir__decode_and_resample_for_vertical_gather_loop(stbir__info const * stbir_info, stbir__per_split_info* split_info, int n)
12111-{
12112- int ring_buffer_index;
12113- float* ring_buffer;
12114-
12115- // Decode the nth scanline from the source image into the decode buffer.
12116- stbir__decode_scanline( stbir_info, n, split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12117-
12118- // update new end scanline
12119- split_info->ring_buffer_last_scanline = n;
12120-
12121- // get ring buffer
12122- ring_buffer_index = (split_info->ring_buffer_begin_index + (split_info->ring_buffer_last_scanline - split_info->ring_buffer_first_scanline)) % stbir_info->ring_buffer_num_entries;
12123- ring_buffer = stbir__get_ring_buffer_entry(stbir_info, split_info, ring_buffer_index);
12124-
12125- // Now resample it into the ring buffer.
12126- stbir__resample_horizontal_gather( stbir_info, ring_buffer, split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12127-
12128- // Now it's sitting in the ring buffer ready to be used as source for the vertical sampling.
12129-}
12130-
12131-static void stbir__vertical_gather_loop( stbir__info const * stbir_info, stbir__per_split_info* split_info, int split_count )
12132-{
12133- int y, start_output_y, end_output_y;
12134- stbir__contributors* vertical_contributors = stbir_info->vertical.contributors;
12135- float const * vertical_coefficients = stbir_info->vertical.coefficients;
12136-
12137- STBIR_ASSERT( stbir_info->vertical.is_gather );
12138-
12139- start_output_y = split_info->start_output_y;
12140- end_output_y = split_info[split_count-1].end_output_y;
12141-
12142- vertical_contributors += start_output_y;
12143- vertical_coefficients += start_output_y * stbir_info->vertical.coefficient_width;
12144-
12145- // initialize the ring buffer for gathering
12146- split_info->ring_buffer_begin_index = 0;
12147- split_info->ring_buffer_first_scanline = vertical_contributors->n0;
12148- split_info->ring_buffer_last_scanline = split_info->ring_buffer_first_scanline - 1; // means "empty"
12149-
12150- for (y = start_output_y; y < end_output_y; y++)
12151- {
12152- int in_first_scanline, in_last_scanline;
12153-
12154- in_first_scanline = vertical_contributors->n0;
12155- in_last_scanline = vertical_contributors->n1;
12156-
12157- // make sure the indexing hasn't broken
12158- STBIR_ASSERT( in_first_scanline >= split_info->ring_buffer_first_scanline );
12159-
12160- // Load in new scanlines
12161- while (in_last_scanline > split_info->ring_buffer_last_scanline)
12162- {
12163- STBIR_ASSERT( ( split_info->ring_buffer_last_scanline - split_info->ring_buffer_first_scanline + 1 ) <= stbir_info->ring_buffer_num_entries );
12164-
12165- // make sure there was room in the ring buffer when we add new scanlines
12166- if ( ( split_info->ring_buffer_last_scanline - split_info->ring_buffer_first_scanline + 1 ) == stbir_info->ring_buffer_num_entries )
12167- {
12168- split_info->ring_buffer_first_scanline++;
12169- split_info->ring_buffer_begin_index++;
12170- }
12171-
12172- if ( stbir_info->vertical_first )
12173- {
12174- float * ring_buffer = stbir__get_ring_buffer_scanline( stbir_info, split_info, ++split_info->ring_buffer_last_scanline );
12175- // Decode the nth scanline from the source image into the decode buffer.
12176- stbir__decode_scanline( stbir_info, split_info->ring_buffer_last_scanline, ring_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12177- }
12178- else
12179- {
12180- stbir__decode_and_resample_for_vertical_gather_loop(stbir_info, split_info, split_info->ring_buffer_last_scanline + 1);
12181- }
12182- }
12183-
12184- // Now all buffers should be ready to write a row of vertical sampling, so do it.
12185- stbir__resample_vertical_gather(stbir_info, split_info, y, in_first_scanline, in_last_scanline, vertical_coefficients );
12186-
12187- ++vertical_contributors;
12188- vertical_coefficients += stbir_info->vertical.coefficient_width;
12189- }
12190+static float *
12191+stbir__get_ring_buffer_scanline(stbir__info const *stbir_info,
12192+ stbir__per_split_info const *split_info,
12193+ int get_scanline)
12194+{
12195+ int ring_buffer_index =
12196+ (split_info->ring_buffer_begin_index +
12197+ (get_scanline - split_info->ring_buffer_first_scanline)) %
12198+ stbir_info->ring_buffer_num_entries;
12199+ return stbir__get_ring_buffer_entry(stbir_info, split_info,
12200+ ring_buffer_index);
12201+}
12202+
12203+static void
12204+stbir__resample_horizontal_gather(
12205+ stbir__info const *stbir_info, float *output_buffer,
12206+ float const *input_buffer STBIR_ONLY_PROFILE_GET_SPLIT_INFO)
12207+{
12208+ float const *decode_buffer =
12209+ input_buffer - (stbir_info->scanline_extents.conservative.n0 *
12210+ stbir_info->effective_channels);
12211+
12212+ STBIR_PROFILE_START(horizontal);
12213+ if ((stbir_info->horizontal.filter_enum == STBIR_FILTER_POINT_SAMPLE) &&
12214+ (stbir_info->horizontal.scale_info.scale == 1.0f)) {
12215+ STBIR_MEMCPY(output_buffer, input_buffer,
12216+ stbir_info->horizontal.scale_info.output_sub_size *
12217+ sizeof(float) * stbir_info->effective_channels);
12218+ } else {
12219+ stbir_info->horizontal_gather_channels(
12220+ output_buffer, stbir_info->horizontal.scale_info.output_sub_size,
12221+ decode_buffer, stbir_info->horizontal.contributors,
12222+ stbir_info->horizontal.coefficients,
12223+ stbir_info->horizontal.coefficient_width);
12224+ }
12225+ STBIR_PROFILE_END(horizontal);
12226+}
12227+
12228+static void
12229+stbir__resample_vertical_gather(stbir__info const *stbir_info,
12230+ stbir__per_split_info *split_info, int n,
12231+ int contrib_n0, int contrib_n1,
12232+ float const *vertical_coefficients)
12233+{
12234+ float *encode_buffer = split_info->vertical_buffer;
12235+ float *decode_buffer = split_info->decode_buffer;
12236+ int vertical_first = stbir_info->vertical_first;
12237+ int width = (vertical_first)
12238+ ? (stbir_info->scanline_extents.conservative.n1 -
12239+ stbir_info->scanline_extents.conservative.n0 + 1)
12240+ : stbir_info->horizontal.scale_info.output_sub_size;
12241+ int width_times_channels = stbir_info->effective_channels * width;
12242+
12243+ STBIR_ASSERT(stbir_info->vertical.is_gather);
12244+
12245+ // loop over the contributing scanlines and scale into the buffer
12246+ STBIR_PROFILE_START(vertical);
12247+ {
12248+ int k = 0, total = contrib_n1 - contrib_n0 + 1;
12249+ STBIR_ASSERT(total > 0);
12250+ do {
12251+ float const *inputs[8];
12252+ int i, cnt = total;
12253+ if (cnt > 8) {
12254+ cnt = 8;
12255+ }
12256+ for (i = 0; i < cnt; i++) {
12257+ inputs[i] = stbir__get_ring_buffer_scanline(
12258+ stbir_info, split_info, k + i + contrib_n0);
12259+ }
12260+
12261+ // call the N scanlines at a time function (up to 8 scanlines of
12262+ // blending at once)
12263+ ((k == 0) ? stbir__vertical_gathers
12264+ : stbir__vertical_gathers_continues)[cnt - 1](
12265+ (vertical_first) ? decode_buffer : encode_buffer,
12266+ vertical_coefficients + k, inputs,
12267+ inputs[0] + width_times_channels);
12268+ k += cnt;
12269+ total -= cnt;
12270+ } while (total);
12271+ }
12272+ STBIR_PROFILE_END(vertical);
12273+
12274+ if (vertical_first) {
12275+ // Now resample the gathered vertical data in the horizontal axis into
12276+ // the encode buffer
12277+ decode_buffer[width_times_channels] =
12278+ 0.0f; // clear two over for horizontals with a remnant of 3
12279+ decode_buffer[width_times_channels + 1] = 0.0f;
12280+ stbir__resample_horizontal_gather(
12281+ stbir_info, encode_buffer,
12282+ decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
12283+ }
12284+
12285+ stbir__encode_scanline(
12286+ stbir_info,
12287+ ((char *)stbir_info->output_data) +
12288+ ((size_t)n * (size_t)stbir_info->output_stride_bytes),
12289+ encode_buffer, n STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
12290+}
12291+
12292+static void
12293+stbir__decode_and_resample_for_vertical_gather_loop(
12294+ stbir__info const *stbir_info, stbir__per_split_info *split_info, int n)
12295+{
12296+ int ring_buffer_index;
12297+ float *ring_buffer;
12298+
12299+ // Decode the nth scanline from the source image into the decode buffer.
12300+ stbir__decode_scanline(
12301+ stbir_info, n,
12302+ split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
12303+
12304+ // update new end scanline
12305+ split_info->ring_buffer_last_scanline = n;
12306+
12307+ // get ring buffer
12308+ ring_buffer_index = (split_info->ring_buffer_begin_index +
12309+ (split_info->ring_buffer_last_scanline -
12310+ split_info->ring_buffer_first_scanline)) %
12311+ stbir_info->ring_buffer_num_entries;
12312+ ring_buffer =
12313+ stbir__get_ring_buffer_entry(stbir_info, split_info, ring_buffer_index);
12314+
12315+ // Now resample it into the ring buffer.
12316+ stbir__resample_horizontal_gather(
12317+ stbir_info, ring_buffer,
12318+ split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
12319+
12320+ // Now it's sitting in the ring buffer ready to be used as source for the
12321+ // vertical sampling.
12322+}
12323+
12324+static void
12325+stbir__vertical_gather_loop(stbir__info const *stbir_info,
12326+ stbir__per_split_info *split_info, int split_count)
12327+{
12328+ int y, start_output_y, end_output_y;
12329+ stbir__contributors *vertical_contributors =
12330+ stbir_info->vertical.contributors;
12331+ float const *vertical_coefficients = stbir_info->vertical.coefficients;
12332+
12333+ STBIR_ASSERT(stbir_info->vertical.is_gather);
12334+
12335+ start_output_y = split_info->start_output_y;
12336+ end_output_y = split_info[split_count - 1].end_output_y;
12337+
12338+ vertical_contributors += start_output_y;
12339+ vertical_coefficients +=
12340+ start_output_y * stbir_info->vertical.coefficient_width;
12341+
12342+ // initialize the ring buffer for gathering
12343+ split_info->ring_buffer_begin_index = 0;
12344+ split_info->ring_buffer_first_scanline = vertical_contributors->n0;
12345+ split_info->ring_buffer_last_scanline =
12346+ split_info->ring_buffer_first_scanline - 1; // means "empty"
12347+
12348+ for (y = start_output_y; y < end_output_y; y++) {
12349+ int in_first_scanline, in_last_scanline;
12350+
12351+ in_first_scanline = vertical_contributors->n0;
12352+ in_last_scanline = vertical_contributors->n1;
12353+
12354+ // make sure the indexing hasn't broken
12355+ STBIR_ASSERT(in_first_scanline >=
12356+ split_info->ring_buffer_first_scanline);
12357+
12358+ // Load in new scanlines
12359+ while (in_last_scanline > split_info->ring_buffer_last_scanline) {
12360+ STBIR_ASSERT((split_info->ring_buffer_last_scanline -
12361+ split_info->ring_buffer_first_scanline + 1) <=
12362+ stbir_info->ring_buffer_num_entries);
12363+
12364+ // make sure there was room in the ring buffer when we add new
12365+ // scanlines
12366+ if ((split_info->ring_buffer_last_scanline -
12367+ split_info->ring_buffer_first_scanline + 1) ==
12368+ stbir_info->ring_buffer_num_entries) {
12369+ split_info->ring_buffer_first_scanline++;
12370+ split_info->ring_buffer_begin_index++;
12371+ }
12372+
12373+ if (stbir_info->vertical_first) {
12374+ float *ring_buffer = stbir__get_ring_buffer_scanline(
12375+ stbir_info, split_info,
12376+ ++split_info->ring_buffer_last_scanline);
12377+ // Decode the nth scanline from the source image into the decode
12378+ // buffer.
12379+ stbir__decode_scanline(
12380+ stbir_info, split_info->ring_buffer_last_scanline,
12381+ ring_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
12382+ } else {
12383+ stbir__decode_and_resample_for_vertical_gather_loop(
12384+ stbir_info, split_info,
12385+ split_info->ring_buffer_last_scanline + 1);
12386+ }
12387+ }
12388+
12389+ // Now all buffers should be ready to write a row of vertical sampling,
12390+ // so do it.
12391+ stbir__resample_vertical_gather(stbir_info, split_info, y,
12392+ in_first_scanline, in_last_scanline,
12393+ vertical_coefficients);
12394+
12395+ ++vertical_contributors;
12396+ vertical_coefficients += stbir_info->vertical.coefficient_width;
12397+ }
12398 }
12399
12400 #define STBIR__FLOAT_EMPTY_MARKER 3.0e+38F
12401-#define STBIR__FLOAT_BUFFER_IS_EMPTY(ptr) ((ptr)[0]==STBIR__FLOAT_EMPTY_MARKER)
12402-
12403-static void stbir__encode_first_scanline_from_scatter(stbir__info const * stbir_info, stbir__per_split_info* split_info)
12404-{
12405- // evict a scanline out into the output buffer
12406- float* ring_buffer_entry = stbir__get_ring_buffer_entry(stbir_info, split_info, split_info->ring_buffer_begin_index );
12407-
12408- // dump the scanline out
12409- stbir__encode_scanline( stbir_info, ( (char *)stbir_info->output_data ) + ( (size_t)split_info->ring_buffer_first_scanline * (size_t)stbir_info->output_stride_bytes ), ring_buffer_entry, split_info->ring_buffer_first_scanline STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12410-
12411- // mark it as empty
12412- ring_buffer_entry[ 0 ] = STBIR__FLOAT_EMPTY_MARKER;
12413-
12414- // advance the first scanline
12415- split_info->ring_buffer_first_scanline++;
12416- if ( ++split_info->ring_buffer_begin_index == stbir_info->ring_buffer_num_entries )
12417- split_info->ring_buffer_begin_index = 0;
12418-}
12419-
12420-static void stbir__horizontal_resample_and_encode_first_scanline_from_scatter(stbir__info const * stbir_info, stbir__per_split_info* split_info)
12421-{
12422- // evict a scanline out into the output buffer
12423-
12424- float* ring_buffer_entry = stbir__get_ring_buffer_entry(stbir_info, split_info, split_info->ring_buffer_begin_index );
12425-
12426- // Now resample it into the buffer.
12427- stbir__resample_horizontal_gather( stbir_info, split_info->vertical_buffer, ring_buffer_entry STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12428-
12429- // dump the scanline out
12430- stbir__encode_scanline( stbir_info, ( (char *)stbir_info->output_data ) + ( (size_t)split_info->ring_buffer_first_scanline * (size_t)stbir_info->output_stride_bytes ), split_info->vertical_buffer, split_info->ring_buffer_first_scanline STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12431-
12432- // mark it as empty
12433- ring_buffer_entry[ 0 ] = STBIR__FLOAT_EMPTY_MARKER;
12434-
12435- // advance the first scanline
12436- split_info->ring_buffer_first_scanline++;
12437- if ( ++split_info->ring_buffer_begin_index == stbir_info->ring_buffer_num_entries )
12438- split_info->ring_buffer_begin_index = 0;
12439-}
12440-
12441-static void stbir__resample_vertical_scatter(stbir__info const * stbir_info, stbir__per_split_info* split_info, int n0, int n1, float const * vertical_coefficients, float const * vertical_buffer, float const * vertical_buffer_end )
12442-{
12443- STBIR_ASSERT( !stbir_info->vertical.is_gather );
12444-
12445- STBIR_PROFILE_START( vertical );
12446- {
12447- int k = 0, total = n1 - n0 + 1;
12448- STBIR_ASSERT( total > 0 );
12449- do {
12450- float * outputs[8];
12451- int i, n = total; if ( n > 8 ) n = 8;
12452- for( i = 0 ; i < n ; i++ )
12453- {
12454- outputs[ i ] = stbir__get_ring_buffer_scanline(stbir_info, split_info, k+i+n0 );
12455- if ( ( i ) && ( STBIR__FLOAT_BUFFER_IS_EMPTY( outputs[i] ) != STBIR__FLOAT_BUFFER_IS_EMPTY( outputs[0] ) ) ) // make sure runs are of the same type
12456- {
12457- n = i;
12458- break;
12459- }
12460- }
12461- // call the scatter to N scanlines at a time function (up to 8 scanlines of scattering at once)
12462- ((STBIR__FLOAT_BUFFER_IS_EMPTY( outputs[0] ))?stbir__vertical_scatter_sets:stbir__vertical_scatter_blends)[n-1]( outputs, vertical_coefficients + k, vertical_buffer, vertical_buffer_end );
12463- k += n;
12464- total -= n;
12465- } while ( total );
12466- }
12467-
12468- STBIR_PROFILE_END( vertical );
12469-}
12470-
12471-typedef void stbir__handle_scanline_for_scatter_func(stbir__info const * stbir_info, stbir__per_split_info* split_info);
12472-
12473-static void stbir__vertical_scatter_loop( stbir__info const * stbir_info, stbir__per_split_info* split_info, int split_count )
12474-{
12475- int y, start_output_y, end_output_y, start_input_y, end_input_y;
12476- stbir__contributors* vertical_contributors = stbir_info->vertical.contributors;
12477- float const * vertical_coefficients = stbir_info->vertical.coefficients;
12478- stbir__handle_scanline_for_scatter_func * handle_scanline_for_scatter;
12479- void * scanline_scatter_buffer;
12480- void * scanline_scatter_buffer_end;
12481- int on_first_input_y, last_input_y;
12482- int width = (stbir_info->vertical_first) ? ( stbir_info->scanline_extents.conservative.n1-stbir_info->scanline_extents.conservative.n0+1 ) : stbir_info->horizontal.scale_info.output_sub_size;
12483- int width_times_channels = stbir_info->effective_channels * width;
12484-
12485- STBIR_ASSERT( !stbir_info->vertical.is_gather );
12486-
12487- start_output_y = split_info->start_output_y;
12488- end_output_y = split_info[split_count-1].end_output_y; // may do multiple split counts
12489-
12490- start_input_y = split_info->start_input_y;
12491- end_input_y = split_info[split_count-1].end_input_y;
12492-
12493- // adjust for starting offset start_input_y
12494- y = start_input_y + stbir_info->vertical.filter_pixel_margin;
12495- vertical_contributors += y ;
12496- vertical_coefficients += stbir_info->vertical.coefficient_width * y;
12497-
12498- if ( stbir_info->vertical_first )
12499- {
12500- handle_scanline_for_scatter = stbir__horizontal_resample_and_encode_first_scanline_from_scatter;
12501- scanline_scatter_buffer = split_info->decode_buffer;
12502- scanline_scatter_buffer_end = ( (char*) scanline_scatter_buffer ) + sizeof( float ) * stbir_info->effective_channels * (stbir_info->scanline_extents.conservative.n1-stbir_info->scanline_extents.conservative.n0+1);
12503- }
12504- else
12505- {
12506- handle_scanline_for_scatter = stbir__encode_first_scanline_from_scatter;
12507- scanline_scatter_buffer = split_info->vertical_buffer;
12508- scanline_scatter_buffer_end = ( (char*) scanline_scatter_buffer ) + sizeof( float ) * stbir_info->effective_channels * stbir_info->horizontal.scale_info.output_sub_size;
12509- }
12510-
12511- // initialize the ring buffer for scattering
12512- split_info->ring_buffer_first_scanline = start_output_y;
12513- split_info->ring_buffer_last_scanline = -1;
12514- split_info->ring_buffer_begin_index = -1;
12515-
12516- // mark all the buffers as empty to start
12517- for( y = 0 ; y < stbir_info->ring_buffer_num_entries ; y++ )
12518- {
12519- float * decode_buffer = stbir__get_ring_buffer_entry( stbir_info, split_info, y );
12520- decode_buffer[ width_times_channels ] = 0.0f; // clear two over for horizontals with a remnant of 3
12521- decode_buffer[ width_times_channels+1 ] = 0.0f;
12522- decode_buffer[0] = STBIR__FLOAT_EMPTY_MARKER; // only used on scatter
12523- }
12524-
12525- // do the loop in input space
12526- on_first_input_y = 1; last_input_y = start_input_y;
12527- for (y = start_input_y ; y < end_input_y; y++)
12528- {
12529- int out_first_scanline, out_last_scanline;
12530-
12531- out_first_scanline = vertical_contributors->n0;
12532- out_last_scanline = vertical_contributors->n1;
12533-
12534- STBIR_ASSERT(out_last_scanline - out_first_scanline + 1 <= stbir_info->ring_buffer_num_entries);
12535-
12536- if ( ( out_last_scanline >= out_first_scanline ) && ( ( ( out_first_scanline >= start_output_y ) && ( out_first_scanline < end_output_y ) ) || ( ( out_last_scanline >= start_output_y ) && ( out_last_scanline < end_output_y ) ) ) )
12537- {
12538- float const * vc = vertical_coefficients;
12539-
12540- // keep track of the range actually seen for the next resize
12541- last_input_y = y;
12542- if ( ( on_first_input_y ) && ( y > start_input_y ) )
12543- split_info->start_input_y = y;
12544- on_first_input_y = 0;
12545-
12546- // clip the region
12547- if ( out_first_scanline < start_output_y )
12548- {
12549- vc += start_output_y - out_first_scanline;
12550- out_first_scanline = start_output_y;
12551- }
12552-
12553- if ( out_last_scanline >= end_output_y )
12554- out_last_scanline = end_output_y - 1;
12555-
12556- // if very first scanline, init the index
12557- if (split_info->ring_buffer_begin_index < 0)
12558- split_info->ring_buffer_begin_index = out_first_scanline - start_output_y;
12559-
12560- STBIR_ASSERT( split_info->ring_buffer_begin_index <= out_first_scanline );
12561-
12562- // Decode the nth scanline from the source image into the decode buffer.
12563- stbir__decode_scanline( stbir_info, y, split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12564-
12565- // When horizontal first, we resample horizontally into the vertical buffer before we scatter it out
12566- if ( !stbir_info->vertical_first )
12567- stbir__resample_horizontal_gather( stbir_info, split_info->vertical_buffer, split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
12568-
12569- // Now it's sitting in the buffer ready to be distributed into the ring buffers.
12570-
12571- // evict from the ringbuffer, if we need are full
12572- if ( ( ( split_info->ring_buffer_last_scanline - split_info->ring_buffer_first_scanline + 1 ) == stbir_info->ring_buffer_num_entries ) &&
12573- ( out_last_scanline > split_info->ring_buffer_last_scanline ) )
12574- handle_scanline_for_scatter( stbir_info, split_info );
12575-
12576- // Now the horizontal buffer is ready to write to all ring buffer rows, so do it.
12577- stbir__resample_vertical_scatter(stbir_info, split_info, out_first_scanline, out_last_scanline, vc, (float*)scanline_scatter_buffer, (float*)scanline_scatter_buffer_end );
12578-
12579- // update the end of the buffer
12580- if ( out_last_scanline > split_info->ring_buffer_last_scanline )
12581- split_info->ring_buffer_last_scanline = out_last_scanline;
12582- }
12583- ++vertical_contributors;
12584- vertical_coefficients += stbir_info->vertical.coefficient_width;
12585- }
12586-
12587- // now evict the scanlines that are left over in the ring buffer
12588- while ( split_info->ring_buffer_first_scanline < end_output_y )
12589- handle_scanline_for_scatter(stbir_info, split_info);
12590-
12591- // update the end_input_y if we do multiple resizes with the same data
12592- ++last_input_y;
12593- for( y = 0 ; y < split_count; y++ )
12594- if ( split_info[y].end_input_y > last_input_y )
12595- split_info[y].end_input_y = last_input_y;
12596-}
12597-
12598-
12599-static stbir__kernel_callback * stbir__builtin_kernels[] = { 0, stbir__filter_trapezoid, stbir__filter_triangle, stbir__filter_cubic, stbir__filter_catmullrom, stbir__filter_mitchell, stbir__filter_point };
12600-static stbir__support_callback * stbir__builtin_supports[] = { 0, stbir__support_trapezoid, stbir__support_one, stbir__support_two, stbir__support_two, stbir__support_two, stbir__support_zeropoint5 };
12601-
12602-static void stbir__set_sampler(stbir__sampler * samp, stbir_filter filter, stbir__kernel_callback * kernel, stbir__support_callback * support, stbir_edge edge, stbir__scale_info * scale_info, int always_gather, void * user_data )
12603-{
12604- // set filter
12605- if (filter == 0)
12606- {
12607- filter = STBIR_DEFAULT_FILTER_DOWNSAMPLE; // default to downsample
12608- if (scale_info->scale >= ( 1.0f - stbir__small_float ) )
12609- {
12610- if ( (scale_info->scale <= ( 1.0f + stbir__small_float ) ) && ( STBIR_CEILF(scale_info->pixel_shift) == scale_info->pixel_shift ) )
12611- filter = STBIR_FILTER_POINT_SAMPLE;
12612- else
12613- filter = STBIR_DEFAULT_FILTER_UPSAMPLE;
12614- }
12615- }
12616- samp->filter_enum = filter;
12617-
12618- STBIR_ASSERT(samp->filter_enum != 0);
12619- STBIR_ASSERT((unsigned)samp->filter_enum < STBIR_FILTER_OTHER);
12620- samp->filter_kernel = stbir__builtin_kernels[ filter ];
12621- samp->filter_support = stbir__builtin_supports[ filter ];
12622-
12623- if ( kernel && support )
12624- {
12625- samp->filter_kernel = kernel;
12626- samp->filter_support = support;
12627- samp->filter_enum = STBIR_FILTER_OTHER;
12628- }
12629-
12630- samp->edge = edge;
12631- samp->filter_pixel_width = stbir__get_filter_pixel_width (samp->filter_support, scale_info->scale, user_data );
12632- // Gather is always better, but in extreme downsamples, you have to most or all of the data in memory
12633- // For horizontal, we always have all the pixels, so we always use gather here (always_gather==1).
12634- // For vertical, we use gather if scaling up (which means we will have samp->filter_pixel_width
12635- // scanlines in memory at once).
12636- samp->is_gather = 0;
12637- if ( scale_info->scale >= ( 1.0f - stbir__small_float ) )
12638- samp->is_gather = 1;
12639- else if ( ( always_gather ) || ( samp->filter_pixel_width <= STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT ) )
12640- samp->is_gather = 2;
12641-
12642- // pre calculate stuff based on the above
12643- samp->coefficient_width = stbir__get_coefficient_width(samp, samp->is_gather, user_data);
12644-
12645- // filter_pixel_width is the conservative size in pixels of input that affect an output pixel.
12646- // In rare cases (only with 2 pix to 1 pix with the default filters), it's possible that the
12647- // filter will extend before or after the scanline beyond just one extra entire copy of the
12648- // scanline (we would hit the edge twice). We don't let you do that, so we clamp the total
12649- // width to 3x the total of input pixel (once for the scanline, once for the left side
12650- // overhang, and once for the right side). We only do this for edge mode, since the other
12651- // modes can just re-edge clamp back in again.
12652- if ( edge == STBIR_EDGE_WRAP )
12653- if ( samp->filter_pixel_width > ( scale_info->input_full_size * 3 ) )
12654- samp->filter_pixel_width = scale_info->input_full_size * 3;
12655-
12656- // This is how much to expand buffers to account for filters seeking outside
12657- // the image boundaries.
12658- samp->filter_pixel_margin = samp->filter_pixel_width / 2;
12659-
12660- // filter_pixel_margin is the amount that this filter can overhang on just one side of either
12661- // end of the scanline (left or the right). Since we only allow you to overhang 1 scanline's
12662- // worth of pixels, we clamp this one side of overhang to the input scanline size. Again,
12663- // this clamping only happens in rare cases with the default filters (2 pix to 1 pix).
12664- if ( edge == STBIR_EDGE_WRAP )
12665- if ( samp->filter_pixel_margin > scale_info->input_full_size )
12666- samp->filter_pixel_margin = scale_info->input_full_size;
12667-
12668- samp->num_contributors = stbir__get_contributors(samp, samp->is_gather);
12669-
12670- samp->contributors_size = samp->num_contributors * sizeof(stbir__contributors);
12671- samp->coefficients_size = samp->num_contributors * samp->coefficient_width * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra sizeof(float) is padding
12672-
12673- samp->gather_prescatter_contributors = 0;
12674- samp->gather_prescatter_coefficients = 0;
12675- if ( samp->is_gather == 0 )
12676- {
12677- samp->gather_prescatter_coefficient_width = samp->filter_pixel_width;
12678- samp->gather_prescatter_num_contributors = stbir__get_contributors(samp, 2);
12679- samp->gather_prescatter_contributors_size = samp->gather_prescatter_num_contributors * sizeof(stbir__contributors);
12680- samp->gather_prescatter_coefficients_size = samp->gather_prescatter_num_contributors * samp->gather_prescatter_coefficient_width * sizeof(float);
12681- }
12682-}
12683-
12684-static void stbir__get_conservative_extents( stbir__sampler * samp, stbir__contributors * range, void * user_data )
12685-{
12686- float scale = samp->scale_info.scale;
12687- float out_shift = samp->scale_info.pixel_shift;
12688- stbir__support_callback * support = samp->filter_support;
12689- int input_full_size = samp->scale_info.input_full_size;
12690- stbir_edge edge = samp->edge;
12691- float inv_scale = samp->scale_info.inv_scale;
12692-
12693- STBIR_ASSERT( samp->is_gather != 0 );
12694-
12695- if ( samp->is_gather == 1 )
12696- {
12697- int in_first_pixel, in_last_pixel;
12698- float out_filter_radius = support(inv_scale, user_data) * scale;
12699-
12700- stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, 0.5, out_filter_radius, inv_scale, out_shift, input_full_size, edge );
12701- range->n0 = in_first_pixel;
12702- stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, ( (float)(samp->scale_info.output_sub_size-1) ) + 0.5f, out_filter_radius, inv_scale, out_shift, input_full_size, edge );
12703- range->n1 = in_last_pixel;
12704- }
12705- else if ( samp->is_gather == 2 ) // downsample gather, refine
12706- {
12707- float in_pixels_radius = support(scale, user_data) * inv_scale;
12708- int filter_pixel_margin = samp->filter_pixel_margin;
12709- int output_sub_size = samp->scale_info.output_sub_size;
12710- int input_end;
12711- int n;
12712- int in_first_pixel, in_last_pixel;
12713-
12714- // get a conservative area of the input range
12715- stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, 0, 0, inv_scale, out_shift, input_full_size, edge );
12716- range->n0 = in_first_pixel;
12717- stbir__calculate_in_pixel_range( &in_first_pixel, &in_last_pixel, (float)output_sub_size, 0, inv_scale, out_shift, input_full_size, edge );
12718- range->n1 = in_last_pixel;
12719-
12720- // now go through the margin to the start of area to find bottom
12721- n = range->n0 + 1;
12722- input_end = -filter_pixel_margin;
12723- while( n >= input_end )
12724- {
12725- int out_first_pixel, out_last_pixel;
12726- stbir__calculate_out_pixel_range( &out_first_pixel, &out_last_pixel, ((float)n)+0.5f, in_pixels_radius, scale, out_shift, output_sub_size );
12727- if ( out_first_pixel > out_last_pixel )
12728- break;
12729-
12730- if ( ( out_first_pixel < output_sub_size ) || ( out_last_pixel >= 0 ) )
12731- range->n0 = n;
12732- --n;
12733- }
12734-
12735- // now go through the end of the area through the margin to find top
12736- n = range->n1 - 1;
12737- input_end = n + 1 + filter_pixel_margin;
12738- while( n <= input_end )
12739- {
12740- int out_first_pixel, out_last_pixel;
12741- stbir__calculate_out_pixel_range( &out_first_pixel, &out_last_pixel, ((float)n)+0.5f, in_pixels_radius, scale, out_shift, output_sub_size );
12742- if ( out_first_pixel > out_last_pixel )
12743- break;
12744- if ( ( out_first_pixel < output_sub_size ) || ( out_last_pixel >= 0 ) )
12745- range->n1 = n;
12746- ++n;
12747- }
12748- }
12749-
12750- if ( samp->edge == STBIR_EDGE_WRAP )
12751- {
12752- // if we are wrapping, and we are very close to the image size (so the edges might merge), just use the scanline up to the edge
12753- if ( ( range->n0 > 0 ) && ( range->n1 >= input_full_size ) )
12754- {
12755- int marg = range->n1 - input_full_size + 1;
12756- if ( ( marg + STBIR__MERGE_RUNS_PIXEL_THRESHOLD ) >= range->n0 )
12757- range->n0 = 0;
12758- }
12759- if ( ( range->n0 < 0 ) && ( range->n1 < (input_full_size-1) ) )
12760- {
12761- int marg = -range->n0;
12762- if ( ( input_full_size - marg - STBIR__MERGE_RUNS_PIXEL_THRESHOLD - 1 ) <= range->n1 )
12763- range->n1 = input_full_size - 1;
12764- }
12765- }
12766- else
12767- {
12768- // for non-edge-wrap modes, we never read over the edge, so clamp
12769- if ( range->n0 < 0 )
12770- range->n0 = 0;
12771- if ( range->n1 >= input_full_size )
12772- range->n1 = input_full_size - 1;
12773- }
12774-}
12775-
12776-static void stbir__get_split_info( stbir__per_split_info* split_info, int splits, int output_height, int vertical_pixel_margin, int input_full_height, int is_gather, stbir__contributors * contribs )
12777-{
12778- int i, cur;
12779- int left = output_height;
12780-
12781- cur = 0;
12782- for( i = 0 ; i < splits ; i++ )
12783- {
12784- int each;
12785-
12786- split_info[i].start_output_y = cur;
12787- each = left / ( splits - i );
12788- split_info[i].end_output_y = cur + each;
12789-
12790- // ok, when we are gathering, we need to make sure we are starting on a y offset that doesn't have
12791- // a "special" set of coefficients. Basically, with exactly the right filter at exactly the right
12792- // resize at exactly the right phase, some of the coefficents can be zero. When they are zero, we
12793- // don't process them at all. But this leads to a tricky thing with the thread splits, where we
12794- // might have a set of two coeffs like this for example: (4,4) and (3,6). The 4,4 means there was
12795- // just one single coeff because things worked out perfectly (normally, they all have 4 coeffs
12796- // like the range 3,6. The problem is that if we start right on the (4,4) on a brand new thread,
12797- // then when we get to (3,6), we don't have the "3" sample in memory (because we didn't load
12798- // it on the initial (4,4) range because it didn't have a 3 (we only add new samples that are
12799- // larger than our existing samples - it's just how the eviction works). So, our solution here
12800- // is pretty simple, if we start right on a range that has samples that start earlier, then we
12801- // simply bump up our previous thread split range to include it, and then start this threads
12802- // range with the smaller sample. It just moves one scanline from one thread split to another,
12803- // so that we end with the unusual one, instead of start with it. To do this, we check 2-4
12804- // sample at each thread split start and then occassionally move them.
12805-
12806- if ( ( is_gather ) && ( i ) )
12807- {
12808- stbir__contributors * small_contribs;
12809- int j, smallest, stop, start_n0;
12810- stbir__contributors * split_contribs = contribs + cur;
12811-
12812- // scan for a max of 3x the filter width or until the next thread split
12813- stop = vertical_pixel_margin * 3;
12814- if ( each < stop )
12815- stop = each;
12816-
12817- // loops a few times before early out
12818- smallest = 0;
12819- small_contribs = split_contribs;
12820- start_n0 = small_contribs->n0;
12821- for( j = 1 ; j <= stop ; j++ )
12822- {
12823- ++split_contribs;
12824- if ( split_contribs->n0 > start_n0 )
12825- break;
12826- if ( split_contribs->n0 < small_contribs->n0 )
12827- {
12828- small_contribs = split_contribs;
12829- smallest = j;
12830- }
12831- }
12832-
12833- split_info[i-1].end_output_y += smallest;
12834- split_info[i].start_output_y += smallest;
12835- }
12836-
12837- cur += each;
12838- left -= each;
12839-
12840- // scatter range (updated to minimum as you run it)
12841- split_info[i].start_input_y = -vertical_pixel_margin;
12842- split_info[i].end_input_y = input_full_height + vertical_pixel_margin;
12843- }
12844-}
12845-
12846-static void stbir__free_internal_mem( stbir__info *info )
12847-{
12848- #define STBIR__FREE_AND_CLEAR( ptr ) { if ( ptr ) { void * p = (ptr); (ptr) = 0; STBIR_FREE( p, info->user_data); } }
12849-
12850- if ( info )
12851- {
12852- #ifndef STBIR__SEPARATE_ALLOCATIONS
12853- STBIR__FREE_AND_CLEAR( info->alloced_mem );
12854- #else
12855- int i,j;
12856-
12857- if ( ( info->vertical.gather_prescatter_contributors ) && ( (void*)info->vertical.gather_prescatter_contributors != (void*)info->split_info[0].decode_buffer ) )
12858- {
12859- STBIR__FREE_AND_CLEAR( info->vertical.gather_prescatter_coefficients );
12860- STBIR__FREE_AND_CLEAR( info->vertical.gather_prescatter_contributors );
12861- }
12862- for( i = 0 ; i < info->splits ; i++ )
12863- {
12864- for( j = 0 ; j < info->alloc_ring_buffer_num_entries ; j++ )
12865- {
12866- #ifdef STBIR_SIMD8
12867- if ( info->effective_channels == 3 )
12868- --info->split_info[i].ring_buffers[j]; // avx in 3 channel mode needs one float at the start of the buffer
12869- #endif
12870- STBIR__FREE_AND_CLEAR( info->split_info[i].ring_buffers[j] );
12871- }
12872-
12873- #ifdef STBIR_SIMD8
12874- if ( info->effective_channels == 3 )
12875- --info->split_info[i].decode_buffer; // avx in 3 channel mode needs one float at the start of the buffer
12876- #endif
12877- STBIR__FREE_AND_CLEAR( info->split_info[i].decode_buffer );
12878- STBIR__FREE_AND_CLEAR( info->split_info[i].ring_buffers );
12879- STBIR__FREE_AND_CLEAR( info->split_info[i].vertical_buffer );
12880- }
12881- STBIR__FREE_AND_CLEAR( info->split_info );
12882- if ( info->vertical.coefficients != info->horizontal.coefficients )
12883- {
12884- STBIR__FREE_AND_CLEAR( info->vertical.coefficients );
12885- STBIR__FREE_AND_CLEAR( info->vertical.contributors );
12886- }
12887- STBIR__FREE_AND_CLEAR( info->horizontal.coefficients );
12888- STBIR__FREE_AND_CLEAR( info->horizontal.contributors );
12889- STBIR__FREE_AND_CLEAR( info->alloced_mem );
12890- STBIR_FREE( info, info->user_data );
12891- #endif
12892- }
12893-
12894- #undef STBIR__FREE_AND_CLEAR
12895-}
12896-
12897-static int stbir__get_max_split( int splits, int height )
12898-{
12899- int i;
12900- int max = 0;
12901-
12902- for( i = 0 ; i < splits ; i++ )
12903- {
12904- int each = height / ( splits - i );
12905- if ( each > max )
12906- max = each;
12907- height -= each;
12908- }
12909- return max;
12910-}
12911-
12912-static stbir__horizontal_gather_channels_func ** stbir__horizontal_gather_n_coeffs_funcs[8] =
12913-{
12914- 0, stbir__horizontal_gather_1_channels_with_n_coeffs_funcs, stbir__horizontal_gather_2_channels_with_n_coeffs_funcs, stbir__horizontal_gather_3_channels_with_n_coeffs_funcs, stbir__horizontal_gather_4_channels_with_n_coeffs_funcs, 0,0, stbir__horizontal_gather_7_channels_with_n_coeffs_funcs
12915-};
12916-
12917-static stbir__horizontal_gather_channels_func ** stbir__horizontal_gather_channels_funcs[8] =
12918-{
12919- 0, stbir__horizontal_gather_1_channels_funcs, stbir__horizontal_gather_2_channels_funcs, stbir__horizontal_gather_3_channels_funcs, stbir__horizontal_gather_4_channels_funcs, 0,0, stbir__horizontal_gather_7_channels_funcs
12920-};
12921+#define STBIR__FLOAT_BUFFER_IS_EMPTY(ptr) \
12922+ ((ptr)[0] == STBIR__FLOAT_EMPTY_MARKER)
12923+
12924+static void
12925+stbir__encode_first_scanline_from_scatter(stbir__info const *stbir_info,
12926+ stbir__per_split_info *split_info)
12927+{
12928+ // evict a scanline out into the output buffer
12929+ float *ring_buffer_entry = stbir__get_ring_buffer_entry(
12930+ stbir_info, split_info, split_info->ring_buffer_begin_index);
12931+
12932+ // dump the scanline out
12933+ stbir__encode_scanline(stbir_info,
12934+ ((char *)stbir_info->output_data) +
12935+ ((size_t)split_info->ring_buffer_first_scanline *
12936+ (size_t)stbir_info->output_stride_bytes),
12937+ ring_buffer_entry,
12938+ split_info->ring_buffer_first_scanline
12939+ STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
12940+
12941+ // mark it as empty
12942+ ring_buffer_entry[0] = STBIR__FLOAT_EMPTY_MARKER;
12943+
12944+ // advance the first scanline
12945+ split_info->ring_buffer_first_scanline++;
12946+ if (++split_info->ring_buffer_begin_index ==
12947+ stbir_info->ring_buffer_num_entries) {
12948+ split_info->ring_buffer_begin_index = 0;
12949+ }
12950+}
12951+
12952+static void
12953+stbir__horizontal_resample_and_encode_first_scanline_from_scatter(
12954+ stbir__info const *stbir_info, stbir__per_split_info *split_info)
12955+{
12956+ // evict a scanline out into the output buffer
12957+
12958+ float *ring_buffer_entry = stbir__get_ring_buffer_entry(
12959+ stbir_info, split_info, split_info->ring_buffer_begin_index);
12960+
12961+ // Now resample it into the buffer.
12962+ stbir__resample_horizontal_gather(
12963+ stbir_info, split_info->vertical_buffer,
12964+ ring_buffer_entry STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
12965+
12966+ // dump the scanline out
12967+ stbir__encode_scanline(stbir_info,
12968+ ((char *)stbir_info->output_data) +
12969+ ((size_t)split_info->ring_buffer_first_scanline *
12970+ (size_t)stbir_info->output_stride_bytes),
12971+ split_info->vertical_buffer,
12972+ split_info->ring_buffer_first_scanline
12973+ STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
12974+
12975+ // mark it as empty
12976+ ring_buffer_entry[0] = STBIR__FLOAT_EMPTY_MARKER;
12977+
12978+ // advance the first scanline
12979+ split_info->ring_buffer_first_scanline++;
12980+ if (++split_info->ring_buffer_begin_index ==
12981+ stbir_info->ring_buffer_num_entries) {
12982+ split_info->ring_buffer_begin_index = 0;
12983+ }
12984+}
12985+
12986+static void
12987+stbir__resample_vertical_scatter(stbir__info const *stbir_info,
12988+ stbir__per_split_info *split_info, int n0,
12989+ int n1, float const *vertical_coefficients,
12990+ float const *vertical_buffer,
12991+ float const *vertical_buffer_end)
12992+{
12993+ STBIR_ASSERT(!stbir_info->vertical.is_gather);
12994+
12995+ STBIR_PROFILE_START(vertical);
12996+ {
12997+ int k = 0, total = n1 - n0 + 1;
12998+ STBIR_ASSERT(total > 0);
12999+ do {
13000+ float *outputs[8];
13001+ int i, n = total;
13002+ if (n > 8) {
13003+ n = 8;
13004+ }
13005+ for (i = 0; i < n; i++) {
13006+ outputs[i] = stbir__get_ring_buffer_scanline(
13007+ stbir_info, split_info, k + i + n0);
13008+ if ((i) &&
13009+ (STBIR__FLOAT_BUFFER_IS_EMPTY(outputs[i]) !=
13010+ STBIR__FLOAT_BUFFER_IS_EMPTY(
13011+ outputs[0]))) // make sure runs are of the same type
13012+ {
13013+ n = i;
13014+ break;
13015+ }
13016+ }
13017+ // call the scatter to N scanlines at a time function (up to 8
13018+ // scanlines of scattering at once)
13019+ ((STBIR__FLOAT_BUFFER_IS_EMPTY(outputs[0]))
13020+ ? stbir__vertical_scatter_sets
13021+ : stbir__vertical_scatter_blends)[n - 1](
13022+ outputs, vertical_coefficients + k, vertical_buffer,
13023+ vertical_buffer_end);
13024+ k += n;
13025+ total -= n;
13026+ } while (total);
13027+ }
13028+
13029+ STBIR_PROFILE_END(vertical);
13030+}
13031+
13032+typedef void
13033+stbir__handle_scanline_for_scatter_func(stbir__info const *stbir_info,
13034+ stbir__per_split_info *split_info);
13035+
13036+static void
13037+stbir__vertical_scatter_loop(stbir__info const *stbir_info,
13038+ stbir__per_split_info *split_info, int split_count)
13039+{
13040+ int y, start_output_y, end_output_y, start_input_y, end_input_y;
13041+ stbir__contributors *vertical_contributors =
13042+ stbir_info->vertical.contributors;
13043+ float const *vertical_coefficients = stbir_info->vertical.coefficients;
13044+ stbir__handle_scanline_for_scatter_func *handle_scanline_for_scatter;
13045+ void *scanline_scatter_buffer;
13046+ void *scanline_scatter_buffer_end;
13047+ int on_first_input_y, last_input_y;
13048+ int width = (stbir_info->vertical_first)
13049+ ? (stbir_info->scanline_extents.conservative.n1 -
13050+ stbir_info->scanline_extents.conservative.n0 + 1)
13051+ : stbir_info->horizontal.scale_info.output_sub_size;
13052+ int width_times_channels = stbir_info->effective_channels * width;
13053+
13054+ STBIR_ASSERT(!stbir_info->vertical.is_gather);
13055+
13056+ start_output_y = split_info->start_output_y;
13057+ end_output_y = split_info[split_count - 1]
13058+ .end_output_y; // may do multiple split counts
13059+
13060+ start_input_y = split_info->start_input_y;
13061+ end_input_y = split_info[split_count - 1].end_input_y;
13062+
13063+ // adjust for starting offset start_input_y
13064+ y = start_input_y + stbir_info->vertical.filter_pixel_margin;
13065+ vertical_contributors += y;
13066+ vertical_coefficients += stbir_info->vertical.coefficient_width * y;
13067+
13068+ if (stbir_info->vertical_first) {
13069+ handle_scanline_for_scatter =
13070+ stbir__horizontal_resample_and_encode_first_scanline_from_scatter;
13071+ scanline_scatter_buffer = split_info->decode_buffer;
13072+ scanline_scatter_buffer_end =
13073+ ((char *)scanline_scatter_buffer) +
13074+ sizeof(float) * stbir_info->effective_channels *
13075+ (stbir_info->scanline_extents.conservative.n1 -
13076+ stbir_info->scanline_extents.conservative.n0 + 1);
13077+ } else {
13078+ handle_scanline_for_scatter = stbir__encode_first_scanline_from_scatter;
13079+ scanline_scatter_buffer = split_info->vertical_buffer;
13080+ scanline_scatter_buffer_end =
13081+ ((char *)scanline_scatter_buffer) +
13082+ sizeof(float) * stbir_info->effective_channels *
13083+ stbir_info->horizontal.scale_info.output_sub_size;
13084+ }
13085+
13086+ // initialize the ring buffer for scattering
13087+ split_info->ring_buffer_first_scanline = start_output_y;
13088+ split_info->ring_buffer_last_scanline = -1;
13089+ split_info->ring_buffer_begin_index = -1;
13090+
13091+ // mark all the buffers as empty to start
13092+ for (y = 0; y < stbir_info->ring_buffer_num_entries; y++) {
13093+ float *decode_buffer =
13094+ stbir__get_ring_buffer_entry(stbir_info, split_info, y);
13095+ decode_buffer[width_times_channels] =
13096+ 0.0f; // clear two over for horizontals with a remnant of 3
13097+ decode_buffer[width_times_channels + 1] = 0.0f;
13098+ decode_buffer[0] = STBIR__FLOAT_EMPTY_MARKER; // only used on scatter
13099+ }
13100+
13101+ // do the loop in input space
13102+ on_first_input_y = 1;
13103+ last_input_y = start_input_y;
13104+ for (y = start_input_y; y < end_input_y; y++) {
13105+ int out_first_scanline, out_last_scanline;
13106+
13107+ out_first_scanline = vertical_contributors->n0;
13108+ out_last_scanline = vertical_contributors->n1;
13109+
13110+ STBIR_ASSERT(out_last_scanline - out_first_scanline + 1 <=
13111+ stbir_info->ring_buffer_num_entries);
13112+
13113+ if ((out_last_scanline >= out_first_scanline) &&
13114+ (((out_first_scanline >= start_output_y) &&
13115+ (out_first_scanline < end_output_y)) ||
13116+ ((out_last_scanline >= start_output_y) &&
13117+ (out_last_scanline < end_output_y)))) {
13118+ float const *vc = vertical_coefficients;
13119+
13120+ // keep track of the range actually seen for the next resize
13121+ last_input_y = y;
13122+ if ((on_first_input_y) && (y > start_input_y)) {
13123+ split_info->start_input_y = y;
13124+ }
13125+ on_first_input_y = 0;
13126+
13127+ // clip the region
13128+ if (out_first_scanline < start_output_y) {
13129+ vc += start_output_y - out_first_scanline;
13130+ out_first_scanline = start_output_y;
13131+ }
13132+
13133+ if (out_last_scanline >= end_output_y) {
13134+ out_last_scanline = end_output_y - 1;
13135+ }
13136+
13137+ // if very first scanline, init the index
13138+ if (split_info->ring_buffer_begin_index < 0) {
13139+ split_info->ring_buffer_begin_index =
13140+ out_first_scanline - start_output_y;
13141+ }
13142+
13143+ STBIR_ASSERT(split_info->ring_buffer_begin_index <=
13144+ out_first_scanline);
13145+
13146+ // Decode the nth scanline from the source image into the decode
13147+ // buffer.
13148+ stbir__decode_scanline(
13149+ stbir_info, y,
13150+ split_info->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
13151+
13152+ // When horizontal first, we resample horizontally into the vertical
13153+ // buffer before we scatter it out
13154+ if (!stbir_info->vertical_first) {
13155+ stbir__resample_horizontal_gather(
13156+ stbir_info, split_info->vertical_buffer,
13157+ split_info
13158+ ->decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO);
13159+ }
13160+
13161+ // Now it's sitting in the buffer ready to be distributed into the
13162+ // ring buffers.
13163+
13164+ // evict from the ringbuffer, if we need are full
13165+ if (((split_info->ring_buffer_last_scanline -
13166+ split_info->ring_buffer_first_scanline + 1) ==
13167+ stbir_info->ring_buffer_num_entries) &&
13168+ (out_last_scanline > split_info->ring_buffer_last_scanline)) {
13169+ handle_scanline_for_scatter(stbir_info, split_info);
13170+ }
13171+
13172+ // Now the horizontal buffer is ready to write to all ring buffer
13173+ // rows, so do it.
13174+ stbir__resample_vertical_scatter(
13175+ stbir_info, split_info, out_first_scanline, out_last_scanline,
13176+ vc, (float *)scanline_scatter_buffer,
13177+ (float *)scanline_scatter_buffer_end);
13178+
13179+ // update the end of the buffer
13180+ if (out_last_scanline > split_info->ring_buffer_last_scanline) {
13181+ split_info->ring_buffer_last_scanline = out_last_scanline;
13182+ }
13183+ }
13184+ ++vertical_contributors;
13185+ vertical_coefficients += stbir_info->vertical.coefficient_width;
13186+ }
13187+
13188+ // now evict the scanlines that are left over in the ring buffer
13189+ while (split_info->ring_buffer_first_scanline < end_output_y) {
13190+ handle_scanline_for_scatter(stbir_info, split_info);
13191+ }
13192+
13193+ // update the end_input_y if we do multiple resizes with the same data
13194+ ++last_input_y;
13195+ for (y = 0; y < split_count; y++) {
13196+ if (split_info[y].end_input_y > last_input_y) {
13197+ split_info[y].end_input_y = last_input_y;
13198+ }
13199+ }
13200+}
13201+
13202+static stbir__kernel_callback *stbir__builtin_kernels[] = {
13203+ 0,
13204+ stbir__filter_trapezoid,
13205+ stbir__filter_triangle,
13206+ stbir__filter_cubic,
13207+ stbir__filter_catmullrom,
13208+ stbir__filter_mitchell,
13209+ stbir__filter_point};
13210+static stbir__support_callback *stbir__builtin_supports[] = {
13211+ 0,
13212+ stbir__support_trapezoid,
13213+ stbir__support_one,
13214+ stbir__support_two,
13215+ stbir__support_two,
13216+ stbir__support_two,
13217+ stbir__support_zeropoint5};
13218+
13219+static void
13220+stbir__set_sampler(stbir__sampler *samp, stbir_filter filter,
13221+ stbir__kernel_callback *kernel,
13222+ stbir__support_callback *support, stbir_edge edge,
13223+ stbir__scale_info *scale_info, int always_gather,
13224+ void *user_data)
13225+{
13226+ // set filter
13227+ if (filter == 0) {
13228+ filter = STBIR_DEFAULT_FILTER_DOWNSAMPLE; // default to downsample
13229+ if (scale_info->scale >= (1.0f - stbir__small_float)) {
13230+ if ((scale_info->scale <= (1.0f + stbir__small_float)) &&
13231+ (STBIR_CEILF(scale_info->pixel_shift) ==
13232+ scale_info->pixel_shift)) {
13233+ filter = STBIR_FILTER_POINT_SAMPLE;
13234+ } else {
13235+ filter = STBIR_DEFAULT_FILTER_UPSAMPLE;
13236+ }
13237+ }
13238+ }
13239+ samp->filter_enum = filter;
13240+
13241+ STBIR_ASSERT(samp->filter_enum != 0);
13242+ STBIR_ASSERT((unsigned)samp->filter_enum < STBIR_FILTER_OTHER);
13243+ samp->filter_kernel = stbir__builtin_kernels[filter];
13244+ samp->filter_support = stbir__builtin_supports[filter];
13245+
13246+ if (kernel && support) {
13247+ samp->filter_kernel = kernel;
13248+ samp->filter_support = support;
13249+ samp->filter_enum = STBIR_FILTER_OTHER;
13250+ }
13251+
13252+ samp->edge = edge;
13253+ samp->filter_pixel_width = stbir__get_filter_pixel_width(
13254+ samp->filter_support, scale_info->scale, user_data);
13255+ // Gather is always better, but in extreme downsamples, you have to most or
13256+ // all of the data in memory
13257+ // For horizontal, we always have all the pixels, so we always use gather
13258+ // here (always_gather==1). For vertical, we use gather if scaling up
13259+ // (which means we will have samp->filter_pixel_width scanlines in memory
13260+ // at once).
13261+ samp->is_gather = 0;
13262+ if (scale_info->scale >= (1.0f - stbir__small_float)) {
13263+ samp->is_gather = 1;
13264+ } else if ((always_gather) ||
13265+ (samp->filter_pixel_width <=
13266+ STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT)) {
13267+ samp->is_gather = 2;
13268+ }
13269+
13270+ // pre calculate stuff based on the above
13271+ samp->coefficient_width =
13272+ stbir__get_coefficient_width(samp, samp->is_gather, user_data);
13273+
13274+ // filter_pixel_width is the conservative size in pixels of input that
13275+ // affect an output pixel.
13276+ // In rare cases (only with 2 pix to 1 pix with the default filters), it's
13277+ // possible that the filter will extend before or after the scanline
13278+ // beyond just one extra entire copy of the scanline (we would hit the
13279+ // edge twice). We don't let you do that, so we clamp the total width to
13280+ // 3x the total of input pixel (once for the scanline, once for the left
13281+ // side overhang, and once for the right side). We only do this for edge
13282+ // mode, since the other modes can just re-edge clamp back in again.
13283+ if (edge == STBIR_EDGE_WRAP) {
13284+ if (samp->filter_pixel_width > (scale_info->input_full_size * 3)) {
13285+ samp->filter_pixel_width = scale_info->input_full_size * 3;
13286+ }
13287+ }
13288+
13289+ // This is how much to expand buffers to account for filters seeking outside
13290+ // the image boundaries.
13291+ samp->filter_pixel_margin = samp->filter_pixel_width / 2;
13292+
13293+ // filter_pixel_margin is the amount that this filter can overhang on just
13294+ // one side of either
13295+ // end of the scanline (left or the right). Since we only allow you to
13296+ // overhang 1 scanline's worth of pixels, we clamp this one side of
13297+ // overhang to the input scanline size. Again, this clamping only happens
13298+ // in rare cases with the default filters (2 pix to 1 pix).
13299+ if (edge == STBIR_EDGE_WRAP) {
13300+ if (samp->filter_pixel_margin > scale_info->input_full_size) {
13301+ samp->filter_pixel_margin = scale_info->input_full_size;
13302+ }
13303+ }
13304+
13305+ samp->num_contributors = stbir__get_contributors(samp, samp->is_gather);
13306+
13307+ samp->contributors_size =
13308+ samp->num_contributors * sizeof(stbir__contributors);
13309+ samp->coefficients_size =
13310+ samp->num_contributors * samp->coefficient_width * sizeof(float) +
13311+ sizeof(float) *
13312+ STBIR_INPUT_CALLBACK_PADDING; // extra sizeof(float) is padding
13313+
13314+ samp->gather_prescatter_contributors = 0;
13315+ samp->gather_prescatter_coefficients = 0;
13316+ if (samp->is_gather == 0) {
13317+ samp->gather_prescatter_coefficient_width = samp->filter_pixel_width;
13318+ samp->gather_prescatter_num_contributors =
13319+ stbir__get_contributors(samp, 2);
13320+ samp->gather_prescatter_contributors_size =
13321+ samp->gather_prescatter_num_contributors *
13322+ sizeof(stbir__contributors);
13323+ samp->gather_prescatter_coefficients_size =
13324+ samp->gather_prescatter_num_contributors *
13325+ samp->gather_prescatter_coefficient_width * sizeof(float);
13326+ }
13327+}
13328+
13329+static void
13330+stbir__get_conservative_extents(stbir__sampler *samp,
13331+ stbir__contributors *range, void *user_data)
13332+{
13333+ float scale = samp->scale_info.scale;
13334+ float out_shift = samp->scale_info.pixel_shift;
13335+ stbir__support_callback *support = samp->filter_support;
13336+ int input_full_size = samp->scale_info.input_full_size;
13337+ stbir_edge edge = samp->edge;
13338+ float inv_scale = samp->scale_info.inv_scale;
13339+
13340+ STBIR_ASSERT(samp->is_gather != 0);
13341+
13342+ if (samp->is_gather == 1) {
13343+ int in_first_pixel, in_last_pixel;
13344+ float out_filter_radius = support(inv_scale, user_data) * scale;
13345+
13346+ stbir__calculate_in_pixel_range(&in_first_pixel, &in_last_pixel, 0.5,
13347+ out_filter_radius, inv_scale, out_shift,
13348+ input_full_size, edge);
13349+ range->n0 = in_first_pixel;
13350+ stbir__calculate_in_pixel_range(
13351+ &in_first_pixel, &in_last_pixel,
13352+ ((float)(samp->scale_info.output_sub_size - 1)) + 0.5f,
13353+ out_filter_radius, inv_scale, out_shift, input_full_size, edge);
13354+ range->n1 = in_last_pixel;
13355+ } else if (samp->is_gather == 2) // downsample gather, refine
13356+ {
13357+ float in_pixels_radius = support(scale, user_data) * inv_scale;
13358+ int filter_pixel_margin = samp->filter_pixel_margin;
13359+ int output_sub_size = samp->scale_info.output_sub_size;
13360+ int input_end;
13361+ int n;
13362+ int in_first_pixel, in_last_pixel;
13363+
13364+ // get a conservative area of the input range
13365+ stbir__calculate_in_pixel_range(&in_first_pixel, &in_last_pixel, 0, 0,
13366+ inv_scale, out_shift, input_full_size,
13367+ edge);
13368+ range->n0 = in_first_pixel;
13369+ stbir__calculate_in_pixel_range(&in_first_pixel, &in_last_pixel,
13370+ (float)output_sub_size, 0, inv_scale,
13371+ out_shift, input_full_size, edge);
13372+ range->n1 = in_last_pixel;
13373+
13374+ // now go through the margin to the start of area to find bottom
13375+ n = range->n0 + 1;
13376+ input_end = -filter_pixel_margin;
13377+ while (n >= input_end) {
13378+ int out_first_pixel, out_last_pixel;
13379+ stbir__calculate_out_pixel_range(
13380+ &out_first_pixel, &out_last_pixel, ((float)n) + 0.5f,
13381+ in_pixels_radius, scale, out_shift, output_sub_size);
13382+ if (out_first_pixel > out_last_pixel) {
13383+ break;
13384+ }
13385+
13386+ if ((out_first_pixel < output_sub_size) || (out_last_pixel >= 0)) {
13387+ range->n0 = n;
13388+ }
13389+ --n;
13390+ }
13391+
13392+ // now go through the end of the area through the margin to find top
13393+ n = range->n1 - 1;
13394+ input_end = n + 1 + filter_pixel_margin;
13395+ while (n <= input_end) {
13396+ int out_first_pixel, out_last_pixel;
13397+ stbir__calculate_out_pixel_range(
13398+ &out_first_pixel, &out_last_pixel, ((float)n) + 0.5f,
13399+ in_pixels_radius, scale, out_shift, output_sub_size);
13400+ if (out_first_pixel > out_last_pixel) {
13401+ break;
13402+ }
13403+ if ((out_first_pixel < output_sub_size) || (out_last_pixel >= 0)) {
13404+ range->n1 = n;
13405+ }
13406+ ++n;
13407+ }
13408+ }
13409+
13410+ if (samp->edge == STBIR_EDGE_WRAP) {
13411+ // if we are wrapping, and we are very close to the image size (so the
13412+ // edges might merge), just use the scanline up to the edge
13413+ if ((range->n0 > 0) && (range->n1 >= input_full_size)) {
13414+ int marg = range->n1 - input_full_size + 1;
13415+ if ((marg + STBIR__MERGE_RUNS_PIXEL_THRESHOLD) >= range->n0) {
13416+ range->n0 = 0;
13417+ }
13418+ }
13419+ if ((range->n0 < 0) && (range->n1 < (input_full_size - 1))) {
13420+ int marg = -range->n0;
13421+ if ((input_full_size - marg - STBIR__MERGE_RUNS_PIXEL_THRESHOLD -
13422+ 1) <= range->n1) {
13423+ range->n1 = input_full_size - 1;
13424+ }
13425+ }
13426+ } else {
13427+ // for non-edge-wrap modes, we never read over the edge, so clamp
13428+ if (range->n0 < 0) {
13429+ range->n0 = 0;
13430+ }
13431+ if (range->n1 >= input_full_size) {
13432+ range->n1 = input_full_size - 1;
13433+ }
13434+ }
13435+}
13436+
13437+static void
13438+stbir__get_split_info(stbir__per_split_info *split_info, int splits,
13439+ int output_height, int vertical_pixel_margin,
13440+ int input_full_height, int is_gather,
13441+ stbir__contributors *contribs)
13442+{
13443+ int i, cur;
13444+ int left = output_height;
13445+
13446+ cur = 0;
13447+ for (i = 0; i < splits; i++) {
13448+ int each;
13449+
13450+ split_info[i].start_output_y = cur;
13451+ each = left / (splits - i);
13452+ split_info[i].end_output_y = cur + each;
13453+
13454+ // ok, when we are gathering, we need to make sure we are starting on a
13455+ // y offset that doesn't have
13456+ // a "special" set of coefficients. Basically, with exactly the right
13457+ // filter at exactly the right resize at exactly the right phase, some
13458+ // of the coefficents can be zero. When they are zero, we don't
13459+ // process them at all. But this leads to a tricky thing with the
13460+ // thread splits, where we might have a set of two coeffs like this
13461+ // for example: (4,4) and (3,6). The 4,4 means there was just one
13462+ // single coeff because things worked out perfectly (normally, they
13463+ // all have 4 coeffs like the range 3,6. The problem is that if we
13464+ // start right on the (4,4) on a brand new thread, then when we get to
13465+ // (3,6), we don't have the "3" sample in memory (because we didn't
13466+ // load it on the initial (4,4) range because it didn't have a 3 (we
13467+ // only add new samples that are larger than our existing samples -
13468+ // it's just how the eviction works). So, our solution here is pretty
13469+ // simple, if we start right on a range that has samples that start
13470+ // earlier, then we simply bump up our previous thread split range to
13471+ // include it, and then start this threads range with the smaller
13472+ // sample. It just moves one scanline from one thread split to
13473+ // another, so that we end with the unusual one, instead of start with
13474+ // it. To do this, we check 2-4 sample at each thread split start and
13475+ // then occassionally move them.
13476+
13477+ if ((is_gather) && (i)) {
13478+ stbir__contributors *small_contribs;
13479+ int j, smallest, stop, start_n0;
13480+ stbir__contributors *split_contribs = contribs + cur;
13481+
13482+ // scan for a max of 3x the filter width or until the next thread
13483+ // split
13484+ stop = vertical_pixel_margin * 3;
13485+ if (each < stop) {
13486+ stop = each;
13487+ }
13488+
13489+ // loops a few times before early out
13490+ smallest = 0;
13491+ small_contribs = split_contribs;
13492+ start_n0 = small_contribs->n0;
13493+ for (j = 1; j <= stop; j++) {
13494+ ++split_contribs;
13495+ if (split_contribs->n0 > start_n0) {
13496+ break;
13497+ }
13498+ if (split_contribs->n0 < small_contribs->n0) {
13499+ small_contribs = split_contribs;
13500+ smallest = j;
13501+ }
13502+ }
13503+
13504+ split_info[i - 1].end_output_y += smallest;
13505+ split_info[i].start_output_y += smallest;
13506+ }
13507+
13508+ cur += each;
13509+ left -= each;
13510+
13511+ // scatter range (updated to minimum as you run it)
13512+ split_info[i].start_input_y = -vertical_pixel_margin;
13513+ split_info[i].end_input_y = input_full_height + vertical_pixel_margin;
13514+ }
13515+}
13516+
13517+static void
13518+stbir__free_internal_mem(stbir__info *info)
13519+{
13520+#define STBIR__FREE_AND_CLEAR(ptr) \
13521+ { \
13522+ if (ptr) { \
13523+ void *p = (ptr); \
13524+ (ptr) = 0; \
13525+ STBIR_FREE(p, info->user_data); \
13526+ } \
13527+ }
13528+
13529+ if (info) {
13530+#ifndef STBIR__SEPARATE_ALLOCATIONS
13531+ STBIR__FREE_AND_CLEAR(info->alloced_mem);
13532+#else
13533+ int i, j;
13534+
13535+ if ((info->vertical.gather_prescatter_contributors) &&
13536+ ((void *)info->vertical.gather_prescatter_contributors !=
13537+ (void *)info->split_info[0].decode_buffer)) {
13538+ STBIR__FREE_AND_CLEAR(
13539+ info->vertical.gather_prescatter_coefficients);
13540+ STBIR__FREE_AND_CLEAR(
13541+ info->vertical.gather_prescatter_contributors);
13542+ }
13543+ for (i = 0; i < info->splits; i++) {
13544+ for (j = 0; j < info->alloc_ring_buffer_num_entries; j++) {
13545+#ifdef STBIR_SIMD8
13546+ if (info->effective_channels == 3) {
13547+ --info->split_info[i]
13548+ .ring_buffers[j]; // avx in 3 channel mode needs one
13549+ // float at the start of the buffer
13550+ }
13551+#endif
13552+ STBIR__FREE_AND_CLEAR(info->split_info[i].ring_buffers[j]);
13553+ }
13554
13555-// there are six resize classifications: 0 == vertical scatter, 1 == vertical gather < 1x scale, 2 == vertical gather 1x-2x scale, 4 == vertical gather < 3x scale, 4 == vertical gather > 3x scale, 5 == <=4 pixel height, 6 == <=4 pixel wide column
13556+#ifdef STBIR_SIMD8
13557+ if (info->effective_channels == 3) {
13558+ --info->split_info[i]
13559+ .decode_buffer; // avx in 3 channel mode needs one float
13560+ // at the start of the buffer
13561+ }
13562+#endif
13563+ STBIR__FREE_AND_CLEAR(info->split_info[i].decode_buffer);
13564+ STBIR__FREE_AND_CLEAR(info->split_info[i].ring_buffers);
13565+ STBIR__FREE_AND_CLEAR(info->split_info[i].vertical_buffer);
13566+ }
13567+ STBIR__FREE_AND_CLEAR(info->split_info);
13568+ if (info->vertical.coefficients != info->horizontal.coefficients) {
13569+ STBIR__FREE_AND_CLEAR(info->vertical.coefficients);
13570+ STBIR__FREE_AND_CLEAR(info->vertical.contributors);
13571+ }
13572+ STBIR__FREE_AND_CLEAR(info->horizontal.coefficients);
13573+ STBIR__FREE_AND_CLEAR(info->horizontal.contributors);
13574+ STBIR__FREE_AND_CLEAR(info->alloced_mem);
13575+ STBIR_FREE(info, info->user_data);
13576+#endif
13577+ }
13578+
13579+#undef STBIR__FREE_AND_CLEAR
13580+}
13581+
13582+static int
13583+stbir__get_max_split(int splits, int height)
13584+{
13585+ int i;
13586+ int max = 0;
13587+
13588+ for (i = 0; i < splits; i++) {
13589+ int each = height / (splits - i);
13590+ if (each > max) {
13591+ max = each;
13592+ }
13593+ height -= each;
13594+ }
13595+ return max;
13596+}
13597+
13598+static stbir__horizontal_gather_channels_func *
13599+ *stbir__horizontal_gather_n_coeffs_funcs[8] = {
13600+ 0,
13601+ stbir__horizontal_gather_1_channels_with_n_coeffs_funcs,
13602+ stbir__horizontal_gather_2_channels_with_n_coeffs_funcs,
13603+ stbir__horizontal_gather_3_channels_with_n_coeffs_funcs,
13604+ stbir__horizontal_gather_4_channels_with_n_coeffs_funcs,
13605+ 0,
13606+ 0,
13607+ stbir__horizontal_gather_7_channels_with_n_coeffs_funcs};
13608+
13609+static stbir__horizontal_gather_channels_func *
13610+ *stbir__horizontal_gather_channels_funcs[8] = {
13611+ 0,
13612+ stbir__horizontal_gather_1_channels_funcs,
13613+ stbir__horizontal_gather_2_channels_funcs,
13614+ stbir__horizontal_gather_3_channels_funcs,
13615+ stbir__horizontal_gather_4_channels_funcs,
13616+ 0,
13617+ 0,
13618+ stbir__horizontal_gather_7_channels_funcs};
13619+
13620+// there are six resize classifications: 0 == vertical scatter, 1 == vertical
13621+// gather < 1x scale, 2 == vertical gather 1x-2x scale, 4 == vertical gather <
13622+// 3x scale, 4 == vertical gather > 3x scale, 5 == <=4 pixel height, 6 == <=4
13623+// pixel wide column
13624 #define STBIR_RESIZE_CLASSIFICATIONS 8
13625
13626-static float stbir__compute_weights[5][STBIR_RESIZE_CLASSIFICATIONS][4]= // 5 = 0=1chan, 1=2chan, 2=3chan, 3=4chan, 4=7chan
13627-{
13628- {
13629- { 1.00000f, 1.00000f, 0.31250f, 1.00000f },
13630- { 0.56250f, 0.59375f, 0.00000f, 0.96875f },
13631- { 1.00000f, 0.06250f, 0.00000f, 1.00000f },
13632- { 0.00000f, 0.09375f, 1.00000f, 1.00000f },
13633- { 1.00000f, 1.00000f, 1.00000f, 1.00000f },
13634- { 0.03125f, 0.12500f, 1.00000f, 1.00000f },
13635- { 0.06250f, 0.12500f, 0.00000f, 1.00000f },
13636- { 0.00000f, 1.00000f, 0.00000f, 0.03125f },
13637- }, {
13638- { 0.00000f, 0.84375f, 0.00000f, 0.03125f },
13639- { 0.09375f, 0.93750f, 0.00000f, 0.78125f },
13640- { 0.87500f, 0.21875f, 0.00000f, 0.96875f },
13641- { 0.09375f, 0.09375f, 1.00000f, 1.00000f },
13642- { 1.00000f, 1.00000f, 1.00000f, 1.00000f },
13643- { 0.03125f, 0.12500f, 1.00000f, 1.00000f },
13644- { 0.06250f, 0.12500f, 0.00000f, 1.00000f },
13645- { 0.00000f, 1.00000f, 0.00000f, 0.53125f },
13646- }, {
13647- { 0.00000f, 0.53125f, 0.00000f, 0.03125f },
13648- { 0.06250f, 0.96875f, 0.00000f, 0.53125f },
13649- { 0.87500f, 0.18750f, 0.00000f, 0.93750f },
13650- { 0.00000f, 0.09375f, 1.00000f, 1.00000f },
13651- { 1.00000f, 1.00000f, 1.00000f, 1.00000f },
13652- { 0.03125f, 0.12500f, 1.00000f, 1.00000f },
13653- { 0.06250f, 0.12500f, 0.00000f, 1.00000f },
13654- { 0.00000f, 1.00000f, 0.00000f, 0.56250f },
13655- }, {
13656- { 0.00000f, 0.50000f, 0.00000f, 0.71875f },
13657- { 0.06250f, 0.84375f, 0.00000f, 0.87500f },
13658- { 1.00000f, 0.50000f, 0.50000f, 0.96875f },
13659- { 1.00000f, 0.09375f, 0.31250f, 0.50000f },
13660- { 1.00000f, 1.00000f, 1.00000f, 1.00000f },
13661- { 1.00000f, 0.03125f, 0.03125f, 0.53125f },
13662- { 0.18750f, 0.12500f, 0.00000f, 1.00000f },
13663- { 0.00000f, 1.00000f, 0.03125f, 0.18750f },
13664- }, {
13665- { 0.00000f, 0.59375f, 0.00000f, 0.96875f },
13666- { 0.06250f, 0.81250f, 0.06250f, 0.59375f },
13667- { 0.75000f, 0.43750f, 0.12500f, 0.96875f },
13668- { 0.87500f, 0.06250f, 0.18750f, 0.43750f },
13669- { 1.00000f, 1.00000f, 1.00000f, 1.00000f },
13670- { 0.15625f, 0.12500f, 1.00000f, 1.00000f },
13671- { 0.06250f, 0.12500f, 0.00000f, 1.00000f },
13672- { 0.00000f, 1.00000f, 0.03125f, 0.34375f },
13673- }
13674-};
13675+static float stbir__compute_weights[5][STBIR_RESIZE_CLASSIFICATIONS]
13676+ [4] = // 5 = 0=1chan, 1=2chan, 2=3chan,
13677+ // 3=4chan, 4=7chan
13678+ {{
13679+ {1.00000f, 1.00000f, 0.31250f, 1.00000f},
13680+ {0.56250f, 0.59375f, 0.00000f, 0.96875f},
13681+ {1.00000f, 0.06250f, 0.00000f, 1.00000f},
13682+ {0.00000f, 0.09375f, 1.00000f, 1.00000f},
13683+ {1.00000f, 1.00000f, 1.00000f, 1.00000f},
13684+ {0.03125f, 0.12500f, 1.00000f, 1.00000f},
13685+ {0.06250f, 0.12500f, 0.00000f, 1.00000f},
13686+ {0.00000f, 1.00000f, 0.00000f, 0.03125f},
13687+ },
13688+ {
13689+ {0.00000f, 0.84375f, 0.00000f, 0.03125f},
13690+ {0.09375f, 0.93750f, 0.00000f, 0.78125f},
13691+ {0.87500f, 0.21875f, 0.00000f, 0.96875f},
13692+ {0.09375f, 0.09375f, 1.00000f, 1.00000f},
13693+ {1.00000f, 1.00000f, 1.00000f, 1.00000f},
13694+ {0.03125f, 0.12500f, 1.00000f, 1.00000f},
13695+ {0.06250f, 0.12500f, 0.00000f, 1.00000f},
13696+ {0.00000f, 1.00000f, 0.00000f, 0.53125f},
13697+ },
13698+ {
13699+ {0.00000f, 0.53125f, 0.00000f, 0.03125f},
13700+ {0.06250f, 0.96875f, 0.00000f, 0.53125f},
13701+ {0.87500f, 0.18750f, 0.00000f, 0.93750f},
13702+ {0.00000f, 0.09375f, 1.00000f, 1.00000f},
13703+ {1.00000f, 1.00000f, 1.00000f, 1.00000f},
13704+ {0.03125f, 0.12500f, 1.00000f, 1.00000f},
13705+ {0.06250f, 0.12500f, 0.00000f, 1.00000f},
13706+ {0.00000f, 1.00000f, 0.00000f, 0.56250f},
13707+ },
13708+ {
13709+ {0.00000f, 0.50000f, 0.00000f, 0.71875f},
13710+ {0.06250f, 0.84375f, 0.00000f, 0.87500f},
13711+ {1.00000f, 0.50000f, 0.50000f, 0.96875f},
13712+ {1.00000f, 0.09375f, 0.31250f, 0.50000f},
13713+ {1.00000f, 1.00000f, 1.00000f, 1.00000f},
13714+ {1.00000f, 0.03125f, 0.03125f, 0.53125f},
13715+ {0.18750f, 0.12500f, 0.00000f, 1.00000f},
13716+ {0.00000f, 1.00000f, 0.03125f, 0.18750f},
13717+ },
13718+ {
13719+ {0.00000f, 0.59375f, 0.00000f, 0.96875f},
13720+ {0.06250f, 0.81250f, 0.06250f, 0.59375f},
13721+ {0.75000f, 0.43750f, 0.12500f, 0.96875f},
13722+ {0.87500f, 0.06250f, 0.18750f, 0.43750f},
13723+ {1.00000f, 1.00000f, 1.00000f, 1.00000f},
13724+ {0.15625f, 0.12500f, 1.00000f, 1.00000f},
13725+ {0.06250f, 0.12500f, 0.00000f, 1.00000f},
13726+ {0.00000f, 1.00000f, 0.03125f, 0.34375f},
13727+ }};
13728
13729 // structure that allow us to query and override info for training the costs
13730-typedef struct STBIR__V_FIRST_INFO
13731-{
13732- double v_cost, h_cost;
13733- int control_v_first; // 0 = no control, 1 = force hori, 2 = force vert
13734- int v_first;
13735- int v_resize_classification;
13736- int is_gather;
13737+typedef struct STBIR__V_FIRST_INFO {
13738+ double v_cost, h_cost;
13739+ int control_v_first; // 0 = no control, 1 = force hori, 2 = force vert
13740+ int v_first;
13741+ int v_resize_classification;
13742+ int is_gather;
13743 } STBIR__V_FIRST_INFO;
13744
13745 #ifdef STBIR__V_FIRST_INFO_BUFFER
13746@@ -6954,1243 +8125,1685 @@ static STBIR__V_FIRST_INFO STBIR__V_FIRST_INFO_BUFFER = {0};
13747 // app that solves for the best weights (and shows how well it
13748 // does currently).
13749
13750-static int stbir__should_do_vertical_first( float weights_table[STBIR_RESIZE_CLASSIFICATIONS][4], int horizontal_filter_pixel_width, float horizontal_scale, int horizontal_output_size, int vertical_filter_pixel_width, float vertical_scale, int vertical_output_size, int is_gather, STBIR__V_FIRST_INFO * info )
13751-{
13752- double v_cost, h_cost;
13753- float * weights;
13754- int vertical_first;
13755- int v_classification;
13756-
13757- // categorize the resize into buckets
13758- if ( ( vertical_output_size <= 4 ) || ( horizontal_output_size <= 4 ) )
13759- v_classification = ( vertical_output_size < horizontal_output_size ) ? 6 : 7;
13760- else if ( vertical_scale <= 1.0f )
13761- v_classification = ( is_gather ) ? 1 : 0;
13762- else if ( vertical_scale <= 2.0f)
13763- v_classification = 2;
13764- else if ( vertical_scale <= 3.0f)
13765- v_classification = 3;
13766- else if ( vertical_scale <= 4.0f)
13767- v_classification = 5;
13768- else
13769- v_classification = 6;
13770-
13771- // use the right weights
13772- weights = weights_table[ v_classification ];
13773-
13774- // this is the costs when you don't take into account modern CPUs with high ipc and simd and caches - wish we had a better estimate
13775- h_cost = (float)horizontal_filter_pixel_width * weights[0] + horizontal_scale * (float)vertical_filter_pixel_width * weights[1];
13776- v_cost = (float)vertical_filter_pixel_width * weights[2] + vertical_scale * (float)horizontal_filter_pixel_width * weights[3];
13777-
13778- // use computation estimate to decide vertical first or not
13779- vertical_first = ( v_cost <= h_cost ) ? 1 : 0;
13780-
13781- // save these, if requested
13782- if ( info )
13783- {
13784- info->h_cost = h_cost;
13785- info->v_cost = v_cost;
13786- info->v_resize_classification = v_classification;
13787- info->v_first = vertical_first;
13788- info->is_gather = is_gather;
13789- }
13790-
13791- // and this allows us to override everything for testing (see dotiming.c)
13792- if ( ( info ) && ( info->control_v_first ) )
13793- vertical_first = ( info->control_v_first == 2 ) ? 1 : 0;
13794-
13795- return vertical_first;
13796+static int
13797+stbir__should_do_vertical_first(
13798+ float weights_table[STBIR_RESIZE_CLASSIFICATIONS][4],
13799+ int horizontal_filter_pixel_width, float horizontal_scale,
13800+ int horizontal_output_size, int vertical_filter_pixel_width,
13801+ float vertical_scale, int vertical_output_size, int is_gather,
13802+ STBIR__V_FIRST_INFO *info)
13803+{
13804+ double v_cost, h_cost;
13805+ float *weights;
13806+ int vertical_first;
13807+ int v_classification;
13808+
13809+ // categorize the resize into buckets
13810+ if ((vertical_output_size <= 4) || (horizontal_output_size <= 4)) {
13811+ v_classification =
13812+ (vertical_output_size < horizontal_output_size) ? 6 : 7;
13813+ } else if (vertical_scale <= 1.0f) {
13814+ v_classification = (is_gather) ? 1 : 0;
13815+ } else if (vertical_scale <= 2.0f) {
13816+ v_classification = 2;
13817+ } else if (vertical_scale <= 3.0f) {
13818+ v_classification = 3;
13819+ } else if (vertical_scale <= 4.0f) {
13820+ v_classification = 5;
13821+ } else {
13822+ v_classification = 6;
13823+ }
13824+
13825+ // use the right weights
13826+ weights = weights_table[v_classification];
13827+
13828+ // this is the costs when you don't take into account modern CPUs with high
13829+ // ipc and simd and caches - wish we had a better estimate
13830+ h_cost = (float)horizontal_filter_pixel_width * weights[0] +
13831+ horizontal_scale * (float)vertical_filter_pixel_width * weights[1];
13832+ v_cost = (float)vertical_filter_pixel_width * weights[2] +
13833+ vertical_scale * (float)horizontal_filter_pixel_width * weights[3];
13834+
13835+ // use computation estimate to decide vertical first or not
13836+ vertical_first = (v_cost <= h_cost) ? 1 : 0;
13837+
13838+ // save these, if requested
13839+ if (info) {
13840+ info->h_cost = h_cost;
13841+ info->v_cost = v_cost;
13842+ info->v_resize_classification = v_classification;
13843+ info->v_first = vertical_first;
13844+ info->is_gather = is_gather;
13845+ }
13846+
13847+ // and this allows us to override everything for testing (see dotiming.c)
13848+ if ((info) && (info->control_v_first)) {
13849+ vertical_first = (info->control_v_first == 2) ? 1 : 0;
13850+ }
13851+
13852+ return vertical_first;
13853 }
13854
13855 // layout lookups - must match stbir_internal_pixel_layout
13856 static unsigned char stbir__pixel_channels[] = {
13857- 1,2,3,3,4, // 1ch, 2ch, rgb, bgr, 4ch
13858- 4,4,4,4,2,2, // RGBA,BGRA,ARGB,ABGR,RA,AR
13859- 4,4,4,4,2,2, // RGBA_PM,BGRA_PM,ARGB_PM,ABGR_PM,RA_PM,AR_PM
13860+ 1, 2, 3, 3, 4, // 1ch, 2ch, rgb, bgr, 4ch
13861+ 4, 4, 4, 4, 2, 2, // RGBA,BGRA,ARGB,ABGR,RA,AR
13862+ 4, 4, 4, 4, 2, 2, // RGBA_PM,BGRA_PM,ARGB_PM,ABGR_PM,RA_PM,AR_PM
13863 };
13864
13865-// the internal pixel layout enums are in a different order, so we can easily do range comparisons of types
13866-// the public pixel layout is ordered in a way that if you cast num_channels (1-4) to the enum, you get something sensible
13867-static stbir_internal_pixel_layout stbir__pixel_layout_convert_public_to_internal[] = {
13868- STBIRI_BGR, STBIRI_1CHANNEL, STBIRI_2CHANNEL, STBIRI_RGB, STBIRI_RGBA,
13869- STBIRI_4CHANNEL, STBIRI_BGRA, STBIRI_ARGB, STBIRI_ABGR, STBIRI_RA, STBIRI_AR,
13870- STBIRI_RGBA_PM, STBIRI_BGRA_PM, STBIRI_ARGB_PM, STBIRI_ABGR_PM, STBIRI_RA_PM, STBIRI_AR_PM,
13871+// the internal pixel layout enums are in a different order, so we can easily do
13872+// range comparisons of types
13873+// the public pixel layout is ordered in a way that if you cast num_channels
13874+// (1-4) to the enum, you get something sensible
13875+static stbir_internal_pixel_layout
13876+ stbir__pixel_layout_convert_public_to_internal[] = {
13877+ STBIRI_BGR, STBIRI_1CHANNEL, STBIRI_2CHANNEL, STBIRI_RGB,
13878+ STBIRI_RGBA, STBIRI_4CHANNEL, STBIRI_BGRA, STBIRI_ARGB,
13879+ STBIRI_ABGR, STBIRI_RA, STBIRI_AR, STBIRI_RGBA_PM,
13880+ STBIRI_BGRA_PM, STBIRI_ARGB_PM, STBIRI_ABGR_PM, STBIRI_RA_PM,
13881+ STBIRI_AR_PM,
13882 };
13883
13884-static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sampler * horizontal, stbir__sampler * vertical, stbir__contributors * conservative, stbir_pixel_layout input_pixel_layout_public, stbir_pixel_layout output_pixel_layout_public, int splits, int new_x, int new_y, int fast_alpha, void * user_data STBIR_ONLY_PROFILE_BUILD_GET_INFO )
13885-{
13886- static char stbir_channel_count_index[8]={ 9,0,1,2, 3,9,9,4 };
13887-
13888- stbir__info * info = 0;
13889- void * alloced = 0;
13890- size_t alloced_total = 0;
13891- int vertical_first;
13892- size_t decode_buffer_size, ring_buffer_length_bytes, ring_buffer_size, vertical_buffer_size;
13893- int alloc_ring_buffer_num_entries;
13894-
13895- int alpha_weighting_type = 0; // 0=none, 1=simple, 2=fancy
13896- int conservative_split_output_size = stbir__get_max_split( splits, vertical->scale_info.output_sub_size );
13897- stbir_internal_pixel_layout input_pixel_layout = stbir__pixel_layout_convert_public_to_internal[ input_pixel_layout_public ];
13898- stbir_internal_pixel_layout output_pixel_layout = stbir__pixel_layout_convert_public_to_internal[ output_pixel_layout_public ];
13899- int channels = stbir__pixel_channels[ input_pixel_layout ];
13900- int effective_channels = channels;
13901-
13902- // first figure out what type of alpha weighting to use (if any)
13903- if ( ( horizontal->filter_enum != STBIR_FILTER_POINT_SAMPLE ) || ( vertical->filter_enum != STBIR_FILTER_POINT_SAMPLE ) ) // no alpha weighting on point sampling
13904- {
13905- if ( ( input_pixel_layout >= STBIRI_RGBA ) && ( input_pixel_layout <= STBIRI_AR ) && ( output_pixel_layout >= STBIRI_RGBA ) && ( output_pixel_layout <= STBIRI_AR ) )
13906- {
13907- if ( fast_alpha )
13908- {
13909- alpha_weighting_type = 4;
13910- }
13911- else
13912- {
13913- static int fancy_alpha_effective_cnts[6] = { 7, 7, 7, 7, 3, 3 };
13914- alpha_weighting_type = 2;
13915- effective_channels = fancy_alpha_effective_cnts[ input_pixel_layout - STBIRI_RGBA ];
13916- }
13917- }
13918- else if ( ( input_pixel_layout >= STBIRI_RGBA_PM ) && ( input_pixel_layout <= STBIRI_AR_PM ) && ( output_pixel_layout >= STBIRI_RGBA ) && ( output_pixel_layout <= STBIRI_AR ) )
13919- {
13920- // input premult, output non-premult
13921- alpha_weighting_type = 3;
13922- }
13923- else if ( ( input_pixel_layout >= STBIRI_RGBA ) && ( input_pixel_layout <= STBIRI_AR ) && ( output_pixel_layout >= STBIRI_RGBA_PM ) && ( output_pixel_layout <= STBIRI_AR_PM ) )
13924- {
13925- // input non-premult, output premult
13926- alpha_weighting_type = 1;
13927- }
13928- }
13929-
13930- // channel in and out count must match currently
13931- if ( channels != stbir__pixel_channels[ output_pixel_layout ] )
13932- return 0;
13933-
13934- // get vertical first
13935- vertical_first = stbir__should_do_vertical_first( stbir__compute_weights[ (int)stbir_channel_count_index[ effective_channels ] ], horizontal->filter_pixel_width, horizontal->scale_info.scale, horizontal->scale_info.output_sub_size, vertical->filter_pixel_width, vertical->scale_info.scale, vertical->scale_info.output_sub_size, vertical->is_gather, STBIR__V_FIRST_INFO_POINTER );
13936-
13937- // sometimes read one float off in some of the unrolled loops (with a weight of zero coeff, so it doesn't have an effect)
13938- // we use a few extra floats instead of just 1, so that input callback buffer can overlap with the decode buffer without
13939- // the conversion routines overwriting the callback input data.
13940- decode_buffer_size = ( conservative->n1 - conservative->n0 + 1 ) * effective_channels * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra floats for input callback stagger
13941-
13942-#if defined( STBIR__SEPARATE_ALLOCATIONS ) && defined(STBIR_SIMD8)
13943- if ( effective_channels == 3 )
13944- decode_buffer_size += sizeof(float); // avx in 3 channel mode needs one float at the start of the buffer (only with separate allocations)
13945-#endif
13946-
13947- ring_buffer_length_bytes = (size_t)horizontal->scale_info.output_sub_size * (size_t)effective_channels * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra floats for padding
13948-
13949- // if we do vertical first, the ring buffer holds a whole decoded line
13950- if ( vertical_first )
13951- ring_buffer_length_bytes = ( decode_buffer_size + 15 ) & ~15;
13952-
13953- if ( ( ring_buffer_length_bytes & 4095 ) == 0 ) ring_buffer_length_bytes += 64*3; // avoid 4k alias
13954-
13955- // One extra entry because floating point precision problems sometimes cause an extra to be necessary.
13956- alloc_ring_buffer_num_entries = vertical->filter_pixel_width + 1;
13957-
13958- // we never need more ring buffer entries than the scanlines we're outputting when in scatter mode
13959- if ( ( !vertical->is_gather ) && ( alloc_ring_buffer_num_entries > conservative_split_output_size ) )
13960- alloc_ring_buffer_num_entries = conservative_split_output_size;
13961-
13962- ring_buffer_size = (size_t)alloc_ring_buffer_num_entries * (size_t)ring_buffer_length_bytes;
13963-
13964- // The vertical buffer is used differently, depending on whether we are scattering
13965- // the vertical scanlines, or gathering them.
13966- // If scattering, it's used at the temp buffer to accumulate each output.
13967- // If gathering, it's just the output buffer.
13968- vertical_buffer_size = (size_t)horizontal->scale_info.output_sub_size * (size_t)effective_channels * sizeof(float) + sizeof(float); // extra float for padding
13969-
13970- // we make two passes through this loop, 1st to add everything up, 2nd to allocate and init
13971- for(;;)
13972- {
13973- int i;
13974- void * advance_mem = alloced;
13975- int copy_horizontal = 0;
13976- stbir__sampler * possibly_use_horizontal_for_pivot = 0;
13977+static stbir__info *
13978+stbir__alloc_internal_mem_and_build_samplers(
13979+ stbir__sampler *horizontal, stbir__sampler *vertical,
13980+ stbir__contributors *conservative,
13981+ stbir_pixel_layout input_pixel_layout_public,
13982+ stbir_pixel_layout output_pixel_layout_public, int splits, int new_x,
13983+ int new_y, int fast_alpha,
13984+ void *user_data STBIR_ONLY_PROFILE_BUILD_GET_INFO)
13985+{
13986+ static char stbir_channel_count_index[8] = {9, 0, 1, 2, 3, 9, 9, 4};
13987+
13988+ stbir__info *info = 0;
13989+ void *alloced = 0;
13990+ size_t alloced_total = 0;
13991+ int vertical_first;
13992+ size_t decode_buffer_size, ring_buffer_length_bytes, ring_buffer_size,
13993+ vertical_buffer_size;
13994+ int alloc_ring_buffer_num_entries;
13995+
13996+ int alpha_weighting_type = 0; // 0=none, 1=simple, 2=fancy
13997+ int conservative_split_output_size =
13998+ stbir__get_max_split(splits, vertical->scale_info.output_sub_size);
13999+ stbir_internal_pixel_layout input_pixel_layout =
14000+ stbir__pixel_layout_convert_public_to_internal
14001+ [input_pixel_layout_public];
14002+ stbir_internal_pixel_layout output_pixel_layout =
14003+ stbir__pixel_layout_convert_public_to_internal
14004+ [output_pixel_layout_public];
14005+ int channels = stbir__pixel_channels[input_pixel_layout];
14006+ int effective_channels = channels;
14007+
14008+ // first figure out what type of alpha weighting to use (if any)
14009+ if ((horizontal->filter_enum != STBIR_FILTER_POINT_SAMPLE) ||
14010+ (vertical->filter_enum !=
14011+ STBIR_FILTER_POINT_SAMPLE)) // no alpha weighting on point sampling
14012+ {
14013+ if ((input_pixel_layout >= STBIRI_RGBA) &&
14014+ (input_pixel_layout <= STBIRI_AR) &&
14015+ (output_pixel_layout >= STBIRI_RGBA) &&
14016+ (output_pixel_layout <= STBIRI_AR)) {
14017+ if (fast_alpha) {
14018+ alpha_weighting_type = 4;
14019+ } else {
14020+ static int fancy_alpha_effective_cnts[6] = {7, 7, 7, 7, 3, 3};
14021+ alpha_weighting_type = 2;
14022+ effective_channels =
14023+ fancy_alpha_effective_cnts[input_pixel_layout -
14024+ STBIRI_RGBA];
14025+ }
14026+ } else if ((input_pixel_layout >= STBIRI_RGBA_PM) &&
14027+ (input_pixel_layout <= STBIRI_AR_PM) &&
14028+ (output_pixel_layout >= STBIRI_RGBA) &&
14029+ (output_pixel_layout <= STBIRI_AR)) {
14030+ // input premult, output non-premult
14031+ alpha_weighting_type = 3;
14032+ } else if ((input_pixel_layout >= STBIRI_RGBA) &&
14033+ (input_pixel_layout <= STBIRI_AR) &&
14034+ (output_pixel_layout >= STBIRI_RGBA_PM) &&
14035+ (output_pixel_layout <= STBIRI_AR_PM)) {
14036+ // input non-premult, output premult
14037+ alpha_weighting_type = 1;
14038+ }
14039+ }
14040+
14041+ // channel in and out count must match currently
14042+ if (channels != stbir__pixel_channels[output_pixel_layout]) {
14043+ return 0;
14044+ }
14045+
14046+ // get vertical first
14047+ vertical_first = stbir__should_do_vertical_first(
14048+ stbir__compute_weights[(
14049+ int)stbir_channel_count_index[effective_channels]],
14050+ horizontal->filter_pixel_width, horizontal->scale_info.scale,
14051+ horizontal->scale_info.output_sub_size, vertical->filter_pixel_width,
14052+ vertical->scale_info.scale, vertical->scale_info.output_sub_size,
14053+ vertical->is_gather, STBIR__V_FIRST_INFO_POINTER);
14054+
14055+ // sometimes read one float off in some of the unrolled loops (with a weight
14056+ // of zero coeff, so it doesn't have an effect)
14057+ // we use a few extra floats instead of just 1, so that input callback
14058+ // buffer can overlap with the decode buffer without the conversion
14059+ // routines overwriting the callback input data.
14060+ decode_buffer_size =
14061+ (conservative->n1 - conservative->n0 + 1) * effective_channels *
14062+ sizeof(float) +
14063+ sizeof(float) * STBIR_INPUT_CALLBACK_PADDING; // extra floats for input
14064+ // callback stagger
14065+
14066+#if defined(STBIR__SEPARATE_ALLOCATIONS) && defined(STBIR_SIMD8)
14067+ if (effective_channels == 3) {
14068+ decode_buffer_size +=
14069+ sizeof(float); // avx in 3 channel mode needs one float at the start
14070+ // of the buffer (only with separate allocations)
14071+ }
14072+#endif
14073+
14074+ ring_buffer_length_bytes =
14075+ (size_t)horizontal->scale_info.output_sub_size *
14076+ (size_t)effective_channels * sizeof(float) +
14077+ sizeof(float) *
14078+ STBIR_INPUT_CALLBACK_PADDING; // extra floats for padding
14079+
14080+ // if we do vertical first, the ring buffer holds a whole decoded line
14081+ if (vertical_first) {
14082+ ring_buffer_length_bytes = (decode_buffer_size + 15) & ~15;
14083+ }
14084+
14085+ if ((ring_buffer_length_bytes & 4095) == 0) {
14086+ ring_buffer_length_bytes += 64 * 3; // avoid 4k alias
14087+ }
14088+
14089+ // One extra entry because floating point precision problems sometimes cause
14090+ // an extra to be necessary.
14091+ alloc_ring_buffer_num_entries = vertical->filter_pixel_width + 1;
14092+
14093+ // we never need more ring buffer entries than the scanlines we're
14094+ // outputting when in scatter mode
14095+ if ((!vertical->is_gather) &&
14096+ (alloc_ring_buffer_num_entries > conservative_split_output_size)) {
14097+ alloc_ring_buffer_num_entries = conservative_split_output_size;
14098+ }
14099+
14100+ ring_buffer_size = (size_t)alloc_ring_buffer_num_entries *
14101+ (size_t)ring_buffer_length_bytes;
14102+
14103+ // The vertical buffer is used differently, depending on whether we are
14104+ // scattering
14105+ // the vertical scanlines, or gathering them.
14106+ // If scattering, it's used at the temp buffer to accumulate each output.
14107+ // If gathering, it's just the output buffer.
14108+ vertical_buffer_size = (size_t)horizontal->scale_info.output_sub_size *
14109+ (size_t)effective_channels * sizeof(float) +
14110+ sizeof(float); // extra float for padding
14111+
14112+ // we make two passes through this loop, 1st to add everything up, 2nd to
14113+ // allocate and init
14114+ for (;;) {
14115+ int i;
14116+ void *advance_mem = alloced;
14117+ int copy_horizontal = 0;
14118+ stbir__sampler *possibly_use_horizontal_for_pivot = 0;
14119
14120 #ifdef STBIR__SEPARATE_ALLOCATIONS
14121- #define STBIR__NEXT_PTR( ptr, size, ntype ) if ( alloced ) { void * p = STBIR_MALLOC( size, user_data); if ( p == 0 ) { stbir__free_internal_mem( info ); return 0; } (ptr) = (ntype*)p; }
14122+#define STBIR__NEXT_PTR(ptr, size, ntype) \
14123+ if (alloced) { \
14124+ void *p = STBIR_MALLOC(size, user_data); \
14125+ if (p == 0) { \
14126+ stbir__free_internal_mem(info); \
14127+ return 0; \
14128+ } \
14129+ (ptr) = (ntype *)p; \
14130+ }
14131 #else
14132- #define STBIR__NEXT_PTR( ptr, size, ntype ) advance_mem = (void*) ( ( ((size_t)advance_mem) + 15 ) & ~15 ); if ( alloced ) ptr = (ntype*)advance_mem; advance_mem = (char*)(((size_t)advance_mem) + (size));
14133-#endif
14134-
14135- STBIR__NEXT_PTR( info, sizeof( stbir__info ), stbir__info );
14136-
14137- STBIR__NEXT_PTR( info->split_info, sizeof( stbir__per_split_info ) * splits, stbir__per_split_info );
14138-
14139- if ( info )
14140- {
14141- static stbir__alpha_weight_func * fancy_alpha_weights[6] = { stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_2ch, stbir__fancy_alpha_weight_2ch };
14142- static stbir__alpha_unweight_func * fancy_alpha_unweights[6] = { stbir__fancy_alpha_unweight_4ch, stbir__fancy_alpha_unweight_4ch, stbir__fancy_alpha_unweight_4ch, stbir__fancy_alpha_unweight_4ch, stbir__fancy_alpha_unweight_2ch, stbir__fancy_alpha_unweight_2ch };
14143- static stbir__alpha_weight_func * simple_alpha_weights[6] = { stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_2ch, stbir__simple_alpha_weight_2ch };
14144- static stbir__alpha_unweight_func * simple_alpha_unweights[6] = { stbir__simple_alpha_unweight_4ch, stbir__simple_alpha_unweight_4ch, stbir__simple_alpha_unweight_4ch, stbir__simple_alpha_unweight_4ch, stbir__simple_alpha_unweight_2ch, stbir__simple_alpha_unweight_2ch };
14145-
14146- // initialize info fields
14147- info->alloced_mem = alloced;
14148- info->alloced_total = alloced_total;
14149-
14150- info->channels = channels;
14151- info->effective_channels = effective_channels;
14152-
14153- info->offset_x = new_x;
14154- info->offset_y = new_y;
14155- info->alloc_ring_buffer_num_entries = (int)alloc_ring_buffer_num_entries;
14156- info->ring_buffer_num_entries = 0;
14157- info->ring_buffer_length_bytes = (int)ring_buffer_length_bytes;
14158- info->splits = splits;
14159- info->vertical_first = vertical_first;
14160-
14161- info->input_pixel_layout_internal = input_pixel_layout;
14162- info->output_pixel_layout_internal = output_pixel_layout;
14163-
14164- // setup alpha weight functions
14165- info->alpha_weight = 0;
14166- info->alpha_unweight = 0;
14167-
14168- // handle alpha weighting functions and overrides
14169- if ( alpha_weighting_type == 2 )
14170- {
14171- // high quality alpha multiplying on the way in, dividing on the way out
14172- info->alpha_weight = fancy_alpha_weights[ input_pixel_layout - STBIRI_RGBA ];
14173- info->alpha_unweight = fancy_alpha_unweights[ output_pixel_layout - STBIRI_RGBA ];
14174- }
14175- else if ( alpha_weighting_type == 4 )
14176- {
14177- // fast alpha multiplying on the way in, dividing on the way out
14178- info->alpha_weight = simple_alpha_weights[ input_pixel_layout - STBIRI_RGBA ];
14179- info->alpha_unweight = simple_alpha_unweights[ output_pixel_layout - STBIRI_RGBA ];
14180- }
14181- else if ( alpha_weighting_type == 1 )
14182- {
14183- // fast alpha on the way in, leave in premultiplied form on way out
14184- info->alpha_weight = simple_alpha_weights[ input_pixel_layout - STBIRI_RGBA ];
14185- }
14186- else if ( alpha_weighting_type == 3 )
14187- {
14188- // incoming is premultiplied, fast alpha dividing on the way out - non-premultiplied output
14189- info->alpha_unweight = simple_alpha_unweights[ output_pixel_layout - STBIRI_RGBA ];
14190- }
14191-
14192- // handle 3-chan color flipping, using the alpha weight path
14193- if ( ( ( input_pixel_layout == STBIRI_RGB ) && ( output_pixel_layout == STBIRI_BGR ) ) ||
14194- ( ( input_pixel_layout == STBIRI_BGR ) && ( output_pixel_layout == STBIRI_RGB ) ) )
14195- {
14196- // do the flipping on the smaller of the two ends
14197- if ( horizontal->scale_info.scale < 1.0f )
14198- info->alpha_unweight = stbir__simple_flip_3ch;
14199- else
14200- info->alpha_weight = stbir__simple_flip_3ch;
14201- }
14202-
14203- }
14204-
14205- // get all the per-split buffers
14206- for( i = 0 ; i < splits ; i++ )
14207- {
14208- STBIR__NEXT_PTR( info->split_info[i].decode_buffer, decode_buffer_size, float );
14209+#define STBIR__NEXT_PTR(ptr, size, ntype) \
14210+ advance_mem = (void *)((((size_t)advance_mem) + 15) & ~15); \
14211+ if (alloced) \
14212+ ptr = (ntype *)advance_mem; \
14213+ advance_mem = (char *)(((size_t)advance_mem) + (size));
14214+#endif
14215+
14216+ STBIR__NEXT_PTR(info, sizeof(stbir__info), stbir__info);
14217+
14218+ STBIR__NEXT_PTR(info->split_info,
14219+ sizeof(stbir__per_split_info) * splits,
14220+ stbir__per_split_info);
14221+
14222+ if (info) {
14223+ static stbir__alpha_weight_func *fancy_alpha_weights[6] = {
14224+ stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_4ch,
14225+ stbir__fancy_alpha_weight_4ch, stbir__fancy_alpha_weight_4ch,
14226+ stbir__fancy_alpha_weight_2ch, stbir__fancy_alpha_weight_2ch};
14227+ static stbir__alpha_unweight_func *fancy_alpha_unweights[6] = {
14228+ stbir__fancy_alpha_unweight_4ch,
14229+ stbir__fancy_alpha_unweight_4ch,
14230+ stbir__fancy_alpha_unweight_4ch,
14231+ stbir__fancy_alpha_unweight_4ch,
14232+ stbir__fancy_alpha_unweight_2ch,
14233+ stbir__fancy_alpha_unweight_2ch};
14234+ static stbir__alpha_weight_func *simple_alpha_weights[6] = {
14235+ stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_4ch,
14236+ stbir__simple_alpha_weight_4ch, stbir__simple_alpha_weight_4ch,
14237+ stbir__simple_alpha_weight_2ch, stbir__simple_alpha_weight_2ch};
14238+ static stbir__alpha_unweight_func *simple_alpha_unweights[6] = {
14239+ stbir__simple_alpha_unweight_4ch,
14240+ stbir__simple_alpha_unweight_4ch,
14241+ stbir__simple_alpha_unweight_4ch,
14242+ stbir__simple_alpha_unweight_4ch,
14243+ stbir__simple_alpha_unweight_2ch,
14244+ stbir__simple_alpha_unweight_2ch};
14245+
14246+ // initialize info fields
14247+ info->alloced_mem = alloced;
14248+ info->alloced_total = alloced_total;
14249+
14250+ info->channels = channels;
14251+ info->effective_channels = effective_channels;
14252+
14253+ info->offset_x = new_x;
14254+ info->offset_y = new_y;
14255+ info->alloc_ring_buffer_num_entries =
14256+ (int)alloc_ring_buffer_num_entries;
14257+ info->ring_buffer_num_entries = 0;
14258+ info->ring_buffer_length_bytes = (int)ring_buffer_length_bytes;
14259+ info->splits = splits;
14260+ info->vertical_first = vertical_first;
14261+
14262+ info->input_pixel_layout_internal = input_pixel_layout;
14263+ info->output_pixel_layout_internal = output_pixel_layout;
14264+
14265+ // setup alpha weight functions
14266+ info->alpha_weight = 0;
14267+ info->alpha_unweight = 0;
14268+
14269+ // handle alpha weighting functions and overrides
14270+ if (alpha_weighting_type == 2) {
14271+ // high quality alpha multiplying on the way in, dividing on the
14272+ // way out
14273+ info->alpha_weight =
14274+ fancy_alpha_weights[input_pixel_layout - STBIRI_RGBA];
14275+ info->alpha_unweight =
14276+ fancy_alpha_unweights[output_pixel_layout - STBIRI_RGBA];
14277+ } else if (alpha_weighting_type == 4) {
14278+ // fast alpha multiplying on the way in, dividing on the way out
14279+ info->alpha_weight =
14280+ simple_alpha_weights[input_pixel_layout - STBIRI_RGBA];
14281+ info->alpha_unweight =
14282+ simple_alpha_unweights[output_pixel_layout - STBIRI_RGBA];
14283+ } else if (alpha_weighting_type == 1) {
14284+ // fast alpha on the way in, leave in premultiplied form on way
14285+ // out
14286+ info->alpha_weight =
14287+ simple_alpha_weights[input_pixel_layout - STBIRI_RGBA];
14288+ } else if (alpha_weighting_type == 3) {
14289+ // incoming is premultiplied, fast alpha dividing on the way out
14290+ // - non-premultiplied output
14291+ info->alpha_unweight =
14292+ simple_alpha_unweights[output_pixel_layout - STBIRI_RGBA];
14293+ }
14294+
14295+ // handle 3-chan color flipping, using the alpha weight path
14296+ if (((input_pixel_layout == STBIRI_RGB) &&
14297+ (output_pixel_layout == STBIRI_BGR)) ||
14298+ ((input_pixel_layout == STBIRI_BGR) &&
14299+ (output_pixel_layout == STBIRI_RGB))) {
14300+ // do the flipping on the smaller of the two ends
14301+ if (horizontal->scale_info.scale < 1.0f) {
14302+ info->alpha_unweight = stbir__simple_flip_3ch;
14303+ } else {
14304+ info->alpha_weight = stbir__simple_flip_3ch;
14305+ }
14306+ }
14307+ }
14308+
14309+ // get all the per-split buffers
14310+ for (i = 0; i < splits; i++) {
14311+ STBIR__NEXT_PTR(info->split_info[i].decode_buffer,
14312+ decode_buffer_size, float);
14313
14314 #ifdef STBIR__SEPARATE_ALLOCATIONS
14315
14316- #ifdef STBIR_SIMD8
14317- if ( ( info ) && ( effective_channels == 3 ) )
14318- ++info->split_info[i].decode_buffer; // avx in 3 channel mode needs one float at the start of the buffer
14319- #endif
14320-
14321- STBIR__NEXT_PTR( info->split_info[i].ring_buffers, alloc_ring_buffer_num_entries * sizeof(float*), float* );
14322- {
14323- int j;
14324- for( j = 0 ; j < alloc_ring_buffer_num_entries ; j++ )
14325- {
14326- STBIR__NEXT_PTR( info->split_info[i].ring_buffers[j], ring_buffer_length_bytes, float );
14327- #ifdef STBIR_SIMD8
14328- if ( ( info ) && ( effective_channels == 3 ) )
14329- ++info->split_info[i].ring_buffers[j]; // avx in 3 channel mode needs one float at the start of the buffer
14330- #endif
14331- }
14332- }
14333+#ifdef STBIR_SIMD8
14334+ if ((info) && (effective_channels == 3)) {
14335+ ++info->split_info[i]
14336+ .decode_buffer; // avx in 3 channel mode needs one float
14337+ // at the start of the buffer
14338+ }
14339+#endif
14340+
14341+ STBIR__NEXT_PTR(info->split_info[i].ring_buffers,
14342+ alloc_ring_buffer_num_entries * sizeof(float *),
14343+ float *);
14344+ {
14345+ int j;
14346+ for (j = 0; j < alloc_ring_buffer_num_entries; j++) {
14347+ STBIR__NEXT_PTR(info->split_info[i].ring_buffers[j],
14348+ ring_buffer_length_bytes, float);
14349+#ifdef STBIR_SIMD8
14350+ if ((info) && (effective_channels == 3)) {
14351+ ++info->split_info[i]
14352+ .ring_buffers[j]; // avx in 3 channel mode needs
14353+ // one float at the start of the
14354+ // buffer
14355+ }
14356+#endif
14357+ }
14358+ }
14359 #else
14360- STBIR__NEXT_PTR( info->split_info[i].ring_buffer, ring_buffer_size, float );
14361+ STBIR__NEXT_PTR(info->split_info[i].ring_buffer, ring_buffer_size,
14362+ float);
14363 #endif
14364- STBIR__NEXT_PTR( info->split_info[i].vertical_buffer, vertical_buffer_size, float );
14365- }
14366+ STBIR__NEXT_PTR(info->split_info[i].vertical_buffer,
14367+ vertical_buffer_size, float);
14368+ }
14369
14370- // alloc memory for to-be-pivoted coeffs (if necessary)
14371- if ( vertical->is_gather == 0 )
14372- {
14373- size_t both;
14374- size_t temp_mem_amt;
14375+ // alloc memory for to-be-pivoted coeffs (if necessary)
14376+ if (vertical->is_gather == 0) {
14377+ size_t both;
14378+ size_t temp_mem_amt;
14379
14380- // when in vertical scatter mode, we first build the coefficients in gather mode, and then pivot after,
14381- // that means we need two buffers, so we try to use the decode buffer and ring buffer for this. if that
14382- // is too small, we just allocate extra memory to use as this temp.
14383+ // when in vertical scatter mode, we first build the coefficients in
14384+ // gather mode, and then pivot after,
14385+ // that means we need two buffers, so we try to use the decode
14386+ // buffer and ring buffer for this. if that is too small, we just
14387+ // allocate extra memory to use as this temp.
14388
14389- both = (size_t)vertical->gather_prescatter_contributors_size + (size_t)vertical->gather_prescatter_coefficients_size;
14390+ both = (size_t)vertical->gather_prescatter_contributors_size +
14391+ (size_t)vertical->gather_prescatter_coefficients_size;
14392
14393 #ifdef STBIR__SEPARATE_ALLOCATIONS
14394- temp_mem_amt = decode_buffer_size;
14395+ temp_mem_amt = decode_buffer_size;
14396
14397- #ifdef STBIR_SIMD8
14398- if ( effective_channels == 3 )
14399- --temp_mem_amt; // avx in 3 channel mode needs one float at the start of the buffer
14400- #endif
14401+#ifdef STBIR_SIMD8
14402+ if (effective_channels == 3) {
14403+ --temp_mem_amt; // avx in 3 channel mode needs one float at the
14404+ // start of the buffer
14405+ }
14406+#endif
14407 #else
14408- temp_mem_amt = (size_t)( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * (size_t)splits;
14409-#endif
14410- if ( temp_mem_amt >= both )
14411- {
14412- if ( info )
14413- {
14414- vertical->gather_prescatter_contributors = (stbir__contributors*)info->split_info[0].decode_buffer;
14415- vertical->gather_prescatter_coefficients = (float*) ( ( (char*)info->split_info[0].decode_buffer ) + vertical->gather_prescatter_contributors_size );
14416- }
14417- }
14418- else
14419- {
14420- // ring+decode memory is too small, so allocate temp memory
14421- STBIR__NEXT_PTR( vertical->gather_prescatter_contributors, vertical->gather_prescatter_contributors_size, stbir__contributors );
14422- STBIR__NEXT_PTR( vertical->gather_prescatter_coefficients, vertical->gather_prescatter_coefficients_size, float );
14423- }
14424- }
14425-
14426- STBIR__NEXT_PTR( horizontal->contributors, horizontal->contributors_size, stbir__contributors );
14427- STBIR__NEXT_PTR( horizontal->coefficients, horizontal->coefficients_size, float );
14428-
14429- // are the two filters identical?? (happens a lot with mipmap generation)
14430- if ( ( horizontal->filter_kernel == vertical->filter_kernel ) && ( horizontal->filter_support == vertical->filter_support ) && ( horizontal->edge == vertical->edge ) && ( horizontal->scale_info.output_sub_size == vertical->scale_info.output_sub_size ) )
14431- {
14432- float diff_scale = horizontal->scale_info.scale - vertical->scale_info.scale;
14433- float diff_shift = horizontal->scale_info.pixel_shift - vertical->scale_info.pixel_shift;
14434- if ( diff_scale < 0.0f ) diff_scale = -diff_scale;
14435- if ( diff_shift < 0.0f ) diff_shift = -diff_shift;
14436- if ( ( diff_scale <= stbir__small_float ) && ( diff_shift <= stbir__small_float ) )
14437- {
14438- if ( horizontal->is_gather == vertical->is_gather )
14439- {
14440- copy_horizontal = 1;
14441- goto no_vert_alloc;
14442- }
14443- // everything matches, but vertical is scatter, horizontal is gather, use horizontal coeffs for vertical pivot coeffs
14444- possibly_use_horizontal_for_pivot = horizontal;
14445- }
14446- }
14447-
14448- STBIR__NEXT_PTR( vertical->contributors, vertical->contributors_size, stbir__contributors );
14449- STBIR__NEXT_PTR( vertical->coefficients, vertical->coefficients_size, float );
14450-
14451- no_vert_alloc:
14452-
14453- if ( info )
14454- {
14455- STBIR_PROFILE_BUILD_START( horizontal );
14456-
14457- stbir__calculate_filters( horizontal, 0, user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO );
14458-
14459- // setup the horizontal gather functions
14460- // start with defaulting to the n_coeffs functions (specialized on channels and remnant leftover)
14461- info->horizontal_gather_channels = stbir__horizontal_gather_n_coeffs_funcs[ effective_channels ][ horizontal->extent_info.widest & 3 ];
14462- // but if the number of coeffs <= 12, use another set of special cases. <=12 coeffs is any enlarging resize, or shrinking resize down to about 1/3 size
14463- if ( horizontal->extent_info.widest <= 12 )
14464- info->horizontal_gather_channels = stbir__horizontal_gather_channels_funcs[ effective_channels ][ horizontal->extent_info.widest - 1 ];
14465-
14466- info->scanline_extents.conservative.n0 = conservative->n0;
14467- info->scanline_extents.conservative.n1 = conservative->n1;
14468-
14469- // get exact extents
14470- stbir__get_extents( horizontal, &info->scanline_extents );
14471-
14472- // pack the horizontal coeffs
14473- horizontal->coefficient_width = stbir__pack_coefficients(horizontal->num_contributors, horizontal->contributors, horizontal->coefficients, horizontal->coefficient_width, horizontal->extent_info.widest, info->scanline_extents.conservative.n0, info->scanline_extents.conservative.n1 );
14474-
14475- STBIR_MEMCPY( &info->horizontal, horizontal, sizeof( stbir__sampler ) );
14476-
14477- STBIR_PROFILE_BUILD_END( horizontal );
14478-
14479- if ( copy_horizontal )
14480- {
14481- STBIR_MEMCPY( &info->vertical, horizontal, sizeof( stbir__sampler ) );
14482- }
14483- else
14484- {
14485- STBIR_PROFILE_BUILD_START( vertical );
14486-
14487- stbir__calculate_filters( vertical, possibly_use_horizontal_for_pivot, user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO );
14488- STBIR_MEMCPY( &info->vertical, vertical, sizeof( stbir__sampler ) );
14489-
14490- STBIR_PROFILE_BUILD_END( vertical );
14491- }
14492-
14493- // setup the vertical split ranges
14494- stbir__get_split_info( info->split_info, info->splits, info->vertical.scale_info.output_sub_size, info->vertical.filter_pixel_margin, info->vertical.scale_info.input_full_size, info->vertical.is_gather, info->vertical.contributors );
14495-
14496- // now we know precisely how many entries we need
14497- info->ring_buffer_num_entries = info->vertical.extent_info.widest;
14498-
14499- // we never need more ring buffer entries than the scanlines we're outputting
14500- if ( ( !info->vertical.is_gather ) && ( info->ring_buffer_num_entries > conservative_split_output_size ) )
14501- info->ring_buffer_num_entries = conservative_split_output_size;
14502- STBIR_ASSERT( info->ring_buffer_num_entries <= info->alloc_ring_buffer_num_entries );
14503- }
14504- #undef STBIR__NEXT_PTR
14505-
14506-
14507- // is this the first time through loop?
14508- if ( info == 0 )
14509- {
14510- alloced_total = ( 15 + (size_t)advance_mem );
14511- alloced = STBIR_MALLOC( alloced_total, user_data );
14512- if ( alloced == 0 )
14513- return 0;
14514- }
14515- else
14516- return info; // success
14517- }
14518-}
14519-
14520-static int stbir__perform_resize( stbir__info const * info, int split_start, int split_count )
14521-{
14522- stbir__per_split_info * split_info = info->split_info + split_start;
14523-
14524- STBIR_PROFILE_CLEAR_EXTRAS();
14525-
14526- STBIR_PROFILE_FIRST_START( looping );
14527- if (info->vertical.is_gather)
14528- stbir__vertical_gather_loop( info, split_info, split_count );
14529- else
14530- stbir__vertical_scatter_loop( info, split_info, split_count );
14531- STBIR_PROFILE_END( looping );
14532-
14533- return 1;
14534-}
14535-
14536-static void stbir__update_info_from_resize( stbir__info * info, STBIR_RESIZE * resize )
14537-{
14538- static stbir__decode_pixels_func * decode_simple[STBIR_TYPE_HALF_FLOAT-STBIR_TYPE_UINT8_SRGB+1]=
14539- {
14540- /* 1ch-4ch */ stbir__decode_uint8_srgb, stbir__decode_uint8_srgb, 0, stbir__decode_float_linear, stbir__decode_half_float_linear,
14541- };
14542-
14543- static stbir__decode_pixels_func * decode_alphas[STBIRI_AR-STBIRI_RGBA+1][STBIR_TYPE_HALF_FLOAT-STBIR_TYPE_UINT8_SRGB+1]=
14544- {
14545- { /* RGBA */ stbir__decode_uint8_srgb4_linearalpha, stbir__decode_uint8_srgb, 0, stbir__decode_float_linear, stbir__decode_half_float_linear },
14546- { /* BGRA */ stbir__decode_uint8_srgb4_linearalpha_BGRA, stbir__decode_uint8_srgb_BGRA, 0, stbir__decode_float_linear_BGRA, stbir__decode_half_float_linear_BGRA },
14547- { /* ARGB */ stbir__decode_uint8_srgb4_linearalpha_ARGB, stbir__decode_uint8_srgb_ARGB, 0, stbir__decode_float_linear_ARGB, stbir__decode_half_float_linear_ARGB },
14548- { /* ABGR */ stbir__decode_uint8_srgb4_linearalpha_ABGR, stbir__decode_uint8_srgb_ABGR, 0, stbir__decode_float_linear_ABGR, stbir__decode_half_float_linear_ABGR },
14549- { /* RA */ stbir__decode_uint8_srgb2_linearalpha, stbir__decode_uint8_srgb, 0, stbir__decode_float_linear, stbir__decode_half_float_linear },
14550- { /* AR */ stbir__decode_uint8_srgb2_linearalpha_AR, stbir__decode_uint8_srgb_AR, 0, stbir__decode_float_linear_AR, stbir__decode_half_float_linear_AR },
14551- };
14552-
14553- static stbir__decode_pixels_func * decode_simple_scaled_or_not[2][2]=
14554- {
14555- { stbir__decode_uint8_linear_scaled, stbir__decode_uint8_linear }, { stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear },
14556- };
14557-
14558- static stbir__decode_pixels_func * decode_alphas_scaled_or_not[STBIRI_AR-STBIRI_RGBA+1][2][2]=
14559- {
14560- { /* RGBA */ { stbir__decode_uint8_linear_scaled, stbir__decode_uint8_linear }, { stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear } },
14561- { /* BGRA */ { stbir__decode_uint8_linear_scaled_BGRA, stbir__decode_uint8_linear_BGRA }, { stbir__decode_uint16_linear_scaled_BGRA, stbir__decode_uint16_linear_BGRA } },
14562- { /* ARGB */ { stbir__decode_uint8_linear_scaled_ARGB, stbir__decode_uint8_linear_ARGB }, { stbir__decode_uint16_linear_scaled_ARGB, stbir__decode_uint16_linear_ARGB } },
14563- { /* ABGR */ { stbir__decode_uint8_linear_scaled_ABGR, stbir__decode_uint8_linear_ABGR }, { stbir__decode_uint16_linear_scaled_ABGR, stbir__decode_uint16_linear_ABGR } },
14564- { /* RA */ { stbir__decode_uint8_linear_scaled, stbir__decode_uint8_linear }, { stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear } },
14565- { /* AR */ { stbir__decode_uint8_linear_scaled_AR, stbir__decode_uint8_linear_AR }, { stbir__decode_uint16_linear_scaled_AR, stbir__decode_uint16_linear_AR } }
14566- };
14567-
14568- static stbir__encode_pixels_func * encode_simple[STBIR_TYPE_HALF_FLOAT-STBIR_TYPE_UINT8_SRGB+1]=
14569- {
14570- /* 1ch-4ch */ stbir__encode_uint8_srgb, stbir__encode_uint8_srgb, 0, stbir__encode_float_linear, stbir__encode_half_float_linear,
14571- };
14572-
14573- static stbir__encode_pixels_func * encode_alphas[STBIRI_AR-STBIRI_RGBA+1][STBIR_TYPE_HALF_FLOAT-STBIR_TYPE_UINT8_SRGB+1]=
14574- {
14575- { /* RGBA */ stbir__encode_uint8_srgb4_linearalpha, stbir__encode_uint8_srgb, 0, stbir__encode_float_linear, stbir__encode_half_float_linear },
14576- { /* BGRA */ stbir__encode_uint8_srgb4_linearalpha_BGRA, stbir__encode_uint8_srgb_BGRA, 0, stbir__encode_float_linear_BGRA, stbir__encode_half_float_linear_BGRA },
14577- { /* ARGB */ stbir__encode_uint8_srgb4_linearalpha_ARGB, stbir__encode_uint8_srgb_ARGB, 0, stbir__encode_float_linear_ARGB, stbir__encode_half_float_linear_ARGB },
14578- { /* ABGR */ stbir__encode_uint8_srgb4_linearalpha_ABGR, stbir__encode_uint8_srgb_ABGR, 0, stbir__encode_float_linear_ABGR, stbir__encode_half_float_linear_ABGR },
14579- { /* RA */ stbir__encode_uint8_srgb2_linearalpha, stbir__encode_uint8_srgb, 0, stbir__encode_float_linear, stbir__encode_half_float_linear },
14580- { /* AR */ stbir__encode_uint8_srgb2_linearalpha_AR, stbir__encode_uint8_srgb_AR, 0, stbir__encode_float_linear_AR, stbir__encode_half_float_linear_AR }
14581- };
14582-
14583- static stbir__encode_pixels_func * encode_simple_scaled_or_not[2][2]=
14584- {
14585- { stbir__encode_uint8_linear_scaled, stbir__encode_uint8_linear }, { stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear },
14586- };
14587-
14588- static stbir__encode_pixels_func * encode_alphas_scaled_or_not[STBIRI_AR-STBIRI_RGBA+1][2][2]=
14589- {
14590- { /* RGBA */ { stbir__encode_uint8_linear_scaled, stbir__encode_uint8_linear }, { stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear } },
14591- { /* BGRA */ { stbir__encode_uint8_linear_scaled_BGRA, stbir__encode_uint8_linear_BGRA }, { stbir__encode_uint16_linear_scaled_BGRA, stbir__encode_uint16_linear_BGRA } },
14592- { /* ARGB */ { stbir__encode_uint8_linear_scaled_ARGB, stbir__encode_uint8_linear_ARGB }, { stbir__encode_uint16_linear_scaled_ARGB, stbir__encode_uint16_linear_ARGB } },
14593- { /* ABGR */ { stbir__encode_uint8_linear_scaled_ABGR, stbir__encode_uint8_linear_ABGR }, { stbir__encode_uint16_linear_scaled_ABGR, stbir__encode_uint16_linear_ABGR } },
14594- { /* RA */ { stbir__encode_uint8_linear_scaled, stbir__encode_uint8_linear }, { stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear } },
14595- { /* AR */ { stbir__encode_uint8_linear_scaled_AR, stbir__encode_uint8_linear_AR }, { stbir__encode_uint16_linear_scaled_AR, stbir__encode_uint16_linear_AR } }
14596- };
14597-
14598- stbir__decode_pixels_func * decode_pixels = 0;
14599- stbir__encode_pixels_func * encode_pixels = 0;
14600- stbir_datatype input_type, output_type;
14601-
14602- input_type = resize->input_data_type;
14603- output_type = resize->output_data_type;
14604- info->input_data = resize->input_pixels;
14605- info->input_stride_bytes = resize->input_stride_in_bytes;
14606- info->output_stride_bytes = resize->output_stride_in_bytes;
14607-
14608- // if we're completely point sampling, then we can turn off SRGB
14609- if ( ( info->horizontal.filter_enum == STBIR_FILTER_POINT_SAMPLE ) && ( info->vertical.filter_enum == STBIR_FILTER_POINT_SAMPLE ) )
14610- {
14611- if ( ( ( input_type == STBIR_TYPE_UINT8_SRGB ) || ( input_type == STBIR_TYPE_UINT8_SRGB_ALPHA ) ) &&
14612- ( ( output_type == STBIR_TYPE_UINT8_SRGB ) || ( output_type == STBIR_TYPE_UINT8_SRGB_ALPHA ) ) )
14613- {
14614- input_type = STBIR_TYPE_UINT8;
14615- output_type = STBIR_TYPE_UINT8;
14616- }
14617- }
14618-
14619- // recalc the output and input strides
14620- if ( info->input_stride_bytes == 0 )
14621- info->input_stride_bytes = info->channels * info->horizontal.scale_info.input_full_size * stbir__type_size[input_type];
14622-
14623- if ( info->output_stride_bytes == 0 )
14624- info->output_stride_bytes = info->channels * info->horizontal.scale_info.output_sub_size * stbir__type_size[output_type];
14625-
14626- // calc offset
14627- info->output_data = ( (char*) resize->output_pixels ) + ( (size_t) info->offset_y * (size_t) resize->output_stride_in_bytes ) + ( info->offset_x * info->channels * stbir__type_size[output_type] );
14628-
14629- info->in_pixels_cb = resize->input_cb;
14630- info->user_data = resize->user_data;
14631- info->out_pixels_cb = resize->output_cb;
14632-
14633- // setup the input format converters
14634- if ( ( input_type == STBIR_TYPE_UINT8 ) || ( input_type == STBIR_TYPE_UINT16 ) )
14635- {
14636- int non_scaled = 0;
14637-
14638- // check if we can run unscaled - 0-255.0/0-65535.0 instead of 0-1.0 (which is a tiny bit faster when doing linear 8->8 or 16->16)
14639- if ( ( !info->alpha_weight ) && ( !info->alpha_unweight ) ) // don't short circuit when alpha weighting (get everything to 0-1.0 as usual)
14640- if ( ( ( input_type == STBIR_TYPE_UINT8 ) && ( output_type == STBIR_TYPE_UINT8 ) ) || ( ( input_type == STBIR_TYPE_UINT16 ) && ( output_type == STBIR_TYPE_UINT16 ) ) )
14641- non_scaled = 1;
14642-
14643- if ( info->input_pixel_layout_internal <= STBIRI_4CHANNEL )
14644- decode_pixels = decode_simple_scaled_or_not[ input_type == STBIR_TYPE_UINT16 ][ non_scaled ];
14645- else
14646- decode_pixels = decode_alphas_scaled_or_not[ ( info->input_pixel_layout_internal - STBIRI_RGBA ) % ( STBIRI_AR-STBIRI_RGBA+1 ) ][ input_type == STBIR_TYPE_UINT16 ][ non_scaled ];
14647- }
14648- else
14649- {
14650- if ( info->input_pixel_layout_internal <= STBIRI_4CHANNEL )
14651- decode_pixels = decode_simple[ input_type - STBIR_TYPE_UINT8_SRGB ];
14652- else
14653- decode_pixels = decode_alphas[ ( info->input_pixel_layout_internal - STBIRI_RGBA ) % ( STBIRI_AR-STBIRI_RGBA+1 ) ][ input_type - STBIR_TYPE_UINT8_SRGB ];
14654- }
14655-
14656- // setup the output format converters
14657- if ( ( output_type == STBIR_TYPE_UINT8 ) || ( output_type == STBIR_TYPE_UINT16 ) )
14658- {
14659- int non_scaled = 0;
14660-
14661- // check if we can run unscaled - 0-255.0/0-65535.0 instead of 0-1.0 (which is a tiny bit faster when doing linear 8->8 or 16->16)
14662- if ( ( !info->alpha_weight ) && ( !info->alpha_unweight ) ) // don't short circuit when alpha weighting (get everything to 0-1.0 as usual)
14663- if ( ( ( input_type == STBIR_TYPE_UINT8 ) && ( output_type == STBIR_TYPE_UINT8 ) ) || ( ( input_type == STBIR_TYPE_UINT16 ) && ( output_type == STBIR_TYPE_UINT16 ) ) )
14664- non_scaled = 1;
14665-
14666- if ( info->output_pixel_layout_internal <= STBIRI_4CHANNEL )
14667- encode_pixels = encode_simple_scaled_or_not[ output_type == STBIR_TYPE_UINT16 ][ non_scaled ];
14668- else
14669- encode_pixels = encode_alphas_scaled_or_not[ ( info->output_pixel_layout_internal - STBIRI_RGBA ) % ( STBIRI_AR-STBIRI_RGBA+1 ) ][ output_type == STBIR_TYPE_UINT16 ][ non_scaled ];
14670- }
14671- else
14672- {
14673- if ( info->output_pixel_layout_internal <= STBIRI_4CHANNEL )
14674- encode_pixels = encode_simple[ output_type - STBIR_TYPE_UINT8_SRGB ];
14675- else
14676- encode_pixels = encode_alphas[ ( info->output_pixel_layout_internal - STBIRI_RGBA ) % ( STBIRI_AR-STBIRI_RGBA+1 ) ][ output_type - STBIR_TYPE_UINT8_SRGB ];
14677- }
14678-
14679- info->input_type = input_type;
14680- info->output_type = output_type;
14681- info->decode_pixels = decode_pixels;
14682- info->encode_pixels = encode_pixels;
14683-}
14684-
14685-static void stbir__clip( int * outx, int * outsubw, int outw, double * u0, double * u1 )
14686-{
14687- double per, adj;
14688- int over;
14689-
14690- // do left/top edge
14691- if ( *outx < 0 )
14692- {
14693- per = ( (double)*outx ) / ( (double)*outsubw ); // is negative
14694- adj = per * ( *u1 - *u0 );
14695- *u0 -= adj; // increases u0
14696- *outx = 0;
14697- }
14698-
14699- // do right/bot edge
14700- over = outw - ( *outx + *outsubw );
14701- if ( over < 0 )
14702- {
14703- per = ( (double)over ) / ( (double)*outsubw ); // is negative
14704- adj = per * ( *u1 - *u0 );
14705- *u1 += adj; // decrease u1
14706- *outsubw = outw - *outx;
14707- }
14708-}
14709-
14710-// converts a double to a rational that has less than one float bit of error (returns 0 if unable to do so)
14711-static int stbir__double_to_rational(double f, stbir_uint32 limit, stbir_uint32 *numer, stbir_uint32 *denom, int limit_denom ) // limit_denom (1) or limit numer (0)
14712-{
14713- double err;
14714- stbir_uint64 top, bot;
14715- stbir_uint64 numer_last = 0;
14716- stbir_uint64 denom_last = 1;
14717- stbir_uint64 numer_estimate = 1;
14718- stbir_uint64 denom_estimate = 0;
14719-
14720- // scale to past float error range
14721- top = (stbir_uint64)( f * (double)(1 << 25) );
14722- bot = 1 << 25;
14723-
14724- // keep refining, but usually stops in a few loops - usually 5 for bad cases
14725- for(;;)
14726- {
14727- stbir_uint64 est, temp;
14728-
14729- // hit limit, break out and do best full range estimate
14730- if ( ( ( limit_denom ) ? denom_estimate : numer_estimate ) >= limit )
14731- break;
14732-
14733- // is the current error less than 1 bit of a float? if so, we're done
14734- if ( denom_estimate )
14735- {
14736- err = ( (double)numer_estimate / (double)denom_estimate ) - f;
14737- if ( err < 0.0 ) err = -err;
14738- if ( err < ( 1.0 / (double)(1<<24) ) )
14739- {
14740- // yup, found it
14741- *numer = (stbir_uint32) numer_estimate;
14742- *denom = (stbir_uint32) denom_estimate;
14743- return 1;
14744- }
14745- }
14746-
14747- // no more refinement bits left? break out and do full range estimate
14748- if ( bot == 0 )
14749- break;
14750-
14751- // gcd the estimate bits
14752- est = top / bot;
14753- temp = top % bot;
14754- top = bot;
14755- bot = temp;
14756-
14757- // move remainders
14758- temp = est * denom_estimate + denom_last;
14759- denom_last = denom_estimate;
14760- denom_estimate = temp;
14761-
14762- // move remainders
14763- temp = est * numer_estimate + numer_last;
14764- numer_last = numer_estimate;
14765- numer_estimate = temp;
14766- }
14767-
14768- // we didn't fine anything good enough for float, use a full range estimate
14769- if ( limit_denom )
14770- {
14771- numer_estimate= (stbir_uint64)( f * (double)limit + 0.5 );
14772- denom_estimate = limit;
14773- }
14774- else
14775- {
14776- numer_estimate = limit;
14777- denom_estimate = (stbir_uint64)( ( (double)limit / f ) + 0.5 );
14778- }
14779-
14780- *numer = (stbir_uint32) numer_estimate;
14781- *denom = (stbir_uint32) denom_estimate;
14782-
14783- err = ( denom_estimate ) ? ( ( (double)(stbir_uint32)numer_estimate / (double)(stbir_uint32)denom_estimate ) - f ) : 1.0;
14784- if ( err < 0.0 ) err = -err;
14785- return ( err < ( 1.0 / (double)(1<<24) ) ) ? 1 : 0;
14786-}
14787-
14788-static int stbir__calculate_region_transform( stbir__scale_info * scale_info, int output_full_range, int * output_offset, int output_sub_range, int input_full_range, double input_s0, double input_s1 )
14789-{
14790- double output_range, input_range, output_s, input_s, ratio, scale;
14791-
14792- input_s = input_s1 - input_s0;
14793-
14794- // null area
14795- if ( ( output_full_range == 0 ) || ( input_full_range == 0 ) ||
14796- ( output_sub_range == 0 ) || ( input_s <= stbir__small_float ) )
14797- return 0;
14798-
14799- // are either of the ranges completely out of bounds?
14800- if ( ( *output_offset >= output_full_range ) || ( ( *output_offset + output_sub_range ) <= 0 ) || ( input_s0 >= (1.0f-stbir__small_float) ) || ( input_s1 <= stbir__small_float ) )
14801- return 0;
14802-
14803- output_range = (double)output_full_range;
14804- input_range = (double)input_full_range;
14805-
14806- output_s = ( (double)output_sub_range) / output_range;
14807-
14808- // figure out the scaling to use
14809- ratio = output_s / input_s;
14810-
14811- // save scale before clipping
14812- scale = ( output_range / input_range ) * ratio;
14813- scale_info->scale = (float)scale;
14814- scale_info->inv_scale = (float)( 1.0 / scale );
14815-
14816- // clip output area to left/right output edges (and adjust input area)
14817- stbir__clip( output_offset, &output_sub_range, output_full_range, &input_s0, &input_s1 );
14818-
14819- // recalc input area
14820- input_s = input_s1 - input_s0;
14821-
14822- // after clipping do we have zero input area?
14823- if ( input_s <= stbir__small_float )
14824- return 0;
14825-
14826- // calculate and store the starting source offsets in output pixel space
14827- scale_info->pixel_shift = (float) ( input_s0 * ratio * output_range );
14828-
14829- scale_info->scale_is_rational = stbir__double_to_rational( scale, ( scale <= 1.0 ) ? output_full_range : input_full_range, &scale_info->scale_numerator, &scale_info->scale_denominator, ( scale >= 1.0 ) );
14830-
14831- scale_info->input_full_size = input_full_range;
14832- scale_info->output_sub_size = output_sub_range;
14833-
14834- return 1;
14835-}
14836-
14837-
14838-static void stbir__init_and_set_layout( STBIR_RESIZE * resize, stbir_pixel_layout pixel_layout, stbir_datatype data_type )
14839-{
14840- resize->input_cb = 0;
14841- resize->output_cb = 0;
14842- resize->user_data = resize;
14843- resize->samplers = 0;
14844- resize->called_alloc = 0;
14845- resize->horizontal_filter = STBIR_FILTER_DEFAULT;
14846- resize->horizontal_filter_kernel = 0; resize->horizontal_filter_support = 0;
14847- resize->vertical_filter = STBIR_FILTER_DEFAULT;
14848- resize->vertical_filter_kernel = 0; resize->vertical_filter_support = 0;
14849- resize->horizontal_edge = STBIR_EDGE_CLAMP;
14850- resize->vertical_edge = STBIR_EDGE_CLAMP;
14851- resize->input_s0 = 0; resize->input_t0 = 0; resize->input_s1 = 1; resize->input_t1 = 1;
14852- resize->output_subx = 0; resize->output_suby = 0; resize->output_subw = resize->output_w; resize->output_subh = resize->output_h;
14853- resize->input_data_type = data_type;
14854- resize->output_data_type = data_type;
14855- resize->input_pixel_layout_public = pixel_layout;
14856- resize->output_pixel_layout_public = pixel_layout;
14857- resize->needs_rebuild = 1;
14858-}
14859-
14860-STBIRDEF void stbir_resize_init( STBIR_RESIZE * resize,
14861- const void *input_pixels, int input_w, int input_h, int input_stride_in_bytes, // stride can be zero
14862- void *output_pixels, int output_w, int output_h, int output_stride_in_bytes, // stride can be zero
14863- stbir_pixel_layout pixel_layout, stbir_datatype data_type )
14864-{
14865- resize->input_pixels = input_pixels;
14866- resize->input_w = input_w;
14867- resize->input_h = input_h;
14868- resize->input_stride_in_bytes = input_stride_in_bytes;
14869- resize->output_pixels = output_pixels;
14870- resize->output_w = output_w;
14871- resize->output_h = output_h;
14872- resize->output_stride_in_bytes = output_stride_in_bytes;
14873- resize->fast_alpha = 0;
14874-
14875- stbir__init_and_set_layout( resize, pixel_layout, data_type );
14876+ temp_mem_amt = (size_t)(decode_buffer_size + ring_buffer_size +
14877+ vertical_buffer_size) *
14878+ (size_t)splits;
14879+#endif
14880+ if (temp_mem_amt >= both) {
14881+ if (info) {
14882+ vertical->gather_prescatter_contributors =
14883+ (stbir__contributors *)info->split_info[0]
14884+ .decode_buffer;
14885+ vertical->gather_prescatter_coefficients =
14886+ (float *)(((char *)info->split_info[0].decode_buffer) +
14887+ vertical
14888+ ->gather_prescatter_contributors_size);
14889+ }
14890+ } else {
14891+ // ring+decode memory is too small, so allocate temp memory
14892+ STBIR__NEXT_PTR(vertical->gather_prescatter_contributors,
14893+ vertical->gather_prescatter_contributors_size,
14894+ stbir__contributors);
14895+ STBIR__NEXT_PTR(vertical->gather_prescatter_coefficients,
14896+ vertical->gather_prescatter_coefficients_size,
14897+ float);
14898+ }
14899+ }
14900+
14901+ STBIR__NEXT_PTR(horizontal->contributors, horizontal->contributors_size,
14902+ stbir__contributors);
14903+ STBIR__NEXT_PTR(horizontal->coefficients, horizontal->coefficients_size,
14904+ float);
14905+
14906+ // are the two filters identical?? (happens a lot with mipmap
14907+ // generation)
14908+ if ((horizontal->filter_kernel == vertical->filter_kernel) &&
14909+ (horizontal->filter_support == vertical->filter_support) &&
14910+ (horizontal->edge == vertical->edge) &&
14911+ (horizontal->scale_info.output_sub_size ==
14912+ vertical->scale_info.output_sub_size)) {
14913+ float diff_scale =
14914+ horizontal->scale_info.scale - vertical->scale_info.scale;
14915+ float diff_shift = horizontal->scale_info.pixel_shift -
14916+ vertical->scale_info.pixel_shift;
14917+ if (diff_scale < 0.0f) {
14918+ diff_scale = -diff_scale;
14919+ }
14920+ if (diff_shift < 0.0f) {
14921+ diff_shift = -diff_shift;
14922+ }
14923+ if ((diff_scale <= stbir__small_float) &&
14924+ (diff_shift <= stbir__small_float)) {
14925+ if (horizontal->is_gather == vertical->is_gather) {
14926+ copy_horizontal = 1;
14927+ goto no_vert_alloc;
14928+ }
14929+ // everything matches, but vertical is scatter, horizontal is
14930+ // gather, use horizontal coeffs for vertical pivot coeffs
14931+ possibly_use_horizontal_for_pivot = horizontal;
14932+ }
14933+ }
14934+
14935+ STBIR__NEXT_PTR(vertical->contributors, vertical->contributors_size,
14936+ stbir__contributors);
14937+ STBIR__NEXT_PTR(vertical->coefficients, vertical->coefficients_size,
14938+ float);
14939+
14940+ no_vert_alloc:
14941+
14942+ if (info) {
14943+ STBIR_PROFILE_BUILD_START(horizontal);
14944+
14945+ stbir__calculate_filters(
14946+ horizontal, 0, user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO);
14947+
14948+ // setup the horizontal gather functions
14949+ // start with defaulting to the n_coeffs functions (specialized on
14950+ // channels and remnant leftover)
14951+ info->horizontal_gather_channels =
14952+ stbir__horizontal_gather_n_coeffs_funcs
14953+ [effective_channels][horizontal->extent_info.widest & 3];
14954+ // but if the number of coeffs <= 12, use another set of special
14955+ // cases. <=12 coeffs is any enlarging resize, or shrinking resize
14956+ // down to about 1/3 size
14957+ if (horizontal->extent_info.widest <= 12) {
14958+ info->horizontal_gather_channels =
14959+ stbir__horizontal_gather_channels_funcs
14960+ [effective_channels]
14961+ [horizontal->extent_info.widest - 1];
14962+ }
14963+
14964+ info->scanline_extents.conservative.n0 = conservative->n0;
14965+ info->scanline_extents.conservative.n1 = conservative->n1;
14966+
14967+ // get exact extents
14968+ stbir__get_extents(horizontal, &info->scanline_extents);
14969+
14970+ // pack the horizontal coeffs
14971+ horizontal->coefficient_width = stbir__pack_coefficients(
14972+ horizontal->num_contributors, horizontal->contributors,
14973+ horizontal->coefficients, horizontal->coefficient_width,
14974+ horizontal->extent_info.widest,
14975+ info->scanline_extents.conservative.n0,
14976+ info->scanline_extents.conservative.n1);
14977+
14978+ STBIR_MEMCPY(&info->horizontal, horizontal, sizeof(stbir__sampler));
14979+
14980+ STBIR_PROFILE_BUILD_END(horizontal);
14981+
14982+ if (copy_horizontal) {
14983+ STBIR_MEMCPY(&info->vertical, horizontal,
14984+ sizeof(stbir__sampler));
14985+ } else {
14986+ STBIR_PROFILE_BUILD_START(vertical);
14987+
14988+ stbir__calculate_filters(
14989+ vertical, possibly_use_horizontal_for_pivot,
14990+ user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO);
14991+ STBIR_MEMCPY(&info->vertical, vertical, sizeof(stbir__sampler));
14992+
14993+ STBIR_PROFILE_BUILD_END(vertical);
14994+ }
14995+
14996+ // setup the vertical split ranges
14997+ stbir__get_split_info(info->split_info, info->splits,
14998+ info->vertical.scale_info.output_sub_size,
14999+ info->vertical.filter_pixel_margin,
15000+ info->vertical.scale_info.input_full_size,
15001+ info->vertical.is_gather,
15002+ info->vertical.contributors);
15003+
15004+ // now we know precisely how many entries we need
15005+ info->ring_buffer_num_entries = info->vertical.extent_info.widest;
15006+
15007+ // we never need more ring buffer entries than the scanlines we're
15008+ // outputting
15009+ if ((!info->vertical.is_gather) &&
15010+ (info->ring_buffer_num_entries >
15011+ conservative_split_output_size)) {
15012+ info->ring_buffer_num_entries = conservative_split_output_size;
15013+ }
15014+ STBIR_ASSERT(info->ring_buffer_num_entries <=
15015+ info->alloc_ring_buffer_num_entries);
15016+ }
15017+#undef STBIR__NEXT_PTR
15018+
15019+ // is this the first time through loop?
15020+ if (info == 0) {
15021+ alloced_total = (15 + (size_t)advance_mem);
15022+ alloced = STBIR_MALLOC(alloced_total, user_data);
15023+ if (alloced == 0) {
15024+ return 0;
15025+ }
15026+ } else {
15027+ return info; // success
15028+ }
15029+ }
15030+}
15031+
15032+static int
15033+stbir__perform_resize(stbir__info const *info, int split_start, int split_count)
15034+{
15035+ stbir__per_split_info *split_info = info->split_info + split_start;
15036+
15037+ STBIR_PROFILE_CLEAR_EXTRAS();
15038+
15039+ STBIR_PROFILE_FIRST_START(looping);
15040+ if (info->vertical.is_gather) {
15041+ stbir__vertical_gather_loop(info, split_info, split_count);
15042+ } else {
15043+ stbir__vertical_scatter_loop(info, split_info, split_count);
15044+ }
15045+ STBIR_PROFILE_END(looping);
15046+
15047+ return 1;
15048+}
15049+
15050+static void
15051+stbir__update_info_from_resize(stbir__info *info, STBIR_RESIZE *resize)
15052+{
15053+ static stbir__decode_pixels_func
15054+ *decode_simple[STBIR_TYPE_HALF_FLOAT - STBIR_TYPE_UINT8_SRGB + 1] = {
15055+ /* 1ch-4ch */ stbir__decode_uint8_srgb,
15056+ stbir__decode_uint8_srgb,
15057+ 0,
15058+ stbir__decode_float_linear,
15059+ stbir__decode_half_float_linear,
15060+ };
15061+
15062+ static stbir__decode_pixels_func
15063+ *decode_alphas[STBIRI_AR - STBIRI_RGBA +
15064+ 1][STBIR_TYPE_HALF_FLOAT - STBIR_TYPE_UINT8_SRGB + 1] = {
15065+ {/* RGBA */ stbir__decode_uint8_srgb4_linearalpha,
15066+ stbir__decode_uint8_srgb, 0, stbir__decode_float_linear,
15067+ stbir__decode_half_float_linear},
15068+ {/* BGRA */ stbir__decode_uint8_srgb4_linearalpha_BGRA,
15069+ stbir__decode_uint8_srgb_BGRA, 0, stbir__decode_float_linear_BGRA,
15070+ stbir__decode_half_float_linear_BGRA},
15071+ {/* ARGB */ stbir__decode_uint8_srgb4_linearalpha_ARGB,
15072+ stbir__decode_uint8_srgb_ARGB, 0, stbir__decode_float_linear_ARGB,
15073+ stbir__decode_half_float_linear_ARGB},
15074+ {/* ABGR */ stbir__decode_uint8_srgb4_linearalpha_ABGR,
15075+ stbir__decode_uint8_srgb_ABGR, 0, stbir__decode_float_linear_ABGR,
15076+ stbir__decode_half_float_linear_ABGR},
15077+ {/* RA */ stbir__decode_uint8_srgb2_linearalpha,
15078+ stbir__decode_uint8_srgb, 0, stbir__decode_float_linear,
15079+ stbir__decode_half_float_linear},
15080+ {/* AR */ stbir__decode_uint8_srgb2_linearalpha_AR,
15081+ stbir__decode_uint8_srgb_AR, 0, stbir__decode_float_linear_AR,
15082+ stbir__decode_half_float_linear_AR},
15083+ };
15084+
15085+ static stbir__decode_pixels_func *decode_simple_scaled_or_not[2][2] = {
15086+ {stbir__decode_uint8_linear_scaled, stbir__decode_uint8_linear},
15087+ {stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear},
15088+ };
15089+
15090+ static stbir__decode_pixels_func
15091+ *decode_alphas_scaled_or_not[STBIRI_AR - STBIRI_RGBA + 1][2][2] = {
15092+ {/* RGBA */ {stbir__decode_uint8_linear_scaled,
15093+ stbir__decode_uint8_linear},
15094+ {stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear}},
15095+ {/* BGRA */ {stbir__decode_uint8_linear_scaled_BGRA,
15096+ stbir__decode_uint8_linear_BGRA},
15097+ {stbir__decode_uint16_linear_scaled_BGRA,
15098+ stbir__decode_uint16_linear_BGRA}},
15099+ {/* ARGB */ {stbir__decode_uint8_linear_scaled_ARGB,
15100+ stbir__decode_uint8_linear_ARGB},
15101+ {stbir__decode_uint16_linear_scaled_ARGB,
15102+ stbir__decode_uint16_linear_ARGB}},
15103+ {/* ABGR */ {stbir__decode_uint8_linear_scaled_ABGR,
15104+ stbir__decode_uint8_linear_ABGR},
15105+ {stbir__decode_uint16_linear_scaled_ABGR,
15106+ stbir__decode_uint16_linear_ABGR}},
15107+ {/* RA */ {stbir__decode_uint8_linear_scaled,
15108+ stbir__decode_uint8_linear},
15109+ {stbir__decode_uint16_linear_scaled, stbir__decode_uint16_linear}},
15110+ {/* AR */ {stbir__decode_uint8_linear_scaled_AR,
15111+ stbir__decode_uint8_linear_AR},
15112+ {stbir__decode_uint16_linear_scaled_AR,
15113+ stbir__decode_uint16_linear_AR}}};
15114+
15115+ static stbir__encode_pixels_func
15116+ *encode_simple[STBIR_TYPE_HALF_FLOAT - STBIR_TYPE_UINT8_SRGB + 1] = {
15117+ /* 1ch-4ch */ stbir__encode_uint8_srgb,
15118+ stbir__encode_uint8_srgb,
15119+ 0,
15120+ stbir__encode_float_linear,
15121+ stbir__encode_half_float_linear,
15122+ };
15123+
15124+ static stbir__encode_pixels_func
15125+ *encode_alphas[STBIRI_AR - STBIRI_RGBA +
15126+ 1][STBIR_TYPE_HALF_FLOAT - STBIR_TYPE_UINT8_SRGB + 1] = {
15127+ {/* RGBA */ stbir__encode_uint8_srgb4_linearalpha,
15128+ stbir__encode_uint8_srgb, 0, stbir__encode_float_linear,
15129+ stbir__encode_half_float_linear},
15130+ {/* BGRA */ stbir__encode_uint8_srgb4_linearalpha_BGRA,
15131+ stbir__encode_uint8_srgb_BGRA, 0, stbir__encode_float_linear_BGRA,
15132+ stbir__encode_half_float_linear_BGRA},
15133+ {/* ARGB */ stbir__encode_uint8_srgb4_linearalpha_ARGB,
15134+ stbir__encode_uint8_srgb_ARGB, 0, stbir__encode_float_linear_ARGB,
15135+ stbir__encode_half_float_linear_ARGB},
15136+ {/* ABGR */ stbir__encode_uint8_srgb4_linearalpha_ABGR,
15137+ stbir__encode_uint8_srgb_ABGR, 0, stbir__encode_float_linear_ABGR,
15138+ stbir__encode_half_float_linear_ABGR},
15139+ {/* RA */ stbir__encode_uint8_srgb2_linearalpha,
15140+ stbir__encode_uint8_srgb, 0, stbir__encode_float_linear,
15141+ stbir__encode_half_float_linear},
15142+ {/* AR */ stbir__encode_uint8_srgb2_linearalpha_AR,
15143+ stbir__encode_uint8_srgb_AR, 0, stbir__encode_float_linear_AR,
15144+ stbir__encode_half_float_linear_AR}};
15145+
15146+ static stbir__encode_pixels_func *encode_simple_scaled_or_not[2][2] = {
15147+ {stbir__encode_uint8_linear_scaled, stbir__encode_uint8_linear},
15148+ {stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear},
15149+ };
15150+
15151+ static stbir__encode_pixels_func
15152+ *encode_alphas_scaled_or_not[STBIRI_AR - STBIRI_RGBA + 1][2][2] = {
15153+ {/* RGBA */ {stbir__encode_uint8_linear_scaled,
15154+ stbir__encode_uint8_linear},
15155+ {stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear}},
15156+ {/* BGRA */ {stbir__encode_uint8_linear_scaled_BGRA,
15157+ stbir__encode_uint8_linear_BGRA},
15158+ {stbir__encode_uint16_linear_scaled_BGRA,
15159+ stbir__encode_uint16_linear_BGRA}},
15160+ {/* ARGB */ {stbir__encode_uint8_linear_scaled_ARGB,
15161+ stbir__encode_uint8_linear_ARGB},
15162+ {stbir__encode_uint16_linear_scaled_ARGB,
15163+ stbir__encode_uint16_linear_ARGB}},
15164+ {/* ABGR */ {stbir__encode_uint8_linear_scaled_ABGR,
15165+ stbir__encode_uint8_linear_ABGR},
15166+ {stbir__encode_uint16_linear_scaled_ABGR,
15167+ stbir__encode_uint16_linear_ABGR}},
15168+ {/* RA */ {stbir__encode_uint8_linear_scaled,
15169+ stbir__encode_uint8_linear},
15170+ {stbir__encode_uint16_linear_scaled, stbir__encode_uint16_linear}},
15171+ {/* AR */ {stbir__encode_uint8_linear_scaled_AR,
15172+ stbir__encode_uint8_linear_AR},
15173+ {stbir__encode_uint16_linear_scaled_AR,
15174+ stbir__encode_uint16_linear_AR}}};
15175+
15176+ stbir__decode_pixels_func *decode_pixels = 0;
15177+ stbir__encode_pixels_func *encode_pixels = 0;
15178+ stbir_datatype input_type, output_type;
15179+
15180+ input_type = resize->input_data_type;
15181+ output_type = resize->output_data_type;
15182+ info->input_data = resize->input_pixels;
15183+ info->input_stride_bytes = resize->input_stride_in_bytes;
15184+ info->output_stride_bytes = resize->output_stride_in_bytes;
15185+
15186+ // if we're completely point sampling, then we can turn off SRGB
15187+ if ((info->horizontal.filter_enum == STBIR_FILTER_POINT_SAMPLE) &&
15188+ (info->vertical.filter_enum == STBIR_FILTER_POINT_SAMPLE)) {
15189+ if (((input_type == STBIR_TYPE_UINT8_SRGB) ||
15190+ (input_type == STBIR_TYPE_UINT8_SRGB_ALPHA)) &&
15191+ ((output_type == STBIR_TYPE_UINT8_SRGB) ||
15192+ (output_type == STBIR_TYPE_UINT8_SRGB_ALPHA))) {
15193+ input_type = STBIR_TYPE_UINT8;
15194+ output_type = STBIR_TYPE_UINT8;
15195+ }
15196+ }
15197+
15198+ // recalc the output and input strides
15199+ if (info->input_stride_bytes == 0) {
15200+ info->input_stride_bytes = info->channels *
15201+ info->horizontal.scale_info.input_full_size *
15202+ stbir__type_size[input_type];
15203+ }
15204+
15205+ if (info->output_stride_bytes == 0) {
15206+ info->output_stride_bytes =
15207+ info->channels * info->horizontal.scale_info.output_sub_size *
15208+ stbir__type_size[output_type];
15209+ }
15210+
15211+ // calc offset
15212+ info->output_data =
15213+ ((char *)resize->output_pixels) +
15214+ ((size_t)info->offset_y * (size_t)resize->output_stride_in_bytes) +
15215+ (info->offset_x * info->channels * stbir__type_size[output_type]);
15216+
15217+ info->in_pixels_cb = resize->input_cb;
15218+ info->user_data = resize->user_data;
15219+ info->out_pixels_cb = resize->output_cb;
15220+
15221+ // setup the input format converters
15222+ if ((input_type == STBIR_TYPE_UINT8) || (input_type == STBIR_TYPE_UINT16)) {
15223+ int non_scaled = 0;
15224+
15225+ // check if we can run unscaled - 0-255.0/0-65535.0 instead of 0-1.0
15226+ // (which is a tiny bit faster when doing linear 8->8 or 16->16)
15227+ if ((!info->alpha_weight) &&
15228+ (!info->alpha_unweight)) { // don't short circuit when alpha
15229+ // weighting (get everything to 0-1.0 as
15230+ // usual)
15231+ if (((input_type == STBIR_TYPE_UINT8) &&
15232+ (output_type == STBIR_TYPE_UINT8)) ||
15233+ ((input_type == STBIR_TYPE_UINT16) &&
15234+ (output_type == STBIR_TYPE_UINT16))) {
15235+ non_scaled = 1;
15236+ }
15237+ }
15238+
15239+ if (info->input_pixel_layout_internal <= STBIRI_4CHANNEL) {
15240+ decode_pixels =
15241+ decode_simple_scaled_or_not[input_type == STBIR_TYPE_UINT16]
15242+ [non_scaled];
15243+ } else {
15244+ decode_pixels =
15245+ decode_alphas_scaled_or_not[(info->input_pixel_layout_internal -
15246+ STBIRI_RGBA) %
15247+ (STBIRI_AR - STBIRI_RGBA + 1)]
15248+ [input_type == STBIR_TYPE_UINT16]
15249+ [non_scaled];
15250+ }
15251+ } else {
15252+ if (info->input_pixel_layout_internal <= STBIRI_4CHANNEL) {
15253+ decode_pixels = decode_simple[input_type - STBIR_TYPE_UINT8_SRGB];
15254+ } else {
15255+ decode_pixels = decode_alphas[(info->input_pixel_layout_internal -
15256+ STBIRI_RGBA) %
15257+ (STBIRI_AR - STBIRI_RGBA + 1)]
15258+ [input_type - STBIR_TYPE_UINT8_SRGB];
15259+ }
15260+ }
15261+
15262+ // setup the output format converters
15263+ if ((output_type == STBIR_TYPE_UINT8) ||
15264+ (output_type == STBIR_TYPE_UINT16)) {
15265+ int non_scaled = 0;
15266+
15267+ // check if we can run unscaled - 0-255.0/0-65535.0 instead of 0-1.0
15268+ // (which is a tiny bit faster when doing linear 8->8 or 16->16)
15269+ if ((!info->alpha_weight) &&
15270+ (!info->alpha_unweight)) { // don't short circuit when alpha
15271+ // weighting (get everything to 0-1.0 as
15272+ // usual)
15273+ if (((input_type == STBIR_TYPE_UINT8) &&
15274+ (output_type == STBIR_TYPE_UINT8)) ||
15275+ ((input_type == STBIR_TYPE_UINT16) &&
15276+ (output_type == STBIR_TYPE_UINT16))) {
15277+ non_scaled = 1;
15278+ }
15279+ }
15280+
15281+ if (info->output_pixel_layout_internal <= STBIRI_4CHANNEL) {
15282+ encode_pixels =
15283+ encode_simple_scaled_or_not[output_type == STBIR_TYPE_UINT16]
15284+ [non_scaled];
15285+ } else {
15286+ encode_pixels = encode_alphas_scaled_or_not
15287+ [(info->output_pixel_layout_internal - STBIRI_RGBA) %
15288+ (STBIRI_AR - STBIRI_RGBA + 1)]
15289+ [output_type == STBIR_TYPE_UINT16][non_scaled];
15290+ }
15291+ } else {
15292+ if (info->output_pixel_layout_internal <= STBIRI_4CHANNEL) {
15293+ encode_pixels = encode_simple[output_type - STBIR_TYPE_UINT8_SRGB];
15294+ } else {
15295+ encode_pixels = encode_alphas[(info->output_pixel_layout_internal -
15296+ STBIRI_RGBA) %
15297+ (STBIRI_AR - STBIRI_RGBA + 1)]
15298+ [output_type - STBIR_TYPE_UINT8_SRGB];
15299+ }
15300+ }
15301+
15302+ info->input_type = input_type;
15303+ info->output_type = output_type;
15304+ info->decode_pixels = decode_pixels;
15305+ info->encode_pixels = encode_pixels;
15306+}
15307+
15308+static void
15309+stbir__clip(int *outx, int *outsubw, int outw, double *u0, double *u1)
15310+{
15311+ double per, adj;
15312+ int over;
15313+
15314+ // do left/top edge
15315+ if (*outx < 0) {
15316+ per = ((double)*outx) / ((double)*outsubw); // is negative
15317+ adj = per * (*u1 - *u0);
15318+ *u0 -= adj; // increases u0
15319+ *outx = 0;
15320+ }
15321+
15322+ // do right/bot edge
15323+ over = outw - (*outx + *outsubw);
15324+ if (over < 0) {
15325+ per = ((double)over) / ((double)*outsubw); // is negative
15326+ adj = per * (*u1 - *u0);
15327+ *u1 += adj; // decreases u1
15328+ *outsubw = outw - *outx;
15329+ }
15330+}
15331+
15332+// converts a double to a rational that has less than one float bit of error
15333+// (returns 0 if unable to do so)
15334+static int
15335+stbir__double_to_rational(double f, stbir_uint32 limit, stbir_uint32 *numer,
15336+ stbir_uint32 *denom,
15337+ int limit_denom) // limit_denom (1) or limit numer (0)
15338+{
15339+ double err;
15340+ stbir_uint64 top, bot;
15341+ stbir_uint64 numer_last = 0;
15342+ stbir_uint64 denom_last = 1;
15343+ stbir_uint64 numer_estimate = 1;
15344+ stbir_uint64 denom_estimate = 0;
15345+
15346+ // scale to past float error range
15347+ top = (stbir_uint64)(f * (double)(1 << 25));
15348+ bot = 1 << 25;
15349+
15350+ // keep refining, but usually stops in a few loops - usually 5 for bad cases
15351+ for (;;) {
15352+ stbir_uint64 est, temp;
15353+
15354+ // hit limit, break out and do best full range estimate
15355+ if (((limit_denom) ? denom_estimate : numer_estimate) >= limit) {
15356+ break;
15357+ }
15358+
15359+ // is the current error less than 1 bit of a float? if so, we're done
15360+ if (denom_estimate) {
15361+ err = ((double)numer_estimate / (double)denom_estimate) - f;
15362+ if (err < 0.0) {
15363+ err = -err;
15364+ }
15365+ if (err < (1.0 / (double)(1 << 24))) {
15366+ // yup, found it
15367+ *numer = (stbir_uint32)numer_estimate;
15368+ *denom = (stbir_uint32)denom_estimate;
15369+ return 1;
15370+ }
15371+ }
15372+
15373+ // no more refinement bits left? break out and do full range estimate
15374+ if (bot == 0) {
15375+ break;
15376+ }
15377+
15378+ // gcd the estimate bits
15379+ est = top / bot;
15380+ temp = top % bot;
15381+ top = bot;
15382+ bot = temp;
15383+
15384+ // move remainders
15385+ temp = est * denom_estimate + denom_last;
15386+ denom_last = denom_estimate;
15387+ denom_estimate = temp;
15388+
15389+ // move remainders
15390+ temp = est * numer_estimate + numer_last;
15391+ numer_last = numer_estimate;
15392+ numer_estimate = temp;
15393+ }
15394+
15395+ // we didn't find anything good enough for float, use a full range estimate
15396+ if (limit_denom) {
15397+ numer_estimate = (stbir_uint64)(f * (double)limit + 0.5);
15398+ denom_estimate = limit;
15399+ } else {
15400+ numer_estimate = limit;
15401+ denom_estimate = (stbir_uint64)(((double)limit / f) + 0.5);
15402+ }
15403+
15404+ *numer = (stbir_uint32)numer_estimate;
15405+ *denom = (stbir_uint32)denom_estimate;
15406+
15407+ err = (denom_estimate) ? (((double)(stbir_uint32)numer_estimate /
15408+ (double)(stbir_uint32)denom_estimate) -
15409+ f)
15410+ : 1.0;
15411+ if (err < 0.0) {
15412+ err = -err;
15413+ }
15414+ return (err < (1.0 / (double)(1 << 24))) ? 1 : 0;
15415+}
15416+
15417+static int
15418+stbir__calculate_region_transform(stbir__scale_info *scale_info,
15419+ int output_full_range, int *output_offset,
15420+ int output_sub_range, int input_full_range,
15421+ double input_s0, double input_s1)
15422+{
15423+ double output_range, input_range, output_s, input_s, ratio, scale;
15424+
15425+ input_s = input_s1 - input_s0;
15426+
15427+ // null area
15428+ if ((output_full_range == 0) || (input_full_range == 0) ||
15429+ (output_sub_range == 0) || (input_s <= stbir__small_float)) {
15430+ return 0;
15431+ }
15432+
15433+ // are either of the ranges completely out of bounds?
15434+ if ((*output_offset >= output_full_range) ||
15435+ ((*output_offset + output_sub_range) <= 0) ||
15436+ (input_s0 >= (1.0f - stbir__small_float)) ||
15437+ (input_s1 <= stbir__small_float)) {
15438+ return 0;
15439+ }
15440+
15441+ output_range = (double)output_full_range;
15442+ input_range = (double)input_full_range;
15443+
15444+ output_s = ((double)output_sub_range) / output_range;
15445+
15446+ // figure out the scaling to use
15447+ ratio = output_s / input_s;
15448+
15449+ // save scale before clipping
15450+ scale = (output_range / input_range) * ratio;
15451+ scale_info->scale = (float)scale;
15452+ scale_info->inv_scale = (float)(1.0 / scale);
15453+
15454+ // clip output area to left/right output edges (and adjust input area)
15455+ stbir__clip(output_offset, &output_sub_range, output_full_range, &input_s0,
15456+ &input_s1);
15457+
15458+ // recalc input area
15459+ input_s = input_s1 - input_s0;
15460+
15461+ // after clipping do we have zero input area?
15462+ if (input_s <= stbir__small_float) {
15463+ return 0;
15464+ }
15465+
15466+ // calculate and store the starting source offsets in output pixel space
15467+ scale_info->pixel_shift = (float)(input_s0 * ratio * output_range);
15468+
15469+ scale_info->scale_is_rational = stbir__double_to_rational(
15470+ scale, (scale <= 1.0) ? output_full_range : input_full_range,
15471+ &scale_info->scale_numerator, &scale_info->scale_denominator,
15472+ (scale >= 1.0));
15473+
15474+ scale_info->input_full_size = input_full_range;
15475+ scale_info->output_sub_size = output_sub_range;
15476+
15477+ return 1;
15478+}
15479+
15480+static void
15481+stbir__init_and_set_layout(STBIR_RESIZE *resize,
15482+ stbir_pixel_layout pixel_layout,
15483+ stbir_datatype data_type)
15484+{
15485+ resize->input_cb = 0;
15486+ resize->output_cb = 0;
15487+ resize->user_data = resize;
15488+ resize->samplers = 0;
15489+ resize->called_alloc = 0;
15490+ resize->horizontal_filter = STBIR_FILTER_DEFAULT;
15491+ resize->horizontal_filter_kernel = 0;
15492+ resize->horizontal_filter_support = 0;
15493+ resize->vertical_filter = STBIR_FILTER_DEFAULT;
15494+ resize->vertical_filter_kernel = 0;
15495+ resize->vertical_filter_support = 0;
15496+ resize->horizontal_edge = STBIR_EDGE_CLAMP;
15497+ resize->vertical_edge = STBIR_EDGE_CLAMP;
15498+ resize->input_s0 = 0;
15499+ resize->input_t0 = 0;
15500+ resize->input_s1 = 1;
15501+ resize->input_t1 = 1;
15502+ resize->output_subx = 0;
15503+ resize->output_suby = 0;
15504+ resize->output_subw = resize->output_w;
15505+ resize->output_subh = resize->output_h;
15506+ resize->input_data_type = data_type;
15507+ resize->output_data_type = data_type;
15508+ resize->input_pixel_layout_public = pixel_layout;
15509+ resize->output_pixel_layout_public = pixel_layout;
15510+ resize->needs_rebuild = 1;
15511+}
15512+
15513+STBIRDEF void
15514+stbir_resize_init(STBIR_RESIZE *resize, const void *input_pixels, int input_w,
15515+ int input_h, int input_stride_in_bytes, // stride can be zero
15516+ void *output_pixels, int output_w, int output_h,
15517+ int output_stride_in_bytes, // stride can be zero
15518+ stbir_pixel_layout pixel_layout, stbir_datatype data_type)
15519+{
15520+ resize->input_pixels = input_pixels;
15521+ resize->input_w = input_w;
15522+ resize->input_h = input_h;
15523+ resize->input_stride_in_bytes = input_stride_in_bytes;
15524+ resize->output_pixels = output_pixels;
15525+ resize->output_w = output_w;
15526+ resize->output_h = output_h;
15527+ resize->output_stride_in_bytes = output_stride_in_bytes;
15528+ resize->fast_alpha = 0;
15529+
15530+ stbir__init_and_set_layout(resize, pixel_layout, data_type);
15531 }
15532
15533 // You can update parameters any time after resize_init
15534-STBIRDEF void stbir_set_datatypes( STBIR_RESIZE * resize, stbir_datatype input_type, stbir_datatype output_type ) // by default, datatype from resize_init
15535+STBIRDEF void
15536+stbir_set_datatypes(
15537+ STBIR_RESIZE *resize, stbir_datatype input_type,
15538+ stbir_datatype output_type) // by default, datatype from resize_init
15539 {
15540- resize->input_data_type = input_type;
15541- resize->output_data_type = output_type;
15542- if ( ( resize->samplers ) && ( !resize->needs_rebuild ) )
15543- stbir__update_info_from_resize( resize->samplers, resize );
15544+ resize->input_data_type = input_type;
15545+ resize->output_data_type = output_type;
15546+ if ((resize->samplers) && (!resize->needs_rebuild)) {
15547+ stbir__update_info_from_resize(resize->samplers, resize);
15548+ }
15549 }
15550
15551-STBIRDEF void stbir_set_pixel_callbacks( STBIR_RESIZE * resize, stbir_input_callback * input_cb, stbir_output_callback * output_cb ) // no callbacks by default
15552+STBIRDEF void
15553+stbir_set_pixel_callbacks(
15554+ STBIR_RESIZE *resize, stbir_input_callback *input_cb,
15555+ stbir_output_callback *output_cb) // no callbacks by default
15556 {
15557- resize->input_cb = input_cb;
15558- resize->output_cb = output_cb;
15559+ resize->input_cb = input_cb;
15560+ resize->output_cb = output_cb;
15561
15562- if ( ( resize->samplers ) && ( !resize->needs_rebuild ) )
15563- {
15564- resize->samplers->in_pixels_cb = input_cb;
15565- resize->samplers->out_pixels_cb = output_cb;
15566- }
15567+ if ((resize->samplers) && (!resize->needs_rebuild)) {
15568+ resize->samplers->in_pixels_cb = input_cb;
15569+ resize->samplers->out_pixels_cb = output_cb;
15570+ }
15571 }
15572
15573-STBIRDEF void stbir_set_user_data( STBIR_RESIZE * resize, void * user_data ) // pass back STBIR_RESIZE* by default
15574+STBIRDEF void
15575+stbir_set_user_data(STBIR_RESIZE *resize,
15576+ void *user_data) // pass back STBIR_RESIZE* by default
15577 {
15578- resize->user_data = user_data;
15579- if ( ( resize->samplers ) && ( !resize->needs_rebuild ) )
15580- resize->samplers->user_data = user_data;
15581+ resize->user_data = user_data;
15582+ if ((resize->samplers) && (!resize->needs_rebuild)) {
15583+ resize->samplers->user_data = user_data;
15584+ }
15585 }
15586
15587-STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_pixels, int input_stride_in_bytes, void * output_pixels, int output_stride_in_bytes )
15588+STBIRDEF void
15589+stbir_set_buffer_ptrs(STBIR_RESIZE *resize, const void *input_pixels,
15590+ int input_stride_in_bytes, void *output_pixels,
15591+ int output_stride_in_bytes)
15592 {
15593- resize->input_pixels = input_pixels;
15594- resize->input_stride_in_bytes = input_stride_in_bytes;
15595- resize->output_pixels = output_pixels;
15596- resize->output_stride_in_bytes = output_stride_in_bytes;
15597- if ( ( resize->samplers ) && ( !resize->needs_rebuild ) )
15598- stbir__update_info_from_resize( resize->samplers, resize );
15599+ resize->input_pixels = input_pixels;
15600+ resize->input_stride_in_bytes = input_stride_in_bytes;
15601+ resize->output_pixels = output_pixels;
15602+ resize->output_stride_in_bytes = output_stride_in_bytes;
15603+ if ((resize->samplers) && (!resize->needs_rebuild)) {
15604+ stbir__update_info_from_resize(resize->samplers, resize);
15605+ }
15606 }
15607
15608-
15609-STBIRDEF int stbir_set_edgemodes( STBIR_RESIZE * resize, stbir_edge horizontal_edge, stbir_edge vertical_edge ) // CLAMP by default
15610-{
15611- resize->horizontal_edge = horizontal_edge;
15612- resize->vertical_edge = vertical_edge;
15613- resize->needs_rebuild = 1;
15614- return 1;
15615-}
15616-
15617-STBIRDEF int stbir_set_filters( STBIR_RESIZE * resize, stbir_filter horizontal_filter, stbir_filter vertical_filter ) // STBIR_DEFAULT_FILTER_UPSAMPLE/DOWNSAMPLE by default
15618+STBIRDEF int
15619+stbir_set_edgemodes(STBIR_RESIZE *resize, stbir_edge horizontal_edge,
15620+ stbir_edge vertical_edge) // CLAMP by default
15621 {
15622- resize->horizontal_filter = horizontal_filter;
15623- resize->vertical_filter = vertical_filter;
15624- resize->needs_rebuild = 1;
15625- return 1;
15626+ resize->horizontal_edge = horizontal_edge;
15627+ resize->vertical_edge = vertical_edge;
15628+ resize->needs_rebuild = 1;
15629+ return 1;
15630 }
15631
15632-STBIRDEF int stbir_set_filter_callbacks( STBIR_RESIZE * resize, stbir__kernel_callback * horizontal_filter, stbir__support_callback * horizontal_support, stbir__kernel_callback * vertical_filter, stbir__support_callback * vertical_support )
15633+STBIRDEF int
15634+stbir_set_filters(STBIR_RESIZE *resize, stbir_filter horizontal_filter,
15635+ stbir_filter vertical_filter) // STBIR_DEFAULT_FILTER_UPSAMPLE/DOWNSAMPLE
15636+ // by default
15637 {
15638- resize->horizontal_filter_kernel = horizontal_filter; resize->horizontal_filter_support = horizontal_support;
15639- resize->vertical_filter_kernel = vertical_filter; resize->vertical_filter_support = vertical_support;
15640- resize->needs_rebuild = 1;
15641- return 1;
15642+ resize->horizontal_filter = horizontal_filter;
15643+ resize->vertical_filter = vertical_filter;
15644+ resize->needs_rebuild = 1;
15645+ return 1;
15646 }
15647
15648-STBIRDEF int stbir_set_pixel_layouts( STBIR_RESIZE * resize, stbir_pixel_layout input_pixel_layout, stbir_pixel_layout output_pixel_layout ) // sets new pixel layouts
15649-{
15650- resize->input_pixel_layout_public = input_pixel_layout;
15651- resize->output_pixel_layout_public = output_pixel_layout;
15652- resize->needs_rebuild = 1;
15653- return 1;
15654+STBIRDEF int
15655+stbir_set_filter_callbacks(STBIR_RESIZE *resize,
15656+ stbir__kernel_callback *horizontal_filter,
15657+ stbir__support_callback *horizontal_support,
15658+ stbir__kernel_callback *vertical_filter,
15659+ stbir__support_callback *vertical_support)
15660+{
15661+ resize->horizontal_filter_kernel = horizontal_filter;
15662+ resize->horizontal_filter_support = horizontal_support;
15663+ resize->vertical_filter_kernel = vertical_filter;
15664+ resize->vertical_filter_support = vertical_support;
15665+ resize->needs_rebuild = 1;
15666+ return 1;
15667 }
15668
15669-
15670-STBIRDEF int stbir_set_non_pm_alpha_speed_over_quality( STBIR_RESIZE * resize, int non_pma_alpha_speed_over_quality ) // sets alpha speed
15671+STBIRDEF int
15672+stbir_set_pixel_layouts(
15673+ STBIR_RESIZE *resize, stbir_pixel_layout input_pixel_layout,
15674+ stbir_pixel_layout output_pixel_layout) // sets new pixel layouts
15675 {
15676- resize->fast_alpha = non_pma_alpha_speed_over_quality;
15677- resize->needs_rebuild = 1;
15678- return 1;
15679+ resize->input_pixel_layout_public = input_pixel_layout;
15680+ resize->output_pixel_layout_public = output_pixel_layout;
15681+ resize->needs_rebuild = 1;
15682+ return 1;
15683 }
15684
15685-STBIRDEF int stbir_set_input_subrect( STBIR_RESIZE * resize, double s0, double t0, double s1, double t1 ) // sets input region (full region by default)
15686+STBIRDEF int
15687+stbir_set_non_pm_alpha_speed_over_quality(
15688+ STBIR_RESIZE *resize,
15689+ int non_pma_alpha_speed_over_quality) // sets alpha speed
15690 {
15691- resize->input_s0 = s0;
15692- resize->input_t0 = t0;
15693- resize->input_s1 = s1;
15694- resize->input_t1 = t1;
15695- resize->needs_rebuild = 1;
15696-
15697- // are we inbounds?
15698- if ( ( s1 < stbir__small_float ) || ( (s1-s0) < stbir__small_float ) ||
15699- ( t1 < stbir__small_float ) || ( (t1-t0) < stbir__small_float ) ||
15700- ( s0 > (1.0f-stbir__small_float) ) ||
15701- ( t0 > (1.0f-stbir__small_float) ) )
15702- return 0;
15703-
15704- return 1;
15705+ resize->fast_alpha = non_pma_alpha_speed_over_quality;
15706+ resize->needs_rebuild = 1;
15707+ return 1;
15708 }
15709
15710-STBIRDEF int stbir_set_output_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ) // sets input region (full region by default)
15711+STBIRDEF int
15712+stbir_set_input_subrect(STBIR_RESIZE *resize, double s0, double t0, double s1,
15713+ double t1) // sets input region (full region by default)
15714 {
15715- resize->output_subx = subx;
15716- resize->output_suby = suby;
15717- resize->output_subw = subw;
15718- resize->output_subh = subh;
15719- resize->needs_rebuild = 1;
15720+ resize->input_s0 = s0;
15721+ resize->input_t0 = t0;
15722+ resize->input_s1 = s1;
15723+ resize->input_t1 = t1;
15724+ resize->needs_rebuild = 1;
15725
15726- // are we inbounds?
15727- if ( ( subx >= resize->output_w ) || ( ( subx + subw ) <= 0 ) || ( suby >= resize->output_h ) || ( ( suby + subh ) <= 0 ) || ( subw == 0 ) || ( subh == 0 ) )
15728- return 0;
15729+ // are we inbounds?
15730+ if ((s1 < stbir__small_float) || ((s1 - s0) < stbir__small_float) ||
15731+ (t1 < stbir__small_float) || ((t1 - t0) < stbir__small_float) ||
15732+ (s0 > (1.0f - stbir__small_float)) ||
15733+ (t0 > (1.0f - stbir__small_float))) {
15734+ return 0;
15735+ }
15736
15737- return 1;
15738-}
15739-
15740-STBIRDEF int stbir_set_pixel_subrect( STBIR_RESIZE * resize, int subx, int suby, int subw, int subh ) // sets both regions (full regions by default)
15741+ return 1;
15742+}
15743+
15744+STBIRDEF int
15745+stbir_set_output_pixel_subrect(
15746+ STBIR_RESIZE *resize, int subx, int suby, int subw,
15747+ int subh) // sets output region (full region by default)
15748 {
15749- double s0, t0, s1, t1;
15750-
15751- s0 = ( (double)subx ) / ( (double)resize->output_w );
15752- t0 = ( (double)suby ) / ( (double)resize->output_h );
15753- s1 = ( (double)(subx+subw) ) / ( (double)resize->output_w );
15754- t1 = ( (double)(suby+subh) ) / ( (double)resize->output_h );
15755-
15756- resize->input_s0 = s0;
15757- resize->input_t0 = t0;
15758- resize->input_s1 = s1;
15759- resize->input_t1 = t1;
15760- resize->output_subx = subx;
15761- resize->output_suby = suby;
15762- resize->output_subw = subw;
15763- resize->output_subh = subh;
15764- resize->needs_rebuild = 1;
15765+ resize->output_subx = subx;
15766+ resize->output_suby = suby;
15767+ resize->output_subw = subw;
15768+ resize->output_subh = subh;
15769+ resize->needs_rebuild = 1;
15770+
15771+ // are we inbounds?
15772+ if ((subx >= resize->output_w) || ((subx + subw) <= 0) ||
15773+ (suby >= resize->output_h) || ((suby + subh) <= 0) || (subw == 0) ||
15774+ (subh == 0)) {
15775+ return 0;
15776+ }
15777
15778- // are we inbounds?
15779- if ( ( subx >= resize->output_w ) || ( ( subx + subw ) <= 0 ) || ( suby >= resize->output_h ) || ( ( suby + subh ) <= 0 ) || ( subw == 0 ) || ( subh == 0 ) )
15780- return 0;
15781-
15782- return 1;
15783+ return 1;
15784 }
15785
15786-static int stbir__perform_build( STBIR_RESIZE * resize, int splits )
15787+STBIRDEF int
15788+stbir_set_pixel_subrect(STBIR_RESIZE *resize, int subx, int suby, int subw,
15789+ int subh) // sets both regions (full regions by default)
15790 {
15791- stbir__contributors conservative = { 0, 0 };
15792- stbir__sampler horizontal, vertical;
15793- int new_output_subx, new_output_suby;
15794- stbir__info * out_info;
15795- #ifdef STBIR_PROFILE
15796- stbir__info profile_infod; // used to contain building profile info before everything is allocated
15797- stbir__info * profile_info = &profile_infod;
15798- #endif
15799-
15800- // have we already built the samplers?
15801- if ( resize->samplers )
15802- return 0;
15803-
15804- #define STBIR_RETURN_ERROR_AND_ASSERT( exp ) STBIR_ASSERT( !(exp) ); if (exp) return 0;
15805- STBIR_RETURN_ERROR_AND_ASSERT( (unsigned)resize->horizontal_filter >= STBIR_FILTER_OTHER)
15806- STBIR_RETURN_ERROR_AND_ASSERT( (unsigned)resize->vertical_filter >= STBIR_FILTER_OTHER)
15807- #undef STBIR_RETURN_ERROR_AND_ASSERT
15808-
15809- if ( splits <= 0 )
15810- return 0;
15811+ double s0, t0, s1, t1;
15812+
15813+ s0 = ((double)subx) / ((double)resize->output_w);
15814+ t0 = ((double)suby) / ((double)resize->output_h);
15815+ s1 = ((double)(subx + subw)) / ((double)resize->output_w);
15816+ t1 = ((double)(suby + subh)) / ((double)resize->output_h);
15817+
15818+ resize->input_s0 = s0;
15819+ resize->input_t0 = t0;
15820+ resize->input_s1 = s1;
15821+ resize->input_t1 = t1;
15822+ resize->output_subx = subx;
15823+ resize->output_suby = suby;
15824+ resize->output_subw = subw;
15825+ resize->output_subh = subh;
15826+ resize->needs_rebuild = 1;
15827
15828- STBIR_PROFILE_BUILD_FIRST_START( build );
15829+ // are we inbounds?
15830+ if ((subx >= resize->output_w) || ((subx + subw) <= 0) ||
15831+ (suby >= resize->output_h) || ((suby + subh) <= 0) || (subw == 0) ||
15832+ (subh == 0)) {
15833+ return 0;
15834+ }
15835
15836- new_output_subx = resize->output_subx;
15837- new_output_suby = resize->output_suby;
15838-
15839- // do horizontal clip and scale calcs
15840- if ( !stbir__calculate_region_transform( &horizontal.scale_info, resize->output_w, &new_output_subx, resize->output_subw, resize->input_w, resize->input_s0, resize->input_s1 ) )
15841- return 0;
15842-
15843- // do vertical clip and scale calcs
15844- if ( !stbir__calculate_region_transform( &vertical.scale_info, resize->output_h, &new_output_suby, resize->output_subh, resize->input_h, resize->input_t0, resize->input_t1 ) )
15845- return 0;
15846-
15847- // if nothing to do, just return
15848- if ( ( horizontal.scale_info.output_sub_size == 0 ) || ( vertical.scale_info.output_sub_size == 0 ) )
15849- return 0;
15850-
15851- stbir__set_sampler(&horizontal, resize->horizontal_filter, resize->horizontal_filter_kernel, resize->horizontal_filter_support, resize->horizontal_edge, &horizontal.scale_info, 1, resize->user_data );
15852- stbir__get_conservative_extents( &horizontal, &conservative, resize->user_data );
15853- stbir__set_sampler(&vertical, resize->vertical_filter, resize->vertical_filter_kernel, resize->vertical_filter_support, resize->vertical_edge, &vertical.scale_info, 0, resize->user_data );
15854-
15855- if ( ( vertical.scale_info.output_sub_size / splits ) < STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS ) // each split should be a minimum of 4 scanlines (handwavey choice)
15856- {
15857- splits = vertical.scale_info.output_sub_size / STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS;
15858- if ( splits == 0 ) splits = 1;
15859- }
15860-
15861- STBIR_PROFILE_BUILD_START( alloc );
15862- out_info = stbir__alloc_internal_mem_and_build_samplers( &horizontal, &vertical, &conservative, resize->input_pixel_layout_public, resize->output_pixel_layout_public, splits, new_output_subx, new_output_suby, resize->fast_alpha, resize->user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO );
15863- STBIR_PROFILE_BUILD_END( alloc );
15864- STBIR_PROFILE_BUILD_END( build );
15865-
15866- if ( out_info )
15867- {
15868- resize->splits = splits;
15869- resize->samplers = out_info;
15870- resize->needs_rebuild = 0;
15871- #ifdef STBIR_PROFILE
15872- STBIR_MEMCPY( &out_info->profile, &profile_infod.profile, sizeof( out_info->profile ) );
15873- #endif
15874-
15875- // update anything that can be changed without recalcing samplers
15876- stbir__update_info_from_resize( out_info, resize );
15877-
15878- return splits;
15879- }
15880-
15881- return 0;
15882-}
15883-
15884-void stbir_free_samplers( STBIR_RESIZE * resize )
15885+ return 1;
15886+}
15887+
15888+static int
15889+stbir__perform_build(STBIR_RESIZE *resize, int splits)
15890 {
15891- if ( resize->samplers )
15892- {
15893- stbir__free_internal_mem( resize->samplers );
15894- resize->samplers = 0;
15895- resize->called_alloc = 0;
15896- }
15897-}
15898+ stbir__contributors conservative = {0, 0};
15899+ stbir__sampler horizontal, vertical;
15900+ int new_output_subx, new_output_suby;
15901+ stbir__info *out_info;
15902+#ifdef STBIR_PROFILE
15903+ stbir__info profile_infod; // used to contain building profile info before
15904+ // everything is allocated
15905+ stbir__info *profile_info = &profile_infod;
15906+#endif
15907
15908-STBIRDEF int stbir_build_samplers_with_splits( STBIR_RESIZE * resize, int splits )
15909-{
15910- if ( ( resize->samplers == 0 ) || ( resize->needs_rebuild ) )
15911- {
15912- if ( resize->samplers )
15913- stbir_free_samplers( resize );
15914+ // have we already built the samplers?
15915+ if (resize->samplers) {
15916+ return 0;
15917+ }
15918+
15919+#define STBIR_RETURN_ERROR_AND_ASSERT(exp) \
15920+ STBIR_ASSERT(!(exp)); \
15921+ if (exp) \
15922+ return 0;
15923+ STBIR_RETURN_ERROR_AND_ASSERT((unsigned)resize->horizontal_filter >=
15924+ STBIR_FILTER_OTHER)
15925+ STBIR_RETURN_ERROR_AND_ASSERT((unsigned)resize->vertical_filter >=
15926+ STBIR_FILTER_OTHER)
15927+#undef STBIR_RETURN_ERROR_AND_ASSERT
15928+
15929+ if (splits <= 0) {
15930+ return 0;
15931+ }
15932+
15933+ STBIR_PROFILE_BUILD_FIRST_START(build);
15934+
15935+ new_output_subx = resize->output_subx;
15936+ new_output_suby = resize->output_suby;
15937+
15938+ // do horizontal clip and scale calcs
15939+ if (!stbir__calculate_region_transform(
15940+ &horizontal.scale_info, resize->output_w, &new_output_subx,
15941+ resize->output_subw, resize->input_w, resize->input_s0,
15942+ resize->input_s1)) {
15943+ return 0;
15944+ }
15945+
15946+ // do vertical clip and scale calcs
15947+ if (!stbir__calculate_region_transform(
15948+ &vertical.scale_info, resize->output_h, &new_output_suby,
15949+ resize->output_subh, resize->input_h, resize->input_t0,
15950+ resize->input_t1)) {
15951+ return 0;
15952+ }
15953+
15954+ // if nothing to do, just return
15955+ if ((horizontal.scale_info.output_sub_size == 0) ||
15956+ (vertical.scale_info.output_sub_size == 0)) {
15957+ return 0;
15958+ }
15959+
15960+ stbir__set_sampler(
15961+ &horizontal, resize->horizontal_filter,
15962+ resize->horizontal_filter_kernel, resize->horizontal_filter_support,
15963+ resize->horizontal_edge, &horizontal.scale_info, 1, resize->user_data);
15964+ stbir__get_conservative_extents(&horizontal, &conservative,
15965+ resize->user_data);
15966+ stbir__set_sampler(&vertical, resize->vertical_filter,
15967+ resize->vertical_filter_kernel,
15968+ resize->vertical_filter_support, resize->vertical_edge,
15969+ &vertical.scale_info, 0, resize->user_data);
15970+
15971+ if ((vertical.scale_info.output_sub_size / splits) <
15972+ STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS) // each split should be a
15973+ // minimum of 4 scanlines
15974+ // (handwavey choice)
15975+ {
15976+ splits = vertical.scale_info.output_sub_size /
15977+ STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS;
15978+ if (splits == 0) {
15979+ splits = 1;
15980+ }
15981+ }
15982+
15983+ STBIR_PROFILE_BUILD_START(alloc);
15984+ out_info = stbir__alloc_internal_mem_and_build_samplers(
15985+ &horizontal, &vertical, &conservative,
15986+ resize->input_pixel_layout_public, resize->output_pixel_layout_public,
15987+ splits, new_output_subx, new_output_suby, resize->fast_alpha,
15988+ resize->user_data STBIR_ONLY_PROFILE_BUILD_SET_INFO);
15989+ STBIR_PROFILE_BUILD_END(alloc);
15990+ STBIR_PROFILE_BUILD_END(build);
15991+
15992+ if (out_info) {
15993+ resize->splits = splits;
15994+ resize->samplers = out_info;
15995+ resize->needs_rebuild = 0;
15996+#ifdef STBIR_PROFILE
15997+ STBIR_MEMCPY(&out_info->profile, &profile_infod.profile,
15998+ sizeof(out_info->profile));
15999+#endif
16000
16001- resize->called_alloc = 1;
16002- return stbir__perform_build( resize, splits );
16003- }
16004+ // update anything that can be changed without recalcing samplers
16005+ stbir__update_info_from_resize(out_info, resize);
16006
16007- STBIR_PROFILE_BUILD_CLEAR( resize->samplers );
16008+ return splits;
16009+ }
16010
16011- return 1;
16012+ return 0;
16013 }
16014
16015-STBIRDEF int stbir_build_samplers( STBIR_RESIZE * resize )
16016+void
16017+stbir_free_samplers(STBIR_RESIZE *resize)
16018 {
16019- return stbir_build_samplers_with_splits( resize, 1 );
16020+ if (resize->samplers) {
16021+ stbir__free_internal_mem(resize->samplers);
16022+ resize->samplers = 0;
16023+ resize->called_alloc = 0;
16024+ }
16025 }
16026
16027-STBIRDEF int stbir_resize_extended( STBIR_RESIZE * resize )
16028+STBIRDEF int
16029+stbir_build_samplers_with_splits(STBIR_RESIZE *resize, int splits)
16030 {
16031- int result;
16032-
16033- if ( ( resize->samplers == 0 ) || ( resize->needs_rebuild ) )
16034- {
16035- int alloc_state = resize->called_alloc; // remember allocated state
16036-
16037- if ( resize->samplers )
16038- {
16039- stbir__free_internal_mem( resize->samplers );
16040- resize->samplers = 0;
16041- }
16042+ if ((resize->samplers == 0) || (resize->needs_rebuild)) {
16043+ if (resize->samplers) {
16044+ stbir_free_samplers(resize);
16045+ }
16046
16047- if ( !stbir_build_samplers( resize ) )
16048- return 0;
16049+ resize->called_alloc = 1;
16050+ return stbir__perform_build(resize, splits);
16051+ }
16052
16053- resize->called_alloc = alloc_state;
16054+ STBIR_PROFILE_BUILD_CLEAR(resize->samplers);
16055
16056- // if build_samplers succeeded (above), but there are no samplers set, then
16057- // the area to stretch into was zero pixels, so don't do anything and return
16058- // success
16059- if ( resize->samplers == 0 )
16060- return 1;
16061- }
16062- else
16063- {
16064- // didn't build anything - clear it
16065- STBIR_PROFILE_BUILD_CLEAR( resize->samplers );
16066- }
16067-
16068- // do resize
16069- result = stbir__perform_resize( resize->samplers, 0, resize->splits );
16070-
16071- // if we alloced, then free
16072- if ( !resize->called_alloc )
16073- {
16074- stbir_free_samplers( resize );
16075- resize->samplers = 0;
16076- }
16077-
16078- return result;
16079+ return 1;
16080 }
16081
16082-STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start, int split_count )
16083+STBIRDEF int
16084+stbir_build_samplers(STBIR_RESIZE *resize)
16085 {
16086- STBIR_ASSERT( resize->samplers );
16087-
16088- // if we're just doing the whole thing, call full
16089- if ( ( split_start == -1 ) || ( ( split_start == 0 ) && ( split_count == resize->splits ) ) )
16090- return stbir_resize_extended( resize );
16091-
16092- // you **must** build samplers first when using split resize
16093- if ( ( resize->samplers == 0 ) || ( resize->needs_rebuild ) )
16094- return 0;
16095-
16096- if ( ( split_start >= resize->splits ) || ( split_start < 0 ) || ( ( split_start + split_count ) > resize->splits ) || ( split_count <= 0 ) )
16097- return 0;
16098-
16099- // do resize
16100- return stbir__perform_resize( resize->samplers, split_start, split_count );
16101+ return stbir_build_samplers_with_splits(resize, 1);
16102 }
16103
16104-
16105-static void * stbir_quick_resize_helper( const void *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
16106- void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
16107- stbir_pixel_layout pixel_layout, stbir_datatype data_type, stbir_edge edge, stbir_filter filter )
16108+STBIRDEF int
16109+stbir_resize_extended(STBIR_RESIZE *resize)
16110 {
16111- STBIR_RESIZE resize;
16112- int scanline_output_in_bytes;
16113- int positive_output_stride_in_bytes;
16114- void * start_ptr;
16115- void * free_ptr;
16116-
16117- scanline_output_in_bytes = output_w * stbir__type_size[ data_type ] * stbir__pixel_channels[ stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ];
16118- if ( scanline_output_in_bytes == 0 )
16119- return 0;
16120-
16121- // if zero stride, use scanline output
16122- if ( output_stride_in_bytes == 0 )
16123- output_stride_in_bytes = scanline_output_in_bytes;
16124-
16125- // abs value for inverted images (negative pitches)
16126- positive_output_stride_in_bytes = output_stride_in_bytes;
16127- if ( positive_output_stride_in_bytes < 0 )
16128- positive_output_stride_in_bytes = -positive_output_stride_in_bytes;
16129-
16130- // is the requested stride smaller than the scanline output? if so, just fail
16131- if ( positive_output_stride_in_bytes < scanline_output_in_bytes )
16132- return 0;
16133+ int result;
16134
16135- start_ptr = output_pixels;
16136- free_ptr = 0; // no free pointer, since they passed buffer to use
16137+ if ((resize->samplers == 0) || (resize->needs_rebuild)) {
16138+ int alloc_state = resize->called_alloc; // remember allocated state
16139
16140- // did they pass a zero for the dest? if so, allocate the buffer
16141- if ( output_pixels == 0 )
16142- {
16143- size_t size;
16144- char * ptr;
16145-
16146- size = (size_t)positive_output_stride_in_bytes * (size_t)output_h;
16147- if ( size == 0 )
16148- return 0;
16149+ if (resize->samplers) {
16150+ stbir__free_internal_mem(resize->samplers);
16151+ resize->samplers = 0;
16152+ }
16153
16154- ptr = (char*) STBIR_MALLOC( size, 0 );
16155- if ( ptr == 0 )
16156- return 0;
16157+ if (!stbir_build_samplers(resize)) {
16158+ return 0;
16159+ }
16160
16161- free_ptr = ptr;
16162+ resize->called_alloc = alloc_state;
16163
16164- // point at the last scanline, if they requested a flipped image
16165- if ( output_stride_in_bytes < 0 )
16166- start_ptr = ptr + ( (size_t)positive_output_stride_in_bytes * (size_t)( output_h - 1 ) );
16167- else
16168- start_ptr = ptr;
16169- }
16170+ // if build_samplers succeeded (above), but there are no samplers set,
16171+ // then
16172+ // the area to stretch into was zero pixels, so don't do anything and
16173+ // return success
16174+ if (resize->samplers == 0) {
16175+ return 1;
16176+ }
16177+ } else {
16178+ // didn't build anything - clear it
16179+ STBIR_PROFILE_BUILD_CLEAR(resize->samplers);
16180+ }
16181
16182- // ok, now do the resize
16183- stbir_resize_init( &resize,
16184- input_pixels, input_w, input_h, input_stride_in_bytes,
16185- start_ptr, output_w, output_h, output_stride_in_bytes,
16186- pixel_layout, data_type );
16187+ // do resize
16188+ result = stbir__perform_resize(resize->samplers, 0, resize->splits);
16189
16190- resize.horizontal_edge = edge;
16191- resize.vertical_edge = edge;
16192- resize.horizontal_filter = filter;
16193- resize.vertical_filter = filter;
16194+ // if we alloced, then free
16195+ if (!resize->called_alloc) {
16196+ stbir_free_samplers(resize);
16197+ resize->samplers = 0;
16198+ }
16199
16200- if ( !stbir_resize_extended( &resize ) )
16201- {
16202- if ( free_ptr )
16203- STBIR_FREE( free_ptr, 0 );
16204- return 0;
16205- }
16206-
16207- return (free_ptr) ? free_ptr : start_ptr;
16208-}
16209-
16210-
16211-
16212-STBIRDEF unsigned char * stbir_resize_uint8_linear( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
16213- unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
16214- stbir_pixel_layout pixel_layout )
16215-{
16216- return (unsigned char *) stbir_quick_resize_helper( input_pixels , input_w , input_h, input_stride_in_bytes,
16217- output_pixels, output_w, output_h, output_stride_in_bytes,
16218- pixel_layout, STBIR_TYPE_UINT8, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT );
16219+ return result;
16220 }
16221
16222-STBIRDEF unsigned char * stbir_resize_uint8_srgb( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
16223- unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
16224- stbir_pixel_layout pixel_layout )
16225+STBIRDEF int
16226+stbir_resize_extended_split(STBIR_RESIZE *resize, int split_start,
16227+ int split_count)
16228 {
16229- return (unsigned char *) stbir_quick_resize_helper( input_pixels , input_w , input_h, input_stride_in_bytes,
16230- output_pixels, output_w, output_h, output_stride_in_bytes,
16231- pixel_layout, STBIR_TYPE_UINT8_SRGB, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT );
16232-}
16233+ STBIR_ASSERT(resize->samplers);
16234
16235+ // if we're just doing the whole thing, call full
16236+ if ((split_start == -1) ||
16237+ ((split_start == 0) && (split_count == resize->splits))) {
16238+ return stbir_resize_extended(resize);
16239+ }
16240
16241-STBIRDEF float * stbir_resize_float_linear( const float *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
16242- float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
16243- stbir_pixel_layout pixel_layout )
16244-{
16245- return (float *) stbir_quick_resize_helper( input_pixels , input_w , input_h, input_stride_in_bytes,
16246- output_pixels, output_w, output_h, output_stride_in_bytes,
16247- pixel_layout, STBIR_TYPE_FLOAT, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT );
16248-}
16249+ // you **must** build samplers first when using split resize
16250+ if ((resize->samplers == 0) || (resize->needs_rebuild)) {
16251+ return 0;
16252+ }
16253
16254+ if ((split_start >= resize->splits) || (split_start < 0) ||
16255+ ((split_start + split_count) > resize->splits) || (split_count <= 0)) {
16256+ return 0;
16257+ }
16258
16259-STBIRDEF void * stbir_resize( const void *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
16260- void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
16261- stbir_pixel_layout pixel_layout, stbir_datatype data_type,
16262- stbir_edge edge, stbir_filter filter )
16263-{
16264- return (void *) stbir_quick_resize_helper( input_pixels , input_w , input_h, input_stride_in_bytes,
16265- output_pixels, output_w, output_h, output_stride_in_bytes,
16266- pixel_layout, data_type, edge, filter );
16267+ // do resize
16268+ return stbir__perform_resize(resize->samplers, split_start, split_count);
16269+}
16270+
16271+static void *
16272+stbir_quick_resize_helper(const void *input_pixels, int input_w, int input_h,
16273+ int input_stride_in_bytes, void *output_pixels,
16274+ int output_w, int output_h,
16275+ int output_stride_in_bytes,
16276+ stbir_pixel_layout pixel_layout,
16277+ stbir_datatype data_type, stbir_edge edge,
16278+ stbir_filter filter)
16279+{
16280+ STBIR_RESIZE resize;
16281+ int scanline_output_in_bytes;
16282+ int positive_output_stride_in_bytes;
16283+ void *start_ptr;
16284+ void *free_ptr;
16285+
16286+ scanline_output_in_bytes =
16287+ output_w * stbir__type_size[data_type] *
16288+ stbir__pixel_channels
16289+ [stbir__pixel_layout_convert_public_to_internal[pixel_layout]];
16290+ if (scanline_output_in_bytes == 0) {
16291+ return 0;
16292+ }
16293+
16294+ // if zero stride, use scanline output
16295+ if (output_stride_in_bytes == 0) {
16296+ output_stride_in_bytes = scanline_output_in_bytes;
16297+ }
16298+
16299+ // abs value for inverted images (negative pitches)
16300+ positive_output_stride_in_bytes = output_stride_in_bytes;
16301+ if (positive_output_stride_in_bytes < 0) {
16302+ positive_output_stride_in_bytes = -positive_output_stride_in_bytes;
16303+ }
16304+
16305+ // is the requested stride smaller than the scanline output? if so, just
16306+ // fail
16307+ if (positive_output_stride_in_bytes < scanline_output_in_bytes) {
16308+ return 0;
16309+ }
16310+
16311+ start_ptr = output_pixels;
16312+ free_ptr = 0; // no free pointer, since they passed buffer to use
16313+
16314+ // did they pass a zero for the dest? if so, allocate the buffer
16315+ if (output_pixels == 0) {
16316+ size_t size;
16317+ char *ptr;
16318+
16319+ size = (size_t)positive_output_stride_in_bytes * (size_t)output_h;
16320+ if (size == 0) {
16321+ return 0;
16322+ }
16323+
16324+ ptr = (char *)STBIR_MALLOC(size, 0);
16325+ if (ptr == 0) {
16326+ return 0;
16327+ }
16328+
16329+ free_ptr = ptr;
16330+
16331+ // point at the last scanline, if they requested a flipped image
16332+ if (output_stride_in_bytes < 0) {
16333+ start_ptr = ptr + ((size_t)positive_output_stride_in_bytes *
16334+ (size_t)(output_h - 1));
16335+ } else {
16336+ start_ptr = ptr;
16337+ }
16338+ }
16339+
16340+ // ok, now do the resize
16341+ stbir_resize_init(&resize, input_pixels, input_w, input_h,
16342+ input_stride_in_bytes, start_ptr, output_w, output_h,
16343+ output_stride_in_bytes, pixel_layout, data_type);
16344+
16345+ resize.horizontal_edge = edge;
16346+ resize.vertical_edge = edge;
16347+ resize.horizontal_filter = filter;
16348+ resize.vertical_filter = filter;
16349+
16350+ if (!stbir_resize_extended(&resize)) {
16351+ if (free_ptr) {
16352+ STBIR_FREE(free_ptr, 0);
16353+ }
16354+ return 0;
16355+ }
16356+
16357+ return (free_ptr) ? free_ptr : start_ptr;
16358+}
16359+
16360+STBIRDEF unsigned char *
16361+stbir_resize_uint8_linear(const unsigned char *input_pixels, int input_w,
16362+ int input_h, int input_stride_in_bytes,
16363+ unsigned char *output_pixels, int output_w,
16364+ int output_h, int output_stride_in_bytes,
16365+ stbir_pixel_layout pixel_layout)
16366+{
16367+ return (unsigned char *)stbir_quick_resize_helper(
16368+ input_pixels, input_w, input_h, input_stride_in_bytes, output_pixels,
16369+ output_w, output_h, output_stride_in_bytes, pixel_layout,
16370+ STBIR_TYPE_UINT8, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT);
16371+}
16372+
16373+STBIRDEF unsigned char *
16374+stbir_resize_uint8_srgb(const unsigned char *input_pixels, int input_w,
16375+ int input_h, int input_stride_in_bytes,
16376+ unsigned char *output_pixels, int output_w,
16377+ int output_h, int output_stride_in_bytes,
16378+ stbir_pixel_layout pixel_layout)
16379+{
16380+ return (unsigned char *)stbir_quick_resize_helper(
16381+ input_pixels, input_w, input_h, input_stride_in_bytes, output_pixels,
16382+ output_w, output_h, output_stride_in_bytes, pixel_layout,
16383+ STBIR_TYPE_UINT8_SRGB, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT);
16384+}
16385+
16386+STBIRDEF float *
16387+stbir_resize_float_linear(const float *input_pixels, int input_w, int input_h,
16388+ int input_stride_in_bytes, float *output_pixels,
16389+ int output_w, int output_h,
16390+ int output_stride_in_bytes,
16391+ stbir_pixel_layout pixel_layout)
16392+{
16393+ return (float *)stbir_quick_resize_helper(
16394+ input_pixels, input_w, input_h, input_stride_in_bytes, output_pixels,
16395+ output_w, output_h, output_stride_in_bytes, pixel_layout,
16396+ STBIR_TYPE_FLOAT, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT);
16397+}
16398+
16399+STBIRDEF void *
16400+stbir_resize(const void *input_pixels, int input_w, int input_h,
16401+ int input_stride_in_bytes, void *output_pixels, int output_w,
16402+ int output_h, int output_stride_in_bytes,
16403+ stbir_pixel_layout pixel_layout, stbir_datatype data_type,
16404+ stbir_edge edge, stbir_filter filter)
16405+{
16406+ return (void *)stbir_quick_resize_helper(
16407+ input_pixels, input_w, input_h, input_stride_in_bytes, output_pixels,
16408+ output_w, output_h, output_stride_in_bytes, pixel_layout, data_type,
16409+ edge, filter);
16410 }
16411
16412 #ifdef STBIR_PROFILE
16413
16414-STBIRDEF void stbir_resize_build_profile_info( STBIR_PROFILE_INFO * info, STBIR_RESIZE const * resize )
16415-{
16416- static char const * bdescriptions[6] = { "Building", "Allocating", "Horizontal sampler", "Vertical sampler", "Coefficient cleanup", "Coefficient piovot" } ;
16417- stbir__info* samp = resize->samplers;
16418- int i;
16419-
16420- typedef int testa[ (STBIR__ARRAY_SIZE( bdescriptions ) == (STBIR__ARRAY_SIZE( samp->profile.array )-1) )?1:-1];
16421- typedef int testb[ (sizeof( samp->profile.array ) == (sizeof(samp->profile.named)) )?1:-1];
16422- typedef int testc[ (sizeof( info->clocks ) >= (sizeof(samp->profile.named)) )?1:-1];
16423-
16424- for( i = 0 ; i < STBIR__ARRAY_SIZE( bdescriptions ) ; i++)
16425- info->clocks[i] = samp->profile.array[i+1];
16426-
16427- info->total_clocks = samp->profile.named.total;
16428- info->descriptions = bdescriptions;
16429- info->count = STBIR__ARRAY_SIZE( bdescriptions );
16430-}
16431-
16432-STBIRDEF void stbir_resize_split_profile_info( STBIR_PROFILE_INFO * info, STBIR_RESIZE const * resize, int split_start, int split_count )
16433-{
16434- static char const * descriptions[7] = { "Looping", "Vertical sampling", "Horizontal sampling", "Scanline input", "Scanline output", "Alpha weighting", "Alpha unweighting" };
16435- stbir__per_split_info * split_info;
16436- int s, i;
16437-
16438- typedef int testa[ (STBIR__ARRAY_SIZE( descriptions ) == (STBIR__ARRAY_SIZE( split_info->profile.array )-1) )?1:-1];
16439- typedef int testb[ (sizeof( split_info->profile.array ) == (sizeof(split_info->profile.named)) )?1:-1];
16440- typedef int testc[ (sizeof( info->clocks ) >= (sizeof(split_info->profile.named)) )?1:-1];
16441-
16442- if ( split_start == -1 )
16443- {
16444- split_start = 0;
16445- split_count = resize->samplers->splits;
16446- }
16447-
16448- if ( ( split_start >= resize->splits ) || ( split_start < 0 ) || ( ( split_start + split_count ) > resize->splits ) || ( split_count <= 0 ) )
16449- {
16450- info->total_clocks = 0;
16451- info->descriptions = 0;
16452- info->count = 0;
16453- return;
16454- }
16455-
16456- split_info = resize->samplers->split_info + split_start;
16457-
16458- // sum up the profile from all the splits
16459- for( i = 0 ; i < STBIR__ARRAY_SIZE( descriptions ) ; i++ )
16460- {
16461- stbir_uint64 sum = 0;
16462- for( s = 0 ; s < split_count ; s++ )
16463- sum += split_info[s].profile.array[i+1];
16464- info->clocks[i] = sum;
16465- }
16466-
16467- info->total_clocks = split_info->profile.named.total;
16468- info->descriptions = descriptions;
16469- info->count = STBIR__ARRAY_SIZE( descriptions );
16470-}
16471-
16472-STBIRDEF void stbir_resize_extended_profile_info( STBIR_PROFILE_INFO * info, STBIR_RESIZE const * resize )
16473-{
16474- stbir_resize_split_profile_info( info, resize, -1, 0 );
16475+STBIRDEF void
16476+stbir_resize_build_profile_info(STBIR_PROFILE_INFO *info,
16477+ STBIR_RESIZE const *resize)
16478+{
16479+ static char const *bdescriptions[6] = {
16480+ "Building", "Allocating", "Horizontal sampler",
16481+ "Vertical sampler", "Coefficient cleanup", "Coefficient piovot"};
16482+ stbir__info *samp = resize->samplers;
16483+ int i;
16484+
16485+ typedef int testa[(STBIR__ARRAY_SIZE(bdescriptions) ==
16486+ (STBIR__ARRAY_SIZE(samp->profile.array) - 1))
16487+ ? 1
16488+ : -1];
16489+ typedef int
16490+ testb[(sizeof(samp->profile.array) == (sizeof(samp->profile.named)))
16491+ ? 1
16492+ : -1];
16493+ typedef int
16494+ testc[(sizeof(info->clocks) >= (sizeof(samp->profile.named))) ? 1 : -1];
16495+
16496+ for (i = 0; i < STBIR__ARRAY_SIZE(bdescriptions); i++) {
16497+ info->clocks[i] = samp->profile.array[i + 1];
16498+ }
16499+
16500+ info->total_clocks = samp->profile.named.total;
16501+ info->descriptions = bdescriptions;
16502+ info->count = STBIR__ARRAY_SIZE(bdescriptions);
16503+}
16504+
16505+STBIRDEF void
16506+stbir_resize_split_profile_info(STBIR_PROFILE_INFO *info,
16507+ STBIR_RESIZE const *resize, int split_start,
16508+ int split_count)
16509+{
16510+ static char const *descriptions[7] = {
16511+ "Looping", "Vertical sampling", "Horizontal sampling",
16512+ "Scanline input", "Scanline output", "Alpha weighting",
16513+ "Alpha unweighting"};
16514+ stbir__per_split_info *split_info;
16515+ int s, i;
16516+
16517+ typedef int testa[(STBIR__ARRAY_SIZE(descriptions) ==
16518+ (STBIR__ARRAY_SIZE(split_info->profile.array) - 1))
16519+ ? 1
16520+ : -1];
16521+ typedef int testb[(sizeof(split_info->profile.array) ==
16522+ (sizeof(split_info->profile.named)))
16523+ ? 1
16524+ : -1];
16525+ typedef int
16526+ testc[(sizeof(info->clocks) >= (sizeof(split_info->profile.named)))
16527+ ? 1
16528+ : -1];
16529+
16530+ if (split_start == -1) {
16531+ split_start = 0;
16532+ split_count = resize->samplers->splits;
16533+ }
16534+
16535+ if ((split_start >= resize->splits) || (split_start < 0) ||
16536+ ((split_start + split_count) > resize->splits) || (split_count <= 0)) {
16537+ info->total_clocks = 0;
16538+ info->descriptions = 0;
16539+ info->count = 0;
16540+ return;
16541+ }
16542+
16543+ split_info = resize->samplers->split_info + split_start;
16544+
16545+ // sum up the profile from all the splits
16546+ for (i = 0; i < STBIR__ARRAY_SIZE(descriptions); i++) {
16547+ stbir_uint64 sum = 0;
16548+ for (s = 0; s < split_count; s++) {
16549+ sum += split_info[s].profile.array[i + 1];
16550+ }
16551+ info->clocks[i] = sum;
16552+ }
16553+
16554+ info->total_clocks = split_info->profile.named.total;
16555+ info->descriptions = descriptions;
16556+ info->count = STBIR__ARRAY_SIZE(descriptions);
16557+}
16558+
16559+STBIRDEF void
16560+stbir_resize_extended_profile_info(STBIR_PROFILE_INFO *info,
16561+ STBIR_RESIZE const *resize)
16562+{
16563+ stbir_resize_split_profile_info(info, resize, -1, 0);
16564 }
16565
16566 #endif // STBIR_PROFILE
16567@@ -8215,32 +9828,58 @@ STBIRDEF void stbir_resize_extended_profile_info( STBIR_PROFILE_INFO * info, STB
16568
16569 #endif // STB_IMAGE_RESIZE_IMPLEMENTATION
16570
16571-#else // STB_IMAGE_RESIZE_HORIZONTALS&STB_IMAGE_RESIZE_DO_VERTICALS
16572+#else // STB_IMAGE_RESIZE_HORIZONTALS&STB_IMAGE_RESIZE_DO_VERTICALS
16573
16574 // we reinclude the header file to define all the horizontal functions
16575-// specializing each function for the number of coeffs is 20-40% faster *OVERALL*
16576+// specializing each function for the number of coeffs is 20-40% faster
16577+// *OVERALL*
16578
16579 // by including the header file again this way, we can still debug the functions
16580
16581-#define STBIR_strs_join2( start, mid, end ) start##mid##end
16582-#define STBIR_strs_join1( start, mid, end ) STBIR_strs_join2( start, mid, end )
16583+#define STBIR_strs_join2(start, mid, end) start##mid##end
16584+#define STBIR_strs_join1(start, mid, end) STBIR_strs_join2(start, mid, end)
16585
16586-#define STBIR_strs_join24( start, mid1, mid2, end ) start##mid1##mid2##end
16587-#define STBIR_strs_join14( start, mid1, mid2, end ) STBIR_strs_join24( start, mid1, mid2, end )
16588+#define STBIR_strs_join24(start, mid1, mid2, end) start##mid1##mid2##end
16589+#define STBIR_strs_join14(start, mid1, mid2, end) \
16590+ STBIR_strs_join24(start, mid1, mid2, end)
16591
16592 #ifdef STB_IMAGE_RESIZE_DO_CODERS
16593
16594 #ifdef stbir__decode_suffix
16595-#define STBIR__CODER_NAME( name ) STBIR_strs_join1( name, _, stbir__decode_suffix )
16596+#define STBIR__CODER_NAME(name) STBIR_strs_join1(name, _, stbir__decode_suffix)
16597 #else
16598-#define STBIR__CODER_NAME( name ) name
16599+#define STBIR__CODER_NAME(name) name
16600 #endif
16601
16602 #ifdef stbir__decode_swizzle
16603-#define stbir__decode_simdf8_flip(reg) STBIR_strs_join1( STBIR_strs_join1( STBIR_strs_join1( STBIR_strs_join1( stbir__simdf8_0123to,stbir__decode_order0,stbir__decode_order1),stbir__decode_order2,stbir__decode_order3),stbir__decode_order0,stbir__decode_order1),stbir__decode_order2,stbir__decode_order3)(reg, reg)
16604-#define stbir__decode_simdf4_flip(reg) STBIR_strs_join1( STBIR_strs_join1( stbir__simdf_0123to,stbir__decode_order0,stbir__decode_order1),stbir__decode_order2,stbir__decode_order3)(reg, reg)
16605-#define stbir__encode_simdf8_unflip(reg) STBIR_strs_join1( STBIR_strs_join1( STBIR_strs_join1( STBIR_strs_join1( stbir__simdf8_0123to,stbir__encode_order0,stbir__encode_order1),stbir__encode_order2,stbir__encode_order3),stbir__encode_order0,stbir__encode_order1),stbir__encode_order2,stbir__encode_order3)(reg, reg)
16606-#define stbir__encode_simdf4_unflip(reg) STBIR_strs_join1( STBIR_strs_join1( stbir__simdf_0123to,stbir__encode_order0,stbir__encode_order1),stbir__encode_order2,stbir__encode_order3)(reg, reg)
16607+#define stbir__decode_simdf8_flip(reg) \
16608+ STBIR_strs_join1( \
16609+ STBIR_strs_join1( \
16610+ STBIR_strs_join1(STBIR_strs_join1(stbir__simdf8_0123to, \
16611+ stbir__decode_order0, \
16612+ stbir__decode_order1), \
16613+ stbir__decode_order2, stbir__decode_order3), \
16614+ stbir__decode_order0, stbir__decode_order1), \
16615+ stbir__decode_order2, stbir__decode_order3)(reg, reg)
16616+#define stbir__decode_simdf4_flip(reg) \
16617+ STBIR_strs_join1(STBIR_strs_join1(stbir__simdf_0123to, \
16618+ stbir__decode_order0, \
16619+ stbir__decode_order1), \
16620+ stbir__decode_order2, stbir__decode_order3)(reg, reg)
16621+#define stbir__encode_simdf8_unflip(reg) \
16622+ STBIR_strs_join1( \
16623+ STBIR_strs_join1( \
16624+ STBIR_strs_join1(STBIR_strs_join1(stbir__simdf8_0123to, \
16625+ stbir__encode_order0, \
16626+ stbir__encode_order1), \
16627+ stbir__encode_order2, stbir__encode_order3), \
16628+ stbir__encode_order0, stbir__encode_order1), \
16629+ stbir__encode_order2, stbir__encode_order3)(reg, reg)
16630+#define stbir__encode_simdf4_unflip(reg) \
16631+ STBIR_strs_join1(STBIR_strs_join1(stbir__simdf_0123to, \
16632+ stbir__encode_order0, \
16633+ stbir__encode_order1), \
16634+ stbir__encode_order2, stbir__encode_order3)(reg, reg)
16635 #else
16636 #define stbir__decode_order0 0
16637 #define stbir__decode_order1 1
16638@@ -8257,1589 +9896,1817 @@ STBIRDEF void stbir_resize_extended_profile_info( STBIR_PROFILE_INFO * info, STB
16639 #endif
16640
16641 #ifdef STBIR_SIMD8
16642-#define stbir__encode_simdfX_unflip stbir__encode_simdf8_unflip
16643+#define stbir__encode_simdfX_unflip stbir__encode_simdf8_unflip
16644 #else
16645-#define stbir__encode_simdfX_unflip stbir__encode_simdf4_unflip
16646-#endif
16647-
16648-static float * STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * decodep, int width_times_channels, void const * inputp )
16649-{
16650- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
16651- float * decode_end = (float*) decode + width_times_channels;
16652- unsigned char const * input = (unsigned char const*)inputp;
16653-
16654- #ifdef STBIR_SIMD
16655- unsigned char const * end_input_m16 = input + width_times_channels - 16;
16656- if ( width_times_channels >= 16 )
16657- {
16658- decode_end -= 16;
16659- STBIR_NO_UNROLL_LOOP_START_INF_FOR
16660- for(;;)
16661- {
16662- #ifdef STBIR_SIMD8
16663- stbir__simdi i; stbir__simdi8 o0,o1;
16664- stbir__simdf8 of0, of1;
16665- STBIR_NO_UNROLL(decode);
16666- stbir__simdi_load( i, input );
16667- stbir__simdi8_expand_u8_to_u32( o0, o1, i );
16668- stbir__simdi8_convert_i32_to_float( of0, o0 );
16669- stbir__simdi8_convert_i32_to_float( of1, o1 );
16670- stbir__simdf8_mult( of0, of0, STBIR_max_uint8_as_float_inverted8);
16671- stbir__simdf8_mult( of1, of1, STBIR_max_uint8_as_float_inverted8);
16672- stbir__decode_simdf8_flip( of0 );
16673- stbir__decode_simdf8_flip( of1 );
16674- stbir__simdf8_store( decode + 0, of0 );
16675- stbir__simdf8_store( decode + 8, of1 );
16676- #else
16677- stbir__simdi i, o0, o1, o2, o3;
16678- stbir__simdf of0, of1, of2, of3;
16679- STBIR_NO_UNROLL(decode);
16680- stbir__simdi_load( i, input );
16681- stbir__simdi_expand_u8_to_u32( o0,o1,o2,o3,i);
16682- stbir__simdi_convert_i32_to_float( of0, o0 );
16683- stbir__simdi_convert_i32_to_float( of1, o1 );
16684- stbir__simdi_convert_i32_to_float( of2, o2 );
16685- stbir__simdi_convert_i32_to_float( of3, o3 );
16686- stbir__simdf_mult( of0, of0, STBIR__CONSTF(STBIR_max_uint8_as_float_inverted) );
16687- stbir__simdf_mult( of1, of1, STBIR__CONSTF(STBIR_max_uint8_as_float_inverted) );
16688- stbir__simdf_mult( of2, of2, STBIR__CONSTF(STBIR_max_uint8_as_float_inverted) );
16689- stbir__simdf_mult( of3, of3, STBIR__CONSTF(STBIR_max_uint8_as_float_inverted) );
16690- stbir__decode_simdf4_flip( of0 );
16691- stbir__decode_simdf4_flip( of1 );
16692- stbir__decode_simdf4_flip( of2 );
16693- stbir__decode_simdf4_flip( of3 );
16694- stbir__simdf_store( decode + 0, of0 );
16695- stbir__simdf_store( decode + 4, of1 );
16696- stbir__simdf_store( decode + 8, of2 );
16697- stbir__simdf_store( decode + 12, of3 );
16698- #endif
16699- decode += 16;
16700- input += 16;
16701- if ( decode <= decode_end )
16702- continue;
16703- if ( decode == ( decode_end + 16 ) )
16704- break;
16705- decode = decode_end; // backup and do last couple
16706- input = end_input_m16;
16707- }
16708- return decode_end + 16;
16709- }
16710- #endif
16711-
16712- // try to do blocks of 4 when you can
16713- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
16714- decode += 4;
16715- STBIR_SIMD_NO_UNROLL_LOOP_START
16716- while( decode <= decode_end )
16717- {
16718- STBIR_SIMD_NO_UNROLL(decode);
16719- decode[0-4] = ((float)(input[stbir__decode_order0])) * stbir__max_uint8_as_float_inverted;
16720- decode[1-4] = ((float)(input[stbir__decode_order1])) * stbir__max_uint8_as_float_inverted;
16721- decode[2-4] = ((float)(input[stbir__decode_order2])) * stbir__max_uint8_as_float_inverted;
16722- decode[3-4] = ((float)(input[stbir__decode_order3])) * stbir__max_uint8_as_float_inverted;
16723- decode += 4;
16724- input += 4;
16725- }
16726- decode -= 4;
16727- #endif
16728-
16729- // do the remnants
16730- #if stbir__coder_min_num < 4
16731- STBIR_NO_UNROLL_LOOP_START
16732- while( decode < decode_end )
16733- {
16734- STBIR_NO_UNROLL(decode);
16735- decode[0] = ((float)(input[stbir__decode_order0])) * stbir__max_uint8_as_float_inverted;
16736- #if stbir__coder_min_num >= 2
16737- decode[1] = ((float)(input[stbir__decode_order1])) * stbir__max_uint8_as_float_inverted;
16738- #endif
16739- #if stbir__coder_min_num >= 3
16740- decode[2] = ((float)(input[stbir__decode_order2])) * stbir__max_uint8_as_float_inverted;
16741- #endif
16742- decode += stbir__coder_min_num;
16743- input += stbir__coder_min_num;
16744- }
16745- #endif
16746-
16747- return decode_end;
16748-}
16749-
16750-static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outputp, int width_times_channels, float const * encode )
16751-{
16752- unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char *) outputp;
16753- unsigned char * end_output = ( (unsigned char *) output ) + width_times_channels;
16754-
16755- #ifdef STBIR_SIMD
16756- if ( width_times_channels >= stbir__simdfX_float_count*2 )
16757- {
16758- float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2;
16759- end_output -= stbir__simdfX_float_count*2;
16760- STBIR_NO_UNROLL_LOOP_START_INF_FOR
16761- for(;;)
16762- {
16763- stbir__simdfX e0, e1;
16764- stbir__simdi i;
16765- STBIR_SIMD_NO_UNROLL(encode);
16766- stbir__simdfX_madd_mem( e0, STBIR_simd_point5X, STBIR_max_uint8_as_floatX, encode );
16767- stbir__simdfX_madd_mem( e1, STBIR_simd_point5X, STBIR_max_uint8_as_floatX, encode+stbir__simdfX_float_count );
16768- stbir__encode_simdfX_unflip( e0 );
16769- stbir__encode_simdfX_unflip( e1 );
16770- #ifdef STBIR_SIMD8
16771- stbir__simdf8_pack_to_16bytes( i, e0, e1 );
16772- stbir__simdi_store( output, i );
16773- #else
16774- stbir__simdf_pack_to_8bytes( i, e0, e1 );
16775- stbir__simdi_store2( output, i );
16776- #endif
16777- encode += stbir__simdfX_float_count*2;
16778- output += stbir__simdfX_float_count*2;
16779- if ( output <= end_output )
16780- continue;
16781- if ( output == ( end_output + stbir__simdfX_float_count*2 ) )
16782- break;
16783- output = end_output; // backup and do last couple
16784- encode = end_encode_m8;
16785- }
16786- return;
16787- }
16788-
16789- // try to do blocks of 4 when you can
16790- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
16791- output += 4;
16792- STBIR_NO_UNROLL_LOOP_START
16793- while( output <= end_output )
16794- {
16795- stbir__simdf e0;
16796- stbir__simdi i0;
16797- STBIR_NO_UNROLL(encode);
16798- stbir__simdf_load( e0, encode );
16799- stbir__simdf_madd( e0, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint8_as_float), e0 );
16800- stbir__encode_simdf4_unflip( e0 );
16801- stbir__simdf_pack_to_8bytes( i0, e0, e0 ); // only use first 4
16802- *(int*)(output-4) = stbir__simdi_to_int( i0 );
16803- output += 4;
16804- encode += 4;
16805- }
16806- output -= 4;
16807- #endif
16808-
16809- // do the remnants
16810- #if stbir__coder_min_num < 4
16811- STBIR_NO_UNROLL_LOOP_START
16812- while( output < end_output )
16813- {
16814- stbir__simdf e0;
16815- STBIR_NO_UNROLL(encode);
16816- stbir__simdf_madd1_mem( e0, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint8_as_float), encode+stbir__encode_order0 ); output[0] = stbir__simdf_convert_float_to_uint8( e0 );
16817- #if stbir__coder_min_num >= 2
16818- stbir__simdf_madd1_mem( e0, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint8_as_float), encode+stbir__encode_order1 ); output[1] = stbir__simdf_convert_float_to_uint8( e0 );
16819- #endif
16820- #if stbir__coder_min_num >= 3
16821- stbir__simdf_madd1_mem( e0, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint8_as_float), encode+stbir__encode_order2 ); output[2] = stbir__simdf_convert_float_to_uint8( e0 );
16822- #endif
16823- output += stbir__coder_min_num;
16824- encode += stbir__coder_min_num;
16825- }
16826- #endif
16827-
16828- #else
16829-
16830- // try to do blocks of 4 when you can
16831- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
16832- output += 4;
16833- while( output <= end_output )
16834- {
16835- float f;
16836- f = encode[stbir__encode_order0] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[0-4] = (unsigned char)f;
16837- f = encode[stbir__encode_order1] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[1-4] = (unsigned char)f;
16838- f = encode[stbir__encode_order2] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[2-4] = (unsigned char)f;
16839- f = encode[stbir__encode_order3] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[3-4] = (unsigned char)f;
16840- output += 4;
16841- encode += 4;
16842- }
16843- output -= 4;
16844- #endif
16845-
16846- // do the remnants
16847- #if stbir__coder_min_num < 4
16848- STBIR_NO_UNROLL_LOOP_START
16849- while( output < end_output )
16850- {
16851- float f;
16852- STBIR_NO_UNROLL(encode);
16853- f = encode[stbir__encode_order0] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[0] = (unsigned char)f;
16854- #if stbir__coder_min_num >= 2
16855- f = encode[stbir__encode_order1] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[1] = (unsigned char)f;
16856- #endif
16857- #if stbir__coder_min_num >= 3
16858- f = encode[stbir__encode_order2] * stbir__max_uint8_as_float + 0.5f; STBIR_CLAMP(f, 0, 255); output[2] = (unsigned char)f;
16859- #endif
16860- output += stbir__coder_min_num;
16861- encode += stbir__coder_min_num;
16862- }
16863- #endif
16864- #endif
16865-}
16866-
16867-static float * STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int width_times_channels, void const * inputp )
16868-{
16869- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
16870- float * decode_end = (float*) decode + width_times_channels;
16871- unsigned char const * input = (unsigned char const*)inputp;
16872-
16873- #ifdef STBIR_SIMD
16874- unsigned char const * end_input_m16 = input + width_times_channels - 16;
16875- if ( width_times_channels >= 16 )
16876- {
16877- decode_end -= 16;
16878- STBIR_NO_UNROLL_LOOP_START_INF_FOR
16879- for(;;)
16880- {
16881- #ifdef STBIR_SIMD8
16882- stbir__simdi i; stbir__simdi8 o0,o1;
16883- stbir__simdf8 of0, of1;
16884- STBIR_NO_UNROLL(decode);
16885- stbir__simdi_load( i, input );
16886- stbir__simdi8_expand_u8_to_u32( o0, o1, i );
16887- stbir__simdi8_convert_i32_to_float( of0, o0 );
16888- stbir__simdi8_convert_i32_to_float( of1, o1 );
16889- stbir__decode_simdf8_flip( of0 );
16890- stbir__decode_simdf8_flip( of1 );
16891- stbir__simdf8_store( decode + 0, of0 );
16892- stbir__simdf8_store( decode + 8, of1 );
16893- #else
16894- stbir__simdi i, o0, o1, o2, o3;
16895- stbir__simdf of0, of1, of2, of3;
16896- STBIR_NO_UNROLL(decode);
16897- stbir__simdi_load( i, input );
16898- stbir__simdi_expand_u8_to_u32( o0,o1,o2,o3,i);
16899- stbir__simdi_convert_i32_to_float( of0, o0 );
16900- stbir__simdi_convert_i32_to_float( of1, o1 );
16901- stbir__simdi_convert_i32_to_float( of2, o2 );
16902- stbir__simdi_convert_i32_to_float( of3, o3 );
16903- stbir__decode_simdf4_flip( of0 );
16904- stbir__decode_simdf4_flip( of1 );
16905- stbir__decode_simdf4_flip( of2 );
16906- stbir__decode_simdf4_flip( of3 );
16907- stbir__simdf_store( decode + 0, of0 );
16908- stbir__simdf_store( decode + 4, of1 );
16909- stbir__simdf_store( decode + 8, of2 );
16910- stbir__simdf_store( decode + 12, of3 );
16911-#endif
16912- decode += 16;
16913- input += 16;
16914- if ( decode <= decode_end )
16915- continue;
16916- if ( decode == ( decode_end + 16 ) )
16917- break;
16918- decode = decode_end; // backup and do last couple
16919- input = end_input_m16;
16920- }
16921- return decode_end + 16;
16922- }
16923- #endif
16924-
16925- // try to do blocks of 4 when you can
16926- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
16927- decode += 4;
16928- STBIR_SIMD_NO_UNROLL_LOOP_START
16929- while( decode <= decode_end )
16930- {
16931- STBIR_SIMD_NO_UNROLL(decode);
16932- decode[0-4] = ((float)(input[stbir__decode_order0]));
16933- decode[1-4] = ((float)(input[stbir__decode_order1]));
16934- decode[2-4] = ((float)(input[stbir__decode_order2]));
16935- decode[3-4] = ((float)(input[stbir__decode_order3]));
16936- decode += 4;
16937- input += 4;
16938- }
16939- decode -= 4;
16940- #endif
16941-
16942- // do the remnants
16943- #if stbir__coder_min_num < 4
16944- STBIR_NO_UNROLL_LOOP_START
16945- while( decode < decode_end )
16946- {
16947- STBIR_NO_UNROLL(decode);
16948- decode[0] = ((float)(input[stbir__decode_order0]));
16949- #if stbir__coder_min_num >= 2
16950- decode[1] = ((float)(input[stbir__decode_order1]));
16951- #endif
16952- #if stbir__coder_min_num >= 3
16953- decode[2] = ((float)(input[stbir__decode_order2]));
16954- #endif
16955- decode += stbir__coder_min_num;
16956- input += stbir__coder_min_num;
16957- }
16958- #endif
16959- return decode_end;
16960-}
16961-
16962-static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int width_times_channels, float const * encode )
16963-{
16964- unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char *) outputp;
16965- unsigned char * end_output = ( (unsigned char *) output ) + width_times_channels;
16966-
16967- #ifdef STBIR_SIMD
16968- if ( width_times_channels >= stbir__simdfX_float_count*2 )
16969- {
16970- float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2;
16971- end_output -= stbir__simdfX_float_count*2;
16972- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
16973- for(;;)
16974- {
16975- stbir__simdfX e0, e1;
16976- stbir__simdi i;
16977- STBIR_SIMD_NO_UNROLL(encode);
16978- stbir__simdfX_add_mem( e0, STBIR_simd_point5X, encode );
16979- stbir__simdfX_add_mem( e1, STBIR_simd_point5X, encode+stbir__simdfX_float_count );
16980- stbir__encode_simdfX_unflip( e0 );
16981- stbir__encode_simdfX_unflip( e1 );
16982- #ifdef STBIR_SIMD8
16983- stbir__simdf8_pack_to_16bytes( i, e0, e1 );
16984- stbir__simdi_store( output, i );
16985- #else
16986- stbir__simdf_pack_to_8bytes( i, e0, e1 );
16987- stbir__simdi_store2( output, i );
16988- #endif
16989- encode += stbir__simdfX_float_count*2;
16990- output += stbir__simdfX_float_count*2;
16991- if ( output <= end_output )
16992- continue;
16993- if ( output == ( end_output + stbir__simdfX_float_count*2 ) )
16994- break;
16995- output = end_output; // backup and do last couple
16996- encode = end_encode_m8;
16997- }
16998- return;
16999- }
17000-
17001- // try to do blocks of 4 when you can
17002- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17003- output += 4;
17004- STBIR_NO_UNROLL_LOOP_START
17005- while( output <= end_output )
17006- {
17007- stbir__simdf e0;
17008- stbir__simdi i0;
17009- STBIR_NO_UNROLL(encode);
17010- stbir__simdf_load( e0, encode );
17011- stbir__simdf_add( e0, STBIR__CONSTF(STBIR_simd_point5), e0 );
17012- stbir__encode_simdf4_unflip( e0 );
17013- stbir__simdf_pack_to_8bytes( i0, e0, e0 ); // only use first 4
17014- *(int*)(output-4) = stbir__simdi_to_int( i0 );
17015- output += 4;
17016- encode += 4;
17017- }
17018- output -= 4;
17019- #endif
17020-
17021- #else
17022-
17023- // try to do blocks of 4 when you can
17024- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17025- output += 4;
17026- while( output <= end_output )
17027- {
17028- float f;
17029- f = encode[stbir__encode_order0] + 0.5f; STBIR_CLAMP(f, 0, 255); output[0-4] = (unsigned char)f;
17030- f = encode[stbir__encode_order1] + 0.5f; STBIR_CLAMP(f, 0, 255); output[1-4] = (unsigned char)f;
17031- f = encode[stbir__encode_order2] + 0.5f; STBIR_CLAMP(f, 0, 255); output[2-4] = (unsigned char)f;
17032- f = encode[stbir__encode_order3] + 0.5f; STBIR_CLAMP(f, 0, 255); output[3-4] = (unsigned char)f;
17033- output += 4;
17034- encode += 4;
17035- }
17036- output -= 4;
17037- #endif
17038-
17039- #endif
17040-
17041- // do the remnants
17042- #if stbir__coder_min_num < 4
17043- STBIR_NO_UNROLL_LOOP_START
17044- while( output < end_output )
17045- {
17046- float f;
17047- STBIR_NO_UNROLL(encode);
17048- f = encode[stbir__encode_order0] + 0.5f; STBIR_CLAMP(f, 0, 255); output[0] = (unsigned char)f;
17049- #if stbir__coder_min_num >= 2
17050- f = encode[stbir__encode_order1] + 0.5f; STBIR_CLAMP(f, 0, 255); output[1] = (unsigned char)f;
17051- #endif
17052- #if stbir__coder_min_num >= 3
17053- f = encode[stbir__encode_order2] + 0.5f; STBIR_CLAMP(f, 0, 255); output[2] = (unsigned char)f;
17054- #endif
17055- output += stbir__coder_min_num;
17056- encode += stbir__coder_min_num;
17057- }
17058- #endif
17059-}
17060-
17061-static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int width_times_channels, void const * inputp )
17062-{
17063- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
17064- float * decode_end = (float*) decode + width_times_channels;
17065- unsigned char const * input = (unsigned char const *)inputp;
17066-
17067- // try to do blocks of 4 when you can
17068- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17069- decode += 4;
17070- while( decode <= decode_end )
17071- {
17072- decode[0-4] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order0 ] ];
17073- decode[1-4] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order1 ] ];
17074- decode[2-4] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order2 ] ];
17075- decode[3-4] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order3 ] ];
17076- decode += 4;
17077- input += 4;
17078- }
17079- decode -= 4;
17080- #endif
17081-
17082- // do the remnants
17083- #if stbir__coder_min_num < 4
17084- STBIR_NO_UNROLL_LOOP_START
17085- while( decode < decode_end )
17086- {
17087- STBIR_NO_UNROLL(decode);
17088- decode[0] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order0 ] ];
17089- #if stbir__coder_min_num >= 2
17090- decode[1] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order1 ] ];
17091- #endif
17092- #if stbir__coder_min_num >= 3
17093- decode[2] = stbir__srgb_uchar_to_linear_float[ input[ stbir__decode_order2 ] ];
17094- #endif
17095- decode += stbir__coder_min_num;
17096- input += stbir__coder_min_num;
17097- }
17098- #endif
17099- return decode_end;
17100-}
17101-
17102-#define stbir__min_max_shift20( i, f ) \
17103- stbir__simdf_max( f, f, stbir_simdf_casti(STBIR__CONSTI( STBIR_almost_zero )) ); \
17104- stbir__simdf_min( f, f, stbir_simdf_casti(STBIR__CONSTI( STBIR_almost_one )) ); \
17105- stbir__simdi_32shr( i, stbir_simdi_castf( f ), 20 );
17106-
17107-#define stbir__scale_and_convert( i, f ) \
17108- stbir__simdf_madd( f, STBIR__CONSTF( STBIR_simd_point5 ), STBIR__CONSTF( STBIR_max_uint8_as_float ), f ); \
17109- stbir__simdf_max( f, f, stbir__simdf_zeroP() ); \
17110- stbir__simdf_min( f, f, STBIR__CONSTF( STBIR_max_uint8_as_float ) ); \
17111- stbir__simdf_convert_float_to_i32( i, f );
17112-
17113-#define stbir__linear_to_srgb_finish( i, f ) \
17114-{ \
17115- stbir__simdi temp; \
17116- stbir__simdi_32shr( temp, stbir_simdi_castf( f ), 12 ) ; \
17117- stbir__simdi_and( temp, temp, STBIR__CONSTI(STBIR_mastissa_mask) ); \
17118- stbir__simdi_or( temp, temp, STBIR__CONSTI(STBIR_topscale) ); \
17119- stbir__simdi_16madd( i, i, temp ); \
17120- stbir__simdi_32shr( i, i, 16 ); \
17121-}
17122-
17123-#define stbir__simdi_table_lookup2( v0,v1, table ) \
17124-{ \
17125- stbir__simdi_u32 temp0,temp1; \
17126- temp0.m128i_i128 = v0; \
17127- temp1.m128i_i128 = v1; \
17128- temp0.m128i_u32[0] = table[temp0.m128i_i32[0]]; temp0.m128i_u32[1] = table[temp0.m128i_i32[1]]; temp0.m128i_u32[2] = table[temp0.m128i_i32[2]]; temp0.m128i_u32[3] = table[temp0.m128i_i32[3]]; \
17129- temp1.m128i_u32[0] = table[temp1.m128i_i32[0]]; temp1.m128i_u32[1] = table[temp1.m128i_i32[1]]; temp1.m128i_u32[2] = table[temp1.m128i_i32[2]]; temp1.m128i_u32[3] = table[temp1.m128i_i32[3]]; \
17130- v0 = temp0.m128i_i128; \
17131- v1 = temp1.m128i_i128; \
17132-}
17133-
17134-#define stbir__simdi_table_lookup3( v0,v1,v2, table ) \
17135-{ \
17136- stbir__simdi_u32 temp0,temp1,temp2; \
17137- temp0.m128i_i128 = v0; \
17138- temp1.m128i_i128 = v1; \
17139- temp2.m128i_i128 = v2; \
17140- temp0.m128i_u32[0] = table[temp0.m128i_i32[0]]; temp0.m128i_u32[1] = table[temp0.m128i_i32[1]]; temp0.m128i_u32[2] = table[temp0.m128i_i32[2]]; temp0.m128i_u32[3] = table[temp0.m128i_i32[3]]; \
17141- temp1.m128i_u32[0] = table[temp1.m128i_i32[0]]; temp1.m128i_u32[1] = table[temp1.m128i_i32[1]]; temp1.m128i_u32[2] = table[temp1.m128i_i32[2]]; temp1.m128i_u32[3] = table[temp1.m128i_i32[3]]; \
17142- temp2.m128i_u32[0] = table[temp2.m128i_i32[0]]; temp2.m128i_u32[1] = table[temp2.m128i_i32[1]]; temp2.m128i_u32[2] = table[temp2.m128i_i32[2]]; temp2.m128i_u32[3] = table[temp2.m128i_i32[3]]; \
17143- v0 = temp0.m128i_i128; \
17144- v1 = temp1.m128i_i128; \
17145- v2 = temp2.m128i_i128; \
17146-}
17147-
17148-#define stbir__simdi_table_lookup4( v0,v1,v2,v3, table ) \
17149-{ \
17150- stbir__simdi_u32 temp0,temp1,temp2,temp3; \
17151- temp0.m128i_i128 = v0; \
17152- temp1.m128i_i128 = v1; \
17153- temp2.m128i_i128 = v2; \
17154- temp3.m128i_i128 = v3; \
17155- temp0.m128i_u32[0] = table[temp0.m128i_i32[0]]; temp0.m128i_u32[1] = table[temp0.m128i_i32[1]]; temp0.m128i_u32[2] = table[temp0.m128i_i32[2]]; temp0.m128i_u32[3] = table[temp0.m128i_i32[3]]; \
17156- temp1.m128i_u32[0] = table[temp1.m128i_i32[0]]; temp1.m128i_u32[1] = table[temp1.m128i_i32[1]]; temp1.m128i_u32[2] = table[temp1.m128i_i32[2]]; temp1.m128i_u32[3] = table[temp1.m128i_i32[3]]; \
17157- temp2.m128i_u32[0] = table[temp2.m128i_i32[0]]; temp2.m128i_u32[1] = table[temp2.m128i_i32[1]]; temp2.m128i_u32[2] = table[temp2.m128i_i32[2]]; temp2.m128i_u32[3] = table[temp2.m128i_i32[3]]; \
17158- temp3.m128i_u32[0] = table[temp3.m128i_i32[0]]; temp3.m128i_u32[1] = table[temp3.m128i_i32[1]]; temp3.m128i_u32[2] = table[temp3.m128i_i32[2]]; temp3.m128i_u32[3] = table[temp3.m128i_i32[3]]; \
17159- v0 = temp0.m128i_i128; \
17160- v1 = temp1.m128i_i128; \
17161- v2 = temp2.m128i_i128; \
17162- v3 = temp3.m128i_i128; \
17163-}
17164-
17165-static void STBIR__CODER_NAME( stbir__encode_uint8_srgb )( void * outputp, int width_times_channels, float const * encode )
17166-{
17167- unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char*) outputp;
17168- unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels;
17169-
17170- #ifdef STBIR_SIMD
17171-
17172- if ( width_times_channels >= 16 )
17173- {
17174- float const * end_encode_m16 = encode + width_times_channels - 16;
17175- end_output -= 16;
17176- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
17177- for(;;)
17178- {
17179- stbir__simdf f0, f1, f2, f3;
17180- stbir__simdi i0, i1, i2, i3;
17181- STBIR_SIMD_NO_UNROLL(encode);
17182-
17183- stbir__simdf_load4_transposed( f0, f1, f2, f3, encode );
17184-
17185- stbir__min_max_shift20( i0, f0 );
17186- stbir__min_max_shift20( i1, f1 );
17187- stbir__min_max_shift20( i2, f2 );
17188- stbir__min_max_shift20( i3, f3 );
17189-
17190- stbir__simdi_table_lookup4( i0, i1, i2, i3, ( fp32_to_srgb8_tab4 - (127-13)*8 ) );
17191-
17192- stbir__linear_to_srgb_finish( i0, f0 );
17193- stbir__linear_to_srgb_finish( i1, f1 );
17194- stbir__linear_to_srgb_finish( i2, f2 );
17195- stbir__linear_to_srgb_finish( i3, f3 );
17196-
17197- stbir__interleave_pack_and_store_16_u8( output, STBIR_strs_join1(i, ,stbir__encode_order0), STBIR_strs_join1(i, ,stbir__encode_order1), STBIR_strs_join1(i, ,stbir__encode_order2), STBIR_strs_join1(i, ,stbir__encode_order3) );
17198-
17199- encode += 16;
17200- output += 16;
17201- if ( output <= end_output )
17202- continue;
17203- if ( output == ( end_output + 16 ) )
17204- break;
17205- output = end_output; // backup and do last couple
17206- encode = end_encode_m16;
17207- }
17208- return;
17209- }
17210- #endif
17211-
17212- // try to do blocks of 4 when you can
17213- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17214- output += 4;
17215- STBIR_SIMD_NO_UNROLL_LOOP_START
17216- while ( output <= end_output )
17217- {
17218- STBIR_SIMD_NO_UNROLL(encode);
17219-
17220- output[0-4] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order0] );
17221- output[1-4] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order1] );
17222- output[2-4] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order2] );
17223- output[3-4] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order3] );
17224-
17225- output += 4;
17226- encode += 4;
17227- }
17228- output -= 4;
17229- #endif
17230-
17231- // do the remnants
17232- #if stbir__coder_min_num < 4
17233- STBIR_NO_UNROLL_LOOP_START
17234- while( output < end_output )
17235- {
17236- STBIR_NO_UNROLL(encode);
17237- output[0] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order0] );
17238- #if stbir__coder_min_num >= 2
17239- output[1] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order1] );
17240- #endif
17241- #if stbir__coder_min_num >= 3
17242- output[2] = stbir__linear_to_srgb_uchar( encode[stbir__encode_order2] );
17243- #endif
17244- output += stbir__coder_min_num;
17245- encode += stbir__coder_min_num;
17246- }
17247- #endif
17248-}
17249-
17250-#if ( stbir__coder_min_num == 4 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) )
17251-
17252-static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
17253-{
17254- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
17255- float * decode_end = (float*) decode + width_times_channels;
17256- unsigned char const * input = (unsigned char const *)inputp;
17257-
17258- do {
17259- decode[0] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0] ];
17260- decode[1] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order1] ];
17261- decode[2] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order2] ];
17262- decode[3] = ( (float) input[stbir__decode_order3] ) * stbir__max_uint8_as_float_inverted;
17263- input += 4;
17264- decode += 4;
17265- } while( decode < decode_end );
17266- return decode_end;
17267-}
17268-
17269-
17270-static void STBIR__CODER_NAME( stbir__encode_uint8_srgb4_linearalpha )( void * outputp, int width_times_channels, float const * encode )
17271-{
17272- unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char*) outputp;
17273- unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels;
17274-
17275- #ifdef STBIR_SIMD
17276-
17277- if ( width_times_channels >= 16 )
17278- {
17279- float const * end_encode_m16 = encode + width_times_channels - 16;
17280- end_output -= 16;
17281- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
17282- for(;;)
17283- {
17284- stbir__simdf f0, f1, f2, f3;
17285- stbir__simdi i0, i1, i2, i3;
17286-
17287- STBIR_SIMD_NO_UNROLL(encode);
17288- stbir__simdf_load4_transposed( f0, f1, f2, f3, encode );
17289-
17290- stbir__min_max_shift20( i0, f0 );
17291- stbir__min_max_shift20( i1, f1 );
17292- stbir__min_max_shift20( i2, f2 );
17293- stbir__scale_and_convert( i3, f3 );
17294-
17295- stbir__simdi_table_lookup3( i0, i1, i2, ( fp32_to_srgb8_tab4 - (127-13)*8 ) );
17296-
17297- stbir__linear_to_srgb_finish( i0, f0 );
17298- stbir__linear_to_srgb_finish( i1, f1 );
17299- stbir__linear_to_srgb_finish( i2, f2 );
17300-
17301- stbir__interleave_pack_and_store_16_u8( output, STBIR_strs_join1(i, ,stbir__encode_order0), STBIR_strs_join1(i, ,stbir__encode_order1), STBIR_strs_join1(i, ,stbir__encode_order2), STBIR_strs_join1(i, ,stbir__encode_order3) );
17302-
17303- output += 16;
17304- encode += 16;
17305-
17306- if ( output <= end_output )
17307- continue;
17308- if ( output == ( end_output + 16 ) )
17309- break;
17310- output = end_output; // backup and do last couple
17311- encode = end_encode_m16;
17312- }
17313- return;
17314- }
17315- #endif
17316-
17317- STBIR_SIMD_NO_UNROLL_LOOP_START
17318- do {
17319- float f;
17320- STBIR_SIMD_NO_UNROLL(encode);
17321-
17322- output[stbir__decode_order0] = stbir__linear_to_srgb_uchar( encode[0] );
17323- output[stbir__decode_order1] = stbir__linear_to_srgb_uchar( encode[1] );
17324- output[stbir__decode_order2] = stbir__linear_to_srgb_uchar( encode[2] );
17325-
17326- f = encode[3] * stbir__max_uint8_as_float + 0.5f;
17327- STBIR_CLAMP(f, 0, 255);
17328- output[stbir__decode_order3] = (unsigned char) f;
17329+#define stbir__encode_simdfX_unflip stbir__encode_simdf4_unflip
17330+#endif
17331
17332- output += 4;
17333- encode += 4;
17334- } while( output < end_output );
17335-}
17336+static float *
17337+STBIR__CODER_NAME(stbir__decode_uint8_linear_scaled)(float *decodep,
17338+ int width_times_channels,
17339+ void const *inputp)
17340+{
17341+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
17342+ float *decode_end = (float *)decode + width_times_channels;
17343+ unsigned char const *input = (unsigned char const *)inputp;
17344+
17345+#ifdef STBIR_SIMD
17346+ unsigned char const *end_input_m16 = input + width_times_channels - 16;
17347+ if (width_times_channels >= 16) {
17348+ decode_end -= 16;
17349+ STBIR_NO_UNROLL_LOOP_START_INF_FOR
17350+ for (;;) {
17351+#ifdef STBIR_SIMD8
17352+ stbir__simdi i;
17353+ stbir__simdi8 o0, o1;
17354+ stbir__simdf8 of0, of1;
17355+ STBIR_NO_UNROLL(decode);
17356+ stbir__simdi_load(i, input);
17357+ stbir__simdi8_expand_u8_to_u32(o0, o1, i);
17358+ stbir__simdi8_convert_i32_to_float(of0, o0);
17359+ stbir__simdi8_convert_i32_to_float(of1, o1);
17360+ stbir__simdf8_mult(of0, of0, STBIR_max_uint8_as_float_inverted8);
17361+ stbir__simdf8_mult(of1, of1, STBIR_max_uint8_as_float_inverted8);
17362+ stbir__decode_simdf8_flip(of0);
17363+ stbir__decode_simdf8_flip(of1);
17364+ stbir__simdf8_store(decode + 0, of0);
17365+ stbir__simdf8_store(decode + 8, of1);
17366+#else
17367+ stbir__simdi i, o0, o1, o2, o3;
17368+ stbir__simdf of0, of1, of2, of3;
17369+ STBIR_NO_UNROLL(decode);
17370+ stbir__simdi_load(i, input);
17371+ stbir__simdi_expand_u8_to_u32(o0, o1, o2, o3, i);
17372+ stbir__simdi_convert_i32_to_float(of0, o0);
17373+ stbir__simdi_convert_i32_to_float(of1, o1);
17374+ stbir__simdi_convert_i32_to_float(of2, o2);
17375+ stbir__simdi_convert_i32_to_float(of3, o3);
17376+ stbir__simdf_mult(of0, of0,
17377+ STBIR__CONSTF(STBIR_max_uint8_as_float_inverted));
17378+ stbir__simdf_mult(of1, of1,
17379+ STBIR__CONSTF(STBIR_max_uint8_as_float_inverted));
17380+ stbir__simdf_mult(of2, of2,
17381+ STBIR__CONSTF(STBIR_max_uint8_as_float_inverted));
17382+ stbir__simdf_mult(of3, of3,
17383+ STBIR__CONSTF(STBIR_max_uint8_as_float_inverted));
17384+ stbir__decode_simdf4_flip(of0);
17385+ stbir__decode_simdf4_flip(of1);
17386+ stbir__decode_simdf4_flip(of2);
17387+ stbir__decode_simdf4_flip(of3);
17388+ stbir__simdf_store(decode + 0, of0);
17389+ stbir__simdf_store(decode + 4, of1);
17390+ stbir__simdf_store(decode + 8, of2);
17391+ stbir__simdf_store(decode + 12, of3);
17392+#endif
17393+ decode += 16;
17394+ input += 16;
17395+ if (decode <= decode_end) {
17396+ continue;
17397+ }
17398+ if (decode == (decode_end + 16)) {
17399+ break;
17400+ }
17401+ decode = decode_end; // backup and do last couple
17402+ input = end_input_m16;
17403+ }
17404+ return decode_end + 16;
17405+ }
17406+#endif
17407+
17408+// try to do blocks of 4 when you can
17409+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17410+ decode += 4;
17411+ STBIR_SIMD_NO_UNROLL_LOOP_START
17412+ while (decode <= decode_end) {
17413+ STBIR_SIMD_NO_UNROLL(decode);
17414+ decode[0 - 4] = ((float)(input[stbir__decode_order0])) *
17415+ stbir__max_uint8_as_float_inverted;
17416+ decode[1 - 4] = ((float)(input[stbir__decode_order1])) *
17417+ stbir__max_uint8_as_float_inverted;
17418+ decode[2 - 4] = ((float)(input[stbir__decode_order2])) *
17419+ stbir__max_uint8_as_float_inverted;
17420+ decode[3 - 4] = ((float)(input[stbir__decode_order3])) *
17421+ stbir__max_uint8_as_float_inverted;
17422+ decode += 4;
17423+ input += 4;
17424+ }
17425+ decode -= 4;
17426+#endif
17427
17428+// do the remnants
17429+#if stbir__coder_min_num < 4
17430+ STBIR_NO_UNROLL_LOOP_START
17431+ while (decode < decode_end) {
17432+ STBIR_NO_UNROLL(decode);
17433+ decode[0] = ((float)(input[stbir__decode_order0])) *
17434+ stbir__max_uint8_as_float_inverted;
17435+#if stbir__coder_min_num >= 2
17436+ decode[1] = ((float)(input[stbir__decode_order1])) *
17437+ stbir__max_uint8_as_float_inverted;
17438+#endif
17439+#if stbir__coder_min_num >= 3
17440+ decode[2] = ((float)(input[stbir__decode_order2])) *
17441+ stbir__max_uint8_as_float_inverted;
17442+#endif
17443+ decode += stbir__coder_min_num;
17444+ input += stbir__coder_min_num;
17445+ }
17446 #endif
17447
17448-#if ( stbir__coder_min_num == 2 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) )
17449+ return decode_end;
17450+}
17451
17452-static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
17453+static void
17454+STBIR__CODER_NAME(stbir__encode_uint8_linear_scaled)(void *outputp,
17455+ int width_times_channels,
17456+ float const *encode)
17457 {
17458- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
17459- float * decode_end = (float*) decode + width_times_channels;
17460- unsigned char const * input = (unsigned char const *)inputp;
17461+ unsigned char STBIR_SIMD_STREAMOUT_PTR(*) output = (unsigned char *)outputp;
17462+ unsigned char *end_output =
17463+ ((unsigned char *)output) + width_times_channels;
17464
17465- decode += 4;
17466- while( decode <= decode_end )
17467- {
17468- decode[0-4] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0] ];
17469- decode[1-4] = ( (float) input[stbir__decode_order1] ) * stbir__max_uint8_as_float_inverted;
17470- decode[2-4] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0+2] ];
17471- decode[3-4] = ( (float) input[stbir__decode_order1+2] ) * stbir__max_uint8_as_float_inverted;
17472- input += 4;
17473- decode += 4;
17474- }
17475- decode -= 4;
17476- if( decode < decode_end )
17477- {
17478- decode[0] = stbir__srgb_uchar_to_linear_float[ stbir__decode_order0 ];
17479- decode[1] = ( (float) input[stbir__decode_order1] ) * stbir__max_uint8_as_float_inverted;
17480- }
17481- return decode_end;
17482-}
17483-
17484-static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * outputp, int width_times_channels, float const * encode )
17485-{
17486- unsigned char STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned char*) outputp;
17487- unsigned char * end_output = ( (unsigned char*) output ) + width_times_channels;
17488-
17489- #ifdef STBIR_SIMD
17490-
17491- if ( width_times_channels >= 16 )
17492- {
17493- float const * end_encode_m16 = encode + width_times_channels - 16;
17494- end_output -= 16;
17495- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
17496- for(;;)
17497- {
17498- stbir__simdf f0, f1, f2, f3;
17499- stbir__simdi i0, i1, i2, i3;
17500-
17501- STBIR_SIMD_NO_UNROLL(encode);
17502- stbir__simdf_load4_transposed( f0, f1, f2, f3, encode );
17503-
17504- stbir__min_max_shift20( i0, f0 );
17505- stbir__scale_and_convert( i1, f1 );
17506- stbir__min_max_shift20( i2, f2 );
17507- stbir__scale_and_convert( i3, f3 );
17508-
17509- stbir__simdi_table_lookup2( i0, i2, ( fp32_to_srgb8_tab4 - (127-13)*8 ) );
17510-
17511- stbir__linear_to_srgb_finish( i0, f0 );
17512- stbir__linear_to_srgb_finish( i2, f2 );
17513-
17514- stbir__interleave_pack_and_store_16_u8( output, STBIR_strs_join1(i, ,stbir__encode_order0), STBIR_strs_join1(i, ,stbir__encode_order1), STBIR_strs_join1(i, ,stbir__encode_order2), STBIR_strs_join1(i, ,stbir__encode_order3) );
17515-
17516- output += 16;
17517- encode += 16;
17518- if ( output <= end_output )
17519- continue;
17520- if ( output == ( end_output + 16 ) )
17521- break;
17522- output = end_output; // backup and do last couple
17523- encode = end_encode_m16;
17524- }
17525- return;
17526- }
17527- #endif
17528-
17529- STBIR_SIMD_NO_UNROLL_LOOP_START
17530- do {
17531- float f;
17532- STBIR_SIMD_NO_UNROLL(encode);
17533-
17534- output[stbir__decode_order0] = stbir__linear_to_srgb_uchar( encode[0] );
17535-
17536- f = encode[1] * stbir__max_uint8_as_float + 0.5f;
17537- STBIR_CLAMP(f, 0, 255);
17538- output[stbir__decode_order1] = (unsigned char) f;
17539-
17540- output += 2;
17541- encode += 2;
17542- } while( output < end_output );
17543-}
17544-
17545-#endif
17546-
17547-static float * STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decodep, int width_times_channels, void const * inputp )
17548-{
17549- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
17550- float * decode_end = (float*) decode + width_times_channels;
17551- unsigned short const * input = (unsigned short const *)inputp;
17552-
17553- #ifdef STBIR_SIMD
17554- unsigned short const * end_input_m8 = input + width_times_channels - 8;
17555- if ( width_times_channels >= 8 )
17556- {
17557- decode_end -= 8;
17558- STBIR_NO_UNROLL_LOOP_START_INF_FOR
17559- for(;;)
17560- {
17561- #ifdef STBIR_SIMD8
17562- stbir__simdi i; stbir__simdi8 o;
17563- stbir__simdf8 of;
17564- STBIR_NO_UNROLL(decode);
17565- stbir__simdi_load( i, input );
17566- stbir__simdi8_expand_u16_to_u32( o, i );
17567- stbir__simdi8_convert_i32_to_float( of, o );
17568- stbir__simdf8_mult( of, of, STBIR_max_uint16_as_float_inverted8);
17569- stbir__decode_simdf8_flip( of );
17570- stbir__simdf8_store( decode + 0, of );
17571- #else
17572- stbir__simdi i, o0, o1;
17573- stbir__simdf of0, of1;
17574- STBIR_NO_UNROLL(decode);
17575- stbir__simdi_load( i, input );
17576- stbir__simdi_expand_u16_to_u32( o0,o1,i );
17577- stbir__simdi_convert_i32_to_float( of0, o0 );
17578- stbir__simdi_convert_i32_to_float( of1, o1 );
17579- stbir__simdf_mult( of0, of0, STBIR__CONSTF(STBIR_max_uint16_as_float_inverted) );
17580- stbir__simdf_mult( of1, of1, STBIR__CONSTF(STBIR_max_uint16_as_float_inverted));
17581- stbir__decode_simdf4_flip( of0 );
17582- stbir__decode_simdf4_flip( of1 );
17583- stbir__simdf_store( decode + 0, of0 );
17584- stbir__simdf_store( decode + 4, of1 );
17585- #endif
17586- decode += 8;
17587- input += 8;
17588- if ( decode <= decode_end )
17589- continue;
17590- if ( decode == ( decode_end + 8 ) )
17591- break;
17592- decode = decode_end; // backup and do last couple
17593- input = end_input_m8;
17594- }
17595- return decode_end + 8;
17596- }
17597- #endif
17598-
17599- // try to do blocks of 4 when you can
17600- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17601- decode += 4;
17602- STBIR_SIMD_NO_UNROLL_LOOP_START
17603- while( decode <= decode_end )
17604- {
17605- STBIR_SIMD_NO_UNROLL(decode);
17606- decode[0-4] = ((float)(input[stbir__decode_order0])) * stbir__max_uint16_as_float_inverted;
17607- decode[1-4] = ((float)(input[stbir__decode_order1])) * stbir__max_uint16_as_float_inverted;
17608- decode[2-4] = ((float)(input[stbir__decode_order2])) * stbir__max_uint16_as_float_inverted;
17609- decode[3-4] = ((float)(input[stbir__decode_order3])) * stbir__max_uint16_as_float_inverted;
17610- decode += 4;
17611- input += 4;
17612- }
17613- decode -= 4;
17614- #endif
17615-
17616- // do the remnants
17617- #if stbir__coder_min_num < 4
17618- STBIR_NO_UNROLL_LOOP_START
17619- while( decode < decode_end )
17620- {
17621- STBIR_NO_UNROLL(decode);
17622- decode[0] = ((float)(input[stbir__decode_order0])) * stbir__max_uint16_as_float_inverted;
17623- #if stbir__coder_min_num >= 2
17624- decode[1] = ((float)(input[stbir__decode_order1])) * stbir__max_uint16_as_float_inverted;
17625- #endif
17626- #if stbir__coder_min_num >= 3
17627- decode[2] = ((float)(input[stbir__decode_order2])) * stbir__max_uint16_as_float_inverted;
17628- #endif
17629- decode += stbir__coder_min_num;
17630- input += stbir__coder_min_num;
17631- }
17632- #endif
17633- return decode_end;
17634-}
17635-
17636-
17637-static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * outputp, int width_times_channels, float const * encode )
17638-{
17639- unsigned short STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned short*) outputp;
17640- unsigned short * end_output = ( (unsigned short*) output ) + width_times_channels;
17641-
17642- #ifdef STBIR_SIMD
17643- {
17644- if ( width_times_channels >= stbir__simdfX_float_count*2 )
17645- {
17646- float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2;
17647- end_output -= stbir__simdfX_float_count*2;
17648- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
17649- for(;;)
17650- {
17651- stbir__simdfX e0, e1;
17652- stbir__simdiX i;
17653- STBIR_SIMD_NO_UNROLL(encode);
17654- stbir__simdfX_madd_mem( e0, STBIR_simd_point5X, STBIR_max_uint16_as_floatX, encode );
17655- stbir__simdfX_madd_mem( e1, STBIR_simd_point5X, STBIR_max_uint16_as_floatX, encode+stbir__simdfX_float_count );
17656- stbir__encode_simdfX_unflip( e0 );
17657- stbir__encode_simdfX_unflip( e1 );
17658- stbir__simdfX_pack_to_words( i, e0, e1 );
17659- stbir__simdiX_store( output, i );
17660- encode += stbir__simdfX_float_count*2;
17661- output += stbir__simdfX_float_count*2;
17662- if ( output <= end_output )
17663- continue;
17664- if ( output == ( end_output + stbir__simdfX_float_count*2 ) )
17665- break;
17666- output = end_output; // backup and do last couple
17667- encode = end_encode_m8;
17668- }
17669- return;
17670- }
17671- }
17672-
17673- // try to do blocks of 4 when you can
17674- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17675- output += 4;
17676- STBIR_NO_UNROLL_LOOP_START
17677- while( output <= end_output )
17678- {
17679- stbir__simdf e;
17680- stbir__simdi i;
17681- STBIR_NO_UNROLL(encode);
17682- stbir__simdf_load( e, encode );
17683- stbir__simdf_madd( e, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint16_as_float), e );
17684- stbir__encode_simdf4_unflip( e );
17685- stbir__simdf_pack_to_8words( i, e, e ); // only use first 4
17686- stbir__simdi_store2( output-4, i );
17687- output += 4;
17688- encode += 4;
17689- }
17690- output -= 4;
17691- #endif
17692-
17693- // do the remnants
17694- #if stbir__coder_min_num < 4
17695- STBIR_NO_UNROLL_LOOP_START
17696- while( output < end_output )
17697- {
17698- stbir__simdf e;
17699- STBIR_NO_UNROLL(encode);
17700- stbir__simdf_madd1_mem( e, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint16_as_float), encode+stbir__encode_order0 ); output[0] = stbir__simdf_convert_float_to_short( e );
17701- #if stbir__coder_min_num >= 2
17702- stbir__simdf_madd1_mem( e, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint16_as_float), encode+stbir__encode_order1 ); output[1] = stbir__simdf_convert_float_to_short( e );
17703- #endif
17704- #if stbir__coder_min_num >= 3
17705- stbir__simdf_madd1_mem( e, STBIR__CONSTF(STBIR_simd_point5), STBIR__CONSTF(STBIR_max_uint16_as_float), encode+stbir__encode_order2 ); output[2] = stbir__simdf_convert_float_to_short( e );
17706- #endif
17707- output += stbir__coder_min_num;
17708- encode += stbir__coder_min_num;
17709- }
17710- #endif
17711-
17712- #else
17713-
17714- // try to do blocks of 4 when you can
17715- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17716- output += 4;
17717- STBIR_SIMD_NO_UNROLL_LOOP_START
17718- while( output <= end_output )
17719- {
17720- float f;
17721- STBIR_SIMD_NO_UNROLL(encode);
17722- f = encode[stbir__encode_order0] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[0-4] = (unsigned short)f;
17723- f = encode[stbir__encode_order1] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[1-4] = (unsigned short)f;
17724- f = encode[stbir__encode_order2] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[2-4] = (unsigned short)f;
17725- f = encode[stbir__encode_order3] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[3-4] = (unsigned short)f;
17726- output += 4;
17727- encode += 4;
17728- }
17729- output -= 4;
17730- #endif
17731-
17732- // do the remnants
17733- #if stbir__coder_min_num < 4
17734- STBIR_NO_UNROLL_LOOP_START
17735- while( output < end_output )
17736- {
17737- float f;
17738- STBIR_NO_UNROLL(encode);
17739- f = encode[stbir__encode_order0] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[0] = (unsigned short)f;
17740- #if stbir__coder_min_num >= 2
17741- f = encode[stbir__encode_order1] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[1] = (unsigned short)f;
17742- #endif
17743- #if stbir__coder_min_num >= 3
17744- f = encode[stbir__encode_order2] * stbir__max_uint16_as_float + 0.5f; STBIR_CLAMP(f, 0, 65535); output[2] = (unsigned short)f;
17745- #endif
17746- output += stbir__coder_min_num;
17747- encode += stbir__coder_min_num;
17748- }
17749- #endif
17750- #endif
17751-}
17752-
17753-static float * STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int width_times_channels, void const * inputp )
17754-{
17755- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
17756- float * decode_end = (float*) decode + width_times_channels;
17757- unsigned short const * input = (unsigned short const *)inputp;
17758-
17759- #ifdef STBIR_SIMD
17760- unsigned short const * end_input_m8 = input + width_times_channels - 8;
17761- if ( width_times_channels >= 8 )
17762- {
17763- decode_end -= 8;
17764- STBIR_NO_UNROLL_LOOP_START_INF_FOR
17765- for(;;)
17766- {
17767- #ifdef STBIR_SIMD8
17768- stbir__simdi i; stbir__simdi8 o;
17769- stbir__simdf8 of;
17770- STBIR_NO_UNROLL(decode);
17771- stbir__simdi_load( i, input );
17772- stbir__simdi8_expand_u16_to_u32( o, i );
17773- stbir__simdi8_convert_i32_to_float( of, o );
17774- stbir__decode_simdf8_flip( of );
17775- stbir__simdf8_store( decode + 0, of );
17776- #else
17777- stbir__simdi i, o0, o1;
17778- stbir__simdf of0, of1;
17779- STBIR_NO_UNROLL(decode);
17780- stbir__simdi_load( i, input );
17781- stbir__simdi_expand_u16_to_u32( o0, o1, i );
17782- stbir__simdi_convert_i32_to_float( of0, o0 );
17783- stbir__simdi_convert_i32_to_float( of1, o1 );
17784- stbir__decode_simdf4_flip( of0 );
17785- stbir__decode_simdf4_flip( of1 );
17786- stbir__simdf_store( decode + 0, of0 );
17787- stbir__simdf_store( decode + 4, of1 );
17788- #endif
17789- decode += 8;
17790- input += 8;
17791- if ( decode <= decode_end )
17792- continue;
17793- if ( decode == ( decode_end + 8 ) )
17794- break;
17795- decode = decode_end; // backup and do last couple
17796- input = end_input_m8;
17797- }
17798- return decode_end + 8;
17799- }
17800- #endif
17801-
17802- // try to do blocks of 4 when you can
17803- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17804- decode += 4;
17805- STBIR_SIMD_NO_UNROLL_LOOP_START
17806- while( decode <= decode_end )
17807- {
17808- STBIR_SIMD_NO_UNROLL(decode);
17809- decode[0-4] = ((float)(input[stbir__decode_order0]));
17810- decode[1-4] = ((float)(input[stbir__decode_order1]));
17811- decode[2-4] = ((float)(input[stbir__decode_order2]));
17812- decode[3-4] = ((float)(input[stbir__decode_order3]));
17813- decode += 4;
17814- input += 4;
17815- }
17816- decode -= 4;
17817- #endif
17818-
17819- // do the remnants
17820- #if stbir__coder_min_num < 4
17821- STBIR_NO_UNROLL_LOOP_START
17822- while( decode < decode_end )
17823- {
17824- STBIR_NO_UNROLL(decode);
17825- decode[0] = ((float)(input[stbir__decode_order0]));
17826- #if stbir__coder_min_num >= 2
17827- decode[1] = ((float)(input[stbir__decode_order1]));
17828- #endif
17829- #if stbir__coder_min_num >= 3
17830- decode[2] = ((float)(input[stbir__decode_order2]));
17831- #endif
17832- decode += stbir__coder_min_num;
17833- input += stbir__coder_min_num;
17834- }
17835- #endif
17836- return decode_end;
17837-}
17838-
17839-static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int width_times_channels, float const * encode )
17840-{
17841- unsigned short STBIR_SIMD_STREAMOUT_PTR( * ) output = (unsigned short*) outputp;
17842- unsigned short * end_output = ( (unsigned short*) output ) + width_times_channels;
17843-
17844- #ifdef STBIR_SIMD
17845- {
17846- if ( width_times_channels >= stbir__simdfX_float_count*2 )
17847- {
17848- float const * end_encode_m8 = encode + width_times_channels - stbir__simdfX_float_count*2;
17849- end_output -= stbir__simdfX_float_count*2;
17850- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
17851- for(;;)
17852- {
17853- stbir__simdfX e0, e1;
17854- stbir__simdiX i;
17855- STBIR_SIMD_NO_UNROLL(encode);
17856- stbir__simdfX_add_mem( e0, STBIR_simd_point5X, encode );
17857- stbir__simdfX_add_mem( e1, STBIR_simd_point5X, encode+stbir__simdfX_float_count );
17858- stbir__encode_simdfX_unflip( e0 );
17859- stbir__encode_simdfX_unflip( e1 );
17860- stbir__simdfX_pack_to_words( i, e0, e1 );
17861- stbir__simdiX_store( output, i );
17862- encode += stbir__simdfX_float_count*2;
17863- output += stbir__simdfX_float_count*2;
17864- if ( output <= end_output )
17865- continue;
17866- if ( output == ( end_output + stbir__simdfX_float_count*2 ) )
17867- break;
17868- output = end_output; // backup and do last couple
17869- encode = end_encode_m8;
17870- }
17871- return;
17872- }
17873- }
17874-
17875- // try to do blocks of 4 when you can
17876- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17877- output += 4;
17878- STBIR_NO_UNROLL_LOOP_START
17879- while( output <= end_output )
17880- {
17881- stbir__simdf e;
17882- stbir__simdi i;
17883- STBIR_NO_UNROLL(encode);
17884- stbir__simdf_load( e, encode );
17885- stbir__simdf_add( e, STBIR__CONSTF(STBIR_simd_point5), e );
17886- stbir__encode_simdf4_unflip( e );
17887- stbir__simdf_pack_to_8words( i, e, e ); // only use first 4
17888- stbir__simdi_store2( output-4, i );
17889- output += 4;
17890- encode += 4;
17891- }
17892- output -= 4;
17893- #endif
17894-
17895- #else
17896-
17897- // try to do blocks of 4 when you can
17898- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17899- output += 4;
17900- STBIR_SIMD_NO_UNROLL_LOOP_START
17901- while( output <= end_output )
17902- {
17903- float f;
17904- STBIR_SIMD_NO_UNROLL(encode);
17905- f = encode[stbir__encode_order0] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[0-4] = (unsigned short)f;
17906- f = encode[stbir__encode_order1] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[1-4] = (unsigned short)f;
17907- f = encode[stbir__encode_order2] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[2-4] = (unsigned short)f;
17908- f = encode[stbir__encode_order3] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[3-4] = (unsigned short)f;
17909- output += 4;
17910- encode += 4;
17911- }
17912- output -= 4;
17913- #endif
17914-
17915- #endif
17916-
17917- // do the remnants
17918- #if stbir__coder_min_num < 4
17919- STBIR_NO_UNROLL_LOOP_START
17920- while( output < end_output )
17921- {
17922- float f;
17923- STBIR_NO_UNROLL(encode);
17924- f = encode[stbir__encode_order0] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[0] = (unsigned short)f;
17925- #if stbir__coder_min_num >= 2
17926- f = encode[stbir__encode_order1] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[1] = (unsigned short)f;
17927- #endif
17928- #if stbir__coder_min_num >= 3
17929- f = encode[stbir__encode_order2] + 0.5f; STBIR_CLAMP(f, 0, 65535); output[2] = (unsigned short)f;
17930- #endif
17931- output += stbir__coder_min_num;
17932- encode += stbir__coder_min_num;
17933- }
17934- #endif
17935-}
17936-
17937-static float * STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, int width_times_channels, void const * inputp )
17938-{
17939- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
17940- float * decode_end = (float*) decode + width_times_channels;
17941- stbir__FP16 const * input = (stbir__FP16 const *)inputp;
17942-
17943- #ifdef STBIR_SIMD
17944- if ( width_times_channels >= 8 )
17945- {
17946- stbir__FP16 const * end_input_m8 = input + width_times_channels - 8;
17947- decode_end -= 8;
17948- STBIR_NO_UNROLL_LOOP_START_INF_FOR
17949- for(;;)
17950- {
17951- STBIR_NO_UNROLL(decode);
17952-
17953- stbir__half_to_float_SIMD( decode, input );
17954- #ifdef stbir__decode_swizzle
17955- #ifdef STBIR_SIMD8
17956- {
17957- stbir__simdf8 of;
17958- stbir__simdf8_load( of, decode );
17959- stbir__decode_simdf8_flip( of );
17960- stbir__simdf8_store( decode, of );
17961- }
17962- #else
17963- {
17964- stbir__simdf of0,of1;
17965- stbir__simdf_load( of0, decode );
17966- stbir__simdf_load( of1, decode+4 );
17967- stbir__decode_simdf4_flip( of0 );
17968- stbir__decode_simdf4_flip( of1 );
17969- stbir__simdf_store( decode, of0 );
17970- stbir__simdf_store( decode+4, of1 );
17971- }
17972- #endif
17973- #endif
17974- decode += 8;
17975- input += 8;
17976- if ( decode <= decode_end )
17977- continue;
17978- if ( decode == ( decode_end + 8 ) )
17979- break;
17980- decode = decode_end; // backup and do last couple
17981- input = end_input_m8;
17982- }
17983- return decode_end + 8;
17984- }
17985- #endif
17986-
17987- // try to do blocks of 4 when you can
17988- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
17989- decode += 4;
17990- STBIR_SIMD_NO_UNROLL_LOOP_START
17991- while( decode <= decode_end )
17992- {
17993- STBIR_SIMD_NO_UNROLL(decode);
17994- decode[0-4] = stbir__half_to_float(input[stbir__decode_order0]);
17995- decode[1-4] = stbir__half_to_float(input[stbir__decode_order1]);
17996- decode[2-4] = stbir__half_to_float(input[stbir__decode_order2]);
17997- decode[3-4] = stbir__half_to_float(input[stbir__decode_order3]);
17998- decode += 4;
17999- input += 4;
18000- }
18001- decode -= 4;
18002- #endif
18003-
18004- // do the remnants
18005- #if stbir__coder_min_num < 4
18006- STBIR_NO_UNROLL_LOOP_START
18007- while( decode < decode_end )
18008- {
18009- STBIR_NO_UNROLL(decode);
18010- decode[0] = stbir__half_to_float(input[stbir__decode_order0]);
18011- #if stbir__coder_min_num >= 2
18012- decode[1] = stbir__half_to_float(input[stbir__decode_order1]);
18013- #endif
18014- #if stbir__coder_min_num >= 3
18015- decode[2] = stbir__half_to_float(input[stbir__decode_order2]);
18016- #endif
18017- decode += stbir__coder_min_num;
18018- input += stbir__coder_min_num;
18019- }
18020- #endif
18021- return decode_end;
18022-}
18023-
18024-static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp, int width_times_channels, float const * encode )
18025-{
18026- stbir__FP16 STBIR_SIMD_STREAMOUT_PTR( * ) output = (stbir__FP16*) outputp;
18027- stbir__FP16 * end_output = ( (stbir__FP16*) output ) + width_times_channels;
18028-
18029- #ifdef STBIR_SIMD
18030- if ( width_times_channels >= 8 )
18031- {
18032- float const * end_encode_m8 = encode + width_times_channels - 8;
18033- end_output -= 8;
18034- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
18035- for(;;)
18036- {
18037- STBIR_SIMD_NO_UNROLL(encode);
18038- #ifdef stbir__decode_swizzle
18039- #ifdef STBIR_SIMD8
18040- {
18041- stbir__simdf8 of;
18042- stbir__simdf8_load( of, encode );
18043- stbir__encode_simdf8_unflip( of );
18044- stbir__float_to_half_SIMD( output, (float*)&of );
18045- }
18046- #else
18047- {
18048- stbir__simdf of[2];
18049- stbir__simdf_load( of[0], encode );
18050- stbir__simdf_load( of[1], encode+4 );
18051- stbir__encode_simdf4_unflip( of[0] );
18052- stbir__encode_simdf4_unflip( of[1] );
18053- stbir__float_to_half_SIMD( output, (float*)of );
18054- }
18055- #endif
18056- #else
18057- stbir__float_to_half_SIMD( output, encode );
18058- #endif
18059- encode += 8;
18060- output += 8;
18061- if ( output <= end_output )
18062- continue;
18063- if ( output == ( end_output + 8 ) )
18064- break;
18065- output = end_output; // backup and do last couple
18066- encode = end_encode_m8;
18067- }
18068- return;
18069- }
18070- #endif
18071-
18072- // try to do blocks of 4 when you can
18073- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18074- output += 4;
18075- STBIR_SIMD_NO_UNROLL_LOOP_START
18076- while( output <= end_output )
18077- {
18078- STBIR_SIMD_NO_UNROLL(output);
18079- output[0-4] = stbir__float_to_half(encode[stbir__encode_order0]);
18080- output[1-4] = stbir__float_to_half(encode[stbir__encode_order1]);
18081- output[2-4] = stbir__float_to_half(encode[stbir__encode_order2]);
18082- output[3-4] = stbir__float_to_half(encode[stbir__encode_order3]);
18083- output += 4;
18084- encode += 4;
18085- }
18086- output -= 4;
18087- #endif
18088-
18089- // do the remnants
18090- #if stbir__coder_min_num < 4
18091- STBIR_NO_UNROLL_LOOP_START
18092- while( output < end_output )
18093- {
18094- STBIR_NO_UNROLL(output);
18095- output[0] = stbir__float_to_half(encode[stbir__encode_order0]);
18096- #if stbir__coder_min_num >= 2
18097- output[1] = stbir__float_to_half(encode[stbir__encode_order1]);
18098- #endif
18099- #if stbir__coder_min_num >= 3
18100- output[2] = stbir__float_to_half(encode[stbir__encode_order2]);
18101- #endif
18102- output += stbir__coder_min_num;
18103- encode += stbir__coder_min_num;
18104- }
18105- #endif
18106-}
18107-
18108-static float * STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int width_times_channels, void const * inputp )
18109-{
18110- #ifdef stbir__decode_swizzle
18111- float STBIR_STREAMOUT_PTR( * ) decode = decodep;
18112- float * decode_end = (float*) decode + width_times_channels;
18113- float const * input = (float const *)inputp;
18114-
18115- #ifdef STBIR_SIMD
18116- if ( width_times_channels >= 16 )
18117- {
18118- float const * end_input_m16 = input + width_times_channels - 16;
18119- decode_end -= 16;
18120- STBIR_NO_UNROLL_LOOP_START_INF_FOR
18121- for(;;)
18122- {
18123- STBIR_NO_UNROLL(decode);
18124- #ifdef stbir__decode_swizzle
18125- #ifdef STBIR_SIMD8
18126- {
18127- stbir__simdf8 of0,of1;
18128- stbir__simdf8_load( of0, input );
18129- stbir__simdf8_load( of1, input+8 );
18130- stbir__decode_simdf8_flip( of0 );
18131- stbir__decode_simdf8_flip( of1 );
18132- stbir__simdf8_store( decode, of0 );
18133- stbir__simdf8_store( decode+8, of1 );
18134- }
18135- #else
18136- {
18137- stbir__simdf of0,of1,of2,of3;
18138- stbir__simdf_load( of0, input );
18139- stbir__simdf_load( of1, input+4 );
18140- stbir__simdf_load( of2, input+8 );
18141- stbir__simdf_load( of3, input+12 );
18142- stbir__decode_simdf4_flip( of0 );
18143- stbir__decode_simdf4_flip( of1 );
18144- stbir__decode_simdf4_flip( of2 );
18145- stbir__decode_simdf4_flip( of3 );
18146- stbir__simdf_store( decode, of0 );
18147- stbir__simdf_store( decode+4, of1 );
18148- stbir__simdf_store( decode+8, of2 );
18149- stbir__simdf_store( decode+12, of3 );
18150- }
18151- #endif
18152- #endif
18153- decode += 16;
18154- input += 16;
18155- if ( decode <= decode_end )
18156- continue;
18157- if ( decode == ( decode_end + 16 ) )
18158- break;
18159- decode = decode_end; // backup and do last couple
18160- input = end_input_m16;
18161- }
18162- return decode_end + 16;
18163- }
18164- #endif
18165-
18166- // try to do blocks of 4 when you can
18167- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18168- decode += 4;
18169- STBIR_SIMD_NO_UNROLL_LOOP_START
18170- while( decode <= decode_end )
18171- {
18172- STBIR_SIMD_NO_UNROLL(decode);
18173- decode[0-4] = input[stbir__decode_order0];
18174- decode[1-4] = input[stbir__decode_order1];
18175- decode[2-4] = input[stbir__decode_order2];
18176- decode[3-4] = input[stbir__decode_order3];
18177- decode += 4;
18178- input += 4;
18179- }
18180- decode -= 4;
18181- #endif
18182-
18183- // do the remnants
18184- #if stbir__coder_min_num < 4
18185- STBIR_NO_UNROLL_LOOP_START
18186- while( decode < decode_end )
18187- {
18188- STBIR_NO_UNROLL(decode);
18189- decode[0] = input[stbir__decode_order0];
18190- #if stbir__coder_min_num >= 2
18191- decode[1] = input[stbir__decode_order1];
18192- #endif
18193- #if stbir__coder_min_num >= 3
18194- decode[2] = input[stbir__decode_order2];
18195- #endif
18196- decode += stbir__coder_min_num;
18197- input += stbir__coder_min_num;
18198- }
18199- #endif
18200- return decode_end;
18201-
18202- #else
18203-
18204- if ( (void*)decodep != inputp )
18205- STBIR_MEMCPY( decodep, inputp, width_times_channels * sizeof( float ) );
18206-
18207- return decodep + width_times_channels;
18208-
18209- #endif
18210-}
18211-
18212-static void STBIR__CODER_NAME( stbir__encode_float_linear )( void * outputp, int width_times_channels, float const * encode )
18213-{
18214- #if !defined( STBIR_FLOAT_HIGH_CLAMP ) && !defined(STBIR_FLOAT_LO_CLAMP) && !defined(stbir__decode_swizzle)
18215-
18216- if ( (void*)outputp != (void*) encode )
18217- STBIR_MEMCPY( outputp, encode, width_times_channels * sizeof( float ) );
18218-
18219- #else
18220-
18221- float STBIR_SIMD_STREAMOUT_PTR( * ) output = (float*) outputp;
18222- float * end_output = ( (float*) output ) + width_times_channels;
18223-
18224- #ifdef STBIR_FLOAT_HIGH_CLAMP
18225- #define stbir_scalar_hi_clamp( v ) if ( v > STBIR_FLOAT_HIGH_CLAMP ) v = STBIR_FLOAT_HIGH_CLAMP;
18226- #else
18227- #define stbir_scalar_hi_clamp( v )
18228- #endif
18229- #ifdef STBIR_FLOAT_LOW_CLAMP
18230- #define stbir_scalar_lo_clamp( v ) if ( v < STBIR_FLOAT_LOW_CLAMP ) v = STBIR_FLOAT_LOW_CLAMP;
18231- #else
18232- #define stbir_scalar_lo_clamp( v )
18233- #endif
18234-
18235- #ifdef STBIR_SIMD
18236-
18237- #ifdef STBIR_FLOAT_HIGH_CLAMP
18238- const stbir__simdfX high_clamp = stbir__simdf_frepX(STBIR_FLOAT_HIGH_CLAMP);
18239- #endif
18240- #ifdef STBIR_FLOAT_LOW_CLAMP
18241- const stbir__simdfX low_clamp = stbir__simdf_frepX(STBIR_FLOAT_LOW_CLAMP);
18242- #endif
18243-
18244- if ( width_times_channels >= ( stbir__simdfX_float_count * 2 ) )
18245- {
18246- float const * end_encode_m8 = encode + width_times_channels - ( stbir__simdfX_float_count * 2 );
18247- end_output -= ( stbir__simdfX_float_count * 2 );
18248- STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
18249- for(;;)
18250- {
18251- stbir__simdfX e0, e1;
18252- STBIR_SIMD_NO_UNROLL(encode);
18253- stbir__simdfX_load( e0, encode );
18254- stbir__simdfX_load( e1, encode+stbir__simdfX_float_count );
18255-#ifdef STBIR_FLOAT_HIGH_CLAMP
18256- stbir__simdfX_min( e0, e0, high_clamp );
18257- stbir__simdfX_min( e1, e1, high_clamp );
18258+#ifdef STBIR_SIMD
18259+ if (width_times_channels >= stbir__simdfX_float_count * 2) {
18260+ float const *end_encode_m8 =
18261+ encode + width_times_channels - stbir__simdfX_float_count * 2;
18262+ end_output -= stbir__simdfX_float_count * 2;
18263+ STBIR_NO_UNROLL_LOOP_START_INF_FOR
18264+ for (;;) {
18265+ stbir__simdfX e0, e1;
18266+ stbir__simdi i;
18267+ STBIR_SIMD_NO_UNROLL(encode);
18268+ stbir__simdfX_madd_mem(e0, STBIR_simd_point5X,
18269+ STBIR_max_uint8_as_floatX, encode);
18270+ stbir__simdfX_madd_mem(e1, STBIR_simd_point5X,
18271+ STBIR_max_uint8_as_floatX,
18272+ encode + stbir__simdfX_float_count);
18273+ stbir__encode_simdfX_unflip(e0);
18274+ stbir__encode_simdfX_unflip(e1);
18275+#ifdef STBIR_SIMD8
18276+ stbir__simdf8_pack_to_16bytes(i, e0, e1);
18277+ stbir__simdi_store(output, i);
18278+#else
18279+ stbir__simdf_pack_to_8bytes(i, e0, e1);
18280+ stbir__simdi_store2(output, i);
18281 #endif
18282-#ifdef STBIR_FLOAT_LOW_CLAMP
18283- stbir__simdfX_max( e0, e0, low_clamp );
18284- stbir__simdfX_max( e1, e1, low_clamp );
18285-#endif
18286- stbir__encode_simdfX_unflip( e0 );
18287- stbir__encode_simdfX_unflip( e1 );
18288- stbir__simdfX_store( output, e0 );
18289- stbir__simdfX_store( output+stbir__simdfX_float_count, e1 );
18290- encode += stbir__simdfX_float_count * 2;
18291- output += stbir__simdfX_float_count * 2;
18292- if ( output < end_output )
18293- continue;
18294- if ( output == ( end_output + ( stbir__simdfX_float_count * 2 ) ) )
18295- break;
18296- output = end_output; // backup and do last couple
18297- encode = end_encode_m8;
18298- }
18299- return;
18300- }
18301-
18302- // try to do blocks of 4 when you can
18303- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18304- output += 4;
18305- STBIR_NO_UNROLL_LOOP_START
18306- while( output <= end_output )
18307- {
18308- stbir__simdf e0;
18309- STBIR_NO_UNROLL(encode);
18310- stbir__simdf_load( e0, encode );
18311-#ifdef STBIR_FLOAT_HIGH_CLAMP
18312- stbir__simdf_min( e0, e0, high_clamp );
18313+ encode += stbir__simdfX_float_count * 2;
18314+ output += stbir__simdfX_float_count * 2;
18315+ if (output <= end_output) {
18316+ continue;
18317+ }
18318+ if (output == (end_output + stbir__simdfX_float_count * 2)) {
18319+ break;
18320+ }
18321+ output = end_output; // backup and do last couple
18322+ encode = end_encode_m8;
18323+ }
18324+ return;
18325+ }
18326+
18327+// try to do blocks of 4 when you can
18328+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18329+ output += 4;
18330+ STBIR_NO_UNROLL_LOOP_START
18331+ while (output <= end_output) {
18332+ stbir__simdf e0;
18333+ stbir__simdi i0;
18334+ STBIR_NO_UNROLL(encode);
18335+ stbir__simdf_load(e0, encode);
18336+ stbir__simdf_madd(e0, STBIR__CONSTF(STBIR_simd_point5),
18337+ STBIR__CONSTF(STBIR_max_uint8_as_float), e0);
18338+ stbir__encode_simdf4_unflip(e0);
18339+ stbir__simdf_pack_to_8bytes(i0, e0, e0); // only use first 4
18340+ *(int *)(output - 4) = stbir__simdi_to_int(i0);
18341+ output += 4;
18342+ encode += 4;
18343+ }
18344+ output -= 4;
18345 #endif
18346-#ifdef STBIR_FLOAT_LOW_CLAMP
18347- stbir__simdf_max( e0, e0, low_clamp );
18348-#endif
18349- stbir__encode_simdf4_unflip( e0 );
18350- stbir__simdf_store( output-4, e0 );
18351- output += 4;
18352- encode += 4;
18353- }
18354- output -= 4;
18355- #endif
18356-
18357- #else
18358-
18359- // try to do blocks of 4 when you can
18360- #if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18361- output += 4;
18362- STBIR_SIMD_NO_UNROLL_LOOP_START
18363- while( output <= end_output )
18364- {
18365- float e;
18366- STBIR_SIMD_NO_UNROLL(encode);
18367- e = encode[ stbir__encode_order0 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[0-4] = e;
18368- e = encode[ stbir__encode_order1 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[1-4] = e;
18369- e = encode[ stbir__encode_order2 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[2-4] = e;
18370- e = encode[ stbir__encode_order3 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[3-4] = e;
18371- output += 4;
18372- encode += 4;
18373- }
18374- output -= 4;
18375-
18376- #endif
18377-
18378- #endif
18379-
18380- // do the remnants
18381- #if stbir__coder_min_num < 4
18382- STBIR_NO_UNROLL_LOOP_START
18383- while( output < end_output )
18384- {
18385- float e;
18386- STBIR_NO_UNROLL(encode);
18387- e = encode[ stbir__encode_order0 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[0] = e;
18388- #if stbir__coder_min_num >= 2
18389- e = encode[ stbir__encode_order1 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[1] = e;
18390- #endif
18391- #if stbir__coder_min_num >= 3
18392- e = encode[ stbir__encode_order2 ]; stbir_scalar_hi_clamp( e ); stbir_scalar_lo_clamp( e ); output[2] = e;
18393- #endif
18394- output += stbir__coder_min_num;
18395- encode += stbir__coder_min_num;
18396- }
18397- #endif
18398-
18399- #endif
18400-}
18401+
18402+// do the remnants
18403+#if stbir__coder_min_num < 4
18404+ STBIR_NO_UNROLL_LOOP_START
18405+ while (output < end_output) {
18406+ stbir__simdf e0;
18407+ STBIR_NO_UNROLL(encode);
18408+ stbir__simdf_madd1_mem(e0, STBIR__CONSTF(STBIR_simd_point5),
18409+ STBIR__CONSTF(STBIR_max_uint8_as_float),
18410+ encode + stbir__encode_order0);
18411+ output[0] = stbir__simdf_convert_float_to_uint8(e0);
18412+#if stbir__coder_min_num >= 2
18413+ stbir__simdf_madd1_mem(e0, STBIR__CONSTF(STBIR_simd_point5),
18414+ STBIR__CONSTF(STBIR_max_uint8_as_float),
18415+ encode + stbir__encode_order1);
18416+ output[1] = stbir__simdf_convert_float_to_uint8(e0);
18417+#endif
18418+#if stbir__coder_min_num >= 3
18419+ stbir__simdf_madd1_mem(e0, STBIR__CONSTF(STBIR_simd_point5),
18420+ STBIR__CONSTF(STBIR_max_uint8_as_float),
18421+ encode + stbir__encode_order2);
18422+ output[2] = stbir__simdf_convert_float_to_uint8(e0);
18423+#endif
18424+ output += stbir__coder_min_num;
18425+ encode += stbir__coder_min_num;
18426+ }
18427+#endif
18428+
18429+#else
18430+
18431+// try to do blocks of 4 when you can
18432+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18433+ output += 4;
18434+ while (output <= end_output) {
18435+ float f;
18436+ f = encode[stbir__encode_order0] * stbir__max_uint8_as_float + 0.5f;
18437+ STBIR_CLAMP(f, 0, 255);
18438+ output[0 - 4] = (unsigned char)f;
18439+ f = encode[stbir__encode_order1] * stbir__max_uint8_as_float + 0.5f;
18440+ STBIR_CLAMP(f, 0, 255);
18441+ output[1 - 4] = (unsigned char)f;
18442+ f = encode[stbir__encode_order2] * stbir__max_uint8_as_float + 0.5f;
18443+ STBIR_CLAMP(f, 0, 255);
18444+ output[2 - 4] = (unsigned char)f;
18445+ f = encode[stbir__encode_order3] * stbir__max_uint8_as_float + 0.5f;
18446+ STBIR_CLAMP(f, 0, 255);
18447+ output[3 - 4] = (unsigned char)f;
18448+ output += 4;
18449+ encode += 4;
18450+ }
18451+ output -= 4;
18452+#endif
18453+
18454+// do the remnants
18455+#if stbir__coder_min_num < 4
18456+ STBIR_NO_UNROLL_LOOP_START
18457+ while (output < end_output) {
18458+ float f;
18459+ STBIR_NO_UNROLL(encode);
18460+ f = encode[stbir__encode_order0] * stbir__max_uint8_as_float + 0.5f;
18461+ STBIR_CLAMP(f, 0, 255);
18462+ output[0] = (unsigned char)f;
18463+#if stbir__coder_min_num >= 2
18464+ f = encode[stbir__encode_order1] * stbir__max_uint8_as_float + 0.5f;
18465+ STBIR_CLAMP(f, 0, 255);
18466+ output[1] = (unsigned char)f;
18467+#endif
18468+#if stbir__coder_min_num >= 3
18469+ f = encode[stbir__encode_order2] * stbir__max_uint8_as_float + 0.5f;
18470+ STBIR_CLAMP(f, 0, 255);
18471+ output[2] = (unsigned char)f;
18472+#endif
18473+ output += stbir__coder_min_num;
18474+ encode += stbir__coder_min_num;
18475+ }
18476+#endif
18477+#endif
18478+}
18479+
18480+static float *
18481+STBIR__CODER_NAME(stbir__decode_uint8_linear)(float *decodep,
18482+ int width_times_channels,
18483+ void const *inputp)
18484+{
18485+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
18486+ float *decode_end = (float *)decode + width_times_channels;
18487+ unsigned char const *input = (unsigned char const *)inputp;
18488+
18489+#ifdef STBIR_SIMD
18490+ unsigned char const *end_input_m16 = input + width_times_channels - 16;
18491+ if (width_times_channels >= 16) {
18492+ decode_end -= 16;
18493+ STBIR_NO_UNROLL_LOOP_START_INF_FOR
18494+ for (;;) {
18495+#ifdef STBIR_SIMD8
18496+ stbir__simdi i;
18497+ stbir__simdi8 o0, o1;
18498+ stbir__simdf8 of0, of1;
18499+ STBIR_NO_UNROLL(decode);
18500+ stbir__simdi_load(i, input);
18501+ stbir__simdi8_expand_u8_to_u32(o0, o1, i);
18502+ stbir__simdi8_convert_i32_to_float(of0, o0);
18503+ stbir__simdi8_convert_i32_to_float(of1, o1);
18504+ stbir__decode_simdf8_flip(of0);
18505+ stbir__decode_simdf8_flip(of1);
18506+ stbir__simdf8_store(decode + 0, of0);
18507+ stbir__simdf8_store(decode + 8, of1);
18508+#else
18509+ stbir__simdi i, o0, o1, o2, o3;
18510+ stbir__simdf of0, of1, of2, of3;
18511+ STBIR_NO_UNROLL(decode);
18512+ stbir__simdi_load(i, input);
18513+ stbir__simdi_expand_u8_to_u32(o0, o1, o2, o3, i);
18514+ stbir__simdi_convert_i32_to_float(of0, o0);
18515+ stbir__simdi_convert_i32_to_float(of1, o1);
18516+ stbir__simdi_convert_i32_to_float(of2, o2);
18517+ stbir__simdi_convert_i32_to_float(of3, o3);
18518+ stbir__decode_simdf4_flip(of0);
18519+ stbir__decode_simdf4_flip(of1);
18520+ stbir__decode_simdf4_flip(of2);
18521+ stbir__decode_simdf4_flip(of3);
18522+ stbir__simdf_store(decode + 0, of0);
18523+ stbir__simdf_store(decode + 4, of1);
18524+ stbir__simdf_store(decode + 8, of2);
18525+ stbir__simdf_store(decode + 12, of3);
18526+#endif
18527+ decode += 16;
18528+ input += 16;
18529+ if (decode <= decode_end) {
18530+ continue;
18531+ }
18532+ if (decode == (decode_end + 16)) {
18533+ break;
18534+ }
18535+ decode = decode_end; // backup and do last couple
18536+ input = end_input_m16;
18537+ }
18538+ return decode_end + 16;
18539+ }
18540+#endif
18541+
18542+// try to do blocks of 4 when you can
18543+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18544+ decode += 4;
18545+ STBIR_SIMD_NO_UNROLL_LOOP_START
18546+ while (decode <= decode_end) {
18547+ STBIR_SIMD_NO_UNROLL(decode);
18548+ decode[0 - 4] = ((float)(input[stbir__decode_order0]));
18549+ decode[1 - 4] = ((float)(input[stbir__decode_order1]));
18550+ decode[2 - 4] = ((float)(input[stbir__decode_order2]));
18551+ decode[3 - 4] = ((float)(input[stbir__decode_order3]));
18552+ decode += 4;
18553+ input += 4;
18554+ }
18555+ decode -= 4;
18556+#endif
18557+
18558+// do the remnants
18559+#if stbir__coder_min_num < 4
18560+ STBIR_NO_UNROLL_LOOP_START
18561+ while (decode < decode_end) {
18562+ STBIR_NO_UNROLL(decode);
18563+ decode[0] = ((float)(input[stbir__decode_order0]));
18564+#if stbir__coder_min_num >= 2
18565+ decode[1] = ((float)(input[stbir__decode_order1]));
18566+#endif
18567+#if stbir__coder_min_num >= 3
18568+ decode[2] = ((float)(input[stbir__decode_order2]));
18569+#endif
18570+ decode += stbir__coder_min_num;
18571+ input += stbir__coder_min_num;
18572+ }
18573+#endif
18574+ return decode_end;
18575+}
18576+
18577+static void
18578+STBIR__CODER_NAME(stbir__encode_uint8_linear)(void *outputp,
18579+ int width_times_channels,
18580+ float const *encode)
18581+{
18582+ unsigned char STBIR_SIMD_STREAMOUT_PTR(*) output = (unsigned char *)outputp;
18583+ unsigned char *end_output =
18584+ ((unsigned char *)output) + width_times_channels;
18585+
18586+#ifdef STBIR_SIMD
18587+ if (width_times_channels >= stbir__simdfX_float_count * 2) {
18588+ float const *end_encode_m8 =
18589+ encode + width_times_channels - stbir__simdfX_float_count * 2;
18590+ end_output -= stbir__simdfX_float_count * 2;
18591+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
18592+ for (;;) {
18593+ stbir__simdfX e0, e1;
18594+ stbir__simdi i;
18595+ STBIR_SIMD_NO_UNROLL(encode);
18596+ stbir__simdfX_add_mem(e0, STBIR_simd_point5X, encode);
18597+ stbir__simdfX_add_mem(e1, STBIR_simd_point5X,
18598+ encode + stbir__simdfX_float_count);
18599+ stbir__encode_simdfX_unflip(e0);
18600+ stbir__encode_simdfX_unflip(e1);
18601+#ifdef STBIR_SIMD8
18602+ stbir__simdf8_pack_to_16bytes(i, e0, e1);
18603+ stbir__simdi_store(output, i);
18604+#else
18605+ stbir__simdf_pack_to_8bytes(i, e0, e1);
18606+ stbir__simdi_store2(output, i);
18607+#endif
18608+ encode += stbir__simdfX_float_count * 2;
18609+ output += stbir__simdfX_float_count * 2;
18610+ if (output <= end_output) {
18611+ continue;
18612+ }
18613+ if (output == (end_output + stbir__simdfX_float_count * 2)) {
18614+ break;
18615+ }
18616+ output = end_output; // backup and do last couple
18617+ encode = end_encode_m8;
18618+ }
18619+ return;
18620+ }
18621+
18622+// try to do blocks of 4 when you can
18623+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18624+ output += 4;
18625+ STBIR_NO_UNROLL_LOOP_START
18626+ while (output <= end_output) {
18627+ stbir__simdf e0;
18628+ stbir__simdi i0;
18629+ STBIR_NO_UNROLL(encode);
18630+ stbir__simdf_load(e0, encode);
18631+ stbir__simdf_add(e0, STBIR__CONSTF(STBIR_simd_point5), e0);
18632+ stbir__encode_simdf4_unflip(e0);
18633+ stbir__simdf_pack_to_8bytes(i0, e0, e0); // only use first 4
18634+ *(int *)(output - 4) = stbir__simdi_to_int(i0);
18635+ output += 4;
18636+ encode += 4;
18637+ }
18638+ output -= 4;
18639+#endif
18640+
18641+#else
18642+
18643+// try to do blocks of 4 when you can
18644+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18645+ output += 4;
18646+ while (output <= end_output) {
18647+ float f;
18648+ f = encode[stbir__encode_order0] + 0.5f;
18649+ STBIR_CLAMP(f, 0, 255);
18650+ output[0 - 4] = (unsigned char)f;
18651+ f = encode[stbir__encode_order1] + 0.5f;
18652+ STBIR_CLAMP(f, 0, 255);
18653+ output[1 - 4] = (unsigned char)f;
18654+ f = encode[stbir__encode_order2] + 0.5f;
18655+ STBIR_CLAMP(f, 0, 255);
18656+ output[2 - 4] = (unsigned char)f;
18657+ f = encode[stbir__encode_order3] + 0.5f;
18658+ STBIR_CLAMP(f, 0, 255);
18659+ output[3 - 4] = (unsigned char)f;
18660+ output += 4;
18661+ encode += 4;
18662+ }
18663+ output -= 4;
18664+#endif
18665+
18666+#endif
18667+
18668+// do the remnants
18669+#if stbir__coder_min_num < 4
18670+ STBIR_NO_UNROLL_LOOP_START
18671+ while (output < end_output) {
18672+ float f;
18673+ STBIR_NO_UNROLL(encode);
18674+ f = encode[stbir__encode_order0] + 0.5f;
18675+ STBIR_CLAMP(f, 0, 255);
18676+ output[0] = (unsigned char)f;
18677+#if stbir__coder_min_num >= 2
18678+ f = encode[stbir__encode_order1] + 0.5f;
18679+ STBIR_CLAMP(f, 0, 255);
18680+ output[1] = (unsigned char)f;
18681+#endif
18682+#if stbir__coder_min_num >= 3
18683+ f = encode[stbir__encode_order2] + 0.5f;
18684+ STBIR_CLAMP(f, 0, 255);
18685+ output[2] = (unsigned char)f;
18686+#endif
18687+ output += stbir__coder_min_num;
18688+ encode += stbir__coder_min_num;
18689+ }
18690+#endif
18691+}
18692+
18693+static float *
18694+STBIR__CODER_NAME(stbir__decode_uint8_srgb)(float *decodep,
18695+ int width_times_channels,
18696+ void const *inputp)
18697+{
18698+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
18699+ float *decode_end = (float *)decode + width_times_channels;
18700+ unsigned char const *input = (unsigned char const *)inputp;
18701+
18702+// try to do blocks of 4 when you can
18703+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18704+ decode += 4;
18705+ while (decode <= decode_end) {
18706+ decode[0 - 4] =
18707+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order0]];
18708+ decode[1 - 4] =
18709+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order1]];
18710+ decode[2 - 4] =
18711+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order2]];
18712+ decode[3 - 4] =
18713+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order3]];
18714+ decode += 4;
18715+ input += 4;
18716+ }
18717+ decode -= 4;
18718+#endif
18719+
18720+// do the remnants
18721+#if stbir__coder_min_num < 4
18722+ STBIR_NO_UNROLL_LOOP_START
18723+ while (decode < decode_end) {
18724+ STBIR_NO_UNROLL(decode);
18725+ decode[0] =
18726+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order0]];
18727+#if stbir__coder_min_num >= 2
18728+ decode[1] =
18729+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order1]];
18730+#endif
18731+#if stbir__coder_min_num >= 3
18732+ decode[2] =
18733+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order2]];
18734+#endif
18735+ decode += stbir__coder_min_num;
18736+ input += stbir__coder_min_num;
18737+ }
18738+#endif
18739+ return decode_end;
18740+}
18741+
// Bound f between the STBIR_almost_zero and STBIR_almost_one constants, then
// place the float bit pattern of f, shifted right by 20, in i.
// The expansion already ends in ';' and consists of three statements, so it
// must not be used as the unbraced body of an if/else.
#define stbir__min_max_shift20(i, f)                                       \
    stbir__simdf_max(                                                      \
        f, f, stbir_simdf_casti(STBIR__CONSTI(STBIR_almost_zero)));        \
    stbir__simdf_min(                                                      \
        f, f, stbir_simdf_casti(STBIR__CONSTI(STBIR_almost_one)));         \
    stbir__simdi_32shr(i, stbir_simdi_castf(f), 20);

// Linear (non-sRGB) path: multiply-add f with STBIR_max_uint8_as_float and
// STBIR_simd_point5, bound the result between zero and
// STBIR_max_uint8_as_float, and convert it to 32-bit integers in i.
// Like the macro above, the expansion is several statements ending in ';'.
#define stbir__scale_and_convert(i, f)                                     \
    stbir__simdf_madd(f, STBIR__CONSTF(STBIR_simd_point5),                 \
                      STBIR__CONSTF(STBIR_max_uint8_as_float), f);         \
    stbir__simdf_max(f, f, stbir__simdf_zeroP());                          \
    stbir__simdf_min(f, f, STBIR__CONSTF(STBIR_max_uint8_as_float));       \
    stbir__simdf_convert_float_to_i32(i, f);

// Final step of the table-based linear->sRGB conversion. On entry i holds
// the table entries fetched for f. The float bits of f are shifted right by
// 12, masked with STBIR_mastissa_mask and OR-ed with STBIR_topscale; i is
// then combined with that value via a 16-bit multiply-add and shifted right
// by 16.
#define stbir__linear_to_srgb_finish(i, f)                                 \
    {                                                                      \
        stbir__simdi stbir__mant;                                          \
        stbir__simdi_32shr(stbir__mant, stbir_simdi_castf(f), 12);         \
        stbir__simdi_and(stbir__mant, stbir__mant,                         \
                         STBIR__CONSTI(STBIR_mastissa_mask));              \
        stbir__simdi_or(stbir__mant, stbir__mant,                          \
                        STBIR__CONSTI(STBIR_topscale));                    \
        stbir__simdi_16madd(i, i, stbir__mant);                            \
        stbir__simdi_32shr(i, i, 16);                                      \
    }
18765+
// Lane-wise table lookups for two, three or four integer vectors.
//
// Each vector is copied into a stbir__simdi_u32 union, every one of its four
// 32-bit lanes is replaced by table[lane], and the result is written back to
// the same variable. The v* arguments must therefore be assignable
// variables. `table` is substituted textually, so pass a parenthesised
// expression if it is more than a plain name.
#define stbir__simdi_table_lookup2(v0, v1, table)                          \
    {                                                                      \
        stbir__simdi_u32 stbir__lut0, stbir__lut1;                         \
        stbir__lut0.m128i_i128 = v0;                                       \
        stbir__lut1.m128i_i128 = v1;                                       \
        stbir__lut0.m128i_u32[0] = table[stbir__lut0.m128i_i32[0]];        \
        stbir__lut0.m128i_u32[1] = table[stbir__lut0.m128i_i32[1]];        \
        stbir__lut0.m128i_u32[2] = table[stbir__lut0.m128i_i32[2]];        \
        stbir__lut0.m128i_u32[3] = table[stbir__lut0.m128i_i32[3]];        \
        stbir__lut1.m128i_u32[0] = table[stbir__lut1.m128i_i32[0]];        \
        stbir__lut1.m128i_u32[1] = table[stbir__lut1.m128i_i32[1]];        \
        stbir__lut1.m128i_u32[2] = table[stbir__lut1.m128i_i32[2]];        \
        stbir__lut1.m128i_u32[3] = table[stbir__lut1.m128i_i32[3]];        \
        v0 = stbir__lut0.m128i_i128;                                       \
        v1 = stbir__lut1.m128i_i128;                                       \
    }

#define stbir__simdi_table_lookup3(v0, v1, v2, table)                      \
    {                                                                      \
        stbir__simdi_u32 stbir__lut0, stbir__lut1, stbir__lut2;            \
        stbir__lut0.m128i_i128 = v0;                                       \
        stbir__lut1.m128i_i128 = v1;                                       \
        stbir__lut2.m128i_i128 = v2;                                       \
        stbir__lut0.m128i_u32[0] = table[stbir__lut0.m128i_i32[0]];        \
        stbir__lut0.m128i_u32[1] = table[stbir__lut0.m128i_i32[1]];        \
        stbir__lut0.m128i_u32[2] = table[stbir__lut0.m128i_i32[2]];        \
        stbir__lut0.m128i_u32[3] = table[stbir__lut0.m128i_i32[3]];        \
        stbir__lut1.m128i_u32[0] = table[stbir__lut1.m128i_i32[0]];        \
        stbir__lut1.m128i_u32[1] = table[stbir__lut1.m128i_i32[1]];        \
        stbir__lut1.m128i_u32[2] = table[stbir__lut1.m128i_i32[2]];        \
        stbir__lut1.m128i_u32[3] = table[stbir__lut1.m128i_i32[3]];        \
        stbir__lut2.m128i_u32[0] = table[stbir__lut2.m128i_i32[0]];        \
        stbir__lut2.m128i_u32[1] = table[stbir__lut2.m128i_i32[1]];        \
        stbir__lut2.m128i_u32[2] = table[stbir__lut2.m128i_i32[2]];        \
        stbir__lut2.m128i_u32[3] = table[stbir__lut2.m128i_i32[3]];        \
        v0 = stbir__lut0.m128i_i128;                                       \
        v1 = stbir__lut1.m128i_i128;                                       \
        v2 = stbir__lut2.m128i_i128;                                       \
    }

#define stbir__simdi_table_lookup4(v0, v1, v2, v3, table)                  \
    {                                                                      \
        stbir__simdi_u32 stbir__lut0, stbir__lut1, stbir__lut2,            \
            stbir__lut3;                                                   \
        stbir__lut0.m128i_i128 = v0;                                       \
        stbir__lut1.m128i_i128 = v1;                                       \
        stbir__lut2.m128i_i128 = v2;                                       \
        stbir__lut3.m128i_i128 = v3;                                       \
        stbir__lut0.m128i_u32[0] = table[stbir__lut0.m128i_i32[0]];        \
        stbir__lut0.m128i_u32[1] = table[stbir__lut0.m128i_i32[1]];        \
        stbir__lut0.m128i_u32[2] = table[stbir__lut0.m128i_i32[2]];        \
        stbir__lut0.m128i_u32[3] = table[stbir__lut0.m128i_i32[3]];        \
        stbir__lut1.m128i_u32[0] = table[stbir__lut1.m128i_i32[0]];        \
        stbir__lut1.m128i_u32[1] = table[stbir__lut1.m128i_i32[1]];        \
        stbir__lut1.m128i_u32[2] = table[stbir__lut1.m128i_i32[2]];        \
        stbir__lut1.m128i_u32[3] = table[stbir__lut1.m128i_i32[3]];        \
        stbir__lut2.m128i_u32[0] = table[stbir__lut2.m128i_i32[0]];        \
        stbir__lut2.m128i_u32[1] = table[stbir__lut2.m128i_i32[1]];        \
        stbir__lut2.m128i_u32[2] = table[stbir__lut2.m128i_i32[2]];        \
        stbir__lut2.m128i_u32[3] = table[stbir__lut2.m128i_i32[3]];        \
        stbir__lut3.m128i_u32[0] = table[stbir__lut3.m128i_i32[0]];        \
        stbir__lut3.m128i_u32[1] = table[stbir__lut3.m128i_i32[1]];        \
        stbir__lut3.m128i_u32[2] = table[stbir__lut3.m128i_i32[2]];        \
        stbir__lut3.m128i_u32[3] = table[stbir__lut3.m128i_i32[3]];        \
        v0 = stbir__lut0.m128i_i128;                                       \
        v1 = stbir__lut1.m128i_i128;                                       \
        v2 = stbir__lut2.m128i_i128;                                       \
        v3 = stbir__lut3.m128i_i128;                                       \
    }
18834+
18835+static void
18836+STBIR__CODER_NAME(stbir__encode_uint8_srgb)(void *outputp,
18837+ int width_times_channels,
18838+ float const *encode)
18839+{
18840+ unsigned char STBIR_SIMD_STREAMOUT_PTR(*) output = (unsigned char *)outputp;
18841+ unsigned char *end_output =
18842+ ((unsigned char *)output) + width_times_channels;
18843+
18844+#ifdef STBIR_SIMD
18845+
18846+ if (width_times_channels >= 16) {
18847+ float const *end_encode_m16 = encode + width_times_channels - 16;
18848+ end_output -= 16;
18849+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
18850+ for (;;) {
18851+ stbir__simdf f0, f1, f2, f3;
18852+ stbir__simdi i0, i1, i2, i3;
18853+ STBIR_SIMD_NO_UNROLL(encode);
18854+
18855+ stbir__simdf_load4_transposed(f0, f1, f2, f3, encode);
18856+
18857+ stbir__min_max_shift20(i0, f0);
18858+ stbir__min_max_shift20(i1, f1);
18859+ stbir__min_max_shift20(i2, f2);
18860+ stbir__min_max_shift20(i3, f3);
18861+
18862+ stbir__simdi_table_lookup4(i0, i1, i2, i3,
18863+ (fp32_to_srgb8_tab4 - (127 - 13) * 8));
18864+
18865+ stbir__linear_to_srgb_finish(i0, f0);
18866+ stbir__linear_to_srgb_finish(i1, f1);
18867+ stbir__linear_to_srgb_finish(i2, f2);
18868+ stbir__linear_to_srgb_finish(i3, f3);
18869+
18870+ stbir__interleave_pack_and_store_16_u8(
18871+ output, STBIR_strs_join1(i, , stbir__encode_order0),
18872+ STBIR_strs_join1(i, , stbir__encode_order1),
18873+ STBIR_strs_join1(i, , stbir__encode_order2),
18874+ STBIR_strs_join1(i, , stbir__encode_order3));
18875+
18876+ encode += 16;
18877+ output += 16;
18878+ if (output <= end_output) {
18879+ continue;
18880+ }
18881+ if (output == (end_output + 16)) {
18882+ break;
18883+ }
18884+ output = end_output; // backup and do last couple
18885+ encode = end_encode_m16;
18886+ }
18887+ return;
18888+ }
18889+#endif
18890+
18891+// try to do blocks of 4 when you can
18892+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
18893+ output += 4;
18894+ STBIR_SIMD_NO_UNROLL_LOOP_START
18895+ while (output <= end_output) {
18896+ STBIR_SIMD_NO_UNROLL(encode);
18897+
18898+ output[0 - 4] =
18899+ stbir__linear_to_srgb_uchar(encode[stbir__encode_order0]);
18900+ output[1 - 4] =
18901+ stbir__linear_to_srgb_uchar(encode[stbir__encode_order1]);
18902+ output[2 - 4] =
18903+ stbir__linear_to_srgb_uchar(encode[stbir__encode_order2]);
18904+ output[3 - 4] =
18905+ stbir__linear_to_srgb_uchar(encode[stbir__encode_order3]);
18906+
18907+ output += 4;
18908+ encode += 4;
18909+ }
18910+ output -= 4;
18911+#endif
18912+
18913+// do the remnants
18914+#if stbir__coder_min_num < 4
18915+ STBIR_NO_UNROLL_LOOP_START
18916+ while (output < end_output) {
18917+ STBIR_NO_UNROLL(encode);
18918+ output[0] = stbir__linear_to_srgb_uchar(encode[stbir__encode_order0]);
18919+#if stbir__coder_min_num >= 2
18920+ output[1] = stbir__linear_to_srgb_uchar(encode[stbir__encode_order1]);
18921+#endif
18922+#if stbir__coder_min_num >= 3
18923+ output[2] = stbir__linear_to_srgb_uchar(encode[stbir__encode_order2]);
18924+#endif
18925+ output += stbir__coder_min_num;
18926+ encode += stbir__coder_min_num;
18927+ }
18928+#endif
18929+}
18930+
18931+#if (stbir__coder_min_num == 4) || \
18932+ ((stbir__coder_min_num == 1) && (!defined(stbir__decode_swizzle)))
18933+
18934+static float *
18935+STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)(
18936+ float *decodep, int width_times_channels, void const *inputp)
18937+{
18938+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
18939+ float *decode_end = (float *)decode + width_times_channels;
18940+ unsigned char const *input = (unsigned char const *)inputp;
18941+
18942+ do {
18943+ decode[0] =
18944+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order0]];
18945+ decode[1] =
18946+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order1]];
18947+ decode[2] =
18948+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order2]];
18949+ decode[3] = ((float)input[stbir__decode_order3]) *
18950+ stbir__max_uint8_as_float_inverted;
18951+ input += 4;
18952+ decode += 4;
18953+ } while (decode < decode_end);
18954+ return decode_end;
18955+}
18956+
18957+static void
18958+STBIR__CODER_NAME(stbir__encode_uint8_srgb4_linearalpha)(
18959+ void *outputp, int width_times_channels, float const *encode)
18960+{
18961+ unsigned char STBIR_SIMD_STREAMOUT_PTR(*) output = (unsigned char *)outputp;
18962+ unsigned char *end_output =
18963+ ((unsigned char *)output) + width_times_channels;
18964+
18965+#ifdef STBIR_SIMD
18966+
18967+ if (width_times_channels >= 16) {
18968+ float const *end_encode_m16 = encode + width_times_channels - 16;
18969+ end_output -= 16;
18970+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
18971+ for (;;) {
18972+ stbir__simdf f0, f1, f2, f3;
18973+ stbir__simdi i0, i1, i2, i3;
18974+
18975+ STBIR_SIMD_NO_UNROLL(encode);
18976+ stbir__simdf_load4_transposed(f0, f1, f2, f3, encode);
18977+
18978+ stbir__min_max_shift20(i0, f0);
18979+ stbir__min_max_shift20(i1, f1);
18980+ stbir__min_max_shift20(i2, f2);
18981+ stbir__scale_and_convert(i3, f3);
18982+
18983+ stbir__simdi_table_lookup3(i0, i1, i2,
18984+ (fp32_to_srgb8_tab4 - (127 - 13) * 8));
18985+
18986+ stbir__linear_to_srgb_finish(i0, f0);
18987+ stbir__linear_to_srgb_finish(i1, f1);
18988+ stbir__linear_to_srgb_finish(i2, f2);
18989+
18990+ stbir__interleave_pack_and_store_16_u8(
18991+ output, STBIR_strs_join1(i, , stbir__encode_order0),
18992+ STBIR_strs_join1(i, , stbir__encode_order1),
18993+ STBIR_strs_join1(i, , stbir__encode_order2),
18994+ STBIR_strs_join1(i, , stbir__encode_order3));
18995+
18996+ output += 16;
18997+ encode += 16;
18998+
18999+ if (output <= end_output) {
19000+ continue;
19001+ }
19002+ if (output == (end_output + 16)) {
19003+ break;
19004+ }
19005+ output = end_output; // backup and do last couple
19006+ encode = end_encode_m16;
19007+ }
19008+ return;
19009+ }
19010+#endif
19011+
19012+ STBIR_SIMD_NO_UNROLL_LOOP_START
19013+ do {
19014+ float f;
19015+ STBIR_SIMD_NO_UNROLL(encode);
19016+
19017+ output[stbir__decode_order0] = stbir__linear_to_srgb_uchar(encode[0]);
19018+ output[stbir__decode_order1] = stbir__linear_to_srgb_uchar(encode[1]);
19019+ output[stbir__decode_order2] = stbir__linear_to_srgb_uchar(encode[2]);
19020+
19021+ f = encode[3] * stbir__max_uint8_as_float + 0.5f;
19022+ STBIR_CLAMP(f, 0, 255);
19023+ output[stbir__decode_order3] = (unsigned char)f;
19024+
19025+ output += 4;
19026+ encode += 4;
19027+ } while (output < end_output);
19028+}
19029+
19030+#endif
19031+
19032+#if (stbir__coder_min_num == 2) || \
19033+ ((stbir__coder_min_num == 1) && (!defined(stbir__decode_swizzle)))
19034+
19035+static float *
19036+STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)(
19037+ float *decodep, int width_times_channels, void const *inputp)
19038+{
19039+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
19040+ float *decode_end = (float *)decode + width_times_channels;
19041+ unsigned char const *input = (unsigned char const *)inputp;
19042+
19043+ decode += 4;
19044+ while (decode <= decode_end) {
19045+ decode[0 - 4] =
19046+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order0]];
19047+ decode[1 - 4] = ((float)input[stbir__decode_order1]) *
19048+ stbir__max_uint8_as_float_inverted;
19049+ decode[2 - 4] =
19050+ stbir__srgb_uchar_to_linear_float[input[stbir__decode_order0 + 2]];
19051+ decode[3 - 4] = ((float)input[stbir__decode_order1 + 2]) *
19052+ stbir__max_uint8_as_float_inverted;
19053+ input += 4;
19054+ decode += 4;
19055+ }
19056+ decode -= 4;
19057+ if (decode < decode_end) {
19058+ decode[0] = stbir__srgb_uchar_to_linear_float[stbir__decode_order0];
19059+ decode[1] = ((float)input[stbir__decode_order1]) *
19060+ stbir__max_uint8_as_float_inverted;
19061+ }
19062+ return decode_end;
19063+}
19064+
19065+static void
19066+STBIR__CODER_NAME(stbir__encode_uint8_srgb2_linearalpha)(
19067+ void *outputp, int width_times_channels, float const *encode)
19068+{
19069+ unsigned char STBIR_SIMD_STREAMOUT_PTR(*) output = (unsigned char *)outputp;
19070+ unsigned char *end_output =
19071+ ((unsigned char *)output) + width_times_channels;
19072+
19073+#ifdef STBIR_SIMD
19074+
19075+ if (width_times_channels >= 16) {
19076+ float const *end_encode_m16 = encode + width_times_channels - 16;
19077+ end_output -= 16;
19078+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
19079+ for (;;) {
19080+ stbir__simdf f0, f1, f2, f3;
19081+ stbir__simdi i0, i1, i2, i3;
19082+
19083+ STBIR_SIMD_NO_UNROLL(encode);
19084+ stbir__simdf_load4_transposed(f0, f1, f2, f3, encode);
19085+
19086+ stbir__min_max_shift20(i0, f0);
19087+ stbir__scale_and_convert(i1, f1);
19088+ stbir__min_max_shift20(i2, f2);
19089+ stbir__scale_and_convert(i3, f3);
19090+
19091+ stbir__simdi_table_lookup2(i0, i2,
19092+ (fp32_to_srgb8_tab4 - (127 - 13) * 8));
19093+
19094+ stbir__linear_to_srgb_finish(i0, f0);
19095+ stbir__linear_to_srgb_finish(i2, f2);
19096+
19097+ stbir__interleave_pack_and_store_16_u8(
19098+ output, STBIR_strs_join1(i, , stbir__encode_order0),
19099+ STBIR_strs_join1(i, , stbir__encode_order1),
19100+ STBIR_strs_join1(i, , stbir__encode_order2),
19101+ STBIR_strs_join1(i, , stbir__encode_order3));
19102+
19103+ output += 16;
19104+ encode += 16;
19105+ if (output <= end_output) {
19106+ continue;
19107+ }
19108+ if (output == (end_output + 16)) {
19109+ break;
19110+ }
19111+ output = end_output; // backup and do last couple
19112+ encode = end_encode_m16;
19113+ }
19114+ return;
19115+ }
19116+#endif
19117+
19118+ STBIR_SIMD_NO_UNROLL_LOOP_START
19119+ do {
19120+ float f;
19121+ STBIR_SIMD_NO_UNROLL(encode);
19122+
19123+ output[stbir__decode_order0] = stbir__linear_to_srgb_uchar(encode[0]);
19124+
19125+ f = encode[1] * stbir__max_uint8_as_float + 0.5f;
19126+ STBIR_CLAMP(f, 0, 255);
19127+ output[stbir__decode_order1] = (unsigned char)f;
19128+
19129+ output += 2;
19130+ encode += 2;
19131+ } while (output < end_output);
19132+}
19133+
19134+#endif
19135+
19136+static float *
19137+STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)(float *decodep,
19138+ int width_times_channels,
19139+ void const *inputp)
19140+{
19141+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
19142+ float *decode_end = (float *)decode + width_times_channels;
19143+ unsigned short const *input = (unsigned short const *)inputp;
19144+
19145+#ifdef STBIR_SIMD
19146+ unsigned short const *end_input_m8 = input + width_times_channels - 8;
19147+ if (width_times_channels >= 8) {
19148+ decode_end -= 8;
19149+ STBIR_NO_UNROLL_LOOP_START_INF_FOR
19150+ for (;;) {
19151+#ifdef STBIR_SIMD8
19152+ stbir__simdi i;
19153+ stbir__simdi8 o;
19154+ stbir__simdf8 of;
19155+ STBIR_NO_UNROLL(decode);
19156+ stbir__simdi_load(i, input);
19157+ stbir__simdi8_expand_u16_to_u32(o, i);
19158+ stbir__simdi8_convert_i32_to_float(of, o);
19159+ stbir__simdf8_mult(of, of, STBIR_max_uint16_as_float_inverted8);
19160+ stbir__decode_simdf8_flip(of);
19161+ stbir__simdf8_store(decode + 0, of);
19162+#else
19163+ stbir__simdi i, o0, o1;
19164+ stbir__simdf of0, of1;
19165+ STBIR_NO_UNROLL(decode);
19166+ stbir__simdi_load(i, input);
19167+ stbir__simdi_expand_u16_to_u32(o0, o1, i);
19168+ stbir__simdi_convert_i32_to_float(of0, o0);
19169+ stbir__simdi_convert_i32_to_float(of1, o1);
19170+ stbir__simdf_mult(
19171+ of0, of0, STBIR__CONSTF(STBIR_max_uint16_as_float_inverted));
19172+ stbir__simdf_mult(
19173+ of1, of1, STBIR__CONSTF(STBIR_max_uint16_as_float_inverted));
19174+ stbir__decode_simdf4_flip(of0);
19175+ stbir__decode_simdf4_flip(of1);
19176+ stbir__simdf_store(decode + 0, of0);
19177+ stbir__simdf_store(decode + 4, of1);
19178+#endif
19179+ decode += 8;
19180+ input += 8;
19181+ if (decode <= decode_end) {
19182+ continue;
19183+ }
19184+ if (decode == (decode_end + 8)) {
19185+ break;
19186+ }
19187+ decode = decode_end; // backup and do last couple
19188+ input = end_input_m8;
19189+ }
19190+ return decode_end + 8;
19191+ }
19192+#endif
19193+
19194+// try to do blocks of 4 when you can
19195+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19196+ decode += 4;
19197+ STBIR_SIMD_NO_UNROLL_LOOP_START
19198+ while (decode <= decode_end) {
19199+ STBIR_SIMD_NO_UNROLL(decode);
19200+ decode[0 - 4] = ((float)(input[stbir__decode_order0])) *
19201+ stbir__max_uint16_as_float_inverted;
19202+ decode[1 - 4] = ((float)(input[stbir__decode_order1])) *
19203+ stbir__max_uint16_as_float_inverted;
19204+ decode[2 - 4] = ((float)(input[stbir__decode_order2])) *
19205+ stbir__max_uint16_as_float_inverted;
19206+ decode[3 - 4] = ((float)(input[stbir__decode_order3])) *
19207+ stbir__max_uint16_as_float_inverted;
19208+ decode += 4;
19209+ input += 4;
19210+ }
19211+ decode -= 4;
19212+#endif
19213+
19214+// do the remnants
19215+#if stbir__coder_min_num < 4
19216+ STBIR_NO_UNROLL_LOOP_START
19217+ while (decode < decode_end) {
19218+ STBIR_NO_UNROLL(decode);
19219+ decode[0] = ((float)(input[stbir__decode_order0])) *
19220+ stbir__max_uint16_as_float_inverted;
19221+#if stbir__coder_min_num >= 2
19222+ decode[1] = ((float)(input[stbir__decode_order1])) *
19223+ stbir__max_uint16_as_float_inverted;
19224+#endif
19225+#if stbir__coder_min_num >= 3
19226+ decode[2] = ((float)(input[stbir__decode_order2])) *
19227+ stbir__max_uint16_as_float_inverted;
19228+#endif
19229+ decode += stbir__coder_min_num;
19230+ input += stbir__coder_min_num;
19231+ }
19232+#endif
19233+ return decode_end;
19234+}
19235+
19236+static void
19237+STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)(void *outputp,
19238+ int width_times_channels,
19239+ float const *encode)
19240+{
19241+ unsigned short STBIR_SIMD_STREAMOUT_PTR(*) output =
19242+ (unsigned short *)outputp;
19243+ unsigned short *end_output =
19244+ ((unsigned short *)output) + width_times_channels;
19245+
19246+#ifdef STBIR_SIMD
19247+ {
19248+ if (width_times_channels >= stbir__simdfX_float_count * 2) {
19249+ float const *end_encode_m8 =
19250+ encode + width_times_channels - stbir__simdfX_float_count * 2;
19251+ end_output -= stbir__simdfX_float_count * 2;
19252+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
19253+ for (;;) {
19254+ stbir__simdfX e0, e1;
19255+ stbir__simdiX i;
19256+ STBIR_SIMD_NO_UNROLL(encode);
19257+ stbir__simdfX_madd_mem(e0, STBIR_simd_point5X,
19258+ STBIR_max_uint16_as_floatX, encode);
19259+ stbir__simdfX_madd_mem(e1, STBIR_simd_point5X,
19260+ STBIR_max_uint16_as_floatX,
19261+ encode + stbir__simdfX_float_count);
19262+ stbir__encode_simdfX_unflip(e0);
19263+ stbir__encode_simdfX_unflip(e1);
19264+ stbir__simdfX_pack_to_words(i, e0, e1);
19265+ stbir__simdiX_store(output, i);
19266+ encode += stbir__simdfX_float_count * 2;
19267+ output += stbir__simdfX_float_count * 2;
19268+ if (output <= end_output) {
19269+ continue;
19270+ }
19271+ if (output == (end_output + stbir__simdfX_float_count * 2)) {
19272+ break;
19273+ }
19274+ output = end_output; // backup and do last couple
19275+ encode = end_encode_m8;
19276+ }
19277+ return;
19278+ }
19279+ }
19280+
19281+// try to do blocks of 4 when you can
19282+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19283+ output += 4;
19284+ STBIR_NO_UNROLL_LOOP_START
19285+ while (output <= end_output) {
19286+ stbir__simdf e;
19287+ stbir__simdi i;
19288+ STBIR_NO_UNROLL(encode);
19289+ stbir__simdf_load(e, encode);
19290+ stbir__simdf_madd(e, STBIR__CONSTF(STBIR_simd_point5),
19291+ STBIR__CONSTF(STBIR_max_uint16_as_float), e);
19292+ stbir__encode_simdf4_unflip(e);
19293+ stbir__simdf_pack_to_8words(i, e, e); // only use first 4
19294+ stbir__simdi_store2(output - 4, i);
19295+ output += 4;
19296+ encode += 4;
19297+ }
19298+ output -= 4;
19299+#endif
19300+
19301+// do the remnants
19302+#if stbir__coder_min_num < 4
19303+ STBIR_NO_UNROLL_LOOP_START
19304+ while (output < end_output) {
19305+ stbir__simdf e;
19306+ STBIR_NO_UNROLL(encode);
19307+ stbir__simdf_madd1_mem(e, STBIR__CONSTF(STBIR_simd_point5),
19308+ STBIR__CONSTF(STBIR_max_uint16_as_float),
19309+ encode + stbir__encode_order0);
19310+ output[0] = stbir__simdf_convert_float_to_short(e);
19311+#if stbir__coder_min_num >= 2
19312+ stbir__simdf_madd1_mem(e, STBIR__CONSTF(STBIR_simd_point5),
19313+ STBIR__CONSTF(STBIR_max_uint16_as_float),
19314+ encode + stbir__encode_order1);
19315+ output[1] = stbir__simdf_convert_float_to_short(e);
19316+#endif
19317+#if stbir__coder_min_num >= 3
19318+ stbir__simdf_madd1_mem(e, STBIR__CONSTF(STBIR_simd_point5),
19319+ STBIR__CONSTF(STBIR_max_uint16_as_float),
19320+ encode + stbir__encode_order2);
19321+ output[2] = stbir__simdf_convert_float_to_short(e);
19322+#endif
19323+ output += stbir__coder_min_num;
19324+ encode += stbir__coder_min_num;
19325+ }
19326+#endif
19327+
19328+#else
19329+
19330+// try to do blocks of 4 when you can
19331+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19332+ output += 4;
19333+ STBIR_SIMD_NO_UNROLL_LOOP_START
19334+ while (output <= end_output) {
19335+ float f;
19336+ STBIR_SIMD_NO_UNROLL(encode);
19337+ f = encode[stbir__encode_order0] * stbir__max_uint16_as_float + 0.5f;
19338+ STBIR_CLAMP(f, 0, 65535);
19339+ output[0 - 4] = (unsigned short)f;
19340+ f = encode[stbir__encode_order1] * stbir__max_uint16_as_float + 0.5f;
19341+ STBIR_CLAMP(f, 0, 65535);
19342+ output[1 - 4] = (unsigned short)f;
19343+ f = encode[stbir__encode_order2] * stbir__max_uint16_as_float + 0.5f;
19344+ STBIR_CLAMP(f, 0, 65535);
19345+ output[2 - 4] = (unsigned short)f;
19346+ f = encode[stbir__encode_order3] * stbir__max_uint16_as_float + 0.5f;
19347+ STBIR_CLAMP(f, 0, 65535);
19348+ output[3 - 4] = (unsigned short)f;
19349+ output += 4;
19350+ encode += 4;
19351+ }
19352+ output -= 4;
19353+#endif
19354+
19355+// do the remnants
19356+#if stbir__coder_min_num < 4
19357+ STBIR_NO_UNROLL_LOOP_START
19358+ while (output < end_output) {
19359+ float f;
19360+ STBIR_NO_UNROLL(encode);
19361+ f = encode[stbir__encode_order0] * stbir__max_uint16_as_float + 0.5f;
19362+ STBIR_CLAMP(f, 0, 65535);
19363+ output[0] = (unsigned short)f;
19364+#if stbir__coder_min_num >= 2
19365+ f = encode[stbir__encode_order1] * stbir__max_uint16_as_float + 0.5f;
19366+ STBIR_CLAMP(f, 0, 65535);
19367+ output[1] = (unsigned short)f;
19368+#endif
19369+#if stbir__coder_min_num >= 3
19370+ f = encode[stbir__encode_order2] * stbir__max_uint16_as_float + 0.5f;
19371+ STBIR_CLAMP(f, 0, 65535);
19372+ output[2] = (unsigned short)f;
19373+#endif
19374+ output += stbir__coder_min_num;
19375+ encode += stbir__coder_min_num;
19376+ }
19377+#endif
19378+#endif
19379+}
19380+
19381+static float *
19382+STBIR__CODER_NAME(stbir__decode_uint16_linear)(float *decodep,
19383+ int width_times_channels,
19384+ void const *inputp)
19385+{
19386+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
19387+ float *decode_end = (float *)decode + width_times_channels;
19388+ unsigned short const *input = (unsigned short const *)inputp;
19389+
19390+#ifdef STBIR_SIMD
19391+ unsigned short const *end_input_m8 = input + width_times_channels - 8;
19392+ if (width_times_channels >= 8) {
19393+ decode_end -= 8;
19394+ STBIR_NO_UNROLL_LOOP_START_INF_FOR
19395+ for (;;) {
19396+#ifdef STBIR_SIMD8
19397+ stbir__simdi i;
19398+ stbir__simdi8 o;
19399+ stbir__simdf8 of;
19400+ STBIR_NO_UNROLL(decode);
19401+ stbir__simdi_load(i, input);
19402+ stbir__simdi8_expand_u16_to_u32(o, i);
19403+ stbir__simdi8_convert_i32_to_float(of, o);
19404+ stbir__decode_simdf8_flip(of);
19405+ stbir__simdf8_store(decode + 0, of);
19406+#else
19407+ stbir__simdi i, o0, o1;
19408+ stbir__simdf of0, of1;
19409+ STBIR_NO_UNROLL(decode);
19410+ stbir__simdi_load(i, input);
19411+ stbir__simdi_expand_u16_to_u32(o0, o1, i);
19412+ stbir__simdi_convert_i32_to_float(of0, o0);
19413+ stbir__simdi_convert_i32_to_float(of1, o1);
19414+ stbir__decode_simdf4_flip(of0);
19415+ stbir__decode_simdf4_flip(of1);
19416+ stbir__simdf_store(decode + 0, of0);
19417+ stbir__simdf_store(decode + 4, of1);
19418+#endif
19419+ decode += 8;
19420+ input += 8;
19421+ if (decode <= decode_end) {
19422+ continue;
19423+ }
19424+ if (decode == (decode_end + 8)) {
19425+ break;
19426+ }
19427+ decode = decode_end; // backup and do last couple
19428+ input = end_input_m8;
19429+ }
19430+ return decode_end + 8;
19431+ }
19432+#endif
19433+
19434+// try to do blocks of 4 when you can
19435+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19436+ decode += 4;
19437+ STBIR_SIMD_NO_UNROLL_LOOP_START
19438+ while (decode <= decode_end) {
19439+ STBIR_SIMD_NO_UNROLL(decode);
19440+ decode[0 - 4] = ((float)(input[stbir__decode_order0]));
19441+ decode[1 - 4] = ((float)(input[stbir__decode_order1]));
19442+ decode[2 - 4] = ((float)(input[stbir__decode_order2]));
19443+ decode[3 - 4] = ((float)(input[stbir__decode_order3]));
19444+ decode += 4;
19445+ input += 4;
19446+ }
19447+ decode -= 4;
19448+#endif
19449+
19450+// do the remnants
19451+#if stbir__coder_min_num < 4
19452+ STBIR_NO_UNROLL_LOOP_START
19453+ while (decode < decode_end) {
19454+ STBIR_NO_UNROLL(decode);
19455+ decode[0] = ((float)(input[stbir__decode_order0]));
19456+#if stbir__coder_min_num >= 2
19457+ decode[1] = ((float)(input[stbir__decode_order1]));
19458+#endif
19459+#if stbir__coder_min_num >= 3
19460+ decode[2] = ((float)(input[stbir__decode_order2]));
19461+#endif
19462+ decode += stbir__coder_min_num;
19463+ input += stbir__coder_min_num;
19464+ }
19465+#endif
19466+ return decode_end;
19467+}
19468+
19469+static void
19470+STBIR__CODER_NAME(stbir__encode_uint16_linear)(void *outputp,
19471+ int width_times_channels,
19472+ float const *encode)
19473+{
19474+ unsigned short STBIR_SIMD_STREAMOUT_PTR(*) output =
19475+ (unsigned short *)outputp;
19476+ unsigned short *end_output =
19477+ ((unsigned short *)output) + width_times_channels;
19478+
19479+#ifdef STBIR_SIMD
19480+ {
19481+ if (width_times_channels >= stbir__simdfX_float_count * 2) {
19482+ float const *end_encode_m8 =
19483+ encode + width_times_channels - stbir__simdfX_float_count * 2;
19484+ end_output -= stbir__simdfX_float_count * 2;
19485+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
19486+ for (;;) {
19487+ stbir__simdfX e0, e1;
19488+ stbir__simdiX i;
19489+ STBIR_SIMD_NO_UNROLL(encode);
19490+ stbir__simdfX_add_mem(e0, STBIR_simd_point5X, encode);
19491+ stbir__simdfX_add_mem(e1, STBIR_simd_point5X,
19492+ encode + stbir__simdfX_float_count);
19493+ stbir__encode_simdfX_unflip(e0);
19494+ stbir__encode_simdfX_unflip(e1);
19495+ stbir__simdfX_pack_to_words(i, e0, e1);
19496+ stbir__simdiX_store(output, i);
19497+ encode += stbir__simdfX_float_count * 2;
19498+ output += stbir__simdfX_float_count * 2;
19499+ if (output <= end_output) {
19500+ continue;
19501+ }
19502+ if (output == (end_output + stbir__simdfX_float_count * 2)) {
19503+ break;
19504+ }
19505+ output = end_output; // backup and do last couple
19506+ encode = end_encode_m8;
19507+ }
19508+ return;
19509+ }
19510+ }
19511+
19512+// try to do blocks of 4 when you can
19513+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19514+ output += 4;
19515+ STBIR_NO_UNROLL_LOOP_START
19516+ while (output <= end_output) {
19517+ stbir__simdf e;
19518+ stbir__simdi i;
19519+ STBIR_NO_UNROLL(encode);
19520+ stbir__simdf_load(e, encode);
19521+ stbir__simdf_add(e, STBIR__CONSTF(STBIR_simd_point5), e);
19522+ stbir__encode_simdf4_unflip(e);
19523+ stbir__simdf_pack_to_8words(i, e, e); // only use first 4
19524+ stbir__simdi_store2(output - 4, i);
19525+ output += 4;
19526+ encode += 4;
19527+ }
19528+ output -= 4;
19529+#endif
19530+
19531+#else
19532+
19533+// try to do blocks of 4 when you can
19534+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19535+ output += 4;
19536+ STBIR_SIMD_NO_UNROLL_LOOP_START
19537+ while (output <= end_output) {
19538+ float f;
19539+ STBIR_SIMD_NO_UNROLL(encode);
19540+ f = encode[stbir__encode_order0] + 0.5f;
19541+ STBIR_CLAMP(f, 0, 65535);
19542+ output[0 - 4] = (unsigned short)f;
19543+ f = encode[stbir__encode_order1] + 0.5f;
19544+ STBIR_CLAMP(f, 0, 65535);
19545+ output[1 - 4] = (unsigned short)f;
19546+ f = encode[stbir__encode_order2] + 0.5f;
19547+ STBIR_CLAMP(f, 0, 65535);
19548+ output[2 - 4] = (unsigned short)f;
19549+ f = encode[stbir__encode_order3] + 0.5f;
19550+ STBIR_CLAMP(f, 0, 65535);
19551+ output[3 - 4] = (unsigned short)f;
19552+ output += 4;
19553+ encode += 4;
19554+ }
19555+ output -= 4;
19556+#endif
19557+
19558+#endif
19559+
19560+// do the remnants
19561+#if stbir__coder_min_num < 4
19562+ STBIR_NO_UNROLL_LOOP_START
19563+ while (output < end_output) {
19564+ float f;
19565+ STBIR_NO_UNROLL(encode);
19566+ f = encode[stbir__encode_order0] + 0.5f;
19567+ STBIR_CLAMP(f, 0, 65535);
19568+ output[0] = (unsigned short)f;
19569+#if stbir__coder_min_num >= 2
19570+ f = encode[stbir__encode_order1] + 0.5f;
19571+ STBIR_CLAMP(f, 0, 65535);
19572+ output[1] = (unsigned short)f;
19573+#endif
19574+#if stbir__coder_min_num >= 3
19575+ f = encode[stbir__encode_order2] + 0.5f;
19576+ STBIR_CLAMP(f, 0, 65535);
19577+ output[2] = (unsigned short)f;
19578+#endif
19579+ output += stbir__coder_min_num;
19580+ encode += stbir__coder_min_num;
19581+ }
19582+#endif
19583+}
19584+
19585+static float *
19586+STBIR__CODER_NAME(stbir__decode_half_float_linear)(float *decodep,
19587+ int width_times_channels,
19588+ void const *inputp)
19589+{
19590+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
19591+ float *decode_end = (float *)decode + width_times_channels;
19592+ stbir__FP16 const *input = (stbir__FP16 const *)inputp;
19593+
19594+#ifdef STBIR_SIMD
19595+ if (width_times_channels >= 8) {
19596+ stbir__FP16 const *end_input_m8 = input + width_times_channels - 8;
19597+ decode_end -= 8;
19598+ STBIR_NO_UNROLL_LOOP_START_INF_FOR
19599+ for (;;) {
19600+ STBIR_NO_UNROLL(decode);
19601+
19602+ stbir__half_to_float_SIMD(decode, input);
19603+#ifdef stbir__decode_swizzle
19604+#ifdef STBIR_SIMD8
19605+ {
19606+ stbir__simdf8 of;
19607+ stbir__simdf8_load(of, decode);
19608+ stbir__decode_simdf8_flip(of);
19609+ stbir__simdf8_store(decode, of);
19610+ }
19611+#else
19612+ {
19613+ stbir__simdf of0, of1;
19614+ stbir__simdf_load(of0, decode);
19615+ stbir__simdf_load(of1, decode + 4);
19616+ stbir__decode_simdf4_flip(of0);
19617+ stbir__decode_simdf4_flip(of1);
19618+ stbir__simdf_store(decode, of0);
19619+ stbir__simdf_store(decode + 4, of1);
19620+ }
19621+#endif
19622+#endif
19623+ decode += 8;
19624+ input += 8;
19625+ if (decode <= decode_end) {
19626+ continue;
19627+ }
19628+ if (decode == (decode_end + 8)) {
19629+ break;
19630+ }
19631+ decode = decode_end; // backup and do last couple
19632+ input = end_input_m8;
19633+ }
19634+ return decode_end + 8;
19635+ }
19636+#endif
19637+
19638+// try to do blocks of 4 when you can
19639+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19640+ decode += 4;
19641+ STBIR_SIMD_NO_UNROLL_LOOP_START
19642+ while (decode <= decode_end) {
19643+ STBIR_SIMD_NO_UNROLL(decode);
19644+ decode[0 - 4] = stbir__half_to_float(input[stbir__decode_order0]);
19645+ decode[1 - 4] = stbir__half_to_float(input[stbir__decode_order1]);
19646+ decode[2 - 4] = stbir__half_to_float(input[stbir__decode_order2]);
19647+ decode[3 - 4] = stbir__half_to_float(input[stbir__decode_order3]);
19648+ decode += 4;
19649+ input += 4;
19650+ }
19651+ decode -= 4;
19652+#endif
19653+
19654+// do the remnants
19655+#if stbir__coder_min_num < 4
19656+ STBIR_NO_UNROLL_LOOP_START
19657+ while (decode < decode_end) {
19658+ STBIR_NO_UNROLL(decode);
19659+ decode[0] = stbir__half_to_float(input[stbir__decode_order0]);
19660+#if stbir__coder_min_num >= 2
19661+ decode[1] = stbir__half_to_float(input[stbir__decode_order1]);
19662+#endif
19663+#if stbir__coder_min_num >= 3
19664+ decode[2] = stbir__half_to_float(input[stbir__decode_order2]);
19665+#endif
19666+ decode += stbir__coder_min_num;
19667+ input += stbir__coder_min_num;
19668+ }
19669+#endif
19670+ return decode_end;
19671+}
19672+
19673+static void
19674+STBIR__CODER_NAME(stbir__encode_half_float_linear)(void *outputp,
19675+ int width_times_channels,
19676+ float const *encode)
19677+{
19678+ stbir__FP16 STBIR_SIMD_STREAMOUT_PTR(*) output = (stbir__FP16 *)outputp;
19679+ stbir__FP16 *end_output = ((stbir__FP16 *)output) + width_times_channels;
19680+
19681+#ifdef STBIR_SIMD
19682+ if (width_times_channels >= 8) {
19683+ float const *end_encode_m8 = encode + width_times_channels - 8;
19684+ end_output -= 8;
19685+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
19686+ for (;;) {
19687+ STBIR_SIMD_NO_UNROLL(encode);
19688+#ifdef stbir__decode_swizzle
19689+#ifdef STBIR_SIMD8
19690+ {
19691+ stbir__simdf8 of;
19692+ stbir__simdf8_load(of, encode);
19693+ stbir__encode_simdf8_unflip(of);
19694+ stbir__float_to_half_SIMD(output, (float *)&of);
19695+ }
19696+#else
19697+ {
19698+ stbir__simdf of[2];
19699+ stbir__simdf_load(of[0], encode);
19700+ stbir__simdf_load(of[1], encode + 4);
19701+ stbir__encode_simdf4_unflip(of[0]);
19702+ stbir__encode_simdf4_unflip(of[1]);
19703+ stbir__float_to_half_SIMD(output, (float *)of);
19704+ }
19705+#endif
19706+#else
19707+ stbir__float_to_half_SIMD(output, encode);
19708+#endif
19709+ encode += 8;
19710+ output += 8;
19711+ if (output <= end_output) {
19712+ continue;
19713+ }
19714+ if (output == (end_output + 8)) {
19715+ break;
19716+ }
19717+ output = end_output; // backup and do last couple
19718+ encode = end_encode_m8;
19719+ }
19720+ return;
19721+ }
19722+#endif
19723+
19724+// try to do blocks of 4 when you can
19725+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19726+ output += 4;
19727+ STBIR_SIMD_NO_UNROLL_LOOP_START
19728+ while (output <= end_output) {
19729+ STBIR_SIMD_NO_UNROLL(output);
19730+ output[0 - 4] = stbir__float_to_half(encode[stbir__encode_order0]);
19731+ output[1 - 4] = stbir__float_to_half(encode[stbir__encode_order1]);
19732+ output[2 - 4] = stbir__float_to_half(encode[stbir__encode_order2]);
19733+ output[3 - 4] = stbir__float_to_half(encode[stbir__encode_order3]);
19734+ output += 4;
19735+ encode += 4;
19736+ }
19737+ output -= 4;
19738+#endif
19739+
19740+// do the remnants
19741+#if stbir__coder_min_num < 4
19742+ STBIR_NO_UNROLL_LOOP_START
19743+ while (output < end_output) {
19744+ STBIR_NO_UNROLL(output);
19745+ output[0] = stbir__float_to_half(encode[stbir__encode_order0]);
19746+#if stbir__coder_min_num >= 2
19747+ output[1] = stbir__float_to_half(encode[stbir__encode_order1]);
19748+#endif
19749+#if stbir__coder_min_num >= 3
19750+ output[2] = stbir__float_to_half(encode[stbir__encode_order2]);
19751+#endif
19752+ output += stbir__coder_min_num;
19753+ encode += stbir__coder_min_num;
19754+ }
19755+#endif
19756+}
19757+
19758+static float *
19759+STBIR__CODER_NAME(stbir__decode_float_linear)(float *decodep,
19760+ int width_times_channels,
19761+ void const *inputp)
19762+{
19763+#ifdef stbir__decode_swizzle
19764+ float STBIR_STREAMOUT_PTR(*) decode = decodep;
19765+ float *decode_end = (float *)decode + width_times_channels;
19766+ float const *input = (float const *)inputp;
19767+
19768+#ifdef STBIR_SIMD
19769+ if (width_times_channels >= 16) {
19770+ float const *end_input_m16 = input + width_times_channels - 16;
19771+ decode_end -= 16;
19772+ STBIR_NO_UNROLL_LOOP_START_INF_FOR
19773+ for (;;) {
19774+ STBIR_NO_UNROLL(decode);
19775+#ifdef stbir__decode_swizzle
19776+#ifdef STBIR_SIMD8
19777+ {
19778+ stbir__simdf8 of0, of1;
19779+ stbir__simdf8_load(of0, input);
19780+ stbir__simdf8_load(of1, input + 8);
19781+ stbir__decode_simdf8_flip(of0);
19782+ stbir__decode_simdf8_flip(of1);
19783+ stbir__simdf8_store(decode, of0);
19784+ stbir__simdf8_store(decode + 8, of1);
19785+ }
19786+#else
19787+ {
19788+ stbir__simdf of0, of1, of2, of3;
19789+ stbir__simdf_load(of0, input);
19790+ stbir__simdf_load(of1, input + 4);
19791+ stbir__simdf_load(of2, input + 8);
19792+ stbir__simdf_load(of3, input + 12);
19793+ stbir__decode_simdf4_flip(of0);
19794+ stbir__decode_simdf4_flip(of1);
19795+ stbir__decode_simdf4_flip(of2);
19796+ stbir__decode_simdf4_flip(of3);
19797+ stbir__simdf_store(decode, of0);
19798+ stbir__simdf_store(decode + 4, of1);
19799+ stbir__simdf_store(decode + 8, of2);
19800+ stbir__simdf_store(decode + 12, of3);
19801+ }
19802+#endif
19803+#endif
19804+ decode += 16;
19805+ input += 16;
19806+ if (decode <= decode_end) {
19807+ continue;
19808+ }
19809+ if (decode == (decode_end + 16)) {
19810+ break;
19811+ }
19812+ decode = decode_end; // backup and do last couple
19813+ input = end_input_m16;
19814+ }
19815+ return decode_end + 16;
19816+ }
19817+#endif
19818+
19819+// try to do blocks of 4 when you can
19820+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19821+ decode += 4;
19822+ STBIR_SIMD_NO_UNROLL_LOOP_START
19823+ while (decode <= decode_end) {
19824+ STBIR_SIMD_NO_UNROLL(decode);
19825+ decode[0 - 4] = input[stbir__decode_order0];
19826+ decode[1 - 4] = input[stbir__decode_order1];
19827+ decode[2 - 4] = input[stbir__decode_order2];
19828+ decode[3 - 4] = input[stbir__decode_order3];
19829+ decode += 4;
19830+ input += 4;
19831+ }
19832+ decode -= 4;
19833+#endif
19834+
19835+// do the remnants
19836+#if stbir__coder_min_num < 4
19837+ STBIR_NO_UNROLL_LOOP_START
19838+ while (decode < decode_end) {
19839+ STBIR_NO_UNROLL(decode);
19840+ decode[0] = input[stbir__decode_order0];
19841+#if stbir__coder_min_num >= 2
19842+ decode[1] = input[stbir__decode_order1];
19843+#endif
19844+#if stbir__coder_min_num >= 3
19845+ decode[2] = input[stbir__decode_order2];
19846+#endif
19847+ decode += stbir__coder_min_num;
19848+ input += stbir__coder_min_num;
19849+ }
19850+#endif
19851+ return decode_end;
19852+
19853+#else
19854+
19855+ if ((void *)decodep != inputp) {
19856+ STBIR_MEMCPY(decodep, inputp, width_times_channels * sizeof(float));
19857+ }
19858+
19859+ return decodep + width_times_channels;
19860+
19861+#endif
19862+}
19863+
19864+static void
19865+STBIR__CODER_NAME(stbir__encode_float_linear)(void *outputp,
19866+ int width_times_channels,
19867+ float const *encode)
19868+{
19869+#if !defined(STBIR_FLOAT_HIGH_CLAMP) && !defined(STBIR_FLOAT_LO_CLAMP) && \
19870+ !defined(stbir__decode_swizzle)
19871+
19872+ if ((void *)outputp != (void *)encode) {
19873+ STBIR_MEMCPY(outputp, encode, width_times_channels * sizeof(float));
19874+ }
19875+
19876+#else
19877+
19878+ float STBIR_SIMD_STREAMOUT_PTR(*) output = (float *)outputp;
19879+ float *end_output = ((float *)output) + width_times_channels;
19880+
19881+#ifdef STBIR_FLOAT_HIGH_CLAMP
19882+#define stbir_scalar_hi_clamp(v) \
19883+ if (v > STBIR_FLOAT_HIGH_CLAMP) \
19884+ v = STBIR_FLOAT_HIGH_CLAMP;
19885+#else
19886+#define stbir_scalar_hi_clamp(v)
19887+#endif
19888+#ifdef STBIR_FLOAT_LOW_CLAMP
19889+#define stbir_scalar_lo_clamp(v) \
19890+ if (v < STBIR_FLOAT_LOW_CLAMP) \
19891+ v = STBIR_FLOAT_LOW_CLAMP;
19892+#else
19893+#define stbir_scalar_lo_clamp(v)
19894+#endif
19895+
19896+#ifdef STBIR_SIMD
19897+
19898+#ifdef STBIR_FLOAT_HIGH_CLAMP
19899+ const stbir__simdfX high_clamp = stbir__simdf_frepX(STBIR_FLOAT_HIGH_CLAMP);
19900+#endif
19901+#ifdef STBIR_FLOAT_LOW_CLAMP
19902+ const stbir__simdfX low_clamp = stbir__simdf_frepX(STBIR_FLOAT_LOW_CLAMP);
19903+#endif
19904+
19905+ if (width_times_channels >= (stbir__simdfX_float_count * 2)) {
19906+ float const *end_encode_m8 =
19907+ encode + width_times_channels - (stbir__simdfX_float_count * 2);
19908+ end_output -= (stbir__simdfX_float_count * 2);
19909+ STBIR_SIMD_NO_UNROLL_LOOP_START_INF_FOR
19910+ for (;;) {
19911+ stbir__simdfX e0, e1;
19912+ STBIR_SIMD_NO_UNROLL(encode);
19913+ stbir__simdfX_load(e0, encode);
19914+ stbir__simdfX_load(e1, encode + stbir__simdfX_float_count);
19915+#ifdef STBIR_FLOAT_HIGH_CLAMP
19916+ stbir__simdfX_min(e0, e0, high_clamp);
19917+ stbir__simdfX_min(e1, e1, high_clamp);
19918+#endif
19919+#ifdef STBIR_FLOAT_LOW_CLAMP
19920+ stbir__simdfX_max(e0, e0, low_clamp);
19921+ stbir__simdfX_max(e1, e1, low_clamp);
19922+#endif
19923+ stbir__encode_simdfX_unflip(e0);
19924+ stbir__encode_simdfX_unflip(e1);
19925+ stbir__simdfX_store(output, e0);
19926+ stbir__simdfX_store(output + stbir__simdfX_float_count, e1);
19927+ encode += stbir__simdfX_float_count * 2;
19928+ output += stbir__simdfX_float_count * 2;
19929+ if (output < end_output) {
19930+ continue;
19931+ }
19932+ if (output == (end_output + (stbir__simdfX_float_count * 2))) {
19933+ break;
19934+ }
19935+ output = end_output; // backup and do last couple
19936+ encode = end_encode_m8;
19937+ }
19938+ return;
19939+ }
19940+
19941+// try to do blocks of 4 when you can
19942+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19943+ output += 4;
19944+ STBIR_NO_UNROLL_LOOP_START
19945+ while (output <= end_output) {
19946+ stbir__simdf e0;
19947+ STBIR_NO_UNROLL(encode);
19948+ stbir__simdf_load(e0, encode);
19949+#ifdef STBIR_FLOAT_HIGH_CLAMP
19950+ stbir__simdf_min(e0, e0, high_clamp);
19951+#endif
19952+#ifdef STBIR_FLOAT_LOW_CLAMP
19953+ stbir__simdf_max(e0, e0, low_clamp);
19954+#endif
19955+ stbir__encode_simdf4_unflip(e0);
19956+ stbir__simdf_store(output - 4, e0);
19957+ output += 4;
19958+ encode += 4;
19959+ }
19960+ output -= 4;
19961+#endif
19962+
19963+#else
19964+
19965+// try to do blocks of 4 when you can
19966+#if stbir__coder_min_num != 3 // doesn't divide cleanly by four
19967+ output += 4;
19968+ STBIR_SIMD_NO_UNROLL_LOOP_START
19969+ while (output <= end_output) {
19970+ float e;
19971+ STBIR_SIMD_NO_UNROLL(encode);
19972+ e = encode[stbir__encode_order0];
19973+ stbir_scalar_hi_clamp(e);
19974+ stbir_scalar_lo_clamp(e);
19975+ output[0 - 4] = e;
19976+ e = encode[stbir__encode_order1];
19977+ stbir_scalar_hi_clamp(e);
19978+ stbir_scalar_lo_clamp(e);
19979+ output[1 - 4] = e;
19980+ e = encode[stbir__encode_order2];
19981+ stbir_scalar_hi_clamp(e);
19982+ stbir_scalar_lo_clamp(e);
19983+ output[2 - 4] = e;
19984+ e = encode[stbir__encode_order3];
19985+ stbir_scalar_hi_clamp(e);
19986+ stbir_scalar_lo_clamp(e);
19987+ output[3 - 4] = e;
19988+ output += 4;
19989+ encode += 4;
19990+ }
19991+ output -= 4;
19992+
19993+#endif
19994+
19995+#endif
19996+
19997+// do the remnants
19998+#if stbir__coder_min_num < 4
19999+ STBIR_NO_UNROLL_LOOP_START
20000+ while (output < end_output) {
20001+ float e;
20002+ STBIR_NO_UNROLL(encode);
20003+ e = encode[stbir__encode_order0];
20004+ stbir_scalar_hi_clamp(e);
20005+ stbir_scalar_lo_clamp(e);
20006+ output[0] = e;
20007+#if stbir__coder_min_num >= 2
20008+ e = encode[stbir__encode_order1];
20009+ stbir_scalar_hi_clamp(e);
20010+ stbir_scalar_lo_clamp(e);
20011+ output[1] = e;
20012+#endif
20013+#if stbir__coder_min_num >= 3
20014+ e = encode[stbir__encode_order2];
20015+ stbir_scalar_hi_clamp(e);
20016+ stbir_scalar_lo_clamp(e);
20017+ output[2] = e;
20018+#endif
20019+ output += stbir__coder_min_num;
20020+ encode += stbir__coder_min_num;
20021+ }
20022+#endif
20023+
20024+#endif
20025+}
20026
20027 #undef stbir__decode_suffix
20028 #undef stbir__decode_simdf8_flip
20029@@ -9862,373 +11729,989 @@ static void STBIR__CODER_NAME( stbir__encode_float_linear )( void * outputp, int
20030 #undef stbir_scalar_lo_clamp
20031 #undef STB_IMAGE_RESIZE_DO_CODERS
20032
20033-#elif defined( STB_IMAGE_RESIZE_DO_VERTICALS)
20034+#elif defined(STB_IMAGE_RESIZE_DO_VERTICALS)
20035
20036 #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20037-#define STBIR_chans( start, end ) STBIR_strs_join14(start,STBIR__vertical_channels,end,_cont)
20038+#define STBIR_chans(start, end) \
20039+ STBIR_strs_join14(start, STBIR__vertical_channels, end, _cont)
20040 #else
20041-#define STBIR_chans( start, end ) STBIR_strs_join1(start,STBIR__vertical_channels,end)
20042+#define STBIR_chans(start, end) \
20043+ STBIR_strs_join1(start, STBIR__vertical_channels, end)
20044 #endif
20045
20046 #if STBIR__vertical_channels >= 1
20047-#define stbIF0( code ) code
20048+#define stbIF0(code) code
20049 #else
20050-#define stbIF0( code )
20051+#define stbIF0(code)
20052 #endif
20053 #if STBIR__vertical_channels >= 2
20054-#define stbIF1( code ) code
20055+#define stbIF1(code) code
20056 #else
20057-#define stbIF1( code )
20058+#define stbIF1(code)
20059 #endif
20060 #if STBIR__vertical_channels >= 3
20061-#define stbIF2( code ) code
20062+#define stbIF2(code) code
20063 #else
20064-#define stbIF2( code )
20065+#define stbIF2(code)
20066 #endif
20067 #if STBIR__vertical_channels >= 4
20068-#define stbIF3( code ) code
20069+#define stbIF3(code) code
20070 #else
20071-#define stbIF3( code )
20072+#define stbIF3(code)
20073 #endif
20074 #if STBIR__vertical_channels >= 5
20075-#define stbIF4( code ) code
20076+#define stbIF4(code) code
20077 #else
20078-#define stbIF4( code )
20079+#define stbIF4(code)
20080 #endif
20081 #if STBIR__vertical_channels >= 6
20082-#define stbIF5( code ) code
20083+#define stbIF5(code) code
20084 #else
20085-#define stbIF5( code )
20086+#define stbIF5(code)
20087 #endif
20088 #if STBIR__vertical_channels >= 7
20089-#define stbIF6( code ) code
20090+#define stbIF6(code) code
20091 #else
20092-#define stbIF6( code )
20093+#define stbIF6(code)
20094 #endif
20095 #if STBIR__vertical_channels >= 8
20096-#define stbIF7( code ) code
20097+#define stbIF7(code) code
20098+#else
20099+#define stbIF7(code)
20100+#endif
20101+
20102+static void
20103+STBIR_chans(stbir__vertical_scatter_with_,
20104+ _coeffs)(float **outputs,
20105+ float const *vertical_coefficients,
20106+ float const *input,
20107+ float const *input_end)
20108+{
20109+ stbIF0(float STBIR_SIMD_STREAMOUT_PTR(*) output0 = outputs[0];
20110+ float c0s = vertical_coefficients[0];)
20111+ stbIF1(float STBIR_SIMD_STREAMOUT_PTR(*) output1 = outputs[1];
20112+ float c1s = vertical_coefficients[1];)
20113+ stbIF2(float STBIR_SIMD_STREAMOUT_PTR(*) output2 = outputs[2];
20114+ float c2s = vertical_coefficients[2];)
20115+ stbIF3(float STBIR_SIMD_STREAMOUT_PTR(*) output3 = outputs[3];
20116+ float c3s = vertical_coefficients[3];)
20117+ stbIF4(float STBIR_SIMD_STREAMOUT_PTR(*) output4 =
20118+ outputs[4];
20119+ float c4s = vertical_coefficients[4];)
20120+ stbIF5(float STBIR_SIMD_STREAMOUT_PTR(*) output5 =
20121+ outputs[5];
20122+ float c5s = vertical_coefficients[5];)
20123+ stbIF6(float STBIR_SIMD_STREAMOUT_PTR(*) output6 =
20124+ outputs[6];
20125+ float c6s = vertical_coefficients[6];)
20126+ stbIF7(float STBIR_SIMD_STREAMOUT_PTR(*)
20127+ output7 = outputs[7];
20128+ float c7s = vertical_coefficients[7];)
20129+
20130+#ifdef STBIR_SIMD
20131+ {
20132+ stbIF0(stbir__simdfX c0 = stbir__simdf_frepX(c0s);)
20133+ stbIF1(stbir__simdfX c1 = stbir__simdf_frepX(c1s);)
20134+ stbIF2(stbir__simdfX c2 = stbir__simdf_frepX(c2s);) stbIF3(
20135+ stbir__simdfX c3 = stbir__simdf_frepX(c3s);)
20136+ stbIF4(stbir__simdfX c4 = stbir__simdf_frepX(c4s);) stbIF5(
20137+ stbir__simdfX c5 = stbir__simdf_frepX(c5s);)
20138+ stbIF6(stbir__simdfX c6 = stbir__simdf_frepX(c6s);)
20139+ stbIF7(stbir__simdfX c7 = stbir__simdf_frepX(c7s);)
20140+ STBIR_SIMD_NO_UNROLL_LOOP_START while (
20141+ ((char *)input_end - (char *)input) >=
20142+ (16 * stbir__simdfX_float_count))
20143+ {
20144+ stbir__simdfX o0, o1, o2, o3, r0, r1, r2, r3;
20145+ STBIR_SIMD_NO_UNROLL(output0);
20146+
20147+ stbir__simdfX_load(r0, input);
20148+ stbir__simdfX_load(r1, input + stbir__simdfX_float_count);
20149+ stbir__simdfX_load(r2, input + (2 * stbir__simdfX_float_count));
20150+ stbir__simdfX_load(r3, input + (3 * stbir__simdfX_float_count));
20151+
20152+#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20153+ stbIF0(
20154+ stbir__simdfX_load(o0, output0);
20155+ stbir__simdfX_load(o1, output0 + stbir__simdfX_float_count);
20156+ stbir__simdfX_load(o2,
20157+ output0 + (2 * stbir__simdfX_float_count));
20158+ stbir__simdfX_load(o3,
20159+ output0 + (3 * stbir__simdfX_float_count));
20160+ stbir__simdfX_madd(o0, o0, r0, c0);
20161+ stbir__simdfX_madd(o1, o1, r1, c0);
20162+ stbir__simdfX_madd(o2, o2, r2, c0);
20163+ stbir__simdfX_madd(o3, o3, r3, c0);
20164+ stbir__simdfX_store(output0, o0);
20165+ stbir__simdfX_store(output0 + stbir__simdfX_float_count, o1);
20166+ stbir__simdfX_store(output0 + (2 * stbir__simdfX_float_count),
20167+ o2);
20168+ stbir__simdfX_store(
20169+ output0 + (3 * stbir__simdfX_float_count),
20170+ o3);) stbIF1(stbir__simdfX_load(o0, output1);
20171+ stbir__simdfX_load(
20172+ o1, output1 + stbir__simdfX_float_count);
20173+ stbir__simdfX_load(
20174+ o2,
20175+ output1 + (2 * stbir__simdfX_float_count));
20176+ stbir__simdfX_load(
20177+ o3,
20178+ output1 + (3 * stbir__simdfX_float_count));
20179+ stbir__simdfX_madd(o0, o0, r0, c1);
20180+ stbir__simdfX_madd(o1, o1, r1, c1);
20181+ stbir__simdfX_madd(o2, o2, r2, c1);
20182+ stbir__simdfX_madd(o3, o3, r3, c1);
20183+ stbir__simdfX_store(output1, o0);
20184+ stbir__simdfX_store(
20185+ output1 + stbir__simdfX_float_count, o1);
20186+ stbir__simdfX_store(
20187+ output1 + (2 * stbir__simdfX_float_count),
20188+ o2);
20189+ stbir__simdfX_store(
20190+ output1 + (3 * stbir__simdfX_float_count),
20191+ o3);)
20192+ stbIF2(
20193+ stbir__simdfX_load(o0, output2);
20194+ stbir__simdfX_load(o1, output2 + stbir__simdfX_float_count);
20195+ stbir__simdfX_load(
20196+ o2, output2 + (2 * stbir__simdfX_float_count));
20197+ stbir__simdfX_load(
20198+ o3, output2 + (3 * stbir__simdfX_float_count));
20199+ stbir__simdfX_madd(o0, o0, r0, c2);
20200+ stbir__simdfX_madd(o1, o1, r1, c2);
20201+ stbir__simdfX_madd(o2, o2, r2, c2);
20202+ stbir__simdfX_madd(o3, o3, r3, c2);
20203+ stbir__simdfX_store(output2, o0);
20204+ stbir__simdfX_store(output2 + stbir__simdfX_float_count,
20205+ o1);
20206+ stbir__simdfX_store(
20207+ output2 + (2 * stbir__simdfX_float_count), o2);
20208+ stbir__simdfX_store(
20209+ output2 + (3 * stbir__simdfX_float_count),
20210+ o3);) stbIF3(stbir__simdfX_load(o0, output3);
20211+ stbir__simdfX_load(
20212+ o1,
20213+ output3 + stbir__simdfX_float_count);
20214+ stbir__simdfX_load(
20215+ o2,
20216+ output3 +
20217+ (2 * stbir__simdfX_float_count));
20218+ stbir__simdfX_load(
20219+ o3,
20220+ output3 +
20221+ (3 * stbir__simdfX_float_count));
20222+ stbir__simdfX_madd(o0, o0, r0, c3);
20223+ stbir__simdfX_madd(o1, o1, r1, c3);
20224+ stbir__simdfX_madd(o2, o2, r2, c3);
20225+ stbir__simdfX_madd(o3, o3, r3, c3);
20226+ stbir__simdfX_store(output3, o0);
20227+ stbir__simdfX_store(
20228+ output3 + stbir__simdfX_float_count,
20229+ o1);
20230+ stbir__simdfX_store(
20231+ output3 +
20232+ (2 * stbir__simdfX_float_count),
20233+ o2);
20234+ stbir__simdfX_store(
20235+ output3 +
20236+ (3 * stbir__simdfX_float_count),
20237+ o3);)
20238+ stbIF4(stbir__simdfX_load(o0, output4); stbir__simdfX_load(
20239+ o1, output4 + stbir__simdfX_float_count);
20240+ stbir__simdfX_load(
20241+ o2, output4 + (2 * stbir__simdfX_float_count));
20242+ stbir__simdfX_load(
20243+ o3, output4 + (3 * stbir__simdfX_float_count));
20244+ stbir__simdfX_madd(o0, o0, r0, c4);
20245+ stbir__simdfX_madd(o1, o1, r1, c4);
20246+ stbir__simdfX_madd(o2, o2, r2, c4);
20247+ stbir__simdfX_madd(o3, o3, r3, c4);
20248+ stbir__simdfX_store(output4, o0);
20249+ stbir__simdfX_store(
20250+ output4 + stbir__simdfX_float_count, o1);
20251+ stbir__simdfX_store(
20252+ output4 + (2 * stbir__simdfX_float_count), o2);
20253+ stbir__simdfX_store(
20254+ output4 + (3 * stbir__simdfX_float_count), o3);)
20255+ stbIF5(
20256+ stbir__simdfX_load(o0, output5); stbir__simdfX_load(
20257+ o1, output5 + stbir__simdfX_float_count);
20258+ stbir__simdfX_load(
20259+ o2, output5 + (2 * stbir__simdfX_float_count));
20260+ stbir__simdfX_load(
20261+ o3, output5 + (3 * stbir__simdfX_float_count));
20262+ stbir__simdfX_madd(o0, o0, r0, c5);
20263+ stbir__simdfX_madd(o1, o1, r1, c5);
20264+ stbir__simdfX_madd(o2, o2, r2, c5);
20265+ stbir__simdfX_madd(o3, o3, r3, c5);
20266+ stbir__simdfX_store(output5, o0);
20267+ stbir__simdfX_store(
20268+ output5 + stbir__simdfX_float_count, o1);
20269+ stbir__simdfX_store(
20270+ output5 + (2 * stbir__simdfX_float_count), o2);
20271+ stbir__simdfX_store(
20272+ output5 + (3 * stbir__simdfX_float_count), o3);)
20273+ stbIF6(
20274+ stbir__simdfX_load(o0, output6);
20275+ stbir__simdfX_load(
20276+ o1, output6 + stbir__simdfX_float_count);
20277+ stbir__simdfX_load(
20278+ o2,
20279+ output6 + (2 * stbir__simdfX_float_count));
20280+ stbir__simdfX_load(
20281+ o3,
20282+ output6 + (3 * stbir__simdfX_float_count));
20283+ stbir__simdfX_madd(o0, o0, r0, c6);
20284+ stbir__simdfX_madd(o1, o1, r1, c6);
20285+ stbir__simdfX_madd(o2, o2, r2, c6);
20286+ stbir__simdfX_madd(o3, o3, r3, c6);
20287+ stbir__simdfX_store(output6, o0);
20288+ stbir__simdfX_store(
20289+ output6 + stbir__simdfX_float_count, o1);
20290+ stbir__simdfX_store(
20291+ output6 + (2 * stbir__simdfX_float_count),
20292+ o2);
20293+ stbir__simdfX_store(
20294+ output6 + (3 * stbir__simdfX_float_count),
20295+ o3);)
20296+ stbIF7(stbir__simdfX_load(o0, output7);
20297+ stbir__simdfX_load(
20298+ o1,
20299+ output7 + stbir__simdfX_float_count);
20300+ stbir__simdfX_load(
20301+ o2,
20302+ output7 +
20303+ (2 * stbir__simdfX_float_count));
20304+ stbir__simdfX_load(
20305+ o3,
20306+ output7 +
20307+ (3 * stbir__simdfX_float_count));
20308+ stbir__simdfX_madd(o0, o0, r0, c7);
20309+ stbir__simdfX_madd(o1, o1, r1, c7);
20310+ stbir__simdfX_madd(o2, o2, r2, c7);
20311+ stbir__simdfX_madd(o3, o3, r3, c7);
20312+ stbir__simdfX_store(output7, o0);
20313+ stbir__simdfX_store(
20314+ output7 + stbir__simdfX_float_count,
20315+ o1);
20316+ stbir__simdfX_store(
20317+ output7 +
20318+ (2 * stbir__simdfX_float_count),
20319+ o2);
20320+ stbir__simdfX_store(
20321+ output7 +
20322+ (3 * stbir__simdfX_float_count),
20323+ o3);)
20324+#else
20325+ stbIF0(
20326+ stbir__simdfX_mult(o0, r0, c0); stbir__simdfX_mult(o1, r1, c0);
20327+ stbir__simdfX_mult(o2, r2, c0);
20328+ stbir__simdfX_mult(o3, r3, c0);
20329+ stbir__simdfX_store(output0, o0);
20330+ stbir__simdfX_store(output0 + stbir__simdfX_float_count, o1);
20331+ stbir__simdfX_store(output0 + (2 * stbir__simdfX_float_count),
20332+ o2);
20333+ stbir__simdfX_store(
20334+ output0 + (3 * stbir__simdfX_float_count),
20335+ o3);) stbIF1(stbir__simdfX_mult(o0, r0, c1);
20336+ stbir__simdfX_mult(o1, r1, c1);
20337+ stbir__simdfX_mult(o2, r2, c1);
20338+ stbir__simdfX_mult(o3, r3, c1);
20339+ stbir__simdfX_store(output1, o0);
20340+ stbir__simdfX_store(
20341+ output1 + stbir__simdfX_float_count, o1);
20342+ stbir__simdfX_store(
20343+ output1 + (2 * stbir__simdfX_float_count),
20344+ o2);
20345+ stbir__simdfX_store(
20346+ output1 + (3 * stbir__simdfX_float_count),
20347+ o3);)
20348+ stbIF2(stbir__simdfX_mult(o0, r0, c2);
20349+ stbir__simdfX_mult(o1, r1, c2);
20350+ stbir__simdfX_mult(o2, r2, c2);
20351+ stbir__simdfX_mult(o3, r3, c2);
20352+ stbir__simdfX_store(output2, o0);
20353+ stbir__simdfX_store(output2 + stbir__simdfX_float_count,
20354+ o1);
20355+ stbir__simdfX_store(
20356+ output2 + (2 * stbir__simdfX_float_count), o2);
20357+ stbir__simdfX_store(
20358+ output2 + (3 * stbir__simdfX_float_count),
20359+ o3);) stbIF3(stbir__simdfX_mult(o0, r0, c3);
20360+ stbir__simdfX_mult(o1, r1, c3);
20361+ stbir__simdfX_mult(o2, r2, c3);
20362+ stbir__simdfX_mult(o3, r3, c3);
20363+ stbir__simdfX_store(output3, o0);
20364+ stbir__simdfX_store(
20365+ output3 + stbir__simdfX_float_count,
20366+ o1);
20367+ stbir__simdfX_store(
20368+ output3 +
20369+ (2 * stbir__simdfX_float_count),
20370+ o2);
20371+ stbir__simdfX_store(
20372+ output3 +
20373+ (3 * stbir__simdfX_float_count),
20374+ o3);)
20375+ stbIF4(stbir__simdfX_mult(o0, r0, c4);
20376+ stbir__simdfX_mult(o1, r1, c4);
20377+ stbir__simdfX_mult(o2, r2, c4);
20378+ stbir__simdfX_mult(o3, r3, c4);
20379+ stbir__simdfX_store(output4, o0);
20380+ stbir__simdfX_store(
20381+ output4 + stbir__simdfX_float_count, o1);
20382+ stbir__simdfX_store(
20383+ output4 + (2 * stbir__simdfX_float_count), o2);
20384+ stbir__simdfX_store(
20385+ output4 + (3 * stbir__simdfX_float_count), o3);)
20386+ stbIF5(
20387+ stbir__simdfX_mult(o0, r0, c5);
20388+ stbir__simdfX_mult(o1, r1, c5);
20389+ stbir__simdfX_mult(o2, r2, c5);
20390+ stbir__simdfX_mult(o3, r3, c5);
20391+ stbir__simdfX_store(output5, o0);
20392+ stbir__simdfX_store(
20393+ output5 + stbir__simdfX_float_count, o1);
20394+ stbir__simdfX_store(
20395+ output5 + (2 * stbir__simdfX_float_count), o2);
20396+ stbir__simdfX_store(
20397+ output5 + (3 * stbir__simdfX_float_count), o3);)
20398+ stbIF6(
20399+ stbir__simdfX_mult(o0, r0, c6);
20400+ stbir__simdfX_mult(o1, r1, c6);
20401+ stbir__simdfX_mult(o2, r2, c6);
20402+ stbir__simdfX_mult(o3, r3, c6);
20403+ stbir__simdfX_store(output6, o0);
20404+ stbir__simdfX_store(
20405+ output6 + stbir__simdfX_float_count, o1);
20406+ stbir__simdfX_store(
20407+ output6 + (2 * stbir__simdfX_float_count),
20408+ o2);
20409+ stbir__simdfX_store(
20410+ output6 + (3 * stbir__simdfX_float_count),
20411+ o3);)
20412+ stbIF7(stbir__simdfX_mult(o0, r0, c7);
20413+ stbir__simdfX_mult(o1, r1, c7);
20414+ stbir__simdfX_mult(o2, r2, c7);
20415+ stbir__simdfX_mult(o3, r3, c7);
20416+ stbir__simdfX_store(output7, o0);
20417+ stbir__simdfX_store(
20418+ output7 + stbir__simdfX_float_count,
20419+ o1);
20420+ stbir__simdfX_store(
20421+ output7 +
20422+ (2 * stbir__simdfX_float_count),
20423+ o2);
20424+ stbir__simdfX_store(
20425+ output7 +
20426+ (3 * stbir__simdfX_float_count),
20427+ o3);)
20428+#endif
20429+
20430+ input += (4 * stbir__simdfX_float_count);
20431+ stbIF0(output0 += (4 * stbir__simdfX_float_count);) stbIF1(
20432+ output1 += (4 * stbir__simdfX_float_count);)
20433+ stbIF2(output2 += (4 * stbir__simdfX_float_count);) stbIF3(
20434+ output3 += (4 * stbir__simdfX_float_count);)
20435+ stbIF4(output4 += (4 * stbir__simdfX_float_count);) stbIF5(
20436+ output5 += (4 * stbir__simdfX_float_count);)
20437+ stbIF6(output6 += (4 * stbir__simdfX_float_count);)
20438+ stbIF7(output7 += (4 * stbir__simdfX_float_count);)
20439+ }
20440+ STBIR_SIMD_NO_UNROLL_LOOP_START
20441+ while (((char *)input_end - (char *)input) >= 16) {
20442+ stbir__simdf o0, r0;
20443+ STBIR_SIMD_NO_UNROLL(output0);
20444+
20445+ stbir__simdf_load(r0, input);
20446+
20447+#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20448+ stbIF0(stbir__simdf_load(o0, output0); stbir__simdf_madd(
20449+ o0, o0, r0, stbir__if_simdf8_cast_to_simdf4(c0));
20450+ stbir__simdf_store(
20451+ output0,
20452+ o0);) stbIF1(stbir__simdf_load(o0, output1);
20453+ stbir__simdf_madd(
20454+ o0,
20455+ o0,
20456+ r0,
20457+ stbir__if_simdf8_cast_to_simdf4(c1));
20458+ stbir__simdf_store(output1, o0);)
20459+ stbIF2(stbir__simdf_load(o0, output2); stbir__simdf_madd(
20460+ o0, o0, r0, stbir__if_simdf8_cast_to_simdf4(c2));
20461+ stbir__simdf_store(output2, o0);)
20462+ stbIF3(stbir__simdf_load(o0, output3); stbir__simdf_madd(
20463+ o0, o0, r0, stbir__if_simdf8_cast_to_simdf4(c3));
20464+ stbir__simdf_store(output3, o0);)
20465+ stbIF4(stbir__simdf_load(o0, output4);
20466+ stbir__simdf_madd(
20467+ o0,
20468+ o0,
20469+ r0,
20470+ stbir__if_simdf8_cast_to_simdf4(c4));
20471+ stbir__simdf_store(output4, o0);)
20472+ stbIF5(stbir__simdf_load(o0, output5);
20473+ stbir__simdf_madd(
20474+ o0,
20475+ o0,
20476+ r0,
20477+ stbir__if_simdf8_cast_to_simdf4(c5));
20478+ stbir__simdf_store(output5, o0);)
20479+ stbIF6(stbir__simdf_load(o0, output6);
20480+ stbir__simdf_madd(
20481+ o0,
20482+ o0,
20483+ r0,
20484+ stbir__if_simdf8_cast_to_simdf4(c6));
20485+ stbir__simdf_store(output6, o0);)
20486+ stbIF7(stbir__simdf_load(o0, output7);
20487+ stbir__simdf_madd(
20488+ o0,
20489+ o0,
20490+ r0,
20491+ stbir__if_simdf8_cast_to_simdf4(
20492+ c7));
20493+ stbir__simdf_store(output7, o0);)
20494+#else
20495+ stbIF0(
20496+ stbir__simdf_mult(o0, r0, stbir__if_simdf8_cast_to_simdf4(c0));
20497+ stbir__simdf_store(output0, o0);)
20498+ stbIF1(stbir__simdf_mult(
20499+ o0, r0, stbir__if_simdf8_cast_to_simdf4(c1));
20500+ stbir__simdf_store(output1, o0);)
20501+ stbIF2(stbir__simdf_mult(
20502+ o0, r0, stbir__if_simdf8_cast_to_simdf4(c2));
20503+ stbir__simdf_store(output2, o0);)
20504+ stbIF3(stbir__simdf_mult(
20505+ o0, r0, stbir__if_simdf8_cast_to_simdf4(c3));
20506+ stbir__simdf_store(output3, o0);)
20507+ stbIF4(stbir__simdf_mult(
20508+ o0,
20509+ r0,
20510+ stbir__if_simdf8_cast_to_simdf4(c4));
20511+ stbir__simdf_store(output4, o0);)
20512+ stbIF5(stbir__simdf_mult(
20513+ o0,
20514+ r0,
20515+ stbir__if_simdf8_cast_to_simdf4(c5));
20516+ stbir__simdf_store(output5, o0);)
20517+ stbIF6(stbir__simdf_mult(
20518+ o0,
20519+ r0,
20520+ stbir__if_simdf8_cast_to_simdf4(
20521+ c6));
20522+ stbir__simdf_store(output6, o0);)
20523+ stbIF7(
20524+ stbir__simdf_mult(
20525+ o0,
20526+ r0,
20527+ stbir__if_simdf8_cast_to_simdf4(
20528+ c7));
20529+ stbir__simdf_store(output7, o0);)
20530+#endif
20531+
20532+ input += 4;
20533+ stbIF0(output0 += 4;) stbIF1(output1 += 4;) stbIF2(output2 += 4;)
20534+ stbIF3(output3 += 4;) stbIF4(output4 += 4;)
20535+ stbIF5(output5 += 4;) stbIF6(output6 += 4;)
20536+ stbIF7(output7 += 4;)
20537+ }
20538+ }
20539+#else
20540+ STBIR_NO_UNROLL_LOOP_START while (
20541+ ((char *)input_end - (char *)input) >=
20542+ 16)
20543+ {
20544+ float r0, r1, r2, r3;
20545+ STBIR_NO_UNROLL(input);
20546+
20547+ r0 = input[0], r1 = input[1], r2 = input[2], r3 = input[3];
20548+
20549+#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20550+ stbIF0(output0[0] += (r0 * c0s); output0[1] += (r1 * c0s);
20551+ output0[2] += (r2 * c0s); output0[3] += (r3 * c0s);)
20552+ stbIF1(output1[0] += (r0 * c1s); output1[1] += (r1 * c1s);
20553+ output1[2] += (r2 * c1s); output1[3] += (r3 * c1s);)
20554+ stbIF2(output2[0] += (r0 * c2s); output2[1] += (r1 * c2s);
20555+ output2[2] += (r2 * c2s); output2[3] += (r3 * c2s);)
20556+ stbIF3(output3[0] += (r0 * c3s); output3[1] += (r1 * c3s);
20557+ output3[2] += (r2 * c3s); output3[3] += (r3 * c3s);)
20558+ stbIF4(
20559+ output4[0] += (r0 * c4s); output4[1] += (r1 * c4s);
20560+ output4[2] += (r2 * c4s); output4[3] += (r3 * c4s);)
20561+ stbIF5(output5[0] += (r0 * c5s);
20562+ output5[1] += (r1 * c5s);
20563+ output5[2] += (r2 * c5s);
20564+ output5[3] += (r3 * c5s);)
20565+ stbIF6(output6[0] += (r0 * c6s);
20566+ output6[1] += (r1 * c6s);
20567+ output6[2] += (r2 * c6s);
20568+ output6[3] += (r3 * c6s);)
20569+ stbIF7(output7[0] += (r0 * c7s);
20570+ output7[1] += (r1 * c7s);
20571+ output7[2] += (r2 * c7s);
20572+ output7[3] += (r3 * c7s);)
20573+#else
20574+ stbIF0(output0[0] = (r0 * c0s); output0[1] = (r1 * c0s);
20575+ output0[2] = (r2 * c0s); output0[3] = (r3 * c0s);)
20576+ stbIF1(output1[0] = (r0 * c1s); output1[1] = (r1 * c1s);
20577+ output1[2] = (r2 * c1s); output1[3] = (r3 * c1s);)
20578+ stbIF2(output2[0] = (r0 * c2s); output2[1] = (r1 * c2s);
20579+ output2[2] = (r2 * c2s); output2[3] = (r3 * c2s);)
20580+ stbIF3(output3[0] = (r0 * c3s); output3[1] = (r1 * c3s);
20581+ output3[2] = (r2 * c3s); output3[3] = (r3 * c3s);)
20582+ stbIF4(output4[0] = (r0 * c4s); output4[1] = (r1 * c4s);
20583+ output4[2] = (r2 * c4s);
20584+ output4[3] = (r3 * c4s);)
20585+ stbIF5(output5[0] = (r0 * c5s);
20586+ output5[1] = (r1 * c5s);
20587+ output5[2] = (r2 * c5s);
20588+ output5[3] = (r3 * c5s);)
20589+ stbIF6(output6[0] = (r0 * c6s);
20590+ output6[1] = (r1 * c6s);
20591+ output6[2] = (r2 * c6s);
20592+ output6[3] = (r3 * c6s);)
20593+ stbIF7(output7[0] = (r0 * c7s);
20594+ output7[1] = (r1 * c7s);
20595+ output7[2] = (r2 * c7s);
20596+ output7[3] = (r3 * c7s);)
20597+#endif
20598+
20599+ input += 4;
20600+ stbIF0(output0 += 4;) stbIF1(output1 += 4;) stbIF2(output2 += 4;)
20601+ stbIF3(output3 += 4;) stbIF4(output4 += 4;) stbIF5(output5 += 4;)
20602+ stbIF6(output6 += 4;) stbIF7(output7 += 4;)
20603+ }
20604+#endif
20605+ STBIR_NO_UNROLL_LOOP_START
20606+ while (input < input_end) {
20607+ float r = input[0];
20608+ STBIR_NO_UNROLL(output0);
20609+
20610+#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20611+ stbIF0(output0[0] += (r * c0s);) stbIF1(output1[0] += (r * c1s);)
20612+ stbIF2(output2[0] += (r * c2s);) stbIF3(output3[0] += (r * c3s);)
20613+ stbIF4(output4[0] += (r * c4s);)
20614+ stbIF5(output5[0] += (r * c5s);)
20615+ stbIF6(output6[0] += (r * c6s);)
20616+ stbIF7(output7[0] += (r * c7s);)
20617 #else
20618-#define stbIF7( code )
20619-#endif
20620-
20621-static void STBIR_chans( stbir__vertical_scatter_with_,_coeffs)( float ** outputs, float const * vertical_coefficients, float const * input, float const * input_end )
20622-{
20623- stbIF0( float STBIR_SIMD_STREAMOUT_PTR( * ) output0 = outputs[0]; float c0s = vertical_coefficients[0]; )
20624- stbIF1( float STBIR_SIMD_STREAMOUT_PTR( * ) output1 = outputs[1]; float c1s = vertical_coefficients[1]; )
20625- stbIF2( float STBIR_SIMD_STREAMOUT_PTR( * ) output2 = outputs[2]; float c2s = vertical_coefficients[2]; )
20626- stbIF3( float STBIR_SIMD_STREAMOUT_PTR( * ) output3 = outputs[3]; float c3s = vertical_coefficients[3]; )
20627- stbIF4( float STBIR_SIMD_STREAMOUT_PTR( * ) output4 = outputs[4]; float c4s = vertical_coefficients[4]; )
20628- stbIF5( float STBIR_SIMD_STREAMOUT_PTR( * ) output5 = outputs[5]; float c5s = vertical_coefficients[5]; )
20629- stbIF6( float STBIR_SIMD_STREAMOUT_PTR( * ) output6 = outputs[6]; float c6s = vertical_coefficients[6]; )
20630- stbIF7( float STBIR_SIMD_STREAMOUT_PTR( * ) output7 = outputs[7]; float c7s = vertical_coefficients[7]; )
20631-
20632- #ifdef STBIR_SIMD
20633- {
20634- stbIF0(stbir__simdfX c0 = stbir__simdf_frepX( c0s ); )
20635- stbIF1(stbir__simdfX c1 = stbir__simdf_frepX( c1s ); )
20636- stbIF2(stbir__simdfX c2 = stbir__simdf_frepX( c2s ); )
20637- stbIF3(stbir__simdfX c3 = stbir__simdf_frepX( c3s ); )
20638- stbIF4(stbir__simdfX c4 = stbir__simdf_frepX( c4s ); )
20639- stbIF5(stbir__simdfX c5 = stbir__simdf_frepX( c5s ); )
20640- stbIF6(stbir__simdfX c6 = stbir__simdf_frepX( c6s ); )
20641- stbIF7(stbir__simdfX c7 = stbir__simdf_frepX( c7s ); )
20642- STBIR_SIMD_NO_UNROLL_LOOP_START
20643- while ( ( (char*)input_end - (char*) input ) >= (16*stbir__simdfX_float_count) )
20644- {
20645- stbir__simdfX o0, o1, o2, o3, r0, r1, r2, r3;
20646- STBIR_SIMD_NO_UNROLL(output0);
20647-
20648- stbir__simdfX_load( r0, input ); stbir__simdfX_load( r1, input+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input+(3*stbir__simdfX_float_count) );
20649-
20650- #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20651- stbIF0( stbir__simdfX_load( o0, output0 ); stbir__simdfX_load( o1, output0+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output0+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output0+(3*stbir__simdfX_float_count) );
20652- stbir__simdfX_madd( o0, o0, r0, c0 ); stbir__simdfX_madd( o1, o1, r1, c0 ); stbir__simdfX_madd( o2, o2, r2, c0 ); stbir__simdfX_madd( o3, o3, r3, c0 );
20653- stbir__simdfX_store( output0, o0 ); stbir__simdfX_store( output0+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output0+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output0+(3*stbir__simdfX_float_count), o3 ); )
20654- stbIF1( stbir__simdfX_load( o0, output1 ); stbir__simdfX_load( o1, output1+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output1+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output1+(3*stbir__simdfX_float_count) );
20655- stbir__simdfX_madd( o0, o0, r0, c1 ); stbir__simdfX_madd( o1, o1, r1, c1 ); stbir__simdfX_madd( o2, o2, r2, c1 ); stbir__simdfX_madd( o3, o3, r3, c1 );
20656- stbir__simdfX_store( output1, o0 ); stbir__simdfX_store( output1+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output1+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output1+(3*stbir__simdfX_float_count), o3 ); )
20657- stbIF2( stbir__simdfX_load( o0, output2 ); stbir__simdfX_load( o1, output2+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output2+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output2+(3*stbir__simdfX_float_count) );
20658- stbir__simdfX_madd( o0, o0, r0, c2 ); stbir__simdfX_madd( o1, o1, r1, c2 ); stbir__simdfX_madd( o2, o2, r2, c2 ); stbir__simdfX_madd( o3, o3, r3, c2 );
20659- stbir__simdfX_store( output2, o0 ); stbir__simdfX_store( output2+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output2+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output2+(3*stbir__simdfX_float_count), o3 ); )
20660- stbIF3( stbir__simdfX_load( o0, output3 ); stbir__simdfX_load( o1, output3+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output3+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output3+(3*stbir__simdfX_float_count) );
20661- stbir__simdfX_madd( o0, o0, r0, c3 ); stbir__simdfX_madd( o1, o1, r1, c3 ); stbir__simdfX_madd( o2, o2, r2, c3 ); stbir__simdfX_madd( o3, o3, r3, c3 );
20662- stbir__simdfX_store( output3, o0 ); stbir__simdfX_store( output3+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output3+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output3+(3*stbir__simdfX_float_count), o3 ); )
20663- stbIF4( stbir__simdfX_load( o0, output4 ); stbir__simdfX_load( o1, output4+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output4+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output4+(3*stbir__simdfX_float_count) );
20664- stbir__simdfX_madd( o0, o0, r0, c4 ); stbir__simdfX_madd( o1, o1, r1, c4 ); stbir__simdfX_madd( o2, o2, r2, c4 ); stbir__simdfX_madd( o3, o3, r3, c4 );
20665- stbir__simdfX_store( output4, o0 ); stbir__simdfX_store( output4+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output4+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output4+(3*stbir__simdfX_float_count), o3 ); )
20666- stbIF5( stbir__simdfX_load( o0, output5 ); stbir__simdfX_load( o1, output5+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output5+(2*stbir__simdfX_float_count)); stbir__simdfX_load( o3, output5+(3*stbir__simdfX_float_count) );
20667- stbir__simdfX_madd( o0, o0, r0, c5 ); stbir__simdfX_madd( o1, o1, r1, c5 ); stbir__simdfX_madd( o2, o2, r2, c5 ); stbir__simdfX_madd( o3, o3, r3, c5 );
20668- stbir__simdfX_store( output5, o0 ); stbir__simdfX_store( output5+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output5+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output5+(3*stbir__simdfX_float_count), o3 ); )
20669- stbIF6( stbir__simdfX_load( o0, output6 ); stbir__simdfX_load( o1, output6+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output6+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output6+(3*stbir__simdfX_float_count) );
20670- stbir__simdfX_madd( o0, o0, r0, c6 ); stbir__simdfX_madd( o1, o1, r1, c6 ); stbir__simdfX_madd( o2, o2, r2, c6 ); stbir__simdfX_madd( o3, o3, r3, c6 );
20671- stbir__simdfX_store( output6, o0 ); stbir__simdfX_store( output6+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output6+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output6+(3*stbir__simdfX_float_count), o3 ); )
20672- stbIF7( stbir__simdfX_load( o0, output7 ); stbir__simdfX_load( o1, output7+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output7+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output7+(3*stbir__simdfX_float_count) );
20673- stbir__simdfX_madd( o0, o0, r0, c7 ); stbir__simdfX_madd( o1, o1, r1, c7 ); stbir__simdfX_madd( o2, o2, r2, c7 ); stbir__simdfX_madd( o3, o3, r3, c7 );
20674- stbir__simdfX_store( output7, o0 ); stbir__simdfX_store( output7+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output7+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output7+(3*stbir__simdfX_float_count), o3 ); )
20675- #else
20676- stbIF0( stbir__simdfX_mult( o0, r0, c0 ); stbir__simdfX_mult( o1, r1, c0 ); stbir__simdfX_mult( o2, r2, c0 ); stbir__simdfX_mult( o3, r3, c0 );
20677- stbir__simdfX_store( output0, o0 ); stbir__simdfX_store( output0+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output0+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output0+(3*stbir__simdfX_float_count), o3 ); )
20678- stbIF1( stbir__simdfX_mult( o0, r0, c1 ); stbir__simdfX_mult( o1, r1, c1 ); stbir__simdfX_mult( o2, r2, c1 ); stbir__simdfX_mult( o3, r3, c1 );
20679- stbir__simdfX_store( output1, o0 ); stbir__simdfX_store( output1+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output1+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output1+(3*stbir__simdfX_float_count), o3 ); )
20680- stbIF2( stbir__simdfX_mult( o0, r0, c2 ); stbir__simdfX_mult( o1, r1, c2 ); stbir__simdfX_mult( o2, r2, c2 ); stbir__simdfX_mult( o3, r3, c2 );
20681- stbir__simdfX_store( output2, o0 ); stbir__simdfX_store( output2+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output2+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output2+(3*stbir__simdfX_float_count), o3 ); )
20682- stbIF3( stbir__simdfX_mult( o0, r0, c3 ); stbir__simdfX_mult( o1, r1, c3 ); stbir__simdfX_mult( o2, r2, c3 ); stbir__simdfX_mult( o3, r3, c3 );
20683- stbir__simdfX_store( output3, o0 ); stbir__simdfX_store( output3+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output3+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output3+(3*stbir__simdfX_float_count), o3 ); )
20684- stbIF4( stbir__simdfX_mult( o0, r0, c4 ); stbir__simdfX_mult( o1, r1, c4 ); stbir__simdfX_mult( o2, r2, c4 ); stbir__simdfX_mult( o3, r3, c4 );
20685- stbir__simdfX_store( output4, o0 ); stbir__simdfX_store( output4+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output4+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output4+(3*stbir__simdfX_float_count), o3 ); )
20686- stbIF5( stbir__simdfX_mult( o0, r0, c5 ); stbir__simdfX_mult( o1, r1, c5 ); stbir__simdfX_mult( o2, r2, c5 ); stbir__simdfX_mult( o3, r3, c5 );
20687- stbir__simdfX_store( output5, o0 ); stbir__simdfX_store( output5+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output5+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output5+(3*stbir__simdfX_float_count), o3 ); )
20688- stbIF6( stbir__simdfX_mult( o0, r0, c6 ); stbir__simdfX_mult( o1, r1, c6 ); stbir__simdfX_mult( o2, r2, c6 ); stbir__simdfX_mult( o3, r3, c6 );
20689- stbir__simdfX_store( output6, o0 ); stbir__simdfX_store( output6+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output6+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output6+(3*stbir__simdfX_float_count), o3 ); )
20690- stbIF7( stbir__simdfX_mult( o0, r0, c7 ); stbir__simdfX_mult( o1, r1, c7 ); stbir__simdfX_mult( o2, r2, c7 ); stbir__simdfX_mult( o3, r3, c7 );
20691- stbir__simdfX_store( output7, o0 ); stbir__simdfX_store( output7+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output7+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output7+(3*stbir__simdfX_float_count), o3 ); )
20692- #endif
20693-
20694- input += (4*stbir__simdfX_float_count);
20695- stbIF0( output0 += (4*stbir__simdfX_float_count); ) stbIF1( output1 += (4*stbir__simdfX_float_count); ) stbIF2( output2 += (4*stbir__simdfX_float_count); ) stbIF3( output3 += (4*stbir__simdfX_float_count); ) stbIF4( output4 += (4*stbir__simdfX_float_count); ) stbIF5( output5 += (4*stbir__simdfX_float_count); ) stbIF6( output6 += (4*stbir__simdfX_float_count); ) stbIF7( output7 += (4*stbir__simdfX_float_count); )
20696- }
20697- STBIR_SIMD_NO_UNROLL_LOOP_START
20698- while ( ( (char*)input_end - (char*) input ) >= 16 )
20699- {
20700- stbir__simdf o0, r0;
20701- STBIR_SIMD_NO_UNROLL(output0);
20702-
20703- stbir__simdf_load( r0, input );
20704-
20705- #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20706- stbIF0( stbir__simdf_load( o0, output0 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c0 ) ); stbir__simdf_store( output0, o0 ); )
20707- stbIF1( stbir__simdf_load( o0, output1 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c1 ) ); stbir__simdf_store( output1, o0 ); )
20708- stbIF2( stbir__simdf_load( o0, output2 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c2 ) ); stbir__simdf_store( output2, o0 ); )
20709- stbIF3( stbir__simdf_load( o0, output3 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c3 ) ); stbir__simdf_store( output3, o0 ); )
20710- stbIF4( stbir__simdf_load( o0, output4 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c4 ) ); stbir__simdf_store( output4, o0 ); )
20711- stbIF5( stbir__simdf_load( o0, output5 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c5 ) ); stbir__simdf_store( output5, o0 ); )
20712- stbIF6( stbir__simdf_load( o0, output6 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c6 ) ); stbir__simdf_store( output6, o0 ); )
20713- stbIF7( stbir__simdf_load( o0, output7 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c7 ) ); stbir__simdf_store( output7, o0 ); )
20714- #else
20715- stbIF0( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c0 ) ); stbir__simdf_store( output0, o0 ); )
20716- stbIF1( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c1 ) ); stbir__simdf_store( output1, o0 ); )
20717- stbIF2( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c2 ) ); stbir__simdf_store( output2, o0 ); )
20718- stbIF3( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c3 ) ); stbir__simdf_store( output3, o0 ); )
20719- stbIF4( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c4 ) ); stbir__simdf_store( output4, o0 ); )
20720- stbIF5( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c5 ) ); stbir__simdf_store( output5, o0 ); )
20721- stbIF6( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c6 ) ); stbir__simdf_store( output6, o0 ); )
20722- stbIF7( stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c7 ) ); stbir__simdf_store( output7, o0 ); )
20723- #endif
20724-
20725- input += 4;
20726- stbIF0( output0 += 4; ) stbIF1( output1 += 4; ) stbIF2( output2 += 4; ) stbIF3( output3 += 4; ) stbIF4( output4 += 4; ) stbIF5( output5 += 4; ) stbIF6( output6 += 4; ) stbIF7( output7 += 4; )
20727- }
20728- }
20729- #else
20730- STBIR_NO_UNROLL_LOOP_START
20731- while ( ( (char*)input_end - (char*) input ) >= 16 )
20732- {
20733- float r0, r1, r2, r3;
20734- STBIR_NO_UNROLL(input);
20735-
20736- r0 = input[0], r1 = input[1], r2 = input[2], r3 = input[3];
20737-
20738- #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20739- stbIF0( output0[0] += ( r0 * c0s ); output0[1] += ( r1 * c0s ); output0[2] += ( r2 * c0s ); output0[3] += ( r3 * c0s ); )
20740- stbIF1( output1[0] += ( r0 * c1s ); output1[1] += ( r1 * c1s ); output1[2] += ( r2 * c1s ); output1[3] += ( r3 * c1s ); )
20741- stbIF2( output2[0] += ( r0 * c2s ); output2[1] += ( r1 * c2s ); output2[2] += ( r2 * c2s ); output2[3] += ( r3 * c2s ); )
20742- stbIF3( output3[0] += ( r0 * c3s ); output3[1] += ( r1 * c3s ); output3[2] += ( r2 * c3s ); output3[3] += ( r3 * c3s ); )
20743- stbIF4( output4[0] += ( r0 * c4s ); output4[1] += ( r1 * c4s ); output4[2] += ( r2 * c4s ); output4[3] += ( r3 * c4s ); )
20744- stbIF5( output5[0] += ( r0 * c5s ); output5[1] += ( r1 * c5s ); output5[2] += ( r2 * c5s ); output5[3] += ( r3 * c5s ); )
20745- stbIF6( output6[0] += ( r0 * c6s ); output6[1] += ( r1 * c6s ); output6[2] += ( r2 * c6s ); output6[3] += ( r3 * c6s ); )
20746- stbIF7( output7[0] += ( r0 * c7s ); output7[1] += ( r1 * c7s ); output7[2] += ( r2 * c7s ); output7[3] += ( r3 * c7s ); )
20747- #else
20748- stbIF0( output0[0] = ( r0 * c0s ); output0[1] = ( r1 * c0s ); output0[2] = ( r2 * c0s ); output0[3] = ( r3 * c0s ); )
20749- stbIF1( output1[0] = ( r0 * c1s ); output1[1] = ( r1 * c1s ); output1[2] = ( r2 * c1s ); output1[3] = ( r3 * c1s ); )
20750- stbIF2( output2[0] = ( r0 * c2s ); output2[1] = ( r1 * c2s ); output2[2] = ( r2 * c2s ); output2[3] = ( r3 * c2s ); )
20751- stbIF3( output3[0] = ( r0 * c3s ); output3[1] = ( r1 * c3s ); output3[2] = ( r2 * c3s ); output3[3] = ( r3 * c3s ); )
20752- stbIF4( output4[0] = ( r0 * c4s ); output4[1] = ( r1 * c4s ); output4[2] = ( r2 * c4s ); output4[3] = ( r3 * c4s ); )
20753- stbIF5( output5[0] = ( r0 * c5s ); output5[1] = ( r1 * c5s ); output5[2] = ( r2 * c5s ); output5[3] = ( r3 * c5s ); )
20754- stbIF6( output6[0] = ( r0 * c6s ); output6[1] = ( r1 * c6s ); output6[2] = ( r2 * c6s ); output6[3] = ( r3 * c6s ); )
20755- stbIF7( output7[0] = ( r0 * c7s ); output7[1] = ( r1 * c7s ); output7[2] = ( r2 * c7s ); output7[3] = ( r3 * c7s ); )
20756- #endif
20757-
20758- input += 4;
20759- stbIF0( output0 += 4; ) stbIF1( output1 += 4; ) stbIF2( output2 += 4; ) stbIF3( output3 += 4; ) stbIF4( output4 += 4; ) stbIF5( output5 += 4; ) stbIF6( output6 += 4; ) stbIF7( output7 += 4; )
20760- }
20761- #endif
20762- STBIR_NO_UNROLL_LOOP_START
20763- while ( input < input_end )
20764- {
20765- float r = input[0];
20766- STBIR_NO_UNROLL(output0);
20767-
20768- #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20769- stbIF0( output0[0] += ( r * c0s ); )
20770- stbIF1( output1[0] += ( r * c1s ); )
20771- stbIF2( output2[0] += ( r * c2s ); )
20772- stbIF3( output3[0] += ( r * c3s ); )
20773- stbIF4( output4[0] += ( r * c4s ); )
20774- stbIF5( output5[0] += ( r * c5s ); )
20775- stbIF6( output6[0] += ( r * c6s ); )
20776- stbIF7( output7[0] += ( r * c7s ); )
20777- #else
20778- stbIF0( output0[0] = ( r * c0s ); )
20779- stbIF1( output1[0] = ( r * c1s ); )
20780- stbIF2( output2[0] = ( r * c2s ); )
20781- stbIF3( output3[0] = ( r * c3s ); )
20782- stbIF4( output4[0] = ( r * c4s ); )
20783- stbIF5( output5[0] = ( r * c5s ); )
20784- stbIF6( output6[0] = ( r * c6s ); )
20785- stbIF7( output7[0] = ( r * c7s ); )
20786- #endif
20787-
20788- ++input;
20789- stbIF0( ++output0; ) stbIF1( ++output1; ) stbIF2( ++output2; ) stbIF3( ++output3; ) stbIF4( ++output4; ) stbIF5( ++output5; ) stbIF6( ++output6; ) stbIF7( ++output7; )
20790- }
20791-}
20792-
20793-static void STBIR_chans( stbir__vertical_gather_with_,_coeffs)( float * outputp, float const * vertical_coefficients, float const ** inputs, float const * input0_end )
20794-{
20795- float STBIR_SIMD_STREAMOUT_PTR( * ) output = outputp;
20796-
20797- stbIF0( float const * input0 = inputs[0]; float c0s = vertical_coefficients[0]; )
20798- stbIF1( float const * input1 = inputs[1]; float c1s = vertical_coefficients[1]; )
20799- stbIF2( float const * input2 = inputs[2]; float c2s = vertical_coefficients[2]; )
20800- stbIF3( float const * input3 = inputs[3]; float c3s = vertical_coefficients[3]; )
20801- stbIF4( float const * input4 = inputs[4]; float c4s = vertical_coefficients[4]; )
20802- stbIF5( float const * input5 = inputs[5]; float c5s = vertical_coefficients[5]; )
20803- stbIF6( float const * input6 = inputs[6]; float c6s = vertical_coefficients[6]; )
20804- stbIF7( float const * input7 = inputs[7]; float c7s = vertical_coefficients[7]; )
20805-
20806-#if ( STBIR__vertical_channels == 1 ) && !defined(STB_IMAGE_RESIZE_VERTICAL_CONTINUE)
20807- // check single channel one weight
20808- if ( ( c0s >= (1.0f-0.000001f) ) && ( c0s <= (1.0f+0.000001f) ) )
20809- {
20810- STBIR_MEMCPY( output, input0, (char*)input0_end - (char*)input0 );
20811- return;
20812- }
20813-#endif
20814-
20815- #ifdef STBIR_SIMD
20816- {
20817- stbIF0(stbir__simdfX c0 = stbir__simdf_frepX( c0s ); )
20818- stbIF1(stbir__simdfX c1 = stbir__simdf_frepX( c1s ); )
20819- stbIF2(stbir__simdfX c2 = stbir__simdf_frepX( c2s ); )
20820- stbIF3(stbir__simdfX c3 = stbir__simdf_frepX( c3s ); )
20821- stbIF4(stbir__simdfX c4 = stbir__simdf_frepX( c4s ); )
20822- stbIF5(stbir__simdfX c5 = stbir__simdf_frepX( c5s ); )
20823- stbIF6(stbir__simdfX c6 = stbir__simdf_frepX( c6s ); )
20824- stbIF7(stbir__simdfX c7 = stbir__simdf_frepX( c7s ); )
20825-
20826- STBIR_SIMD_NO_UNROLL_LOOP_START
20827- while ( ( (char*)input0_end - (char*) input0 ) >= (16*stbir__simdfX_float_count) )
20828- {
20829- stbir__simdfX o0, o1, o2, o3, r0, r1, r2, r3;
20830- STBIR_SIMD_NO_UNROLL(output);
20831-
20832- // prefetch four loop iterations ahead (doesn't affect much for small resizes, but helps with big ones)
20833- stbIF0( stbir__prefetch( input0 + (16*stbir__simdfX_float_count) ); )
20834- stbIF1( stbir__prefetch( input1 + (16*stbir__simdfX_float_count) ); )
20835- stbIF2( stbir__prefetch( input2 + (16*stbir__simdfX_float_count) ); )
20836- stbIF3( stbir__prefetch( input3 + (16*stbir__simdfX_float_count) ); )
20837- stbIF4( stbir__prefetch( input4 + (16*stbir__simdfX_float_count) ); )
20838- stbIF5( stbir__prefetch( input5 + (16*stbir__simdfX_float_count) ); )
20839- stbIF6( stbir__prefetch( input6 + (16*stbir__simdfX_float_count) ); )
20840- stbIF7( stbir__prefetch( input7 + (16*stbir__simdfX_float_count) ); )
20841-
20842- #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20843- stbIF0( stbir__simdfX_load( o0, output ); stbir__simdfX_load( o1, output+stbir__simdfX_float_count ); stbir__simdfX_load( o2, output+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( o3, output+(3*stbir__simdfX_float_count) );
20844- stbir__simdfX_load( r0, input0 ); stbir__simdfX_load( r1, input0+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input0+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input0+(3*stbir__simdfX_float_count) );
20845- stbir__simdfX_madd( o0, o0, r0, c0 ); stbir__simdfX_madd( o1, o1, r1, c0 ); stbir__simdfX_madd( o2, o2, r2, c0 ); stbir__simdfX_madd( o3, o3, r3, c0 ); )
20846- #else
20847- stbIF0( stbir__simdfX_load( r0, input0 ); stbir__simdfX_load( r1, input0+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input0+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input0+(3*stbir__simdfX_float_count) );
20848- stbir__simdfX_mult( o0, r0, c0 ); stbir__simdfX_mult( o1, r1, c0 ); stbir__simdfX_mult( o2, r2, c0 ); stbir__simdfX_mult( o3, r3, c0 ); )
20849- #endif
20850-
20851- stbIF1( stbir__simdfX_load( r0, input1 ); stbir__simdfX_load( r1, input1+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input1+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input1+(3*stbir__simdfX_float_count) );
20852- stbir__simdfX_madd( o0, o0, r0, c1 ); stbir__simdfX_madd( o1, o1, r1, c1 ); stbir__simdfX_madd( o2, o2, r2, c1 ); stbir__simdfX_madd( o3, o3, r3, c1 ); )
20853- stbIF2( stbir__simdfX_load( r0, input2 ); stbir__simdfX_load( r1, input2+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input2+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input2+(3*stbir__simdfX_float_count) );
20854- stbir__simdfX_madd( o0, o0, r0, c2 ); stbir__simdfX_madd( o1, o1, r1, c2 ); stbir__simdfX_madd( o2, o2, r2, c2 ); stbir__simdfX_madd( o3, o3, r3, c2 ); )
20855- stbIF3( stbir__simdfX_load( r0, input3 ); stbir__simdfX_load( r1, input3+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input3+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input3+(3*stbir__simdfX_float_count) );
20856- stbir__simdfX_madd( o0, o0, r0, c3 ); stbir__simdfX_madd( o1, o1, r1, c3 ); stbir__simdfX_madd( o2, o2, r2, c3 ); stbir__simdfX_madd( o3, o3, r3, c3 ); )
20857- stbIF4( stbir__simdfX_load( r0, input4 ); stbir__simdfX_load( r1, input4+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input4+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input4+(3*stbir__simdfX_float_count) );
20858- stbir__simdfX_madd( o0, o0, r0, c4 ); stbir__simdfX_madd( o1, o1, r1, c4 ); stbir__simdfX_madd( o2, o2, r2, c4 ); stbir__simdfX_madd( o3, o3, r3, c4 ); )
20859- stbIF5( stbir__simdfX_load( r0, input5 ); stbir__simdfX_load( r1, input5+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input5+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input5+(3*stbir__simdfX_float_count) );
20860- stbir__simdfX_madd( o0, o0, r0, c5 ); stbir__simdfX_madd( o1, o1, r1, c5 ); stbir__simdfX_madd( o2, o2, r2, c5 ); stbir__simdfX_madd( o3, o3, r3, c5 ); )
20861- stbIF6( stbir__simdfX_load( r0, input6 ); stbir__simdfX_load( r1, input6+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input6+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input6+(3*stbir__simdfX_float_count) );
20862- stbir__simdfX_madd( o0, o0, r0, c6 ); stbir__simdfX_madd( o1, o1, r1, c6 ); stbir__simdfX_madd( o2, o2, r2, c6 ); stbir__simdfX_madd( o3, o3, r3, c6 ); )
20863- stbIF7( stbir__simdfX_load( r0, input7 ); stbir__simdfX_load( r1, input7+stbir__simdfX_float_count ); stbir__simdfX_load( r2, input7+(2*stbir__simdfX_float_count) ); stbir__simdfX_load( r3, input7+(3*stbir__simdfX_float_count) );
20864- stbir__simdfX_madd( o0, o0, r0, c7 ); stbir__simdfX_madd( o1, o1, r1, c7 ); stbir__simdfX_madd( o2, o2, r2, c7 ); stbir__simdfX_madd( o3, o3, r3, c7 ); )
20865-
20866- stbir__simdfX_store( output, o0 ); stbir__simdfX_store( output+stbir__simdfX_float_count, o1 ); stbir__simdfX_store( output+(2*stbir__simdfX_float_count), o2 ); stbir__simdfX_store( output+(3*stbir__simdfX_float_count), o3 );
20867- output += (4*stbir__simdfX_float_count);
20868- stbIF0( input0 += (4*stbir__simdfX_float_count); ) stbIF1( input1 += (4*stbir__simdfX_float_count); ) stbIF2( input2 += (4*stbir__simdfX_float_count); ) stbIF3( input3 += (4*stbir__simdfX_float_count); ) stbIF4( input4 += (4*stbir__simdfX_float_count); ) stbIF5( input5 += (4*stbir__simdfX_float_count); ) stbIF6( input6 += (4*stbir__simdfX_float_count); ) stbIF7( input7 += (4*stbir__simdfX_float_count); )
20869- }
20870-
20871- STBIR_SIMD_NO_UNROLL_LOOP_START
20872- while ( ( (char*)input0_end - (char*) input0 ) >= 16 )
20873- {
20874- stbir__simdf o0, r0;
20875- STBIR_SIMD_NO_UNROLL(output);
20876-
20877- #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20878- stbIF0( stbir__simdf_load( o0, output ); stbir__simdf_load( r0, input0 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c0 ) ); )
20879- #else
20880- stbIF0( stbir__simdf_load( r0, input0 ); stbir__simdf_mult( o0, r0, stbir__if_simdf8_cast_to_simdf4( c0 ) ); )
20881- #endif
20882- stbIF1( stbir__simdf_load( r0, input1 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c1 ) ); )
20883- stbIF2( stbir__simdf_load( r0, input2 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c2 ) ); )
20884- stbIF3( stbir__simdf_load( r0, input3 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c3 ) ); )
20885- stbIF4( stbir__simdf_load( r0, input4 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c4 ) ); )
20886- stbIF5( stbir__simdf_load( r0, input5 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c5 ) ); )
20887- stbIF6( stbir__simdf_load( r0, input6 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c6 ) ); )
20888- stbIF7( stbir__simdf_load( r0, input7 ); stbir__simdf_madd( o0, o0, r0, stbir__if_simdf8_cast_to_simdf4( c7 ) ); )
20889-
20890- stbir__simdf_store( output, o0 );
20891- output += 4;
20892- stbIF0( input0 += 4; ) stbIF1( input1 += 4; ) stbIF2( input2 += 4; ) stbIF3( input3 += 4; ) stbIF4( input4 += 4; ) stbIF5( input5 += 4; ) stbIF6( input6 += 4; ) stbIF7( input7 += 4; )
20893- }
20894- }
20895- #else
20896- STBIR_NO_UNROLL_LOOP_START
20897- while ( ( (char*)input0_end - (char*) input0 ) >= 16 )
20898- {
20899- float o0, o1, o2, o3;
20900- STBIR_NO_UNROLL(output);
20901- #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20902- stbIF0( o0 = output[0] + input0[0] * c0s; o1 = output[1] + input0[1] * c0s; o2 = output[2] + input0[2] * c0s; o3 = output[3] + input0[3] * c0s; )
20903- #else
20904- stbIF0( o0 = input0[0] * c0s; o1 = input0[1] * c0s; o2 = input0[2] * c0s; o3 = input0[3] * c0s; )
20905- #endif
20906- stbIF1( o0 += input1[0] * c1s; o1 += input1[1] * c1s; o2 += input1[2] * c1s; o3 += input1[3] * c1s; )
20907- stbIF2( o0 += input2[0] * c2s; o1 += input2[1] * c2s; o2 += input2[2] * c2s; o3 += input2[3] * c2s; )
20908- stbIF3( o0 += input3[0] * c3s; o1 += input3[1] * c3s; o2 += input3[2] * c3s; o3 += input3[3] * c3s; )
20909- stbIF4( o0 += input4[0] * c4s; o1 += input4[1] * c4s; o2 += input4[2] * c4s; o3 += input4[3] * c4s; )
20910- stbIF5( o0 += input5[0] * c5s; o1 += input5[1] * c5s; o2 += input5[2] * c5s; o3 += input5[3] * c5s; )
20911- stbIF6( o0 += input6[0] * c6s; o1 += input6[1] * c6s; o2 += input6[2] * c6s; o3 += input6[3] * c6s; )
20912- stbIF7( o0 += input7[0] * c7s; o1 += input7[1] * c7s; o2 += input7[2] * c7s; o3 += input7[3] * c7s; )
20913- output[0] = o0; output[1] = o1; output[2] = o2; output[3] = o3;
20914- output += 4;
20915- stbIF0( input0 += 4; ) stbIF1( input1 += 4; ) stbIF2( input2 += 4; ) stbIF3( input3 += 4; ) stbIF4( input4 += 4; ) stbIF5( input5 += 4; ) stbIF6( input6 += 4; ) stbIF7( input7 += 4; )
20916- }
20917- #endif
20918- STBIR_NO_UNROLL_LOOP_START
20919- while ( input0 < input0_end )
20920- {
20921- float o0;
20922- STBIR_NO_UNROLL(output);
20923- #ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
20924- stbIF0( o0 = output[0] + input0[0] * c0s; )
20925- #else
20926- stbIF0( o0 = input0[0] * c0s; )
20927- #endif
20928- stbIF1( o0 += input1[0] * c1s; )
20929- stbIF2( o0 += input2[0] * c2s; )
20930- stbIF3( o0 += input3[0] * c3s; )
20931- stbIF4( o0 += input4[0] * c4s; )
20932- stbIF5( o0 += input5[0] * c5s; )
20933- stbIF6( o0 += input6[0] * c6s; )
20934- stbIF7( o0 += input7[0] * c7s; )
20935- output[0] = o0;
20936- ++output;
20937- stbIF0( ++input0; ) stbIF1( ++input1; ) stbIF2( ++input2; ) stbIF3( ++input3; ) stbIF4( ++input4; ) stbIF5( ++input5; ) stbIF6( ++input6; ) stbIF7( ++input7; )
20938- }
20939+ stbIF0(output0[0] = (r * c0s);) stbIF1(output1[0] = (r * c1s);)
20940+ stbIF2(output2[0] = (r * c2s);) stbIF3(output3[0] = (r * c3s);)
20941+ stbIF4(output4[0] = (r * c4s);) stbIF5(output5[0] = (r * c5s);)
20942+ stbIF6(output6[0] = (r * c6s);)
20943+ stbIF7(output7[0] = (r * c7s);)
20944+#endif
20945+
20946+ ++ input;
20947+ stbIF0(++output0;) stbIF1(++output1;) stbIF2(++output2;)
20948+ stbIF3(++output3;) stbIF4(++output4;) stbIF5(++output5;)
20949+ stbIF6(++output6;) stbIF7(++output7;)
20950+ }
20951+}
20952+
20953+static void
20954+STBIR_chans(stbir__vertical_gather_with_,
20955+ _coeffs)(float *outputp,
20956+ float const *vertical_coefficients,
20957+ float const **inputs,
20958+ float const *input0_end)
20959+{
20960+ float STBIR_SIMD_STREAMOUT_PTR(*) output = outputp;
20961+
20962+ stbIF0(float const *input0 = inputs[0];
20963+ float c0s = vertical_coefficients[0];)
20964+ stbIF1(float const *input1 = inputs[1];
20965+ float c1s = vertical_coefficients[1];)
20966+ stbIF2(float const *input2 = inputs[2];
20967+ float c2s = vertical_coefficients[2];)
20968+ stbIF3(float const *input3 = inputs[3];
20969+ float c3s = vertical_coefficients[3];)
20970+ stbIF4(float const *input4 = inputs[4];
20971+ float c4s = vertical_coefficients[4];)
20972+ stbIF5(float const *input5 = inputs[5];
20973+ float c5s = vertical_coefficients[5];)
20974+ stbIF6(float const *input6 = inputs[6];
20975+ float c6s = vertical_coefficients[6];)
20976+ stbIF7(float const *input7 = inputs[7];
20977+ float c7s = vertical_coefficients[7];)
20978+
20979+#if (STBIR__vertical_channels == 1) && \
20980+ !defined(STB_IMAGE_RESIZE_VERTICAL_CONTINUE)
20981+ // check single channel one weight
20982+ if ((c0s >= (1.0f - 0.000001f)) && (c0s <= (1.0f + 0.000001f)))
20983+ {
20984+ STBIR_MEMCPY(output, input0, (char *)input0_end - (char *)input0);
20985+ return;
20986+ }
20987+#endif
20988+
20989+#ifdef STBIR_SIMD
20990+ {
20991+ stbIF0(stbir__simdfX c0 = stbir__simdf_frepX(c0s);)
20992+ stbIF1(stbir__simdfX c1 = stbir__simdf_frepX(c1s);)
20993+ stbIF2(stbir__simdfX c2 = stbir__simdf_frepX(c2s);) stbIF3(
20994+ stbir__simdfX c3 = stbir__simdf_frepX(c3s);)
20995+ stbIF4(stbir__simdfX c4 = stbir__simdf_frepX(c4s);) stbIF5(
20996+ stbir__simdfX c5 = stbir__simdf_frepX(c5s);)
20997+ stbIF6(stbir__simdfX c6 = stbir__simdf_frepX(c6s);)
20998+ stbIF7(stbir__simdfX c7 = stbir__simdf_frepX(c7s);)
20999+
21000+ STBIR_SIMD_NO_UNROLL_LOOP_START while (
21001+ ((char *)input0_end - (char *)input0) >=
21002+ (16 * stbir__simdfX_float_count))
21003+ {
21004+ stbir__simdfX o0, o1, o2, o3, r0, r1, r2, r3;
21005+ STBIR_SIMD_NO_UNROLL(output);
21006+
21007+ // prefetch four loop iterations ahead (doesn't affect much for
21008+ // small resizes, but helps with big ones)
21009+ stbIF0(stbir__prefetch(input0 + (16 * stbir__simdfX_float_count));) stbIF1(
21010+ stbir__prefetch(input1 + (16 * stbir__simdfX_float_count));)
21011+ stbIF2(stbir__prefetch(input2 + (16 * stbir__simdfX_float_count));) stbIF3(
21012+ stbir__prefetch(input3 + (16 * stbir__simdfX_float_count));)
21013+ stbIF4(stbir__prefetch(input4 + (16 * stbir__simdfX_float_count));) stbIF5(
21014+ stbir__prefetch(input5 +
21015+ (16 * stbir__simdfX_float_count));)
21016+ stbIF6(stbir__prefetch(input6 + (16 * stbir__simdfX_float_count));) stbIF7(
21017+ stbir__prefetch(input7 +
21018+ (16 * stbir__simdfX_float_count));)
21019+
21020+#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
21021+ stbIF0(
21022+ stbir__simdfX_load(o0, output);
21023+ stbir__simdfX_load(
21024+ o1, output + stbir__simdfX_float_count);
21025+ stbir__simdfX_load(
21026+ o2,
21027+ output + (2 * stbir__simdfX_float_count));
21028+ stbir__simdfX_load(
21029+ o3,
21030+ output + (3 * stbir__simdfX_float_count));
21031+ stbir__simdfX_load(r0, input0);
21032+ stbir__simdfX_load(
21033+ r1, input0 + stbir__simdfX_float_count);
21034+ stbir__simdfX_load(
21035+ r2,
21036+ input0 + (2 * stbir__simdfX_float_count));
21037+ stbir__simdfX_load(
21038+ r3,
21039+ input0 + (3 * stbir__simdfX_float_count));
21040+ stbir__simdfX_madd(o0, o0, r0, c0);
21041+ stbir__simdfX_madd(o1, o1, r1, c0);
21042+ stbir__simdfX_madd(o2, o2, r2, c0);
21043+ stbir__simdfX_madd(o3, o3, r3, c0);)
21044+#else
21045+ stbIF0(
21046+ stbir__simdfX_load(r0, input0);
21047+ stbir__simdfX_load(
21048+ r1, input0 + stbir__simdfX_float_count);
21049+ stbir__simdfX_load(
21050+ r2,
21051+ input0 + (2 * stbir__simdfX_float_count));
21052+ stbir__simdfX_load(
21053+ r3,
21054+ input0 + (3 * stbir__simdfX_float_count));
21055+ stbir__simdfX_mult(o0, r0, c0);
21056+ stbir__simdfX_mult(o1, r1, c0);
21057+ stbir__simdfX_mult(o2, r2, c0);
21058+ stbir__simdfX_mult(o3, r3, c0);)
21059+#endif
21060+
21061+ stbIF1(
21062+ stbir__simdfX_load(r0, input1);
21063+ stbir__simdfX_load(
21064+ r1, input1 + stbir__simdfX_float_count);
21065+ stbir__simdfX_load(
21066+ r2,
21067+ input1 +
21068+ (2 * stbir__simdfX_float_count));
21069+ stbir__simdfX_load(
21070+ r3,
21071+ input1 +
21072+ (3 * stbir__simdfX_float_count));
21073+ stbir__simdfX_madd(o0, o0, r0, c1);
21074+ stbir__simdfX_madd(o1, o1, r1, c1);
21075+ stbir__simdfX_madd(o2, o2, r2, c1);
21076+ stbir__simdfX_madd(
21077+ o3,
21078+ o3,
21079+ r3,
21080+ c1);) stbIF2(stbir__simdfX_load(r0,
21081+ input2);
21082+ stbir__simdfX_load(
21083+ r1,
21084+ input2 +
21085+ stbir__simdfX_float_count);
21086+ stbir__simdfX_load(
21087+ r2,
21088+ input2 +
21089+ (2 *
21090+ stbir__simdfX_float_count));
21091+ stbir__simdfX_load(
21092+ r3,
21093+ input2 +
21094+ (3 *
21095+ stbir__simdfX_float_count));
21096+ stbir__simdfX_madd(
21097+ o0, o0, r0, c2);
21098+ stbir__simdfX_madd(
21099+ o1, o1, r1, c2);
21100+ stbir__simdfX_madd(
21101+ o2, o2, r2, c2);
21102+ stbir__simdfX_madd(
21103+ o3, o3, r3, c2);)
21104+ stbIF3(
21105+ stbir__simdfX_load(r0, input3);
21106+ stbir__simdfX_load(
21107+ r1,
21108+ input3 + stbir__simdfX_float_count);
21109+ stbir__simdfX_load(
21110+ r2,
21111+ input3 +
21112+ (2 *
21113+ stbir__simdfX_float_count));
21114+ stbir__simdfX_load(
21115+ r3,
21116+ input3 +
21117+ (3 *
21118+ stbir__simdfX_float_count));
21119+ stbir__simdfX_madd(o0, o0, r0, c3);
21120+ stbir__simdfX_madd(o1, o1, r1, c3);
21121+ stbir__simdfX_madd(o2, o2, r2, c3);
21122+ stbir__simdfX_madd(o3, o3, r3, c3);)
21123+ stbIF4(
21124+ stbir__simdfX_load(r0, input4);
21125+ stbir__simdfX_load(
21126+ r1,
21127+ input4 +
21128+ stbir__simdfX_float_count);
21129+ stbir__simdfX_load(
21130+ r2,
21131+ input4 +
21132+ (2 *
21133+ stbir__simdfX_float_count));
21134+ stbir__simdfX_load(
21135+ r3,
21136+ input4 +
21137+ (3 *
21138+ stbir__simdfX_float_count));
21139+ stbir__simdfX_madd(o0, o0, r0, c4);
21140+ stbir__simdfX_madd(o1, o1, r1, c4);
21141+ stbir__simdfX_madd(o2, o2, r2, c4);
21142+ stbir__simdfX_madd(o3, o3, r3, c4);)
21143+ stbIF5(
21144+ stbir__simdfX_load(r0, input5);
21145+ stbir__simdfX_load(
21146+ r1,
21147+ input5 +
21148+ stbir__simdfX_float_count);
21149+ stbir__simdfX_load(
21150+ r2,
21151+ input5 +
21152+ (2 *
21153+ stbir__simdfX_float_count));
21154+ stbir__simdfX_load(
21155+ r3,
21156+ input5 +
21157+ (3 *
21158+ stbir__simdfX_float_count));
21159+ stbir__simdfX_madd(
21160+ o0, o0, r0, c5);
21161+ stbir__simdfX_madd(
21162+ o1, o1, r1, c5);
21163+ stbir__simdfX_madd(
21164+ o2, o2, r2, c5);
21165+ stbir__simdfX_madd(
21166+ o3, o3, r3, c5);)
21167+ stbIF6(
21168+ stbir__simdfX_load(r0,
21169+ input6);
21170+ stbir__simdfX_load(
21171+ r1,
21172+ input6 +
21173+ stbir__simdfX_float_count);
21174+ stbir__simdfX_load(
21175+ r2,
21176+ input6 +
21177+ (2 *
21178+ stbir__simdfX_float_count));
21179+ stbir__simdfX_load(
21180+ r3,
21181+ input6 +
21182+ (3 *
21183+ stbir__simdfX_float_count));
21184+ stbir__simdfX_madd(
21185+ o0, o0, r0, c6);
21186+ stbir__simdfX_madd(
21187+ o1, o1, r1, c6);
21188+ stbir__simdfX_madd(
21189+ o2, o2, r2, c6);
21190+ stbir__simdfX_madd(
21191+ o3, o3, r3, c6);)
21192+ stbIF7(
21193+ stbir__simdfX_load(
21194+ r0, input7);
21195+ stbir__simdfX_load(
21196+ r1,
21197+ input7 +
21198+ stbir__simdfX_float_count);
21199+ stbir__simdfX_load(
21200+ r2,
21201+ input7 +
21202+ (2 *
21203+ stbir__simdfX_float_count));
21204+ stbir__simdfX_load(
21205+ r3,
21206+ input7 +
21207+ (3 *
21208+ stbir__simdfX_float_count));
21209+ stbir__simdfX_madd(
21210+ o0, o0, r0, c7);
21211+ stbir__simdfX_madd(
21212+ o1, o1, r1, c7);
21213+ stbir__simdfX_madd(
21214+ o2, o2, r2, c7);
21215+ stbir__simdfX_madd(
21216+ o3, o3, r3, c7);)
21217+
21218+ stbir__simdfX_store(
21219+ output, o0);
21220+ stbir__simdfX_store(output + stbir__simdfX_float_count, o1);
21221+ stbir__simdfX_store(output + (2 * stbir__simdfX_float_count), o2);
21222+ stbir__simdfX_store(output + (3 * stbir__simdfX_float_count), o3);
21223+ output += (4 * stbir__simdfX_float_count);
21224+ stbIF0(input0 += (4 * stbir__simdfX_float_count);) stbIF1(
21225+ input1 += (4 * stbir__simdfX_float_count);)
21226+ stbIF2(input2 += (4 * stbir__simdfX_float_count);) stbIF3(
21227+ input3 += (4 * stbir__simdfX_float_count);)
21228+ stbIF4(input4 += (4 * stbir__simdfX_float_count);) stbIF5(
21229+ input5 += (4 * stbir__simdfX_float_count);)
21230+ stbIF6(input6 += (4 * stbir__simdfX_float_count);)
21231+ stbIF7(input7 += (4 * stbir__simdfX_float_count);)
21232+ }
21233+
21234+ STBIR_SIMD_NO_UNROLL_LOOP_START
21235+ while (((char *)input0_end - (char *)input0) >= 16) {
21236+ stbir__simdf o0, r0;
21237+ STBIR_SIMD_NO_UNROLL(output);
21238+
21239+#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
21240+ stbIF0(stbir__simdf_load(o0, output); stbir__simdf_load(r0, input0);
21241+ stbir__simdf_madd(
21242+ o0, o0, r0, stbir__if_simdf8_cast_to_simdf4(c0));)
21243+#else
21244+ stbIF0(stbir__simdf_load(r0, input0); stbir__simdf_mult(
21245+ o0, r0, stbir__if_simdf8_cast_to_simdf4(c0));)
21246+#endif
21247+ stbIF1(stbir__simdf_load(r0, input1); stbir__simdf_madd(
21248+ o0, o0, r0, stbir__if_simdf8_cast_to_simdf4(c1));)
21249+ stbIF2(
21250+ stbir__simdf_load(r0, input2); stbir__simdf_madd(
21251+ o0, o0, r0, stbir__if_simdf8_cast_to_simdf4(c2));)
21252+ stbIF3(stbir__simdf_load(r0, input3); stbir__simdf_madd(
21253+ o0,
21254+ o0,
21255+ r0,
21256+ stbir__if_simdf8_cast_to_simdf4(c3));)
21257+ stbIF4(stbir__simdf_load(r0, input4);
21258+ stbir__simdf_madd(
21259+ o0,
21260+ o0,
21261+ r0,
21262+ stbir__if_simdf8_cast_to_simdf4(c4));)
21263+ stbIF5(
21264+ stbir__simdf_load(r0, input5);
21265+ stbir__simdf_madd(
21266+ o0,
21267+ o0,
21268+ r0,
21269+ stbir__if_simdf8_cast_to_simdf4(c5));)
21270+ stbIF6(stbir__simdf_load(r0, input6);
21271+ stbir__simdf_madd(
21272+ o0,
21273+ o0,
21274+ r0,
21275+ stbir__if_simdf8_cast_to_simdf4(
21276+ c6));)
21277+ stbIF7(
21278+ stbir__simdf_load(r0, input7);
21279+ stbir__simdf_madd(
21280+ o0,
21281+ o0,
21282+ r0,
21283+ stbir__if_simdf8_cast_to_simdf4(
21284+ c7));)
21285+
21286+ stbir__simdf_store(output, o0);
21287+ output += 4;
21288+ stbIF0(input0 += 4;) stbIF1(input1 += 4;) stbIF2(input2 += 4;)
21289+ stbIF3(input3 += 4;) stbIF4(input4 += 4;) stbIF5(input5 += 4;)
21290+ stbIF6(input6 += 4;) stbIF7(input7 += 4;)
21291+ }
21292+ }
21293+#else
21294+ STBIR_NO_UNROLL_LOOP_START while (
21295+ ((char *)input0_end - (char *)input0) >=
21296+ 16)
21297+ {
21298+ float o0, o1, o2, o3;
21299+ STBIR_NO_UNROLL(output);
21300+#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
21301+ stbIF0(
21302+ o0 = output[0] + input0[0] * c0s; o1 = output[1] + input0[1] * c0s;
21303+ o2 = output[2] + input0[2] * c0s; o3 = output[3] + input0[3] * c0s;)
21304+#else
21305+ stbIF0(o0 = input0[0] * c0s; o1 = input0[1] * c0s; o2 = input0[2] * c0s;
21306+ o3 = input0[3] * c0s;)
21307+#endif
21308+ stbIF1(o0 += input1[0] * c1s; o1 += input1[1] * c1s;
21309+ o2 += input1[2] * c1s; o3 += input1[3] * c1s;)
21310+ stbIF2(o0 += input2[0] * c2s; o1 += input2[1] * c2s;
21311+ o2 += input2[2] * c2s;
21312+ o3 += input2[3] * c2s;) stbIF3(o0 += input3[0] * c3s;
21313+ o1 += input3[1] * c3s;
21314+ o2 += input3[2] * c3s;
21315+ o3 += input3[3] * c3s;)
21316+ stbIF4(o0 += input4[0] * c4s; o1 += input4[1] * c4s;
21317+ o2 += input4[2] * c4s; o3 += input4[3] * c4s;)
21318+ stbIF5(o0 += input5[0] * c5s; o1 += input5[1] * c5s;
21319+ o2 += input5[2] * c5s; o3 += input5[3] * c5s;)
21320+ stbIF6(o0 += input6[0] * c6s; o1 += input6[1] * c6s;
21321+ o2 += input6[2] * c6s;
21322+ o3 += input6[3] * c6s;)
21323+ stbIF7(o0 += input7[0] * c7s;
21324+ o1 += input7[1] * c7s;
21325+ o2 += input7[2] * c7s;
21326+ o3 += input7[3] * c7s;) output[0] = o0;
21327+ output[1] = o1;
21328+ output[2] = o2;
21329+ output[3] = o3;
21330+ output += 4;
21331+ stbIF0(input0 += 4;) stbIF1(input1 += 4;) stbIF2(input2 += 4;)
21332+ stbIF3(input3 += 4;) stbIF4(input4 += 4;) stbIF5(input5 += 4;)
21333+ stbIF6(input6 += 4;) stbIF7(input7 += 4;)
21334+ }
21335+#endif
21336+ STBIR_NO_UNROLL_LOOP_START
21337+ while (input0 < input0_end) {
21338+ float o0;
21339+ STBIR_NO_UNROLL(output);
21340+#ifdef STB_IMAGE_RESIZE_VERTICAL_CONTINUE
21341+ stbIF0(o0 = output[0] + input0[0] * c0s;)
21342+#else
21343+ stbIF0(o0 = input0[0] * c0s;)
21344+#endif
21345+ stbIF1(o0 += input1[0] * c1s;) stbIF2(o0 += input2[0] * c2s;)
21346+ stbIF3(o0 += input3[0] * c3s;) stbIF4(o0 += input4[0] * c4s;)
21347+ stbIF5(o0 += input5[0] * c5s;)
21348+ stbIF6(o0 += input6[0] * c6s;)
21349+ stbIF7(o0 += input7[0] * c7s;) output[0] = o0;
21350+ ++output;
21351+ stbIF0(++input0;) stbIF1(++input1;) stbIF2(++input2;) stbIF3(++input3;)
21352+ stbIF4(++input4;) stbIF5(++input5;) stbIF6(++input6;)
21353+ stbIF7(++input7;)
21354+ }
21355 }
21356
21357 #undef stbIF0
21358@@ -10251,30 +12734,31 @@ static void STBIR_chans( stbir__vertical_gather_with_,_coeffs)( float * outputp,
21359
21360 #else // !STB_IMAGE_RESIZE_DO_VERTICALS
21361
21362-#define STBIR_chans( start, end ) STBIR_strs_join1(start,STBIR__horizontal_channels,end)
21363+#define STBIR_chans(start, end) \
21364+ STBIR_strs_join1(start, STBIR__horizontal_channels, end)
21365
21366 #ifndef stbir__2_coeff_only
21367-#define stbir__2_coeff_only() \
21368- stbir__1_coeff_only(); \
21369- stbir__1_coeff_remnant(1);
21370+#define stbir__2_coeff_only() \
21371+ stbir__1_coeff_only(); \
21372+ stbir__1_coeff_remnant(1);
21373 #endif
21374
21375 #ifndef stbir__2_coeff_remnant
21376-#define stbir__2_coeff_remnant( ofs ) \
21377- stbir__1_coeff_remnant(ofs); \
21378- stbir__1_coeff_remnant((ofs)+1);
21379+#define stbir__2_coeff_remnant(ofs) \
21380+ stbir__1_coeff_remnant(ofs); \
21381+ stbir__1_coeff_remnant((ofs) + 1);
21382 #endif
21383
21384 #ifndef stbir__3_coeff_only
21385-#define stbir__3_coeff_only() \
21386- stbir__2_coeff_only(); \
21387- stbir__1_coeff_remnant(2);
21388+#define stbir__3_coeff_only() \
21389+ stbir__2_coeff_only(); \
21390+ stbir__1_coeff_remnant(2);
21391 #endif
21392
21393 #ifndef stbir__3_coeff_remnant
21394-#define stbir__3_coeff_remnant( ofs ) \
21395- stbir__2_coeff_remnant(ofs); \
21396- stbir__1_coeff_remnant((ofs)+2);
21397+#define stbir__3_coeff_remnant(ofs) \
21398+ stbir__2_coeff_remnant(ofs); \
21399+ stbir__1_coeff_remnant((ofs) + 2);
21400 #endif
21401
21402 #ifndef stbir__3_coeff_setup
21403@@ -10282,308 +12766,432 @@ static void STBIR_chans( stbir__vertical_gather_with_,_coeffs)( float * outputp,
21404 #endif
21405
21406 #ifndef stbir__4_coeff_start
21407-#define stbir__4_coeff_start() \
21408- stbir__2_coeff_only(); \
21409- stbir__2_coeff_remnant(2);
21410+#define stbir__4_coeff_start() \
21411+ stbir__2_coeff_only(); \
21412+ stbir__2_coeff_remnant(2);
21413 #endif
21414
21415 #ifndef stbir__4_coeff_continue_from_4
21416-#define stbir__4_coeff_continue_from_4( ofs ) \
21417- stbir__2_coeff_remnant(ofs); \
21418- stbir__2_coeff_remnant((ofs)+2);
21419+#define stbir__4_coeff_continue_from_4(ofs) \
21420+ stbir__2_coeff_remnant(ofs); \
21421+ stbir__2_coeff_remnant((ofs) + 2);
21422 #endif
21423
21424 #ifndef stbir__store_output_tiny
21425 #define stbir__store_output_tiny stbir__store_output
21426 #endif
21427
21428-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_1_coeff)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21429-{
21430- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21431- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21432- STBIR_SIMD_NO_UNROLL_LOOP_START
21433- do {
21434- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21435- float const * hc = horizontal_coefficients;
21436- stbir__1_coeff_only();
21437- stbir__store_output_tiny();
21438- } while ( output < output_end );
21439-}
21440-
21441-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_2_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21442-{
21443- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21444- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21445- STBIR_SIMD_NO_UNROLL_LOOP_START
21446- do {
21447- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21448- float const * hc = horizontal_coefficients;
21449- stbir__2_coeff_only();
21450- stbir__store_output_tiny();
21451- } while ( output < output_end );
21452-}
21453-
21454-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_3_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21455-{
21456- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21457- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21458- STBIR_SIMD_NO_UNROLL_LOOP_START
21459- do {
21460- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21461- float const * hc = horizontal_coefficients;
21462- stbir__3_coeff_only();
21463- stbir__store_output_tiny();
21464- } while ( output < output_end );
21465-}
21466-
21467-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_4_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21468-{
21469- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21470- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21471- STBIR_SIMD_NO_UNROLL_LOOP_START
21472- do {
21473- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21474- float const * hc = horizontal_coefficients;
21475- stbir__4_coeff_start();
21476- stbir__store_output();
21477- } while ( output < output_end );
21478-}
21479-
21480-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_5_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21481-{
21482- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21483- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21484- STBIR_SIMD_NO_UNROLL_LOOP_START
21485- do {
21486- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21487- float const * hc = horizontal_coefficients;
21488- stbir__4_coeff_start();
21489- stbir__1_coeff_remnant(4);
21490- stbir__store_output();
21491- } while ( output < output_end );
21492-}
21493-
21494-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_6_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21495-{
21496- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21497- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21498- STBIR_SIMD_NO_UNROLL_LOOP_START
21499- do {
21500- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21501- float const * hc = horizontal_coefficients;
21502- stbir__4_coeff_start();
21503- stbir__2_coeff_remnant(4);
21504- stbir__store_output();
21505- } while ( output < output_end );
21506-}
21507-
21508-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_7_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21509-{
21510- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21511- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21512- stbir__3_coeff_setup();
21513- STBIR_SIMD_NO_UNROLL_LOOP_START
21514- do {
21515- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21516- float const * hc = horizontal_coefficients;
21517-
21518- stbir__4_coeff_start();
21519- stbir__3_coeff_remnant(4);
21520- stbir__store_output();
21521- } while ( output < output_end );
21522-}
21523-
21524-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_8_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21525-{
21526- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21527- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21528- STBIR_SIMD_NO_UNROLL_LOOP_START
21529- do {
21530- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21531- float const * hc = horizontal_coefficients;
21532- stbir__4_coeff_start();
21533- stbir__4_coeff_continue_from_4(4);
21534- stbir__store_output();
21535- } while ( output < output_end );
21536-}
21537-
21538-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_9_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21539-{
21540- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21541- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21542- STBIR_SIMD_NO_UNROLL_LOOP_START
21543- do {
21544- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21545- float const * hc = horizontal_coefficients;
21546- stbir__4_coeff_start();
21547- stbir__4_coeff_continue_from_4(4);
21548- stbir__1_coeff_remnant(8);
21549- stbir__store_output();
21550- } while ( output < output_end );
21551-}
21552-
21553-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_10_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21554-{
21555- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21556- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21557- STBIR_SIMD_NO_UNROLL_LOOP_START
21558- do {
21559- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21560- float const * hc = horizontal_coefficients;
21561- stbir__4_coeff_start();
21562- stbir__4_coeff_continue_from_4(4);
21563- stbir__2_coeff_remnant(8);
21564- stbir__store_output();
21565- } while ( output < output_end );
21566-}
21567-
21568-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_11_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21569-{
21570- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21571- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21572- stbir__3_coeff_setup();
21573- STBIR_SIMD_NO_UNROLL_LOOP_START
21574- do {
21575- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21576- float const * hc = horizontal_coefficients;
21577- stbir__4_coeff_start();
21578- stbir__4_coeff_continue_from_4(4);
21579- stbir__3_coeff_remnant(8);
21580- stbir__store_output();
21581- } while ( output < output_end );
21582-}
21583-
21584-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_12_coeffs)( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21585-{
21586- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21587- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21588- STBIR_SIMD_NO_UNROLL_LOOP_START
21589- do {
21590- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21591- float const * hc = horizontal_coefficients;
21592- stbir__4_coeff_start();
21593- stbir__4_coeff_continue_from_4(4);
21594- stbir__4_coeff_continue_from_4(8);
21595- stbir__store_output();
21596- } while ( output < output_end );
21597-}
21598-
21599-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod0 )( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21600-{
21601- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21602- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21603- STBIR_SIMD_NO_UNROLL_LOOP_START
21604- do {
21605- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21606- int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 4 + 3 ) >> 2;
21607- float const * hc = horizontal_coefficients;
21608-
21609- stbir__4_coeff_start();
21610- STBIR_SIMD_NO_UNROLL_LOOP_START
21611- do {
21612- hc += 4;
21613- decode += STBIR__horizontal_channels * 4;
21614- stbir__4_coeff_continue_from_4( 0 );
21615- --n;
21616- } while ( n > 0 );
21617- stbir__store_output();
21618- } while ( output < output_end );
21619-}
21620-
21621-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod1 )( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21622-{
21623- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21624- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21625- STBIR_SIMD_NO_UNROLL_LOOP_START
21626- do {
21627- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21628- int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 5 + 3 ) >> 2;
21629- float const * hc = horizontal_coefficients;
21630-
21631- stbir__4_coeff_start();
21632- STBIR_SIMD_NO_UNROLL_LOOP_START
21633- do {
21634- hc += 4;
21635- decode += STBIR__horizontal_channels * 4;
21636- stbir__4_coeff_continue_from_4( 0 );
21637- --n;
21638- } while ( n > 0 );
21639- stbir__1_coeff_remnant( 4 );
21640- stbir__store_output();
21641- } while ( output < output_end );
21642-}
21643-
21644-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod2 )( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21645-{
21646- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21647- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21648- STBIR_SIMD_NO_UNROLL_LOOP_START
21649- do {
21650- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21651- int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 6 + 3 ) >> 2;
21652- float const * hc = horizontal_coefficients;
21653-
21654- stbir__4_coeff_start();
21655- STBIR_SIMD_NO_UNROLL_LOOP_START
21656- do {
21657- hc += 4;
21658- decode += STBIR__horizontal_channels * 4;
21659- stbir__4_coeff_continue_from_4( 0 );
21660- --n;
21661- } while ( n > 0 );
21662- stbir__2_coeff_remnant( 4 );
21663-
21664- stbir__store_output();
21665- } while ( output < output_end );
21666-}
21667-
21668-static void STBIR_chans( stbir__horizontal_gather_,_channels_with_n_coeffs_mod3 )( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer, stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width )
21669-{
21670- float const * output_end = output_buffer + output_sub_size * STBIR__horizontal_channels;
21671- float STBIR_SIMD_STREAMOUT_PTR( * ) output = output_buffer;
21672- stbir__3_coeff_setup();
21673- STBIR_SIMD_NO_UNROLL_LOOP_START
21674- do {
21675- float const * decode = decode_buffer + horizontal_contributors->n0 * STBIR__horizontal_channels;
21676- int n = ( ( horizontal_contributors->n1 - horizontal_contributors->n0 + 1 ) - 7 + 3 ) >> 2;
21677- float const * hc = horizontal_coefficients;
21678-
21679- stbir__4_coeff_start();
21680- STBIR_SIMD_NO_UNROLL_LOOP_START
21681- do {
21682- hc += 4;
21683- decode += STBIR__horizontal_channels * 4;
21684- stbir__4_coeff_continue_from_4( 0 );
21685- --n;
21686- } while ( n > 0 );
21687- stbir__3_coeff_remnant( 4 );
21688-
21689- stbir__store_output();
21690- } while ( output < output_end );
21691-}
21692-
21693-static stbir__horizontal_gather_channels_func * STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_funcs)[4]=
21694-{
21695- STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_mod0),
21696- STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_mod1),
21697- STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_mod2),
21698- STBIR_chans(stbir__horizontal_gather_,_channels_with_n_coeffs_mod3),
21699+static void
21700+STBIR_chans(stbir__horizontal_gather_, _channels_with_1_coeff)(
21701+ float *output_buffer, unsigned int output_sub_size,
21702+ float const *decode_buffer,
21703+ stbir__contributors const *horizontal_contributors,
21704+ float const *horizontal_coefficients, int coefficient_width)
21705+{
21706+ float const *output_end =
21707+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21708+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21709+ STBIR_SIMD_NO_UNROLL_LOOP_START
21710+ do {
21711+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21712+ STBIR__horizontal_channels;
21713+ float const *hc = horizontal_coefficients;
21714+ stbir__1_coeff_only();
21715+ stbir__store_output_tiny();
21716+ } while (output < output_end);
21717+}
21718+
21719+static void
21720+STBIR_chans(stbir__horizontal_gather_, _channels_with_2_coeffs)(
21721+ float *output_buffer, unsigned int output_sub_size,
21722+ float const *decode_buffer,
21723+ stbir__contributors const *horizontal_contributors,
21724+ float const *horizontal_coefficients, int coefficient_width)
21725+{
21726+ float const *output_end =
21727+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21728+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21729+ STBIR_SIMD_NO_UNROLL_LOOP_START
21730+ do {
21731+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21732+ STBIR__horizontal_channels;
21733+ float const *hc = horizontal_coefficients;
21734+ stbir__2_coeff_only();
21735+ stbir__store_output_tiny();
21736+ } while (output < output_end);
21737+}
21738+
21739+static void
21740+STBIR_chans(stbir__horizontal_gather_, _channels_with_3_coeffs)(
21741+ float *output_buffer, unsigned int output_sub_size,
21742+ float const *decode_buffer,
21743+ stbir__contributors const *horizontal_contributors,
21744+ float const *horizontal_coefficients, int coefficient_width)
21745+{
21746+ float const *output_end =
21747+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21748+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21749+ STBIR_SIMD_NO_UNROLL_LOOP_START
21750+ do {
21751+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21752+ STBIR__horizontal_channels;
21753+ float const *hc = horizontal_coefficients;
21754+ stbir__3_coeff_only();
21755+ stbir__store_output_tiny();
21756+ } while (output < output_end);
21757+}
21758+
21759+static void
21760+STBIR_chans(stbir__horizontal_gather_, _channels_with_4_coeffs)(
21761+ float *output_buffer, unsigned int output_sub_size,
21762+ float const *decode_buffer,
21763+ stbir__contributors const *horizontal_contributors,
21764+ float const *horizontal_coefficients, int coefficient_width)
21765+{
21766+ float const *output_end =
21767+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21768+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21769+ STBIR_SIMD_NO_UNROLL_LOOP_START
21770+ do {
21771+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21772+ STBIR__horizontal_channels;
21773+ float const *hc = horizontal_coefficients;
21774+ stbir__4_coeff_start();
21775+ stbir__store_output();
21776+ } while (output < output_end);
21777+}
21778+
21779+static void
21780+STBIR_chans(stbir__horizontal_gather_, _channels_with_5_coeffs)(
21781+ float *output_buffer, unsigned int output_sub_size,
21782+ float const *decode_buffer,
21783+ stbir__contributors const *horizontal_contributors,
21784+ float const *horizontal_coefficients, int coefficient_width)
21785+{
21786+ float const *output_end =
21787+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21788+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21789+ STBIR_SIMD_NO_UNROLL_LOOP_START
21790+ do {
21791+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21792+ STBIR__horizontal_channels;
21793+ float const *hc = horizontal_coefficients;
21794+ stbir__4_coeff_start();
21795+ stbir__1_coeff_remnant(4);
21796+ stbir__store_output();
21797+ } while (output < output_end);
21798+}
21799+
21800+static void
21801+STBIR_chans(stbir__horizontal_gather_, _channels_with_6_coeffs)(
21802+ float *output_buffer, unsigned int output_sub_size,
21803+ float const *decode_buffer,
21804+ stbir__contributors const *horizontal_contributors,
21805+ float const *horizontal_coefficients, int coefficient_width)
21806+{
21807+ float const *output_end =
21808+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21809+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21810+ STBIR_SIMD_NO_UNROLL_LOOP_START
21811+ do {
21812+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21813+ STBIR__horizontal_channels;
21814+ float const *hc = horizontal_coefficients;
21815+ stbir__4_coeff_start();
21816+ stbir__2_coeff_remnant(4);
21817+ stbir__store_output();
21818+ } while (output < output_end);
21819+}
21820+
21821+static void
21822+STBIR_chans(stbir__horizontal_gather_, _channels_with_7_coeffs)(
21823+ float *output_buffer, unsigned int output_sub_size,
21824+ float const *decode_buffer,
21825+ stbir__contributors const *horizontal_contributors,
21826+ float const *horizontal_coefficients, int coefficient_width)
21827+{
21828+ float const *output_end =
21829+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21830+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21831+ stbir__3_coeff_setup();
21832+ STBIR_SIMD_NO_UNROLL_LOOP_START
21833+ do {
21834+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21835+ STBIR__horizontal_channels;
21836+ float const *hc = horizontal_coefficients;
21837+
21838+ stbir__4_coeff_start();
21839+ stbir__3_coeff_remnant(4);
21840+ stbir__store_output();
21841+ } while (output < output_end);
21842+}
21843+
21844+static void
21845+STBIR_chans(stbir__horizontal_gather_, _channels_with_8_coeffs)(
21846+ float *output_buffer, unsigned int output_sub_size,
21847+ float const *decode_buffer,
21848+ stbir__contributors const *horizontal_contributors,
21849+ float const *horizontal_coefficients, int coefficient_width)
21850+{
21851+ float const *output_end =
21852+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21853+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21854+ STBIR_SIMD_NO_UNROLL_LOOP_START
21855+ do {
21856+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21857+ STBIR__horizontal_channels;
21858+ float const *hc = horizontal_coefficients;
21859+ stbir__4_coeff_start();
21860+ stbir__4_coeff_continue_from_4(4);
21861+ stbir__store_output();
21862+ } while (output < output_end);
21863+}
21864+
21865+static void
21866+STBIR_chans(stbir__horizontal_gather_, _channels_with_9_coeffs)(
21867+ float *output_buffer, unsigned int output_sub_size,
21868+ float const *decode_buffer,
21869+ stbir__contributors const *horizontal_contributors,
21870+ float const *horizontal_coefficients, int coefficient_width)
21871+{
21872+ float const *output_end =
21873+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21874+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21875+ STBIR_SIMD_NO_UNROLL_LOOP_START
21876+ do {
21877+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21878+ STBIR__horizontal_channels;
21879+ float const *hc = horizontal_coefficients;
21880+ stbir__4_coeff_start();
21881+ stbir__4_coeff_continue_from_4(4);
21882+ stbir__1_coeff_remnant(8);
21883+ stbir__store_output();
21884+ } while (output < output_end);
21885+}
21886+
21887+static void
21888+STBIR_chans(stbir__horizontal_gather_, _channels_with_10_coeffs)(
21889+ float *output_buffer, unsigned int output_sub_size,
21890+ float const *decode_buffer,
21891+ stbir__contributors const *horizontal_contributors,
21892+ float const *horizontal_coefficients, int coefficient_width)
21893+{
21894+ float const *output_end =
21895+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21896+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21897+ STBIR_SIMD_NO_UNROLL_LOOP_START
21898+ do {
21899+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21900+ STBIR__horizontal_channels;
21901+ float const *hc = horizontal_coefficients;
21902+ stbir__4_coeff_start();
21903+ stbir__4_coeff_continue_from_4(4);
21904+ stbir__2_coeff_remnant(8);
21905+ stbir__store_output();
21906+ } while (output < output_end);
21907+}
21908+
21909+static void
21910+STBIR_chans(stbir__horizontal_gather_, _channels_with_11_coeffs)(
21911+ float *output_buffer, unsigned int output_sub_size,
21912+ float const *decode_buffer,
21913+ stbir__contributors const *horizontal_contributors,
21914+ float const *horizontal_coefficients, int coefficient_width)
21915+{
21916+ float const *output_end =
21917+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21918+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21919+ stbir__3_coeff_setup();
21920+ STBIR_SIMD_NO_UNROLL_LOOP_START
21921+ do {
21922+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21923+ STBIR__horizontal_channels;
21924+ float const *hc = horizontal_coefficients;
21925+ stbir__4_coeff_start();
21926+ stbir__4_coeff_continue_from_4(4);
21927+ stbir__3_coeff_remnant(8);
21928+ stbir__store_output();
21929+ } while (output < output_end);
21930+}
21931+
21932+static void
21933+STBIR_chans(stbir__horizontal_gather_, _channels_with_12_coeffs)(
21934+ float *output_buffer, unsigned int output_sub_size,
21935+ float const *decode_buffer,
21936+ stbir__contributors const *horizontal_contributors,
21937+ float const *horizontal_coefficients, int coefficient_width)
21938+{
21939+ float const *output_end =
21940+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21941+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21942+ STBIR_SIMD_NO_UNROLL_LOOP_START
21943+ do {
21944+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21945+ STBIR__horizontal_channels;
21946+ float const *hc = horizontal_coefficients;
21947+ stbir__4_coeff_start();
21948+ stbir__4_coeff_continue_from_4(4);
21949+ stbir__4_coeff_continue_from_4(8);
21950+ stbir__store_output();
21951+ } while (output < output_end);
21952+}
21953+
21954+static void
21955+STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_mod0)(
21956+ float *output_buffer, unsigned int output_sub_size,
21957+ float const *decode_buffer,
21958+ stbir__contributors const *horizontal_contributors,
21959+ float const *horizontal_coefficients, int coefficient_width)
21960+{
21961+ float const *output_end =
21962+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21963+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21964+ STBIR_SIMD_NO_UNROLL_LOOP_START
21965+ do {
21966+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21967+ STBIR__horizontal_channels;
21968+ int n =
21969+ ((horizontal_contributors->n1 - horizontal_contributors->n0 + 1) -
21970+ 4 + 3) >>
21971+ 2;
21972+ float const *hc = horizontal_coefficients;
21973+
21974+ stbir__4_coeff_start();
21975+ STBIR_SIMD_NO_UNROLL_LOOP_START
21976+ do {
21977+ hc += 4;
21978+ decode += STBIR__horizontal_channels * 4;
21979+ stbir__4_coeff_continue_from_4(0);
21980+ --n;
21981+ } while (n > 0);
21982+ stbir__store_output();
21983+ } while (output < output_end);
21984+}
21985+
21986+static void
21987+STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_mod1)(
21988+ float *output_buffer, unsigned int output_sub_size,
21989+ float const *decode_buffer,
21990+ stbir__contributors const *horizontal_contributors,
21991+ float const *horizontal_coefficients, int coefficient_width)
21992+{
21993+ float const *output_end =
21994+ output_buffer + output_sub_size * STBIR__horizontal_channels;
21995+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
21996+ STBIR_SIMD_NO_UNROLL_LOOP_START
21997+ do {
21998+ float const *decode = decode_buffer + horizontal_contributors->n0 *
21999+ STBIR__horizontal_channels;
22000+ int n =
22001+ ((horizontal_contributors->n1 - horizontal_contributors->n0 + 1) -
22002+ 5 + 3) >>
22003+ 2;
22004+ float const *hc = horizontal_coefficients;
22005+
22006+ stbir__4_coeff_start();
22007+ STBIR_SIMD_NO_UNROLL_LOOP_START
22008+ do {
22009+ hc += 4;
22010+ decode += STBIR__horizontal_channels * 4;
22011+ stbir__4_coeff_continue_from_4(0);
22012+ --n;
22013+ } while (n > 0);
22014+ stbir__1_coeff_remnant(4);
22015+ stbir__store_output();
22016+ } while (output < output_end);
22017+}
22018+
22019+static void
22020+STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_mod2)(
22021+ float *output_buffer, unsigned int output_sub_size,
22022+ float const *decode_buffer,
22023+ stbir__contributors const *horizontal_contributors,
22024+ float const *horizontal_coefficients, int coefficient_width)
22025+{
22026+ float const *output_end =
22027+ output_buffer + output_sub_size * STBIR__horizontal_channels;
22028+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
22029+ STBIR_SIMD_NO_UNROLL_LOOP_START
22030+ do {
22031+ float const *decode = decode_buffer + horizontal_contributors->n0 *
22032+ STBIR__horizontal_channels;
22033+ int n =
22034+ ((horizontal_contributors->n1 - horizontal_contributors->n0 + 1) -
22035+ 6 + 3) >>
22036+ 2;
22037+ float const *hc = horizontal_coefficients;
22038+
22039+ stbir__4_coeff_start();
22040+ STBIR_SIMD_NO_UNROLL_LOOP_START
22041+ do {
22042+ hc += 4;
22043+ decode += STBIR__horizontal_channels * 4;
22044+ stbir__4_coeff_continue_from_4(0);
22045+ --n;
22046+ } while (n > 0);
22047+ stbir__2_coeff_remnant(4);
22048+
22049+ stbir__store_output();
22050+ } while (output < output_end);
22051+}
22052+
22053+static void
22054+STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_mod3)(
22055+ float *output_buffer, unsigned int output_sub_size,
22056+ float const *decode_buffer,
22057+ stbir__contributors const *horizontal_contributors,
22058+ float const *horizontal_coefficients, int coefficient_width)
22059+{
22060+ float const *output_end =
22061+ output_buffer + output_sub_size * STBIR__horizontal_channels;
22062+ float STBIR_SIMD_STREAMOUT_PTR(*) output = output_buffer;
22063+ stbir__3_coeff_setup();
22064+ STBIR_SIMD_NO_UNROLL_LOOP_START
22065+ do {
22066+ float const *decode = decode_buffer + horizontal_contributors->n0 *
22067+ STBIR__horizontal_channels;
22068+ int n =
22069+ ((horizontal_contributors->n1 - horizontal_contributors->n0 + 1) -
22070+ 7 + 3) >>
22071+ 2;
22072+ float const *hc = horizontal_coefficients;
22073+
22074+ stbir__4_coeff_start();
22075+ STBIR_SIMD_NO_UNROLL_LOOP_START
22076+ do {
22077+ hc += 4;
22078+ decode += STBIR__horizontal_channels * 4;
22079+ stbir__4_coeff_continue_from_4(0);
22080+ --n;
22081+ } while (n > 0);
22082+ stbir__3_coeff_remnant(4);
22083+
22084+ stbir__store_output();
22085+ } while (output < output_end);
22086+}
22087+
22088+static stbir__horizontal_gather_channels_func *
22089+ STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_funcs)[4] = {
22090+ STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_mod0),
22091+ STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_mod1),
22092+ STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_mod2),
22093+ STBIR_chans(stbir__horizontal_gather_, _channels_with_n_coeffs_mod3),
22094 };
22095
22096-static stbir__horizontal_gather_channels_func * STBIR_chans(stbir__horizontal_gather_,_channels_funcs)[12]=
22097-{
22098- STBIR_chans(stbir__horizontal_gather_,_channels_with_1_coeff),
22099- STBIR_chans(stbir__horizontal_gather_,_channels_with_2_coeffs),
22100- STBIR_chans(stbir__horizontal_gather_,_channels_with_3_coeffs),
22101- STBIR_chans(stbir__horizontal_gather_,_channels_with_4_coeffs),
22102- STBIR_chans(stbir__horizontal_gather_,_channels_with_5_coeffs),
22103- STBIR_chans(stbir__horizontal_gather_,_channels_with_6_coeffs),
22104- STBIR_chans(stbir__horizontal_gather_,_channels_with_7_coeffs),
22105- STBIR_chans(stbir__horizontal_gather_,_channels_with_8_coeffs),
22106- STBIR_chans(stbir__horizontal_gather_,_channels_with_9_coeffs),
22107- STBIR_chans(stbir__horizontal_gather_,_channels_with_10_coeffs),
22108- STBIR_chans(stbir__horizontal_gather_,_channels_with_11_coeffs),
22109- STBIR_chans(stbir__horizontal_gather_,_channels_with_12_coeffs),
22110+static stbir__horizontal_gather_channels_func *
22111+ STBIR_chans(stbir__horizontal_gather_, _channels_funcs)[12] = {
22112+ STBIR_chans(stbir__horizontal_gather_, _channels_with_1_coeff),
22113+ STBIR_chans(stbir__horizontal_gather_, _channels_with_2_coeffs),
22114+ STBIR_chans(stbir__horizontal_gather_, _channels_with_3_coeffs),
22115+ STBIR_chans(stbir__horizontal_gather_, _channels_with_4_coeffs),
22116+ STBIR_chans(stbir__horizontal_gather_, _channels_with_5_coeffs),
22117+ STBIR_chans(stbir__horizontal_gather_, _channels_with_6_coeffs),
22118+ STBIR_chans(stbir__horizontal_gather_, _channels_with_7_coeffs),
22119+ STBIR_chans(stbir__horizontal_gather_, _channels_with_8_coeffs),
22120+ STBIR_chans(stbir__horizontal_gather_, _channels_with_9_coeffs),
22121+ STBIR_chans(stbir__horizontal_gather_, _channels_with_10_coeffs),
22122+ STBIR_chans(stbir__horizontal_gather_, _channels_with_11_coeffs),
22123+ STBIR_chans(stbir__horizontal_gather_, _channels_with_12_coeffs),
22124 };
22125
22126 #undef STBIR__horizontal_channels
22127@@ -10601,7 +13209,7 @@ static stbir__horizontal_gather_channels_func * STBIR_chans(stbir__horizontal_ga
22128 #undef stbir__store_output_tiny
22129 #undef STBIR_chans
22130
22131-#endif // HORIZONALS
22132+#endif // HORIZONALS
22133
22134 #undef STBIR_strs_join2
22135 #undef STBIR_strs_join1