commit 7c35d5c

dalem  ·  2025-12-20 20:09:41 +0000 UTC
parent 929b5e5
add wallpaper supporttt :3
this was the initial wallpaper implementation using stb image header.
it has since been changed and mostly moved to the client.
6 files changed,  +8066, -3
D swc
+1, -0
1@@ -170,6 +170,7 @@ SWC_SOURCES= \
2 	libswc/swc.c \
3 	libswc/util.c \
4 	libswc/view.c \
5+	libswc/wallpaper.c \
6 	libswc/wayland_buffer.c \
7 	libswc/window.c \
8 	libswc/xdg_decoration.c \
+6, -2
 1@@ -283,10 +283,14 @@ renderer_repaint(struct target *target, pixman_region32_t *damage, pixman_region
 2 
 3 	wld_set_target_surface(swc.drm->renderer, target->surface);
 4 
 5-	/* Paint base damage black. */
 6 	if (pixman_region32_not_empty(base_damage)) {
 7 		pixman_region32_translate(base_damage, -target->view->geometry.x, -target->view->geometry.y);
 8-		wld_fill_region(swc.drm->renderer, 0xff000000, base_damage);
 9+		
10+		if(wallbuf)
11+			wld_copy_region(swc.drm->renderer, wallbuf, 0, 0, base_damage);
12+
13+		else
14+			wld_fill_region(swc.drm->renderer, bgcolor, base_damage);
15 	}
16 
17 	wl_list_for_each_reverse (view, views, link) {
+23, -0
 1@@ -410,6 +410,29 @@ int swc_add_axis_binding(uint32_t modifiers, uint32_t axis, swc_axis_binding_han
 2 
 3 /* }}} */
 4 
 5+/* Wallpaper {{{ */
 6+
 7+extern unsigned char *wallpaper;
 8+extern struct wld_buffer *wallbuf;
 9+
10+/**
11+ * Load the image at the given filesystem path and use it as the wallpaper.
12+ * TODO: scaling, tiling, maybe a different image for each screen.
13+ */
14+
15+void swc_wallpaper_init(char* path);
16+
17+/**
18+ * Set the solid background color.
19+ * It is only painted while no wallpaper image is loaded.
20+ * Defaults to black (0xff000000).
21+ */
22+
23+extern uint32_t bgcolor;
24+void swc_wallpaper_color_set(uint32_t color);
25+
26+/* }}} */
27+
28 /**
29  * This is a user-provided structure that swc will use to notify the display
30  * server of new windows, screens and input devices.
+48, -0
 1@@ -0,0 +1,48 @@
 2+#include <pixman.h>
 3+#include <wld/wld.h>
 4+
 5+#define STB_IMAGE_IMPLEMENTATION
 6+#define STBI_NO_HDR
 7+#include "../stb/stb_image.h"
 8+
 9+#include "swc.h"
10+#include "internal.h"
11+#include "drm.h"
12+#include "util.h"
13+#include "shm.h"
14+
15+unsigned char *wallpaper = NULL;
16+struct wld_buffer *wallbuf = NULL;
17+
18+uint32_t bgcolor = 0xff000000;
19+
20+/* Load the image at `path` as the wallpaper; a failed load changes nothing. */
21+EXPORT void
22+swc_wallpaper_init(char* path)
23+{
24+	int width, height, chan;
25+	unsigned char *pixels = stbi_load(path, &width, &height, &chan, 4);
26+
27+	/* stbi_load returns NULL (leaving width/height unset) on a bad file. */
28+	if (!pixels)
29+		return;
30+
31+	/* stb_image emits R,G,B,A bytes; swap R and B for WLD_FORMAT_ARGB8888. */
32+	for (int i = 0, count = width * height; i < count; i++) {
33+		unsigned char r = pixels[i*4];
34+		pixels[i*4] = pixels[(i*4)+2];
35+		pixels[(i*4)+2] = r;
36+	}
37+
38+	union wld_object obj;
39+	obj.ptr = (uint32_t*)pixels;
40+	wallpaper = pixels; /* publish only after a successful decode */
41+	wallbuf = wld_import_buffer(swc.shm->context, WLD_OBJECT_DATA, obj,
42+			width, height, WLD_FORMAT_ARGB8888, width * 4);
43+}
44+
45+EXPORT void
46+swc_wallpaper_color_set(uint32_t color)
47+{
48+	bgcolor = color; /* renderer_repaint fills base damage with this when wallbuf is NULL */
49+}
+7988, -0
   1@@ -0,0 +1,7988 @@
   2+/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb
   3+                                  no warranty implied; use at your own risk
   4+
   5+   Do this:
   6+      #define STB_IMAGE_IMPLEMENTATION
   7+   before you include this file in *one* C or C++ file to create the implementation.
   8+
   9+   // i.e. it should look like this:
  10+   #include ...
  11+   #include ...
  12+   #include ...
  13+   #define STB_IMAGE_IMPLEMENTATION
  14+   #include "stb_image.h"
  15+
  16+   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
  17+   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
  18+
  19+
  20+   QUICK NOTES:
  21+      Primarily of interest to game developers and other people who can
  22+          avoid problematic images and only need the trivial interface
  23+
  24+      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
  25+      PNG 1/2/4/8/16-bit-per-channel
  26+
  27+      TGA (not sure what subset, if a subset)
  28+      BMP non-1bpp, non-RLE
  29+      PSD (composited view only, no extra channels, 8/16 bit-per-channel)
  30+
  31+      GIF (*comp always reports as 4-channel)
  32+      HDR (radiance rgbE format)
  33+      PIC (Softimage PIC)
  34+      PNM (PPM and PGM binary only)
  35+
  36+      Animated GIF still needs a proper API, but here's one way to do it:
  37+          http://gist.github.com/urraka/685d9a6340b26b830d49
  38+
  39+      - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
  40+      - decode from arbitrary I/O callbacks
  41+      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
  42+
  43+   Full documentation under "DOCUMENTATION" below.
  44+
  45+
  46+LICENSE
  47+
  48+  See end of file for license information.
  49+
  50+RECENT REVISION HISTORY:
  51+
  52+      2.30  (2024-05-31) avoid erroneous gcc warning
  53+      2.29  (2023-05-xx) optimizations
  54+      2.28  (2023-01-29) many error fixes, security errors, just tons of stuff
  55+      2.27  (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
  56+      2.26  (2020-07-13) many minor fixes
  57+      2.25  (2020-02-02) fix warnings
  58+      2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
  59+      2.23  (2019-08-11) fix clang static analysis warning
  60+      2.22  (2019-03-04) gif fixes, fix warnings
  61+      2.21  (2019-02-25) fix typo in comment
  62+      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
  63+      2.19  (2018-02-11) fix warning
  64+      2.18  (2018-01-30) fix warnings
  65+      2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
  66+      2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
  67+      2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
  68+      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
  69+      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
  70+      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
  71+      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
  72+                         RGB-format JPEG; remove white matting in PSD;
  73+                         allocate large structures on the stack;
  74+                         correct channel count for PNG & BMP
  75+      2.10  (2016-01-22) avoid warning introduced in 2.09
  76+      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
  77+
  78+   See end of file for full revision history.
  79+
  80+
  81+ ============================    Contributors    =========================
  82+
  83+ Image formats                          Extensions, features
  84+    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
  85+    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
  86+    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
  87+    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
  88+    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
  89+    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
  90+    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
  91+    github:urraka (animated gif)           Junggon Kim (PNM comments)
  92+    Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
  93+                                           socks-the-fox (16-bit PNG)
  94+                                           Jeremy Sawicki (handle all ImageNet JPGs)
  95+ Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
  96+    Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
  97+    Arseny Kapoulkine                      Simon Breuss (16-bit PNM)
  98+    John-Mark Allen
  99+    Carmelo J Fdez-Aguera
 100+
 101+ Bug & warning fixes
 102+    Marc LeBlanc            David Woo          Guillaume George     Martins Mozeiko
 103+    Christpher Lloyd        Jerry Jansson      Joseph Thomson       Blazej Dariusz Roszkowski
 104+    Phil Jordan                                Dave Moore           Roy Eltham
 105+    Hayaki Saito            Nathan Reed        Won Chun
 106+    Luke Graham             Johan Duparc       Nick Verigakis       the Horde3D community
 107+    Thomas Ruf              Ronny Chevalier                         github:rlyeh
 108+    Janez Zemva             John Bartholomew   Michal Cichon        github:romigrou
 109+    Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk
 110+    Eugene Golushkov        Laurent Gomila     Cort Stratton        github:snagar
 111+    Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex
 112+    Cass Everitt            Ryamond Barbiero                        github:grim210
 113+    Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw
 114+    Philipp Wiesemann       Dale Weiler        Oriol Ferrer Mesia   github:phprus
 115+    Josh Tobin              Neil Bickford      Matthew Gregan       github:poppolopoppo
 116+    Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji
 117+    Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007
 118+                            Brad Weinberger    Matvey Cherevko      github:mosra
 119+    Luca Sas                Alexander Veselov  Zack Middleton       [reserved]
 120+    Ryan C. Gordon          [reserved]                              [reserved]
 121+                     DO NOT ADD YOUR NAME HERE
 122+
 123+                     Jacko Dirks
 124+
 125+  To add your name to the credits, pick a random blank space in the middle and fill it.
 126+  80% of merge conflicts on stb PRs are due to people adding their name at the end
 127+  of the credits.
 128+*/
 129+
 130+#ifndef STBI_INCLUDE_STB_IMAGE_H
 131+#define STBI_INCLUDE_STB_IMAGE_H
 132+
 133+// DOCUMENTATION
 134+//
 135+// Limitations:
 136+//    - no 12-bit-per-channel JPEG
 137+//    - no JPEGs with arithmetic coding
 138+//    - GIF always returns *comp=4
 139+//
 140+// Basic usage (see HDR discussion below for HDR usage):
 141+//    int x,y,n;
 142+//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
 143+//    // ... process data if not NULL ...
 144+//    // ... x = width, y = height, n = # 8-bit components per pixel ...
 145+//    // ... replace '0' with '1'..'4' to force that many components per pixel
 146+//    // ... but 'n' will always be the number that it would have been if you said 0
 147+//    stbi_image_free(data);
 148+//
 149+// Standard parameters:
 150+//    int *x                 -- outputs image width in pixels
 151+//    int *y                 -- outputs image height in pixels
 152+//    int *channels_in_file  -- outputs # of image components in image file
 153+//    int desired_channels   -- if non-zero, # of image components requested in result
 154+//
 155+// The return value from an image loader is an 'unsigned char *' which points
 156+// to the pixel data, or NULL on an allocation failure or if the image is
 157+// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
 158+// with each pixel consisting of N interleaved 8-bit components; the first
 159+// pixel pointed to is top-left-most in the image. There is no padding between
 160+// image scanlines or between pixels, regardless of format. The number of
 161+// components N is 'desired_channels' if desired_channels is non-zero, or
 162+// *channels_in_file otherwise. If desired_channels is non-zero,
 163+// *channels_in_file has the number of components that _would_ have been
 164+// output otherwise. E.g. if you set desired_channels to 4, you will always
 165+// get RGBA output, but you can check *channels_in_file to see if it's trivially
 166+// opaque because e.g. there were only 3 channels in the source image.
 167+//
 168+// An output image with N components has the following components interleaved
 169+// in this order in each pixel:
 170+//
 171+//     N=#comp     components
 172+//       1           grey
 173+//       2           grey, alpha
 174+//       3           red, green, blue
 175+//       4           red, green, blue, alpha
 176+//
 177+// If image loading fails for any reason, the return value will be NULL,
 178+// and *x, *y, *channels_in_file will be unchanged. The function
 179+// stbi_failure_reason() can be queried for an extremely brief, end-user
 180+// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
 181+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
 182+// more user-friendly ones.
 183+//
 184+// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
 185+//
 186+// To query the width, height and component count of an image without having to
 187+// decode the full file, you can use the stbi_info family of functions:
 188+//
 189+//   int x,y,n,ok;
 190+//   ok = stbi_info(filename, &x, &y, &n);
 191+//   // returns ok=1 and sets x, y, n if image is a supported format,
 192+//   // 0 otherwise.
 193+//
 194+// Note that stb_image pervasively uses ints in its public API for sizes,
 195+// including sizes of memory buffers. This is now part of the API and thus
 196+// hard to change without causing breakage. As a result, the various image
 197+// loaders all have certain limits on image size; these differ somewhat
 198+// by format but generally boil down to either just under 2GB or just under
 199+// 1GB. When the decoded image would be larger than this, stb_image decoding
 200+// will fail.
 201+//
 202+// Additionally, stb_image will reject image files that have any of their
 203+// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
 204+// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
 205+// the only way to have an image with such dimensions load correctly
 206+// is for it to have a rather extreme aspect ratio. Either way, the
 207+// assumption here is that such larger images are likely to be malformed
 208+// or malicious. If you do need to load an image with individual dimensions
 209+// larger than that, and it still fits in the overall size limit, you can
 210+// #define STBI_MAX_DIMENSIONS on your own to be something larger.
 211+//
 212+// ===========================================================================
 213+//
 214+// UNICODE:
 215+//
 216+//   If compiling for Windows and you wish to use Unicode filenames, compile
 217+//   with
 218+//       #define STBI_WINDOWS_UTF8
 219+//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
 220+//   Windows wchar_t filenames to utf8.
 221+//
 222+// ===========================================================================
 223+//
 224+// Philosophy
 225+//
 226+// stb libraries are designed with the following priorities:
 227+//
 228+//    1. easy to use
 229+//    2. easy to maintain
 230+//    3. good performance
 231+//
 232+// Sometimes I let "good performance" creep up in priority over "easy to maintain",
 233+// and for best performance I may provide less-easy-to-use APIs that give higher
 234+// performance, in addition to the easy-to-use ones. Nevertheless, it's important
 235+// to keep in mind that from the standpoint of you, a client of this library,
 236+// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
 237+//
 238+// Some secondary priorities arise directly from the first two, some of which
 239+// provide more explicit reasons why performance can't be emphasized.
 240+//
 241+//    - Portable ("ease of use")
 242+//    - Small source code footprint ("easy to maintain")
 243+//    - No dependencies ("ease of use")
 244+//
 245+// ===========================================================================
 246+//
 247+// I/O callbacks
 248+//
 249+// I/O callbacks allow you to read from arbitrary sources, like packaged
 250+// files or some other source. Data read from callbacks are processed
 251+// through a small internal buffer (currently 128 bytes) to try to reduce
 252+// overhead.
 253+//
 254+// The three functions you must define are "read" (reads some bytes of data),
 255+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
 256+//
 257+// ===========================================================================
 258+//
 259+// SIMD support
 260+//
 261+// The JPEG decoder will try to automatically use SIMD kernels on x86 when
 262+// supported by the compiler. For ARM Neon support, you must explicitly
 263+// request it.
 264+//
 265+// (The old do-it-yourself SIMD API is no longer supported in the current
 266+// code.)
 267+//
 268+// On x86, SSE2 will automatically be used when available based on a run-time
 269+// test; if not, the generic C versions are used as a fall-back. On ARM targets,
 270+// the typical path is to have separate builds for NEON and non-NEON devices
 271+// (at least this is true for iOS and Android). Therefore, the NEON support is
 272+// toggled by a build flag: define STBI_NEON to get NEON loops.
 273+//
 274+// If for some reason you do not want to use any of SIMD code, or if
 275+// you have issues compiling it, you can disable it entirely by
 276+// defining STBI_NO_SIMD.
 277+//
 278+// ===========================================================================
 279+//
 280+// HDR image support   (disable by defining STBI_NO_HDR)
 281+//
 282+// stb_image supports loading HDR images in general, and currently the Radiance
 283+// .HDR file format specifically. You can still load any file through the existing
 284+// interface; if you attempt to load an HDR file, it will be automatically remapped
 285+// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
 286+// both of these constants can be reconfigured through this interface:
 287+//
 288+//     stbi_hdr_to_ldr_gamma(2.2f);
 289+//     stbi_hdr_to_ldr_scale(1.0f);
 290+//
 291+// (note, do not use _inverse_ constants; stbi_image will invert them
 292+// appropriately).
 293+//
 294+// Additionally, there is a new, parallel interface for loading files as
 295+// (linear) floats to preserve the full dynamic range:
 296+//
 297+//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
 298+//
 299+// If you load LDR images through this interface, those images will
 300+// be promoted to floating point values, run through the inverse of
 301+// constants corresponding to the above:
 302+//
 303+//     stbi_ldr_to_hdr_scale(1.0f);
 304+//     stbi_ldr_to_hdr_gamma(2.2f);
 305+//
 306+// Finally, given a filename (or an open file or memory block--see header
 307+// file for details) containing image data, you can query for the "most
 308+// appropriate" interface to use (that is, whether the image is HDR or
 309+// not), using:
 310+//
 311+//     stbi_is_hdr(char *filename);
 312+//
 313+// ===========================================================================
 314+//
 315+// iPhone PNG support:
 316+//
 317+// We optionally support converting iPhone-formatted PNGs (which store
 318+// premultiplied BGRA) back to RGB, even though they're internally encoded
 319+// differently. To enable this conversion, call
 320+// stbi_convert_iphone_png_to_rgb(1).
 321+//
 322+// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
 323+// pixel to remove any premultiplied alpha *only* if the image file explicitly
 324+// says there's premultiplied data (currently only happens in iPhone images,
 325+// and only if iPhone convert-to-rgb processing is on).
 326+//
 327+// ===========================================================================
 328+//
 329+// ADDITIONAL CONFIGURATION
 330+//
 331+//  - You can suppress implementation of any of the decoders to reduce
 332+//    your code footprint by #defining one or more of the following
 333+//    symbols before creating the implementation.
 334+//
 335+//        STBI_NO_JPEG
 336+//        STBI_NO_PNG
 337+//        STBI_NO_BMP
 338+//        STBI_NO_PSD
 339+//        STBI_NO_TGA
 340+//        STBI_NO_GIF
 341+//        STBI_NO_HDR
 342+//        STBI_NO_PIC
 343+//        STBI_NO_PNM   (.ppm and .pgm)
 344+//
 345+//  - You can request *only* certain decoders and suppress all other ones
 346+//    (this will be more forward-compatible, as addition of new decoders
 347+//    doesn't require you to disable them explicitly):
 348+//
 349+//        STBI_ONLY_JPEG
 350+//        STBI_ONLY_PNG
 351+//        STBI_ONLY_BMP
 352+//        STBI_ONLY_PSD
 353+//        STBI_ONLY_TGA
 354+//        STBI_ONLY_GIF
 355+//        STBI_ONLY_HDR
 356+//        STBI_ONLY_PIC
 357+//        STBI_ONLY_PNM   (.ppm and .pgm)
 358+//
 359+//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
 360+//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
 361+//
 362+//  - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater
 363+//    than that size (in either width or height) without further processing.
 364+//    This is to let programs in the wild set an upper bound to prevent
 365+//    denial-of-service attacks on untrusted data, as one could generate a
 366+//    valid image of gigantic dimensions and force stb_image to allocate a
 367+//    huge block of memory and spend disproportionate time decoding it. By
 368+//    default this is set to (1 << 24), which is 16777216, but that's still
 369+//    very big.
 370+
 371+#ifndef STBI_NO_STDIO
 372+#include <stdio.h>
 373+#endif // STBI_NO_STDIO
 374+
 375+#define STBI_VERSION 1
 376+
 377+enum
 378+{
 379+   STBI_default = 0, // only used for desired_channels
 380+
 381+   STBI_grey       = 1,
 382+   STBI_grey_alpha = 2,
 383+   STBI_rgb        = 3,
 384+   STBI_rgb_alpha  = 4
 385+};
 386+
 387+#include <stdlib.h>
 388+typedef unsigned char stbi_uc;
 389+typedef unsigned short stbi_us;
 390+
 391+#ifdef __cplusplus
 392+extern "C" {
 393+#endif
 394+
 395+#ifndef STBIDEF
 396+#ifdef STB_IMAGE_STATIC
 397+#define STBIDEF static
 398+#else
 399+#define STBIDEF extern
 400+#endif
 401+#endif
 402+
 403+//////////////////////////////////////////////////////////////////////////////
 404+//
 405+// PRIMARY API - works on images of any type
 406+//
 407+
 408+//
 409+// load image by filename, open file, or memory buffer
 410+//
 411+
 412+typedef struct
 413+{
 414+   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
 415+   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
 416+   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
 417+} stbi_io_callbacks;
 418+
 419+////////////////////////////////////
 420+//
 421+// 8-bits-per-channel interface
 422+//
 423+
 424+STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
 425+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 426+
 427+#ifndef STBI_NO_STDIO
 428+STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
 429+STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 430+// for stbi_load_from_file, file pointer is left pointing immediately after image
 431+#endif
 432+
 433+#ifndef STBI_NO_GIF
 434+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
 435+#endif
 436+
 437+#ifdef STBI_WINDOWS_UTF8
 438+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
 439+#endif
 440+
 441+////////////////////////////////////
 442+//
 443+// 16-bits-per-channel interface
 444+//
 445+
 446+STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
 447+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 448+
 449+#ifndef STBI_NO_STDIO
 450+STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
 451+STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 452+#endif
 453+
 454+////////////////////////////////////
 455+//
 456+// float-per-channel interface
 457+//
 458+#ifndef STBI_NO_LINEAR
 459+   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
 460+   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
 461+
 462+   #ifndef STBI_NO_STDIO
 463+   STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
 464+   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 465+   #endif
 466+#endif
 467+
 468+#ifndef STBI_NO_HDR
 469+   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
 470+   STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
 471+#endif // STBI_NO_HDR
 472+
 473+#ifndef STBI_NO_LINEAR
 474+   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
 475+   STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
 476+#endif // STBI_NO_LINEAR
 477+
 478+// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
 479+STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
 480+STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
 481+#ifndef STBI_NO_STDIO
 482+STBIDEF int      stbi_is_hdr          (char const *filename);
 483+STBIDEF int      stbi_is_hdr_from_file(FILE *f);
 484+#endif // STBI_NO_STDIO
 485+
 486+
 487+// get a VERY brief reason for failure
 488+// on most compilers (and ALL modern mainstream compilers) this is threadsafe
 489+STBIDEF const char *stbi_failure_reason  (void);
 490+
 491+// free the loaded image -- this is just free()
 492+STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
 493+
 494+// get image dimensions & components without fully decoding
 495+STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
 496+STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
 497+STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
 498+STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
 499+
 500+#ifndef STBI_NO_STDIO
 501+STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
 502+STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
 503+STBIDEF int      stbi_is_16_bit          (char const *filename);
 504+STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
 505+#endif
 506+
 507+
 508+
 509+// for image formats that explicitly notate that they have premultiplied alpha,
 510+// we just return the colors as stored in the file. set this flag to force
 511+// unpremultiplication. results are undefined if the unpremultiply overflow.
 512+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
 513+
 514+// indicate whether we should process iphone images back to canonical format,
 515+// or just pass them through "as-is"
 516+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
 517+
 518+// flip the image vertically, so the first pixel in the output array is the bottom left
 519+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
 520+
 521+// as above, but only applies to images loaded on the thread that calls the function
 522+// this function is only available if your compiler supports thread-local variables;
 523+// calling it will fail to link if your compiler doesn't
 524+STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
 525+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
 526+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
 527+
 528+// ZLIB client - used by PNG, available for other purposes
 529+
 530+STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
 531+STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
 532+STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
 533+STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
 534+
 535+STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
 536+STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
 537+
 538+
 539+#ifdef __cplusplus
 540+}
 541+#endif
 542+
 543+//
 544+//
 545+////   end header file   /////////////////////////////////////////////////////
 546+#endif // STBI_INCLUDE_STB_IMAGE_H
 547+
 548+#ifdef STB_IMAGE_IMPLEMENTATION
 549+
 550+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
 551+  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
 552+  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
 553+  || defined(STBI_ONLY_ZLIB)
 554+   #ifndef STBI_ONLY_JPEG
 555+   #define STBI_NO_JPEG
 556+   #endif
 557+   #ifndef STBI_ONLY_PNG
 558+   #define STBI_NO_PNG
 559+   #endif
 560+   #ifndef STBI_ONLY_BMP
 561+   #define STBI_NO_BMP
 562+   #endif
 563+   #ifndef STBI_ONLY_PSD
 564+   #define STBI_NO_PSD
 565+   #endif
 566+   #ifndef STBI_ONLY_TGA
 567+   #define STBI_NO_TGA
 568+   #endif
 569+   #ifndef STBI_ONLY_GIF
 570+   #define STBI_NO_GIF
 571+   #endif
 572+   #ifndef STBI_ONLY_HDR
 573+   #define STBI_NO_HDR
 574+   #endif
 575+   #ifndef STBI_ONLY_PIC
 576+   #define STBI_NO_PIC
 577+   #endif
 578+   #ifndef STBI_ONLY_PNM
 579+   #define STBI_NO_PNM
 580+   #endif
 581+#endif
 582+
 583+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
 584+#define STBI_NO_ZLIB
 585+#endif
 586+
 587+
 588+#include <stdarg.h>
 589+#include <stddef.h> // ptrdiff_t on osx
 590+#include <stdlib.h>
 591+#include <string.h>
 592+#include <limits.h>
 593+
 594+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
 595+#include <math.h>  // ldexp, pow
 596+#endif
 597+
 598+#ifndef STBI_NO_STDIO
 599+#include <stdio.h>
 600+#endif
 601+
 602+#ifndef STBI_ASSERT
 603+#include <assert.h>
 604+#define STBI_ASSERT(x) assert(x)
 605+#endif
 606+
 607+#ifdef __cplusplus
 608+#define STBI_EXTERN extern "C"
 609+#else
 610+#define STBI_EXTERN extern
 611+#endif
 612+
 613+
 614+#ifndef _MSC_VER
 615+   #ifdef __cplusplus
 616+   #define stbi_inline inline
 617+   #else
 618+   #define stbi_inline
 619+   #endif
 620+#else
 621+   #define stbi_inline __forceinline
 622+#endif
 623+
 624+#ifndef STBI_NO_THREAD_LOCALS
 625+   #if defined(__cplusplus) &&  __cplusplus >= 201103L
 626+      #define STBI_THREAD_LOCAL       thread_local
 627+   #elif defined(__GNUC__) && __GNUC__ < 5
 628+      #define STBI_THREAD_LOCAL       __thread
 629+   #elif defined(_MSC_VER)
 630+      #define STBI_THREAD_LOCAL       __declspec(thread)
 631+   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
 632+      #define STBI_THREAD_LOCAL       _Thread_local
 633+   #endif
 634+
 635+   #ifndef STBI_THREAD_LOCAL
 636+      #if defined(__GNUC__)
 637+        #define STBI_THREAD_LOCAL       __thread
 638+      #endif
 639+   #endif
 640+#endif
 641+
 642+#if defined(_MSC_VER) || defined(__SYMBIAN32__)
 643+typedef unsigned short stbi__uint16;
 644+typedef   signed short stbi__int16;
 645+typedef unsigned int   stbi__uint32;
 646+typedef   signed int   stbi__int32;
 647+#else
 648+#include <stdint.h>
 649+typedef uint16_t stbi__uint16;
 650+typedef int16_t  stbi__int16;
 651+typedef uint32_t stbi__uint32;
 652+typedef int32_t  stbi__int32;
 653+#endif
 654+
 655+// should produce compiler error if size is wrong
 656+typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
 657+
 658+#ifdef _MSC_VER
 659+#define STBI_NOTUSED(v)  (void)(v)
 660+#else
 661+#define STBI_NOTUSED(v)  (void)sizeof(v)
 662+#endif
 663+
 664+#ifdef _MSC_VER
 665+#define STBI_HAS_LROTL
 666+#endif
 667+
 668+#ifdef STBI_HAS_LROTL
 669+   #define stbi_lrot(x,y)  _lrotl(x,y)
 670+#else
 671+   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
 672+#endif
 673+
 674+#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
 675+// ok
 676+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
 677+// ok
 678+#else
 679+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
 680+#endif
 681+
 682+#ifndef STBI_MALLOC
 683+#define STBI_MALLOC(sz)           malloc(sz)
 684+#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
 685+#define STBI_FREE(p)              free(p)
 686+#endif
 687+
 688+#ifndef STBI_REALLOC_SIZED
 689+#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
 690+#endif
 691+
 692+// x86/x64 detection
 693+#if defined(__x86_64__) || defined(_M_X64)
 694+#define STBI__X64_TARGET
 695+#elif defined(__i386) || defined(_M_IX86)
 696+#define STBI__X86_TARGET
 697+#endif
 698+
 699+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
 700+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
 701+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
 702+// but previous attempts to provide the SSE2 functions with runtime
 703+// detection caused numerous issues. The way architecture extensions are
 704+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
 705+// New behavior: if compiled with -msse2, we use SSE2 without any
 706+// detection; if not, we don't use it at all.
 707+#define STBI_NO_SIMD
 708+#endif
 709+
 710+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
 711+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
 712+//
 713+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
 714+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
 715+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
 716+// simultaneously enabling "-mstackrealign".
 717+//
 718+// See https://github.com/nothings/stb/issues/81 for more information.
 719+//
 720+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
 721+// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
 722+#define STBI_NO_SIMD
 723+#endif
 724+
 725+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
 726+#define STBI_SSE2
 727+#include <emmintrin.h>
 728+
 729+#ifdef _MSC_VER
 730+
 731+#if _MSC_VER >= 1400  // not VC6
 732+#include <intrin.h> // __cpuid
 733+static int stbi__cpuid3(void)
 734+{
 735+   int info[4];
 736+   __cpuid(info,1);
 737+   return info[3];
 738+}
 739+#else
// Inline-assembly variant used when _MSC_VER < 1400: executes CPUID with
// EAX = 1 and returns the value left in EDX.
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax,1
      cpuid
      mov  res,edx
   }
   return res;
}
 750+#endif
 751+
 752+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
 753+
 754+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
 755+static int stbi__sse2_available(void)
 756+{
 757+   int info3 = stbi__cpuid3();
 758+   return ((info3 >> 26) & 1) != 0;
 759+}
 760+#endif
 761+
 762+#else // assume GCC-style if not VC++
 763+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 764+
 765+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
 766+static int stbi__sse2_available(void)
 767+{
 768+   // If we're even attempting to compile this on GCC/Clang, that means
 769+   // -msse2 is on, which means the compiler is allowed to use SSE2
 770+   // instructions at will, and so are we.
 771+   return 1;
 772+}
 773+#endif
 774+
 775+#endif
 776+#endif
 777+
 778+// ARM NEON
 779+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
 780+#undef STBI_NEON
 781+#endif
 782+
 783+#ifdef STBI_NEON
 784+#include <arm_neon.h>
 785+#ifdef _MSC_VER
 786+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
 787+#else
 788+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 789+#endif
 790+#endif
 791+
 792+#ifndef STBI_SIMD_ALIGN
 793+#define STBI_SIMD_ALIGN(type, name) type name
 794+#endif
 795+
 796+#ifndef STBI_MAX_DIMENSIONS
 797+#define STBI_MAX_DIMENSIONS (1 << 24)
 798+#endif
 799+
 800+///////////////////////////////////////////////
 801+//
 802+//  stbi__context struct and start_xxx functions
 803+
// stbi__context structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
typedef struct
{
   // NOTE(review): presumably the decoded image width/height - not set in this section
   stbi__uint32 img_x, img_y;
   // NOTE(review): presumably channel counts (in file / in output) - confirm in the decoders
   int img_n, img_out_n;

   // callback table copied in by stbi__start_callbacks; stbi__start_mem sets io.read to NULL
   stbi_io_callbacks io;
   // opaque pointer stored by stbi__start_callbacks (a FILE* when set up via stbi__start_file)
   void *io_user_data;

   // 1 when initialised by stbi__start_callbacks, 0 when initialised by stbi__start_mem
   int read_from_callbacks;
   // set to sizeof(buffer_start) by stbi__start_callbacks
   int buflen;
   // staging buffer that img_buffer points into in callback mode
   stbi_uc buffer_start[128];
   // reset to 0 by both initialisers; maintained elsewhere
   int callback_already_read;

   // current read window: [img_buffer, img_buffer_end)
   stbi_uc *img_buffer, *img_buffer_end;
   // window saved at initialisation so stbi__rewind can restore it
   stbi_uc *img_buffer_original, *img_buffer_original_end;
} stbi__context;
 822+
 823+
 824+static void stbi__refill_buffer(stbi__context *s);
 825+
 826+// initialize a memory-decode context
 827+static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
 828+{
 829+   s->io.read = NULL;
 830+   s->read_from_callbacks = 0;
 831+   s->callback_already_read = 0;
 832+   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
 833+   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
 834+}
 835+
 836+// initialize a callback-based context
 837+static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
 838+{
 839+   s->io = *c;
 840+   s->io_user_data = user;
 841+   s->buflen = sizeof(s->buffer_start);
 842+   s->read_from_callbacks = 1;
 843+   s->callback_already_read = 0;
 844+   s->img_buffer = s->img_buffer_original = s->buffer_start;
 845+   stbi__refill_buffer(s);
 846+   s->img_buffer_original_end = s->img_buffer_end;
 847+}
 848+
 849+#ifndef STBI_NO_STDIO
 850+
 851+static int stbi__stdio_read(void *user, char *data, int size)
 852+{
 853+   return (int) fread(data,1,size,(FILE*) user);
 854+}
 855+
 856+static void stbi__stdio_skip(void *user, int n)
 857+{
 858+   int ch;
 859+   fseek((FILE*) user, n, SEEK_CUR);
 860+   ch = fgetc((FILE*) user);  /* have to read a byte to reset feof()'s flag */
 861+   if (ch != EOF) {
 862+      ungetc(ch, (FILE *) user);  /* push byte back onto stream if valid. */
 863+   }
 864+}
 865+
 866+static int stbi__stdio_eof(void *user)
 867+{
 868+   return feof((FILE*) user) || ferror((FILE *) user);
 869+}
 870+
// Callback table that routes stbi_io_callbacks to the stdio helpers above;
// stbi__start_file passes it to stbi__start_callbacks together with a FILE*.
static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,
   stbi__stdio_skip,
   stbi__stdio_eof,
};
 877+
 878+static void stbi__start_file(stbi__context *s, FILE *f)
 879+{
 880+   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
 881+}
 882+
 883+//static void stop_file(stbi__context *s) { }
 884+
 885+#endif // !STBI_NO_STDIO
 886+
 887+static void stbi__rewind(stbi__context *s)
 888+{
 889+   // conceptually rewind SHOULD rewind to the beginning of the stream,
 890+   // but we just rewind to the beginning of the initial buffer, because
 891+   // we only use it after doing 'test', which only ever looks at at most 92 bytes
 892+   s->img_buffer = s->img_buffer_original;
 893+   s->img_buffer_end = s->img_buffer_original_end;
 894+}
 895+
// Channel orderings that can be recorded in stbi__result_info.channel_order.
enum
{
   STBI_ORDER_RGB,
   STBI_ORDER_BGR
};

// Per-load result metadata. stbi__load_main zeroes it and then defaults it
// to 8 bits per channel, STBI_ORDER_RGB and 0 channels before dispatching
// to a format loader.
typedef struct
{
   int bits_per_channel;   // the postprocess helpers assert this is 8 or 16
   int num_channels;
   int channel_order;      // one of the STBI_ORDER_* values
} stbi__result_info;
 908+
 909+#ifndef STBI_NO_JPEG
 910+static int      stbi__jpeg_test(stbi__context *s);
 911+static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 912+static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
 913+#endif
 914+
 915+#ifndef STBI_NO_PNG
 916+static int      stbi__png_test(stbi__context *s);
 917+static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 918+static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
 919+static int      stbi__png_is16(stbi__context *s);
 920+#endif
 921+
 922+#ifndef STBI_NO_BMP
 923+static int      stbi__bmp_test(stbi__context *s);
 924+static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 925+static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
 926+#endif
 927+
 928+#ifndef STBI_NO_TGA
 929+static int      stbi__tga_test(stbi__context *s);
 930+static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 931+static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
 932+#endif
 933+
 934+#ifndef STBI_NO_PSD
 935+static int      stbi__psd_test(stbi__context *s);
 936+static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
 937+static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
 938+static int      stbi__psd_is16(stbi__context *s);
 939+#endif
 940+
 941+#ifndef STBI_NO_HDR
 942+static int      stbi__hdr_test(stbi__context *s);
 943+static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 944+static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
 945+#endif
 946+
 947+#ifndef STBI_NO_PIC
 948+static int      stbi__pic_test(stbi__context *s);
 949+static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 950+static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
 951+#endif
 952+
 953+#ifndef STBI_NO_GIF
 954+static int      stbi__gif_test(stbi__context *s);
 955+static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 956+static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
 957+static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
 958+#endif
 959+
 960+#ifndef STBI_NO_PNM
 961+static int      stbi__pnm_test(stbi__context *s);
 962+static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 963+static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
 964+static int      stbi__pnm_is16(stbi__context *s);
 965+#endif
 966+
// Most recent failure message, written by stbi__err() and exposed through
// stbi_failure_reason(). Declared thread-local whenever STBI_THREAD_LOCAL
// is defined.
static
#ifdef STBI_THREAD_LOCAL
STBI_THREAD_LOCAL
#endif
const char *stbi__g_failure_reason;
 972+
 973+STBIDEF const char *stbi_failure_reason(void)
 974+{
 975+   return stbi__g_failure_reason;
 976+}
 977+
 978+#ifndef STBI_NO_FAILURE_STRINGS
 979+static int stbi__err(const char *str)
 980+{
 981+   stbi__g_failure_reason = str;
 982+   return 0;
 983+}
 984+#endif
 985+
 986+static void *stbi__malloc(size_t size)
 987+{
 988+    return STBI_MALLOC(size);
 989+}
 990+
 991+// stb_image uses ints pervasively, including for offset calculations.
 992+// therefore the largest decoded image size we can support with the
 993+// current code, even on 64-bit targets, is INT_MAX. this is not a
 994+// significant limitation for the intended use case.
 995+//
 996+// we do, however, need to make sure our size calculations don't
 997+// overflow. hence a few helper functions for size calculations that
 998+// multiply integers together, making sure that they're non-negative
 999+// and no overflow occurs.
1000+
// Returns 1 when a + b does not exceed INT_MAX, 0 otherwise.
// A negative `b` is rejected outright; `a` itself is not sign-checked here.
static int stbi__addsizes_valid(int a, int b)
{
   int headroom;

   if (b < 0)
      return 0;

   // With 0 <= b <= INT_MAX the subtraction below cannot overflow, and
   // "a + b <= INT_MAX" is equivalent to "a <= INT_MAX - b".
   headroom = INT_MAX - b;
   return (a > headroom) ? 0 : 1;
}
1012+
// Returns 1 when a * b fits in an int, 0 when it would overflow.
// Any negative factor makes the product invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0)
      return 0;
   if (b < 0)
      return 0;
   if (b == 0)
      return 1; // anything times zero is fine (also avoids dividing by 0)

   // overflow-free form of "a * b <= INT_MAX"
   return (a > INT_MAX / b) ? 0 : 1;
}
1022+
1023+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// Returns 1 when "a*b + add" has no negative factor/addend and does not
// overflow an int, 0 otherwise.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // the product is only formed once it is known not to overflow
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   return stbi__addsizes_valid(a*b, add) ? 1 : 0;
}
1029+#endif
1030+
// Returns 1 when "a*b*c + add" has no negative factor/addend and does not
// overflow an int, 0 otherwise.
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is only formed after the previous check passed
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   return stbi__addsizes_valid(a*b*c, add) ? 1 : 0;
}
1037+
1038+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
1039+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // validate the product one factor at a time so that no intermediate
   // multiplication is evaluated before it is known to be safe
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d))
      return 0;
   return stbi__addsizes_valid(a*b*c*d, add) ? 1 : 0;
}
1045+#endif
1046+
1047+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// Allocates a*b + add bytes through stbi__malloc, or returns NULL without
// allocating when that size expression fails stbi__mad2sizes_valid.
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
1054+#endif
1055+
// Allocates a*b*c + add bytes through stbi__malloc, or returns NULL without
// allocating when that size expression fails stbi__mad3sizes_valid.
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
1061+
1062+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
// Allocates a*b*c*d + add bytes through stbi__malloc, or returns NULL
// without allocating when that size expression fails stbi__mad4sizes_valid.
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
1068+#endif
1069+
// Returns 1 when a + b is representable as an int, 0 when it would
// overflow in either direction.
static int stbi__addints_valid(int a, int b)
{
   int a_nonneg = (a >= 0);
   int b_nonneg = (b >= 0);

   // operands of opposite sign can never overflow
   if (a_nonneg != b_nonneg)
      return 1;

   // both negative: "a + b >= INT_MIN" rewritten so nothing overflows
   // (INT_MIN - b is safe because b < 0)
   if (!a_nonneg)
      return (a >= INT_MIN - b) ? 1 : 0;

   // both non-negative: "a + b <= INT_MAX" rewritten likewise
   return (a <= INT_MAX - b) ? 1 : 0;
}
1077+
// Returns 1 if the product a*b lies in [SHRT_MIN, SHRT_MAX], 0 otherwise.
// Works for every combination of operand signs and never evaluates a*b
// itself, so it cannot overflow.
//
// Each case divides the relevant bound by b; C integer division truncates
// toward zero, which is exactly the rounding each inequality needs. No
// division here can overflow because the dividend is a short-range
// constant.
static int stbi__mul2shorts_valid(int a, int b)
{
   if (a == 0 || b == 0) return 1; // product is 0; also avoids dividing by zero

   if (b > 0) {
      if (a > 0) return a <= SHRT_MAX / b; // positive product: a*b <= SHRT_MAX
      return a >= SHRT_MIN / b;            // negative product: a*b >= SHRT_MIN
   }

   // b < 0: dividing by b flips the direction of the inequality
   if (a < 0) return a >= SHRT_MAX / b;    // positive product: a*b <= SHRT_MAX
   return a <= SHRT_MIN / b;               // negative product: a*b >= SHRT_MIN
}
1086+
1087+// stbi__err - error
1088+// stbi__errpf - error returning pointer to float
1089+// stbi__errpuc - error returning pointer to unsigned char
1090+
// Two-argument front end for error reporting. Depending on configuration it
// expands to plain 0 (STBI_NO_FAILURE_STRINGS), forwards the second string
// (STBI_FAILURE_USERMSG), or forwards the first string to the one-argument
// stbi__err() function.
#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// Typed-NULL variants: evaluate stbi__err(x,y) for its side effect and
// yield a NULL pointer of the indicated type either way.
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
1101+
1102+STBIDEF void stbi_image_free(void *retval_from_stbi_load)
1103+{
1104+   STBI_FREE(retval_from_stbi_load);
1105+}
1106+
1107+#ifndef STBI_NO_LINEAR
1108+static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
1109+#endif
1110+
1111+#ifndef STBI_NO_HDR
1112+static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
1113+#endif
1114+
1115+static int stbi__vertically_flip_on_load_global = 0;
1116+
1117+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1118+{
1119+   stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
1120+}
1121+
1122+#ifndef STBI_THREAD_LOCAL
1123+#define stbi__vertically_flip_on_load  stbi__vertically_flip_on_load_global
1124+#else
1125+static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;
1126+
1127+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
1128+{
1129+   stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
1130+   stbi__vertically_flip_on_load_set = 1;
1131+}
1132+
1133+#define stbi__vertically_flip_on_load  (stbi__vertically_flip_on_load_set       \
1134+                                         ? stbi__vertically_flip_on_load_local  \
1135+                                         : stbi__vertically_flip_on_load_global)
1136+#endif // STBI_THREAD_LOCAL
1137+
// Format dispatcher: resets *ri to its defaults, then runs each enabled
// format's test function in a fixed order and returns the result of the
// first loader whose test succeeds. If no test matches, records an
// "unknown image type" failure and returns NULL.
//   s            - input context to probe and decode
//   x, y, comp   - forwarded to the chosen loader
//   req_comp     - forwarded to the chosen loader; 0 means "use *comp"
//                  where this function has to pick a channel count itself
//   ri           - receives result metadata (defaults: 8 bits, RGB order)
//   bpc          - only forwarded to the PSD loader
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   // test the formats with a very explicit header first (at least a FOURCC
   // or distinctive magic number first)
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #else
   // bpc is consumed only by the PSD loader; silence the unused warning
   STBI_NOTUSED(bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif

   // then the formats that can end up attempting to load with just 1 or 2
   // bytes matching expectations; these are prone to false positives, so
   // try them later
   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      // HDR data is decoded as floats and then converted down to bytes.
      // NOTE(review): hdr is forwarded without a NULL check; presumably
      // stbi__hdr_to_ldr tolerates a NULL input - confirm.
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1190+
1191+static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1192+{
1193+   int i;
1194+   int img_len = w * h * channels;
1195+   stbi_uc *reduced;
1196+
1197+   reduced = (stbi_uc *) stbi__malloc(img_len);
1198+   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1199+
1200+   for (i = 0; i < img_len; ++i)
1201+      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1202+
1203+   STBI_FREE(orig);
1204+   return reduced;
1205+}
1206+
1207+static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1208+{
1209+   int i;
1210+   int img_len = w * h * channels;
1211+   stbi__uint16 *enlarged;
1212+
1213+   enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1214+   if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1215+
1216+   for (i = 0; i < img_len; ++i)
1217+      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1218+
1219+   STBI_FREE(orig);
1220+   return enlarged;
1221+}
1222+
1223+static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1224+{
1225+   int row;
1226+   size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1227+   stbi_uc temp[2048];
1228+   stbi_uc *bytes = (stbi_uc *)image;
1229+
1230+   for (row = 0; row < (h>>1); row++) {
1231+      stbi_uc *row0 = bytes + row*bytes_per_row;
1232+      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1233+      // swap row0 with row1
1234+      size_t bytes_left = bytes_per_row;
1235+      while (bytes_left) {
1236+         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1237+         memcpy(temp, row0, bytes_copy);
1238+         memcpy(row0, row1, bytes_copy);
1239+         memcpy(row1, temp, bytes_copy);
1240+         row0 += bytes_copy;
1241+         row1 += bytes_copy;
1242+         bytes_left -= bytes_copy;
1243+      }
1244+   }
1245+}
1246+
1247+#ifndef STBI_NO_GIF
1248+static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1249+{
1250+   int slice;
1251+   int slice_size = w * h * bytes_per_pixel;
1252+
1253+   stbi_uc *bytes = (stbi_uc *)image;
1254+   for (slice = 0; slice < z; ++slice) {
1255+      stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1256+      bytes += slice_size;
1257+   }
1258+}
1259+#endif
1260+
1261+static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1262+{
1263+   stbi__result_info ri;
1264+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1265+
1266+   if (result == NULL)
1267+      return NULL;
1268+
1269+   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
1270+   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1271+
1272+   if (ri.bits_per_channel != 8) {
1273+      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1274+      ri.bits_per_channel = 8;
1275+   }
1276+
1277+   // @TODO: move stbi__convert_format to here
1278+
1279+   if (stbi__vertically_flip_on_load) {
1280+      int channels = req_comp ? req_comp : *comp;
1281+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1282+   }
1283+
1284+   return (unsigned char *) result;
1285+}
1286+
1287+static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1288+{
1289+   stbi__result_info ri;
1290+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1291+
1292+   if (result == NULL)
1293+      return NULL;
1294+
1295+   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
1296+   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1297+
1298+   if (ri.bits_per_channel != 16) {
1299+      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1300+      ri.bits_per_channel = 16;
1301+   }
1302+
1303+   // @TODO: move stbi__convert_format16 to here
1304+   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1305+
1306+   if (stbi__vertically_flip_on_load) {
1307+      int channels = req_comp ? req_comp : *comp;
1308+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1309+   }
1310+
1311+   return (stbi__uint16 *) result;
1312+}
1313+
1314+#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
1315+static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1316+{
1317+   if (stbi__vertically_flip_on_load && result != NULL) {
1318+      int channels = req_comp ? req_comp : *comp;
1319+      stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1320+   }
1321+}
1322+#endif
1323+
1324+#ifndef STBI_NO_STDIO
1325+
1326+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
1327+STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
1328+STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
1329+#endif
1330+
1331+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
1332+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
1333+{
1334+	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
1335+}
1336+#endif
1337+
// Portable fopen wrapper. Returns the opened stream, or 0 on failure.
//  - Windows with STBI_WINDOWS_UTF8: `filename` and `mode` are converted
//    from UTF-8 to wide strings and opened with _wfopen_s / _wfopen;
//    a failed conversion returns 0.
//  - Other MSVC >= 1400 builds: fopen_s.
//  - Everything else: plain fopen.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *f;
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
   wchar_t wMode[64];
   wchar_t wFilename[1024];
	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
      return 0;

	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
      return 0;

#if defined(_MSC_VER) && _MSC_VER >= 1400
	if (0 != _wfopen_s(&f, wFilename, wMode))
		f = 0;
#else
   f = _wfopen(wFilename, wMode);
#endif

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&f, filename, mode))
      f=0;
#else
   f = fopen(filename, mode);
#endif
   return f;
}
1365+
1366+
1367+STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1368+{
1369+   FILE *f = stbi__fopen(filename, "rb");
1370+   unsigned char *result;
1371+   if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1372+   result = stbi_load_from_file(f,x,y,comp,req_comp);
1373+   fclose(f);
1374+   return result;
1375+}
1376+
1377+STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1378+{
1379+   unsigned char *result;
1380+   stbi__context s;
1381+   stbi__start_file(&s,f);
1382+   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1383+   if (result) {
1384+      // need to 'unget' all the characters in the IO buffer
1385+      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1386+   }
1387+   return result;
1388+}
1389+
1390+STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1391+{
1392+   stbi__uint16 *result;
1393+   stbi__context s;
1394+   stbi__start_file(&s,f);
1395+   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1396+   if (result) {
1397+      // need to 'unget' all the characters in the IO buffer
1398+      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1399+   }
1400+   return result;
1401+}
1402+
1403+STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1404+{
1405+   FILE *f = stbi__fopen(filename, "rb");
1406+   stbi__uint16 *result;
1407+   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1408+   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1409+   fclose(f);
1410+   return result;
1411+}
1412+
1413+
1414+#endif //!STBI_NO_STDIO
1415+
1416+STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1417+{
1418+   stbi__context s;
1419+   stbi__start_mem(&s,buffer,len);
1420+   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1421+}
1422+
1423+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1424+{
1425+   stbi__context s;
1426+   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1427+   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1428+}
1429+
1430+STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1431+{
1432+   stbi__context s;
1433+   stbi__start_mem(&s,buffer,len);
1434+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1435+}
1436+
1437+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1438+{
1439+   stbi__context s;
1440+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1441+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1442+}
1443+
1444+#ifndef STBI_NO_GIF
1445+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1446+{
1447+   unsigned char *result;
1448+   stbi__context s;
1449+   stbi__start_mem(&s,buffer,len);
1450+
1451+   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1452+   if (stbi__vertically_flip_on_load) {
1453+      stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1454+   }
1455+
1456+   return result;
1457+}
1458+#endif
1459+
1460+#ifndef STBI_NO_LINEAR
1461+static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1462+{
1463+   unsigned char *data;
1464+   #ifndef STBI_NO_HDR
1465+   if (stbi__hdr_test(s)) {
1466+      stbi__result_info ri;
1467+      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
1468+      if (hdr_data)
1469+         stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
1470+      return hdr_data;
1471+   }
1472+   #endif
1473+   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
1474+   if (data)
1475+      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1476+   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
1477+}
1478+
1479+STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1480+{
1481+   stbi__context s;
1482+   stbi__start_mem(&s,buffer,len);
1483+   return stbi__loadf_main(&s,x,y,comp,req_comp);
1484+}
1485+
1486+STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) // float-returning load that reads through caller-provided I/O callbacks
1487+{
1488+   stbi__context s;
1489+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); // bind callbacks and user pointer (const is cast away for the helper)
1490+   return stbi__loadf_main(&s,x,y,comp,req_comp); // all decoding and error reporting happens in the shared float loader
1491+}
1492+
1493+#ifndef STBI_NO_STDIO
1494+STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) // float-returning load from a file path
1495+{
1496+   float *result;
1497+   FILE *f = stbi__fopen(filename, "rb"); // binary read mode
1498+   if (!f) return stbi__errpf("can't fopen", "Unable to open file"); // open failure is reported through the error helper
1499+   result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1500+   fclose(f); // the file is closed whether or not decoding succeeded
1501+   return result;
1502+}
1503+
1504+STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) // float-returning load from an already-open FILE; this function does not close f
1505+{
1506+   stbi__context s;
1507+   stbi__start_file(&s,f); // read through the stdio stream
1508+   return stbi__loadf_main(&s,x,y,comp,req_comp);
1509+}
1510+#endif // !STBI_NO_STDIO
1511+
1512+#endif // !STBI_NO_LINEAR
1513+
1514+// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
1515+// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
1516+// reports false!
1517+
1518+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) // returns the stbi__hdr_test result for an in-memory buffer; always 0 when built with STBI_NO_HDR
1519+{
1520+   #ifndef STBI_NO_HDR
1521+   stbi__context s;
1522+   stbi__start_mem(&s,buffer,len);
1523+   return stbi__hdr_test(&s);
1524+   #else
1525+   STBI_NOTUSED(buffer); // parameters are unused in this configuration
1526+   STBI_NOTUSED(len);
1527+   return 0;
1528+   #endif
1529+}
1530+
1531+#ifndef STBI_NO_STDIO
1532+STBIDEF int      stbi_is_hdr          (char const *filename) // HDR check by file path; returns 0 when the file cannot be opened
1533+{
1534+   FILE *f = stbi__fopen(filename, "rb");
1535+   int result=0; // default answer if the open fails
1536+   if (f) {
1537+      result = stbi_is_hdr_from_file(f);
1538+      fclose(f);
1539+   }
1540+   return result;
1541+}
1542+
1543+STBIDEF int stbi_is_hdr_from_file(FILE *f) // HDR check on an open FILE; the stream position is put back afterwards
1544+{
1545+   #ifndef STBI_NO_HDR
1546+   long pos = ftell(f); // remember where the caller's stream was
1547+   int res;
1548+   stbi__context s;
1549+   stbi__start_file(&s,f);
1550+   res = stbi__hdr_test(&s);
1551+   fseek(f, pos, SEEK_SET); // rewind to the saved position; the ftell/fseek return values are not checked
1552+   return res;
1553+   #else
1554+   STBI_NOTUSED(f);
1555+   return 0; // HDR support compiled out
1556+   #endif
1557+}
1558+#endif // !STBI_NO_STDIO
1559+
1560+STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) // HDR check on data read through caller-provided callbacks; always 0 when built with STBI_NO_HDR
1561+{
1562+   #ifndef STBI_NO_HDR
1563+   stbi__context s;
1564+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1565+   return stbi__hdr_test(&s); // unlike the FILE variant, nothing here restores the stream position
1566+   #else
1567+   STBI_NOTUSED(clbk);
1568+   STBI_NOTUSED(user);
1569+   return 0;
1570+   #endif
1571+}
1572+
1573+#ifndef STBI_NO_LINEAR
1574+static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; // exponent and multiplier applied by stbi__ldr_to_hdr
1575+
1576+STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } // stored as given
1577+STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } // stored as given
1578+#endif
1579+
1580+static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; // kept as reciprocals (the _i suffix) so stbi__hdr_to_ldr can multiply and pow directly
1581+
1582+STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } // stores 1/gamma; the argument is not validated
1583+STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } // stores 1/scale; the argument is not validated
1584+
1585+
1586+//////////////////////////////////////////////////////////////////////////////
1587+//
1588+// Common code used by all image loaders
1589+//
1590+
1591+enum // NOTE(review): presumably the scan modes handed to the per-format parsers - confirm at the use sites, none are visible here
1592+{
1593+   STBI__SCAN_load=0, // value 0
1594+   STBI__SCAN_type, // value 1
1595+   STBI__SCAN_header // value 2
1596+};
1597+
1598+static void stbi__refill_buffer(stbi__context *s) // pull the next chunk from the read callback into the context's buffer
1599+{
1600+   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); // ask for up to buflen bytes; n is the callback's return value
1601+   s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); // add the distance between the old read pointer and img_buffer_original to the running total
1602+   if (n == 0) {
1603+      // at end of file, treat same as if from memory, but need to handle case
1604+      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1605+      s->read_from_callbacks = 0; // stop calling the read callback from stbi__get8
1606+      s->img_buffer = s->buffer_start;
1607+      s->img_buffer_end = s->buffer_start+1; // expose exactly one readable byte
1608+      *s->img_buffer = 0; // and make that byte a zero
1609+   } else {
1610+      s->img_buffer = s->buffer_start; // fresh data occupies [buffer_start, buffer_start + n)
1611+      s->img_buffer_end = s->buffer_start + n;
1612+   }
1613+}
1614+
1615+stbi_inline static stbi_uc stbi__get8(stbi__context *s) // read one byte; yields 0 once the input is exhausted
1616+{
1617+   if (s->img_buffer < s->img_buffer_end)
1618+      return *s->img_buffer++; // common case: a byte is already buffered
1619+   if (s->read_from_callbacks) {
1620+      stbi__refill_buffer(s); // the refill always leaves at least one readable byte (a zero byte at end of input)
1621+      return *s->img_buffer++;
1622+   }
1623+   return 0; // nothing buffered and no callback source left
1624+}
1625+
1626+#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1627+// nothing
1628+#else
1629+stbi_inline static int stbi__at_eof(stbi__context *s) // nonzero when no more input is available
1630+{
1631+   if (s->io.read) { // callback-backed context
1632+      if (!(s->io.eof)(s->io_user_data)) return 0; // the source itself still has data
1633+      // if feof() is true, check if buffer = end
1634+      // special case: we've only got the special 0 character at the end
1635+      if (s->read_from_callbacks == 0) return 1; // stbi__refill_buffer clears this flag when a read returns 0 bytes
1636+   }
1637+
1638+   return s->img_buffer >= s->img_buffer_end; // otherwise EOF means the buffer has been fully consumed
1639+}
1640+#endif
1641+
1642+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
1643+// nothing
1644+#else
1645+static void stbi__skip(stbi__context *s, int n) // advance the read position by n bytes
1646+{
1647+   if (n == 0) return;  // already there!
1648+   if (n < 0) {
1649+      s->img_buffer = s->img_buffer_end; // a negative count jumps to the end of the buffered data instead of moving backwards
1650+      return;
1651+   }
1652+   if (s->io.read) {
1653+      int blen = (int) (s->img_buffer_end - s->img_buffer); // bytes still buffered
1654+      if (blen < n) {
1655+         s->img_buffer = s->img_buffer_end; // drop what is buffered...
1656+         (s->io.skip)(s->io_user_data, n - blen); // ...and let the skip callback handle the remainder
1657+         return;
1658+      }
1659+   }
1660+   s->img_buffer += n; // no bounds check on this path; stbi__get8 returns 0 once img_buffer is not below img_buffer_end
1661+}
1662+#endif
1663+
1664+#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
1665+// nothing
1666+#else
1667+static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) // copy n bytes of input into buffer; returns 1 if all n were obtained, 0 otherwise
1668+{
1669+   if (s->io.read) {
1670+      int blen = (int) (s->img_buffer_end - s->img_buffer); // bytes still buffered
1671+      if (blen < n) {
1672+         int res, count;
1673+
1674+         memcpy(buffer, s->img_buffer, blen); // first hand over what is already buffered
1675+
1676+         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); // then read the rest straight into the caller's buffer
1677+         res = (count == (n-blen)); // success only if the callback delivered every remaining byte
1678+         s->img_buffer = s->img_buffer_end; // the internal buffer is now fully consumed
1679+         return res;
1680+      }
1681+   }
1682+
1683+   if (s->img_buffer+n <= s->img_buffer_end) { // enough buffered data to satisfy the request
1684+      memcpy(buffer, s->img_buffer, n);
1685+      s->img_buffer += n;
1686+      return 1;
1687+   } else
1688+      return 0; // short input: nothing is copied and the read position is left unchanged
1689+}
1690+#endif
1691+
1692+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
1693+// nothing
1694+#else
1695+static int stbi__get16be(stbi__context *s) // read a 16-bit big-endian value (first byte is the high byte)
1696+{
1697+   int z = stbi__get8(s); // read separately so the high byte is fetched first
1698+   return (z << 8) + stbi__get8(s);
1699+}
1700+#endif
1701+
1702+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
1703+// nothing
1704+#else
1705+static stbi__uint32 stbi__get32be(stbi__context *s) // read a 32-bit big-endian value as two 16-bit big-endian halves
1706+{
1707+   stbi__uint32 z = stbi__get16be(s); // high half first; held unsigned so the shift below is well defined
1708+   return (z << 16) + stbi__get16be(s);
1709+}
1710+#endif
1711+
1712+#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1713+// nothing
1714+#else
1715+static int stbi__get16le(stbi__context *s) // read a 16-bit little-endian value (first byte is the low byte)
1716+{
1717+   int z = stbi__get8(s); // read separately so the low byte is fetched first
1718+   return z + (stbi__get8(s) << 8);
1719+}
1720+#endif
1721+
1722+#ifndef STBI_NO_BMP
1723+static stbi__uint32 stbi__get32le(stbi__context *s) // read a 32-bit little-endian value as two 16-bit little-endian halves
1724+{
1725+   stbi__uint32 z = stbi__get16le(s); // low half first
1726+   z += (stbi__uint32)stbi__get16le(s) << 16; // cast before shifting so the high half is shifted as unsigned
1727+   return z;
1728+}
1729+#endif
1730+
1731+#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings: keeps only the low 8 bits of x
1732+
1733+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1734+// nothing
1735+#else
1736+//////////////////////////////////////////////////////////////////////////////
1737+//
1738+//  generic converter from built-in img_n to req_comp
1739+//    individual types do this automatically as much as possible (e.g. jpeg
1740+//    does all cases internally since it needs to colorspace convert anyway,
1741+//    and it never has alpha, so very few cases ). png can automatically
1742+//    interleave an alpha=255 channel, but falls back to this for other cases
1743+//
1744+//  assume data buffer is malloced, so malloc a new one and free that one
1745+//  only failure mode is malloc failing
1746+
1747+static stbi_uc stbi__compute_y(int r, int g, int b) // collapse an RGB triple to one 8-bit gray value
1748+{
1749+   return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8); // integer weights 77+150+29 sum to 256, so >> 8 renormalizes
1750+}
1751+#endif
1752+
1753+#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1754+// nothing
1755+#else
1756+static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) // convert an x-by-y 8-bit image from img_n to req_comp channels; data is freed on every path except the no-op one
1757+{
1758+   int i,j;
1759+   unsigned char *good; // destination buffer with req_comp channels per pixel
1760+
1761+   if (req_comp == img_n) return data; // nothing to convert: the input buffer itself is returned
1762+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1763+
1764+   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
1765+   if (good == NULL) {
1766+      STBI_FREE(data); // the input is released on allocation failure as well
1767+      return stbi__errpuc("outofmem", "Out of memory");
1768+   }
1769+
1770+   for (j=0; j < (int) y; ++j) { // one scanline per iteration
1771+      unsigned char *src  = data + j * x * img_n   ;
1772+      unsigned char *dest = good + j * x * req_comp;
1773+
1774+      #define STBI__COMBO(a,b)  ((a)*8+(b)) // packs a (from,to) channel pair into one switch value
1775+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // runs x times per scanline, stepping src by a and dest by b
1776+      // convert source image with img_n components to one with req_comp components;
1777+      // avoid switch per pixel, so use switch per scanline and massive macros
1778+      switch (STBI__COMBO(img_n, req_comp)) {
1779+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
1780+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
1781+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
1782+         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
1783+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
1784+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
1785+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
1786+         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
1787+         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
1788+         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
1789+         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
1790+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
1791+         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion");
1792+      }
1793+      #undef STBI__CASE
1794+   }
1795+
1796+   STBI_FREE(data); // ownership moves to the converted copy
1797+   return good;
1798+}
1799+#endif
1800+
1801+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
1802+// nothing
1803+#else
1804+static stbi__uint16 stbi__compute_y_16(int r, int g, int b) // 16-bit counterpart of stbi__compute_y: collapse an RGB triple to one gray value
1805+{
1806+   return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8); // same 77/150/29 weights (sum 256), renormalized by >> 8
1807+}
1808+#endif
1809+
1810+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
1811+// nothing
1812+#else
1813+static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) // convert an x-by-y 16-bit image from img_n to req_comp channels; data is freed on every path except the no-op one
1814+{
1815+   int i,j;
1816+   stbi__uint16 *good; // destination buffer with req_comp channels per pixel
1817+
1818+   if (req_comp == img_n) return data; // nothing to convert: the input buffer itself is returned
1819+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1820+
1821+   good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); // 2 bytes per sample; NOTE(review): this size product is not overflow-checked, unlike the stbi__malloc_mad3 call in stbi__convert_format
1822+   if (good == NULL) {
1823+      STBI_FREE(data); // the input is released on allocation failure as well
1824+      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1825+   }
1826+
1827+   for (j=0; j < (int) y; ++j) { // one scanline per iteration
1828+      stbi__uint16 *src  = data + j * x * img_n   ;
1829+      stbi__uint16 *dest = good + j * x * req_comp;
1830+
1831+      #define STBI__COMBO(a,b)  ((a)*8+(b)) // packs a (from,to) channel pair into one switch value
1832+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // runs x times per scanline, stepping src by a and dest by b
1833+      // convert source image with img_n components to one with req_comp components;
1834+      // avoid switch per pixel, so use switch per scanline and massive macros
1835+      switch (STBI__COMBO(img_n, req_comp)) {
1836+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break;
1837+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
1838+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break;
1839+         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
1840+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
1841+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break;
1842+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break;
1843+         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
1844+         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
1845+         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
1846+         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
1847+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break;
1848+         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
1849+      }
1850+      #undef STBI__CASE
1851+   }
1852+
1853+   STBI_FREE(data); // ownership moves to the converted copy
1854+   return good;
1855+}
1856+#endif
1857+
1858+#ifndef STBI_NO_LINEAR
1859+static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) // convert an 8-bit image to floats; data is freed, including on allocation failure
1860+{
1861+   int i,k,n;
1862+   float *output;
1863+   if (!data) return NULL; // pass a failed load straight through
1864+   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1865+   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1866+   // compute number of non-alpha components
1867+   if (comp & 1) n = comp; else n = comp-1; // an even channel count means the last channel is treated as alpha
1868+   for (i=0; i < x*y; ++i) {
1869+      for (k=0; k < n; ++k) {
1870+         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); // normalize to [0,1], raise to the gamma exponent, then scale
1871+      }
1872+   }
1873+   if (n < comp) {
1874+      for (i=0; i < x*y; ++i) {
1875+         output[i*comp + n] = data[i*comp + n]/255.0f; // alpha is only normalized: no gamma, no scale
1876+      }
1877+   }
1878+   STBI_FREE(data); // the 8-bit source is consumed
1879+   return output;
1880+}
1881+#endif
1882+
1883+#ifndef STBI_NO_HDR
1884+#define stbi__float2int(x)   ((int) (x)) // plain truncating cast
1885+static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp) // convert a float image to 8-bit; data is freed, including on allocation failure
1886+{
1887+   int i,k,n;
1888+   stbi_uc *output;
1889+   if (!data) return NULL; // pass a failed load straight through
1890+   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1891+   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1892+   // compute number of non-alpha components
1893+   if (comp & 1) n = comp; else n = comp-1; // an even channel count means the last channel is treated as alpha
1894+   for (i=0; i < x*y; ++i) {
1895+      for (k=0; k < n; ++k) {
1896+         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; // apply inverse scale and gamma, map to 0..255, add 0.5 so the truncation below rounds
1897+         if (z < 0) z = 0;
1898+         if (z > 255) z = 255; // clamp into the 8-bit range
1899+         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1900+      }
1901+      if (k < comp) { // k == n here, so this handles the alpha channel when there is one
1902+         float z = data[i*comp+k] * 255 + 0.5f; // alpha gets no scale or gamma
1903+         if (z < 0) z = 0;
1904+         if (z > 255) z = 255;
1905+         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1906+      }
1907+   }
1908+   STBI_FREE(data); // the float source is consumed
1909+   return output;
1910+}
1911+#endif
1912+
1913+//////////////////////////////////////////////////////////////////////////////
1914+//
1915+//  "baseline" JPEG/JFIF decoder
1916+//
1917+//    simple implementation
1918+//      - doesn't support delayed output of y-dimension
1919+//      - simple interface (only one output format: 8-bit interleaved RGB)
1920+//      - doesn't try to recover corrupt jpegs
1921+//      - doesn't allow partial loading, loading multiple at once
1922+//      - still fast on x86 (copying globals into locals doesn't help x86)
1923+//      - allocates lots of intermediate memory (full size of all components)
1924+//        - non-interleaved case requires this anyway
1925+//        - allows good upsampling (see next)
1926+//    high-quality
1927+//      - upsampled channels are bilinearly interpolated, even across blocks
1928+//      - quality integer IDCT derived from IJG's 'slow'
1929+//    performance
1930+//      - fast huffman; reasonable integer IDCT
1931+//      - some SIMD kernels for common paths on targets with SSE2/NEON
1932+//      - uses a lot of intermediate memory, could cache poorly
1933+
1934+#ifndef STBI_NO_JPEG
1935+
1936+// huffman decoding acceleration
1937+#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache. The fast lookup tables have 1 << FAST_BITS entries.
1938+
1939+typedef struct // one JPEG Huffman table plus the lookup data built by stbi__build_huffman
1940+{
1941+   stbi_uc  fast[1 << FAST_BITS]; // indexed by the next FAST_BITS bits of input: symbol index, or 255 for "not accelerated"
1942+   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1943+   stbi__uint16 code[256]; // code assigned to each symbol index
1944+   stbi_uc  values[256]; // decoded value for each symbol index
1945+   stbi_uc  size[257]; // code length per symbol index; the list is terminated by a 0 entry
1946+   unsigned int maxcode[18]; // per code length: largest code + 1, preshifted to 16 bits; entry 17 is the 0xffffffff sentinel
1947+   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
1948+} stbi__huffman;
1949+
1950+typedef struct // complete state of one JPEG decode
1951+{
1952+   stbi__context *s; // input stream the entropy decoder reads from
1953+   stbi__huffman huff_dc[4]; // four DC Huffman table slots
1954+   stbi__huffman huff_ac[4]; // four AC Huffman table slots
1955+   stbi__uint16 dequant[4][64]; // four tables of 64 dequantization multipliers
1956+   stbi__int16 fast_ac[4][1 << FAST_BITS]; // NOTE(review): presumably the per-AC-table outputs of stbi__build_fast_ac - confirm where they are filled
1957+
1958+// sizes for components, interleaved MCUs
1959+   int img_h_max, img_v_max;
1960+   int img_mcu_x, img_mcu_y;
1961+   int img_mcu_w, img_mcu_h;
1962+
1963+// definition of jpeg image component
1964+   struct
1965+   {
1966+      int id;
1967+      int h,v;
1968+      int tq; // NOTE(review): presumably the quantization table index - confirm at the use sites
1969+      int hd,ha; // NOTE(review): presumably the DC and AC Huffman table indices - confirm at the use sites
1970+      int dc_pred; // running DC value: each block's decoded difference is added to it
1971+
1972+      int x,y,w2,h2;
1973+      stbi_uc *data;
1974+      void *raw_data, *raw_coeff;
1975+      stbi_uc *linebuf;
1976+      short   *coeff;   // progressive only
1977+      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
1978+   } img_comp[4];
1979+
1980+   stbi__uint32   code_buffer; // jpeg entropy-coded buffer; unread bits are kept at the most significant end
1981+   int            code_bits;   // number of valid bits
1982+   unsigned char  marker;      // marker seen while filling entropy buffer
1983+   int            nomore;      // flag if we saw a marker so must stop
1984+
1985+   int            progressive;
1986+   int            spec_start; // the progressive AC decoder starts at this coefficient index and rejects 0
1987+   int            spec_end; // last coefficient index of the scan; the progressive DC decoder requires 0
1988+   int            succ_high; // 0 selects a first scan, nonzero a refinement scan
1989+   int            succ_low; // bit position used to scale (first scan) or refine coefficients
1990+   int            eob_run; // count of upcoming blocks the progressive AC decoder skips as end-of-band
1991+   int            jfif;
1992+   int            app14_color_transform; // Adobe APP14 tag
1993+   int            rgb;
1994+
1995+   int scan_n, order[4];
1996+   int restart_interval, todo;
1997+
1998+// kernels
1999+   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
2000+   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
2001+   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
2002+} stbi__jpeg;
2003+
2004+static int stbi__build_huffman(stbi__huffman *h, int *count) // fill h's size/code/maxcode/delta/fast tables from count[0..15] (codes per bit length); returns 1 on success, the stbi__err result on corrupt input
2005+{
2006+   int i,j,k=0;
2007+   unsigned int code;
2008+   // build size list for each symbol (from JPEG spec)
2009+   for (i=0; i < 16; ++i) {
2010+      for (j=0; j < count[i]; ++j) { // count[i] codes have length i+1
2011+         h->size[k++] = (stbi_uc) (i+1);
2012+         if(k >= 257) return stbi__err("bad size list","Corrupt JPEG"); // more than 256 symbols would overrun the tables
2013+      }
2014+   }
2015+   h->size[k] = 0; // terminator that ends the while loop below
2016+
2017+   // compute actual symbols (from jpeg spec)
2018+   code = 0;
2019+   k = 0;
2020+   for(j=1; j <= 16; ++j) { // j is the code length being assigned
2021+      // compute delta to add to code to compute symbol id
2022+      h->delta[j] = k - code;
2023+      if (h->size[k] == j) {
2024+         while (h->size[k] == j)
2025+            h->code[k++] = (stbi__uint16) (code++); // consecutive codes for all symbols of this length
2026+         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); // the last code assigned must fit in j bits
2027+      }
2028+      // compute largest code + 1 for this size, preshifted as needed later
2029+      h->maxcode[j] = code << (16-j);
2030+      code <<= 1; // codes of the next length start one bit further left
2031+   }
2032+   h->maxcode[j] = 0xffffffff; // j is 17 here: sentinel that stops the decoder's length search
2033+
2034+   // build non-spec acceleration table; 255 is flag for not-accelerated
2035+   memset(h->fast, 255, 1 << FAST_BITS);
2036+   for (i=0; i < k; ++i) { // k is now the total number of symbols
2037+      int s = h->size[i];
2038+      if (s <= FAST_BITS) {
2039+         int c = h->code[i] << (FAST_BITS-s); // code left-aligned within FAST_BITS bits
2040+         int m = 1 << (FAST_BITS-s); // number of FAST_BITS-wide patterns that start with this code
2041+         for (j=0; j < m; ++j) {
2042+            h->fast[c+j] = (stbi_uc) i; // every such pattern maps to symbol index i
2043+         }
2044+      }
2045+   }
2046+   return 1;
2047+}
2048+
2049+// build a table that decodes both magnitude and value of small ACs in
2050+// one go.
2051+static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) // fill fast_ac (1 << FAST_BITS entries) from h; an entry of 0 means no combined fast decode for that bit pattern
2052+{
2053+   int i;
2054+   for (i=0; i < (1 << FAST_BITS); ++i) { // i is every possible FAST_BITS-wide bit pattern
2055+      stbi_uc fast = h->fast[i];
2056+      fast_ac[i] = 0;
2057+      if (fast < 255) { // 255 marks patterns the fast table cannot resolve
2058+         int rs = h->values[fast];
2059+         int run = (rs >> 4) & 15; // high nibble of the symbol
2060+         int magbits = rs & 15; // low nibble: number of magnitude bits that follow the code
2061+         int len = h->size[fast]; // length of the Huffman code itself
2062+
2063+         if (magbits && len + magbits <= FAST_BITS) { // code and magnitude must both fit in the pattern
2064+            // magnitude code followed by receive_extend code
2065+            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); // extract the magbits bits that follow the code
2066+            int m = 1 << (magbits - 1);
2067+            if (k < m) k += (~0U << magbits) + 1; // top magnitude bit clear: shift into the negative range by adding -(1 << magbits) + 1
2068+            // if the result is small enough, we can fit it in fast_ac table
2069+            if (k >= -128 && k <= 127)
2070+               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); // packed as value * 256 + run * 16 + total bits consumed
2071+         }
2072+      }
2073+   }
2074+}
2075+
2076+static void stbi__grow_buffer_unsafe(stbi__jpeg *j) // top up code_buffer one byte at a time until it holds more than 24 bits or a marker is found
2077+{
2078+   do {
2079+      unsigned int b = j->nomore ? 0 : stbi__get8(j->s); // once a marker has been seen, feed zero bytes instead of reading on
2080+      if (b == 0xff) {
2081+         int c = stbi__get8(j->s);
2082+         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
2083+         if (c != 0) { // 0xff followed by a nonzero byte is a marker
2084+            j->marker = (unsigned char) c;
2085+            j->nomore = 1;
2086+            return; // stop without adding bits to the buffer
2087+         }
2088+      } // 0xff followed by 0x00 falls through: the 0xff is added below as a data byte
2089+      j->code_buffer |= b << (24 - j->code_bits); // place the byte directly below the bits already held
2090+      j->code_bits += 8;
2091+   } while (j->code_bits <= 24);
2092+}
2093+
2094+// (1 << n) - 1, i.e. a mask of the low n bits, for n = 0..16
2095+static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
2096+
2097+// decode a jpeg huffman value from the bitstream
2098+stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) // returns the decoded value from h->values, or -1 on a bad code or when too few bits are available
2099+{
2100+   unsigned int temp;
2101+   int c,k;
2102+
2103+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2104+
2105+   // look at the top FAST_BITS and determine what symbol ID it is,
2106+   // if the code is <= FAST_BITS
2107+   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2108+   k = h->fast[c];
2109+   if (k < 255) { // fast hit: k is the symbol index
2110+      int s = h->size[k];
2111+      if (s > j->code_bits)
2112+         return -1; // the code needs more bits than the buffer holds
2113+      j->code_buffer <<= s; // consume the code
2114+      j->code_bits -= s;
2115+      return h->values[k];
2116+   }
2117+
2118+   // naive test is to shift the code_buffer down so k bits are
2119+   // valid, then test against maxcode. To speed this up, we've
2120+   // preshifted maxcode left so that it has (16-k) 0s at the
2121+   // end; in other words, regardless of the number of bits, it
2122+   // wants to be compared against something shifted to have 16;
2123+   // that way we don't need to shift inside the loop.
2124+   temp = j->code_buffer >> 16;
2125+   for (k=FAST_BITS+1 ; ; ++k) // terminates at k == 17 at the latest because maxcode[17] is 0xffffffff
2126+      if (temp < h->maxcode[k])
2127+         break;
2128+   if (k == 17) {
2129+      // error! code not found
2130+      j->code_bits -= 16;
2131+      return -1;
2132+   }
2133+
2134+   if (k > j->code_bits)
2135+      return -1; // a k-bit code cannot be taken from fewer than k valid bits
2136+
2137+   // convert the huffman code to the symbol id
2138+   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
2139+   if(c < 0 || c >= 256) // symbol id out of bounds!
2140+       return -1;
2141+   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
2142+
2143+   // convert the id to a symbol
2144+   j->code_bits -= k;
2145+   j->code_buffer <<= k;
2146+   return h->values[c];
2147+}
2148+
2149+// bias[n] = (-1<<n) + 1; stbi__extend_receive adds it when the first received bit is 0
2150+static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
2151+
2152+// combined JPEG 'receive' and JPEG 'extend', since baseline
2153+// always extends everything it receives.
2154+stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n) // read n bits and return them as a sign-extended JPEG value; returns 0 if the stream has run dry
2155+{
2156+   unsigned int k;
2157+   int sgn;
2158+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
2159+   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s instead of continuing
2160+
2161+   sgn = j->code_buffer >> 31; // first bit to be read: 1 leaves the value as read, 0 selects the negative range (the bias is added in the return below)
2162+   k = stbi_lrot(j->code_buffer, n); // rotate so the n bits being read land in the low n bits of k
2163+   j->code_buffer = k & ~stbi__bmask[n]; // what stays in the buffer is the rotated word with those n bits cleared
2164+   k &= stbi__bmask[n]; // keep just the n bits read
2165+   j->code_bits -= n;
2166+   return k + (stbi__jbias[n] & (sgn - 1)); // sgn - 1 is all ones when sgn == 0 and zero when sgn == 1, so the bias applies only when the first bit was 0
2167+}
2168+
2169+// get some unsigned bits
2170+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) // read n bits as an unsigned value; returns 0 if the stream has run dry
2171+{
2172+   unsigned int k;
2173+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
2174+   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s instead of continuing
2175+   k = stbi_lrot(j->code_buffer, n); // rotate so the n bits being read land in the low n bits of k
2176+   j->code_buffer = k & ~stbi__bmask[n]; // what stays in the buffer is the rotated word with those n bits cleared
2177+   k &= stbi__bmask[n]; // keep just the n bits read
2178+   j->code_bits -= n;
2179+   return k;
2180+}
2181+
2182+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) // read one bit; the result is nonzero (not normalized to 1) when the bit was set
2183+{
2184+   unsigned int k;
2185+   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
2186+   if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s instead of continuing
2187+   k = j->code_buffer; // keep the pre-shift value so its top bit can be tested
2188+   j->code_buffer <<= 1;
2189+   --j->code_bits;
2190+   return k & 0x80000000; // callers in this file only test the result for truth
2191+}
2192+
2193+// given a value that's at position X in the zigzag stream,
2194+// where does it appear in the 8x8 matrix coded as row-major?
2195+static const stbi_uc stbi__jpeg_dezigzag[64+15] = // 15 padding entries keep the lookup in bounds after a run of up to 15 is added to an index below 64
2196+{
2197+    0,  1,  8, 16,  9,  2,  3, 10,
2198+   17, 24, 32, 25, 18, 11,  4,  5,
2199+   12, 19, 26, 33, 40, 48, 41, 34,
2200+   27, 20, 13,  6,  7, 14, 21, 28,
2201+   35, 42, 49, 56, 57, 50, 43, 36,
2202+   29, 22, 15, 23, 30, 37, 44, 51,
2203+   58, 59, 52, 45, 38, 31, 39, 46,
2204+   53, 60, 61, 54, 47, 55, 62, 63,
2205+   // let corrupt input sample past end
2206+   63, 63, 63, 63, 63, 63, 63, 63,
2207+   63, 63, 63, 63, 63, 63, 63
2208+};
2209+
2210+// decode one 64-entry block--
2211+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) // decode one block for component b into data[], dequantized and de-zigzagged; returns 1 on success, the stbi__err result on corrupt input
2212+{
2213+   int diff,dc,k;
2214+   int t;
2215+
2216+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2217+   t = stbi__jpeg_huff_decode(j, hdc); // t is the number of bits in the DC difference
2218+   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
2219+
2220+   // 0 all the ac values now so we can do it 32-bits at a time
2221+   memset(data,0,64*sizeof(data[0]));
2222+
2223+   diff = t ? stbi__extend_receive(j, t) : 0;
2224+   if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG"); // validate before the addition below
2225+   dc = j->img_comp[b].dc_pred + diff; // DC is coded as a difference from the component's previous DC
2226+   j->img_comp[b].dc_pred = dc;
2227+   if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); // validate before the multiplication below
2228+   data[0] = (short) (dc * dequant[0]);
2229+
2230+   // decode AC components, see JPEG spec
2231+   k = 1; // position in zigzag order; 0 was the DC term
2232+   do {
2233+      unsigned int zig;
2234+      int c,r,s;
2235+      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2236+      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); // peek at the next FAST_BITS bits
2237+      r = fac[c]; // packed as value * 256 + run * 16 + bits, or 0 for no fast entry
2238+      if (r) { // fast-AC path
2239+         k += (r >> 4) & 15; // run
2240+         s = r & 15; // combined length
2241+         if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
2242+         j->code_buffer <<= s;
2243+         j->code_bits -= s;
2244+         // decode into unzigzag'd location
2245+         zig = stbi__jpeg_dezigzag[k++];
2246+         data[zig] = (short) ((r >> 8) * dequant[zig]);
2247+      } else {
2248+         int rs = stbi__jpeg_huff_decode(j, hac);
2249+         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2250+         s = rs & 15; // low nibble: magnitude bit count
2251+         r = rs >> 4; // high nibble: run of zero coefficients
2252+         if (s == 0) {
2253+            if (rs != 0xf0) break; // end block
2254+            k += 16; // 0xf0 skips sixteen zero coefficients
2255+         } else {
2256+            k += r;
2257+            // decode into unzigzag'd location
2258+            zig = stbi__jpeg_dezigzag[k++];
2259+            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
2260+         }
2261+      }
2262+   } while (k < 64);
2263+   return 1;
2264+}
2265+
2266+static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) // progressive DC pass for one block of component b: first scan or refinement scan; returns 1 on success, the stbi__err result on corrupt input
2267+{
2268+   int diff,dc;
2269+   int t;
2270+   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); // this pass only accepts scans whose spec_end is 0
2271+
2272+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2273+
2274+   if (j->succ_high == 0) {
2275+      // first scan for DC coefficient, must be first
2276+      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
2277+      t = stbi__jpeg_huff_decode(j, hdc); // t is the number of bits in the DC difference
2278+      if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2279+      diff = t ? stbi__extend_receive(j, t) : 0;
2280+
2281+      if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG"); // validate before the addition below
2282+      dc = j->img_comp[b].dc_pred + diff; // DC is coded as a difference from the component's previous DC
2283+      j->img_comp[b].dc_pred = dc;
2284+      if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); // validate before the multiplication below
2285+      data[0] = (short) (dc * (1 << j->succ_low)); // stored scaled up by succ_low bit positions
2286+   } else {
2287+      // refinement scan for DC coefficient
2288+      if (stbi__jpeg_get_bit(j))
2289+         data[0] += (short) (1 << j->succ_low); // a set bit adds the value at bit position succ_low
2290+   }
2291+   return 1;
2292+}
2293+
2294+// @OPTIMIZE: store non-zigzagged during the decode passes,
2295+// and only de-zigzag when dequantizing
2296+static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
2297+{ // decodes one block's AC coefficients for a progressive scan into data[]; returns 1 on success, otherwise the result of stbi__err()
2298+   int k;
2299+   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); // a scan starting at coefficient 0 is rejected here
2300+
2301+   if (j->succ_high == 0) { // first scan over this band; the else branch handles refinement
2302+      int shift = j->succ_low;
2303+
2304+      if (j->eob_run) { // a pending end-of-band run covers this block: count it off and decode nothing
2305+         --j->eob_run;
2306+         return 1;
2307+      }
2308+
2309+      k = j->spec_start;
2310+      do {
2311+         unsigned int zig;
2312+         int c,r,s;
2313+         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2314+         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); // peek the top FAST_BITS bits of the bit buffer
2315+         r = fac[c]; // a nonzero entry packs value (bits 8 and up), run (bits 4..7) and length (bits 0..3)
2316+         if (r) { // fast-AC path
2317+            k += (r >> 4) & 15; // run
2318+            s = r & 15; // combined length
2319+            if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
2320+            j->code_buffer <<= s;
2321+            j->code_bits -= s;
2322+            zig = stbi__jpeg_dezigzag[k++];
2323+            data[zig] = (short) ((r >> 8) * (1 << shift));
2324+         } else {
2325+            int rs = stbi__jpeg_huff_decode(j, hac);
2326+            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2327+            s = rs & 15;
2328+            r = rs >> 4;
2329+            if (s == 0) {
2330+               if (r < 15) {
2331+                  j->eob_run = (1 << r); // run length is 2^r plus r extra bits read below
2332+                  if (r)
2333+                     j->eob_run += stbi__jpeg_get_bits(j, r);
2334+                  --j->eob_run; // the current block uses up one entry of the run
2335+                  break;
2336+               }
2337+               k += 16; // r == 15 with s == 0: skip 16 coefficients
2338+            } else {
2339+               k += r;
2340+               zig = stbi__jpeg_dezigzag[k++];
2341+               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
2342+            }
2343+         }
2344+      } while (k <= j->spec_end);
2345+   } else {
2346+      // refinement scan for these AC coefficients
2347+
2348+      short bit = (short) (1 << j->succ_low); // the bit position added by this refinement scan
2349+
2350+      if (j->eob_run) {
2351+         --j->eob_run;
2352+         for (k = j->spec_start; k <= j->spec_end; ++k) { // inside a run, only already-nonzero coefficients read a correction bit
2353+            short *p = &data[stbi__jpeg_dezigzag[k]];
2354+            if (*p != 0)
2355+               if (stbi__jpeg_get_bit(j))
2356+                  if ((*p & bit)==0) { // grow the magnitude: add for positive values, subtract for negative ones
2357+                     if (*p > 0)
2358+                        *p += bit;
2359+                     else
2360+                        *p -= bit;
2361+                  }
2362+         }
2363+      } else {
2364+         k = j->spec_start;
2365+         do {
2366+            int r,s;
2367+            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2368+            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2369+            s = rs & 15;
2370+            r = rs >> 4;
2371+            if (s == 0) {
2372+               if (r < 15) {
2373+                  j->eob_run = (1 << r) - 1;
2374+                  if (r)
2375+                     j->eob_run += stbi__jpeg_get_bits(j, r);
2376+                  r = 64; // force end of block
2377+               } else {
2378+                  // r=15 s=0 should write 16 0s, so we just do
2379+                  // a run of 15 0s and then write s (which is 0),
2380+                  // so we don't have to do anything special here
2381+               }
2382+            } else {
2383+               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2384+               // sign bit
2385+               if (stbi__jpeg_get_bit(j))
2386+                  s = bit;
2387+               else
2388+                  s = -bit;
2389+            }
2390+
2391+            // advance by r
2392+            while (k <= j->spec_end) {
2393+               short *p = &data[stbi__jpeg_dezigzag[k++]];
2394+               if (*p != 0) {
2395+                  if (stbi__jpeg_get_bit(j))
2396+                     if ((*p & bit)==0) {
2397+                        if (*p > 0)
2398+                           *p += bit;
2399+                        else
2400+                           *p -= bit;
2401+                     }
2402+               } else {
2403+                  if (r == 0) { // r zero-valued coefficients have been skipped: the new value lands here
2404+                     *p = (short) s;
2405+                     break;
2406+                  }
2407+                  --r;
2408+               }
2409+            }
2410+         } while (k <= j->spec_end);
2411+      }
2412+   }
2413+   return 1;
2414+}
2415+
2416+// clamp an int to 0..255 and return it as stbi_uc: values below 0 become 0, values above 255 become 255
2417+stbi_inline static stbi_uc stbi__clamp(int x)
2418+{
2419+   // trick to use a single test to catch both cases
2420+   if ((unsigned int) x > 255) { // a negative x converts to a large unsigned value, so it also lands here
2421+      if (x < 0) return 0;
2422+      if (x > 255) return 255;
2423+   }
2424+   return (stbi_uc) x;
2425+}
2426+
2427+#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5))) // constant -> int scaled by 4096 (1<<12); 0.5 is added before the truncating cast
2428+#define stbi__fsh(x)  ((x) * 4096) // scale an integer expression by the same 4096 factor
2429+
2430+// derived from jidctint -- DCT_ISLOW; declares its temporaries in the enclosing scope and leaves the s0/s2/s4/s6 part in x0..x3 and the s1/s3/s5/s7 part in t0..t3 for the caller to combine
2431+#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
2432+   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
2433+   p2 = s2;                                    \
2434+   p3 = s6;                                    \
2435+   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
2436+   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
2437+   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
2438+   p2 = s0;                                    \
2439+   p3 = s4;                                    \
2440+   t0 = stbi__fsh(p2+p3);                      \
2441+   t1 = stbi__fsh(p2-p3);                      \
2442+   x0 = t0+t3;                                 \
2443+   x3 = t0-t3;                                 \
2444+   x1 = t1+t2;                                 \
2445+   x2 = t1-t2;                                 \
2446+   t0 = s7;                                    \
2447+   t1 = s5;                                    \
2448+   t2 = s3;                                    \
2449+   t3 = s1;                                    \
2450+   p3 = t0+t2;                                 \
2451+   p4 = t1+t3;                                 \
2452+   p1 = t0+t3;                                 \
2453+   p2 = t1+t2;                                 \
2454+   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
2455+   t0 = t0*stbi__f2f( 0.298631336f);           \
2456+   t1 = t1*stbi__f2f( 2.053119869f);           \
2457+   t2 = t2*stbi__f2f( 3.072711026f);           \
2458+   t3 = t3*stbi__f2f( 1.501321110f);           \
2459+   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
2460+   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
2461+   p3 = p3*stbi__f2f(-1.961570560f);           \
2462+   p4 = p4*stbi__f2f(-0.390180644f);           \
2463+   t3 += p1+p4;                                \
2464+   t2 += p2+p3;                                \
2465+   t1 += p2+p4;                                \
2466+   t0 += p1+p3;
2467+
2468+static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2469+{ // inverse DCT of one 8x8 coefficient block: writes 8 rows of 8 clamped bytes to out, rows out_stride apart
2470+   int i,val[64],*v=val; // val holds the column-pass results that the row pass consumes
2471+   stbi_uc *o;
2472+   short *d = data;
2473+
2474+   // columns
2475+   for (i=0; i < 8; ++i,++d, ++v) {
2476+      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2477+      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
2478+           && d[40]==0 && d[48]==0 && d[56]==0) {
2479+         //    no shortcut                 0     seconds
2480+         //    (1|2|3|4|5|6|7)==0          0     seconds
2481+         //    all separate               -0.047 seconds
2482+         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
2483+         int dcterm = d[0]*4; // *4 keeps the same 2 extra bits of precision as the general path below
2484+         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
2485+      } else {
2486+         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
2487+         // constants scaled things up by 1<<12; let's bring them back
2488+         // down, but keep 2 extra bits of precision
2489+         x0 += 512; x1 += 512; x2 += 512; x3 += 512; // 512 is half of 1<<10, i.e. rounding for the shift below
2490+         v[ 0] = (x0+t3) >> 10;
2491+         v[56] = (x0-t3) >> 10;
2492+         v[ 8] = (x1+t2) >> 10;
2493+         v[48] = (x1-t2) >> 10;
2494+         v[16] = (x2+t1) >> 10;
2495+         v[40] = (x2-t1) >> 10;
2496+         v[24] = (x3+t0) >> 10;
2497+         v[32] = (x3-t0) >> 10;
2498+      }
2499+   }
2500+
2501+   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { // rows: one output scanline per iteration
2502+      // no fast case since the first 1D IDCT spread components out
2503+      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
2504+      // constants scaled things up by 1<<12, plus we had 1<<2 from first
2505+      // loop, plus horizontal and vertical each scale by sqrt(8) so together
2506+      // we've got an extra 1<<3, so 1<<17 total we need to remove.
2507+      // so we want to round that, which means adding 0.5 * 1<<17,
2508+      // aka 65536. Also, we'll end up with -128 to 127 that we want
2509+      // to encode as 0..255 by adding 128, so we'll add that before the shift
2510+      x0 += 65536 + (128<<17);
2511+      x1 += 65536 + (128<<17);
2512+      x2 += 65536 + (128<<17);
2513+      x3 += 65536 + (128<<17);
2514+      // tried computing the shifts into temps, or'ing the temps to see
2515+      // if any were out of range, but that was slower
2516+      o[0] = stbi__clamp((x0+t3) >> 17);
2517+      o[7] = stbi__clamp((x0-t3) >> 17);
2518+      o[1] = stbi__clamp((x1+t2) >> 17);
2519+      o[6] = stbi__clamp((x1-t2) >> 17);
2520+      o[2] = stbi__clamp((x2+t1) >> 17);
2521+      o[5] = stbi__clamp((x2-t1) >> 17);
2522+      o[3] = stbi__clamp((x3+t0) >> 17);
2523+      o[4] = stbi__clamp((x3-t0) >> 17);
2524+   }
2525+}
2526+
2527+#ifdef STBI_SSE2
2528+// sse2 integer IDCT. not the fastest possible implementation but it
2529+// produces bit-identical results to the generic C version so it's
2530+// fully "transparent".
2531+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2532+{ // same parameters as stbi__idct_block: 64 coefficients in, 8 rows of 8 bytes written to out
2533+   // This is constructed to match our regular (generic) integer IDCT exactly.
2534+   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2535+   __m128i tmp;
2536+
2537+   // dot product constant: even elems=x, odd elems=y
2538+   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2539+
2540+   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
2541+   // out(1) = c1[even]*x + c1[odd]*y
2542+   #define dct_rot(out0,out1, x,y,c0,c1) \
2543+      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2544+      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2545+      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2546+      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2547+      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2548+      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2549+
2550+   // out = in << 12  (in 16-bit, out 32-bit); built by unpacking in into the upper 16 bits of each lane, then arithmetic shift right by 4
2551+   #define dct_widen(out, in) \
2552+      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2553+      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2554+
2555+   // wide add
2556+   #define dct_wadd(out, a, b) \
2557+      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2558+      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2559+
2560+   // wide sub
2561+   #define dct_wsub(out, a, b) \
2562+      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2563+      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2564+
2565+   // butterfly a/b, add bias, then shift by "s" and pack
2566+   #define dct_bfly32o(out0, out1, a,b,bias,s) \
2567+      { \
2568+         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2569+         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2570+         dct_wadd(sum, abiased, b); \
2571+         dct_wsub(dif, abiased, b); \
2572+         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2573+         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2574+      }
2575+
2576+   // 8-bit interleave step (for transposes)
2577+   #define dct_interleave8(a, b) \
2578+      tmp = a; \
2579+      a = _mm_unpacklo_epi8(a, b); \
2580+      b = _mm_unpackhi_epi8(tmp, b)
2581+
2582+   // 16-bit interleave step (for transposes)
2583+   #define dct_interleave16(a, b) \
2584+      tmp = a; \
2585+      a = _mm_unpacklo_epi16(a, b); \
2586+      b = _mm_unpackhi_epi16(tmp, b)
2587+
2588+   #define dct_pass(bias,shift) \
2589+      { \
2590+         /* even part */ \
2591+         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2592+         __m128i sum04 = _mm_add_epi16(row0, row4); \
2593+         __m128i dif04 = _mm_sub_epi16(row0, row4); \
2594+         dct_widen(t0e, sum04); \
2595+         dct_widen(t1e, dif04); \
2596+         dct_wadd(x0, t0e, t3e); \
2597+         dct_wsub(x3, t0e, t3e); \
2598+         dct_wadd(x1, t1e, t2e); \
2599+         dct_wsub(x2, t1e, t2e); \
2600+         /* odd part */ \
2601+         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2602+         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2603+         __m128i sum17 = _mm_add_epi16(row1, row7); \
2604+         __m128i sum35 = _mm_add_epi16(row3, row5); \
2605+         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2606+         dct_wadd(x4, y0o, y4o); \
2607+         dct_wadd(x5, y1o, y5o); \
2608+         dct_wadd(x6, y2o, y5o); \
2609+         dct_wadd(x7, y3o, y4o); \
2610+         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2611+         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2612+         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2613+         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2614+      }
2615+
2616+   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2617+   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2618+   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2619+   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2620+   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2621+   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2622+   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2623+   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2624+
2625+   // rounding biases in column/row passes, see stbi__idct_block for explanation.
2626+   __m128i bias_0 = _mm_set1_epi32(512);
2627+   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2628+
2629+   // load (_mm_load_si128 is the aligned load, so data must be 16-byte aligned)
2630+   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2631+   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2632+   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2633+   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2634+   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2635+   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2636+   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2637+   row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2638+
2639+   // column pass
2640+   dct_pass(bias_0, 10);
2641+
2642+   {
2643+      // 16bit 8x8 transpose pass 1
2644+      dct_interleave16(row0, row4);
2645+      dct_interleave16(row1, row5);
2646+      dct_interleave16(row2, row6);
2647+      dct_interleave16(row3, row7);
2648+
2649+      // transpose pass 2
2650+      dct_interleave16(row0, row2);
2651+      dct_interleave16(row1, row3);
2652+      dct_interleave16(row4, row6);
2653+      dct_interleave16(row5, row7);
2654+
2655+      // transpose pass 3
2656+      dct_interleave16(row0, row1);
2657+      dct_interleave16(row2, row3);
2658+      dct_interleave16(row4, row5);
2659+      dct_interleave16(row6, row7);
2660+   }
2661+
2662+   // row pass
2663+   dct_pass(bias_1, 17);
2664+
2665+   {
2666+      // pack
2667+      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2668+      __m128i p1 = _mm_packus_epi16(row2, row3);
2669+      __m128i p2 = _mm_packus_epi16(row4, row5);
2670+      __m128i p3 = _mm_packus_epi16(row6, row7);
2671+
2672+      // 8bit 8x8 transpose pass 1
2673+      dct_interleave8(p0, p2); // a0e0a1e1...
2674+      dct_interleave8(p1, p3); // c0g0c1g1...
2675+
2676+      // transpose pass 2
2677+      dct_interleave8(p0, p1); // a0c0e0g0...
2678+      dct_interleave8(p2, p3); // b0d0f0h0...
2679+
2680+      // transpose pass 3
2681+      dct_interleave8(p0, p2); // a0b0c0d0...
2682+      dct_interleave8(p1, p3); // a4b4c4d4...
2683+
2684+      // store (each register holds two output rows; shuffle 0x4e swaps the 64-bit halves so the upper row can be stored)
2685+      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2686+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2687+      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2688+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2689+      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2690+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2691+      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2692+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2693+   }
2694+
2695+#undef dct_const
2696+#undef dct_rot
2697+#undef dct_widen
2698+#undef dct_wadd
2699+#undef dct_wsub
2700+#undef dct_bfly32o
2701+#undef dct_interleave8
2702+#undef dct_interleave16
2703+#undef dct_pass
2704+}
2705+
2706+#endif // STBI_SSE2
2707+
2708+#ifdef STBI_NEON
2709+
2710+// NEON integer IDCT. should produce bit-identical
2711+// results to the generic C version.
2712+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2713+{ // same parameters as stbi__idct_block: 64 coefficients in, 8 rows of 8 bytes written to out
2714+   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2715+
2716+   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2717+   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2718+   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2719+   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2720+   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2721+   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2722+   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2723+   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2724+   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2725+   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2726+   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2727+   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2728+
2729+#define dct_long_mul(out, inq, coeff) \
2730+   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2731+   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2732+
2733+#define dct_long_mac(out, acc, inq, coeff) \
2734+   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2735+   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2736+
2737+#define dct_widen(out, inq) \
2738+   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2739+   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2740+
2741+// wide add
2742+#define dct_wadd(out, a, b) \
2743+   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2744+   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2745+
2746+// wide sub
2747+#define dct_wsub(out, a, b) \
2748+   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2749+   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2750+
2751+// butterfly a/b, then shift using "shiftop" by "s" and pack
2752+#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2753+   { \
2754+      dct_wadd(sum, a, b); \
2755+      dct_wsub(dif, a, b); \
2756+      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2757+      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2758+   }
2759+
2760+#define dct_pass(shiftop, shift) \
2761+   { \
2762+      /* even part */ \
2763+      int16x8_t sum26 = vaddq_s16(row2, row6); \
2764+      dct_long_mul(p1e, sum26, rot0_0); \
2765+      dct_long_mac(t2e, p1e, row6, rot0_1); \
2766+      dct_long_mac(t3e, p1e, row2, rot0_2); \
2767+      int16x8_t sum04 = vaddq_s16(row0, row4); \
2768+      int16x8_t dif04 = vsubq_s16(row0, row4); \
2769+      dct_widen(t0e, sum04); \
2770+      dct_widen(t1e, dif04); \
2771+      dct_wadd(x0, t0e, t3e); \
2772+      dct_wsub(x3, t0e, t3e); \
2773+      dct_wadd(x1, t1e, t2e); \
2774+      dct_wsub(x2, t1e, t2e); \
2775+      /* odd part */ \
2776+      int16x8_t sum15 = vaddq_s16(row1, row5); \
2777+      int16x8_t sum17 = vaddq_s16(row1, row7); \
2778+      int16x8_t sum35 = vaddq_s16(row3, row5); \
2779+      int16x8_t sum37 = vaddq_s16(row3, row7); \
2780+      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2781+      dct_long_mul(p5o, sumodd, rot1_0); \
2782+      dct_long_mac(p1o, p5o, sum17, rot1_1); \
2783+      dct_long_mac(p2o, p5o, sum35, rot1_2); \
2784+      dct_long_mul(p3o, sum37, rot2_0); \
2785+      dct_long_mul(p4o, sum15, rot2_1); \
2786+      dct_wadd(sump13o, p1o, p3o); \
2787+      dct_wadd(sump24o, p2o, p4o); \
2788+      dct_wadd(sump23o, p2o, p3o); \
2789+      dct_wadd(sump14o, p1o, p4o); \
2790+      dct_long_mac(x4, sump13o, row7, rot3_0); \
2791+      dct_long_mac(x5, sump24o, row5, rot3_1); \
2792+      dct_long_mac(x6, sump23o, row3, rot3_2); \
2793+      dct_long_mac(x7, sump14o, row1, rot3_3); \
2794+      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2795+      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2796+      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2797+      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2798+   }
2799+
2800+   // load
2801+   row0 = vld1q_s16(data + 0*8);
2802+   row1 = vld1q_s16(data + 1*8);
2803+   row2 = vld1q_s16(data + 2*8);
2804+   row3 = vld1q_s16(data + 3*8);
2805+   row4 = vld1q_s16(data + 4*8);
2806+   row5 = vld1q_s16(data + 5*8);
2807+   row6 = vld1q_s16(data + 6*8);
2808+   row7 = vld1q_s16(data + 7*8);
2809+
2810+   // add DC bias (1024 goes into lane 0 of row0 only, i.e. data[0]; NOTE(review): presumably stands in for the 128<<17 level shift of the generic version -- confirm)
2811+   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2812+
2813+   // column pass
2814+   dct_pass(vrshrn_n_s32, 10);
2815+
2816+   // 16bit 8x8 transpose
2817+   {
2818+// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2819+// whether compilers actually get this is another story, sadly.
2820+#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2821+#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2822+#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2823+
2824+      // pass 1
2825+      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2826+      dct_trn16(row2, row3);
2827+      dct_trn16(row4, row5);
2828+      dct_trn16(row6, row7);
2829+
2830+      // pass 2
2831+      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2832+      dct_trn32(row1, row3);
2833+      dct_trn32(row4, row6);
2834+      dct_trn32(row5, row7);
2835+
2836+      // pass 3
2837+      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2838+      dct_trn64(row1, row5);
2839+      dct_trn64(row2, row6);
2840+      dct_trn64(row3, row7);
2841+
2842+#undef dct_trn16
2843+#undef dct_trn32
2844+#undef dct_trn64
2845+   }
2846+
2847+   // row pass
2848+   // vrshrn_n_s32 only supports shifts up to 16, we need
2849+   // 17. so do a non-rounding shift of 16 first then follow
2850+   // up with a rounding shift by 1.
2851+   dct_pass(vshrn_n_s32, 16);
2852+
2853+   {
2854+      // pack and round (the remaining shift by 1 happens here, with unsigned saturation to 8 bits)
2855+      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2856+      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2857+      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2858+      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2859+      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2860+      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2861+      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2862+      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2863+
2864+      // again, these can translate into one instruction, but often don't.
2865+#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2866+#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2867+#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2868+
2869+      // sadly can't use interleaved stores here since we only write
2870+      // 8 bytes to each scan line!
2871+
2872+      // 8x8 8-bit transpose pass 1
2873+      dct_trn8_8(p0, p1);
2874+      dct_trn8_8(p2, p3);
2875+      dct_trn8_8(p4, p5);
2876+      dct_trn8_8(p6, p7);
2877+
2878+      // pass 2
2879+      dct_trn8_16(p0, p2);
2880+      dct_trn8_16(p1, p3);
2881+      dct_trn8_16(p4, p6);
2882+      dct_trn8_16(p5, p7);
2883+
2884+      // pass 3
2885+      dct_trn8_32(p0, p4);
2886+      dct_trn8_32(p1, p5);
2887+      dct_trn8_32(p2, p6);
2888+      dct_trn8_32(p3, p7);
2889+
2890+      // store
2891+      vst1_u8(out, p0); out += out_stride;
2892+      vst1_u8(out, p1); out += out_stride;
2893+      vst1_u8(out, p2); out += out_stride;
2894+      vst1_u8(out, p3); out += out_stride;
2895+      vst1_u8(out, p4); out += out_stride;
2896+      vst1_u8(out, p5); out += out_stride;
2897+      vst1_u8(out, p6); out += out_stride;
2898+      vst1_u8(out, p7);
2899+
2900+#undef dct_trn8_8
2901+#undef dct_trn8_16
2902+#undef dct_trn8_32
2903+   }
2904+
2905+#undef dct_long_mul
2906+#undef dct_long_mac
2907+#undef dct_widen
2908+#undef dct_wadd
2909+#undef dct_wsub
2910+#undef dct_bfly32o
2911+#undef dct_pass
2912+}
2913+
2914+#endif // STBI_NEON
2915+
2916+#define STBI__MARKER_none  0xff
2917+// if there's a pending marker from the entropy stream, return that
2918+// otherwise, fetch from the stream and get a marker. if there's no
2919+// marker, return 0xff, which is never a valid marker value
2920+static stbi_uc stbi__get_marker(stbi__jpeg *j)
2921+{
2922+   stbi_uc x;
2923+   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } // hand out the pending marker once and clear it
2924+   x = stbi__get8(j->s);
2925+   if (x != 0xff) return STBI__MARKER_none; // the next byte is not 0xff, so report "no marker" (that byte has been consumed)
2926+   while (x == 0xff)
2927+      x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2928+   return x;
2929+}
2930+
2931+// in each scan, we'll have scan_n components, and the order
2932+// of the components is specified by order[]
2933+#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7) // true when x is one of the eight restart marker codes 0xd0..0xd7; evaluates x twice
2934+
2935+// after a restart interval, stbi__jpeg_reset the entropy decoder and
2936+// the dc prediction
2937+static void stbi__jpeg_reset(stbi__jpeg *j)
2938+{
2939+   j->code_bits = 0;
2940+   j->code_buffer = 0;
2941+   j->nomore = 0;
2942+   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2943+   j->marker = STBI__MARKER_none;
2944+   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; // countdown decremented once per MCU by the scan decoder
2945+   j->eob_run = 0;
2946+   // no more than 1<<31 MCUs if no restart_interval? that's plenty safe,
2947+   // since we don't even allow 1<<30 pixels
2948+}
2949+
2950+static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2951+{ // decodes the entropy-coded data of one scan; returns 0 if a block fails to decode, 1 otherwise (including the early bail-out on a missing restart marker)
2952+   stbi__jpeg_reset(z);
2953+   if (!z->progressive) { // baseline: decode each block and run the IDCT on it immediately
2954+      if (z->scan_n == 1) {
2955+         int i,j;
2956+         STBI_SIMD_ALIGN(short, data[64]); // scratch coefficient block handed to the IDCT kernel
2957+         int n = z->order[0];
2958+         // non-interleaved data, we just need to process one block at a time,
2959+         // in trivial scanline order
2960+         // number of blocks to do just depends on how many actual "pixels" this
2961+         // component has, independent of interleaved MCU blocking and such
2962+         int w = (z->img_comp[n].x+7) >> 3;
2963+         int h = (z->img_comp[n].y+7) >> 3;
2964+         for (j=0; j < h; ++j) {
2965+            for (i=0; i < w; ++i) {
2966+               int ha = z->img_comp[n].ha;
2967+               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2968+               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2969+               // every data block is an MCU, so countdown the restart interval
2970+               if (--z->todo <= 0) {
2971+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2972+                  // if it's NOT a restart, then just bail, so we get corrupt data
2973+                  // rather than no data
2974+                  if (!STBI__RESTART(z->marker)) return 1;
2975+                  stbi__jpeg_reset(z);
2976+               }
2977+            }
2978+         }
2979+         return 1;
2980+      } else { // interleaved
2981+         int i,j,k,x,y;
2982+         STBI_SIMD_ALIGN(short, data[64]);
2983+         for (j=0; j < z->img_mcu_y; ++j) {
2984+            for (i=0; i < z->img_mcu_x; ++i) {
2985+               // scan an interleaved mcu... process scan_n components in order
2986+               for (k=0; k < z->scan_n; ++k) {
2987+                  int n = z->order[k];
2988+                  // scan out an mcu's worth of this component; that's just determined
2989+                  // by the basic H and V specified for the component
2990+                  for (y=0; y < z->img_comp[n].v; ++y) {
2991+                     for (x=0; x < z->img_comp[n].h; ++x) {
2992+                        int x2 = (i*z->img_comp[n].h + x)*8; // pixel offsets of this block inside the component plane
2993+                        int y2 = (j*z->img_comp[n].v + y)*8;
2994+                        int ha = z->img_comp[n].ha;
2995+                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2996+                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
2997+                     }
2998+                  }
2999+               }
3000+               // after all interleaved components, that's an interleaved MCU,
3001+               // so now count down the restart interval
3002+               if (--z->todo <= 0) {
3003+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3004+                  if (!STBI__RESTART(z->marker)) return 1;
3005+                  stbi__jpeg_reset(z);
3006+               }
3007+            }
3008+         }
3009+         return 1;
3010+      }
3011+   } else { // progressive: blocks accumulate in the per-component coeff buffer; no IDCT is run in this function
3012+      if (z->scan_n == 1) {
3013+         int i,j;
3014+         int n = z->order[0];
3015+         // non-interleaved data, we just need to process one block at a time,
3016+         // in trivial scanline order
3017+         // number of blocks to do just depends on how many actual "pixels" this
3018+         // component has, independent of interleaved MCU blocking and such
3019+         int w = (z->img_comp[n].x+7) >> 3;
3020+         int h = (z->img_comp[n].y+7) >> 3;
3021+         for (j=0; j < h; ++j) {
3022+            for (i=0; i < w; ++i) {
3023+               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); // 64 shorts per block, coeff_w blocks per row
3024+               if (z->spec_start == 0) {
3025+                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
3026+                     return 0;
3027+               } else {
3028+                  int ha = z->img_comp[n].ha;
3029+                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
3030+                     return 0;
3031+               }
3032+               // every data block is an MCU, so countdown the restart interval
3033+               if (--z->todo <= 0) {
3034+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3035+                  if (!STBI__RESTART(z->marker)) return 1;
3036+                  stbi__jpeg_reset(z);
3037+               }
3038+            }
3039+         }
3040+         return 1;
3041+      } else { // interleaved
3042+         int i,j,k,x,y;
3043+         for (j=0; j < z->img_mcu_y; ++j) {
3044+            for (i=0; i < z->img_mcu_x; ++i) {
3045+               // scan an interleaved mcu... process scan_n components in order
3046+               for (k=0; k < z->scan_n; ++k) {
3047+                  int n = z->order[k];
3048+                  // scan out an mcu's worth of this component; that's just determined
3049+                  // by the basic H and V specified for the component
3050+                  for (y=0; y < z->img_comp[n].v; ++y) {
3051+                     for (x=0; x < z->img_comp[n].h; ++x) {
3052+                        int x2 = (i*z->img_comp[n].h + x); // block (not pixel) coordinates here
3053+                        int y2 = (j*z->img_comp[n].v + y);
3054+                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
3055+                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) // only the DC decoder is called on this interleaved path
3056+                           return 0;
3057+                     }
3058+                  }
3059+               }
3060+               // after all interleaved components, that's an interleaved MCU,
3061+               // so now count down the restart interval
3062+               if (--z->todo <= 0) {
3063+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3064+                  if (!STBI__RESTART(z->marker)) return 1;
3065+                  stbi__jpeg_reset(z);
3066+               }
3067+            }
3068+         }
3069+         return 1;
3070+      }
3071+   }
3072+}
3073+
3074+static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
3075+{
3076+   int i;
3077+   for (i=0; i < 64; ++i)
3078+      data[i] *= dequant[i];
3079+}
3080+
3081+static void stbi__jpeg_finish(stbi__jpeg *z)
3082+{
3083+   if (z->progressive) {
3084+      // dequantize and idct the data
3085+      int i,j,n;
3086+      for (n=0; n < z->s->img_n; ++n) {
3087+         int w = (z->img_comp[n].x+7) >> 3;
3088+         int h = (z->img_comp[n].y+7) >> 3;
3089+         for (j=0; j < h; ++j) {
3090+            for (i=0; i < w; ++i) {
3091+               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
3092+               stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
3093+               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
3094+            }
3095+         }
3096+      }
3097+   }
3098+}
3099+
3100+static int stbi__process_marker(stbi__jpeg *z, int m)
3101+{
3102+   int L;
3103+   switch (m) {
3104+      case STBI__MARKER_none: // no marker found
3105+         return stbi__err("expected marker","Corrupt JPEG");
3106+
3107+      case 0xDD: // DRI - specify restart interval
3108+         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
3109+         z->restart_interval = stbi__get16be(z->s);
3110+         return 1;
3111+
3112+      case 0xDB: // DQT - define quantization table
3113+         L = stbi__get16be(z->s)-2;
3114+         while (L > 0) {
3115+            int q = stbi__get8(z->s);
3116+            int p = q >> 4, sixteen = (p != 0);
3117+            int t = q & 15,i;
3118+            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
3119+            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
3120+
3121+            for (i=0; i < 64; ++i)
3122+               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
3123+            L -= (sixteen ? 129 : 65);
3124+         }
3125+         return L==0;
3126+
3127+      case 0xC4: // DHT - define huffman table
3128+         L = stbi__get16be(z->s)-2;
3129+         while (L > 0) {
3130+            stbi_uc *v;
3131+            int sizes[16],i,n=0;
3132+            int q = stbi__get8(z->s);
3133+            int tc = q >> 4;
3134+            int th = q & 15;
3135+            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
3136+            for (i=0; i < 16; ++i) {
3137+               sizes[i] = stbi__get8(z->s);
3138+               n += sizes[i];
3139+            }
3140+            if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
3141+            L -= 17;
3142+            if (tc == 0) {
3143+               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
3144+               v = z->huff_dc[th].values;
3145+            } else {
3146+               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
3147+               v = z->huff_ac[th].values;
3148+            }
3149+            for (i=0; i < n; ++i)
3150+               v[i] = stbi__get8(z->s);
3151+            if (tc != 0)
3152+               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
3153+            L -= n;
3154+         }
3155+         return L==0;
3156+   }
3157+
3158+   // check for comment block or APP blocks
3159+   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
3160+      L = stbi__get16be(z->s);
3161+      if (L < 2) {
3162+         if (m == 0xFE)
3163+            return stbi__err("bad COM len","Corrupt JPEG");
3164+         else
3165+            return stbi__err("bad APP len","Corrupt JPEG");
3166+      }
3167+      L -= 2;
3168+
3169+      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
3170+         static const unsigned char tag[5] = {'J','F','I','F','\0'};
3171+         int ok = 1;
3172+         int i;
3173+         for (i=0; i < 5; ++i)
3174+            if (stbi__get8(z->s) != tag[i])
3175+               ok = 0;
3176+         L -= 5;
3177+         if (ok)
3178+            z->jfif = 1;
3179+      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
3180+         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
3181+         int ok = 1;
3182+         int i;
3183+         for (i=0; i < 6; ++i)
3184+            if (stbi__get8(z->s) != tag[i])
3185+               ok = 0;
3186+         L -= 6;
3187+         if (ok) {
3188+            stbi__get8(z->s); // version
3189+            stbi__get16be(z->s); // flags0
3190+            stbi__get16be(z->s); // flags1
3191+            z->app14_color_transform = stbi__get8(z->s); // color transform
3192+            L -= 6;
3193+         }
3194+      }
3195+
3196+      stbi__skip(z->s, L);
3197+      return 1;
3198+   }
3199+
3200+   return stbi__err("unknown marker","Corrupt JPEG");
3201+}
3202+
3203+// after we see SOS
3204+static int stbi__process_scan_header(stbi__jpeg *z)
3205+{
3206+   int i;
3207+   int Ls = stbi__get16be(z->s);
3208+   z->scan_n = stbi__get8(z->s);
3209+   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
3210+   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
3211+   for (i=0; i < z->scan_n; ++i) {
3212+      int id = stbi__get8(z->s), which;
3213+      int q = stbi__get8(z->s);
3214+      for (which = 0; which < z->s->img_n; ++which)
3215+         if (z->img_comp[which].id == id)
3216+            break;
3217+      if (which == z->s->img_n) return 0; // no match
3218+      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
3219+      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
3220+      z->order[i] = which;
3221+   }
3222+
3223+   {
3224+      int aa;
3225+      z->spec_start = stbi__get8(z->s);
3226+      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
3227+      aa = stbi__get8(z->s);
3228+      z->succ_high = (aa >> 4);
3229+      z->succ_low  = (aa & 15);
3230+      if (z->progressive) {
3231+         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
3232+            return stbi__err("bad SOS", "Corrupt JPEG");
3233+      } else {
3234+         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
3235+         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
3236+         z->spec_end = 63;
3237+      }
3238+   }
3239+
3240+   return 1;
3241+}
3242+
3243+static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
3244+{
3245+   int i;
3246+   for (i=0; i < ncomp; ++i) {
3247+      if (z->img_comp[i].raw_data) {
3248+         STBI_FREE(z->img_comp[i].raw_data);
3249+         z->img_comp[i].raw_data = NULL;
3250+         z->img_comp[i].data = NULL;
3251+      }
3252+      if (z->img_comp[i].raw_coeff) {
3253+         STBI_FREE(z->img_comp[i].raw_coeff);
3254+         z->img_comp[i].raw_coeff = 0;
3255+         z->img_comp[i].coeff = 0;
3256+      }
3257+      if (z->img_comp[i].linebuf) {
3258+         STBI_FREE(z->img_comp[i].linebuf);
3259+         z->img_comp[i].linebuf = NULL;
3260+      }
3261+   }
3262+   return why;
3263+}
3264+
3265+static int stbi__process_frame_header(stbi__jpeg *z, int scan)
3266+{
3267+   stbi__context *s = z->s;
3268+   int Lf,p,i,q, h_max=1,v_max=1,c;
3269+   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
3270+   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
3271+   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
3272+   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
3273+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
3274+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
3275+   c = stbi__get8(s);
3276+   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
3277+   s->img_n = c;
3278+   for (i=0; i < c; ++i) {
3279+      z->img_comp[i].data = NULL;
3280+      z->img_comp[i].linebuf = NULL;
3281+   }
3282+
3283+   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
3284+
3285+   z->rgb = 0;
3286+   for (i=0; i < s->img_n; ++i) {
3287+      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
3288+      z->img_comp[i].id = stbi__get8(s);
3289+      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
3290+         ++z->rgb;
3291+      q = stbi__get8(s);
3292+      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
3293+      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
3294+      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
3295+   }
3296+
3297+   if (scan != STBI__SCAN_load) return 1;
3298+
3299+   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
3300+
3301+   for (i=0; i < s->img_n; ++i) {
3302+      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
3303+      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
3304+   }
3305+
3306+   // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
3307+   // and I've never seen a non-corrupted JPEG file actually use them
3308+   for (i=0; i < s->img_n; ++i) {
3309+      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
3310+      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
3311+   }
3312+
3313+   // compute interleaved mcu info
3314+   z->img_h_max = h_max;
3315+   z->img_v_max = v_max;
3316+   z->img_mcu_w = h_max * 8;
3317+   z->img_mcu_h = v_max * 8;
3318+   // these sizes can't be more than 17 bits
3319+   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
3320+   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
3321+
3322+   for (i=0; i < s->img_n; ++i) {
3323+      // number of effective pixels (e.g. for non-interleaved MCU)
3324+      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
3325+      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
3326+      // to simplify generation, we'll allocate enough memory to decode
3327+      // the bogus oversized data from using interleaved MCUs and their
3328+      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
3329+      // discard the extra data until colorspace conversion
3330+      //
3331+      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
3332+      // so these muls can't overflow with 32-bit ints (which we require)
3333+      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
3334+      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
3335+      z->img_comp[i].coeff = 0;
3336+      z->img_comp[i].raw_coeff = 0;
3337+      z->img_comp[i].linebuf = NULL;
3338+      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
3339+      if (z->img_comp[i].raw_data == NULL)
3340+         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3341+      // align blocks for idct using mmx/sse
3342+      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
3343+      if (z->progressive) {
3344+         // w2, h2 are multiples of 8 (see above)
3345+         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
3346+         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
3347+         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
3348+         if (z->img_comp[i].raw_coeff == NULL)
3349+            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3350+         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
3351+      }
3352+   }
3353+
3354+   return 1;
3355+}
3356+
3357+// use comparisons since in some cases we handle more than one case (e.g. SOF)
3358+#define stbi__DNL(x)         ((x) == 0xdc)
3359+#define stbi__SOI(x)         ((x) == 0xd8)
3360+#define stbi__EOI(x)         ((x) == 0xd9)
3361+#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
3362+#define stbi__SOS(x)         ((x) == 0xda)
3363+
3364+#define stbi__SOF_progressive(x)   ((x) == 0xc2)
3365+
3366+static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
3367+{
3368+   int m;
3369+   z->jfif = 0;
3370+   z->app14_color_transform = -1; // valid values are 0,1,2
3371+   z->marker = STBI__MARKER_none; // initialize cached marker to empty
3372+   m = stbi__get_marker(z);
3373+   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
3374+   if (scan == STBI__SCAN_type) return 1;
3375+   m = stbi__get_marker(z);
3376+   while (!stbi__SOF(m)) {
3377+      if (!stbi__process_marker(z,m)) return 0;
3378+      m = stbi__get_marker(z);
3379+      while (m == STBI__MARKER_none) {
3380+         // some files have extra padding after their blocks, so ok, we'll scan
3381+         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
3382+         m = stbi__get_marker(z);
3383+      }
3384+   }
3385+   z->progressive = stbi__SOF_progressive(m);
3386+   if (!stbi__process_frame_header(z, scan)) return 0;
3387+   return 1;
3388+}
3389+
3390+static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j)
3391+{
3392+   // some JPEGs have junk at end, skip over it but if we find what looks
3393+   // like a valid marker, resume there
3394+   while (!stbi__at_eof(j->s)) {
3395+      stbi_uc x = stbi__get8(j->s);
3396+      while (x == 0xff) { // might be a marker
3397+         if (stbi__at_eof(j->s)) return STBI__MARKER_none;
3398+         x = stbi__get8(j->s);
3399+         if (x != 0x00 && x != 0xff) {
3400+            // not a stuffed zero or lead-in to another marker, looks
3401+            // like an actual marker, return it
3402+            return x;
3403+         }
3404+         // stuffed zero has x=0 now which ends the loop, meaning we go
3405+         // back to regular scan loop.
3406+         // repeated 0xff keeps trying to read the next byte of the marker.
3407+      }
3408+   }
3409+   return STBI__MARKER_none;
3410+}
3411+
3412+// decode image to YCbCr format
3413+static int stbi__decode_jpeg_image(stbi__jpeg *j)
3414+{
3415+   int m;
3416+   for (m = 0; m < 4; m++) {
3417+      j->img_comp[m].raw_data = NULL;
3418+      j->img_comp[m].raw_coeff = NULL;
3419+   }
3420+   j->restart_interval = 0;
3421+   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3422+   m = stbi__get_marker(j);
3423+   while (!stbi__EOI(m)) {
3424+      if (stbi__SOS(m)) {
3425+         if (!stbi__process_scan_header(j)) return 0;
3426+         if (!stbi__parse_entropy_coded_data(j)) return 0;
3427+         if (j->marker == STBI__MARKER_none ) {
3428+         j->marker = stbi__skip_jpeg_junk_at_end(j);
3429+            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3430+         }
3431+         m = stbi__get_marker(j);
3432+         if (STBI__RESTART(m))
3433+            m = stbi__get_marker(j);
3434+      } else if (stbi__DNL(m)) {
3435+         int Ld = stbi__get16be(j->s);
3436+         stbi__uint32 NL = stbi__get16be(j->s);
3437+         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
3438+         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
3439+         m = stbi__get_marker(j);
3440+      } else {
3441+         if (!stbi__process_marker(j, m)) return 1;
3442+         m = stbi__get_marker(j);
3443+      }
3444+   }
3445+   if (j->progressive)
3446+      stbi__jpeg_finish(j);
3447+   return 1;
3448+}
3449+
3450+// static jfif-centered resampling (across block boundaries)
3451+
3452+typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3453+                                    int w, int hs);
3454+
3455+#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3456+
3457+static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3458+{
3459+   STBI_NOTUSED(out);
3460+   STBI_NOTUSED(in_far);
3461+   STBI_NOTUSED(w);
3462+   STBI_NOTUSED(hs);
3463+   return in_near;
3464+}
3465+
3466+static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3467+{
3468+   // need to generate two samples vertically for every one in input
3469+   int i;
3470+   STBI_NOTUSED(hs);
3471+   for (i=0; i < w; ++i)
3472+      out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
3473+   return out;
3474+}
3475+
3476+static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3477+{
3478+   // need to generate two samples horizontally for every one in input
3479+   int i;
3480+   stbi_uc *input = in_near;
3481+
3482+   if (w == 1) {
3483+      // if only one sample, can't do any interpolation
3484+      out[0] = out[1] = input[0];
3485+      return out;
3486+   }
3487+
3488+   out[0] = input[0];
3489+   out[1] = stbi__div4(input[0]*3 + input[1] + 2);
3490+   for (i=1; i < w-1; ++i) {
3491+      int n = 3*input[i]+2;
3492+      out[i*2+0] = stbi__div4(n+input[i-1]);
3493+      out[i*2+1] = stbi__div4(n+input[i+1]);
3494+   }
3495+   out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
3496+   out[i*2+1] = input[w-1];
3497+
3498+   STBI_NOTUSED(in_far);
3499+   STBI_NOTUSED(hs);
3500+
3501+   return out;
3502+}
3503+
3504+#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3505+
3506+static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3507+{
3508+   // need to generate 2x2 samples for every one in input
3509+   int i,t0,t1;
3510+   if (w == 1) {
3511+      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3512+      return out;
3513+   }
3514+
3515+   t1 = 3*in_near[0] + in_far[0];
3516+   out[0] = stbi__div4(t1+2);
3517+   for (i=1; i < w; ++i) {
3518+      t0 = t1;
3519+      t1 = 3*in_near[i]+in_far[i];
3520+      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3521+      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3522+   }
3523+   out[w*2-1] = stbi__div4(t1+2);
3524+
3525+   STBI_NOTUSED(hs);
3526+
3527+   return out;
3528+}
3529+
3530+#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of the 2x2 row resampler (compiled only when STBI_SSE2 or
// STBI_NEON is defined).
//
// out     - destination row; 2*w samples are written
// in_near - nearer source row (weight 3 in the vertical blend)
// in_far  - farther source row (weight 1 in the vertical blend)
// w       - number of source samples
// hs      - unused
// Returns `out`.
//
// Groups of 8 source samples are processed with vector code while
// i < ((w-1) & ~7); the remaining samples, which always include the last
// one, are handled by the scalar code after the loop.
3531+static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3532+{
3533+   // need to generate 2x2 samples for every one in input
3534+   int i=0,t0,t1;
3535+
3536+   if (w == 1) {
3537+      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3538+      return out;
3539+   }
3540+
3541+   t1 = 3*in_near[0] + in_far[0];
3542+   // process groups of 8 pixels for as long as we can.
3543+   // note we can't handle the last pixel in a row in this loop
3544+   // because we need to handle the filter boundary conditions.
   // the loop bound keeps i+8 <= w-1, so the in_near[i+8]/in_far[i+8]
   // reads below stay inside the row.
3545+   for (; i < ((w-1) & ~7); i += 8) {
3546+#if defined(STBI_SSE2)
3547+      // load and perform the vertical filtering pass
3548+      // this uses 3*x + y = 4*x + (y - x)
3549+      __m128i zero  = _mm_setzero_si128();
3550+      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
3551+      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
3552+      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
3553+      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
3554+      __m128i diff  = _mm_sub_epi16(farw, nearw);
3555+      __m128i nears = _mm_slli_epi16(nearw, 2);
3556+      __m128i curr  = _mm_add_epi16(nears, diff); // current row
3557+
3558+      // horizontal filter works the same based on shifted vers of current
3559+      // row. "prev" is current row shifted right by 1 pixel; we need to
3560+      // insert the previous pixel value (from t1).
3561+      // "next" is current row shifted left by 1 pixel, with first pixel
3562+      // of next block of 8 pixels added in.
3563+      __m128i prv0 = _mm_slli_si128(curr, 2);
3564+      __m128i nxt0 = _mm_srli_si128(curr, 2);
3565+      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
3566+      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
3567+
3568+      // horizontal filter, polyphase implementation since it's convenient:
3569+      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3570+      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
3571+      // note the shared term.
3572+      __m128i bias  = _mm_set1_epi16(8);
3573+      __m128i curs = _mm_slli_epi16(curr, 2);
3574+      __m128i prvd = _mm_sub_epi16(prev, curr);
3575+      __m128i nxtd = _mm_sub_epi16(next, curr);
3576+      __m128i curb = _mm_add_epi16(curs, bias);
3577+      __m128i even = _mm_add_epi16(prvd, curb);
3578+      __m128i odd  = _mm_add_epi16(nxtd, curb);
3579+
3580+      // interleave even and odd pixels, then undo scaling.
3581+      __m128i int0 = _mm_unpacklo_epi16(even, odd);
3582+      __m128i int1 = _mm_unpackhi_epi16(even, odd);
3583+      __m128i de0  = _mm_srli_epi16(int0, 4);
3584+      __m128i de1  = _mm_srli_epi16(int1, 4);
3585+
3586+      // pack and write output
3587+      __m128i outv = _mm_packus_epi16(de0, de1);
3588+      _mm_storeu_si128((__m128i *) (out + i*2), outv);
3589+#elif defined(STBI_NEON)
3590+      // load and perform the vertical filtering pass
3591+      // this uses 3*x + y = 4*x + (y - x)
3592+      uint8x8_t farb  = vld1_u8(in_far + i);
3593+      uint8x8_t nearb = vld1_u8(in_near + i);
3594+      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
3595+      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
3596+      int16x8_t curr  = vaddq_s16(nears, diff); // current row
3597+
3598+      // horizontal filter works the same based on shifted vers of current
3599+      // row. "prev" is current row shifted right by 1 pixel; we need to
3600+      // insert the previous pixel value (from t1).
3601+      // "next" is current row shifted left by 1 pixel, with first pixel
3602+      // of next block of 8 pixels added in.
3603+      int16x8_t prv0 = vextq_s16(curr, curr, 7);
3604+      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
3605+      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
3606+      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
3607+
3608+      // horizontal filter, polyphase implementation since it's convenient:
3609+      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3610+      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
3611+      // note the shared term.
3612+      int16x8_t curs = vshlq_n_s16(curr, 2);
3613+      int16x8_t prvd = vsubq_s16(prev, curr);
3614+      int16x8_t nxtd = vsubq_s16(next, curr);
3615+      int16x8_t even = vaddq_s16(curs, prvd);
3616+      int16x8_t odd  = vaddq_s16(curs, nxtd);
3617+
3618+      // undo scaling and round, then store with even/odd phases interleaved
3619+      uint8x8x2_t o;
3620+      o.val[0] = vqrshrun_n_s16(even, 4);
3621+      o.val[1] = vqrshrun_n_s16(odd,  4);
3622+      vst2_u8(out + i*2, o);
3623+#endif
3624+
3625+      // "previous" value for next iter
3626+      t1 = 3*in_near[i+7] + in_far[i+7];
3627+   }
3628+
   // scalar tail: first the even output of sample i. When the vector loop
   // never ran (i == 0), t0 equals the recomputed t1, so this reduces to
   // (4*t1 + 8) >> 4, i.e. the same value as stbi__div4(t1 + 2).
3629+   t0 = t1;
3630+   t1 = 3*in_near[i] + in_far[i];
3631+   out[i*2] = stbi__div16(3*t1 + t0 + 8);
3632+
   // remaining samples, one odd/even output pair per source sample
3633+   for (++i; i < w; ++i) {
3634+      t0 = t1;
3635+      t1 = 3*in_near[i]+in_far[i];
3636+      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3637+      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3638+   }
   // last output sample: vertical blend only
3639+   out[w*2-1] = stbi__div4(t1+2);
3640+
3641+   STBI_NOTUSED(hs);
3642+
3643+   return out;
3644+}
3645+#endif
3646+
3647+static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3648+{
3649+   // resample with nearest-neighbor
3650+   int i,j;
3651+   STBI_NOTUSED(in_far);
3652+   for (i=0; i < w; ++i)
3653+      for (j=0; j < hs; ++j)
3654+         out[i*hs+j] = in_near[i];
3655+   return out;
3656+}
3657+
3658+// this is a reduced-precision calculation of YCbCr-to-RGB introduced
3659+// to make sure the code produces the same results in both SIMD and scalar
3660+#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
3661+static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3662+{
3663+   int i;
3664+   for (i=0; i < count; ++i) {
3665+      int y_fixed = (y[i] << 20) + (1<<19); // rounding
3666+      int r,g,b;
3667+      int cr = pcr[i] - 128;
3668+      int cb = pcb[i] - 128;
3669+      r = y_fixed +  cr* stbi__float2fixed(1.40200f);
3670+      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3671+      b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
3672+      r >>= 20;
3673+      g >>= 20;
3674+      b >>= 20;
3675+      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3676+      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3677+      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3678+      out[0] = (stbi_uc)r;
3679+      out[1] = (stbi_uc)g;
3680+      out[2] = (stbi_uc)b;
3681+      out[3] = 255;
3682+      out += step;
3683+   }
3684+}
3685+
3686+#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of the YCbCr-to-RGB row conversion (compiled only when
// STBI_SSE2 or STBI_NEON is defined).
//
// out   - destination; per pixel out[0..2] = R,G,B and out[3] = 255
// y     - luma row
// pcb   - Cb row
// pcr   - Cr row
// count - number of pixels to convert
// step  - number of bytes `out` advances per pixel
//
// The vector paths run only when step == 4 and convert 8 pixels per
// iteration; whatever is left (or everything, for other step values) goes
// through the scalar loop at the end.
// NOTE(review): the scalar loop stores out[3] regardless of `step`; with
// step == 3 that byte belongs to the next pixel (or lies just past the last
// one) — confirm callers size the output buffer for it.
3687+static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
3688+{
3689+   int i = 0;
3690+
3691+#ifdef STBI_SSE2
3692+   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
3693+   // it's useful in practice (you wouldn't use it for textures, for example).
3694+   // so just accelerate step == 4 case.
3695+   if (step == 4) {
3696+      // this is a fairly straightforward implementation and not super-optimized.
3697+      __m128i signflip  = _mm_set1_epi8(-0x80);
3698+      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
3699+      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
3700+      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
3701+      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
3702+      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
3703+      __m128i xw = _mm_set1_epi16(255); // alpha channel
3704+
3705+      for (; i+7 < count; i += 8) {
3706+         // load
3707+         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
3708+         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
3709+         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
3710+         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
3711+         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
3712+
3713+         // unpack to short (and left-shift cr, cb by 8)
         // for y the low byte of each lane is the 128 bias, so a lane holds
         // (y << 8) | 128
3714+         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
3715+         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
3716+         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
3717+
3718+         // color transform
         // yws = y*16 + 8: luma scaled by 16 with the rounding term included
3719+         __m128i yws = _mm_srli_epi16(yw, 4);
3720+         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
3721+         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
3722+         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
3723+         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
3724+         __m128i rws = _mm_add_epi16(cr0, yws);
3725+         __m128i gwt = _mm_add_epi16(cb0, yws);
3726+         __m128i bws = _mm_add_epi16(yws, cb1);
3727+         __m128i gws = _mm_add_epi16(gwt, cr1);
3728+
3729+         // descale
3730+         __m128i rw = _mm_srai_epi16(rws, 4);
3731+         __m128i bw = _mm_srai_epi16(bws, 4);
3732+         __m128i gw = _mm_srai_epi16(gws, 4);
3733+
3734+         // back to byte, set up for transpose
3735+         __m128i brb = _mm_packus_epi16(rw, bw);
3736+         __m128i gxb = _mm_packus_epi16(gw, xw);
3737+
3738+         // transpose to interleave channels
3739+         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
3740+         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
3741+         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
3742+         __m128i o1 = _mm_unpackhi_epi16(t0, t1);
3743+
3744+         // store
         // 8 pixels * 4 bytes = 32 bytes, written as two 16-byte stores
3745+         _mm_storeu_si128((__m128i *) (out + 0), o0);
3746+         _mm_storeu_si128((__m128i *) (out + 16), o1);
3747+         out += 32;
3748+      }
3749+   }
3750+#endif
3751+
3752+#ifdef STBI_NEON
3753+   // in this version, step=3 support would be easy to add. but is there demand?
3754+   if (step == 4) {
3755+      // this is a fairly straightforward implementation and not super-optimized.
3756+      uint8x8_t signflip = vdup_n_u8(0x80);
3757+      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
3758+      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
3759+      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
3760+      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));
3761+
3762+      for (; i+7 < count; i += 8) {
3763+         // load
3764+         uint8x8_t y_bytes  = vld1_u8(y + i);
3765+         uint8x8_t cr_bytes = vld1_u8(pcr + i);
3766+         uint8x8_t cb_bytes = vld1_u8(pcb + i);
3767+         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
3768+         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
3769+
3770+         // expand to s16
3771+         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
3772+         int16x8_t crw = vshll_n_s8(cr_biased, 7);
3773+         int16x8_t cbw = vshll_n_s8(cb_biased, 7);
3774+
3775+         // color transform
3776+         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
3777+         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
3778+         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
3779+         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
3780+         int16x8_t rws = vaddq_s16(yws, cr0);
3781+         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
3782+         int16x8_t bws = vaddq_s16(yws, cb1);
3783+
3784+         // undo scaling, round, convert to byte
3785+         uint8x8x4_t o;
3786+         o.val[0] = vqrshrun_n_s16(rws, 4);
3787+         o.val[1] = vqrshrun_n_s16(gws, 4);
3788+         o.val[2] = vqrshrun_n_s16(bws, 4);
3789+         o.val[3] = vdup_n_u8(255);
3790+
3791+         // store, interleaving r/g/b/a
3792+         vst4_u8(out, o);
3793+         out += 8*4;
3794+      }
3795+   }
3796+#endif
3797+
   // scalar path for the pixels the vector code did not cover; it uses the
   // same 20-bit fixed-point formulas and clamping as stbi__YCbCr_to_RGB_row
3798+   for (; i < count; ++i) {
3799+      int y_fixed = (y[i] << 20) + (1<<19); // rounding
3800+      int r,g,b;
3801+      int cr = pcr[i] - 128;
3802+      int cb = pcb[i] - 128;
3803+      r = y_fixed + cr* stbi__float2fixed(1.40200f);
3804+      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3805+      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
3806+      r >>= 20;
3807+      g >>= 20;
3808+      b >>= 20;
      // clamp to 0..255; the unsigned compare catches both negative and
      // too-large values in one test
3809+      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3810+      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3811+      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3812+      out[0] = (stbi_uc)r;
3813+      out[1] = (stbi_uc)g;
3814+      out[2] = (stbi_uc)b;
3815+      out[3] = 255;
3816+      out += step;
3817+   }
3818+}
3819+#endif
3820+
3821+// set up the kernels: install the default routines, then swap in the *_simd versions when STBI_SSE2 or STBI_NEON is defined
3822+static void stbi__setup_jpeg(stbi__jpeg *j)
3823+{
3824+   j->idct_block_kernel = stbi__idct_block;
3825+   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3826+   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3827+
3828+#ifdef STBI_SSE2
3829+   if (stbi__sse2_available()) { // checked at run time; the defaults above stay in place when it returns 0
3830+      j->idct_block_kernel = stbi__idct_simd;
3831+      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3832+      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3833+   }
3834+#endif
3835+
3836+#ifdef STBI_NEON
3837+   j->idct_block_kernel = stbi__idct_simd; // NEON builds install the SIMD kernels unconditionally (no run-time check)
3838+   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3839+   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3840+#endif
3841+}
3842+
3843+// clean up the temporary component buffers by handing the image's component count (img_n) and 0 to stbi__free_jpeg_components
3844+static void stbi__cleanup_jpeg(stbi__jpeg *j)
3845+{
3846+   stbi__free_jpeg_components(j, j->s->img_n, 0);
3847+}
3848+
3849+typedef struct // per-component upsampling state, filled in and stepped by load_jpeg_image
3850+{
3851+   resample_row_func resample; // row upsampler selected from the hs/vs pair
3852+   stbi_uc *line0,*line1; // the two source rows handed to resample; line1 leads line0 by at most one row
3853+   int hs,vs;   // expansion factor in each axis
3854+   int w_lores; // horizontal pixels pre-expansion
3855+   int ystep;   // how far through vertical expansion we are
3856+   int ypos;    // which pre-expansion row we're on
3857+} stbi__resample;
3858+
3859+// fast 0..255 * 0..255 => 0..255 rounded multiplication (i.e. x*y/255 without a division)
3860+static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3861+{
3862+   unsigned int t = x*y + 128; // the +128 supplies the rounding
3863+   return (stbi_uc) ((t + (t >>8)) >> 8); // (t + t/256) / 256 stands in for t / 255
3864+}
3865+// decode the JPEG held by z into a new buffer of n interleaved 8-bit channels per pixel (n = req_comp, or 3/1 picked from the source component count when req_comp is 0); stores the size in *out_x/*out_y; returns NULL or the stbi__errpuc result on failure
3866+static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3867+{
3868+   int n, decode_n, is_rgb;
3869+   z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3870+
3871+   // validate req_comp
3872+   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3873+
3874+   // load a jpeg image from whichever source, but leave in YCbCr format
3875+   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3876+
3877+   // determine actual number of components to generate
3878+   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3879+
3880+   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); // 3-component data that is copied through below instead of going through the YCbCr kernel
3881+
3882+   if (z->s->img_n == 3 && n < 3 && !is_rgb) // grey output from YCbCr data only needs component 0
3883+      decode_n = 1;
3884+   else
3885+      decode_n = z->s->img_n;
3886+
3887+   // nothing to do if no components requested; check this now to avoid
3888+   // accessing uninitialized coutput[0] later
3889+   if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
3890+
3891+   // resample and color-convert
3892+   {
3893+      int k;
3894+      unsigned int i,j;
3895+      stbi_uc *output;
3896+      stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
3897+
3898+      stbi__resample res_comp[4];
3899+
3900+      for (k=0; k < decode_n; ++k) {
3901+         stbi__resample *r = &res_comp[k];
3902+
3903+         // allocate line buffer big enough for upsampling off the edges
3904+         // with upsample factor of 4
3905+         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3906+         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3907+
3908+         r->hs      = z->img_h_max / z->img_comp[k].h;
3909+         r->vs      = z->img_v_max / z->img_comp[k].v;
3910+         r->ystep   = r->vs >> 1;
3911+         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3912+         r->ypos    = 0;
3913+         r->line0   = r->line1 = z->img_comp[k].data; // both row pointers start on the component's first row
3914+
3915+         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3916+         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3917+         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3918+         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3919+         else                               r->resample = stbi__resample_row_generic;
3920+      }
3921+
3922+      // can't error after this so, this is safe
3923+      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); // NOTE(review): presumably n*img_x*img_y + 1 bytes; the spare byte would absorb the out[3] = 255 store on the last pixel when n == 3 -- confirm against stbi__malloc_mad3
3924+      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3925+
3926+      // now go ahead and resample
3927+      for (j=0; j < z->s->img_y; ++j) {
3928+         stbi_uc *out = output + n * z->s->img_x * j;
3929+         for (k=0; k < decode_n; ++k) {
3930+            stbi__resample *r = &res_comp[k];
3931+            int y_bot = r->ystep >= (r->vs >> 1); // decides which of line0/line1 is passed first to the resampler
3932+            coutput[k] = r->resample(z->img_comp[k].linebuf,
3933+                                     y_bot ? r->line1 : r->line0,
3934+                                     y_bot ? r->line0 : r->line1,
3935+                                     r->w_lores, r->hs);
3936+            if (++r->ystep >= r->vs) { // vs output rows produced for this step: move the row pointers on
3937+               r->ystep = 0;
3938+               r->line0 = r->line1;
3939+               if (++r->ypos < z->img_comp[k].y) // line1 stops advancing once ypos reaches img_comp[k].y
3940+                  r->line1 += z->img_comp[k].w2;
3941+            }
3942+         }
3943+         if (n >= 3) {
3944+            stbi_uc *y = coutput[0];
3945+            if (z->s->img_n == 3) {
3946+               if (is_rgb) {
3947+                  for (i=0; i < z->s->img_x; ++i) {
3948+                     out[0] = y[i];
3949+                     out[1] = coutput[1][i];
3950+                     out[2] = coutput[2][i];
3951+                     out[3] = 255;
3952+                     out += n;
3953+                  }
3954+               } else {
3955+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3956+               }
3957+            } else if (z->s->img_n == 4) {
3958+               if (z->app14_color_transform == 0) { // CMYK
3959+                  for (i=0; i < z->s->img_x; ++i) {
3960+                     stbi_uc m = coutput[3][i];
3961+                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
3962+                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
3963+                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
3964+                     out[3] = 255;
3965+                     out += n;
3966+                  }
3967+               } else if (z->app14_color_transform == 2) { // YCCK
3968+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3969+                  for (i=0; i < z->s->img_x; ++i) {
3970+                     stbi_uc m = coutput[3][i];
3971+                     out[0] = stbi__blinn_8x8(255 - out[0], m);
3972+                     out[1] = stbi__blinn_8x8(255 - out[1], m);
3973+                     out[2] = stbi__blinn_8x8(255 - out[2], m);
3974+                     out += n;
3975+                  }
3976+               } else { // YCbCr + alpha?  Ignore the fourth channel for now
3977+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3978+               }
3979+            } else
3980+               for (i=0; i < z->s->img_x; ++i) {
3981+                  out[0] = out[1] = out[2] = y[i];
3982+                  out[3] = 255; // not used if n==3
3983+                  out += n;
3984+               }
3985+         } else {
3986+            if (is_rgb) {
3987+               if (n == 1)
3988+                  for (i=0; i < z->s->img_x; ++i)
3989+                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3990+               else {
3991+                  for (i=0; i < z->s->img_x; ++i, out += 2) {
3992+                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3993+                     out[1] = 255;
3994+                  }
3995+               }
3996+            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3997+               for (i=0; i < z->s->img_x; ++i) {
3998+                  stbi_uc m = coutput[3][i];
3999+                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
4000+                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
4001+                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
4002+                  out[0] = stbi__compute_y(r, g, b);
4003+                  out[1] = 255;
4004+                  out += n;
4005+               }
4006+            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
4007+               for (i=0; i < z->s->img_x; ++i) {
4008+                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
4009+                  out[1] = 255;
4010+                  out += n;
4011+               }
4012+            } else {
4013+               stbi_uc *y = coutput[0];
4014+               if (n == 1)
4015+                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
4016+               else
4017+                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
4018+            }
4019+         }
4020+      }
4021+      stbi__cleanup_jpeg(z);
4022+      *out_x = z->s->img_x;
4023+      *out_y = z->s->img_y;
4024+      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
4025+      return output;
4026+   }
4027+}
4028+// allocate and zero a temporary stbi__jpeg bound to s, install the kernels, decode through load_jpeg_image, free the state and return the pixel buffer (ri is unused)
4029+static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
4030+{
4031+   unsigned char* result;
4032+   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
4033+   if (!j) return stbi__errpuc("outofmem", "Out of memory");
4034+   memset(j, 0, sizeof(stbi__jpeg));
4035+   STBI_NOTUSED(ri);
4036+   j->s = s;
4037+   stbi__setup_jpeg(j);
4038+   result = load_jpeg_image(j, x,y,comp,req_comp);
4039+   STBI_FREE(j);
4040+   return result;
4041+}
4042+// probe whether s holds a JPEG: returns the result of stbi__decode_jpeg_header in STBI__SCAN_type mode; s is rewound whatever the outcome
4043+static int stbi__jpeg_test(stbi__context *s)
4044+{
4045+   int r;
4046+   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
4047+   if (!j) return stbi__err("outofmem", "Out of memory");
4048+   memset(j, 0, sizeof(stbi__jpeg));
4049+   j->s = s;
4050+   stbi__setup_jpeg(j);
4051+   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
4052+   stbi__rewind(s);
4053+   STBI_FREE(j);
4054+   return r;
4055+}
4056+// read the header through j and store width, height and channel count (3 or 1) in whichever of x/y/comp is non-NULL; returns 1 on success, 0 on failure
4057+static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
4058+{
4059+   if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
4060+      stbi__rewind( j->s ); // the stream is rewound only on this failure path
4061+      return 0;
4062+   }
4063+   if (x) *x = j->s->img_x;
4064+   if (y) *y = j->s->img_y;
4065+   if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
4066+   return 1;
4067+}
4068+// wrapper around stbi__jpeg_info_raw that supplies a zeroed temporary stbi__jpeg bound to s (stbi__setup_jpeg is not called here)
4069+static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
4070+{
4071+   int result;
4072+   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
4073+   if (!j) return stbi__err("outofmem", "Out of memory");
4074+   memset(j, 0, sizeof(stbi__jpeg));
4075+   j->s = s;
4076+   result = stbi__jpeg_info_raw(j, x, y, comp);
4077+   STBI_FREE(j);
4078+   return result;
4079+}
4080+#endif
4081+
4082+// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
4083+//    simple implementation
4084+//      - all input must be provided in an upfront buffer
4085+//      - all output is written to a single output buffer (can malloc/realloc)
4086+//    performance
4087+//      - fast huffman
4088+
4089+#ifndef STBI_NO_ZLIB
4090+
4091+// fast-way is faster to check than jpeg huffman, but slow way is slower
4092+#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables (the fixed code lengths top out at 9 bits)
4093+#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1) // selects the low STBI__ZFAST_BITS bits of the bit buffer as the fast-table index
4094+#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
4095+
4096+// zlib-style huffman encoding
4097+// (jpegs packs from left, zlib from right, so can't share code)
4098+typedef struct
4099+{
4100+   stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // indexed by the next STBI__ZFAST_BITS input bits; entry is (code length << 9) | symbol, 0 means "use the slow path"
4101+   stbi__uint16 firstcode[16]; // first code value of each code length
4102+   int maxcode[17]; // per length: one past the last code, pre-shifted to 16 bits; [16] is a sentinel
4103+   stbi__uint16 firstsymbol[16]; // slot in size[]/value[] of the first symbol of each code length
4104+   stbi_uc  size[STBI__ZNSYMS]; // code length stored for each slot
4105+   stbi__uint16 value[STBI__ZNSYMS]; // symbol stored for each slot
4106+} stbi__zhuffman;
4107+// reverse the order of the low 16 bits of n by swapping neighbouring groups of 1, 2, 4 and then 8 bits
4108+stbi_inline static int stbi__bitreverse16(int n)
4109+{
4110+  n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
4111+  n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
4112+  n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
4113+  n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
4114+  return n;
4115+}
4116+// reverse the low `bits` bits of v (bits must not exceed 16)
4117+stbi_inline static int stbi__bit_reverse(int v, int bits)
4118+{
4119+   STBI_ASSERT(bits <= 16);
4120+   // to bit reverse n bits, reverse 16 and shift
4121+   // e.g. 11 bits, bit reverse and shift away 5
4122+   return stbi__bitreverse16(v) >> (16-bits);
4123+}
4124+// fill the decode tables of z from `num` code lengths in sizelist (length 0 = symbol has no code); returns 1 on success or the stbi__err result when the lengths are invalid
4125+static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
4126+{
4127+   int i,k=0;
4128+   int code, next_code[16], sizes[17];
4129+
4130+   // DEFLATE spec for generating codes
4131+   memset(sizes, 0, sizeof(sizes));
4132+   memset(z->fast, 0, sizeof(z->fast));
4133+   for (i=0; i < num; ++i) // histogram: how many symbols use each code length
4134+      ++sizes[sizelist[i]];
4135+   sizes[0] = 0; // symbols of length 0 take no part in code assignment
4136+   for (i=1; i < 16; ++i)
4137+      if (sizes[i] > (1 << i))
4138+         return stbi__err("bad sizes", "Corrupt PNG");
4139+   code = 0;
4140+   for (i=1; i < 16; ++i) {
4141+      next_code[i] = code;
4142+      z->firstcode[i] = (stbi__uint16) code;
4143+      z->firstsymbol[i] = (stbi__uint16) k;
4144+      code = (code + sizes[i]);
4145+      if (sizes[i])
4146+         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); // the last code of length i must still fit in i bits
4147+      z->maxcode[i] = code << (16-i); // preshift for inner loop
4148+      code <<= 1;
4149+      k += sizes[i];
4150+   }
4151+   z->maxcode[16] = 0x10000; // sentinel
4152+   for (i=0; i < num; ++i) {
4153+      int s = sizelist[i];
4154+      if (s) {
4155+         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; // slot of this symbol in size[]/value[]
4156+         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
4157+         z->size [c] = (stbi_uc     ) s;
4158+         z->value[c] = (stbi__uint16) i;
4159+         if (s <= STBI__ZFAST_BITS) { // short code: fill every fast slot whose low s bits equal the bit-reversed code
4160+            int j = stbi__bit_reverse(next_code[s],s);
4161+            while (j < (1 << STBI__ZFAST_BITS)) {
4162+               z->fast[j] = fastv;
4163+               j += (1 << s);
4164+            }
4165+         }
4166+         ++next_code[s];
4167+      }
4168+   }
4169+   return 1;
4170+}
4171+
4172+// zlib-from-memory implementation for PNG reading
4173+//    because PNG allows splitting the zlib stream arbitrarily,
4174+//    and it's annoying structurally to have PNG call ZLIB call PNG,
4175+//    we require PNG read all the IDATs and combine them into a single
4176+//    memory buffer
4177+
4178+typedef struct // inflate state: input cursor, bit buffer, output buffer and the current block's Huffman tables
4179+{
4180+   stbi_uc *zbuffer, *zbuffer_end; // read cursor into the compressed input, and its end
4181+   int num_bits; // number of valid bits currently held in code_buffer
4182+   int hit_zeof_once; // set once 16 padding bits have been added at end of input
4183+   stbi__uint32 code_buffer; // pending input bits; the next bit to consume is bit 0
4184+
4185+   char *zout; // write cursor in the output buffer
4186+   char *zout_start; // start of the output buffer
4187+   char *zout_end; // one past the end of the output buffer
4188+   int   z_expandable; // nonzero if stbi__zexpand may reallocate the output buffer
4189+
4190+   stbi__zhuffman z_length, z_distance; // tables for literal/length and distance symbols
4191+} stbi__zbuf;
4192+// nonzero once the read cursor has reached (or passed) the end of the compressed input
4193+stbi_inline static int stbi__zeof(stbi__zbuf *z)
4194+{
4195+   return (z->zbuffer >= z->zbuffer_end);
4196+}
4197+// return the next input byte and advance; at end of input return 0 without advancing
4198+stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
4199+{
4200+   return stbi__zeof(z) ? 0 : *z->zbuffer++;
4201+}
4202+// top up code_buffer one input byte at a time until it holds more than 24 bits
4203+static void stbi__fill_bits(stbi__zbuf *z)
4204+{
4205+   do {
4206+      if (z->code_buffer >= (1U << z->num_bits)) { // bits set at or above num_bits: the buffer state is inconsistent
4207+        z->zbuffer = z->zbuffer_end;  /* treat this as EOF so we fail. */
4208+        return;
4209+      }
4210+      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; // new byte goes above the bits already held
4211+      z->num_bits += 8;
4212+   } while (z->num_bits <= 24);
4213+}
4214+// return the next n bits of input (taken from the low end of the bit buffer) and consume them, refilling first if fewer than n are held
4215+stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
4216+{
4217+   unsigned int k;
4218+   if (z->num_bits < n) stbi__fill_bits(z);
4219+   k = z->code_buffer & ((1 << n) - 1);
4220+   z->code_buffer >>= n;
4221+   z->num_bits -= n;
4222+   return k;
4223+}
4224+// decode one symbol whose code the fast table could not resolve (code longer than STBI__ZFAST_BITS); returns the symbol or -1 for an invalid code
4225+static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
4226+{
4227+   int b,s,k;
4228+   // not resolved by fast table, so compute it the slow way
4229+   // use jpeg approach, which requires MSbits at top
4230+   k = stbi__bit_reverse(a->code_buffer, 16);
4231+   for (s=STBI__ZFAST_BITS+1; ; ++s)
4232+      if (k < z->maxcode[s]) // always true by s == 16, where maxcode holds the 0x10000 sentinel
4233+         break;
4234+   if (s >= 16) return -1; // invalid code!
4235+   // code size is s, so:
4236+   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
4237+   if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
4238+   if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead.
4239+   a->code_buffer >>= s;
4240+   a->num_bits -= s;
4241+   return z->value[b];
4242+}
4243+// decode one symbol from the bit buffer with table z: fast-table lookup first, slow path otherwise; returns the symbol or -1 on error
4244+stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
4245+{
4246+   int b,s;
4247+   if (a->num_bits < 16) {
4248+      if (stbi__zeof(a)) {
4249+         if (!a->hit_zeof_once) {
4250+            // This is the first time we hit eof, insert 16 extra padding bits
4251+            // to allow us to keep going; if we actually consume any of them
4252+            // though, that is invalid data. This is caught later.
4253+            a->hit_zeof_once = 1;
4254+            a->num_bits += 16; // add 16 implicit zero bits
4255+         } else {
4256+            // We already inserted our extra 16 padding bits and are again
4257+            // out, this stream is actually prematurely terminated.
4258+            return -1;
4259+         }
4260+      } else {
4261+         stbi__fill_bits(a);
4262+      }
4263+   }
4264+   b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
4265+   if (b) { // nonzero entry packs (code length << 9) | symbol
4266+      s = b >> 9;
4267+      a->code_buffer >>= s;
4268+      a->num_bits -= s;
4269+      return b & 511;
4270+   }
4271+   return stbi__zhuffman_decode_slowpath(a, z);
4272+}
4273+// grow the output buffer by repeated doubling until n more bytes fit after zout; returns 1 on success, the stbi__err result if the buffer is not expandable, the size would overflow, or realloc fails
4274+static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
4275+{
4276+   char *q;
4277+   unsigned int cur, limit, old_limit;
4278+   z->zout = zout; // store the caller's write position first; cur is measured from it
4279+   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
4280+   cur   = (unsigned int) (z->zout - z->zout_start);
4281+   limit = old_limit = (unsigned) (z->zout_end - z->zout_start);
4282+   if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); // cur + n would wrap
4283+   while (cur + n > limit) {
4284+      if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); // doubling would wrap
4285+      limit *= 2;
4286+   }
4287+   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
4288+   STBI_NOTUSED(old_limit);
4289+   if (q == NULL) return stbi__err("outofmem", "Out of memory");
4290+   z->zout_start = q; // the buffer may have moved: rebase all three output pointers
4291+   z->zout       = q + cur;
4292+   z->zout_end   = q + limit;
4293+   return 1;
4294+}
4295+
4296+static const int stbi__zlength_base[31] = { // base match length for length symbols 257 and up (index = symbol - 257)
4297+   3,4,5,6,7,8,9,10,11,13,
4298+   15,17,19,23,27,31,35,43,51,59,
4299+   67,83,99,115,131,163,195,227,258,0,0 };
4300+
4301+static const int stbi__zlength_extra[31]= // extra bits to read and add to the base length, same indexing
4302+{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
4303+
4304+static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, // base distance per distance symbol
4305+257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
4306+
4307+static const int stbi__zdist_extra[32] = // extra bits per distance symbol; only 30 values are listed, so entries 30 and 31 are zero-initialized
4308+{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
4309+// inflate one Huffman-coded block into the output buffer using a->z_length / a->z_distance; returns 1 at the end-of-block symbol (256), 0 or the stbi__err result on error
4310+static int stbi__parse_huffman_block(stbi__zbuf *a)
4311+{
4312+   char *zout = a->zout; // local write cursor; stored back at end of block and re-read after every stbi__zexpand
4313+   for(;;) {
4314+      int z = stbi__zhuffman_decode(a, &a->z_length);
4315+      if (z < 256) {
4316+         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
4317+         if (zout >= a->zout_end) {
4318+            if (!stbi__zexpand(a, zout, 1)) return 0;
4319+            zout = a->zout;
4320+         }
4321+         *zout++ = (char) z;
4322+      } else {
4323+         stbi_uc *p;
4324+         int len,dist;
4325+         if (z == 256) {
4326+            a->zout = zout;
4327+            if (a->hit_zeof_once && a->num_bits < 16) {
4328+               // The first time we hit zeof, we inserted 16 extra zero bits into our bit
4329+               // buffer so the decoder can just do its speculative decoding. But if we
4330+               // actually consumed any of those bits (which is the case when num_bits < 16),
4331+               // the stream actually read past the end so it is malformed.
4332+               return stbi__err("unexpected end","Corrupt PNG");
4333+            }
4334+            return 1;
4335+         }
4336+         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
4337+         z -= 257;
4338+         len = stbi__zlength_base[z];
4339+         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
4340+         z = stbi__zhuffman_decode(a, &a->z_distance);
4341+         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
4342+         dist = stbi__zdist_base[z];
4343+         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
4344+         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); // the back-reference may not reach before the start of the output
4345+         if (len > a->zout_end - zout) {
4346+            if (!stbi__zexpand(a, zout, len)) return 0;
4347+            zout = a->zout;
4348+         }
4349+         p = (stbi_uc *) (zout - dist); // computed after any expansion so it points into the current buffer
4350+         if (dist == 1) { // run of one byte; common in images.
4351+            stbi_uc v = *p;
4352+            if (len) { do *zout++ = v; while (--len); }
4353+         } else {
4354+            if (len) { do *zout++ = *p++; while (--len); } // byte-by-byte so source and destination may overlap
4355+         }
4356+      }
4357+   }
4358+}
4359+// read the code-length description at the start of a type-2 block and build a->z_length and a->z_distance from it; returns 1 on success, 0 or the stbi__err result on error
4360+static int stbi__compute_huffman_codes(stbi__zbuf *a)
4361+{
4362+   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; // order in which the code-length code sizes are stored in the stream
4363+   stbi__zhuffman z_codelength;
4364+   stbi_uc lencodes[286+32+137];//padding for maximum single op
4365+   stbi_uc codelength_sizes[19];
4366+   int i,n;
4367+
4368+   int hlit  = stbi__zreceive(a,5) + 257; // number of literal/length code lengths
4369+   int hdist = stbi__zreceive(a,5) + 1; // number of distance code lengths
4370+   int hclen = stbi__zreceive(a,4) + 4; // number of code-length code sizes that follow
4371+   int ntot  = hlit + hdist;
4372+
4373+   memset(codelength_sizes, 0, sizeof(codelength_sizes));
4374+   for (i=0; i < hclen; ++i) {
4375+      int s = stbi__zreceive(a,3);
4376+      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
4377+   }
4378+   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
4379+
4380+   n = 0;
4381+   while (n < ntot) {
4382+      int c = stbi__zhuffman_decode(a, &z_codelength);
4383+      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
4384+      if (c < 16)
4385+         lencodes[n++] = (stbi_uc) c; // symbols 0..15 are literal code lengths
4386+      else {
4387+         stbi_uc fill = 0;
4388+         if (c == 16) { // repeat the previous length 3..6 times
4389+            c = stbi__zreceive(a,2)+3;
4390+            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
4391+            fill = lencodes[n-1];
4392+         } else if (c == 17) { // run of 3..10 zero lengths
4393+            c = stbi__zreceive(a,3)+3;
4394+         } else if (c == 18) { // run of 11..138 zero lengths
4395+            c = stbi__zreceive(a,7)+11;
4396+         } else {
4397+            return stbi__err("bad codelengths", "Corrupt PNG");
4398+         }
4399+         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); // a run may not extend past the declared total
4400+         memset(lencodes+n, fill, c);
4401+         n += c;
4402+      }
4403+   }
4404+   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
4405+   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
4406+   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; // distance lengths follow the hlit literal/length lengths
4407+   return 1;
4408+}
4409+// copy one stored (type 0) block to the output: read the 4-byte len/nlen header, validate it, then memcpy len bytes; returns 1 on success, 0 or the stbi__err result on error
4410+static int stbi__parse_uncompressed_block(stbi__zbuf *a)
4411+{
4412+   stbi_uc header[4];
4413+   int len,nlen,k;
4414+   if (a->num_bits & 7)
4415+      stbi__zreceive(a, a->num_bits & 7); // discard
4416+   // drain the bit-packed data into header
4417+   k = 0;
4418+   while (a->num_bits > 0) {
4419+      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
4420+      a->code_buffer >>= 8;
4421+      a->num_bits -= 8;
4422+   }
4423+   if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
4424+   // now fill header the normal way
4425+   while (k < 4)
4426+      header[k++] = stbi__zget8(a);
4427+   len  = header[1] * 256 + header[0]; // both fields are stored low byte first
4428+   nlen = header[3] * 256 + header[2];
4429+   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); // nlen must be the 16-bit complement of len
4430+   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
4431+   if (a->zout + len > a->zout_end)
4432+      if (!stbi__zexpand(a, a->zout, len)) return 0;
4433+   memcpy(a->zout, a->zbuffer, len);
4434+   a->zbuffer += len;
4435+   a->zout += len;
4436+   return 1;
4437+}
4438+// consume and validate the two-byte zlib header (cmf, flg); returns 1 when acceptable, otherwise the stbi__err result
4439+static int stbi__parse_zlib_header(stbi__zbuf *a)
4440+{
4441+   int cmf   = stbi__zget8(a);
4442+   int cm    = cmf & 15;
4443+   /* int cinfo = cmf >> 4; */
4444+   int flg   = stbi__zget8(a);
4445+   if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
4446+   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
4447+   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
4448+   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
4449+   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
4450+   return 1;
4451+}
4452+
4453+static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = // literal/length code lengths used for block type 1 (fixed codes)
4454+{
4455+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4456+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4457+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4458+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4459+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4460+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4461+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4462+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4463+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
4464+};
4465+static const stbi_uc stbi__zdefault_distance[32] = // distance code lengths used for block type 1: every symbol is 5 bits
4466+{
4467+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
4468+};
4469+/*
4470+Init algorithm:
4471+{
4472+   int i;   // use <= to match clearly with spec
4473+   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4474+   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4475+   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4476+   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4477+
4478+   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4479+}
4480+*/
4481+// inflate the whole stream: optional zlib header, then blocks until one carries the final flag; returns 1 on success, 0 or the stbi__err result on failure
4482+static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
4483+{
4484+   int final, type;
4485+   if (parse_header)
4486+      if (!stbi__parse_zlib_header(a)) return 0;
4487+   a->num_bits = 0;
4488+   a->code_buffer = 0;
4489+   a->hit_zeof_once = 0;
4490+   do {
4491+      final = stbi__zreceive(a,1);
4492+      type = stbi__zreceive(a,2);
4493+      if (type == 0) {
4494+         if (!stbi__parse_uncompressed_block(a)) return 0;
4495+      } else if (type == 3) {
4496+         return 0; // block type 3 is rejected; note that no stbi__err reason is recorded on this path
4497+      } else {
4498+         if (type == 1) {
4499+            // use fixed code lengths
4500+            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , STBI__ZNSYMS)) return 0;
4501+            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
4502+         } else {
4503+            if (!stbi__compute_huffman_codes(a)) return 0; // type 2: the tables are described in the stream
4504+         }
4505+         if (!stbi__parse_huffman_block(a)) return 0;
4506+      }
4507+   } while (!final);
4508+   return 1;
4509+}
4510+// point the output side of a at obuf (olen bytes, expandable when exp is nonzero) and run stbi__parse_zlib; the caller must have set zbuffer/zbuffer_end beforehand
4511+static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
4512+{
4513+   a->zout_start = obuf;
4514+   a->zout       = obuf;
4515+   a->zout_end   = obuf + olen;
4516+   a->z_expandable = exp;
4517+
4518+   return stbi__parse_zlib(a, parse_header);
4519+}
4520+// inflate a zlib stream (header parsed) into a growable buffer that starts at initial_size bytes; returns the buffer and stores the decoded length in *outlen when non-NULL, or returns NULL on failure
4521+STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4522+{
4523+   stbi__zbuf a;
4524+   char *p = (char *) stbi__malloc(initial_size);
4525+   if (p == NULL) return NULL;
4526+   a.zbuffer = (stbi_uc *) buffer;
4527+   a.zbuffer_end = (stbi_uc *) buffer + len;
4528+   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4529+      if (outlen) *outlen = (int) (a.zout - a.zout_start);
4530+      return a.zout_start; // not p: the buffer may have been reallocated while growing
4531+   } else {
4532+      STBI_FREE(a.zout_start); // likewise free the current buffer rather than p
4533+      return NULL;
4534+   }
4535+}
4536+// same as stbi_zlib_decode_malloc_guesssize with an initial output size of 16384 bytes
4537+STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4538+{
4539+   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4540+}
4541+// like stbi_zlib_decode_malloc_guesssize, but the zlib header is parsed only when parse_header is nonzero
4542+STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4543+{
4544+   stbi__zbuf a;
4545+   char *p = (char *) stbi__malloc(initial_size);
4546+   if (p == NULL) return NULL;
4547+   a.zbuffer = (stbi_uc *) buffer;
4548+   a.zbuffer_end = (stbi_uc *) buffer + len;
4549+   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4550+      if (outlen) *outlen = (int) (a.zout - a.zout_start);
4551+      return a.zout_start; // not p: the buffer may have been reallocated while growing
4552+   } else {
4553+      STBI_FREE(a.zout_start);
4554+      return NULL;
4555+   }
4556+}
4557+// inflate a zlib stream (header parsed) into the caller's fixed buffer obuffer of olen bytes, which is never grown; returns the number of bytes written or -1 on failure
4558+STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4559+{
4560+   stbi__zbuf a;
4561+   a.zbuffer = (stbi_uc *) ibuffer;
4562+   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4563+   if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4564+      return (int) (a.zout - a.zout_start);
4565+   else
4566+      return -1;
4567+}
4568+// inflate a raw stream (no zlib header expected) into a growable buffer that starts at 16384 bytes; returns the buffer and stores the decoded length in *outlen when non-NULL, or returns NULL on failure
4569+STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4570+{
4571+   stbi__zbuf a;
4572+   char *p = (char *) stbi__malloc(16384);
4573+   if (p == NULL) return NULL;
4574+   a.zbuffer = (stbi_uc *) buffer;
4575+   a.zbuffer_end = (stbi_uc *) buffer+len;
4576+   if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4577+      if (outlen) *outlen = (int) (a.zout - a.zout_start);
4578+      return a.zout_start; // not p: the buffer may have been reallocated while growing
4579+   } else {
4580+      STBI_FREE(a.zout_start);
4581+      return NULL;
4582+   }
4583+}
4584+// inflate a raw stream (no zlib header expected) into the caller's fixed buffer obuffer of olen bytes; returns the number of bytes written or -1 on failure
4585+STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4586+{
4587+   stbi__zbuf a;
4588+   a.zbuffer = (stbi_uc *) ibuffer;
4589+   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4590+   if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4591+      return (int) (a.zout - a.zout_start);
4592+   else
4593+      return -1;
4594+}
4595+#endif
4596+
4597+// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4598+//    simple implementation
4599+//      - only 8-bit samples
4600+//      - no CRC checking
4601+//      - allocates lots of intermediate memory
4602+//        - avoids problem of streaming data between subsystems
4603+//        - avoids explicit window management
4604+//    performance
4605+//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4606+
4607+#ifndef STBI_NO_PNG
// Header of one PNG chunk as read by stbi__get_chunk_header.
typedef struct
{
   stbi__uint32 length; // first 32-bit big-endian value read from the chunk header
   stbi__uint32 type;   // second 32-bit value; compared against STBI__PNG_TYPE(...) codes
} stbi__pngchunk;
4613+
4614+static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4615+{
4616+   stbi__pngchunk c;
4617+   c.length = stbi__get32be(s);
4618+   c.type   = stbi__get32be(s);
4619+   return c;
4620+}
4621+
4622+static int stbi__check_png_header(stbi__context *s)
4623+{
4624+   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
4625+   int i;
4626+   for (i=0; i < 8; ++i)
4627+      if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
4628+   return 1;
4629+}
4630+
// Working state for decoding one PNG.
typedef struct
{
   stbi__context *s;                 // input stream / image info being decoded
   stbi_uc *idata, *expanded, *out;  // idata: concatenated IDAT bytes; expanded: zlib-decoded data; out: decoded pixels
   int depth;                        // bit depth read from IHDR (1, 2, 4, 8 or 16)
} stbi__png;
4637+
4638+
// Scanline filter codes. Values 0..4 are the filter bytes accepted from the
// stream by stbi__create_png_image_raw; STBI__F_avg_first (5) is internal only.
enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic filter used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first
};
4648+
// Maps a stream filter code (0..4) to the filter actually applied on the first
// scanline, where there is no previous row to sample.
static stbi_uc first_row_filter[5] =
{
   STBI__F_none,
   STBI__F_sub,
   STBI__F_none,
   STBI__F_avg_first,
   STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub
};
4657+
// Paeth predictor: selects one of a, b, c as the prediction.
// Written as a threshold comparison rather than the PNG-spec distance form;
// the original author states the two are equivalent and that this shape
// lends itself to branch-free code generation.
static int stbi__paeth(int a, int b, int c)
{
   int smaller, larger, threshold, pick;

   if (a < b) {
      smaller = a;
      larger  = b;
   } else {
      smaller = b;
      larger  = a;
   }
   threshold = c*3 - (a + b);

   // choose between the smaller value and c first, then let the larger
   // value override when the threshold is at or below the smaller one
   pick = (larger <= threshold) ? smaller : c;
   if (threshold <= smaller)
      pick = larger;
   return pick;
}
4670+
// Indexed by bit depth: multiplier that scales a sample of that depth to the 0..255 range
// (depth 1 -> 0xff, 2 -> 0x55, 4 -> 0x11, 8 -> 0x01; other indices are 0).
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4672+
4673+// adds an extra all-255 alpha channel
4674+// dest == src is legal
4675+// img_n must be 1 or 3
4676+static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n)
4677+{
4678+   int i;
4679+   // must process data backwards since we allow dest==src
4680+   if (img_n == 1) {
4681+      for (i=x-1; i >= 0; --i) {
4682+         dest[i*2+1] = 255;
4683+         dest[i*2+0] = src[i];
4684+      }
4685+   } else {
4686+      STBI_ASSERT(img_n == 3);
4687+      for (i=x-1; i >= 0; --i) {
4688+         dest[i*4+3] = 255;
4689+         dest[i*4+2] = src[i*3+2];
4690+         dest[i*4+1] = src[i*3+1];
4691+         dest[i*4+0] = src[i*3+0];
4692+      }
4693+   }
4694+}
4695+
// create the png data from post-deflated data
//
// Un-filters `raw` (one filter byte followed by the scanline bytes, per row) for
// an x-by-y image and writes out_n components per pixel into a freshly allocated
// a->out. out_n must equal the source component count (a->s->img_n) or that
// count plus one, in which case an opaque alpha channel is appended.
// Returns 1 on success, 0 after recording an error via stbi__err.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16 ? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes;
   stbi__uint32 img_len, img_width_bytes;
   stbi_uc *filter_buf;
   int all_ok = 1;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes;
   int filter_bytes = img_n*bytes; // distance in bytes between a sample and its left neighbour
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   // note: error exits here don't need to clean up a->out individually,
   // stbi__do_png always does on error.
   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
   img_width_bytes = (((img_n * x * depth) + 7) >> 3); // packed bytes per scanline, rounded up
   if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG");
   img_len = (img_width_bytes + 1) * y; // +1 per row for the filter byte

   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   // Allocate two scan lines worth of filter workspace buffer.
   filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0);
   if (!filter_buf) return stbi__err("outofmem", "Out of memory");

   // Filtering for low-bit-depth images
   // (filters then operate byte-wise on the packed row)
   if (depth < 8) {
      filter_bytes = 1;
      width = img_width_bytes;
   }

   for (j=0; j < y; ++j) {
      // cur/prior filter buffers alternate
      stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes;
      stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes;
      stbi_uc *dest = a->out + stride*j;
      int nk = width * filter_bytes; // number of filtered bytes in this row
      int filter = *raw++;

      // check filter type
      if (filter > 4) {
         all_ok = stbi__err("invalid filter","Corrupt PNG");
         break;
      }

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // perform actual filtering
      switch (filter) {
      case STBI__F_none:
         memcpy(cur, raw, nk);
         break;
      case STBI__F_sub:
         memcpy(cur, raw, filter_bytes);
         for (k = filter_bytes; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]);
         break;
      case STBI__F_up:
         for (k = 0; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
         break;
      case STBI__F_avg:
         for (k = 0; k < filter_bytes; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1));
         for (k = filter_bytes; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1));
         break;
      case STBI__F_paeth:
         for (k = 0; k < filter_bytes; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0)
         for (k = filter_bytes; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes]));
         break;
      case STBI__F_avg_first:
         memcpy(cur, raw, filter_bytes);
         for (k = filter_bytes; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1));
         break;
      }

      raw += nk;

      // expand decoded bits in cur to dest, also adding an extra alpha channel if desired
      if (depth < 8) {
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
         stbi_uc *in = cur;
         stbi_uc *out = dest;
         stbi_uc inb = 0;
         stbi__uint32 nsmp = x*img_n; // samples in this row

         // expand bits to bytes first
         // (samples are taken from the most significant bits of each input byte first)
         if (depth == 4) {
            for (i=0; i < nsmp; ++i) {
               if ((i & 1) == 0) inb = *in++;
               *out++ = scale * (inb >> 4);
               inb <<= 4;
            }
         } else if (depth == 2) {
            for (i=0; i < nsmp; ++i) {
               if ((i & 3) == 0) inb = *in++;
               *out++ = scale * (inb >> 6);
               inb <<= 2;
            }
         } else {
            STBI_ASSERT(depth == 1);
            for (i=0; i < nsmp; ++i) {
               if ((i & 7) == 0) inb = *in++;
               *out++ = scale * (inb >> 7);
               inb <<= 1;
            }
         }

         // insert alpha=255 values if desired
         if (img_n != out_n)
            stbi__create_png_alpha_expand8(dest, dest, x, img_n);
      } else if (depth == 8) {
         if (img_n == out_n)
            memcpy(dest, cur, x*img_n);
         else
            stbi__create_png_alpha_expand8(dest, cur, x, img_n);
      } else if (depth == 16) {
         // convert the image data from big-endian to platform-native
         stbi__uint16 *dest16 = (stbi__uint16*)dest;
         stbi__uint32 nsmp = x*img_n;

         if (img_n == out_n) {
            for (i = 0; i < nsmp; ++i, ++dest16, cur += 2)
               *dest16 = (cur[0] << 8) | cur[1];
         } else {
            STBI_ASSERT(img_n+1 == out_n);
            if (img_n == 1) {
               for (i = 0; i < x; ++i, dest16 += 2, cur += 2) {
                  dest16[0] = (cur[0] << 8) | cur[1];
                  dest16[1] = 0xffff; // opaque alpha
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (i = 0; i < x; ++i, dest16 += 4, cur += 6) {
                  dest16[0] = (cur[0] << 8) | cur[1];
                  dest16[1] = (cur[2] << 8) | cur[3];
                  dest16[2] = (cur[4] << 8) | cur[5];
                  dest16[3] = 0xffff; // opaque alpha
               }
            }
         }
      }
   }

   // the workspace is released on both the success path and the invalid-filter path
   STBI_FREE(filter_buf);
   if (!all_ok) return 0;

   return 1;
}
4861+
4862+static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4863+{
4864+   int bytes = (depth == 16 ? 2 : 1);
4865+   int out_bytes = out_n * bytes;
4866+   stbi_uc *final;
4867+   int p;
4868+   if (!interlaced)
4869+      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4870+
4871+   // de-interlacing
4872+   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
4873+   if (!final) return stbi__err("outofmem", "Out of memory");
4874+   for (p=0; p < 7; ++p) {
4875+      int xorig[] = { 0,4,0,2,0,1,0 };
4876+      int yorig[] = { 0,0,4,0,2,0,1 };
4877+      int xspc[]  = { 8,8,4,4,2,2,1 };
4878+      int yspc[]  = { 8,8,8,4,4,2,2 };
4879+      int i,j,x,y;
4880+      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4881+      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
4882+      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
4883+      if (x && y) {
4884+         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4885+         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4886+            STBI_FREE(final);
4887+            return 0;
4888+         }
4889+         for (j=0; j < y; ++j) {
4890+            for (i=0; i < x; ++i) {
4891+               int out_y = j*yspc[p]+yorig[p];
4892+               int out_x = i*xspc[p]+xorig[p];
4893+               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
4894+                      a->out + (j*x+i)*out_bytes, out_bytes);
4895+            }
4896+         }
4897+         STBI_FREE(a->out);
4898+         image_data += img_len;
4899+         image_data_len -= img_len;
4900+      }
4901+   }
4902+   a->out = final;
4903+
4904+   return 1;
4905+}
4906+
4907+static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4908+{
4909+   stbi__context *s = z->s;
4910+   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4911+   stbi_uc *p = z->out;
4912+
4913+   // compute color-based transparency, assuming we've
4914+   // already got 255 as the alpha value in the output
4915+   STBI_ASSERT(out_n == 2 || out_n == 4);
4916+
4917+   if (out_n == 2) {
4918+      for (i=0; i < pixel_count; ++i) {
4919+         p[1] = (p[0] == tc[0] ? 0 : 255);
4920+         p += 2;
4921+      }
4922+   } else {
4923+      for (i=0; i < pixel_count; ++i) {
4924+         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4925+            p[3] = 0;
4926+         p += 4;
4927+      }
4928+   }
4929+   return 1;
4930+}
4931+
4932+static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
4933+{
4934+   stbi__context *s = z->s;
4935+   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4936+   stbi__uint16 *p = (stbi__uint16*) z->out;
4937+
4938+   // compute color-based transparency, assuming we've
4939+   // already got 65535 as the alpha value in the output
4940+   STBI_ASSERT(out_n == 2 || out_n == 4);
4941+
4942+   if (out_n == 2) {
4943+      for (i = 0; i < pixel_count; ++i) {
4944+         p[1] = (p[0] == tc[0] ? 0 : 65535);
4945+         p += 2;
4946+      }
4947+   } else {
4948+      for (i = 0; i < pixel_count; ++i) {
4949+         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4950+            p[3] = 0;
4951+         p += 4;
4952+      }
4953+   }
4954+   return 1;
4955+}
4956+
4957+static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4958+{
4959+   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4960+   stbi_uc *p, *temp_out, *orig = a->out;
4961+
4962+   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
4963+   if (p == NULL) return stbi__err("outofmem", "Out of memory");
4964+
4965+   // between here and free(out) below, exitting would leak
4966+   temp_out = p;
4967+
4968+   if (pal_img_n == 3) {
4969+      for (i=0; i < pixel_count; ++i) {
4970+         int n = orig[i]*4;
4971+         p[0] = palette[n  ];
4972+         p[1] = palette[n+1];
4973+         p[2] = palette[n+2];
4974+         p += 3;
4975+      }
4976+   } else {
4977+      for (i=0; i < pixel_count; ++i) {
4978+         int n = orig[i]*4;
4979+         p[0] = palette[n  ];
4980+         p[1] = palette[n+1];
4981+         p[2] = palette[n+2];
4982+         p[3] = palette[n+3];
4983+         p += 4;
4984+      }
4985+   }
4986+   STBI_FREE(a->out);
4987+   a->out = temp_out;
4988+
4989+   STBI_NOTUSED(len);
4990+
4991+   return 1;
4992+}
4993+
// Process-wide settings written by stbi_set_unpremultiply_on_load() and
// stbi_convert_iphone_png_to_rgb(); both default to 0 (off).
static int stbi__unpremultiply_on_load_global = 0;
static int stbi__de_iphone_flag_global = 0;
4996+
// Store the global "unpremultiply on load" flag. The flag is consulted by
// stbi__de_iphone when it converts 4-component data.
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
}
5001+
// Store the global flag that enables the BGR->RGB conversion
// (stbi__de_iphone) for PNGs that contain a CgBI chunk.
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag_global = flag_true_if_should_convert;
}
5006+
// stbi__unpremultiply_on_load and stbi__de_iphone_flag are the names the
// decoder reads. Without STBI_THREAD_LOCAL they are plain aliases for the
// globals; with it, a per-thread value takes precedence once the matching
// *_thread setter has been called on that thread.
#ifndef STBI_THREAD_LOCAL
#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global
#define stbi__de_iphone_flag  stbi__de_iphone_flag_global
#else
// per-thread value plus a "has been set" marker for each flag
static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;

// Set the calling thread's "unpremultiply on load" flag and mark it as set.
STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
   stbi__unpremultiply_on_load_set = 1;
}

// Set the calling thread's iPhone-PNG conversion flag and mark it as set.
STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag_local = flag_true_if_should_convert;
   stbi__de_iphone_flag_set = 1;
}

// use the thread-local value if this thread set one, otherwise the global
#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \
                                       ? stbi__unpremultiply_on_load_local      \
                                       : stbi__unpremultiply_on_load_global)
#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \
                                ? stbi__de_iphone_flag_local                    \
                                : stbi__de_iphone_flag_global)
#endif // STBI_THREAD_LOCAL
5033+
5034+static void stbi__de_iphone(stbi__png *z)
5035+{
5036+   stbi__context *s = z->s;
5037+   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
5038+   stbi_uc *p = z->out;
5039+
5040+   if (s->img_out_n == 3) {  // convert bgr to rgb
5041+      for (i=0; i < pixel_count; ++i) {
5042+         stbi_uc t = p[0];
5043+         p[0] = p[2];
5044+         p[2] = t;
5045+         p += 3;
5046+      }
5047+   } else {
5048+      STBI_ASSERT(s->img_out_n == 4);
5049+      if (stbi__unpremultiply_on_load) {
5050+         // convert bgr to rgb and unpremultiply
5051+         for (i=0; i < pixel_count; ++i) {
5052+            stbi_uc a = p[3];
5053+            stbi_uc t = p[0];
5054+            if (a) {
5055+               stbi_uc half = a / 2;
5056+               p[0] = (p[2] * 255 + half) / a;
5057+               p[1] = (p[1] * 255 + half) / a;
5058+               p[2] = ( t   * 255 + half) / a;
5059+            } else {
5060+               p[0] = p[2];
5061+               p[2] = t;
5062+            }
5063+            p += 4;
5064+         }
5065+      } else {
5066+         // convert bgr to rgb
5067+         for (i=0; i < pixel_count; ++i) {
5068+            stbi_uc t = p[0];
5069+            p[0] = p[2];
5070+            p[2] = t;
5071+            p += 4;
5072+         }
5073+      }
5074+   }
5075+}
5076+
// Pack four characters into one unsigned value, first character in the most
// significant byte, for comparison with a chunk type read via stbi__get32be.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
5078+
5079+static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
5080+{
5081+   stbi_uc palette[1024], pal_img_n=0;
5082+   stbi_uc has_trans=0, tc[3]={0};
5083+   stbi__uint16 tc16[3];
5084+   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
5085+   int first=1,k,interlace=0, color=0, is_iphone=0;
5086+   stbi__context *s = z->s;
5087+
5088+   z->expanded = NULL;
5089+   z->idata = NULL;
5090+   z->out = NULL;
5091+
5092+   if (!stbi__check_png_header(s)) return 0;
5093+
5094+   if (scan == STBI__SCAN_type) return 1;
5095+
5096+   for (;;) {
5097+      stbi__pngchunk c = stbi__get_chunk_header(s);
5098+      switch (c.type) {
5099+         case STBI__PNG_TYPE('C','g','B','I'):
5100+            is_iphone = 1;
5101+            stbi__skip(s, c.length);
5102+            break;
5103+         case STBI__PNG_TYPE('I','H','D','R'): {
5104+            int comp,filter;
5105+            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
5106+            first = 0;
5107+            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
5108+            s->img_x = stbi__get32be(s);
5109+            s->img_y = stbi__get32be(s);
5110+            if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
5111+            if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
5112+            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
5113+            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
5114+            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
5115+            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
5116+            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
5117+            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
5118+            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
5119+            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
5120+            if (!pal_img_n) {
5121+               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
5122+               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
5123+            } else {
5124+               // if paletted, then pal_n is our final components, and
5125+               // img_n is # components to decompress/filter.
5126+               s->img_n = 1;
5127+               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
5128+            }
5129+            // even with SCAN_header, have to scan to see if we have a tRNS
5130+            break;
5131+         }
5132+
5133+         case STBI__PNG_TYPE('P','L','T','E'):  {
5134+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5135+            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
5136+            pal_len = c.length / 3;
5137+            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
5138+            for (i=0; i < pal_len; ++i) {
5139+               palette[i*4+0] = stbi__get8(s);
5140+               palette[i*4+1] = stbi__get8(s);
5141+               palette[i*4+2] = stbi__get8(s);
5142+               palette[i*4+3] = 255;
5143+            }
5144+            break;
5145+         }
5146+
5147+         case STBI__PNG_TYPE('t','R','N','S'): {
5148+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5149+            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
5150+            if (pal_img_n) {
5151+               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
5152+               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
5153+               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
5154+               pal_img_n = 4;
5155+               for (i=0; i < c.length; ++i)
5156+                  palette[i*4+3] = stbi__get8(s);
5157+            } else {
5158+               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
5159+               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
5160+               has_trans = 1;
5161+               // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
5162+               if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
5163+               if (z->depth == 16) {
5164+                  for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning
5165+                     tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
5166+               } else {
5167+                  for (k = 0; k < s->img_n && k < 3; ++k)
5168+                     tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
5169+               }
5170+            }
5171+            break;
5172+         }
5173+
5174+         case STBI__PNG_TYPE('I','D','A','T'): {
5175+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5176+            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
5177+            if (scan == STBI__SCAN_header) {
5178+               // header scan definitely stops at first IDAT
5179+               if (pal_img_n)
5180+                  s->img_n = pal_img_n;
5181+               return 1;
5182+            }
5183+            if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
5184+            if ((int)(ioff + c.length) < (int)ioff) return 0;
5185+            if (ioff + c.length > idata_limit) {
5186+               stbi__uint32 idata_limit_old = idata_limit;
5187+               stbi_uc *p;
5188+               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
5189+               while (ioff + c.length > idata_limit)
5190+                  idata_limit *= 2;
5191+               STBI_NOTUSED(idata_limit_old);
5192+               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
5193+               z->idata = p;
5194+            }
5195+            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
5196+            ioff += c.length;
5197+            break;
5198+         }
5199+
5200+         case STBI__PNG_TYPE('I','E','N','D'): {
5201+            stbi__uint32 raw_len, bpl;
5202+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5203+            if (scan != STBI__SCAN_load) return 1;
5204+            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
5205+            // initial guess for decoded data size to avoid unnecessary reallocs
5206+            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
5207+            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
5208+            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
5209+            if (z->expanded == NULL) return 0; // zlib should set error
5210+            STBI_FREE(z->idata); z->idata = NULL;
5211+            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
5212+               s->img_out_n = s->img_n+1;
5213+            else
5214+               s->img_out_n = s->img_n;
5215+            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
5216+            if (has_trans) {
5217+               if (z->depth == 16) {
5218+                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
5219+               } else {
5220+                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
5221+               }
5222+            }
5223+            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
5224+               stbi__de_iphone(z);
5225+            if (pal_img_n) {
5226+               // pal_img_n == 3 or 4
5227+               s->img_n = pal_img_n; // record the actual colors we had
5228+               s->img_out_n = pal_img_n;
5229+               if (req_comp >= 3) s->img_out_n = req_comp;
5230+               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
5231+                  return 0;
5232+            } else if (has_trans) {
5233+               // non-paletted image with tRNS -> source image has (constant) alpha
5234+               ++s->img_n;
5235+            }
5236+            STBI_FREE(z->expanded); z->expanded = NULL;
5237+            // end of PNG chunk, read and skip CRC
5238+            stbi__get32be(s);
5239+            return 1;
5240+         }
5241+
5242+         default:
5243+            // if critical, fail
5244+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
5245+            if ((c.type & (1 << 29)) == 0) {
5246+               #ifndef STBI_NO_FAILURE_STRINGS
5247+               // not threadsafe
5248+               static char invalid_chunk[] = "XXXX PNG chunk not known";
5249+               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
5250+               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
5251+               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
5252+               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
5253+               #endif
5254+               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
5255+            }
5256+            stbi__skip(s, c.length);
5257+            break;
5258+      }
5259+      // end of PNG chunk, read and skip CRC
5260+      stbi__get32be(s);
5261+   }
5262+}
5263+
5264+static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
5265+{
5266+   void *result=NULL;
5267+   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
5268+   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
5269+      if (p->depth <= 8)
5270+         ri->bits_per_channel = 8;
5271+      else if (p->depth == 16)
5272+         ri->bits_per_channel = 16;
5273+      else
5274+         return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
5275+      result = p->out;
5276+      p->out = NULL;
5277+      if (req_comp && req_comp != p->s->img_out_n) {
5278+         if (ri->bits_per_channel == 8)
5279+            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
5280+         else
5281+            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
5282+         p->s->img_out_n = req_comp;
5283+         if (result == NULL) return result;
5284+      }
5285+      *x = p->s->img_x;
5286+      *y = p->s->img_y;
5287+      if (n) *n = p->s->img_n;
5288+   }
5289+   STBI_FREE(p->out);      p->out      = NULL;
5290+   STBI_FREE(p->expanded); p->expanded = NULL;
5291+   STBI_FREE(p->idata);    p->idata    = NULL;
5292+
5293+   return result;
5294+}
5295+
5296+static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5297+{
5298+   stbi__png p;
5299+   p.s = s;
5300+   return stbi__do_png(&p, x,y,comp,req_comp, ri);
5301+}
5302+
5303+static int stbi__png_test(stbi__context *s)
5304+{
5305+   int r;
5306+   r = stbi__check_png_header(s);
5307+   stbi__rewind(s);
5308+   return r;
5309+}
5310+
5311+static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
5312+{
5313+   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
5314+      stbi__rewind( p->s );
5315+      return 0;
5316+   }
5317+   if (x) *x = p->s->img_x;
5318+   if (y) *y = p->s->img_y;
5319+   if (comp) *comp = p->s->img_n;
5320+   return 1;
5321+}
5322+
5323+static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
5324+{
5325+   stbi__png p;
5326+   p.s = s;
5327+   return stbi__png_info_raw(&p, x, y, comp);
5328+}
5329+
5330+static int stbi__png_is16(stbi__context *s)
5331+{
5332+   stbi__png p;
5333+   p.s = s;
5334+   if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
5335+	   return 0;
5336+   if (p.depth != 16) {
5337+      stbi__rewind(p.s);
5338+      return 0;
5339+   }
5340+   return 1;
5341+}
5342+#endif
5343+
5344+// Microsoft/Windows BMP image
5345+
5346+#ifndef STBI_NO_BMP
5347+static int stbi__bmp_test_raw(stbi__context *s)
5348+{
5349+   int r;
5350+   int sz;
5351+   if (stbi__get8(s) != 'B') return 0;
5352+   if (stbi__get8(s) != 'M') return 0;
5353+   stbi__get32le(s); // discard filesize
5354+   stbi__get16le(s); // discard reserved
5355+   stbi__get16le(s); // discard reserved
5356+   stbi__get32le(s); // discard data offset
5357+   sz = stbi__get32le(s);
5358+   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
5359+   return r;
5360+}
5361+
5362+static int stbi__bmp_test(stbi__context *s)
5363+{
5364+   int r = stbi__bmp_test_raw(s);
5365+   stbi__rewind(s);
5366+   return r;
5367+}
5368+
5369+
// Index (0..31) of the most significant set bit of z, or -1 when z is 0.
// Found by binary search: test the upper half of the remaining range and,
// if anything is set there, shift it down and add its width to the result.
static int stbi__high_bit(unsigned int z)
{
   static const int step[5] = { 16, 8, 4, 2, 1 };
   int idx = 0;
   int k;

   if (z == 0) return -1;

   for (k = 0; k < 5; ++k) {
      if ((z >> step[k]) != 0) {
         idx += step[k];
         z >>= step[k];
      }
   }
   return idx;
}
5382+
// Number of set bits in a, computed by summing adjacent bit fields of
// doubling width (pairs, nibbles, bytes, ...) in parallel.
static int stbi__bitcount(unsigned int a)
{
   unsigned int pairs, nibbles, bytes, halves, total;

   pairs   = (a & 0x55555555) + ((a >>  1) & 0x55555555);          // each 2-bit field: 0..2
   nibbles = (pairs & 0x33333333) + ((pairs >>  2) & 0x33333333);  // each 4-bit field: 0..4
   bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;              // each byte: 0..8
   halves  = bytes + (bytes >> 8);                                 // low byte of each half: 0..16
   total   = halves + (halves >> 16);                              // low byte: 0..32
   return total & 0xff;
}
5392+
// Extract a `bits`-wide field from v and widen it to 8 bits.
// v is first shifted right by `shift` (left by -shift when shift is negative)
// so the field lands in the top of the low byte; the field is then moved down
// to the bottom and replicated across 8 bits with a per-width multiplier and
// post-shift, so that an all-ones field maps to 255.
static int stbi__shiftsigned(unsigned int v, int shift, int bits)
{
   static unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   unsigned int replicated;

   if (shift >= 0)
      v >>= shift;
   else
      v <<= -shift;
   STBI_ASSERT(v < 256);
   v >>= (8-bits);
   STBI_ASSERT(bits >= 0 && bits <= 8);

   replicated = (unsigned) v * mul_table[bits];
   return (int) replicated >> shift_table[bits];
}
5415+// Header fields collected by stbi__bmp_parse_header() and consumed by stbi__bmp_load().
5416+typedef struct
5417+{
5418+   int bpp, offset, hsz; // bits-per-pixel field, data-offset field, and info-header size field, as read from the file
5419+   unsigned int mr,mg,mb,ma, all_a; // red/green/blue/alpha bit masks; all_a is seeded by the loader and ORed with every decoded alpha
5420+   int extra_read; // set to 14 by the header parser and increased by 12 when explicit bitfield masks are read
5421+} stbi__bmp_data;
5422+// Fills info's channel masks with defaults for compress == 0; returns 1 if compress is 0 or 3, and 0 for any other value.
5423+static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
5424+{
5425+   // BI_BITFIELDS specifies masks explicitly, don't override
5426+   if (compress == 3)
5427+      return 1;
5428+
5429+   if (compress == 0) {
5430+      if (info->bpp == 16) {
5431+         info->mr = 31u << 10; // three 5-bit fields in bits 10-14, 5-9 and 0-4; ma is left untouched here
5432+         info->mg = 31u <<  5;
5433+         info->mb = 31u <<  0;
5434+      } else if (info->bpp == 32) {
5435+         info->mr = 0xffu << 16; // 8 bits per channel, alpha in the top byte
5436+         info->mg = 0xffu <<  8;
5437+         info->mb = 0xffu <<  0;
5438+         info->ma = 0xffu << 24;
5439+         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
5440+      } else {
5441+         // otherwise, use defaults, which is all-0
5442+         info->mr = info->mg = info->mb = info->ma = 0;
5443+      }
5444+      return 1;
5445+   }
5446+   return 0; // error
5447+}
5448+// Reads the BMP file header and info header into *info and s->img_x/img_y; returns (void *) 1 on success, or the stbi__errpuc() result when the header is rejected.
5449+static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
5450+{
5451+   int hsz;
5452+   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
5453+   stbi__get32le(s); // discard filesize
5454+   stbi__get16le(s); // discard reserved
5455+   stbi__get16le(s); // discard reserved
5456+   info->offset = stbi__get32le(s);
5457+   info->hsz = hsz = stbi__get32le(s);
5458+   info->mr = info->mg = info->mb = info->ma = 0;
5459+   info->extra_read = 14; // presumably the bytes consumed before the header-size field (2+4+2+2+4) - confirm against the get* helpers
5460+
5461+   if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP");
5462+
5463+   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
5464+   if (hsz == 12) { // the 12-byte header stores width/height as 16-bit fields, every other size as 32-bit
5465+      s->img_x = stbi__get16le(s);
5466+      s->img_y = stbi__get16le(s);
5467+   } else {
5468+      s->img_x = stbi__get32le(s);
5469+      s->img_y = stbi__get32le(s);
5470+   }
5471+   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); // this 16-bit field must equal 1
5472+   info->bpp = stbi__get16le(s);
5473+   if (hsz != 12) {
5474+      int compress = stbi__get32le(s);
5475+      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
5476+      if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
5477+      if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
5478+      stbi__get32le(s); // discard sizeof
5479+      stbi__get32le(s); // discard hres
5480+      stbi__get32le(s); // discard vres
5481+      stbi__get32le(s); // discard colorsused
5482+      stbi__get32le(s); // discard max important
5483+      if (hsz == 40 || hsz == 56) {
5484+         if (hsz == 56) { // the 56-byte variant carries four more 32-bit fields, all discarded
5485+            stbi__get32le(s);
5486+            stbi__get32le(s);
5487+            stbi__get32le(s);
5488+            stbi__get32le(s);
5489+         }
5490+         if (info->bpp == 16 || info->bpp == 32) {
5491+            if (compress == 0) {
5492+               stbi__bmp_set_mask_defaults(info, compress); // return value unused; the helper returns 1 for compress == 0
5493+            } else if (compress == 3) {
5494+               info->mr = stbi__get32le(s);
5495+               info->mg = stbi__get32le(s);
5496+               info->mb = stbi__get32le(s);
5497+               info->extra_read += 12; // three explicit 32-bit masks were consumed beyond the header proper
5498+               // not documented, but generated by photoshop and handled by mspaint
5499+               if (info->mr == info->mg && info->mg == info->mb) {
5500+                  // ?!?!?
5501+                  return stbi__errpuc("bad BMP", "bad BMP"); // three identical masks are rejected
5502+               }
5503+            } else
5504+               return stbi__errpuc("bad BMP", "bad BMP");
5505+         }
5506+      } else {
5507+         // V4/V5 header
5508+         int i;
5509+         if (hsz != 108 && hsz != 124)
5510+            return stbi__errpuc("bad BMP", "bad BMP");
5511+         info->mr = stbi__get32le(s);
5512+         info->mg = stbi__get32le(s);
5513+         info->mb = stbi__get32le(s);
5514+         info->ma = stbi__get32le(s);
5515+         if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
5516+            stbi__bmp_set_mask_defaults(info, compress); // NOTE(review): a 0 (error) return is ignored here; it could only occur if compress ends up negative as an int - confirm
5517+         stbi__get32le(s); // discard color space
5518+         for (i=0; i < 12; ++i)
5519+            stbi__get32le(s); // discard color space parameters
5520+         if (hsz == 124) {
5521+            stbi__get32le(s); // discard rendering intent
5522+            stbi__get32le(s); // discard offset of profile data
5523+            stbi__get32le(s); // discard size of profile data
5524+            stbi__get32le(s); // discard reserved
5525+         }
5526+      }
5527+   }
5528+   return (void *) 1; // non-NULL success marker; stbi__bmp_load only compares the result against NULL
5529+}
5530+// Decodes a BMP from s into a newly allocated 8-bit buffer, writes width/height to *x/*y and the source channel count to *comp (if non-NULL).
5531+// Decoding happens with `target` channels (3 or 4) and is converted to req_comp afterwards if they differ; failures return the stbi__errpuc() result.
5532+static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5533+{
5534+   stbi_uc *out;
5535+   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
5536+   stbi_uc pal[256][4]; // NOTE(review): not initialised; only the first psize entries are filled below
5537+   int psize=0,i,j,width;
5538+   int flip_vertically, pad, target;
5539+   stbi__bmp_data info;
5540+   STBI_NOTUSED(ri);
5541+
5542+   info.all_a = 255; // default; stbi__bmp_set_mask_defaults resets it to 0 for uncompressed 32-bit data
5543+   if (stbi__bmp_parse_header(s, &info) == NULL)
5544+      return NULL; // error code already set
5545+
5546+   flip_vertically = ((int) s->img_y) > 0; // a positive stored height means the rows get reversed at the end
5547+   s->img_y = abs((int) s->img_y); // NOTE(review): abs() of the most negative int is undefined; the value comes straight from the file
5548+
5549+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5550+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5551+
5552+   mr = info.mr;
5553+   mg = info.mg;
5554+   mb = info.mb;
5555+   ma = info.ma;
5556+   all_a = info.all_a;
5557+
5558+   if (info.hsz == 12) {
5559+      if (info.bpp < 24)
5560+         psize = (info.offset - info.extra_read - 24) / 3; // NOTE(review): subtracts 24 rather than info.hsz (12 here), unlike the branch below - confirm intent
5561+   } else {
5562+      if (info.bpp < 16)
5563+         psize = (info.offset - info.extra_read - info.hsz) >> 2; // 4 bytes per palette entry for the larger headers
5564+   }
5565+   if (psize == 0) {
5566+      // accept some number of extra bytes after the header, but if the offset points either to before
5567+      // the header ends or implies a large amount of extra data, reject the file as malformed
5568+      int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
5569+      int header_limit = 1024; // max we actually read is below 256 bytes currently.
5570+      int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
5571+      if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
5572+         return stbi__errpuc("bad header", "Corrupt BMP");
5573+      }
5574+      // we established that bytes_read_so_far is positive and sensible.
5575+      // the first half of this test rejects offsets that are either too small positives, or
5576+      // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
5577+      // ensures the number computed in the second half of the test can't overflow.
5578+      if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
5579+         return stbi__errpuc("bad offset", "Corrupt BMP");
5580+      } else {
5581+         stbi__skip(s, info.offset - bytes_read_so_far);
5582+      }
5583+   }
5584+
5585+   if (info.bpp == 24 && ma == 0xff000000)
5586+      s->img_n = 3; // 24-bit data is reported as 3 channels even when this alpha mask is set
5587+   else
5588+      s->img_n = ma ? 4 : 3;
5589+   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5590+      target = req_comp;
5591+   else
5592+      target = s->img_n; // if they want monochrome, we'll post-convert
5593+
5594+   // sanity-check size
5595+   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5596+      return stbi__errpuc("too large", "Corrupt BMP");
5597+
5598+   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5599+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
5600+   if (info.bpp < 16) { // palettised formats: 1, 4 or 8 bits per pixel
5601+      int z=0;
5602+      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
5603+      for (i=0; i < psize; ++i) { // palette bytes are read into slots 2,1,0 in that order, i.e. the file order is reversed on store
5604+         pal[i][2] = stbi__get8(s);
5605+         pal[i][1] = stbi__get8(s);
5606+         pal[i][0] = stbi__get8(s);
5607+         if (info.hsz != 12) stbi__get8(s); // a fourth byte per entry is read and dropped for the non-12-byte headers
5608+         pal[i][3] = 255;
5609+      }
5610+      stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); // skip whatever lies between the palette and the pixel data
5611+      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
5612+      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5613+      else if (info.bpp == 8) width = s->img_x;
5614+      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
5615+      pad = (-width)&3; // bytes needed to round each row up to a multiple of 4
5616+      if (info.bpp == 1) {
5617+         for (j=0; j < (int) s->img_y; ++j) {
5618+            int bit_offset = 7, v = stbi__get8(s);
5619+            for (i=0; i < (int) s->img_x; ++i) {
5620+               int color = (v>>bit_offset)&0x1; // NOTE(review): reads pal[0]/pal[1] even if psize is 1, in which case pal[1] was never written
5621+               out[z++] = pal[color][0];
5622+               out[z++] = pal[color][1];
5623+               out[z++] = pal[color][2];
5624+               if (target == 4) out[z++] = 255;
5625+               if (i+1 == (int) s->img_x) break;
5626+               if((--bit_offset) < 0) {
5627+                  bit_offset = 7;
5628+                  v = stbi__get8(s);
5629+               }
5630+            }
5631+            stbi__skip(s, pad);
5632+         }
5633+      } else {
5634+         for (j=0; j < (int) s->img_y; ++j) {
5635+            for (i=0; i < (int) s->img_x; i += 2) { // two pixels per iteration: one byte holds two 4-bit indices, or two bytes are read for 8-bit
5636+               int v=stbi__get8(s),v2=0;
5637+               if (info.bpp == 4) {
5638+                  v2 = v & 15;
5639+                  v >>= 4;
5640+               }
5641+               out[z++] = pal[v][0]; // NOTE(review): v comes from the file and is not compared with psize, so an unfilled pal entry can be read
5642+               out[z++] = pal[v][1];
5643+               out[z++] = pal[v][2];
5644+               if (target == 4) out[z++] = 255;
5645+               if (i+1 == (int) s->img_x) break;
5646+               v = (info.bpp == 8) ? stbi__get8(s) : v2;
5647+               out[z++] = pal[v][0];
5648+               out[z++] = pal[v][1];
5649+               out[z++] = pal[v][2];
5650+               if (target == 4) out[z++] = 255;
5651+            }
5652+            stbi__skip(s, pad);
5653+         }
5654+      }
5655+   } else { // direct-colour formats: 16, 24 or 32 bits per pixel
5656+      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
5657+      int z = 0;
5658+      int easy=0;
5659+      stbi__skip(s, info.offset - info.extra_read - info.hsz);
5660+      if (info.bpp == 24) width = 3 * s->img_x;
5661+      else if (info.bpp == 16) width = 2*s->img_x;
5662+      else /* bpp = 32 and pad = 0 */ width=0;
5663+      pad = (-width) & 3;
5664+      if (info.bpp == 24) {
5665+         easy = 1; // plain byte triplets, no alpha byte
5666+      } else if (info.bpp == 32) {
5667+         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5668+            easy = 2; // byte-aligned masks in the default layout: bytes can be copied without mask arithmetic
5669+      }
5670+      if (!easy) {
5671+         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5672+         // right shift amt to put high bit in position #7
5673+         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
5674+         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
5675+         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
5676+         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
5677+         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5678+      }
5679+      for (j=0; j < (int) s->img_y; ++j) {
5680+         if (easy) {
5681+            for (i=0; i < (int) s->img_x; ++i) {
5682+               unsigned char a;
5683+               out[z+2] = stbi__get8(s); // three file bytes land in output slots 2,1,0, reversing the stored channel order
5684+               out[z+1] = stbi__get8(s);
5685+               out[z+0] = stbi__get8(s);
5686+               z += 3;
5687+               a = (easy == 2 ? stbi__get8(s) : 255);
5688+               all_a |= a;
5689+               if (target == 4) out[z++] = a;
5690+            }
5691+         } else {
5692+            int bpp = info.bpp;
5693+            for (i=0; i < (int) s->img_x; ++i) {
5694+               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
5695+               unsigned int a;
5696+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5697+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5698+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5699+               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5700+               all_a |= a;
5701+               if (target == 4) out[z++] = STBI__BYTECAST(a);
5702+            }
5703+         }
5704+         stbi__skip(s, pad);
5705+      }
5706+   }
5707+
5708+   // if alpha channel is all 0s, replace with all 255s
5709+   if (target == 4 && all_a == 0)
5710+      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) // walks the 4th byte of every pixel from the end of the buffer
5711+         out[i] = 255;
5712+
5713+   if (flip_vertically) {
5714+      stbi_uc t;
5715+      for (j=0; j < (int) s->img_y>>1; ++j) { // swap row j with row (height-1-j), byte by byte
5716+         stbi_uc *p1 = out +      j     *s->img_x*target;
5717+         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
5718+         for (i=0; i < (int) s->img_x*target; ++i) {
5719+            t = p1[i]; p1[i] = p2[i]; p2[i] = t;
5720+         }
5721+      }
5722+   }
5723+
5724+   if (req_comp && req_comp != target) {
5725+      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5726+      if (out == NULL) return out; // stbi__convert_format frees input on failure
5727+   }
5728+
5729+   *x = s->img_x;
5730+   *y = s->img_y;
5731+   if (comp) *comp = s->img_n; // reports the channel count derived from the file, not target or req_comp
5732+   return out;
5733+}
5734+#endif
5735+
5736+// Targa Truevision - TGA
5737+// by Jonathan Dummer
5738+#ifndef STBI_NO_TGA
5739+// Maps a TGA bits-per-pixel value to a component count: returns STBI_rgb or whatever, 0 on error; *is_rgb16 (if non-NULL) is set to 1 for 15/16-bit RGB and 0 otherwise
5740+static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5741+{
5742+   // only RGB or RGBA (incl. 16bit) or grey allowed
5743+   if (is_rgb16) *is_rgb16 = 0;
5744+   switch(bits_per_pixel) {
5745+      case 8:  return STBI_grey;
5746+      case 16: if(is_grey) return STBI_grey_alpha; // 16-bit grey is grey plus alpha; 16-bit non-grey is handled like 15-bit
5747+               // fallthrough
5748+      case 15: if(is_rgb16) *is_rgb16 = 1;
5749+               return STBI_rgb;
5750+      case 24: // fallthrough
5751+      case 32: return bits_per_pixel/8; // 3 or 4 components
5752+      default: return 0;
5753+   }
5754+}
5755+// Parses a TGA header and stores width, height and component count through x/y/comp (each optional); returns 1 on success, or calls stbi__rewind(s) and returns 0 on an unsupported header.
5756+static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
5757+{
5758+    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
5759+    int sz, tga_colormap_type;
5760+    stbi__get8(s);                   // discard Offset
5761+    tga_colormap_type = stbi__get8(s); // colormap type
5762+    if( tga_colormap_type > 1 ) {
5763+        stbi__rewind(s);
5764+        return 0;      // only RGB or indexed allowed
5765+    }
5766+    tga_image_type = stbi__get8(s); // image type
5767+    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
5768+        if (tga_image_type != 1 && tga_image_type != 9) {
5769+            stbi__rewind(s);
5770+            return 0;
5771+        }
5772+        stbi__skip(s,4);       // skip index of first colormap entry and number of entries
5773+        sz = stbi__get8(s);    //   check bits per palette color entry
5774+        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
5775+            stbi__rewind(s);
5776+            return 0;
5777+        }
5778+        stbi__skip(s,4);       // skip image x and y origin
5779+        tga_colormap_bpp = sz;
5780+    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
5781+        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
5782+            stbi__rewind(s);
5783+            return 0; // only RGB or grey allowed, +/- RLE
5784+        }
5785+        stbi__skip(s,9); // skip colormap specification and image x/y origin
5786+        tga_colormap_bpp = 0; // 0 doubles as the "no colormap" marker tested further down
5787+    }
5788+    tga_w = stbi__get16le(s);
5789+    if( tga_w < 1 ) {
5790+        stbi__rewind(s);
5791+        return 0;   // test width
5792+    }
5793+    tga_h = stbi__get16le(s);
5794+    if( tga_h < 1 ) {
5795+        stbi__rewind(s);
5796+        return 0;   // test height
5797+    }
5798+    tga_bits_per_pixel = stbi__get8(s); // bits per pixel
5799+    stbi__get8(s); // ignore alpha bits
5800+    if (tga_colormap_bpp != 0) {
5801+        if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
5802+            // when using a colormap, tga_bits_per_pixel is the size of the indexes
5803+            // I don't think anything but 8 or 16bit indexes makes sense
5804+            stbi__rewind(s);
5805+            return 0;
5806+        }
5807+        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); // component count comes from the palette entry size
5808+    } else {
5809+        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); // types 3 and 11 are treated as grey
5810+    }
5811+    if(!tga_comp) {
5812+      stbi__rewind(s);
5813+      return 0;
5814+    }
5815+    if (x) *x = tga_w;
5816+    if (y) *y = tga_h;
5817+    if (comp) *comp = tga_comp;
5818+    return 1;                   // seems to have passed everything
5819+}
5820+// Header plausibility check for TGA: returns 1 if the header fields pass the checks below, else 0; stbi__rewind(s) is called on every path.
5821+static int stbi__tga_test(stbi__context *s)
5822+{
5823+   int res = 0;
5824+   int sz, tga_color_type;
5825+   stbi__get8(s);      //   discard Offset
5826+   tga_color_type = stbi__get8(s);   //   color type
5827+   if ( tga_color_type > 1 ) goto errorEnd;   //   only RGB or indexed allowed
5828+   sz = stbi__get8(s);   //   image type
5829+   if ( tga_color_type == 1 ) { // colormapped (paletted) image
5830+      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
5831+      stbi__skip(s,4);       // skip index of first colormap entry and number of entries
5832+      sz = stbi__get8(s);    //   check bits per palette color entry
5833+      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5834+      stbi__skip(s,4);       // skip image x and y origin
5835+   } else { // "normal" image w/o colormap
5836+      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
5837+      stbi__skip(s,9); // skip colormap specification and image x/y origin
5838+   }
5839+   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test width
5840+   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test height
5841+   sz = stbi__get8(s);   //   bits per pixel
5842+   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
5843+   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
5844+
5845+   res = 1; // if we got this far, everything's good and we can return 1 instead of 0
5846+
5847+errorEnd: // shared exit: reached by fallthrough on success as well as by goto on failure
5848+   stbi__rewind(s);
5849+   return res;
5850+}
5851+// Reads one little-endian 16-bit pixel from s and writes exactly three bytes (out[0..2]); out[3] is never touched.
5852+// read 16bit value and convert to 24bit RGB
5853+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5854+{
5855+   stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5856+   stbi__uint16 fiveBitMask = 31;
5857+   // we have 3 channels with 5bits each
5858+   int r = (px >> 10) & fiveBitMask;
5859+   int g = (px >> 5) & fiveBitMask;
5860+   int b = px & fiveBitMask;
5861+   // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5862+   out[0] = (stbi_uc)((r * 255)/31); // rescale 0..31 to 0..255
5863+   out[1] = (stbi_uc)((g * 255)/31);
5864+   out[2] = (stbi_uc)((b * 255)/31);
5865+
5866+   // some people claim that the most significant bit might be used for alpha
5867+   // (possibly if an alpha-bit is set in the "image descriptor byte")
5868+   // but that only made 16bit test images completely translucent..
5869+   // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5870+}
5871+// Decodes a TGA image from s into a newly allocated buffer of tga_width*tga_height pixels, writes the dimensions to *x/*y and the source component count to *comp (if non-NULL), and converts to req_comp at the end when requested.
5872+static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5873+{
5874+   //   read in the TGA header stuff
5875+   int tga_offset = stbi__get8(s);
5876+   int tga_indexed = stbi__get8(s);
5877+   int tga_image_type = stbi__get8(s);
5878+   int tga_is_RLE = 0;
5879+   int tga_palette_start = stbi__get16le(s);
5880+   int tga_palette_len = stbi__get16le(s);
5881+   int tga_palette_bits = stbi__get8(s);
5882+   int tga_x_origin = stbi__get16le(s);
5883+   int tga_y_origin = stbi__get16le(s);
5884+   int tga_width = stbi__get16le(s);
5885+   int tga_height = stbi__get16le(s);
5886+   int tga_bits_per_pixel = stbi__get8(s);
5887+   int tga_comp, tga_rgb16=0;
5888+   int tga_inverted = stbi__get8(s);
5889+   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
5890+   //   image data
5891+   unsigned char *tga_data;
5892+   unsigned char *tga_palette = NULL;
5893+   int i, j;
5894+   unsigned char raw_data[4] = {0};
5895+   int RLE_count = 0;
5896+   int RLE_repeating = 0;
5897+   int read_next_pixel = 1;
5898+   STBI_NOTUSED(ri);
5899+   STBI_NOTUSED(tga_x_origin); // @TODO
5900+   STBI_NOTUSED(tga_y_origin); // @TODO
5901+
5902+   if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5903+   if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
5904+
5905+   //   do a tiny bit of processing
5906+   if ( tga_image_type >= 8 )
5907+   {
5908+      tga_image_type -= 8; // types 8 and above are folded onto their base type with the RLE flag set
5909+      tga_is_RLE = 1;
5910+   }
5911+   tga_inverted = 1 - ((tga_inverted >> 5) & 1); // becomes 1 (rows get flipped below) when bit 5 of the descriptor byte is clear
5912+
5913+   //   If I'm paletted, then I'll use the number of bits from the palette
5914+   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
5915+   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
5916+
5917+   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
5918+      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
5919+
5920+   //   tga info
5921+   *x = tga_width; // outputs are written before allocation, so they are set even if a later step fails
5922+   *y = tga_height;
5923+   if (comp) *comp = tga_comp;
5924+
5925+   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
5926+      return stbi__errpuc("too large", "Corrupt TGA");
5927+
5928+   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
5929+   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
5930+
5931+   // skip to the data's starting position (offset usually = 0)
5932+   stbi__skip(s, tga_offset );
5933+
5934+   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { // raw, unpaletted, non-16-bit data: rows are read straight into their final position
5935+      for (i=0; i < tga_height; ++i) {
5936+         int row = tga_inverted ? tga_height -i - 1 : i;
5937+         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
5938+         stbi__getn(s, tga_row, tga_width * tga_comp); // NOTE(review): return value ignored here, unlike the palette read below; on a short read the row may keep whatever the allocator returned - confirm
5939+      }
5940+   } else  {
5941+      //   do I need to load a palette?
5942+      if ( tga_indexed)
5943+      {
5944+         if (tga_palette_len == 0) {  /* you have to have at least one entry! */
5945+            STBI_FREE(tga_data);
5946+            return stbi__errpuc("bad palette", "Corrupt TGA");
5947+         }
5948+
5949+         //   any data to skip? (offset usually = 0)
5950+         stbi__skip(s, tga_palette_start );
5951+         //   load the palette
5952+         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
5953+         if (!tga_palette) {
5954+            STBI_FREE(tga_data);
5955+            return stbi__errpuc("outofmem", "Out of memory");
5956+         }
5957+         if (tga_rgb16) {
5958+            stbi_uc *pal_entry = tga_palette;
5959+            STBI_ASSERT(tga_comp == STBI_rgb);
5960+            for (i=0; i < tga_palette_len; ++i) {
5961+               stbi__tga_read_rgb16(s, pal_entry);
5962+               pal_entry += tga_comp;
5963+            }
5964+         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
5965+               STBI_FREE(tga_data);
5966+               STBI_FREE(tga_palette);
5967+               return stbi__errpuc("bad palette", "Corrupt TGA");
5968+         }
5969+      }
5970+      //   load the data
5971+      for (i=0; i < tga_width * tga_height; ++i)
5972+      {
5973+         //   if I'm in RLE mode, do I need to get a RLE chunk?
5974+         if ( tga_is_RLE )
5975+         {
5976+            if ( RLE_count == 0 )
5977+            {
5978+               //   yep, get the next byte as a RLE command
5979+               int RLE_cmd = stbi__get8(s);
5980+               RLE_count = 1 + (RLE_cmd & 127); // low 7 bits hold the run length minus one
5981+               RLE_repeating = RLE_cmd >> 7; // top bit set: one pixel is read and reused for the whole run
5982+               read_next_pixel = 1;
5983+            } else if ( !RLE_repeating )
5984+            {
5985+               read_next_pixel = 1;
5986+            }
5987+         } else
5988+         {
5989+            read_next_pixel = 1;
5990+         }
5991+         //   OK, if I need to read a pixel, do it now
5992+         if ( read_next_pixel )
5993+         {
5994+            //   load however much data we did have
5995+            if ( tga_indexed )
5996+            {
5997+               // read in index, then perform the lookup
5998+               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
5999+               if ( pal_idx >= tga_palette_len ) {
6000+                  // invalid index
6001+                  pal_idx = 0; // out-of-range indices are mapped to palette entry 0 instead of failing the load
6002+               }
6003+               pal_idx *= tga_comp;
6004+               for (j = 0; j < tga_comp; ++j) {
6005+                  raw_data[j] = tga_palette[pal_idx+j];
6006+               }
6007+            } else if(tga_rgb16) {
6008+               STBI_ASSERT(tga_comp == STBI_rgb);
6009+               stbi__tga_read_rgb16(s, raw_data);
6010+            } else {
6011+               //   read in the data raw
6012+               for (j = 0; j < tga_comp; ++j) {
6013+                  raw_data[j] = stbi__get8(s);
6014+               }
6015+            }
6016+            //   clear the reading flag for the next pixel
6017+            read_next_pixel = 0;
6018+         } // end of reading a pixel
6019+
6020+         // copy data
6021+         for (j = 0; j < tga_comp; ++j)
6022+           tga_data[i*tga_comp+j] = raw_data[j];
6023+
6024+         //   in case we're in RLE mode, keep counting down
6025+         --RLE_count; // also decremented for non-RLE data, where the value is never consulted
6026+      }
6027+      //   do I need to invert the image?
6028+      if ( tga_inverted )
6029+      {
6030+         for (j = 0; j*2 < tga_height; ++j) // swap row j with row (height-1-j) one byte at a time
6031+         {
6032+            int index1 = j * tga_width * tga_comp;
6033+            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
6034+            for (i = tga_width * tga_comp; i > 0; --i)
6035+            {
6036+               unsigned char temp = tga_data[index1];
6037+               tga_data[index1] = tga_data[index2];
6038+               tga_data[index2] = temp;
6039+               ++index1;
6040+               ++index2;
6041+            }
6042+         }
6043+      }
6044+      //   clear my palette, if I had one
6045+      if ( tga_palette != NULL )
6046+      {
6047+         STBI_FREE( tga_palette );
6048+      }
6049+   }
6050+
6051+   // swap RGB - if the source data was RGB16, it already is in the right order
6052+   if (tga_comp >= 3 && !tga_rgb16)
6053+   {
6054+      unsigned char* tga_pixel = tga_data;
6055+      for (i=0; i < tga_width * tga_height; ++i)
6056+      {
6057+         unsigned char temp = tga_pixel[0]; // exchange the first and third byte of each pixel
6058+         tga_pixel[0] = tga_pixel[2];
6059+         tga_pixel[2] = temp;
6060+         tga_pixel += tga_comp;
6061+      }
6062+   }
6063+
6064+   // convert to target component count
6065+   if (req_comp && req_comp != tga_comp)
6066+      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); // the result, whatever it is, is what gets returned below
6067+
6068+   //   the things I do to get rid of an error message, and yet keep
6069+   //   Microsoft's C compilers happy... [8^(
6070+   tga_palette_start = tga_palette_len = tga_palette_bits =
6071+         tga_x_origin = tga_y_origin = 0;
6072+   STBI_NOTUSED(tga_palette_start);
6073+   //   OK, done
6074+   return tga_data;
6075+}
6076+#endif
6077+
6078+// *************************************************************************************************
6079+// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
6080+
6081+#ifndef STBI_NO_PSD
6082+static int stbi__psd_test(stbi__context *s) // 1 if the first big-endian 32-bit word equals 0x38425053, else 0; calls stbi__rewind(s) either way
6083+{
6084+   int r = (stbi__get32be(s) == 0x38425053); // 0x38425053 is the ASCII bytes '8','B','P','S'
6085+   stbi__rewind(s);
6086+   return r;
6087+}
6088+// Expands run-length-encoded bytes from s into p, writing every 4th byte, until pixelCount values are produced; returns 1 on success, 0 if a run would exceed the remaining count.
6089+static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
6090+{
6091+   int count, nleft, len;
6092+
6093+   count = 0;
6094+   while ((nleft = pixelCount - count) > 0) {
6095+      len = stbi__get8(s);
6096+      if (len == 128) {
6097+         // No-op.
6098+      } else if (len < 128) {
6099+         // Copy next len+1 bytes literally.
6100+         len++;
6101+         if (len > nleft) return 0; // corrupt data
6102+         count += len;
6103+         while (len) {
6104+            *p = stbi__get8(s);
6105+            p += 4; // stride of 4: presumably one channel of an interleaved 4-channel buffer - confirm against the caller
6106+            len--;
6107+         }
6108+      } else if (len > 128) {
6109+         stbi_uc   val;
6110+         // Next -len+1 bytes in the dest are replicated from next source byte.
6111+         // (Interpret len as a negative 8-bit int.)
6112+         len = 257 - len; // maps 129..255 to run lengths 128..2
6113+         if (len > nleft) return 0; // corrupt data
6114+         val = stbi__get8(s);
6115+         count += len;
6116+         while (len) {
6117+            *p = val;
6118+            p += 4;
6119+            len--;
6120+         }
6121+      }
6122+   }
6123+
6124+   return 1;
6125+}
6126+
6127+static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
6128+{
6129+   int pixelCount;
6130+   int channelCount, compression;
6131+   int channel, i;
6132+   int bitdepth;
6133+   int w,h;
6134+   stbi_uc *out;
6135+   STBI_NOTUSED(ri);
6136+
6137+   // Check identifier
6138+   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
6139+      return stbi__errpuc("not PSD", "Corrupt PSD image");
6140+
6141+   // Check file type version.
6142+   if (stbi__get16be(s) != 1)
6143+      return stbi__errpuc("wrong version", "Unsupported version of PSD image");
6144+
6145+   // Skip 6 reserved bytes.
6146+   stbi__skip(s, 6 );
6147+
6148+   // Read the number of channels (R, G, B, A, etc).
6149+   channelCount = stbi__get16be(s);
6150+   if (channelCount < 0 || channelCount > 16)
6151+      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
6152+
6153+   // Read the rows and columns of the image.
6154+   h = stbi__get32be(s);
6155+   w = stbi__get32be(s);
6156+
6157+   if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6158+   if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6159+
6160+   // Make sure the depth is 8 bits.
6161+   bitdepth = stbi__get16be(s);
6162+   if (bitdepth != 8 && bitdepth != 16)
6163+      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
6164+
6165+   // Make sure the color mode is RGB.
6166+   // Valid options are:
6167+   //   0: Bitmap
6168+   //   1: Grayscale
6169+   //   2: Indexed color
6170+   //   3: RGB color
6171+   //   4: CMYK color
6172+   //   7: Multichannel
6173+   //   8: Duotone
6174+   //   9: Lab color
6175+   if (stbi__get16be(s) != 3)
6176+      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
6177+
6178+   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
6179+   stbi__skip(s,stbi__get32be(s) );
6180+
6181+   // Skip the image resources.  (resolution, pen tool paths, etc)
6182+   stbi__skip(s, stbi__get32be(s) );
6183+
6184+   // Skip the reserved data.
6185+   stbi__skip(s, stbi__get32be(s) );
6186+
6187+   // Find out if the data is compressed.
6188+   // Known values:
6189+   //   0: no compression
6190+   //   1: RLE compressed
6191+   compression = stbi__get16be(s);
6192+   if (compression > 1)
6193+      return stbi__errpuc("bad compression", "PSD has an unknown compression format");
6194+
6195+   // Check size
6196+   if (!stbi__mad3sizes_valid(4, w, h, 0))
6197+      return stbi__errpuc("too large", "Corrupt PSD");
6198+
6199+   // Create the destination image.
6200+
6201+   if (!compression && bitdepth == 16 && bpc == 16) {
6202+      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
6203+      ri->bits_per_channel = 16;
6204+   } else
6205+      out = (stbi_uc *) stbi__malloc(4 * w*h);
6206+
6207+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
6208+   pixelCount = w*h;
6209+
6210+   // Initialize the data to zero.
6211+   //memset( out, 0, pixelCount * 4 );
6212+
6213+   // Finally, the image data.
6214+   if (compression) {
6215+      // RLE as used by .PSD and .TIFF
6216+      // Loop until you get the number of unpacked bytes you are expecting:
6217+      //     Read the next source byte into n.
6218+      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
6219+      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
6220+      //     Else if n is 128, noop.
6221+      // Endloop
6222+
6223+      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
6224+      // which we're going to just skip.
6225+      stbi__skip(s, h * channelCount * 2 );
6226+
6227+      // Read the RLE data by channel.
6228+      for (channel = 0; channel < 4; channel++) {
6229+         stbi_uc *p;
6230+
6231+         p = out+channel;
6232+         if (channel >= channelCount) {
6233+            // Fill this channel with default data.
6234+            for (i = 0; i < pixelCount; i++, p += 4)
6235+               *p = (channel == 3 ? 255 : 0);
6236+         } else {
6237+            // Read the RLE data.
6238+            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
6239+               STBI_FREE(out);
6240+               return stbi__errpuc("corrupt", "bad RLE data");
6241+            }
6242+         }
6243+      }
6244+
6245+   } else {
6246+      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
6247+      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
6248+
6249+      // Read the data by channel.
6250+      for (channel = 0; channel < 4; channel++) {
6251+         if (channel >= channelCount) {
6252+            // Fill this channel with default data.
6253+            if (bitdepth == 16 && bpc == 16) {
6254+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
6255+               stbi__uint16 val = channel == 3 ? 65535 : 0;
6256+               for (i = 0; i < pixelCount; i++, q += 4)
6257+                  *q = val;
6258+            } else {
6259+               stbi_uc *p = out+channel;
6260+               stbi_uc val = channel == 3 ? 255 : 0;
6261+               for (i = 0; i < pixelCount; i++, p += 4)
6262+                  *p = val;
6263+            }
6264+         } else {
6265+            if (ri->bits_per_channel == 16) {    // output bpc
6266+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
6267+               for (i = 0; i < pixelCount; i++, q += 4)
6268+                  *q = (stbi__uint16) stbi__get16be(s);
6269+            } else {
6270+               stbi_uc *p = out+channel;
6271+               if (bitdepth == 16) {  // input bpc
6272+                  for (i = 0; i < pixelCount; i++, p += 4)
6273+                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
6274+               } else {
6275+                  for (i = 0; i < pixelCount; i++, p += 4)
6276+                     *p = stbi__get8(s);
6277+               }
6278+            }
6279+         }
6280+      }
6281+   }
6282+
6283+   // remove weird white matte from PSD
6284+   if (channelCount >= 4) {
6285+      if (ri->bits_per_channel == 16) {
6286+         for (i=0; i < w*h; ++i) {
6287+            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
6288+            if (pixel[3] != 0 && pixel[3] != 65535) {
6289+               float a = pixel[3] / 65535.0f;
6290+               float ra = 1.0f / a;
6291+               float inv_a = 65535.0f * (1 - ra);
6292+               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
6293+               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
6294+               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
6295+            }
6296+         }
6297+      } else {
6298+         for (i=0; i < w*h; ++i) {
6299+            unsigned char *pixel = out + 4*i;
6300+            if (pixel[3] != 0 && pixel[3] != 255) {
6301+               float a = pixel[3] / 255.0f;
6302+               float ra = 1.0f / a;
6303+               float inv_a = 255.0f * (1 - ra);
6304+               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
6305+               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
6306+               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
6307+            }
6308+         }
6309+      }
6310+   }
6311+
6312+   // convert to desired output format
6313+   if (req_comp && req_comp != 4) {
6314+      if (ri->bits_per_channel == 16)
6315+         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
6316+      else
6317+         out = stbi__convert_format(out, 4, req_comp, w, h);
6318+      if (out == NULL) return out; // stbi__convert_format frees input on failure
6319+   }
6320+
6321+   if (comp) *comp = 4;
6322+   *y = h;
6323+   *x = w;
6324+
6325+   return out;
6326+}
6327+#endif
6328+
6329+// *************************************************************************************************
6330+// Softimage PIC loader
6331+// by Tom Seddon
6332+//
6333+// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
6334+// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
6335+
6336+#ifndef STBI_NO_PIC
6337+static int stbi__pic_is4(stbi__context *s,const char *str)
6338+{
6339+   int i;
6340+   for (i=0; i<4; ++i)
6341+      if (stbi__get8(s) != (stbi_uc)str[i])
6342+         return 0;
6343+
6344+   return 1;
6345+}
6346+
6347+static int stbi__pic_test_core(stbi__context *s)
6348+{
6349+   int i;
6350+
6351+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
6352+      return 0;
6353+
6354+   for(i=0;i<84;++i)
6355+      stbi__get8(s);
6356+
6357+   if (!stbi__pic_is4(s,"PICT"))
6358+      return 0;
6359+
6360+   return 1;
6361+}
6362+
6363+typedef struct
6364+{
6365+   stbi_uc size,type,channel;
6366+} stbi__pic_packet;
6367+
6368+static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
6369+{
6370+   int mask=0x80, i;
6371+
6372+   for (i=0; i<4; ++i, mask>>=1) {
6373+      if (channel & mask) {
6374+         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
6375+         dest[i]=stbi__get8(s);
6376+      }
6377+   }
6378+
6379+   return dest;
6380+}
6381+
6382+static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
6383+{
6384+   int mask=0x80,i;
6385+
6386+   for (i=0;i<4; ++i, mask>>=1)
6387+      if (channel&mask)
6388+         dest[i]=src[i];
6389+}
6390+
6391+static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
6392+{
6393+   int act_comp=0,num_packets=0,y,chained;
6394+   stbi__pic_packet packets[10];
6395+
6396+   // this will (should...) cater for even some bizarre stuff like having data
6397+    // for the same channel in multiple packets.
6398+   do {
6399+      stbi__pic_packet *packet;
6400+
6401+      if (num_packets==sizeof(packets)/sizeof(packets[0]))
6402+         return stbi__errpuc("bad format","too many packets");
6403+
6404+      packet = &packets[num_packets++];
6405+
6406+      chained = stbi__get8(s);
6407+      packet->size    = stbi__get8(s);
6408+      packet->type    = stbi__get8(s);
6409+      packet->channel = stbi__get8(s);
6410+
6411+      act_comp |= packet->channel;
6412+
6413+      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
6414+      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
6415+   } while (chained);
6416+
6417+   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
6418+
6419+   for(y=0; y<height; ++y) {
6420+      int packet_idx;
6421+
6422+      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
6423+         stbi__pic_packet *packet = &packets[packet_idx];
6424+         stbi_uc *dest = result+y*width*4;
6425+
6426+         switch (packet->type) {
6427+            default:
6428+               return stbi__errpuc("bad format","packet has bad compression type");
6429+
6430+            case 0: {//uncompressed
6431+               int x;
6432+
6433+               for(x=0;x<width;++x, dest+=4)
6434+                  if (!stbi__readval(s,packet->channel,dest))
6435+                     return 0;
6436+               break;
6437+            }
6438+
6439+            case 1://Pure RLE
6440+               {
6441+                  int left=width, i;
6442+
6443+                  while (left>0) {
6444+                     stbi_uc count,value[4];
6445+
6446+                     count=stbi__get8(s);
6447+                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");
6448+
6449+                     if (count > left)
6450+                        count = (stbi_uc) left;
6451+
6452+                     if (!stbi__readval(s,packet->channel,value))  return 0;
6453+
6454+                     for(i=0; i<count; ++i,dest+=4)
6455+                        stbi__copyval(packet->channel,dest,value);
6456+                     left -= count;
6457+                  }
6458+               }
6459+               break;
6460+
6461+            case 2: {//Mixed RLE
6462+               int left=width;
6463+               while (left>0) {
6464+                  int count = stbi__get8(s), i;
6465+                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");
6466+
6467+                  if (count >= 128) { // Repeated
6468+                     stbi_uc value[4];
6469+
6470+                     if (count==128)
6471+                        count = stbi__get16be(s);
6472+                     else
6473+                        count -= 127;
6474+                     if (count > left)
6475+                        return stbi__errpuc("bad file","scanline overrun");
6476+
6477+                     if (!stbi__readval(s,packet->channel,value))
6478+                        return 0;
6479+
6480+                     for(i=0;i<count;++i, dest += 4)
6481+                        stbi__copyval(packet->channel,dest,value);
6482+                  } else { // Raw
6483+                     ++count;
6484+                     if (count>left) return stbi__errpuc("bad file","scanline overrun");
6485+
6486+                     for(i=0;i<count;++i, dest+=4)
6487+                        if (!stbi__readval(s,packet->channel,dest))
6488+                           return 0;
6489+                  }
6490+                  left-=count;
6491+               }
6492+               break;
6493+            }
6494+         }
6495+      }
6496+   }
6497+
6498+   return result;
6499+}
6500+
6501+static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
6502+{
6503+   stbi_uc *result;
6504+   int i, x,y, internal_comp;
6505+   STBI_NOTUSED(ri);
6506+
6507+   if (!comp) comp = &internal_comp;
6508+
6509+   for (i=0; i<92; ++i)
6510+      stbi__get8(s);
6511+
6512+   x = stbi__get16be(s);
6513+   y = stbi__get16be(s);
6514+
6515+   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6516+   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
6517+
6518+   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
6519+   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
6520+
6521+   stbi__get32be(s); //skip `ratio'
6522+   stbi__get16be(s); //skip `fields'
6523+   stbi__get16be(s); //skip `pad'
6524+
6525+   // intermediate buffer is RGBA
6526+   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
6527+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
6528+   memset(result, 0xff, x*y*4);
6529+
6530+   if (!stbi__pic_load_core(s,x,y,comp, result)) {
6531+      STBI_FREE(result);
6532+      result=0;
6533+   }
6534+   *px = x;
6535+   *py = y;
6536+   if (req_comp == 0) req_comp = *comp;
6537+   result=stbi__convert_format(result,4,req_comp,x,y);
6538+
6539+   return result;
6540+}
6541+
6542+static int stbi__pic_test(stbi__context *s)
6543+{
6544+   int r = stbi__pic_test_core(s);
6545+   stbi__rewind(s);
6546+   return r;
6547+}
6548+#endif
6549+
6550+// *************************************************************************************************
6551+// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6552+
6553+#ifndef STBI_NO_GIF
6554+typedef struct
6555+{
6556+   stbi__int16 prefix;
6557+   stbi_uc first;
6558+   stbi_uc suffix;
6559+} stbi__gif_lzw;
6560+
6561+typedef struct
6562+{
6563+   int w,h;
6564+   stbi_uc *out;                 // output buffer (always 4 components)
6565+   stbi_uc *background;          // The current "background" as far as a gif is concerned
6566+   stbi_uc *history;
6567+   int flags, bgindex, ratio, transparent, eflags;
6568+   stbi_uc  pal[256][4];
6569+   stbi_uc lpal[256][4];
6570+   stbi__gif_lzw codes[8192];
6571+   stbi_uc *color_table;
6572+   int parse, step;
6573+   int lflags;
6574+   int start_x, start_y;
6575+   int max_x, max_y;
6576+   int cur_x, cur_y;
6577+   int line_size;
6578+   int delay;
6579+} stbi__gif;
6580+
6581+static int stbi__gif_test_raw(stbi__context *s)
6582+{
6583+   int sz;
6584+   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
6585+   sz = stbi__get8(s);
6586+   if (sz != '9' && sz != '7') return 0;
6587+   if (stbi__get8(s) != 'a') return 0;
6588+   return 1;
6589+}
6590+
6591+static int stbi__gif_test(stbi__context *s)
6592+{
6593+   int r = stbi__gif_test_raw(s);
6594+   stbi__rewind(s);
6595+   return r;
6596+}
6597+
6598+static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
6599+{
6600+   int i;
6601+   for (i=0; i < num_entries; ++i) {
6602+      pal[i][2] = stbi__get8(s);
6603+      pal[i][1] = stbi__get8(s);
6604+      pal[i][0] = stbi__get8(s);
6605+      pal[i][3] = transp == i ? 0 : 255;
6606+   }
6607+}
6608+
6609+static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
6610+{
6611+   stbi_uc version;
6612+   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
6613+      return stbi__err("not GIF", "Corrupt GIF");
6614+
6615+   version = stbi__get8(s);
6616+   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
6617+   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");
6618+
6619+   stbi__g_failure_reason = "";
6620+   g->w = stbi__get16le(s);
6621+   g->h = stbi__get16le(s);
6622+   g->flags = stbi__get8(s);
6623+   g->bgindex = stbi__get8(s);
6624+   g->ratio = stbi__get8(s);
6625+   g->transparent = -1;
6626+
6627+   if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
6628+   if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
6629+
6630+   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
6631+
6632+   if (is_info) return 1;
6633+
6634+   if (g->flags & 0x80)
6635+      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
6636+
6637+   return 1;
6638+}
6639+
6640+static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
6641+{
6642+   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
6643+   if (!g) return stbi__err("outofmem", "Out of memory");
6644+   if (!stbi__gif_header(s, g, comp, 1)) {
6645+      STBI_FREE(g);
6646+      stbi__rewind( s );
6647+      return 0;
6648+   }
6649+   if (x) *x = g->w;
6650+   if (y) *y = g->h;
6651+   STBI_FREE(g);
6652+   return 1;
6653+}
6654+
6655+static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
6656+{
6657+   stbi_uc *p, *c;
6658+   int idx;
6659+
6660+   // recurse to decode the prefixes, since the linked-list is backwards,
6661+   // and working backwards through an interleaved image would be nasty
6662+   if (g->codes[code].prefix >= 0)
6663+      stbi__out_gif_code(g, g->codes[code].prefix);
6664+
6665+   if (g->cur_y >= g->max_y) return;
6666+
6667+   idx = g->cur_x + g->cur_y;
6668+   p = &g->out[idx];
6669+   g->history[idx / 4] = 1;
6670+
6671+   c = &g->color_table[g->codes[code].suffix * 4];
6672+   if (c[3] > 128) { // don't render transparent pixels;
6673+      p[0] = c[2];
6674+      p[1] = c[1];
6675+      p[2] = c[0];
6676+      p[3] = c[3];
6677+   }
6678+   g->cur_x += 4;
6679+
6680+   if (g->cur_x >= g->max_x) {
6681+      g->cur_x = g->start_x;
6682+      g->cur_y += g->step;
6683+
6684+      while (g->cur_y >= g->max_y && g->parse > 0) {
6685+         g->step = (1 << g->parse) * g->line_size;
6686+         g->cur_y = g->start_y + (g->step >> 1);
6687+         --g->parse;
6688+      }
6689+   }
6690+}
6691+
// Decode the LZW-compressed raster data of one GIF image into g->out.
//
// The stream must be positioned at the minimum-code-size byte. Codes are
// unpacked least-significant-bit first from length-prefixed sub-blocks and
// emitted through stbi__out_gif_code.
//
// Returns g->out when the end-of-stream code or a zero-length sub-block is
// reached. Returns NULL if the minimum code size exceeds 12, and the result of
// stbi__errpuc() for a stream that lacks an initial clear code, overflows the
// code table, or contains an out-of-range code.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;          // the clear code; clear + 1 is the end-of-stream code
   first = 1;                    // stays set until the first clear code has been seen
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;                     // bit accumulator
   valid_bits = 0;               // number of bits currently held in the accumulator
   // Seed the table with one single-symbol root entry per literal value.
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;              // next free table slot
   oldcode = -1;                 // previously decoded code, -1 right after a clear

   len = 0;                      // bytes remaining in the current sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // Not enough bits for a whole code: pull in one more byte.
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // Discard the rest of this sub-block and any sub-blocks that follow.
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) {
               return stbi__errpuc("no clear code", "Corrupt GIF");
            }

            if (oldcode >= 0) {
               // Add a table entry: the previous string extended by one symbol.
               p = &g->codes[avail++];
               if (avail > 8192) {
                  return stbi__errpuc("too many codes", "Corrupt GIF");
               }

               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // Widen the code size once the table fills the current width,
            // but never while avail is above 0x0FFF.
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
6776+
6777+// this function is designed to support animated gifs, although stb_image doesn't support it
6778+// two back is the image from two frames ago, used for a very specific disposal format
6779+static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
6780+{
6781+   int dispose;
6782+   int first_frame;
6783+   int pi;
6784+   int pcount;
6785+   STBI_NOTUSED(req_comp);
6786+
6787+   // on first frame, any non-written pixels get the background colour (non-transparent)
6788+   first_frame = 0;
6789+   if (g->out == 0) {
6790+      if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
6791+      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
6792+         return stbi__errpuc("too large", "GIF image is too large");
6793+      pcount = g->w * g->h;
6794+      g->out = (stbi_uc *) stbi__malloc(4 * pcount);
6795+      g->background = (stbi_uc *) stbi__malloc(4 * pcount);
6796+      g->history = (stbi_uc *) stbi__malloc(pcount);
6797+      if (!g->out || !g->background || !g->history)
6798+         return stbi__errpuc("outofmem", "Out of memory");
6799+
6800+      // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
6801+      // background colour is only used for pixels that are not rendered first frame, after that "background"
6802+      // color refers to the color that was there the previous frame.
6803+      memset(g->out, 0x00, 4 * pcount);
6804+      memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
6805+      memset(g->history, 0x00, pcount);        // pixels that were affected previous frame
6806+      first_frame = 1;
6807+   } else {
6808+      // second frame - how do we dispose of the previous one?
6809+      dispose = (g->eflags & 0x1C) >> 2;
6810+      pcount = g->w * g->h;
6811+
6812+      if ((dispose == 3) && (two_back == 0)) {
6813+         dispose = 2; // if I don't have an image to revert back to, default to the old background
6814+      }
6815+
6816+      if (dispose == 3) { // use previous graphic
6817+         for (pi = 0; pi < pcount; ++pi) {
6818+            if (g->history[pi]) {
6819+               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
6820+            }
6821+         }
6822+      } else if (dispose == 2) {
6823+         // restore what was changed last frame to background before that frame;
6824+         for (pi = 0; pi < pcount; ++pi) {
6825+            if (g->history[pi]) {
6826+               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
6827+            }
6828+         }
6829+      } else {
6830+         // This is a non-disposal case eithe way, so just
6831+         // leave the pixels as is, and they will become the new background
6832+         // 1: do not dispose
6833+         // 0:  not specified.
6834+      }
6835+
6836+      // background is what out is after the undoing of the previou frame;
6837+      memcpy( g->background, g->out, 4 * g->w * g->h );
6838+   }
6839+
6840+   // clear my history;
6841+   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
6842+
6843+   for (;;) {
6844+      int tag = stbi__get8(s);
6845+      switch (tag) {
6846+         case 0x2C: /* Image Descriptor */
6847+         {
6848+            stbi__int32 x, y, w, h;
6849+            stbi_uc *o;
6850+
6851+            x = stbi__get16le(s);
6852+            y = stbi__get16le(s);
6853+            w = stbi__get16le(s);
6854+            h = stbi__get16le(s);
6855+            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6856+               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6857+
6858+            g->line_size = g->w * 4;
6859+            g->start_x = x * 4;
6860+            g->start_y = y * g->line_size;
6861+            g->max_x   = g->start_x + w * 4;
6862+            g->max_y   = g->start_y + h * g->line_size;
6863+            g->cur_x   = g->start_x;
6864+            g->cur_y   = g->start_y;
6865+
6866+            // if the width of the specified rectangle is 0, that means
6867+            // we may not see *any* pixels or the image is malformed;
6868+            // to make sure this is caught, move the current y down to
6869+            // max_y (which is what out_gif_code checks).
6870+            if (w == 0)
6871+               g->cur_y = g->max_y;
6872+
6873+            g->lflags = stbi__get8(s);
6874+
6875+            if (g->lflags & 0x40) {
6876+               g->step = 8 * g->line_size; // first interlaced spacing
6877+               g->parse = 3;
6878+            } else {
6879+               g->step = g->line_size;
6880+               g->parse = 0;
6881+            }
6882+
6883+            if (g->lflags & 0x80) {
6884+               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6885+               g->color_table = (stbi_uc *) g->lpal;
6886+            } else if (g->flags & 0x80) {
6887+               g->color_table = (stbi_uc *) g->pal;
6888+            } else
6889+               return stbi__errpuc("missing color table", "Corrupt GIF");
6890+
6891+            o = stbi__process_gif_raster(s, g);
6892+            if (!o) return NULL;
6893+
6894+            // if this was the first frame,
6895+            pcount = g->w * g->h;
6896+            if (first_frame && (g->bgindex > 0)) {
6897+               // if first frame, any pixel not drawn to gets the background color
6898+               for (pi = 0; pi < pcount; ++pi) {
6899+                  if (g->history[pi] == 0) {
6900+                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
6901+                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
6902+                  }
6903+               }
6904+            }
6905+
6906+            return o;
6907+         }
6908+
6909+         case 0x21: // Comment Extension.
6910+         {
6911+            int len;
6912+            int ext = stbi__get8(s);
6913+            if (ext == 0xF9) { // Graphic Control Extension.
6914+               len = stbi__get8(s);
6915+               if (len == 4) {
6916+                  g->eflags = stbi__get8(s);
6917+                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
6918+
6919+                  // unset old transparent
6920+                  if (g->transparent >= 0) {
6921+                     g->pal[g->transparent][3] = 255;
6922+                  }
6923+                  if (g->eflags & 0x01) {
6924+                     g->transparent = stbi__get8(s);
6925+                     if (g->transparent >= 0) {
6926+                        g->pal[g->transparent][3] = 0;
6927+                     }
6928+                  } else {
6929+                     // don't need transparent
6930+                     stbi__skip(s, 1);
6931+                     g->transparent = -1;
6932+                  }
6933+               } else {
6934+                  stbi__skip(s, len);
6935+                  break;
6936+               }
6937+            }
6938+            while ((len = stbi__get8(s)) != 0) {
6939+               stbi__skip(s, len);
6940+            }
6941+            break;
6942+         }
6943+
6944+         case 0x3B: // gif stream termination code
6945+            return (stbi_uc *) s; // using '1' causes warning on some compilers
6946+
6947+         default:
6948+            return stbi__errpuc("unknown code", "Corrupt GIF");
6949+      }
6950+   }
6951+}
6952+
6953+static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
6954+{
6955+   STBI_FREE(g->out);
6956+   STBI_FREE(g->history);
6957+   STBI_FREE(g->background);
6958+
6959+   if (out) STBI_FREE(out);
6960+   if (delays && *delays) STBI_FREE(*delays);
6961+   return stbi__errpuc("outofmem", "Out of memory");
6962+}
6963+
6964+static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
6965+{
6966+   if (stbi__gif_test(s)) {
6967+      int layers = 0;
6968+      stbi_uc *u = 0;
6969+      stbi_uc *out = 0;
6970+      stbi_uc *two_back = 0;
6971+      stbi__gif g;
6972+      int stride;
6973+      int out_size = 0;
6974+      int delays_size = 0;
6975+
6976+      STBI_NOTUSED(out_size);
6977+      STBI_NOTUSED(delays_size);
6978+
6979+      memset(&g, 0, sizeof(g));
6980+      if (delays) {
6981+         *delays = 0;
6982+      }
6983+
6984+      do {
6985+         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
6986+         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
6987+
6988+         if (u) {
6989+            *x = g.w;
6990+            *y = g.h;
6991+            ++layers;
6992+            stride = g.w * g.h * 4;
6993+
6994+            if (out) {
6995+               void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
6996+               if (!tmp)
6997+                  return stbi__load_gif_main_outofmem(&g, out, delays);
6998+               else {
6999+                   out = (stbi_uc*) tmp;
7000+                   out_size = layers * stride;
7001+               }
7002+
7003+               if (delays) {
7004+                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
7005+                  if (!new_delays)
7006+                     return stbi__load_gif_main_outofmem(&g, out, delays);
7007+                  *delays = new_delays;
7008+                  delays_size = layers * sizeof(int);
7009+               }
7010+            } else {
7011+               out = (stbi_uc*)stbi__malloc( layers * stride );
7012+               if (!out)
7013+                  return stbi__load_gif_main_outofmem(&g, out, delays);
7014+               out_size = layers * stride;
7015+               if (delays) {
7016+                  *delays = (int*) stbi__malloc( layers * sizeof(int) );
7017+                  if (!*delays)
7018+                     return stbi__load_gif_main_outofmem(&g, out, delays);
7019+                  delays_size = layers * sizeof(int);
7020+               }
7021+            }
7022+            memcpy( out + ((layers - 1) * stride), u, stride );
7023+            if (layers >= 2) {
7024+               two_back = out - 2 * stride;
7025+            }
7026+
7027+            if (delays) {
7028+               (*delays)[layers - 1U] = g.delay;
7029+            }
7030+         }
7031+      } while (u != 0);
7032+
7033+      // free temp buffer;
7034+      STBI_FREE(g.out);
7035+      STBI_FREE(g.history);
7036+      STBI_FREE(g.background);
7037+
7038+      // do the final conversion after loading everything;
7039+      if (req_comp && req_comp != 4)
7040+         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
7041+
7042+      *z = layers;
7043+      return out;
7044+   } else {
7045+      return stbi__errpuc("not GIF", "Image was not as a gif type.");
7046+   }
7047+}
7048+
7049+static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7050+{
7051+   stbi_uc *u = 0;
7052+   stbi__gif g;
7053+   memset(&g, 0, sizeof(g));
7054+   STBI_NOTUSED(ri);
7055+
7056+   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
7057+   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
7058+   if (u) {
7059+      *x = g.w;
7060+      *y = g.h;
7061+
7062+      // moved conversion to after successful load so that the same
7063+      // can be done for multiple frames.
7064+      if (req_comp && req_comp != 4)
7065+         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
7066+   } else if (g.out) {
7067+      // if there was an error and we allocated an image buffer, free it!
7068+      STBI_FREE(g.out);
7069+   }
7070+
7071+   // free buffers needed for multiple frame loading;
7072+   STBI_FREE(g.history);
7073+   STBI_FREE(g.background);
7074+
7075+   return u;
7076+}
7077+
7078+static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
7079+{
7080+   return stbi__gif_info_raw(s,x,y,comp);
7081+}
7082+#endif
7083+
7084+// *************************************************************************************************
7085+// Radiance RGBE HDR loader
7086+// originally by Nicolas Schulz
7087+#ifndef STBI_NO_HDR
7088+static int stbi__hdr_test_core(stbi__context *s, const char *signature)
7089+{
7090+   int i;
7091+   for (i=0; signature[i]; ++i)
7092+      if (stbi__get8(s) != signature[i])
7093+          return 0;
7094+   stbi__rewind(s);
7095+   return 1;
7096+}
7097+
7098+static int stbi__hdr_test(stbi__context* s)
7099+{
7100+   int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
7101+   stbi__rewind(s);
7102+   if(!r) {
7103+       r = stbi__hdr_test_core(s, "#?RGBE\n");
7104+       stbi__rewind(s);
7105+   }
7106+   return r;
7107+}
7108+
7109+#define STBI__HDR_BUFLEN  1024
7110+static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
7111+{
7112+   int len=0;
7113+   char c = '\0';
7114+
7115+   c = (char) stbi__get8(z);
7116+
7117+   while (!stbi__at_eof(z) && c != '\n') {
7118+      buffer[len++] = c;
7119+      if (len == STBI__HDR_BUFLEN-1) {
7120+         // flush to end of line
7121+         while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
7122+            ;
7123+         break;
7124+      }
7125+      c = (char) stbi__get8(z);
7126+   }
7127+
7128+   buffer[len] = 0;
7129+   return buffer;
7130+}
7131+
7132+static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
7133+{
7134+   if ( input[3] != 0 ) {
7135+      float f1;
7136+      // Exponent
7137+      f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
7138+      if (req_comp <= 2)
7139+         output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
7140+      else {
7141+         output[0] = input[0] * f1;
7142+         output[1] = input[1] * f1;
7143+         output[2] = input[2] * f1;
7144+      }
7145+      if (req_comp == 2) output[1] = 1;
7146+      if (req_comp == 4) output[3] = 1;
7147+   } else {
7148+      switch (req_comp) {
7149+         case 4: output[3] = 1; /* fallthrough */
7150+         case 3: output[0] = output[1] = output[2] = 0;
7151+                 break;
7152+         case 2: output[1] = 1; /* fallthrough */
7153+         case 1: output[0] = 0;
7154+                 break;
7155+      }
7156+   }
7157+}
7158+
// Loads a Radiance RGBE (.hdr) image from `s` and returns a newly allocated
// float buffer, or the result of stbi__errpf(...) on failure.
//   x, y     - receive the width and height parsed from the resolution line
//   comp     - if non-NULL, receives 3
//   req_comp - requested floats per pixel; 0 is treated as 3
//   ri       - unused
// The header must start with "#?RADIANCE" or "#?RGBE", must contain the line
// "FORMAT=32-bit_rle_rgbe", and the resolution line must have the form
// "-Y <height> +X <width>". Pixel data is then read either as flat 4-byte
// RGBE pixels or as run-length-encoded scanlines, depending on the width and
// on the bytes that start each scanline.
7159+static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7160+{
7161+   char buffer[STBI__HDR_BUFLEN];
7162+   char *token;
7163+   int valid = 0;
7164+   int width, height;
7165+   stbi_uc *scanline;
7166+   float *hdr_data;
7167+   int len;
7168+   unsigned char count, value;
7169+   int i, j, k, c1,c2, z;
7170+   const char *headerToken;
7171+   STBI_NOTUSED(ri);
7172+
7173+   // Check identifier
7174+   headerToken = stbi__hdr_gettoken(s,buffer);
7175+   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
7176+      return stbi__errpf("not HDR", "Corrupt HDR image");
7177+
7178+   // Parse header: read lines until an empty one, remembering whether the
7179+   for(;;) {
7180+      token = stbi__hdr_gettoken(s,buffer);
7181+      if (token[0] == 0) break;
7182+      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
7183+   }
7184+
7185+   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
7186+
7187+   // Parse width and height
7188+   // can't use sscanf() if we're not using stdio!
7189+   token = stbi__hdr_gettoken(s,buffer);
7190+   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
7191+   token += 3;
7192+   height = (int) strtol(token, &token, 10);
7193+   while (*token == ' ') ++token;
7194+   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
7195+   token += 3;
7196+   width = (int) strtol(token, NULL, 10);
7197+
7198+   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
7199+   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
7200+
7201+   *x = width;
7202+   *y = height;
7203+
7204+   if (comp) *comp = 3;
7205+   if (req_comp == 0) req_comp = 3;
7206+
   // NOTE(review): only the upper bound of width/height is checked explicitly above;
   // presumably stbi__mad4sizes_valid rejects negative values - confirm against its definition.
7207+   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
7208+      return stbi__errpf("too large", "HDR image is too large");
7209+
7210+   // Read data
7211+   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
7212+   if (!hdr_data)
7213+      return stbi__errpf("outofmem", "Out of memory");
7214+
7215+   // Load image data
7216+   // image data is stored as some number of scanlines (this comment was cut off after "sca"; "scanlines" is presumed)
7217+   if ( width < 8 || width >= 32768) {
7218+      // Read flat data
7219+      for (j=0; j < height; ++j) {
7220+         for (i=0; i < width; ++i) {
7221+            stbi_uc rgbe[4];
               // Also the jump target of the RLE branch below when a scanline turns out
               // not to be run-length encoded; i and j are set by that branch before the jump.
7222+           main_decode_loop:
7223+            stbi__getn(s, rgbe, 4);   // return value is not checked here
7224+            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
7225+         }
7226+      }
7227+   } else {
7228+      // Read RLE-encoded data
7229+      scanline = NULL;
7230+
7231+      for (j = 0; j < height; ++j) {
            // An RLE scanline starts with the bytes 2, 2, then a 16-bit big-endian
            // length whose top bit is clear.
7232+         c1 = stbi__get8(s);
7233+         c2 = stbi__get8(s);
7234+         len = stbi__get8(s);
7235+         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
7236+            // not run-length encoded, so we have to actually use THIS data as a decoded
7237+            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
7238+            stbi_uc rgbe[4];
7239+            rgbe[0] = (stbi_uc) c1;
7240+            rgbe[1] = (stbi_uc) c2;
7241+            rgbe[2] = (stbi_uc) len;
7242+            rgbe[3] = (stbi_uc) stbi__get8(s);
7243+            stbi__hdr_convert(hdr_data, rgbe, req_comp);
               // The pixel just converted is stored as pixel 0 of row 0, and flat decoding
               // resumes at pixel 1 of row 0.
               // NOTE(review): j is reset to 0 regardless of which row triggered this - confirm intended.
7244+            i = 1;
7245+            j = 0;
7246+            STBI_FREE(scanline);
7247+            goto main_decode_loop; // yes, this makes no sense
7248+         }
7249+         len <<= 8;
7250+         len |= stbi__get8(s);
7251+         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
7252+         if (scanline == NULL) {
               // Allocated once, on the first RLE scanline, and reused for the rest.
7253+            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
7254+            if (!scanline) {
7255+               STBI_FREE(hdr_data);
7256+               return stbi__errpf("outofmem", "Out of memory");
7257+            }
7258+         }
7259+
            // The four byte planes (k = 0..3) are stored one after another; each is
            // decoded into every 4th byte of `scanline`, starting at offset k.
7260+         for (k = 0; k < 4; ++k) {
7261+            int nleft;
7262+            i = 0;
7263+            while ((nleft = width - i) > 0) {
7264+               count = stbi__get8(s);
7265+               if (count > 128) {
7266+                  // Run
7267+                  value = stbi__get8(s);
7268+                  count -= 128;
7269+                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
7270+                  for (z = 0; z < count; ++z)
7271+                     scanline[i++ * 4 + k] = value;
7272+               } else {
7273+                  // Dump
7274+                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
7275+                  for (z = 0; z < count; ++z)
7276+                     scanline[i++ * 4 + k] = stbi__get8(s);
7277+               }
7278+            }
7279+         }
            // Convert the reassembled RGBE scanline into row j of the float output.
7280+         for (i=0; i < width; ++i)
7281+            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
7282+      }
7283+      if (scanline)
7284+         STBI_FREE(scanline);
7285+   }
7286+
7287+   return hdr_data;
7288+}
7289+
7290+static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
7291+{
7292+   char buffer[STBI__HDR_BUFLEN];
7293+   char *token;
7294+   int valid = 0;
7295+   int dummy;
7296+
7297+   if (!x) x = &dummy;
7298+   if (!y) y = &dummy;
7299+   if (!comp) comp = &dummy;
7300+
7301+   if (stbi__hdr_test(s) == 0) {
7302+       stbi__rewind( s );
7303+       return 0;
7304+   }
7305+
7306+   for(;;) {
7307+      token = stbi__hdr_gettoken(s,buffer);
7308+      if (token[0] == 0) break;
7309+      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
7310+   }
7311+
7312+   if (!valid) {
7313+       stbi__rewind( s );
7314+       return 0;
7315+   }
7316+   token = stbi__hdr_gettoken(s,buffer);
7317+   if (strncmp(token, "-Y ", 3)) {
7318+       stbi__rewind( s );
7319+       return 0;
7320+   }
7321+   token += 3;
7322+   *y = (int) strtol(token, &token, 10);
7323+   while (*token == ' ') ++token;
7324+   if (strncmp(token, "+X ", 3)) {
7325+       stbi__rewind( s );
7326+       return 0;
7327+   }
7328+   token += 3;
7329+   *x = (int) strtol(token, NULL, 10);
7330+   *comp = 3;
7331+   return 1;
7332+}
7333+#endif // STBI_NO_HDR
7334+
7335+#ifndef STBI_NO_BMP
7336+static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
7337+{
7338+   void *p;
7339+   stbi__bmp_data info;
7340+
7341+   info.all_a = 255;
7342+   p = stbi__bmp_parse_header(s, &info);
7343+   if (p == NULL) {
7344+      stbi__rewind( s );
7345+      return 0;
7346+   }
7347+   if (x) *x = s->img_x;
7348+   if (y) *y = s->img_y;
7349+   if (comp) {
7350+      if (info.bpp == 24 && info.ma == 0xff000000)
7351+         *comp = 3;
7352+      else
7353+         *comp = info.ma ? 4 : 3;
7354+   }
7355+   return 1;
7356+}
7357+#endif
7358+
7359+#ifndef STBI_NO_PSD
7360+static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
7361+{
7362+   int channelCount, dummy, depth;
7363+   if (!x) x = &dummy;
7364+   if (!y) y = &dummy;
7365+   if (!comp) comp = &dummy;
7366+   if (stbi__get32be(s) != 0x38425053) {
7367+       stbi__rewind( s );
7368+       return 0;
7369+   }
7370+   if (stbi__get16be(s) != 1) {
7371+       stbi__rewind( s );
7372+       return 0;
7373+   }
7374+   stbi__skip(s, 6);
7375+   channelCount = stbi__get16be(s);
7376+   if (channelCount < 0 || channelCount > 16) {
7377+       stbi__rewind( s );
7378+       return 0;
7379+   }
7380+   *y = stbi__get32be(s);
7381+   *x = stbi__get32be(s);
7382+   depth = stbi__get16be(s);
7383+   if (depth != 8 && depth != 16) {
7384+       stbi__rewind( s );
7385+       return 0;
7386+   }
7387+   if (stbi__get16be(s) != 3) {
7388+       stbi__rewind( s );
7389+       return 0;
7390+   }
7391+   *comp = 4;
7392+   return 1;
7393+}
7394+
7395+static int stbi__psd_is16(stbi__context *s)
7396+{
7397+   int channelCount, depth;
7398+   if (stbi__get32be(s) != 0x38425053) {
7399+       stbi__rewind( s );
7400+       return 0;
7401+   }
7402+   if (stbi__get16be(s) != 1) {
7403+       stbi__rewind( s );
7404+       return 0;
7405+   }
7406+   stbi__skip(s, 6);
7407+   channelCount = stbi__get16be(s);
7408+   if (channelCount < 0 || channelCount > 16) {
7409+       stbi__rewind( s );
7410+       return 0;
7411+   }
7412+   STBI_NOTUSED(stbi__get32be(s));
7413+   STBI_NOTUSED(stbi__get32be(s));
7414+   depth = stbi__get16be(s);
7415+   if (depth != 16) {
7416+       stbi__rewind( s );
7417+       return 0;
7418+   }
7419+   return 1;
7420+}
7421+#endif
7422+
7423+#ifndef STBI_NO_PIC
7424+static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
7425+{
7426+   int act_comp=0,num_packets=0,chained,dummy;
7427+   stbi__pic_packet packets[10];
7428+
7429+   if (!x) x = &dummy;
7430+   if (!y) y = &dummy;
7431+   if (!comp) comp = &dummy;
7432+
7433+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
7434+      stbi__rewind(s);
7435+      return 0;
7436+   }
7437+
7438+   stbi__skip(s, 88);
7439+
7440+   *x = stbi__get16be(s);
7441+   *y = stbi__get16be(s);
7442+   if (stbi__at_eof(s)) {
7443+      stbi__rewind( s);
7444+      return 0;
7445+   }
7446+   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
7447+      stbi__rewind( s );
7448+      return 0;
7449+   }
7450+
7451+   stbi__skip(s, 8);
7452+
7453+   do {
7454+      stbi__pic_packet *packet;
7455+
7456+      if (num_packets==sizeof(packets)/sizeof(packets[0]))
7457+         return 0;
7458+
7459+      packet = &packets[num_packets++];
7460+      chained = stbi__get8(s);
7461+      packet->size    = stbi__get8(s);
7462+      packet->type    = stbi__get8(s);
7463+      packet->channel = stbi__get8(s);
7464+      act_comp |= packet->channel;
7465+
7466+      if (stbi__at_eof(s)) {
7467+          stbi__rewind( s );
7468+          return 0;
7469+      }
7470+      if (packet->size != 8) {
7471+          stbi__rewind( s );
7472+          return 0;
7473+      }
7474+   } while (chained);
7475+
7476+   *comp = (act_comp & 0x10 ? 4 : 3);
7477+
7478+   return 1;
7479+}
7480+#endif
7481+
7482+// *************************************************************************************************
7483+// Portable Gray Map and Portable Pixel Map loader
7484+// by Ken Miller
7485+//
7486+// PGM: http://netpbm.sourceforge.net/doc/pgm.html
7487+// PPM: http://netpbm.sourceforge.net/doc/ppm.html
7488+//
7489+// Known limitations:
7490+//    Does not support comments in the header section
7491+//    Does not support ASCII image data (formats P2 and P3)
7492+
7493+#ifndef STBI_NO_PNM
7494+
7495+static int      stbi__pnm_test(stbi__context *s)
7496+{
7497+   char p, t;
7498+   p = (char) stbi__get8(s);
7499+   t = (char) stbi__get8(s);
7500+   if (p != 'P' || (t != '5' && t != '6')) {
7501+       stbi__rewind( s );
7502+       return 0;
7503+   }
7504+   return 1;
7505+}
7506+
7507+static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
7508+{
7509+   stbi_uc *out;
7510+   STBI_NOTUSED(ri);
7511+
7512+   ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
7513+   if (ri->bits_per_channel == 0)
7514+      return 0;
7515+
7516+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
7517+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
7518+
7519+   *x = s->img_x;
7520+   *y = s->img_y;
7521+   if (comp) *comp = s->img_n;
7522+
7523+   if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
7524+      return stbi__errpuc("too large", "PNM too large");
7525+
7526+   out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
7527+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
7528+   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) {
7529+      STBI_FREE(out);
7530+      return stbi__errpuc("bad PNM", "PNM file truncated");
7531+   }
7532+
7533+   if (req_comp && req_comp != s->img_n) {
7534+      if (ri->bits_per_channel == 16) {
7535+         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y);
7536+      } else {
7537+         out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
7538+      }
7539+      if (out == NULL) return out; // stbi__convert_format frees input on failure
7540+   }
7541+   return out;
7542+}
7543+
// Returns 1 if `c` is one of the six whitespace characters
// (space, \t, \n, \v, \f, \r), otherwise 0.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
7548+
7549+static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
7550+{
7551+   for (;;) {
7552+      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
7553+         *c = (char) stbi__get8(s);
7554+
7555+      if (stbi__at_eof(s) || *c != '#')
7556+         break;
7557+
7558+      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
7559+         *c = (char) stbi__get8(s);
7560+   }
7561+}
7562+
// Returns 1 if `c` is an ASCII decimal digit ('0'..'9'), otherwise 0.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0')
      return 0;
   if (c > '9')
      return 0;
   return 1;
}
7567+
7568+static int      stbi__pnm_getinteger(stbi__context *s, char *c)
7569+{
7570+   int value = 0;
7571+
7572+   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
7573+      value = value*10 + (*c - '0');
7574+      *c = (char) stbi__get8(s);
7575+      if((value > 214748364) || (value == 214748364 && *c > '7'))
7576+          return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
7577+   }
7578+
7579+   return value;
7580+}
7581+
7582+static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
7583+{
7584+   int maxv, dummy;
7585+   char c, p, t;
7586+
7587+   if (!x) x = &dummy;
7588+   if (!y) y = &dummy;
7589+   if (!comp) comp = &dummy;
7590+
7591+   stbi__rewind(s);
7592+
7593+   // Get identifier
7594+   p = (char) stbi__get8(s);
7595+   t = (char) stbi__get8(s);
7596+   if (p != 'P' || (t != '5' && t != '6')) {
7597+       stbi__rewind(s);
7598+       return 0;
7599+   }
7600+
7601+   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
7602+
7603+   c = (char) stbi__get8(s);
7604+   stbi__pnm_skip_whitespace(s, &c);
7605+
7606+   *x = stbi__pnm_getinteger(s, &c); // read width
7607+   if(*x == 0)
7608+       return stbi__err("invalid width", "PPM image header had zero or overflowing width");
7609+   stbi__pnm_skip_whitespace(s, &c);
7610+
7611+   *y = stbi__pnm_getinteger(s, &c); // read height
7612+   if (*y == 0)
7613+       return stbi__err("invalid width", "PPM image header had zero or overflowing width");
7614+   stbi__pnm_skip_whitespace(s, &c);
7615+
7616+   maxv = stbi__pnm_getinteger(s, &c);  // read max value
7617+   if (maxv > 65535)
7618+      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
7619+   else if (maxv > 255)
7620+      return 16;
7621+   else
7622+      return 8;
7623+}
7624+
7625+static int stbi__pnm_is16(stbi__context *s)
7626+{
7627+   if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
7628+	   return 1;
7629+   return 0;
7630+}
7631+#endif
7632+
7633+static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
7634+{
7635+   #ifndef STBI_NO_JPEG
7636+   if (stbi__jpeg_info(s, x, y, comp)) return 1;
7637+   #endif
7638+
7639+   #ifndef STBI_NO_PNG
7640+   if (stbi__png_info(s, x, y, comp))  return 1;
7641+   #endif
7642+
7643+   #ifndef STBI_NO_GIF
7644+   if (stbi__gif_info(s, x, y, comp))  return 1;
7645+   #endif
7646+
7647+   #ifndef STBI_NO_BMP
7648+   if (stbi__bmp_info(s, x, y, comp))  return 1;
7649+   #endif
7650+
7651+   #ifndef STBI_NO_PSD
7652+   if (stbi__psd_info(s, x, y, comp))  return 1;
7653+   #endif
7654+
7655+   #ifndef STBI_NO_PIC
7656+   if (stbi__pic_info(s, x, y, comp))  return 1;
7657+   #endif
7658+
7659+   #ifndef STBI_NO_PNM
7660+   if (stbi__pnm_info(s, x, y, comp))  return 1;
7661+   #endif
7662+
7663+   #ifndef STBI_NO_HDR
7664+   if (stbi__hdr_info(s, x, y, comp))  return 1;
7665+   #endif
7666+
7667+   // test tga last because it's a crappy test!
7668+   #ifndef STBI_NO_TGA
7669+   if (stbi__tga_info(s, x, y, comp))
7670+       return 1;
7671+   #endif
7672+   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
7673+}
7674+
7675+static int stbi__is_16_main(stbi__context *s)
7676+{
7677+   #ifndef STBI_NO_PNG
7678+   if (stbi__png_is16(s))  return 1;
7679+   #endif
7680+
7681+   #ifndef STBI_NO_PSD
7682+   if (stbi__psd_is16(s))  return 1;
7683+   #endif
7684+
7685+   #ifndef STBI_NO_PNM
7686+   if (stbi__pnm_is16(s))  return 1;
7687+   #endif
7688+   return 0;
7689+}
7690+
7691+#ifndef STBI_NO_STDIO
7692+STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7693+{
7694+    FILE *f = stbi__fopen(filename, "rb");
7695+    int result;
7696+    if (!f) return stbi__err("can't fopen", "Unable to open file");
7697+    result = stbi_info_from_file(f, x, y, comp);
7698+    fclose(f);
7699+    return result;
7700+}
7701+
7702+STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7703+{
7704+   int r;
7705+   stbi__context s;
7706+   long pos = ftell(f);
7707+   stbi__start_file(&s, f);
7708+   r = stbi__info_main(&s,x,y,comp);
7709+   fseek(f,pos,SEEK_SET);
7710+   return r;
7711+}
7712+
7713+STBIDEF int stbi_is_16_bit(char const *filename)
7714+{
7715+    FILE *f = stbi__fopen(filename, "rb");
7716+    int result;
7717+    if (!f) return stbi__err("can't fopen", "Unable to open file");
7718+    result = stbi_is_16_bit_from_file(f);
7719+    fclose(f);
7720+    return result;
7721+}
7722+
7723+STBIDEF int stbi_is_16_bit_from_file(FILE *f)
7724+{
7725+   int r;
7726+   stbi__context s;
7727+   long pos = ftell(f);
7728+   stbi__start_file(&s, f);
7729+   r = stbi__is_16_main(&s);
7730+   fseek(f,pos,SEEK_SET);
7731+   return r;
7732+}
7733+#endif // !STBI_NO_STDIO
7734+
7735+STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
7736+{
7737+   stbi__context s;
7738+   stbi__start_mem(&s,buffer,len);
7739+   return stbi__info_main(&s,x,y,comp);
7740+}
7741+
7742+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
7743+{
7744+   stbi__context s;
7745+   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7746+   return stbi__info_main(&s,x,y,comp);
7747+}
7748+
7749+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
7750+{
7751+   stbi__context s;
7752+   stbi__start_mem(&s,buffer,len);
7753+   return stbi__is_16_main(&s);
7754+}
7755+
7756+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
7757+{
7758+   stbi__context s;
7759+   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7760+   return stbi__is_16_main(&s);
7761+}
7762+
7763+#endif // STB_IMAGE_IMPLEMENTATION
7764+
7765+/*
7766+   revision history:
7767+      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
7768+      2.19  (2018-02-11) fix warning
7769+      2.18  (2018-01-30) fix warnings
7770+      2.17  (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug
7771+                         1-bit BMP
7772+                         *_is_16_bit api
7773+                         avoid warnings
7774+      2.16  (2017-07-23) all functions have 16-bit variants;
7775+                         STBI_NO_STDIO works again;
7776+                         compilation fixes;
7777+                         fix rounding in unpremultiply;
7778+                         optimize vertical flip;
7779+                         disable raw_len validation;
7780+                         documentation fixes
7781+      2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
7782+                         warning fixes; disable run-time SSE detection on gcc;
7783+                         uniform handling of optional "return" values;
7784+                         thread-safe initialization of zlib tables
7785+      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
7786+      2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
7787+      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
7788+      2.11  (2016-04-02) allocate large structures on the stack
7789+                         remove white matting for transparent PSD
7790+                         fix reported channel count for PNG & BMP
7791+                         re-enable SSE2 in non-gcc 64-bit
7792+                         support RGB-formatted JPEG
7793+                         read 16-bit PNGs (only as 8-bit)
7794+      2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7795+      2.09  (2016-01-16) allow comments in PNM files
7796+                         16-bit-per-pixel TGA (not bit-per-component)
7797+                         info() for TGA could break due to .hdr handling
7798+                         info() for BMP to shares code instead of sloppy parse
7799+                         can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7800+                         code cleanup
7801+      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7802+      2.07  (2015-09-13) fix compiler warnings
7803+                         partial animated GIF support
7804+                         limited 16-bpc PSD support
7805+                         #ifdef unused functions
7806+                         bug with < 92 byte PIC,PNM,HDR,TGA
7807+      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7808+      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7809+      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7810+      2.03  (2015-04-12) extra corruption checking (mmozeiko)
7811+                         stbi_set_flip_vertically_on_load (nguillemot)
7812+                         fix NEON support; fix mingw support
7813+      2.02  (2015-01-19) fix incorrect assert, fix warning
7814+      2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7815+      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7816+      2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7817+                         progressive JPEG (stb)
7818+                         PGM/PPM support (Ken Miller)
7819+                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
7820+                         GIF bugfix -- seemingly never worked
7821+                         STBI_NO_*, STBI_ONLY_*
7822+      1.48  (2014-12-14) fix incorrectly-named assert()
7823+      1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7824+                         optimize PNG (ryg)
7825+                         fix bug in interlaced PNG with user-specified channel count (stb)
7826+      1.46  (2014-08-26)
7827+              fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7828+      1.45  (2014-08-16)
7829+              fix MSVC-ARM internal compiler error by wrapping malloc
7830+      1.44  (2014-08-07)
7831+              various warning fixes from Ronny Chevalier
7832+      1.43  (2014-07-15)
7833+              fix MSVC-only compiler problem in code changed in 1.42
7834+      1.42  (2014-07-09)
7835+              don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7836+              fixes to stbi__cleanup_jpeg path
7837+              added STBI_ASSERT to avoid requiring assert.h
7838+      1.41  (2014-06-25)
7839+              fix search&replace from 1.36 that messed up comments/error messages
7840+      1.40  (2014-06-22)
7841+              fix gcc struct-initialization warning
7842+      1.39  (2014-06-15)
7843+              fix to TGA optimization when req_comp != number of components in TGA;
7844+              fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7845+              add support for BMP version 5 (more ignored fields)
7846+      1.38  (2014-06-06)
7847+              suppress MSVC warnings on integer casts truncating values
7848+              fix accidental rename of 'skip' field of I/O
7849+      1.37  (2014-06-04)
7850+              remove duplicate typedef
7851+      1.36  (2014-06-03)
7852+              convert to header file single-file library
7853+              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7854+      1.35  (2014-05-27)
7855+              various warnings
7856+              fix broken STBI_SIMD path
7857+              fix bug where stbi_load_from_file no longer left file pointer in correct place
7858+              fix broken non-easy path for 32-bit BMP (possibly never used)
7859+              TGA optimization by Arseny Kapoulkine
7860+      1.34  (unknown)
7861+              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7862+      1.33  (2011-07-14)
7863+              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7864+      1.32  (2011-07-13)
7865+              support for "info" function for all supported filetypes (SpartanJ)
7866+      1.31  (2011-06-20)
7867+              a few more leak fixes, bug in PNG handling (SpartanJ)
7868+      1.30  (2011-06-11)
7869+              added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7870+              removed deprecated format-specific test/load functions
7871+              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7872+              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7873+              fix inefficiency in decoding 32-bit BMP (David Woo)
7874+      1.29  (2010-08-16)
7875+              various warning fixes from Aurelien Pocheville
7876+      1.28  (2010-08-01)
7877+              fix bug in GIF palette transparency (SpartanJ)
7878+      1.27  (2010-08-01)
7879+              cast-to-stbi_uc to fix warnings
7880+      1.26  (2010-07-24)
7881+              fix bug in file buffering for PNG reported by SpartanJ
7882+      1.25  (2010-07-17)
7883+              refix trans_data warning (Won Chun)
7884+      1.24  (2010-07-12)
7885+              perf improvements reading from files on platforms with lock-heavy fgetc()
7886+              minor perf improvements for jpeg
7887+              deprecated type-specific functions so we'll get feedback if they're needed
7888+              attempt to fix trans_data warning (Won Chun)
7889+      1.23    fixed bug in iPhone support
7890+      1.22  (2010-07-10)
7891+              removed image *writing* support
7892+              stbi_info support from Jetro Lauha
7893+              GIF support from Jean-Marc Lienher
7894+              iPhone PNG-extensions from James Brown
7895+              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez Žemva)
7896+      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7897+      1.20    added support for Softimage PIC, by Tom Seddon
7898+      1.19    bug in interlaced PNG corruption check (found by ryg)
7899+      1.18  (2008-08-02)
7900+              fix a threading bug (local mutable static)
7901+      1.17    support interlaced PNG
7902+      1.16    major bugfix - stbi__convert_format converted one too many pixels
7903+      1.15    initialize some fields for thread safety
7904+      1.14    fix threadsafe conversion bug
7905+              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7906+      1.13    threadsafe
7907+      1.12    const qualifiers in the API
7908+      1.11    Support installable IDCT, colorspace conversion routines
7909+      1.10    Fixes for 64-bit (don't use "unsigned long")
7910+              optimized upsampling by Fabian "ryg" Giesen
7911+      1.09    Fix format-conversion for PSD code (bad global variables!)
7912+      1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7913+      1.07    attempt to fix C++ warning/errors again
7914+      1.06    attempt to fix C++ warning/errors again
7915+      1.05    fix TGA loading to return correct *comp and use good luminance calc
7916+      1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7917+      1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7918+      1.02    support for (subset of) HDR files, float interface for preferred access to them
7919+      1.01    fix bug: possible bug in handling right-side up bmps... not sure
7920+              fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7921+      1.00    interface to zlib that skips zlib header
7922+      0.99    correct handling of alpha in palette
7923+      0.98    TGA loader by lonesock; dynamically add loaders (untested)
7924+      0.97    jpeg errors on too large a file; also catch another malloc failure
7925+      0.96    fix detection of invalid v value - particleman@mollyrocket forum
7926+      0.95    during header scan, seek to markers in case of padding
7927+      0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7928+      0.93    handle jpegtran output; verbose errors
7929+      0.92    read 4,8,16,24,32-bit BMP files of several formats
7930+      0.91    output 24-bit Windows 3.0 BMP files
7931+      0.90    fix a few more warnings; bump version number to approach 1.0
7932+      0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7933+      0.60    fix compiling as c++
7934+      0.59    fix warnings: merge Dave Moore's -Wall fixes
7935+      0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7936+      0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7937+      0.56    fix bug: zlib uncompressed mode len vs. nlen
7938+      0.55    fix bug: restart_interval not initialized to 0
7939+      0.54    allow NULL for 'int *comp'
7940+      0.53    fix bug in png 3->4; speedup png decoding
7941+      0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7942+      0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7943+              on 'test' only check type, not whether we support this variant
7944+      0.50  (2006-11-19)
7945+              first released version
7946+*/
7947+
7948+
7949+/*
7950+------------------------------------------------------------------------------
7951+This software is available under 2 licenses -- choose whichever you prefer.
7952+------------------------------------------------------------------------------
7953+ALTERNATIVE A - MIT License
7954+Copyright (c) 2017 Sean Barrett
7955+Permission is hereby granted, free of charge, to any person obtaining a copy of
7956+this software and associated documentation files (the "Software"), to deal in
7957+the Software without restriction, including without limitation the rights to
7958+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7959+of the Software, and to permit persons to whom the Software is furnished to do
7960+so, subject to the following conditions:
7961+The above copyright notice and this permission notice shall be included in all
7962+copies or substantial portions of the Software.
7963+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7964+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7965+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7966+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7967+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7968+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7969+SOFTWARE.
7970+------------------------------------------------------------------------------
7971+ALTERNATIVE B - Public Domain (www.unlicense.org)
7972+This is free and unencumbered software released into the public domain.
7973+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7974+software, either in source code form or as a compiled binary, for any purpose,
7975+commercial or non-commercial, and by any means.
7976+In jurisdictions that recognize copyright laws, the author or authors of this
7977+software dedicate any and all copyright interest in the software to the public
7978+domain. We make this dedication for the benefit of the public at large and to
7979+the detriment of our heirs and successors. We intend this dedication to be an
7980+overt act of relinquishment in perpetuity of all present and future rights to
7981+this software under copyright law.
7982+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7983+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7984+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7985+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7986+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7987+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7988+------------------------------------------------------------------------------
7989+*/
D swc
+0, -1
1@@ -1 +0,0 @@
2-Subproject commit 19893a5ca3239082b904a04fdb9ac7c3a1fb62e2