mirror of
https://github.com/yhirose/cpp-httplib
synced 2024-11-21 06:26:02 -07:00
Fix gzip compression/decompression over 4 GiB data size (#1002)
* Fix gzip compression/decompression over 4 GiB data size
* Add gzip test for large random data
This commit is contained in:
parent
52f5eb5980
commit
879dd261c2
2 changed files with 106 additions and 35 deletions
95
httplib.h
95
httplib.h
|
@ -2578,28 +2578,40 @@ public:
|
|||
Callback callback) override {
|
||||
assert(is_valid_);
|
||||
|
||||
auto flush = last ? Z_FINISH : Z_NO_FLUSH;
|
||||
|
||||
strm_.avail_in = static_cast<decltype(strm_.avail_in)>(data_length);
|
||||
strm_.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
|
||||
|
||||
int ret = Z_OK;
|
||||
|
||||
std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
|
||||
do {
|
||||
strm_.avail_out = static_cast<uInt>(buff.size());
|
||||
strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
|
||||
constexpr size_t max_avail_in =
|
||||
std::numeric_limits<decltype(strm_.avail_in)>::max();
|
||||
|
||||
ret = deflate(&strm_, flush);
|
||||
if (ret == Z_STREAM_ERROR) { return false; }
|
||||
strm_.avail_in = static_cast<decltype(strm_.avail_in)>(
|
||||
std::min(data_length, max_avail_in));
|
||||
strm_.next_in =
|
||||
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
|
||||
|
||||
if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
|
||||
return false;
|
||||
}
|
||||
} while (strm_.avail_out == 0);
|
||||
data_length -= strm_.avail_in;
|
||||
data += strm_.avail_in;
|
||||
|
||||
auto flush = (last && data_length == 0) ? Z_FINISH : Z_NO_FLUSH;
|
||||
int ret = Z_OK;
|
||||
|
||||
std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
|
||||
do {
|
||||
strm_.avail_out = static_cast<uInt>(buff.size());
|
||||
strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
|
||||
|
||||
ret = deflate(&strm_, flush);
|
||||
if (ret == Z_STREAM_ERROR) { return false; }
|
||||
|
||||
if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
|
||||
return false;
|
||||
}
|
||||
} while (strm_.avail_out == 0);
|
||||
|
||||
assert((flush == Z_FINISH && ret == Z_STREAM_END) ||
|
||||
(flush == Z_NO_FLUSH && ret == Z_OK));
|
||||
assert(strm_.avail_in == 0);
|
||||
|
||||
} while (data_length > 0);
|
||||
|
||||
assert((last && ret == Z_STREAM_END) || (!last && ret == Z_OK));
|
||||
assert(strm_.avail_in == 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2633,28 +2645,41 @@ public:
|
|||
|
||||
int ret = Z_OK;
|
||||
|
||||
strm_.avail_in = static_cast<decltype(strm_.avail_in)>(data_length);
|
||||
strm_.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
|
||||
do {
|
||||
constexpr size_t max_avail_in =
|
||||
std::numeric_limits<decltype(strm_.avail_in)>::max();
|
||||
|
||||
std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
|
||||
while (strm_.avail_in > 0) {
|
||||
strm_.avail_out = static_cast<uInt>(buff.size());
|
||||
strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
|
||||
strm_.avail_in = static_cast<decltype(strm_.avail_in)>(
|
||||
std::min(data_length, max_avail_in));
|
||||
strm_.next_in =
|
||||
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
|
||||
|
||||
ret = inflate(&strm_, Z_NO_FLUSH);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
switch (ret) {
|
||||
case Z_NEED_DICT:
|
||||
case Z_DATA_ERROR:
|
||||
case Z_MEM_ERROR: inflateEnd(&strm_); return false;
|
||||
data_length -= strm_.avail_in;
|
||||
data += strm_.avail_in;
|
||||
|
||||
std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
|
||||
while (strm_.avail_in > 0) {
|
||||
strm_.avail_out = static_cast<uInt>(buff.size());
|
||||
strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
|
||||
|
||||
ret = inflate(&strm_, Z_NO_FLUSH);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
switch (ret) {
|
||||
case Z_NEED_DICT:
|
||||
case Z_DATA_ERROR:
|
||||
case Z_MEM_ERROR: inflateEnd(&strm_); return false;
|
||||
}
|
||||
|
||||
if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (ret != Z_OK && ret != Z_STREAM_END) return false;
|
||||
|
||||
return ret == Z_OK || ret == Z_STREAM_END;
|
||||
} while (data_length > 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
46
test/test.cc
46
test/test.cc
|
@ -2868,6 +2868,52 @@ TEST(GzipDecompressor, ChunkedDecompression) {
|
|||
}
|
||||
ASSERT_EQ(data, decompressed_data);
|
||||
}
|
||||
|
||||
TEST(GzipDecompressor, LargeRandomData) {
  // Round-trips a payload larger than 4 GiB through gzip_compressor and
  // gzip_decompressor in a single call each, then checks that the output is
  // byte-identical to the input.
  constexpr auto large_size_byte = 4294967296UL;            // 4GiB
  constexpr auto data_size = large_size_byte + 134217728UL; // + 128MiB

  // Fill the buffer with random words: such data is hard to compress, so the
  // compressed stream is expected to remain large as well.
  std::random_device seed_gen;
  std::mt19937 random(seed_gen());
  std::vector<std::uint32_t> data(data_size / sizeof(std::uint32_t));
  for (auto &word : data) {
    word = random();
  }

  // Compress the whole buffer in one call, collecting every emitted chunk.
  std::string compressed_data;
  compressed_data.reserve(large_size_byte + 536870912UL); // + 512MiB reserved
  const auto collect_compressed = [&](const char *chunk, size_t chunk_size) {
    compressed_data.append(chunk, chunk_size);
    return true;
  };
  httplib::detail::gzip_compressor compressor;
  auto result = compressor.compress(reinterpret_cast<const char *>(data.data()),
                                    data.size() * sizeof(std::uint32_t), true,
                                    collect_compressed);
  ASSERT_TRUE(result);

  // FIXME: the compressed size is expected to exceed 4GiB, but nothing
  //        guarantees it, so the check below stays disabled.
  // ASSERT_TRUE(compressed_data.size() >= large_size_byte);

  // Decompress the whole stream in one call, again collecting every chunk.
  std::string decompressed_data;
  decompressed_data.reserve(data_size);
  const auto collect_decompressed = [&](const char *chunk, size_t chunk_size) {
    decompressed_data.append(chunk, chunk_size);
    return true;
  };
  httplib::detail::gzip_decompressor decompressor;
  result = decompressor.decompress(compressed_data.data(),
                                   compressed_data.size(),
                                   collect_decompressed);
  ASSERT_TRUE(result);

  // The round trip must reproduce the input exactly, in size and content.
  ASSERT_EQ(data_size, decompressed_data.size());
  ASSERT_TRUE(std::memcmp(data.data(), decompressed_data.data(), data_size) ==
              0);
}
|
||||
#endif
|
||||
|
||||
#ifdef CPPHTTPLIB_BROTLI_SUPPORT
|
||||
|
|
Loading…
Reference in a new issue