Fix gzip compression/decompression over 4 GiB data size (#1002)

* Fix gzip compression/decompression over 4 GiB data size

* Add gzip test for large random data
This commit is contained in:
yosh-matsuda 2021-07-23 11:07:40 +09:00 committed by GitHub
parent 52f5eb5980
commit 879dd261c2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 106 additions and 35 deletions

View file

@ -2578,28 +2578,40 @@ public:
Callback callback) override {
assert(is_valid_);
auto flush = last ? Z_FINISH : Z_NO_FLUSH;
strm_.avail_in = static_cast<decltype(strm_.avail_in)>(data_length);
strm_.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
int ret = Z_OK;
std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
do {
strm_.avail_out = static_cast<uInt>(buff.size());
strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
constexpr size_t max_avail_in =
std::numeric_limits<decltype(strm_.avail_in)>::max();
ret = deflate(&strm_, flush);
if (ret == Z_STREAM_ERROR) { return false; }
strm_.avail_in = static_cast<decltype(strm_.avail_in)>(
std::min(data_length, max_avail_in));
strm_.next_in =
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
return false;
}
} while (strm_.avail_out == 0);
data_length -= strm_.avail_in;
data += strm_.avail_in;
auto flush = (last && data_length == 0) ? Z_FINISH : Z_NO_FLUSH;
int ret = Z_OK;
std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
do {
strm_.avail_out = static_cast<uInt>(buff.size());
strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
ret = deflate(&strm_, flush);
if (ret == Z_STREAM_ERROR) { return false; }
if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
return false;
}
} while (strm_.avail_out == 0);
assert((flush == Z_FINISH && ret == Z_STREAM_END) ||
(flush == Z_NO_FLUSH && ret == Z_OK));
assert(strm_.avail_in == 0);
} while (data_length > 0);
assert((last && ret == Z_STREAM_END) || (!last && ret == Z_OK));
assert(strm_.avail_in == 0);
return true;
}
@ -2633,28 +2645,41 @@ public:
int ret = Z_OK;
strm_.avail_in = static_cast<decltype(strm_.avail_in)>(data_length);
strm_.next_in = const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
do {
constexpr size_t max_avail_in =
std::numeric_limits<decltype(strm_.avail_in)>::max();
std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
while (strm_.avail_in > 0) {
strm_.avail_out = static_cast<uInt>(buff.size());
strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
strm_.avail_in = static_cast<decltype(strm_.avail_in)>(
std::min(data_length, max_avail_in));
strm_.next_in =
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(data));
ret = inflate(&strm_, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
switch (ret) {
case Z_NEED_DICT:
case Z_DATA_ERROR:
case Z_MEM_ERROR: inflateEnd(&strm_); return false;
data_length -= strm_.avail_in;
data += strm_.avail_in;
std::array<char, CPPHTTPLIB_COMPRESSION_BUFSIZ> buff{};
while (strm_.avail_in > 0) {
strm_.avail_out = static_cast<uInt>(buff.size());
strm_.next_out = reinterpret_cast<Bytef *>(buff.data());
ret = inflate(&strm_, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
switch (ret) {
case Z_NEED_DICT:
case Z_DATA_ERROR:
case Z_MEM_ERROR: inflateEnd(&strm_); return false;
}
if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
return false;
}
}
if (!callback(buff.data(), buff.size() - strm_.avail_out)) {
return false;
}
}
if (ret != Z_OK && ret != Z_STREAM_END) return false;
return ret == Z_OK || ret == Z_STREAM_END;
} while (data_length > 0);
return true;
}
private:

View file

@ -2868,6 +2868,52 @@ TEST(GzipDecompressor, ChunkedDecompression) {
}
ASSERT_EQ(data, decompressed_data);
}
// Round-trips a payload slightly larger than 4 GiB through gzip_compressor and
// gzip_decompressor and verifies that the decompressed bytes equal the input.
TEST(GzipDecompressor, LargeRandomData) {
  constexpr auto four_gib = 4294967296UL;               // 4GiB
  constexpr auto payload_bytes = four_gib + 134217728UL; // + 128MiB

  // Fill the payload with random words: random data is hard to compress, so
  // the compressed stream is expected to remain large as well.
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::vector<std::uint32_t> payload(payload_bytes / sizeof(std::uint32_t));
  for (auto &word : payload) {
    word = engine();
  }

  // Compress the whole payload in a single call, collecting every chunk the
  // compressor hands to the callback.
  std::string packed;
  packed.reserve(four_gib + 536870912UL); // + 512MiB reserved
  httplib::detail::gzip_compressor compressor;
  const auto *raw_input = reinterpret_cast<const char *>(payload.data());
  const auto raw_input_size = payload.size() * sizeof(std::uint32_t);
  const auto compress_ok = compressor.compress(
      raw_input, raw_input_size, true,
      [&](const char *chunk, size_t chunk_size) {
        packed.append(chunk, chunk_size);
        return true;
      });
  ASSERT_TRUE(compress_ok);

  // FIXME: the compressed size is expected to exceed 4GiB, but nothing
  // guarantees it, so the check below stays disabled.
  // ASSERT_TRUE(packed.size() >= four_gib);

  // Decompress the stream in a single call, again collecting every chunk.
  std::string unpacked;
  unpacked.reserve(payload_bytes);
  httplib::detail::gzip_decompressor decompressor;
  const auto decompress_ok = decompressor.decompress(
      packed.data(), packed.size(),
      [&](const char *chunk, size_t chunk_size) {
        unpacked.append(chunk, chunk_size);
        return true;
      });
  ASSERT_TRUE(decompress_ok);

  // The round trip must reproduce the payload exactly: same size, same bytes.
  ASSERT_EQ(payload_bytes, unpacked.size());
  ASSERT_TRUE(std::memcmp(payload.data(), unpacked.data(), payload_bytes) ==
              0);
}
#endif
#ifdef CPPHTTPLIB_BROTLI_SUPPORT