Properly trim whitespace from headers

HTTP whitespace and regex whitespace are not the same, so we can't use
\s in regexes when parsing HTTP headers. Instead, explicitly specify
which characters (space and horizontal tab) are considered whitespace
in the regex.
This commit is contained in:
Matthew DeVore 2019-12-02 15:45:26 -08:00
parent 4c93b973ff
commit a9e942d755
2 changed files with 72 additions and 17 deletions

View file

@ -1357,6 +1357,26 @@ inline bool is_connection_error() {
#endif
}
// Creates a client socket for `host`:`port` via create_socket().
//
// The callback handed to create_socket() puts the socket into non-blocking
// mode and calls ::connect(). If ::connect() returns a negative value, the
// socket is closed and rejected when is_connection_error() is true or when
// wait_until_socket_is_ready(sock, timeout_sec, 0) fails. Otherwise the
// socket is switched back to blocking mode and accepted.
inline socket_t create_client_socket(const char *host, int port,
                                     time_t timeout_sec) {
  auto connect_with_timeout = [=](socket_t sock, struct addrinfo &ai) -> bool {
    set_nonblocking(sock, true);

    const auto rc =
        ::connect(sock, ai.ai_addr, static_cast<int>(ai.ai_addrlen));

    // Short-circuit order matters: the helpers are only consulted when
    // ::connect() itself reported a failure.
    const bool failed =
        rc < 0 && (is_connection_error() ||
                   !wait_until_socket_is_ready(sock, timeout_sec, 0));
    if (failed) {
      close_socket(sock);
      return false;
    }

    set_nonblocking(sock, false);
    return true;
  };

  return create_socket(host, port, connect_with_timeout);
}
inline std::string get_remote_addr(socket_t sock) {
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
@ -1542,7 +1562,11 @@ inline uint64_t get_header_value_uint64(const Headers &headers, const char *key,
}
inline bool read_headers(Stream &strm, Headers &headers) {
static std::regex re(R"((.+?):\s*(.+?)\s*\r\n)");
// Horizontal tab and ' ' are considered whitespace and are ignored when on
// the left or right side of the header value:
// - https://stackoverflow.com/questions/50179659/
// - https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html
static std::regex re(R"((.+?):[\t ]*(.+?)[\t ]*\r\n)");
const auto bufsiz = 2048;
char buf[bufsiz];
@ -3166,22 +3190,7 @@ inline Client::~Client() {}
// Validity check for the client; this implementation unconditionally
// returns true.
inline bool Client::is_valid() const {
  return true;
}
// Creates the socket for this client by delegating to
// detail::create_client_socket() with the stored host_, port_ and
// timeout_sec_. The connect-and-wait logic lives in that helper so it is
// shared rather than duplicated in the member function.
inline socket_t Client::create_client_socket() const {
  return detail::create_client_socket(host_.c_str(), port_, timeout_sec_);
}
inline bool Client::read_response_line(Stream &strm, Response &res) {

View file

@ -1766,6 +1766,52 @@ TEST_F(ServerTest, MultipartFormDataGzip) {
}
#endif
// Sends a raw request whose "foo" header value is padded with spaces and
// horizontal tabs and also contains a vertical tab and an ESC byte, then
// checks that the handler saw the value with only the spaces/tabs trimmed.
TEST(ServerRequestParsingTest, TrimWhitespaceFromHeaderValues) {
  Server svr;
  std::string header_value;
  svr.Get("/validate-ws-in-headers",
          [&](const Request &req, Response &res) {
            header_value = req.get_header_value("foo");
            res.set_content("ok", "text/plain");
          });

  thread t = thread([&] { svr.listen(HOST, PORT); });
  while (!svr.is_running()) {
    msleep(1);
  }

  // Only space and horizontal tab are whitespace. Make sure other whitespace-
  // like characters are not treated the same - use vertical tab and escape.
  auto client_sock =
      detail::create_client_socket(HOST, PORT, /*timeout_sec=*/5);
  const bool sock_ok = client_sock != INVALID_SOCKET;

  // ESC is written as \x1B: "\e" is a compiler extension, not a standard
  // C++ escape sequence.
  const std::string req =
      "GET /validate-ws-in-headers HTTP/1.1\r\n"
      "foo: \t \v bar \x1B\t \r\n"
      "Connection: close\r\n"
      "\r\n";

  // Skipped entirely when the socket could not be created.
  const bool process_ok =
      sock_ok &&
      detail::process_and_close_socket(
          true, client_sock, 1, 5, 0,
          [&](Stream &strm, bool /*last_connection*/,
              bool & /*connection_close*/) -> bool {
            if (req.size() !=
                static_cast<size_t>(strm.write(req.data(), req.size()))) {
              return false;
            }

            // Drain the response so the server finishes handling the request.
            char buf[512];
            detail::stream_line_reader line_reader(strm, buf, sizeof(buf));
            while (line_reader.getline()) {}
            return true;
          });

  // Stop the server and join the listener before any fatal assertion: a
  // failing ASSERT_* returns from the test body, and destroying a joinable
  // std::thread calls std::terminate.
  svr.stop();
  t.join();

  ASSERT_TRUE(sock_ok);
  ASSERT_TRUE(process_ok);
  EXPECT_EQ(header_value, "\v bar \x1B");
}
class ServerTestWithAI_PASSIVE : public ::testing::Test {
protected:
ServerTestWithAI_PASSIVE()