SNode.C
Loading...
Searching...
No Matches
Parser.cpp
Go to the documentation of this file.
1/*
2 * SNode.C - A Slim Toolkit for Network Communication
3 * Copyright (C) Volker Christian <me@vchrist.at>
4 * 2020, 2021, 2022, 2023, 2024, 2025
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20/*
21 * MIT License
22 *
23 * Permission is hereby granted, free of charge, to any person obtaining a copy
24 * of this software and associated documentation files (the "Software"), to deal
25 * in the Software without restriction, including without limitation the rights
26 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27 * copies of the Software, and to permit persons to whom the Software is
28 * furnished to do so, subject to the following conditions:
29 *
30 * The above copyright notice and this permission notice shall be included in
31 * all copies or substantial portions of the Software.
32 *
33 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
39 * THE SOFTWARE.
40 */
41
42#include "web/http/Parser.h"
43
44#include "core/socket/stream/SocketContext.h"
45#include "web/http/decoder/Chunked.h"
46#include "web/http/decoder/HTTP10Response.h"
47#include "web/http/decoder/Identity.h"
48
49#ifndef DOXYGEN_SHOULD_SKIP_THIS
50
51#include "web/http/http_utils.h"
52
53#include <charconv>
54#include <limits>
55#include <system_error>
56#include <tuple>
57#include <utility>
58
59#endif /* DOXYGEN_SHOULD_SKIP_THIS */
60
61namespace web::http {
62
/**
 * Strictly converts a Content-Length field value into a byte count.
 *
 * The complete string has to be a base-10 unsigned number: std::from_chars
 * must succeed, must consume every character, and the resulting value must
 * be representable as std::size_t.
 *
 * @param s   text to convert
 * @param out receives the parsed value on success; left untouched on failure
 * @return true if s was accepted, false otherwise
 */
static bool parseContentLengthStrict(const std::string& s, std::size_t& out) {
    if (s.empty()) {
        return false;
    }

    const char* const begin = s.data();
    const char* const end = begin + s.size();

    unsigned long long parsed = 0;
    const std::from_chars_result result = std::from_chars(begin, end, parsed, 10);

    // Conversion failure (no digits, overflow of unsigned long long).
    if (result.ec != std::errc{}) {
        return false;
    }

    // Trailing characters after the number are not tolerated.
    if (result.ptr != end) {
        return false;
    }

    // Guard against platforms where std::size_t is narrower than unsigned long long.
    const unsigned long long largestLength = static_cast<unsigned long long>(std::numeric_limits<std::size_t>::max());
    if (parsed > largestLength) {
        return false;
    }

    out = static_cast<std::size_t>(parsed);

    return true;
}
82
83 static bool transferEncodingHasChunked(CiStringMap<std::string>& headers) {
84 bool hasChunked = false;
85
86 if (headers.contains("Transfer-Encoding")) {
87 const std::string& encoding = headers["Transfer-Encoding"];
88 hasChunked = web::http::ciContains(encoding, "chunked");
89 }
90
91 return hasChunked;
92 }
93
94 // Accepts exactly "HTTP/1.0" and "HTTP/1.1"; group 1 captures the digit before the dot, group 2 the digit after it.
95 const std::regex Parser::httpVersionRegex("^HTTP/([1])[.]([0-1])$");
96
97 Parser::Parser(core::socket::stream::SocketContext* socketContext, const enum Parser::HTTPCompliance& compliance)
98 : hTTPCompliance(compliance)
99 , socketContext(socketContext)
100 , headerDecoder(socketContext)
101 , trailerDecoder(socketContext) {
102 }
103
105 reset();
106 }
107
108 void Parser::reset() {
110 headers.clear();
111 content.clear();
112 httpMinor = 0;
113 httpMajor = 0;
114 line.clear();
115 contentLength = 0;
117
118 for (const ContentDecoder* contentDecoder : decoderQueue) {
119 delete contentDecoder;
120 }
121 decoderQueue.clear();
122
123 trailerFieldsExpected.clear();
124 }
125
126 std::size_t Parser::parse() {
127 std::size_t ret = 0;
128 std::size_t consumed = 0;
129
130 do {
131 switch (parserState) {
133 begin();
135 [[fallthrough]];
137 ret = readStartLine();
138 break;
140 ret = readHeader();
141 break;
143 ret = readContent();
144 break;
146 ret = readTrailer();
147 break;
149 break;
150 }
151 consumed += ret;
152 } while (ret > 0 && parserState != ParserState::BEGIN && parserState != ParserState::ERROR);
153
154 return consumed;
155 }
156
157 std::size_t Parser::readStartLine() {
158 std::size_t consumed = 0;
159 std::size_t ret = 0;
160
161 do {
162 char ch = 0;
164
165 if (ret > 0) {
166 consumed += ret;
167 if (ch == '\r' || ch == '\n') {
168 if (ch == '\n') {
170 line.clear();
171 }
172 } else {
173 line += ch;
174 }
175 }
176 } while (ret > 0 && parserState == ParserState::FIRSTLINE);
177
178 return consumed;
179 }
180
181 std::size_t Parser::readHeader() {
182 const std::size_t consumed = headerDecoder.read();
183
186 } else if (headerDecoder.isComplete()) {
189 }
190
191 return consumed;
192 }
193
195 bool success = true;
196
197 // Determine message framing.
198 // RFC 9112 §6.3: Transfer-Encoding (chunked) overrides Content-Length.
199 const bool hasChunked = transferEncodingHasChunked(headers);
200
201 if (hasChunked) {
203 decoderQueue.emplace_back(new web::http::decoder::Chunked(socketContext));
204
205 if (headers.contains("Trailer")) {
206 std::string trailers = headers["Trailer"];
207
208 while (!trailers.empty()) {
209 std::string trailerField;
210 std::tie(trailerField, trailers) = httputils::str_split(trailers, ',');
211 httputils::str_trimm(trailerField);
212 trailerFieldsExpected.insert(trailerField);
213 trailerField.clear();
214 }
216 }
217 } else if (headers.contains("Content-Length")) {
218 std::size_t length = 0;
219
220 if (!parseContentLengthStrict(headers["Content-Length"], length)) {
221 parseError(400, "Invalid Content-Length");
222 success = false;
223 } else {
224 contentLength = length;
225 decoderQueue.emplace_back(new web::http::decoder::Identity(socketContext, contentLength));
226 }
227 }
228
229 if (success) {
230 // Transfer-Encoding (other than chunked) is currently not implemented, but we keep the
231 // existing behavior of not altering the decoder queue here.
232 if (headers.contains("Transfer-Encoding")) {
233 const std::string& encoding = headers["Transfer-Encoding"];
234 if (web::http::ciContains(encoding, "compressed")) {
235 // decoderQueue.emplace_back(new web::http::decoder::Compress(socketContext));
236 }
237 if (web::http::ciContains(encoding, "deflate")) {
238 // decoderQueue.emplace_back(new web::http::decoder::Deflate(socketContext));
239 }
240 if (web::http::ciContains(encoding, "gzip")) {
241 // decoderQueue.emplace_back(new web::http::decoder::GZip(socketContext));
242 }
243 }
244
245 if (decoderQueue.empty()) {
246 decoderQueue.emplace_back(new web::http::decoder::HTTP10Response(socketContext));
247 }
248
249 if (headers.contains("Content-Encoding")) {
250 const std::string& encoding = headers["Content-Encoding"];
251
252 if (web::http::ciContains(encoding, "compressed")) {
253 // decoderQueue.emplace_back(new web::http::decoder::Compress(socketContext));
254 }
255 if (web::http::ciContains(encoding, "deflate")) {
256 // decoderQueue.emplace_back(new web::http::decoder::Deflate(socketContext));
257 }
258 if (web::http::ciContains(encoding, "gzip")) {
259 // decoderQueue.emplace_back(new web::http::decoder::GZip(socketContext));
260 }
261 if (web::http::ciContains(encoding, "br")) {
262 // decoderQueue.emplace_back(new web::http::decoder::Br(socketContext));
263 }
264 }
265 }
266 }
267
268 std::size_t Parser::readContent() {
269 ContentDecoder* contentDecoder = decoderQueue.front();
270
271 const std::size_t consumed = contentDecoder->read();
272
273 if (contentDecoder->isComplete()) {
274 contentDecoder = decoderQueue.back();
275
276 std::vector<char> chunk = contentDecoder->getContent();
277 content.insert(content.end(), chunk.begin(), chunk.end());
278
279 if (transferEncoding == TransferEncoding::Chunked && headers.contains("Trailer")) {
281 } else {
283 }
284 } else if (contentDecoder->isError()) {
285 parseError(501, "Wrong content encoding");
286 }
287
288 return consumed;
289 }
290
291 std::size_t Parser::readTrailer() {
292 const std::size_t consumed = trailerDecoder.read();
293
296 } else if (trailerDecoder.isComplete()) {
297 web::http::CiStringMap<std::string>&& trailer = trailerDecoder.getHeader();
298 headers.insert(trailer.begin(), trailer.end());
300 }
301
302 return consumed;
303 }
304
305 enum Parser::HTTPCompliance operator|(const enum Parser::HTTPCompliance& c1, const enum Parser::HTTPCompliance& c2) {
306 return static_cast<enum Parser::HTTPCompliance>(static_cast<unsigned short>(c1) | static_cast<unsigned short>(c2));
307 }
308
309 enum Parser::HTTPCompliance operator&(const enum Parser::HTTPCompliance& c1, const enum Parser::HTTPCompliance& c2) {
310 return static_cast<enum Parser::HTTPCompliance>(static_cast<unsigned short>(c1) & static_cast<unsigned short>(c2));
311 }
312
313} // namespace web::http
std::size_t readFromPeer(char *chunk, std::size_t chunklen) const final
virtual std::size_t read()=0
std::vector< char > && getContent()
ParserState parserState
Definition Parser.h:100
std::size_t parse()
Definition Parser.cpp:126
friend enum HTTPCompliance operator&(const enum HTTPCompliance &c1, const enum HTTPCompliance &c2)
Definition Parser.cpp:309
virtual void parseError(int code, const std::string &reason)=0
virtual ~Parser()
Definition Parser.cpp:104
Parser(core::socket::stream::SocketContext *socketContext, const enum HTTPCompliance &compliance=HTTPCompliance::RFC2616|HTTPCompliance::RFC7230)
Definition Parser.cpp:97
static const std::regex httpVersionRegex
Definition Parser.h:104
friend enum HTTPCompliance operator|(const enum HTTPCompliance &c1, const enum HTTPCompliance &c2)
Definition Parser.cpp:305
std::size_t contentLength
Definition Parser.h:143
virtual void parseStartLine(const std::string &line)=0
std::size_t contentLengthRead
Definition Parser.h:144
web::http::decoder::Fields headerDecoder
Definition Parser.h:130
core::socket::stream::SocketContext * socketContext
Definition Parser.h:127
CiStringMap< std::string > headers
Definition Parser.h:119
std::size_t readContent()
Definition Parser.cpp:268
std::list< web::http::ContentDecoder * > decoderQueue
Definition Parser.h:125
virtual void parsingFinished()=0
virtual void begin()=0
TransferEncoding transferEncoding
Definition Parser.h:102
std::size_t readTrailer()
Definition Parser.cpp:291
std::vector< char > content
Definition Parser.h:120
std::size_t readStartLine()
Definition Parser.cpp:157
virtual void analyzeHeader()
Definition Parser.cpp:194
std::set< std::string > trailerFieldsExpected
Definition Parser.h:132
web::http::decoder::Fields trailerDecoder
Definition Parser.h:133
std::size_t readHeader()
Definition Parser.cpp:181
std::string line
Definition Parser.h:142
Chunked(const core::socket::stream::SocketContext *socketContext)
Definition Chunked.cpp:54
std::string getErrorReason()
Definition Fields.cpp:175
Fields(core::socket::stream::SocketContext *socketContext, std::set< std::string > fieldsExpected={})
Definition Fields.cpp:60
void setFieldsExpected(std::set< std::string > fieldsExpected)
Definition Fields.cpp:66
web::http::CiStringMap< std::string > && getHeader()
Definition Fields.cpp:159
HTTP10Response(const core::socket::stream::SocketContext *socketContext)
Identity(const core::socket::stream::SocketContext *socketContext, std::size_t contentLengthExpected)
Definition Identity.cpp:54
std::pair< std::string, std::string > str_split(const std::string &base, char c_middle)
std::string & str_trimm(std::string &text)
bool ciContains(const std::string &str1, const std::string &str2)
static bool transferEncodingHasChunked(CiStringMap< std::string > &headers)
Definition Parser.cpp:83
static bool parseContentLengthStrict(const std::string &s, std::size_t &out)
Definition Parser.cpp:63