MQTTSuite
Loading...
Searching...
No Matches
string-format-check.cpp
Go to the documentation of this file.
1#include <nlohmann/json-schema.hpp>
2
4
5#include <algorithm>
6#include <exception>
7#include <iostream>
8#include <regex>
9#include <sstream>
10#include <string>
11#include <utility>
12#include <vector>
13
14#ifdef JSON_SCHEMA_BOOST_REGEX
15# include <boost/regex.hpp>
16# define REGEX_NAMESPACE boost
17#elif defined(JSON_SCHEMA_NO_REGEX)
18# define NO_STD_REGEX
19#else
20# include <regex>
21# define REGEX_NAMESPACE std
22#endif
23
24/**
25 * Many of the RegExes are from @see http://jmrware.com/articles/2009/uri_regexp/URI_regex.html
26 */
27
28namespace
29{
/**
 * Ensures that @p value lies within the closed interval [@p min, @p max].
 *
 * @tparam T comparable type that can be streamed into a std::ostream
 * @throws std::invalid_argument naming the value and the interval when the
 *         value is outside of it
 */
template <typename T>
void range_check(const T value, const T min, const T max)
{
    // Happy path first: nothing to report when the value is inside the bounds.
    if ((value >= min) && (value <= max)) {
        return;
    }

    std::stringstream message;
    message << "Value " << value;
    message << " should be in interval [" << min << "," << max << "] but is not!";
    throw std::invalid_argument(message.str());
}
39
40/** @see date_time_check */
41void rfc3339_date_check(const std::string &value)
42{
43 const static REGEX_NAMESPACE::regex dateRegex{R"(^([0-9]{4})\-([0-9]{2})\-([0-9]{2})$)"};
44
45 REGEX_NAMESPACE::smatch matches;
46 if (!REGEX_NAMESPACE::regex_match(value, matches, dateRegex)) {
47 throw std::invalid_argument(value + " is not a date string according to RFC 3339.");
48 }
49
50 const auto year = std::stoi(matches[1].str());
51 const auto month = std::stoi(matches[2].str());
52 const auto mday = std::stoi(matches[3].str());
53
54 const auto isLeapYear = (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0));
55
56 range_check(month, 1, 12);
57 if (month == 2) {
58 range_check(mday, 1, isLeapYear ? 29 : 28);
59 } else if (month <= 7) {
60 range_check(mday, 1, month % 2 == 0 ? 30 : 31);
61 } else {
62 range_check(mday, 1, month % 2 == 0 ? 31 : 30);
63 }
64}
65
66/** @see date_time_check */
67void rfc3339_time_check(const std::string &value)
68{
69 const static REGEX_NAMESPACE::regex timeRegex{R"(^([0-9]{2})\:([0-9]{2})\:([0-9]{2})(\.[0-9]+)?(?:[Zz]|((?:\+|\-)[0-9]{2})\:([0-9]{2}))$)"};
70
71 REGEX_NAMESPACE::smatch matches;
72 if (!REGEX_NAMESPACE::regex_match(value, matches, timeRegex)) {
73 throw std::invalid_argument(value + " is not a time string according to RFC 3339.");
74 }
75
76 auto hour = std::stoi(matches[1].str());
77 auto minute = std::stoi(matches[2].str());
78 auto second = std::stoi(matches[3].str());
79 // const auto secfrac = std::stof( matches[4].str() );
80
81 range_check(hour, 0, 23);
82 range_check(minute, 0, 59);
83
84 int offsetHour = 0,
85 offsetMinute = 0;
86
87 /* don't check the numerical offset if time zone is specified as 'Z' */
88 if (!matches[5].str().empty()) {
89 offsetHour = std::stoi(matches[5].str());
90 offsetMinute = std::stoi(matches[6].str());
91
92 range_check(offsetHour, -23, 23);
93 range_check(offsetMinute, 0, 59);
94 if (offsetHour < 0)
95 offsetMinute *= -1;
96 }
97
98 /**
99 * @todo Could be made more exact by querying a leap second database and choosing the
100 * correct maximum in {58,59,60}. This current solution might match some invalid dates
101 * but it won't lead to false negatives. This only works if we know the full date, however
102 */
103
104 auto day_minutes = hour * 60 + minute - (offsetHour * 60 + offsetMinute);
105 if (day_minutes < 0)
106 day_minutes += 60 * 24;
107 hour = day_minutes % 24;
108 minute = day_minutes / 24;
109
110 if (hour == 23 && minute == 59)
111 range_check(second, 0, 60); // possible leap-second
112 else
113 range_check(second, 0, 59);
114}
115
116/**
117 * @see https://tools.ietf.org/html/rfc3339#section-5.6
118 *
119 * @verbatim
120 * date-fullyear = 4DIGIT
121 * date-month = 2DIGIT ; 01-12
122 * date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
123 * ; month/year
124 * time-hour = 2DIGIT ; 00-23
125 * time-minute = 2DIGIT ; 00-59
126 * time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second
127 * ; rules
128 * time-secfrac = "." 1*DIGIT
129 * time-numoffset = ("+" / "-") time-hour ":" time-minute
130 * time-offset = "Z" / time-numoffset
131 *
132 * partial-time = time-hour ":" time-minute ":" time-second
133 * [time-secfrac]
134 * full-date = date-fullyear "-" date-month "-" date-mday
135 * full-time = partial-time time-offset
136 *
137 * date-time = full-date "T" full-time
138 * @endverbatim
139 * NOTE: Per [ABNF] and ISO8601, the "T" and "Z" characters in this
140 * syntax may alternatively be lower case "t" or "z" respectively.
141 */
142void rfc3339_date_time_check(const std::string &value)
143{
144 const static REGEX_NAMESPACE::regex dateTimeRegex{R"(^([0-9]{4}\-[0-9]{2}\-[0-9]{2})[Tt]([0-9]{2}\:[0-9]{2}\:[0-9]{2}(?:\.[0-9]+)?(?:[Zz]|(?:\+|\-)[0-9]{2}\:[0-9]{2}))$)"};
145
146 REGEX_NAMESPACE::smatch matches;
147 if (!REGEX_NAMESPACE::regex_match(value, matches, dateTimeRegex)) {
148 throw std::invalid_argument(value + " is not a date-time string according to RFC 3339.");
149 }
150
151 rfc3339_date_check(matches[1].str());
152 rfc3339_time_check(matches[2].str());
153}
154
// Regular-expression fragments following the RFC 3986 host grammar. They are
// plain strings so that they can be combined into larger patterns; none of them
// is anchored unless noted.
const std::string decOctet{R"((?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9]))"}; // matches numbers 0-255
const std::string ipv4Address{"(?:" + decOctet + R"(\.){3})" + decOctet};
const std::string h16{R"([0-9A-Fa-f]{1,4})"};  // 1 to 4 hex digits
const std::string h16Left{"(?:" + h16 + ":)"}; // h16 followed by ':'
const std::string ipv6Address{
    "(?:(?:" +
    h16Left + "{6}" +                                               //                            6( h16 ":" ) ls32
    "|::" + h16Left + "{5}" +                                       //                       "::" 5( h16 ":" ) ls32
    "|(?:" + h16 + ")?::" + h16Left + "{4}" +                       // [               h16 ] "::" 4( h16 ":" ) ls32
    "|(?:" + h16Left + "{0,1}" + h16 + ")?::" + h16Left + "{3}" +   // [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    "|(?:" + h16Left + "{0,2}" + h16 + ")?::" + h16Left + "{2}" +   // [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    "|(?:" + h16Left + "{0,3}" + h16 + ")?::" + h16Left +           // [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    "|(?:" + h16Left + "{0,4}" + h16 + ")?::" +                     // [ *4( h16 ":" ) h16 ] "::"              ls32
    ")(?:" + h16Left + h16 + "|" + ipv4Address + ")" +              // ls32 = ( h16 ":" h16 ) / IPv4address
    "|(?:" + h16Left + "{0,5}" + h16 + ")?::" + h16 +               // [ *5( h16 ":" ) h16 ] "::"              h16
    "|(?:" + h16Left + "{0,6}" + h16 + ")?::" +                     // [ *6( h16 ":" ) h16 ] "::"
    ")"};
const std::string ipvFuture{R"([Vv][0-9A-Fa-f]+\.[A-Za-z0-9\-._~!$&'()*+,;=:]+)"};
const std::string regName{R"((?:[A-Za-z0-9\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*)"};
// host = IP-literal / IPv4address / reg-name, where IP-literal is a bracketed
// IPv6 or IPvFuture address. The brackets must be escaped as plain "\[" / "\]".
const std::string host{
    "(?:"
    R"(\[(?:)" +
    ipv6Address + "|" + ipvFuture + R"()\])" +
    "|" + ipv4Address +
    "|" + regName +
    ")"};

const std::string uuid{R"([0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12})"};

// from http://stackoverflow.com/questions/106179/regular-expression-to-match-dns-hostname-or-ip-address
// Dot-separated labels of 1-63 characters that start and end with an alphanumeric character.
const std::string hostname{R"(^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])(\.([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]{0,61}[a-zA-Z0-9]))*$)"};
194
/**
 * Tells whether @p value consists solely of 7-bit characters.
 *
 * @return true if no character has its high bit (0x80) set; true for an empty string
 */
bool is_ascii(std::string const &value)
{
    const auto hasHighBit = [](const char ch) { return (ch & 0x80) != 0; };
    return std::none_of(value.begin(), value.end(), hasHighBit);
}
204
205/**
206 * @see https://tools.ietf.org/html/rfc3986#appendix-A
207 *
208 * @verbatim
209 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
210 *
211 * hier-part = "//" authority path-abempty
212 * / path-absolute
213 * / path-rootless
214 * / path-empty
215 *
216 * URI-reference = URI / relative-ref
217 *
218 * absolute-URI = scheme ":" hier-part [ "?" query ]
219 *
220 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
221 *
222 * relative-part = "//" authority path-abempty
223 * / path-absolute
224 * / path-noscheme
225 * / path-empty
226 *
227 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
228 *
229 * authority = [ userinfo "@" ] host [ ":" port ]
230 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
231 * host = IP-literal / IPv4address / reg-name
232 * port = *DIGIT
233 *
234 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
235 *
236 * IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
237 *
238 * IPv6address = 6( h16 ":" ) ls32
239 * / "::" 5( h16 ":" ) ls32
240 * / [ h16 ] "::" 4( h16 ":" ) ls32
241 * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
242 * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
243 * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
244 * / [ *4( h16 ":" ) h16 ] "::" ls32
245 * / [ *5( h16 ":" ) h16 ] "::" h16
246 * / [ *6( h16 ":" ) h16 ] "::"
247 *
248 * h16 = 1*4HEXDIG
249 * ls32 = ( h16 ":" h16 ) / IPv4address
250 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
251 * dec-octet = DIGIT ; 0-9
252 * / %x31-39 DIGIT ; 10-99
253 * / "1" 2DIGIT ; 100-199
254 * / "2" %x30-34 DIGIT ; 200-249
255 * / "25" %x30-35 ; 250-255
256 *
257 * reg-name = *( unreserved / pct-encoded / sub-delims )
258 *
259 * path = path-abempty ; begins with "/" or is empty
260 * / path-absolute ; begins with "/" but not "//"
261 * / path-noscheme ; begins with a non-colon segment
262 * / path-rootless ; begins with a segment
263 * / path-empty ; zero characters
264 *
265 * path-abempty = *( "/" segment )
266 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
267 * path-noscheme = segment-nz-nc *( "/" segment )
268 * path-rootless = segment-nz *( "/" segment )
269 * path-empty = 0<pchar>
270 *
271 * segment = *pchar
272 * segment-nz = 1*pchar
273 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
274 * ; non-zero-length segment without any colon ":"
275 *
276 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
277 *
278 * query = *( pchar / "/" / "?" )
279 *
280 * fragment = *( pchar / "/" / "?" )
281 *
282 * pct-encoded = "%" HEXDIG HEXDIG
283 *
284 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
285 * reserved = gen-delims / sub-delims
286 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
287 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
288 * / "*" / "+" / "," / ";" / "="
289 *
290 * @endverbatim
291 * @see adapted from: https://github.com/jhermsmeier/uri.regex/blob/master/uri.regex
292 *
293 */
// Validates that `value` is a URI (scheme ":" hier-part [ "?" query ] [ "#" fragment ])
// by matching it against a single regular expression assembled from the parts below.
// Relative references (no scheme) are rejected.
// Throws std::invalid_argument if the string does not match.
void rfc3986_uri_check(const std::string &value)
{
    const static std::string scheme{R"(([A-Za-z][A-Za-z0-9+\-.]*):)"};
    // hier-part: "//" authority path-abempty / path-absolute / path-rootless / path-empty.
    // The IP-literal brackets must be written as plain "\[" and "\]"; any stray
    // character between the backslash and the bracket corrupts the pattern.
    const static std::string hierPart{
        R"((?:(\/\/)(?:((?:[A-Za-z0-9\-._~!$&'()*+,;=:]|)"
        R"(%[0-9A-Fa-f]{2})*)@)?((?:\[(?:(?:(?:(?:[0-9A-Fa-f]{1,4}:){6}|)"
        R"(::(?:[0-9A-Fa-f]{1,4}:){5}|)"
        R"((?:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){4}|)"
        R"((?:(?:[0-9A-Fa-f]{1,4}:){0,1}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){3}|)"
        R"((?:(?:[0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){2}|)"
        R"((?:(?:[0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}:|)"
        R"((?:(?:[0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})?::)(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|)"
        R"((?:(?:25[0-5]|2[0-4][0-9]|)"
        R"([01]?[0-9][0-9]?)\.){3}(?:25[0-5]|)"
        R"(2[0-4][0-9]|)"
        R"([01]?[0-9][0-9]?))|)"
        R"((?:(?:[0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}|)"
        R"((?:(?:[0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})?::)|)"
        R"([Vv][0-9A-Fa-f]+\.[A-Za-z0-9\-._~!$&'()*+,;=:]+)\]|)"
        R"((?:(?:25[0-5]|)"
        R"(2[0-4][0-9]|)"
        R"([01]?[0-9][0-9]?)\.){3}(?:25[0-5]|)"
        R"(2[0-4][0-9]|)"
        R"([01]?[0-9][0-9]?)|)"
        R"((?:[A-Za-z0-9\-._~!$&'()*+,;=]|)"
        R"(%[0-9A-Fa-f]{2})*))(?::([0-9]*))?((?:\/(?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|)"
        R"(%[0-9A-Fa-f]{2})*)*)|)"
        R"(\/((?:(?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|)"
        R"(%[0-9A-Fa-f]{2})+(?:\/(?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|)"
        R"(%[0-9A-Fa-f]{2})*)*)?)|)"
        R"(((?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|)"
        R"(%[0-9A-Fa-f]{2})+(?:\/(?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|)"
        R"(%[0-9A-Fa-f]{2})*)*)|))"};

    // Optional "?query" and "#fragment" parts.
    const static std::string query{R"((?:\?((?:[A-Za-z0-9\-._~!$&'()*+,;=:@\/?]|%[0-9A-Fa-f]{2})*))?)"};
    const static std::string fragment{
        R"((?:\#((?:[A-Za-z0-9\-._~!$&'()*+,;=:@\/?]|%[0-9A-Fa-f]{2})*))?)"};
    const static std::string uriFormat{scheme + hierPart + query + fragment};

    // Compiled once; regex_match requires the whole string to match, so no anchors are needed.
    const static REGEX_NAMESPACE::regex uriRegex{uriFormat};

    if (!REGEX_NAMESPACE::regex_match(value, uriRegex)) {
        throw std::invalid_argument(value + " is not a URI string according to RFC 3986.");
    }
}
339
340} // namespace
341
342namespace nlohmann
343{
344namespace json_schema
345{
346/**
347 * Checks validity for built-ins by converting the definitions given as ABNF in the linked RFC from
348 * @see https://json-schema.org/understanding-json-schema/reference/string.html#built-in-formats
349 * into regular expressions using @see https://www.msweet.org/abnf/ and some manual editing.
350 *
351 * @see https://json-schema.org/latest/json-schema-validation.html
352 */
353void default_string_format_check(const std::string &format, const std::string &value)
354{
355 if (format == "date-time") {
357 } else if (format == "date") {
359 } else if (format == "time") {
361 } else if (format == "uri") {
363 } else if (format == "email") {
364 if (!is_ascii(value)) {
365 throw std::invalid_argument(value + " contains non-ASCII values, not RFC 5321 compliant.");
366 }
367 if (!is_address(&*value.begin(), &*value.end())) {
368 throw std::invalid_argument(value + " is not a valid email according to RFC 5321.");
369 }
370 } else if (format == "idn-email") {
371 if (!is_address(&*value.begin(), &*value.end())) {
372 throw std::invalid_argument(value + " is not a valid idn-email according to RFC 6531.");
373 }
374 } else if (format == "hostname") {
375 static const REGEX_NAMESPACE::regex hostRegex{hostname};
376 if (!REGEX_NAMESPACE::regex_match(value, hostRegex)) {
377 throw std::invalid_argument(value + " is not a valid hostname according to RFC 3986 Appendix A.");
378 }
379 } else if (format == "ipv4") {
380 const static REGEX_NAMESPACE::regex ipv4Regex{"^" + ipv4Address + "$"};
381 if (!REGEX_NAMESPACE::regex_match(value, ipv4Regex)) {
382 throw std::invalid_argument(value + " is not an IPv4 string according to RFC 2673.");
383 }
384 } else if (format == "ipv6") {
385 static const REGEX_NAMESPACE::regex ipv6Regex{ipv6Address};
386 if (!REGEX_NAMESPACE::regex_match(value, ipv6Regex)) {
387 throw std::invalid_argument(value + " is not an IPv6 string according to RFC 5954.");
388 }
389 } else if (format == "uuid") {
390 static const REGEX_NAMESPACE::regex uuidRegex{uuid};
391 if (!REGEX_NAMESPACE::regex_match(value, uuidRegex)) {
392 throw std::invalid_argument(value + " is not an uuid string according to RFC 4122.");
393 }
394 } else if (format == "regex") {
395 try {
396 REGEX_NAMESPACE::regex re(value, std::regex::ECMAScript);
397 } catch (std::exception &exception) {
398 throw exception;
399 }
400 } else {
401 /* yet unsupported JSON schema draft 7 built-ins */
402 static const std::vector<std::string> jsonSchemaStringFormatBuiltIns{
403 "date-time", "time", "date", "email", "idn-email", "hostname", "idn-hostname", "ipv4", "ipv6", "uri",
404 "uri-reference", "iri", "iri-reference", "uri-template", "json-pointer", "relative-json-pointer", "regex"};
405 if (std::find(jsonSchemaStringFormatBuiltIns.begin(), jsonSchemaStringFormatBuiltIns.end(), format) != jsonSchemaStringFormatBuiltIns.end()) {
406 throw std::logic_error("JSON schema string format built-in " + format + " not yet supported. " +
407 "Please open an issue or use a custom format checker.");
408 }
409
410 throw std::logic_error("Don't know how to validate " + format);
411 }
412}
413} // namespace json_schema
414} // namespace nlohmann
#define REGEX_NAMESPACE
void range_check(const T value, const T min, const T max)
void rfc3339_date_time_check(const std::string &value)
void rfc3339_time_check(const std::string &value)
void rfc3339_date_check(const std::string &value)
void default_string_format_check(const std::string &format, const std::string &value)
bool is_address(const char *p, const char *pe)