8#include <lloyal/nlohmann/json.hpp>
17#include <unordered_map>
18#include <unordered_set>
/// JSON value type used for every schema handled in this file
/// (an alias for nlohmann's ordered_json).
35using json = nlohmann::ordered_json;
/// Callback of shape (const std::string &, const std::string &) -> std::string.
/// A lambda with this signature near the end of the file forwards its
/// (name, rule) arguments to SchemaConverter::_add_rule and returns the result;
/// presumably that lambda is what gets stored here.
/// NOTE(review): the owning struct is not visible in this listing - confirm.
40 std::function<std::string(
const std::string &,
const std::string &)>
add_rule;
/// Right-hand side of a whitespace grammar rule, written as three alternatives:
/// nothing at all (the text before the leading `|` is empty), a single space,
/// or one to two newlines followed by up to 20 spaces/tabs.
/// NOTE(review): presumably registered under the name `space`, which the
/// built-in rule bodies reference - the registration is not visible here.
77inline constexpr const char *
SPACE_RULE =
"| \" \" | \"\\n\"{1,2} [ \\t]{0,20}";
/// Names of the other built-in rules that this rule's content refers to
/// (e.g. the "integer" entry lists "integral-part").
/// SchemaConverter::_add_primitive walks this list and registers every
/// dependency that is not yet present in the rule table.
81 std::vector<std::string>
deps;
92 {
"boolean", {
"(\"true\" | \"false\") space", {}}},
93 {
"decimal-part", {
"[0-9]{1,16}", {}}},
94 {
"integral-part", {
"[0] | [1-9] [0-9]{0,15}", {}}},
96 {
"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? "
97 "integral-part)? space",
98 {
"integral-part",
"decimal-part"}}},
99 {
"integer", {
"(\"-\"? integral-part) space", {
"integral-part"}}},
101 {
"object | array | string | number | boolean | null",
102 {
"object",
"array",
"string",
"number",
"boolean",
"null"}}},
104 {
"\"{\" space ( string \":\" space value (\",\" space string \":\" space "
105 "value)* )? \"}\" space",
106 {
"string",
"value"}}},
108 {
"\"[\" space ( value (\",\" space value)* )? \"]\" space", {
"value"}}},
110 {
"\"\\\"\" [0-9a-fA-F]{8} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" "
111 "[0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{12} \"\\\"\" space",
114 {
"[^\"\\\\\\x7F\\x00-\\x1F] | [\\\\] ([\"\\\\bfnrt] | \"u\" "
117 {
"string", {
"\"\\\"\" char* \"\\\"\" space", {
"char"}}},
118 {
"null", {
"\"null\" space", {}}},
130 {
"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | "
131 "[1-2] [0-9] | \"3\" [0-1] )",
134 {
"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" "
135 "[0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) "
136 "\":\" [0-5] [0-9] )",
138 {
"date-time", {
"date \"T\" time", {
"date",
"time"}}},
139 {
"date-string", {
"\"\\\"\" date \"\\\"\" space", {
"date"}}},
140 {
"time-string", {
"\"\\\"\" time \"\\\"\" space", {
"time"}}},
142 {
"\"\\\"\" date-time \"\\\"\" space", {
"date-time"}}}};
157 static std::unordered_set<std::string> RESERVED_NAMES;
158 if (RESERVED_NAMES.empty()) {
159 RESERVED_NAMES.insert(
"root");
161 RESERVED_NAMES.insert(p.first);
163 RESERVED_NAMES.insert(p.first);
165 return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
174 {
'\r',
"\\r"}, {
'\n',
"\\n"}, {
'"',
"\\\""}, {
'-',
"\\-"}, {
']',
"\\]"}};
177 '|',
'.',
'(',
')',
'[',
']',
'{',
'}',
'*',
'+',
'?'};
179 '^',
'$',
'.',
'[',
']',
'(',
')',
'|',
'{',
'}',
'*',
'+',
'?'};
185 const std::string &separator_rule =
"") {
186 auto has_max = max_items != std::numeric_limits<int>::max();
188 if (max_items == 0) {
191 if (min_items == 0 && max_items == 1) {
192 return item_rule +
"?";
195 if (separator_rule.empty()) {
196 if (min_items == 1 && !has_max) {
197 return item_rule +
"+";
198 }
else if (min_items == 0 && !has_max) {
199 return item_rule +
"*";
201 return item_rule +
"{" + std::to_string(min_items) +
"," +
202 (has_max ? std::to_string(max_items) :
"") +
"}";
206 auto result = item_rule +
" " +
208 min_items == 0 ? 0 : min_items - 1,
209 has_max ? max_items - 1 : max_items);
210 if (min_items == 0) {
211 result =
"(" + result +
")?";
217 std::stringstream &out,
int decimals_left = 16,
218 bool top_level =
true) {
219 auto has_min = min_value != std::numeric_limits<int>::min();
220 auto has_max = max_value != std::numeric_limits<int>::max();
222 auto digit_range = [&](
char from,
char to) {
227 out << from <<
"-" << to;
231 auto more_digits = [&](
int min_digits,
int max_digits) {
233 if (min_digits == max_digits && min_digits == 1) {
238 if (max_digits != min_digits) {
240 if (max_digits != std::numeric_limits<int>::max()) {
246 std::function<void(
const std::string_view &,
const std::string_view &)>
248 [&](
const std::string_view &from,
const std::string_view &to) {
250 while (i < from.length() && i < to.length() && from[i] == to[i]) {
254 out <<
"\"" << from.substr(0, i) <<
"\"";
256 if (i < from.length() && i < to.length()) {
260 auto sub_len = from.length() - i - 1;
262 auto from_sub = from.substr(i + 1);
263 auto to_sub = to.substr(i + 1);
267 auto to_reached =
false;
269 if (from_sub == sub_zeros) {
270 digit_range(from[i], to[i] - 1);
272 more_digits(sub_len, sub_len);
274 out <<
"[" << from[i] <<
"] ";
276 uniform_range(from_sub, sub_nines);
278 if (from[i] < to[i] - 1) {
280 if (to_sub == sub_nines) {
281 digit_range(from[i] + 1, to[i]);
284 digit_range(from[i] + 1, to[i] - 1);
287 more_digits(sub_len, sub_len);
292 digit_range(to[i], to[i]);
294 uniform_range(sub_zeros, to_sub);
298 out <<
"[" << from[i] <<
"-" << to[i] <<
"]";
303 if (has_min && has_max) {
304 if (min_value < 0 && max_value < 0) {
320 auto min_s = std::to_string(min_value);
321 auto max_s = std::to_string(max_value);
322 auto min_digits = min_s.length();
323 auto max_digits = max_s.length();
325 for (
auto digits = min_digits; digits < max_digits; digits++) {
330 uniform_range(min_s, max_s);
334 auto less_decimals = std::max(decimals_left - 1, 1);
340 decimals_left,
false);
341 out <<
") | [0] | [1-9] ";
342 more_digits(0, decimals_left - 1);
343 }
else if (min_value == 0) {
345 out <<
"[0] | [1-9] ";
346 more_digits(0, less_decimals);
348 more_digits(1, decimals_left);
350 }
else if (min_value <= 9) {
351 char c =
'0' + min_value;
352 auto range_start = top_level ?
'1' :
'0';
353 if (c > range_start) {
354 digit_range(range_start, c - 1);
356 more_digits(1, less_decimals);
361 more_digits(0, less_decimals);
363 auto min_s = std::to_string(min_value);
364 auto len = min_s.length();
368 digit_range(top_level ?
'1' :
'0', c - 1);
370 more_digits(len, less_decimals);
376 std::numeric_limits<int>::max(), out, less_decimals,
381 digit_range(c + 1,
'9');
383 more_digits(len - 1, less_decimals);
390 if (max_value >= 0) {
392 out <<
"\"-\" [1-9] ";
393 more_digits(0, less_decimals);
401 decimals_left,
false);
407 throw std::runtime_error(
408 "At least one of min_value or max_value must be set");
412 const std::string &input,
const std::regex &regex,
413 const std::function<std::string(
const std::smatch &)> &replacement) {
417 std::string::const_iterator searchStart(input.cbegin());
418 std::string::const_iterator searchEnd(input.cend());
420 while (std::regex_search(searchStart, searchEnd, match, regex)) {
421 result.append(searchStart, searchStart + match.position());
422 result.append(replacement(match));
423 searchStart = match.suffix().first;
426 result.append(searchStart, searchEnd);
433 [&](
const std::smatch &match) {
434 char c = match.str()[0];
437 return "\"" + escaped +
"\"";
/// Forward declaration; the class body and the out-of-class member
/// definitions appear further down in this file.
441class SchemaConverter;
448build_grammar(
const std::function<
void(
const common_grammar_builder &)> &cb,
449 const common_grammar_options &options);
/// Callback that turns a string into a JSON document. It is initialised from
/// the constructor's `fetch_json` argument and is invoked with the `$ref`
/// value when an `https://` reference has not been loaded yet.
462 std::function<
json(
const std::string &)> _fetch_json;
/// Rule table: rule name -> rule body. Written by _add_rule; the grammar
/// formatter iterates it and emits one `name ::= body` line per entry.
464 std::map<std::string, std::string> _rules;
/// Reference targets keyed by reference string: fetched documents are stored
/// under their base URL, and the node a JSON pointer selects is stored under
/// the full ref. Read back by _resolve_ref and by the `allOf` handling.
465 std::unordered_map<std::string, json> _refs;
/// Refs currently being visited by _resolve_ref; a ref is inserted before its
/// target is visited and erased afterwards, and a ref already in the set is
/// not visited again.
466 std::unordered_set<std::string> _refs_being_resolved;
/// Error messages collected during conversion; when non-empty, the error
/// check throws std::runtime_error ("JSON schema conversion failed:...").
467 std::vector<std::string> _errors;
/// Non-fatal messages collected during conversion; when non-empty, the error
/// check prints a "WARNING: JSON schema conversion was incomplete" line to
/// stderr.
468 std::vector<std::string> _warnings;
/// Registers `rule` in the rule table under a key derived from `name`.
/// If that key is free, or already maps to an identical body, the body is
/// stored there; otherwise a numeric suffix is appended and incremented until
/// a key is found that is free or already holds the same body.
/// Callers use the returned string as the name by which to reference the rule.
/// NOTE(review): how the key is derived from `name`, and the exact return
/// statements, are on lines missing from this listing - confirm.
470 std::string _add_rule(
const std::string &name,
const std::string &rule);
/// Visits every schema in `alt_schemas`, naming each one from `name`
/// ("alternative-" is used as the prefix when `name` is empty, "-" as the
/// separator otherwise). The result is used by visit() as the body of the
/// rule for `oneOf` / `anyOf` / multi-type schemas.
/// NOTE(review): the step that combines the alternatives is not visible in
/// this listing.
471 std::string _generate_union_rule(
const std::string &name,
472 const std::vector<json> &alt_schemas);
/// Translates a regular-expression `pattern` into grammar rules and registers
/// the result under `name` as a double-quoted string followed by `space`.
/// The pattern must start with '^' and end with '$' (otherwise an error is
/// recorded); the first and last characters are stripped before translation.
/// A '?' directly after '(' records an "Unsupported pattern syntax" warning;
/// unbalanced brackets and malformed `{m,n}` counts record errors.
473 std::string _visit_pattern(
const std::string &pattern,
474 const std::string &name);
/// Builds a rule body from a character trie of `strings`, using the built-in
/// "char" primitive, and ending with a closing quote followed by `space`.
/// _build_object_rule uses it for the key rule of additional properties,
/// passing the declared property names.
/// NOTE(review): by its name this presumably matches any JSON string other
/// than the given ones - the full emission logic is not visible here.
475 std::string _not_strings(
const std::vector<std::string> &strings);
/// Resolves a `$ref` string to a rule name. The candidate name is the text
/// after the last '/' in `ref`. If no rule of that name exists and `ref` is
/// not already being resolved, the ref is marked as in progress, its target
/// from the reference table is visited under that name, and the mark is
/// removed again.
476 std::string _resolve_ref(
const std::string &ref);
/// Builds the rule body for a JSON object; the body opens with `"{" space`
/// and closes with `"}" space`.
/// @param properties  (name, schema) pairs; each schema is visited and a
///                    `<name>-<prop>-kv` rule is registered for the pair.
/// @param required    property names emitted as mandatory; every other
///                    property is treated as optional.
/// @param name        prefix for the generated rule names (joined with "-"
///                    unless empty).
/// @param additional_properties  when boolean `true` or an object, an extra
///                    optional "*" entry is added for keys that are not
///                    listed in `properties`.
477 std::string _build_object_rule(
478 const std::vector<std::pair<std::string, json>> &properties,
479 const std::unordered_set<std::string> &required,
const std::string &name,
480 const json &additional_properties);
/// Registers the built-in rule's content under `name` via _add_rule, then
/// walks `rule.deps`: a dependency with no known built-in definition records
/// a "Rule ... not known" error, and a known one that is not yet in the rule
/// table is registered recursively.
481 std::string _add_primitive(
const std::string &name,
const BuiltinRule &rule);
485 const std::function<
json(
const std::string &)> &fetch_json,
bool dotall)
486 : _fetch_json(fetch_json), _dotall(dotall) {
/// Converts one schema node into a grammar rule.
/// Dispatches on the node's keywords: `$ref`, `oneOf`/`anyOf`, an array-valued
/// `type`, `const`, `enum`, object `properties`/`additionalProperties`,
/// `allOf`, array `items`/`prefixItems`, string `pattern`/`format`/length
/// bounds and integer bounds; otherwise it falls back to the built-in
/// primitive for the declared type, recording an "Unrecognized schema" error
/// when none applies.
/// @param schema  the schema node to convert.
/// @param name    base name for the generated rule(s); "root" is used when it
///                is empty.
/// @return the value returned by _add_rule / _add_primitive for the node.
522 std::string
visit(
const json &schema,
const std::string &name);
531inline std::string SchemaConverter::_add_rule(
const std::string &name,
532 const std::string &rule) {
534 if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
535 _rules[esc_name] = rule;
539 while (_rules.find(esc_name + std::to_string(i)) != _rules.end() &&
540 _rules[esc_name + std::to_string(i)] != rule) {
543 std::string key = esc_name + std::to_string(i);
550SchemaConverter::_generate_union_rule(
const std::string &name,
551 const std::vector<json> &alt_schemas) {
552 std::vector<std::string> rules;
553 for (
size_t i = 0; i < alt_schemas.size(); i++) {
555 visit(alt_schemas[i], name + (name.empty() ?
"alternative-" :
"-") +
573inline std::string SchemaConverter::_visit_pattern(
const std::string &pattern,
574 const std::string &name) {
575 if (!(pattern.front() ==
'^' && pattern.back() ==
'$')) {
576 _errors.push_back(
"Pattern must start with '^' and end with '$'");
579 std::string sub_pattern = pattern.substr(1, pattern.length() - 2);
580 std::unordered_map<std::string, std::string> sub_rule_ids;
583 size_t length = sub_pattern.length();
585 using literal_or_rule = std::pair<std::string, bool>;
586 auto to_rule = [&](
const literal_or_rule &ls) {
587 auto is_literal = ls.second;
589 return is_literal ?
"\"" + s +
"\"" : s;
591 std::function<literal_or_rule()> transform = [&]() -> literal_or_rule {
593 std::vector<literal_or_rule> seq;
595 auto get_dot = [&]() {
598 rule =
"[\\U00000000-\\U0010FFFF]";
600 rule =
"[^\\x0A\\x0D]";
602 return _add_rule(
"dot", rule);
606 auto join_seq = [&]() {
607 std::vector<literal_or_rule> ret;
610 auto flush_literal = [&]() {
611 if (literal.empty()) {
614 ret.emplace_back(literal,
true);
619 for (
const auto &item : seq) {
620 auto is_literal = item.second;
622 literal += item.first;
630 std::vector<std::string> results;
631 for (
const auto &item : ret) {
632 results.push_back(to_rule(item));
638 char c = sub_pattern[i];
640 seq.emplace_back(get_dot(),
false);
642 }
else if (c ==
'(') {
645 if (sub_pattern[i] ==
'?') {
646 _warnings.push_back(
"Unsupported pattern syntax");
649 seq.emplace_back(
"(" + to_rule(transform()) +
")",
false);
650 }
else if (c ==
')') {
652 if (start > 0 && sub_pattern[start - 1] !=
'(') {
653 _errors.push_back(
"Unbalanced parentheses");
656 }
else if (c ==
'[') {
657 std::string square_brackets = std::string(1, c);
659 while (i < length && sub_pattern[i] !=
']') {
660 if (sub_pattern[i] ==
'\\') {
661 square_brackets += sub_pattern.substr(i, 2);
664 square_brackets += sub_pattern[i];
669 _errors.push_back(
"Unbalanced square brackets");
671 square_brackets +=
']';
673 seq.emplace_back(square_brackets,
false);
674 }
else if (c ==
'|') {
675 seq.emplace_back(
"|",
false);
677 }
else if (c ==
'*' || c ==
'+' || c ==
'?') {
678 seq.back() = std::make_pair(to_rule(seq.back()) + c,
false);
680 }
else if (c ==
'{') {
681 std::string curly_brackets = std::string(1, c);
683 while (i < length && sub_pattern[i] !=
'}') {
684 curly_brackets += sub_pattern[i];
688 _errors.push_back(
"Unbalanced curly brackets");
690 curly_brackets +=
'}';
693 curly_brackets.substr(1, curly_brackets.length() - 2),
",");
695 int max_times = std::numeric_limits<int>::max();
697 if (nums.size() == 1) {
698 min_times = max_times = std::stoi(nums[0]);
699 }
else if (nums.size() != 2) {
700 _errors.push_back(
"Wrong number of values in curly brackets");
702 if (!nums[0].empty()) {
703 min_times = std::stoi(nums[0]);
705 if (!nums[1].empty()) {
706 max_times = std::stoi(nums[1]);
709 }
catch (
const std::invalid_argument &e) {
710 _errors.push_back(
"Invalid number in curly brackets");
711 return std::make_pair(
"",
false);
713 auto &last = seq.back();
714 auto &sub = last.first;
715 auto sub_is_literal = last.second;
717 if (!sub_is_literal) {
718 std::string &sub_id = sub_rule_ids[sub];
719 if (sub_id.empty()) {
720 sub_id = _add_rule(name +
"-" + std::to_string(sub_rule_ids.size()),
726 sub_is_literal ?
"\"" + sub +
"\"" : sub, min_times, max_times,
"");
727 seq.back().second =
false;
730 auto is_non_literal = [&](
char c) {
734 if (sub_pattern[i] ==
'\\' && i < length - 1) {
735 char next = sub_pattern[i + 1];
739 literal += sub_pattern[i];
742 literal += sub_pattern.substr(i, 2);
745 }
else if (sub_pattern[i] ==
'"') {
748 }
else if (!is_non_literal(sub_pattern[i]) &&
749 (i == length - 1 || literal.empty() ||
750 sub_pattern[i + 1] ==
'.' ||
751 !is_non_literal(sub_pattern[i + 1]))) {
752 literal += sub_pattern[i];
758 if (!literal.empty()) {
759 seq.emplace_back(literal,
true);
765 return _add_rule(name,
766 "\"\\\"\" (" + to_rule(transform()) +
") \"\\\"\" space");
770SchemaConverter::_not_strings(
const std::vector<std::string> &strings) {
772 std::map<char, TrieNode> children;
773 bool is_end_of_string;
775 TrieNode() : is_end_of_string(false) {}
777 void insert(
const std::string &
string) {
779 for (
char c : string) {
780 node = &node->children[c];
782 node->is_end_of_string =
true;
787 for (
const auto &s : strings) {
791 std::string char_rule = _add_primitive(
"char",
PRIMITIVE_RULES.at(
"char"));
792 std::ostringstream out;
794 std::function<void(
const TrieNode &)>
visit = [&](
const TrieNode &node) {
795 std::ostringstream rejects;
797 for (
const auto &kv : node.children) {
804 out <<
"[" << kv.first <<
"]";
805 if (!kv.second.children.empty()) {
809 }
else if (kv.second.is_end_of_string) {
810 out <<
" " << char_rule <<
"+";
813 if (!node.children.empty()) {
817 out <<
"[^\"" << rejects.str() <<
"] " << char_rule <<
"*";
823 if (!trie.is_end_of_string) {
826 out <<
" [\"] space";
830inline std::string SchemaConverter::_resolve_ref(
const std::string &ref) {
831 std::string ref_name = ref.substr(ref.find_last_of(
'/') + 1);
832 if (_rules.find(ref_name) == _rules.end() &&
833 _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
834 _refs_being_resolved.insert(ref);
835 json resolved = _refs[ref];
836 ref_name =
visit(resolved, ref_name);
837 _refs_being_resolved.erase(ref);
842inline std::string SchemaConverter::_build_object_rule(
843 const std::vector<std::pair<std::string, json>> &properties,
844 const std::unordered_set<std::string> &required,
const std::string &name,
845 const json &additional_properties) {
846 std::vector<std::string> required_props;
847 std::vector<std::string> optional_props;
848 std::unordered_map<std::string, std::string> prop_kv_rule_names;
849 std::vector<std::string> prop_names;
850 for (
const auto &kv : properties) {
851 const auto &prop_name = kv.first;
852 const auto &prop_schema = kv.second;
854 std::string prop_rule_name =
855 visit(prop_schema, name + (name.empty() ?
"" :
"-") + prop_name);
856 prop_kv_rule_names[prop_name] =
857 _add_rule(name + (name.empty() ?
"" :
"-") + prop_name +
"-kv",
859 " space \":\" space " + prop_rule_name);
860 if (required.find(prop_name) != required.end()) {
861 required_props.push_back(prop_name);
863 optional_props.push_back(prop_name);
865 prop_names.push_back(prop_name);
867 if ((additional_properties.is_boolean() &&
868 additional_properties.get<
bool>()) ||
869 additional_properties.is_object()) {
870 std::string sub_name = name + (name.empty() ?
"" :
"-") +
"additional";
871 std::string value_rule =
872 additional_properties.is_object()
873 ?
visit(additional_properties, sub_name +
"-value")
876 auto key_rule = prop_names.empty()
878 : _add_rule(sub_name +
"-k", _not_strings(prop_names));
879 std::string kv_rule =
880 _add_rule(sub_name +
"-kv", key_rule +
" \":\" space " + value_rule);
881 prop_kv_rule_names[
"*"] = kv_rule;
882 optional_props.push_back(
"*");
885 std::string rule =
"\"{\" space ";
886 for (
size_t i = 0; i < required_props.size(); i++) {
888 rule +=
" \",\" space ";
890 rule += prop_kv_rule_names[required_props[i]];
893 if (!optional_props.empty()) {
895 if (!required_props.empty()) {
896 rule +=
" \",\" space ( ";
899 std::function<std::string(
const std::vector<std::string> &,
bool)>
900 get_recursive_refs = [&](
const std::vector<std::string> &ks,
901 bool first_is_optional) {
906 std::string k = ks[0];
907 std::string kv_rule_name = prop_kv_rule_names[k];
908 std::string comma_ref =
"( \",\" space " + kv_rule_name +
" )";
909 if (first_is_optional) {
910 res = comma_ref + (k ==
"*" ?
"*" :
"?");
912 res = kv_rule_name + (k ==
"*" ?
" " + comma_ref +
"*" :
"");
916 _add_rule(name + (name.empty() ?
"" :
"-") + k +
"-rest",
917 get_recursive_refs(
std::vector<
std::string>(
918 ks.begin() + 1, ks.end()),
924 for (
size_t i = 0; i < optional_props.size(); i++) {
928 rule += get_recursive_refs(
929 std::vector<std::string>(optional_props.begin() + i,
930 optional_props.end()),
933 if (!required_props.empty()) {
939 rule +=
" \"}\" space";
944inline std::string SchemaConverter::_add_primitive(
const std::string &name,
945 const BuiltinRule &rule) {
946 auto n = _add_rule(name, rule.content);
947 for (
const auto &dep : rule.deps) {
948 BuiltinRule dep_rule;
953 _errors.push_back(
"Rule " + dep +
" not known");
957 if (_rules.find(dep) == _rules.end()) {
958 _add_primitive(dep, it->second);
965 const std::string &url) {
971 std::function<void(
json &)> visit_refs = [&](
json &n) {
976 }
else if (n.is_object()) {
977 if (n.contains(
"$ref")) {
978 std::string ref = n[
"$ref"];
979 if (_refs.find(ref) == _refs.end()) {
981 if (ref.find(
"https://") == 0) {
982 std::string base_url = ref.substr(0, ref.find(
'#'));
983 auto it = _refs.find(base_url);
984 if (it != _refs.end()) {
988 auto referenced = _fetch_json(ref);
990 _refs[base_url] = referenced;
992 if (ref.find(
'#') == std::string::npos ||
993 ref.substr(ref.find(
'#') + 1).empty()) {
996 }
else if (ref.find(
"#/") == 0) {
998 n[
"$ref"] = url + ref;
1001 _errors.push_back(
"Unsupported ref: " + ref);
1004 std::string pointer = ref.substr(ref.find(
'#') + 1);
1006 for (
size_t i = 1; i < tokens.size(); ++i) {
1007 std::string sel = tokens[i];
1008 if (target.is_null() || !target.contains(sel)) {
1009 _errors.push_back(
"Error resolving ref " + ref +
": " + sel +
1010 " not in " + target.dump());
1013 target = target[sel];
1015 _refs[ref] = target;
1018 for (
auto &kv : n.items()) {
1019 visit_refs(kv.value());
1033 const std::string &name) {
1034 json schema_type = schema.contains(
"type") ? schema[
"type"] :
json();
1035 std::string schema_format =
1036 schema.contains(
"format") ? schema[
"format"].get<std::string>() :
"";
1038 : name.empty() ?
"root"
1041 if (schema.contains(
"$ref")) {
1042 return _add_rule(rule_name, _resolve_ref(schema[
"$ref"]));
1043 }
else if (schema.contains(
"oneOf") || schema.contains(
"anyOf")) {
1044 std::vector<json> alt_schemas =
1045 schema.contains(
"oneOf") ? schema[
"oneOf"].get<std::vector<json>>()
1046 : schema[
"anyOf"].get<std::vector<json>>();
1047 return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
1048 }
else if (schema_type.is_array()) {
1049 std::vector<json> schema_types;
1050 for (
const auto &t : schema_type) {
1051 json schema_copy(schema);
1052 schema_copy[
"type"] = t;
1053 schema_types.push_back(schema_copy);
1055 return _add_rule(rule_name, _generate_union_rule(name, schema_types));
1056 }
else if (schema.contains(
"const")) {
1057 return _add_rule(rule_name,
1059 }
else if (schema.contains(
"enum")) {
1060 std::vector<std::string> enum_values;
1061 for (
const auto &v : schema[
"enum"]) {
1064 return _add_rule(rule_name,
1066 }
else if ((schema_type.is_null() || schema_type ==
"object") &&
1067 (schema.contains(
"properties") ||
1068 (schema.contains(
"additionalProperties") &&
1069 schema[
"additionalProperties"] !=
true))) {
1070 std::unordered_set<std::string> required;
1071 if (schema.contains(
"required") && schema[
"required"].is_array()) {
1072 for (
const auto &item : schema[
"required"]) {
1073 if (item.is_string()) {
1074 required.insert(item.get<std::string>());
1078 std::vector<std::pair<std::string, json>> properties;
1079 if (schema.contains(
"properties")) {
1080 for (
const auto &prop : schema[
"properties"].items()) {
1081 properties.emplace_back(prop.key(), prop.value());
1084 return _add_rule(rule_name,
1085 _build_object_rule(properties, required, name,
1086 schema.contains(
"additionalProperties")
1087 ? schema[
"additionalProperties"]
1089 }
else if ((schema_type.is_null() || schema_type ==
"object" ||
1090 schema_type ==
"string") &&
1091 schema.contains(
"allOf")) {
1092 std::unordered_set<std::string> required;
1093 std::vector<std::pair<std::string, json>> properties;
1094 std::map<std::string, size_t> enum_values;
1095 std::string hybrid_name = name;
1096 std::function<void(
const json &,
bool)> add_component =
1097 [&](
const json &comp_schema,
bool is_required) {
1098 if (comp_schema.contains(
"$ref")) {
1099 add_component(_refs[comp_schema[
"$ref"]], is_required);
1100 }
else if (comp_schema.contains(
"properties")) {
1101 for (
const auto &prop : comp_schema[
"properties"].items()) {
1102 properties.emplace_back(prop.key(), prop.value());
1104 required.insert(prop.key());
1107 }
else if (comp_schema.contains(
"enum")) {
1108 for (
const auto &v : comp_schema[
"enum"]) {
1110 if (enum_values.find(rule) == enum_values.end()) {
1111 enum_values[rule] = 0;
1113 enum_values[rule] += 1;
1119 for (
auto &t : schema[
"allOf"]) {
1120 if (t.contains(
"anyOf")) {
1121 for (
auto &tt : t[
"anyOf"]) {
1122 add_component(tt,
false);
1125 add_component(t,
true);
1128 if (!enum_values.empty()) {
1129 std::vector<std::string> enum_intersection;
1130 for (
const auto &p : enum_values) {
1131 if (p.second == schema[
"allOf"].size()) {
1132 enum_intersection.push_back(p.first);
1135 if (!enum_intersection.empty()) {
1136 return _add_rule(rule_name,
1141 return _add_rule(rule_name, _build_object_rule(properties, required,
1142 hybrid_name,
json()));
1143 }
else if ((schema_type.is_null() || schema_type ==
"array") &&
1144 (schema.contains(
"items") || schema.contains(
"prefixItems"))) {
1146 schema.contains(
"items") ? schema[
"items"] : schema[
"prefixItems"];
1147 if (items.is_array()) {
1148 std::string rule =
"\"[\" space ";
1149 for (
size_t i = 0; i < items.size(); i++) {
1151 rule +=
" \",\" space ";
1153 rule +=
visit(items[i], name + (name.empty() ?
"" :
"-") +
"tuple-" +
1156 rule +=
" \"]\" space";
1157 return _add_rule(rule_name, rule);
1159 std::string item_rule_name =
1160 visit(items, name + (name.empty() ?
"" :
"-") +
"item");
1162 schema.contains(
"minItems") ? schema[
"minItems"].get<
int>() : 0;
1163 json max_items_json =
1164 schema.contains(
"maxItems") ? schema[
"maxItems"] :
json();
1165 int max_items = max_items_json.is_number_integer()
1166 ? max_items_json.get<
int>()
1167 : std::numeric_limits<int>::max();
1169 return _add_rule(rule_name,
1172 max_items,
"\",\" space") +
1175 }
else if ((schema_type.is_null() || schema_type ==
"string") &&
1176 schema.contains(
"pattern")) {
1177 return _visit_pattern(schema[
"pattern"], rule_name);
1178 }
else if ((schema_type.is_null() || schema_type ==
"string") &&
1179 std::regex_match(schema_format, std::regex(
"^uuid[1-5]?$"))) {
1180 return _add_primitive(rule_name ==
"root" ?
"root" : schema_format,
1182 }
else if ((schema_type.is_null() || schema_type ==
"string") &&
1185 auto prim_name = schema_format +
"-string";
1189 }
else if (schema_type ==
"string" &&
1190 (schema.contains(
"minLength") || schema.contains(
"maxLength"))) {
1191 std::string char_rule = _add_primitive(
"char",
PRIMITIVE_RULES.at(
"char"));
1193 schema.contains(
"minLength") ? schema[
"minLength"].get<
int>() : 0;
1194 int max_len = schema.contains(
"maxLength")
1195 ? schema[
"maxLength"].get<
int>()
1196 : std::numeric_limits<int>::max();
1200 }
else if (schema_type ==
"integer" &&
1201 (schema.contains(
"minimum") ||
1202 schema.contains(
"exclusiveMinimum") ||
1203 schema.contains(
"maximum") ||
1204 schema.contains(
"exclusiveMaximum"))) {
1205 int min_value = std::numeric_limits<int>::min();
1206 int max_value = std::numeric_limits<int>::max();
1207 if (schema.contains(
"minimum")) {
1208 min_value = schema[
"minimum"].get<
int>();
1209 }
else if (schema.contains(
"exclusiveMinimum")) {
1210 min_value = schema[
"exclusiveMinimum"].get<
int>() + 1;
1212 if (schema.contains(
"maximum")) {
1213 max_value = schema[
"maximum"].get<
int>();
1214 }
else if (schema.contains(
"exclusiveMaximum")) {
1215 max_value = schema[
"exclusiveMaximum"].get<
int>() - 1;
1217 std::stringstream out;
1221 return _add_rule(rule_name, out.str());
1222 }
else if (schema.empty() || schema_type ==
"object") {
1223 return _add_rule(rule_name,
1226 if (!schema_type.is_string() ||
1229 _errors.push_back(
"Unrecognized schema: " + schema.dump());
1234 return _add_primitive(rule_name ==
"root" ?
"root"
1235 : schema_type.get<std::string>(),
1241 if (!_errors.empty()) {
1242 throw std::runtime_error(
"JSON schema conversion failed:\n" +
1245 if (!_warnings.empty()) {
1246 fprintf(stderr,
"WARNING: JSON schema conversion was incomplete: %s\n",
1252 std::stringstream ss;
1253 for (
const auto &kv : _rules) {
1254 ss << kv.first <<
" ::= " << kv.second << std::endl;
1266#ifdef LLAMA_USE_LLGUIDANCE
1268 return "%llguidance {}\nstart: %json " + schema.dump();
1286 [&](
const std::string &name,
const std::string &rule) {
1287 return converter._add_rule(name, rule);
1290 [&](
const std::string &name,
const nlohmann::ordered_json &schema) {
1291 return converter.visit(schema, name ==
"root" ?
"" : name);
1294 [&](nlohmann::ordered_json &schema) {
1298 converter.check_errors();
1299 return converter.format_grammar();
nlohmann::ordered_json json
SchemaConverter(const std::function< json(const std::string &)> &fetch_json, bool dotall)
std::string format_grammar()
std::string visit(const json &schema, const std::string &name)
Convert schema node to GBNF rule.
std::string _generate_constant_rule(const json &value)
void resolve_refs(json &schema, const std::string &url)
Resolve $ref pointers in JSON schema.
std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]")
const std::unordered_map< std::string, BuiltinRule > STRING_FORMAT_RULES
Grammar rules for string format validation.
const std::unordered_set< char > NON_LITERAL_SET
std::string replacePattern(const std::string &input, const std::regex &regex, const std::function< std::string(const std::smatch &)> &replacement)
void _build_min_max_int(int min_value, int max_value, std::stringstream &out, int decimals_left=16, bool top_level=true)
bool is_reserved_name(const std::string &name)
Check if name conflicts with GBNF reserved keywords.
const std::unordered_set< char > ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS
std::string format_literal(const std::string &literal)
const std::unordered_map< std::string, BuiltinRule > PRIMITIVE_RULES
Built-in grammar rules for JSON primitives.
std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+")
std::string build_repetition(const std::string &item_rule, int min_items, int max_items, const std::string &separator_rule="")
std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"]")
constexpr const char * SPACE_RULE
const std::unordered_map< char, std::string > GRAMMAR_LITERAL_ESCAPES
JSON Schema to Grammar Converter (Header-Only)
std::string string_repeat(const std::string &str, size_t n)
std::string json_schema_to_grammar(const json &schema, bool force_gbnf=false)
Convert JSON schema to GBNF grammar.
std::string string_join(const std::vector< std::string > &values, const std::string &separator)
std::string build_grammar(const std::function< void(const common_grammar_builder &)> &cb, const common_grammar_options &options={})
Build grammar from callback.
std::vector< std::string > string_split(const std::string &str, const std::string &delimiter)
nlohmann::ordered_json json
std::function< std::string(const std::string &, const std::string &)> add_rule
std::function< void(json &)> resolve_refs
std::function< std::string(const std::string &, const json &)> add_schema
std::vector< std::string > deps