From: Axy Date: Tue, 10 Feb 2026 02:50:07 +0000 (+0100) Subject: This is not organized, but working json deserializer :D X-Git-Url: https://git.uwuaxy.net/?a=commitdiff_plain;h=5b247395caa1d9252389b861d07b514f131d2359;p=axy%2Fft%2Fmini-serde.git This is not organized, but working json deserializer :D --- 5b247395caa1d9252389b861d07b514f131d2359 diff --git a/miniserde/serde_data.py b/miniserde/serde_data.py new file mode 100644 index 0000000..6d2da19 --- /dev/null +++ b/miniserde/serde_data.py @@ -0,0 +1,336 @@ +from collections.abc import Callable +from typing import cast + + +class NullType: + pass + + +Null: NullType = NullType() + + +type JsonValue = str | int | float | dict[str, "JsonValue"] | list[ + "JsonValue" +] | bool | NullType +type ParseResult[T] = tuple[T, str] | None +type Parser[T] = Callable[[str], ParseResult[T]] + + +class ParseError(Exception): + def __init__(self, msg: str, at: str) -> None: + self.msg: str = msg + self.at: str = at + super().__init__(f"{msg}\n\nat: {at[:40]}") + + +def option_map[T, R](f: Callable[[T], R], val: T | None) -> R | None: + return f(val) if val is not None else None + + +def parser_map[T, M](m: Callable[[T], M], p: Parser[T]) -> Parser[M]: + return lambda s: option_map(lambda res: (m(res[0]), res[1]), p(s)) + + +def parser_flatten[T](p: Parser[T | None]) -> Parser[T]: + return lambda s: option_map( + lambda res: cast(tuple[T, str], res) if res[0] is not None else None, + p(s), + ) + + +def parser_default[T](p: Parser[T], default: T) -> Parser[T]: + return alt(p, value(default, null_parser)) + + +def recognize[T](p: Parser[T]) -> Parser[str]: + return lambda s: option_map( + lambda rem: (s[: len(s) - len(rem[1])], rem[1]), + p(s), + ) + + +def cut[T](p: Parser[T]) -> Parser[T]: + def inner(s: str) -> ParseResult[T]: + res: ParseResult[T] = p(s) + if res is None: + raise ParseError("Cut error: parser did not complete", s) + return res + + return inner + + +def tag(tag: str) -> Parser[str]: + return parser_flatten( + parser_map(lambda s: s if s.startswith(tag) else None, char) + ) + + +def char(s: str) -> ParseResult[str]: + return (s[0], s[1:]) if len(s) > 0 else None + + +def null_parser(s: str) -> ParseResult[str]: + return ("", s) + + +def value[T, V](val: V, p: Parser[T]) -> Parser[V]: + return parser_map(lambda _: val, p) + + +def alt[T](*choices: Parser[T]) -> Parser[T]: + return lambda s: next( + filter( + lambda e: e is not None, + map(lambda p: p(s), choices), + ), + None, + ) + + +def fold[T, _S, R]( + p: Parser[T], + f: Callable[[R, T], R], + acc: R, + min_n: int = 0, + max_n: int | None = None, + sep: Parser[_S] = null_parser, +) -> Parser[R]: + # no clean way to do this with lambdas i could figure out :< + def inner(s: str) -> ParseResult[R]: + nonlocal acc + count: int = 0 + curr_p: Parser[T] = p + while max_n is None or count < max_n: + next: ParseResult[T] = curr_p(s) + if next is None: + break + if count == 0: + curr_p = preceeded(sep, p) + count += 1 + acc = f(acc, next[0]) + s = next[1] + return (acc, s) if count >= min_n else None + + return inner + + +def many[T, _S]( + p: Parser[T], + min_n: int = 0, + max_n: int | None = None, + sep: Parser[_S] = null_parser, +) -> Parser[list[T]]: + return fold( + parser_map(lambda e: [e], p), list.__add__, [], min_n, max_n, sep + ) + + +def many_count[T, _S]( + p: Parser[T], + min_n: int = 0, + max_n: int | None = None, + sep: Parser[_S] = null_parser, +) -> Parser[int]: + return fold(value(1, p), int.__add__, 0, min_n, max_n, sep) + + +def pair[T, U](p1: Parser[T], p2: Parser[U]) -> Parser[tuple[T, U]]: + return lambda s: option_map( + lambda res1: parser_map(lambda res2: (res1[0], res2), p2)(res1[1]), + p1(s), + ) + + +def preceeded[_T0, T1](p1: Parser[_T0], p2: Parser[T1]) -> Parser[T1]: + return parser_map(lambda res: res[1], pair(p1, p2)) + + +def terminated[T0, _T1](p1: Parser[T0], p2: Parser[_T1]) -> Parser[T0]: + return parser_map(lambda res: res[0], pair(p1, p2)) + + +def delimited[_T0, T1, _T2]( + p1: Parser[_T0], p2: Parser[T1], p3: Parser[_T2] +) -> Parser[T1]: + return preceeded(p1, terminated(p2, p3)) + + +def one_of(chars: str) -> Parser[str]: + return alt(*map(tag, chars)) + + +def none_of(chars: str) -> Parser[str]: + return lambda s: char(s) if one_of(chars)(s) is None else None + + +def ascii_hexdigit(s: str) -> ParseResult[str]: + return one_of("0123456789abcdefABCDEF")(s) + + +def ascii_digit(s: str) -> ParseResult[str]: + return one_of("0123456789")(s) + + +# json impl + + +def json_whitespace(s: str) -> ParseResult[str]: + return recognize(many_count(one_of(" \r\n\t")))(s) + + +def json_num_sign(s: str) -> ParseResult[float]: + return parser_default(value(-1.0, tag("-")), 1.0)(s) + + +def json_num_digits(s: str) -> ParseResult[float]: + # the spec explicitly wants no leading zero except if it is only one + return parser_map( + lambda digits: float(digits), + alt( + tag("0"), + recognize(pair(one_of("123456789"), many_count(ascii_digit))), + ), + )(s) + + +def json_num_decimal(s: str) -> ParseResult[float]: + return preceeded( + tag("."), + parser_map( + lambda digits: float(digits) / (10.0 ** len(digits)), + cut(recognize(many_count(ascii_digit, 1))), + ), + )(s) + + +def json_num_exponent_sign(s: str) -> ParseResult[int]: + return alt(value(-1, tag("-")), value(1, alt(tag("+"), null_parser)))(s) + + +def json_num_exponent_digits(s: str) -> ParseResult[int]: + return parser_map( + lambda digits: int(digits, 10), + recognize(many_count(ascii_digit, 1)), + )(s) + + +def json_num_exponent(s: str) -> ParseResult[float]: + return parser_map( + lambda res: 10.0 ** (int.__add__(*res)), + preceeded( + one_of("eE"), + cut(pair(json_num_exponent_sign, json_num_exponent_digits)), + ), + )(s) + + +def json_num(s: str) -> ParseResult[float]: + op_flat: Callable[ + [Callable[[float, float], float]], + Callable[[tuple[float, float]], float], + ] = lambda f: lambda nums: f(*nums) + + digit_parser: Parser[float] = parser_map( + op_flat(float.__add__), + pair(json_num_digits, parser_default(json_num_decimal, 0.0)), + ) + signed_digits_parser: Parser[float] = parser_map( + op_flat(float.__mul__), pair(json_num_sign, digit_parser) + ) + return parser_map( + op_flat(float.__mul__), + pair(signed_digits_parser, parser_default(json_num_exponent, 1.0)), + )(s) + + +# the spec disallows control characters as well as the two escape-only chars +__str_codepoint_filter = '"\\' + "".join(chr(i) for i in range(0x0, 0x20)) + + +def json_str_codepoint(s: str) -> ParseResult[str]: + return none_of(__str_codepoint_filter)(s) + + +def json_str_escape(s: str) -> ParseResult[str]: + return preceeded( + tag("\\"), + cut( + alt( + value('"', tag('"')), + value("\\", tag("\\")), + value("/", tag("/")), + value("\b", tag("b")), + value("\f", tag("f")), + value("\b", tag("b")), + value("\r", tag("r")), + value("\b", tag("b")), + preceeded( + tag("u"), + parser_map( + lambda s: chr(int(s, 16)), + cut(recognize(many_count(ascii_hexdigit, 4, 4))), + ), + ), + ) + ), + )(s) + + +def json_str(s: str) -> ParseResult[str]: + return delimited( + tag('"'), + fold(alt(json_str_codepoint, json_str_escape), str.__add__, ""), + cut(tag('"')), + )(s) + + +def json_value(s: str) -> ParseResult[JsonValue]: + return delimited( + json_whitespace, + alt( + json_str, + json_num, + json_array, + json_object, + value(True, tag("true")), + value(False, tag("false")), + value(Null, tag("null")), + ), + json_whitespace, + )(s) + + +def json_array(s: str) -> ParseResult[list[JsonValue]]: + return delimited( + tag("["), + alt(many(json_value, sep=tag(",")), value([], json_whitespace)), + cut(tag("]")), + )(s) + + +def json_object_field(s: str) -> ParseResult[tuple[str, JsonValue]]: + return pair( + delimited(json_whitespace, cut(json_str), json_whitespace), + preceeded(cut(tag(":")), cut(json_value)), + )(s) + + +def json_object(s: str) -> ParseResult[dict[str, JsonValue]]: + return delimited( + tag("{"), + alt( + fold( + json_object_field, + lambda acc, field: acc | {field[0]: field[1]}, + dict[str, JsonValue](), + sep=tag(","), + ), + value({}, json_whitespace), + ), + cut(tag("}")), + )(s) + + +print(json_value("-12331.111e231")) +print(json_value('"hello world\\\\ \\" "')) +print(json_value('[123, -123e3, {"hello": "axy", "num": 421}]')) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a8f43fe --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +line-length = 79