import json
import bson
from biothings.utils.common import is_str
[docs]
def generate_json_schema(dmap):
    """Convert a type map into a JSON-schema-like dictionary.

    The map is walked recursively. Each key found in ``dmap`` contributes to
    the schema of the current level according to its kind:

    * a string key is a field name: the current level is (also) typed as
      ``"object"`` and the schema generated from ``dmap[key]`` is stored
      under ``"properties"``;
    * the ``list`` type: the current level is (also) typed as ``"array"``
      and the schema generated from ``dmap[list]`` is stored under
      ``"items"``;
    * any other type is treated as a scalar and translated with a fixed
      lookup table (``str`` -> ``"string"``, ``int`` -> ``"integer"``,
      ``float`` and ``bson.int64.Int64`` -> ``"number"``,
      ``bool`` -> ``"boolean"``);
    * ``None`` replaces whatever was built so far with ``{"type": None}``.

    When several kinds are found at the same level, ``"type"`` becomes a list
    of the distinct type names, in the order they were first encountered.

    Args:
        dmap: the (possibly nested) type map. Anything that is not a dict
            produces an empty schema.

    Returns:
        dict: the generated schema, ``{}`` when there is nothing to describe.

    Raises:
        Exception: if a field name is found at a level whose type so far is
            neither ``"object"``, ``"array"`` nor a list of types, or if a
            key is neither a string, a type, nor ``None``.
        AssertionError: if a field name is found at a level whose mixed
            types are not exactly ``"object"`` and ``"array"``.
        KeyError: if a type key (other than ``list``) is missing from the
            scalar lookup table.
    """
    scalarmap = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        bson.int64.Int64: "number",
        # NOTE(review): never looked up, None is not an instance of `type`
        # and is handled by its own branch below.
        None: "null",
    }

    def merge_type(typ1, typ2):
        """Return the distinct type names of ``typ1`` and ``typ2`` as a new list.

        Each argument is either a single type name or a list of type names.
        """
        left = typ1 if isinstance(typ1, list) else [typ1]
        right = typ2 if isinstance(typ2, list) else [typ2]
        # dict.fromkeys() drops duplicates while keeping first-seen order, so
        # the result no longer depends on set iteration order.
        return list(dict.fromkeys(left + right))

    schema = {}
    if not isinstance(dmap, dict):
        return schema

    for k in dmap:
        if is_str(k):
            esch = generate_json_schema(dmap[k])
            if not schema:
                schema = {"type": "object", "properties": {}}
            elif schema["type"] == "object":
                # we just complete 'properties', key already defined previously
                pass
            elif schema["type"] == "array":
                # an array was seen first at this level: it is now array + object
                if not schema.get("properties"):
                    schema["properties"] = {}
                    schema["type"] = merge_type(schema["type"], "object")
            elif isinstance(schema["type"], list):
                # NOTE(review): skipped when running with -O; only the
                # object/array mix is supported together with field names.
                assert set(schema["type"]) == {"object", "array"}
            else:
                raise Exception("Previous schema type not expected: %s" % schema["type"])
            schema["properties"][k] = esch
        elif isinstance(k, type):
            if k is list:
                if schema:
                    # already defined for this key, mixed types
                    schema["type"] = merge_type(schema["type"], "array")
                else:
                    schema = {"type": "array"}
                # recurse exactly once per array level
                schema["items"] = generate_json_schema(dmap[k])
            elif schema:
                schema["type"] = merge_type(schema["type"], scalarmap[k])
            else:
                schema = {"type": scalarmap[k]}
        elif k is None:
            # NOTE(review): discards anything accumulated so far and emits
            # None rather than "null" - confirm this is intended before
            # changing it, expected schemas may rely on it.
            schema = {"type": None}
        else:
            raise Exception("no not here, k: %s" % k)
    return schema
[docs]
def test():
    """Run ad-hoc checks of ``generate_json_schema`` against expected schemas.

    Small hand-written documents are inspected with ``inspect_docs`` in
    ``"type"`` mode and the schema generated from the resulting map is
    compared with the expected one. Three stored type maps are then loaded
    from ``biothings/tests/`` and checked against their stored schemas.

    Side effects: sets ``biothings.utils.jsondiff.UNORDERED_LIST`` to ``True``
    for the whole process, reads fixture files through paths relative to the
    current working directory, and prints a message on success.

    Raises:
        AssertionError: if a generated schema differs from the expected one.
    """
    # TODO: Move these test to tests folder, or maybe already moved over? Chunlei
    # can't use assert directly, as we can't ensure the order of types (for instance)
    import biothings.utils.jsondiff
    from biothings.utils.inspect import inspect_docs, typify_inspect_doc

    biothings.utils.jsondiff.UNORDERED_LIST = True
    jsondiff = biothings.utils.jsondiff.make

    def check_docs(docs, expected):
        # Inspect the documents, generate the schema and compare it to `expected`.
        m = inspect_docs(docs, mode="type")["type"]
        gs = generate_json_schema(m)
        assert jsondiff(gs, expected) == [], "%s  !=\n%s" % (gs, expected)

    def load_json(path):
        # Read a JSON file, closing the handle as soon as parsing is done.
        with open(path) as handle:
            return json.load(handle)

    def check_collection(name):
        # Compare a stored type map with its stored schema.
        expected = load_json("biothings/tests/%s_schema.json" % name)
        dmap = typify_inspect_doc(load_json("biothings/tests/%s_map.json" % name))
        schema = generate_json_schema(dmap)
        assert jsondiff(expected, schema) == []

    # object
    td1 = {"i": {"a": 456}}
    s1 = {
        "properties": {
            "i": {
                "properties": {"a": {"type": "integer"}},
                "type": "object",
            }
        },
        "type": "object",
    }
    check_docs([td1], s1)

    # array of integers
    td5 = {"i": [1, 2, 3]}
    s5 = {
        "properties": {
            "i": {
                "items": {"type": "integer"},
                "type": "array",
            }
        },
        "type": "object",
    }
    check_docs([td5], s5)

    # array of object
    td2 = {"i": [{"a": 123}]}
    s2 = {
        "properties": {
            "i": {
                "items": {
                    "properties": {"a": {"type": "integer"}},
                    "type": "object",
                },
                "type": "array",
            }
        },
        "type": "object",
    }
    check_docs([td2], s2)

    # object in object
    td3 = {"i": {"a": {"b": 123}}}
    s3 = {
        "properties": {
            "i": {
                "properties": {
                    "a": {
                        "properties": {"b": {"type": "integer"}},
                        "type": "object",
                    }
                },
                "type": "object",
            }
        },
        "type": "object",
    }
    check_docs([td3], s3)

    # mixed int/str in array
    td6 = {"i": [1, 2, "a"]}
    s6 = {
        "properties": {
            "i": {
                "items": {"type": ["integer", "string"]},
                "type": "array",
            }
        },
        "type": "object",
    }
    check_docs([td6], s6)

    # mixed array/object (same documents as the object and array-of-object cases)
    s12 = {
        "properties": {
            "i": {
                "items": {
                    "properties": {"a": {"type": "integer"}},
                    "type": "object",
                },
                "properties": {"a": {"type": "integer"}},
                "type": ["array", "object"],
            }
        },
        "type": "object",
    }
    check_docs([td1, td2], s12)

    # list of integer (list of things which are not objects)
    td4 = {"a": [5, 5, 3]}
    s4 = {
        "properties": {
            "a": {
                "items": {"type": "integer"},
                "type": "array",
            }
        },
        "type": "object",
    }
    check_docs([td4], s4)

    # object with several fields
    td7 = {"i": {"a": 1, "b": 2}}
    s7 = {
        "type": "object",
        "properties": {
            "i": {
                "type": "object",
                "properties": {
                    "a": {"type": "integer"},
                    "b": {"type": "integer"},
                },
            }
        },
    }
    check_docs([td7], s7)

    # int or list of int (not a list of dict, testing scalar there)
    td81 = {"i": 1}
    td82 = {"i": [2, 3]}
    s812 = {
        "properties": {
            "i": {
                "items": {"type": "integer"},
                "type": ["array", "integer"],
            }
        },
        "type": "object",
    }
    check_docs([td81, td82], s812)

    # run from app folder, biothings as symlink
    # small real-life collection
    check_collection("cgi")
    check_collection("clinvar")
    check_collection("mygene")
    print("All test OK")
# Run the ad-hoc checks when this module is executed as a script. test() reads
# its fixture files through relative paths, so the working directory matters.
if __name__ == "__main__":
    test()