diff --git a/Pipfile b/Pipfile
index 64aad5ac0..46aa589da 100755
--- a/Pipfile
+++ b/Pipfile
@@ -40,6 +40,7 @@ sphinx = ">=1.8,<2"
 [packages]
 # Make sure to keep in sync with setup.py requirements.
 ciso8601 = ">=2.1.3,<3"
+dataclasses-json = ">=0.5.2,<0.6"
 funcy = ">=1.7.3,<2"
 graphql-core = ">=3.1.2,<3.2"  # minor versions sometimes contain breaking changes
 six = ">=1.10.0"
diff --git a/Pipfile.lock b/Pipfile.lock
index d14382137..5f4c91e6e 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "bc634468beba3b9a3df60a32beadbcb92d0dc46476868a86cc304d1d5093f28a"
+            "sha256": "91599e4b711c9bcae2f412debca81714239825a5bdf9fab2dee3ce5f92c661b1"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -23,6 +23,14 @@
             "index": "pypi",
             "version": "==2.1.3"
         },
+        "dataclasses-json": {
+            "hashes": [
+                "sha256:56ec931959ede74b5dedf65cf20772e6a79764d20c404794cce0111c88c085ff",
+                "sha256:b746c48d9d8e884e2a0ffa59c6220a1b21f94d4f9f12c839da0a8a0efd36dc19"
+            ],
+            "index": "pypi",
+            "version": "==0.5.2"
+        },
         "funcy": {
             "hashes": [
                 "sha256:65b746fed572b392d886810a98d56939c6e0d545abb750527a717c21ced21008",
@@ -43,6 +51,28 @@
             "index": "pypi",
             "version": "==3.1.4"
         },
+        "marshmallow": {
+            "hashes": [
+                "sha256:0dd42891a5ef288217ed6410917f3c6048f585f8692075a0052c24f9bfff9dfd",
+                "sha256:16e99cb7f630c0ef4d7d364ed0109ac194268dde123966076ab3dafb9ae3906b"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==3.11.1"
+        },
+        "marshmallow-enum": {
+            "hashes": [
+                "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58",
+                "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"
+            ],
+            "version": "==1.5.1"
+        },
+        "mypy-extensions": {
+            "hashes": [
+                "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
+                "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
+            ],
+            "version": "==0.4.3"
+        },
         "six": {
             "hashes": [
                 "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
@@ -90,6 +120,28 @@
             ],
             "index": "pypi",
             "version": "==1.3.24"
+        },
+        "stringcase": {
+            "hashes": [
+                "sha256:48a06980661908efe8d9d34eab2b6c13aefa2163b3ced26972902e3bdfd87008"
+            ],
+            "version": "==1.2.0"
+        },
+        "typing-extensions": {
+            "hashes": [
+                "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918",
+                "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c",
+                "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"
+            ],
+            "version": "==3.7.4.3"
+        },
+        "typing-inspect": {
+            "hashes": [
+                "sha256:3b98390df4d999a28cf5b35d8b333425af5da2ece8a4ea9e98f71e7591347b4f",
+                "sha256:8f1b1dd25908dbfd81d3bebc218011531e7ab614ba6e5bf7826d887c834afab7",
+                "sha256:de08f50a22955ddec353876df7b2545994d6df08a2f45d54ac8c05e530372ca0"
+            ],
+            "version": "==0.6.0"
         }
     },
     "develop": {
diff --git a/graphql_compiler/query_planning/typedefs.py b/graphql_compiler/query_planning/typedefs.py
new file mode 100644
index 000000000..ce10bb6cb
--- /dev/null
+++ b/graphql_compiler/query_planning/typedefs.py
@@ -0,0 +1,311 @@
+# Copyright 2021-present Kensho Technologies, LLC.
+from abc import ABCMeta
+from dataclasses import dataclass, field
+from enum import Enum, unique
+from typing import Any, Callable, Dict, Iterable, Mapping, Optional, cast
+
+from dataclasses_json import DataClassJsonMixin, config
+from graphql import (
+    GraphQLList,
+    GraphQLNonNull,
+    GraphQLNullableType,
+    GraphQLScalarType,
+    GraphQLType,
+    ListTypeNode,
+    NamedTypeNode,
+    NonNullTypeNode,
+    TypeNode,
+    parse_type,
+    specified_scalar_types,
+)
+
+from .. import GraphQLDate, GraphQLDateTime, GraphQLDecimal
+from ..compiler.compiler_frontend import OutputMetadata
+from ..global_utils import is_same_type
+from ..typedefs import QueryArgumentGraphQLType
+
+
+QueryExecutionFunc = Callable[
+    [str, Dict[str, QueryArgumentGraphQLType], Dict[str, Any]], Iterable[Mapping[str, Any]]
+]
+
+# Custom scalar types.
+CUSTOM_SCALAR_TYPES = {
+    GraphQLDate.name: GraphQLDate,
+    GraphQLDateTime.name: GraphQLDateTime,
+    GraphQLDecimal.name: GraphQLDecimal,
+}
+
+# Custom scalar types must not have name conflicts with builtin scalar types.
+if set(CUSTOM_SCALAR_TYPES).intersection(specified_scalar_types):
+    raise AssertionError(
+        f"Custom scalar types must have different names than builtin scalar types. Received "
+        f"overlapping type(s) {set(CUSTOM_SCALAR_TYPES).intersection(specified_scalar_types)}. "
+        f"Custom scalar types: {set(CUSTOM_SCALAR_TYPES)}. Builtin scalar types: "
+        f"{set(specified_scalar_types)}."
+    )
+
+# Custom scalar types combined with builtin scalar types represent all allowable scalar types.
+ALL_SCALAR_TYPES = CUSTOM_SCALAR_TYPES.copy()
+ALL_SCALAR_TYPES.update(specified_scalar_types)
+
+
+def _get_type_from_scalar_type_dictionary(
+    scalar_types: Dict[str, GraphQLScalarType], type_node: TypeNode
+) -> GraphQLType:
+    """Get the GraphQL type definition from an AST node.
+
+    Given a scalar type dictionary and an AST node describing a type, return a GraphQLType
+    definition, which applies to that type. For example, if provided the parsed AST node for
+    `[Date]`, a GraphQLList instance will be returned, containing the type called "Date" found
+    in the scalar type dictionary. If a type called "Date" is not found in the scalar type
+    dictionary, a KeyError will be raised.
+
+    Note: this is very similar to GraphQL's type_from_ast. However, instead of requiring a GraphQL
+    schema this function requires a dictionary of the scalar types. This simplifies deserialization
+    and allows for custom scalar types without constructing an entire schema. Unfortunately, this
+    means that user-defined custom scalars that are not known to the compiler cannot be used.
+
+    Args:
+        scalar_types: dictionary mapping type name to GraphQLScalarType
+        type_node: AST node describing a type
+
+    Returns:
+        GraphQLType that applies to the type specified in type_node.
+
+    Raises:
+        AssertionError: if an invalid type node is given.
+        KeyError: if a named type is not present in the scalar type dictionary.
+    """
+    if isinstance(type_node, ListTypeNode):
+        inner_type = _get_type_from_scalar_type_dictionary(scalar_types, type_node.type)
+        if inner_type:
+            return GraphQLList(inner_type)
+        else:
+            raise AssertionError(
+                f"Invalid type node. ListTypeNode contained inner type {inner_type}."
+            )
+    elif isinstance(type_node, NonNullTypeNode):
+        inner_type = _get_type_from_scalar_type_dictionary(scalar_types, type_node.type)
+        if inner_type:
+            inner_type = cast(GraphQLNullableType, inner_type)
+            return GraphQLNonNull(inner_type)
+        else:
+            raise AssertionError(
+                f"Invalid type node. NonNullTypeNode contained inner type {inner_type}."
+            )
+    elif isinstance(type_node, NamedTypeNode):
+        return scalar_types[type_node.name.value]
+
+    # Not reachable. All possible type nodes have been considered.
+    raise AssertionError(f"Unexpected type node: {type_node}.")
+
+
+def _serialize_output_metadata_field(
+    output_metadata_dictionary: Optional[Dict[str, OutputMetadata]]
+) -> Optional[Dict[str, Dict[str, Any]]]:
+    """Serialize OutputMetadata into a dictionary."""
+    if not output_metadata_dictionary:
+        return None
+    dictionary_value = {}
+    for output_name, output_metadata in output_metadata_dictionary.items():
+        dictionary_value[output_name] = {
+            "type": str(output_metadata.type),
+            "optional": output_metadata.optional,
+            "folded": output_metadata.folded,
+        }
+    return dictionary_value
+
+
+def _deserialize_output_metadata_field(
+    dict_value: Optional[Dict[str, Dict[str, Any]]]
+) -> Optional[Dict[str, OutputMetadata]]:
+    """Deserialize the dictionary representation of OutputMetadata."""
+    if not dict_value:
+        return None
+    output_metadata_dictionary = {}
+    for output_name, output_metadata in dict_value.items():
+        output_metadata_dictionary[output_name] = OutputMetadata(
+            type=_get_type_from_scalar_type_dictionary(
+                ALL_SCALAR_TYPES, parse_type(output_metadata["type"])
+            ),
+            optional=output_metadata["optional"],
+            folded=output_metadata["folded"],
+        )
+    return output_metadata_dictionary
+
+
+def _serialize_input_metadata_field(
+    input_metadata_dictionary: Optional[Dict[str, Any]]
+) -> Optional[Dict[str, str]]:
+    """Serialize input metadata, converting GraphQLTypes to strings."""
+    # It is possible to have an empty input metadata dictionary (i.e. no inputs for the query).
+    # Note that this is different from "None", which means no metadata was provided.
+    if input_metadata_dictionary == {}:
+        return {}
+    if input_metadata_dictionary is None:
+        return None
+    dictionary_value = {}
+    for input_name, input_type in input_metadata_dictionary.items():
+        dictionary_value[input_name] = str(input_type)
+    return dictionary_value
+
+
+def _deserialize_input_metadata_field(
+    dict_value: Optional[Dict[str, str]]
+) -> Optional[Dict[str, GraphQLType]]:
+    """Deserialize input metadata, converting strings to GraphQLTypes."""
+    # It is possible to have an empty input metadata dictionary (i.e. no inputs for the query).
+    # Note that this is different from "None", which means no metadata was provided.
+    if dict_value == {}:
+        return {}
+    if dict_value is None:
+        return None
+    input_metadata_dictionary = {}
+    for input_name, input_type in dict_value.items():
+        input_metadata_dictionary[input_name] = _get_type_from_scalar_type_dictionary(
+            ALL_SCALAR_TYPES, parse_type(input_type)
+        )
+    return input_metadata_dictionary
+
+
+def _compare_input_metadata_field(
+    left_input_metadata: Optional[Dict[str, QueryArgumentGraphQLType]],
+    right_input_metadata: Optional[Dict[str, QueryArgumentGraphQLType]],
+) -> bool:
+    """Check input_metadata SimpleExecute field equality, comparing GraphQLTypes appropriately."""
+    if left_input_metadata is None:
+        # Since left_input_metadata is None, checking for equality requires determining whether
+        # or not right_input_metadata is also None. If right_input_metadata is None, left and
+        # right metadata are equal and True is returned.
+        return right_input_metadata is None
+
+    # right_input_metadata is None, but left_input_metadata is not.
+    if right_input_metadata is None:
+        return False
+
+    # Neither left_input_metadata nor right_input_metadata is None.
+    input_metadata_keys = left_input_metadata.keys()
+
+    # Check if input_metadata keys match.
+    if input_metadata_keys != right_input_metadata.keys():
+        return False
+    # Check if input_metadata values match for all keys.
+    for key in input_metadata_keys:
+        if not is_same_type(left_input_metadata[key], right_input_metadata[key]):
+            return False
+
+    # All keys and values match, so return True.
+    return True
+
+
+def _deserialize_independent_query_plan_field(dict_value: Dict[str, Any]) -> "IndependentQueryPlan":
+    """Deserialize the dict representation of IndependentQueryPlan."""
+    # Note: there will be more types of IndependentQueryPlans that will require different
+    # deserialization shortly.
+    return SimpleExecute.from_dict(dict_value)
+
+
+# ############
+# Public API #
+# ############
+
+
+@unique
+class BackendType(Enum):
+    # N.B.: The values of the enums are the "human-friendly" display names. Since they are shown
+    # to humans, they are subject to change if said humans find a friendlier name.
+    # Don't assume they are immutable!
+    cypher = "Cypher"
+    gremlin = "Gremlin"
+    interpreter = "interpreter"
+    match = "OrientDB MATCH"
+    mssql = "MSSQL"
+    postgresql = "PostgreSQL"
+
+
+@dataclass(init=True, repr=True, eq=True, frozen=True)
+class ProviderMetadata(DataClassJsonMixin):
+    """Metadata about the provider."""
+
+    # Name of the type of provider.
+    backend_type: BackendType
+
+    # Whether this backend requires MSSQL fold postprocessing for folded outputs.
+    requires_fold_postprocessing: bool
+
+
+@dataclass(init=True, repr=True, eq=True, frozen=True)
+class QueryPlanNode(DataClassJsonMixin, metaclass=ABCMeta):
+    """Abstract query plan node. May or may not contain other nodes, depending on its type."""
+
+    # Unique ID of the plan node.
+    # Note: do not compare "uuid" values to determine whether query plan nodes are equal, since two
+    # plans with different identifiers might still be semantically equivalent and therefore equal.
+    uuid: str = field(compare=False)
+
+
+@dataclass(init=True, repr=True, eq=False, frozen=True)
+class SimpleExecute(QueryPlanNode):
+    """Just give the specified query and args to the provider; it'll execute it for you as-is."""
+
+    provider_id: str
+    provider_metadata: ProviderMetadata
+    query: str  # in whatever query language the provider will accept (not necessarily GraphQL)
+    arguments: Dict[str, Any]
+
+    # Input and output metadata of the query.
+    output_metadata: Dict[str, OutputMetadata] = field(
+        metadata=config(
+            encoder=_serialize_output_metadata_field, decoder=_deserialize_output_metadata_field
+        )
+    )
+    input_metadata: Dict[str, QueryArgumentGraphQLType] = field(
+        metadata=config(
+            encoder=_serialize_input_metadata_field, decoder=_deserialize_input_metadata_field
+        )
+    )
+
+    def __eq__(self, other: Any) -> bool:
+        """Check equality between an object and this SimpleExecute."""
+        if not isinstance(other, SimpleExecute):
+            return False
+
+        # Perform a special check for input_metadata since GraphQLTypes don't have equality, and
+        # check all other fields in a straightforward manner.
+        return (
+            self.provider_id == other.provider_id
+            and self.provider_metadata == other.provider_metadata
+            and self.query == other.query
+            and self.arguments == other.arguments
+            and self.output_metadata == other.output_metadata
+            and _compare_input_metadata_field(self.input_metadata, other.input_metadata)
+        )
+
+
+# More types of IndependentQueryPlans will be added in the future.
+IndependentQueryPlan = SimpleExecute
+
+
+@dataclass(init=True, repr=True, eq=True, frozen=True)
+class QueryPlan(DataClassJsonMixin):
+    """A description of the execution of a GraphQL query, including pagination and joins."""
+
+    # Version number, so we can make breaking changes without requiring lock-step upgrades.
+    # Clients should report supported version ranges when requesting a plan, and the server
+    # should pick the highest version that is supported by both client and server.
+    version: int
+
+    # Metadata on which provider produced the plan, and for what inputs.
+    provider_id: str
+    input_graphql_query: str
+    input_parameters: Dict[str, Any]
+    desired_page_size: Optional[int]
+    output_metadata: Dict[str, OutputMetadata] = field(
+        metadata=config(
+            encoder=_serialize_output_metadata_field, decoder=_deserialize_output_metadata_field
+        )
+    )
+
+    # The actual query plan.
+    plan_root_node: IndependentQueryPlan = field(
+        metadata=config(decoder=_deserialize_independent_query_plan_field)
+    )
diff --git a/mypy.ini b/mypy.ini
index 6d34e87db..21ab52826 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -160,7 +160,7 @@ disallow_untyped_calls = False
 [mypy-graphql_compiler.query_pagination.query_parameterizer.*]
 disallow_untyped_calls = False
 
-[mypy-graphql_compiler.query_planning.*]
+[mypy-graphql_compiler.query_planning.make_query_plan.*]
 disallow_untyped_calls = False
 disallow_untyped_defs = False
 
diff --git a/setup.py b/setup.py
index c634a16b4..8b2e35700 100755
--- a/setup.py
+++ b/setup.py
@@ -56,6 +56,7 @@ def find_long_description() -> str:
     packages=find_packages(exclude=["tests*"]),
     install_requires=[  # Make sure to keep in sync with Pipfile requirements.
        "ciso8601>=2.1.3,<3",
+        "dataclasses-json>=0.5.2,<0.6",
        "funcy>=1.7.3,<2",
        "graphql-core>=3.1.2,<3.2",
        "six>=1.10.0",
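
For orientation, here is a minimal usage sketch (not taken from the patch) of how the decoder hooks declared above are expected to rebuild a QueryPlan from its JSON-friendly dictionary form. The provider ID, query text, parameter values, and output names below are made-up placeholders; the only assumed APIs are the modules imported or defined in this diff plus graphql-core's GraphQLString.

from graphql import GraphQLString

from graphql_compiler import GraphQLDate
from graphql_compiler.global_utils import is_same_type
from graphql_compiler.query_planning.typedefs import QueryPlan

# Placeholder wire-format dictionary. GraphQL types travel as type strings ("String", "Date")
# because plain JSON cannot carry GraphQLType objects; all literal values here are illustrative.
plan_dict = {
    "version": 1,
    "provider_id": "example-postgres-provider",
    "input_graphql_query": '{ Animal { name @output(out_name: "animal_name") } }',
    "input_parameters": {"min_birthday": "2000-01-01"},
    "desired_page_size": 1000,
    "output_metadata": {
        "animal_name": {"type": "String", "optional": False, "folded": False},
    },
    "plan_root_node": {
        "uuid": "00000000-0000-0000-0000-000000000000",
        "provider_id": "example-postgres-provider",
        "provider_metadata": {
            "backend_type": "PostgreSQL",
            "requires_fold_postprocessing": False,
        },
        "query": "SELECT name AS animal_name FROM animal WHERE birthday >= :min_birthday",
        "arguments": {"min_birthday": "2000-01-01"},
        "output_metadata": {
            "animal_name": {"type": "String", "optional": False, "folded": False},
        },
        "input_metadata": {"min_birthday": "Date"},
    },
}

# DataClassJsonMixin supplies from_dict(); the config(decoder=...) hooks on the fields turn the
# type strings back into GraphQLType objects and the nested dict into a SimpleExecute node.
plan = QueryPlan.from_dict(plan_dict)
assert is_same_type(plan.output_metadata["animal_name"].type, GraphQLString)
assert is_same_type(plan.plan_root_node.input_metadata["min_birthday"], GraphQLDate)

The matching config(encoder=...) hooks exist for the reverse direction, converting GraphQLType values back into type strings when a plan is written out.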