airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import re
from typing import Any, Mapping, Set, Tuple, Union

from airbyte_cdk.sources.declarative.parsers.custom_exceptions import (
    CircularReferenceException,
    UndefinedReferenceException,
)

REF_TAG = "$ref"


class ManifestReferenceResolver:
    """
    An incoming manifest can contain references to values previously defined.
    This parser will dereference these values to produce a complete ConnectionDefinition.

    References can be defined using a #/<arg> string.
    ```
    key: 1234
    reference: "#/key"
    ```
    will produce the following definition:
    ```
    key: 1234
    reference: 1234
    ```
    This also works with objects:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs: "#/key_value_pairs"
    ```
    will produce the following definition:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      k1: v1
      k2: v2
    ```

    The $ref keyword can be used to refer to an object and enhance it with additional key-value pairs
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      $ref: "#/key_value_pairs"
      k3: v3
    ```
    will produce the following definition:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      k1: v1
      k2: v2
      k3: v3
    ```

    References can also point to nested values.
    Nested references are ambiguous because one could define a key containing `/`.
    In this example, we want to refer to the limit key in the dict object:
    ```
    dict:
      limit: 50
    limit_ref: "#/dict/limit"
    ```
    will produce the following definition:
    ```
    dict:
      limit: 50
    limit_ref: 50
    ```

    whereas here we want to access the `nested/path` value.
    ```
    nested:
      path: "first one"
    nested/path: "uh oh"
    value: "#/nested/path"
    ```
    will produce the following definition:
    ```
    nested:
      path: "first one"
    nested/path: "uh oh"
    value: "uh oh"
    ```

    To resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward
    until we find a key with the given path, or until there is nothing to traverse.
    """

    def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Resolve every reference contained in the manifest.

        :param manifest: incoming manifest that could have references to previously defined components
        :return: the result of evaluating the manifest root, with references replaced by the values they point to
        """
        return self._evaluate_node(manifest, manifest, set())  # type: ignore[no-any-return]

    def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: Set[Any]) -> Any:
        """
        Recursively evaluate a node of the manifest.

        :param node: the value to evaluate (dict, list, reference string or any other value)
        :param manifest: the complete manifest, used to look up referenced values
        :param visited: reference strings currently being resolved, used to detect cycles
        :return: the node with its references resolved; dicts and lists are rebuilt, other values are returned as is
        :raises CircularReferenceException: if a reference is encountered while it is already being resolved
        """
        if isinstance(node, dict):
            # Every key except $ref is evaluated first.
            evaluated_dict = {
                k: self._evaluate_node(v, manifest, visited)
                for k, v in node.items()
                if not self._is_ref_key(k)
            }
            if REF_TAG not in node:
                return evaluated_dict
            # The node includes a $ref key, so we splat the referenced value(s) into the evaluated dict
            evaluated_ref = self._evaluate_node(node[REF_TAG], manifest, visited)
            if not isinstance(evaluated_ref, dict):
                # A non-dict referenced value replaces the whole node; sibling keys are discarded.
                return evaluated_ref
            # The values defined on the component take precedence over the reference values
            return evaluated_ref | evaluated_dict
        if isinstance(node, list):
            return [self._evaluate_node(v, manifest, visited) for v in node]
        if self._is_ref(node):
            if node in visited:
                raise CircularReferenceException(node)
            # Track the reference only while it is being resolved so that it can legitimately be reused elsewhere.
            visited.add(node)
            ret = self._evaluate_node(self._lookup_ref_value(node, manifest), manifest, visited)
            visited.remove(node)
            return ret
        return node

    def _lookup_ref_value(self, ref: str, manifest: Mapping[str, Any]) -> Any:
        """
        Return the raw (not yet evaluated) value a reference string points to.

        :param ref: reference string of the form "#/<path>"
        :param manifest: the complete manifest
        :return: the value found at the referenced path
        :raises ValueError: if ref does not match the "#/<path>" format
        :raises UndefinedReferenceException: if the path cannot be resolved in the manifest
        """
        ref_match = re.match(r"#/(.*)", ref)
        if not ref_match:
            raise ValueError(f"Invalid reference format {ref}")
        path = ref_match.groups()[0]
        try:
            return self._read_ref_value(path, manifest)
        except (AttributeError, KeyError, IndexError, TypeError) as error:
            # TypeError is raised when the path walks through a value that cannot be subscripted with the
            # given component (e.g. a scalar, or a list indexed with a non-integer). It is an undefined
            # reference like any other and must not leak as a raw TypeError.
            raise UndefinedReferenceException(path, ref) from error

    @staticmethod
    def _is_ref(node: Any) -> bool:
        """Return True if node is a string starting with "#/"."""
        return isinstance(node, str) and node.startswith("#/")

    @staticmethod
    def _is_ref_key(key: str) -> bool:
        """Return True if key is the $ref keyword."""
        return bool(key == REF_TAG)

    @staticmethod
    def _read_ref_value(ref: str, manifest_node: Mapping[str, Any]) -> Any:
        """
        Read the value at the referenced location of the manifest.

        References are ambiguous because one could define a key containing `/`
        In this example, we want to refer to the `limit` key in the `dict` object:
            dict:
                limit: 50
            limit_ref: "#/dict/limit"

        Whereas here we want to access the `nested/path` value.
          nested:
            path: "first one"
          nested/path: "uh oh"
          value: "#/nested/path"

        To resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward
        until we find a key with the given path, or until there is nothing to traverse.

        Consider the path foo/bar/baz. To resolve the ambiguity, we first try 'foo/bar/baz' in its entirety as a top-level key. If this
        fails, we try 'foo' as the top-level key, and if this succeeds, pass 'bar/baz' on as the key to be tried at the next level.

        An empty path returns manifest_node itself. Lookup errors raised while descending are not handled here.
        """
        while ref:
            try:
                return manifest_node[ref]
            except (KeyError, TypeError):
                head, ref = _parse_path(ref)
                manifest_node = manifest_node[head]  # type: ignore # Couldn't figure out how to fix this since manifest_node can get reassigned into other types like lists
        return manifest_node


def _parse_path(ref: str) -> Tuple[Union[str, int], str]:
    """
    Return the next path component, together with the rest of the path.

    A path component may be a string key, or an int index.

    >>> _parse_path("foo/bar")
    ('foo', 'bar')
    >>> _parse_path("foo/7/8/bar")
    ('foo', '7/8/bar')
    >>> _parse_path("7/8/bar")
    (7, '8/bar')
    >>> _parse_path("8/bar")
    (8, 'bar')
    >>> _parse_path("8foo/bar")
    ('8foo', 'bar')
    """
    match = re.match(r"([^/]*)/?(.*)", ref)
    if match:
        first, rest = match.groups()
        try:
            # A component that parses as an integer is treated as a list index.
            return int(first), rest
        except ValueError:
            return first, rest
    else:
        raise ValueError(f"Invalid path {ref} specified")
class ManifestReferenceResolver:
    """
    An incoming manifest can contain references to values previously defined.
    This parser will dereference these values to produce a complete ConnectionDefinition.

    References can be defined using a #/<arg> string.
    ```
    key: 1234
    reference: "#/key"
    ```
    will produce the following definition:
    ```
    key: 1234
    reference: 1234
    ```
    This also works with objects:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs: "#/key_value_pairs"
    ```
    will produce the following definition:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      k1: v1
      k2: v2
    ```

    The $ref keyword can be used to refer to an object and enhance it with additional key-value pairs
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      $ref: "#/key_value_pairs"
      k3: v3
    ```
    will produce the following definition:
    ```
    key_value_pairs:
      k1: v1
      k2: v2
    same_key_value_pairs:
      k1: v1
      k2: v2
      k3: v3
    ```

    References can also point to nested values.
    Nested references are ambiguous because one could define a key containing `/`.
    In this example, we want to refer to the limit key in the dict object:
    ```
    dict:
      limit: 50
    limit_ref: "#/dict/limit"
    ```
    will produce the following definition:
    ```
    dict:
      limit: 50
    limit_ref: 50
    ```

    whereas here we want to access the `nested/path` value.
    ```
    nested:
      path: "first one"
    nested/path: "uh oh"
    value: "#/nested/path"
    ```
    will produce the following definition:
    ```
    nested:
      path: "first one"
    nested/path: "uh oh"
    value: "uh oh"
    ```

    To resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward
    until we find a key with the given path, or until there is nothing to traverse.
    """

    def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Resolve every reference contained in the manifest.

        :param manifest: incoming manifest that could have references to previously defined components
        :return: the result of evaluating the manifest root, with references replaced by the values they point to
        """
        return self._evaluate_node(manifest, manifest, set())  # type: ignore[no-any-return]

    def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: Set[Any]) -> Any:
        """
        Recursively evaluate a node of the manifest.

        :param node: the value to evaluate (dict, list, reference string or any other value)
        :param manifest: the complete manifest, used to look up referenced values
        :param visited: reference strings currently being resolved, used to detect cycles
        :return: the node with its references resolved; dicts and lists are rebuilt, other values are returned as is
        :raises CircularReferenceException: if a reference is encountered while it is already being resolved
        """
        if isinstance(node, dict):
            # Every key except $ref is evaluated first.
            evaluated_dict = {
                k: self._evaluate_node(v, manifest, visited)
                for k, v in node.items()
                if not self._is_ref_key(k)
            }
            if REF_TAG not in node:
                return evaluated_dict
            # The node includes a $ref key, so we splat the referenced value(s) into the evaluated dict
            evaluated_ref = self._evaluate_node(node[REF_TAG], manifest, visited)
            if not isinstance(evaluated_ref, dict):
                # A non-dict referenced value replaces the whole node; sibling keys are discarded.
                return evaluated_ref
            # The values defined on the component take precedence over the reference values
            return evaluated_ref | evaluated_dict
        if isinstance(node, list):
            return [self._evaluate_node(v, manifest, visited) for v in node]
        if self._is_ref(node):
            if node in visited:
                raise CircularReferenceException(node)
            # Track the reference only while it is being resolved so that it can legitimately be reused elsewhere.
            visited.add(node)
            ret = self._evaluate_node(self._lookup_ref_value(node, manifest), manifest, visited)
            visited.remove(node)
            return ret
        return node

    def _lookup_ref_value(self, ref: str, manifest: Mapping[str, Any]) -> Any:
        """
        Return the raw (not yet evaluated) value a reference string points to.

        :param ref: reference string of the form "#/<path>"
        :param manifest: the complete manifest
        :return: the value found at the referenced path
        :raises ValueError: if ref does not match the "#/<path>" format
        :raises UndefinedReferenceException: if the path cannot be resolved in the manifest
        """
        ref_match = re.match(r"#/(.*)", ref)
        if not ref_match:
            raise ValueError(f"Invalid reference format {ref}")
        path = ref_match.groups()[0]
        try:
            return self._read_ref_value(path, manifest)
        except (AttributeError, KeyError, IndexError, TypeError) as error:
            # TypeError is raised when the path walks through a value that cannot be subscripted with the
            # given component (e.g. a scalar, or a list indexed with a non-integer). It is an undefined
            # reference like any other and must not leak as a raw TypeError.
            raise UndefinedReferenceException(path, ref) from error

    @staticmethod
    def _is_ref(node: Any) -> bool:
        """Return True if node is a string starting with "#/"."""
        return isinstance(node, str) and node.startswith("#/")

    @staticmethod
    def _is_ref_key(key: str) -> bool:
        """Return True if key is the $ref keyword."""
        return bool(key == REF_TAG)

    @staticmethod
    def _read_ref_value(ref: str, manifest_node: Mapping[str, Any]) -> Any:
        """
        Read the value at the referenced location of the manifest.

        References are ambiguous because one could define a key containing `/`
        In this example, we want to refer to the `limit` key in the `dict` object:
            dict:
                limit: 50
            limit_ref: "#/dict/limit"

        Whereas here we want to access the `nested/path` value.
          nested:
            path: "first one"
          nested/path: "uh oh"
          value: "#/nested/path"

        To resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward
        until we find a key with the given path, or until there is nothing to traverse.

        Consider the path foo/bar/baz. To resolve the ambiguity, we first try 'foo/bar/baz' in its entirety as a top-level key. If this
        fails, we try 'foo' as the top-level key, and if this succeeds, pass 'bar/baz' on as the key to be tried at the next level.

        An empty path returns manifest_node itself. Lookup errors raised while descending are not handled here.
        """
        while ref:
            try:
                return manifest_node[ref]
            except (KeyError, TypeError):
                head, ref = _parse_path(ref)
                manifest_node = manifest_node[head]  # type: ignore # Couldn't figure out how to fix this since manifest_node can get reassigned into other types like lists
        return manifest_node
An incoming manifest can contain references to values previously defined. This parser will dereference these values to produce a complete ConnectionDefinition.
References can be defined using a `#/<arg>` string.
key: 1234
reference: "#/key"
will produce the following definition:
key: 1234
reference: 1234
This also works with objects:
key_value_pairs:
k1: v1
k2: v2
same_key_value_pairs: "#/key_value_pairs"
will produce the following definition:
key_value_pairs:
k1: v1
k2: v2
same_key_value_pairs:
k1: v1
k2: v2
The $ref keyword can be used to refer to an object and enhance it with additional key-value pairs
key_value_pairs:
k1: v1
k2: v2
same_key_value_pairs:
$ref: "#/key_value_pairs"
k3: v3
will produce the following definition:
key_value_pairs:
k1: v1
k2: v2
same_key_value_pairs:
k1: v1
k2: v2
k3: v3
References can also point to nested values.
Nested references are ambiguous because one could define a key containing `/`.
in this example, we want to refer to the limit key in the dict object:
dict:
limit: 50
limit_ref: "#/dict/limit"
will produce the following definition:
dict:
limit: 50
limit_ref: 50
whereas here we want to access the `nested/path` value.
nested:
path: "first one"
nested/path: "uh oh"
value: "#/nested/path"
will produce the following definition:
nested:
path: "first one"
nested/path: "uh oh"
value: "uh oh"
to resolve the ambiguity, we try looking for the reference key at the top level, and then traverse the structs downward until we find a key with the given path, or until there is nothing to traverse.
def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]:
    """
    Resolve every reference contained in the manifest.

    :param manifest: incoming manifest that could have references to previously defined components
    :return: the value produced by evaluating the manifest root, starting with an empty set of visited references
    """
    # The manifest is both the node to evaluate and the lookup table for the references it contains.
    resolved: Mapping[str, Any] = self._evaluate_node(manifest, manifest, set())
    return resolved
Parameters
- manifest: incoming manifest that could have references to previously defined components