airbyte_cdk.destinations.vector_db_based.config
```python
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

from typing import Any, Dict, List, Literal, Optional, Union

import dpath
from pydantic.v1 import BaseModel, Field

from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
from airbyte_cdk.utils.spec_schema_transformations import resolve_refs


class SeparatorSplitterConfigModel(BaseModel):
    mode: Literal["separator"] = Field("separator", const=True)
    separators: List[str] = Field(
        default=['"\\n\\n"', '"\\n"', '" "', '""'],
        title="Separators",
        description='List of separator strings to split text fields by. The separator itself needs to be wrapped in double quotes, e.g. to split by the dot character, use ".". To split by a newline, use "\\n".',
    )
    keep_separator: bool = Field(
        default=False,
        title="Keep separator",
        description="Whether to keep the separator in the resulting chunks",
    )

    class Config(OneOfOptionConfig):
        title = "By Separator"
        description = "Split the text by the list of separators until the chunk size is reached, using the earlier mentioned separators where possible. This is useful for splitting text fields by paragraphs, sentences, words, etc."
        discriminator = "mode"


class MarkdownHeaderSplitterConfigModel(BaseModel):
    mode: Literal["markdown"] = Field("markdown", const=True)
    split_level: int = Field(
        default=1,
        title="Split level",
        description="Level of markdown headers to split text fields by. Headings down to the specified level will be used as split points",
        le=6,
        ge=1,
    )

    class Config(OneOfOptionConfig):
        title = "By Markdown header"
        description = "Split the text by Markdown headers down to the specified header level. If the chunk size fits multiple sections, they will be combined into a single chunk."
        discriminator = "mode"


class CodeSplitterConfigModel(BaseModel):
    mode: Literal["code"] = Field("code", const=True)
    language: str = Field(
        title="Language",
        description="Split code in suitable places based on the programming language",
        enum=[
            "cpp",
            "go",
            "java",
            "js",
            "php",
            "proto",
            "python",
            "rst",
            "ruby",
            "rust",
            "scala",
            "swift",
            "markdown",
            "latex",
            "html",
            "sol",
        ],
    )

    class Config(OneOfOptionConfig):
        title = "By Programming Language"
        description = "Split the text by suitable delimiters based on the programming language. This is useful for splitting code into chunks."
        discriminator = "mode"


TextSplitterConfigModel = Union[
    SeparatorSplitterConfigModel, MarkdownHeaderSplitterConfigModel, CodeSplitterConfigModel
]


class FieldNameMappingConfigModel(BaseModel):
    from_field: str = Field(title="From field name", description="The field name in the source")
    to_field: str = Field(
        title="To field name", description="The field name to use in the destination"
    )


class ProcessingConfigModel(BaseModel):
    chunk_size: int = Field(
        ...,
        title="Chunk size",
        maximum=8191,
        minimum=1,
        description="Size of chunks in tokens to store in vector store (make sure it is not too big for the context if your LLM)",
    )
    chunk_overlap: int = Field(
        title="Chunk overlap",
        description="Size of overlap between chunks in tokens to store in vector store to better capture relevant context",
        default=0,
    )
    text_fields: Optional[List[str]] = Field(
        default=[],
        title="Text fields to embed",
        description="List of fields in the record that should be used to calculate the embedding. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered text fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array.",
        always_show=True,
        examples=["text", "user.name", "users.*.name"],
    )
    metadata_fields: Optional[List[str]] = Field(
        default=[],
        title="Fields to store as metadata",
        description="List of fields in the record that should be stored as metadata. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered metadata fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. When specifying nested paths, all matching values are flattened into an array set to a field named by the path.",
        always_show=True,
        examples=["age", "user", "user.name"],
    )
    text_splitter: TextSplitterConfigModel = Field(
        default=None,
        title="Text splitter",
        discriminator="mode",
        type="object",
        description="Split text fields into chunks based on the specified method.",
    )
    field_name_mappings: Optional[List[FieldNameMappingConfigModel]] = Field(
        default=[],
        title="Field name mappings",
        description="List of fields to rename. Not applicable for nested fields, but can be used to rename fields already flattened via dot notation.",
    )

    class Config:
        schema_extra = {"group": "processing"}


class OpenAIEmbeddingConfigModel(BaseModel):
    mode: Literal["openai"] = Field("openai", const=True)
    openai_key: str = Field(..., title="OpenAI API key", airbyte_secret=True)

    class Config(OneOfOptionConfig):
        title = "OpenAI"
        description = "Use the OpenAI API to embed text. This option is using the text-embedding-ada-002 model with 1536 embedding dimensions."
        discriminator = "mode"


class OpenAICompatibleEmbeddingConfigModel(BaseModel):
    mode: Literal["openai_compatible"] = Field("openai_compatible", const=True)
    api_key: str = Field(title="API key", default="", airbyte_secret=True)
    base_url: str = Field(
        ...,
        title="Base URL",
        description="The base URL for your OpenAI-compatible service",
        examples=["https://your-service-name.com"],
    )
    model_name: str = Field(
        title="Model name",
        description="The name of the model to use for embedding",
        default="text-embedding-ada-002",
        examples=["text-embedding-ada-002"],
    )
    dimensions: int = Field(
        title="Embedding dimensions",
        description="The number of dimensions the embedding model is generating",
        examples=[1536, 384],
    )

    class Config(OneOfOptionConfig):
        title = "OpenAI-compatible"
        description = "Use a service that's compatible with the OpenAI API to embed text."
        discriminator = "mode"


class AzureOpenAIEmbeddingConfigModel(BaseModel):
    mode: Literal["azure_openai"] = Field("azure_openai", const=True)
    openai_key: str = Field(
        ...,
        title="Azure OpenAI API key",
        airbyte_secret=True,
        description="The API key for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource",
    )
    api_base: str = Field(
        ...,
        title="Resource base URL",
        description="The base URL for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource",
        examples=["https://your-resource-name.openai.azure.com"],
    )
    deployment: str = Field(
        ...,
        title="Deployment",
        description="The deployment for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource",
        examples=["your-resource-name"],
    )

    class Config(OneOfOptionConfig):
        title = "Azure OpenAI"
        description = "Use the Azure-hosted OpenAI API to embed text. This option is using the text-embedding-ada-002 model with 1536 embedding dimensions."
        discriminator = "mode"


class FakeEmbeddingConfigModel(BaseModel):
    mode: Literal["fake"] = Field("fake", const=True)

    class Config(OneOfOptionConfig):
        title = "Fake"
        description = "Use a fake embedding made out of random vectors with 1536 embedding dimensions. This is useful for testing the data pipeline without incurring any costs."
        discriminator = "mode"


class FromFieldEmbeddingConfigModel(BaseModel):
    mode: Literal["from_field"] = Field("from_field", const=True)
    field_name: str = Field(
        ...,
        title="Field name",
        description="Name of the field in the record that contains the embedding",
        examples=["embedding", "vector"],
    )
    dimensions: int = Field(
        ...,
        title="Embedding dimensions",
        description="The number of dimensions the embedding model is generating",
        examples=[1536, 384],
    )

    class Config(OneOfOptionConfig):
        title = "From Field"
        description = "Use a field in the record as the embedding. This is useful if you already have an embedding for your data and want to store it in the vector store."
        discriminator = "mode"


class CohereEmbeddingConfigModel(BaseModel):
    mode: Literal["cohere"] = Field("cohere", const=True)
    cohere_key: str = Field(..., title="Cohere API key", airbyte_secret=True)

    class Config(OneOfOptionConfig):
        title = "Cohere"
        description = "Use the Cohere API to embed text."
        discriminator = "mode"


class VectorDBConfigModel(BaseModel):
    """
    The configuration model for the Vector DB based destinations. This model is used to generate the UI for the destination configuration,
    as well as to provide type safety for the configuration passed to the destination.

    The configuration model is composed of four parts:
    * Processing configuration
    * Embedding configuration
    * Indexing configuration
    * Advanced configuration

    Processing, embedding and advanced configuration are provided by this base class, while the indexing configuration is provided by the destination connector in the sub class.
    """

    embedding: Union[
        OpenAIEmbeddingConfigModel,
        CohereEmbeddingConfigModel,
        FakeEmbeddingConfigModel,
        AzureOpenAIEmbeddingConfigModel,
        OpenAICompatibleEmbeddingConfigModel,
    ] = Field(
        ...,
        title="Embedding",
        description="Embedding configuration",
        discriminator="mode",
        group="embedding",
        type="object",
    )
    processing: ProcessingConfigModel
    omit_raw_text: bool = Field(
        default=False,
        title="Do not store raw text",
        group="advanced",
        description="Do not store the text that gets embedded along with the vector and the metadata in the destination. If set to true, only the vector and the metadata will be stored - in this case raw text for LLM use cases needs to be retrieved from another source.",
    )

    class Config:
        title = "Destination Config"
        schema_extra = {
            "groups": [
                {"id": "processing", "title": "Processing"},
                {"id": "embedding", "title": "Embedding"},
                {"id": "indexing", "title": "Indexing"},
                {"id": "advanced", "title": "Advanced"},
            ]
        }

    @staticmethod
    def remove_discriminator(schema: Dict[str, Any]) -> None:
        """pydantic adds "discriminator" to the schema for oneOfs, which is not treated right by the platform as we inline all references"""
        dpath.delete(schema, "properties/**/discriminator")

    @classmethod
    def schema(cls, by_alias: bool = True, ref_template: str = "") -> Dict[str, Any]:
        """we're overriding the schema classmethod to enable some post-processing"""
        schema: Dict[str, Any] = super().schema()
        schema = resolve_refs(schema)
        cls.remove_discriminator(schema)
        return schema
```
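The processing models above can be constructed directly like any other pydantic model. The following sketch (assuming the `airbyte_cdk` package is installed and importable; field values are placeholders) builds a processing configuration that splits text fields by Markdown headers:

```python
from airbyte_cdk.destinations.vector_db_based.config import (
    MarkdownHeaderSplitterConfigModel,
    ProcessingConfigModel,
)

# Chunk on level-1 and level-2 Markdown headings, with a small token overlap
# between neighbouring chunks.
processing = ProcessingConfigModel(
    chunk_size=512,                        # required, 1..8191 tokens
    chunk_overlap=50,
    text_fields=["title", "body"],         # dot notation and wildcards are allowed
    metadata_fields=["author", "user.name"],
    text_splitter=MarkdownHeaderSplitterConfigModel(split_level=2),
)

print(processing.text_splitter.mode)  # "markdown"
```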
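Because `text_splitter` is a discriminated union on `mode`, the same configuration can also be parsed from a plain dict (for example, the JSON a user entered in the UI); the `mode` value selects which splitter model is instantiated. A minimal sketch:

```python
from airbyte_cdk.destinations.vector_db_based.config import (
    ProcessingConfigModel,
    SeparatorSplitterConfigModel,
)

raw = {
    "chunk_size": 1000,
    "text_splitter": {
        "mode": "separator",
        "separators": ['"\\n\\n"', '"\\n"'],  # separators are quoted strings, as in the defaults
        "keep_separator": False,
    },
}

processing = ProcessingConfigModel.parse_obj(raw)
assert isinstance(processing.text_splitter, SeparatorSplitterConfigModel)
```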
Each of the splitter and embedding option models above nests a `Config` class that inherits from `OneOfOptionConfig`, a base class for configuring a Pydantic model that is used as a oneOf option in a parent model in a way that is compatible with all Airbyte consumers.
Subclasses set `title` and `description` (both show up in the UI) and `discriminator` (which makes sure the discriminator field is marked as required in the generated schema).
Usage:
```python
class OptionModel(BaseModel):
    mode: Literal["option_a"] = Field("option_a", const=True)
    option_a_field: str = Field(...)

    class Config(OneOfOptionConfig):
        title = "Option A"
        description = "Option A description"
        discriminator = "mode"
```
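Any new option follows the same pattern. As a purely illustrative sketch, a hypothetical embedding option could be declared like this (the `huggingface` mode and `api_token` field are made up for the example; a real option would also need to be added to the relevant Union field and handled by the destination's embedder code):

```python
from typing import Literal

from pydantic.v1 import BaseModel, Field

from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig


class HuggingFaceEmbeddingConfigModel(BaseModel):
    # Hypothetical option, shown only to illustrate the OneOfOptionConfig pattern.
    mode: Literal["huggingface"] = Field("huggingface", const=True)
    api_token: str = Field(..., title="API token", airbyte_secret=True)

    class Config(OneOfOptionConfig):
        title = "Hugging Face"
        description = "Use a Hugging Face model to embed text."
        discriminator = "mode"
```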
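The embedding options work the same way: each model carries a constant `mode` plus the provider-specific credentials and endpoints. A small sketch with placeholder secrets and URLs (substitute real values from your provider):

```python
from airbyte_cdk.destinations.vector_db_based.config import (
    AzureOpenAIEmbeddingConfigModel,
    OpenAICompatibleEmbeddingConfigModel,
    OpenAIEmbeddingConfigModel,
)

openai = OpenAIEmbeddingConfigModel(openai_key="sk-...")

azure = AzureOpenAIEmbeddingConfigModel(
    openai_key="...",
    api_base="https://your-resource-name.openai.azure.com",
    deployment="your-deployment",
)

compatible = OpenAICompatibleEmbeddingConfigModel(
    api_key="...",
    base_url="https://your-service-name.com",
    model_name="text-embedding-ada-002",
    dimensions=1536,
)
```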
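Two options are useful when no external embedding service should be called: `FakeEmbeddingConfigModel` produces random 1536-dimensional vectors for testing, and `FromFieldEmbeddingConfigModel` reuses vectors that are already present in the records. A sketch:

```python
from airbyte_cdk.destinations.vector_db_based.config import (
    FakeEmbeddingConfigModel,
    FromFieldEmbeddingConfigModel,
)

# For pipeline tests without incurring embedding costs.
fake = FakeEmbeddingConfigModel()

# When the source records already contain a vector, e.g. under an "embedding" field.
from_field = FromFieldEmbeddingConfigModel(field_name="embedding", dimensions=1536)
```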
`VectorDBConfigModel` is the configuration model for the vector DB based destinations. It is used to generate the UI for the destination configuration and to provide type safety for the configuration passed to the destination.
The model is composed of four parts:
- Processing configuration
- Embedding configuration
- Indexing configuration
- Advanced configuration
Processing, embedding, and advanced configuration are provided by this base class, while the indexing configuration is provided by the destination connector in a subclass.
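A destination connector typically subclasses `VectorDBConfigModel` and contributes only the indexing part. The following sketch uses a made-up `MyIndexingModel` with made-up fields; a real connector would define whatever settings its vector store needs:

```python
from pydantic.v1 import BaseModel, Field

from airbyte_cdk.destinations.vector_db_based.config import VectorDBConfigModel


class MyIndexingModel(BaseModel):
    # Hypothetical indexing settings; real connectors define their own fields here.
    host: str = Field(..., title="Host", description="URL of the vector store")
    index: str = Field(..., title="Index", description="Name of the index to write to")

    class Config:
        title = "Indexing"
        schema_extra = {"group": "indexing"}  # matches the "indexing" group declared below


class MyDestinationConfigModel(VectorDBConfigModel):
    indexing: MyIndexingModel
```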
`remove_discriminator` strips the `discriminator` entries that pydantic adds to the schema for oneOf fields; the Airbyte platform does not treat them correctly because all references are inlined in the spec.
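A minimal illustration of what that cleanup does, on a toy schema fragment rather than the full generated spec:

```python
import dpath

schema = {
    "properties": {
        "embedding": {"type": "object", "discriminator": "mode", "oneOf": []},
    }
}

# Matches properties/<any field>/discriminator and removes it in place.
dpath.delete(schema, "properties/**/discriminator")
assert "discriminator" not in schema["properties"]["embedding"]
```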
`schema` overrides pydantic's `schema` classmethod to enable some post-processing: the generated schema has its references resolved and inlined via `resolve_refs`, and the oneOf discriminators are removed before the schema is returned.
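Continuing the subclass sketch above, calling `schema()` should yield the inlined connector spec, with the `groups` declared in `Config.schema_extra` preserved for the UI and the discriminator entries stripped (a sketch; exact schema contents depend on the installed pydantic and airbyte_cdk versions):

```python
# Continuing with MyDestinationConfigModel from the earlier sketch.
spec = MyDestinationConfigModel.schema()

print(spec["title"])                       # "Destination Config"
print([g["id"] for g in spec["groups"]])   # ["processing", "embedding", "indexing", "advanced"]
assert "discriminator" not in spec["properties"]["embedding"]
```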