airbyte_cdk.sources.declarative.parsers.model_to_component_factory
#
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
#

from __future__ import annotations

import datetime
import importlib
import inspect
import logging
import re
from functools import partial
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Mapping,
    MutableMapping,
    Optional,
    Type,
    Union,
    cast,
    get_args,
    get_origin,
    get_type_hints,
)

from isodate import parse_duration
from pydantic.v1 import BaseModel
from requests import Response

from airbyte_cdk.connector_builder.models import (
    LogMessage as ConnectorBuilderLogMessage,
)
from airbyte_cdk.legacy.sources.declarative.declarative_stream import DeclarativeStream
from airbyte_cdk.legacy.sources.declarative.incremental import (
    DatetimeBasedCursor,
)
from airbyte_cdk.models import (
    AirbyteStateBlob,
    AirbyteStateMessage,
    AirbyteStateType,
    AirbyteStreamState,
    FailureType,
    Level,
    StreamDescriptor,
)
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
    DeclarativeAuthenticator,
    NoAuth,
)
from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm
from airbyte_cdk.sources.declarative.auth.oauth import (
    DeclarativeSingleUseRefreshTokenOauth2Authenticator,
)
from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator
from airbyte_cdk.sources.declarative.auth.token import (
    ApiKeyAuthenticator,
    BasicHttpAuthenticator,
    BearerAuthenticator,
    LegacySessionTokenAuthenticator,
)
from airbyte_cdk.sources.declarative.auth.token_provider import (
    InterpolatedStringTokenProvider,
    SessionTokenProvider,
    TokenProvider,
)
from airbyte_cdk.sources.declarative.checks import (
    CheckDynamicStream,
    CheckStream,
    DynamicStreamCheckConfig,
)
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
from airbyte_cdk.sources.declarative.decoders import (
    Decoder,
    IterableDecoder,
    JsonDecoder,
    PaginationDecoderDecorator,
    XmlDecoder,
    ZipfileDecoder,
)
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
    CompositeRawDecoder,
    CsvParser,
    GzipParser,
    JsonLineParser,
    JsonParser,
    Parser,
)
from airbyte_cdk.sources.declarative.extractors import (
    DpathExtractor,
    RecordFilter,
    RecordSelector,
    ResponseToFileExtractor,
)
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
from airbyte_cdk.sources.declarative.extractors.record_filter import (
    ClientSideIncrementalRecordFilterDecorator,
)
from airbyte_cdk.sources.declarative.incremental import (
    ConcurrentCursorFactory,
    ConcurrentPerPartitionCursor,
)
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import (
    LegacyToPerPartitionStateMigration,
)
from airbyte_cdk.sources.declarative.models import (
    CustomStateMigration,
)
from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import (
    DEPRECATION_LOGS_TAG,
    BaseModelWithDeprecations,
)
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    AddedFieldDefinition as AddedFieldDefinitionModel,
    AddFields as AddFieldsModel,
    ApiKeyAuthenticator as ApiKeyAuthenticatorModel,
    AsyncJobStatusMap as AsyncJobStatusMapModel,
    AsyncRetriever as AsyncRetrieverModel,
    BasicHttpAuthenticator as BasicHttpAuthenticatorModel,
    BearerAuthenticator as BearerAuthenticatorModel,
    CheckDynamicStream as CheckDynamicStreamModel,
    CheckStream as CheckStreamModel,
    ComplexFieldType as ComplexFieldTypeModel,
    ComponentMappingDefinition as ComponentMappingDefinitionModel,
    CompositeErrorHandler as CompositeErrorHandlerModel,
    ConcurrencyLevel as ConcurrencyLevelModel,
    ConfigAddFields as ConfigAddFieldsModel,
    ConfigComponentsResolver as ConfigComponentsResolverModel,
    ConfigMigration as ConfigMigrationModel,
    ConfigRemapField as ConfigRemapFieldModel,
    ConfigRemoveFields as ConfigRemoveFieldsModel,
    ConstantBackoffStrategy as ConstantBackoffStrategyModel,
    CsvDecoder as CsvDecoderModel,
    CursorPagination as CursorPaginationModel,
    CustomAuthenticator as CustomAuthenticatorModel,
    CustomBackoffStrategy as CustomBackoffStrategyModel,
    CustomConfigTransformation as CustomConfigTransformationModel,
    CustomDecoder as CustomDecoderModel,
    CustomErrorHandler as CustomErrorHandlerModel,
    CustomPaginationStrategy as CustomPaginationStrategyModel,
    CustomPartitionRouter as CustomPartitionRouterModel,
    CustomRecordExtractor as CustomRecordExtractorModel,
    CustomRecordFilter as CustomRecordFilterModel,
    CustomRequester as CustomRequesterModel,
    CustomRetriever as CustomRetrieverModel,
    CustomSchemaLoader as CustomSchemaLoader,
    CustomSchemaNormalization as CustomSchemaNormalizationModel,
    CustomTransformation as CustomTransformationModel,
    CustomValidationStrategy as CustomValidationStrategyModel,
    DatetimeBasedCursor as DatetimeBasedCursorModel,
    DeclarativeStream as DeclarativeStreamModel,
    DefaultErrorHandler as DefaultErrorHandlerModel,
    DefaultPaginator as DefaultPaginatorModel,
    DpathExtractor as DpathExtractorModel,
    DpathFlattenFields as DpathFlattenFieldsModel,
    DpathValidator as DpathValidatorModel,
    DynamicSchemaLoader as DynamicSchemaLoaderModel,
    DynamicStreamCheckConfig as DynamicStreamCheckConfigModel,
    ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
    FileUploader as FileUploaderModel,
    FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
    FlattenFields as FlattenFieldsModel,
    GroupByKeyMergeStrategy as GroupByKeyMergeStrategyModel,
    GroupingPartitionRouter as GroupingPartitionRouterModel,
    GzipDecoder as GzipDecoderModel,
    HTTPAPIBudget as HTTPAPIBudgetModel,
    HttpComponentsResolver as HttpComponentsResolverModel,
    HttpRequester as HttpRequesterModel,
    HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
    HttpResponseFilter as HttpResponseFilterModel,
    IncrementingCountCursor as IncrementingCountCursorModel,
    InlineSchemaLoader as InlineSchemaLoaderModel,
    IterableDecoder as IterableDecoderModel,
    JsonDecoder as JsonDecoderModel,
    JsonFileSchemaLoader as JsonFileSchemaLoaderModel,
    JsonlDecoder as JsonlDecoderModel,
    JwtAuthenticator as JwtAuthenticatorModel,
    JwtHeaders as JwtHeadersModel,
    JwtPayload as JwtPayloadModel,
    KeysReplace as KeysReplaceModel,
    KeysToLower as KeysToLowerModel,
    KeysToSnakeCase as KeysToSnakeCaseModel,
    LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
    LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel,
    ListPartitionRouter as ListPartitionRouterModel,
    MinMaxDatetime as MinMaxDatetimeModel,
    MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
    NoAuth as NoAuthModel,
    NoPagination as NoPaginationModel,
    OAuthAuthenticator as OAuthAuthenticatorModel,
    OffsetIncrement as OffsetIncrementModel,
    PageIncrement as PageIncrementModel,
    ParametrizedComponentsResolver as ParametrizedComponentsResolverModel,
    ParentStreamConfig as ParentStreamConfigModel,
    PredicateValidator as PredicateValidatorModel,
    PropertiesFromEndpoint as PropertiesFromEndpointModel,
    PropertyChunking as PropertyChunkingModel,
    PropertyLimitType as PropertyLimitTypeModel,
    QueryProperties as QueryPropertiesModel,
    Rate as RateModel,
    RecordFilter as RecordFilterModel,
    RecordSelector as RecordSelectorModel,
    RemoveFields as RemoveFieldsModel,
    RequestOption as RequestOptionModel,
    RequestPath as RequestPathModel,
    ResponseToFileExtractor as ResponseToFileExtractorModel,
    SchemaNormalization as SchemaNormalizationModel,
    SchemaTypeIdentifier as SchemaTypeIdentifierModel,
    SelectiveAuthenticator as SelectiveAuthenticatorModel,
    SessionTokenAuthenticator as SessionTokenAuthenticatorModel,
    SimpleRetriever as SimpleRetrieverModel,
    Spec as SpecModel,
    StateDelegatingStream as StateDelegatingStreamModel,
    StreamConfig as StreamConfigModel,
    SubstreamPartitionRouter as SubstreamPartitionRouterModel,
    TypesMap as TypesMapModel,
    UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
    ValidateAdheresToSchema as ValidateAdheresToSchemaModel,
    ValueType,
    WaitTimeFromHeader as WaitTimeFromHeaderModel,
    WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel,
    XmlDecoder as XmlDecoderModel,
    ZipfileDecoder as ZipfileDecoderModel,
)
from airbyte_cdk.sources.declarative.partition_routers import (
    CartesianProductStreamSlicer,
    GroupingPartitionRouter,
    ListPartitionRouter,
    PartitionRouter,
    SinglePartitionRouter,
    SubstreamPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
    AsyncJobPartitionRouter,
)
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
    ParentStreamConfig,
)
from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption
from airbyte_cdk.sources.declarative.requesters.error_handlers import (
    CompositeErrorHandler,
    DefaultErrorHandler,
    HttpResponseFilter,
)
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import (
    ConstantBackoffStrategy,
    ExponentialBackoffStrategy,
    WaitTimeFromHeaderBackoffStrategy,
    WaitUntilTimeFromHeaderBackoffStrategy,
)
from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository
from airbyte_cdk.sources.declarative.requesters.paginators import (
    DefaultPaginator,
    NoPagination,
    PaginatorTestReadDecorator,
)
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import (
    CursorPaginationStrategy,
    CursorStopCondition,
    OffsetIncrement,
    PageIncrement,
    StopConditionPaginationStrategyDecorator,
)
from airbyte_cdk.sources.declarative.requesters.query_properties import (
    PropertiesFromEndpoint,
    PropertyChunking,
    QueryProperties,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
    PropertyLimitType,
)
from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import (
    GroupByKey,
)
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import (
    DatetimeBasedRequestOptionsProvider,
    DefaultRequestOptionsProvider,
    InterpolatedRequestOptionsProvider,
    RequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_options.per_partition_request_option_provider import (
    PerPartitionRequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
from airbyte_cdk.sources.declarative.resolvers import (
    ComponentMappingDefinition,
    ConfigComponentsResolver,
    HttpComponentsResolver,
    ParametrizedComponentsResolver,
    StreamConfig,
    StreamParametersDefinition,
)
from airbyte_cdk.sources.declarative.retrievers import (
    AsyncRetriever,
    LazySimpleRetriever,
    SimpleRetriever,
)
from airbyte_cdk.sources.declarative.retrievers.file_uploader import (
    ConnectorBuilderFileUploader,
    DefaultFileUploader,
    FileUploader,
    LocalFileSystemFileWriter,
    NoopFileWriter,
)
from airbyte_cdk.sources.declarative.schema import (
    ComplexFieldType,
    DefaultSchemaLoader,
    DynamicSchemaLoader,
    InlineSchemaLoader,
    JsonFileSchemaLoader,
    SchemaTypeIdentifier,
    TypesMap,
)
from airbyte_cdk.sources.declarative.schema.composite_schema_loader import CompositeSchemaLoader
from airbyte_cdk.sources.declarative.spec import ConfigMigration, Spec
from airbyte_cdk.sources.declarative.stream_slicers import (
    StreamSlicer,
    StreamSlicerTestReadDecorator,
)
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
    DeclarativePartitionFactory,
    StreamSlicerPartitionGenerator,
)
from airbyte_cdk.sources.declarative.transformations import (
    AddFields,
    RecordTransformation,
    RemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
from airbyte_cdk.sources.declarative.transformations.config_transformations import (
    ConfigAddFields,
    ConfigRemapField,
    ConfigRemoveFields,
)
from airbyte_cdk.sources.declarative.transformations.config_transformations.config_transformation import (
    ConfigTransformation,
)
from airbyte_cdk.sources.declarative.transformations.dpath_flatten_fields import (
    DpathFlattenFields,
    KeyTransformation,
)
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
    FlattenFields,
)
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
    KeysReplaceTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
    KeysToLowerTransformation,
)
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
    KeysToSnakeCaseTransformation,
)
from airbyte_cdk.sources.declarative.validators import (
    DpathValidator,
    PredicateValidator,
    ValidateAdheresToSchema,
)
from airbyte_cdk.sources.http_logger import format_http_message
from airbyte_cdk.sources.message import (
    InMemoryMessageRepository,
    LogAppenderMessageRepositoryDecorator,
    MessageRepository,
    NoopMessageRepository,
)
from airbyte_cdk.sources.message.repository import StateFilteringMessageRepository
from airbyte_cdk.sources.streams.call_rate import (
    APIBudget,
    FixedWindowCallRatePolicy,
    HttpAPIBudget,
    HttpRequestRegexMatcher,
    MovingWindowCallRatePolicy,
    Rate,
    UnlimitedCallRatePolicy,
)
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
from airbyte_cdk.sources.streams.concurrent.clamping import (
    ClampingEndProvider,
    ClampingStrategy,
    DayClampingStrategy,
    MonthClampingStrategy,
    NoClamping,
    WeekClampingStrategy,
    Weekday,
)
from airbyte_cdk.sources.streams.concurrent.cursor import (
    ConcurrentCursor,
    Cursor,
    CursorField,
    FinalStateCursor,
)
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import (
    StreamSlicer as ConcurrentStreamSlicer,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
    CustomFormatConcurrentStreamStateConverter,
    DateTimeStreamStateConverter,
)
from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
    IncrementingCountStreamStateConverter,
)
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
from airbyte_cdk.sources.types import Config
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer

ComponentDefinition = Mapping[str, Any]

SCHEMA_TRANSFORMER_TYPE_MAPPING = {
    SchemaNormalizationModel.None_: TransformConfig.NoTransform,
    SchemaNormalizationModel.Default: TransformConfig.DefaultSchemaNormalization,
}
_NO_STREAM_SLICING = SinglePartitionRouter(parameters={})

# Ideally this should use the value defined in ConcurrentDeclarativeSource, but
# this would be a circular import
MAX_SLICES = 5


class ModelToComponentFactory:
    EPOCH_DATETIME_FORMAT = "%s"

    def __init__(
        self,
        limit_pages_fetched_per_slice: Optional[int] = None,
        limit_slices_fetched: Optional[int] = None,
        emit_connector_builder_messages: bool = False,
        disable_retries: bool = False,
        disable_cache: bool = False,
        message_repository: Optional[MessageRepository] = None,
        connector_state_manager: Optional[ConnectorStateManager] = None,
        max_concurrent_async_job_count: Optional[int] = None,
    ):
        self._init_mappings()
        self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice
        self._limit_slices_fetched = limit_slices_fetched
        self._emit_connector_builder_messages = emit_connector_builder_messages
        self._disable_retries = disable_retries
        self._disable_cache = disable_cache
        self._message_repository = message_repository or InMemoryMessageRepository(
            self._evaluate_log_level(emit_connector_builder_messages)
        )
        self._connector_state_manager = connector_state_manager or ConnectorStateManager()
        self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
        self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1)
        # placeholder for deprecation warnings
        self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []

    def _init_mappings(self) -> None:
        self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
            AddedFieldDefinitionModel: self.create_added_field_definition,
            AddFieldsModel: self.create_add_fields,
            ApiKeyAuthenticatorModel: self.create_api_key_authenticator,
            BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
            BearerAuthenticatorModel: self.create_bearer_authenticator,
            CheckStreamModel: self.create_check_stream,
            DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config,
            CheckDynamicStreamModel: self.create_check_dynamic_stream,
            CompositeErrorHandlerModel: self.create_composite_error_handler,
            ConcurrencyLevelModel: self.create_concurrency_level,
            ConfigMigrationModel: self.create_config_migration,
            ConfigAddFieldsModel: self.create_config_add_fields,
            ConfigRemapFieldModel: self.create_config_remap_field,
            ConfigRemoveFieldsModel: self.create_config_remove_fields,
            ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
            CsvDecoderModel: self.create_csv_decoder,
            CursorPaginationModel: self.create_cursor_pagination,
            CustomAuthenticatorModel: self.create_custom_component,
            CustomBackoffStrategyModel: self.create_custom_component,
            CustomDecoderModel: self.create_custom_component,
            CustomErrorHandlerModel: self.create_custom_component,
            CustomRecordExtractorModel: self.create_custom_component,
            CustomRecordFilterModel: self.create_custom_component,
            CustomRequesterModel: self.create_custom_component,
            CustomRetrieverModel: self.create_custom_component,
            CustomSchemaLoader: self.create_custom_component,
            CustomSchemaNormalizationModel: self.create_custom_component,
            CustomStateMigration: self.create_custom_component,
            CustomPaginationStrategyModel: self.create_custom_component,
            CustomPartitionRouterModel: self.create_custom_component,
            CustomTransformationModel: self.create_custom_component,
            CustomValidationStrategyModel: self.create_custom_component,
            CustomConfigTransformationModel: self.create_custom_component,
            DatetimeBasedCursorModel: self.create_datetime_based_cursor,
            DeclarativeStreamModel: self.create_default_stream,
            DefaultErrorHandlerModel: self.create_default_error_handler,
            DefaultPaginatorModel: self.create_default_paginator,
            DpathExtractorModel: self.create_dpath_extractor,
            DpathValidatorModel: self.create_dpath_validator,
            ResponseToFileExtractorModel: self.create_response_to_file_extractor,
            ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
            SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
            GroupByKeyMergeStrategyModel: self.create_group_by_key,
            HttpRequesterModel: self.create_http_requester,
            HttpResponseFilterModel: self.create_http_response_filter,
            InlineSchemaLoaderModel: self.create_inline_schema_loader,
            JsonDecoderModel: self.create_json_decoder,
            JsonlDecoderModel: self.create_jsonl_decoder,
            GzipDecoderModel: self.create_gzip_decoder,
            KeysToLowerModel: self.create_keys_to_lower_transformation,
            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
            KeysReplaceModel: self.create_keys_replace_transformation,
            FlattenFieldsModel: self.create_flatten_fields,
            DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
            IterableDecoderModel: self.create_iterable_decoder,
            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
            XmlDecoderModel: self.create_xml_decoder,
            JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
            DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
            SchemaTypeIdentifierModel: self.create_schema_type_identifier,
            TypesMapModel: self.create_types_map,
            ComplexFieldTypeModel: self.create_complex_field_type,
            JwtAuthenticatorModel: self.create_jwt_authenticator,
            LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
            ListPartitionRouterModel: self.create_list_partition_router,
            MinMaxDatetimeModel: self.create_min_max_datetime,
            NoAuthModel: self.create_no_auth,
            NoPaginationModel: self.create_no_pagination,
            OAuthAuthenticatorModel: self.create_oauth_authenticator,
            OffsetIncrementModel: self.create_offset_increment,
            PageIncrementModel: self.create_page_increment,
            ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper,
            PredicateValidatorModel: self.create_predicate_validator,
            PropertiesFromEndpointModel: self.create_properties_from_endpoint,
            PropertyChunkingModel: self.create_property_chunking,
            QueryPropertiesModel: self.create_query_properties,
            RecordFilterModel: self.create_record_filter,
            RecordSelectorModel: self.create_record_selector,
            RemoveFieldsModel: self.create_remove_fields,
            RequestPathModel: self.create_request_path,
            RequestOptionModel: self.create_request_option,
            LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
            SelectiveAuthenticatorModel: self.create_selective_authenticator,
            SimpleRetrieverModel: self.create_simple_retriever,
            StateDelegatingStreamModel: self.create_state_delegating_stream,
            SpecModel: self.create_spec,
            SubstreamPartitionRouterModel: self.create_substream_partition_router,
            ValidateAdheresToSchemaModel: self.create_validate_adheres_to_schema,
            WaitTimeFromHeaderModel: self.create_wait_time_from_header,
            WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
            AsyncRetrieverModel: self.create_async_retriever,
            HttpComponentsResolverModel: self.create_http_components_resolver,
            ConfigComponentsResolverModel: self.create_config_components_resolver,
            ParametrizedComponentsResolverModel: self.create_parametrized_components_resolver,
            StreamConfigModel: self.create_stream_config,
            ComponentMappingDefinitionModel: self.create_components_mapping_definition,
            ZipfileDecoderModel: self.create_zipfile_decoder,
            HTTPAPIBudgetModel: self.create_http_api_budget,
            FileUploaderModel: self.create_file_uploader,
            FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
            MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
            UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
            RateModel: self.create_rate,
            HttpRequestRegexMatcherModel: self.create_http_request_matcher,
            GroupingPartitionRouterModel: self.create_grouping_partition_router,
        }

        # Needed for the case where we need to perform a second parse on the fields of a custom component
        self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR}
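
    # Illustrative note (not part of the original module): TYPE_NAME_TO_MODEL maps each registered
    # model class name to the class itself, e.g. TYPE_NAME_TO_MODEL["CheckStream"] is
    # CheckStreamModel. This is what supports the "second parse" of a custom component's fields
    # mentioned in the comment above, by looking up a nested definition's "type" string.
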
    def create_component(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        config: Config,
        **kwargs: Any,
    ) -> Any:
        """
        Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative
        component and subcomponents which will be used at runtime. This is done by first parsing the mapping into a
        Pydantic model and then creating declarative components from that model.

        :param model_type: The type of declarative component that is being initialized
        :param component_definition: The mapping that represents a declarative component
        :param config: The connector config that is provided by the customer
        :return: The declarative component to be used at runtime
        """

        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        declarative_component_model = model_type.parse_obj(component_definition)

        if not isinstance(declarative_component_model, model_type):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
            )

        return self._create_component_from_model(
            model=declarative_component_model, config=config, **kwargs
        )

    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
        if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
            raise ValueError(
                f"{model.__class__} with attributes {model} is not a valid component type"
            )
        component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__)
        if not component_constructor:
            raise ValueError(f"Could not find constructor for {model.__class__}")

        # collect deprecation warnings for supported models.
        if isinstance(model, BaseModelWithDeprecations):
            self._collect_model_deprecations(model)

        return component_constructor(model=model, config=config, **kwargs)

    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
        """
        Returns the deprecation warnings that were collected during the creation of components.
        """
        return self._collected_deprecation_logs

    def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None:
        """
        Collects deprecation logs from the given model and appends any new logs to the internal collection.

        This method checks if the provided model has deprecation logs (identified by the presence of the
        DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through each
        deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it has not already
        been collected, ensuring that duplicate logs are avoided.

        Args:
            model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs.
        """
        if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None:
            for log in model._deprecation_logs:
                # avoid duplicates for deprecation logs observed.
                if log not in self._collected_deprecation_logs:
                    self._collected_deprecation_logs.append(log)
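
    # Illustrative usage sketch (hypothetical values, not part of the original module):
    # create_component expects the "type" key of the definition to match the model class name,
    # parses the mapping into that Pydantic model, then dispatches to the matching create_* method
    # via PYDANTIC_MODEL_TO_CONSTRUCTOR.
    #
    #   factory = ModelToComponentFactory()
    #   check_stream = factory.create_component(
    #       model_type=CheckStreamModel,
    #       component_definition={"type": "CheckStream", "stream_names": ["users"]},
    #       config={},
    #   )
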
    def create_config_migration(
        self, model: ConfigMigrationModel, config: Config
    ) -> ConfigMigration:
        transformations: List[ConfigTransformation] = [
            self._create_component_from_model(transformation, config)
            for transformation in model.transformations
        ]

        return ConfigMigration(
            description=model.description,
            transformations=transformations,
        )

    def create_config_add_fields(
        self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigAddFields:
        fields = [self._create_component_from_model(field, config) for field in model.fields]
        return ConfigAddFields(
            fields=fields,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remove_fields(
        model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigRemoveFields:
        return ConfigRemoveFields(
            field_pointers=model.field_pointers,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remap_field(
        model: ConfigRemapFieldModel, config: Config, **kwargs: Any
    ) -> ConfigRemapField:
        mapping = cast(Mapping[str, Any], model.map)
        return ConfigRemapField(
            map=mapping,
            field_path=model.field_path,
            config=config,
        )

    def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return DpathValidator(
            field_path=model.field_path,
            strategy=strategy,
        )

    def create_predicate_validator(
        self, model: PredicateValidatorModel, config: Config
    ) -> PredicateValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return PredicateValidator(
            value=model.value,
            strategy=strategy,
        )

    @staticmethod
    def create_validate_adheres_to_schema(
        model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any
    ) -> ValidateAdheresToSchema:
        base_schema = cast(Mapping[str, Any], model.base_schema)
        return ValidateAdheresToSchema(
            schema=base_schema,
        )

    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )
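
    # Illustrative sketch (hypothetical values, not part of the original module): create_add_fields
    # above consumes an AddFields model whose `fields` entries carry a `path`, an interpolable
    # `value` and an optional `value_type`, each built through create_added_field_definition.
    #
    #   add_fields_definition = {
    #       "type": "AddFields",
    #       "fields": [
    #           {"type": "AddedFieldDefinition", "path": ["copied_id"], "value": "{{ record.id }}"},
    #       ],
    #   }
    #   transformation = factory.create_component(AddFieldsModel, add_fields_definition, config={})
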
    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        key_transformation = (
            KeyTransformation(
                config=config,
                prefix=model.key_transformation.prefix,
                suffix=model.key_transformation.suffix,
                parameters=model.parameters or {},
            )
            if model.key_transformation is not None
            else None
        )
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            key_transformation=key_transformation,
            parameters=model.parameters or {},
        )

    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]

    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )

        request_option = (
            self._create_component_from_model(
                model.inject_into, config, parameters=model.parameters or {}
            )
            if model.inject_into
            else RequestOption(
                inject_into=RequestOptionType.header,
                field_name=model.header or "",
                parameters=model.parameters or {},
            )
        )

        return ApiKeyAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            request_option=request_option,
            config=config,
            parameters=model.parameters or {},
        )

    def create_legacy_to_per_partition_state_migration(
        self,
        model: LegacyToPerPartitionStateMigrationModel,
        config: Mapping[str, Any],
        declarative_stream: DeclarativeStreamModel,
    ) -> LegacyToPerPartitionStateMigration:
        retriever = declarative_stream.retriever
        if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}"
            )
        partition_router = retriever.partition_router
        if not isinstance(
            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
        ):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
            )
        if not hasattr(partition_router, "parent_stream_configs"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
            )

        if not hasattr(declarative_stream, "incremental_sync"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
            )

        return LegacyToPerPartitionStateMigration(
            partition_router,  # type: ignore # was already checked above
            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
            config,
            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
        )

    def create_session_token_authenticator(
        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        login_requester = self._create_component_from_model(
            model=model.login_requester,
            config=config,
            name=f"{name}_login_requester",
            decoder=decoder,
        )
        token_provider = SessionTokenProvider(
            login_requester=login_requester,
            session_token_path=model.session_token_path,
            expiration_duration=parse_duration(model.expiration_duration)
            if model.expiration_duration
            else None,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            decoder=decoder,
        )
        if model.request_authentication.type == "Bearer":
            return ModelToComponentFactory.create_bearer_authenticator(
                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                config,
                token_provider=token_provider,
            )
        else:
            return self.create_api_key_authenticator(
                ApiKeyAuthenticatorModel(
                    type="ApiKeyAuthenticator",
                    api_token="",
                    inject_into=model.request_authentication.inject_into,
                ),  # type: ignore # $parameters and headers default to None
                config=config,
                token_provider=token_provider,
            )
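
    # Rough shape handled by create_session_token_authenticator above (YAML-like sketch; the
    # concrete values are assumptions, not a verbatim manifest): the login_requester is built
    # first, wrapped in a SessionTokenProvider, and the result is exposed as either a
    # BearerAuthenticator or an ApiKeyAuthenticator depending on request_authentication.type.
    #
    #   authenticator:
    #     type: SessionTokenAuthenticator
    #     login_requester: { ... }
    #     session_token_path: ["token"]
    #     expiration_duration: PT1H        # parsed with isodate.parse_duration
    #     request_authentication:
    #       type: Bearer
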
    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )
        return BearerAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_dynamic_stream_check_config(
        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
    ) -> DynamicStreamCheckConfig:
        return DynamicStreamCheckConfig(
            dynamic_stream_name=model.dynamic_stream_name,
            stream_count=model.stream_count or 0,
        )

    def create_check_stream(
        self, model: CheckStreamModel, config: Config, **kwargs: Any
    ) -> CheckStream:
        if model.dynamic_streams_check_configs is None and model.stream_names is None:
            raise ValueError(
                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
            )

        dynamic_streams_check_configs = (
            [
                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
                for dynamic_stream_check_config in model.dynamic_streams_check_configs
            ]
            if model.dynamic_streams_check_configs
            else []
        )

        return CheckStream(
            stream_names=model.stream_names or [],
            dynamic_streams_check_configs=dynamic_streams_check_configs,
            parameters={},
        )

    @staticmethod
    def create_check_dynamic_stream(
        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
    ) -> CheckDynamicStream:
        assert model.use_check_availability is not None  # for mypy

        use_check_availability = model.use_check_availability

        return CheckDynamicStream(
            stream_count=model.stream_count,
            use_check_availability=use_check_availability,
            parameters={},
        )

    def create_composite_error_handler(
        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
    ) -> CompositeErrorHandler:
        error_handlers = [
            self._create_component_from_model(model=error_handler_model, config=config)
            for error_handler_model in model.error_handlers
        ]
        return CompositeErrorHandler(
            error_handlers=error_handlers, parameters=model.parameters or {}
        )

    @staticmethod
    def create_concurrency_level(
        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
    ) -> ConcurrencyLevel:
        return ConcurrencyLevel(
            default_concurrency=model.default_concurrency,
            max_concurrency=model.max_concurrency,
            config=config,
            parameters={},
        )

    @staticmethod
    def apply_stream_state_migrations(
        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        if stream_state_migrations:
            for state_migration in stream_state_migrations:
                if state_migration.should_migrate(stream_state):
                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
                    stream_state = dict(state_migration.migrate(stream_state))
        return stream_state
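
    # Illustrative usage sketch (HypotheticalMigration is invented for the example, not part of
    # the original module): each entry in stream_state_migrations is expected to expose
    # should_migrate() and migrate(), as exercised above.
    #
    #   migrated = ModelToComponentFactory.apply_stream_state_migrations(
    #       stream_state_migrations=[HypotheticalMigration()],
    #       stream_state={"updated_at": "2021-01-01"},
    #   )
    #   # Each migration whose should_migrate() returns True rewrites the state via migrate(),
    #   # and the (possibly rewritten) mapping is returned.
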
    def create_concurrent_cursor_from_datetime_based_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: MutableMapping[str, Any],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        runtime_lookback_window: Optional[datetime.timedelta] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition`
        # instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to
        # the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91).
        # So now we have two cases:
        # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
        # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
        # We should change those interfaces to use the model once we clean up the code in CDS at which point the
        # parameter propagation should happen as part of the ModelToComponentFactory.
        if "$parameters" not in component_definition and "parameters" in component_definition:
            component_definition["$parameters"] = component_definition.get("parameters")  # type: ignore # This is a dict
        datetime_based_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
            )

        model_parameters = datetime_based_cursor_model.parameters or {}
        interpolated_cursor_field = InterpolatedString.create(
            datetime_based_cursor_model.cursor_field,
            parameters=model_parameters,
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        interpolated_partition_field_start = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_start or "start_time",
            parameters=model_parameters,
        )
        interpolated_partition_field_end = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_end or "end_time",
            parameters=model_parameters,
        )

        slice_boundary_fields = (
            interpolated_partition_field_start.eval(config=config),
            interpolated_partition_field_end.eval(config=config),
        )

        datetime_format = datetime_based_cursor_model.datetime_format

        cursor_granularity = (
            parse_duration(datetime_based_cursor_model.cursor_granularity)
            if datetime_based_cursor_model.cursor_granularity
            else None
        )

        lookback_window = None
        interpolated_lookback_window = (
            InterpolatedString.create(
                datetime_based_cursor_model.lookback_window,
                parameters=model_parameters,
            )
            if datetime_based_cursor_model.lookback_window
            else None
        )
        if interpolated_lookback_window:
            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
            if evaluated_lookback_window:
                lookback_window = parse_duration(evaluated_lookback_window)

        connector_state_converter: DateTimeStreamStateConverter
        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )

        # Adjusts the stream state by applying the runtime lookback window.
        # This is used to ensure correct state handling in case of failed partitions.
        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
        if runtime_lookback_window and stream_state_value:
            new_stream_state = (
                connector_state_converter.parse_timestamp(stream_state_value)
                - runtime_lookback_window
            )
            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
                new_stream_state
            )

        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
            start_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.start_datetime, config=config
            )
        else:
            start_date_runtime_value = datetime_based_cursor_model.start_datetime

        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
            end_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.end_datetime, config=config
            )
        else:
            end_date_runtime_value = datetime_based_cursor_model.end_datetime

        interpolated_start_date = MinMaxDatetime.create(
            interpolated_string_or_min_max_datetime=start_date_runtime_value,
            parameters=datetime_based_cursor_model.parameters,
        )
        interpolated_end_date = (
            None
            if not end_date_runtime_value
            else MinMaxDatetime.create(
                end_date_runtime_value, datetime_based_cursor_model.parameters
            )
        )

        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
        if not interpolated_start_date.datetime_format:
            interpolated_start_date.datetime_format = datetime_format
        if interpolated_end_date and not interpolated_end_date.datetime_format:
            interpolated_end_date.datetime_format = datetime_format

        start_date = interpolated_start_date.get_datetime(config=config)
        end_date_provider = (
            partial(interpolated_end_date.get_datetime, config)
            if interpolated_end_date
            else connector_state_converter.get_end_provider()
        )

        if (
            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
        ) or (
            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
        ):
            raise ValueError(
                f"If step is defined, cursor_granularity should be as well and vice-versa. "
                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
            )

        # When step is not defined, default to a step size from the starting date to the present moment
        step_length = datetime.timedelta.max
        interpolated_step = (
            InterpolatedString.create(
                datetime_based_cursor_model.step,
                parameters=model_parameters,
            )
            if datetime_based_cursor_model.step
            else None
        )
        if interpolated_step:
            evaluated_step = interpolated_step.eval(config)
            if evaluated_step:
                step_length = parse_duration(evaluated_step)

        clamping_strategy: ClampingStrategy = NoClamping()
        if datetime_based_cursor_model.clamping:
            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
            # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
            # object which we want to keep agnostic of being low-code
            target = InterpolatedString(
                string=datetime_based_cursor_model.clamping.target,
                parameters=model_parameters,
            )
            evaluated_target = target.eval(config=config)
            match evaluated_target:
                case "DAY":
                    clamping_strategy = DayClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        DayClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
                    )
                case "WEEK":
                    if (
                        not datetime_based_cursor_model.clamping.target_details
                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
                    ):
                        raise ValueError(
                            "Given WEEK clamping, weekday needs to be provided as target_details"
                        )
                    weekday = self._assemble_weekday(
                        datetime_based_cursor_model.clamping.target_details["weekday"]
                    )
                    clamping_strategy = WeekClampingStrategy(weekday)
                    end_date_provider = ClampingEndProvider(
                        WeekClampingStrategy(weekday, is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case "MONTH":
                    clamping_strategy = MonthClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        MonthClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case _:
                    raise ValueError(
                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
                    )

        return ConcurrentCursor(
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=message_repository or self._message_repository,
            connector_state_manager=self._connector_state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            slice_boundary_fields=slice_boundary_fields,
            start=start_date,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            end_provider=end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            lookback_window=lookback_window,
            slice_range=step_length,
            cursor_granularity=cursor_granularity,
            clamping_strategy=clamping_strategy,
        )

    def create_concurrent_cursor_from_incrementing_count_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: MutableMapping[str, Any],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        incrementing_count_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
            )

        interpolated_start_value = (
            InterpolatedString.create(
                incrementing_count_cursor_model.start_value,  # type: ignore
                parameters=incrementing_count_cursor_model.parameters or {},
            )
            if incrementing_count_cursor_model.start_value
            else 0
        )

        interpolated_cursor_field = InterpolatedString.create(
            incrementing_count_cursor_model.cursor_field,
            parameters=incrementing_count_cursor_model.parameters or {},
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        connector_state_converter = IncrementingCountStreamStateConverter(
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        )

        return ConcurrentCursor(
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=message_repository or self._message_repository,
            connector_state_manager=self._connector_state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            slice_boundary_fields=None,
            start=interpolated_start_value,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            end_provider=connector_state_converter.get_end_provider(),  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests.
Confirmed functionality is working in practice 1539 ) 1540 1541 def _assemble_weekday(self, weekday: str) -> Weekday: 1542 match weekday: 1543 case "MONDAY": 1544 return Weekday.MONDAY 1545 case "TUESDAY": 1546 return Weekday.TUESDAY 1547 case "WEDNESDAY": 1548 return Weekday.WEDNESDAY 1549 case "THURSDAY": 1550 return Weekday.THURSDAY 1551 case "FRIDAY": 1552 return Weekday.FRIDAY 1553 case "SATURDAY": 1554 return Weekday.SATURDAY 1555 case "SUNDAY": 1556 return Weekday.SUNDAY 1557 case _: 1558 raise ValueError(f"Unknown weekday {weekday}") 1559 1560 def create_concurrent_cursor_from_perpartition_cursor( 1561 self, 1562 state_manager: ConnectorStateManager, 1563 model_type: Type[BaseModel], 1564 component_definition: ComponentDefinition, 1565 stream_name: str, 1566 stream_namespace: Optional[str], 1567 config: Config, 1568 stream_state: MutableMapping[str, Any], 1569 partition_router: PartitionRouter, 1570 attempt_to_create_cursor_if_not_provided: bool = False, 1571 **kwargs: Any, 1572 ) -> ConcurrentPerPartitionCursor: 1573 component_type = component_definition.get("type") 1574 if component_definition.get("type") != model_type.__name__: 1575 raise ValueError( 1576 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1577 ) 1578 1579 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1580 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1581 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1582 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1583 if "$parameters" not in component_definition and "parameters" in component_definition: 1584 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1585 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1586 1587 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1588 raise ValueError( 1589 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1590 ) 1591 1592 interpolated_cursor_field = InterpolatedString.create( 1593 datetime_based_cursor_model.cursor_field, 1594 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). 
So now we have two cases: 1595 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1596 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1597 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1598 parameters=datetime_based_cursor_model.parameters or {}, 1599 ) 1600 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1601 1602 datetime_format = datetime_based_cursor_model.datetime_format 1603 1604 cursor_granularity = ( 1605 parse_duration(datetime_based_cursor_model.cursor_granularity) 1606 if datetime_based_cursor_model.cursor_granularity 1607 else None 1608 ) 1609 1610 connector_state_converter: DateTimeStreamStateConverter 1611 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1612 datetime_format=datetime_format, 1613 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1614 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1615 cursor_granularity=cursor_granularity, 1616 ) 1617 1618 # Create the cursor factory 1619 cursor_factory = ConcurrentCursorFactory( 1620 partial( 1621 self.create_concurrent_cursor_from_datetime_based_cursor, 1622 state_manager=state_manager, 1623 model_type=model_type, 1624 component_definition=component_definition, 1625 stream_name=stream_name, 1626 stream_namespace=stream_namespace, 1627 config=config, 1628 message_repository=NoopMessageRepository(), 1629 ) 1630 ) 1631 1632 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1633 use_global_cursor = isinstance( 1634 partition_router, GroupingPartitionRouter 1635 ) or component_definition.get("global_substream_cursor", False) 1636 1637 # Return the concurrent cursor and state converter 1638 return ConcurrentPerPartitionCursor( 1639 cursor_factory=cursor_factory, 1640 partition_router=partition_router, 1641 stream_name=stream_name, 1642 stream_namespace=stream_namespace, 1643 stream_state=stream_state, 1644 message_repository=self._message_repository, # type: ignore 1645 connector_state_manager=state_manager, 1646 connector_state_converter=connector_state_converter, 1647 cursor_field=cursor_field, 1648 use_global_cursor=use_global_cursor, 1649 attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided, 1650 ) 1651 1652 @staticmethod 1653 def create_constant_backoff_strategy( 1654 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1655 ) -> ConstantBackoffStrategy: 1656 return ConstantBackoffStrategy( 1657 backoff_time_in_seconds=model.backoff_time_in_seconds, 1658 config=config, 1659 parameters=model.parameters or {}, 1660 ) 1661 1662 def create_cursor_pagination( 1663 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1664 ) -> CursorPaginationStrategy: 1665 if isinstance(decoder, PaginationDecoderDecorator): 1666 inner_decoder = decoder.decoder 1667 else: 1668 inner_decoder = decoder 1669 decoder = PaginationDecoderDecorator(decoder=decoder) 1670 1671 if self._is_supported_decoder_for_pagination(inner_decoder): 1672 decoder_to_use = decoder 1673 else: 1674 raise ValueError( 1675 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1676 ) 1677 1678 return CursorPaginationStrategy( 1679 cursor_value=model.cursor_value, 1680 decoder=decoder_to_use, 1681 
page_size=model.page_size, 1682 stop_condition=model.stop_condition, 1683 config=config, 1684 parameters=model.parameters or {}, 1685 ) 1686 1687 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1688 """ 1689 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1690 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1691 :param model: The Pydantic model of the custom component being created 1692 :param config: The custom defined connector config 1693 :return: The declarative component built from the Pydantic model to be used at runtime 1694 """ 1695 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1696 component_fields = get_type_hints(custom_component_class) 1697 model_args = model.dict() 1698 model_args["config"] = config 1699 1700 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1701 # we defer to these arguments over the component's definition 1702 for key, arg in kwargs.items(): 1703 model_args[key] = arg 1704 1705 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1706 # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to 1707 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1708 for model_field, model_value in model_args.items(): 1709 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1710 if ( 1711 isinstance(model_value, dict) 1712 and "type" not in model_value 1713 and model_field in component_fields 1714 ): 1715 derived_type = self._derive_component_type_from_type_hints( 1716 component_fields.get(model_field) 1717 ) 1718 if derived_type: 1719 model_value["type"] = derived_type 1720 1721 if self._is_component(model_value): 1722 model_args[model_field] = self._create_nested_component( 1723 model, 1724 model_field, 1725 model_value, 1726 config, 1727 **kwargs, 1728 ) 1729 elif isinstance(model_value, list): 1730 vals = [] 1731 for v in model_value: 1732 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1733 derived_type = self._derive_component_type_from_type_hints( 1734 component_fields.get(model_field) 1735 ) 1736 if derived_type: 1737 v["type"] = derived_type 1738 if self._is_component(v): 1739 vals.append( 1740 self._create_nested_component( 1741 model, 1742 model_field, 1743 v, 1744 config, 1745 **kwargs, 1746 ) 1747 ) 1748 else: 1749 vals.append(v) 1750 model_args[model_field] = vals 1751 1752 kwargs = { 1753 class_field: model_args[class_field] 1754 for class_field in component_fields.keys() 1755 if class_field in model_args 1756 } 1757 return custom_component_class(**kwargs) 1758 1759 @staticmethod 1760 def _get_class_from_fully_qualified_class_name( 1761 full_qualified_class_name: str, 1762 ) -> Any: 1763 """Get a class from its fully qualified name. 1764 1765 If a custom components module is needed, we assume it is already registered - probably 1766 as `source_declarative_manifest.components` or `components`. 1767 1768 Args: 1769 full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). 
1770 1771 Returns: 1772 Any: The class object. 1773 1774 Raises: 1775 ValueError: If the class cannot be loaded. 1776 """ 1777 split = full_qualified_class_name.split(".") 1778 module_name_full = ".".join(split[:-1]) 1779 class_name = split[-1] 1780 1781 try: 1782 module_ref = importlib.import_module(module_name_full) 1783 except ModuleNotFoundError as e: 1784 if split[0] == "source_declarative_manifest": 1785 # During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the test, add the source folder to your PYTHONPATH or add it runtime using sys.path.append 1786 try: 1787 import os 1788 1789 module_name_with_source_declarative_manifest = ".".join(split[1:-1]) 1790 module_ref = importlib.import_module( 1791 module_name_with_source_declarative_manifest 1792 ) 1793 except ModuleNotFoundError: 1794 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1795 else: 1796 raise ValueError(f"Could not load module `{module_name_full}`.") from e 1797 1798 try: 1799 return getattr(module_ref, class_name) 1800 except AttributeError as e: 1801 raise ValueError( 1802 f"Could not load class `{class_name}` from module `{module_name_full}`.", 1803 ) from e 1804 1805 @staticmethod 1806 def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: 1807 interface = field_type 1808 while True: 1809 origin = get_origin(interface) 1810 if origin: 1811 # Unnest types until we reach the raw type 1812 # List[T] -> T 1813 # Optional[List[T]] -> T 1814 args = get_args(interface) 1815 interface = args[0] 1816 else: 1817 break 1818 if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface): 1819 return interface.__name__ 1820 return None 1821 1822 @staticmethod 1823 def is_builtin_type(cls: Optional[Type[Any]]) -> bool: 1824 if not cls: 1825 return False 1826 return cls.__module__ == "builtins" 1827 1828 @staticmethod 1829 def _extract_missing_parameters(error: TypeError) -> List[str]: 1830 parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) 1831 if parameter_search: 1832 return re.findall(r"\'(.+?)\'", parameter_search.group(1)) 1833 else: 1834 return [] 1835 1836 def _create_nested_component( 1837 self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any 1838 ) -> Any: 1839 type_name = model_value.get("type", None) 1840 if not type_name: 1841 # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent 1842 return model_value 1843 1844 model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None) 1845 if model_type: 1846 parsed_model = model_type.parse_obj(model_value) 1847 try: 1848 # To improve usability of the language, certain fields are shared between components. This can come in the form of 1849 # a parent component passing some of its fields to a child component or the parent extracting fields from other child 1850 # components and passing it to others. One example is the DefaultPaginator referencing the HttpRequester url_base 1851 # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created 1852 # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that 1853 # are needed by a component and could not be shared. 
1854 model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) 1855 constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs 1856 model_parameters = model_value.get("$parameters", {}) 1857 matching_parameters = { 1858 kwarg: model_parameters[kwarg] 1859 for kwarg in constructor_kwargs 1860 if kwarg in model_parameters 1861 } 1862 matching_kwargs = { 1863 kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs 1864 } 1865 return self._create_component_from_model( 1866 model=parsed_model, config=config, **(matching_parameters | matching_kwargs) 1867 ) 1868 except TypeError as error: 1869 missing_parameters = self._extract_missing_parameters(error) 1870 if missing_parameters: 1871 raise ValueError( 1872 f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " 1873 + ", ".join( 1874 ( 1875 f"{type_name}.$parameters.{parameter}" 1876 for parameter in missing_parameters 1877 ) 1878 ) 1879 ) 1880 raise TypeError( 1881 f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" 1882 ) 1883 else: 1884 raise ValueError( 1885 f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" 1886 ) 1887 1888 @staticmethod 1889 def _is_component(model_value: Any) -> bool: 1890 return isinstance(model_value, dict) and model_value.get("type") is not None 1891 1892 def create_datetime_based_cursor( 1893 self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any 1894 ) -> DatetimeBasedCursor: 1895 start_datetime: Union[str, MinMaxDatetime] = ( 1896 model.start_datetime 1897 if isinstance(model.start_datetime, str) 1898 else self.create_min_max_datetime(model.start_datetime, config) 1899 ) 1900 end_datetime: Union[str, MinMaxDatetime, None] = None 1901 if model.is_data_feed and model.end_datetime: 1902 raise ValueError("Data feed does not support end_datetime") 1903 if model.is_data_feed and model.is_client_side_incremental: 1904 raise ValueError( 1905 "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them." 
1906 ) 1907 if model.end_datetime: 1908 end_datetime = ( 1909 model.end_datetime 1910 if isinstance(model.end_datetime, str) 1911 else self.create_min_max_datetime(model.end_datetime, config) 1912 ) 1913 1914 end_time_option = ( 1915 self._create_component_from_model( 1916 model.end_time_option, config, parameters=model.parameters or {} 1917 ) 1918 if model.end_time_option 1919 else None 1920 ) 1921 start_time_option = ( 1922 self._create_component_from_model( 1923 model.start_time_option, config, parameters=model.parameters or {} 1924 ) 1925 if model.start_time_option 1926 else None 1927 ) 1928 1929 return DatetimeBasedCursor( 1930 cursor_field=model.cursor_field, 1931 cursor_datetime_formats=model.cursor_datetime_formats 1932 if model.cursor_datetime_formats 1933 else [], 1934 cursor_granularity=model.cursor_granularity, 1935 datetime_format=model.datetime_format, 1936 end_datetime=end_datetime, 1937 start_datetime=start_datetime, 1938 step=model.step, 1939 end_time_option=end_time_option, 1940 lookback_window=model.lookback_window, 1941 start_time_option=start_time_option, 1942 partition_field_end=model.partition_field_end, 1943 partition_field_start=model.partition_field_start, 1944 message_repository=self._message_repository, 1945 is_compare_strictly=model.is_compare_strictly, 1946 config=config, 1947 parameters=model.parameters or {}, 1948 ) 1949 1950 def create_default_stream( 1951 self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any 1952 ) -> AbstractStream: 1953 primary_key = model.primary_key.__root__ if model.primary_key else None 1954 self._migrate_state(model, config) 1955 1956 partition_router = self._build_stream_slicer_from_partition_router( 1957 model.retriever, 1958 config, 1959 stream_name=model.name, 1960 **kwargs, 1961 ) 1962 concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config) 1963 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1964 cursor_model: DatetimeBasedCursorModel = model.incremental_sync 1965 1966 end_time_option = ( 1967 self._create_component_from_model( 1968 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1969 ) 1970 if cursor_model.end_time_option 1971 else None 1972 ) 1973 start_time_option = ( 1974 self._create_component_from_model( 1975 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1976 ) 1977 if cursor_model.start_time_option 1978 else None 1979 ) 1980 1981 datetime_request_options_provider = DatetimeBasedRequestOptionsProvider( 1982 start_time_option=start_time_option, 1983 end_time_option=end_time_option, 1984 partition_field_start=cursor_model.partition_field_start, 1985 partition_field_end=cursor_model.partition_field_end, 1986 config=config, 1987 parameters=model.parameters or {}, 1988 ) 1989 request_options_provider = ( 1990 datetime_request_options_provider 1991 if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor) 1992 else PerPartitionRequestOptionsProvider( 1993 partition_router, datetime_request_options_provider 1994 ) 1995 ) 1996 elif model.incremental_sync and isinstance( 1997 model.incremental_sync, IncrementingCountCursorModel 1998 ): 1999 if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor): 2000 raise ValueError( 2001 "PerPartition does not support per partition states because switching to global state is time based" 2002 ) 2003 2004 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 2005 2006 
start_time_option = ( 2007 self._create_component_from_model( 2008 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2009 config, 2010 parameters=cursor_model.parameters or {}, 2011 ) 2012 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2013 else None 2014 ) 2015 2016 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 2017 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 2018 partition_field_start = "start" 2019 2020 request_options_provider = DatetimeBasedRequestOptionsProvider( 2021 start_time_option=start_time_option, 2022 partition_field_start=partition_field_start, 2023 config=config, 2024 parameters=model.parameters or {}, 2025 ) 2026 else: 2027 request_options_provider = None 2028 2029 transformations = [] 2030 if model.transformations: 2031 for transformation_model in model.transformations: 2032 transformations.append( 2033 self._create_component_from_model(model=transformation_model, config=config) 2034 ) 2035 file_uploader = None 2036 if model.file_uploader: 2037 file_uploader = self._create_component_from_model( 2038 model=model.file_uploader, config=config 2039 ) 2040 2041 stream_slicer: ConcurrentStreamSlicer = ( 2042 partition_router 2043 if isinstance(concurrent_cursor, FinalStateCursor) 2044 else concurrent_cursor 2045 ) 2046 retriever = self._create_component_from_model( 2047 model=model.retriever, 2048 config=config, 2049 name=model.name, 2050 primary_key=primary_key, 2051 request_options_provider=request_options_provider, 2052 stream_slicer=stream_slicer, 2053 partition_router=partition_router, 2054 stop_condition_cursor=concurrent_cursor 2055 if self._is_stop_condition_on_cursor(model) 2056 else None, 2057 client_side_incremental_sync={"cursor": concurrent_cursor} 2058 if self._is_client_side_filtering_enabled(model) 2059 else None, 2060 transformations=transformations, 2061 file_uploader=file_uploader, 2062 incremental_sync=model.incremental_sync, 2063 ) 2064 if isinstance(retriever, AsyncRetriever): 2065 stream_slicer = retriever.stream_slicer 2066 2067 schema_loader: Union[ 2068 CompositeSchemaLoader, 2069 DefaultSchemaLoader, 2070 DynamicSchemaLoader, 2071 InlineSchemaLoader, 2072 JsonFileSchemaLoader, 2073 ] 2074 if model.schema_loader and isinstance(model.schema_loader, list): 2075 nested_schema_loaders = [ 2076 self._create_component_from_model(model=nested_schema_loader, config=config) 2077 for nested_schema_loader in model.schema_loader 2078 ] 2079 schema_loader = CompositeSchemaLoader( 2080 schema_loaders=nested_schema_loaders, parameters={} 2081 ) 2082 elif model.schema_loader: 2083 schema_loader = self._create_component_from_model( 2084 model=model.schema_loader, # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models 2085 config=config, 2086 ) 2087 else: 2088 options = model.parameters or {} 2089 if "name" not in options: 2090 options["name"] = model.name 2091 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 2092 2093 stream_name = model.name or "" 2094 return DefaultStream( 2095 partition_generator=StreamSlicerPartitionGenerator( 2096 DeclarativePartitionFactory( 2097 stream_name, 2098 schema_loader, 2099 retriever, 2100 self._message_repository, 2101 ), 2102 stream_slicer, 2103 slice_limit=self._limit_slices_fetched, 2104 ), 2105 name=stream_name, 2106 
json_schema=schema_loader.get_json_schema, 2107 primary_key=get_primary_key_from_stream(primary_key), 2108 cursor_field=concurrent_cursor.cursor_field.cursor_field_key 2109 if hasattr(concurrent_cursor, "cursor_field") 2110 else "", # FIXME we should have the cursor field has part of the interface of cursor, 2111 logger=logging.getLogger(f"airbyte.{stream_name}"), 2112 cursor=concurrent_cursor, 2113 supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader), 2114 ) 2115 2116 def _migrate_state(self, model: DeclarativeStreamModel, config: Config) -> None: 2117 stream_name = model.name or "" 2118 stream_state = self._connector_state_manager.get_stream_state( 2119 stream_name=stream_name, namespace=None 2120 ) 2121 if model.state_migrations: 2122 state_transformations = [ 2123 self._create_component_from_model(state_migration, config, declarative_stream=model) 2124 for state_migration in model.state_migrations 2125 ] 2126 else: 2127 state_transformations = [] 2128 stream_state = self.apply_stream_state_migrations(state_transformations, stream_state) 2129 self._connector_state_manager.update_state_for_stream( 2130 stream_name=stream_name, namespace=None, value=stream_state 2131 ) 2132 2133 def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool: 2134 return bool( 2135 model.incremental_sync 2136 and hasattr(model.incremental_sync, "is_data_feed") 2137 and model.incremental_sync.is_data_feed 2138 ) 2139 2140 def _is_client_side_filtering_enabled(self, model: DeclarativeStreamModel) -> bool: 2141 return bool( 2142 model.incremental_sync 2143 and hasattr(model.incremental_sync, "is_client_side_incremental") 2144 and model.incremental_sync.is_client_side_incremental 2145 ) 2146 2147 def _build_stream_slicer_from_partition_router( 2148 self, 2149 model: Union[ 2150 AsyncRetrieverModel, 2151 CustomRetrieverModel, 2152 SimpleRetrieverModel, 2153 ], 2154 config: Config, 2155 stream_name: Optional[str] = None, 2156 **kwargs: Any, 2157 ) -> PartitionRouter: 2158 if ( 2159 hasattr(model, "partition_router") 2160 and isinstance(model, (SimpleRetrieverModel, AsyncRetrieverModel, CustomRetrieverModel)) 2161 and model.partition_router 2162 ): 2163 stream_slicer_model = model.partition_router 2164 if isinstance(stream_slicer_model, list): 2165 return CartesianProductStreamSlicer( 2166 [ 2167 self._create_component_from_model( 2168 model=slicer, config=config, stream_name=stream_name or "" 2169 ) 2170 for slicer in stream_slicer_model 2171 ], 2172 parameters={}, 2173 ) 2174 elif isinstance(stream_slicer_model, dict): 2175 # partition router comes from CustomRetrieverModel therefore has not been parsed as a model 2176 params = stream_slicer_model.get("$parameters") 2177 if not isinstance(params, dict): 2178 params = {} 2179 stream_slicer_model["$parameters"] = params 2180 2181 if stream_name is not None: 2182 params["stream_name"] = stream_name 2183 2184 return self._create_nested_component( # type: ignore[no-any-return] # There is no guarantee that this will return a stream slicer. 
If not, we expect an AttributeError during the call to `stream_slices` 2185 model, 2186 "partition_router", 2187 stream_slicer_model, 2188 config, 2189 **kwargs, 2190 ) 2191 else: 2192 return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router 2193 model=stream_slicer_model, config=config, stream_name=stream_name or "" 2194 ) 2195 return SinglePartitionRouter(parameters={}) 2196 2197 def _build_concurrent_cursor( 2198 self, 2199 model: DeclarativeStreamModel, 2200 stream_slicer: Optional[PartitionRouter], 2201 config: Config, 2202 ) -> Cursor: 2203 stream_name = model.name or "" 2204 stream_state = self._connector_state_manager.get_stream_state(stream_name, None) 2205 2206 if ( 2207 model.incremental_sync 2208 and stream_slicer 2209 and not isinstance(stream_slicer, SinglePartitionRouter) 2210 ): 2211 return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2212 state_manager=self._connector_state_manager, 2213 model_type=DatetimeBasedCursorModel, 2214 component_definition=model.incremental_sync.__dict__, 2215 stream_name=stream_name, 2216 stream_state=stream_state, 2217 stream_namespace=None, 2218 config=config or {}, 2219 partition_router=stream_slicer, 2220 attempt_to_create_cursor_if_not_provided=True, # FIXME can we remove that now? 2221 ) 2222 elif model.incremental_sync: 2223 if type(model.incremental_sync) == IncrementingCountCursorModel: 2224 return self.create_concurrent_cursor_from_incrementing_count_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2225 model_type=IncrementingCountCursorModel, 2226 component_definition=model.incremental_sync.__dict__, 2227 stream_name=stream_name, 2228 stream_namespace=None, 2229 stream_state=stream_state, 2230 config=config or {}, 2231 ) 2232 elif type(model.incremental_sync) == DatetimeBasedCursorModel: 2233 return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. 
However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing 2234 model_type=type(model.incremental_sync), 2235 component_definition=model.incremental_sync.__dict__, 2236 stream_name=stream_name, 2237 stream_namespace=None, 2238 stream_state=stream_state, 2239 config=config or {}, 2240 attempt_to_create_cursor_if_not_provided=True, 2241 ) 2242 else: 2243 raise ValueError( 2244 f"Incremental sync of type {type(model.incremental_sync)} is not supported" 2245 ) 2246 return FinalStateCursor(stream_name, None, self._message_repository) 2247 2248 def create_default_error_handler( 2249 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2250 ) -> DefaultErrorHandler: 2251 backoff_strategies = [] 2252 if model.backoff_strategies: 2253 for backoff_strategy_model in model.backoff_strategies: 2254 backoff_strategies.append( 2255 self._create_component_from_model(model=backoff_strategy_model, config=config) 2256 ) 2257 2258 response_filters = [] 2259 if model.response_filters: 2260 for response_filter_model in model.response_filters: 2261 response_filters.append( 2262 self._create_component_from_model(model=response_filter_model, config=config) 2263 ) 2264 response_filters.append( 2265 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2266 ) 2267 2268 return DefaultErrorHandler( 2269 backoff_strategies=backoff_strategies, 2270 max_retries=model.max_retries, 2271 response_filters=response_filters, 2272 config=config, 2273 parameters=model.parameters or {}, 2274 ) 2275 2276 def create_default_paginator( 2277 self, 2278 model: DefaultPaginatorModel, 2279 config: Config, 2280 *, 2281 url_base: str, 2282 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2283 decoder: Optional[Decoder] = None, 2284 cursor_used_for_stop_condition: Optional[Cursor] = None, 2285 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2286 if decoder: 2287 if self._is_supported_decoder_for_pagination(decoder): 2288 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2289 else: 2290 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2291 else: 2292 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2293 page_size_option = ( 2294 self._create_component_from_model(model=model.page_size_option, config=config) 2295 if model.page_size_option 2296 else None 2297 ) 2298 page_token_option = ( 2299 self._create_component_from_model(model=model.page_token_option, config=config) 2300 if model.page_token_option 2301 else None 2302 ) 2303 pagination_strategy = self._create_component_from_model( 2304 model=model.pagination_strategy, 2305 config=config, 2306 decoder=decoder_to_use, 2307 extractor_model=extractor_model, 2308 ) 2309 if cursor_used_for_stop_condition: 2310 pagination_strategy = StopConditionPaginationStrategyDecorator( 2311 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2312 ) 2313 paginator = DefaultPaginator( 2314 decoder=decoder_to_use, 2315 page_size_option=page_size_option, 2316 page_token_option=page_token_option, 2317 pagination_strategy=pagination_strategy, 2318 url_base=url_base, 2319 config=config, 2320 parameters=model.parameters or {}, 2321 ) 2322 if self._limit_pages_fetched_per_slice: 2323 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2324 return paginator 2325 2326 def create_dpath_extractor( 2327 self, 2328 model: 
DpathExtractorModel, 2329 config: Config, 2330 decoder: Optional[Decoder] = None, 2331 **kwargs: Any, 2332 ) -> DpathExtractor: 2333 if decoder: 2334 decoder_to_use = decoder 2335 else: 2336 decoder_to_use = JsonDecoder(parameters={}) 2337 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2338 return DpathExtractor( 2339 decoder=decoder_to_use, 2340 field_path=model_field_path, 2341 config=config, 2342 parameters=model.parameters or {}, 2343 ) 2344 2345 @staticmethod 2346 def create_response_to_file_extractor( 2347 model: ResponseToFileExtractorModel, 2348 **kwargs: Any, 2349 ) -> ResponseToFileExtractor: 2350 return ResponseToFileExtractor(parameters=model.parameters or {}) 2351 2352 @staticmethod 2353 def create_exponential_backoff_strategy( 2354 model: ExponentialBackoffStrategyModel, config: Config 2355 ) -> ExponentialBackoffStrategy: 2356 return ExponentialBackoffStrategy( 2357 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2358 ) 2359 2360 @staticmethod 2361 def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey: 2362 return GroupByKey(model.key, config=config, parameters=model.parameters or {}) 2363 2364 def create_http_requester( 2365 self, 2366 model: HttpRequesterModel, 2367 config: Config, 2368 decoder: Decoder = JsonDecoder(parameters={}), 2369 query_properties_key: Optional[str] = None, 2370 use_cache: Optional[bool] = None, 2371 *, 2372 name: str, 2373 ) -> HttpRequester: 2374 authenticator = ( 2375 self._create_component_from_model( 2376 model=model.authenticator, 2377 config=config, 2378 url_base=model.url or model.url_base, 2379 name=name, 2380 decoder=decoder, 2381 ) 2382 if model.authenticator 2383 else None 2384 ) 2385 error_handler = ( 2386 self._create_component_from_model(model=model.error_handler, config=config) 2387 if model.error_handler 2388 else DefaultErrorHandler( 2389 backoff_strategies=[], 2390 response_filters=[], 2391 config=config, 2392 parameters=model.parameters or {}, 2393 ) 2394 ) 2395 2396 api_budget = self._api_budget 2397 2398 # Removes QueryProperties components from the interpolated mappings because it has been designed 2399 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 2400 # instead of through jinja interpolation 2401 request_parameters: Optional[Union[str, Mapping[str, str]]] 2402 if isinstance(model.request_parameters, Mapping): 2403 request_parameters = self._remove_query_properties(model.request_parameters) 2404 else: 2405 request_parameters = model.request_parameters 2406 2407 request_options_provider = InterpolatedRequestOptionsProvider( 2408 request_body=model.request_body, 2409 request_body_data=model.request_body_data, 2410 request_body_json=model.request_body_json, 2411 request_headers=model.request_headers, 2412 request_parameters=request_parameters, 2413 query_properties_key=query_properties_key, 2414 config=config, 2415 parameters=model.parameters or {}, 2416 ) 2417 2418 assert model.use_cache is not None # for mypy 2419 assert model.http_method is not None # for mypy 2420 2421 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2422 2423 return HttpRequester( 2424 name=name, 2425 url=model.url, 2426 url_base=model.url_base, 2427 path=model.path, 2428 authenticator=authenticator, 2429 error_handler=error_handler, 2430 api_budget=api_budget, 2431 http_method=HttpMethod[model.http_method.value], 2432 request_options_provider=request_options_provider, 
2433 config=config, 2434 disable_retries=self._disable_retries, 2435 parameters=model.parameters or {}, 2436 message_repository=self._message_repository, 2437 use_cache=should_use_cache, 2438 decoder=decoder, 2439 stream_response=decoder.is_stream_response() if decoder else False, 2440 ) 2441 2442 @staticmethod 2443 def create_http_response_filter( 2444 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2445 ) -> HttpResponseFilter: 2446 if model.action: 2447 action = ResponseAction(model.action.value) 2448 else: 2449 action = None 2450 2451 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2452 2453 http_codes = ( 2454 set(model.http_codes) if model.http_codes else set() 2455 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2456 2457 return HttpResponseFilter( 2458 action=action, 2459 failure_type=failure_type, 2460 error_message=model.error_message or "", 2461 error_message_contains=model.error_message_contains or "", 2462 http_codes=http_codes, 2463 predicate=model.predicate or "", 2464 config=config, 2465 parameters=model.parameters or {}, 2466 ) 2467 2468 @staticmethod 2469 def create_inline_schema_loader( 2470 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2471 ) -> InlineSchemaLoader: 2472 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2473 2474 def create_complex_field_type( 2475 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2476 ) -> ComplexFieldType: 2477 items = ( 2478 self._create_component_from_model(model=model.items, config=config) 2479 if isinstance(model.items, ComplexFieldTypeModel) 2480 else model.items 2481 ) 2482 2483 return ComplexFieldType(field_type=model.field_type, items=items) 2484 2485 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2486 target_type = ( 2487 self._create_component_from_model(model=model.target_type, config=config) 2488 if isinstance(model.target_type, ComplexFieldTypeModel) 2489 else model.target_type 2490 ) 2491 2492 return TypesMap( 2493 target_type=target_type, 2494 current_type=model.current_type, 2495 condition=model.condition if model.condition is not None else "True", 2496 ) 2497 2498 def create_schema_type_identifier( 2499 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2500 ) -> SchemaTypeIdentifier: 2501 types_mapping = [] 2502 if model.types_mapping: 2503 types_mapping.extend( 2504 [ 2505 self._create_component_from_model(types_map, config=config) 2506 for types_map in model.types_mapping 2507 ] 2508 ) 2509 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2510 [x for x in model.schema_pointer] if model.schema_pointer else [] 2511 ) 2512 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2513 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2514 [x for x in model.type_pointer] if model.type_pointer else None 2515 ) 2516 2517 return SchemaTypeIdentifier( 2518 schema_pointer=model_schema_pointer, 2519 key_pointer=model_key_pointer, 2520 type_pointer=model_type_pointer, 2521 types_mapping=types_mapping, 2522 parameters=model.parameters or {}, 2523 ) 2524 2525 def create_dynamic_schema_loader( 2526 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2527 ) -> DynamicSchemaLoader: 2528 schema_transformations = [] 2529 if model.schema_transformations: 2530 for transformation_model in model.schema_transformations: 2531 
schema_transformations.append( 2532 self._create_component_from_model(model=transformation_model, config=config) 2533 ) 2534 name = "dynamic_properties" 2535 retriever = self._create_component_from_model( 2536 model=model.retriever, 2537 config=config, 2538 name=name, 2539 primary_key=None, 2540 partition_router=self._build_stream_slicer_from_partition_router( 2541 model.retriever, config 2542 ), 2543 transformations=[], 2544 use_cache=True, 2545 log_formatter=( 2546 lambda response: format_http_message( 2547 response, 2548 f"Schema loader '{name}' request", 2549 f"Request performed in order to extract schema.", 2550 name, 2551 is_auxiliary=True, 2552 ) 2553 ), 2554 ) 2555 schema_type_identifier = self._create_component_from_model( 2556 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2557 ) 2558 schema_filter = ( 2559 self._create_component_from_model( 2560 model.schema_filter, config=config, parameters=model.parameters or {} 2561 ) 2562 if model.schema_filter is not None 2563 else None 2564 ) 2565 2566 return DynamicSchemaLoader( 2567 retriever=retriever, 2568 config=config, 2569 schema_transformations=schema_transformations, 2570 schema_filter=schema_filter, 2571 schema_type_identifier=schema_type_identifier, 2572 parameters=model.parameters or {}, 2573 ) 2574 2575 @staticmethod 2576 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2577 return JsonDecoder(parameters={}) 2578 2579 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2580 return CompositeRawDecoder( 2581 parser=ModelToComponentFactory._get_parser(model, config), 2582 stream_response=False if self._emit_connector_builder_messages else True, 2583 ) 2584 2585 def create_jsonl_decoder( 2586 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2587 ) -> Decoder: 2588 return CompositeRawDecoder( 2589 parser=ModelToComponentFactory._get_parser(model, config), 2590 stream_response=False if self._emit_connector_builder_messages else True, 2591 ) 2592 2593 def create_gzip_decoder( 2594 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2595 ) -> Decoder: 2596 _compressed_response_types = { 2597 "gzip", 2598 "x-gzip", 2599 "gzip, deflate", 2600 "x-gzip, deflate", 2601 "application/zip", 2602 "application/gzip", 2603 "application/x-gzip", 2604 "application/x-zip-compressed", 2605 } 2606 2607 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2608 2609 if self._emit_connector_builder_messages: 2610 # This is very surprising but if the response is not streamed, 2611 # CompositeRawDecoder calls response.content and the requests library actually uncompress the data as opposed to response.raw, 2612 # which uses urllib3 directly and does not uncompress the data. 
2613 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2614 2615 return CompositeRawDecoder.by_headers( 2616 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2617 stream_response=True, 2618 fallback_parser=gzip_parser.inner_parser, 2619 ) 2620 2621 # todo: This method should be removed once we deprecate the SimpleRetriever.cursor field and the various 2622 # state methods 2623 @staticmethod 2624 def create_incrementing_count_cursor( 2625 model: IncrementingCountCursorModel, config: Config, **kwargs: Any 2626 ) -> DatetimeBasedCursor: 2627 # This should not actually get used anywhere at runtime, but needed to add this to pass checks since 2628 # we still parse models into components. The issue is that there's no runtime implementation of a 2629 # IncrementingCountCursor. 2630 # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor. 2631 return DatetimeBasedCursor( 2632 cursor_field=model.cursor_field, 2633 datetime_format="%Y-%m-%d", 2634 start_datetime="2024-12-12", 2635 config=config, 2636 parameters={}, 2637 ) 2638 2639 @staticmethod 2640 def create_iterable_decoder( 2641 model: IterableDecoderModel, config: Config, **kwargs: Any 2642 ) -> IterableDecoder: 2643 return IterableDecoder(parameters={}) 2644 2645 @staticmethod 2646 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2647 return XmlDecoder(parameters={}) 2648 2649 def create_zipfile_decoder( 2650 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2651 ) -> ZipfileDecoder: 2652 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2653 2654 @staticmethod 2655 def _get_parser(model: BaseModel, config: Config) -> Parser: 2656 if isinstance(model, JsonDecoderModel): 2657 # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases 2658 return JsonParser() 2659 elif isinstance(model, JsonlDecoderModel): 2660 return JsonLineParser() 2661 elif isinstance(model, CsvDecoderModel): 2662 return CsvParser( 2663 encoding=model.encoding, 2664 delimiter=model.delimiter, 2665 set_values_to_none=model.set_values_to_none, 2666 ) 2667 elif isinstance(model, GzipDecoderModel): 2668 return GzipParser( 2669 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2670 ) 2671 elif isinstance( 2672 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2673 ): 2674 raise ValueError(f"Decoder type {model} does not have parser associated to it") 2675 2676 raise ValueError(f"Unknown decoder type {model}") 2677 2678 @staticmethod 2679 def create_json_file_schema_loader( 2680 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2681 ) -> JsonFileSchemaLoader: 2682 return JsonFileSchemaLoader( 2683 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2684 ) 2685 2686 def create_jwt_authenticator( 2687 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2688 ) -> JwtAuthenticator: 2689 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2690 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2691 request_option = ( 2692 self._create_component_from_model(model.request_option, config) 2693 if model.request_option 2694 else None 2695 ) 2696 return JwtAuthenticator( 2697 config=config, 2698 
parameters=model.parameters or {}, 2699 algorithm=JwtAlgorithm(model.algorithm.value), 2700 secret_key=model.secret_key, 2701 base64_encode_secret_key=model.base64_encode_secret_key, 2702 token_duration=model.token_duration, 2703 header_prefix=model.header_prefix, 2704 kid=jwt_headers.kid, 2705 typ=jwt_headers.typ, 2706 cty=jwt_headers.cty, 2707 iss=jwt_payload.iss, 2708 sub=jwt_payload.sub, 2709 aud=jwt_payload.aud, 2710 additional_jwt_headers=model.additional_jwt_headers, 2711 additional_jwt_payload=model.additional_jwt_payload, 2712 passphrase=model.passphrase, 2713 request_option=request_option, 2714 ) 2715 2716 def create_list_partition_router( 2717 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2718 ) -> ListPartitionRouter: 2719 request_option = ( 2720 self._create_component_from_model(model.request_option, config) 2721 if model.request_option 2722 else None 2723 ) 2724 return ListPartitionRouter( 2725 cursor_field=model.cursor_field, 2726 request_option=request_option, 2727 values=model.values, 2728 config=config, 2729 parameters=model.parameters or {}, 2730 ) 2731 2732 @staticmethod 2733 def create_min_max_datetime( 2734 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2735 ) -> MinMaxDatetime: 2736 return MinMaxDatetime( 2737 datetime=model.datetime, 2738 datetime_format=model.datetime_format or "", 2739 max_datetime=model.max_datetime or "", 2740 min_datetime=model.min_datetime or "", 2741 parameters=model.parameters or {}, 2742 ) 2743 2744 @staticmethod 2745 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2746 return NoAuth(parameters=model.parameters or {}) 2747 2748 @staticmethod 2749 def create_no_pagination( 2750 model: NoPaginationModel, config: Config, **kwargs: Any 2751 ) -> NoPagination: 2752 return NoPagination(parameters={}) 2753 2754 def create_oauth_authenticator( 2755 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2756 ) -> DeclarativeOauth2Authenticator: 2757 profile_assertion = ( 2758 self._create_component_from_model(model.profile_assertion, config=config) 2759 if model.profile_assertion 2760 else None 2761 ) 2762 2763 if model.refresh_token_updater: 2764 # ignore type error because fixing it would have a lot of dependencies, revisit later 2765 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2766 config, 2767 InterpolatedString.create( 2768 model.token_refresh_endpoint, # type: ignore 2769 parameters=model.parameters or {}, 2770 ).eval(config), 2771 access_token_name=InterpolatedString.create( 2772 model.access_token_name or "access_token", parameters=model.parameters or {} 2773 ).eval(config), 2774 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2775 expires_in_name=InterpolatedString.create( 2776 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2777 ).eval(config), 2778 client_id_name=InterpolatedString.create( 2779 model.client_id_name or "client_id", parameters=model.parameters or {} 2780 ).eval(config), 2781 client_id=InterpolatedString.create( 2782 model.client_id, parameters=model.parameters or {} 2783 ).eval(config) 2784 if model.client_id 2785 else model.client_id, 2786 client_secret_name=InterpolatedString.create( 2787 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2788 ).eval(config), 2789 client_secret=InterpolatedString.create( 2790 model.client_secret, parameters=model.parameters or {} 2791 ).eval(config) 2792 if model.client_secret 2793 else model.client_secret, 
2794 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2795 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2796 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2797 grant_type_name=InterpolatedString.create( 2798 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2799 ).eval(config), 2800 grant_type=InterpolatedString.create( 2801 model.grant_type or "refresh_token", parameters=model.parameters or {} 2802 ).eval(config), 2803 refresh_request_body=InterpolatedMapping( 2804 model.refresh_request_body or {}, parameters=model.parameters or {} 2805 ).eval(config), 2806 refresh_request_headers=InterpolatedMapping( 2807 model.refresh_request_headers or {}, parameters=model.parameters or {} 2808 ).eval(config), 2809 scopes=model.scopes, 2810 token_expiry_date_format=model.token_expiry_date_format, 2811 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2812 message_repository=self._message_repository, 2813 refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes, 2814 refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key, 2815 refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values, 2816 ) 2817 # ignore type error because fixing it would have a lot of dependencies, revisit later 2818 return DeclarativeOauth2Authenticator( # type: ignore 2819 access_token_name=model.access_token_name or "access_token", 2820 access_token_value=model.access_token_value, 2821 client_id_name=model.client_id_name or "client_id", 2822 client_id=model.client_id, 2823 client_secret_name=model.client_secret_name or "client_secret", 2824 client_secret=model.client_secret, 2825 expires_in_name=model.expires_in_name or "expires_in", 2826 grant_type_name=model.grant_type_name or "grant_type", 2827 grant_type=model.grant_type or "refresh_token", 2828 refresh_request_body=model.refresh_request_body, 2829 refresh_request_headers=model.refresh_request_headers, 2830 refresh_token_name=model.refresh_token_name or "refresh_token", 2831 refresh_token=model.refresh_token, 2832 scopes=model.scopes, 2833 token_expiry_date=model.token_expiry_date, 2834 token_expiry_date_format=model.token_expiry_date_format, 2835 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2836 token_refresh_endpoint=model.token_refresh_endpoint, 2837 config=config, 2838 parameters=model.parameters or {}, 2839 message_repository=self._message_repository, 2840 profile_assertion=profile_assertion, 2841 use_profile_assertion=model.use_profile_assertion, 2842 ) 2843 2844 def create_offset_increment( 2845 self, 2846 model: OffsetIncrementModel, 2847 config: Config, 2848 decoder: Decoder, 2849 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2850 **kwargs: Any, 2851 ) -> OffsetIncrement: 2852 if isinstance(decoder, PaginationDecoderDecorator): 2853 inner_decoder = decoder.decoder 2854 else: 2855 inner_decoder = decoder 2856 decoder = PaginationDecoderDecorator(decoder=decoder) 2857 2858 if self._is_supported_decoder_for_pagination(inner_decoder): 2859 decoder_to_use = decoder 2860 else: 2861 raise ValueError( 2862 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2863 ) 2864 2865 # Ideally we would instantiate the runtime extractor from highest most level (in this case the SimpleRetriever) 2866 # so that it can be shared by OffSetIncrement and RecordSelector. 
However, due to how we instantiate the 2867 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2868 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2869 # When we have more time to investigate we can look into reusing the same component. 2870 extractor = ( 2871 self._create_component_from_model( 2872 model=extractor_model, config=config, decoder=decoder_to_use 2873 ) 2874 if extractor_model 2875 else None 2876 ) 2877 2878 return OffsetIncrement( 2879 page_size=model.page_size, 2880 config=config, 2881 decoder=decoder_to_use, 2882 extractor=extractor, 2883 inject_on_first_request=model.inject_on_first_request or False, 2884 parameters=model.parameters or {}, 2885 ) 2886 2887 @staticmethod 2888 def create_page_increment( 2889 model: PageIncrementModel, config: Config, **kwargs: Any 2890 ) -> PageIncrement: 2891 return PageIncrement( 2892 page_size=model.page_size, 2893 config=config, 2894 start_from_page=model.start_from_page or 0, 2895 inject_on_first_request=model.inject_on_first_request or False, 2896 parameters=model.parameters or {}, 2897 ) 2898 2899 def create_parent_stream_config( 2900 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 2901 ) -> ParentStreamConfig: 2902 declarative_stream = self._create_component_from_model( 2903 model.stream, 2904 config=config, 2905 is_parent=True, 2906 **kwargs, 2907 ) 2908 request_option = ( 2909 self._create_component_from_model(model.request_option, config=config) 2910 if model.request_option 2911 else None 2912 ) 2913 2914 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2915 raise ValueError( 2916 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 
2917 ) 2918 2919 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2920 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2921 ) 2922 2923 return ParentStreamConfig( 2924 parent_key=model.parent_key, 2925 request_option=request_option, 2926 stream=declarative_stream, 2927 partition_field=model.partition_field, 2928 config=config, 2929 incremental_dependency=model.incremental_dependency or False, 2930 parameters=model.parameters or {}, 2931 extra_fields=model.extra_fields, 2932 lazy_read_pointer=model_lazy_read_pointer, 2933 ) 2934 2935 def create_properties_from_endpoint( 2936 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any 2937 ) -> PropertiesFromEndpoint: 2938 retriever = self._create_component_from_model( 2939 model=model.retriever, 2940 config=config, 2941 name="dynamic_properties", 2942 primary_key=None, 2943 stream_slicer=None, 2944 transformations=[], 2945 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to be different 2946 ) 2947 return PropertiesFromEndpoint( 2948 property_field_path=model.property_field_path, 2949 retriever=retriever, 2950 config=config, 2951 parameters=model.parameters or {}, 2952 ) 2953 2954 def create_property_chunking( 2955 self, model: PropertyChunkingModel, config: Config, **kwargs: Any 2956 ) -> PropertyChunking: 2957 record_merge_strategy = ( 2958 self._create_component_from_model( 2959 model=model.record_merge_strategy, config=config, **kwargs 2960 ) 2961 if model.record_merge_strategy 2962 else None 2963 ) 2964 2965 property_limit_type: PropertyLimitType 2966 match model.property_limit_type: 2967 case PropertyLimitTypeModel.property_count: 2968 property_limit_type = PropertyLimitType.property_count 2969 case PropertyLimitTypeModel.characters: 2970 property_limit_type = PropertyLimitType.characters 2971 case _: 2972 raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}") 2973 2974 return PropertyChunking( 2975 property_limit_type=property_limit_type, 2976 property_limit=model.property_limit, 2977 record_merge_strategy=record_merge_strategy, 2978 config=config, 2979 parameters=model.parameters or {}, 2980 ) 2981 2982 def create_query_properties( 2983 self, model: QueryPropertiesModel, config: Config, **kwargs: Any 2984 ) -> QueryProperties: 2985 if isinstance(model.property_list, list): 2986 property_list = model.property_list 2987 else: 2988 property_list = self._create_component_from_model( 2989 model=model.property_list, config=config, **kwargs 2990 ) 2991 2992 property_chunking = ( 2993 self._create_component_from_model( 2994 model=model.property_chunking, config=config, **kwargs 2995 ) 2996 if model.property_chunking 2997 else None 2998 ) 2999 3000 return QueryProperties( 3001 property_list=property_list, 3002 always_include_properties=model.always_include_properties, 3003 property_chunking=property_chunking, 3004 config=config, 3005 parameters=model.parameters or {}, 3006 ) 3007 3008 @staticmethod 3009 def create_record_filter( 3010 model: RecordFilterModel, config: Config, **kwargs: Any 3011 ) -> RecordFilter: 3012 return RecordFilter( 3013 condition=model.condition or "", config=config, parameters=model.parameters or {} 3014 ) 3015 3016 @staticmethod 3017 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 3018 return RequestPath(parameters={}) 3019 3020 @staticmethod 3021 def
create_request_option( 3022 model: RequestOptionModel, config: Config, **kwargs: Any 3023 ) -> RequestOption: 3024 inject_into = RequestOptionType(model.inject_into.value) 3025 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3026 [ 3027 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3028 for segment in model.field_path 3029 ] 3030 if model.field_path 3031 else None 3032 ) 3033 field_name = ( 3034 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3035 if model.field_name 3036 else None 3037 ) 3038 return RequestOption( 3039 field_name=field_name, 3040 field_path=field_path, 3041 inject_into=inject_into, 3042 parameters=kwargs.get("parameters", {}), 3043 ) 3044 3045 def create_record_selector( 3046 self, 3047 model: RecordSelectorModel, 3048 config: Config, 3049 *, 3050 name: str, 3051 transformations: List[RecordTransformation] | None = None, 3052 decoder: Decoder | None = None, 3053 client_side_incremental_sync: Dict[str, Any] | None = None, 3054 file_uploader: Optional[DefaultFileUploader] = None, 3055 **kwargs: Any, 3056 ) -> RecordSelector: 3057 extractor = self._create_component_from_model( 3058 model=model.extractor, decoder=decoder, config=config 3059 ) 3060 record_filter = ( 3061 self._create_component_from_model(model.record_filter, config=config) 3062 if model.record_filter 3063 else None 3064 ) 3065 3066 transform_before_filtering = ( 3067 False if model.transform_before_filtering is None else model.transform_before_filtering 3068 ) 3069 if client_side_incremental_sync: 3070 record_filter = ClientSideIncrementalRecordFilterDecorator( 3071 config=config, 3072 parameters=model.parameters, 3073 condition=model.record_filter.condition 3074 if (model.record_filter and hasattr(model.record_filter, "condition")) 3075 else None, 3076 **client_side_incremental_sync, 3077 ) 3078 transform_before_filtering = ( 3079 True 3080 if model.transform_before_filtering is None 3081 else model.transform_before_filtering 3082 ) 3083 3084 if model.schema_normalization is None: 3085 # default to no schema normalization if not set 3086 model.schema_normalization = SchemaNormalizationModel.None_ 3087 3088 schema_normalization = ( 3089 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3090 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3091 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3092 ) 3093 3094 return RecordSelector( 3095 extractor=extractor, 3096 name=name, 3097 config=config, 3098 record_filter=record_filter, 3099 transformations=transformations or [], 3100 file_uploader=file_uploader, 3101 schema_normalization=schema_normalization, 3102 parameters=model.parameters or {}, 3103 transform_before_filtering=transform_before_filtering, 3104 ) 3105 3106 @staticmethod 3107 def create_remove_fields( 3108 model: RemoveFieldsModel, config: Config, **kwargs: Any 3109 ) -> RemoveFields: 3110 return RemoveFields( 3111 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 3112 ) 3113 3114 def create_selective_authenticator( 3115 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 3116 ) -> DeclarativeAuthenticator: 3117 authenticators = { 3118 name: self._create_component_from_model(model=auth, config=config) 3119 for name, auth in model.authenticators.items() 3120 } 3121 # SelectiveAuthenticator will return instance of 
DeclarativeAuthenticator or raise a ValueError 3122 return SelectiveAuthenticator( # type: ignore[abstract] 3123 config=config, 3124 authenticators=authenticators, 3125 authenticator_selection_path=model.authenticator_selection_path, 3126 **kwargs, 3127 ) 3128 3129 @staticmethod 3130 def create_legacy_session_token_authenticator( 3131 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3132 ) -> LegacySessionTokenAuthenticator: 3133 return LegacySessionTokenAuthenticator( 3134 api_url=url_base, 3135 header=model.header, 3136 login_url=model.login_url, 3137 password=model.password or "", 3138 session_token=model.session_token or "", 3139 session_token_response_key=model.session_token_response_key or "", 3140 username=model.username or "", 3141 validate_session_url=model.validate_session_url, 3142 config=config, 3143 parameters=model.parameters or {}, 3144 ) 3145 3146 def create_simple_retriever( 3147 self, 3148 model: SimpleRetrieverModel, 3149 config: Config, 3150 *, 3151 name: str, 3152 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3153 request_options_provider: Optional[RequestOptionsProvider] = None, 3154 stop_condition_cursor: Optional[Cursor] = None, 3155 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3156 transformations: List[RecordTransformation], 3157 file_uploader: Optional[DefaultFileUploader] = None, 3158 incremental_sync: Optional[ 3159 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3160 ] = None, 3161 use_cache: Optional[bool] = None, 3162 log_formatter: Optional[Callable[[Response], Any]] = None, 3163 partition_router: Optional[PartitionRouter] = None, 3164 **kwargs: Any, 3165 ) -> SimpleRetriever: 3166 def _get_url(req: Requester) -> str: 3167 """ 3168 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3169 This is needed because the URL is not set until the requester is created.
3170 """ 3171 3172 _url: str = ( 3173 model.requester.url 3174 if hasattr(model.requester, "url") and model.requester.url is not None 3175 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3176 ) 3177 _url_base: str = ( 3178 model.requester.url_base 3179 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3180 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3181 ) 3182 3183 return _url or _url_base 3184 3185 decoder = ( 3186 self._create_component_from_model(model=model.decoder, config=config) 3187 if model.decoder 3188 else JsonDecoder(parameters={}) 3189 ) 3190 record_selector = self._create_component_from_model( 3191 model=model.record_selector, 3192 name=name, 3193 config=config, 3194 decoder=decoder, 3195 transformations=transformations, 3196 client_side_incremental_sync=client_side_incremental_sync, 3197 file_uploader=file_uploader, 3198 ) 3199 3200 query_properties: Optional[QueryProperties] = None 3201 query_properties_key: Optional[str] = None 3202 if self._query_properties_in_request_parameters(model.requester): 3203 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3204 # places instead of default to request_parameters which isn't clearly documented 3205 if ( 3206 hasattr(model.requester, "fetch_properties_from_endpoint") 3207 and model.requester.fetch_properties_from_endpoint 3208 ): 3209 raise ValueError( 3210 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3211 ) 3212 3213 query_properties_definitions = [] 3214 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters() 3215 if isinstance(request_parameter, QueryPropertiesModel): 3216 query_properties_key = key 3217 query_properties_definitions.append(request_parameter) 3218 3219 if len(query_properties_definitions) > 1: 3220 raise ValueError( 3221 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3222 ) 3223 3224 if len(query_properties_definitions) == 1: 3225 query_properties = self._create_component_from_model( 3226 model=query_properties_definitions[0], config=config 3227 ) 3228 elif ( 3229 hasattr(model.requester, "fetch_properties_from_endpoint") 3230 and model.requester.fetch_properties_from_endpoint 3231 ): 3232 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3233 query_properties_definition = QueryPropertiesModel( 3234 type="QueryProperties", 3235 property_list=model.requester.fetch_properties_from_endpoint, 3236 always_include_properties=None, 3237 property_chunking=None, 3238 ) # type: ignore # $parameters has a default value 3239 3240 query_properties = self.create_query_properties( 3241 model=query_properties_definition, 3242 config=config, 3243 ) 3244 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3245 query_properties = self.create_query_properties( 3246 model=model.requester.query_properties, 3247 config=config, 3248 ) 3249 3250 requester = self._create_component_from_model( 3251 model=model.requester, 3252 decoder=decoder, 3253 name=name, 3254 query_properties_key=query_properties_key, 3255 use_cache=use_cache, 3256 config=config, 3257 ) 3258 3259 if not 
request_options_provider: 3260 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3261 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3262 partition_router, PartitionRouter 3263 ): 3264 request_options_provider = partition_router 3265 3266 paginator = ( 3267 self._create_component_from_model( 3268 model=model.paginator, 3269 config=config, 3270 url_base=_get_url(requester), 3271 extractor_model=model.record_selector.extractor, 3272 decoder=decoder, 3273 cursor_used_for_stop_condition=stop_condition_cursor or None, 3274 ) 3275 if model.paginator 3276 else NoPagination(parameters={}) 3277 ) 3278 3279 ignore_stream_slicer_parameters_on_paginated_requests = ( 3280 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3281 ) 3282 3283 if ( 3284 model.partition_router 3285 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3286 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3287 and any( 3288 parent_stream_config.lazy_read_pointer 3289 for parent_stream_config in model.partition_router.parent_stream_configs 3290 ) 3291 ): 3292 if incremental_sync: 3293 if incremental_sync.type != "DatetimeBasedCursor": 3294 raise ValueError( 3295 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3296 ) 3297 3298 elif incremental_sync.step or incremental_sync.cursor_granularity: 3299 raise ValueError( 3300 f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 3301 ) 3302 3303 if model.decoder and model.decoder.type != "JsonDecoder": 3304 raise ValueError( 3305 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 3306 ) 3307 3308 return LazySimpleRetriever( 3309 name=name, 3310 paginator=paginator, 3311 primary_key=primary_key, 3312 requester=requester, 3313 record_selector=record_selector, 3314 stream_slicer=_NO_STREAM_SLICING, 3315 request_option_provider=request_options_provider, 3316 cursor=None, 3317 config=config, 3318 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3319 parameters=model.parameters or {}, 3320 ) 3321 3322 return SimpleRetriever( 3323 name=name, 3324 paginator=paginator, 3325 primary_key=primary_key, 3326 requester=requester, 3327 record_selector=record_selector, 3328 stream_slicer=_NO_STREAM_SLICING, 3329 request_option_provider=request_options_provider, 3330 cursor=None, 3331 config=config, 3332 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3333 additional_query_properties=query_properties, 3334 log_formatter=self._get_log_formatter(log_formatter, name), 3335 parameters=model.parameters or {}, 3336 ) 3337 3338 def _get_log_formatter( 3339 self, log_formatter: Callable[[Response], Any] | None, name: str 3340 ) -> Callable[[Response], Any] | None: 3341 if self._should_limit_slices_fetched(): 3342 return ( 3343 ( 3344 lambda response: format_http_message( 3345 response, 3346 f"Stream '{name}' request", 3347 f"Request performed in order to extract records for stream '{name}'", 3348 name, 3349 ) 3350 ) 3351 if not log_formatter 3352 else log_formatter 3353 ) 3354 return None 3355 3356 def _should_limit_slices_fetched(self) -> bool: 3357 """ 3358 Returns True if the number of slices fetched should be limited, False otherwise. 3359 This is used to limit the number of slices fetched during tests. 
3360 """ 3361 return bool(self._limit_slices_fetched or self._emit_connector_builder_messages) 3362 3363 @staticmethod 3364 def _query_properties_in_request_parameters( 3365 requester: Union[HttpRequesterModel, CustomRequesterModel], 3366 ) -> bool: 3367 if not hasattr(requester, "request_parameters"): 3368 return False 3369 request_parameters = requester.request_parameters 3370 if request_parameters and isinstance(request_parameters, Mapping): 3371 for request_parameter in request_parameters.values(): 3372 if isinstance(request_parameter, QueryPropertiesModel): 3373 return True 3374 return False 3375 3376 @staticmethod 3377 def _remove_query_properties( 3378 request_parameters: Mapping[str, Union[str, QueryPropertiesModel]], 3379 ) -> Mapping[str, str]: 3380 return { 3381 parameter_field: request_parameter 3382 for parameter_field, request_parameter in request_parameters.items() 3383 if not isinstance(request_parameter, QueryPropertiesModel) 3384 } 3385 3386 def create_state_delegating_stream( 3387 self, 3388 model: StateDelegatingStreamModel, 3389 config: Config, 3390 has_parent_state: Optional[bool] = None, 3391 **kwargs: Any, 3392 ) -> DeclarativeStream: 3393 if ( 3394 model.full_refresh_stream.name != model.name 3395 or model.name != model.incremental_stream.name 3396 ): 3397 raise ValueError( 3398 f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 3399 ) 3400 3401 stream_model = self._get_state_delegating_stream_model( 3402 False if has_parent_state is None else has_parent_state, model 3403 ) 3404 3405 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is alwyas DeclarativeStreamModel 3406 3407 def _get_state_delegating_stream_model( 3408 self, has_parent_state: bool, model: StateDelegatingStreamModel 3409 ) -> DeclarativeStreamModel: 3410 return ( 3411 model.incremental_stream 3412 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 3413 else model.full_refresh_stream 3414 ) 3415 3416 def _create_async_job_status_mapping( 3417 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 3418 ) -> Mapping[str, AsyncJobStatus]: 3419 api_status_to_cdk_status = {} 3420 for cdk_status, api_statuses in model.dict().items(): 3421 if cdk_status == "type": 3422 # This is an element of the dict because of the typing of the CDK but it is not a CDK status 3423 continue 3424 3425 for status in api_statuses: 3426 if status in api_status_to_cdk_status: 3427 raise ValueError( 3428 f"API status {status} is already set for CDK status {cdk_status}. 
Please ensure API statuses are only provided once" 3429 ) 3430 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 3431 return api_status_to_cdk_status 3432 3433 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 3434 match status: 3435 case "running": 3436 return AsyncJobStatus.RUNNING 3437 case "completed": 3438 return AsyncJobStatus.COMPLETED 3439 case "failed": 3440 return AsyncJobStatus.FAILED 3441 case "timeout": 3442 return AsyncJobStatus.TIMED_OUT 3443 case _: 3444 raise ValueError(f"Unsupported CDK status {status}") 3445 3446 def create_async_retriever( 3447 self, 3448 model: AsyncRetrieverModel, 3449 config: Config, 3450 *, 3451 name: str, 3452 primary_key: Optional[ 3453 Union[str, List[str], List[List[str]]] 3454 ], # this seems to be needed to match create_simple_retriever 3455 stream_slicer: Optional[StreamSlicer], 3456 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3457 transformations: List[RecordTransformation], 3458 **kwargs: Any, 3459 ) -> AsyncRetriever: 3460 if model.download_target_requester and not model.download_target_extractor: 3461 raise ValueError( 3462 f"`download_target_extractor` required if using a `download_target_requester`" 3463 ) 3464 3465 def _get_download_retriever( 3466 requester: Requester, extractor: RecordExtractor, _decoder: Decoder 3467 ) -> SimpleRetriever: 3468 # We create a record selector for the download retriever 3469 # with no schema normalization and no transformations, neither record filter 3470 # as all this occurs in the record_selector of the AsyncRetriever 3471 record_selector = RecordSelector( 3472 extractor=extractor, 3473 name=name, 3474 record_filter=None, 3475 transformations=[], 3476 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3477 config=config, 3478 parameters={}, 3479 ) 3480 paginator = ( 3481 self._create_component_from_model( 3482 model=model.download_paginator, 3483 decoder=_decoder, 3484 config=config, 3485 url_base="", 3486 ) 3487 if model.download_paginator 3488 else NoPagination(parameters={}) 3489 ) 3490 3491 return SimpleRetriever( 3492 requester=requester, 3493 record_selector=record_selector, 3494 primary_key=None, 3495 name=name, 3496 paginator=paginator, 3497 config=config, 3498 parameters={}, 3499 log_formatter=self._get_log_formatter(None, name), 3500 ) 3501 3502 def _get_job_timeout() -> datetime.timedelta: 3503 user_defined_timeout: Optional[int] = ( 3504 int( 3505 InterpolatedString.create( 3506 str(model.polling_job_timeout), 3507 parameters={}, 3508 ).eval(config) 3509 ) 3510 if model.polling_job_timeout 3511 else None 3512 ) 3513 3514 # check for user defined timeout during the test read or 15 minutes 3515 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3516 # default value for non-connector builder is 60 minutes. 
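# For illustration only (the interpolated expression and the config key below are hypothetical,
# not taken from any real connector): a polling_job_timeout of "{{ config['polling_timeout'] }}"
# with config = {"polling_timeout": 30} resolves to a 30-minute timeout in both modes, e.g.
#
#     int(
#         InterpolatedString.create(
#             "{{ config['polling_timeout'] }}", parameters={}
#         ).eval({"polling_timeout": 30})
#     )  # -> 30
#
# whereas an unset polling_job_timeout falls back to 15 minutes for Connector Builder test reads
# and to the 60-minute default below for regular syncs.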
3517 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3518 3519 return ( 3520 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3521 ) 3522 3523 decoder = ( 3524 self._create_component_from_model(model=model.decoder, config=config) 3525 if model.decoder 3526 else JsonDecoder(parameters={}) 3527 ) 3528 record_selector = self._create_component_from_model( 3529 model=model.record_selector, 3530 config=config, 3531 decoder=decoder, 3532 name=name, 3533 transformations=transformations, 3534 client_side_incremental_sync=client_side_incremental_sync, 3535 ) 3536 3537 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3538 if self._should_limit_slices_fetched(): 3539 stream_slicer = cast( 3540 StreamSlicer, 3541 StreamSlicerTestReadDecorator( 3542 wrapped_slicer=stream_slicer, 3543 maximum_number_of_slices=self._limit_slices_fetched or 5, 3544 ), 3545 ) 3546 3547 creation_requester = self._create_component_from_model( 3548 model=model.creation_requester, 3549 decoder=decoder, 3550 config=config, 3551 name=f"job creation - {name}", 3552 ) 3553 polling_requester = self._create_component_from_model( 3554 model=model.polling_requester, 3555 decoder=decoder, 3556 config=config, 3557 name=f"job polling - {name}", 3558 ) 3559 job_download_components_name = f"job download - {name}" 3560 download_decoder = ( 3561 self._create_component_from_model(model=model.download_decoder, config=config) 3562 if model.download_decoder 3563 else JsonDecoder(parameters={}) 3564 ) 3565 download_extractor = ( 3566 self._create_component_from_model( 3567 model=model.download_extractor, 3568 config=config, 3569 decoder=download_decoder, 3570 parameters=model.parameters, 3571 ) 3572 if model.download_extractor 3573 else DpathExtractor( 3574 [], 3575 config=config, 3576 decoder=download_decoder, 3577 parameters=model.parameters or {}, 3578 ) 3579 ) 3580 download_requester = self._create_component_from_model( 3581 model=model.download_requester, 3582 decoder=download_decoder, 3583 config=config, 3584 name=job_download_components_name, 3585 ) 3586 download_retriever = _get_download_retriever( 3587 download_requester, download_extractor, download_decoder 3588 ) 3589 abort_requester = ( 3590 self._create_component_from_model( 3591 model=model.abort_requester, 3592 decoder=decoder, 3593 config=config, 3594 name=f"job abort - {name}", 3595 ) 3596 if model.abort_requester 3597 else None 3598 ) 3599 delete_requester = ( 3600 self._create_component_from_model( 3601 model=model.delete_requester, 3602 decoder=decoder, 3603 config=config, 3604 name=f"job delete - {name}", 3605 ) 3606 if model.delete_requester 3607 else None 3608 ) 3609 download_target_requester = ( 3610 self._create_component_from_model( 3611 model=model.download_target_requester, 3612 decoder=decoder, 3613 config=config, 3614 name=f"job extract_url - {name}", 3615 ) 3616 if model.download_target_requester 3617 else None 3618 ) 3619 status_extractor = self._create_component_from_model( 3620 model=model.status_extractor, decoder=decoder, config=config, name=name 3621 ) 3622 download_target_extractor = ( 3623 self._create_component_from_model( 3624 model=model.download_target_extractor, 3625 decoder=decoder, 3626 config=config, 3627 name=name, 3628 ) 3629 if model.download_target_extractor 3630 else None 3631 ) 3632 3633 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3634 creation_requester=creation_requester, 3635 polling_requester=polling_requester, 3636 
download_retriever=download_retriever, 3637 download_target_requester=download_target_requester, 3638 abort_requester=abort_requester, 3639 delete_requester=delete_requester, 3640 status_extractor=status_extractor, 3641 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3642 download_target_extractor=download_target_extractor, 3643 job_timeout=_get_job_timeout(), 3644 ) 3645 3646 async_job_partition_router = AsyncJobPartitionRouter( 3647 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3648 job_repository, 3649 stream_slices, 3650 self._job_tracker, 3651 self._message_repository, 3652 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk 3653 has_bulk_parent=False, 3654 # set the `job_max_retry` to 1 for the `Connector Builder`` use-case. 3655 # `None` == default retry is set to 3 attempts, under the hood. 3656 job_max_retry=1 if self._emit_connector_builder_messages else None, 3657 ), 3658 stream_slicer=stream_slicer, 3659 config=config, 3660 parameters=model.parameters or {}, 3661 ) 3662 3663 return AsyncRetriever( 3664 record_selector=record_selector, 3665 stream_slicer=async_job_partition_router, 3666 config=config, 3667 parameters=model.parameters or {}, 3668 ) 3669 3670 def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3671 config_migrations = [ 3672 self._create_component_from_model(migration, config) 3673 for migration in ( 3674 model.config_normalization_rules.config_migrations 3675 if ( 3676 model.config_normalization_rules 3677 and model.config_normalization_rules.config_migrations 3678 ) 3679 else [] 3680 ) 3681 ] 3682 config_transformations = [ 3683 self._create_component_from_model(transformation, config) 3684 for transformation in ( 3685 model.config_normalization_rules.transformations 3686 if ( 3687 model.config_normalization_rules 3688 and model.config_normalization_rules.transformations 3689 ) 3690 else [] 3691 ) 3692 ] 3693 config_validations = [ 3694 self._create_component_from_model(validation, config) 3695 for validation in ( 3696 model.config_normalization_rules.validations 3697 if ( 3698 model.config_normalization_rules 3699 and model.config_normalization_rules.validations 3700 ) 3701 else [] 3702 ) 3703 ] 3704 3705 return Spec( 3706 connection_specification=model.connection_specification, 3707 documentation_url=model.documentation_url, 3708 advanced_auth=model.advanced_auth, 3709 parameters={}, 3710 config_migrations=config_migrations, 3711 config_transformations=config_transformations, 3712 config_validations=config_validations, 3713 ) 3714 3715 def create_substream_partition_router( 3716 self, 3717 model: SubstreamPartitionRouterModel, 3718 config: Config, 3719 *, 3720 stream_name: str, 3721 **kwargs: Any, 3722 ) -> SubstreamPartitionRouter: 3723 parent_stream_configs = [] 3724 if model.parent_stream_configs: 3725 parent_stream_configs.extend( 3726 [ 3727 self.create_parent_stream_config_with_substream_wrapper( 3728 model=parent_stream_config, config=config, stream_name=stream_name, **kwargs 3729 ) 3730 for parent_stream_config in model.parent_stream_configs 3731 ] 3732 ) 3733 3734 return SubstreamPartitionRouter( 3735 parent_stream_configs=parent_stream_configs, 3736 parameters=model.parameters or {}, 3737 config=config, 3738 ) 3739 3740 def create_parent_stream_config_with_substream_wrapper( 3741 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 3742 ) -> Any: 3743 # getting the parent state 
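# Illustrative, approximate shape of the child stream state read just below; the partition keys,
# parent stream name and cursor values are hypothetical. When incremental_dependency is set, the
# nested "parent_state" entry is what ultimately seeds the parent stream's ConnectorStateManager
# in _instantiate_parent_stream_state_manager:
#
#     {
#         "states": [
#             {"partition": {"parent_id": "123"}, "cursor": {"updated_at": "2024-01-01"}}
#         ],
#         "parent_state": {"parent_stream_name": {"updated_at": "2024-01-01"}},
#     }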
3744 child_state = self._connector_state_manager.get_stream_state(stream_name, None) 3745 3746 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3747 has_parent_state = bool( 3748 self._connector_state_manager.get_stream_state(stream_name, None) 3749 if model.incremental_dependency 3750 else False 3751 ) 3752 connector_state_manager = self._instantiate_parent_stream_state_manager( 3753 child_state, config, model, has_parent_state 3754 ) 3755 3756 substream_factory = ModelToComponentFactory( 3757 connector_state_manager=connector_state_manager, 3758 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3759 limit_slices_fetched=self._limit_slices_fetched, 3760 emit_connector_builder_messages=self._emit_connector_builder_messages, 3761 disable_retries=self._disable_retries, 3762 disable_cache=self._disable_cache, 3763 message_repository=StateFilteringMessageRepository( 3764 LogAppenderMessageRepositoryDecorator( 3765 { 3766 "airbyte_cdk": {"stream": {"is_substream": True}}, 3767 "http": {"is_auxiliary": True}, 3768 }, 3769 self._message_repository, 3770 self._evaluate_log_level(self._emit_connector_builder_messages), 3771 ), 3772 ), 3773 ) 3774 3775 return substream_factory.create_parent_stream_config( 3776 model=model, config=config, stream_name=stream_name, **kwargs 3777 ) 3778 3779 def _instantiate_parent_stream_state_manager( 3780 self, 3781 child_state: MutableMapping[str, Any], 3782 config: Config, 3783 model: ParentStreamConfigModel, 3784 has_parent_state: bool, 3785 ) -> ConnectorStateManager: 3786 """ 3787 With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the 3788 `set_initial_state` flow that existed for the declarative cursors. This state is taken from 3789 self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account 3790 for the MessageRepository being different). So we need to pass a ConnectorStateManager to the 3791 ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if 3792 incremental_dependency is set. 
3793 """ 3794 if model.incremental_dependency and child_state: 3795 parent_stream_name = model.stream.name or "" 3796 parent_state = ConcurrentPerPartitionCursor.get_parent_state( 3797 child_state, parent_stream_name 3798 ) 3799 3800 if not parent_state: 3801 # there are two migration cases: state value from child stream or from global state 3802 parent_state = ConcurrentPerPartitionCursor.get_global_state( 3803 child_state, parent_stream_name 3804 ) 3805 3806 if not parent_state and not isinstance(parent_state, dict): 3807 cursor_values = child_state.values() 3808 if cursor_values: 3809 incremental_sync_model: Union[ 3810 DatetimeBasedCursorModel, 3811 IncrementingCountCursorModel, 3812 ] = ( 3813 model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream 3814 if isinstance(model.stream, DeclarativeStreamModel) 3815 else self._get_state_delegating_stream_model( 3816 has_parent_state, model.stream 3817 ).incremental_sync 3818 ) 3819 cursor_field = InterpolatedString.create( 3820 incremental_sync_model.cursor_field, 3821 parameters=incremental_sync_model.parameters or {}, 3822 ).eval(config) 3823 parent_state = AirbyteStateMessage( 3824 type=AirbyteStateType.STREAM, 3825 stream=AirbyteStreamState( 3826 stream_descriptor=StreamDescriptor( 3827 name=parent_stream_name, namespace=None 3828 ), 3829 stream_state=AirbyteStateBlob( 3830 {cursor_field: list(cursor_values)[0]} 3831 ), 3832 ), 3833 ) 3834 return ConnectorStateManager([parent_state] if parent_state else []) 3835 3836 return ConnectorStateManager([]) 3837 3838 @staticmethod 3839 def create_wait_time_from_header( 3840 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 3841 ) -> WaitTimeFromHeaderBackoffStrategy: 3842 return WaitTimeFromHeaderBackoffStrategy( 3843 header=model.header, 3844 parameters=model.parameters or {}, 3845 config=config, 3846 regex=model.regex, 3847 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 3848 if model.max_waiting_time_in_seconds is not None 3849 else None, 3850 ) 3851 3852 @staticmethod 3853 def create_wait_until_time_from_header( 3854 model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3855 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3856 return WaitUntilTimeFromHeaderBackoffStrategy( 3857 header=model.header, 3858 parameters=model.parameters or {}, 3859 config=config, 3860 min_wait=model.min_wait, 3861 regex=model.regex, 3862 ) 3863 3864 def get_message_repository(self) -> MessageRepository: 3865 return self._message_repository 3866 3867 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 3868 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 3869 3870 @staticmethod 3871 def create_components_mapping_definition( 3872 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 3873 ) -> ComponentMappingDefinition: 3874 interpolated_value = InterpolatedString.create( 3875 model.value, parameters=model.parameters or {} 3876 ) 3877 field_path = [ 3878 InterpolatedString.create(path, parameters=model.parameters or {}) 3879 for path in model.field_path 3880 ] 3881 return ComponentMappingDefinition( 3882 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 3883 value=interpolated_value, 3884 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 3885 create_or_update=model.create_or_update, 3886 condition=model.condition, 3887 
parameters=model.parameters or {}, 3888 ) 3889 3890 def create_http_components_resolver( 3891 self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None 3892 ) -> Any: 3893 retriever = self._create_component_from_model( 3894 model=model.retriever, 3895 config=config, 3896 name=f"{stream_name if stream_name else '__http_components_resolver'}", 3897 primary_key=None, 3898 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 3899 transformations=[], 3900 ) 3901 3902 components_mapping = [] 3903 for component_mapping_definition_model in model.components_mapping: 3904 if component_mapping_definition_model.condition: 3905 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 3906 components_mapping.append( 3907 self._create_component_from_model( 3908 model=component_mapping_definition_model, 3909 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3910 component_mapping_definition_model.value_type 3911 ), 3912 config=config, 3913 ) 3914 ) 3915 3916 return HttpComponentsResolver( 3917 retriever=retriever, 3918 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 3919 config=config, 3920 components_mapping=components_mapping, 3921 parameters=model.parameters or {}, 3922 ) 3923 3924 @staticmethod 3925 def create_stream_config( 3926 model: StreamConfigModel, config: Config, **kwargs: Any 3927 ) -> StreamConfig: 3928 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 3929 [x for x in model.configs_pointer] if model.configs_pointer else [] 3930 ) 3931 3932 return StreamConfig( 3933 configs_pointer=model_configs_pointer, 3934 default_values=model.default_values, 3935 parameters=model.parameters or {}, 3936 ) 3937 3938 def create_config_components_resolver( 3939 self, 3940 model: ConfigComponentsResolverModel, 3941 config: Config, 3942 ) -> Any: 3943 model_stream_configs = ( 3944 model.stream_config if isinstance(model.stream_config, list) else [model.stream_config] 3945 ) 3946 3947 stream_configs = [ 3948 self._create_component_from_model( 3949 stream_config, config=config, parameters=model.parameters or {} 3950 ) 3951 for stream_config in model_stream_configs 3952 ] 3953 3954 components_mapping = [ 3955 self._create_component_from_model( 3956 model=components_mapping_definition_model, 3957 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3958 components_mapping_definition_model.value_type 3959 ), 3960 config=config, 3961 parameters=model.parameters, 3962 ) 3963 for components_mapping_definition_model in model.components_mapping 3964 ] 3965 3966 return ConfigComponentsResolver( 3967 stream_configs=stream_configs, 3968 config=config, 3969 components_mapping=components_mapping, 3970 parameters=model.parameters or {}, 3971 ) 3972 3973 def create_parametrized_components_resolver( 3974 self, 3975 model: ParametrizedComponentsResolverModel, 3976 config: Config, 3977 ) -> ParametrizedComponentsResolver: 3978 stream_parameters = StreamParametersDefinition( 3979 list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream 3980 ) 3981 3982 components_mapping = [] 3983 for components_mapping_definition_model in model.components_mapping: 3984 if components_mapping_definition_model.condition: 3985 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 3986 components_mapping.append( 3987 self._create_component_from_model( 3988 model=components_mapping_definition_model, 3989 
value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3990 components_mapping_definition_model.value_type 3991 ), 3992 config=config, 3993 ) 3994 ) 3995 return ParametrizedComponentsResolver( 3996 stream_parameters=stream_parameters, 3997 config=config, 3998 components_mapping=components_mapping, 3999 parameters=model.parameters or {}, 4000 ) 4001 4002 _UNSUPPORTED_DECODER_ERROR = ( 4003 "Specified decoder of {decoder_type} is not supported for pagination. " 4004 "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. " 4005 "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 4006 ) 4007 4008 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 4009 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 4010 return True 4011 elif isinstance(decoder, CompositeRawDecoder): 4012 return self._is_supported_parser_for_pagination(decoder.parser) 4013 else: 4014 return False 4015 4016 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 4017 if isinstance(parser, JsonParser): 4018 return True 4019 elif isinstance(parser, GzipParser): 4020 return isinstance(parser.inner_parser, JsonParser) 4021 else: 4022 return False 4023 4024 def create_http_api_budget( 4025 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 4026 ) -> HttpAPIBudget: 4027 policies = [ 4028 self._create_component_from_model(model=policy, config=config) 4029 for policy in model.policies 4030 ] 4031 4032 return HttpAPIBudget( 4033 policies=policies, 4034 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 4035 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 4036 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 4037 ) 4038 4039 def create_fixed_window_call_rate_policy( 4040 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 4041 ) -> FixedWindowCallRatePolicy: 4042 matchers = [ 4043 self._create_component_from_model(model=matcher, config=config) 4044 for matcher in model.matchers 4045 ] 4046 4047 # Set the initial reset timestamp to 10 days from now. 4048 # This value will be updated by the first request.
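# A minimal sketch of the kind of manifest definition this method consumes; the concrete values
# and the matcher pattern below are illustrative, not taken from any real connector. `period` is
# an ISO 8601 duration string parsed with isodate.parse_duration:
#
#     definition = {
#         "type": "FixedWindowCallRatePolicy",
#         "period": "PT1H",     # one-hour window
#         "call_limit": 1000,   # at most 1000 calls per window
#         "matchers": [
#             {"type": "HttpRequestRegexMatcher", "url_path_pattern": "/v1/items"}
#         ],
#     }
#     policy = ModelToComponentFactory().create_component(
#         model_type=FixedWindowCallRatePolicyModel,
#         component_definition=definition,
#         config={},
#     )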
4049 return FixedWindowCallRatePolicy( 4050 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 4051 period=parse_duration(model.period), 4052 call_limit=model.call_limit, 4053 matchers=matchers, 4054 ) 4055 4056 def create_file_uploader( 4057 self, model: FileUploaderModel, config: Config, **kwargs: Any 4058 ) -> FileUploader: 4059 name = "File Uploader" 4060 requester = self._create_component_from_model( 4061 model=model.requester, 4062 config=config, 4063 name=name, 4064 **kwargs, 4065 ) 4066 download_target_extractor = self._create_component_from_model( 4067 model=model.download_target_extractor, 4068 config=config, 4069 name=name, 4070 **kwargs, 4071 ) 4072 emit_connector_builder_messages = self._emit_connector_builder_messages 4073 file_uploader = DefaultFileUploader( 4074 requester=requester, 4075 download_target_extractor=download_target_extractor, 4076 config=config, 4077 file_writer=NoopFileWriter() 4078 if emit_connector_builder_messages 4079 else LocalFileSystemFileWriter(), 4080 parameters=model.parameters or {}, 4081 filename_extractor=model.filename_extractor if model.filename_extractor else None, 4082 ) 4083 4084 return ( 4085 ConnectorBuilderFileUploader(file_uploader) 4086 if emit_connector_builder_messages 4087 else file_uploader 4088 ) 4089 4090 def create_moving_window_call_rate_policy( 4091 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 4092 ) -> MovingWindowCallRatePolicy: 4093 rates = [ 4094 self._create_component_from_model(model=rate, config=config) for rate in model.rates 4095 ] 4096 matchers = [ 4097 self._create_component_from_model(model=matcher, config=config) 4098 for matcher in model.matchers 4099 ] 4100 return MovingWindowCallRatePolicy( 4101 rates=rates, 4102 matchers=matchers, 4103 ) 4104 4105 def create_unlimited_call_rate_policy( 4106 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 4107 ) -> UnlimitedCallRatePolicy: 4108 matchers = [ 4109 self._create_component_from_model(model=matcher, config=config) 4110 for matcher in model.matchers 4111 ] 4112 4113 return UnlimitedCallRatePolicy( 4114 matchers=matchers, 4115 ) 4116 4117 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 4118 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 4119 return Rate( 4120 limit=int(interpolated_limit.eval(config=config)), 4121 interval=parse_duration(model.interval), 4122 ) 4123 4124 def create_http_request_matcher( 4125 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 4126 ) -> HttpRequestRegexMatcher: 4127 return HttpRequestRegexMatcher( 4128 method=model.method, 4129 url_base=model.url_base, 4130 url_path_pattern=model.url_path_pattern, 4131 params=model.params, 4132 headers=model.headers, 4133 ) 4134 4135 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 4136 self._api_budget = self.create_component( 4137 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 4138 ) 4139 4140 def create_grouping_partition_router( 4141 self, 4142 model: GroupingPartitionRouterModel, 4143 config: Config, 4144 *, 4145 stream_name: str, 4146 **kwargs: Any, 4147 ) -> GroupingPartitionRouter: 4148 underlying_router = self._create_component_from_model( 4149 model=model.underlying_partition_router, 4150 config=config, 4151 stream_name=stream_name, 4152 **kwargs, 4153 ) 4154 if model.group_size < 1: 4155 raise ValueError(f"Group size must be greater than 0, got 
{model.group_size}") 4156 4157 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 4158 # because they are specific to individual partitions and cannot be aggregated or handled 4159 # when grouping, potentially leading to incorrect API calls. Any request customization 4160 # should be managed at the stream level through the requester's configuration. 4161 if isinstance(underlying_router, SubstreamPartitionRouter): 4162 if any( 4163 parent_config.request_option 4164 for parent_config in underlying_router.parent_stream_configs 4165 ): 4166 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4167 4168 if isinstance(underlying_router, ListPartitionRouter): 4169 if underlying_router.request_option: 4170 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4171 4172 return GroupingPartitionRouter( 4173 group_size=model.group_size, 4174 underlying_partition_router=underlying_router, 4175 deduplicate=model.deduplicate if model.deduplicate is not None else True, 4176 config=config, 4177 )
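# A minimal usage sketch of the factory's public entry point, create_component (documented
# further below); the manifest snippet and config values here are illustrative, not taken from
# any real connector:
#
#     factory = ModelToComponentFactory()
#     rate = factory.create_component(
#         model_type=RateModel,
#         component_definition={
#             "type": "Rate",  # must match the model class name
#             "limit": "{{ config['rate_limit'] }}",
#             "interval": "PT1M",
#         },
#         config={"rate_limit": 10},
#     )
#     # roughly equivalent to Rate(limit=10, interval=datetime.timedelta(minutes=1))
#
# The "type" field has to equal the model class name, and nested definitions are resolved
# recursively through PYDANTIC_MODEL_TO_CONSTRUCTOR.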
649class ModelToComponentFactory: 650 EPOCH_DATETIME_FORMAT = "%s" 651 652 def __init__( 653 self, 654 limit_pages_fetched_per_slice: Optional[int] = None, 655 limit_slices_fetched: Optional[int] = None, 656 emit_connector_builder_messages: bool = False, 657 disable_retries: bool = False, 658 disable_cache: bool = False, 659 message_repository: Optional[MessageRepository] = None, 660 connector_state_manager: Optional[ConnectorStateManager] = None, 661 max_concurrent_async_job_count: Optional[int] = None, 662 ): 663 self._init_mappings() 664 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 665 self._limit_slices_fetched = limit_slices_fetched 666 self._emit_connector_builder_messages = emit_connector_builder_messages 667 self._disable_retries = disable_retries 668 self._disable_cache = disable_cache 669 self._message_repository = message_repository or InMemoryMessageRepository( 670 self._evaluate_log_level(emit_connector_builder_messages) 671 ) 672 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 673 self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None 674 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) 675 # placeholder for deprecation warnings 676 self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = [] 677 678 def _init_mappings(self) -> None: 679 self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { 680 AddedFieldDefinitionModel: self.create_added_field_definition, 681 AddFieldsModel: self.create_add_fields, 682 ApiKeyAuthenticatorModel: self.create_api_key_authenticator, 683 BasicHttpAuthenticatorModel: self.create_basic_http_authenticator, 684 BearerAuthenticatorModel: self.create_bearer_authenticator, 685 CheckStreamModel: self.create_check_stream, 686 DynamicStreamCheckConfigModel: self.create_dynamic_stream_check_config, 687 CheckDynamicStreamModel: self.create_check_dynamic_stream, 688 CompositeErrorHandlerModel: self.create_composite_error_handler, 689 ConcurrencyLevelModel: self.create_concurrency_level, 690 ConfigMigrationModel: self.create_config_migration, 691 ConfigAddFieldsModel: self.create_config_add_fields, 692 ConfigRemapFieldModel: self.create_config_remap_field, 693 ConfigRemoveFieldsModel: self.create_config_remove_fields, 694 ConstantBackoffStrategyModel: self.create_constant_backoff_strategy, 695 CsvDecoderModel: self.create_csv_decoder, 696 CursorPaginationModel: self.create_cursor_pagination, 697 CustomAuthenticatorModel: self.create_custom_component, 698 CustomBackoffStrategyModel: self.create_custom_component, 699 CustomDecoderModel: self.create_custom_component, 700 CustomErrorHandlerModel: self.create_custom_component, 701 CustomRecordExtractorModel: self.create_custom_component, 702 CustomRecordFilterModel: self.create_custom_component, 703 CustomRequesterModel: self.create_custom_component, 704 CustomRetrieverModel: self.create_custom_component, 705 CustomSchemaLoader: self.create_custom_component, 706 CustomSchemaNormalizationModel: self.create_custom_component, 707 CustomStateMigration: self.create_custom_component, 708 CustomPaginationStrategyModel: self.create_custom_component, 709 CustomPartitionRouterModel: self.create_custom_component, 710 CustomTransformationModel: self.create_custom_component, 711 CustomValidationStrategyModel: self.create_custom_component, 712 CustomConfigTransformationModel: self.create_custom_component, 713 DatetimeBasedCursorModel: self.create_datetime_based_cursor, 714 
DeclarativeStreamModel: self.create_default_stream, 715 DefaultErrorHandlerModel: self.create_default_error_handler, 716 DefaultPaginatorModel: self.create_default_paginator, 717 DpathExtractorModel: self.create_dpath_extractor, 718 DpathValidatorModel: self.create_dpath_validator, 719 ResponseToFileExtractorModel: self.create_response_to_file_extractor, 720 ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy, 721 SessionTokenAuthenticatorModel: self.create_session_token_authenticator, 722 GroupByKeyMergeStrategyModel: self.create_group_by_key, 723 HttpRequesterModel: self.create_http_requester, 724 HttpResponseFilterModel: self.create_http_response_filter, 725 InlineSchemaLoaderModel: self.create_inline_schema_loader, 726 JsonDecoderModel: self.create_json_decoder, 727 JsonlDecoderModel: self.create_jsonl_decoder, 728 GzipDecoderModel: self.create_gzip_decoder, 729 KeysToLowerModel: self.create_keys_to_lower_transformation, 730 KeysToSnakeCaseModel: self.create_keys_to_snake_transformation, 731 KeysReplaceModel: self.create_keys_replace_transformation, 732 FlattenFieldsModel: self.create_flatten_fields, 733 DpathFlattenFieldsModel: self.create_dpath_flatten_fields, 734 IterableDecoderModel: self.create_iterable_decoder, 735 IncrementingCountCursorModel: self.create_incrementing_count_cursor, 736 XmlDecoderModel: self.create_xml_decoder, 737 JsonFileSchemaLoaderModel: self.create_json_file_schema_loader, 738 DynamicSchemaLoaderModel: self.create_dynamic_schema_loader, 739 SchemaTypeIdentifierModel: self.create_schema_type_identifier, 740 TypesMapModel: self.create_types_map, 741 ComplexFieldTypeModel: self.create_complex_field_type, 742 JwtAuthenticatorModel: self.create_jwt_authenticator, 743 LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration, 744 ListPartitionRouterModel: self.create_list_partition_router, 745 MinMaxDatetimeModel: self.create_min_max_datetime, 746 NoAuthModel: self.create_no_auth, 747 NoPaginationModel: self.create_no_pagination, 748 OAuthAuthenticatorModel: self.create_oauth_authenticator, 749 OffsetIncrementModel: self.create_offset_increment, 750 PageIncrementModel: self.create_page_increment, 751 ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper, 752 PredicateValidatorModel: self.create_predicate_validator, 753 PropertiesFromEndpointModel: self.create_properties_from_endpoint, 754 PropertyChunkingModel: self.create_property_chunking, 755 QueryPropertiesModel: self.create_query_properties, 756 RecordFilterModel: self.create_record_filter, 757 RecordSelectorModel: self.create_record_selector, 758 RemoveFieldsModel: self.create_remove_fields, 759 RequestPathModel: self.create_request_path, 760 RequestOptionModel: self.create_request_option, 761 LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator, 762 SelectiveAuthenticatorModel: self.create_selective_authenticator, 763 SimpleRetrieverModel: self.create_simple_retriever, 764 StateDelegatingStreamModel: self.create_state_delegating_stream, 765 SpecModel: self.create_spec, 766 SubstreamPartitionRouterModel: self.create_substream_partition_router, 767 ValidateAdheresToSchemaModel: self.create_validate_adheres_to_schema, 768 WaitTimeFromHeaderModel: self.create_wait_time_from_header, 769 WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header, 770 AsyncRetrieverModel: self.create_async_retriever, 771 HttpComponentsResolverModel: self.create_http_components_resolver, 772 
ConfigComponentsResolverModel: self.create_config_components_resolver, 773 ParametrizedComponentsResolverModel: self.create_parametrized_components_resolver, 774 StreamConfigModel: self.create_stream_config, 775 ComponentMappingDefinitionModel: self.create_components_mapping_definition, 776 ZipfileDecoderModel: self.create_zipfile_decoder, 777 HTTPAPIBudgetModel: self.create_http_api_budget, 778 FileUploaderModel: self.create_file_uploader, 779 FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy, 780 MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy, 781 UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy, 782 RateModel: self.create_rate, 783 HttpRequestRegexMatcherModel: self.create_http_request_matcher, 784 GroupingPartitionRouterModel: self.create_grouping_partition_router, 785 } 786 787 # Needed for the case where we need to perform a second parse on the fields of a custom component 788 self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR} 789 790 def create_component( 791 self, 792 model_type: Type[BaseModel], 793 component_definition: ComponentDefinition, 794 config: Config, 795 **kwargs: Any, 796 ) -> Any: 797 """ 798 Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and 799 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating 800 creating declarative components from that model. 801 802 :param model_type: The type of declarative component that is being initialized 803 :param component_definition: The mapping that represents a declarative component 804 :param config: The connector config that is provided by the customer 805 :return: The declarative component to be used at runtime 806 """ 807 808 component_type = component_definition.get("type") 809 if component_definition.get("type") != model_type.__name__: 810 raise ValueError( 811 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 812 ) 813 814 declarative_component_model = model_type.parse_obj(component_definition) 815 816 if not isinstance(declarative_component_model, model_type): 817 raise ValueError( 818 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 819 ) 820 821 return self._create_component_from_model( 822 model=declarative_component_model, config=config, **kwargs 823 ) 824 825 def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any: 826 if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR: 827 raise ValueError( 828 f"{model.__class__} with attributes {model} is not a valid component type" 829 ) 830 component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__) 831 if not component_constructor: 832 raise ValueError(f"Could not find constructor for {model.__class__}") 833 834 # collect deprecation warnings for supported models. 835 if isinstance(model, BaseModelWithDeprecations): 836 self._collect_model_deprecations(model) 837 838 return component_constructor(model=model, config=config, **kwargs) 839 840 def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]: 841 """ 842 Returns the deprecation warnings that were collected during the creation of components. 
    def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]:
        """
        Returns the deprecation warnings that were collected during the creation of components.
        """
        return self._collected_deprecation_logs

    def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None:
        """
        Collects deprecation logs from the given model and appends any new logs to the internal collection.

        This method checks if the provided model has deprecation logs (identified by the presence of the
        DEPRECATION_LOGS_TAG attribute and a non-None `_deprecation_logs` property). It iterates through each
        deprecation log in the model and appends it to the `_collected_deprecation_logs` list if it has not
        already been collected, ensuring that duplicate logs are avoided.

        Args:
            model (BaseModelWithDeprecations): The model instance from which to collect deprecation logs.
        """
        if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None:
            for log in model._deprecation_logs:
                # avoid duplicates for deprecation logs observed.
                if log not in self._collected_deprecation_logs:
                    self._collected_deprecation_logs.append(log)

    def create_config_migration(
        self, model: ConfigMigrationModel, config: Config
    ) -> ConfigMigration:
        transformations: List[ConfigTransformation] = [
            self._create_component_from_model(transformation, config)
            for transformation in model.transformations
        ]

        return ConfigMigration(
            description=model.description,
            transformations=transformations,
        )

    def create_config_add_fields(
        self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigAddFields:
        fields = [self._create_component_from_model(field, config) for field in model.fields]
        return ConfigAddFields(
            fields=fields,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remove_fields(
        model: ConfigRemoveFieldsModel, config: Config, **kwargs: Any
    ) -> ConfigRemoveFields:
        return ConfigRemoveFields(
            field_pointers=model.field_pointers,
            condition=model.condition or "",
        )

    @staticmethod
    def create_config_remap_field(
        model: ConfigRemapFieldModel, config: Config, **kwargs: Any
    ) -> ConfigRemapField:
        mapping = cast(Mapping[str, Any], model.map)
        return ConfigRemapField(
            map=mapping,
            field_path=model.field_path,
            config=config,
        )

    def create_dpath_validator(self, model: DpathValidatorModel, config: Config) -> DpathValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return DpathValidator(
            field_path=model.field_path,
            strategy=strategy,
        )

    def create_predicate_validator(
        self, model: PredicateValidatorModel, config: Config
    ) -> PredicateValidator:
        strategy = self._create_component_from_model(model.validation_strategy, config)

        return PredicateValidator(
            value=model.value,
            strategy=strategy,
        )

    @staticmethod
    def create_validate_adheres_to_schema(
        model: ValidateAdheresToSchemaModel, config: Config, **kwargs: Any
    ) -> ValidateAdheresToSchema:
        base_schema = cast(Mapping[str, Any], model.base_schema)
        return ValidateAdheresToSchema(
            schema=base_schema,
        )
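    # Illustrative sketch for the config transformation helpers above (assumption: the field
    # names and values below are invented for illustration and are not from this module):
    #
    #     remap = ConfigRemapField(
    #         map={"legacy_plan": "standard"},
    #         field_path=["subscription", "plan"],
    #         config={"subscription": {"plan": "legacy_plan"}},
    #     )
    #     # When the migration runs, the value at config["subscription"]["plan"] would be
    #     # remapped to "standard" according to the mapping above.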
    @staticmethod
    def create_added_field_definition(
        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
    ) -> AddedFieldDefinition:
        interpolated_value = InterpolatedString.create(
            model.value, parameters=model.parameters or {}
        )
        return AddedFieldDefinition(
            path=model.path,
            value=interpolated_value,
            value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type),
            parameters=model.parameters or {},
        )

    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
        added_field_definitions = [
            self._create_component_from_model(
                model=added_field_definition_model,
                value_type=ModelToComponentFactory._json_schema_type_name_to_type(
                    added_field_definition_model.value_type
                ),
                config=config,
            )
            for added_field_definition_model in model.fields
        ]
        return AddFields(
            fields=added_field_definitions,
            condition=model.condition or "",
            parameters=model.parameters or {},
        )

    def create_keys_to_lower_transformation(
        self, model: KeysToLowerModel, config: Config, **kwargs: Any
    ) -> KeysToLowerTransformation:
        return KeysToLowerTransformation()

    def create_keys_to_snake_transformation(
        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
    ) -> KeysToSnakeCaseTransformation:
        return KeysToSnakeCaseTransformation()

    def create_keys_replace_transformation(
        self, model: KeysReplaceModel, config: Config, **kwargs: Any
    ) -> KeysReplaceTransformation:
        return KeysReplaceTransformation(
            old=model.old, new=model.new, parameters=model.parameters or {}
        )

    def create_flatten_fields(
        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
    ) -> FlattenFields:
        return FlattenFields(
            flatten_lists=model.flatten_lists if model.flatten_lists is not None else True
        )

    def create_dpath_flatten_fields(
        self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any
    ) -> DpathFlattenFields:
        model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path]
        key_transformation = (
            KeyTransformation(
                config=config,
                prefix=model.key_transformation.prefix,
                suffix=model.key_transformation.suffix,
                parameters=model.parameters or {},
            )
            if model.key_transformation is not None
            else None
        )
        return DpathFlattenFields(
            config=config,
            field_path=model_field_path,
            delete_origin_value=model.delete_origin_value
            if model.delete_origin_value is not None
            else False,
            replace_record=model.replace_record if model.replace_record is not None else False,
            key_transformation=key_transformation,
            parameters=model.parameters or {},
        )

    @staticmethod
    def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
        if not value_type:
            return None
        names_to_types = {
            ValueType.string: str,
            ValueType.number: float,
            ValueType.integer: int,
            ValueType.boolean: bool,
        }
        return names_to_types[value_type]
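    # Quick reference for `_json_schema_type_name_to_type` above (derived directly from the
    # mapping it defines; the calls below are illustrative only):
    #
    #     ModelToComponentFactory._json_schema_type_name_to_type(ValueType.number)   # -> float
    #     ModelToComponentFactory._json_schema_type_name_to_type(ValueType.boolean)  # -> bool
    #     ModelToComponentFactory._json_schema_type_name_to_type(None)               # -> None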
    def create_api_key_authenticator(
        self,
        model: ApiKeyAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> ApiKeyAuthenticator:
        if model.inject_into is None and model.header is None:
            raise ValueError(
                "Expected either inject_into or header to be set for ApiKeyAuthenticator"
            )

        if model.inject_into is not None and model.header is not None:
            raise ValueError(
                "inject_into and header cannot both be set for ApiKeyAuthenticator - remove the deprecated header option"
            )

        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )

        request_option = (
            self._create_component_from_model(
                model.inject_into, config, parameters=model.parameters or {}
            )
            if model.inject_into
            else RequestOption(
                inject_into=RequestOptionType.header,
                field_name=model.header or "",
                parameters=model.parameters or {},
            )
        )

        return ApiKeyAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            request_option=request_option,
            config=config,
            parameters=model.parameters or {},
        )
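    # Illustrative sketch for `create_api_key_authenticator` above (assumption: the component
    # definition below is a plausible manifest snippet, not taken from this module):
    #
    #     definition = {
    #         "type": "ApiKeyAuthenticator",
    #         "api_token": "{{ config['api_key'] }}",
    #         "inject_into": {
    #             "type": "RequestOption",
    #             "inject_into": "header",
    #             "field_name": "X-API-KEY",
    #         },
    #     }
    #     auth = factory.create_component(ApiKeyAuthenticatorModel, definition, config)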
    def create_legacy_to_per_partition_state_migration(
        self,
        model: LegacyToPerPartitionStateMigrationModel,
        config: Mapping[str, Any],
        declarative_stream: DeclarativeStreamModel,
    ) -> LegacyToPerPartitionStateMigration:
        retriever = declarative_stream.retriever
        if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}"
            )
        partition_router = retriever.partition_router
        if not isinstance(
            partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)
        ):
            raise ValueError(
                f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}"
            )
        if not hasattr(partition_router, "parent_stream_configs"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration."
            )

        if not hasattr(declarative_stream, "incremental_sync"):
            raise ValueError(
                "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration."
            )

        return LegacyToPerPartitionStateMigration(
            partition_router,  # type: ignore # was already checked above
            declarative_stream.incremental_sync,  # type: ignore # was already checked. Migration can be applied only to incremental streams.
            config,
            declarative_stream.parameters,  # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]
        )

    def create_session_token_authenticator(
        self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any
    ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]:
        decoder = (
            self._create_component_from_model(model=model.decoder, config=config)
            if model.decoder
            else JsonDecoder(parameters={})
        )
        login_requester = self._create_component_from_model(
            model=model.login_requester,
            config=config,
            name=f"{name}_login_requester",
            decoder=decoder,
        )
        token_provider = SessionTokenProvider(
            login_requester=login_requester,
            session_token_path=model.session_token_path,
            expiration_duration=parse_duration(model.expiration_duration)
            if model.expiration_duration
            else None,
            parameters=model.parameters or {},
            message_repository=self._message_repository,
            decoder=decoder,
        )
        if model.request_authentication.type == "Bearer":
            return ModelToComponentFactory.create_bearer_authenticator(
                BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""),  # type: ignore # $parameters has a default value
                config,
                token_provider=token_provider,
            )
        else:
            return self.create_api_key_authenticator(
                ApiKeyAuthenticatorModel(
                    type="ApiKeyAuthenticator",
                    api_token="",
                    inject_into=model.request_authentication.inject_into,
                ),  # type: ignore # $parameters and headers default to None
                config=config,
                token_provider=token_provider,
            )

    @staticmethod
    def create_basic_http_authenticator(
        model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any
    ) -> BasicHttpAuthenticator:
        return BasicHttpAuthenticator(
            password=model.password or "",
            username=model.username,
            config=config,
            parameters=model.parameters or {},
        )
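    # Illustrative sketch for `create_basic_http_authenticator` above (assumption: the
    # interpolated config keys are examples, not values taken from this module):
    #
    #     auth = ModelToComponentFactory.create_basic_http_authenticator(
    #         BasicHttpAuthenticatorModel(
    #             type="BasicHttpAuthenticator",
    #             username="{{ config['username'] }}",
    #             password="{{ config['password'] }}",
    #         ),
    #         config=config,
    #     )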
    @staticmethod
    def create_bearer_authenticator(
        model: BearerAuthenticatorModel,
        config: Config,
        token_provider: Optional[TokenProvider] = None,
        **kwargs: Any,
    ) -> BearerAuthenticator:
        if token_provider is not None and model.api_token != "":
            raise ValueError(
                "If token_provider is set, api_token is ignored and has to be set to empty string."
            )
        return BearerAuthenticator(
            token_provider=(
                token_provider
                if token_provider is not None
                else InterpolatedStringTokenProvider(
                    api_token=model.api_token or "",
                    config=config,
                    parameters=model.parameters or {},
                )
            ),
            config=config,
            parameters=model.parameters or {},
        )

    @staticmethod
    def create_dynamic_stream_check_config(
        model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any
    ) -> DynamicStreamCheckConfig:
        return DynamicStreamCheckConfig(
            dynamic_stream_name=model.dynamic_stream_name,
            stream_count=model.stream_count or 0,
        )

    def create_check_stream(
        self, model: CheckStreamModel, config: Config, **kwargs: Any
    ) -> CheckStream:
        if model.dynamic_streams_check_configs is None and model.stream_names is None:
            raise ValueError(
                "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream"
            )

        dynamic_streams_check_configs = (
            [
                self._create_component_from_model(model=dynamic_stream_check_config, config=config)
                for dynamic_stream_check_config in model.dynamic_streams_check_configs
            ]
            if model.dynamic_streams_check_configs
            else []
        )

        return CheckStream(
            stream_names=model.stream_names or [],
            dynamic_streams_check_configs=dynamic_streams_check_configs,
            parameters={},
        )

    @staticmethod
    def create_check_dynamic_stream(
        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
    ) -> CheckDynamicStream:
        assert model.use_check_availability is not None  # for mypy

        use_check_availability = model.use_check_availability

        return CheckDynamicStream(
            stream_count=model.stream_count,
            use_check_availability=use_check_availability,
            parameters={},
        )

    def create_composite_error_handler(
        self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
    ) -> CompositeErrorHandler:
        error_handlers = [
            self._create_component_from_model(model=error_handler_model, config=config)
            for error_handler_model in model.error_handlers
        ]
        return CompositeErrorHandler(
            error_handlers=error_handlers, parameters=model.parameters or {}
        )

    @staticmethod
    def create_concurrency_level(
        model: ConcurrencyLevelModel, config: Config, **kwargs: Any
    ) -> ConcurrencyLevel:
        return ConcurrencyLevel(
            default_concurrency=model.default_concurrency,
            max_concurrency=model.max_concurrency,
            config=config,
            parameters={},
        )
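    # Illustrative sketch for `create_concurrency_level` above (the values and the
    # interpolated expression are assumptions for illustration, not defaults from this module):
    #
    #     level = ConcurrencyLevel(
    #         default_concurrency="{{ config['num_workers'] or 10 }}",
    #         max_concurrency=25,
    #         config=config,
    #         parameters={},
    #     )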
    @staticmethod
    def apply_stream_state_migrations(
        stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
    ) -> MutableMapping[str, Any]:
        if stream_state_migrations:
            for state_migration in stream_state_migrations:
                if state_migration.should_migrate(stream_state):
                    # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
                    stream_state = dict(state_migration.migrate(stream_state))
        return stream_state

    def create_concurrent_cursor_from_datetime_based_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: MutableMapping[str, Any],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        runtime_lookback_window: Optional[datetime.timedelta] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition`
        # instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to
        # the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91).
        # So now we have two cases:
        # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
        # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
        # We should change those interfaces to use the model once we clean up the code in CDS at which point the
        # parameter propagation should happen as part of the ModelToComponentFactory.
        if "$parameters" not in component_definition and "parameters" in component_definition:
            component_definition["$parameters"] = component_definition.get("parameters")  # type: ignore # This is a dict
        datetime_based_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
            )

        model_parameters = datetime_based_cursor_model.parameters or {}
        interpolated_cursor_field = InterpolatedString.create(
            datetime_based_cursor_model.cursor_field,
            parameters=model_parameters,
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        interpolated_partition_field_start = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_start or "start_time",
            parameters=model_parameters,
        )
        interpolated_partition_field_end = InterpolatedString.create(
            datetime_based_cursor_model.partition_field_end or "end_time",
            parameters=model_parameters,
        )

        slice_boundary_fields = (
            interpolated_partition_field_start.eval(config=config),
            interpolated_partition_field_end.eval(config=config),
        )

        datetime_format = datetime_based_cursor_model.datetime_format

        cursor_granularity = (
            parse_duration(datetime_based_cursor_model.cursor_granularity)
            if datetime_based_cursor_model.cursor_granularity
            else None
        )

        lookback_window = None
        interpolated_lookback_window = (
            InterpolatedString.create(
                datetime_based_cursor_model.lookback_window,
                parameters=model_parameters,
            )
            if datetime_based_cursor_model.lookback_window
            else None
        )
        if interpolated_lookback_window:
            evaluated_lookback_window = interpolated_lookback_window.eval(config=config)
            if evaluated_lookback_window:
                lookback_window = parse_duration(evaluated_lookback_window)

        connector_state_converter: DateTimeStreamStateConverter
        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )

        # Adjusts the stream state by applying the runtime lookback window.
        # This is used to ensure correct state handling in case of failed partitions.
        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
        if runtime_lookback_window and stream_state_value:
            new_stream_state = (
                connector_state_converter.parse_timestamp(stream_state_value)
                - runtime_lookback_window
            )
            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
                new_stream_state
            )

        start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
        if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
            start_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.start_datetime, config=config
            )
        else:
            start_date_runtime_value = datetime_based_cursor_model.start_datetime

        end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]]
        if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel):
            end_date_runtime_value = self.create_min_max_datetime(
                model=datetime_based_cursor_model.end_datetime, config=config
            )
        else:
            end_date_runtime_value = datetime_based_cursor_model.end_datetime

        interpolated_start_date = MinMaxDatetime.create(
            interpolated_string_or_min_max_datetime=start_date_runtime_value,
            parameters=datetime_based_cursor_model.parameters,
        )
        interpolated_end_date = (
            None
            if not end_date_runtime_value
            else MinMaxDatetime.create(
                end_date_runtime_value, datetime_based_cursor_model.parameters
            )
        )

        # If datetime format is not specified then start/end datetime should inherit it from the stream slicer
        if not interpolated_start_date.datetime_format:
            interpolated_start_date.datetime_format = datetime_format
        if interpolated_end_date and not interpolated_end_date.datetime_format:
            interpolated_end_date.datetime_format = datetime_format

        start_date = interpolated_start_date.get_datetime(config=config)
        end_date_provider = (
            partial(interpolated_end_date.get_datetime, config)
            if interpolated_end_date
            else connector_state_converter.get_end_provider()
        )

        if (
            datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity
        ) or (
            not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity
        ):
            raise ValueError(
                f"If step is defined, cursor_granularity should be as well and vice-versa. "
                f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`"
            )

        # When step is not defined, default to a step size from the starting date to the present moment
        step_length = datetime.timedelta.max
        interpolated_step = (
            InterpolatedString.create(
                datetime_based_cursor_model.step,
                parameters=model_parameters,
            )
            if datetime_based_cursor_model.step
            else None
        )
        if interpolated_step:
            evaluated_step = interpolated_step.eval(config)
            if evaluated_step:
                step_length = parse_duration(evaluated_step)

        clamping_strategy: ClampingStrategy = NoClamping()
        if datetime_based_cursor_model.clamping:
            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
            # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
            # object which we want to keep agnostic of being low-code
            target = InterpolatedString(
                string=datetime_based_cursor_model.clamping.target,
                parameters=model_parameters,
            )
            evaluated_target = target.eval(config=config)
            match evaluated_target:
                case "DAY":
                    clamping_strategy = DayClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        DayClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
                    )
                case "WEEK":
                    if (
                        not datetime_based_cursor_model.clamping.target_details
                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
                    ):
                        raise ValueError(
                            "Given WEEK clamping, weekday needs to be provided as target_details"
                        )
                    weekday = self._assemble_weekday(
                        datetime_based_cursor_model.clamping.target_details["weekday"]
                    )
                    clamping_strategy = WeekClampingStrategy(weekday)
                    end_date_provider = ClampingEndProvider(
                        WeekClampingStrategy(weekday, is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case "MONTH":
                    clamping_strategy = MonthClampingStrategy()
                    end_date_provider = ClampingEndProvider(
                        MonthClampingStrategy(is_ceiling=False),
                        end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
                        granularity=cursor_granularity or datetime.timedelta(days=1),
                    )
                case _:
                    raise ValueError(
                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
                    )

        return ConcurrentCursor(
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=message_repository or self._message_repository,
            connector_state_manager=self._connector_state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            slice_boundary_fields=slice_boundary_fields,
            start=start_date,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            end_provider=end_date_provider,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            lookback_window=lookback_window,
            slice_range=step_length,
            cursor_granularity=cursor_granularity,
            clamping_strategy=clamping_strategy,
        )

    def create_concurrent_cursor_from_incrementing_count_cursor(
        self,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: MutableMapping[str, Any],
        config: Config,
        message_repository: Optional[MessageRepository] = None,
        **kwargs: Any,
    ) -> ConcurrentCursor:
        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        incrementing_count_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
            )

        interpolated_start_value = (
            InterpolatedString.create(
                incrementing_count_cursor_model.start_value,  # type: ignore
                parameters=incrementing_count_cursor_model.parameters or {},
            )
            if incrementing_count_cursor_model.start_value
            else 0
        )

        interpolated_cursor_field = InterpolatedString.create(
            incrementing_count_cursor_model.cursor_field,
            parameters=incrementing_count_cursor_model.parameters or {},
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        connector_state_converter = IncrementingCountStreamStateConverter(
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
        )

        return ConcurrentCursor(
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=message_repository or self._message_repository,
            connector_state_manager=self._connector_state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            slice_boundary_fields=None,
            start=interpolated_start_value,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
            end_provider=connector_state_converter.get_end_provider(),  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
        )
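    # Illustrative note for the clamping logic in `create_concurrent_cursor_from_datetime_based_cursor`
    # above (the manifest values are assumptions for illustration only):
    #
    #     # clamping:
    #     #   target: "WEEK"
    #     #   target_details:
    #     #     weekday: "MONDAY"
    #
    # With such a definition, slice boundaries would be clamped to weekly windows anchored on
    # Mondays, with `cursor_granularity` (or a one-day default) used as the gap between
    # consecutive windows.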
    def _assemble_weekday(self, weekday: str) -> Weekday:
        match weekday:
            case "MONDAY":
                return Weekday.MONDAY
            case "TUESDAY":
                return Weekday.TUESDAY
            case "WEDNESDAY":
                return Weekday.WEDNESDAY
            case "THURSDAY":
                return Weekday.THURSDAY
            case "FRIDAY":
                return Weekday.FRIDAY
            case "SATURDAY":
                return Weekday.SATURDAY
            case "SUNDAY":
                return Weekday.SUNDAY
            case _:
                raise ValueError(f"Unknown weekday {weekday}")

    def create_concurrent_cursor_from_perpartition_cursor(
        self,
        state_manager: ConnectorStateManager,
        model_type: Type[BaseModel],
        component_definition: ComponentDefinition,
        stream_name: str,
        stream_namespace: Optional[str],
        config: Config,
        stream_state: MutableMapping[str, Any],
        partition_router: PartitionRouter,
        attempt_to_create_cursor_if_not_provided: bool = False,
        **kwargs: Any,
    ) -> ConcurrentPerPartitionCursor:
        component_type = component_definition.get("type")
        if component_definition.get("type") != model_type.__name__:
            raise ValueError(
                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
            )

        # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition`
        # instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to
        # the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91).
        # So now we have two cases:
        # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
        # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
        # We should change those interfaces to use the model once we clean up the code in CDS at which point the
        # parameter propagation should happen as part of the ModelToComponentFactory.
        if "$parameters" not in component_definition and "parameters" in component_definition:
            component_definition["$parameters"] = component_definition.get("parameters")  # type: ignore # This is a dict
        datetime_based_cursor_model = model_type.parse_obj(component_definition)

        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
            raise ValueError(
                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
            )
        interpolated_cursor_field = InterpolatedString.create(
            datetime_based_cursor_model.cursor_field,
            # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition`
            # instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to
            # the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91).
            # So now we have two cases:
            # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters`
            # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters`
            # We should change those interfaces to use the model once we clean up the code in CDS at which point the
            # parameter propagation should happen as part of the ModelToComponentFactory.
            parameters=datetime_based_cursor_model.parameters or {},
        )
        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))

        datetime_format = datetime_based_cursor_model.datetime_format

        cursor_granularity = (
            parse_duration(datetime_based_cursor_model.cursor_granularity)
            if datetime_based_cursor_model.cursor_granularity
            else None
        )

        connector_state_converter: DateTimeStreamStateConverter
        connector_state_converter = CustomFormatConcurrentStreamStateConverter(
            datetime_format=datetime_format,
            input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
            cursor_granularity=cursor_granularity,
        )

        # Create the cursor factory
        cursor_factory = ConcurrentCursorFactory(
            partial(
                self.create_concurrent_cursor_from_datetime_based_cursor,
                state_manager=state_manager,
                model_type=model_type,
                component_definition=component_definition,
                stream_name=stream_name,
                stream_namespace=stream_namespace,
                config=config,
                message_repository=NoopMessageRepository(),
            )
        )

        # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
        use_global_cursor = isinstance(
            partition_router, GroupingPartitionRouter
        ) or component_definition.get("global_substream_cursor", False)

        # Return the concurrent cursor and state converter
        return ConcurrentPerPartitionCursor(
            cursor_factory=cursor_factory,
            partition_router=partition_router,
            stream_name=stream_name,
            stream_namespace=stream_namespace,
            stream_state=stream_state,
            message_repository=self._message_repository,  # type: ignore
            connector_state_manager=state_manager,
            connector_state_converter=connector_state_converter,
            cursor_field=cursor_field,
            use_global_cursor=use_global_cursor,
            attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided,
        )

    @staticmethod
    def create_constant_backoff_strategy(
        model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
    ) -> ConstantBackoffStrategy:
        return ConstantBackoffStrategy(
            backoff_time_in_seconds=model.backoff_time_in_seconds,
            config=config,
            parameters=model.parameters or {},
        )

    def create_cursor_pagination(
        self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
    ) -> CursorPaginationStrategy:
        if isinstance(decoder, PaginationDecoderDecorator):
            inner_decoder = decoder.decoder
        else:
            inner_decoder = decoder
            decoder = PaginationDecoderDecorator(decoder=decoder)

        if self._is_supported_decoder_for_pagination(inner_decoder):
            decoder_to_use = decoder
        else:
            raise ValueError(
                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
            )

        return CursorPaginationStrategy(
            cursor_value=model.cursor_value,
            decoder=decoder_to_use,
            page_size=model.page_size,
            stop_condition=model.stop_condition,
            config=config,
            parameters=model.parameters or {},
        )
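    # Illustrative sketch for `create_cursor_pagination` above (assumption: the interpolated
    # `cursor_value` and `stop_condition` below are typical examples, not taken from this module):
    #
    #     strategy = CursorPaginationStrategy(
    #         cursor_value="{{ response.get('next_page_token') }}",
    #         decoder=PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})),
    #         page_size=100,
    #         stop_condition="{{ not response.get('next_page_token') }}",
    #         config=config,
    #         parameters={},
    #     )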
    def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any:
        """
        Generically creates a custom component based on the model type and a class_name reference to the custom Python class being
        instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor

        :param model: The Pydantic model of the custom component being created
        :param config: The custom defined connector config
        :return: The declarative component built from the Pydantic model to be used at runtime
        """
        custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
        component_fields = get_type_hints(custom_component_class)
        model_args = model.dict()
        model_args["config"] = config

        # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions
        # we defer to these arguments over the component's definition
        for key, arg in kwargs.items():
            model_args[key] = arg

        # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not
        # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to
        # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components
        for model_field, model_value in model_args.items():
            # If a custom component field doesn't have a type set, we try to use the type hints to infer the type
            if (
                isinstance(model_value, dict)
                and "type" not in model_value
                and model_field in component_fields
            ):
                derived_type = self._derive_component_type_from_type_hints(
                    component_fields.get(model_field)
                )
                if derived_type:
                    model_value["type"] = derived_type

            if self._is_component(model_value):
                model_args[model_field] = self._create_nested_component(
                    model,
                    model_field,
                    model_value,
                    config,
                    **kwargs,
                )
            elif isinstance(model_value, list):
                vals = []
                for v in model_value:
                    if isinstance(v, dict) and "type" not in v and model_field in component_fields:
                        derived_type = self._derive_component_type_from_type_hints(
                            component_fields.get(model_field)
                        )
                        if derived_type:
                            v["type"] = derived_type
                    if self._is_component(v):
                        vals.append(
                            self._create_nested_component(
                                model,
                                model_field,
                                v,
                                config,
                                **kwargs,
                            )
                        )
                    else:
                        vals.append(v)
                model_args[model_field] = vals

        kwargs = {
            class_field: model_args[class_field]
            for class_field in component_fields.keys()
            if class_field in model_args
        }
        return custom_component_class(**kwargs)
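    # Illustrative sketch for `create_custom_component` above (assumption: the class name and
    # extra field below are hypothetical, for illustration only):
    #
    #     definition = {
    #         "type": "CustomRecordExtractor",
    #         "class_name": "source_example.components.MyRecordExtractor",
    #         "some_custom_field": "value",
    #     }
    #     # The factory imports `source_example.components`, resolves `MyRecordExtractor`,
    #     # re-parses any nested component dicts into models, and passes only the keyword
    #     # arguments that match the class's type hints to its constructor.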
    @staticmethod
    def _get_class_from_fully_qualified_class_name(
        full_qualified_class_name: str,
    ) -> Any:
        """Get a class from its fully qualified name.

        If a custom components module is needed, we assume it is already registered - probably
        as `source_declarative_manifest.components` or `components`.

        Args:
            full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").

        Returns:
            Any: The class object.

        Raises:
            ValueError: If the class cannot be loaded.
        """
        split = full_qualified_class_name.split(".")
        module_name_full = ".".join(split[:-1])
        class_name = split[-1]

        try:
            module_ref = importlib.import_module(module_name_full)
        except ModuleNotFoundError as e:
            if split[0] == "source_declarative_manifest":
                # During testing, the modules containing the custom components are not moved to source_declarative_manifest.
                # In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append
                try:
                    import os

                    module_name_with_source_declarative_manifest = ".".join(split[1:-1])
                    module_ref = importlib.import_module(
                        module_name_with_source_declarative_manifest
                    )
                except ModuleNotFoundError:
                    raise ValueError(f"Could not load module `{module_name_full}`.") from e
            else:
                raise ValueError(f"Could not load module `{module_name_full}`.") from e

        try:
            return getattr(module_ref, class_name)
        except AttributeError as e:
            raise ValueError(
                f"Could not load class `{class_name}` from module `{module_name_full}`.",
            ) from e

    @staticmethod
    def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
        interface = field_type
        while True:
            origin = get_origin(interface)
            if origin:
                # Unnest types until we reach the raw type
                # List[T] -> T
                # Optional[List[T]] -> T
                args = get_args(interface)
                interface = args[0]
            else:
                break
        if isinstance(interface, type) and not ModelToComponentFactory.is_builtin_type(interface):
            return interface.__name__
        return None

    @staticmethod
    def is_builtin_type(cls: Optional[Type[Any]]) -> bool:
        if not cls:
            return False
        return cls.__module__ == "builtins"

    @staticmethod
    def _extract_missing_parameters(error: TypeError) -> List[str]:
        parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error))
        if parameter_search:
            return re.findall(r"\'(.+?)\'", parameter_search.group(1))
        else:
            return []

    def _create_nested_component(
        self, model: Any, model_field: str, model_value: Any, config: Config, **kwargs: Any
    ) -> Any:
        type_name = model_value.get("type", None)
        if not type_name:
            # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent
            return model_value

        model_type = self.TYPE_NAME_TO_MODEL.get(type_name, None)
        if model_type:
            parsed_model = model_type.parse_obj(model_value)
            try:
                # To improve usability of the language, certain fields are shared between components. This can come in the form of
                # a parent component passing some of its fields to a child component or the parent extracting fields from other child
                # components and passing it to others. One example is the DefaultPaginator referencing the HttpRequester url_base
                # while constructing a SimpleRetriever. However, custom components don't support this behavior because they are created
                # generically in create_custom_component(). This block allows developers to specify extra arguments in $parameters that
                # are needed by a component and could not be shared.
                model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__)
                constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs
                model_parameters = model_value.get("$parameters", {})
                matching_parameters = {
                    kwarg: model_parameters[kwarg]
                    for kwarg in constructor_kwargs
                    if kwarg in model_parameters
                }
                matching_kwargs = {
                    kwarg: kwargs[kwarg] for kwarg in constructor_kwargs if kwarg in kwargs
                }
                return self._create_component_from_model(
                    model=parsed_model, config=config, **(matching_parameters | matching_kwargs)
                )
            except TypeError as error:
                missing_parameters = self._extract_missing_parameters(error)
                if missing_parameters:
                    raise ValueError(
                        f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide "
                        + ", ".join(
                            (
                                f"{type_name}.$parameters.{parameter}"
                                for parameter in missing_parameters
                            )
                        )
                    )
                raise TypeError(
                    f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}"
                )
        else:
            raise ValueError(
                f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'"
            )

    @staticmethod
    def _is_component(model_value: Any) -> bool:
        return isinstance(model_value, dict) and model_value.get("type") is not None

    def create_datetime_based_cursor(
        self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any
    ) -> DatetimeBasedCursor:
        start_datetime: Union[str, MinMaxDatetime] = (
            model.start_datetime
            if isinstance(model.start_datetime, str)
            else self.create_min_max_datetime(model.start_datetime, config)
        )
        end_datetime: Union[str, MinMaxDatetime, None] = None
        if model.is_data_feed and model.end_datetime:
            raise ValueError("Data feed does not support end_datetime")
        if model.is_data_feed and model.is_client_side_incremental:
            raise ValueError(
                "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them."
            )
        if model.end_datetime:
            end_datetime = (
                model.end_datetime
                if isinstance(model.end_datetime, str)
                else self.create_min_max_datetime(model.end_datetime, config)
            )

        end_time_option = (
            self._create_component_from_model(
                model.end_time_option, config, parameters=model.parameters or {}
            )
            if model.end_time_option
            else None
        )
        start_time_option = (
            self._create_component_from_model(
                model.start_time_option, config, parameters=model.parameters or {}
            )
            if model.start_time_option
            else None
        )

        return DatetimeBasedCursor(
            cursor_field=model.cursor_field,
            cursor_datetime_formats=model.cursor_datetime_formats
            if model.cursor_datetime_formats
            else [],
            cursor_granularity=model.cursor_granularity,
            datetime_format=model.datetime_format,
            end_datetime=end_datetime,
            start_datetime=start_datetime,
            step=model.step,
            end_time_option=end_time_option,
            lookback_window=model.lookback_window,
            start_time_option=start_time_option,
            partition_field_end=model.partition_field_end,
            partition_field_start=model.partition_field_start,
            message_repository=self._message_repository,
            is_compare_strictly=model.is_compare_strictly,
            config=config,
            parameters=model.parameters or {},
        )

    def create_default_stream(
        self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
    ) -> AbstractStream:
        primary_key = model.primary_key.__root__ if model.primary_key else None
        self._migrate_state(model, config)

        partition_router = self._build_stream_slicer_from_partition_router(
            model.retriever,
            config,
            stream_name=model.name,
            **kwargs,
        )
        concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config)
        if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
            cursor_model: DatetimeBasedCursorModel = model.incremental_sync

            end_time_option = (
                self._create_component_from_model(
                    cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
                )
                if cursor_model.end_time_option
                else None
            )
            start_time_option = (
                self._create_component_from_model(
                    cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
                )
                if cursor_model.start_time_option
                else None
            )

            datetime_request_options_provider = DatetimeBasedRequestOptionsProvider(
                start_time_option=start_time_option,
                end_time_option=end_time_option,
                partition_field_start=cursor_model.partition_field_start,
                partition_field_end=cursor_model.partition_field_end,
                config=config,
                parameters=model.parameters or {},
            )
            request_options_provider = (
                datetime_request_options_provider
                if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor)
                else PerPartitionRequestOptionsProvider(
                    partition_router, datetime_request_options_provider
                )
            )
        elif model.incremental_sync and isinstance(
            model.incremental_sync, IncrementingCountCursorModel
        ):
            if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor):
                raise ValueError(
                    "PerPartition does not support per partition states because switching to global state is time based"
                )

            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
            start_time_option = (
                self._create_component_from_model(
                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
                    config,
                    parameters=cursor_model.parameters or {},
                )
                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
                else None
            )

            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
            partition_field_start = "start"

            request_options_provider = DatetimeBasedRequestOptionsProvider(
                start_time_option=start_time_option,
                partition_field_start=partition_field_start,
                config=config,
                parameters=model.parameters or {},
            )
        else:
            request_options_provider = None

        transformations = []
        if model.transformations:
            for transformation_model in model.transformations:
                transformations.append(
                    self._create_component_from_model(model=transformation_model, config=config)
                )
        file_uploader = None
        if model.file_uploader:
            file_uploader = self._create_component_from_model(
                model=model.file_uploader, config=config
            )

        stream_slicer: ConcurrentStreamSlicer = (
            partition_router
            if isinstance(concurrent_cursor, FinalStateCursor)
            else concurrent_cursor
        )
        retriever = self._create_component_from_model(
            model=model.retriever,
            config=config,
            name=model.name,
            primary_key=primary_key,
            request_options_provider=request_options_provider,
            stream_slicer=stream_slicer,
            partition_router=partition_router,
            stop_condition_cursor=concurrent_cursor
            if self._is_stop_condition_on_cursor(model)
            else None,
            client_side_incremental_sync={"cursor": concurrent_cursor}
            if self._is_client_side_filtering_enabled(model)
            else None,
            transformations=transformations,
            file_uploader=file_uploader,
            incremental_sync=model.incremental_sync,
        )
        if isinstance(retriever, AsyncRetriever):
            stream_slicer = retriever.stream_slicer

        schema_loader: Union[
            CompositeSchemaLoader,
            DefaultSchemaLoader,
            DynamicSchemaLoader,
            InlineSchemaLoader,
            JsonFileSchemaLoader,
        ]
        if model.schema_loader and isinstance(model.schema_loader, list):
            nested_schema_loaders = [
                self._create_component_from_model(model=nested_schema_loader, config=config)
                for nested_schema_loader in model.schema_loader
            ]
            schema_loader = CompositeSchemaLoader(
                schema_loaders=nested_schema_loaders, parameters={}
            )
        elif model.schema_loader:
            schema_loader = self._create_component_from_model(
                model=model.schema_loader,  # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models
                config=config,
            )
        else:
            options = model.parameters or {}
            if "name" not in options:
                options["name"] = model.name
            schema_loader = DefaultSchemaLoader(config=config, parameters=options)

        stream_name = model.name or ""
        return DefaultStream(
            partition_generator=StreamSlicerPartitionGenerator(
                DeclarativePartitionFactory(
                    stream_name,
                    schema_loader,
                    retriever,
                    self._message_repository,
                ),
                stream_slicer,
                slice_limit=self._limit_slices_fetched,
            ),
            name=stream_name,
            json_schema=schema_loader.get_json_schema,
            primary_key=get_primary_key_from_stream(primary_key),
            cursor_field=concurrent_cursor.cursor_field.cursor_field_key
            if hasattr(concurrent_cursor, "cursor_field")
            else "",  # FIXME we should have the cursor field as part of the interface of cursor
            logger=logging.getLogger(f"airbyte.{stream_name}"),
            cursor=concurrent_cursor,
            supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader),
        )

    def _migrate_state(self, model: DeclarativeStreamModel, config: Config) -> None:
        stream_name = model.name or ""
        stream_state = self._connector_state_manager.get_stream_state(
            stream_name=stream_name, namespace=None
        )
        if model.state_migrations:
            state_transformations = [
                self._create_component_from_model(state_migration, config, declarative_stream=model)
                for state_migration in model.state_migrations
            ]
        else:
            state_transformations = []
        stream_state = self.apply_stream_state_migrations(state_transformations, stream_state)
        self._connector_state_manager.update_state_for_stream(
            stream_name=stream_name, namespace=None, value=stream_state
        )

    def _is_stop_condition_on_cursor(self, model: DeclarativeStreamModel) -> bool:
        return bool(
            model.incremental_sync
            and hasattr(model.incremental_sync, "is_data_feed")
            and model.incremental_sync.is_data_feed
        )

    def _is_client_side_filtering_enabled(self, model: DeclarativeStreamModel) -> bool:
        return bool(
            model.incremental_sync
            and hasattr(model.incremental_sync, "is_client_side_incremental")
            and model.incremental_sync.is_client_side_incremental
        )

    def _build_stream_slicer_from_partition_router(
        self,
        model: Union[
            AsyncRetrieverModel,
            CustomRetrieverModel,
            SimpleRetrieverModel,
        ],
        config: Config,
        stream_name: Optional[str] = None,
        **kwargs: Any,
    ) -> PartitionRouter:
        if (
            hasattr(model, "partition_router")
            and isinstance(model, (SimpleRetrieverModel, AsyncRetrieverModel, CustomRetrieverModel))
            and model.partition_router
        ):
            stream_slicer_model = model.partition_router
            if isinstance(stream_slicer_model, list):
                return CartesianProductStreamSlicer(
                    [
                        self._create_component_from_model(
                            model=slicer, config=config, stream_name=stream_name or ""
                        )
                        for slicer in stream_slicer_model
                    ],
                    parameters={},
                )
            elif isinstance(stream_slicer_model, dict):
                # partition router comes from CustomRetrieverModel therefore has not been parsed as a model
                params = stream_slicer_model.get("$parameters")
                if not isinstance(params, dict):
                    params = {}
                    stream_slicer_model["$parameters"] = params

                if stream_name is not None:
                    params["stream_name"] = stream_name
                return self._create_nested_component(  # type: ignore[no-any-return] # There is no guarantee that this will return a stream slicer. If not, we expect an AttributeError during the call to `stream_slices`
                    model,
                    "partition_router",
                    stream_slicer_model,
                    config,
                    **kwargs,
                )
            else:
                return self._create_component_from_model(  # type: ignore[no-any-return] # A PartitionRouter will be created since stream_slicer_model is model.partition_router
                    model=stream_slicer_model, config=config, stream_name=stream_name or ""
                )
        return SinglePartitionRouter(parameters={})

    def _build_concurrent_cursor(
        self,
        model: DeclarativeStreamModel,
        stream_slicer: Optional[PartitionRouter],
        config: Config,
    ) -> Cursor:
        stream_name = model.name or ""
        stream_state = self._connector_state_manager.get_stream_state(stream_name, None)

        if (
            model.incremental_sync
            and stream_slicer
            and not isinstance(stream_slicer, SinglePartitionRouter)
        ):
            return self.create_concurrent_cursor_from_perpartition_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                state_manager=self._connector_state_manager,
                model_type=DatetimeBasedCursorModel,
                component_definition=model.incremental_sync.__dict__,
                stream_name=stream_name,
                stream_state=stream_state,
                stream_namespace=None,
                config=config or {},
                partition_router=stream_slicer,
                attempt_to_create_cursor_if_not_provided=True,  # FIXME can we remove that now?
            )
        elif model.incremental_sync:
            if type(model.incremental_sync) == IncrementingCountCursorModel:
                return self.create_concurrent_cursor_from_incrementing_count_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                    model_type=IncrementingCountCursorModel,
                    component_definition=model.incremental_sync.__dict__,
                    stream_name=stream_name,
                    stream_namespace=None,
                    stream_state=stream_state,
                    config=config or {},
                )
            elif type(model.incremental_sync) == DatetimeBasedCursorModel:
                return self.create_concurrent_cursor_from_datetime_based_cursor(  # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
                    model_type=type(model.incremental_sync),
                    component_definition=model.incremental_sync.__dict__,
                    stream_name=stream_name,
                    stream_namespace=None,
                    stream_state=stream_state,
                    config=config or {},
                    attempt_to_create_cursor_if_not_provided=True,
                )
            else:
                raise ValueError(
                    f"Incremental sync of type {type(model.incremental_sync)} is not supported"
                )
        return FinalStateCursor(stream_name, None, self._message_repository)

    def create_default_error_handler(
        self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
    ) -> DefaultErrorHandler:
        backoff_strategies = []
        if model.backoff_strategies:
            for backoff_strategy_model in model.backoff_strategies:
                backoff_strategies.append(
                    self._create_component_from_model(model=backoff_strategy_model, config=config)
                )

        response_filters = []
        if model.response_filters:
            for response_filter_model in model.response_filters:
                response_filters.append(
                    self._create_component_from_model(model=response_filter_model, config=config)
                )
        response_filters.append(
            HttpResponseFilter(config=config, parameters=model.parameters or {})
        )

        return DefaultErrorHandler(
            backoff_strategies=backoff_strategies,
            max_retries=model.max_retries,
            response_filters=response_filters,
            config=config,
            parameters=model.parameters or {},
        )

    def create_default_paginator(
        self,
        model: DefaultPaginatorModel,
        config: Config,
        *,
        url_base: str,
        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
        decoder: Optional[Decoder] = None,
        cursor_used_for_stop_condition: Optional[Cursor] = None,
    ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
        if decoder:
            if self._is_supported_decoder_for_pagination(decoder):
                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
            else:
                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
        else:
            decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
        page_size_option = (
            self._create_component_from_model(model=model.page_size_option, config=config)
            if model.page_size_option
            else None
        )
        page_token_option = (
            self._create_component_from_model(model=model.page_token_option, config=config)
            if model.page_token_option
            else None
        )
        pagination_strategy = self._create_component_from_model(
            model=model.pagination_strategy,
            config=config,
            decoder=decoder_to_use,
            extractor_model=extractor_model,
        )
        if cursor_used_for_stop_condition:
            pagination_strategy = StopConditionPaginationStrategyDecorator(
                pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition)
            )
        paginator = DefaultPaginator(
            decoder=decoder_to_use,
            page_size_option=page_size_option,
            page_token_option=page_token_option,
            pagination_strategy=pagination_strategy,
            url_base=url_base,
            config=config,
            parameters=model.parameters or {},
        )
        if self._limit_pages_fetched_per_slice:
            return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice)
        return paginator
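    # Note on `create_default_paginator` above: when `self._limit_pages_fetched_per_slice` is
    # set (e.g. for test reads), the paginator is wrapped so only that many pages are requested
    # per slice. A minimal sketch (the URL and model variable are illustrative assumptions):
    #
    #     paginator = factory.create_default_paginator(
    #         model=paginator_model,
    #         config=config,
    #         url_base="https://api.example.com/v1",
    #     )
    #     # -> DefaultPaginator, or PaginatorTestReadDecorator(paginator, limit) when a
    #     #    per-slice page limit has been configured.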
DpathExtractorModel, 2330 config: Config, 2331 decoder: Optional[Decoder] = None, 2332 **kwargs: Any, 2333 ) -> DpathExtractor: 2334 if decoder: 2335 decoder_to_use = decoder 2336 else: 2337 decoder_to_use = JsonDecoder(parameters={}) 2338 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2339 return DpathExtractor( 2340 decoder=decoder_to_use, 2341 field_path=model_field_path, 2342 config=config, 2343 parameters=model.parameters or {}, 2344 ) 2345 2346 @staticmethod 2347 def create_response_to_file_extractor( 2348 model: ResponseToFileExtractorModel, 2349 **kwargs: Any, 2350 ) -> ResponseToFileExtractor: 2351 return ResponseToFileExtractor(parameters=model.parameters or {}) 2352 2353 @staticmethod 2354 def create_exponential_backoff_strategy( 2355 model: ExponentialBackoffStrategyModel, config: Config 2356 ) -> ExponentialBackoffStrategy: 2357 return ExponentialBackoffStrategy( 2358 factor=model.factor or 5, parameters=model.parameters or {}, config=config 2359 ) 2360 2361 @staticmethod 2362 def create_group_by_key(model: GroupByKeyMergeStrategyModel, config: Config) -> GroupByKey: 2363 return GroupByKey(model.key, config=config, parameters=model.parameters or {}) 2364 2365 def create_http_requester( 2366 self, 2367 model: HttpRequesterModel, 2368 config: Config, 2369 decoder: Decoder = JsonDecoder(parameters={}), 2370 query_properties_key: Optional[str] = None, 2371 use_cache: Optional[bool] = None, 2372 *, 2373 name: str, 2374 ) -> HttpRequester: 2375 authenticator = ( 2376 self._create_component_from_model( 2377 model=model.authenticator, 2378 config=config, 2379 url_base=model.url or model.url_base, 2380 name=name, 2381 decoder=decoder, 2382 ) 2383 if model.authenticator 2384 else None 2385 ) 2386 error_handler = ( 2387 self._create_component_from_model(model=model.error_handler, config=config) 2388 if model.error_handler 2389 else DefaultErrorHandler( 2390 backoff_strategies=[], 2391 response_filters=[], 2392 config=config, 2393 parameters=model.parameters or {}, 2394 ) 2395 ) 2396 2397 api_budget = self._api_budget 2398 2399 # Removes QueryProperties components from the interpolated mappings because it has been designed 2400 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 2401 # instead of through jinja interpolation 2402 request_parameters: Optional[Union[str, Mapping[str, str]]] 2403 if isinstance(model.request_parameters, Mapping): 2404 request_parameters = self._remove_query_properties(model.request_parameters) 2405 else: 2406 request_parameters = model.request_parameters 2407 2408 request_options_provider = InterpolatedRequestOptionsProvider( 2409 request_body=model.request_body, 2410 request_body_data=model.request_body_data, 2411 request_body_json=model.request_body_json, 2412 request_headers=model.request_headers, 2413 request_parameters=request_parameters, 2414 query_properties_key=query_properties_key, 2415 config=config, 2416 parameters=model.parameters or {}, 2417 ) 2418 2419 assert model.use_cache is not None # for mypy 2420 assert model.http_method is not None # for mypy 2421 2422 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2423 2424 return HttpRequester( 2425 name=name, 2426 url=model.url, 2427 url_base=model.url_base, 2428 path=model.path, 2429 authenticator=authenticator, 2430 error_handler=error_handler, 2431 api_budget=api_budget, 2432 http_method=HttpMethod[model.http_method.value], 2433 request_options_provider=request_options_provider, 
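# Illustrative sketch (assumed names, not part of this module): the HttpRequester being built
# here bundles the authenticator, error handler, API budget, and request options provider
# created from the manifest model. A hypothetical call could look like:
#
#     requester = factory.create_http_requester(
#         model=http_requester_model,  # assumed: parsed HttpRequester manifest component
#         config=connector_config,
#         name="my_stream",
#     )
#     # Caching is enabled only when the model (or the caller via use_cache) asks for it and the
#     # factory was not constructed with disable_cache=True.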
2434 config=config, 2435 disable_retries=self._disable_retries, 2436 parameters=model.parameters or {}, 2437 message_repository=self._message_repository, 2438 use_cache=should_use_cache, 2439 decoder=decoder, 2440 stream_response=decoder.is_stream_response() if decoder else False, 2441 ) 2442 2443 @staticmethod 2444 def create_http_response_filter( 2445 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2446 ) -> HttpResponseFilter: 2447 if model.action: 2448 action = ResponseAction(model.action.value) 2449 else: 2450 action = None 2451 2452 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2453 2454 http_codes = ( 2455 set(model.http_codes) if model.http_codes else set() 2456 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2457 2458 return HttpResponseFilter( 2459 action=action, 2460 failure_type=failure_type, 2461 error_message=model.error_message or "", 2462 error_message_contains=model.error_message_contains or "", 2463 http_codes=http_codes, 2464 predicate=model.predicate or "", 2465 config=config, 2466 parameters=model.parameters or {}, 2467 ) 2468 2469 @staticmethod 2470 def create_inline_schema_loader( 2471 model: InlineSchemaLoaderModel, config: Config, **kwargs: Any 2472 ) -> InlineSchemaLoader: 2473 return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 2474 2475 def create_complex_field_type( 2476 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2477 ) -> ComplexFieldType: 2478 items = ( 2479 self._create_component_from_model(model=model.items, config=config) 2480 if isinstance(model.items, ComplexFieldTypeModel) 2481 else model.items 2482 ) 2483 2484 return ComplexFieldType(field_type=model.field_type, items=items) 2485 2486 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2487 target_type = ( 2488 self._create_component_from_model(model=model.target_type, config=config) 2489 if isinstance(model.target_type, ComplexFieldTypeModel) 2490 else model.target_type 2491 ) 2492 2493 return TypesMap( 2494 target_type=target_type, 2495 current_type=model.current_type, 2496 condition=model.condition if model.condition is not None else "True", 2497 ) 2498 2499 def create_schema_type_identifier( 2500 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2501 ) -> SchemaTypeIdentifier: 2502 types_mapping = [] 2503 if model.types_mapping: 2504 types_mapping.extend( 2505 [ 2506 self._create_component_from_model(types_map, config=config) 2507 for types_map in model.types_mapping 2508 ] 2509 ) 2510 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2511 [x for x in model.schema_pointer] if model.schema_pointer else [] 2512 ) 2513 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2514 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2515 [x for x in model.type_pointer] if model.type_pointer else None 2516 ) 2517 2518 return SchemaTypeIdentifier( 2519 schema_pointer=model_schema_pointer, 2520 key_pointer=model_key_pointer, 2521 type_pointer=model_type_pointer, 2522 types_mapping=types_mapping, 2523 parameters=model.parameters or {}, 2524 ) 2525 2526 def create_dynamic_schema_loader( 2527 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2528 ) -> DynamicSchemaLoader: 2529 schema_transformations = [] 2530 if model.schema_transformations: 2531 for transformation_model in model.schema_transformations: 2532 
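# Illustrative sketch (hypothetical names, not part of this module): each TypesMap and
# SchemaTypeIdentifier parsed from the manifest is converted by the helpers above before the
# DynamicSchemaLoader below is assembled, e.g.
#
#     identifier = factory.create_schema_type_identifier(
#         model=schema_type_identifier_model,  # assumed: parsed SchemaTypeIdentifier component
#         config=connector_config,
#     )
#     # identifier.types_mapping holds one TypesMap per manifest entry; key_pointer and
#     # type_pointer entries are passed through unchanged here and resolved by the schema loader.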
schema_transformations.append( 2533 self._create_component_from_model(model=transformation_model, config=config) 2534 ) 2535 name = "dynamic_properties" 2536 retriever = self._create_component_from_model( 2537 model=model.retriever, 2538 config=config, 2539 name=name, 2540 primary_key=None, 2541 partition_router=self._build_stream_slicer_from_partition_router( 2542 model.retriever, config 2543 ), 2544 transformations=[], 2545 use_cache=True, 2546 log_formatter=( 2547 lambda response: format_http_message( 2548 response, 2549 f"Schema loader '{name}' request", 2550 f"Request performed in order to extract schema.", 2551 name, 2552 is_auxiliary=True, 2553 ) 2554 ), 2555 ) 2556 schema_type_identifier = self._create_component_from_model( 2557 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2558 ) 2559 schema_filter = ( 2560 self._create_component_from_model( 2561 model.schema_filter, config=config, parameters=model.parameters or {} 2562 ) 2563 if model.schema_filter is not None 2564 else None 2565 ) 2566 2567 return DynamicSchemaLoader( 2568 retriever=retriever, 2569 config=config, 2570 schema_transformations=schema_transformations, 2571 schema_filter=schema_filter, 2572 schema_type_identifier=schema_type_identifier, 2573 parameters=model.parameters or {}, 2574 ) 2575 2576 @staticmethod 2577 def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2578 return JsonDecoder(parameters={}) 2579 2580 def create_csv_decoder(self, model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder: 2581 return CompositeRawDecoder( 2582 parser=ModelToComponentFactory._get_parser(model, config), 2583 stream_response=False if self._emit_connector_builder_messages else True, 2584 ) 2585 2586 def create_jsonl_decoder( 2587 self, model: JsonlDecoderModel, config: Config, **kwargs: Any 2588 ) -> Decoder: 2589 return CompositeRawDecoder( 2590 parser=ModelToComponentFactory._get_parser(model, config), 2591 stream_response=False if self._emit_connector_builder_messages else True, 2592 ) 2593 2594 def create_gzip_decoder( 2595 self, model: GzipDecoderModel, config: Config, **kwargs: Any 2596 ) -> Decoder: 2597 _compressed_response_types = { 2598 "gzip", 2599 "x-gzip", 2600 "gzip, deflate", 2601 "x-gzip, deflate", 2602 "application/zip", 2603 "application/gzip", 2604 "application/x-gzip", 2605 "application/x-zip-compressed", 2606 } 2607 2608 gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser 2609 2610 if self._emit_connector_builder_messages: 2611 # This is very surprising but if the response is not streamed, 2612 # CompositeRawDecoder calls response.content and the requests library actually uncompress the data as opposed to response.raw, 2613 # which uses urllib3 directly and does not uncompress the data. 
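# Illustrative sketch (hypothetical names): in connector-builder mode the factory therefore
# falls back to the inner (non-gzip) parser with stream_response=False, while a regular sync
# keeps streaming and lets CompositeRawDecoder.by_headers select the GzipParser only when the
# response advertises a compressed Content-Encoding/Content-Type:
#
#     decoder = factory.create_gzip_decoder(model=gzip_decoder_model, config=connector_config)
#     # assumed: `gzip_decoder_model` is a parsed GzipDecoder manifest component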
2614 return CompositeRawDecoder(gzip_parser.inner_parser, False) 2615 2616 return CompositeRawDecoder.by_headers( 2617 [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)], 2618 stream_response=True, 2619 fallback_parser=gzip_parser.inner_parser, 2620 ) 2621 2622 # todo: This method should be removed once we deprecate the SimpleRetriever.cursor field and the various 2623 # state methods 2624 @staticmethod 2625 def create_incrementing_count_cursor( 2626 model: IncrementingCountCursorModel, config: Config, **kwargs: Any 2627 ) -> DatetimeBasedCursor: 2628 # This should not actually get used anywhere at runtime, but needed to add this to pass checks since 2629 # we still parse models into components. The issue is that there's no runtime implementation of a 2630 # IncrementingCountCursor. 2631 # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor. 2632 return DatetimeBasedCursor( 2633 cursor_field=model.cursor_field, 2634 datetime_format="%Y-%m-%d", 2635 start_datetime="2024-12-12", 2636 config=config, 2637 parameters={}, 2638 ) 2639 2640 @staticmethod 2641 def create_iterable_decoder( 2642 model: IterableDecoderModel, config: Config, **kwargs: Any 2643 ) -> IterableDecoder: 2644 return IterableDecoder(parameters={}) 2645 2646 @staticmethod 2647 def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: 2648 return XmlDecoder(parameters={}) 2649 2650 def create_zipfile_decoder( 2651 self, model: ZipfileDecoderModel, config: Config, **kwargs: Any 2652 ) -> ZipfileDecoder: 2653 return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config)) 2654 2655 @staticmethod 2656 def _get_parser(model: BaseModel, config: Config) -> Parser: 2657 if isinstance(model, JsonDecoderModel): 2658 # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases 2659 return JsonParser() 2660 elif isinstance(model, JsonlDecoderModel): 2661 return JsonLineParser() 2662 elif isinstance(model, CsvDecoderModel): 2663 return CsvParser( 2664 encoding=model.encoding, 2665 delimiter=model.delimiter, 2666 set_values_to_none=model.set_values_to_none, 2667 ) 2668 elif isinstance(model, GzipDecoderModel): 2669 return GzipParser( 2670 inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) 2671 ) 2672 elif isinstance( 2673 model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) 2674 ): 2675 raise ValueError(f"Decoder type {model} does not have parser associated to it") 2676 2677 raise ValueError(f"Unknown decoder type {model}") 2678 2679 @staticmethod 2680 def create_json_file_schema_loader( 2681 model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any 2682 ) -> JsonFileSchemaLoader: 2683 return JsonFileSchemaLoader( 2684 file_path=model.file_path or "", config=config, parameters=model.parameters or {} 2685 ) 2686 2687 def create_jwt_authenticator( 2688 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2689 ) -> JwtAuthenticator: 2690 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2691 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2692 request_option = ( 2693 self._create_component_from_model(model.request_option, config) 2694 if model.request_option 2695 else None 2696 ) 2697 return JwtAuthenticator( 2698 config=config, 2699 
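# Illustrative sketch (assumed names): the JWT authenticator assembled here is driven entirely
# by the manifest model; jwt_headers and jwt_payload fall back to the empty defaults created a
# few lines above when the manifest omits them.
#
#     authenticator = factory.create_jwt_authenticator(model=jwt_auth_model, config=connector_config)
#     # assumed: `jwt_auth_model` is a parsed JwtAuthenticator component; algorithm, secret_key,
#     # and token_duration are taken directly from it.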
parameters=model.parameters or {}, 2700 algorithm=JwtAlgorithm(model.algorithm.value), 2701 secret_key=model.secret_key, 2702 base64_encode_secret_key=model.base64_encode_secret_key, 2703 token_duration=model.token_duration, 2704 header_prefix=model.header_prefix, 2705 kid=jwt_headers.kid, 2706 typ=jwt_headers.typ, 2707 cty=jwt_headers.cty, 2708 iss=jwt_payload.iss, 2709 sub=jwt_payload.sub, 2710 aud=jwt_payload.aud, 2711 additional_jwt_headers=model.additional_jwt_headers, 2712 additional_jwt_payload=model.additional_jwt_payload, 2713 passphrase=model.passphrase, 2714 request_option=request_option, 2715 ) 2716 2717 def create_list_partition_router( 2718 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2719 ) -> ListPartitionRouter: 2720 request_option = ( 2721 self._create_component_from_model(model.request_option, config) 2722 if model.request_option 2723 else None 2724 ) 2725 return ListPartitionRouter( 2726 cursor_field=model.cursor_field, 2727 request_option=request_option, 2728 values=model.values, 2729 config=config, 2730 parameters=model.parameters or {}, 2731 ) 2732 2733 @staticmethod 2734 def create_min_max_datetime( 2735 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2736 ) -> MinMaxDatetime: 2737 return MinMaxDatetime( 2738 datetime=model.datetime, 2739 datetime_format=model.datetime_format or "", 2740 max_datetime=model.max_datetime or "", 2741 min_datetime=model.min_datetime or "", 2742 parameters=model.parameters or {}, 2743 ) 2744 2745 @staticmethod 2746 def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: 2747 return NoAuth(parameters=model.parameters or {}) 2748 2749 @staticmethod 2750 def create_no_pagination( 2751 model: NoPaginationModel, config: Config, **kwargs: Any 2752 ) -> NoPagination: 2753 return NoPagination(parameters={}) 2754 2755 def create_oauth_authenticator( 2756 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2757 ) -> DeclarativeOauth2Authenticator: 2758 profile_assertion = ( 2759 self._create_component_from_model(model.profile_assertion, config=config) 2760 if model.profile_assertion 2761 else None 2762 ) 2763 2764 if model.refresh_token_updater: 2765 # ignore type error because fixing it would have a lot of dependencies, revisit later 2766 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2767 config, 2768 InterpolatedString.create( 2769 model.token_refresh_endpoint, # type: ignore 2770 parameters=model.parameters or {}, 2771 ).eval(config), 2772 access_token_name=InterpolatedString.create( 2773 model.access_token_name or "access_token", parameters=model.parameters or {} 2774 ).eval(config), 2775 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2776 expires_in_name=InterpolatedString.create( 2777 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2778 ).eval(config), 2779 client_id_name=InterpolatedString.create( 2780 model.client_id_name or "client_id", parameters=model.parameters or {} 2781 ).eval(config), 2782 client_id=InterpolatedString.create( 2783 model.client_id, parameters=model.parameters or {} 2784 ).eval(config) 2785 if model.client_id 2786 else model.client_id, 2787 client_secret_name=InterpolatedString.create( 2788 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2789 ).eval(config), 2790 client_secret=InterpolatedString.create( 2791 model.client_secret, parameters=model.parameters or {} 2792 ).eval(config) 2793 if model.client_secret 2794 else model.client_secret, 
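# Illustrative sketch (assumed names): when `refresh_token_updater` is present the factory
# builds the single-use refresh-token variant above, which is designed to persist rotated
# tokens at the config paths declared on the updater; without it, the plain
# DeclarativeOauth2Authenticator further below is returned instead.
#
#     authenticator = factory.create_oauth_authenticator(model=oauth_model, config=connector_config)
#     # assumed: `oauth_model` is a parsed OAuthAuthenticator component.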
2795 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2796 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2797 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2798 grant_type_name=InterpolatedString.create( 2799 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2800 ).eval(config), 2801 grant_type=InterpolatedString.create( 2802 model.grant_type or "refresh_token", parameters=model.parameters or {} 2803 ).eval(config), 2804 refresh_request_body=InterpolatedMapping( 2805 model.refresh_request_body or {}, parameters=model.parameters or {} 2806 ).eval(config), 2807 refresh_request_headers=InterpolatedMapping( 2808 model.refresh_request_headers or {}, parameters=model.parameters or {} 2809 ).eval(config), 2810 scopes=model.scopes, 2811 token_expiry_date_format=model.token_expiry_date_format, 2812 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2813 message_repository=self._message_repository, 2814 refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes, 2815 refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key, 2816 refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values, 2817 ) 2818 # ignore type error because fixing it would have a lot of dependencies, revisit later 2819 return DeclarativeOauth2Authenticator( # type: ignore 2820 access_token_name=model.access_token_name or "access_token", 2821 access_token_value=model.access_token_value, 2822 client_id_name=model.client_id_name or "client_id", 2823 client_id=model.client_id, 2824 client_secret_name=model.client_secret_name or "client_secret", 2825 client_secret=model.client_secret, 2826 expires_in_name=model.expires_in_name or "expires_in", 2827 grant_type_name=model.grant_type_name or "grant_type", 2828 grant_type=model.grant_type or "refresh_token", 2829 refresh_request_body=model.refresh_request_body, 2830 refresh_request_headers=model.refresh_request_headers, 2831 refresh_token_name=model.refresh_token_name or "refresh_token", 2832 refresh_token=model.refresh_token, 2833 scopes=model.scopes, 2834 token_expiry_date=model.token_expiry_date, 2835 token_expiry_date_format=model.token_expiry_date_format, 2836 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2837 token_refresh_endpoint=model.token_refresh_endpoint, 2838 config=config, 2839 parameters=model.parameters or {}, 2840 message_repository=self._message_repository, 2841 profile_assertion=profile_assertion, 2842 use_profile_assertion=model.use_profile_assertion, 2843 ) 2844 2845 def create_offset_increment( 2846 self, 2847 model: OffsetIncrementModel, 2848 config: Config, 2849 decoder: Decoder, 2850 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2851 **kwargs: Any, 2852 ) -> OffsetIncrement: 2853 if isinstance(decoder, PaginationDecoderDecorator): 2854 inner_decoder = decoder.decoder 2855 else: 2856 inner_decoder = decoder 2857 decoder = PaginationDecoderDecorator(decoder=decoder) 2858 2859 if self._is_supported_decoder_for_pagination(inner_decoder): 2860 decoder_to_use = decoder 2861 else: 2862 raise ValueError( 2863 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 2864 ) 2865 2866 # Ideally we would instantiate the runtime extractor from highest most level (in this case the SimpleRetriever) 2867 # so that it can be shared by OffSetIncrement and RecordSelector. 
However, due to how we instantiate the 2868 # decoder with various decorators here, but not in create_record_selector, it is simpler to retain existing 2869 # behavior by having two separate extractors with identical behavior since they use the same extractor model. 2870 # When we have more time to investigate we can look into reusing the same component. 2871 extractor = ( 2872 self._create_component_from_model( 2873 model=extractor_model, config=config, decoder=decoder_to_use 2874 ) 2875 if extractor_model 2876 else None 2877 ) 2878 2879 return OffsetIncrement( 2880 page_size=model.page_size, 2881 config=config, 2882 decoder=decoder_to_use, 2883 extractor=extractor, 2884 inject_on_first_request=model.inject_on_first_request or False, 2885 parameters=model.parameters or {}, 2886 ) 2887 2888 @staticmethod 2889 def create_page_increment( 2890 model: PageIncrementModel, config: Config, **kwargs: Any 2891 ) -> PageIncrement: 2892 return PageIncrement( 2893 page_size=model.page_size, 2894 config=config, 2895 start_from_page=model.start_from_page or 0, 2896 inject_on_first_request=model.inject_on_first_request or False, 2897 parameters=model.parameters or {}, 2898 ) 2899 2900 def create_parent_stream_config( 2901 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 2902 ) -> ParentStreamConfig: 2903 declarative_stream = self._create_component_from_model( 2904 model.stream, 2905 config=config, 2906 is_parent=True, 2907 **kwargs, 2908 ) 2909 request_option = ( 2910 self._create_component_from_model(model.request_option, config=config) 2911 if model.request_option 2912 else None 2913 ) 2914 2915 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2916 raise ValueError( 2917 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 
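# Illustrative sketch (assumed names): a ParentStreamConfig links a child stream to its parent
# via parent_key/partition_field, and lazy_read_pointer may only contain direct paths (the '*'
# wildcard is rejected above). A hypothetical call:
#
#     parent_config = factory.create_parent_stream_config(
#         model=parent_stream_config_model,  # assumed: parsed ParentStreamConfig component
#         config=connector_config,
#         stream_name="child_stream",
#     )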
2918 ) 2919 2920 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2921 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2922 ) 2923 2924 return ParentStreamConfig( 2925 parent_key=model.parent_key, 2926 request_option=request_option, 2927 stream=declarative_stream, 2928 partition_field=model.partition_field, 2929 config=config, 2930 incremental_dependency=model.incremental_dependency or False, 2931 parameters=model.parameters or {}, 2932 extra_fields=model.extra_fields, 2933 lazy_read_pointer=model_lazy_read_pointer, 2934 ) 2935 2936 def create_properties_from_endpoint( 2937 self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any 2938 ) -> PropertiesFromEndpoint: 2939 retriever = self._create_component_from_model( 2940 model=model.retriever, 2941 config=config, 2942 name="dynamic_properties", 2943 primary_key=None, 2944 stream_slicer=None, 2945 transformations=[], 2946 use_cache=True, # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to different 2947 ) 2948 return PropertiesFromEndpoint( 2949 property_field_path=model.property_field_path, 2950 retriever=retriever, 2951 config=config, 2952 parameters=model.parameters or {}, 2953 ) 2954 2955 def create_property_chunking( 2956 self, model: PropertyChunkingModel, config: Config, **kwargs: Any 2957 ) -> PropertyChunking: 2958 record_merge_strategy = ( 2959 self._create_component_from_model( 2960 model=model.record_merge_strategy, config=config, **kwargs 2961 ) 2962 if model.record_merge_strategy 2963 else None 2964 ) 2965 2966 property_limit_type: PropertyLimitType 2967 match model.property_limit_type: 2968 case PropertyLimitTypeModel.property_count: 2969 property_limit_type = PropertyLimitType.property_count 2970 case PropertyLimitTypeModel.characters: 2971 property_limit_type = PropertyLimitType.characters 2972 case _: 2973 raise ValueError(f"Invalid PropertyLimitType {property_limit_type}") 2974 2975 return PropertyChunking( 2976 property_limit_type=property_limit_type, 2977 property_limit=model.property_limit, 2978 record_merge_strategy=record_merge_strategy, 2979 config=config, 2980 parameters=model.parameters or {}, 2981 ) 2982 2983 def create_query_properties( 2984 self, model: QueryPropertiesModel, config: Config, **kwargs: Any 2985 ) -> QueryProperties: 2986 if isinstance(model.property_list, list): 2987 property_list = model.property_list 2988 else: 2989 property_list = self._create_component_from_model( 2990 model=model.property_list, config=config, **kwargs 2991 ) 2992 2993 property_chunking = ( 2994 self._create_component_from_model( 2995 model=model.property_chunking, config=config, **kwargs 2996 ) 2997 if model.property_chunking 2998 else None 2999 ) 3000 3001 return QueryProperties( 3002 property_list=property_list, 3003 always_include_properties=model.always_include_properties, 3004 property_chunking=property_chunking, 3005 config=config, 3006 parameters=model.parameters or {}, 3007 ) 3008 3009 @staticmethod 3010 def create_record_filter( 3011 model: RecordFilterModel, config: Config, **kwargs: Any 3012 ) -> RecordFilter: 3013 return RecordFilter( 3014 condition=model.condition or "", config=config, parameters=model.parameters or {} 3015 ) 3016 3017 @staticmethod 3018 def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 3019 return RequestPath(parameters={}) 3020 3021 @staticmethod 3022 def 
create_request_option( 3023 model: RequestOptionModel, config: Config, **kwargs: Any 3024 ) -> RequestOption: 3025 inject_into = RequestOptionType(model.inject_into.value) 3026 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3027 [ 3028 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3029 for segment in model.field_path 3030 ] 3031 if model.field_path 3032 else None 3033 ) 3034 field_name = ( 3035 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3036 if model.field_name 3037 else None 3038 ) 3039 return RequestOption( 3040 field_name=field_name, 3041 field_path=field_path, 3042 inject_into=inject_into, 3043 parameters=kwargs.get("parameters", {}), 3044 ) 3045 3046 def create_record_selector( 3047 self, 3048 model: RecordSelectorModel, 3049 config: Config, 3050 *, 3051 name: str, 3052 transformations: List[RecordTransformation] | None = None, 3053 decoder: Decoder | None = None, 3054 client_side_incremental_sync: Dict[str, Any] | None = None, 3055 file_uploader: Optional[DefaultFileUploader] = None, 3056 **kwargs: Any, 3057 ) -> RecordSelector: 3058 extractor = self._create_component_from_model( 3059 model=model.extractor, decoder=decoder, config=config 3060 ) 3061 record_filter = ( 3062 self._create_component_from_model(model.record_filter, config=config) 3063 if model.record_filter 3064 else None 3065 ) 3066 3067 transform_before_filtering = ( 3068 False if model.transform_before_filtering is None else model.transform_before_filtering 3069 ) 3070 if client_side_incremental_sync: 3071 record_filter = ClientSideIncrementalRecordFilterDecorator( 3072 config=config, 3073 parameters=model.parameters, 3074 condition=model.record_filter.condition 3075 if (model.record_filter and hasattr(model.record_filter, "condition")) 3076 else None, 3077 **client_side_incremental_sync, 3078 ) 3079 transform_before_filtering = ( 3080 True 3081 if model.transform_before_filtering is None 3082 else model.transform_before_filtering 3083 ) 3084 3085 if model.schema_normalization is None: 3086 # default to no schema normalization if not set 3087 model.schema_normalization = SchemaNormalizationModel.None_ 3088 3089 schema_normalization = ( 3090 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3091 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3092 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3093 ) 3094 3095 return RecordSelector( 3096 extractor=extractor, 3097 name=name, 3098 config=config, 3099 record_filter=record_filter, 3100 transformations=transformations or [], 3101 file_uploader=file_uploader, 3102 schema_normalization=schema_normalization, 3103 parameters=model.parameters or {}, 3104 transform_before_filtering=transform_before_filtering, 3105 ) 3106 3107 @staticmethod 3108 def create_remove_fields( 3109 model: RemoveFieldsModel, config: Config, **kwargs: Any 3110 ) -> RemoveFields: 3111 return RemoveFields( 3112 field_pointers=model.field_pointers, condition=model.condition or "", parameters={} 3113 ) 3114 3115 def create_selective_authenticator( 3116 self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any 3117 ) -> DeclarativeAuthenticator: 3118 authenticators = { 3119 name: self._create_component_from_model(model=auth, config=config) 3120 for name, auth in model.authenticators.items() 3121 } 3122 # SelectiveAuthenticator will return instance of 
DeclarativeAuthenticator or raise ValueError error 3123 return SelectiveAuthenticator( # type: ignore[abstract] 3124 config=config, 3125 authenticators=authenticators, 3126 authenticator_selection_path=model.authenticator_selection_path, 3127 **kwargs, 3128 ) 3129 3130 @staticmethod 3131 def create_legacy_session_token_authenticator( 3132 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3133 ) -> LegacySessionTokenAuthenticator: 3134 return LegacySessionTokenAuthenticator( 3135 api_url=url_base, 3136 header=model.header, 3137 login_url=model.login_url, 3138 password=model.password or "", 3139 session_token=model.session_token or "", 3140 session_token_response_key=model.session_token_response_key or "", 3141 username=model.username or "", 3142 validate_session_url=model.validate_session_url, 3143 config=config, 3144 parameters=model.parameters or {}, 3145 ) 3146 3147 def create_simple_retriever( 3148 self, 3149 model: SimpleRetrieverModel, 3150 config: Config, 3151 *, 3152 name: str, 3153 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3154 request_options_provider: Optional[RequestOptionsProvider] = None, 3155 stop_condition_cursor: Optional[Cursor] = None, 3156 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3157 transformations: List[RecordTransformation], 3158 file_uploader: Optional[DefaultFileUploader] = None, 3159 incremental_sync: Optional[ 3160 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3161 ] = None, 3162 use_cache: Optional[bool] = None, 3163 log_formatter: Optional[Callable[[Response], Any]] = None, 3164 partition_router: Optional[PartitionRouter] = None, 3165 **kwargs: Any, 3166 ) -> SimpleRetriever: 3167 def _get_url(req: Requester) -> str: 3168 """ 3169 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3170 This is needed because the URL is not set until the requester is created. 
3171 """ 3172 3173 _url: str = ( 3174 model.requester.url 3175 if hasattr(model.requester, "url") and model.requester.url is not None 3176 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3177 ) 3178 _url_base: str = ( 3179 model.requester.url_base 3180 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3181 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3182 ) 3183 3184 return _url or _url_base 3185 3186 decoder = ( 3187 self._create_component_from_model(model=model.decoder, config=config) 3188 if model.decoder 3189 else JsonDecoder(parameters={}) 3190 ) 3191 record_selector = self._create_component_from_model( 3192 model=model.record_selector, 3193 name=name, 3194 config=config, 3195 decoder=decoder, 3196 transformations=transformations, 3197 client_side_incremental_sync=client_side_incremental_sync, 3198 file_uploader=file_uploader, 3199 ) 3200 3201 query_properties: Optional[QueryProperties] = None 3202 query_properties_key: Optional[str] = None 3203 if self._query_properties_in_request_parameters(model.requester): 3204 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3205 # places instead of default to request_parameters which isn't clearly documented 3206 if ( 3207 hasattr(model.requester, "fetch_properties_from_endpoint") 3208 and model.requester.fetch_properties_from_endpoint 3209 ): 3210 raise ValueError( 3211 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3212 ) 3213 3214 query_properties_definitions = [] 3215 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters() 3216 if isinstance(request_parameter, QueryPropertiesModel): 3217 query_properties_key = key 3218 query_properties_definitions.append(request_parameter) 3219 3220 if len(query_properties_definitions) > 1: 3221 raise ValueError( 3222 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3223 ) 3224 3225 if len(query_properties_definitions) == 1: 3226 query_properties = self._create_component_from_model( 3227 model=query_properties_definitions[0], config=config 3228 ) 3229 elif ( 3230 hasattr(model.requester, "fetch_properties_from_endpoint") 3231 and model.requester.fetch_properties_from_endpoint 3232 ): 3233 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3234 query_properties_definition = QueryPropertiesModel( 3235 type="QueryProperties", 3236 property_list=model.requester.fetch_properties_from_endpoint, 3237 always_include_properties=None, 3238 property_chunking=None, 3239 ) # type: ignore # $parameters has a default value 3240 3241 query_properties = self.create_query_properties( 3242 model=query_properties_definition, 3243 config=config, 3244 ) 3245 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3246 query_properties = self.create_query_properties( 3247 model=model.requester.query_properties, 3248 config=config, 3249 ) 3250 3251 requester = self._create_component_from_model( 3252 model=model.requester, 3253 decoder=decoder, 3254 name=name, 3255 query_properties_key=query_properties_key, 3256 use_cache=use_cache, 3257 config=config, 3258 ) 3259 3260 if not 
request_options_provider: 3261 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3262 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3263 partition_router, PartitionRouter 3264 ): 3265 request_options_provider = partition_router 3266 3267 paginator = ( 3268 self._create_component_from_model( 3269 model=model.paginator, 3270 config=config, 3271 url_base=_get_url(requester), 3272 extractor_model=model.record_selector.extractor, 3273 decoder=decoder, 3274 cursor_used_for_stop_condition=stop_condition_cursor or None, 3275 ) 3276 if model.paginator 3277 else NoPagination(parameters={}) 3278 ) 3279 3280 ignore_stream_slicer_parameters_on_paginated_requests = ( 3281 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3282 ) 3283 3284 if ( 3285 model.partition_router 3286 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3287 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3288 and any( 3289 parent_stream_config.lazy_read_pointer 3290 for parent_stream_config in model.partition_router.parent_stream_configs 3291 ) 3292 ): 3293 if incremental_sync: 3294 if incremental_sync.type != "DatetimeBasedCursor": 3295 raise ValueError( 3296 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3297 ) 3298 3299 elif incremental_sync.step or incremental_sync.cursor_granularity: 3300 raise ValueError( 3301 f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 3302 ) 3303 3304 if model.decoder and model.decoder.type != "JsonDecoder": 3305 raise ValueError( 3306 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 3307 ) 3308 3309 return LazySimpleRetriever( 3310 name=name, 3311 paginator=paginator, 3312 primary_key=primary_key, 3313 requester=requester, 3314 record_selector=record_selector, 3315 stream_slicer=_NO_STREAM_SLICING, 3316 request_option_provider=request_options_provider, 3317 cursor=None, 3318 config=config, 3319 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3320 parameters=model.parameters or {}, 3321 ) 3322 3323 return SimpleRetriever( 3324 name=name, 3325 paginator=paginator, 3326 primary_key=primary_key, 3327 requester=requester, 3328 record_selector=record_selector, 3329 stream_slicer=_NO_STREAM_SLICING, 3330 request_option_provider=request_options_provider, 3331 cursor=None, 3332 config=config, 3333 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3334 additional_query_properties=query_properties, 3335 log_formatter=self._get_log_formatter(log_formatter, name), 3336 parameters=model.parameters or {}, 3337 ) 3338 3339 def _get_log_formatter( 3340 self, log_formatter: Callable[[Response], Any] | None, name: str 3341 ) -> Callable[[Response], Any] | None: 3342 if self._should_limit_slices_fetched(): 3343 return ( 3344 ( 3345 lambda response: format_http_message( 3346 response, 3347 f"Stream '{name}' request", 3348 f"Request performed in order to extract records for stream '{name}'", 3349 name, 3350 ) 3351 ) 3352 if not log_formatter 3353 else log_formatter 3354 ) 3355 return None 3356 3357 def _should_limit_slices_fetched(self) -> bool: 3358 """ 3359 Returns True if the number of slices fetched should be limited, False otherwise. 3360 This is used to limit the number of slices fetched during tests. 
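# Illustrative note (sketch, not part of this module): create_simple_retriever above returns a
# LazySimpleRetriever only when the stream uses a SubstreamPartitionRouter with at least one
# lazy_read_pointer, has no stored state, uses a DatetimeBasedCursor without step or
# cursor_granularity, and decodes with JsonDecoder; every other combination falls through to the
# regular SimpleRetriever, with QueryProperties pulled out of request_parameters when declared
# there. A hypothetical call:
#
#     retriever = factory.create_simple_retriever(
#         model=simple_retriever_model,  # assumed: parsed SimpleRetriever component
#         config=connector_config,
#         name="my_stream",
#         primary_key="id",
#         transformations=[],
#     )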
3361 """ 3362 return bool(self._limit_slices_fetched or self._emit_connector_builder_messages) 3363 3364 @staticmethod 3365 def _query_properties_in_request_parameters( 3366 requester: Union[HttpRequesterModel, CustomRequesterModel], 3367 ) -> bool: 3368 if not hasattr(requester, "request_parameters"): 3369 return False 3370 request_parameters = requester.request_parameters 3371 if request_parameters and isinstance(request_parameters, Mapping): 3372 for request_parameter in request_parameters.values(): 3373 if isinstance(request_parameter, QueryPropertiesModel): 3374 return True 3375 return False 3376 3377 @staticmethod 3378 def _remove_query_properties( 3379 request_parameters: Mapping[str, Union[str, QueryPropertiesModel]], 3380 ) -> Mapping[str, str]: 3381 return { 3382 parameter_field: request_parameter 3383 for parameter_field, request_parameter in request_parameters.items() 3384 if not isinstance(request_parameter, QueryPropertiesModel) 3385 } 3386 3387 def create_state_delegating_stream( 3388 self, 3389 model: StateDelegatingStreamModel, 3390 config: Config, 3391 has_parent_state: Optional[bool] = None, 3392 **kwargs: Any, 3393 ) -> DeclarativeStream: 3394 if ( 3395 model.full_refresh_stream.name != model.name 3396 or model.name != model.incremental_stream.name 3397 ): 3398 raise ValueError( 3399 f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}." 3400 ) 3401 3402 stream_model = self._get_state_delegating_stream_model( 3403 False if has_parent_state is None else has_parent_state, model 3404 ) 3405 3406 return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # DeclarativeStream will be created as stream_model is alwyas DeclarativeStreamModel 3407 3408 def _get_state_delegating_stream_model( 3409 self, has_parent_state: bool, model: StateDelegatingStreamModel 3410 ) -> DeclarativeStreamModel: 3411 return ( 3412 model.incremental_stream 3413 if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state 3414 else model.full_refresh_stream 3415 ) 3416 3417 def _create_async_job_status_mapping( 3418 self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any 3419 ) -> Mapping[str, AsyncJobStatus]: 3420 api_status_to_cdk_status = {} 3421 for cdk_status, api_statuses in model.dict().items(): 3422 if cdk_status == "type": 3423 # This is an element of the dict because of the typing of the CDK but it is not a CDK status 3424 continue 3425 3426 for status in api_statuses: 3427 if status in api_status_to_cdk_status: 3428 raise ValueError( 3429 f"API status {status} is already set for CDK status {cdk_status}. 
Please ensure API statuses are only provided once" 3430 ) 3431 api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) 3432 return api_status_to_cdk_status 3433 3434 def _get_async_job_status(self, status: str) -> AsyncJobStatus: 3435 match status: 3436 case "running": 3437 return AsyncJobStatus.RUNNING 3438 case "completed": 3439 return AsyncJobStatus.COMPLETED 3440 case "failed": 3441 return AsyncJobStatus.FAILED 3442 case "timeout": 3443 return AsyncJobStatus.TIMED_OUT 3444 case _: 3445 raise ValueError(f"Unsupported CDK status {status}") 3446 3447 def create_async_retriever( 3448 self, 3449 model: AsyncRetrieverModel, 3450 config: Config, 3451 *, 3452 name: str, 3453 primary_key: Optional[ 3454 Union[str, List[str], List[List[str]]] 3455 ], # this seems to be needed to match create_simple_retriever 3456 stream_slicer: Optional[StreamSlicer], 3457 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3458 transformations: List[RecordTransformation], 3459 **kwargs: Any, 3460 ) -> AsyncRetriever: 3461 if model.download_target_requester and not model.download_target_extractor: 3462 raise ValueError( 3463 f"`download_target_extractor` required if using a `download_target_requester`" 3464 ) 3465 3466 def _get_download_retriever( 3467 requester: Requester, extractor: RecordExtractor, _decoder: Decoder 3468 ) -> SimpleRetriever: 3469 # We create a record selector for the download retriever 3470 # with no schema normalization and no transformations, neither record filter 3471 # as all this occurs in the record_selector of the AsyncRetriever 3472 record_selector = RecordSelector( 3473 extractor=extractor, 3474 name=name, 3475 record_filter=None, 3476 transformations=[], 3477 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3478 config=config, 3479 parameters={}, 3480 ) 3481 paginator = ( 3482 self._create_component_from_model( 3483 model=model.download_paginator, 3484 decoder=_decoder, 3485 config=config, 3486 url_base="", 3487 ) 3488 if model.download_paginator 3489 else NoPagination(parameters={}) 3490 ) 3491 3492 return SimpleRetriever( 3493 requester=requester, 3494 record_selector=record_selector, 3495 primary_key=None, 3496 name=name, 3497 paginator=paginator, 3498 config=config, 3499 parameters={}, 3500 log_formatter=self._get_log_formatter(None, name), 3501 ) 3502 3503 def _get_job_timeout() -> datetime.timedelta: 3504 user_defined_timeout: Optional[int] = ( 3505 int( 3506 InterpolatedString.create( 3507 str(model.polling_job_timeout), 3508 parameters={}, 3509 ).eval(config) 3510 ) 3511 if model.polling_job_timeout 3512 else None 3513 ) 3514 3515 # check for user defined timeout during the test read or 15 minutes 3516 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3517 # default value for non-connector builder is 60 minutes. 
3518 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3519 3520 return ( 3521 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3522 ) 3523 3524 decoder = ( 3525 self._create_component_from_model(model=model.decoder, config=config) 3526 if model.decoder 3527 else JsonDecoder(parameters={}) 3528 ) 3529 record_selector = self._create_component_from_model( 3530 model=model.record_selector, 3531 config=config, 3532 decoder=decoder, 3533 name=name, 3534 transformations=transformations, 3535 client_side_incremental_sync=client_side_incremental_sync, 3536 ) 3537 3538 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3539 if self._should_limit_slices_fetched(): 3540 stream_slicer = cast( 3541 StreamSlicer, 3542 StreamSlicerTestReadDecorator( 3543 wrapped_slicer=stream_slicer, 3544 maximum_number_of_slices=self._limit_slices_fetched or 5, 3545 ), 3546 ) 3547 3548 creation_requester = self._create_component_from_model( 3549 model=model.creation_requester, 3550 decoder=decoder, 3551 config=config, 3552 name=f"job creation - {name}", 3553 ) 3554 polling_requester = self._create_component_from_model( 3555 model=model.polling_requester, 3556 decoder=decoder, 3557 config=config, 3558 name=f"job polling - {name}", 3559 ) 3560 job_download_components_name = f"job download - {name}" 3561 download_decoder = ( 3562 self._create_component_from_model(model=model.download_decoder, config=config) 3563 if model.download_decoder 3564 else JsonDecoder(parameters={}) 3565 ) 3566 download_extractor = ( 3567 self._create_component_from_model( 3568 model=model.download_extractor, 3569 config=config, 3570 decoder=download_decoder, 3571 parameters=model.parameters, 3572 ) 3573 if model.download_extractor 3574 else DpathExtractor( 3575 [], 3576 config=config, 3577 decoder=download_decoder, 3578 parameters=model.parameters or {}, 3579 ) 3580 ) 3581 download_requester = self._create_component_from_model( 3582 model=model.download_requester, 3583 decoder=download_decoder, 3584 config=config, 3585 name=job_download_components_name, 3586 ) 3587 download_retriever = _get_download_retriever( 3588 download_requester, download_extractor, download_decoder 3589 ) 3590 abort_requester = ( 3591 self._create_component_from_model( 3592 model=model.abort_requester, 3593 decoder=decoder, 3594 config=config, 3595 name=f"job abort - {name}", 3596 ) 3597 if model.abort_requester 3598 else None 3599 ) 3600 delete_requester = ( 3601 self._create_component_from_model( 3602 model=model.delete_requester, 3603 decoder=decoder, 3604 config=config, 3605 name=f"job delete - {name}", 3606 ) 3607 if model.delete_requester 3608 else None 3609 ) 3610 download_target_requester = ( 3611 self._create_component_from_model( 3612 model=model.download_target_requester, 3613 decoder=decoder, 3614 config=config, 3615 name=f"job extract_url - {name}", 3616 ) 3617 if model.download_target_requester 3618 else None 3619 ) 3620 status_extractor = self._create_component_from_model( 3621 model=model.status_extractor, decoder=decoder, config=config, name=name 3622 ) 3623 download_target_extractor = ( 3624 self._create_component_from_model( 3625 model=model.download_target_extractor, 3626 decoder=decoder, 3627 config=config, 3628 name=name, 3629 ) 3630 if model.download_target_extractor 3631 else None 3632 ) 3633 3634 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3635 creation_requester=creation_requester, 3636 polling_requester=polling_requester, 3637 
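# Illustrative sketch (assumed names): the repository assembled here drives the async job
# lifecycle (create, poll, download, with optional abort/delete requesters). A hypothetical call:
#
#     async_retriever = factory.create_async_retriever(
#         model=async_retriever_model,  # assumed: parsed AsyncRetriever component
#         config=connector_config,
#         name="report_stream",
#         primary_key=None,
#         stream_slicer=None,
#         transformations=[],
#     )
#     # The job timeout is the user-defined polling_job_timeout when provided, otherwise
#     # 15 minutes for connector-builder test reads and 60 minutes for a regular sync,
#     # per _get_job_timeout above.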
download_retriever=download_retriever, 3638 download_target_requester=download_target_requester, 3639 abort_requester=abort_requester, 3640 delete_requester=delete_requester, 3641 status_extractor=status_extractor, 3642 status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), 3643 download_target_extractor=download_target_extractor, 3644 job_timeout=_get_job_timeout(), 3645 ) 3646 3647 async_job_partition_router = AsyncJobPartitionRouter( 3648 job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( 3649 job_repository, 3650 stream_slices, 3651 self._job_tracker, 3652 self._message_repository, 3653 # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk 3654 has_bulk_parent=False, 3655 # set the `job_max_retry` to 1 for the `Connector Builder`` use-case. 3656 # `None` == default retry is set to 3 attempts, under the hood. 3657 job_max_retry=1 if self._emit_connector_builder_messages else None, 3658 ), 3659 stream_slicer=stream_slicer, 3660 config=config, 3661 parameters=model.parameters or {}, 3662 ) 3663 3664 return AsyncRetriever( 3665 record_selector=record_selector, 3666 stream_slicer=async_job_partition_router, 3667 config=config, 3668 parameters=model.parameters or {}, 3669 ) 3670 3671 def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3672 config_migrations = [ 3673 self._create_component_from_model(migration, config) 3674 for migration in ( 3675 model.config_normalization_rules.config_migrations 3676 if ( 3677 model.config_normalization_rules 3678 and model.config_normalization_rules.config_migrations 3679 ) 3680 else [] 3681 ) 3682 ] 3683 config_transformations = [ 3684 self._create_component_from_model(transformation, config) 3685 for transformation in ( 3686 model.config_normalization_rules.transformations 3687 if ( 3688 model.config_normalization_rules 3689 and model.config_normalization_rules.transformations 3690 ) 3691 else [] 3692 ) 3693 ] 3694 config_validations = [ 3695 self._create_component_from_model(validation, config) 3696 for validation in ( 3697 model.config_normalization_rules.validations 3698 if ( 3699 model.config_normalization_rules 3700 and model.config_normalization_rules.validations 3701 ) 3702 else [] 3703 ) 3704 ] 3705 3706 return Spec( 3707 connection_specification=model.connection_specification, 3708 documentation_url=model.documentation_url, 3709 advanced_auth=model.advanced_auth, 3710 parameters={}, 3711 config_migrations=config_migrations, 3712 config_transformations=config_transformations, 3713 config_validations=config_validations, 3714 ) 3715 3716 def create_substream_partition_router( 3717 self, 3718 model: SubstreamPartitionRouterModel, 3719 config: Config, 3720 *, 3721 stream_name: str, 3722 **kwargs: Any, 3723 ) -> SubstreamPartitionRouter: 3724 parent_stream_configs = [] 3725 if model.parent_stream_configs: 3726 parent_stream_configs.extend( 3727 [ 3728 self.create_parent_stream_config_with_substream_wrapper( 3729 model=parent_stream_config, config=config, stream_name=stream_name, **kwargs 3730 ) 3731 for parent_stream_config in model.parent_stream_configs 3732 ] 3733 ) 3734 3735 return SubstreamPartitionRouter( 3736 parent_stream_configs=parent_stream_configs, 3737 parameters=model.parameters or {}, 3738 config=config, 3739 ) 3740 3741 def create_parent_stream_config_with_substream_wrapper( 3742 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 3743 ) -> Any: 3744 # getting the parent state 
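# Illustrative note (sketch): each parent stream is built by a nested ModelToComponentFactory
# whose ConnectorStateManager is seeded from the child state read below, so that
# incremental_dependency parents resume from the migrated parent or global state. Substream HTTP
# traffic is tagged as auxiliary, and its state messages are filtered by
# StateFilteringMessageRepository before reaching the main message repository.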
3745 child_state = self._connector_state_manager.get_stream_state(stream_name, None) 3746 3747 # This flag will be used exclusively for StateDelegatingStream when a parent stream is created 3748 has_parent_state = bool( 3749 self._connector_state_manager.get_stream_state(stream_name, None) 3750 if model.incremental_dependency 3751 else False 3752 ) 3753 connector_state_manager = self._instantiate_parent_stream_state_manager( 3754 child_state, config, model, has_parent_state 3755 ) 3756 3757 substream_factory = ModelToComponentFactory( 3758 connector_state_manager=connector_state_manager, 3759 limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, 3760 limit_slices_fetched=self._limit_slices_fetched, 3761 emit_connector_builder_messages=self._emit_connector_builder_messages, 3762 disable_retries=self._disable_retries, 3763 disable_cache=self._disable_cache, 3764 message_repository=StateFilteringMessageRepository( 3765 LogAppenderMessageRepositoryDecorator( 3766 { 3767 "airbyte_cdk": {"stream": {"is_substream": True}}, 3768 "http": {"is_auxiliary": True}, 3769 }, 3770 self._message_repository, 3771 self._evaluate_log_level(self._emit_connector_builder_messages), 3772 ), 3773 ), 3774 ) 3775 3776 return substream_factory.create_parent_stream_config( 3777 model=model, config=config, stream_name=stream_name, **kwargs 3778 ) 3779 3780 def _instantiate_parent_stream_state_manager( 3781 self, 3782 child_state: MutableMapping[str, Any], 3783 config: Config, 3784 model: ParentStreamConfigModel, 3785 has_parent_state: bool, 3786 ) -> ConnectorStateManager: 3787 """ 3788 With DefaultStream, the state needs to be provided during __init__ of the cursor as opposed to the 3789 `set_initial_state` flow that existed for the declarative cursors. This state is taken from 3790 self._connector_state_manager.get_stream_state (`self` being a newly created ModelToComponentFactory to account 3791 for the MessageRepository being different). So we need to pass a ConnectorStateManager to the 3792 ModelToComponentFactory that has the parent states. This method populates this if there is a child state and if 3793 incremental_dependency is set. 
3794 """ 3795 if model.incremental_dependency and child_state: 3796 parent_stream_name = model.stream.name or "" 3797 parent_state = ConcurrentPerPartitionCursor.get_parent_state( 3798 child_state, parent_stream_name 3799 ) 3800 3801 if not parent_state: 3802 # there are two migration cases: state value from child stream or from global state 3803 parent_state = ConcurrentPerPartitionCursor.get_global_state( 3804 child_state, parent_stream_name 3805 ) 3806 3807 if not parent_state and not isinstance(parent_state, dict): 3808 cursor_values = child_state.values() 3809 if cursor_values: 3810 incremental_sync_model: Union[ 3811 DatetimeBasedCursorModel, 3812 IncrementingCountCursorModel, 3813 ] = ( 3814 model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream 3815 if isinstance(model.stream, DeclarativeStreamModel) 3816 else self._get_state_delegating_stream_model( 3817 has_parent_state, model.stream 3818 ).incremental_sync 3819 ) 3820 cursor_field = InterpolatedString.create( 3821 incremental_sync_model.cursor_field, 3822 parameters=incremental_sync_model.parameters or {}, 3823 ).eval(config) 3824 parent_state = AirbyteStateMessage( 3825 type=AirbyteStateType.STREAM, 3826 stream=AirbyteStreamState( 3827 stream_descriptor=StreamDescriptor( 3828 name=parent_stream_name, namespace=None 3829 ), 3830 stream_state=AirbyteStateBlob( 3831 {cursor_field: list(cursor_values)[0]} 3832 ), 3833 ), 3834 ) 3835 return ConnectorStateManager([parent_state] if parent_state else []) 3836 3837 return ConnectorStateManager([]) 3838 3839 @staticmethod 3840 def create_wait_time_from_header( 3841 model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any 3842 ) -> WaitTimeFromHeaderBackoffStrategy: 3843 return WaitTimeFromHeaderBackoffStrategy( 3844 header=model.header, 3845 parameters=model.parameters or {}, 3846 config=config, 3847 regex=model.regex, 3848 max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 3849 if model.max_waiting_time_in_seconds is not None 3850 else None, 3851 ) 3852 3853 @staticmethod 3854 def create_wait_until_time_from_header( 3855 model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3856 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3857 return WaitUntilTimeFromHeaderBackoffStrategy( 3858 header=model.header, 3859 parameters=model.parameters or {}, 3860 config=config, 3861 min_wait=model.min_wait, 3862 regex=model.regex, 3863 ) 3864 3865 def get_message_repository(self) -> MessageRepository: 3866 return self._message_repository 3867 3868 def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: 3869 return Level.DEBUG if emit_connector_builder_messages else Level.INFO 3870 3871 @staticmethod 3872 def create_components_mapping_definition( 3873 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 3874 ) -> ComponentMappingDefinition: 3875 interpolated_value = InterpolatedString.create( 3876 model.value, parameters=model.parameters or {} 3877 ) 3878 field_path = [ 3879 InterpolatedString.create(path, parameters=model.parameters or {}) 3880 for path in model.field_path 3881 ] 3882 return ComponentMappingDefinition( 3883 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 3884 value=interpolated_value, 3885 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 3886 create_or_update=model.create_or_update, 3887 condition=model.condition, 3888 
parameters=model.parameters or {}, 3889 ) 3890 3891 def create_http_components_resolver( 3892 self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None 3893 ) -> Any: 3894 retriever = self._create_component_from_model( 3895 model=model.retriever, 3896 config=config, 3897 name=f"{stream_name if stream_name else '__http_components_resolver'}", 3898 primary_key=None, 3899 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 3900 transformations=[], 3901 ) 3902 3903 components_mapping = [] 3904 for component_mapping_definition_model in model.components_mapping: 3905 if component_mapping_definition_model.condition: 3906 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 3907 components_mapping.append( 3908 self._create_component_from_model( 3909 model=component_mapping_definition_model, 3910 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3911 component_mapping_definition_model.value_type 3912 ), 3913 config=config, 3914 ) 3915 ) 3916 3917 return HttpComponentsResolver( 3918 retriever=retriever, 3919 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 3920 config=config, 3921 components_mapping=components_mapping, 3922 parameters=model.parameters or {}, 3923 ) 3924 3925 @staticmethod 3926 def create_stream_config( 3927 model: StreamConfigModel, config: Config, **kwargs: Any 3928 ) -> StreamConfig: 3929 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 3930 [x for x in model.configs_pointer] if model.configs_pointer else [] 3931 ) 3932 3933 return StreamConfig( 3934 configs_pointer=model_configs_pointer, 3935 default_values=model.default_values, 3936 parameters=model.parameters or {}, 3937 ) 3938 3939 def create_config_components_resolver( 3940 self, 3941 model: ConfigComponentsResolverModel, 3942 config: Config, 3943 ) -> Any: 3944 model_stream_configs = ( 3945 model.stream_config if isinstance(model.stream_config, list) else [model.stream_config] 3946 ) 3947 3948 stream_configs = [ 3949 self._create_component_from_model( 3950 stream_config, config=config, parameters=model.parameters or {} 3951 ) 3952 for stream_config in model_stream_configs 3953 ] 3954 3955 components_mapping = [ 3956 self._create_component_from_model( 3957 model=components_mapping_definition_model, 3958 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3959 components_mapping_definition_model.value_type 3960 ), 3961 config=config, 3962 parameters=model.parameters, 3963 ) 3964 for components_mapping_definition_model in model.components_mapping 3965 ] 3966 3967 return ConfigComponentsResolver( 3968 stream_configs=stream_configs, 3969 config=config, 3970 components_mapping=components_mapping, 3971 parameters=model.parameters or {}, 3972 ) 3973 3974 def create_parametrized_components_resolver( 3975 self, 3976 model: ParametrizedComponentsResolverModel, 3977 config: Config, 3978 ) -> ParametrizedComponentsResolver: 3979 stream_parameters = StreamParametersDefinition( 3980 list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream 3981 ) 3982 3983 components_mapping = [] 3984 for components_mapping_definition_model in model.components_mapping: 3985 if components_mapping_definition_model.condition: 3986 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 3987 components_mapping.append( 3988 self._create_component_from_model( 3989 model=components_mapping_definition_model, 3990 
value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3991 components_mapping_definition_model.value_type 3992 ), 3993 config=config, 3994 ) 3995 ) 3996 return ParametrizedComponentsResolver( 3997 stream_parameters=stream_parameters, 3998 config=config, 3999 components_mapping=components_mapping, 4000 parameters=model.parameters or {}, 4001 ) 4002 4003 _UNSUPPORTED_DECODER_ERROR = ( 4004 "Specified decoder of {decoder_type} is not supported for pagination." 4005 "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead." 4006 "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`." 4007 ) 4008 4009 def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: 4010 if isinstance(decoder, (JsonDecoder, XmlDecoder)): 4011 return True 4012 elif isinstance(decoder, CompositeRawDecoder): 4013 return self._is_supported_parser_for_pagination(decoder.parser) 4014 else: 4015 return False 4016 4017 def _is_supported_parser_for_pagination(self, parser: Parser) -> bool: 4018 if isinstance(parser, JsonParser): 4019 return True 4020 elif isinstance(parser, GzipParser): 4021 return isinstance(parser.inner_parser, JsonParser) 4022 else: 4023 return False 4024 4025 def create_http_api_budget( 4026 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 4027 ) -> HttpAPIBudget: 4028 policies = [ 4029 self._create_component_from_model(model=policy, config=config) 4030 for policy in model.policies 4031 ] 4032 4033 return HttpAPIBudget( 4034 policies=policies, 4035 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 4036 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 4037 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 4038 ) 4039 4040 def create_fixed_window_call_rate_policy( 4041 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 4042 ) -> FixedWindowCallRatePolicy: 4043 matchers = [ 4044 self._create_component_from_model(model=matcher, config=config) 4045 for matcher in model.matchers 4046 ] 4047 4048 # Set the initial reset timestamp to 10 days from now. 4049 # This value will be updated by the first request. 
4050 return FixedWindowCallRatePolicy( 4051 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 4052 period=parse_duration(model.period), 4053 call_limit=model.call_limit, 4054 matchers=matchers, 4055 ) 4056 4057 def create_file_uploader( 4058 self, model: FileUploaderModel, config: Config, **kwargs: Any 4059 ) -> FileUploader: 4060 name = "File Uploader" 4061 requester = self._create_component_from_model( 4062 model=model.requester, 4063 config=config, 4064 name=name, 4065 **kwargs, 4066 ) 4067 download_target_extractor = self._create_component_from_model( 4068 model=model.download_target_extractor, 4069 config=config, 4070 name=name, 4071 **kwargs, 4072 ) 4073 emit_connector_builder_messages = self._emit_connector_builder_messages 4074 file_uploader = DefaultFileUploader( 4075 requester=requester, 4076 download_target_extractor=download_target_extractor, 4077 config=config, 4078 file_writer=NoopFileWriter() 4079 if emit_connector_builder_messages 4080 else LocalFileSystemFileWriter(), 4081 parameters=model.parameters or {}, 4082 filename_extractor=model.filename_extractor if model.filename_extractor else None, 4083 ) 4084 4085 return ( 4086 ConnectorBuilderFileUploader(file_uploader) 4087 if emit_connector_builder_messages 4088 else file_uploader 4089 ) 4090 4091 def create_moving_window_call_rate_policy( 4092 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 4093 ) -> MovingWindowCallRatePolicy: 4094 rates = [ 4095 self._create_component_from_model(model=rate, config=config) for rate in model.rates 4096 ] 4097 matchers = [ 4098 self._create_component_from_model(model=matcher, config=config) 4099 for matcher in model.matchers 4100 ] 4101 return MovingWindowCallRatePolicy( 4102 rates=rates, 4103 matchers=matchers, 4104 ) 4105 4106 def create_unlimited_call_rate_policy( 4107 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 4108 ) -> UnlimitedCallRatePolicy: 4109 matchers = [ 4110 self._create_component_from_model(model=matcher, config=config) 4111 for matcher in model.matchers 4112 ] 4113 4114 return UnlimitedCallRatePolicy( 4115 matchers=matchers, 4116 ) 4117 4118 def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate: 4119 interpolated_limit = InterpolatedString.create(str(model.limit), parameters={}) 4120 return Rate( 4121 limit=int(interpolated_limit.eval(config=config)), 4122 interval=parse_duration(model.interval), 4123 ) 4124 4125 def create_http_request_matcher( 4126 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 4127 ) -> HttpRequestRegexMatcher: 4128 return HttpRequestRegexMatcher( 4129 method=model.method, 4130 url_base=model.url_base, 4131 url_path_pattern=model.url_path_pattern, 4132 params=model.params, 4133 headers=model.headers, 4134 ) 4135 4136 def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None: 4137 self._api_budget = self.create_component( 4138 model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config 4139 ) 4140 4141 def create_grouping_partition_router( 4142 self, 4143 model: GroupingPartitionRouterModel, 4144 config: Config, 4145 *, 4146 stream_name: str, 4147 **kwargs: Any, 4148 ) -> GroupingPartitionRouter: 4149 underlying_router = self._create_component_from_model( 4150 model=model.underlying_partition_router, 4151 config=config, 4152 stream_name=stream_name, 4153 **kwargs, 4154 ) 4155 if model.group_size < 1: 4156 raise ValueError(f"Group size must be greater than 0, got 
{model.group_size}") 4157 4158 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 4159 # because they are specific to individual partitions and cannot be aggregated or handled 4160 # when grouping, potentially leading to incorrect API calls. Any request customization 4161 # should be managed at the stream level through the requester's configuration. 4162 if isinstance(underlying_router, SubstreamPartitionRouter): 4163 if any( 4164 parent_config.request_option 4165 for parent_config in underlying_router.parent_stream_configs 4166 ): 4167 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4168 4169 if isinstance(underlying_router, ListPartitionRouter): 4170 if underlying_router.request_option: 4171 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4172 4173 return GroupingPartitionRouter( 4174 group_size=model.group_size, 4175 underlying_partition_router=underlying_router, 4176 deduplicate=model.deduplicate if model.deduplicate is not None else True, 4177 config=config, 4178 )
652 def __init__( 653 self, 654 limit_pages_fetched_per_slice: Optional[int] = None, 655 limit_slices_fetched: Optional[int] = None, 656 emit_connector_builder_messages: bool = False, 657 disable_retries: bool = False, 658 disable_cache: bool = False, 659 message_repository: Optional[MessageRepository] = None, 660 connector_state_manager: Optional[ConnectorStateManager] = None, 661 max_concurrent_async_job_count: Optional[int] = None, 662 ): 663 self._init_mappings() 664 self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice 665 self._limit_slices_fetched = limit_slices_fetched 666 self._emit_connector_builder_messages = emit_connector_builder_messages 667 self._disable_retries = disable_retries 668 self._disable_cache = disable_cache 669 self._message_repository = message_repository or InMemoryMessageRepository( 670 self._evaluate_log_level(emit_connector_builder_messages) 671 ) 672 self._connector_state_manager = connector_state_manager or ConnectorStateManager() 673 self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None 674 self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) 675 # placeholder for deprecation warnings 676 self._collected_deprecation_logs: List[ConnectorBuilderLogMessage] = []
790 def create_component( 791 self, 792 model_type: Type[BaseModel], 793 component_definition: ComponentDefinition, 794 config: Config, 795 **kwargs: Any, 796 ) -> Any: 797 """ 798 Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and 799 subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating 800 creating declarative components from that model. 801 802 :param model_type: The type of declarative component that is being initialized 803 :param component_definition: The mapping that represents a declarative component 804 :param config: The connector config that is provided by the customer 805 :return: The declarative component to be used at runtime 806 """ 807 808 component_type = component_definition.get("type") 809 if component_definition.get("type") != model_type.__name__: 810 raise ValueError( 811 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 812 ) 813 814 declarative_component_model = model_type.parse_obj(component_definition) 815 816 if not isinstance(declarative_component_model, model_type): 817 raise ValueError( 818 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" 819 ) 820 821 return self._create_component_from_model( 822 model=declarative_component_model, config=config, **kwargs 823 )
Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating declarative components from that model.
Parameters
- model_type: The type of declarative component that is being initialized
- component_definition: The mapping that represents a declarative component
- config: The connector config that is provided by the customer
Returns
The declarative component to be used at runtime
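As a minimal usage sketch, create_component can be driven directly with a manifest-style mapping. The model import path and the example component below are assumptions for illustration, not taken from this module:

from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    DpathExtractor as DpathExtractorModel,  # assumed import path for the generated Pydantic model
)
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)

factory = ModelToComponentFactory()
extractor = factory.create_component(
    model_type=DpathExtractorModel,
    component_definition={
        "type": "DpathExtractor",            # must equal model_type.__name__
        "field_path": ["data", "records"],   # hypothetical manifest values
    },
    config={},
)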
840 def get_model_deprecations(self) -> List[ConnectorBuilderLogMessage]: 841 """ 842 Returns the deprecation warnings that were collected during the creation of components. 843 """ 844 return self._collected_deprecation_logs
Returns the deprecation warnings that were collected during the creation of components.
861 def create_config_migration( 862 self, model: ConfigMigrationModel, config: Config 863 ) -> ConfigMigration: 864 transformations: List[ConfigTransformation] = [ 865 self._create_component_from_model(transformation, config) 866 for transformation in model.transformations 867 ] 868 869 return ConfigMigration( 870 description=model.description, 871 transformations=transformations, 872 )
874 def create_config_add_fields( 875 self, model: ConfigAddFieldsModel, config: Config, **kwargs: Any 876 ) -> ConfigAddFields: 877 fields = [self._create_component_from_model(field, config) for field in model.fields] 878 return ConfigAddFields( 879 fields=fields, 880 condition=model.condition or "", 881 )
930 @staticmethod 931 def create_added_field_definition( 932 model: AddedFieldDefinitionModel, config: Config, **kwargs: Any 933 ) -> AddedFieldDefinition: 934 interpolated_value = InterpolatedString.create( 935 model.value, parameters=model.parameters or {} 936 ) 937 return AddedFieldDefinition( 938 path=model.path, 939 value=interpolated_value, 940 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 941 parameters=model.parameters or {}, 942 )
944 def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: 945 added_field_definitions = [ 946 self._create_component_from_model( 947 model=added_field_definition_model, 948 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 949 added_field_definition_model.value_type 950 ), 951 config=config, 952 ) 953 for added_field_definition_model in model.fields 954 ] 955 return AddFields( 956 fields=added_field_definitions, 957 condition=model.condition or "", 958 parameters=model.parameters or {}, 959 )
985 def create_dpath_flatten_fields( 986 self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any 987 ) -> DpathFlattenFields: 988 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 989 key_transformation = ( 990 KeyTransformation( 991 config=config, 992 prefix=model.key_transformation.prefix, 993 suffix=model.key_transformation.suffix, 994 parameters=model.parameters or {}, 995 ) 996 if model.key_transformation is not None 997 else None 998 ) 999 return DpathFlattenFields( 1000 config=config, 1001 field_path=model_field_path, 1002 delete_origin_value=model.delete_origin_value 1003 if model.delete_origin_value is not None 1004 else False, 1005 replace_record=model.replace_record if model.replace_record is not None else False, 1006 key_transformation=key_transformation, 1007 parameters=model.parameters or {}, 1008 )
1022 def create_api_key_authenticator( 1023 self, 1024 model: ApiKeyAuthenticatorModel, 1025 config: Config, 1026 token_provider: Optional[TokenProvider] = None, 1027 **kwargs: Any, 1028 ) -> ApiKeyAuthenticator: 1029 if model.inject_into is None and model.header is None: 1030 raise ValueError( 1031 "Expected either inject_into or header to be set for ApiKeyAuthenticator" 1032 ) 1033 1034 if model.inject_into is not None and model.header is not None: 1035 raise ValueError( 1036 "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option" 1037 ) 1038 1039 if token_provider is not None and model.api_token != "": 1040 raise ValueError( 1041 "If token_provider is set, api_token is ignored and has to be set to empty string." 1042 ) 1043 1044 request_option = ( 1045 self._create_component_from_model( 1046 model.inject_into, config, parameters=model.parameters or {} 1047 ) 1048 if model.inject_into 1049 else RequestOption( 1050 inject_into=RequestOptionType.header, 1051 field_name=model.header or "", 1052 parameters=model.parameters or {}, 1053 ) 1054 ) 1055 1056 return ApiKeyAuthenticator( 1057 token_provider=( 1058 token_provider 1059 if token_provider is not None 1060 else InterpolatedStringTokenProvider( 1061 api_token=model.api_token or "", 1062 config=config, 1063 parameters=model.parameters or {}, 1064 ) 1065 ), 1066 request_option=request_option, 1067 config=config, 1068 parameters=model.parameters or {}, 1069 )
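A hypothetical ApiKeyAuthenticator definition that passes the validation above (exactly one of inject_into or the deprecated header is set) could look like the following mapping; values are illustrative:

api_key_authenticator_definition = {
    "type": "ApiKeyAuthenticator",
    "api_token": "{{ config['api_key'] }}",
    "inject_into": {
        "type": "RequestOption",
        "inject_into": "header",
        "field_name": "X-API-Key",
    },
    # "header": "X-API-Key",  # deprecated option; cannot be combined with inject_into
}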
1071 def create_legacy_to_per_partition_state_migration( 1072 self, 1073 model: LegacyToPerPartitionStateMigrationModel, 1074 config: Mapping[str, Any], 1075 declarative_stream: DeclarativeStreamModel, 1076 ) -> LegacyToPerPartitionStateMigration: 1077 retriever = declarative_stream.retriever 1078 if not isinstance(retriever, (SimpleRetrieverModel, AsyncRetrieverModel)): 1079 raise ValueError( 1080 f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever or AsyncRetriever. Got {type(retriever)}" 1081 ) 1082 partition_router = retriever.partition_router 1083 if not isinstance( 1084 partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) 1085 ): 1086 raise ValueError( 1087 f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" 1088 ) 1089 if not hasattr(partition_router, "parent_stream_configs"): 1090 raise ValueError( 1091 "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." 1092 ) 1093 1094 if not hasattr(declarative_stream, "incremental_sync"): 1095 raise ValueError( 1096 "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." 1097 ) 1098 1099 return LegacyToPerPartitionStateMigration( 1100 partition_router, # type: ignore # was already checked above 1101 declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 1102 config, 1103 declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] 1104 )
1106 def create_session_token_authenticator( 1107 self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any 1108 ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: 1109 decoder = ( 1110 self._create_component_from_model(model=model.decoder, config=config) 1111 if model.decoder 1112 else JsonDecoder(parameters={}) 1113 ) 1114 login_requester = self._create_component_from_model( 1115 model=model.login_requester, 1116 config=config, 1117 name=f"{name}_login_requester", 1118 decoder=decoder, 1119 ) 1120 token_provider = SessionTokenProvider( 1121 login_requester=login_requester, 1122 session_token_path=model.session_token_path, 1123 expiration_duration=parse_duration(model.expiration_duration) 1124 if model.expiration_duration 1125 else None, 1126 parameters=model.parameters or {}, 1127 message_repository=self._message_repository, 1128 decoder=decoder, 1129 ) 1130 if model.request_authentication.type == "Bearer": 1131 return ModelToComponentFactory.create_bearer_authenticator( 1132 BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value 1133 config, 1134 token_provider=token_provider, 1135 ) 1136 else: 1137 return self.create_api_key_authenticator( 1138 ApiKeyAuthenticatorModel( 1139 type="ApiKeyAuthenticator", 1140 api_token="", 1141 inject_into=model.request_authentication.inject_into, 1142 ), # type: ignore # $parameters and headers default to None 1143 config=config, 1144 token_provider=token_provider, 1145 )
1147 @staticmethod 1148 def create_basic_http_authenticator( 1149 model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any 1150 ) -> BasicHttpAuthenticator: 1151 return BasicHttpAuthenticator( 1152 password=model.password or "", 1153 username=model.username, 1154 config=config, 1155 parameters=model.parameters or {}, 1156 )
1158 @staticmethod 1159 def create_bearer_authenticator( 1160 model: BearerAuthenticatorModel, 1161 config: Config, 1162 token_provider: Optional[TokenProvider] = None, 1163 **kwargs: Any, 1164 ) -> BearerAuthenticator: 1165 if token_provider is not None and model.api_token != "": 1166 raise ValueError( 1167 "If token_provider is set, api_token is ignored and has to be set to empty string." 1168 ) 1169 return BearerAuthenticator( 1170 token_provider=( 1171 token_provider 1172 if token_provider is not None 1173 else InterpolatedStringTokenProvider( 1174 api_token=model.api_token or "", 1175 config=config, 1176 parameters=model.parameters or {}, 1177 ) 1178 ), 1179 config=config, 1180 parameters=model.parameters or {}, 1181 )
1183 @staticmethod 1184 def create_dynamic_stream_check_config( 1185 model: DynamicStreamCheckConfigModel, config: Config, **kwargs: Any 1186 ) -> DynamicStreamCheckConfig: 1187 return DynamicStreamCheckConfig( 1188 dynamic_stream_name=model.dynamic_stream_name, 1189 stream_count=model.stream_count or 0, 1190 )
1192 def create_check_stream( 1193 self, model: CheckStreamModel, config: Config, **kwargs: Any 1194 ) -> CheckStream: 1195 if model.dynamic_streams_check_configs is None and model.stream_names is None: 1196 raise ValueError( 1197 "Expected either stream_names or dynamic_streams_check_configs to be set for CheckStream" 1198 ) 1199 1200 dynamic_streams_check_configs = ( 1201 [ 1202 self._create_component_from_model(model=dynamic_stream_check_config, config=config) 1203 for dynamic_stream_check_config in model.dynamic_streams_check_configs 1204 ] 1205 if model.dynamic_streams_check_configs 1206 else [] 1207 ) 1208 1209 return CheckStream( 1210 stream_names=model.stream_names or [], 1211 dynamic_streams_check_configs=dynamic_streams_check_configs, 1212 parameters={}, 1213 )
1215 @staticmethod 1216 def create_check_dynamic_stream( 1217 model: CheckDynamicStreamModel, config: Config, **kwargs: Any 1218 ) -> CheckDynamicStream: 1219 assert model.use_check_availability is not None # for mypy 1220 1221 use_check_availability = model.use_check_availability 1222 1223 return CheckDynamicStream( 1224 stream_count=model.stream_count, 1225 use_check_availability=use_check_availability, 1226 parameters={}, 1227 )
1229 def create_composite_error_handler( 1230 self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any 1231 ) -> CompositeErrorHandler: 1232 error_handlers = [ 1233 self._create_component_from_model(model=error_handler_model, config=config) 1234 for error_handler_model in model.error_handlers 1235 ] 1236 return CompositeErrorHandler( 1237 error_handlers=error_handlers, parameters=model.parameters or {} 1238 )
1240 @staticmethod 1241 def create_concurrency_level( 1242 model: ConcurrencyLevelModel, config: Config, **kwargs: Any 1243 ) -> ConcurrencyLevel: 1244 return ConcurrencyLevel( 1245 default_concurrency=model.default_concurrency, 1246 max_concurrency=model.max_concurrency, 1247 config=config, 1248 parameters={}, 1249 )
1251 @staticmethod 1252 def apply_stream_state_migrations( 1253 stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] 1254 ) -> MutableMapping[str, Any]: 1255 if stream_state_migrations: 1256 for state_migration in stream_state_migrations: 1257 if state_migration.should_migrate(stream_state): 1258 # The state variable is expected to be mutable but the migrate method returns an immutable mapping. 1259 stream_state = dict(state_migration.migrate(stream_state)) 1260 return stream_state
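A minimal sketch of the duck-typed interface relied on above (should_migrate / migrate); the migration class and the cursor key names are hypothetical:

from typing import Any, Mapping, MutableMapping

from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
    ModelToComponentFactory,
)


class RenameCursorKeyMigration:
    def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
        return "updated" in stream_state

    def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
        return {"updated_at": stream_state["updated"]}


state: MutableMapping[str, Any] = {"updated": "2024-01-01T00:00:00Z"}
migrated = ModelToComponentFactory.apply_stream_state_migrations(
    [RenameCursorKeyMigration()], state
)
# migrated == {"updated_at": "2024-01-01T00:00:00Z"}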
1262 def create_concurrent_cursor_from_datetime_based_cursor( 1263 self, 1264 model_type: Type[BaseModel], 1265 component_definition: ComponentDefinition, 1266 stream_name: str, 1267 stream_namespace: Optional[str], 1268 stream_state: MutableMapping[str, Any], 1269 config: Config, 1270 message_repository: Optional[MessageRepository] = None, 1271 runtime_lookback_window: Optional[datetime.timedelta] = None, 1272 **kwargs: Any, 1273 ) -> ConcurrentCursor: 1274 component_type = component_definition.get("type") 1275 if component_definition.get("type") != model_type.__name__: 1276 raise ValueError( 1277 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1278 ) 1279 1280 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1281 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1282 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1283 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1284 if "$parameters" not in component_definition and "parameters" in component_definition: 1285 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1286 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1287 1288 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1289 raise ValueError( 1290 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1291 ) 1292 1293 model_parameters = datetime_based_cursor_model.parameters or {} 1294 interpolated_cursor_field = InterpolatedString.create( 1295 datetime_based_cursor_model.cursor_field, 1296 parameters=model_parameters, 1297 ) 1298 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1299 1300 interpolated_partition_field_start = InterpolatedString.create( 1301 datetime_based_cursor_model.partition_field_start or "start_time", 1302 parameters=model_parameters, 1303 ) 1304 interpolated_partition_field_end = InterpolatedString.create( 1305 datetime_based_cursor_model.partition_field_end or "end_time", 1306 parameters=model_parameters, 1307 ) 1308 1309 slice_boundary_fields = ( 1310 interpolated_partition_field_start.eval(config=config), 1311 interpolated_partition_field_end.eval(config=config), 1312 ) 1313 1314 datetime_format = datetime_based_cursor_model.datetime_format 1315 1316 cursor_granularity = ( 1317 parse_duration(datetime_based_cursor_model.cursor_granularity) 1318 if datetime_based_cursor_model.cursor_granularity 1319 else None 1320 ) 1321 1322 lookback_window = None 1323 interpolated_lookback_window = ( 1324 InterpolatedString.create( 1325 datetime_based_cursor_model.lookback_window, 1326 parameters=model_parameters, 1327 ) 1328 if datetime_based_cursor_model.lookback_window 1329 else None 1330 ) 1331 if interpolated_lookback_window: 1332 evaluated_lookback_window = interpolated_lookback_window.eval(config=config) 1333 if 
evaluated_lookback_window: 1334 lookback_window = parse_duration(evaluated_lookback_window) 1335 1336 connector_state_converter: DateTimeStreamStateConverter 1337 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1338 datetime_format=datetime_format, 1339 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1340 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1341 cursor_granularity=cursor_granularity, 1342 ) 1343 1344 # Adjusts the stream state by applying the runtime lookback window. 1345 # This is used to ensure correct state handling in case of failed partitions. 1346 stream_state_value = stream_state.get(cursor_field.cursor_field_key) 1347 if runtime_lookback_window and stream_state_value: 1348 new_stream_state = ( 1349 connector_state_converter.parse_timestamp(stream_state_value) 1350 - runtime_lookback_window 1351 ) 1352 stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format( 1353 new_stream_state 1354 ) 1355 1356 start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] 1357 if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): 1358 start_date_runtime_value = self.create_min_max_datetime( 1359 model=datetime_based_cursor_model.start_datetime, config=config 1360 ) 1361 else: 1362 start_date_runtime_value = datetime_based_cursor_model.start_datetime 1363 1364 end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] 1365 if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): 1366 end_date_runtime_value = self.create_min_max_datetime( 1367 model=datetime_based_cursor_model.end_datetime, config=config 1368 ) 1369 else: 1370 end_date_runtime_value = datetime_based_cursor_model.end_datetime 1371 1372 interpolated_start_date = MinMaxDatetime.create( 1373 interpolated_string_or_min_max_datetime=start_date_runtime_value, 1374 parameters=datetime_based_cursor_model.parameters, 1375 ) 1376 interpolated_end_date = ( 1377 None 1378 if not end_date_runtime_value 1379 else MinMaxDatetime.create( 1380 end_date_runtime_value, datetime_based_cursor_model.parameters 1381 ) 1382 ) 1383 1384 # If datetime format is not specified then start/end datetime should inherit it from the stream slicer 1385 if not interpolated_start_date.datetime_format: 1386 interpolated_start_date.datetime_format = datetime_format 1387 if interpolated_end_date and not interpolated_end_date.datetime_format: 1388 interpolated_end_date.datetime_format = datetime_format 1389 1390 start_date = interpolated_start_date.get_datetime(config=config) 1391 end_date_provider = ( 1392 partial(interpolated_end_date.get_datetime, config) 1393 if interpolated_end_date 1394 else connector_state_converter.get_end_provider() 1395 ) 1396 1397 if ( 1398 datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity 1399 ) or ( 1400 not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity 1401 ): 1402 raise ValueError( 1403 f"If step is defined, cursor_granularity should be as well and vice-versa. 
" 1404 f"Right now, step is `{datetime_based_cursor_model.step}` and cursor_granularity is `{datetime_based_cursor_model.cursor_granularity}`" 1405 ) 1406 1407 # When step is not defined, default to a step size from the starting date to the present moment 1408 step_length = datetime.timedelta.max 1409 interpolated_step = ( 1410 InterpolatedString.create( 1411 datetime_based_cursor_model.step, 1412 parameters=model_parameters, 1413 ) 1414 if datetime_based_cursor_model.step 1415 else None 1416 ) 1417 if interpolated_step: 1418 evaluated_step = interpolated_step.eval(config) 1419 if evaluated_step: 1420 step_length = parse_duration(evaluated_step) 1421 1422 clamping_strategy: ClampingStrategy = NoClamping() 1423 if datetime_based_cursor_model.clamping: 1424 # While it is undesirable to interpolate within the model factory (as opposed to at runtime), 1425 # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime 1426 # object which we want to keep agnostic of being low-code 1427 target = InterpolatedString( 1428 string=datetime_based_cursor_model.clamping.target, 1429 parameters=model_parameters, 1430 ) 1431 evaluated_target = target.eval(config=config) 1432 match evaluated_target: 1433 case "DAY": 1434 clamping_strategy = DayClampingStrategy() 1435 end_date_provider = ClampingEndProvider( 1436 DayClampingStrategy(is_ceiling=False), 1437 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1438 granularity=cursor_granularity or datetime.timedelta(seconds=1), 1439 ) 1440 case "WEEK": 1441 if ( 1442 not datetime_based_cursor_model.clamping.target_details 1443 or "weekday" not in datetime_based_cursor_model.clamping.target_details 1444 ): 1445 raise ValueError( 1446 "Given WEEK clamping, weekday needs to be provided as target_details" 1447 ) 1448 weekday = self._assemble_weekday( 1449 datetime_based_cursor_model.clamping.target_details["weekday"] 1450 ) 1451 clamping_strategy = WeekClampingStrategy(weekday) 1452 end_date_provider = ClampingEndProvider( 1453 WeekClampingStrategy(weekday, is_ceiling=False), 1454 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1455 granularity=cursor_granularity or datetime.timedelta(days=1), 1456 ) 1457 case "MONTH": 1458 clamping_strategy = MonthClampingStrategy() 1459 end_date_provider = ClampingEndProvider( 1460 MonthClampingStrategy(is_ceiling=False), 1461 end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1462 granularity=cursor_granularity or datetime.timedelta(days=1), 1463 ) 1464 case _: 1465 raise ValueError( 1466 f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH" 1467 ) 1468 1469 return ConcurrentCursor( 1470 stream_name=stream_name, 1471 stream_namespace=stream_namespace, 1472 stream_state=stream_state, 1473 message_repository=message_repository or self._message_repository, 1474 connector_state_manager=self._connector_state_manager, 1475 connector_state_converter=connector_state_converter, 1476 cursor_field=cursor_field, 1477 slice_boundary_fields=slice_boundary_fields, 1478 start=start_date, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice 1479 end_provider=end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1480 lookback_window=lookback_window, 1481 slice_range=step_length, 1482 cursor_granularity=cursor_granularity, 1483 clamping_strategy=clamping_strategy, 1484 )
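For illustration, a DatetimeBasedCursor component definition that satisfies the step / cursor_granularity pairing rule enforced above (both set, or both omitted); the values are hypothetical:

datetime_cursor_definition = {
    "type": "DatetimeBasedCursor",
    "cursor_field": "updated_at",
    "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
    "cursor_datetime_formats": ["%Y-%m-%dT%H:%M:%SZ"],
    "start_datetime": "{{ config['start_date'] }}",
    "step": "P1D",                 # ISO 8601 duration; omitting step (and cursor_granularity) defaults the slice range to timedelta.max
    "cursor_granularity": "PT1S",
}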
1486 def create_concurrent_cursor_from_incrementing_count_cursor( 1487 self, 1488 model_type: Type[BaseModel], 1489 component_definition: ComponentDefinition, 1490 stream_name: str, 1491 stream_namespace: Optional[str], 1492 stream_state: MutableMapping[str, Any], 1493 config: Config, 1494 message_repository: Optional[MessageRepository] = None, 1495 **kwargs: Any, 1496 ) -> ConcurrentCursor: 1497 component_type = component_definition.get("type") 1498 if component_definition.get("type") != model_type.__name__: 1499 raise ValueError( 1500 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1501 ) 1502 1503 incrementing_count_cursor_model = model_type.parse_obj(component_definition) 1504 1505 if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel): 1506 raise ValueError( 1507 f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}" 1508 ) 1509 1510 interpolated_start_value = ( 1511 InterpolatedString.create( 1512 incrementing_count_cursor_model.start_value, # type: ignore 1513 parameters=incrementing_count_cursor_model.parameters or {}, 1514 ) 1515 if incrementing_count_cursor_model.start_value 1516 else 0 1517 ) 1518 1519 interpolated_cursor_field = InterpolatedString.create( 1520 incrementing_count_cursor_model.cursor_field, 1521 parameters=incrementing_count_cursor_model.parameters or {}, 1522 ) 1523 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1524 1525 connector_state_converter = IncrementingCountStreamStateConverter( 1526 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1527 ) 1528 1529 return ConcurrentCursor( 1530 stream_name=stream_name, 1531 stream_namespace=stream_namespace, 1532 stream_state=stream_state, 1533 message_repository=message_repository or self._message_repository, 1534 connector_state_manager=self._connector_state_manager, 1535 connector_state_converter=connector_state_converter, 1536 cursor_field=cursor_field, 1537 slice_boundary_fields=None, 1538 start=interpolated_start_value, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1539 end_provider=connector_state_converter.get_end_provider(), # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice 1540 )
1561 def create_concurrent_cursor_from_perpartition_cursor( 1562 self, 1563 state_manager: ConnectorStateManager, 1564 model_type: Type[BaseModel], 1565 component_definition: ComponentDefinition, 1566 stream_name: str, 1567 stream_namespace: Optional[str], 1568 config: Config, 1569 stream_state: MutableMapping[str, Any], 1570 partition_router: PartitionRouter, 1571 attempt_to_create_cursor_if_not_provided: bool = False, 1572 **kwargs: Any, 1573 ) -> ConcurrentPerPartitionCursor: 1574 component_type = component_definition.get("type") 1575 if component_definition.get("type") != model_type.__name__: 1576 raise ValueError( 1577 f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" 1578 ) 1579 1580 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1581 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1582 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1583 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 1584 if "$parameters" not in component_definition and "parameters" in component_definition: 1585 component_definition["$parameters"] = component_definition.get("parameters") # type: ignore # This is a dict 1586 datetime_based_cursor_model = model_type.parse_obj(component_definition) 1587 1588 if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): 1589 raise ValueError( 1590 f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" 1591 ) 1592 1593 interpolated_cursor_field = InterpolatedString.create( 1594 datetime_based_cursor_model.cursor_field, 1595 # FIXME the interfaces of the concurrent cursor are kind of annoying as they take a `ComponentDefinition` instead of the actual model. This was done because the ConcurrentDeclarativeSource didn't have access to the models [here for example](https://github.com/airbytehq/airbyte-python-cdk/blob/f525803b3fec9329e4cc8478996a92bf884bfde9/airbyte_cdk/sources/declarative/concurrent_declarative_source.py#L354C54-L354C91). So now we have two cases: 1596 # * The ComponentDefinition comes from model.__dict__ in which case we have `parameters` 1597 # * The ComponentDefinition comes from the manifest as a dict in which case we have `$parameters` 1598 # We should change those interfaces to use the model once we clean up the code in CDS at which point the parameter propagation should happen as part of the ModelToComponentFactory. 
1599 parameters=datetime_based_cursor_model.parameters or {}, 1600 ) 1601 cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) 1602 1603 datetime_format = datetime_based_cursor_model.datetime_format 1604 1605 cursor_granularity = ( 1606 parse_duration(datetime_based_cursor_model.cursor_granularity) 1607 if datetime_based_cursor_model.cursor_granularity 1608 else None 1609 ) 1610 1611 connector_state_converter: DateTimeStreamStateConverter 1612 connector_state_converter = CustomFormatConcurrentStreamStateConverter( 1613 datetime_format=datetime_format, 1614 input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, 1615 is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state 1616 cursor_granularity=cursor_granularity, 1617 ) 1618 1619 # Create the cursor factory 1620 cursor_factory = ConcurrentCursorFactory( 1621 partial( 1622 self.create_concurrent_cursor_from_datetime_based_cursor, 1623 state_manager=state_manager, 1624 model_type=model_type, 1625 component_definition=component_definition, 1626 stream_name=stream_name, 1627 stream_namespace=stream_namespace, 1628 config=config, 1629 message_repository=NoopMessageRepository(), 1630 ) 1631 ) 1632 1633 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state 1634 use_global_cursor = isinstance( 1635 partition_router, GroupingPartitionRouter 1636 ) or component_definition.get("global_substream_cursor", False) 1637 1638 # Return the concurrent cursor and state converter 1639 return ConcurrentPerPartitionCursor( 1640 cursor_factory=cursor_factory, 1641 partition_router=partition_router, 1642 stream_name=stream_name, 1643 stream_namespace=stream_namespace, 1644 stream_state=stream_state, 1645 message_repository=self._message_repository, # type: ignore 1646 connector_state_manager=state_manager, 1647 connector_state_converter=connector_state_converter, 1648 cursor_field=cursor_field, 1649 use_global_cursor=use_global_cursor, 1650 attempt_to_create_cursor_if_not_provided=attempt_to_create_cursor_if_not_provided, 1651 )
1653 @staticmethod 1654 def create_constant_backoff_strategy( 1655 model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any 1656 ) -> ConstantBackoffStrategy: 1657 return ConstantBackoffStrategy( 1658 backoff_time_in_seconds=model.backoff_time_in_seconds, 1659 config=config, 1660 parameters=model.parameters or {}, 1661 )
1663 def create_cursor_pagination( 1664 self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any 1665 ) -> CursorPaginationStrategy: 1666 if isinstance(decoder, PaginationDecoderDecorator): 1667 inner_decoder = decoder.decoder 1668 else: 1669 inner_decoder = decoder 1670 decoder = PaginationDecoderDecorator(decoder=decoder) 1671 1672 if self._is_supported_decoder_for_pagination(inner_decoder): 1673 decoder_to_use = decoder 1674 else: 1675 raise ValueError( 1676 self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder)) 1677 ) 1678 1679 return CursorPaginationStrategy( 1680 cursor_value=model.cursor_value, 1681 decoder=decoder_to_use, 1682 page_size=model.page_size, 1683 stop_condition=model.stop_condition, 1684 config=config, 1685 parameters=model.parameters or {}, 1686 )
1688 def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: 1689 """ 1690 Generically creates a custom component based on the model type and a class_name reference to the custom Python class being 1691 instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor 1692 :param model: The Pydantic model of the custom component being created 1693 :param config: The custom defined connector config 1694 :return: The declarative component built from the Pydantic model to be used at runtime 1695 """ 1696 custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) 1697 component_fields = get_type_hints(custom_component_class) 1698 model_args = model.dict() 1699 model_args["config"] = config 1700 1701 # There are cases where a parent component will pass arguments to a child component via kwargs. When there are field collisions 1702 # we defer to these arguments over the component's definition 1703 for key, arg in kwargs.items(): 1704 model_args[key] = arg 1705 1706 # Pydantic is unable to parse a custom component's fields that are subcomponents into models because their fields and types are not 1707 # defined in the schema. The fields and types are defined within the Python class implementation. Pydantic can only parse down to 1708 # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components 1709 for model_field, model_value in model_args.items(): 1710 # If a custom component field doesn't have a type set, we try to use the type hints to infer the type 1711 if ( 1712 isinstance(model_value, dict) 1713 and "type" not in model_value 1714 and model_field in component_fields 1715 ): 1716 derived_type = self._derive_component_type_from_type_hints( 1717 component_fields.get(model_field) 1718 ) 1719 if derived_type: 1720 model_value["type"] = derived_type 1721 1722 if self._is_component(model_value): 1723 model_args[model_field] = self._create_nested_component( 1724 model, 1725 model_field, 1726 model_value, 1727 config, 1728 **kwargs, 1729 ) 1730 elif isinstance(model_value, list): 1731 vals = [] 1732 for v in model_value: 1733 if isinstance(v, dict) and "type" not in v and model_field in component_fields: 1734 derived_type = self._derive_component_type_from_type_hints( 1735 component_fields.get(model_field) 1736 ) 1737 if derived_type: 1738 v["type"] = derived_type 1739 if self._is_component(v): 1740 vals.append( 1741 self._create_nested_component( 1742 model, 1743 model_field, 1744 v, 1745 config, 1746 **kwargs, 1747 ) 1748 ) 1749 else: 1750 vals.append(v) 1751 model_args[model_field] = vals 1752 1753 kwargs = { 1754 class_field: model_args[class_field] 1755 for class_field in component_fields.keys() 1756 if class_field in model_args 1757 } 1758 return custom_component_class(**kwargs)
Generically creates a custom component based on the model type and a class_name reference to the custom Python class being instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor
Parameters
- model: The Pydantic model of the custom component being created
- config: The custom defined connector config
Returns
The declarative component built from the Pydantic model to be used at runtime
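The filtering step at the end of the listing can be illustrated in isolation: only attributes that appear in the custom class's type hints are forwarded as constructor keyword arguments. The class and field names below are hypothetical:

from dataclasses import dataclass
from typing import Any, Mapping, get_type_hints


@dataclass
class MyCustomRecordFilter:
    config: Mapping[str, Any]
    condition: str = ""


model_args = {"config": {}, "condition": "{{ record['active'] }}", "unrelated_field": 1}
component_fields = get_type_hints(MyCustomRecordFilter)
kwargs = {name: model_args[name] for name in component_fields if name in model_args}
instance = MyCustomRecordFilter(**kwargs)  # "unrelated_field" is dropped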
1893 def create_datetime_based_cursor( 1894 self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any 1895 ) -> DatetimeBasedCursor: 1896 start_datetime: Union[str, MinMaxDatetime] = ( 1897 model.start_datetime 1898 if isinstance(model.start_datetime, str) 1899 else self.create_min_max_datetime(model.start_datetime, config) 1900 ) 1901 end_datetime: Union[str, MinMaxDatetime, None] = None 1902 if model.is_data_feed and model.end_datetime: 1903 raise ValueError("Data feed does not support end_datetime") 1904 if model.is_data_feed and model.is_client_side_incremental: 1905 raise ValueError( 1906 "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them." 1907 ) 1908 if model.end_datetime: 1909 end_datetime = ( 1910 model.end_datetime 1911 if isinstance(model.end_datetime, str) 1912 else self.create_min_max_datetime(model.end_datetime, config) 1913 ) 1914 1915 end_time_option = ( 1916 self._create_component_from_model( 1917 model.end_time_option, config, parameters=model.parameters or {} 1918 ) 1919 if model.end_time_option 1920 else None 1921 ) 1922 start_time_option = ( 1923 self._create_component_from_model( 1924 model.start_time_option, config, parameters=model.parameters or {} 1925 ) 1926 if model.start_time_option 1927 else None 1928 ) 1929 1930 return DatetimeBasedCursor( 1931 cursor_field=model.cursor_field, 1932 cursor_datetime_formats=model.cursor_datetime_formats 1933 if model.cursor_datetime_formats 1934 else [], 1935 cursor_granularity=model.cursor_granularity, 1936 datetime_format=model.datetime_format, 1937 end_datetime=end_datetime, 1938 start_datetime=start_datetime, 1939 step=model.step, 1940 end_time_option=end_time_option, 1941 lookback_window=model.lookback_window, 1942 start_time_option=start_time_option, 1943 partition_field_end=model.partition_field_end, 1944 partition_field_start=model.partition_field_start, 1945 message_repository=self._message_repository, 1946 is_compare_strictly=model.is_compare_strictly, 1947 config=config, 1948 parameters=model.parameters or {}, 1949 )
1951 def create_default_stream( 1952 self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any 1953 ) -> AbstractStream: 1954 primary_key = model.primary_key.__root__ if model.primary_key else None 1955 self._migrate_state(model, config) 1956 1957 partition_router = self._build_stream_slicer_from_partition_router( 1958 model.retriever, 1959 config, 1960 stream_name=model.name, 1961 **kwargs, 1962 ) 1963 concurrent_cursor = self._build_concurrent_cursor(model, partition_router, config) 1964 if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel): 1965 cursor_model: DatetimeBasedCursorModel = model.incremental_sync 1966 1967 end_time_option = ( 1968 self._create_component_from_model( 1969 cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} 1970 ) 1971 if cursor_model.end_time_option 1972 else None 1973 ) 1974 start_time_option = ( 1975 self._create_component_from_model( 1976 cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} 1977 ) 1978 if cursor_model.start_time_option 1979 else None 1980 ) 1981 1982 datetime_request_options_provider = DatetimeBasedRequestOptionsProvider( 1983 start_time_option=start_time_option, 1984 end_time_option=end_time_option, 1985 partition_field_start=cursor_model.partition_field_start, 1986 partition_field_end=cursor_model.partition_field_end, 1987 config=config, 1988 parameters=model.parameters or {}, 1989 ) 1990 request_options_provider = ( 1991 datetime_request_options_provider 1992 if not isinstance(concurrent_cursor, ConcurrentPerPartitionCursor) 1993 else PerPartitionRequestOptionsProvider( 1994 partition_router, datetime_request_options_provider 1995 ) 1996 ) 1997 elif model.incremental_sync and isinstance( 1998 model.incremental_sync, IncrementingCountCursorModel 1999 ): 2000 if isinstance(concurrent_cursor, ConcurrentPerPartitionCursor): 2001 raise ValueError( 2002 "PerPartition does not support per partition states because switching to global state is time based" 2003 ) 2004 2005 cursor_model: IncrementingCountCursorModel = model.incremental_sync # type: ignore 2006 2007 start_time_option = ( 2008 self._create_component_from_model( 2009 cursor_model.start_value_option, # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2010 config, 2011 parameters=cursor_model.parameters or {}, 2012 ) 2013 if cursor_model.start_value_option # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor 2014 else None 2015 ) 2016 2017 # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but 2018 # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time 2019 partition_field_start = "start" 2020 2021 request_options_provider = DatetimeBasedRequestOptionsProvider( 2022 start_time_option=start_time_option, 2023 partition_field_start=partition_field_start, 2024 config=config, 2025 parameters=model.parameters or {}, 2026 ) 2027 else: 2028 request_options_provider = None 2029 2030 transformations = [] 2031 if model.transformations: 2032 for transformation_model in model.transformations: 2033 transformations.append( 2034 self._create_component_from_model(model=transformation_model, config=config) 2035 ) 2036 file_uploader = None 2037 if model.file_uploader: 2038 file_uploader = self._create_component_from_model( 2039 model=model.file_uploader, config=config 2040 ) 2041 2042 stream_slicer: ConcurrentStreamSlicer = ( 2043 partition_router 2044 if 
isinstance(concurrent_cursor, FinalStateCursor) 2045 else concurrent_cursor 2046 ) 2047 retriever = self._create_component_from_model( 2048 model=model.retriever, 2049 config=config, 2050 name=model.name, 2051 primary_key=primary_key, 2052 request_options_provider=request_options_provider, 2053 stream_slicer=stream_slicer, 2054 partition_router=partition_router, 2055 stop_condition_cursor=concurrent_cursor 2056 if self._is_stop_condition_on_cursor(model) 2057 else None, 2058 client_side_incremental_sync={"cursor": concurrent_cursor} 2059 if self._is_client_side_filtering_enabled(model) 2060 else None, 2061 transformations=transformations, 2062 file_uploader=file_uploader, 2063 incremental_sync=model.incremental_sync, 2064 ) 2065 if isinstance(retriever, AsyncRetriever): 2066 stream_slicer = retriever.stream_slicer 2067 2068 schema_loader: Union[ 2069 CompositeSchemaLoader, 2070 DefaultSchemaLoader, 2071 DynamicSchemaLoader, 2072 InlineSchemaLoader, 2073 JsonFileSchemaLoader, 2074 ] 2075 if model.schema_loader and isinstance(model.schema_loader, list): 2076 nested_schema_loaders = [ 2077 self._create_component_from_model(model=nested_schema_loader, config=config) 2078 for nested_schema_loader in model.schema_loader 2079 ] 2080 schema_loader = CompositeSchemaLoader( 2081 schema_loaders=nested_schema_loaders, parameters={} 2082 ) 2083 elif model.schema_loader: 2084 schema_loader = self._create_component_from_model( 2085 model=model.schema_loader, # type: ignore # If defined, schema_loader is guaranteed not to be a list and will be one of the existing base models 2086 config=config, 2087 ) 2088 else: 2089 options = model.parameters or {} 2090 if "name" not in options: 2091 options["name"] = model.name 2092 schema_loader = DefaultSchemaLoader(config=config, parameters=options) 2093 2094 stream_name = model.name or "" 2095 return DefaultStream( 2096 partition_generator=StreamSlicerPartitionGenerator( 2097 DeclarativePartitionFactory( 2098 stream_name, 2099 schema_loader, 2100 retriever, 2101 self._message_repository, 2102 ), 2103 stream_slicer, 2104 slice_limit=self._limit_slices_fetched, 2105 ), 2106 name=stream_name, 2107 json_schema=schema_loader.get_json_schema, 2108 primary_key=get_primary_key_from_stream(primary_key), 2109 cursor_field=concurrent_cursor.cursor_field.cursor_field_key 2110 if hasattr(concurrent_cursor, "cursor_field") 2111 else "", # FIXME we should have the cursor field has part of the interface of cursor, 2112 logger=logging.getLogger(f"airbyte.{stream_name}"), 2113 cursor=concurrent_cursor, 2114 supports_file_transfer=hasattr(model, "file_uploader") and bool(model.file_uploader), 2115 )
2249 def create_default_error_handler( 2250 self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any 2251 ) -> DefaultErrorHandler: 2252 backoff_strategies = [] 2253 if model.backoff_strategies: 2254 for backoff_strategy_model in model.backoff_strategies: 2255 backoff_strategies.append( 2256 self._create_component_from_model(model=backoff_strategy_model, config=config) 2257 ) 2258 2259 response_filters = [] 2260 if model.response_filters: 2261 for response_filter_model in model.response_filters: 2262 response_filters.append( 2263 self._create_component_from_model(model=response_filter_model, config=config) 2264 ) 2265 response_filters.append( 2266 HttpResponseFilter(config=config, parameters=model.parameters or {}) 2267 ) 2268 2269 return DefaultErrorHandler( 2270 backoff_strategies=backoff_strategies, 2271 max_retries=model.max_retries, 2272 response_filters=response_filters, 2273 config=config, 2274 parameters=model.parameters or {}, 2275 )
2277 def create_default_paginator( 2278 self, 2279 model: DefaultPaginatorModel, 2280 config: Config, 2281 *, 2282 url_base: str, 2283 extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None, 2284 decoder: Optional[Decoder] = None, 2285 cursor_used_for_stop_condition: Optional[Cursor] = None, 2286 ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: 2287 if decoder: 2288 if self._is_supported_decoder_for_pagination(decoder): 2289 decoder_to_use = PaginationDecoderDecorator(decoder=decoder) 2290 else: 2291 raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder))) 2292 else: 2293 decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) 2294 page_size_option = ( 2295 self._create_component_from_model(model=model.page_size_option, config=config) 2296 if model.page_size_option 2297 else None 2298 ) 2299 page_token_option = ( 2300 self._create_component_from_model(model=model.page_token_option, config=config) 2301 if model.page_token_option 2302 else None 2303 ) 2304 pagination_strategy = self._create_component_from_model( 2305 model=model.pagination_strategy, 2306 config=config, 2307 decoder=decoder_to_use, 2308 extractor_model=extractor_model, 2309 ) 2310 if cursor_used_for_stop_condition: 2311 pagination_strategy = StopConditionPaginationStrategyDecorator( 2312 pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) 2313 ) 2314 paginator = DefaultPaginator( 2315 decoder=decoder_to_use, 2316 page_size_option=page_size_option, 2317 page_token_option=page_token_option, 2318 pagination_strategy=pagination_strategy, 2319 url_base=url_base, 2320 config=config, 2321 parameters=model.parameters or {}, 2322 ) 2323 if self._limit_pages_fetched_per_slice: 2324 return PaginatorTestReadDecorator(paginator, self._limit_pages_fetched_per_slice) 2325 return paginator
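    # Illustrative example for create_default_paginator above (hypothetical values; the field names
    # mirror the DefaultPaginatorModel attributes consumed by this method):
    #
    #   paginator:
    #     type: DefaultPaginator
    #     pagination_strategy:
    #       type: OffsetIncrement
    #       page_size: 100
    #     page_size_option:
    #       type: RequestOption
    #       inject_into: request_parameter
    #       field_name: limit
    #     page_token_option:
    #       type: RequestOption
    #       inject_into: request_parameter
    #       field_name: offset
    #
    # When a cursor is passed via `cursor_used_for_stop_condition`, the strategy is wrapped in a
    # StopConditionPaginationStrategyDecorator so pagination can stop once the cursor indicates no
    # further records are needed; when `_limit_pages_fetched_per_slice` is set, the paginator is
    # wrapped in a PaginatorTestReadDecorator.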
2327 def create_dpath_extractor( 2328 self, 2329 model: DpathExtractorModel, 2330 config: Config, 2331 decoder: Optional[Decoder] = None, 2332 **kwargs: Any, 2333 ) -> DpathExtractor: 2334 if decoder: 2335 decoder_to_use = decoder 2336 else: 2337 decoder_to_use = JsonDecoder(parameters={}) 2338 model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] 2339 return DpathExtractor( 2340 decoder=decoder_to_use, 2341 field_path=model_field_path, 2342 config=config, 2343 parameters=model.parameters or {}, 2344 )
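    # Illustrative example for create_dpath_extractor above (hypothetical values): an extractor that
    # reads records from response["data"]["items"], using a JsonDecoder unless another decoder is
    # supplied.
    #
    #   extractor:
    #     type: DpathExtractor
    #     field_path: ["data", "items"]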
2365 def create_http_requester( 2366 self, 2367 model: HttpRequesterModel, 2368 config: Config, 2369 decoder: Decoder = JsonDecoder(parameters={}), 2370 query_properties_key: Optional[str] = None, 2371 use_cache: Optional[bool] = None, 2372 *, 2373 name: str, 2374 ) -> HttpRequester: 2375 authenticator = ( 2376 self._create_component_from_model( 2377 model=model.authenticator, 2378 config=config, 2379 url_base=model.url or model.url_base, 2380 name=name, 2381 decoder=decoder, 2382 ) 2383 if model.authenticator 2384 else None 2385 ) 2386 error_handler = ( 2387 self._create_component_from_model(model=model.error_handler, config=config) 2388 if model.error_handler 2389 else DefaultErrorHandler( 2390 backoff_strategies=[], 2391 response_filters=[], 2392 config=config, 2393 parameters=model.parameters or {}, 2394 ) 2395 ) 2396 2397 api_budget = self._api_budget 2398 2399 # Removes QueryProperties components from the interpolated mappings because it has been designed 2400 # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly 2401 # instead of through jinja interpolation 2402 request_parameters: Optional[Union[str, Mapping[str, str]]] 2403 if isinstance(model.request_parameters, Mapping): 2404 request_parameters = self._remove_query_properties(model.request_parameters) 2405 else: 2406 request_parameters = model.request_parameters 2407 2408 request_options_provider = InterpolatedRequestOptionsProvider( 2409 request_body=model.request_body, 2410 request_body_data=model.request_body_data, 2411 request_body_json=model.request_body_json, 2412 request_headers=model.request_headers, 2413 request_parameters=request_parameters, 2414 query_properties_key=query_properties_key, 2415 config=config, 2416 parameters=model.parameters or {}, 2417 ) 2418 2419 assert model.use_cache is not None # for mypy 2420 assert model.http_method is not None # for mypy 2421 2422 should_use_cache = (model.use_cache or bool(use_cache)) and not self._disable_cache 2423 2424 return HttpRequester( 2425 name=name, 2426 url=model.url, 2427 url_base=model.url_base, 2428 path=model.path, 2429 authenticator=authenticator, 2430 error_handler=error_handler, 2431 api_budget=api_budget, 2432 http_method=HttpMethod[model.http_method.value], 2433 request_options_provider=request_options_provider, 2434 config=config, 2435 disable_retries=self._disable_retries, 2436 parameters=model.parameters or {}, 2437 message_repository=self._message_repository, 2438 use_cache=should_use_cache, 2439 decoder=decoder, 2440 stream_response=decoder.is_stream_response() if decoder else False, 2441 )
2443 @staticmethod 2444 def create_http_response_filter( 2445 model: HttpResponseFilterModel, config: Config, **kwargs: Any 2446 ) -> HttpResponseFilter: 2447 if model.action: 2448 action = ResponseAction(model.action.value) 2449 else: 2450 action = None 2451 2452 failure_type = FailureType(model.failure_type.value) if model.failure_type else None 2453 2454 http_codes = ( 2455 set(model.http_codes) if model.http_codes else set() 2456 ) # JSON schema notation has no set data type. The schema enforces an array of unique elements 2457 2458 return HttpResponseFilter( 2459 action=action, 2460 failure_type=failure_type, 2461 error_message=model.error_message or "", 2462 error_message_contains=model.error_message_contains or "", 2463 http_codes=http_codes, 2464 predicate=model.predicate or "", 2465 config=config, 2466 parameters=model.parameters or {}, 2467 )
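    # Illustrative example for create_http_response_filter above (hypothetical values; field names
    # mirror HttpResponseFilterModel):
    #
    #   response_filters:
    #     - type: HttpResponseFilter
    #       action: RETRY
    #       http_codes: [429, 503]
    #       error_message_contains: "rate limit"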
2475 def create_complex_field_type( 2476 self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any 2477 ) -> ComplexFieldType: 2478 items = ( 2479 self._create_component_from_model(model=model.items, config=config) 2480 if isinstance(model.items, ComplexFieldTypeModel) 2481 else model.items 2482 ) 2483 2484 return ComplexFieldType(field_type=model.field_type, items=items)
2486 def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap: 2487 target_type = ( 2488 self._create_component_from_model(model=model.target_type, config=config) 2489 if isinstance(model.target_type, ComplexFieldTypeModel) 2490 else model.target_type 2491 ) 2492 2493 return TypesMap( 2494 target_type=target_type, 2495 current_type=model.current_type, 2496 condition=model.condition if model.condition is not None else "True", 2497 )
2499 def create_schema_type_identifier( 2500 self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any 2501 ) -> SchemaTypeIdentifier: 2502 types_mapping = [] 2503 if model.types_mapping: 2504 types_mapping.extend( 2505 [ 2506 self._create_component_from_model(types_map, config=config) 2507 for types_map in model.types_mapping 2508 ] 2509 ) 2510 model_schema_pointer: List[Union[InterpolatedString, str]] = ( 2511 [x for x in model.schema_pointer] if model.schema_pointer else [] 2512 ) 2513 model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] 2514 model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( 2515 [x for x in model.type_pointer] if model.type_pointer else None 2516 ) 2517 2518 return SchemaTypeIdentifier( 2519 schema_pointer=model_schema_pointer, 2520 key_pointer=model_key_pointer, 2521 type_pointer=model_type_pointer, 2522 types_mapping=types_mapping, 2523 parameters=model.parameters or {}, 2524 )
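    # Illustrative example for create_schema_type_identifier above (hypothetical values), describing
    # where field names and types live in a schema-discovery response:
    #
    #   schema_type_identifier:
    #     type: SchemaTypeIdentifier
    #     schema_pointer: ["fields"]
    #     key_pointer: ["name"]
    #     type_pointer: ["type"]
    #     types_mapping:
    #       - type: TypesMap
    #         target_type: string
    #         current_type: text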
2526 def create_dynamic_schema_loader( 2527 self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any 2528 ) -> DynamicSchemaLoader: 2529 schema_transformations = [] 2530 if model.schema_transformations: 2531 for transformation_model in model.schema_transformations: 2532 schema_transformations.append( 2533 self._create_component_from_model(model=transformation_model, config=config) 2534 ) 2535 name = "dynamic_properties" 2536 retriever = self._create_component_from_model( 2537 model=model.retriever, 2538 config=config, 2539 name=name, 2540 primary_key=None, 2541 partition_router=self._build_stream_slicer_from_partition_router( 2542 model.retriever, config 2543 ), 2544 transformations=[], 2545 use_cache=True, 2546 log_formatter=( 2547 lambda response: format_http_message( 2548 response, 2549 f"Schema loader '{name}' request", 2550 f"Request performed in order to extract schema.", 2551 name, 2552 is_auxiliary=True, 2553 ) 2554 ), 2555 ) 2556 schema_type_identifier = self._create_component_from_model( 2557 model.schema_type_identifier, config=config, parameters=model.parameters or {} 2558 ) 2559 schema_filter = ( 2560 self._create_component_from_model( 2561 model.schema_filter, config=config, parameters=model.parameters or {} 2562 ) 2563 if model.schema_filter is not None 2564 else None 2565 ) 2566 2567 return DynamicSchemaLoader( 2568 retriever=retriever, 2569 config=config, 2570 schema_transformations=schema_transformations, 2571 schema_filter=schema_filter, 2572 schema_type_identifier=schema_type_identifier, 2573 parameters=model.parameters or {}, 2574 )
    def create_gzip_decoder(
        self, model: GzipDecoderModel, config: Config, **kwargs: Any
    ) -> Decoder:
        _compressed_response_types = {
            "gzip",
            "x-gzip",
            "gzip, deflate",
            "x-gzip, deflate",
            "application/zip",
            "application/gzip",
            "application/x-gzip",
            "application/x-zip-compressed",
        }

        gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore # based on the model, we know this will be a GzipParser

        if self._emit_connector_builder_messages:
            # This is surprising, but if the response is not streamed, CompositeRawDecoder calls
            # response.content and the requests library decompresses the data, whereas response.raw
            # uses urllib3 directly and does not decompress it.
            return CompositeRawDecoder(gzip_parser.inner_parser, False)

        return CompositeRawDecoder.by_headers(
            [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
            stream_response=True,
            fallback_parser=gzip_parser.inner_parser,
        )
    @staticmethod
    def create_incrementing_count_cursor(
        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
    ) -> DatetimeBasedCursor:
        # This should not actually be used anywhere at runtime, but it is needed to pass checks since
        # we still parse models into components. The issue is that there is no runtime implementation
        # of an IncrementingCountCursor.
        # A known and expected limitation of this stub is that a check run with a declared
        # IncrementingCountCursor executes without a ConcurrentCursor.
        return DatetimeBasedCursor(
            cursor_field=model.cursor_field,
            datetime_format="%Y-%m-%d",
            start_datetime="2024-12-12",
            config=config,
            parameters={},
        )
2687 def create_jwt_authenticator( 2688 self, model: JwtAuthenticatorModel, config: Config, **kwargs: Any 2689 ) -> JwtAuthenticator: 2690 jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) 2691 jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) 2692 request_option = ( 2693 self._create_component_from_model(model.request_option, config) 2694 if model.request_option 2695 else None 2696 ) 2697 return JwtAuthenticator( 2698 config=config, 2699 parameters=model.parameters or {}, 2700 algorithm=JwtAlgorithm(model.algorithm.value), 2701 secret_key=model.secret_key, 2702 base64_encode_secret_key=model.base64_encode_secret_key, 2703 token_duration=model.token_duration, 2704 header_prefix=model.header_prefix, 2705 kid=jwt_headers.kid, 2706 typ=jwt_headers.typ, 2707 cty=jwt_headers.cty, 2708 iss=jwt_payload.iss, 2709 sub=jwt_payload.sub, 2710 aud=jwt_payload.aud, 2711 additional_jwt_headers=model.additional_jwt_headers, 2712 additional_jwt_payload=model.additional_jwt_payload, 2713 passphrase=model.passphrase, 2714 request_option=request_option, 2715 )
2717 def create_list_partition_router( 2718 self, model: ListPartitionRouterModel, config: Config, **kwargs: Any 2719 ) -> ListPartitionRouter: 2720 request_option = ( 2721 self._create_component_from_model(model.request_option, config) 2722 if model.request_option 2723 else None 2724 ) 2725 return ListPartitionRouter( 2726 cursor_field=model.cursor_field, 2727 request_option=request_option, 2728 values=model.values, 2729 config=config, 2730 parameters=model.parameters or {}, 2731 )
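    # Illustrative example for create_list_partition_router above (hypothetical values): one
    # partition per value, with the current value injected as a request parameter:
    #
    #   partition_router:
    #     type: ListPartitionRouter
    #     cursor_field: section
    #     values: ["news", "sports", "weather"]
    #     request_option:
    #       type: RequestOption
    #       inject_into: request_parameter
    #       field_name: section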
2733 @staticmethod 2734 def create_min_max_datetime( 2735 model: MinMaxDatetimeModel, config: Config, **kwargs: Any 2736 ) -> MinMaxDatetime: 2737 return MinMaxDatetime( 2738 datetime=model.datetime, 2739 datetime_format=model.datetime_format or "", 2740 max_datetime=model.max_datetime or "", 2741 min_datetime=model.min_datetime or "", 2742 parameters=model.parameters or {}, 2743 )
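    # Illustrative example for create_min_max_datetime above (hypothetical values): a datetime read
    # from the config but clamped to a minimum bound:
    #
    #   start_datetime:
    #     type: MinMaxDatetime
    #     datetime: "{{ config['start_date'] }}"
    #     datetime_format: "%Y-%m-%dT%H:%M:%SZ"
    #     min_datetime: "2020-01-01T00:00:00Z"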
2755 def create_oauth_authenticator( 2756 self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any 2757 ) -> DeclarativeOauth2Authenticator: 2758 profile_assertion = ( 2759 self._create_component_from_model(model.profile_assertion, config=config) 2760 if model.profile_assertion 2761 else None 2762 ) 2763 2764 if model.refresh_token_updater: 2765 # ignore type error because fixing it would have a lot of dependencies, revisit later 2766 return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore 2767 config, 2768 InterpolatedString.create( 2769 model.token_refresh_endpoint, # type: ignore 2770 parameters=model.parameters or {}, 2771 ).eval(config), 2772 access_token_name=InterpolatedString.create( 2773 model.access_token_name or "access_token", parameters=model.parameters or {} 2774 ).eval(config), 2775 refresh_token_name=model.refresh_token_updater.refresh_token_name, 2776 expires_in_name=InterpolatedString.create( 2777 model.expires_in_name or "expires_in", parameters=model.parameters or {} 2778 ).eval(config), 2779 client_id_name=InterpolatedString.create( 2780 model.client_id_name or "client_id", parameters=model.parameters or {} 2781 ).eval(config), 2782 client_id=InterpolatedString.create( 2783 model.client_id, parameters=model.parameters or {} 2784 ).eval(config) 2785 if model.client_id 2786 else model.client_id, 2787 client_secret_name=InterpolatedString.create( 2788 model.client_secret_name or "client_secret", parameters=model.parameters or {} 2789 ).eval(config), 2790 client_secret=InterpolatedString.create( 2791 model.client_secret, parameters=model.parameters or {} 2792 ).eval(config) 2793 if model.client_secret 2794 else model.client_secret, 2795 access_token_config_path=model.refresh_token_updater.access_token_config_path, 2796 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, 2797 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, 2798 grant_type_name=InterpolatedString.create( 2799 model.grant_type_name or "grant_type", parameters=model.parameters or {} 2800 ).eval(config), 2801 grant_type=InterpolatedString.create( 2802 model.grant_type or "refresh_token", parameters=model.parameters or {} 2803 ).eval(config), 2804 refresh_request_body=InterpolatedMapping( 2805 model.refresh_request_body or {}, parameters=model.parameters or {} 2806 ).eval(config), 2807 refresh_request_headers=InterpolatedMapping( 2808 model.refresh_request_headers or {}, parameters=model.parameters or {} 2809 ).eval(config), 2810 scopes=model.scopes, 2811 token_expiry_date_format=model.token_expiry_date_format, 2812 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2813 message_repository=self._message_repository, 2814 refresh_token_error_status_codes=model.refresh_token_updater.refresh_token_error_status_codes, 2815 refresh_token_error_key=model.refresh_token_updater.refresh_token_error_key, 2816 refresh_token_error_values=model.refresh_token_updater.refresh_token_error_values, 2817 ) 2818 # ignore type error because fixing it would have a lot of dependencies, revisit later 2819 return DeclarativeOauth2Authenticator( # type: ignore 2820 access_token_name=model.access_token_name or "access_token", 2821 access_token_value=model.access_token_value, 2822 client_id_name=model.client_id_name or "client_id", 2823 client_id=model.client_id, 2824 client_secret_name=model.client_secret_name or "client_secret", 2825 client_secret=model.client_secret, 2826 expires_in_name=model.expires_in_name or 
"expires_in", 2827 grant_type_name=model.grant_type_name or "grant_type", 2828 grant_type=model.grant_type or "refresh_token", 2829 refresh_request_body=model.refresh_request_body, 2830 refresh_request_headers=model.refresh_request_headers, 2831 refresh_token_name=model.refresh_token_name or "refresh_token", 2832 refresh_token=model.refresh_token, 2833 scopes=model.scopes, 2834 token_expiry_date=model.token_expiry_date, 2835 token_expiry_date_format=model.token_expiry_date_format, 2836 token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), 2837 token_refresh_endpoint=model.token_refresh_endpoint, 2838 config=config, 2839 parameters=model.parameters or {}, 2840 message_repository=self._message_repository, 2841 profile_assertion=profile_assertion, 2842 use_profile_assertion=model.use_profile_assertion, 2843 )
    def create_offset_increment(
        self,
        model: OffsetIncrementModel,
        config: Config,
        decoder: Decoder,
        extractor_model: Optional[Union[CustomRecordExtractorModel, DpathExtractorModel]] = None,
        **kwargs: Any,
    ) -> OffsetIncrement:
        if isinstance(decoder, PaginationDecoderDecorator):
            inner_decoder = decoder.decoder
        else:
            inner_decoder = decoder
            decoder = PaginationDecoderDecorator(decoder=decoder)

        if self._is_supported_decoder_for_pagination(inner_decoder):
            decoder_to_use = decoder
        else:
            raise ValueError(
                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
            )

        # Ideally we would instantiate the runtime extractor at the highest level (in this case the
        # SimpleRetriever) so that it can be shared by OffsetIncrement and RecordSelector. However,
        # because the decoder is wrapped in various decorators here but not in create_record_selector,
        # it is simpler to retain the existing behavior of two separate extractors with identical
        # behavior, since they use the same extractor model. When we have more time to investigate,
        # we can look into reusing the same component.
        extractor = (
            self._create_component_from_model(
                model=extractor_model, config=config, decoder=decoder_to_use
            )
            if extractor_model
            else None
        )

        return OffsetIncrement(
            page_size=model.page_size,
            config=config,
            decoder=decoder_to_use,
            extractor=extractor,
            inject_on_first_request=model.inject_on_first_request or False,
            parameters=model.parameters or {},
        )
2888 @staticmethod 2889 def create_page_increment( 2890 model: PageIncrementModel, config: Config, **kwargs: Any 2891 ) -> PageIncrement: 2892 return PageIncrement( 2893 page_size=model.page_size, 2894 config=config, 2895 start_from_page=model.start_from_page or 0, 2896 inject_on_first_request=model.inject_on_first_request or False, 2897 parameters=model.parameters or {}, 2898 )
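    # Illustrative example for create_page_increment above (hypothetical values):
    #
    #   pagination_strategy:
    #     type: PageIncrement
    #     page_size: 50
    #     start_from_page: 1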
2900 def create_parent_stream_config( 2901 self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any 2902 ) -> ParentStreamConfig: 2903 declarative_stream = self._create_component_from_model( 2904 model.stream, 2905 config=config, 2906 is_parent=True, 2907 **kwargs, 2908 ) 2909 request_option = ( 2910 self._create_component_from_model(model.request_option, config=config) 2911 if model.request_option 2912 else None 2913 ) 2914 2915 if model.lazy_read_pointer and any("*" in pointer for pointer in model.lazy_read_pointer): 2916 raise ValueError( 2917 "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." 2918 ) 2919 2920 model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( 2921 [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] 2922 ) 2923 2924 return ParentStreamConfig( 2925 parent_key=model.parent_key, 2926 request_option=request_option, 2927 stream=declarative_stream, 2928 partition_field=model.partition_field, 2929 config=config, 2930 incremental_dependency=model.incremental_dependency or False, 2931 parameters=model.parameters or {}, 2932 extra_fields=model.extra_fields, 2933 lazy_read_pointer=model_lazy_read_pointer, 2934 )
    def create_properties_from_endpoint(
        self, model: PropertiesFromEndpointModel, config: Config, **kwargs: Any
    ) -> PropertiesFromEndpoint:
        retriever = self._create_component_from_model(
            model=model.retriever,
            config=config,
            name="dynamic_properties",
            primary_key=None,
            stream_slicer=None,
            transformations=[],
            use_cache=True,  # Enable caching on the HttpRequester/HttpClient because the properties endpoint will be called for every slice being processed, and it is highly unlikely for the response to differ between slices
        )
        return PropertiesFromEndpoint(
            property_field_path=model.property_field_path,
            retriever=retriever,
            config=config,
            parameters=model.parameters or {},
        )
    def create_property_chunking(
        self, model: PropertyChunkingModel, config: Config, **kwargs: Any
    ) -> PropertyChunking:
        record_merge_strategy = (
            self._create_component_from_model(
                model=model.record_merge_strategy, config=config, **kwargs
            )
            if model.record_merge_strategy
            else None
        )

        property_limit_type: PropertyLimitType
        match model.property_limit_type:
            case PropertyLimitTypeModel.property_count:
                property_limit_type = PropertyLimitType.property_count
            case PropertyLimitTypeModel.characters:
                property_limit_type = PropertyLimitType.characters
            case _:
                raise ValueError(f"Invalid PropertyLimitType {model.property_limit_type}")

        return PropertyChunking(
            property_limit_type=property_limit_type,
            property_limit=model.property_limit,
            record_merge_strategy=record_merge_strategy,
            config=config,
            parameters=model.parameters or {},
        )
2983 def create_query_properties( 2984 self, model: QueryPropertiesModel, config: Config, **kwargs: Any 2985 ) -> QueryProperties: 2986 if isinstance(model.property_list, list): 2987 property_list = model.property_list 2988 else: 2989 property_list = self._create_component_from_model( 2990 model=model.property_list, config=config, **kwargs 2991 ) 2992 2993 property_chunking = ( 2994 self._create_component_from_model( 2995 model=model.property_chunking, config=config, **kwargs 2996 ) 2997 if model.property_chunking 2998 else None 2999 ) 3000 3001 return QueryProperties( 3002 property_list=property_list, 3003 always_include_properties=model.always_include_properties, 3004 property_chunking=property_chunking, 3005 config=config, 3006 parameters=model.parameters or {}, 3007 )
3021 @staticmethod 3022 def create_request_option( 3023 model: RequestOptionModel, config: Config, **kwargs: Any 3024 ) -> RequestOption: 3025 inject_into = RequestOptionType(model.inject_into.value) 3026 field_path: Optional[List[Union[InterpolatedString, str]]] = ( 3027 [ 3028 InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) 3029 for segment in model.field_path 3030 ] 3031 if model.field_path 3032 else None 3033 ) 3034 field_name = ( 3035 InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) 3036 if model.field_name 3037 else None 3038 ) 3039 return RequestOption( 3040 field_name=field_name, 3041 field_path=field_path, 3042 inject_into=inject_into, 3043 parameters=kwargs.get("parameters", {}), 3044 )
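    # Illustrative example for create_request_option above (hypothetical values): injects a value
    # into an HTTP header. The model also supports a `field_path` list of segments, which is
    # interpolated the same way as `field_name`.
    #
    #   request_option:
    #     type: RequestOption
    #     inject_into: header
    #     field_name: X-Cursor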
3046 def create_record_selector( 3047 self, 3048 model: RecordSelectorModel, 3049 config: Config, 3050 *, 3051 name: str, 3052 transformations: List[RecordTransformation] | None = None, 3053 decoder: Decoder | None = None, 3054 client_side_incremental_sync: Dict[str, Any] | None = None, 3055 file_uploader: Optional[DefaultFileUploader] = None, 3056 **kwargs: Any, 3057 ) -> RecordSelector: 3058 extractor = self._create_component_from_model( 3059 model=model.extractor, decoder=decoder, config=config 3060 ) 3061 record_filter = ( 3062 self._create_component_from_model(model.record_filter, config=config) 3063 if model.record_filter 3064 else None 3065 ) 3066 3067 transform_before_filtering = ( 3068 False if model.transform_before_filtering is None else model.transform_before_filtering 3069 ) 3070 if client_side_incremental_sync: 3071 record_filter = ClientSideIncrementalRecordFilterDecorator( 3072 config=config, 3073 parameters=model.parameters, 3074 condition=model.record_filter.condition 3075 if (model.record_filter and hasattr(model.record_filter, "condition")) 3076 else None, 3077 **client_side_incremental_sync, 3078 ) 3079 transform_before_filtering = ( 3080 True 3081 if model.transform_before_filtering is None 3082 else model.transform_before_filtering 3083 ) 3084 3085 if model.schema_normalization is None: 3086 # default to no schema normalization if not set 3087 model.schema_normalization = SchemaNormalizationModel.None_ 3088 3089 schema_normalization = ( 3090 TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) 3091 if isinstance(model.schema_normalization, SchemaNormalizationModel) 3092 else self._create_component_from_model(model.schema_normalization, config=config) # type: ignore[arg-type] # custom normalization model expected here 3093 ) 3094 3095 return RecordSelector( 3096 extractor=extractor, 3097 name=name, 3098 config=config, 3099 record_filter=record_filter, 3100 transformations=transformations or [], 3101 file_uploader=file_uploader, 3102 schema_normalization=schema_normalization, 3103 parameters=model.parameters or {}, 3104 transform_before_filtering=transform_before_filtering, 3105 )
    def create_selective_authenticator(
        self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any
    ) -> DeclarativeAuthenticator:
        authenticators = {
            name: self._create_component_from_model(model=auth, config=config)
            for name, auth in model.authenticators.items()
        }
        # SelectiveAuthenticator will return an instance of DeclarativeAuthenticator or raise a ValueError
        return SelectiveAuthenticator(  # type: ignore[abstract]
            config=config,
            authenticators=authenticators,
            authenticator_selection_path=model.authenticator_selection_path,
            **kwargs,
        )
3130 @staticmethod 3131 def create_legacy_session_token_authenticator( 3132 model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any 3133 ) -> LegacySessionTokenAuthenticator: 3134 return LegacySessionTokenAuthenticator( 3135 api_url=url_base, 3136 header=model.header, 3137 login_url=model.login_url, 3138 password=model.password or "", 3139 session_token=model.session_token or "", 3140 session_token_response_key=model.session_token_response_key or "", 3141 username=model.username or "", 3142 validate_session_url=model.validate_session_url, 3143 config=config, 3144 parameters=model.parameters or {}, 3145 )
3147 def create_simple_retriever( 3148 self, 3149 model: SimpleRetrieverModel, 3150 config: Config, 3151 *, 3152 name: str, 3153 primary_key: Optional[Union[str, List[str], List[List[str]]]], 3154 request_options_provider: Optional[RequestOptionsProvider] = None, 3155 stop_condition_cursor: Optional[Cursor] = None, 3156 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3157 transformations: List[RecordTransformation], 3158 file_uploader: Optional[DefaultFileUploader] = None, 3159 incremental_sync: Optional[ 3160 Union[IncrementingCountCursorModel, DatetimeBasedCursorModel] 3161 ] = None, 3162 use_cache: Optional[bool] = None, 3163 log_formatter: Optional[Callable[[Response], Any]] = None, 3164 partition_router: Optional[PartitionRouter] = None, 3165 **kwargs: Any, 3166 ) -> SimpleRetriever: 3167 def _get_url(req: Requester) -> str: 3168 """ 3169 Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. 3170 This is needed because the URL is not set until the requester is created. 3171 """ 3172 3173 _url: str = ( 3174 model.requester.url 3175 if hasattr(model.requester, "url") and model.requester.url is not None 3176 else req.get_url(stream_state=None, stream_slice=None, next_page_token=None) 3177 ) 3178 _url_base: str = ( 3179 model.requester.url_base 3180 if hasattr(model.requester, "url_base") and model.requester.url_base is not None 3181 else req.get_url_base(stream_state=None, stream_slice=None, next_page_token=None) 3182 ) 3183 3184 return _url or _url_base 3185 3186 decoder = ( 3187 self._create_component_from_model(model=model.decoder, config=config) 3188 if model.decoder 3189 else JsonDecoder(parameters={}) 3190 ) 3191 record_selector = self._create_component_from_model( 3192 model=model.record_selector, 3193 name=name, 3194 config=config, 3195 decoder=decoder, 3196 transformations=transformations, 3197 client_side_incremental_sync=client_side_incremental_sync, 3198 file_uploader=file_uploader, 3199 ) 3200 3201 query_properties: Optional[QueryProperties] = None 3202 query_properties_key: Optional[str] = None 3203 if self._query_properties_in_request_parameters(model.requester): 3204 # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple 3205 # places instead of default to request_parameters which isn't clearly documented 3206 if ( 3207 hasattr(model.requester, "fetch_properties_from_endpoint") 3208 and model.requester.fetch_properties_from_endpoint 3209 ): 3210 raise ValueError( 3211 f"PropertiesFromEndpoint should only be specified once per stream, but found in {model.requester.type}.fetch_properties_from_endpoint and {model.requester.type}.request_parameters" 3212 ) 3213 3214 query_properties_definitions = [] 3215 for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters() 3216 if isinstance(request_parameter, QueryPropertiesModel): 3217 query_properties_key = key 3218 query_properties_definitions.append(request_parameter) 3219 3220 if len(query_properties_definitions) > 1: 3221 raise ValueError( 3222 f"request_parameters only supports defining one QueryProperties field, but found {len(query_properties_definitions)} usages" 3223 ) 3224 3225 if len(query_properties_definitions) == 1: 3226 query_properties = self._create_component_from_model( 3227 model=query_properties_definitions[0], config=config 3228 ) 3229 elif ( 3230 hasattr(model.requester, 
"fetch_properties_from_endpoint") 3231 and model.requester.fetch_properties_from_endpoint 3232 ): 3233 # todo: Deprecate this condition once dependent connectors migrate to query_properties 3234 query_properties_definition = QueryPropertiesModel( 3235 type="QueryProperties", 3236 property_list=model.requester.fetch_properties_from_endpoint, 3237 always_include_properties=None, 3238 property_chunking=None, 3239 ) # type: ignore # $parameters has a default value 3240 3241 query_properties = self.create_query_properties( 3242 model=query_properties_definition, 3243 config=config, 3244 ) 3245 elif hasattr(model.requester, "query_properties") and model.requester.query_properties: 3246 query_properties = self.create_query_properties( 3247 model=model.requester.query_properties, 3248 config=config, 3249 ) 3250 3251 requester = self._create_component_from_model( 3252 model=model.requester, 3253 decoder=decoder, 3254 name=name, 3255 query_properties_key=query_properties_key, 3256 use_cache=use_cache, 3257 config=config, 3258 ) 3259 3260 if not request_options_provider: 3261 request_options_provider = DefaultRequestOptionsProvider(parameters={}) 3262 if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance( 3263 partition_router, PartitionRouter 3264 ): 3265 request_options_provider = partition_router 3266 3267 paginator = ( 3268 self._create_component_from_model( 3269 model=model.paginator, 3270 config=config, 3271 url_base=_get_url(requester), 3272 extractor_model=model.record_selector.extractor, 3273 decoder=decoder, 3274 cursor_used_for_stop_condition=stop_condition_cursor or None, 3275 ) 3276 if model.paginator 3277 else NoPagination(parameters={}) 3278 ) 3279 3280 ignore_stream_slicer_parameters_on_paginated_requests = ( 3281 model.ignore_stream_slicer_parameters_on_paginated_requests or False 3282 ) 3283 3284 if ( 3285 model.partition_router 3286 and isinstance(model.partition_router, SubstreamPartitionRouterModel) 3287 and not bool(self._connector_state_manager.get_stream_state(name, None)) 3288 and any( 3289 parent_stream_config.lazy_read_pointer 3290 for parent_stream_config in model.partition_router.parent_stream_configs 3291 ) 3292 ): 3293 if incremental_sync: 3294 if incremental_sync.type != "DatetimeBasedCursor": 3295 raise ValueError( 3296 f"LazySimpleRetriever only supports DatetimeBasedCursor. Found: {incremental_sync.type}." 3297 ) 3298 3299 elif incremental_sync.step or incremental_sync.cursor_granularity: 3300 raise ValueError( 3301 f"Found more that one slice per parent. LazySimpleRetriever only supports single slice read for stream - {name}." 3302 ) 3303 3304 if model.decoder and model.decoder.type != "JsonDecoder": 3305 raise ValueError( 3306 f"LazySimpleRetriever only supports JsonDecoder. Found: {model.decoder.type}." 
3307 ) 3308 3309 return LazySimpleRetriever( 3310 name=name, 3311 paginator=paginator, 3312 primary_key=primary_key, 3313 requester=requester, 3314 record_selector=record_selector, 3315 stream_slicer=_NO_STREAM_SLICING, 3316 request_option_provider=request_options_provider, 3317 cursor=None, 3318 config=config, 3319 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3320 parameters=model.parameters or {}, 3321 ) 3322 3323 return SimpleRetriever( 3324 name=name, 3325 paginator=paginator, 3326 primary_key=primary_key, 3327 requester=requester, 3328 record_selector=record_selector, 3329 stream_slicer=_NO_STREAM_SLICING, 3330 request_option_provider=request_options_provider, 3331 cursor=None, 3332 config=config, 3333 ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests, 3334 additional_query_properties=query_properties, 3335 log_formatter=self._get_log_formatter(log_formatter, name), 3336 parameters=model.parameters or {}, 3337 )
    def create_state_delegating_stream(
        self,
        model: StateDelegatingStreamModel,
        config: Config,
        has_parent_state: Optional[bool] = None,
        **kwargs: Any,
    ) -> DeclarativeStream:
        if (
            model.full_refresh_stream.name != model.name
            or model.name != model.incremental_stream.name
        ):
            raise ValueError(
                f"The state_delegating_stream, its full_refresh_stream and its incremental_stream must have identical names. Got {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
            )

        stream_model = self._get_state_delegating_stream_model(
            False if has_parent_state is None else has_parent_state, model
        )

        return self._create_component_from_model(stream_model, config=config, **kwargs)  # type: ignore[no-any-return]  # DeclarativeStream will be created as stream_model is always a DeclarativeStreamModel
3447 def create_async_retriever( 3448 self, 3449 model: AsyncRetrieverModel, 3450 config: Config, 3451 *, 3452 name: str, 3453 primary_key: Optional[ 3454 Union[str, List[str], List[List[str]]] 3455 ], # this seems to be needed to match create_simple_retriever 3456 stream_slicer: Optional[StreamSlicer], 3457 client_side_incremental_sync: Optional[Dict[str, Any]] = None, 3458 transformations: List[RecordTransformation], 3459 **kwargs: Any, 3460 ) -> AsyncRetriever: 3461 if model.download_target_requester and not model.download_target_extractor: 3462 raise ValueError( 3463 f"`download_target_extractor` required if using a `download_target_requester`" 3464 ) 3465 3466 def _get_download_retriever( 3467 requester: Requester, extractor: RecordExtractor, _decoder: Decoder 3468 ) -> SimpleRetriever: 3469 # We create a record selector for the download retriever 3470 # with no schema normalization and no transformations, neither record filter 3471 # as all this occurs in the record_selector of the AsyncRetriever 3472 record_selector = RecordSelector( 3473 extractor=extractor, 3474 name=name, 3475 record_filter=None, 3476 transformations=[], 3477 schema_normalization=TypeTransformer(TransformConfig.NoTransform), 3478 config=config, 3479 parameters={}, 3480 ) 3481 paginator = ( 3482 self._create_component_from_model( 3483 model=model.download_paginator, 3484 decoder=_decoder, 3485 config=config, 3486 url_base="", 3487 ) 3488 if model.download_paginator 3489 else NoPagination(parameters={}) 3490 ) 3491 3492 return SimpleRetriever( 3493 requester=requester, 3494 record_selector=record_selector, 3495 primary_key=None, 3496 name=name, 3497 paginator=paginator, 3498 config=config, 3499 parameters={}, 3500 log_formatter=self._get_log_formatter(None, name), 3501 ) 3502 3503 def _get_job_timeout() -> datetime.timedelta: 3504 user_defined_timeout: Optional[int] = ( 3505 int( 3506 InterpolatedString.create( 3507 str(model.polling_job_timeout), 3508 parameters={}, 3509 ).eval(config) 3510 ) 3511 if model.polling_job_timeout 3512 else None 3513 ) 3514 3515 # check for user defined timeout during the test read or 15 minutes 3516 test_read_timeout = datetime.timedelta(minutes=user_defined_timeout or 15) 3517 # default value for non-connector builder is 60 minutes. 
3518 default_sync_timeout = datetime.timedelta(minutes=user_defined_timeout or 60) 3519 3520 return ( 3521 test_read_timeout if self._emit_connector_builder_messages else default_sync_timeout 3522 ) 3523 3524 decoder = ( 3525 self._create_component_from_model(model=model.decoder, config=config) 3526 if model.decoder 3527 else JsonDecoder(parameters={}) 3528 ) 3529 record_selector = self._create_component_from_model( 3530 model=model.record_selector, 3531 config=config, 3532 decoder=decoder, 3533 name=name, 3534 transformations=transformations, 3535 client_side_incremental_sync=client_side_incremental_sync, 3536 ) 3537 3538 stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) 3539 if self._should_limit_slices_fetched(): 3540 stream_slicer = cast( 3541 StreamSlicer, 3542 StreamSlicerTestReadDecorator( 3543 wrapped_slicer=stream_slicer, 3544 maximum_number_of_slices=self._limit_slices_fetched or 5, 3545 ), 3546 ) 3547 3548 creation_requester = self._create_component_from_model( 3549 model=model.creation_requester, 3550 decoder=decoder, 3551 config=config, 3552 name=f"job creation - {name}", 3553 ) 3554 polling_requester = self._create_component_from_model( 3555 model=model.polling_requester, 3556 decoder=decoder, 3557 config=config, 3558 name=f"job polling - {name}", 3559 ) 3560 job_download_components_name = f"job download - {name}" 3561 download_decoder = ( 3562 self._create_component_from_model(model=model.download_decoder, config=config) 3563 if model.download_decoder 3564 else JsonDecoder(parameters={}) 3565 ) 3566 download_extractor = ( 3567 self._create_component_from_model( 3568 model=model.download_extractor, 3569 config=config, 3570 decoder=download_decoder, 3571 parameters=model.parameters, 3572 ) 3573 if model.download_extractor 3574 else DpathExtractor( 3575 [], 3576 config=config, 3577 decoder=download_decoder, 3578 parameters=model.parameters or {}, 3579 ) 3580 ) 3581 download_requester = self._create_component_from_model( 3582 model=model.download_requester, 3583 decoder=download_decoder, 3584 config=config, 3585 name=job_download_components_name, 3586 ) 3587 download_retriever = _get_download_retriever( 3588 download_requester, download_extractor, download_decoder 3589 ) 3590 abort_requester = ( 3591 self._create_component_from_model( 3592 model=model.abort_requester, 3593 decoder=decoder, 3594 config=config, 3595 name=f"job abort - {name}", 3596 ) 3597 if model.abort_requester 3598 else None 3599 ) 3600 delete_requester = ( 3601 self._create_component_from_model( 3602 model=model.delete_requester, 3603 decoder=decoder, 3604 config=config, 3605 name=f"job delete - {name}", 3606 ) 3607 if model.delete_requester 3608 else None 3609 ) 3610 download_target_requester = ( 3611 self._create_component_from_model( 3612 model=model.download_target_requester, 3613 decoder=decoder, 3614 config=config, 3615 name=f"job extract_url - {name}", 3616 ) 3617 if model.download_target_requester 3618 else None 3619 ) 3620 status_extractor = self._create_component_from_model( 3621 model=model.status_extractor, decoder=decoder, config=config, name=name 3622 ) 3623 download_target_extractor = ( 3624 self._create_component_from_model( 3625 model=model.download_target_extractor, 3626 decoder=decoder, 3627 config=config, 3628 name=name, 3629 ) 3630 if model.download_target_extractor 3631 else None 3632 ) 3633 3634 job_repository: AsyncJobRepository = AsyncHttpJobRepository( 3635 creation_requester=creation_requester, 3636 polling_requester=polling_requester, 3637 
            download_retriever=download_retriever,
            download_target_requester=download_target_requester,
            abort_requester=abort_requester,
            delete_requester=delete_requester,
            status_extractor=status_extractor,
            status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
            download_target_extractor=download_target_extractor,
            job_timeout=_get_job_timeout(),
        )

        async_job_partition_router = AsyncJobPartitionRouter(
            job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
                job_repository,
                stream_slices,
                self._job_tracker,
                self._message_repository,
                # FIXME: work would need to be done here in order to detect if a stream has a parent stream that is bulk
                has_bulk_parent=False,
                # Set `job_max_retry` to 1 for the Connector Builder use case.
                # `None` means the default of 3 retry attempts is applied under the hood.
                job_max_retry=1 if self._emit_connector_builder_messages else None,
            ),
            stream_slicer=stream_slicer,
            config=config,
            parameters=model.parameters or {},
        )

        return AsyncRetriever(
            record_selector=record_selector,
            stream_slicer=async_job_partition_router,
            config=config,
            parameters=model.parameters or {},
        )
3671 def create_spec(self, model: SpecModel, config: Config, **kwargs: Any) -> Spec: 3672 config_migrations = [ 3673 self._create_component_from_model(migration, config) 3674 for migration in ( 3675 model.config_normalization_rules.config_migrations 3676 if ( 3677 model.config_normalization_rules 3678 and model.config_normalization_rules.config_migrations 3679 ) 3680 else [] 3681 ) 3682 ] 3683 config_transformations = [ 3684 self._create_component_from_model(transformation, config) 3685 for transformation in ( 3686 model.config_normalization_rules.transformations 3687 if ( 3688 model.config_normalization_rules 3689 and model.config_normalization_rules.transformations 3690 ) 3691 else [] 3692 ) 3693 ] 3694 config_validations = [ 3695 self._create_component_from_model(validation, config) 3696 for validation in ( 3697 model.config_normalization_rules.validations 3698 if ( 3699 model.config_normalization_rules 3700 and model.config_normalization_rules.validations 3701 ) 3702 else [] 3703 ) 3704 ] 3705 3706 return Spec( 3707 connection_specification=model.connection_specification, 3708 documentation_url=model.documentation_url, 3709 advanced_auth=model.advanced_auth, 3710 parameters={}, 3711 config_migrations=config_migrations, 3712 config_transformations=config_transformations, 3713 config_validations=config_validations, 3714 )
3716 def create_substream_partition_router( 3717 self, 3718 model: SubstreamPartitionRouterModel, 3719 config: Config, 3720 *, 3721 stream_name: str, 3722 **kwargs: Any, 3723 ) -> SubstreamPartitionRouter: 3724 parent_stream_configs = [] 3725 if model.parent_stream_configs: 3726 parent_stream_configs.extend( 3727 [ 3728 self.create_parent_stream_config_with_substream_wrapper( 3729 model=parent_stream_config, config=config, stream_name=stream_name, **kwargs 3730 ) 3731 for parent_stream_config in model.parent_stream_configs 3732 ] 3733 ) 3734 3735 return SubstreamPartitionRouter( 3736 parent_stream_configs=parent_stream_configs, 3737 parameters=model.parameters or {}, 3738 config=config, 3739 )
    def create_parent_stream_config_with_substream_wrapper(
        self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
    ) -> Any:
        # Retrieve the child stream's state; the parent stream's state manager is derived from it below
        child_state = self._connector_state_manager.get_stream_state(stream_name, None)

        # This flag is used exclusively for StateDelegatingStream when a parent stream is created
        has_parent_state = bool(
            self._connector_state_manager.get_stream_state(stream_name, None)
            if model.incremental_dependency
            else False
        )
        connector_state_manager = self._instantiate_parent_stream_state_manager(
            child_state, config, model, has_parent_state
        )

        substream_factory = ModelToComponentFactory(
            connector_state_manager=connector_state_manager,
            limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
            limit_slices_fetched=self._limit_slices_fetched,
            emit_connector_builder_messages=self._emit_connector_builder_messages,
            disable_retries=self._disable_retries,
            disable_cache=self._disable_cache,
            message_repository=StateFilteringMessageRepository(
                LogAppenderMessageRepositoryDecorator(
                    {
                        "airbyte_cdk": {"stream": {"is_substream": True}},
                        "http": {"is_auxiliary": True},
                    },
                    self._message_repository,
                    self._evaluate_log_level(self._emit_connector_builder_messages),
                ),
            ),
        )

        return substream_factory.create_parent_stream_config(
            model=model, config=config, stream_name=stream_name, **kwargs
        )
    @staticmethod
    def create_wait_time_from_header(
        model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any
    ) -> WaitTimeFromHeaderBackoffStrategy:
        return WaitTimeFromHeaderBackoffStrategy(
            header=model.header,
            parameters=model.parameters or {},
            config=config,
            regex=model.regex,
            max_waiting_time_in_seconds=model.max_waiting_time_in_seconds,
        )
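    # Illustrative example for create_wait_time_from_header above (hypothetical values; the
    # component type name is assumed): waits for the number of seconds advertised by the API in a
    # response header, capped by `max_waiting_time_in_seconds`.
    #
    #   backoff_strategies:
    #     - type: WaitTimeFromHeader
    #       header: Retry-After
    #       max_waiting_time_in_seconds: 120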
3853 @staticmethod 3854 def create_wait_until_time_from_header( 3855 model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any 3856 ) -> WaitUntilTimeFromHeaderBackoffStrategy: 3857 return WaitUntilTimeFromHeaderBackoffStrategy( 3858 header=model.header, 3859 parameters=model.parameters or {}, 3860 config=config, 3861 min_wait=model.min_wait, 3862 regex=model.regex, 3863 )
3871 @staticmethod 3872 def create_components_mapping_definition( 3873 model: ComponentMappingDefinitionModel, config: Config, **kwargs: Any 3874 ) -> ComponentMappingDefinition: 3875 interpolated_value = InterpolatedString.create( 3876 model.value, parameters=model.parameters or {} 3877 ) 3878 field_path = [ 3879 InterpolatedString.create(path, parameters=model.parameters or {}) 3880 for path in model.field_path 3881 ] 3882 return ComponentMappingDefinition( 3883 field_path=field_path, # type: ignore[arg-type] # field_path can be str and InterpolatedString 3884 value=interpolated_value, 3885 value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), 3886 create_or_update=model.create_or_update, 3887 condition=model.condition, 3888 parameters=model.parameters or {}, 3889 )
3891 def create_http_components_resolver( 3892 self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None 3893 ) -> Any: 3894 retriever = self._create_component_from_model( 3895 model=model.retriever, 3896 config=config, 3897 name=f"{stream_name if stream_name else '__http_components_resolver'}", 3898 primary_key=None, 3899 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 3900 transformations=[], 3901 ) 3902 3903 components_mapping = [] 3904 for component_mapping_definition_model in model.components_mapping: 3905 if component_mapping_definition_model.condition: 3906 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 3907 components_mapping.append( 3908 self._create_component_from_model( 3909 model=component_mapping_definition_model, 3910 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3911 component_mapping_definition_model.value_type 3912 ), 3913 config=config, 3914 ) 3915 ) 3916 3917 return HttpComponentsResolver( 3918 retriever=retriever, 3919 stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config), 3920 config=config, 3921 components_mapping=components_mapping, 3922 parameters=model.parameters or {}, 3923 )
3925 @staticmethod 3926 def create_stream_config( 3927 model: StreamConfigModel, config: Config, **kwargs: Any 3928 ) -> StreamConfig: 3929 model_configs_pointer: List[Union[InterpolatedString, str]] = ( 3930 [x for x in model.configs_pointer] if model.configs_pointer else [] 3931 ) 3932 3933 return StreamConfig( 3934 configs_pointer=model_configs_pointer, 3935 default_values=model.default_values, 3936 parameters=model.parameters or {}, 3937 )
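    # Illustrative example for create_stream_config above (hypothetical values): points the
    # config-based components resolver at a list kept inside the connector config.
    #
    #   stream_config:
    #     type: StreamConfig
    #     configs_pointer: ["custom_streams"]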
3939 def create_config_components_resolver( 3940 self, 3941 model: ConfigComponentsResolverModel, 3942 config: Config, 3943 ) -> Any: 3944 model_stream_configs = ( 3945 model.stream_config if isinstance(model.stream_config, list) else [model.stream_config] 3946 ) 3947 3948 stream_configs = [ 3949 self._create_component_from_model( 3950 stream_config, config=config, parameters=model.parameters or {} 3951 ) 3952 for stream_config in model_stream_configs 3953 ] 3954 3955 components_mapping = [ 3956 self._create_component_from_model( 3957 model=components_mapping_definition_model, 3958 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3959 components_mapping_definition_model.value_type 3960 ), 3961 config=config, 3962 parameters=model.parameters, 3963 ) 3964 for components_mapping_definition_model in model.components_mapping 3965 ] 3966 3967 return ConfigComponentsResolver( 3968 stream_configs=stream_configs, 3969 config=config, 3970 components_mapping=components_mapping, 3971 parameters=model.parameters or {}, 3972 )
3974 def create_parametrized_components_resolver( 3975 self, 3976 model: ParametrizedComponentsResolverModel, 3977 config: Config, 3978 ) -> ParametrizedComponentsResolver: 3979 stream_parameters = StreamParametersDefinition( 3980 list_of_parameters_for_stream=model.stream_parameters.list_of_parameters_for_stream 3981 ) 3982 3983 components_mapping = [] 3984 for components_mapping_definition_model in model.components_mapping: 3985 if components_mapping_definition_model.condition: 3986 raise ValueError("`condition` is only supported for `ConfigComponentsResolver`") 3987 components_mapping.append( 3988 self._create_component_from_model( 3989 model=components_mapping_definition_model, 3990 value_type=ModelToComponentFactory._json_schema_type_name_to_type( 3991 components_mapping_definition_model.value_type 3992 ), 3993 config=config, 3994 ) 3995 ) 3996 return ParametrizedComponentsResolver( 3997 stream_parameters=stream_parameters, 3998 config=config, 3999 components_mapping=components_mapping, 4000 parameters=model.parameters or {}, 4001 )
4025 def create_http_api_budget( 4026 self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any 4027 ) -> HttpAPIBudget: 4028 policies = [ 4029 self._create_component_from_model(model=policy, config=config) 4030 for policy in model.policies 4031 ] 4032 4033 return HttpAPIBudget( 4034 policies=policies, 4035 ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset", 4036 ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining", 4037 status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429], 4038 )
4040 def create_fixed_window_call_rate_policy( 4041 self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any 4042 ) -> FixedWindowCallRatePolicy: 4043 matchers = [ 4044 self._create_component_from_model(model=matcher, config=config) 4045 for matcher in model.matchers 4046 ] 4047 4048 # Set the initial reset timestamp to 10 days from now. 4049 # This value will be updated by the first request. 4050 return FixedWindowCallRatePolicy( 4051 next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10), 4052 period=parse_duration(model.period), 4053 call_limit=model.call_limit, 4054 matchers=matchers, 4055 )
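    # Illustrative example for create_fixed_window_call_rate_policy above (hypothetical values):
    # allows 100 calls per minute for matching requests. The period is an ISO 8601 duration and is
    # parsed with isodate.parse_duration.
    #
    #   policies:
    #     - type: FixedWindowCallRatePolicy
    #       period: PT1M
    #       call_limit: 100
    #       matchers:
    #         - type: HttpRequestRegexMatcher
    #           url_path_pattern: "/v1/items"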
4057 def create_file_uploader( 4058 self, model: FileUploaderModel, config: Config, **kwargs: Any 4059 ) -> FileUploader: 4060 name = "File Uploader" 4061 requester = self._create_component_from_model( 4062 model=model.requester, 4063 config=config, 4064 name=name, 4065 **kwargs, 4066 ) 4067 download_target_extractor = self._create_component_from_model( 4068 model=model.download_target_extractor, 4069 config=config, 4070 name=name, 4071 **kwargs, 4072 ) 4073 emit_connector_builder_messages = self._emit_connector_builder_messages 4074 file_uploader = DefaultFileUploader( 4075 requester=requester, 4076 download_target_extractor=download_target_extractor, 4077 config=config, 4078 file_writer=NoopFileWriter() 4079 if emit_connector_builder_messages 4080 else LocalFileSystemFileWriter(), 4081 parameters=model.parameters or {}, 4082 filename_extractor=model.filename_extractor if model.filename_extractor else None, 4083 ) 4084 4085 return ( 4086 ConnectorBuilderFileUploader(file_uploader) 4087 if emit_connector_builder_messages 4088 else file_uploader 4089 )
4091 def create_moving_window_call_rate_policy( 4092 self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any 4093 ) -> MovingWindowCallRatePolicy: 4094 rates = [ 4095 self._create_component_from_model(model=rate, config=config) for rate in model.rates 4096 ] 4097 matchers = [ 4098 self._create_component_from_model(model=matcher, config=config) 4099 for matcher in model.matchers 4100 ] 4101 return MovingWindowCallRatePolicy( 4102 rates=rates, 4103 matchers=matchers, 4104 )
4106 def create_unlimited_call_rate_policy( 4107 self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any 4108 ) -> UnlimitedCallRatePolicy: 4109 matchers = [ 4110 self._create_component_from_model(model=matcher, config=config) 4111 for matcher in model.matchers 4112 ] 4113 4114 return UnlimitedCallRatePolicy( 4115 matchers=matchers, 4116 )
4125 def create_http_request_matcher( 4126 self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any 4127 ) -> HttpRequestRegexMatcher: 4128 return HttpRequestRegexMatcher( 4129 method=model.method, 4130 url_base=model.url_base, 4131 url_path_pattern=model.url_path_pattern, 4132 params=model.params, 4133 headers=model.headers, 4134 )
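    # Illustrative example for create_http_request_matcher above (hypothetical values): matches GET
    # requests whose path falls under /v1/ on the given base URL.
    #
    #   matchers:
    #     - type: HttpRequestRegexMatcher
    #       method: GET
    #       url_base: "https://api.example.com"
    #       url_path_pattern: "/v1/.*"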
4141 def create_grouping_partition_router( 4142 self, 4143 model: GroupingPartitionRouterModel, 4144 config: Config, 4145 *, 4146 stream_name: str, 4147 **kwargs: Any, 4148 ) -> GroupingPartitionRouter: 4149 underlying_router = self._create_component_from_model( 4150 model=model.underlying_partition_router, 4151 config=config, 4152 stream_name=stream_name, 4153 **kwargs, 4154 ) 4155 if model.group_size < 1: 4156 raise ValueError(f"Group size must be greater than 0, got {model.group_size}") 4157 4158 # Request options in underlying partition routers are not supported for GroupingPartitionRouter 4159 # because they are specific to individual partitions and cannot be aggregated or handled 4160 # when grouping, potentially leading to incorrect API calls. Any request customization 4161 # should be managed at the stream level through the requester's configuration. 4162 if isinstance(underlying_router, SubstreamPartitionRouter): 4163 if any( 4164 parent_config.request_option 4165 for parent_config in underlying_router.parent_stream_configs 4166 ): 4167 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4168 4169 if isinstance(underlying_router, ListPartitionRouter): 4170 if underlying_router.request_option: 4171 raise ValueError("Request options are not supported for GroupingPartitionRouter.") 4172 4173 return GroupingPartitionRouter( 4174 group_size=model.group_size, 4175 underlying_partition_router=underlying_router, 4176 deduplicate=model.deduplicate if model.deduplicate is not None else True, 4177 config=config, 4178 )
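    # Illustrative example for create_grouping_partition_router above (hypothetical values): batches
    # partitions from the underlying router into groups of 10 and deduplicates them by default.
    # Request options on the underlying router are rejected, as enforced above.
    #
    #   partition_router:
    #     type: GroupingPartitionRouter
    #     group_size: 10
    #     deduplicate: true
    #     underlying_partition_router:
    #       type: ListPartitionRouter
    #       cursor_field: id
    #       values: ["1", "2", "3"]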