airbyte_cdk.sources.declarative.datetime.datetime_parser
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import datetime
from typing import Union


class DatetimeParser:
    """
    Parses and formats datetime objects according to a specified format.

    This class mainly acts as a wrapper to properly handle timestamp formatting through the "%s" directive.

    %s is part of the list of format codes required by the 1989 C standard, but it is unreliable because it always
    returns a datetime in the system's timezone.
    Instead of using the directive directly, we can use datetime.fromtimestamp and dt.timestamp()

    On top of the regular strftime/strptime directives, these pseudo-formats are recognized:
      * "%s"                  - whole seconds since the Unix epoch (must be the entire format string)
      * "%s_as_float"         - seconds since the Unix epoch as a float (must be the entire format string)
      * "%ms"                 - milliseconds since the Unix epoch (must be the entire format string)
      * "%epoch_microseconds" - microseconds since the Unix epoch (must be the entire format string)
      * "%_ms"                - milliseconds fraction, may be embedded in a larger format string
    """

    _UNIX_EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)

    def parse(self, date: Union[str, int], format: str) -> datetime.datetime:
        """
        Parse `date` according to `format` and return a timezone-aware datetime.

        :param date: the value to parse (a formatted string or an epoch-based number)
        :param format: a strptime format string or one of the pseudo-formats listed on the class
        :return: the parsed datetime; results without timezone information are tagged as UTC
        :raises ValueError: if `date` does not match `format`
        """
        # "%s" is a valid (but unreliable) directive for formatting, but not for parsing
        # It is defined as
        # The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). https://man7.org/linux/man-pages/man3/strptime.3.html
        #
        # The recommended way to parse a date from its timestamp representation is to use datetime.fromtimestamp
        # See https://stackoverflow.com/a/4974930
        if format == "%s":
            return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc)
        elif format == "%s_as_float":
            return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
        elif format == "%epoch_microseconds":
            return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date))
        elif format == "%ms":
            return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
        elif "%_ms" in format:
            # strptime has no milliseconds directive; the fraction is read through %f instead
            format = format.replace("%_ms", "%f")
        parsed_datetime = datetime.datetime.strptime(str(date), format)
        if self._is_naive(parsed_datetime):
            return parsed_datetime.replace(tzinfo=datetime.timezone.utc)
        return parsed_datetime

    def format(self, dt: datetime.datetime, format: str) -> str:
        """
        Render `dt` as a string according to `format`.

        :param dt: the datetime to format
        :param format: a strftime format string or one of the pseudo-formats listed on the class
        :return: the formatted string
        """
        # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on
        # It's safer to use the timestamp() method than the %s directive
        # See https://stackoverflow.com/a/4974930
        if format == "%s":
            return str(int(dt.timestamp()))
        if format == "%s_as_float":
            return str(float(dt.timestamp()))
        if format in ("%epoch_microseconds", "%ms"):
            # Use exact integer arithmetic: scaling the float returned by timestamp() can land just below the
            # intended integer and be truncated one unit short.
            # astimezone() treats a naive datetime as system local time, which is also what timestamp() does.
            elapsed = dt.astimezone(datetime.timezone.utc) - self._UNIX_EPOCH
            microseconds = elapsed // datetime.timedelta(microseconds=1)
            if format == "%epoch_microseconds":
                return str(microseconds)
            # Truncate toward zero so pre-epoch values keep the rounding direction of the former int() conversion
            milliseconds = abs(microseconds) // 1000
            return str(milliseconds if microseconds >= 0 else -milliseconds)
        if "%_ms" in format:
            # Substitute the digits into the format itself. Patching the formatted output instead would also
            # rewrite any other part of the result that happens to equal the microsecond string.
            milliseconds_text = "%03d" % (dt.microsecond // 1000)
            return dt.strftime(format.replace("%_ms", milliseconds_text))
        return dt.strftime(format)

    def _is_naive(self, dt: datetime.datetime) -> bool:
        """Return True when `dt` has no tzinfo or its tzinfo yields no UTC offset."""
        return dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None
class
DatetimeParser:
class DatetimeParser:
    """
    Parses and formats datetime objects according to a specified format.

    This class mainly acts as a wrapper to properly handle timestamp formatting through the "%s" directive.

    %s is part of the list of format codes required by the 1989 C standard, but it is unreliable because it always
    returns a datetime in the system's timezone.
    Instead of using the directive directly, we can use datetime.fromtimestamp and dt.timestamp()

    On top of the regular strftime/strptime directives, these pseudo-formats are recognized:
      * "%s"                  - whole seconds since the Unix epoch (must be the entire format string)
      * "%s_as_float"         - seconds since the Unix epoch as a float (must be the entire format string)
      * "%ms"                 - milliseconds since the Unix epoch (must be the entire format string)
      * "%epoch_microseconds" - microseconds since the Unix epoch (must be the entire format string)
      * "%_ms"                - milliseconds fraction, may be embedded in a larger format string
    """

    _UNIX_EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)

    def parse(self, date: Union[str, int], format: str) -> datetime.datetime:
        """
        Parse `date` according to `format` and return a timezone-aware datetime.

        :param date: the value to parse (a formatted string or an epoch-based number)
        :param format: a strptime format string or one of the pseudo-formats listed on the class
        :return: the parsed datetime; results without timezone information are tagged as UTC
        :raises ValueError: if `date` does not match `format`
        """
        # "%s" is a valid (but unreliable) directive for formatting, but not for parsing
        # It is defined as
        # The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). https://man7.org/linux/man-pages/man3/strptime.3.html
        #
        # The recommended way to parse a date from its timestamp representation is to use datetime.fromtimestamp
        # See https://stackoverflow.com/a/4974930
        if format == "%s":
            return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc)
        elif format == "%s_as_float":
            return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
        elif format == "%epoch_microseconds":
            return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date))
        elif format == "%ms":
            return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
        elif "%_ms" in format:
            # strptime has no milliseconds directive; the fraction is read through %f instead
            format = format.replace("%_ms", "%f")
        parsed_datetime = datetime.datetime.strptime(str(date), format)
        if self._is_naive(parsed_datetime):
            return parsed_datetime.replace(tzinfo=datetime.timezone.utc)
        return parsed_datetime

    def format(self, dt: datetime.datetime, format: str) -> str:
        """
        Render `dt` as a string according to `format`.

        :param dt: the datetime to format
        :param format: a strftime format string or one of the pseudo-formats listed on the class
        :return: the formatted string
        """
        # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on
        # It's safer to use the timestamp() method than the %s directive
        # See https://stackoverflow.com/a/4974930
        if format == "%s":
            return str(int(dt.timestamp()))
        if format == "%s_as_float":
            return str(float(dt.timestamp()))
        if format in ("%epoch_microseconds", "%ms"):
            # Use exact integer arithmetic: scaling the float returned by timestamp() can land just below the
            # intended integer and be truncated one unit short.
            # astimezone() treats a naive datetime as system local time, which is also what timestamp() does.
            elapsed = dt.astimezone(datetime.timezone.utc) - self._UNIX_EPOCH
            microseconds = elapsed // datetime.timedelta(microseconds=1)
            if format == "%epoch_microseconds":
                return str(microseconds)
            # Truncate toward zero so pre-epoch values keep the rounding direction of the former int() conversion
            milliseconds = abs(microseconds) // 1000
            return str(milliseconds if microseconds >= 0 else -milliseconds)
        if "%_ms" in format:
            # Substitute the digits into the format itself. Patching the formatted output instead would also
            # rewrite any other part of the result that happens to equal the microsecond string.
            milliseconds_text = "%03d" % (dt.microsecond // 1000)
            return dt.strftime(format.replace("%_ms", milliseconds_text))
        return dt.strftime(format)

    def _is_naive(self, dt: datetime.datetime) -> bool:
        """Return True when `dt` has no tzinfo or its tzinfo yields no UTC offset."""
        return dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None
Parses and formats datetime objects according to a specified format.
This class mainly acts as a wrapper to properly handle timestamp formatting through the "%s" directive.
%s is part of the list of format codes required by the 1989 C standard, but it is unreliable because it always returns a datetime in the system's timezone. Instead of using the directive directly, we can use datetime.fromtimestamp and dt.timestamp().
def
parse(self, date: Union[str, int], format: str) -> datetime.datetime:
def parse(self, date: Union[str, int], format: str) -> datetime.datetime:
    """
    Turn `date` into a timezone-aware datetime, interpreting it according to `format`.

    Epoch-based pseudo-formats ("%s", "%s_as_float", "%epoch_microseconds", "%ms") are only honoured
    when they make up the whole format string. Anything else goes through strptime, with "%_ms"
    translated to "%f" beforehand. A strptime result without timezone information is tagged as UTC.

    :param date: the value to parse (a formatted string or an epoch-based number)
    :param format: a strptime format string or one of the pseudo-formats above
    :return: the parsed datetime
    """
    utc = datetime.timezone.utc

    # "%s" can be used (unreliably) when formatting but is not a parsing directive; it stands for
    # the number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC).
    # https://man7.org/linux/man-pages/man3/strptime.3.html
    # datetime.fromtimestamp is the recommended replacement, see https://stackoverflow.com/a/4974930
    if format == "%s":
        return datetime.datetime.fromtimestamp(int(date), tz=utc)
    if format == "%s_as_float":
        return datetime.datetime.fromtimestamp(float(date), tz=utc)
    if format == "%epoch_microseconds":
        return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date))
    if format == "%ms":
        return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))

    # str.replace leaves the format untouched when "%_ms" is absent
    strptime_format = format.replace("%_ms", "%f")
    result = datetime.datetime.strptime(str(date), strptime_format)
    return result.replace(tzinfo=utc) if self._is_naive(result) else result
def
format(self, dt: datetime.datetime, format: str) -> str:
def format(self, dt: datetime.datetime, format: str) -> str:
    """
    Render `dt` as a string according to `format`.

    Besides regular strftime directives, these pseudo-formats are handled:
      * "%s", "%s_as_float", "%epoch_microseconds", "%ms" - epoch-based values, only when they are the
        entire format string
      * "%_ms" - zero-padded milliseconds of `dt`, may be embedded in a larger format string

    :param dt: the datetime to format
    :param format: a strftime format string or one of the pseudo-formats above
    :return: the formatted string
    """
    # strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on
    # It's safer to use the timestamp() method than the %s directive
    # See https://stackoverflow.com/a/4974930
    if format == "%s":
        return str(int(dt.timestamp()))
    if format == "%s_as_float":
        return str(float(dt.timestamp()))
    if format in ("%epoch_microseconds", "%ms"):
        # Use exact integer arithmetic: scaling the float returned by timestamp() can land just below the
        # intended integer and be truncated one unit short.
        # astimezone() treats a naive datetime as system local time, which is also what timestamp() does.
        unix_epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        elapsed = dt.astimezone(datetime.timezone.utc) - unix_epoch
        microseconds = elapsed // datetime.timedelta(microseconds=1)
        if format == "%epoch_microseconds":
            return str(microseconds)
        # Truncate toward zero so pre-epoch values keep the rounding direction of the former int() conversion
        milliseconds = abs(microseconds) // 1000
        return str(milliseconds if microseconds >= 0 else -milliseconds)
    if "%_ms" in format:
        # Substitute the digits into the format itself. Patching the formatted output instead would also
        # rewrite any other part of the result that happens to equal the microsecond string.
        milliseconds_text = "%03d" % (dt.microsecond // 1000)
        return dt.strftime(format.replace("%_ms", milliseconds_text))
    return dt.strftime(format)