from__future__importannotationsfromcollectionsimportabcfromdatetimeimportdatefromfunctoolsimportpartialfromitertoolsimportislicefromtypingimport(TYPE_CHECKING,Callable,TypedDict,Union,cast,overload,)importwarningsimportnumpyasnpfrompandas._libsimport(lib,tslib,)frompandas._libs.tslibsimport(OutOfBoundsDatetime,Timedelta,Timestamp,astype_overflowsafe,get_unit_from_dtype,iNaT,is_supported_unit,nat_strings,parsing,timezonesaslibtimezones,)frompandas._libs.tslibs.conversionimportprecision_from_unitfrompandas._libs.tslibs.parsingimport(DateParseError,guess_datetime_format,)frompandas._libs.tslibs.strptimeimportarray_strptimefrompandas._typingimport(AnyArrayLike,ArrayLike,DateTimeErrorChoices,npt,)frompandas.util._exceptionsimportfind_stack_levelfrompandas.core.dtypes.commonimport(ensure_object,is_float,is_integer,is_integer_dtype,is_list_like,is_numeric_dtype,)frompandas.core.dtypes.dtypesimport(ArrowDtype,DatetimeTZDtype,)frompandas.core.dtypes.genericimport(ABCDataFrame,ABCSeries,)frompandas.core.dtypes.missingimportnotnafrompandas.arraysimport(DatetimeArray,IntegerArray,NumpyExtensionArray,)frompandas.coreimportalgorithmsfrompandas.core.algorithmsimportuniquefrompandas.core.arraysimportArrowExtensionArrayfrompandas.core.arrays.baseimportExtensionArrayfrompandas.core.arrays.datetimesimport(maybe_convert_dtype,objects_to_datetime64ns,tz_to_dtype,)frompandas.core.constructionimportextract_arrayfrompandas.core.indexes.baseimportIndexfrompandas.core.indexes.datetimesimportDatetimeIndexifTYPE_CHECKING:fromcollections.abcimportHashablefrompandas._libs.tslibs.nattypeimportNaTTypefrompandas._libs.tslibs.timedeltasimportUnitChoicesfrompandasimport(DataFrame,Series,)# ---------------------------------------------------------------------# types used in annotationsArrayConvertible=Union[list,tuple,AnyArrayLike]Scalar=Union[float,str]DatetimeScalar=Union[Scalar,date,np.datetime64]DatetimeScalarOrArrayConvertible=Union[DatetimeScalar,ArrayConvertible]DatetimeDictArg=Union[list[Scalar],tuple[Scalar,...],AnyArrayLike]classYearMonthDayDict(TypedDict,total=True):year:DatetimeDictArgmonth:DatetimeDictArgday:DatetimeDictArgclassFulldatetimeDict(YearMonthDayDict,total=False):hour:DatetimeDictArghours:DatetimeDictArgminute:DatetimeDictArgminutes:DatetimeDictArgsecond:DatetimeDictArgseconds:DatetimeDictArgms:DatetimeDictArgus:DatetimeDictArgns:DatetimeDictArgDictConvertible=Union[FulldatetimeDict,"DataFrame"]start_caching_at=50# ---------------------------------------------------------------------def_guess_datetime_format_for_array(arr,dayfirst:bool|None=False)->str|None:# Try to guess the format based on the first non-NaN element, return None if can'tif(first_non_null:=tslib.first_non_null(arr))!=-1:iftype(first_non_nan_element:=arr[first_non_null])isstr:# GH#32264 np.str_ objectguessed_format=guess_datetime_format(first_non_nan_element,dayfirst=dayfirst)ifguessed_formatisnotNone:returnguessed_format# If there are multiple non-null elements, warn about# how parsing might not be consistentiftslib.first_non_null(arr[first_non_null+1:])!=-1:warnings.warn("Could not infer format, so each element will be parsed ""individually, falling back to `dateutil`. To ensure parsing is ""consistent and as-expected, please specify a format.",UserWarning,stacklevel=find_stack_level(),)returnNonedefshould_cache(arg:ArrayConvertible,unique_share:float=0.7,check_count:int|None=None)->bool:""" Decides whether to do caching. If the percent of unique elements among `check_count` elements less than `unique_share * 100` then we can do caching. Parameters ---------- arg: listlike, tuple, 1-d array, Series unique_share: float, default=0.7, optional 0 < unique_share < 1 check_count: int, optional 0 <= check_count <= len(arg) Returns ------- do_caching: bool Notes ----- By default for a sequence of less than 50 items in size, we don't do caching; for the number of elements less than 5000, we take ten percent of all elements to check for a uniqueness share; if the sequence size is more than 5000, then we check only the first 500 elements. All constants were chosen empirically by. """do_caching=True# default realizationifcheck_countisNone:# in this case, the gain from caching is negligibleiflen(arg)<=start_caching_at:returnFalseiflen(arg)<=5000:check_count=len(arg)//10else:check_count=500else:assert(0<=check_count<=len(arg)),"check_count must be in next bounds: [0; len(arg)]"ifcheck_count==0:returnFalseassert0<unique_share<1,"unique_share must be in next bounds: (0; 1)"try:# We can't cache if the items are not hashable.unique_elements=set(islice(arg,check_count))exceptTypeError:returnFalseiflen(unique_elements)>check_count*unique_share:do_caching=Falsereturndo_cachingdef_maybe_cache(arg:ArrayConvertible,format:str|None,cache:bool,convert_listlike:Callable,)->Series:""" Create a cache of unique dates from an array of dates Parameters ---------- arg : listlike, tuple, 1-d array, Series format : string Strftime format to parse time cache : bool True attempts to create a cache of converted values convert_listlike : function Conversion function to apply on dates Returns ------- cache_array : Series Cache of converted, unique dates. Can be empty """frompandasimportSeriescache_array=Series(dtype=object)ifcache:# Perform a quicker unique checkifnotshould_cache(arg):returncache_arrayifnotisinstance(arg,(np.ndarray,ExtensionArray,Index,ABCSeries)):arg=np.array(arg)unique_dates=unique(arg)iflen(unique_dates)<len(arg):cache_dates=convert_listlike(unique_dates,format)# GH#45319try:cache_array=Series(cache_dates,index=unique_dates,copy=False)exceptOutOfBoundsDatetime:returncache_array# GH#39882 and GH#35888 in case of None and NaT we get duplicatesifnotcache_array.index.is_unique:cache_array=cache_array[~cache_array.index.duplicated()]returncache_arraydef_box_as_indexlike(dt_array:ArrayLike,utc:bool=False,name:Hashable|None=None)->Index:""" Properly boxes the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead Parameters ---------- dt_array: 1-d array Array of datetimes to be wrapped in an Index. utc : bool Whether to convert/localize timestamps to UTC. name : string, default None Name for a resulting index Returns ------- result : datetime of converted dates - DatetimeIndex if convertible to sole datetime64 type - general Index otherwise """iflib.is_np_dtype(dt_array.dtype,"M"):tz="utc"ifutcelseNonereturnDatetimeIndex(dt_array,tz=tz,name=name)returnIndex(dt_array,name=name,dtype=dt_array.dtype)def_convert_and_box_cache(arg:DatetimeScalarOrArrayConvertible,cache_array:Series,name:Hashable|None=None,)->Index:""" Convert array of dates with a cache and wrap the result in an Index. Parameters ---------- arg : integer, float, string, datetime, list, tuple, 1-d array, Series cache_array : Series Cache of converted, unique dates name : string, default None Name for a DatetimeIndex Returns ------- result : Index-like of converted dates """frompandasimportSeriesresult=Series(arg,dtype=cache_array.index.dtype).map(cache_array)return_box_as_indexlike(result._values,utc=False,name=name)def_return_parsed_timezone_results(result:np.ndarray,timezones,utc:bool,name:str)->Index:""" Return results from array_strptime if a %z or %Z directive was passed. Parameters ---------- result : ndarray[int64] int64 date representations of the dates timezones : ndarray pytz timezone objects utc : bool Whether to convert/localize timestamps to UTC. name : string, default None Name for a DatetimeIndex Returns ------- tz_result : Index-like of parsed dates with timezone """tz_results=np.empty(len(result),dtype=object)non_na_timezones=set()forzoneinunique(timezones):mask=timezones==zonedta=DatetimeArray(result[mask]).tz_localize(zone)ifutc:ifdta.tzinfoisNone:dta=dta.tz_localize("utc")else:dta=dta.tz_convert("utc")else:ifnotdta.isna().all():non_na_timezones.add(zone)tz_results[mask]=dtaiflen(non_na_timezones)>1:warnings.warn("In a future version of pandas, parsing datetimes with mixed time ""zones will raise an error unless `utc=True`. Please specify `utc=True` ""to opt in to the new behaviour and silence this warning. ""To create a `Series` with mixed offsets and `object` dtype, ""please use `apply` and `datetime.datetime.strptime`",FutureWarning,stacklevel=find_stack_level(),)returnIndex(tz_results,name=name)def_convert_listlike_datetimes(arg,format:str|None,name:Hashable|None=None,utc:bool=False,unit:str|None=None,errors:DateTimeErrorChoices="raise",dayfirst:bool|None=None,yearfirst:bool|None=None,exact:bool=True,):""" Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parsed name : object None or string for the Index name utc : bool Whether to convert/localize timestamps to UTC. unit : str None or string of the frequency of the passed data errors : str error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' dayfirst : bool dayfirst parsing behavior from to_datetime yearfirst : bool yearfirst parsing behavior from to_datetime exact : bool, default True exact format matching behavior from to_datetime Returns ------- Index-like of parsed dates """ifisinstance(arg,(list,tuple)):arg=np.array(arg,dtype="O")elifisinstance(arg,NumpyExtensionArray):arg=np.array(arg)arg_dtype=getattr(arg,"dtype",None)# these are shortcutabletz="utc"ifutcelseNoneifisinstance(arg_dtype,DatetimeTZDtype):ifnotisinstance(arg,(DatetimeArray,DatetimeIndex)):returnDatetimeIndex(arg,tz=tz,name=name)ifutc:arg=arg.tz_convert(None).tz_localize("utc")returnargelifisinstance(arg_dtype,ArrowDtype)andarg_dtype.typeisTimestamp:# TODO: Combine with above if DTI/DTA supports Arrow timestampsifutc:# pyarrow uses UTC, not lowercase utcifisinstance(arg,Index):arg_array=cast(ArrowExtensionArray,arg.array)ifarg_dtype.pyarrow_dtype.tzisnotNone:arg_array=arg_array._dt_tz_convert("UTC")else:arg_array=arg_array._dt_tz_localize("UTC")arg=Index(arg_array)else:# ArrowExtensionArrayifarg_dtype.pyarrow_dtype.tzisnotNone:arg=arg._dt_tz_convert("UTC")else:arg=arg._dt_tz_localize("UTC")returnargeliflib.is_np_dtype(arg_dtype,"M"):ifnotis_supported_unit(get_unit_from_dtype(arg_dtype)):# We go to closest supported reso, i.e. "s"arg=astype_overflowsafe(# TODO: looks like we incorrectly raise with errors=="ignore"np.asarray(arg),np.dtype("M8[s]"),is_coerce=errors=="coerce",)ifnotisinstance(arg,(DatetimeArray,DatetimeIndex)):returnDatetimeIndex(arg,tz=tz,name=name)elifutc:# DatetimeArray, DatetimeIndexreturnarg.tz_localize("utc")returnargelifunitisnotNone:ifformatisnotNone:raiseValueError("cannot specify both format and unit")return_to_datetime_with_unit(arg,unit,name,utc,errors)elifgetattr(arg,"ndim",1)>1:raiseTypeError("arg must be a string, datetime, list, tuple, 1-d array, or Series")# warn if passing timedelta64, raise for PeriodDtype# NB: this must come after unit transformationtry:arg,_=maybe_convert_dtype(arg,copy=False,tz=libtimezones.maybe_get_tz(tz))exceptTypeError:iferrors=="coerce":npvalues=np.array(["NaT"],dtype="datetime64[ns]").repeat(len(arg))returnDatetimeIndex(npvalues,name=name)eliferrors=="ignore":idx=Index(arg,name=name)returnidxraisearg=ensure_object(arg)ifformatisNone:format=_guess_datetime_format_for_array(arg,dayfirst=dayfirst)# `format` could be inferred, or user didn't ask for mixed-format parsing.ifformatisnotNoneandformat!="mixed":return_array_strptime_with_fallback(arg,name,utc,format,exact,errors)result,tz_parsed=objects_to_datetime64ns(arg,dayfirst=dayfirst,yearfirst=yearfirst,utc=utc,errors=errors,allow_object=True,)iftz_parsedisnotNone:# We can take a shortcut since the datetime64 numpy array# is in UTCdta=DatetimeArray(result,dtype=tz_to_dtype(tz_parsed))returnDatetimeIndex._simple_new(dta,name=name)return_box_as_indexlike(result,utc=utc,name=name)def_array_strptime_with_fallback(arg,name,utc:bool,fmt:str,exact:bool,errors:str,)->Index:""" Call array_strptime, with fallback behavior depending on 'errors'. """result,timezones=array_strptime(arg,fmt,exact=exact,errors=errors,utc=utc)ifany(tzisnotNonefortzintimezones):return_return_parsed_timezone_results(result,timezones,utc,name)return_box_as_indexlike(result,utc=utc,name=name)def_to_datetime_with_unit(arg,unit,name,utc:bool,errors:str)->Index:""" to_datetime specalized to the case where a 'unit' is passed. """arg=extract_array(arg,extract_numpy=True)# GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime# because it expects an ndarray argumentifisinstance(arg,IntegerArray):arr=arg.astype(f"datetime64[{unit}]")tz_parsed=Noneelse:arg=np.asarray(arg)ifarg.dtype.kindin"iu":# Note we can't do "f" here because that could induce unwanted# rounding GH#14156, GH#20445arr=arg.astype(f"datetime64[{unit}]",copy=False)try:arr=astype_overflowsafe(arr,np.dtype("M8[ns]"),copy=False)exceptOutOfBoundsDatetime:iferrors=="raise":raisearg=arg.astype(object)return_to_datetime_with_unit(arg,unit,name,utc,errors)tz_parsed=Noneelifarg.dtype.kind=="f":mult,_=precision_from_unit(unit)mask=np.isnan(arg)|(arg==iNaT)fvalues=(arg*mult).astype("f8",copy=False)fvalues[mask]=0if(fvalues<Timestamp.min._value).any()or(fvalues>Timestamp.max._value).any():iferrors!="raise":arg=arg.astype(object)return_to_datetime_with_unit(arg,unit,name,utc,errors)raiseOutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")arr=fvalues.astype("M8[ns]",copy=False)arr[mask]=np.datetime64("NaT","ns")tz_parsed=Noneelse:arg=arg.astype(object,copy=False)arr,tz_parsed=tslib.array_with_unit_to_datetime(arg,unit,errors=errors)iferrors=="ignore":# Index constructor _may_ infer to DatetimeIndexresult=Index._with_infer(arr,name=name)else:result=DatetimeIndex(arr,name=name)ifnotisinstance(result,DatetimeIndex):returnresult# GH#23758: We may still need to localize the result with tz# GH#25546: Apply tz_parsed first (from arg), then tz (from caller)# result will be naive but in UTCresult=result.tz_localize("UTC").tz_convert(tz_parsed)ifutc:ifresult.tzisNone:result=result.tz_localize("utc")else:result=result.tz_convert("utc")returnresultdef_adjust_to_origin(arg,origin,unit):""" Helper function for to_datetime. Adjust input argument to the specified origin Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be adjusted origin : 'julian' or Timestamp origin offset for the arg unit : str passed unit from to_datetime, must be 'D' Returns ------- ndarray or scalar of adjusted date(s) """iforigin=="julian":original=argj0=Timestamp(0).to_julian_date()ifunit!="D":raiseValueError("unit must be 'D' for origin='julian'")try:arg=arg-j0exceptTypeErroraserr:raiseValueError("incompatible 'arg' type for given 'origin'='julian'")fromerr# preemptively check this for a nice rangej_max=Timestamp.max.to_julian_date()-j0j_min=Timestamp.min.to_julian_date()-j0ifnp.any(arg>j_max)ornp.any(arg<j_min):raiseOutOfBoundsDatetime(f"{original} is Out of Bounds for origin='julian'")else:# arg must be numericifnot((is_integer(arg)oris_float(arg))oris_numeric_dtype(np.asarray(arg))):raiseValueError(f"'{arg}' is not compatible with origin='{origin}'; ""it must be numeric with a unit specified")# we are going to offset back to unix / epoch timetry:offset=Timestamp(origin,unit=unit)exceptOutOfBoundsDatetimeaserr:raiseOutOfBoundsDatetime(f"origin {origin} is Out of Bounds")fromerrexceptValueErroraserr:raiseValueError(f"origin {origin} cannot be converted to a Timestamp")fromerrifoffset.tzisnotNone:raiseValueError(f"origin offset {offset} must be tz-naive")td_offset=offset-Timestamp(0)# convert the offset to the unit of the arg# this should be lossless in terms of precisionioffset=td_offset//Timedelta(1,unit=unit)# scalars & ndarray-like can handle the additionifis_list_like(arg)andnotisinstance(arg,(ABCSeries,Index,np.ndarray)):arg=np.asarray(arg)arg=arg+ioffsetreturnarg@overloaddefto_datetime(arg:DatetimeScalar,errors:DateTimeErrorChoices=...,dayfirst:bool=...,yearfirst:bool=...,utc:bool=...,format:str|None=...,exact:bool=...,unit:str|None=...,infer_datetime_format:bool=...,origin=...,cache:bool=...,)->Timestamp:...@overloaddefto_datetime(arg:Series|DictConvertible,errors:DateTimeErrorChoices=...,dayfirst:bool=...,yearfirst:bool=...,utc:bool=...,format:str|None=...,exact:bool=...,unit:str|None=...,infer_datetime_format:bool=...,origin=...,cache:bool=...,)->Series:...@overloaddefto_datetime(arg:list|tuple|Index|ArrayLike,errors:DateTimeErrorChoices=...,dayfirst:bool=...,yearfirst:bool=...,utc:bool=...,format:str|None=...,exact:bool=...,unit:str|None=...,infer_datetime_format:bool=...,origin=...,cache:bool=...,)->DatetimeIndex:...
[docs]defto_datetime(arg:DatetimeScalarOrArrayConvertible|DictConvertible,errors:DateTimeErrorChoices="raise",dayfirst:bool=False,yearfirst:bool=False,utc:bool=False,format:str|None=None,exact:bool|lib.NoDefault=lib.no_default,unit:str|None=None,infer_datetime_format:lib.NoDefault|bool=lib.no_default,origin:str="unix",cache:bool=True,)->DatetimeIndex|Series|DatetimeScalar|NaTType|None:""" Convert argument to datetime. This function converts a scalar, array-like, :class:`Series` or :class:`DataFrame`/dict-like to a pandas datetime object. Parameters ---------- arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like The object to convert to a datetime. If a :class:`DataFrame` is provided, the method expects minimally the following columns: :const:`"year"`, :const:`"month"`, :const:`"day"`. The column "year" must be specified in 4-digit format. errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If :const:`'raise'`, then invalid parsing will raise an exception. - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`. - If :const:`'ignore'`, then invalid parsing will return the input. dayfirst : bool, default False Specify a date parse order if `arg` is str or is list-like. If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"` is parsed as :const:`2012-11-10`. .. warning:: ``dayfirst=True`` is not strict, but will prefer to parse with day first. yearfirst : bool, default False Specify a date parse order if `arg` is str or is list-like. - If :const:`True` parses dates with the year first, e.g. :const:`"10/11/12"` is parsed as :const:`2010-11-12`. - If both `dayfirst` and `yearfirst` are :const:`True`, `yearfirst` is preceded (same as :mod:`dateutil`). .. warning:: ``yearfirst=True`` is not strict, but will prefer to parse with year first. utc : bool, default False Control timezone-related parsing, localization and conversion. - If :const:`True`, the function *always* returns a timezone-aware UTC-localized :class:`Timestamp`, :class:`Series` or :class:`DatetimeIndex`. To do this, timezone-naive inputs are *localized* as UTC, while timezone-aware inputs are *converted* to UTC. - If :const:`False` (default), inputs will not be coerced to UTC. Timezone-naive inputs will remain naive, while timezone-aware ones will keep their time offsets. Limitations exist for mixed offsets (typically, daylight savings), see :ref:`Examples <to_datetime_tz_examples>` section for details. .. warning:: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`. See also: pandas general documentation about `timezone conversion and localization <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html #time-zone-handling>`_. format : str, default None The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. See `strftime documentation <https://docs.python.org/3/library/datetime.html #strftime-and-strptime-behavior>`_ for more information on choices, though note that :const:`"%f"` will parse all the way up to nanoseconds. You can also pass: - "ISO8601", to parse any `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ time string (not necessarily in exactly the same format); - "mixed", to infer the format for each element individually. This is risky, and you should probably use it along with `dayfirst`. .. note:: If a :class:`DataFrame` is passed, then `format` has no effect. exact : bool, default True Control how `format` is used: - If :const:`True`, require an exact `format` match. - If :const:`False`, allow the `format` to match anywhere in the target string. Cannot be used alongside ``format='ISO8601'`` or ``format='mixed'``. unit : str, default 'ns' The unit of the arg (D,s,ms,us,ns) denote the unit, which is an integer or float number. This will be based off the origin. Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate the number of milliseconds to the unix epoch start. infer_datetime_format : bool, default False If :const:`True` and no `format` is given, attempt to infer the format of the datetime strings based on the first non-NaN element, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the parsing speed by ~5-10x. .. deprecated:: 2.0.0 A strict version of this argument is now the default, passing it has no effect. origin : scalar, default 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01. - If :const:`'julian'`, unit must be :const:`'D'`, and origin is set to beginning of Julian Calendar. Julian day number :const:`0` is assigned to the day starting at noon on January 1, 4713 BC. - If Timestamp convertible (Timestamp, dt.datetime, np.datetimt64 or date string), origin is set to Timestamp identified by origin. - If a float or integer, origin is the millisecond difference relative to 1970-01-01. cache : bool, default True If :const:`True`, use a cache of unique, converted dates to apply the datetime conversion. May produce significant speed-up when parsing duplicate date strings, especially ones with timezone offsets. The cache is only used when there are at least 50 values. The presence of out-of-bounds values will render the cache unusable and may slow down parsing. Returns ------- datetime If parsing succeeded. Return type depends on input (types in parenthesis correspond to fallback in case of unsuccessful timezone or out-of-range timestamp parsing): - scalar: :class:`Timestamp` (or :class:`datetime.datetime`) - array-like: :class:`DatetimeIndex` (or :class:`Series` with :class:`object` dtype containing :class:`datetime.datetime`) - Series: :class:`Series` of :class:`datetime64` dtype (or :class:`Series` of :class:`object` dtype containing :class:`datetime.datetime`) - DataFrame: :class:`Series` of :class:`datetime64` dtype (or :class:`Series` of :class:`object` dtype containing :class:`datetime.datetime`) Raises ------ ParserError When parsing a date from string fails. ValueError When another datetime conversion error happens. For example when one of 'year', 'month', day' columns is missing in a :class:`DataFrame`, or when a Timezone-aware :class:`datetime.datetime` is found in an array-like of mixed time offsets, and ``utc=False``. See Also -------- DataFrame.astype : Cast argument to a specified dtype. to_timedelta : Convert argument to timedelta. convert_dtypes : Convert dtypes. Notes ----- Many input types are supported, and lead to different output types: - **scalars** can be int, float, str, datetime object (from stdlib :mod:`datetime` module or :mod:`numpy`). They are converted to :class:`Timestamp` when possible, otherwise they are converted to :class:`datetime.datetime`. None/NaN/null scalars are converted to :const:`NaT`. - **array-like** can contain int, float, str, datetime objects. They are converted to :class:`DatetimeIndex` when possible, otherwise they are converted to :class:`Index` with :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null entries are converted to :const:`NaT` in both cases. - **Series** are converted to :class:`Series` with :class:`datetime64` dtype when possible, otherwise they are converted to :class:`Series` with :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null entries are converted to :const:`NaT` in both cases. - **DataFrame/dict-like** are converted to :class:`Series` with :class:`datetime64` dtype. For each row a datetime is created from assembling the various dataframe columns. Column keys can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or plurals of the same. The following causes are responsible for :class:`datetime.datetime` objects being returned (possibly inside an :class:`Index` or a :class:`Series` with :class:`object` dtype) instead of a proper pandas designated type (:class:`Timestamp`, :class:`DatetimeIndex` or :class:`Series` with :class:`datetime64` dtype): - when any input element is before :const:`Timestamp.min` or after :const:`Timestamp.max`, see `timestamp limitations <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html #timeseries-timestamp-limits>`_. - when ``utc=False`` (default) and the input is an array-like or :class:`Series` containing mixed naive/aware datetime, or aware with mixed time offsets. Note that this happens in the (quite frequent) situation when the timezone has a daylight savings policy. In that case you may wish to use ``utc=True``. Examples -------- **Handling various input formats** Assembling a datetime from multiple columns of a :class:`DataFrame`. The keys can be common abbreviations like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or plurals of the same >>> df = pd.DataFrame({'year': [2015, 2016], ... 'month': [2, 3], ... 'day': [4, 5]}) >>> pd.to_datetime(df) 0 2015-02-04 1 2016-03-05 dtype: datetime64[ns] Using a unix epoch time >>> pd.to_datetime(1490195805, unit='s') Timestamp('2017-03-22 15:16:45') >>> pd.to_datetime(1490195805433502912, unit='ns') Timestamp('2017-03-22 15:16:45.433502912') .. warning:: For float arg, precision rounding might happen. To prevent unexpected behavior use a fixed-width exact type. Using a non-unix epoch origin >>> pd.to_datetime([1, 2, 3], unit='D', ... origin=pd.Timestamp('1960-01-01')) DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None) **Differences with strptime behavior** :const:`"%f"` will parse all the way up to nanoseconds. >>> pd.to_datetime('2018-10-26 12:00:00.0000000011', ... format='%Y-%m-%d %H:%M:%S.%f') Timestamp('2018-10-26 12:00:00.000000001') **Non-convertible date/times** If a date does not meet the `timestamp limitations <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html #timeseries-timestamp-limits>`_, passing ``errors='ignore'`` will return the original input instead of raising any exception. Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`, in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore') '13000101' >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') NaT .. _to_datetime_tz_examples: **Timezones and time offsets** The default behaviour (``utc=False``) is as follows: - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`: >>> pd.to_datetime(['2018-10-26 12:00:00', '2018-10-26 13:00:15']) DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], dtype='datetime64[ns]', freq=None) - Timezone-aware inputs *with constant time offset* are converted to timezone-aware :class:`DatetimeIndex`: >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500']) DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], dtype='datetime64[ns, UTC-05:00]', freq=None) - However, timezone-aware inputs *with mixed time offsets* (for example issued from a timezone with daylight savings, such as Europe/Paris) are **not successfully converted** to a :class:`DatetimeIndex`. Parsing datetimes with mixed time zones will show a warning unless `utc=True`. If you specify `utc=False` the warning below will be shown and a simple :class:`Index` containing :class:`datetime.datetime` objects will be returned: >>> pd.to_datetime(['2020-10-25 02:00 +0200', ... '2020-10-25 04:00 +0100']) # doctest: +SKIP FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`. Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], dtype='object') - A mix of timezone-aware and timezone-naive inputs is also converted to a simple :class:`Index` containing :class:`datetime.datetime` objects: >>> from datetime import datetime >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`. Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') | Setting ``utc=True`` solves most of the above issues: - Timezone-naive inputs are *localized* as UTC >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True) DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) - Timezone-aware inputs are *converted* to UTC (the output represents the exact same datetime, but viewed from the UTC time offset `+00:00`). >>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'], ... utc=True) DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) - Inputs can contain both string or datetime, the above rules still apply >>> pd.to_datetime(['2018-10-26 12:00', datetime(2020, 1, 1, 18)], utc=True) DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) """ifexactisnotlib.no_defaultandformatin{"mixed","ISO8601"}:raiseValueError("Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'")ifinfer_datetime_formatisnotlib.no_default:warnings.warn("The argument 'infer_datetime_format' is deprecated and will ""be removed in a future version. ""A strict version of it is now the default, see ""https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. ""You can safely remove this argument.",stacklevel=find_stack_level(),)ifargisNone:returnNoneiforigin!="unix":arg=_adjust_to_origin(arg,origin,unit)convert_listlike=partial(_convert_listlike_datetimes,utc=utc,unit=unit,dayfirst=dayfirst,yearfirst=yearfirst,errors=errors,exact=exact,)# pylint: disable-next=used-before-assignmentresult:Timestamp|NaTType|Series|Indexifisinstance(arg,Timestamp):result=argifutc:ifarg.tzisnotNone:result=arg.tz_convert("utc")else:result=arg.tz_localize("utc")elifisinstance(arg,ABCSeries):cache_array=_maybe_cache(arg,format,cache,convert_listlike)ifnotcache_array.empty:result=arg.map(cache_array)else:values=convert_listlike(arg._values,format)result=arg._constructor(values,index=arg.index,name=arg.name)elifisinstance(arg,(ABCDataFrame,abc.MutableMapping)):result=_assemble_from_unit_mappings(arg,errors,utc)elifisinstance(arg,Index):cache_array=_maybe_cache(arg,format,cache,convert_listlike)ifnotcache_array.empty:result=_convert_and_box_cache(arg,cache_array,name=arg.name)else:result=convert_listlike(arg,format,name=arg.name)elifis_list_like(arg):try:# error: Argument 1 to "_maybe_cache" has incompatible type# "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,# ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...],# Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]"argc=cast(Union[list,tuple,ExtensionArray,np.ndarray,"Series",Index],arg)cache_array=_maybe_cache(argc,format,cache,convert_listlike)exceptOutOfBoundsDatetime:# caching attempts to create a DatetimeIndex, which may raise# an OOB. If that's the desired behavior, then just reraise...iferrors=="raise":raise# ... otherwise, continue without the cache.frompandasimportSeriescache_array=Series([],dtype=object)# just an empty arrayifnotcache_array.empty:result=_convert_and_box_cache(argc,cache_array)else:result=convert_listlike(argc,format)else:result=convert_listlike(np.array([arg]),format)[0]ifisinstance(arg,bool)andisinstance(result,np.bool_):result=bool(result)# TODO: avoid this kludge.# error: Incompatible return value type (got "Union[Timestamp, NaTType,# Series, Index]", expected "Union[DatetimeIndex, Series, float, str,# NaTType, None]")returnresult# type: ignore[return-value]
# mappings for assembling units_unit_map={"year":"year","years":"year","month":"month","months":"month","day":"day","days":"day","hour":"h","hours":"h","minute":"m","minutes":"m","second":"s","seconds":"s","ms":"ms","millisecond":"ms","milliseconds":"ms","us":"us","microsecond":"us","microseconds":"us","ns":"ns","nanosecond":"ns","nanoseconds":"ns",}def_assemble_from_unit_mappings(arg,errors:DateTimeErrorChoices,utc:bool):""" assemble the unit specified fields from the arg (DataFrame) Return a Series for actual parsing Parameters ---------- arg : DataFrame errors : {'ignore', 'raise', 'coerce'}, default 'raise' - If :const:`'raise'`, then invalid parsing will raise an exception - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT` - If :const:`'ignore'`, then invalid parsing will return the input utc : bool Whether to convert/localize timestamps to UTC. Returns ------- Series """frompandasimport(DataFrame,to_numeric,to_timedelta,)arg=DataFrame(arg)ifnotarg.columns.is_unique:raiseValueError("cannot assemble with duplicate keys")# replace passed unit with _unit_mapdeff(value):ifvaluein_unit_map:return_unit_map[value]# m is case significantifvalue.lower()in_unit_map:return_unit_map[value.lower()]returnvalueunit={k:f(k)forkinarg.keys()}unit_rev={v:kfork,vinunit.items()}# we require at least Ymdrequired=["year","month","day"]req=sorted(set(required)-set(unit_rev.keys()))iflen(req):_required=",".join(req)raiseValueError("to assemble mappings requires at least that "f"[year, month, day] be specified: [{_required}] is missing")# keys we don't recognizeexcess=sorted(set(unit_rev.keys())-set(_unit_map.values()))iflen(excess):_excess=",".join(excess)raiseValueError(f"extra keys have been passed to the datetime assemblage: [{_excess}]")defcoerce(values):# we allow coercion to if errors allowsvalues=to_numeric(values,errors=errors)# prevent overflow in case of int8 or int16ifis_integer_dtype(values):values=values.astype("int64",copy=False)returnvaluesvalues=(coerce(arg[unit_rev["year"]])*10000+coerce(arg[unit_rev["month"]])*100+coerce(arg[unit_rev["day"]]))try:values=to_datetime(values,format="%Y%m%d",errors=errors,utc=utc)except(TypeError,ValueError)aserr:raiseValueError(f"cannot assemble the datetimes: {err}")fromerrunits:list[UnitChoices]=["h","m","s","ms","us","ns"]foruinunits:value=unit_rev.get(u)ifvalueisnotNoneandvalueinarg:try:values+=to_timedelta(coerce(arg[value]),unit=u,errors=errors)except(TypeError,ValueError)aserr:raiseValueError(f"cannot assemble the datetimes [{value}]: {err}")fromerrreturnvaluesdef_attempt_YYYYMMDD(arg:npt.NDArray[np.object_],errors:str)->np.ndarray|None:""" try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. with nan) Parameters ---------- arg : np.ndarray[object] errors : {'raise','ignore','coerce'} """defcalc(carg):# calculate the actual resultcarg=carg.astype(object,copy=False)parsed=parsing.try_parse_year_month_day(carg/10000,carg/100%100,carg%100)returntslib.array_to_datetime(parsed,errors=errors)[0]defcalc_with_mask(carg,mask):result=np.empty(carg.shape,dtype="M8[ns]")iresult=result.view("i8")iresult[~mask]=iNaTmasked_result=calc(carg[mask].astype(np.float64).astype(np.int64))result[mask]=masked_result.astype("M8[ns]")returnresult# try intlike / strings that are intstry:returncalc(arg.astype(np.int64))except(ValueError,OverflowError,TypeError):pass# a float with actual np.nantry:carg=arg.astype(np.float64)returncalc_with_mask(carg,notna(carg))except(ValueError,OverflowError,TypeError):pass# string with NaN-liketry:mask=~algorithms.isin(arg,list(nat_strings))returncalc_with_mask(arg,mask)except(ValueError,OverflowError,TypeError):passreturnNone__all__=["DateParseError","should_cache","to_datetime",]