Skip to content

Commit ee7e30c

Browse files
authored
Backport PR #51871 on branch 2.0.x (ERR: Check that dtype_backend is valid) (#51964)
ERR: Check that dtype_backend is valid (#51871)
1 parent 4ec8ed9 commit ee7e30c

28 files changed

+188
-7
lines changed

pandas/core/generic.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
from pandas.util._decorators import doc
9595
from pandas.util._exceptions import find_stack_level
9696
from pandas.util._validators import (
97+
check_dtype_backend,
9798
validate_ascending,
9899
validate_bool_kwarg,
99100
validate_fillna_kwargs,
@@ -6534,8 +6535,8 @@ def convert_dtypes(
65346535
65356536
.. versionadded:: 1.2.0
65366537
dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
6537-
Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
6538-
arrays, nullable dtypes are used for all dtypes that have a nullable
6538+
Which dtype_backend to use, e.g. whether a DataFrame should use nullable
6539+
dtypes for all dtypes that have a nullable
65396540
implementation when "numpy_nullable" is set, pyarrow is used for all
65406541
dtypes if "pyarrow" is set.
65416542
@@ -6654,6 +6655,7 @@ def convert_dtypes(
66546655
2 <NA>
66556656
dtype: string
66566657
"""
6658+
check_dtype_backend(dtype_backend)
66576659
if self.ndim == 1:
66586660
return self._convert_dtypes(
66596661
infer_objects,

pandas/core/internals/construction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ def convert_object_array(
991991
----------
992992
content: List[np.ndarray]
993993
dtype: np.dtype or ExtensionDtype
994-
dtype_backend: Controls if nullable dtypes are returned.
994+
dtype_backend: Controls if nullable/pyarrow dtypes are returned.
995995
coerce_float: Cast floats that are integers to int.
996996
997997
Returns

pandas/core/tools/numeric.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
DtypeBackend,
1111
npt,
1212
)
13+
from pandas.util._validators import check_dtype_backend
1314

1415
from pandas.core.dtypes.cast import maybe_downcast_numeric
1516
from pandas.core.dtypes.common import (
@@ -161,6 +162,8 @@ def to_numeric(
161162
if errors not in ("ignore", "raise", "coerce"):
162163
raise ValueError("invalid error value specified")
163164

165+
check_dtype_backend(dtype_backend)
166+
164167
is_series = False
165168
is_index = False
166169
is_scalars = False

pandas/io/clipboards.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from pandas._libs import lib
99
from pandas.util._exceptions import find_stack_level
10+
from pandas.util._validators import check_dtype_backend
1011

1112
from pandas.core.dtypes.generic import ABCDataFrame
1213

@@ -58,6 +59,8 @@ def read_clipboard(
5859
if encoding is not None and encoding.lower().replace("-", "") != "utf8":
5960
raise NotImplementedError("reading from clipboard only supports utf-8 encoding")
6061

62+
check_dtype_backend(dtype_backend)
63+
6164
from pandas.io.clipboard import clipboard_get
6265
from pandas.io.parsers import read_csv
6366

pandas/io/excel/_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
Appender,
4646
doc,
4747
)
48+
from pandas.util._validators import check_dtype_backend
4849

4950
from pandas.core.dtypes.common import (
5051
is_bool,
@@ -469,6 +470,8 @@ def read_excel(
469470
storage_options: StorageOptions = None,
470471
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
471472
) -> DataFrame | dict[IntStrT, DataFrame]:
473+
check_dtype_backend(dtype_backend)
474+
472475
should_close = False
473476
if not isinstance(io, ExcelFile):
474477
should_close = True

pandas/io/feather_format.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
)
1717
from pandas.compat._optional import import_optional_dependency
1818
from pandas.util._decorators import doc
19+
from pandas.util._validators import check_dtype_backend
1920

2021
import pandas as pd
2122
from pandas.core.api import (
@@ -138,6 +139,8 @@ def read_feather(
138139
import_optional_dependency("pyarrow")
139140
from pyarrow import feather
140141

142+
check_dtype_backend(dtype_backend)
143+
141144
with get_handle(
142145
path, "rb", storage_options=storage_options, is_text=False
143146
) as handles:

pandas/io/html.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
AbstractMethodError,
3131
EmptyDataError,
3232
)
33+
from pandas.util._validators import check_dtype_backend
3334

3435
from pandas.core.dtypes.common import is_list_like
3536

@@ -1204,6 +1205,7 @@ def read_html(
12041205
f'"{extract_links}"'
12051206
)
12061207
validate_header_arg(header)
1208+
check_dtype_backend(dtype_backend)
12071209

12081210
io = stringify_path(io)
12091211

pandas/io/json/_json.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from pandas.compat._optional import import_optional_dependency
4343
from pandas.errors import AbstractMethodError
4444
from pandas.util._decorators import doc
45+
from pandas.util._validators import check_dtype_backend
4546

4647
from pandas.core.dtypes.common import (
4748
ensure_str,
@@ -744,6 +745,8 @@ def read_json(
744745
if orient == "table" and convert_axes:
745746
raise ValueError("cannot pass both convert_axes and orient='table'")
746747

748+
check_dtype_backend(dtype_backend)
749+
747750
if dtype is None and orient != "table":
748751
# error: Incompatible types in assignment (expression has type "bool", variable
749752
# has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],
@@ -944,14 +947,18 @@ def read(self) -> DataFrame | Series:
944947
if self.engine == "pyarrow":
945948
pyarrow_json = import_optional_dependency("pyarrow.json")
946949
pa_table = pyarrow_json.read_json(self.data)
950+
951+
mapping: type[ArrowDtype] | None | Callable
947952
if self.dtype_backend == "pyarrow":
948-
return pa_table.to_pandas(types_mapper=ArrowDtype)
953+
mapping = ArrowDtype
949954
elif self.dtype_backend == "numpy_nullable":
950955
from pandas.io._util import _arrow_dtype_mapping
951956

952-
mapping = _arrow_dtype_mapping()
953-
return pa_table.to_pandas(types_mapper=mapping.get)
954-
return pa_table.to_pandas()
957+
mapping = _arrow_dtype_mapping().get
958+
else:
959+
mapping = None
960+
961+
return pa_table.to_pandas(types_mapper=mapping)
955962
elif self.engine == "ujson":
956963
if self.lines:
957964
if self.chunksize:

pandas/io/orc.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
WriteBuffer,
1717
)
1818
from pandas.compat._optional import import_optional_dependency
19+
from pandas.util._validators import check_dtype_backend
1920

2021
from pandas.core.dtypes.common import (
2122
is_categorical_dtype,
@@ -78,6 +79,8 @@ def read_orc(
7879

7980
orc = import_optional_dependency("pyarrow.orc")
8081

82+
check_dtype_backend(dtype_backend)
83+
8184
with get_handle(path, "rb", is_text=False) as handles:
8285
orc_file = orc.ORCFile(handles.handle)
8386
pa_table = orc_file.read(columns=columns, **kwargs)

pandas/io/parquet.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pandas.errors import AbstractMethodError
2323
from pandas.util._decorators import doc
2424
from pandas.util._exceptions import find_stack_level
25+
from pandas.util._validators import check_dtype_backend
2526

2627
import pandas as pd
2728
from pandas import (
@@ -513,6 +514,7 @@ def read_parquet(
513514
DataFrame
514515
"""
515516
impl = get_engine(engine)
517+
516518
if use_nullable_dtypes is not lib.no_default:
517519
msg = (
518520
"The argument 'use_nullable_dtypes' is deprecated and will be removed "
@@ -525,6 +527,7 @@ def read_parquet(
525527
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
526528
else:
527529
use_nullable_dtypes = False
530+
check_dtype_backend(dtype_backend)
528531

529532
return impl.read(
530533
path,

0 commit comments

Comments
 (0)