diff --git a/README.md b/README.md index d212717..30aaa36 100644 --- a/README.md +++ b/README.md @@ -347,9 +347,11 @@ added. ### Testing Requirements - Docker Desktop installed, either of the Linux or Windows runtimes, doesn't matter -- pytest -- hypothesis -- pytest-cov (coverage.py) +- Python packages: + - `docker` + - `hypothesis` + - `pytest` + - `pytest-cov` (coverage.py) ### What Is Tested? diff --git a/bcpandas/main.py b/bcpandas/main.py index 4b94801..52eb22a 100644 --- a/bcpandas/main.py +++ b/bcpandas/main.py @@ -348,7 +348,7 @@ def to_sql( batch_size: Optional[int] = None, use_tablock: bool = False, debug: bool = False, - bcp_path: Optional[str] = None, + bcp_path: Optional[Union[str, Path]] = None, dtype: Optional[dict] = None, process_dest_table: bool = True, print_output: bool = True, @@ -358,6 +358,7 @@ def to_sql( work_directory: Optional[Path] = None, collation: str = sql_collation, identity_insert: bool = False, + err_file: Optional[Union[str, Path]] = None, ): """ Writes the pandas DataFrame to a SQL table or view. @@ -396,7 +397,7 @@ def to_sql( Setting this option allows for larger batch sizes. debug : bool, default False If True, will not delete the temporary CSV and format files, and will output their location. - bcp_path : str, default None + bcp_path : str, pathlib.Path, default None The full path to the BCP utility, useful if it is not in the PATH environment variable dtype: dict, default None A dict with keys the names of columns and values SqlAlchemy types for defining their types. These are @@ -420,6 +421,10 @@ def to_sql( system-default for temporary files. identity_insert: bool, default False Specifies that identity value or values in the imported data file are to be used for the identity column. + err_file: str, pathlib.Path, default None + Specifies the full path of an error file used to store any rows that the bcp utility can't transfer from + the file to the database. 
Error messages from the bcp command go to the workstation of the user. + If this option isn't used, an error file isn't created. Notes ----- @@ -505,6 +510,7 @@ def to_sql( use_tablock=use_tablock, bcp_path=bcp_path, identity_insert=identity_insert, + err_file=err_file, ) finally: if not debug: diff --git a/bcpandas/utils.py b/bcpandas/utils.py index 5b3250f..3229883 100644 --- a/bcpandas/utils.py +++ b/bcpandas/utils.py @@ -52,6 +52,7 @@ def bcp( row_terminator: Optional[str] = None, bcp_path: Optional[Union[str, Path]] = None, identity_insert: bool = False, + err_file: Optional[Union[str, Path]] = None, ): """ See https://docs.microsoft.com/en-us/sql/tools/bcp-utility @@ -116,6 +117,9 @@ def bcp( if identity_insert: bcp_command += ["-E"] + if err_file: + bcp_command += ["-e", str(err_file)] + # formats if direc == IN and format_file_path is not None: bcp_command += ["-f", str(format_file_path)] diff --git a/tests/test_to_sql.py b/tests/test_to_sql.py index c7c9b9e..88a61ab 100644 --- a/tests/test_to_sql.py +++ b/tests/test_to_sql.py @@ -322,6 +322,43 @@ def test_custom_work_directory(sql_creds): assert conn.exec_driver_sql("SELECT * FROM some_table").first()[0] == 1.5 +def test_identity_insert_param(sql_creds): + """ + Test that ingest is successful when the identity_insert param is used. + """ + to_sql( + df=pd.DataFrame({"col1": [1.5]}), + table_name="some_table", + creds=sql_creds, + if_exists="replace", + index=False, + sql_type="table", + identity_insert=True, + ) + with sql_creds.engine.connect() as conn: + assert conn.exec_driver_sql("SELECT * FROM some_table").first()[0] == 1.5 + + +def test_custom_err_file(sql_creds): + """ + Test the err_file parameter. 
+ """ + err_file = Path(__file__).parent.joinpath("err_file.log") + to_sql( + df=pd.DataFrame({"col1": [pd.Timedelta(days=1)]}), + table_name="some_table", + creds=sql_creds, + if_exists="replace", + index=False, + sql_type="table", + err_file=err_file, + ) + with open(err_file) as f: + assert "Invalid character value for cast specification" in f.read() + + err_file.unlink(missing_ok=True) + + @pytest.mark.usefixtures("database") class _BaseToSql: sql_type = "table" diff --git a/tests/utils.py b/tests/utils.py index df1c38d..3c2e853 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -135,11 +135,13 @@ def __init__( def start(self): if not self.accept_eula: - raise ValueError("Must accept Microsft's End User License Agreement") + raise ValueError("Must accept Microsoft's End User License Agreement") + env = { "ACCEPT_EULA": "Y", "SA_PASSWORD": self.sa_sql_password, } + if self.mssql_image.startswith("mcr.microsoft.com/mssql/server"): # means it's linux env["MSSQL_PID"] = self.mssql_pid