4444    ValueLabelTypeMismatch ,
4545)
4646from  pandas .util ._decorators  import  (
47-     Appender ,
4847    doc ,
4948    set_module ,
5049)
127126    Return StataReader object for iterations, returns chunks with 
128127    given number of lines.""" 
129128
130- _iterator_params  =  """\  
131- 
132-     Return StataReader object.""" 
133- 
134129_reader_notes  =  """\  
135130
136131----- 
139134file is associated to an incomplete set of value labels that only 
140135label a strict subset of the values.""" 
141136
142- _read_stata_doc  =  f""" 
143- Read Stata file into DataFrame. 
144- 
145- Parameters 
146- ---------- 
147- filepath_or_buffer : str, path object or file-like object 
148-     Any valid string path is acceptable. The string could be a URL. Valid 
149-     URL schemes include http, ftp, s3, and file. For file URLs, a host is 
150-     expected. A local file could be: ``file://localhost/path/to/table.dta``. 
151- 
152-     If you want to pass in a path object, pandas accepts any ``os.PathLike``. 
153- 
154-     By file-like object, we refer to objects with a ``read()`` method, 
155-     such as a file handle (e.g. via builtin ``open`` function) 
156-     or ``StringIO``. 
157- { _statafile_processing_params1 } 
158- { _statafile_processing_params2 } 
159- { _chunksize_params } 
160- { _iterator_params } 
161- { _shared_docs ["decompression_options" ] %  "filepath_or_buffer" } 
162- { _shared_docs ["storage_options" ]} 
163- 
164- Returns 
165- ------- 
166- DataFrame, pandas.api.typing.StataReader 
167-     If iterator or chunksize, returns StataReader, else DataFrame. 
168- 
169- See Also 
170- -------- 
171- io.stata.StataReader : Low-level reader for Stata data files. 
172- DataFrame.to_stata: Export Stata data files. 
173- 
174- { _reader_notes } 
175- 
176- Examples 
177- -------- 
178- 
179- Creating a dummy stata for this example 
180- 
181- >>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', 'parrot'], 
182- ...                   'speed': [350, 18, 361, 15]}})  # doctest: +SKIP 
183- >>> df.to_stata('animals.dta')  # doctest: +SKIP 
184- 
185- Read a Stata dta file: 
186- 
187- >>> df = pd.read_stata('animals.dta')  # doctest: +SKIP 
188- 
189- Read a Stata dta file in 10,000 line chunks: 
190- 
191- >>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8")  # doctest: +SKIP 
192- >>> df = pd.DataFrame(values, columns=["i"])  # doctest: +SKIP 
193- >>> df.to_stata('filename.dta')  # doctest: +SKIP 
194- 
195- >>> with pd.read_stata('filename.dta', chunksize=10000) as itr:  # doctest: +SKIP 
196- >>>     for chunk in itr: 
197- ...         # Operate on a single chunk, e.g., chunk.mean() 
198- ...         pass  # doctest: +SKIP 
199- """ 
200- 
201- _read_method_doc  =  f"""\  
202- 
203- 
204- Parameters 
205- ---------- 
206- nrows : int 
207-     Number of lines to read from data file, if None read whole file. 
208- { _statafile_processing_params1 } 
209- { _statafile_processing_params2 } 
210- 
211- Returns 
212- ------- 
213- DataFrame 
214- """ 
215- 
216137_stata_reader_doc  =  f"""\  
217138
218139
@@ -1677,7 +1598,6 @@ def get_chunk(self, size: int | None = None) -> DataFrame:
16771598            size  =  self ._chunksize 
16781599        return  self .read (nrows = size )
16791600
1680-     @Appender (_read_method_doc ) 
16811601    def  read (
16821602        self ,
16831603        nrows : int  |  None  =  None ,
@@ -1689,6 +1609,38 @@ def read(
16891609        columns : Sequence [str ] |  None  =  None ,
16901610        order_categoricals : bool  |  None  =  None ,
16911611    ) ->  DataFrame :
1612+         """ 
1613+         Read observations from a Stata file, converting them into a DataFrame.
1614+ 
1615+         Parameters 
1616+         ---------- 
1617+         nrows : int 
1618+             Number of lines to read from data file, if None read whole file. 
1619+         convert_dates : bool, default True 
1620+             Convert date variables to DataFrame time values. 
1621+         convert_categoricals : bool, default True 
1622+             Read value labels and convert columns to Categorical/Factor variables. 
1623+         index_col : str, optional 
1624+             Column to set as index. 
1625+         convert_missing : bool, default False 
1626+             Flag indicating whether to convert missing values to their Stata 
1627+             representations.  If False, missing values are replaced with nan. 
1628+             If True, columns containing missing values are returned with 
1629+             object data types and missing values are represented by 
1630+             StataMissingValue objects. 
1631+         preserve_dtypes : bool, default True 
1632+             Preserve Stata datatypes. If False, numeric data are upcast to pandas 
1633+             default types for foreign data (float64 or int64). 
1634+         columns : list or None 
1635+             Columns to retain.  Columns will be returned in the given order.  None 
1636+             returns all columns. 
1637+         order_categoricals : bool, default True 
1638+             Flag indicating whether converted categorical data are ordered. 
1639+ 
1640+         Returns 
1641+         ------- 
1642+         DataFrame 
1643+         """ 
16921644        self ._ensure_open ()
16931645
16941646        # Handle options 
@@ -2135,7 +2087,6 @@ def value_labels(self) -> dict[str, dict[int, str]]:
21352087
21362088
21372089@set_module ("pandas" ) 
2138- @Appender (_read_stata_doc ) 
21392090def  read_stata (
21402091    filepath_or_buffer : FilePath  |  ReadBuffer [bytes ],
21412092    * ,
@@ -2151,6 +2102,122 @@ def read_stata(
21512102    compression : CompressionOptions  =  "infer" ,
21522103    storage_options : StorageOptions  |  None  =  None ,
21532104) ->  DataFrame  |  StataReader :
2105+     """ 
2106+     Read Stata file into DataFrame. 
2107+ 
2108+     Parameters 
2109+     ---------- 
2110+     filepath_or_buffer : str, path object or file-like object 
2111+         Any valid string path is acceptable. The string could be a URL. Valid 
2112+         URL schemes include http, ftp, s3, and file. For file URLs, a host is 
2113+         expected. A local file could be: ``file://localhost/path/to/table.dta``. 
2114+ 
2115+         If you want to pass in a path object, pandas accepts any ``os.PathLike``. 
2116+ 
2117+         By file-like object, we refer to objects with a ``read()`` method, 
2118+         such as a file handle (e.g. via builtin ``open`` function) 
2119+         or ``StringIO``. 
2120+     convert_dates : bool, default True 
2121+         Convert date variables to DataFrame time values. 
2122+     convert_categoricals : bool, default True 
2123+         Read value labels and convert columns to Categorical/Factor variables. 
2124+     index_col : str, optional 
2125+         Column to set as index. 
2126+     convert_missing : bool, default False 
2127+         Flag indicating whether to convert missing values to their Stata 
2128+         representations.  If False, missing values are replaced with nan. 
2129+         If True, columns containing missing values are returned with 
2130+         object data types and missing values are represented by 
2131+         StataMissingValue objects. 
2132+     preserve_dtypes : bool, default True 
2133+         Preserve Stata datatypes. If False, numeric data are upcast to pandas 
2134+         default types for foreign data (float64 or int64). 
2135+     columns : list or None 
2136+         Columns to retain.  Columns will be returned in the given order.  None 
2137+         returns all columns. 
2138+     order_categoricals : bool, default True 
2139+         Flag indicating whether converted categorical data are ordered. 
2140+     chunksize : int, default None 
2141+         Return StataReader object for iterations, returns chunks with 
2142+         given number of lines. 
2143+     iterator : bool, default False 
2144+         Return StataReader object. 
2145+     compression : str or dict, default 'infer' 
2146+         For on-the-fly decompression of on-disk data. If 'infer' and 
2147+         'filepath_or_buffer' is path-like, then detect compression from the 
2148+         following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar', 
2149+         '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression). 
2150+         If using 'zip' or 'tar', the ZIP file must contain only one 
2151+         data file to be read in. Set to ``None`` for no decompression. 
2152+         Can also be a dict with key ``'method'`` set to one of 
2153+         {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and 
2154+         other key-value pairs are forwarded to 
2155+         ``zipfile.ZipFile``, ``gzip.GzipFile``, 
2156+         ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or 
2157+         ``tarfile.TarFile``, respectively. 
2158+         As an example, the following could be passed for Zstandard decompression using a 
2159+         custom compression dictionary: 
2160+         ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``. 
2161+ 
2162+         .. versionadded:: 1.5.0 
2163+             Added support for `.tar` files. 
2164+     storage_options : dict, optional 
2165+         Extra options that make sense for a particular storage connection, e.g. 
2166+         host, port, username, password, etc. For HTTP(S) URLs the key-value pairs 
2167+         are forwarded to ``urllib.request.Request`` as header options. For other 
2168+         URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are 
2169+         forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more 
2170+         details, and for more examples on storage options refer `here 
2171+         <https://pandas.pydata.org/docs/user_guide/io.html? 
2172+         highlight=storage_options#reading-writing-remote-files>`_. 
2173+ 
2174+     Returns 
2175+     ------- 
2176+     DataFrame, pandas.api.typing.StataReader 
2177+         If iterator or chunksize, returns StataReader, else DataFrame. 
2178+ 
2179+     See Also 
2180+     -------- 
2181+     io.stata.StataReader : Low-level reader for Stata data files. 
2182+     DataFrame.to_stata : Export Stata data files.
2183+ 
2184+     Notes 
2185+     ----- 
2186+     Categorical variables read through an iterator may not have the same 
2187+     categories and dtype. This occurs when a variable stored in a DTA
2188+     file is associated to an incomplete set of value labels that only 
2189+     label a strict subset of the values. 
2190+ 
2191+     Examples 
2192+     -------- 
2193+ 
2194+     Creating a dummy stata for this example 
2195+ 
2196+     >>> df = pd.DataFrame( 
2197+     ...     { 
2198+     ...         "animal": ["falcon", "parrot", "falcon", "parrot"], 
2199+     ...         "speed": [350, 18, 361, 15], 
2200+     ...     } 
2201+     ... )  # doctest: +SKIP 
2202+     >>> df.to_stata("animals.dta")  # doctest: +SKIP 
2203+ 
2204+     Read a Stata dta file: 
2205+ 
2206+     >>> df = pd.read_stata("animals.dta")  # doctest: +SKIP 
2207+ 
2208+     Read a Stata dta file in 10,000 line chunks: 
2209+ 
2210+     >>> values = np.random.randint( 
2211+     ...     0, 10, size=(20_000, 1), dtype="uint8" 
2212+     ... )  # doctest: +SKIP 
2213+     >>> df = pd.DataFrame(values, columns=["i"])  # doctest: +SKIP 
2214+     >>> df.to_stata("filename.dta")  # doctest: +SKIP 
2215+ 
2216+     >>> with pd.read_stata('filename.dta', chunksize=10000) as itr:  # doctest: +SKIP 
2217+     ...     for chunk in itr:
2218+     ...         # Operate on a single chunk, e.g., chunk.mean() 
2219+     ...         pass  # doctest: +SKIP 
2220+     """ 
21542221    reader  =  StataReader (
21552222        filepath_or_buffer ,
21562223        convert_dates = convert_dates ,
0 commit comments