Coverage for src/toolbox_pyspark/types.py: 100%

44 statements  

coverage.py v7.6.10, created at 2025-01-25 23:08 +0000

# ============================================================================ #
#                                                                              #
#     Title   : Column Types                                                   #
#     Purpose : Get, check, and change a dataframe's column data types.        #
#                                                                              #
# ============================================================================ #


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Overview                                                              ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#  Description                                                              ####
# ---------------------------------------------------------------------------- #


"""
!!! note "Summary"
    The `types` module is used to get, check, and change a dataframe's column data types.
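
???+ example "Example"
    A minimal usage sketch (an assumption for illustration: an active `#!py SparkSession` and an existing DataFrame `#!py df` with a column `#!py "a"`):

    ```{.py .python linenums="1" title="Quick start"}
    >>> from toolbox_pyspark.types import cast_column_to_type, get_column_types
    >>> df = cast_column_to_type(df, "a", "string")  # cast one column
    >>> get_column_types(df).show()                  # inspect the result
    ```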

24""" 


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Setup                                                                 ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#  Imports                                                                  ####
# ---------------------------------------------------------------------------- #


# ## Python StdLib Imports ----
from typing import Union

# ## Python Third Party Imports ----
import pandas as pd
from pandas import DataFrame as pdDataFrame
from pyspark.sql import DataFrame as psDataFrame, functions as F, types as T
from toolbox_python.checkers import is_type
from toolbox_python.collection_types import str_list, str_tuple
from toolbox_python.dictionaries import dict_reverse_keys_and_values
from typeguard import typechecked

# ## Local First Party Imports ----
from toolbox_pyspark.checks import (
    _validate_pyspark_datatype,
    assert_column_exists,
    assert_columns_exists,
)
from toolbox_pyspark.constants import (
    VALID_DATAFRAME_NAMES,
    VALID_PYSPARK_DATAFRAME_NAMES,
)
from toolbox_pyspark.utils.exceptions import InvalidDataFrameNameError


# ---------------------------------------------------------------------------- #
#  Exports                                                                  ####
# ---------------------------------------------------------------------------- #


__all__: str_list = [
    "get_column_types",
    "cast_column_to_type",
    "cast_columns_to_type",
    "map_cast_columns_to_type",
]


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Functions                                                             ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#  Public functions                                                         ####
# ---------------------------------------------------------------------------- #


@typechecked
def get_column_types(
    dataframe: psDataFrame,
    output_type: str = "psDataFrame",
) -> Union[psDataFrame, pdDataFrame]:
    """
    !!! note "Summary"
        This is a convenience function to return the data types from a given table as either a `#!py pyspark.sql.DataFrame` or `#!py pandas.DataFrame`.

    Params:
        dataframe (psDataFrame):
            The DataFrame to be checked.
        output_type (str, optional):
            How should the data be returned? As `#!py pdDataFrame` or `#!py psDataFrame`.

            For `#!py pandas`, use one of:

            ```{.sh .shell title="Terminal"}
            [
                "pandas", "pandas.DataFrame",
                "pd.df", "pd.DataFrame",
                "pddf", "pdDataFrame",
                "pd", "pdDF",
            ]
            ```

            For `#!py pyspark`, use one of:

            ```{.sh .shell title="Terminal"}
            [
                "pyspark", "spark.DataFrame",
                "spark", "pyspark.DataFrame",
                "ps.df", "ps.DataFrame",
                "psdf", "psDataFrame",
                "ps", "psDF",
            ]
            ```

            Any other options are invalid.<br>
            Defaults to `#!py "psDataFrame"`.

    Raises:
        TypeError:
            If any of the inputs passed to the parameters of this function are not the correct type. Uses the [`@typeguard.typechecked`](https://typeguard.readthedocs.io/en/stable/api.html#typeguard.typechecked) decorator.
        InvalidDataFrameNameError:
            If the given value passed to `#!py output_type` is not one of the given valid types.

    Returns:
        (Union[psDataFrame, pdDataFrame]):
            The DataFrame where each row represents a column on the original `#!py dataframe` object, and which has two columns:

            1. The column name from `#!py dataframe`; and
            2. The data type for that column in `#!py dataframe`.

    ???+ example "Examples"

        ```{.py .python linenums="1" title="Set up"}
        >>> # Imports
        >>> import pandas as pd
        >>> from pyspark.sql import SparkSession
        >>> from toolbox_pyspark.types import get_column_types
        >>>
        >>> # Instantiate Spark
        >>> spark = SparkSession.builder.getOrCreate()
        >>>
        >>> # Create data
        >>> df = spark.createDataFrame(
        ...     pd.DataFrame(
        ...         {
        ...             "a": [1, 2, 3, 4],
        ...             "b": ["a", "b", "c", "d"],
        ...             "c": [1, 1, 1, 1],
        ...             "d": ["2", "2", "2", "2"],
        ...         }
        ...     )
        ... )
        >>>
        >>> # Check
        >>> print(df.dtypes)
        ```
        <div class="result" markdown>
        ```{.sh .shell title="Terminal"}
        [
            ("a", "bigint"),
            ("b", "string"),
            ("c", "bigint"),
            ("d", "string"),
        ]
        ```
        </div>

        ```{.py .python linenums="1" title="Example 1: Return PySpark"}
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | bigint   |
        | b        | string   |
        | c        | bigint   |
        | d        | string   |
        +----------+----------+
        ```
        !!! success "Conclusion: Successfully printed PySpark output."
        </div>

        ```{.py .python linenums="1" title="Example 2: Return Pandas"}
        >>> print(get_column_types(df, "pd"))
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
          col_name col_type
        0        a   bigint
        1        b   string
        2        c   bigint
        3        d   string
        ```
        !!! success "Conclusion: Successfully printed Pandas output."
        </div>

        ```{.py .python linenums="1" title="Example 3: Invalid output"}
        >>> print(get_column_types(df, "foo"))
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        InvalidDataFrameNameError: Invalid value for `output_type`: "foo".
        Must be one of: ["pandas.DataFrame", "pandas", "pd.DataFrame", "pd.df", "pddf", "pdDataFrame", "pdDF", "pd", "spark.DataFrame", "pyspark.DataFrame", "pyspark", "spark", "ps.DataFrame", "ps.df", "psdf", "psDataFrame", "psDF", "ps"]
        ```
        !!! failure "Conclusion: Invalid input."
        </div>
    """
    if output_type not in VALID_DATAFRAME_NAMES:
        raise InvalidDataFrameNameError(
            f"Invalid value for `output_type`: '{output_type}'.\n"
            f"Must be one of: {VALID_DATAFRAME_NAMES}"
        )
    output = pd.DataFrame(dataframe.dtypes, columns=["col_name", "col_type"])
    if output_type in VALID_PYSPARK_DATAFRAME_NAMES:
        return dataframe.sparkSession.createDataFrame(output)
    else:
        return output


@typechecked
def cast_column_to_type(
    dataframe: psDataFrame,
    column: str,
    datatype: Union[str, type, T.DataType],
) -> psDataFrame:
    """
    !!! note "Summary"
        This is a convenience function for casting a single column on a given table to another data type.

    ???+ abstract "Details"

        At its core, it will call the function like this:

        ```{.py .python linenums="1"}
        dataframe = dataframe.withColumn(column, F.col(column).cast(datatype))
        ```

        The reason for wrapping it in this function is to validate the column's existence and to conveniently re-declare the same column in place.
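
        In effect, the wrapper behaves like this short sketch (validate first, normalise the type, then cast; this mirrors the function body below):

        ```{.py .python linenums="1"}
        assert_column_exists(dataframe, column)                    # raises if `column` is missing
        datatype = _validate_pyspark_datatype(datatype=datatype)   # normalise str/type/DataType input
        dataframe = dataframe.withColumn(column, F.col(column).cast(datatype))
        ```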

    Params:
        dataframe (psDataFrame):
            The DataFrame to be updated.
        column (str):
            The column to be updated.
        datatype (Union[str, type, T.DataType]):
            The datatype to be cast to.
            Must be a valid `#!py pyspark` DataType.

            Use one of the following:
            ```{.sh .shell title="Terminal"}
            [
                "string", "char",
                "varchar", "binary",
                "boolean", "decimal",
                "float", "double",
                "byte", "short",
                "integer", "long",
                "date", "timestamp",
                "void", "timestamp_ntz",
            ]
            ```

    Raises:
        TypeError:
            If any of the inputs passed to the parameters of this function are not the correct type. Uses the [`@typeguard.typechecked`](https://typeguard.readthedocs.io/en/stable/api.html#typeguard.typechecked) decorator.
        ColumnDoesNotExistError:
            If the `#!py column` does not exist within `#!py dataframe.columns`.
        ParseException:
            If the given `#!py datatype` is not a valid PySpark DataType.

    Returns:
        (psDataFrame):
            The updated DataFrame.

    ???+ example "Examples"

        ```{.py .python linenums="1" title="Set up"}
        >>> # Imports
        >>> import pandas as pd
        >>> from pyspark.sql import SparkSession
        >>> from toolbox_pyspark.types import cast_column_to_type, get_column_types
        >>>
        >>> # Instantiate Spark
        >>> spark = SparkSession.builder.getOrCreate()
        >>>
        >>> # Create data
        >>> df = spark.createDataFrame(
        ...     pd.DataFrame(
        ...         {
        ...             "a": [1, 2, 3, 4],
        ...             "b": ["a", "b", "c", "d"],
        ...             "c": [1, 1, 1, 1],
        ...             "d": ["2", "2", "2", "2"],
        ...         }
        ...     )
        ... )
        >>>
        >>> # Check
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | bigint   |
        | b        | string   |
        | c        | bigint   |
        | d        | string   |
        +----------+----------+
        ```
        </div>

        ```{.py .python linenums="1" title="Example 1: Valid casting"}
        >>> df = cast_column_to_type(df, "a", "string")
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | string   |
        | b        | string   |
        | c        | bigint   |
        | d        | string   |
        +----------+----------+
        ```
        !!! success "Conclusion: Successfully cast column to type."
        </div>

        ```{.py .python linenums="1" title="Example 2: Invalid column"}
        >>> df = cast_column_to_type(df, "x", "string")
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        ColumnDoesNotExistError: Column "x" does not exist in DataFrame.
        Try one of: ["a", "b", "c", "d"].
        ```
        !!! failure "Conclusion: Column `x` does not exist as a valid column."
        </div>

        ```{.py .python linenums="1" title="Example 3: Invalid datatype"}
        >>> df = cast_column_to_type(df, "b", "foo")
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        ParseException: DataType "foo" is not supported.
        ```
        !!! failure "Conclusion: Datatype `foo` is not valid."
        </div>

    ??? tip "See Also"
        - [`assert_column_exists()`][toolbox_pyspark.checks.column_exists]
        - [`is_vaid_spark_type()`][toolbox_pyspark.checks.is_vaid_spark_type]
        - [`get_column_types()`][toolbox_pyspark.types.get_column_types]
    """
    assert_column_exists(dataframe, column)
    datatype = _validate_pyspark_datatype(datatype=datatype)
    return dataframe.withColumn(column, F.col(column).cast(datatype))  # type:ignore


@typechecked
def cast_columns_to_type(
    dataframe: psDataFrame,
    columns: Union[str, str_list],
    datatype: Union[str, type, T.DataType],
) -> psDataFrame:
    """
    !!! note "Summary"
        Cast multiple columns to a given type.

    ???+ abstract "Details"
        An extension of [`#!py cast_column_to_type()`][toolbox_pyspark.types.cast_column_to_type] to allow casting of multiple columns simultaneously.
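
        Internally this amounts to a single `#!py withColumns` call over the validated columns, as in this sketch (mirroring the function body below):

        ```{.py .python linenums="1"}
        dataframe = dataframe.withColumns({col: F.col(col).cast(datatype) for col in columns})
        ```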

    Params:
        dataframe (psDataFrame):
            The DataFrame to be updated.
        columns (Union[str, str_list]):
            The list of columns to be updated. They must all be valid columns existing on `#!py dataframe`.
        datatype (Union[str, type, T.DataType]):
            The datatype to be cast to.
            Must be a valid PySpark DataType.

            Use one of the following:
            ```{.sh .shell title="Terminal"}
            [
                "string", "char",
                "varchar", "binary",
                "boolean", "decimal",
                "float", "double",
                "byte", "short",
                "integer", "long",
                "date", "timestamp",
                "void", "timestamp_ntz",
            ]
            ```

    Raises:
        TypeError:
            If any of the inputs passed to the parameters of this function are not the correct type. Uses the [`@typeguard.typechecked`](https://typeguard.readthedocs.io/en/stable/api.html#typeguard.typechecked) decorator.
        ColumnDoesNotExistError:
            If any of the `#!py columns` do not exist within `#!py dataframe.columns`.
        ParseException:
            If the given `#!py datatype` is not a valid PySpark DataType.

    Returns:
        (psDataFrame):
            The updated DataFrame.

    ???+ example "Examples"

        ```{.py .python linenums="1" title="Set up"}
        >>> # Imports
        >>> import pandas as pd
        >>> from pyspark.sql import SparkSession
        >>> from toolbox_pyspark.types import cast_columns_to_type, get_column_types
        >>>
        >>> # Instantiate Spark
        >>> spark = SparkSession.builder.getOrCreate()
        >>>
        >>> # Create data
        >>> df = spark.createDataFrame(
        ...     pd.DataFrame(
        ...         {
        ...             "a": [1, 2, 3, 4],
        ...             "b": ["a", "b", "c", "d"],
        ...             "c": [1, 1, 1, 1],
        ...             "d": ["2", "2", "2", "2"],
        ...         }
        ...     )
        ... )
        >>>
        >>> # Check
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | bigint   |
        | b        | string   |
        | c        | bigint   |
        | d        | string   |
        +----------+----------+
        ```
        </div>

        ```{.py .python linenums="1" title="Example 1: Basic usage"}
        >>> df = cast_columns_to_type(df, ["a"], "string")
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | string   |
        | b        | string   |
        | c        | bigint   |
        | d        | string   |
        +----------+----------+
        ```
        !!! success "Conclusion: Successfully cast column to type."
        </div>

        ```{.py .python linenums="1" title="Example 2: Multiple columns"}
        >>> df = cast_columns_to_type(df, ["c", "d"], "string")
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | string   |
        | b        | string   |
        | c        | string   |
        | d        | string   |
        +----------+----------+
        ```
        !!! success "Conclusion: Successfully cast columns to type."
        </div>

        ```{.py .python linenums="1" title="Example 3: Invalid columns"}
        >>> df = cast_columns_to_type(df, ["x", "y"], "string")
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        ColumnDoesNotExistError: Columns ["x", "y"] do not exist in DataFrame.
        Try one of: ["a", "b", "c", "d"].
        ```
        !!! failure "Conclusion: Columns `x` and `y` do not exist as valid columns."
        </div>

        ```{.py .python linenums="1" title="Example 4: Invalid datatype"}
        >>> df = cast_columns_to_type(df, ["a", "b"], "foo")
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        ParseException: DataType "foo" is not supported.
        ```
        !!! failure "Conclusion: Datatype `foo` is not valid."
        </div>

    ??? tip "See Also"
        - [`assert_columns_exists()`][toolbox_pyspark.checks.assert_columns_exists]
        - [`is_vaid_spark_type()`][toolbox_pyspark.checks.is_vaid_spark_type]
        - [`get_column_types()`][toolbox_pyspark.types.get_column_types]
    """
    columns = [columns] if is_type(columns, str) else columns
    assert_columns_exists(dataframe, columns)
    datatype = _validate_pyspark_datatype(datatype=datatype)
    return dataframe.withColumns({col: F.col(col).cast(datatype) for col in columns})


@typechecked
def map_cast_columns_to_type(
    dataframe: psDataFrame,
    columns_type_mapping: dict[
        Union[str, type, T.DataType],
        Union[str, str_list, str_tuple],
    ],
) -> psDataFrame:
    """
    !!! note "Summary"
        Take a dictionary mapping where the keys are the types and the values are the column(s), and apply it to the given dataframe.

    ???+ abstract "Details"
        Applies [`#!py cast_columns_to_type()`][toolbox_pyspark.types.cast_columns_to_type] and [`#!py cast_column_to_type()`][toolbox_pyspark.types.cast_column_to_type] under the hood.
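
        Conceptually, the `#!py {type: columns}` mapping is first reversed into a `#!py {column: type}` mapping, then applied in a single `#!py withColumns` call, as in this sketch (`#!py reversed_mapping` is the name used in the function body below):

        ```{.py .python linenums="1"}
        # eg. {"string": ["a", "c"]}  ->  {"a": "string", "c": "string"}
        dataframe = dataframe.withColumns(
            {col: F.col(col).cast(typ) for col, typ in reversed_mapping.items()}
        )
        ```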

    Params:
        dataframe (psDataFrame):
            The DataFrame to transform.
        columns_type_mapping (dict[Union[str, type, T.DataType], Union[str, str_list, str_tuple]]):
            The mapping of the columns to manipulate.<br>
            The format must be: `#!py {type: columns}`.<br>
            Where the keys are the relevant type to cast to, and the values are the column(s) for casting.

    Raises:
        TypeError:
            If any of the inputs passed to the parameters of this function are not the correct type. Uses the [`@typeguard.typechecked`](https://typeguard.readthedocs.io/en/stable/api.html#typeguard.typechecked) decorator.

    Returns:
        (psDataFrame):
            The transformed data frame.

    ???+ example "Examples"

        ```{.py .python linenums="1" title="Set up"}
        >>> # Imports
        >>> import pandas as pd
        >>> from pyspark.sql import SparkSession
        >>> from toolbox_pyspark.types import map_cast_columns_to_type, get_column_types
        >>>
        >>> # Instantiate Spark
        >>> spark = SparkSession.builder.getOrCreate()
        >>>
        >>> # Create data
        >>> df = spark.createDataFrame(
        ...     pd.DataFrame(
        ...         {
        ...             "a": [1, 2, 3, 4],
        ...             "b": ["a", "b", "c", "d"],
        ...             "c": [1, 1, 1, 1],
        ...             "d": ["2", "2", "2", "2"],
        ...         }
        ...     )
        ... )
        >>>
        >>> # Check
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | bigint   |
        | b        | string   |
        | c        | bigint   |
        | d        | string   |
        +----------+----------+
        ```
        </div>

        ```{.py .python linenums="1" title="Example 1: Basic usage"}
        >>> df = map_cast_columns_to_type(df, {"str": ["a", "c"]})
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | string   |
        | b        | string   |
        | c        | string   |
        | d        | string   |
        +----------+----------+
        ```
        !!! success "Conclusion: Successfully cast columns to type."
        </div>

        ```{.py .python linenums="1" title="Example 2: Multiple types"}
        >>> df = map_cast_columns_to_type(df, {"int": ["a", "c"], "str": ["b"], "float": "d"})
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | int      |
        | b        | string   |
        | c        | int      |
        | d        | float    |
        +----------+----------+
        ```
        !!! success "Conclusion: Successfully cast columns to types."
        </div>

        ```{.py .python linenums="1" title="Example 3: All to single type"}
        >>> df = map_cast_columns_to_type(df, {str: [col for col in df.columns]})
        >>> get_column_types(df).show()
        ```
        <div class="result" markdown>
        ```{.txt .text title="Terminal"}
        +----------+----------+
        | col_name | col_type |
        +----------+----------+
        | a        | string   |
        | b        | string   |
        | c        | string   |
        | d        | string   |
        +----------+----------+
        ```
        !!! success "Conclusion: Successfully cast all columns to type."
        </div>

    ??? tip "See Also"
        - [`cast_column_to_type()`][toolbox_pyspark.types.cast_column_to_type]
        - [`cast_columns_to_type()`][toolbox_pyspark.types.cast_columns_to_type]
        - [`assert_columns_exists()`][toolbox_pyspark.checks.assert_columns_exists]
        - [`is_vaid_spark_type()`][toolbox_pyspark.checks.is_vaid_spark_type]
        - [`get_column_types()`][toolbox_pyspark.types.get_column_types]
    """

    # Ensure all keys are `str`
    keys = (*columns_type_mapping.keys(),)
    for key in keys:
        if is_type(key, type):
            if key.__name__ in keys:
                # Both the type object (eg. `str`) and its string name (eg. `"str"`)
                # were given as keys: merge their column lists under the string key.
                columns_type_mapping[key.__name__] = list(
                    columns_type_mapping[key.__name__]
                ) + list(columns_type_mapping.pop(key))
            else:
                # Re-key the type object (eg. `str`) to its string name (eg. `"str"`).
                columns_type_mapping[key.__name__] = columns_type_mapping.pop(key)

    # Reverse keys and values: `{type: columns}` becomes `{column: type}`
    reversed_mapping = dict_reverse_keys_and_values(dictionary=columns_type_mapping)

    # Validate
    assert_columns_exists(dataframe, reversed_mapping.keys())

    # Apply mapping to dataframe
    try:
        dataframe = dataframe.withColumns(
            {
                col: F.col(col).cast(_validate_pyspark_datatype(typ))
                for col, typ in reversed_mapping.items()
            }
        )
    except Exception as e:  # pragma: no cover
        raise RuntimeError(f"Raised {e.__class__.__name__}: {e}") from e

    # Return
    return dataframe