Coverage for src/toolbox_pyspark/columns.py: 100%

94 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-25 23:08 +0000

1# ============================================================================ # 

2# # 

3# Title : Dataframe Cleaning # 

4# Purpose : Fetch columns from a given DataFrame using convenient syntax. # 

5# # 

6# ============================================================================ # 

7 

8 

9# ---------------------------------------------------------------------------- # 

10# # 

11# Overview #### 

12# # 

13# ---------------------------------------------------------------------------- # 

14 

15 

16# ---------------------------------------------------------------------------- # 

17# Description #### 

18# ---------------------------------------------------------------------------- # 

19 

20 

21""" 

22!!! note "Summary" 

23 The `columns` module is used to fetch columns from a given DataFrame using convenient syntax. 

24""" 

25 

26 

27# ---------------------------------------------------------------------------- # 

28# # 

29# Setup #### 

30# # 

31# ---------------------------------------------------------------------------- # 

32 

33 

34# ---------------------------------------------------------------------------- # 

35# Imports #### 

36# ---------------------------------------------------------------------------- # 

37 

38 

39# ## Python StdLib Imports ---- 

40from typing import Literal, Optional, Union 

41 

42# ## Python Third Party Imports ---- 

43from pyspark.sql import DataFrame as psDataFrame 

44from toolbox_python.checkers import is_type 

45from toolbox_python.collection_types import str_collection, str_list 

46from typeguard import typechecked 

47 

48# ## Local First Party Imports ---- 

49from toolbox_pyspark.checks import ( 

50 assert_columns_exists, 

51 warn_columns_missing, 

52) 

53 

54 

55# ---------------------------------------------------------------------------- # 

56# Exports #### 

57# ---------------------------------------------------------------------------- # 

58 

59 

60__all__: str_list = [ 

61 "get_columns", 

62 "get_columns_by_likeness", 

63 "rename_columns", 

64 "reorder_columns", 

65 "delete_columns", 

66] 

67 

68 

69# ---------------------------------------------------------------------------- # 

70# # 

71# Functions #### 

72# # 

73# ---------------------------------------------------------------------------- # 

74 

75 

76# ---------------------------------------------------------------------------- # 

77# Selecting #### 

78# ---------------------------------------------------------------------------- # 

79 

80 

81@typechecked 

82def get_columns( 

83 dataframe: psDataFrame, 

84 columns: Optional[Union[str, str_collection]] = None, 

85) -> str_list: 

86 """ 

87 !!! note "Summary" 

88 Get a list of column names from a DataFrame based on optional filter criteria. 

89 

90 Params: 

91 dataframe (psDataFrame): 

92 The DataFrame from which to retrieve column names. 

93 columns (Optional[Union[str, str_collection]], optional): 

94 Optional filter criteria for selecting columns.<br> 

95 If a string is provided, it can be one of the following options: 

96 

97 | Value | Description | 

98 |-------|-------------| 

99 | `#!py "all"` | Return all columns in the DataFrame. 

100 | `#!py "all_str"` | Return columns of string type. 

101 | `#!py "all_int"` | Return columns of integer type. 

102 | `#!py "all_numeric"` | Return columns of numeric types (integers and floats). 

103 | `#!py "all_datetime"` or `#!py "all_timestamp"` | Return columns of datetime or timestamp type. 

104 | `#!py "all_date"` | Return columns of date type. 

105 | Any other string | Return columns matching the provided exact column name. 

106 

107 If a list or tuple of column names is provided, return only those columns.<br> 

108 Defaults to `#!py None` (which returns all columns). 

109 

110 Raises: 

111 TypeError: 

112 If any of the inputs parsed to the parameters of this function are not the correct type. Uses the [`@typeguard.typechecked`](https://typeguard.readthedocs.io/en/stable/api.html#typeguard.typechecked) decorator. 

113 

114 Returns: 

115 (str_list): 

116 The selected column names from the DataFrame. 

117 

118 ???+ example "Examples" 

119 

120 ```{.py .python linenums="1" title="Set up"} 

121 >>> # Imports 

122 >>> from pprint import pprint 

123 >>> import pandas as pd 

124 >>> from pyspark.sql import SparkSession, functions as F 

125 >>> from toolbox_pyspark.columns import get_columns 

126 >>> 

127 >>> # Instantiate Spark 

128 >>> spark = SparkSession.builder.getOrCreate() 

129 >>> 

130 >>> # Create data 

131 >>> df = ( 

132 ... spark 

133 ... .createDataFrame( 

134 ... pd.DataFrame( 

135 ... { 

136 ... "a": (0, 1, 2, 3), 

137 ... "b": ["a", "b", "c", "d"], 

138 ... } 

139 ... ) 

140 ... ) 

141 ... .withColumns( 

142 ... { 

143 ... "c": F.lit("1").cast("int"), 

144 ... "d": F.lit("2").cast("string"), 

145 ... "e": F.lit("1.1").cast("float"), 

146 ... "f": F.lit("1.2").cast("double"), 

147 ... "g": F.lit("2022-01-01").cast("date"), 

148 ... "h": F.lit("2022-02-01 01:00:00").cast("timestamp"), 

149 ... } 

150 ... ) 

151 ... ) 

152 >>> 

153 >>> # Check 

154 >>> df.show() 

155 >>> print(df.dtypes) 

156 ``` 

157 <div class="result" markdown> 

158 ```{.txt .text title="Terminal"} 

159 +---+---+---+---+-----+-----+------------+---------------------+ 

160 | a | b | c | d | e | f | g | h | 

161 +---+---+---+---+-----+-----+------------+---------------------+ 

162 | 0 | a | 1 | 2 | 1.1 | 1.2 | 2022-01-01 | 2022-02-01 01:00:00 | 

163 | 1 | b | 1 | 2 | 1.1 | 1.2 | 2022-01-01 | 2022-02-01 01:00:00 | 

164 | 2 | c | 1 | 2 | 1.1 | 1.2 | 2022-01-01 | 2022-02-01 01:00:00 | 

165 | 3 | d | 1 | 2 | 1.1 | 1.2 | 2022-01-01 | 2022-02-01 01:00:00 | 

166 +---+---+---+---+-----+-----+------------+---------------------+ 

167 ``` 

168 ```{.sh .shell title="Terminal"} 

169 [ 

170 ("a", "bigint"), 

171 ("b", "string"), 

172 ("c", "int"), 

173 ("d", "string"), 

174 ("e", "float"), 

175 ("f", "double"), 

176 ("g", "date"), 

177 ("h", "timestamp"), 

178 ] 

179 ``` 

180 </div> 

181 

182 ```{.py .python linenums="1" title="Example 1: Default params"} 

183 >>> print(get_columns(df).columns) 

184 ``` 

185 <div class="result" markdown> 

186 ```{.sh .shell title="Terminal"} 

187 ["a", "b", "c", "d", "e", "f", "g", "h"] 

188 ``` 

189 !!! success "Conclusion: Success." 

190 </div> 

191 

192 ```{.py .python linenums="1" title="Example 2: Specific columns"} 

193 >>> print(get_columns(df, ["a", "b", "c"]).columns) 

194 ``` 

195 <div class="result" markdown> 

196 ```{.sh .shell title="Terminal"} 

197 ["a", "b", "c"] 

198 ``` 

199 !!! success "Conclusion: Success." 

200 </div> 

201 

202 ```{.py .python linenums="1" title="Example 3: Single column as list"} 

203 >>> print(get_columns(df, ["a"]).columns) 

204 ``` 

205 <div class="result" markdown> 

206 ```{.sh .shell title="Terminal"} 

207 ["a"] 

208 ``` 

209 !!! success "Conclusion: Success." 

210 </div> 

211 

212 ```{.py .python linenums="1" title="Example 4: Single column as string"} 

213 >>> print(get_columns(df, "a").columns) 

214 ``` 

215 <div class="result" markdown> 

216 ```{.sh .shell title="Terminal"} 

217 ["a"] 

218 ``` 

219 !!! success "Conclusion: Success." 

220 </div> 

221 

222 ```{.py .python linenums="1" title="Example 5: All columns"} 

223 >>> print(get_columns(df, "all").columns) 

224 ``` 

225 <div class="result" markdown> 

226 ```{.sh .shell title="Terminal"} 

227 ["a", "b", "c", "d", "e", "f", "g", "h"] 

228 ``` 

229 !!! success "Conclusion: Success." 

230 </div> 

231 

232 ```{.py .python linenums="1" title="Example 6: All str"} 

233 >>> print(get_columns(df, "all_str").columns) 

234 ``` 

235 <div class="result" markdown> 

236 ```{.sh .shell title="Terminal"} 

237 ["b", "d"] 

238 ``` 

239 !!! success "Conclusion: Success." 

240 </div> 

241 

242 ```{.py .python linenums="1" title="Example 7: All int"} 

243 >>> print(get_columns(df, "all int").columns) 

244 ``` 

245 <div class="result" markdown> 

246 ```{.sh .shell title="Terminal"} 

247 ["c"] 

248 ``` 

249 !!! success "Conclusion: Success." 

250 </div> 

251 

252 ```{.py .python linenums="1" title="Example 8: All float"} 

253 >>> print(get_columns(df, "all_decimal").columns) 

254 ``` 

255 <div class="result" markdown> 

256 ```{.sh .shell title="Terminal"} 

257 ["e", "f"] 

258 ``` 

259 !!! success "Conclusion: Success." 

260 </div> 

261 

262 ```{.py .python linenums="1" title="Example 9: All numeric"} 

263 >>> print(get_columns(df, "all_numeric").columns) 

264 ``` 

265 <div class="result" markdown> 

266 ```{.sh .shell title="Terminal"} 

267 ["c", "e", "f"] 

268 ``` 

269 !!! success "Conclusion: Success." 

270 </div> 

271 

272 ```{.py .python linenums="1" title="Example 10: All date"} 

273 >>> print(get_columns(df, "all_date").columns) 

274 ``` 

275 <div class="result" markdown> 

276 ```{.sh .shell title="Terminal"} 

277 ["g"] 

278 ``` 

279 !!! success "Conclusion: Success." 

280 </div> 

281 

282 ```{.py .python linenums="1" title="Example 11: All datetime"} 

283 >>> print(get_columns(df, "all_datetime").columns) 

284 ``` 

285 <div class="result" markdown> 

286 ```{.sh .shell title="Terminal"} 

287 ["h"] 

288 ``` 

289 !!! success "Conclusion: Success." 

290 </div> 

291 """ 

292 if columns is None: 

293 return dataframe.columns 

294 elif is_type(columns, str): 

295 if "all" in columns: 

296 if "str" in columns: 

297 return [col for col, typ in dataframe.dtypes if typ in ("str", "string")] 

298 elif "int" in columns: 

299 return [col for col, typ in dataframe.dtypes if typ in ("int", "integer")] 

300 elif "numeric" in columns: 

301 return [ 

302 col 

303 for col, typ in dataframe.dtypes 

304 if typ in ("int", "integer", "float", "double", "long") or "decimal" in typ 

305 ] 

306 elif "float" in columns or "double" in columns or "decimal" in columns: 

307 return [ 

308 col 

309 for col, typ in dataframe.dtypes 

310 if typ in ("float", "double", "long") or "decimal" in typ 

311 ] 

312 elif "datetime" in columns or "timestamp" in columns: 

313 return [ 

314 col for col, typ in dataframe.dtypes if typ in ("datetime", "timestamp") 

315 ] 

316 elif "date" in columns: 

317 return [col for col, typ in dataframe.dtypes if typ in ["date"]] 

318 else: 

319 return dataframe.columns 

320 else: 

321 return [columns] 

322 else: 

323 return list(columns) 

324 

325 

326@typechecked 

327def get_columns_by_likeness( 

328 dataframe: psDataFrame, 

329 starts_with: Optional[str] = None, 

330 contains: Optional[str] = None, 

331 ends_with: Optional[str] = None, 

332 match_case: bool = False, 

333 operator: Literal["and", "or", "and not", "or not"] = "and", 

334) -> str_list: 

335 """ 

336 !!! note "Summary" 

337 Extract the column names from a given `dataframe` based on text that the column name contains. 

338 

339 ???+ abstract "Details" 

340 You can use any combination of `startswith`, `contains`, and `endswith`. Under the hood, these will be implemented with a number of internal `#!py lambda` functions to determine matches. 

341 

342 The `operator` parameter determines how the conditions (`starts_with`, `contains`, `ends_with`) are combined: 

343 

344 | Value | Description | 

345 |-------|-------------| 

346 | `"and"` | All conditions must be true. 

347 | `"or"` | At least one condition must be true. 

348 | `"and not"` | The first condition must be true and the second condition must be false. 

349 | `"or not"` | At least one condition must be true, but not all. 

350 

351 Params: 

352 dataframe (psDataFrame): 

353 The `dataframe` from which to extract the column names. 

354 starts_with (Optional[str], optional): 

355 Extract any columns that starts with this `#!py str`.<br> 

356 Determined by using the `#!py str.startswith()` method.<br> 

357 Defaults to `#!py None`. 

358 contains (Optional[str], optional): 

359 Extract any columns that contains this `#!py str` anywhere within it.<br> 

360 Determined by using the `#!py in` keyword.<br> 

361 Defaults to `#!py None`. 

362 ends_with (Optional[str], optional): 

363 Extract any columns that ends with this `#!py str`.<br> 

364 Determined by using the `#!py str.endswith()` method.<br> 

365 Defaults to `#!py None`. 

366 match_case (bool, optional): 

367 If you want to ensure an exact match for the columns, set this to `#!py True`, else if you want to match the exact case for the columns, set this to `#!py False`.<br> 

368 Defaults to `#!py False`. 

369 operator (Literal["and", "or", "and not", "or not"], optional): 

370 The logical operator to place between the functions.<br> 

371 Only used when there are multiple values parsed to the parameters: `#!py starts_with`, `#!py contains`: `#!py ends_with`.<br> 

372 Defaults to `#!py and`. 

373 

374 Returns: 

375 (str_list): 

376 The list of columns which match the criteria specified. 

377 

378 ???+ example "Examples" 

379 

380 ```{.py .python linenums="1" title="Set up"} 

381 >>> # Imports 

382 >>> import pandas as pd 

383 >>> from pyspark.sql import SparkSession 

384 >>> from toolbox_pyspark.columns import get_columns_by_likeness 

385 >>> 

386 >>> # Instantiate Spark 

387 >>> spark = SparkSession.builder.getOrCreate() 

388 >>> 

389 >>> # Create data 

390 >>> values = list(range(1, 6)) 

391 >>> df = spark.createDataFrame( 

392 ... pd.DataFrame( 

393 ... { 

394 ... "aaa": values, 

395 ... "aab": values, 

396 ... "aac": values, 

397 ... "afa": values, 

398 ... "afb": values, 

399 ... "afc": values, 

400 ... "bac": values, 

401 ... } 

402 ... ) 

403 ... ) 

404 >>> 

405 >>> # Check 

406 >>> df.show() 

407 ``` 

408 <div class="result" markdown> 

409 ```{.txt .text title="Terminal"} 

410 +-----+-----+-----+-----+-----+-----+-----+ 

411 | aaa | aab | aac | afa | afb | afc | bac | 

412 +-----+-----+-----+-----+-----+-----+-----+ 

413 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 

414 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 

415 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 

416 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 

417 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 

418 +-----+-----+-----+-----+-----+-----+-----+ 

419 ``` 

420 </div> 

421 

422 ```{.py .python linenums="1" title="Example 1: Starts With"} 

423 >>> print(get_columns_by_likeness(df, starts_with="a")) 

424 ``` 

425 <div class="result" markdown> 

426 ```{.sh .shell title="Terminal"} 

427 ["aaa", "aab", "aac", "afa", "afb", "afc"] 

428 ``` 

429 !!! success "Conclusion: Success." 

430 </div> 

431 

432 ```{.py .python linenums="1" title="Example 2: Contains"} 

433 >>> print(get_columns_by_likeness(df, contains="f")) 

434 ``` 

435 <div class="result" markdown> 

436 ```{.sh .shell title="Terminal"} 

437 ["afa", "afb", "afc"] 

438 ``` 

439 !!! success "Conclusion: Success." 

440 </div> 

441 

442 ```{.py .python linenums="1" title="Example 3: Ends With"} 

443 >>> print(get_columns_by_likeness(df, ends_with="c")) 

444 ``` 

445 <div class="result" markdown> 

446 ```{.sh .shell title="Terminal"} 

447 ["aac", "afc", "bac"] 

448 ``` 

449 !!! success "Conclusion: Success." 

450 </div> 

451 

452 ```{.py .python linenums="1" title="Example 4: Starts With and Contains"} 

453 >>> print(get_columns_by_likeness(df, starts_with="a", contains="c")) 

454 ``` 

455 <div class="result" markdown> 

456 ```{.sh .shell title="Terminal"} 

457 ["aac", "afc"] 

458 ``` 

459 !!! success "Conclusion: Success." 

460 </div> 

461 

462 ```{.py .python linenums="1" title="Example 5: Starts With and Ends With"} 

463 >>> print(get_columns_by_likeness(df, starts_with="a", ends_with="b")) 

464 ``` 

465 <div class="result" markdown> 

466 ```{.sh .shell title="Terminal"} 

467 ["aab", "afb"] 

468 ``` 

469 !!! success "Conclusion: Success." 

470 </div> 

471 

472 ```{.py .python linenums="1" title="Example 6: Contains and Ends With"} 

473 >>> print(get_columns_by_likeness(df, contains="f", ends_with="b")) 

474 ``` 

475 <div class="result" markdown> 

476 ```{.sh .shell title="Terminal"} 

477 ["afb"] 

478 ``` 

479 !!! success "Conclusion: Success." 

480 </div> 

481 

482 ```{.py .python linenums="1" title="Example 7: Starts With and Contains and Ends With"} 

483 >>> print(get_columns_by_likeness(df, starts_with="a", contains="f", ends_with="b")) 

484 ``` 

485 <div class="result" markdown> 

486 ```{.sh .shell title="Terminal"} 

487 ["afb"] 

488 ``` 

489 !!! success "Conclusion: Success." 

490 </div> 

491 

492 ```{.py .python linenums="1" title="Example 8: Using 'or' Operator"} 

493 >>> print(get_columns_by_likeness(df, starts_with="a", operator="or", contains="f")) 

494 ``` 

495 <div class="result" markdown> 

496 ```{.sh .shell title="Terminal"} 

497 ["aaa", "aab", "aac", "afa", "afb", "afc"] 

498 ``` 

499 !!! success "Conclusion: Success." 

500 </div> 

501 

502 ```{.py .python linenums="1" title="Example 9: Using 'and not' Operator"} 

503 >>> print(get_columns_by_likeness(df, starts_with="a", operator="and not", contains="f")) 

504 ``` 

505 <div class="result" markdown> 

506 ```{.sh .shell title="Terminal"} 

507 ["aaa", "aab", "aac"] 

508 ``` 

509 !!! success "Conclusion: Success." 

510 </div> 

511 

512 ```{.py .python linenums="1" title="Example 10: Error Example 1"} 

513 >>> print(get_columns_by_likeness(df, starts_with=123)) 

514 ``` 

515 <div class="result" markdown> 

516 ```{.sh .shell title="Terminal"} 

517 TypeError: `starts_with` must be a `string` or `None`. 

518 ``` 

519 !!! failure "Conclusion: Error." 

520 </div> 

521 

522 ```{.py .python linenums="1" title="Example 11: Error Example 2"} 

523 >>> print(get_columns_by_likeness(df, operator="xor")) 

524 ``` 

525 <div class="result" markdown> 

526 ```{.sh .shell title="Terminal"} 

527 ValueError: `operator` must be one of 'and', 'or', 'and not', 'or not' 

528 ``` 

529 !!! failure "Conclusion: Error." 

530 </div> 

531 """ 

532 

533 # Columns 

534 cols: str_list = dataframe.columns 

535 if not match_case: 

536 cols = [col.upper() for col in cols] 

537 starts_with = starts_with.upper() if starts_with is not None else None 

538 contains = contains.upper() if contains is not None else None 

539 ends_with = ends_with.upper() if ends_with is not None else None 

540 

541 # Parameters 

542 o_: Literal["and", "or", "and not", "or not"] = operator 

543 s_: bool = starts_with is not None 

544 c_: bool = contains is not None 

545 e_: bool = ends_with is not None 

546 

547 # Functions 

548 _ops = { 

549 "and": lambda x, y: x and y, 

550 "or": lambda x, y: x or y, 

551 "and not": lambda x, y: x and not y, 

552 "or not": lambda x, y: x or not y, 

553 } 

554 _s = lambda col, s: col.startswith(s) 

555 _c = lambda col, c: c in col 

556 _e = lambda col, e: col.endswith(e) 

557 _sc = lambda col, s, c: _ops[o_](_s(col, s), _c(col, c)) 

558 _se = lambda col, s, e: _ops[o_](_s(col, s), _e(col, e)) 

559 _ce = lambda col, c, e: _ops[o_](_c(col, c), _e(col, e)) 

560 _sce = lambda col, s, c, e: _ops[o_](_ops[o_](_s(col, s), _c(col, c)), _e(col, e)) 

561 

562 # Logic 

563 if s_ and not c_ and not e_: 

564 return [col for col in cols if _s(col, starts_with)] 

565 elif c_ and not s_ and not e_: 

566 return [col for col in cols if _c(col, contains)] 

567 elif e_ and not s_ and not c_: 

568 return [col for col in cols if _e(col, ends_with)] 

569 elif s_ and c_ and not e_: 

570 return [col for col in cols if _sc(col, starts_with, contains)] 

571 elif s_ and e_ and not c_: 

572 return [col for col in cols if _se(col, starts_with, ends_with)] 

573 elif c_ and e_ and not s_: 

574 return [col for col in cols if _ce(col, contains, ends_with)] 

575 elif s_ and c_ and e_: 

576 return [col for col in cols if _sce(col, starts_with, contains, ends_with)] 

577 else: 

578 return cols 

579 

580 

581# ---------------------------------------------------------------------------- # 

582# Renaming #### 

583# ---------------------------------------------------------------------------- # 

584 

585 

586@typechecked 

587def rename_columns( 

588 dataframe: psDataFrame, 

589 columns: Optional[Union[str, str_collection]] = None, 

590 string_function: str = "upper", 

591) -> psDataFrame: 

592 """ 

593 !!! note "Summary" 

594 Use one of the common Python string functions to be applied to one or multiple columns. 

595 

596 ???+ abstract "Details" 

597 The `string_function` must be a valid string method. For more info on available functions, see: https://docs.python.org/3/library/stdtypes.html#string-methods 

598 

599 Params: 

600 dataframe (psDataFrame): 

601 The DataFrame to be updated. 

602 columns (Optional[Union[str, str_collection]], optional): 

603 The columns to be updated.<br> 

604 Must be a valid column on `dataframe`.<br> 

605 If not provided, will be applied to all columns.<br> 

606 It is also possible to parse the values `"all"`, which will also apply this function to all columns in `dataframe`.<br> 

607 Defaults to `None`. 

608 string_function (str, optional): 

609 The string function to be applied. Defaults to `"upper"`. 

610 

611 Returns: 

612 (psDataFrame): 

613 The updated DataFrame. 

614 

615 ???+ example "Examples" 

616 

617 ```{.py .python linenums="1" title="Set up"} 

618 >>> # Import 

619 >>> import pandas as pd 

620 >>> from pyspark.sql import SparkSession 

621 >>> from toolbox_pyspark.columns import rename_columns 

622 >>> 

623 >>> # Instantiate Spark 

624 >>> spark = SparkSession.builder.getOrCreate() 

625 >>> 

626 >>> # Create data 

627 >>> df = spark.createDataFrame( 

628 ... pd.DataFrame( 

629 ... { 

630 ... "a": [0, 1, 2, 3], 

631 ... "b": ["a", "b", "c", "d"], 

632 ... "c": ["c", "c", "c", "c"], 

633 ... "d": ["d", "d", "d", "d"], 

634 ... } 

635 ... ) 

636 ... ) 

637 >>> 

638 >>> # Check 

639 >>> df.show() 

640 ``` 

641 <div class="result" markdown> 

642 ```{.txt .text title="Terminal"} 

643 +---+---+---+---+ 

644 | a | b | c | d | 

645 +---+---+---+---+ 

646 | 0 | a | c | d | 

647 | 1 | b | c | d | 

648 | 2 | c | c | d | 

649 | 3 | d | c | d | 

650 +---+---+---+---+ 

651 ``` 

652 </div> 

653 

654 ```{.py .python linenums="1" title="Example 1: Single column, default params"} 

655 >>> print(rename_columns(df, "a").columns) 

656 ``` 

657 <div class="result" markdown> 

658 ```{.sh .shell title="Terminal"} 

659 ["A", "b", "c", "d"] 

660 ``` 

661 !!! success "Conclusion: Success." 

662 </div> 

663 

664 ```{.py .python linenums="1" title="Example 2: Single column, simple function"} 

665 >>> print(rename_columns(df, "a", "upper").columns) 

666 ``` 

667 <div class="result" markdown> 

668 ```{.sh .shell title="Terminal"} 

669 ["A", "b", "c", "d"] 

670 ``` 

671 !!! success "Conclusion: Success." 

672 </div> 

673 

674 ```{.py .python linenums="1" title="Example 3: Single column, complex function"} 

675 >>> print(rename_columns(df, "a", "replace('b', 'test')").columns) 

676 ``` 

677 <div class="result" markdown> 

678 ```{.sh .shell title="Terminal"} 

679 ["a", "test", "c", "d"] 

680 ``` 

681 !!! success "Conclusion: Success." 

682 </div> 

683 

684 ```{.py .python linenums="1" title="Example 4: Multiple columns"} 

685 >>> print(rename_columns(df, ["a", "b"]).columns) 

686 ``` 

687 <div class="result" markdown> 

688 ```{.sh .shell title="Terminal"} 

689 ["A", "B", "c", "d"] 

690 ``` 

691 !!! success "Conclusion: Success." 

692 </div> 

693 

694 ```{.py .python linenums="1" title="Example 5: Default function over all columns"} 

695 >>> print(rename_columns(df).columns) 

696 ``` 

697 <div class="result" markdown> 

698 ```{.sh .shell title="Terminal"} 

699 ["A", "B", "C", "D"] 

700 ``` 

701 !!! success "Conclusion: Success." 

702 </div> 

703 

704 ```{.py .python linenums="1" title="Example 6: Complex function over multiple columns"} 

705 >>> print(rename_columns(df, ["a", "b"], "replace('b', 'test')").columns) 

706 ``` 

707 <div class="result" markdown> 

708 ```{.sh .shell title="Terminal"} 

709 ["a", "test", "c", "d"] 

710 ``` 

711 !!! success "Conclusion: Success." 

712 </div> 

713 

714 ??? tip "See Also" 

715 - [`assert_columns_exists()`][toolbox_pyspark.checks.assert_columns_exists] 

716 - [`assert_column_exists()`][toolbox_pyspark.checks.assert_column_exists] 

717 """ 

718 columns = get_columns(dataframe, columns) 

719 assert_columns_exists(dataframe=dataframe, columns=columns, match_case=True) 

720 cols_exprs: dict[str, str] = { 

721 col: eval( 

722 f"'{col}'.{string_function}{'()' if not string_function.endswith(')') else ''}" 

723 ) 

724 for col in columns 

725 } 

726 return dataframe.withColumnsRenamed(cols_exprs) 

727 

728 

729# ---------------------------------------------------------------------------- # 

730# Reordering #### 

731# ---------------------------------------------------------------------------- # 

732 

733 

734@typechecked 

735def reorder_columns( 

736 dataframe: psDataFrame, 

737 new_order: Optional[str_collection] = None, 

738 missing_columns_last: bool = True, 

739 key_columns_position: Optional[Literal["first", "last"]] = "first", 

740) -> psDataFrame: 

741 """ 

742 !!! note "Summary" 

743 Reorder the columns in a given DataFrame in to a custom order, or to put the `key_` columns at the end (that is, to the far right) of the dataframe. 

744 

745 ???+ abstract "Details" 

746 The decision flow chart is as follows: 

747 

748 ```mermaid 

749 graph TD 

750 a([begin]) 

751 z([end]) 

752 b{{new_order}} 

753 c{{missing_cols_last}} 

754 d{{key_cols_position}} 

755 g[cols = dataframe.columns] 

756 h[cols = new_order] 

757 i[cols += missing_cols] 

758 j[cols = non_key_cols + key_cols] 

759 k[cols = key_cols + non_key_cols] 

760 l["return dataframe.select(cols)"] 

761 a --> b 

762 b --is not None--> h --> c 

763 b --is None--> g --> d 

764 c --False--> l 

765 c --True--> i ----> l 

766 d --"first"--> k ---> l 

767 d --"last"---> j --> l 

768 d --None--> l 

769 l --> z 

770 ``` 

771 

772 Params: 

773 dataframe (psDataFrame): 

774 The DataFrame to update 

775 new_order (Optional[Union[str, str_list, str_tuple, str_set]], optional): 

776 The custom order for the columns on the order.<br> 

777 Defaults to `#!py None`. 

778 missing_columns_last (bool, optional): 

779 For any columns existing on `#!py dataframes.columns`, but missing from `#!py new_order`, if `#!py missing_columns_last=True`, then include those missing columns to the right of the dataframe, in the same order that they originally appear.<br> 

780 Defaults to `#!py True`. 

781 key_columns_position (Optional[Literal["first", "last"]], optional): 

782 Where should the `#!py "key_*"` columns be located?.<br> 

783 

784 - If `#!py "first"`, then they will be relocated to the start of the dataframe, before all other columns. 

785 - If `#!py "last"`, then they will be relocated to the end of the dataframe, after all other columns. 

786 - If `#!py None`, they they will remain their original order. 

787 

788 Regardless of their position, their original order will be maintained. 

789 Defaults to `#!py "first"`. 

790 

791 Raises: 

792 TypeError: 

793 If any of the inputs parsed to the parameters of this function are not the correct type. Uses the [`@typeguard.typechecked`](https://typeguard.readthedocs.io/en/stable/api.html#typeguard.typechecked) decorator. 

794 

795 Returns: 

796 (psDataFrame): 

797 The updated DataFrame. 

798 

799 ???+ example "Examples" 

800 

801 ```{.py .python linenums="1" title="Set up"} 

802 >>> # Imports 

803 >>> import pandas as pd 

804 >>> from pyspark.sql import SparkSession 

805 >>> from toolbox_pyspark.columns import reorder_columns 

806 >>> 

807 >>> # Instantiate Spark 

808 >>> spark = SparkSession.builder.getOrCreate() 

809 >>> 

810 >>> # Create data 

811 >>> df = spark.createDataFrame( 

812 ... pd.DataFrame( 

813 ... { 

814 ... "a": [0, 1, 2, 3], 

815 ... "b": ["a", "b", "c", "d"], 

816 ... "key_a": ["0", "1", "2", "3"], 

817 ... "c": ["1", "1", "1", "1"], 

818 ... "d": ["2", "2", "2", "2"], 

819 ... "key_c": ["1", "1", "1", "1"], 

820 ... "key_e": ["3", "3", "3", "3"], 

821 ... } 

822 ... ) 

823 ... ) 

824 >>> 

825 >>> # Check 

826 >>> df.show() 

827 ``` 

828 <div class="result" markdown> 

829 ```{.txt .text title="Terminal"} 

830 +---+---+-------+---+---+-------+-------+ 

831 | a | b | key_a | c | d | key_c | key_e | 

832 +---+---+-------+---+---+-------+-------+ 

833 | 0 | a | 0 | 1 | 2 | 1 | 3 | 

834 | 1 | b | 1 | 1 | 2 | 1 | 3 | 

835 | 2 | c | 2 | 1 | 2 | 1 | 3 | 

836 | 3 | d | 3 | 1 | 2 | 1 | 3 | 

837 +---+---+-------+---+---+-------+-------+ 

838 ``` 

839 </div> 

840 

841 ```{.py .python linenums="1" title="Example 1: Default config"} 

842 >>> new_df = reorder_columns(dataframe=df) 

843 >>> new_df.show() 

844 ``` 

845 <div class="result" markdown> 

846 ```{.txt .text title="Terminal"} 

847 +-------+-------+-------+---+---+---+---+ 

848 | key_a | key_c | key_e | a | b | c | d | 

849 +-------+-------+-------+---+---+---+---+ 

850 | 0 | 1 | 3 | 0 | a | 1 | 2 | 

851 | 1 | 1 | 3 | 1 | b | 1 | 2 | 

852 | 2 | 1 | 3 | 2 | c | 1 | 2 | 

853 | 3 | 1 | 3 | 3 | d | 1 | 2 | 

854 +-------+-------+-------+---+---+---+---+ 

855 ``` 

856 !!! success "Conclusion: Success." 

857 </div> 

858 

859 ```{.py .python linenums="1" title="Example 2: Custom order"} 

860 >>> new_df = reorder_columns( 

861 ... dataframe=df, 

862 ... new_order=["key_a", "key_c", "b", "key_e", "a", "c", "d"], 

863 ... ) 

864 >>> new_df.show() 

865 ``` 

866 <div class="result" markdown> 

867 ```{.txt .text title="Terminal"} 

868 +-------+-------+---+-------+---+---+---+ 

869 | key_a | key_c | b | key_e | a | c | d | 

870 +-------+-------+---+-------+---+---+---+ 

871 | 0 | 1 | a | 3 | 0 | 1 | 2 | 

872 | 1 | 1 | b | 3 | 1 | 1 | 2 | 

873 | 2 | 1 | c | 3 | 2 | 1 | 2 | 

874 | 3 | 1 | d | 3 | 3 | 1 | 2 | 

875 +-------+-------+---+-------+---+---+---+ 

876 ``` 

877 !!! success "Conclusion: Success." 

878 </div> 

879 

880 ```{.py .python linenums="1" title="Example 3: Custom order, include missing columns"} 

881 >>> new_df = reorder_columns( 

882 ... dataframe=df, 

883 ... new_order=["key_a", "key_c", "a", "b"], 

884 ... missing_columns_last=True, 

885 ... ) 

886 >>> new_df.show() 

887 ``` 

888 <div class="result" markdown> 

889 ```{.txt .text title="Terminal"} 

890 +-------+-------+---+---+-------+---+---+ 

891 | key_a | key_c | a | b | key_e | c | d | 

892 +-------+-------+---+---+-------+---+---+ 

893 | 0 | 1 | 0 | a | 3 | 1 | 2 | 

894 | 1 | 1 | 1 | b | 3 | 1 | 2 | 

895 | 2 | 1 | 2 | c | 3 | 1 | 2 | 

896 | 3 | 1 | 3 | d | 3 | 1 | 2 | 

897 +-------+-------+---+---+-------+---+---+ 

898 ``` 

899 !!! success "Conclusion: Success." 

900 </div> 

901 

902 ```{.py .python linenums="1" title="Example 4: Custom order, exclude missing columns"} 

903 >>> new_df = reorder_columns( 

904 ... dataframe=df, 

905 ... new_order=["key_a", "key_c", "a", "b"], 

906 ... missing_columns_last=False, 

907 ... ) 

908 >>> new_df.show() 

909 ``` 

910 <div class="result" markdown> 

911 ```{.txt .text title="Terminal"} 

912 +-------+-------+---+---+ 

913 | key_a | key_c | a | b | 

914 +-------+-------+---+---+ 

915 | 0 | 1 | 0 | a | 

916 | 1 | 1 | 1 | b | 

917 | 2 | 1 | 2 | c | 

918 | 3 | 1 | 3 | d | 

919 +-------+-------+---+---+ 

920 ``` 

921 !!! success "Conclusion: Success." 

922 </div> 

923 

924 ```{.py .python linenums="1" title="Example 5: Keys last"} 

925 >>> new_df = reorder_columns( 

926 ... dataframe=df, 

927 ... key_columns_position="last", 

928 ... ) 

929 >>> new_df.show() 

930 ``` 

931 <div class="result" markdown> 

932 ```{.txt .text title="Terminal"} 

933 +---+---+---+---+-------+-------+-------+ 

934 | a | b | c | d | key_a | key_c | key_e | 

935 +---+---+---+---+-------+-------+-------+ 

936 | 0 | a | 1 | 2 | 0 | 1 | 3 | 

937 | 1 | b | 1 | 2 | 1 | 1 | 3 | 

938 | 2 | c | 1 | 2 | 2 | 1 | 3 | 

939 | 3 | d | 1 | 2 | 3 | 1 | 3 | 

940 +---+---+---+---+-------+-------+-------+ 

941 ``` 

942 !!! success "Conclusion: Success." 

943 </div> 

944 

945 ```{.py .python linenums="1" title="Example 6: Keys first"} 

946 >>> new_df = reorder_columns( 

947 ... dataframe=df, 

948 ... key_columns_position="first", 

949 ... ) 

950 >>> new_df.show() 

951 ``` 

952 <div class="result" markdown> 

953 ```{.txt .text title="Terminal"} 

954 +-------+-------+-------+---+---+---+---+ 

955 | key_a | key_c | key_e | a | b | c | d | 

956 +-------+-------+-------+---+---+---+---+ 

957 | 0 | 1 | 3 | 0 | a | 1 | 2 | 

958 | 1 | 1 | 3 | 1 | b | 1 | 2 | 

959 | 2 | 1 | 3 | 2 | c | 1 | 2 | 

960 | 3 | 1 | 3 | 3 | d | 1 | 2 | 

961 +-------+-------+-------+---+---+---+---+ 

962 ``` 

963 !!! success "Conclusion: Success." 

964 </div> 

965 """ 

966 df_cols: str_list = dataframe.columns 

967 if new_order is not None: 

968 cols: str_list = get_columns(dataframe, new_order) 

969 if missing_columns_last: 

970 cols += [col for col in df_cols if col not in new_order] 

971 else: 

972 non_key_cols: str_list = [col for col in df_cols if not col.lower().startswith("key_")] 

973 key_cols: str_list = [col for col in df_cols if col.lower().startswith("key_")] 

974 if key_columns_position == "first": 

975 cols = key_cols + non_key_cols 

976 elif key_columns_position == "last": 

977 cols = non_key_cols + key_cols 

978 else: 

979 cols = df_cols 

980 return dataframe.select(cols) 

981 

982 

983# ---------------------------------------------------------------------------- # 

984# Deleting #### 

985# ---------------------------------------------------------------------------- # 

986 

987 

988@typechecked 

989def delete_columns( 

990 dataframe: psDataFrame, 

991 columns: Union[str, str_collection], 

992 missing_column_handler: Literal["raise", "warn", "pass"] = "pass", 

993) -> psDataFrame: 

994 """ 

995 !!! note "Summary" 

996 For a given `#!py dataframe`, delete the columns listed in `columns`. 

997 

998 ???+ abstract "Details" 

999 You can use `#!py missing_columns_handler` to specify how to handle missing columns. 

1000 

1001 Params: 

1002 dataframe (psDataFrame): 

1003 The dataframe from which to delete the columns 

1004 columns (Union[str, str_collection]): 

1005 The list of columns to delete. 

1006 missing_column_handler (Literal["raise", "warn", "pass"], optional): 

1007 How to handle any columns which are missing from `#!py dataframe.columns`. 

1008 

1009 If _any_ columns in `columns` are missing from `#!py dataframe.columns`, then the following will happen for each option: 

1010 

1011 | Option | Result | 

1012 |--------|--------| 

1013 | `#!py "raise"` | An `#!py ColumnDoesNotExistError` exception will be raised 

1014 | `#!py "warn"` | An `#!py ColumnDoesNotExistWarning` warning will be raised 

1015 | `#!py "pass"` | Nothing will be raised 

1016 

1017 Defaults to `#!py "pass"`. 

1018 

1019 Raises: 

1020 TypeError: 

1021 If any of the inputs parsed to the parameters of this function are not the correct type. Uses the [`@typeguard.typechecked`](https://typeguard.readthedocs.io/en/stable/api.html#typeguard.typechecked) decorator. 

1022 ColumnDoesNotExistError: 

1023 If any of the `#!py columns` do not exist within `#!py dataframe.columns`. 

1024 

1025 Returns: 

1026 (psDataFrame): 

1027 The updated `#!py dataframe`, with the columns listed in `#!py columns` having been removed. 

1028 

1029 ???+ example "Examples" 

1030 

1031 ```{.py .python linenums="1" title="Set up"} 

1032 >>> # Imports 

1033 >>> import pandas as pd 

1034 >>> from pyspark.sql import SparkSession 

1035 >>> from toolbox_pyspark.columns import delete_columns 

1036 >>> 

1037 >>> # Instantiate Spark 

1038 >>> spark = SparkSession.builder.getOrCreate() 

1039 >>> 

1040 >>> # Create data 

1041 >>> df = spark.createDataFrame( 

1042 ... pd.DataFrame( 

1043 ... { 

1044 ... "a": [0, 1, 2, 3], 

1045 ... "b": ["a", "b", "c", "d"], 

1046 ... "c": ["c", "c", "c", "c"], 

1047 ... "d": ["d", "d", "d", "d"], 

1048 ... } 

1049 ... ) 

1050 ... ) 

1051 >>> 

1052 >>> # Check 

1053 >>> df.show() 

1054 ``` 

1055 <div class="result" markdown> 

1056 ```{.txt .text title="Terminal"} 

1057 +---+---+---+---+ 

1058 | a | b | c | d | 

1059 +---+---+---+---+ 

1060 | 0 | a | c | d | 

1061 | 1 | b | c | d | 

1062 | 2 | c | c | d | 

1063 | 3 | d | c | d | 

1064 +---+---+---+---+ 

1065 ``` 

1066 </div> 

1067 

1068 ```{.py .python linenums="1" title="Example 1: Single column"} 

1069 >>> df.transform(delete_columns, "a").show() 

1070 ``` 

1071 <div class="result" markdown> 

1072 ```{.txt .text title="Terminal"} 

1073 +---+---+---+ 

1074 | b | c | d | 

1075 +---+---+---+ 

1076 | a | c | d | 

1077 | b | c | d | 

1078 | c | c | d | 

1079 | d | c | d | 

1080 +---+---+---+ 

1081 ``` 

1082 !!! success "Conclusion: Success." 

1083 </div> 

1084 

1085 ```{.py .python linenums="1" title="Example 2: Multiple columns"} 

1086 >>> df.transform(delete_columns, ["a", "b"]).show() 

1087 ``` 

1088 <div class="result" markdown> 

1089 ```{.txt .text title="Terminal"} 

1090 +---+---+ 

1091 | c | d | 

1092 +---+---+ 

1093 | c | d | 

1094 | c | d | 

1095 | c | d | 

1096 | c | d | 

1097 +---+---+ 

1098 ``` 

1099 !!! success "Conclusion: Success." 

1100 </div> 

1101 

1102 ```{.py .python linenums="1" title="Example 3: Single column missing, raises error"} 

1103 >>> ( 

1104 ... df.transform( 

1105 ... delete_columns, 

1106 ... columns="z", 

1107 ... missing_column_handler="raise", 

1108 ... ) 

1109 ... .show() 

1110 ... ) 

1111 ``` 

1112 <div class="result" markdown> 

1113 ```{.txt .text title="Terminal"} 

1114 ColumnDoesNotExistError: Columns ["z"] do not exist in "dataframe". 

1115 Try one of: ["a", "b", "c", "d"] 

1116 ``` 

1117 !!! success "Conclusion: Success." 

1118 </div> 

1119 

1120 ```{.py .python linenums="1" title="Example 4: Multiple columns, one missing, raises error"} 

1121 >>> ( 

1122 ... df.transform( 

1123 ... delete_columns, 

1124 ... columns=["a", "b", "z"], 

1125 ... missing_column_handler="raise", 

1126 ... ) 

1127 ... .show() 

1128 ... ) 

1129 ``` 

1130 <div class="result" markdown> 

1131 ```{.txt .text title="Terminal"} 

1132 ColumnDoesNotExistError: Columns ["z"] do not exist in "dataframe". 

1133 Try one of: ["a", "b", "c", "d"] 

1134 ``` 

1135 !!! success "Conclusion: Success." 

1136 </div> 

1137 

1138 ```{.py .python linenums="1" title="Example 5: Multiple columns, all missing, raises error"} 

1139 >>> ( 

1140 ... df.transform( 

1141 ... delete_columns, 

1142 ... columns=["x", "y", "z"], 

1143 ... missing_column_handler="raise", 

1144 ... ) 

1145 ... .show() 

1146 ... ) 

1147 ``` 

1148 <div class="result" markdown> 

1149 ```{.txt .text title="Terminal"} 

1150 ColumnDoesNotExistError: Columns ["x", "y", "z"] do not exist in "dataframe". 

1151 Try one of: ["a", "b", "c", "d"] 

1152 ``` 

1153 !!! success "Conclusion: Success." 

1154 </div> 

1155 

1156 ```{.py .python linenums="1" title="Example 6: Single column missing, raises warning"} 

1157 >>> ( 

1158 ... df.transform( 

1159 ... delete_columns, 

1160 ... columns="z", 

1161 ... missing_column_handler="warn", 

1162 ... ) 

1163 ... .show() 

1164 ... ) 

1165 ``` 

1166 <div class="result" markdown> 

1167 ```{.txt .text title="Terminal"} 

1168 ColumnDoesNotExistWarning: Columns missing from "dataframe": ["z"]. 

1169 Will still proceed to delete columns that do exist. 

1170 ``` 

1171 ```{.txt .text title="Terminal"} 

1172 +---+---+---+---+ 

1173 | a | b | c | d | 

1174 +---+---+---+---+ 

1175 | 0 | a | c | d | 

1176 | 1 | b | c | d | 

1177 | 2 | c | c | d | 

1178 | 3 | d | c | d | 

1179 +---+---+---+---+ 

1180 ``` 

1181 !!! success "Conclusion: Success." 

1182 </div> 

1183 

1184 ```{.py .python linenums="1" title="Example 7: Multiple columns, one missing, raises warning"} 

1185 >>> ( 

1186 ... df.transform( 

1187 ... delete_columns, 

1188 ... columns=["a", "b", "z"], 

1189 ... missing_column_handler="warn", 

1190 ... ) 

1191 ... .show() 

1192 ... ) 

1193 ``` 

1194 <div class="result" markdown> 

1195 ```{.txt .text title="Terminal"} 

1196 ColumnDoesNotExistWarning: Columns missing from "dataframe": ["z"]. 

1197 Will still proceed to delete columns that do exist. 

1198 ``` 

1199 ```{.txt .text title="Terminal"} 

1200 +---+---+ 

1201 | c | d | 

1202 +---+---+ 

1203 | c | d | 

1204 | c | d | 

1205 | c | d | 

1206 | c | d | 

1207 +---+---+ 

1208 ``` 

1209 !!! success "Conclusion: Success." 

1210 </div> 

1211 

1212 ```{.py .python linenums="1" title="Example 8: Multiple columns, all missing, raises warning"} 

1213 >>> ( 

1214 ... df.transform( 

1215 ... delete_columns, 

1216 ... columns=["x", "y", "z"], 

1217 ... missing_column_handler="warn", 

1218 ... ) 

1219 ... .show() 

1220 ... ) 

1221 ``` 

1222 <div class="result" markdown> 

1223 ```{.txt .text title="Terminal"} 

1224 ColumnDoesNotExistWarning: Columns missing from "dataframe": ["x", "y", "z"]. 

1225 Will still proceed to delete columns that do exist. 

1226 ``` 

1227 ```{.txt .text title="Terminal"} 

1228 +---+---+---+---+ 

1229 | a | b | c | d | 

1230 +---+---+---+---+ 

1231 | 0 | a | c | d | 

1232 | 1 | b | c | d | 

1233 | 2 | c | c | d | 

1234 | 3 | d | c | d | 

1235 +---+---+---+---+ 

1236 ``` 

1237 !!! success "Conclusion: Success." 

1238 </div> 

1239 

1240 ```{.py .python linenums="1" title="Example 9: Single column missing, nothing raised"} 

1241 >>> ( 

1242 ... df.transform( 

1243 ... delete_columns, 

1244 ... columns="z", 

1245 ... missing_column_handler="pass", 

1246 ... ) 

1247 ... .show() 

1248 ... ) 

1249 ``` 

1250 <div class="result" markdown> 

1251 ```{.txt .text title="Terminal"} 

1252 +---+---+---+---+ 

1253 | a | b | c | d | 

1254 +---+---+---+---+ 

1255 | 0 | a | c | d | 

1256 | 1 | b | c | d | 

1257 | 2 | c | c | d | 

1258 | 3 | d | c | d | 

1259 +---+---+---+---+ 

1260 ``` 

1261 !!! success "Conclusion: Success." 

1262 </div> 

1263 

1264 ```{.py .python linenums="1" title="Example 10: Multiple columns, one missing, nothing raised"} 

1265 >>> ( 

1266 ... df.transform( 

1267 ... delete_columns, 

1268 ... columns=["a", "b", "z"], 

1269 ... missing_column_handler="pass", 

1270 ... ) 

1271 ... .show() 

1272 ... ) 

1273 ``` 

1274 <div class="result" markdown> 

1275 ```{.txt .text title="Terminal"} 

1276 +---+---+ 

1277 | c | d | 

1278 +---+---+ 

1279 | c | d | 

1280 | c | d | 

1281 | c | d | 

1282 | c | d | 

1283 +---+---+ 

1284 ``` 

1285 !!! success "Conclusion: Success." 

1286 </div> 

1287 

1288 ```{.py .python linenums="1" title="Example 11: Multiple columns, all missing, nothing raised"} 

1289 >>> ( 

1290 ... df.transform( 

1291 ... delete_columns, 

1292 ... columns=["x", "y", "z"], 

1293 ... missing_column_handler="pass", 

1294 ... ) 

1295 ... .show() 

1296 ... ) 

1297 ``` 

1298 <div class="result" markdown> 

1299 ```{.txt .text title="Terminal"} 

1300 +---+---+---+---+ 

1301 | a | b | c | d | 

1302 +---+---+---+---+ 

1303 | 0 | a | c | d | 

1304 | 1 | b | c | d | 

1305 | 2 | c | c | d | 

1306 | 3 | d | c | d | 

1307 +---+---+---+---+ 

1308 ``` 

1309 !!! success "Conclusion: Success." 

1310 </div> 

1311 """ 

1312 columns = get_columns(dataframe, columns) 

1313 if missing_column_handler == "raise": 

1314 assert_columns_exists(dataframe=dataframe, columns=columns) 

1315 elif missing_column_handler == "warn": 

1316 warn_columns_missing(dataframe=dataframe, columns=columns) 

1317 elif missing_column_handler == "pass": 

1318 pass 

1319 return dataframe.select([col for col in dataframe.columns if col not in columns])