== Physical Plan ==
AdaptiveSparkPlan (18)
+- == Final Plan ==
CollectLimit (10)
+- * Project (9)
+- * Filter (8)
+- Window (7)
+- * Sort (6)
+- AQEShuffleRead (5)
+- ShuffleQueryStage (4), Statistics(sizeInBytes=10.9 MiB, rowCount=3.13E+4)
+- Exchange (3)
+- * Project (2)
+- Scan csv (1)
+- == Initial Plan ==
CollectLimit (17)
+- Project (16)
+- Filter (15)
+- Window (14)
+- Sort (13)
+- Exchange (12)
+- Project (11)
+- Scan csv (1)
(1) Scan csv
Output [13]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9487, end_station_name#9488, end_station_id#9489, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494]
Batched: false
Location: InMemoryFileIndex [s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibike-tripdata-part00.csv]
ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_name:string,start_station_id:string,end_station_name:string,end_station_id:string,start_lat:double,start_lng:double,end_lat:double,end_lng:double,member_casual:string>
(2) Project [codegen id : 1]
Output [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, cast(start_station_id#9487 as double) AS start_station_id#9589, end_station_name#9488, cast(end_station_id#9489 as double) AS end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, isnotnull(ride_id#9482) AS valid_ride_id#9575, (ended_at#9485 > started_at#9484) AS valid_time#9576, (((isnotnull(end_station_id#9489) AND isnotnull(start_station_id#9487)) AND NOT (end_station_id#9489 = start_station_id#9487)) <=> true) AS valid_station#9577, input_file_name() AS _source_file#9578, year(cast(started_at#9484 as date)) AS year#9582, month(cast(started_at#9484 as date)) AS month#9583, start_station_id#9487, started_at#9484]
Input [13]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9487, end_station_name#9488, end_station_id#9489, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494]
(3) Exchange
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: hashpartitioning(start_station_id#9487, 200), ENSURE_REQUIREMENTS, [plan_id=1231]
(4) ShuffleQueryStage
Output [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: 0
(5) AQEShuffleRead
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: coalesced
(6) Sort [codegen id : 2]
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: [start_station_id#9487 ASC NULLS FIRST, started_at#9484 ASC NULLS FIRST], false, 0
(7) Window
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: [row_number() windowspecdefinition(start_station_id#9487, started_at#9484 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#9581], [start_station_id#9487], [started_at#9484 ASC NULLS FIRST]
(8) Filter [codegen id : 3]
Input [22]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484, _start_station_ride_num#9581]
Condition : ((NOT valid_ride_id#9575 OR NOT valid_time#9576) OR NOT valid_station#9577)
(9) Project [codegen id : 3]
Output [21]: [toprettystring(ride_id#9482, Some(Etc/UTC)) AS toprettystring(ride_id)#9891, toprettystring(rideable_type#9483, Some(Etc/UTC)) AS toprettystring(rideable_type)#9892, toprettystring(started_at#9484, Some(Etc/UTC)) AS toprettystring(started_at)#9893, toprettystring(ended_at#9485, Some(Etc/UTC)) AS toprettystring(ended_at)#9894, toprettystring(start_station_name#9486, Some(Etc/UTC)) AS toprettystring(start_station_name)#9895, toprettystring(start_station_id#9589, Some(Etc/UTC)) AS toprettystring(start_station_id)#9896, toprettystring(end_station_name#9488, Some(Etc/UTC)) AS toprettystring(end_station_name)#9897, toprettystring(end_station_id#9591, Some(Etc/UTC)) AS toprettystring(end_station_id)#9898, toprettystring(start_lat#9490, Some(Etc/UTC)) AS toprettystring(start_lat)#9899, toprettystring(start_lng#9491, Some(Etc/UTC)) AS toprettystring(start_lng)#9900, toprettystring(end_lat#9492, Some(Etc/UTC)) AS toprettystring(end_lat)#9901, toprettystring(end_lng#9493, Some(Etc/UTC)) AS toprettystring(end_lng)#9902, toprettystring(member_casual#9494, Some(Etc/UTC)) AS toprettystring(member_casual)#9903, toprettystring(valid_ride_id#9575, Some(Etc/UTC)) AS toprettystring(valid_ride_id)#9904, toprettystring(valid_time#9576, Some(Etc/UTC)) AS toprettystring(valid_time)#9905, toprettystring(valid_station#9577, Some(Etc/UTC)) AS toprettystring(valid_station)#9906, toprettystring(_source_file#9578, Some(Etc/UTC)) AS toprettystring(_source_file)#9907, 2026-04-08 09:00:13.074602 AS toprettystring(_processed_dttm)#9908, toprettystring(_start_station_ride_num#9581, Some(Etc/UTC)) AS toprettystring(_start_station_ride_num)#9909, toprettystring(year#9582, Some(Etc/UTC)) AS toprettystring(year)#9910, toprettystring(month#9583, Some(Etc/UTC)) AS toprettystring(month)#9911]
Input [22]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484, _start_station_ride_num#9581]
(10) CollectLimit
Input [21]: [toprettystring(ride_id)#9891, toprettystring(rideable_type)#9892, toprettystring(started_at)#9893, toprettystring(ended_at)#9894, toprettystring(start_station_name)#9895, toprettystring(start_station_id)#9896, toprettystring(end_station_name)#9897, toprettystring(end_station_id)#9898, toprettystring(start_lat)#9899, toprettystring(start_lng)#9900, toprettystring(end_lat)#9901, toprettystring(end_lng)#9902, toprettystring(member_casual)#9903, toprettystring(valid_ride_id)#9904, toprettystring(valid_time)#9905, toprettystring(valid_station)#9906, toprettystring(_source_file)#9907, toprettystring(_processed_dttm)#9908, toprettystring(_start_station_ride_num)#9909, toprettystring(year)#9910, toprettystring(month)#9911]
Arguments: 21
(11) Project
Output [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, cast(start_station_id#9487 as double) AS start_station_id#9589, end_station_name#9488, cast(end_station_id#9489 as double) AS end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, isnotnull(ride_id#9482) AS valid_ride_id#9575, (ended_at#9485 > started_at#9484) AS valid_time#9576, (((isnotnull(end_station_id#9489) AND isnotnull(start_station_id#9487)) AND NOT (end_station_id#9489 = start_station_id#9487)) <=> true) AS valid_station#9577, input_file_name() AS _source_file#9578, year(cast(started_at#9484 as date)) AS year#9582, month(cast(started_at#9484 as date)) AS month#9583, start_station_id#9487, started_at#9484]
Input [13]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9487, end_station_name#9488, end_station_id#9489, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494]
(12) Exchange
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: hashpartitioning(start_station_id#9487, 200), ENSURE_REQUIREMENTS, [plan_id=1217]
(13) Sort
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: [start_station_id#9487 ASC NULLS FIRST, started_at#9484 ASC NULLS FIRST], false, 0
(14) Window
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: [row_number() windowspecdefinition(start_station_id#9487, started_at#9484 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#9581], [start_station_id#9487], [started_at#9484 ASC NULLS FIRST]
(15) Filter
Input [22]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484, _start_station_ride_num#9581]
Condition : ((NOT valid_ride_id#9575 OR NOT valid_time#9576) OR NOT valid_station#9577)
(16) Project
Output [21]: [toprettystring(ride_id#9482, Some(Etc/UTC)) AS toprettystring(ride_id)#9891, toprettystring(rideable_type#9483, Some(Etc/UTC)) AS toprettystring(rideable_type)#9892, toprettystring(started_at#9484, Some(Etc/UTC)) AS toprettystring(started_at)#9893, toprettystring(ended_at#9485, Some(Etc/UTC)) AS toprettystring(ended_at)#9894, toprettystring(start_station_name#9486, Some(Etc/UTC)) AS toprettystring(start_station_name)#9895, toprettystring(start_station_id#9589, Some(Etc/UTC)) AS toprettystring(start_station_id)#9896, toprettystring(end_station_name#9488, Some(Etc/UTC)) AS toprettystring(end_station_name)#9897, toprettystring(end_station_id#9591, Some(Etc/UTC)) AS toprettystring(end_station_id)#9898, toprettystring(start_lat#9490, Some(Etc/UTC)) AS toprettystring(start_lat)#9899, toprettystring(start_lng#9491, Some(Etc/UTC)) AS toprettystring(start_lng)#9900, toprettystring(end_lat#9492, Some(Etc/UTC)) AS toprettystring(end_lat)#9901, toprettystring(end_lng#9493, Some(Etc/UTC)) AS toprettystring(end_lng)#9902, toprettystring(member_casual#9494, Some(Etc/UTC)) AS toprettystring(member_casual)#9903, toprettystring(valid_ride_id#9575, Some(Etc/UTC)) AS toprettystring(valid_ride_id)#9904, toprettystring(valid_time#9576, Some(Etc/UTC)) AS toprettystring(valid_time)#9905, toprettystring(valid_station#9577, Some(Etc/UTC)) AS toprettystring(valid_station)#9906, toprettystring(_source_file#9578, Some(Etc/UTC)) AS toprettystring(_source_file)#9907, 2026-04-08 09:00:13.074602 AS toprettystring(_processed_dttm)#9908, toprettystring(_start_station_ride_num#9581, Some(Etc/UTC)) AS toprettystring(_start_station_ride_num)#9909, toprettystring(year#9582, Some(Etc/UTC)) AS toprettystring(year)#9910, toprettystring(month#9583, Some(Etc/UTC)) AS toprettystring(month)#9911]
Input [22]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484, _start_station_ride_num#9581]
(17) CollectLimit
Input [21]: [toprettystring(ride_id)#9891, toprettystring(rideable_type)#9892, toprettystring(started_at)#9893, toprettystring(ended_at)#9894, toprettystring(start_station_name)#9895, toprettystring(start_station_id)#9896, toprettystring(end_station_name)#9897, toprettystring(end_station_id)#9898, toprettystring(start_lat)#9899, toprettystring(start_lng)#9900, toprettystring(end_lat)#9901, toprettystring(end_lng)#9902, toprettystring(member_casual)#9903, toprettystring(valid_ride_id)#9904, toprettystring(valid_time)#9905, toprettystring(valid_station)#9906, toprettystring(_source_file)#9907, toprettystring(_processed_dttm)#9908, toprettystring(_start_station_ride_num)#9909, toprettystring(year)#9910, toprettystring(month)#9911]
Arguments: 21
(18) AdaptiveSparkPlan
Output [21]: [toprettystring(ride_id)#9891, toprettystring(rideable_type)#9892, toprettystring(started_at)#9893, toprettystring(ended_at)#9894, toprettystring(start_station_name)#9895, toprettystring(start_station_id)#9896, toprettystring(end_station_name)#9897, toprettystring(end_station_id)#9898, toprettystring(start_lat)#9899, toprettystring(start_lng)#9900, toprettystring(end_lat)#9901, toprettystring(end_lng)#9902, toprettystring(member_casual)#9903, toprettystring(valid_ride_id)#9904, toprettystring(valid_time)#9905, toprettystring(valid_station)#9906, toprettystring(_source_file)#9907, toprettystring(_processed_dttm)#9908, toprettystring(_start_station_ride_num)#9909, toprettystring(year)#9910, toprettystring(month)#9911]
Arguments: isFinalPlan=true