== Physical Plan ==
AdaptiveSparkPlan (18)
+- == Final Plan ==
CollectLimit (10)
+- * Project (9)
+- * Filter (8)
+- Window (7)
+- * Sort (6)
+- AQEShuffleRead (5)
+- ShuffleQueryStage (4), Statistics(sizeInBytes=10.9 MiB, rowCount=3.13E+4)
+- Exchange (3)
+- * Project (2)
+- Scan csv (1)
+- == Initial Plan ==
CollectLimit (17)
+- Project (16)
+- Filter (15)
+- Window (14)
+- Sort (13)
+- Exchange (12)
+- Project (11)
+- Scan csv (1)
(1) Scan csv
Output [13]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9487, end_station_name#9488, end_station_id#9489, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494]
Batched: false
Location: InMemoryFileIndex [s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibike-tripdata-part00.csv]
ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_name:string,start_station_id:string,end_station_name:string,end_station_id:string,start_lat:double,start_lng:double,end_lat:double,end_lng:double,member_casual:string>
(2) Project [codegen id : 1]
Output [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, cast(start_station_id#9487 as double) AS start_station_id#9589, end_station_name#9488, cast(end_station_id#9489 as double) AS end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, isnotnull(ride_id#9482) AS valid_ride_id#9575, (ended_at#9485 > started_at#9484) AS valid_time#9576, (((isnotnull(end_station_id#9489) AND isnotnull(start_station_id#9487)) AND NOT (end_station_id#9489 = start_station_id#9487)) <=> true) AS valid_station#9577, input_file_name() AS _source_file#9578, year(cast(started_at#9484 as date)) AS year#9582, month(cast(started_at#9484 as date)) AS month#9583, start_station_id#9487, started_at#9484]
Input [13]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9487, end_station_name#9488, end_station_id#9489, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494]
(3) Exchange
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: hashpartitioning(start_station_id#9487, 200), ENSURE_REQUIREMENTS, [plan_id=1429]
(4) ShuffleQueryStage
Output [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: 0
(5) AQEShuffleRead
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: coalesced
(6) Sort [codegen id : 2]
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: [start_station_id#9487 ASC NULLS FIRST, started_at#9484 ASC NULLS FIRST], false, 0
(7) Window
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: [row_number() windowspecdefinition(start_station_id#9487, started_at#9484 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#9581], [start_station_id#9487], [started_at#9484 ASC NULLS FIRST]
(8) Filter [codegen id : 3]
Input [22]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484, _start_station_ride_num#9581]
Condition : ((NOT valid_ride_id#9575 OR NOT valid_time#9576) OR NOT valid_station#9577)
(9) Project [codegen id : 3]
Output [21]: [toprettystring(ride_id#9482, Some(Etc/UTC)) AS toprettystring(ride_id)#10077, toprettystring(rideable_type#9483, Some(Etc/UTC)) AS toprettystring(rideable_type)#10078, toprettystring(started_at#9484, Some(Etc/UTC)) AS toprettystring(started_at)#10079, toprettystring(ended_at#9485, Some(Etc/UTC)) AS toprettystring(ended_at)#10080, toprettystring(start_station_name#9486, Some(Etc/UTC)) AS toprettystring(start_station_name)#10081, toprettystring(start_station_id#9589, Some(Etc/UTC)) AS toprettystring(start_station_id)#10082, toprettystring(end_station_name#9488, Some(Etc/UTC)) AS toprettystring(end_station_name)#10083, toprettystring(end_station_id#9591, Some(Etc/UTC)) AS toprettystring(end_station_id)#10084, toprettystring(start_lat#9490, Some(Etc/UTC)) AS toprettystring(start_lat)#10085, toprettystring(start_lng#9491, Some(Etc/UTC)) AS toprettystring(start_lng)#10086, toprettystring(end_lat#9492, Some(Etc/UTC)) AS toprettystring(end_lat)#10087, toprettystring(end_lng#9493, Some(Etc/UTC)) AS toprettystring(end_lng)#10088, toprettystring(member_casual#9494, Some(Etc/UTC)) AS toprettystring(member_casual)#10089, toprettystring(valid_ride_id#9575, Some(Etc/UTC)) AS toprettystring(valid_ride_id)#10090, toprettystring(valid_time#9576, Some(Etc/UTC)) AS toprettystring(valid_time)#10091, toprettystring(valid_station#9577, Some(Etc/UTC)) AS toprettystring(valid_station)#10092, toprettystring(_source_file#9578, Some(Etc/UTC)) AS toprettystring(_source_file)#10093, 2026-04-08 09:00:30.555009 AS toprettystring(_processed_dttm)#10094, toprettystring(_start_station_ride_num#9581, Some(Etc/UTC)) AS toprettystring(_start_station_ride_num)#10095, toprettystring(year#9582, Some(Etc/UTC)) AS toprettystring(year)#10096, toprettystring(month#9583, Some(Etc/UTC)) AS toprettystring(month)#10097]
Input [22]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484, _start_station_ride_num#9581]
(10) CollectLimit
Input [21]: [toprettystring(ride_id)#10077, toprettystring(rideable_type)#10078, toprettystring(started_at)#10079, toprettystring(ended_at)#10080, toprettystring(start_station_name)#10081, toprettystring(start_station_id)#10082, toprettystring(end_station_name)#10083, toprettystring(end_station_id)#10084, toprettystring(start_lat)#10085, toprettystring(start_lng)#10086, toprettystring(end_lat)#10087, toprettystring(end_lng)#10088, toprettystring(member_casual)#10089, toprettystring(valid_ride_id)#10090, toprettystring(valid_time)#10091, toprettystring(valid_station)#10092, toprettystring(_source_file)#10093, toprettystring(_processed_dttm)#10094, toprettystring(_start_station_ride_num)#10095, toprettystring(year)#10096, toprettystring(month)#10097]
Arguments: 51
(11) Project
Output [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, cast(start_station_id#9487 as double) AS start_station_id#9589, end_station_name#9488, cast(end_station_id#9489 as double) AS end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, isnotnull(ride_id#9482) AS valid_ride_id#9575, (ended_at#9485 > started_at#9484) AS valid_time#9576, (((isnotnull(end_station_id#9489) AND isnotnull(start_station_id#9487)) AND NOT (end_station_id#9489 = start_station_id#9487)) <=> true) AS valid_station#9577, input_file_name() AS _source_file#9578, year(cast(started_at#9484 as date)) AS year#9582, month(cast(started_at#9484 as date)) AS month#9583, start_station_id#9487, started_at#9484]
Input [13]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9487, end_station_name#9488, end_station_id#9489, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494]
(12) Exchange
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: hashpartitioning(start_station_id#9487, 200), ENSURE_REQUIREMENTS, [plan_id=1415]
(13) Sort
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: [start_station_id#9487 ASC NULLS FIRST, started_at#9484 ASC NULLS FIRST], false, 0
(14) Window
Input [21]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484]
Arguments: [row_number() windowspecdefinition(start_station_id#9487, started_at#9484 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#9581], [start_station_id#9487], [started_at#9484 ASC NULLS FIRST]
(15) Filter
Input [22]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484, _start_station_ride_num#9581]
Condition : ((NOT valid_ride_id#9575 OR NOT valid_time#9576) OR NOT valid_station#9577)
(16) Project
Output [21]: [toprettystring(ride_id#9482, Some(Etc/UTC)) AS toprettystring(ride_id)#10077, toprettystring(rideable_type#9483, Some(Etc/UTC)) AS toprettystring(rideable_type)#10078, toprettystring(started_at#9484, Some(Etc/UTC)) AS toprettystring(started_at)#10079, toprettystring(ended_at#9485, Some(Etc/UTC)) AS toprettystring(ended_at)#10080, toprettystring(start_station_name#9486, Some(Etc/UTC)) AS toprettystring(start_station_name)#10081, toprettystring(start_station_id#9589, Some(Etc/UTC)) AS toprettystring(start_station_id)#10082, toprettystring(end_station_name#9488, Some(Etc/UTC)) AS toprettystring(end_station_name)#10083, toprettystring(end_station_id#9591, Some(Etc/UTC)) AS toprettystring(end_station_id)#10084, toprettystring(start_lat#9490, Some(Etc/UTC)) AS toprettystring(start_lat)#10085, toprettystring(start_lng#9491, Some(Etc/UTC)) AS toprettystring(start_lng)#10086, toprettystring(end_lat#9492, Some(Etc/UTC)) AS toprettystring(end_lat)#10087, toprettystring(end_lng#9493, Some(Etc/UTC)) AS toprettystring(end_lng)#10088, toprettystring(member_casual#9494, Some(Etc/UTC)) AS toprettystring(member_casual)#10089, toprettystring(valid_ride_id#9575, Some(Etc/UTC)) AS toprettystring(valid_ride_id)#10090, toprettystring(valid_time#9576, Some(Etc/UTC)) AS toprettystring(valid_time)#10091, toprettystring(valid_station#9577, Some(Etc/UTC)) AS toprettystring(valid_station)#10092, toprettystring(_source_file#9578, Some(Etc/UTC)) AS toprettystring(_source_file)#10093, 2026-04-08 09:00:30.555009 AS toprettystring(_processed_dttm)#10094, toprettystring(_start_station_ride_num#9581, Some(Etc/UTC)) AS toprettystring(_start_station_ride_num)#10095, toprettystring(year#9582, Some(Etc/UTC)) AS toprettystring(year)#10096, toprettystring(month#9583, Some(Etc/UTC)) AS toprettystring(month)#10097]
Input [22]: [ride_id#9482, rideable_type#9483, started_at#9484, ended_at#9485, start_station_name#9486, start_station_id#9589, end_station_name#9488, end_station_id#9591, start_lat#9490, start_lng#9491, end_lat#9492, end_lng#9493, member_casual#9494, valid_ride_id#9575, valid_time#9576, valid_station#9577, _source_file#9578, year#9582, month#9583, start_station_id#9487, started_at#9484, _start_station_ride_num#9581]
(17) CollectLimit
Input [21]: [toprettystring(ride_id)#10077, toprettystring(rideable_type)#10078, toprettystring(started_at)#10079, toprettystring(ended_at)#10080, toprettystring(start_station_name)#10081, toprettystring(start_station_id)#10082, toprettystring(end_station_name)#10083, toprettystring(end_station_id)#10084, toprettystring(start_lat)#10085, toprettystring(start_lng)#10086, toprettystring(end_lat)#10087, toprettystring(end_lng)#10088, toprettystring(member_casual)#10089, toprettystring(valid_ride_id)#10090, toprettystring(valid_time)#10091, toprettystring(valid_station)#10092, toprettystring(_source_file)#10093, toprettystring(_processed_dttm)#10094, toprettystring(_start_station_ride_num)#10095, toprettystring(year)#10096, toprettystring(month)#10097]
Arguments: 51
(18) AdaptiveSparkPlan
Output [21]: [toprettystring(ride_id)#10077, toprettystring(rideable_type)#10078, toprettystring(started_at)#10079, toprettystring(ended_at)#10080, toprettystring(start_station_name)#10081, toprettystring(start_station_id)#10082, toprettystring(end_station_name)#10083, toprettystring(end_station_id)#10084, toprettystring(start_lat)#10085, toprettystring(start_lng)#10086, toprettystring(end_lat)#10087, toprettystring(end_lng)#10088, toprettystring(member_casual)#10089, toprettystring(valid_ride_id)#10090, toprettystring(valid_time)#10091, toprettystring(valid_station)#10092, toprettystring(_source_file)#10093, toprettystring(_processed_dttm)#10094, toprettystring(_start_station_ride_num)#10095, toprettystring(year)#10096, toprettystring(month)#10097]
Arguments: isFinalPlan=true