== Physical Plan ==
AdaptiveSparkPlan (15)
+- == Final Plan ==
CollectLimit (12)
+- * Project (11)
+- TableCacheQueryStage (10), Statistics(sizeInBytes=8.2 MiB, rowCount=3.13E+4)
+- InMemoryTableScan (1)
+- InMemoryRelation (2)
+- AdaptiveSparkPlan (9)
+- Project (8)
+- Window (7)
+- Sort (6)
+- Exchange (5)
+- Project (4)
+- Scan csv (3)
+- == Initial Plan ==
CollectLimit (14)
+- Project (13)
+- InMemoryTableScan (1)
+- InMemoryRelation (2)
+- AdaptiveSparkPlan (9)
+- Project (8)
+- Window (7)
+- Sort (6)
+- Exchange (5)
+- Project (4)
+- Scan csv (3)
(1) InMemoryTableScan
Output [21]: [_processed_dttm#349, _source_file#348, _start_station_ride_num#351, end_lat#364, end_lng#365, end_station_id#361, end_station_name#360, ended_at#357, member_casual#366, month#353, ride_id#354, rideable_type#355, start_lat#362, start_lng#363, start_station_id#359, start_station_name#358, started_at#356, valid_ride_id#345, valid_station#347, valid_time#346, year#352]
Arguments: [_processed_dttm#349, _source_file#348, _start_station_ride_num#351, end_lat#364, end_lng#365, end_station_id#361, end_station_name#360, ended_at#357, member_casual#366, month#353, ride_id#354, rideable_type#355, start_lat#362, start_lng#363, start_station_id#359, start_station_name#358, started_at#356, valid_ride_id#345, valid_station#347, valid_time#346, year#352]
(2) InMemoryRelation
Arguments: [ride_id#354, rideable_type#355, started_at#356, ended_at#357, start_station_name#358, start_station_id#359, end_station_name#360, end_station_id#361, start_lat#362, start_lng#363, end_lat#364, end_lng#365, member_casual#366, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, _processed_dttm#349, _start_station_ride_num#351, year#352, month#353], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@43d79ed6,StorageLevel(disk, memory, deserialized, 1 replicas),AdaptiveSparkPlan isFinalPlan=true
+- == Final Plan ==
*(3) Project [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#359, end_station_name#258, end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, 2026-04-08 08:20:23.917168 AS _processed_dttm#349, _start_station_ride_num#351, year#352, month#353]
+- Window [row_number() windowspecdefinition(start_station_id#257, started_at#254 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#351], [start_station_id#257], [started_at#254 ASC NULLS FIRST]
+- *(2) Sort [start_station_id#257 ASC NULLS FIRST, started_at#254 ASC NULLS FIRST], false, 0
+- AQEShuffleRead coalesced
+- ShuffleQueryStage 0
+- Exchange hashpartitioning(start_station_id#257, 200), ENSURE_REQUIREMENTS, [plan_id=151]
+- *(1) Project [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, cast(start_station_id#257 as double) AS start_station_id#359, end_station_name#258, cast(end_station_id#259 as double) AS end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, isnotnull(ride_id#252) AS valid_ride_id#345, (ended_at#255 > started_at#254) AS valid_time#346, NOT (end_station_id#259 = start_station_id#257) AS valid_station#347, input_file_name() AS _source_file#348, year(cast(started_at#254 as date)) AS year#352, month(cast(started_at#254 as date)) AS month#353, start_station_id#257, started_at#254]
+- FileScan csv [ride_id#252,rideable_type#253,started_at#254,ended_at#255,start_station_name#256,start_station_id#257,end_station_name#258,end_station_id#259,start_lat#260,start_lng#261,end_lat#262,end_lng#263,member_casual#264] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibik..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_...
+- == Initial Plan ==
Project [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#359, end_station_name#258, end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, 2026-04-08 08:20:23.917168 AS _processed_dttm#349, _start_station_ride_num#351, year#352, month#353]
+- Window [row_number() windowspecdefinition(start_station_id#257, started_at#254 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#351], [start_station_id#257], [started_at#254 ASC NULLS FIRST]
+- Sort [start_station_id#257 ASC NULLS FIRST, started_at#254 ASC NULLS FIRST], false, 0
+- Exchange hashpartitioning(start_station_id#257, 200), ENSURE_REQUIREMENTS, [plan_id=113]
+- Project [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, cast(start_station_id#257 as double) AS start_station_id#359, end_station_name#258, cast(end_station_id#259 as double) AS end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, isnotnull(ride_id#252) AS valid_ride_id#345, (ended_at#255 > started_at#254) AS valid_time#346, NOT (end_station_id#259 = start_station_id#257) AS valid_station#347, input_file_name() AS _source_file#348, year(cast(started_at#254 as date)) AS year#352, month(cast(started_at#254 as date)) AS month#353, start_station_id#257, started_at#254]
+- FileScan csv [ride_id#252,rideable_type#253,started_at#254,ended_at#255,start_station_name#256,start_station_id#257,end_station_name#258,end_station_id#259,start_lat#260,start_lng#261,end_lat#262,end_lng#263,member_casual#264] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibik..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_...
,None)
(3) Scan csv
Output [13]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#257, end_station_name#258, end_station_id#259, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264]
Batched: false
Location: InMemoryFileIndex [s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibike-tripdata-part00.csv]
ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_name:string,start_station_id:string,end_station_name:string,end_station_id:string,start_lat:double,start_lng:double,end_lat:double,end_lng:double,member_casual:string>
(4) Project
Output [21]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, cast(start_station_id#257 as double) AS start_station_id#359, end_station_name#258, cast(end_station_id#259 as double) AS end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, isnotnull(ride_id#252) AS valid_ride_id#345, (ended_at#255 > started_at#254) AS valid_time#346, NOT (end_station_id#259 = start_station_id#257) AS valid_station#347, input_file_name() AS _source_file#348, year(cast(started_at#254 as date)) AS year#352, month(cast(started_at#254 as date)) AS month#353, start_station_id#257, started_at#254]
Input [13]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#257, end_station_name#258, end_station_id#259, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264]
(5) Exchange
Input [21]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#359, end_station_name#258, end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, year#352, month#353, start_station_id#257, started_at#254]
Arguments: hashpartitioning(start_station_id#257, 200), ENSURE_REQUIREMENTS, [plan_id=218]
(6) Sort
Input [21]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#359, end_station_name#258, end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, year#352, month#353, start_station_id#257, started_at#254]
Arguments: [start_station_id#257 ASC NULLS FIRST, started_at#254 ASC NULLS FIRST], false, 0
(7) Window
Input [21]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#359, end_station_name#258, end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, year#352, month#353, start_station_id#257, started_at#254]
Arguments: [row_number() windowspecdefinition(start_station_id#257, started_at#254 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#351], [start_station_id#257], [started_at#254 ASC NULLS FIRST]
(8) Project
Output [21]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#359, end_station_name#258, end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, 2026-04-08 08:20:23.917168 AS _processed_dttm#349, _start_station_ride_num#351, year#352, month#353]
Input [22]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#359, end_station_name#258, end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, year#352, month#353, start_station_id#257, started_at#254, _start_station_ride_num#351]
(9) AdaptiveSparkPlan
Output [21]: [ride_id#252, rideable_type#253, started_at#254, ended_at#255, start_station_name#256, start_station_id#359, end_station_name#258, end_station_id#361, start_lat#260, start_lng#261, end_lat#262, end_lng#263, member_casual#264, valid_ride_id#345, valid_time#346, valid_station#347, _source_file#348, _processed_dttm#349, _start_station_ride_num#351, year#352, month#353]
Arguments: isFinalPlan=false
(10) TableCacheQueryStage
Output [21]: [_processed_dttm#349, _source_file#348, _start_station_ride_num#351, end_lat#364, end_lng#365, end_station_id#361, end_station_name#360, ended_at#357, member_casual#366, month#353, ride_id#354, rideable_type#355, start_lat#362, start_lng#363, start_station_id#359, start_station_name#358, started_at#356, valid_ride_id#345, valid_station#347, valid_time#346, year#352]
Arguments: 0
(11) Project [codegen id : 1]
Output [21]: [toprettystring(ride_id#354, Some(Etc/UTC)) AS toprettystring(ride_id)#1078, toprettystring(rideable_type#355, Some(Etc/UTC)) AS toprettystring(rideable_type)#1079, toprettystring(started_at#356, Some(Etc/UTC)) AS toprettystring(started_at)#1080, toprettystring(ended_at#357, Some(Etc/UTC)) AS toprettystring(ended_at)#1081, toprettystring(start_station_name#358, Some(Etc/UTC)) AS toprettystring(start_station_name)#1082, toprettystring(start_station_id#359, Some(Etc/UTC)) AS toprettystring(start_station_id)#1083, toprettystring(end_station_name#360, Some(Etc/UTC)) AS toprettystring(end_station_name)#1084, toprettystring(end_station_id#361, Some(Etc/UTC)) AS toprettystring(end_station_id)#1085, toprettystring(start_lat#362, Some(Etc/UTC)) AS toprettystring(start_lat)#1086, toprettystring(start_lng#363, Some(Etc/UTC)) AS toprettystring(start_lng)#1087, toprettystring(end_lat#364, Some(Etc/UTC)) AS toprettystring(end_lat)#1088, toprettystring(end_lng#365, Some(Etc/UTC)) AS toprettystring(end_lng)#1089, toprettystring(member_casual#366, Some(Etc/UTC)) AS toprettystring(member_casual)#1090, toprettystring(valid_ride_id#345, Some(Etc/UTC)) AS toprettystring(valid_ride_id)#1091, toprettystring(valid_time#346, Some(Etc/UTC)) AS toprettystring(valid_time)#1092, toprettystring(valid_station#347, Some(Etc/UTC)) AS toprettystring(valid_station)#1093, toprettystring(_source_file#348, Some(Etc/UTC)) AS toprettystring(_source_file)#1094, toprettystring(_processed_dttm#349, Some(Etc/UTC)) AS toprettystring(_processed_dttm)#1095, toprettystring(_start_station_ride_num#351, Some(Etc/UTC)) AS toprettystring(_start_station_ride_num)#1096, toprettystring(year#352, Some(Etc/UTC)) AS toprettystring(year)#1097, toprettystring(month#353, Some(Etc/UTC)) AS toprettystring(month)#1098]
Input [21]: [_processed_dttm#349, _source_file#348, _start_station_ride_num#351, end_lat#364, end_lng#365, end_station_id#361, end_station_name#360, ended_at#357, member_casual#366, month#353, ride_id#354, rideable_type#355, start_lat#362, start_lng#363, start_station_id#359, start_station_name#358, started_at#356, valid_ride_id#345, valid_station#347, valid_time#346, year#352]
(12) CollectLimit
Input [21]: [toprettystring(ride_id)#1078, toprettystring(rideable_type)#1079, toprettystring(started_at)#1080, toprettystring(ended_at)#1081, toprettystring(start_station_name)#1082, toprettystring(start_station_id)#1083, toprettystring(end_station_name)#1084, toprettystring(end_station_id)#1085, toprettystring(start_lat)#1086, toprettystring(start_lng)#1087, toprettystring(end_lat)#1088, toprettystring(end_lng)#1089, toprettystring(member_casual)#1090, toprettystring(valid_ride_id)#1091, toprettystring(valid_time)#1092, toprettystring(valid_station)#1093, toprettystring(_source_file)#1094, toprettystring(_processed_dttm)#1095, toprettystring(_start_station_ride_num)#1096, toprettystring(year)#1097, toprettystring(month)#1098]
Arguments: 3
(13) Project
Output [21]: [toprettystring(ride_id#354, Some(Etc/UTC)) AS toprettystring(ride_id)#1078, toprettystring(rideable_type#355, Some(Etc/UTC)) AS toprettystring(rideable_type)#1079, toprettystring(started_at#356, Some(Etc/UTC)) AS toprettystring(started_at)#1080, toprettystring(ended_at#357, Some(Etc/UTC)) AS toprettystring(ended_at)#1081, toprettystring(start_station_name#358, Some(Etc/UTC)) AS toprettystring(start_station_name)#1082, toprettystring(start_station_id#359, Some(Etc/UTC)) AS toprettystring(start_station_id)#1083, toprettystring(end_station_name#360, Some(Etc/UTC)) AS toprettystring(end_station_name)#1084, toprettystring(end_station_id#361, Some(Etc/UTC)) AS toprettystring(end_station_id)#1085, toprettystring(start_lat#362, Some(Etc/UTC)) AS toprettystring(start_lat)#1086, toprettystring(start_lng#363, Some(Etc/UTC)) AS toprettystring(start_lng)#1087, toprettystring(end_lat#364, Some(Etc/UTC)) AS toprettystring(end_lat)#1088, toprettystring(end_lng#365, Some(Etc/UTC)) AS toprettystring(end_lng)#1089, toprettystring(member_casual#366, Some(Etc/UTC)) AS toprettystring(member_casual)#1090, toprettystring(valid_ride_id#345, Some(Etc/UTC)) AS toprettystring(valid_ride_id)#1091, toprettystring(valid_time#346, Some(Etc/UTC)) AS toprettystring(valid_time)#1092, toprettystring(valid_station#347, Some(Etc/UTC)) AS toprettystring(valid_station)#1093, toprettystring(_source_file#348, Some(Etc/UTC)) AS toprettystring(_source_file)#1094, toprettystring(_processed_dttm#349, Some(Etc/UTC)) AS toprettystring(_processed_dttm)#1095, toprettystring(_start_station_ride_num#351, Some(Etc/UTC)) AS toprettystring(_start_station_ride_num)#1096, toprettystring(year#352, Some(Etc/UTC)) AS toprettystring(year)#1097, toprettystring(month#353, Some(Etc/UTC)) AS toprettystring(month)#1098]
Input [21]: [_processed_dttm#349, _source_file#348, _start_station_ride_num#351, end_lat#364, end_lng#365, end_station_id#361, end_station_name#360, ended_at#357, member_casual#366, month#353, ride_id#354, rideable_type#355, start_lat#362, start_lng#363, start_station_id#359, start_station_name#358, started_at#356, valid_ride_id#345, valid_station#347, valid_time#346, year#352]
(14) CollectLimit
Input [21]: [toprettystring(ride_id)#1078, toprettystring(rideable_type)#1079, toprettystring(started_at)#1080, toprettystring(ended_at)#1081, toprettystring(start_station_name)#1082, toprettystring(start_station_id)#1083, toprettystring(end_station_name)#1084, toprettystring(end_station_id)#1085, toprettystring(start_lat)#1086, toprettystring(start_lng)#1087, toprettystring(end_lat)#1088, toprettystring(end_lng)#1089, toprettystring(member_casual)#1090, toprettystring(valid_ride_id)#1091, toprettystring(valid_time)#1092, toprettystring(valid_station)#1093, toprettystring(_source_file)#1094, toprettystring(_processed_dttm)#1095, toprettystring(_start_station_ride_num)#1096, toprettystring(year)#1097, toprettystring(month)#1098]
Arguments: 3
(15) AdaptiveSparkPlan
Output [21]: [toprettystring(ride_id)#1078, toprettystring(rideable_type)#1079, toprettystring(started_at)#1080, toprettystring(ended_at)#1081, toprettystring(start_station_name)#1082, toprettystring(start_station_id)#1083, toprettystring(end_station_name)#1084, toprettystring(end_station_id)#1085, toprettystring(start_lat)#1086, toprettystring(start_lng)#1087, toprettystring(end_lat)#1088, toprettystring(end_lng)#1089, toprettystring(member_casual)#1090, toprettystring(valid_ride_id)#1091, toprettystring(valid_time)#1092, toprettystring(valid_station)#1093, toprettystring(_source_file)#1094, toprettystring(_processed_dttm)#1095, toprettystring(_start_station_ride_num)#1096, toprettystring(year)#1097, toprettystring(month)#1098]
Arguments: isFinalPlan=true