digraph G {
0 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
1 [labelType="html" label="<b>Execute InsertIntoHadoopFsRelationCommand</b><br><br>task commit time total (min, med, max (stageId: taskId))<br>10.6 s (107 ms, 141 ms, 306 ms (stage 180.0: task 1140))<br>number of written files: 987<br>job commit time: 583 ms<br>number of output rows: 1,113,415<br>number of dynamic part: 15<br>written output: 357.4 MiB"];
2 [labelType="html" label="<br><b>WriteFiles</b><br><br>"];
subgraph cluster3 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n1.9 m (1.1 s, 1.6 s, 2.3 s (stage 180.0: task 1134))";
4 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>78 ms (0 ms, 1 ms, 3 ms (stage 180.0: task 1144))<br>peak memory total (min, med, max (stageId: taskId))<br>2.3 GiB (32.2 MiB, 32.5 MiB, 33.0 MiB (stage 180.0: task 1129))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))"];
5 [labelType="html" label="<br><b>Project</b><br><br>"];
6 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 1,113,415"];
}
7 [labelType="html" label="<b>InMemoryTableScan</b><br><br>number of output rows: 48,408,771"];
8 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
subgraph cluster9 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))";
10 [labelType="html" label="<br><b>Project</b><br><br>"];
}
11 [labelType="html" label="<b>Window</b><br><br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))"];
subgraph cluster12 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))";
13 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))<br>peak memory total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))"];
}
14 [labelType="html" label="<br><b>AQEShuffleRead</b><br><br>"];
15 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 0<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))<br>remote merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))<br>remote merged blocks fetched: 0<br>records read: 0<br>local bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))<br>remote bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))<br>merged fetch fallback count: 0<br>local blocks read: 0<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))<br>local merged bytes read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))<br>number of partitions: 0<br>remote reqs duration total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))<br>remote bytes read to disk total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))"];
subgraph cluster16 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))";
17 [labelType="html" label="<br><b>Project</b><br><br>"];
}
18 [labelType="html" label="<b>Scan csv </b><br><br>number of output rows: 0<br>number of files read: 0<br>metadata time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 180.0: task 1128))<br>size of files read total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 180.0: task 1128))"];
1->0;
2->1;
4->2;
5->4;
6->5;
7->6;
8->7;
10->8;
11->10;
13->11;
14->13;
15->14;
17->15;
18->17;
}
19
AdaptiveSparkPlan isFinalPlan=true
Execute InsertIntoHadoopFsRelationCommand s3a://rzvde-g8-kirsanov-dmitry/raw_invalid/citibike_data, false, [yyyymm#15715], CSV, [header=true, __partition_columns=["yyyymm"], path=s3a://rzvde-g8-kirsanov-dmitry/raw_invalid/citibike_data/], Overwrite, [ride_id, rideable_type, started_at, ended_at, start_station_name, start_station_id, end_station_name, end_station_id, start_lat, start_lng, end_lat, end_lng, member_casual, valid_ride_id, valid_time, valid_station, _source_file, _processed_dttm, _start_station_ride_num, year, month, yyyymm]
WriteFiles
Sort [yyyymm#15715 ASC NULLS FIRST], false, 0
Project [ride_id#10650, rideable_type#10651, started_at#10652, ended_at#10653, start_station_name#10654, start_station_id#10655, end_station_name#10656, end_station_id#10657, start_lat#10658, start_lng#10659, end_lat#10660, end_lng#10661, member_casual#10662, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649, empty2null(date_format(started_at#10652, yyyyMM, Some(Etc/UTC))) AS yyyymm#15715]
Filter ((((((NOT valid_ride_id#10641 OR NOT valid_time#10642) OR NOT valid_station#10643) OR isnull(ride_id#10650)) OR (ride_id#10650 = )) OR isnull(rideable_type#10651)) OR (rideable_type#10651 = ))
WholeStageCodegen (1)
InMemoryTableScan [_processed_dttm#10645, _source_file#10644, _start_station_ride_num#10647, end_lat#10660, end_lng#10661, end_station_id#10657, end_station_name#10656, ended_at#10653, member_casual#10662, month#10649, ride_id#10650, rideable_type#10651, start_lat#10658, start_lng#10659, start_station_id#10655, start_station_name#10654, started_at#10652, valid_ride_id#10641, valid_station#10643, valid_time#10642, year#10648], [((((((NOT valid_ride_id#10641 OR NOT valid_time#10642) OR NOT valid_station#10643) OR isnull(ride_id#10650)) OR (ride_id#10650 = )) OR isnull(rideable_type#10651)) OR (rideable_type#10651 = ))]
AdaptiveSparkPlan isFinalPlan=true
Project [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, 2026-04-08 09:56:30.312891 AS _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649]
WholeStageCodegen (3)
Window [row_number() windowspecdefinition(start_station_id#10535, started_at#10532 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#10647], [start_station_id#10535], [started_at#10532 ASC NULLS FIRST]
Sort [start_station_id#10535 ASC NULLS FIRST, started_at#10532 ASC NULLS FIRST], false, 0
WholeStageCodegen (2)
AQEShuffleRead coalesced
Exchange hashpartitioning(start_station_id#10535, 200), ENSURE_REQUIREMENTS, [plan_id=2133]
Project [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, cast(start_station_id#10535 as double) AS start_station_id#10655, end_station_name#10536, cast(end_station_id#10537 as double) AS end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, isnotnull(ride_id#10530) AS valid_ride_id#10641, (ended_at#10533 > started_at#10532) AS valid_time#10642, (((isnotnull(end_station_id#10537) AND isnotnull(start_station_id#10535)) AND NOT (end_station_id#10537 = start_station_id#10535)) <=> true) AS valid_station#10643, input_file_name() AS _source_file#10644, year(cast(started_at#10532 as date)) AS year#10648, month(cast(started_at#10532 as date)) AS month#10649, start_station_id#10535, started_at#10532]
WholeStageCodegen (1)
FileScan csv [ride_id#10530,rideable_type#10531,started_at#10532,ended_at#10533,start_station_name#10534,start_station_id#10535,end_station_name#10536,end_station_id#10537,start_lat#10538,start_lng#10539,end_lat#10540,end_lng#10541,member_casual#10542] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(55 paths)[s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibi..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_...
== Physical Plan ==
AdaptiveSparkPlan (21)
+- == Final Plan ==
Execute InsertIntoHadoopFsRelationCommand (15)
+- WriteFiles (14)
+- * Sort (13)
+- * Project (12)
+- * Filter (11)
+- TableCacheQueryStage (10), Statistics(sizeInBytes=12.5 GiB, rowCount=4.84E+7)
+- InMemoryTableScan (1)
+- InMemoryRelation (2)
+- AdaptiveSparkPlan (9)
+- Project (8)
+- Window (7)
+- Sort (6)
+- Exchange (5)
+- Project (4)
+- Scan csv (3)
+- == Initial Plan ==
Execute InsertIntoHadoopFsRelationCommand (20)
+- WriteFiles (19)
+- Sort (18)
+- Project (17)
+- Filter (16)
+- InMemoryTableScan (1)
+- InMemoryRelation (2)
+- AdaptiveSparkPlan (9)
+- Project (8)
+- Window (7)
+- Sort (6)
+- Exchange (5)
+- Project (4)
+- Scan csv (3)
(1) InMemoryTableScan
Output [21]: [_processed_dttm#10645, _source_file#10644, _start_station_ride_num#10647, end_lat#10660, end_lng#10661, end_station_id#10657, end_station_name#10656, ended_at#10653, member_casual#10662, month#10649, ride_id#10650, rideable_type#10651, start_lat#10658, start_lng#10659, start_station_id#10655, start_station_name#10654, started_at#10652, valid_ride_id#10641, valid_station#10643, valid_time#10642, year#10648]
Arguments: [_processed_dttm#10645, _source_file#10644, _start_station_ride_num#10647, end_lat#10660, end_lng#10661, end_station_id#10657, end_station_name#10656, ended_at#10653, member_casual#10662, month#10649, ride_id#10650, rideable_type#10651, start_lat#10658, start_lng#10659, start_station_id#10655, start_station_name#10654, started_at#10652, valid_ride_id#10641, valid_station#10643, valid_time#10642, year#10648], [((((((NOT valid_ride_id#10641 OR NOT valid_time#10642) OR NOT valid_station#10643) OR isnull(ride_id#10650)) OR (ride_id#10650 = )) OR isnull(rideable_type#10651)) OR (rideable_type#10651 = ))]
(2) InMemoryRelation
Arguments: [ride_id#10650, rideable_type#10651, started_at#10652, ended_at#10653, start_station_name#10654, start_station_id#10655, end_station_name#10656, end_station_id#10657, start_lat#10658, start_lng#10659, end_lat#10660, end_lng#10661, member_casual#10662, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649], CachedRDDBuilder(org.apache.spark.sql.execution.columnar.DefaultCachedBatchSerializer@43d79ed6,StorageLevel(disk, memory, deserialized, 1 replicas),AdaptiveSparkPlan isFinalPlan=true
+- == Final Plan ==
*(3) Project [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, 2026-04-08 09:56:30.312891 AS _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649]
+- Window [row_number() windowspecdefinition(start_station_id#10535, started_at#10532 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#10647], [start_station_id#10535], [started_at#10532 ASC NULLS FIRST]
+- *(2) Sort [start_station_id#10535 ASC NULLS FIRST, started_at#10532 ASC NULLS FIRST], false, 0
+- AQEShuffleRead coalesced
+- ShuffleQueryStage 0
+- Exchange hashpartitioning(start_station_id#10535, 200), ENSURE_REQUIREMENTS, [plan_id=2133]
+- *(1) Project [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, cast(start_station_id#10535 as double) AS start_station_id#10655, end_station_name#10536, cast(end_station_id#10537 as double) AS end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, isnotnull(ride_id#10530) AS valid_ride_id#10641, (ended_at#10533 > started_at#10532) AS valid_time#10642, (((isnotnull(end_station_id#10537) AND isnotnull(start_station_id#10535)) AND NOT (end_station_id#10537 = start_station_id#10535)) <=> true) AS valid_station#10643, input_file_name() AS _source_file#10644, year(cast(started_at#10532 as date)) AS year#10648, month(cast(started_at#10532 as date)) AS month#10649, start_station_id#10535, started_at#10532]
+- FileScan csv [ride_id#10530,rideable_type#10531,started_at#10532,ended_at#10533,start_station_name#10534,start_station_id#10535,end_station_name#10536,end_station_id#10537,start_lat#10538,start_lng#10539,end_lat#10540,end_lng#10541,member_casual#10542] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(55 paths)[s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibi..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_...
+- == Initial Plan ==
Project [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, 2026-04-08 09:56:30.312891 AS _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649]
+- Window [row_number() windowspecdefinition(start_station_id#10535, started_at#10532 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#10647], [start_station_id#10535], [started_at#10532 ASC NULLS FIRST]
+- Sort [start_station_id#10535 ASC NULLS FIRST, started_at#10532 ASC NULLS FIRST], false, 0
+- Exchange hashpartitioning(start_station_id#10535, 200), ENSURE_REQUIREMENTS, [plan_id=2095]
+- Project [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, cast(start_station_id#10535 as double) AS start_station_id#10655, end_station_name#10536, cast(end_station_id#10537 as double) AS end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, isnotnull(ride_id#10530) AS valid_ride_id#10641, (ended_at#10533 > started_at#10532) AS valid_time#10642, (((isnotnull(end_station_id#10537) AND isnotnull(start_station_id#10535)) AND NOT (end_station_id#10537 = start_station_id#10535)) <=> true) AS valid_station#10643, input_file_name() AS _source_file#10644, year(cast(started_at#10532 as date)) AS year#10648, month(cast(started_at#10532 as date)) AS month#10649, start_station_id#10535, started_at#10532]
+- FileScan csv [ride_id#10530,rideable_type#10531,started_at#10532,ended_at#10533,start_station_name#10534,start_station_id#10535,end_station_name#10536,end_station_id#10537,start_lat#10538,start_lng#10539,end_lat#10540,end_lng#10541,member_casual#10542] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(55 paths)[s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibi..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_...
,None)
(3) Scan csv
Output [13]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10535, end_station_name#10536, end_station_id#10537, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542]
Batched: false
Location: InMemoryFileIndex [s3a://rzvde-g8-kirsanov-dmitry/raw/citibike_data/202502/202502-citibike-tripdata-part00.csv, ... 54 entries]
ReadSchema: struct<ride_id:string,rideable_type:string,started_at:timestamp,ended_at:timestamp,start_station_name:string,start_station_id:string,end_station_name:string,end_station_id:string,start_lat:double,start_lng:double,end_lat:double,end_lng:double,member_casual:string>
(4) Project
Output [21]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, cast(start_station_id#10535 as double) AS start_station_id#10655, end_station_name#10536, cast(end_station_id#10537 as double) AS end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, isnotnull(ride_id#10530) AS valid_ride_id#10641, (ended_at#10533 > started_at#10532) AS valid_time#10642, (((isnotnull(end_station_id#10537) AND isnotnull(start_station_id#10535)) AND NOT (end_station_id#10537 = start_station_id#10535)) <=> true) AS valid_station#10643, input_file_name() AS _source_file#10644, year(cast(started_at#10532 as date)) AS year#10648, month(cast(started_at#10532 as date)) AS month#10649, start_station_id#10535, started_at#10532]
Input [13]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10535, end_station_name#10536, end_station_id#10537, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542]
(5) Exchange
Input [21]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, year#10648, month#10649, start_station_id#10535, started_at#10532]
Arguments: hashpartitioning(start_station_id#10535, 200), ENSURE_REQUIREMENTS, [plan_id=2660]
(6) Sort
Input [21]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, year#10648, month#10649, start_station_id#10535, started_at#10532]
Arguments: [start_station_id#10535 ASC NULLS FIRST, started_at#10532 ASC NULLS FIRST], false, 0
(7) Window
Input [21]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, year#10648, month#10649, start_station_id#10535, started_at#10532]
Arguments: [row_number() windowspecdefinition(start_station_id#10535, started_at#10532 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS _start_station_ride_num#10647], [start_station_id#10535], [started_at#10532 ASC NULLS FIRST]
(8) Project
Output [21]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, 2026-04-08 09:56:30.312891 AS _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649]
Input [22]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, year#10648, month#10649, start_station_id#10535, started_at#10532, _start_station_ride_num#10647]
(9) AdaptiveSparkPlan
Output [21]: [ride_id#10530, rideable_type#10531, started_at#10532, ended_at#10533, start_station_name#10534, start_station_id#10655, end_station_name#10536, end_station_id#10657, start_lat#10538, start_lng#10539, end_lat#10540, end_lng#10541, member_casual#10542, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649]
Arguments: isFinalPlan=false
(10) TableCacheQueryStage
Output [21]: [_processed_dttm#10645, _source_file#10644, _start_station_ride_num#10647, end_lat#10660, end_lng#10661, end_station_id#10657, end_station_name#10656, ended_at#10653, member_casual#10662, month#10649, ride_id#10650, rideable_type#10651, start_lat#10658, start_lng#10659, start_station_id#10655, start_station_name#10654, started_at#10652, valid_ride_id#10641, valid_station#10643, valid_time#10642, year#10648]
Arguments: 0
(11) Filter [codegen id : 1]
Input [21]: [_processed_dttm#10645, _source_file#10644, _start_station_ride_num#10647, end_lat#10660, end_lng#10661, end_station_id#10657, end_station_name#10656, ended_at#10653, member_casual#10662, month#10649, ride_id#10650, rideable_type#10651, start_lat#10658, start_lng#10659, start_station_id#10655, start_station_name#10654, started_at#10652, valid_ride_id#10641, valid_station#10643, valid_time#10642, year#10648]
Condition : ((((((NOT valid_ride_id#10641 OR NOT valid_time#10642) OR NOT valid_station#10643) OR isnull(ride_id#10650)) OR (ride_id#10650 = )) OR isnull(rideable_type#10651)) OR (rideable_type#10651 = ))
(12) Project [codegen id : 1]
Output [22]: [ride_id#10650, rideable_type#10651, started_at#10652, ended_at#10653, start_station_name#10654, start_station_id#10655, end_station_name#10656, end_station_id#10657, start_lat#10658, start_lng#10659, end_lat#10660, end_lng#10661, member_casual#10662, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649, empty2null(date_format(started_at#10652, yyyyMM, Some(Etc/UTC))) AS yyyymm#15715]
Input [21]: [_processed_dttm#10645, _source_file#10644, _start_station_ride_num#10647, end_lat#10660, end_lng#10661, end_station_id#10657, end_station_name#10656, ended_at#10653, member_casual#10662, month#10649, ride_id#10650, rideable_type#10651, start_lat#10658, start_lng#10659, start_station_id#10655, start_station_name#10654, started_at#10652, valid_ride_id#10641, valid_station#10643, valid_time#10642, year#10648]
(13) Sort [codegen id : 1]
Input [22]: [ride_id#10650, rideable_type#10651, started_at#10652, ended_at#10653, start_station_name#10654, start_station_id#10655, end_station_name#10656, end_station_id#10657, start_lat#10658, start_lng#10659, end_lat#10660, end_lng#10661, member_casual#10662, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649, yyyymm#15715]
Arguments: [yyyymm#15715 ASC NULLS FIRST], false, 0
(14) WriteFiles
Input [22]: [ride_id#10650, rideable_type#10651, started_at#10652, ended_at#10653, start_station_name#10654, start_station_id#10655, end_station_name#10656, end_station_id#10657, start_lat#10658, start_lng#10659, end_lat#10660, end_lng#10661, member_casual#10662, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649, yyyymm#15715]
(15) Execute InsertIntoHadoopFsRelationCommand
Input: []
Arguments: s3a://rzvde-g8-kirsanov-dmitry/raw_invalid/citibike_data, false, [yyyymm#15715], CSV, [header=true, __partition_columns=["yyyymm"], path=s3a://rzvde-g8-kirsanov-dmitry/raw_invalid/citibike_data/], Overwrite, [ride_id, rideable_type, started_at, ended_at, start_station_name, start_station_id, end_station_name, end_station_id, start_lat, start_lng, end_lat, end_lng, member_casual, valid_ride_id, valid_time, valid_station, _source_file, _processed_dttm, _start_station_ride_num, year, month, yyyymm]
(16) Filter
Input [21]: [_processed_dttm#10645, _source_file#10644, _start_station_ride_num#10647, end_lat#10660, end_lng#10661, end_station_id#10657, end_station_name#10656, ended_at#10653, member_casual#10662, month#10649, ride_id#10650, rideable_type#10651, start_lat#10658, start_lng#10659, start_station_id#10655, start_station_name#10654, started_at#10652, valid_ride_id#10641, valid_station#10643, valid_time#10642, year#10648]
Condition : ((((((NOT valid_ride_id#10641 OR NOT valid_time#10642) OR NOT valid_station#10643) OR isnull(ride_id#10650)) OR (ride_id#10650 = )) OR isnull(rideable_type#10651)) OR (rideable_type#10651 = ))
(17) Project
Output [22]: [ride_id#10650, rideable_type#10651, started_at#10652, ended_at#10653, start_station_name#10654, start_station_id#10655, end_station_name#10656, end_station_id#10657, start_lat#10658, start_lng#10659, end_lat#10660, end_lng#10661, member_casual#10662, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649, empty2null(date_format(started_at#10652, yyyyMM, Some(Etc/UTC))) AS yyyymm#15715]
Input [21]: [_processed_dttm#10645, _source_file#10644, _start_station_ride_num#10647, end_lat#10660, end_lng#10661, end_station_id#10657, end_station_name#10656, ended_at#10653, member_casual#10662, month#10649, ride_id#10650, rideable_type#10651, start_lat#10658, start_lng#10659, start_station_id#10655, start_station_name#10654, started_at#10652, valid_ride_id#10641, valid_station#10643, valid_time#10642, year#10648]
(18) Sort
Input [22]: [ride_id#10650, rideable_type#10651, started_at#10652, ended_at#10653, start_station_name#10654, start_station_id#10655, end_station_name#10656, end_station_id#10657, start_lat#10658, start_lng#10659, end_lat#10660, end_lng#10661, member_casual#10662, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649, yyyymm#15715]
Arguments: [yyyymm#15715 ASC NULLS FIRST], false, 0
(19) WriteFiles
Input [22]: [ride_id#10650, rideable_type#10651, started_at#10652, ended_at#10653, start_station_name#10654, start_station_id#10655, end_station_name#10656, end_station_id#10657, start_lat#10658, start_lng#10659, end_lat#10660, end_lng#10661, member_casual#10662, valid_ride_id#10641, valid_time#10642, valid_station#10643, _source_file#10644, _processed_dttm#10645, _start_station_ride_num#10647, year#10648, month#10649, yyyymm#15715]
(20) Execute InsertIntoHadoopFsRelationCommand
Input: []
Arguments: s3a://rzvde-g8-kirsanov-dmitry/raw_invalid/citibike_data, false, [yyyymm#15715], CSV, [header=true, __partition_columns=["yyyymm"], path=s3a://rzvde-g8-kirsanov-dmitry/raw_invalid/citibike_data/], Overwrite, [ride_id, rideable_type, started_at, ended_at, start_station_name, start_station_id, end_station_name, end_station_id, start_lat, start_lng, end_lat, end_lng, member_casual, valid_ride_id, valid_time, valid_station, _source_file, _processed_dttm, _start_station_ride_num, year, month, yyyymm]
(21) AdaptiveSparkPlan
Output: []
Arguments: isFinalPlan=true