easy_sql.sql_processor.funcs_spark

Module Contents

Classes

AlertFunc

AnalyticsFuncs

CacheFuncs

ColumnFuncs

IOFuncs

LangFuncs

ModelFuncs

ParallelismFuncs

PartitionFuncs

TableFuncs

TestFuncs

class easy_sql.sql_processor.funcs_spark.AlertFunc(backend, alerter)
Parameters
alert(self, step, context, rule_name, pass_condition, alert_template, mentioned_users)
Parameters
alert_exception_handler(self, rule_name, mentioned_users)
Parameters
  • rule_name (str) –

  • mentioned_users (str) –

alert_with_backend(self, backend, step, context, rule_name, pass_condition, alert_template, mentioned_users)
Parameters
class easy_sql.sql_processor.funcs_spark.AnalyticsFuncs(backend)
Parameters

backend (easy_sql.sql_processor.backend.Backend) –

data_profiling_report(self, table, query, output_folder, max_count='50000', include_correlations='true', types='html')
Parameters
  • table (str) –

  • query (str) –

  • output_folder (str) –

  • max_count (str) –

  • include_correlations (str) –

  • types (Union[str, List[str]]) –

class easy_sql.sql_processor.funcs_spark.CacheFuncs(spark)
Parameters

spark (pyspark.sql.SparkSession) –

unpersist(self, table_name)
Parameters

table_name (str) –

class easy_sql.sql_processor.funcs_spark.ColumnFuncs(backend)
Parameters

backend (easy_sql.sql_processor.backend.Backend) –

all_cols_prefixed_with_exclusion_expr(self, table_name, prefix, *cols_to_exclude)
Parameters
  • table_name (str) –

  • prefix (str) –

  • cols_to_exclude (str) –

Return type

str

all_cols_with_exclusion_expr(self, table_name, *cols_to_exclude)
Parameters
  • table_name (str) –

  • cols_to_exclude (str) –

Return type

str

all_cols_without_one_expr(self, table_name, *cols_to_exclude)
Parameters
  • table_name (str) –

  • cols_to_exclude (str) –

Return type

str

class easy_sql.sql_processor.funcs_spark.IOFuncs(spark)

Bases: easy_sql.sql_processor.funcs_common.IOFuncs

Inheritance diagram: easy_sql.sql_processor.funcs_spark.IOFuncs → easy_sql.sql_processor.funcs_common.IOFuncs
Parameters

spark (pyspark.sql.SparkSession) –

rename_csv_output(self, spark_output_path, to_file)
Parameters
  • spark_output_path (str) –

  • to_file (str) –

update_json_local(self, context, vars, list_vars, json_attr, output_file)
Parameters
write_csv(self, table, output_file)
Parameters
  • table (str) –

  • output_file (str) –

write_json_local(self, table, output_file)
Parameters
  • table (str) –

  • output_file (str) –

class easy_sql.sql_processor.funcs_spark.LangFuncs(backend)
Parameters

backend (easy_sql.sql_processor.backend.SparkBackend) –

call_java(self, cls, func_name, *args)
Parameters
  • cls (str) –

  • func_name (str) –

Return type

str

class easy_sql.sql_processor.funcs_spark.ModelFuncs(spark)
Parameters

spark (pyspark.sql.SparkSession) –

model_predict(self, model_save_path, table_name, feature_cols, id_col, output_ref_cols)
Parameters
  • model_save_path (str) –

  • table_name (str) –

  • feature_cols (str) –

  • id_col (str) –

  • output_ref_cols (str) –

class easy_sql.sql_processor.funcs_spark.ParallelismFuncs(spark)
Parameters

spark (pyspark.sql.SparkSession) –

coalesce(self, table, partitions)
Parameters
  • table (str) –

  • partitions (str) –

repartition(self, table, partitions)
Parameters
  • table (str) –

  • partitions (str) –

repartition_by_column(self, table, partitions)
Parameters
  • table (str) –

  • partitions (str) –

set_shuffle_partitions(self, partitions)
Parameters

partitions (str) –

class easy_sql.sql_processor.funcs_spark.PartitionFuncs(backend)

Bases: easy_sql.sql_processor.funcs_common.PartitionFuncs

Inheritance diagram: easy_sql.sql_processor.funcs_spark.PartitionFuncs → easy_sql.sql_processor.funcs_common.PartitionFuncs
Parameters

backend (Union[pyspark.sql.SparkSession, easy_sql.sql_processor.backend.SparkBackend]) –

get_partition_cols(self, table_name)
Parameters

table_name (str) –

Return type

List[str]

class easy_sql.sql_processor.funcs_spark.TableFuncs(backend)
Parameters

backend (easy_sql.sql_processor.backend.Backend) –

check_not_null_column_in_table(self, step, table_name, not_null_column, query=None)
Parameters
Return type

bool

ensure_no_null_data_in_table(self, step, table_name, query=None)
Parameters
Return type

bool

class easy_sql.sql_processor.funcs_spark.TestFuncs(backend)
Parameters

backend (easy_sql.sql_processor.backend.Backend) –

sleep(self, secs)
Parameters

secs (str) –