spark_expectations.examples.base_setup.CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
module-attribute
spark_expectations.examples.base_setup.RULES_DATA = """
    ("your_product", "dq_spark_local.customer_order", "row_dq", "customer_id_is_not_null", "customer_id", "customer_id is not null", "drop", "validity", "customer_id should not be null", true, true, false, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "row_dq", "sales_greater_than_zero", "sales", "sales > 2", "drop", "accuracy", "sales value should be greater than 2", true, true, true, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "row_dq", "discount_threshold", "discount", "discount*100 < 60", "drop", "validity", "discount should be less than 60", true, true, false, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "row_dq", "ship_mode_in_set", "ship_mode", "lower(trim(ship_mode)) in('second class', 'standard class')", "drop", "validity", "ship_mode should belong to the allowed set", true, true, false, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "row_dq", "profit_threshold", "profit", "profit>0", "drop", "validity", "profit should be greater than 0", true, true, false, true, 0)

    ,("your_product", "dq_spark_local.customer_order", "agg_dq", "sum_of_sales", "sales", "sum(sales)>10000", "ignore", "validity", "sum of sales should be greater than 10000", true, true, true, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "agg_dq", "sum_of_quantity", "quantity", "sum(quantity)>10000", "ignore", "validity", "sum of quantity should be greater than 10000", true, true, true, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "agg_dq", "distinct_of_ship_mode", "ship_mode", "count(distinct ship_mode)<=3", "ignore", "validity", "distinct ship_mode count should be at most 3", true, true, true, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "agg_dq", "row_count", "*", "count(*)>=10000", "ignore", "validity", "row count should be at least 10000", true, true, true, false, 0)

    ,("your_product", "dq_spark_local.customer_order", "query_dq", "product_missing_count_threshold", "*", "((select count(distinct product_id) from product) - (select count(distinct product_id) from order))>(select count(distinct product_id) from product)*0.2", "ignore", "validity", "product missing count threshold", true, true, true, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "query_dq", "product_category", "*", "(select count(distinct category) from product) < 5", "ignore", "validity", "distinct product category count should be less than 5", true, true, true, false, 0)
    ,("your_product", "dq_spark_local.customer_order", "query_dq", "row_count_in_order", "*", "(select count(*) from order)<10000", "ignore", "accuracy", "row count of the order dataset", true, true, true, false, 0)
"""
module-attribute
spark_expectations.examples.base_setup.RULES_TABLE_SCHEMA = """
    ( product_id STRING,
      table_name STRING,
      rule_type STRING,
      rule STRING,
      column_name STRING,
      expectation STRING,
      action_if_failed STRING,
      tag STRING,
      description STRING,
      enable_for_source_dq_validation BOOLEAN,
      enable_for_target_dq_validation BOOLEAN,
      is_active BOOLEAN,
      enable_error_drop_alert BOOLEAN,
      error_drop_threshold INT )
"""
module-attribute
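Taken together, RULES_TABLE_SCHEMA is a DDL column-list fragment and RULES_DATA is a comma-separated list of value tuples, so the two can seed a rules table. A minimal sketch, assuming an active SparkSession named spark, an existing dq_spark_local database, and a hypothetical table name dq_rules (the actual setup code may create the table differently):

    from spark_expectations.examples.base_setup import RULES_DATA, RULES_TABLE_SCHEMA

    # Create the rules table from the DDL fragment, then bulk-insert the
    # example rules in one statement; RULES_DATA already has the shape
    # "(...tuple...), (...tuple...), ..." that VALUES expects.
    spark.sql(f"CREATE TABLE IF NOT EXISTS dq_spark_local.dq_rules {RULES_TABLE_SCHEMA}")
    spark.sql(f"INSERT INTO dq_spark_local.dq_rules VALUES {RULES_DATA}")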
spark_expectations.examples.base_setup.add_kafka_jars(builder: SparkSession.builder) -> SparkSession.builder
Source code in spark_expectations/examples/base_setup.py
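A minimal usage sketch (not from the source), assuming only the signature above: pass a builder through add_kafka_jars before creating the session, so the Kafka connector jars it configures are on the classpath. The builder options shown are illustrative:

    from pyspark.sql import SparkSession
    from spark_expectations.examples.base_setup import add_kafka_jars

    # add_kafka_jars takes a SparkSession builder and returns it with the
    # Kafka jars attached; create the session from the returned builder.
    builder = SparkSession.builder.master("local[*]").appName("dq-kafka-demo")
    spark = add_kafka_jars(builder).getOrCreate()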
spark_expectations.examples.base_setup.set_up_bigquery(materialization_dataset: str) -> SparkSession
Source code in spark_expectations/examples/base_setup.py
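A usage sketch under the same caveat: materialization_dataset names the BigQuery dataset the session uses to materialize query results. The dataset name below is hypothetical:

    from spark_expectations.examples.base_setup import set_up_bigquery

    # Returns a SparkSession configured for BigQuery access.
    spark = set_up_bigquery(materialization_dataset="dq_spark_materialization")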
spark_expectations.examples.base_setup.set_up_delta() -> SparkSession
Source code in spark_expectations/examples/base_setup.py
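Usage sketch: set_up_delta takes no arguments and returns a local SparkSession preconfigured for Delta Lake, which the Delta examples then use directly:

    from spark_expectations.examples.base_setup import set_up_delta

    spark = set_up_delta()  # SparkSession with Delta Lake configuration applied
    spark.sql("SHOW DATABASES").show()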
spark_expectations.examples.base_setup.set_up_iceberg() -> SparkSession
Source code in spark_expectations/examples/base_setup.py
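Usage sketch: set_up_iceberg mirrors set_up_delta but configures the returned session for Apache Iceberg instead:

    from spark_expectations.examples.base_setup import set_up_iceberg

    spark = set_up_iceberg()  # SparkSession with Iceberg configuration applied
    spark.sql("SHOW DATABASES").show()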