Source code for feast_spark.constants

from typing import Optional

from feast.constants import ConfigMeta


[docs]class ConfigOptions(metaclass=ConfigMeta):
    #: Default Feast Job Service URL
    JOB_SERVICE_URL: Optional[str] = None

    #: Enable or disable TLS/SSL to Feast Job Service
    JOB_SERVICE_ENABLE_SSL: str = "False"

    #: Path to certificate(s) to secure connection to Feast Job Service
    JOB_SERVICE_SERVER_SSL_CERT: str = ""

    #: Enable or disable control loop for Feast Job Service
    JOB_SERVICE_ENABLE_CONTROL_LOOP: str = "False"

    #: If set to True, the control loop will try to restart failed streaming jobs
    JOB_SERVICE_RETRY_FAILED_JOBS: str = "False"

    #: Pause in seconds between starting new jobs in the control loop
    JOB_SERVICE_PAUSE_BETWEEN_JOBS: str = "5"

    #: Port on which the Prometheus metric server will run
    JOB_SERVICE_PROMETHEUS_METRIC_PORT: int = 8080

    #: Endpoint URL for S3 storage_client
    S3_ENDPOINT_URL: Optional[str] = None

    #: Spark Job launcher. The choice of storage is connected to the choice of SPARK_LAUNCHER.
    #:
    #: Options: "standalone", "dataproc", "emr"
    SPARK_LAUNCHER: Optional[str] = None

    #: Staging location for Feast Spark ingestion jobs. The choice of storage is connected to the choice of SPARK_LAUNCHER.
    #:
    #: E.g. gs://some-bucket/output/, s3://some-bucket/output/, file:///data/subfolder/
    SPARK_STAGING_LOCATION: Optional[str] = None

    #: Feast Spark ingestion jar file. The choice of storage is connected to the choice of SPARK_LAUNCHER.
    #:
    #: E.g. "dataproc" (http and gs), "emr" (http and s3), "standalone" (http and file)
    SPARK_INGESTION_JAR: str = "https://storage.googleapis.com/feast-jobs/spark/ingestion/feast-ingestion-spark-develop.jar"

    #: Spark resource manager master URL
    SPARK_STANDALONE_MASTER: str = "local[*]"

    #: Directory where Spark is installed
    SPARK_HOME: Optional[str] = None

    #: The project id where the materialized view of a BigQuerySource will be created.
    #: By default, the same project where the view is located is used.
    SPARK_BQ_MATERIALIZATION_PROJECT: Optional[str] = None

    #: The dataset id where the materialized view of a BigQuerySource will be created.
    #: By default, the same dataset where the view is located is used.
    SPARK_BQ_MATERIALIZATION_DATASET: Optional[str] = None

    #: Dataproc cluster to run Feast Spark Jobs in
    DATAPROC_CLUSTER_NAME: Optional[str] = None

    #: Project of Dataproc cluster
    DATAPROC_PROJECT: Optional[str] = None

    #: Region of Dataproc cluster
    DATAPROC_REGION: Optional[str] = None

    #: Number of executor instances for Dataproc cluster
    DATAPROC_EXECUTOR_INSTANCES = "2"

    #: Number of executor cores for Dataproc cluster
    DATAPROC_EXECUTOR_CORES = "2"

    #: Amount of executor memory for Dataproc cluster
    DATAPROC_EXECUTOR_MEMORY = "2g"

    #: Namespace to use for Spark jobs launched via the k8s spark operator
    SPARK_K8S_NAMESPACE = "default"

    #: Expect the k8s spark operator to be running in the same cluster as Feast
    SPARK_K8S_USE_INCLUSTER_CONFIG = "True"

    #: SparkApplication resource template
    SPARK_K8S_JOB_TEMPLATE_PATH = None

    #: SparkApplication resource template for Batch Ingestion Jobs
    SPARK_K8S_BATCH_INGESTION_TEMPLATE_PATH: Optional[str] = ""

    #: SparkApplication resource template for Stream Ingestion Jobs
    SPARK_K8S_STREAM_INGESTION_TEMPLATE_PATH: Optional[str] = ""

    #: SparkApplication resource template for Historical Retrieval Jobs
    SPARK_K8S_HISTORICAL_RETRIEVAL_TEMPLATE_PATH: Optional[str] = ""

    #: Host of the Redis instance which stores Spark Ingestion Job metrics
    SPARK_METRICS_REDIS_HOST: Optional[str] = None

    #: Port of the Redis instance which stores Spark Ingestion Job metrics
    SPARK_METRICS_REDIS_PORT: Optional[str] = None

    #: Host of the Redis instance which stores locks for job management
    LOCK_MGR_REDIS_HOST: Optional[str] = None

    #: Port of the Redis instance which stores locks for job management
    LOCK_MGR_REDIS_PORT: Optional[str] = None

    #: TTL for job management locks
    LOCK_EXPIRY: Optional[str] = "60"

    #: File format of historical retrieval features
    HISTORICAL_FEATURE_OUTPUT_FORMAT: str = "parquet"

    #: File location of historical retrieval features
    HISTORICAL_FEATURE_OUTPUT_LOCATION: Optional[str] = None

    #: Default Redis host
    REDIS_HOST: Optional[str] = ""

    #: Default Redis port
    REDIS_PORT: Optional[str] = ""

    #: Redis credentials
    REDIS_PASSWORD: Optional[str] = ""

    #: Enable or disable TLS/SSL to Redis
    REDIS_SSL: Optional[str] = "False"

    #: BigTable Project ID
    BIGTABLE_PROJECT: Optional[str] = ""

    #: BigTable Instance ID
    BIGTABLE_INSTANCE: Optional[str] = ""

    #: Cassandra host. Can be a comma-separated string
    CASSANDRA_HOST: Optional[str] = ""

    #: Cassandra port
    CASSANDRA_PORT: Optional[str] = ""

    #: Enable or disable StatsD
    STATSD_ENABLED: str = "False"

    #: Default StatsD host
    STATSD_HOST: Optional[str] = None

    #: Default StatsD port
    STATSD_PORT: Optional[str] = None

    #: Ingestion Job dead-letter destination. The choice of storage is connected to the choice of SPARK_LAUNCHER.
    #:
    #: E.g. gs://some-bucket/output/, s3://some-bucket/output/, file:///data/subfolder/
    DEADLETTER_PATH: str = ""

    #: Ingestion Job checkpoint location. Same format as DEADLETTER_PATH
    CHECKPOINT_PATH: str = ""

    #: ProtoRegistry address (currently only Stencil Server is supported as registry)
    #: https://github.com/odpf/stencil
    STENCIL_URL: str = ""

    #: Bearer token used for authentication with Stencil Server
    STENCIL_TOKEN: str = ""

    #: If set to True, rows that do not pass custom validation (see feast.contrib.validation)
    #: won't be saved to Online Storage
    INGESTION_DROP_INVALID_ROWS: str = "False"

    #: EMR cluster to run Feast Spark Jobs in
    EMR_CLUSTER_ID: Optional[str] = None

    #: Region of EMR cluster
    EMR_REGION: Optional[str] = None

    #: Template path of EMR cluster
    EMR_CLUSTER_TEMPLATE_PATH: Optional[str] = None

    #: Log path of EMR cluster
    EMR_LOG_LOCATION: Optional[str] = None

    #: Whitelisted Feast Job Types
    WHITELISTED_JOB_TYPES: Optional[str] = None

    #: Whitelisted Feast projects
    WHITELISTED_PROJECTS: Optional[str] = None

    #: File path to a whitelist containing all the feature tables allowed for ingestion.
    #: Each line in the file should be in the format <project>:<feature table>
    WHITELISTED_FEATURE_TABLES_PATH: Optional[str] = None

    #: If set, the streaming ingestion job will consume incoming rows not continuously,
    #: but periodically, at the configured interval (in seconds).
    #: This may help to control the number of write requests to storage
    SPARK_STREAMING_TRIGGERING_INTERVAL: Optional[str] = None

    #: GCP project of the BigQuery dataset used to stage the entities during historical
    #: feature retrieval. If not set, the GCP project of the feature table batch source
    #: will be used instead.
    BQ_STAGING_PROJECT: Optional[str] = None

    #: BigQuery dataset used to stage the entities during historical feature retrieval.
    #: If not set, the BigQuery dataset of the batch source will be used instead.
    BQ_STAGING_DATASET: Optional[str] = None
[docs]    def defaults(self):
        return {
            k: getattr(self, k)
            for k in self.__config_keys__
            if getattr(self, k) is not None
        }
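
ConfigOptions.defaults() above returns only the options whose value is not None, keyed by the names listed in __config_keys__ (presumably populated by feast's ConfigMeta metaclass). A minimal, self-contained sketch of that filtering pattern, using a hypothetical SketchOptions class with a hand-written __config_keys__ list rather than the real ConfigOptions:

from typing import Optional


class SketchOptions:
    """Hypothetical stand-in mirroring the shape of ConfigOptions above."""

    JOB_SERVICE_URL: Optional[str] = None      # default is None -> excluded from defaults()
    JOB_SERVICE_ENABLE_SSL: str = "False"      # non-None default -> included
    SPARK_STANDALONE_MASTER: str = "local[*]"  # non-None default -> included

    # In the real class this list is provided by the metaclass; here it is
    # spelled out by hand for the sake of the sketch.
    __config_keys__ = [
        "JOB_SERVICE_URL",
        "JOB_SERVICE_ENABLE_SSL",
        "SPARK_STANDALONE_MASTER",
    ]

    def defaults(self):
        # Same filter as ConfigOptions.defaults(): keep only non-None options
        return {
            k: getattr(self, k)
            for k in self.__config_keys__
            if getattr(self, k) is not None
        }


print(SketchOptions().defaults())
# {'JOB_SERVICE_ENABLE_SSL': 'False', 'SPARK_STANDALONE_MASTER': 'local[*]'}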