vllm.config.kv_events ¶

KVEventsConfig ¶

Configuration for KV event publishing.

Source code in vllm/config/kv_events.py

@config
@dataclass
class KVEventsConfig:
    """Settings that control publishing of KV cache events.

    If `publisher` is left unset, `__post_init__` resolves it to `"zmq"`
    when `enable_kv_cache_events` is true and to `"null"` otherwise.
    """

    enable_kv_cache_events: bool = False
    """If True, enable KV cache events for tracking block storage and removal.
    Events can be published externally by zmq using the event publisher config.
    """

    # The default is None so that __post_init__ can distinguish "not set"
    # from an explicit choice and fill in the effective publisher.
    publisher: Literal["null", "zmq"] = Field(default=None)
    """The publisher to use for publishing kv events. Can be "null", "zmq".
    """

    endpoint: str = "tcp://*:5557"
    """The zmq endpoint to use for publishing kv events.
    """

    replay_endpoint: str | None = None
    """The zmq endpoint to use for replaying kv events.
    """

    buffer_steps: int = 10_000
    """The number of steps to cache for replay endpoint. Will only save
    events from the last N steps for the replay endpoint.
    """

    hwm: int = 100_000
    """The zmq high water mark for the event publisher. After queueing N events,
    events will start dropping if the consumer is not keeping up.
    """

    max_queue_size: int = 100_000
    """The maximum number of events to queue while waiting for publishing.
    """

    topic: str = ""
    """The topic to use for the event publisher. Consumers can subscribe to
    this topic to receive events.
    """

    def __post_init__(self):
        """Fill in `publisher` when the caller did not choose one."""
        # An explicitly selected publisher is kept untouched.
        if self.publisher is not None:
            return

        # Otherwise derive it from whether KV cache events are enabled.
        if self.enable_kv_cache_events:
            self.publisher = "zmq"
        else:
            self.publisher = "null"

buffer_steps `class-attribute` `instance-attribute` ¶

buffer_steps: int = 10000

The number of steps to cache for the replay endpoint. Only events from the last N steps are kept for the replay endpoint.

enable_kv_cache_events `class-attribute` `instance-attribute` ¶

enable_kv_cache_events: bool = False

If True, enable KV cache events for tracking block storage and removal. Events can be published externally via ZMQ using the event publisher config.

endpoint `class-attribute` `instance-attribute` ¶

endpoint: str = 'tcp://*:5557'

The zmq endpoint to use for publishing kv events.

hwm `class-attribute` `instance-attribute` ¶

hwm: int = 100000

The zmq high water mark for the event publisher. After queueing N events, events will start dropping if the consumer is not keeping up.

max_queue_size `class-attribute` `instance-attribute` ¶

max_queue_size: int = 100000

The maximum number of events to queue while waiting for publishing.

publisher `class-attribute` `instance-attribute` ¶

publisher: Literal['null', 'zmq'] = Field(default=None)

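The publisher to use for publishing KV events. Can be "null" or "zmq". If left unset, it is resolved in `__post_init__` to "zmq" when `enable_kv_cache_events` is True and to "null" otherwise.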

replay_endpoint `class-attribute` `instance-attribute` ¶

replay_endpoint: str | None = None

The zmq endpoint to use for replaying kv events.

topic `class-attribute` `instance-attribute` ¶

topic: str = ''

The topic to use for the event publisher. Consumers can subscribe to this topic to receive events.

__post_init__ ¶

__post_init__()

Source code in vllm/config/kv_events.py

def __post_init__(self):
    """Fill in `publisher` when the caller did not choose one."""
    # An explicitly selected publisher is kept untouched.
    if self.publisher is not None:
        return

    # Otherwise derive it from whether KV cache events are enabled.
    if self.enable_kv_cache_events:
        self.publisher = "zmq"
    else:
        self.publisher = "null"

vllm.config.kv_events ¶

KVEventsConfig ¶

buffer_steps class-attribute instance-attribute ¶

enable_kv_cache_events class-attribute instance-attribute ¶

endpoint class-attribute instance-attribute ¶

hwm class-attribute instance-attribute ¶

max_queue_size class-attribute instance-attribute ¶

publisher class-attribute instance-attribute ¶

replay_endpoint class-attribute instance-attribute ¶

topic class-attribute instance-attribute ¶