module-attribute  ¶
 EMBED_DTYPE_TO_NUMPY_DTYPE_VIEW = {
    "float32": float32,
    "float16": float16,
    "bfloat16": float16,
    "fp8_e4m3": uint8,
    "fp8_e5m2": uint8,
}
 module-attribute  ¶
 EMBED_DTYPE_TO_TORCH_DTYPE = {
    "float32": float32,
    "float16": float16,
    "bfloat16": bfloat16,
    "fp8_e4m3": float8_e4m3fn,
    "fp8_e5m2": float8_e5m2,
}
 module-attribute  ¶
 EMBED_DTYPE_TO_TORCH_DTYPE_VIEW = {
    "float32": float32,
    "float16": float16,
    "bfloat16": float16,
    "fp8_e4m3": uint8,
    "fp8_e5m2": uint8,
}
 module-attribute  ¶
 EmbedDType = Literal[
    "float32", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"
]
 MetadataItem  dataclass  ¶
 Source code in vllm/utils/serial_utils.py
   
 __init__(
    index: int,
    embed_dtype: EmbedDType,
    endianness: Endianness,
    start: int,
    end: int,
    shape: tuple[int, ...],
) -> None
 
 binary2tensor(
    binary: bytes,
    shape: tuple[int, ...],
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> Tensor
Source code in vllm/utils/serial_utils.py
  
 decode_pooling_output(
    items: list[MetadataItem], body: bytes
) -> list[Tensor]
Source code in vllm/utils/serial_utils.py
  
 encode_pooling_bytes(
    pooling_outputs: list[PoolingRequestOutput],
    embed_dtype: EmbedDType,
    endianness: Endianness,
)
Source code in vllm/utils/serial_utils.py
  
 encode_pooling_output(
    output: PoolingRequestOutput,
    encoding_format: EncodingFormat,
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> list[float] | str | bytes
Source code in vllm/utils/serial_utils.py
  
 tensor2binary(
    tensor: Tensor,
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> bytes