Bases: LlamaForCausalLM
Source code in vllm/model_executor/models/llama_eagle.py
  instance-attribute
 logits_processor = LogitsProcessor(
    vocab_size, scale=logit_scale
)
  instance-attribute
 model = LlamaModel(
    vllm_config=vllm_config,
    prefix="model",
    start_layer_id=target_layer_num,
)
 
 __init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/llama_eagle.py
  
 forward(
    input_ids: Tensor,
    positions: Tensor,
    hidden_states: Tensor,
    inputs_embeds: Tensor | None = None,
) -> tuple[Tensor, Tensor]
Source code in vllm/model_executor/models/llama_eagle.py
  
    
  Source code in vllm/model_executor/models/llama_eagle.py
  
  Bases: Module
Source code in vllm/model_executor/models/llama_eagle.py
  instance-attribute
 embed_tokens = VocabParallelEmbedding(
    vocab_size,
    hidden_size,
    prefix=maybe_prefix(prefix, "embed_tokens"),
)
  instance-attribute
 layers = ModuleList(
    [
        (
            LlamaDecoderLayer(
                vllm_config,
                i == 0,
                prefix=maybe_prefix(
                    prefix, f"layers.{i + start_layer_id}"
                ),
                config=config,
            )
        )
        for i in (range(num_hidden_layers))
    ]
)
 
 __init__(
    *,
    vllm_config: VllmConfig,
    prefix: str = "",
    start_layer_id: int = 0,
) -> None