vllm.model_executor.models.bert ¶
BertAttention ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
output instance-attribute ¶
output = BertSelfOutput(
hidden_size=hidden_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
self instance-attribute ¶
self = BertSelfAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__ ¶
__init__(
hidden_size: int,
num_attention_heads: int,
layer_norm_eps: float,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
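The forward pass of this module chains the two submodules above. A minimal sketch of that composition, written against the documented attribute roles (assumes callables with the standard BERT calling convention, not the vLLM source itself):

```python
import torch

# Sketch of the composition performed by BertAttention.forward:
# BertSelfAttention produces the attention output, BertSelfOutput applies the
# output projection, adds the residual connection, and layer-normalizes.
def bert_attention_block(self_attention, self_output,
                         hidden_states: torch.Tensor) -> torch.Tensor:
    attn_output = self_attention(hidden_states)      # QKV projection + attention
    return self_output(attn_output, hidden_states)   # dense + residual + LayerNorm
```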
BertEmbedding ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
position_embeddings instance-attribute ¶
position_embeddings = VocabParallelEmbedding(
max_position_embeddings, hidden_size
)
token_type_embeddings instance-attribute ¶
token_type_embeddings = VocabParallelEmbedding(
type_vocab_size, hidden_size
)
word_embeddings instance-attribute ¶
word_embeddings = VocabParallelEmbedding(
vocab_size, hidden_size
)
__init__ ¶
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Tensor,
position_ids: Tensor,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
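For orientation, a minimal single-device sketch of classic BERT input embeddings: the three lookups above are summed and layer-normalized. It uses plain nn.Embedding where the vLLM class uses VocabParallelEmbedding (so the tables can be sharded), and note that the documented forward signature above does not take token_type_ids directly.

```python
import torch
import torch.nn as nn

# Toy single-device equivalent of BERT input embeddings (illustrative only).
vocab_size, max_position_embeddings, type_vocab_size, hidden_size = 30522, 512, 2, 768
word_embeddings = nn.Embedding(vocab_size, hidden_size)
position_embeddings = nn.Embedding(max_position_embeddings, hidden_size)
token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)
layer_norm = nn.LayerNorm(hidden_size)

input_ids = torch.tensor([101, 2023, 102])        # one tokenized sequence
position_ids = torch.arange(input_ids.numel())
token_type_ids = torch.zeros_like(input_ids)

hidden = (word_embeddings(input_ids)
          + position_embeddings(position_ids)
          + token_type_embeddings(token_type_ids))
hidden = layer_norm(hidden)                        # [num_tokens, hidden_size]
```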
BertEmbeddingModel ¶
Bases: Module, SupportsQuant
A model that uses Bert to provide embedding functionalities.
This class encapsulates the BertModel and provides an interface for embedding operations and customized pooling functions.
Attributes:
| Name | Type | Description |
|---|---|---|
| model | BertModel | An instance of BertModel used for forward operations. |
| _pooler | Pooler | An instance of Pooler used for pooling operations. |
Source code in vllm/model_executor/models/bert.py
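A hedged usage sketch: BERT-style embedding checkpoints are served through this class when vLLM runs them as pooling models. The model name is only an example, and the task/argument names can vary between vLLM releases.

```python
from vllm import LLM

# Hypothetical example; "BAAI/bge-base-en-v1.5" stands in for any BERT-based
# embedding checkpoint that vLLM maps to BertEmbeddingModel.
llm = LLM(model="BAAI/bge-base-en-v1.5", task="embed")
(output,) = llm.embed(["vLLM also serves encoder-only models."])
print(len(output.outputs.embedding))  # dimensionality equals the model's hidden_size
```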
model instance-attribute ¶
model = _build_model(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "model"),
)
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
_build_model ¶
_build_model(
vllm_config: VllmConfig, prefix: str = ""
) -> BertModel
_build_pooler ¶
_build_pooler(pooler_config: PoolerConfig) -> Pooler
forward ¶
forward(
input_ids: Tensor,
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
get_input_embeddings ¶
load_weights ¶
Source code in vllm/model_executor/models/bert.py
BertEncoder ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
layer instance-attribute ¶
layer = ModuleList(
[
BertLayer(
config=config,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.layer.{layer_idx}",
)
for layer_idx in range(num_hidden_layers)
]
)
__init__ ¶
__init__(vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
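The encoder simply threads the hidden states through each BertLayer in order; a minimal sketch of that loop:

```python
import torch
import torch.nn as nn

# Sketch of the encoder loop over the ModuleList built above.
def run_encoder(layers: nn.ModuleList, hidden_states: torch.Tensor) -> torch.Tensor:
    for layer in layers:
        hidden_states = layer(hidden_states)
    return hidden_states
```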
BertForSequenceClassification ¶
Bases: Module, SupportsCrossEncoding, SupportsQuant
A model that uses Bert to provide sequence classification and cross-encoder scoring functionalities.
This class encapsulates a pooling-capable BertModel and provides an interface for classification and customized pooling functions.
Attributes:
| Name | Type | Description |
|---|---|---|
| bert | BertPoolingModel | An instance of BertPoolingModel used for forward operations. |
| pooler | Pooler | An instance of DispatchPooler used for pooling, classification, and scoring operations. |
Source code in vllm/model_executor/models/bert.py
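A hedged usage sketch for the cross-encoder ("score") path handled by the DispatchPooler below. The checkpoint name is only an example, and the task/argument names can vary between vLLM releases.

```python
from vllm import LLM

# Hypothetical example; "cross-encoder/ms-marco-MiniLM-L-6-v2" stands in for any
# BERT-based cross-encoder checkpoint served by BertForSequenceClassification.
llm = LLM(model="cross-encoder/ms-marco-MiniLM-L-6-v2", task="score")
(result,) = llm.score("What is vLLM?", ["vLLM is a high-throughput inference engine."])
print(result.outputs.score)
```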
bert instance-attribute ¶
bert = BertPoolingModel(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "bert"),
embedding_class=BertEmbedding,
)
pooler instance-attribute ¶
pooler = DispatchPooler(
{
"encode": for_encode(pooler_config),
"classify": ClassifierPooler(
pooling=pooler,
classifier=classifier,
act_fn=act_fn_for_seq_cls(model_config),
),
"score": ClassifierPooler(
pooling=pooler,
classifier=classifier,
act_fn=act_fn_for_cross_encoder(model_config),
),
}
)
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
get_input_embeddings ¶
BertForTokenClassification ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
bert instance-attribute ¶
bert = BertModel(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "bert"),
embedding_class=BertEmbedding,
)
__init__ ¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
get_input_embeddings ¶
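The token-classification head follows the standard pattern: a linear classifier applied to every token's final hidden state. A minimal sketch with placeholder sizes and names (not the vLLM attributes):

```python
import torch
import torch.nn as nn

# Toy per-token classification head (illustrative sizes and names).
hidden_size, num_labels, num_tokens = 768, 9, 16
classifier = nn.Linear(hidden_size, num_labels)
hidden_states = torch.randn(num_tokens, hidden_size)  # final hidden states from BertModel
logits = classifier(hidden_states)                    # [num_tokens, num_labels]
```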
BertIntermediate ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
dense = ColumnParallelLinear(
input_size=hidden_size,
output_size=intermediate_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
intermediate_size: int,
hidden_act: str,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
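The intermediate block is an up-projection to intermediate_size followed by the configured activation (GELU in standard BERT). A single-device sketch using nn.Linear in place of ColumnParallelLinear:

```python
import torch
import torch.nn as nn

# Toy equivalent of BertIntermediate (illustrative only).
hidden_size, intermediate_size = 768, 3072
dense = nn.Linear(hidden_size, intermediate_size)
act_fn = nn.GELU()
x = torch.randn(4, hidden_size)
up = act_fn(dense(x))  # [4, intermediate_size]
```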
BertLayer ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attention instance-attribute ¶
attention = BertAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
layer_norm_eps=layer_norm_eps,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attention",
)
intermediate instance-attribute ¶
intermediate = BertIntermediate(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
hidden_act=hidden_act,
quant_config=quant_config,
prefix=f"{prefix}.intermediate",
)
output instance-attribute ¶
output = BertOutput(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__ ¶
__init__(
config: BertConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
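A minimal sketch of how one layer chains the three submodules above; note that the residual added inside BertOutput is the attention block's output (assumes callables with the standard BERT calling convention):

```python
import torch

# Sketch of the composition performed by BertLayer.forward.
def bert_layer_block(attention, intermediate, output,
                     hidden_states: torch.Tensor) -> torch.Tensor:
    attn_output = attention(hidden_states)   # BertAttention
    ffn_hidden = intermediate(attn_output)   # BertIntermediate
    return output(ffn_hidden, attn_output)   # BertOutput: residual + LayerNorm
```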
BertModel ¶
Bases: Module, SupportsQuant
Source code in vllm/model_executor/models/bert.py
encoder instance-attribute ¶
encoder = BertEncoder(
vllm_config=vllm_config, prefix=f"{prefix}.encoder"
)
packed_modules_mapping class-attribute instance-attribute ¶
__init__ ¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
_load_weights ¶
Source code in vllm/model_executor/models/bert.py
forward ¶
forward(
input_ids: Tensor,
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
get_input_embeddings ¶
load_weights ¶
Source code in vllm/model_executor/models/bert.py
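A hedged sketch of the overall data flow: embed the token and position ids, then run the encoder stack (the real forward also accepts precomputed inputs_embeds and intermediate_tensors):

```python
import torch

# Sketch of BertModel's data flow (illustrative only).
def bert_model_flow(embeddings, encoder,
                    input_ids: torch.Tensor,
                    positions: torch.Tensor) -> torch.Tensor:
    hidden_states = embeddings(input_ids, positions)  # BertEmbedding
    return encoder(hidden_states)                     # BertEncoder
```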
BertOutput ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
dense = RowParallelLinear(
input_size=intermediate_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
intermediate_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
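BertOutput projects the feed-forward hidden states back to hidden_size, adds the residual (the attention block's output), and layer-normalizes. A single-device sketch with nn.Linear in place of RowParallelLinear:

```python
import torch
import torch.nn as nn

# Toy equivalent of BertOutput.forward (illustrative only).
def bert_output_block(dense: nn.Linear, norm: nn.LayerNorm,
                      ffn_hidden: torch.Tensor,
                      attention_output: torch.Tensor) -> torch.Tensor:
    return norm(dense(ffn_hidden) + attention_output)
```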
BertPooler ¶
Bases: Pooler
Source code in vllm/model_executor/models/bert.py
__init__ ¶
forward ¶
forward(
hidden_states: Union[Tensor, list[Tensor]],
pooling_metadata: PoolingMetadata,
) -> Union[Tensor, list[Tensor]]
Source code in vllm/model_executor/models/bert.py
get_pooling_updates ¶
get_pooling_updates(
task: PoolingTask,
) -> PoolingParamsUpdate
get_supported_tasks ¶
get_supported_tasks() -> Set[PoolingTask]
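The pooler follows the classic BERT recipe: take each sequence's first (CLS) token, project it, and apply tanh. A single-sequence sketch (the vLLM class uses pooling_metadata to locate first tokens inside the flattened token batch):

```python
import torch
import torch.nn as nn

# Toy CLS pooling for a single sequence (illustrative only).
hidden_size = 768
dense = nn.Linear(hidden_size, hidden_size)
sequence_hidden = torch.randn(16, hidden_size)  # [num_tokens, hidden_size]
pooled = torch.tanh(dense(sequence_hidden[0]))  # [hidden_size]
```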
BertPoolingModel ¶
Bases: BertModel
Source code in vllm/model_executor/models/bert.py
__init__ ¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
load_weights ¶
Source code in vllm/model_executor/models/bert.py
BertSelfAttention ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attn instance-attribute ¶
attn = EncoderOnlyAttention(
num_heads=num_heads,
head_size=head_dim,
scale=scaling,
num_kv_heads=num_kv_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attn",
)
qkv_proj instance-attribute ¶
qkv_proj = QKVParallelLinear(
hidden_size=hidden_size,
head_size=head_dim,
total_num_heads=total_num_heads,
total_num_kv_heads=total_num_kv_heads,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.qkv_proj",
)
__init__ ¶
__init__(
hidden_size: int,
num_attention_heads: int,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward ¶
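For orientation, the sizes fed to the fused QKV projection and the attention backend are derived from hidden_size and num_attention_heads; a sketch with example values (the real __init__ additionally divides the heads across tensor-parallel ranks):

```python
# Illustrative size derivation (example values, not read from any config).
hidden_size = 768
total_num_heads = 12                        # num_attention_heads
head_dim = hidden_size // total_num_heads   # 64
scaling = head_dim ** -0.5                  # softmax scale for the attention op
qkv_width = 3 * total_num_heads * head_dim  # fused Q, K, V projection width
```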
BertSelfOutput ¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
dense = RowParallelLinear(
input_size=hidden_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__ ¶
__init__(
hidden_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)