Skip to content

vllm.transformers_utils.configs.chatglm

ChatGLMConfig

Bases: PretrainedConfig

Source code in vllm/transformers_utils/configs/chatglm.py
class ChatGLMConfig(PretrainedConfig):
    """Hyperparameter container for ChatGLM models.

    Each constructor argument is stored on the instance under the same name.
    ``padded_vocab_size`` is additionally exposed as ``vocab_size`` and
    ``seq_length`` as ``max_position_embeddings``. Any extra keyword
    arguments are passed on to ``PretrainedConfig.__init__``.
    """

    model_type = "chatglm"
    # Read by the Transformers base class: each key is an alias for the
    # attribute named by its value.
    attribute_map = dict(
        num_hidden_layers="num_layers",
        n_head_kv="multi_query_group_num",
    )

    def __init__(
        self,
        num_layers=28,
        padded_vocab_size=65024,
        hidden_size=4096,
        ffn_hidden_size=13696,
        kv_channels=128,
        num_attention_heads=32,
        seq_length=2048,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        layernorm_epsilon=1e-5,
        rmsnorm=True,
        apply_residual_connection_post_layernorm=False,
        post_layer_norm=True,
        add_bias_linear=False,
        add_qkv_bias=False,
        interleaved_qkv=False,
        bias_dropout_fusion=True,
        multi_query_attention=False,
        multi_query_group_num=1,
        apply_query_key_layer_scaling=True,
        attention_softmax_in_fp32=True,
        fp32_residual_connection=False,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs,
    ):
        # NOTE: attributes are assigned in a fixed order on purpose; keep it
        # stable so the instance dictionary layout does not change.

        # Layer count, then the padded vocabulary size under both names.
        self.num_layers = num_layers
        self.vocab_size = self.padded_vocab_size = padded_vocab_size

        # Hidden / feed-forward widths and attention head settings.
        self.hidden_size, self.ffn_hidden_size = hidden_size, ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads

        # max_position_embeddings duplicates seq_length so that the config
        # stays compatible with long LoRA.
        self.seq_length = self.max_position_embeddings = seq_length

        # The two dropout values.
        self.hidden_dropout, self.attention_dropout = (
            hidden_dropout,
            attention_dropout,
        )

        # Layer-norm related flags.
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        residual_post_ln = apply_residual_connection_post_layernorm
        self.apply_residual_connection_post_layernorm = residual_post_ln
        self.post_layer_norm = post_layer_norm

        # Bias flags.
        self.add_bias_linear, self.add_qkv_bias = add_bias_linear, add_qkv_bias
        self.bias_dropout_fusion = bias_dropout_fusion

        # Multi-query attention switch and its group count.
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num

        # Scaling, precision and quantization flags.
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection
        self.quantization_bit = quantization_bit

        # Prefix-related values.
        self.pre_seq_len, self.prefix_projection = pre_seq_len, prefix_projection

        self.interleaved_qkv = interleaved_qkv

        # Everything not consumed above is handed to the base class.
        super().__init__(**kwargs)

add_bias_linear instance-attribute

add_bias_linear = add_bias_linear

add_qkv_bias instance-attribute

add_qkv_bias = add_qkv_bias

apply_query_key_layer_scaling instance-attribute

apply_query_key_layer_scaling = (
    apply_query_key_layer_scaling
)

apply_residual_connection_post_layernorm instance-attribute

apply_residual_connection_post_layernorm = (
    apply_residual_connection_post_layernorm
)

attention_dropout instance-attribute

attention_dropout = attention_dropout

attention_softmax_in_fp32 instance-attribute

attention_softmax_in_fp32 = attention_softmax_in_fp32

attribute_map class-attribute instance-attribute

attribute_map = {
    "num_hidden_layers": "num_layers",
    "n_head_kv": "multi_query_group_num",
}

bias_dropout_fusion instance-attribute

bias_dropout_fusion = bias_dropout_fusion

ffn_hidden_size instance-attribute

ffn_hidden_size = ffn_hidden_size

fp32_residual_connection instance-attribute

fp32_residual_connection = fp32_residual_connection

hidden_dropout instance-attribute

hidden_dropout = hidden_dropout

hidden_size instance-attribute

hidden_size = hidden_size

interleaved_qkv instance-attribute

interleaved_qkv = interleaved_qkv

kv_channels instance-attribute

kv_channels = kv_channels

layernorm_epsilon instance-attribute

layernorm_epsilon = layernorm_epsilon

max_position_embeddings instance-attribute

max_position_embeddings = seq_length

model_type class-attribute instance-attribute

model_type = 'chatglm'

multi_query_attention instance-attribute

multi_query_attention = multi_query_attention

multi_query_group_num instance-attribute

multi_query_group_num = multi_query_group_num

num_attention_heads instance-attribute

num_attention_heads = num_attention_heads

num_layers instance-attribute

num_layers = num_layers

padded_vocab_size instance-attribute

padded_vocab_size = padded_vocab_size

post_layer_norm instance-attribute

post_layer_norm = post_layer_norm

pre_seq_len instance-attribute

pre_seq_len = pre_seq_len

prefix_projection instance-attribute

prefix_projection = prefix_projection

quantization_bit instance-attribute

quantization_bit = quantization_bit

rmsnorm instance-attribute

rmsnorm = rmsnorm

seq_length instance-attribute

seq_length = seq_length

vocab_size instance-attribute

vocab_size = padded_vocab_size

__init__

__init__(
    num_layers=28,
    padded_vocab_size=65024,
    hidden_size=4096,
    ffn_hidden_size=13696,
    kv_channels=128,
    num_attention_heads=32,
    seq_length=2048,
    hidden_dropout=0.0,
    attention_dropout=0.0,
    layernorm_epsilon=1e-05,
    rmsnorm=True,
    apply_residual_connection_post_layernorm=False,
    post_layer_norm=True,
    add_bias_linear=False,
    add_qkv_bias=False,
    interleaved_qkv=False,
    bias_dropout_fusion=True,
    multi_query_attention=False,
    multi_query_group_num=1,
    apply_query_key_layer_scaling=True,
    attention_softmax_in_fp32=True,
    fp32_residual_connection=False,
    quantization_bit=0,
    pre_seq_len=None,
    prefix_projection=False,
    **kwargs,
)
Source code in vllm/transformers_utils/configs/chatglm.py
def __init__(
    self,
    num_layers=28,
    padded_vocab_size=65024,
    hidden_size=4096,
    ffn_hidden_size=13696,
    kv_channels=128,
    num_attention_heads=32,
    seq_length=2048,
    hidden_dropout=0.0,
    attention_dropout=0.0,
    layernorm_epsilon=1e-5,
    rmsnorm=True,
    apply_residual_connection_post_layernorm=False,
    post_layer_norm=True,
    add_bias_linear=False,
    add_qkv_bias=False,
    interleaved_qkv=False,
    bias_dropout_fusion=True,
    multi_query_attention=False,
    multi_query_group_num=1,
    apply_query_key_layer_scaling=True,
    attention_softmax_in_fp32=True,
    fp32_residual_connection=False,
    quantization_bit=0,
    pre_seq_len=None,
    prefix_projection=False,
    **kwargs,
):
    """Store every argument on ``self`` and forward ``kwargs`` to the parent.

    ``padded_vocab_size`` is stored as both ``vocab_size`` and
    ``padded_vocab_size``; ``seq_length`` is stored as both ``seq_length``
    and ``max_position_embeddings``. All other arguments are stored under
    their own names.
    """
    # NOTE: the assignment order below is deliberate; keep it stable so the
    # instance dictionary layout does not change.

    # Layer count, then the padded vocabulary size under both names.
    self.num_layers = num_layers
    self.vocab_size = self.padded_vocab_size = padded_vocab_size

    # Hidden / feed-forward widths and attention head settings.
    self.hidden_size, self.ffn_hidden_size = hidden_size, ffn_hidden_size
    self.kv_channels = kv_channels
    self.num_attention_heads = num_attention_heads

    # max_position_embeddings duplicates seq_length so that the config
    # stays compatible with long LoRA.
    self.seq_length = self.max_position_embeddings = seq_length

    # The two dropout values.
    self.hidden_dropout, self.attention_dropout = (
        hidden_dropout,
        attention_dropout,
    )

    # Layer-norm related flags.
    self.layernorm_epsilon = layernorm_epsilon
    self.rmsnorm = rmsnorm
    residual_post_ln = apply_residual_connection_post_layernorm
    self.apply_residual_connection_post_layernorm = residual_post_ln
    self.post_layer_norm = post_layer_norm

    # Bias flags.
    self.add_bias_linear, self.add_qkv_bias = add_bias_linear, add_qkv_bias
    self.bias_dropout_fusion = bias_dropout_fusion

    # Multi-query attention switch and its group count.
    self.multi_query_attention = multi_query_attention
    self.multi_query_group_num = multi_query_group_num

    # Scaling, precision and quantization flags.
    self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
    self.attention_softmax_in_fp32 = attention_softmax_in_fp32
    self.fp32_residual_connection = fp32_residual_connection
    self.quantization_bit = quantization_bit

    # Prefix-related values.
    self.pre_seq_len, self.prefix_projection = pre_seq_len, prefix_projection

    self.interleaved_qkv = interleaved_qkv

    # Everything not consumed above is handed to the parent initializer.
    super().__init__(**kwargs)