Index
A | C | D | E | F | G | H | I | L | M | N | O | P | R | S | T | U | V | W
A
activation_bits (llm_analysis.analysis.DtypeConfig attribute)
ActivationRecomputation (class in llm_analysis.analysis)
C
config_batch_size_and_gradient_accumulation_steps() (llm_analysis.analysis.LLMAnalysis method)
D
default() (llm_analysis.config.EnhancedJSONEncoder method)
dp_size (llm_analysis.analysis.ParallelismConfig attribute)
DSZeRO (class in llm_analysis.analysis)
DtypeConfig (class in llm_analysis.analysis)
DtypeConfig (class in llm_analysis.config)
dump_configs() (in module llm_analysis.config)
dump_hf_model_configs_by_type_and_task() (in module llm_analysis.config)
dump_model_config_by_name() (in module llm_analysis.config)
E
embedding_bits (llm_analysis.analysis.DtypeConfig attribute)
EnhancedJSONEncoder (class in llm_analysis.config)
Enum (class in llm_analysis.analysis)
ep_size (llm_analysis.analysis.ParallelismConfig attribute)
expansion_ratio (llm_analysis.analysis.ModelConfig attribute)
F
ffn_embed_dim (llm_analysis.analysis.ModelConfig attribute)
FULL (llm_analysis.analysis.ActivationRecomputation attribute)
G
get_configs_desc() (llm_analysis.analysis.LLMAnalysis method)
get_dtype_config_by_name() (in module llm_analysis.analysis)
get_dtype_config_by_name() (in module llm_analysis.config)
get_gpu_config_by_name() (in module llm_analysis.analysis)
get_gpu_config_by_name() (in module llm_analysis.config)
get_gpu_hbm_bandwidth() (llm_analysis.analysis.LLMAnalysis method)
get_hf_models_by_type_and_task() (in module llm_analysis.config)
get_inter_node_bandwidth() (llm_analysis.analysis.LLMAnalysis method)
get_intra_node_bandwidth() (llm_analysis.analysis.LLMAnalysis method)
get_latency_fwd() (llm_analysis.analysis.LLMAnalysis method)
get_latency_fwd_input_embedding() (llm_analysis.analysis.LLMAnalysis method)
get_latency_fwd_output_embedding_loss() (llm_analysis.analysis.LLMAnalysis method)
get_latency_fwd_per_layer() (llm_analysis.analysis.LLMAnalysis method)
get_latency_fwd_per_layer_attn() (llm_analysis.analysis.LLMAnalysis method)
get_latency_fwd_per_layer_layernorm() (llm_analysis.analysis.LLMAnalysis method)
get_latency_fwd_per_layer_mlp() (llm_analysis.analysis.LLMAnalysis method)
get_latency_fwd_per_layer_tp_comm() (llm_analysis.analysis.LLMAnalysis method)
get_memory_activation_embedding_output() (llm_analysis.analysis.LLMAnalysis method)
get_memory_activation_per_layer() (llm_analysis.analysis.LLMAnalysis method)
get_memory_activation_per_layer_attn() (llm_analysis.analysis.LLMAnalysis method)
get_memory_activation_per_layer_mlp() (llm_analysis.analysis.LLMAnalysis method)
get_memory_activation_per_layernorm() (llm_analysis.analysis.LLMAnalysis method)
get_memory_embedding() (llm_analysis.analysis.LLMAnalysis method)
get_memory_kv_cache_per_layer() (llm_analysis.analysis.LLMAnalysis method)
get_memory_optimizer_state_and_gradient_per_layer() (llm_analysis.analysis.LLMAnalysis method)
get_memory_weight_per_layer() (llm_analysis.analysis.LLMAnalysis method)
get_model_config_by_name() (in module llm_analysis.analysis)
get_model_config_by_name() (in module llm_analysis.config)
get_model_config_from_hf() (in module llm_analysis.config)
get_num_active_params_per_layer() (llm_analysis.analysis.LLMAnalysis method)
get_num_active_params_total() (llm_analysis.analysis.LLMAnalysis method)
get_num_flops_bwd_total() (llm_analysis.analysis.LLMAnalysis method)
get_num_flops_fwd_per_layer() (llm_analysis.analysis.LLMAnalysis method)
get_num_flops_fwd_per_layer_attn() (llm_analysis.analysis.LLMAnalysis method)
get_num_flops_fwd_per_layer_mlp() (llm_analysis.analysis.LLMAnalysis method)
get_num_flops_fwd_total() (llm_analysis.analysis.LLMAnalysis method)
get_num_flops_total_selective_recompute_attn() (llm_analysis.analysis.LLMAnalysis method)
get_num_params_embedding() (llm_analysis.analysis.LLMAnalysis method)
get_num_params_per_layer() (llm_analysis.analysis.LLMAnalysis method)
get_num_params_per_layer_attn() (llm_analysis.analysis.LLMAnalysis method)
get_num_params_per_layer_layernorm() (llm_analysis.analysis.LLMAnalysis method)
get_num_params_per_layer_mlp() (llm_analysis.analysis.LLMAnalysis method)
get_num_params_per_layer_router() (llm_analysis.analysis.LLMAnalysis method)
get_num_params_total() (llm_analysis.analysis.LLMAnalysis method)
get_pivot() (llm_analysis.analysis.LLMAnalysis method)
get_readable_summary_dict() (llm_analysis.analysis.LLMAnalysis method)
get_TFLOPS_per_gpu() (llm_analysis.analysis.LLMAnalysis method)
GPUConfig (class in llm_analysis.analysis)
GPUConfig (class in llm_analysis.config)
H
hbm_bandwidth_in_GB_per_sec (llm_analysis.analysis.GPUConfig attribute)
hidden_dim (llm_analysis.analysis.ModelConfig attribute)
I
infer() (in module llm_analysis.analysis)
inference() (llm_analysis.analysis.LLMAnalysis method)
inter_node_bandwidth_in_GB_per_sec (llm_analysis.analysis.GPUConfig attribute)
intra_node_bandwidth_in_GB_per_sec (llm_analysis.analysis.GPUConfig attribute)
intra_node_min_message_latency (llm_analysis.analysis.GPUConfig attribute)
L
list_dtype_configs() (in module llm_analysis.config)
list_gpu_configs() (in module llm_analysis.config)
list_model_configs() (in module llm_analysis.config)
llm_analysis.analysis (module)
llm_analysis.config (module)
llm_analysis.constant (module)
LLMAnalysis (class in llm_analysis.analysis)
M
max_seq_len (llm_analysis.analysis.ModelConfig attribute)
mem_per_GPU_in_GB (llm_analysis.analysis.GPUConfig attribute)
model_type (llm_analysis.analysis.ModelConfig attribute)
ModelConfig (class in llm_analysis.analysis)
ModelConfig (class in llm_analysis.config)
module
llm_analysis.analysis
llm_analysis.config
llm_analysis.constant
moe_num_experts (llm_analysis.analysis.ModelConfig attribute)
moe_top_k (llm_analysis.analysis.ModelConfig attribute)
N
n_head (llm_analysis.analysis.ModelConfig attribute)
name (llm_analysis.analysis.DtypeConfig attribute)
name (llm_analysis.analysis.Enum attribute)
name (llm_analysis.analysis.GPUConfig attribute)
name (llm_analysis.analysis.ModelConfig attribute)
NONE (llm_analysis.analysis.ActivationRecomputation attribute)
NONE (llm_analysis.analysis.DSZeRO attribute)
num_key_value_groups (llm_analysis.analysis.ModelConfig attribute)
num_key_value_heads (llm_analysis.analysis.ModelConfig attribute)
num_layers (llm_analysis.analysis.ModelConfig attribute)
O
output_summary_dict() (llm_analysis.analysis.LLMAnalysis method)
P
ParallelismConfig (class in llm_analysis.analysis)
ParallelismConfig (class in llm_analysis.config)
peak_fp16_TFLOPS (llm_analysis.analysis.GPUConfig attribute)
peak_i4_TFLOPS (llm_analysis.analysis.GPUConfig attribute)
peak_i8_TFLOPS (llm_analysis.analysis.GPUConfig attribute)
pformat() (in module llm_analysis.analysis)
populate_model_and_gpu_configs() (in module llm_analysis.config)
pp_size (llm_analysis.analysis.ParallelismConfig attribute)
print_config() (llm_analysis.analysis.LLMAnalysis method)
R
read_configs() (in module llm_analysis.config)
S
SELECTIVE (llm_analysis.analysis.ActivationRecomputation attribute)
sp_size (llm_analysis.analysis.ParallelismConfig attribute)
STAGE_1 (llm_analysis.analysis.DSZeRO attribute)
STAGE_2 (llm_analysis.analysis.DSZeRO attribute)
STAGE_3 (llm_analysis.analysis.DSZeRO attribute)
T
total_ordering() (in module llm_analysis.analysis)
tp_size (llm_analysis.analysis.ParallelismConfig attribute)
train() (in module llm_analysis.analysis)
training() (llm_analysis.analysis.LLMAnalysis method)
U
update_dtype_config() (llm_analysis.analysis.LLMAnalysis method)
update_float_efficiency() (llm_analysis.analysis.LLMAnalysis method)
update_gpu_config() (llm_analysis.analysis.LLMAnalysis method)
update_inter_node_memory_efficiency() (llm_analysis.analysis.LLMAnalysis method)
update_intra_node_memory_efficiency() (llm_analysis.analysis.LLMAnalysis method)
update_model_config() (llm_analysis.analysis.LLMAnalysis method)
update_parallelism_config() (llm_analysis.analysis.LLMAnalysis method)
V
value (llm_analysis.analysis.Enum attribute)
vocab_size (llm_analysis.analysis.ModelConfig attribute)
W
weight_bits (llm_analysis.analysis.DtypeConfig attribute)
within_range() (in module llm_analysis.analysis)
llm-analysis
Navigation
Analysis
Configurations
Constants
Related Topics
Documentation overview
Quick search