Source code for llm_analysis.config

# Copyright 2023 Cheng Li
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import json
import logging
import os
from dataclasses import dataclass
from pathlib import Path

import fire

from llm_analysis.constant import (
    DTYPE_CONFIG_DIR_NAME,
    GPU_CONFIG_DIR_NAME,
    MODEL_CONFIG_DIR_NAME,
)
from llm_analysis.logger import logger

# `transformers` is an optional dependency. When it cannot be imported,
# AutoConfig is set to None so this module still imports; code that needs
# AutoConfig checks for None before using it.
try:
    from transformers import AutoConfig
except ImportError:
    logger.warning(
        f"cannot import AutoConfig from transformers, `transformers` is not installed, HuggingFace will not be available to use for model config retrieval"
    )
    AutoConfig = None


class EnhancedJSONEncoder(json.JSONEncoder):
    """JSON encoder that can also serialize dataclass instances.

    Dataclass instances are converted to plain dicts with
    `dataclasses.asdict`; every other object is left to `json.JSONEncoder`.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for `o`.

        Args:
            o: an object the standard encoder could not serialize

        Returns:
            dict: field name to value mapping when `o` is a dataclass instance

        Raises:
            TypeError: raised by the base class when `o` is not a dataclass instance
        """
        # `is_dataclass` is also True for dataclass *types*, which `asdict`
        # rejects; only instances take this path so that types get the
        # standard "not JSON serializable" error from the base class.
        if dataclasses.is_dataclass(o) and not isinstance(o, type):
            return dataclasses.asdict(o)
        return super().default(o)


@dataclass
class ModelConfig:
    """Architecture description of a transformer model.

    Fields left as None are derived in `__post_init__`:
      * `ffn_embed_dim` / `expansion_ratio`: whichever is missing is computed
        from the other and `hidden_dim`; if both are missing the ratio is 4.
      * `num_key_value_heads`: defaults to `n_head`.
      * `num_key_value_groups`: always recomputed as
        `n_head // num_key_value_heads`.
    """
    name: str  # model config name
    num_layers: int  # number of transformer layers (blocks)
    n_head: int  # number of attention heads
    hidden_dim: int  # hidden dimension
    vocab_size: int  # vocabulary size
    max_seq_len: int = None  # max sequence length
    num_key_value_heads: int = None  # the number of key value heads implementing Grouped Query Attention (GQA), If it is not specified, will default to n_head. If `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. See https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/configuration_llama.py for details
    num_key_value_groups: int = None  # number of key value groups for GQA
    ffn_embed_dim: int = (
        None  # hidden dimension of FFN, default to 4 * hidden_dim
    )
    expansion_ratio: int = None
    model_type: str = (
        None  # model type as tagged on Hugging Face (e.g., gpt2, opt, llama.)
    )
    moe_num_experts: int = 1  # number of experts for mixture of experts model
    moe_top_k: int = 1  # top k experts for mixture of experts model

    def __post_init__(self):
        """Derive the fields that were left unset and validate divisibility.

        Raises:
            AssertionError: if `ffn_embed_dim` is not a multiple of
                `hidden_dim` (only checked when `expansion_ratio` has to be
                derived), or if `n_head` is not a multiple of
                `num_key_value_heads`.
        """
        if self.ffn_embed_dim is None and self.expansion_ratio is None:
            self.ffn_embed_dim = self.hidden_dim * 4
            self.expansion_ratio = 4
        elif self.ffn_embed_dim is None:
            self.ffn_embed_dim = self.hidden_dim * self.expansion_ratio
        elif self.expansion_ratio is None:
            # Raised explicitly (not via `assert`) so the check is not
            # stripped when Python runs with -O.
            if self.ffn_embed_dim % self.hidden_dim != 0:
                raise AssertionError(
                    f"ffn_embed_dim ({self.ffn_embed_dim}) must be divisible by hidden_dim ({self.hidden_dim})"
                )
            # Divisibility was just verified, so floor division is exact and
            # keeps the field an int as declared.
            self.expansion_ratio = self.ffn_embed_dim // self.hidden_dim

        if self.num_key_value_heads is None:
            self.num_key_value_heads = self.n_head
        if self.n_head % self.num_key_value_heads != 0:
            raise AssertionError(
                f"n_head ({self.n_head}) must be divisible by num_key_value_heads ({self.num_key_value_heads})"
            )
        # Exact for the same reason as above; stays an int.
        self.num_key_value_groups = self.n_head // self.num_key_value_heads

    def __str__(self):
        """Return the string form of the config's field dict."""
        return str(dataclasses.asdict(self))


@dataclass
class GPUConfig:
    """Hardware characteristics of one GPU type and its interconnects.

    INT8 and INT4 peak throughput default to 2x and 4x the FP16 peak when
    they are not given explicitly.
    """
    # GPU config name
    name: str
    # memory per GPU in GB
    mem_per_GPU_in_GB: float
    # GPU HBM bandwidth in GB/s
    hbm_bandwidth_in_GB_per_sec: float
    # intra node GPU bandwidth in GB/s
    intra_node_bandwidth_in_GB_per_sec: float
    # minimum intra node message latency in seconds
    intra_node_min_message_latency: float
    # peak Tensor TFLOPS for FP16
    peak_fp16_TFLOPS: float
    # peak Tensor TFLOPS for INT8
    peak_i8_TFLOPS: float = None
    # peak Tensor TFLOPS for INT4
    peak_i4_TFLOPS: float = None
    # inter node bandwidth in GB/s, assuming Mellanox 200Gbps HDR Infiniband
    inter_node_bandwidth_in_GB_per_sec: float = 200

    def __post_init__(self):
        """Fill in missing INT8/INT4 peaks from the FP16 peak."""
        fp16_peak = self.peak_fp16_TFLOPS
        if self.peak_i8_TFLOPS is None:
            self.peak_i8_TFLOPS = 2 * fp16_peak
        if self.peak_i4_TFLOPS is None:
            self.peak_i4_TFLOPS = 4 * fp16_peak


@dataclass
class DtypeConfig:
    """Bit widths used for weights, activations and the embedding."""
    # dtype config name
    name: str = "w16a16e16"
    # number of bits for weight
    weight_bits: int = 16
    # number of bits for activation
    activation_bits: int = 16
    # number of bits for the embedding
    embedding_bits: int = 16


@dataclass
class ParallelismConfig:
    """Sizes of each parallelism dimension.

    `sp_size` falls back to `tp_size` when it is not given.
    """
    # tensor parallelism size, Megatron-LM tensor parallelism implementation
    tp_size: int = 1
    # pipeline parallelism size, Megatron-LM pipeline parallelism implementation
    pp_size: int = 1
    # data parallelism size, DeepSpeed Zero parallelism implementation
    dp_size: int = 1
    # expert parallelism size
    ep_size: int = 1
    # sequence parallelism size, Megatron-LM sequence parallelism implementation
    sp_size: int = None

    def __post_init__(self):
        """Default the sequence parallelism size to the tensor parallelism size."""
        self.sp_size = self.tp_size if self.sp_size is None else self.sp_size


# model name -> ModelConfig mapping, populated from MODEL_CONFIG_DIR_NAME
# by populate_model_and_gpu_configs()
model_configs = {}

# gpu name -> GPUConfig mapping, populated from GPU_CONFIG_DIR_NAME
# by populate_model_and_gpu_configs()
# https://gist.github.com/joshlk/bbb1aca6e70b11d251886baee6423dcb
gpu_configs = {}

# dtype name -> DtypeConfig mapping, populated from DTYPE_CONFIG_DIR_NAME
# by populate_model_and_gpu_configs()
dtype_configs = {}


def canonical_model_name(name: str) -> str:
    """Return `name` with every "/" replaced by "_"."""
    return "_".join(name.split("/"))


def dump_configs(configs: dict, config_dir_name: str) -> None:
    """Write each config to `<config_dir_name>/<key>.json`.

    The directory is resolved relative to the directory containing this
    module. Files are written with `EnhancedJSONEncoder` and 4-space indent.

    Args:
        configs (dict): a dict of configs
        config_dir_name (str): the name of the output directory
    """
    module_dir = Path(__file__).parent
    for config_key, config in configs.items():
        target = module_dir.joinpath(config_dir_name, f"{config_key}.json")
        with target.open("w") as out_file:
            json.dump(config, out_file, cls=EnhancedJSONEncoder, indent=4)
    logger.info(f"dumped {len(configs)} configs to {config_dir_name}")


def get_model_config_from_hf(name: str) -> ModelConfig:
    """Build a `ModelConfig` from a HuggingFace `AutoConfig`.

    The HuggingFace config is loaded with `trust_remote_code=True`. Layer
    count, head count and hidden size are each read from the first attribute
    that exists among two known spellings.

    Args:
        name (str): the model id of a pretrained model configuration hosted inside a model repo on huggingface.co

    Returns:
        ModelConfig: a dataclass for llm-analysis model config, or None (after
        logging a warning) when `transformers` is not installed

    Raises:
        Exception: if neither spelling of a required attribute is present
    """
    if AutoConfig is None:
        logger.warning(
            "cannot import AutoConfig from transformers, `transformers` is not installed, HuggingFace will not be available to use for model config retrieval"
        )
        return None

    hf_config = AutoConfig.from_pretrained(name, trust_remote_code=True)

    def first_present(primary, fallback):
        # Return the value of the first attribute that exists on hf_config.
        for attr in (primary, fallback):
            if hasattr(hf_config, attr):
                return getattr(hf_config, attr)
        raise Exception(
            f"hf config does not have {primary} or {fallback}, check the config.json file"
        )

    num_layers = first_present("num_hidden_layers", "n_layers")
    n_head = first_present("num_attention_heads", "n_heads")
    hidden_dim = first_present("hidden_size", "d_model")

    return ModelConfig(
        name=canonical_model_name(name),
        max_seq_len=getattr(hf_config, "max_position_embeddings", None),
        num_layers=num_layers,
        n_head=n_head,
        hidden_dim=hidden_dim,
        vocab_size=hf_config.vocab_size,
        model_type=getattr(hf_config, "model_type", None),
        num_key_value_heads=getattr(hf_config, "num_key_value_heads", None),
    )


def read_configs(config_dir_name: str, type="model") -> dict:
    """Read every JSON config file in a directory into config dataclasses.

    Only regular files whose name ends with ".json" are read (the format
    `dump_configs` writes); sub-directories and other files are skipped.
    Files are visited in sorted filename order, so when two files declare the
    same config name the result is deterministic: the first one wins.

    Args:
        config_dir_name (str): path of the directory holding the config files
        type (str, optional): which dataclass to build, one of "model", "gpu"
            or "dtype". Defaults to "model".

    Returns:
        dict: mapping from config name to the config dataclass instance

    Raises:
        AssertionError: if `type` is not one of the known config types
    """
    config_classes = {
        "model": ModelConfig,
        "gpu": GPUConfig,
        "dtype": DtypeConfig,
    }
    # Raised explicitly (not via `assert`) so the check survives `python -O`,
    # and up front so a bad type is reported before any file is opened.
    if type not in config_classes:
        raise AssertionError(f"unknown config type when reading: {type}")
    config_class = config_classes[type]

    configs = {}
    # os.listdir returns entries in arbitrary order; sort for determinism.
    for filename in sorted(os.listdir(config_dir_name)):
        filepath = os.path.join(config_dir_name, filename)
        if not (filename.endswith(".json") and os.path.isfile(filepath)):
            continue
        with open(filepath, "r") as f:
            config_json = json.load(f)
        config = config_class(**config_json)
        if config.name not in configs:
            configs[config.name] = config
    logger.info(f"Loaded {len(configs)} configs from {config_dir_name}")
    return configs


def get_hf_models_by_type_and_task(
    model_type: str = "opt",
    task: str = None,
    min_downloads: int = 10000,
    top_k: int = 6,
    full_info: bool = False,
) -> list:
    """Get a HuggingFace model name list by model type and task, filtered by popularity
    (minimal number of downloads)

    Args:
        model_type (str, optional): model type, e.g., gpt, llama, opt, bloom. Defaults to "opt".
        task (str, optional): model task, e.g., text-generation, fill-mask; when None or empty, models are not filtered by task. Defaults to None.
        min_downloads (int, optional): minimal number of downloads to filter the models. Defaults to 10000.
        top_k (int, optional): maximum number of models to return, most downloaded first; values below 1 are treated as 1. Defaults to 6.
        full_info (bool, optional): whether to return full model information, if False, just return the list of model names. Defaults to False.

    Returns:
        list: a list of HuggingFace model information

    Raises:
        ImportError: if `huggingface_hub` is not installed
    """
    try:
        from huggingface_hub import HfApi
    except ImportError:
        logger.error(
            "cannot import HfApi from huggingface_hub, please install huggingface_hub first"
        )
        # Nothing below can run without HfApi; surface the real cause instead
        # of failing later on an unbound name.
        raise
    api = HfApi()
    # list_models may return a lazy iterable; materialize it so it can be
    # counted and sorted.
    models = list(api.list_models(filter=model_type))
    logger.info(f"found {len(models)} models of type {model_type}")

    def download_count(model):
        # A missing or None download count is treated as 0.
        return getattr(model, "downloads", 0) or 0

    # sort by number of downloads
    ordered = sorted(models, key=download_count, reverse=True)
    ret = [
        m for m in ordered
        if download_count(m) > min_downloads and (
            not task or getattr(m, "pipeline_tag", None) == task)
    ]
    top_k = max(1, min(top_k, len(ret)))
    logger.info(f"take top {top_k} of the list of found models")
    if full_info:
        return ret[:top_k]
    return [r.modelId for r in ret][:top_k]


def populate_model_and_gpu_configs() -> None:
    """Load the model, gpu and dtype registries from their JSON directories.

    Each directory is resolved relative to the directory containing this
    module, and the three module-level mappings are replaced with the result.
    """
    global model_configs, gpu_configs, dtype_configs
    package_dir = Path(__file__).parent

    model_configs = read_configs(package_dir / Path(MODEL_CONFIG_DIR_NAME),
                                 type="model")
    gpu_configs = read_configs(package_dir / Path(GPU_CONFIG_DIR_NAME),
                               type="gpu")
    dtype_configs = read_configs(package_dir / Path(DTYPE_CONFIG_DIR_NAME),
                                 type="dtype")

    logger.info(
        f"Populated {len(model_configs)} model configs, {len(gpu_configs)} gpu configs, {len(dtype_configs)} dtype configs"
    )


def list_model_configs() -> None:
    """Log the names of all model configs currently in `model_configs`."""
    available = model_configs.keys()
    logger.info(available)


def list_gpu_configs() -> None:
    """Log the names of all gpu configs currently in `gpu_configs`."""
    available = gpu_configs.keys()
    logger.info(available)


def list_dtype_configs() -> None:
    """Log the names of all data type configs currently in `dtype_configs`."""
    available = dtype_configs.keys()
    logger.info(available)


def get_model_config_by_name(name: str) -> ModelConfig:
    """Get model config from the populated mapping by name, if not found, try to get it
    from HuggingFace.

    Args:
        name (str): a key of `model_configs`, or a HuggingFace model id

    Returns:
        ModelConfig: the matching model config

    Raises:
        ValueError: if `name` is not in `model_configs` and
            `get_model_config_from_hf` returns None for it
    """
    if name in model_configs:
        return model_configs[name]
    model_config = get_model_config_from_hf(name)
    if model_config is None:
        # Must be a real exception instance: raising a bare string is itself
        # a TypeError. ValueError matches get_gpu_config_by_name.
        raise ValueError(
            f"unknown model config name: {name}, and none found on HuggingFace Hub"
        )
    return model_config


def get_gpu_config_by_name(name: str) -> GPUConfig:
    """Look up a gpu config in `gpu_configs`.

    Raises:
        ValueError: if `name` is not a known gpu config name
    """
    if name in gpu_configs:
        return gpu_configs[name]
    raise ValueError(f"unknown gpu config name: {name}")


def get_dtype_config_by_name(name: str) -> DtypeConfig:
    """Look up a data type config in `dtype_configs`.

    Raises:
        ValueError: if `name` is not a known data type config name
    """
    if name in dtype_configs:
        return dtype_configs[name]
    raise ValueError(f"unknown quant config name: {name}")


def dump_model_config_by_name(name: str,
                              config_dir_name: str = MODEL_CONFIG_DIR_NAME
                              ) -> None:
    """Resolve a model config by name and write it as JSON to `config_dir_name`.

    The config is resolved with `get_model_config_by_name` and written with
    `dump_configs`, keyed by the resolved config's own `name`.

    Args:
        name (str): model name, e,g., gpt2, facebook/opt-1.3b, decapoda-research/llama-7b-hf, etc.
        config_dir_name (str, optional): the name of the output directory. Defaults to MODEL_CONFIG_DIR_NAME.
    """
    resolved = get_model_config_by_name(name)
    single_entry = {resolved.name: resolved}
    dump_configs(single_entry, config_dir_name)
    logger.info(f"dumped model config {resolved} to {config_dir_name}")


def dump_hf_model_configs_by_type_and_task(
    model_type: str = "opt",
    task: str = None,
    min_downloads: int = 10000,
    top_k: int = 6,
    config_dir_name: str = MODEL_CONFIG_DIR_NAME,
) -> None:
    """Dump model configs from HuggingFace by type and task to `config_dir_name`

    The model names come from `get_hf_models_by_type_and_task`; each one is
    then written with `dump_model_config_by_name`.

    Args:
        model_type (str, optional): model type, e.g., gpt, llama, opt, bloom. Defaults to "opt".
        task (str, optional): model task, e.g., text-generation, fill-mask. Defaults to None.
        min_downloads (int, optional): minimal number of downloads to filter the models. Defaults to 10000.
        top_k (int, optional): passed through to `get_hf_models_by_type_and_task` to limit how many models are taken. Defaults to 6.
        config_dir_name (str, optional): the name of the output directory. Defaults to MODEL_CONFIG_DIR_NAME.
    """
    selection = dict(
        model_type=model_type,
        task=task,
        min_downloads=min_downloads,
        top_k=top_k,
        full_info=False,
    )
    model_names = get_hf_models_by_type_and_task(**selection)
    for model_name in model_names:
        dump_model_config_by_name(model_name, config_dir_name)
    logger.info(
        f"In total, dumped {len(model_names)} model configs of model_type={model_type}, task={task}, to {config_dir_name}"
    )


# Load the registries as soon as the module is imported so that the
# lookup helpers work without an explicit setup call.
populate_model_and_gpu_configs()

if __name__ == "__main__":
    logger.setLevel(logging.getLevelName("INFO"))

    def _serialize_result(result):
        # Dataclass results are printed as indented JSON; anything else is
        # handed back to fire unchanged.
        if dataclasses.is_dataclass(result):
            return json.dumps(result, cls=EnhancedJSONEncoder, indent=4)
        return result

    # command name -> callable exposed on the command line
    cli_commands = {
        "list_model_configs": list_model_configs,
        "list_gpu_configs": list_gpu_configs,
        "list_dtype_configs": list_dtype_configs,
        "get_model_config_by_name": get_model_config_by_name,
        "get_gpu_config_by_name": get_gpu_config_by_name,
        "get_dtype_config_by_name": get_dtype_config_by_name,
        "get_hf_models_by_type_and_task": get_hf_models_by_type_and_task,
        "dump_model_config_by_name": dump_model_config_by_name,
        "dump_hf_model_configs_by_type_and_task":
        dump_hf_model_configs_by_type_and_task,
    }
    fire.Fire(cli_commands, serialize=_serialize_result)
Source code for llm_analysis.config

llm-analysis

Navigation

Related Topics