mirror of https://github.com/InternLM/InternLM

commit 9e6e7986b6
parent 3607548265

    refactor code for log
@@ -364,6 +364,11 @@ def launch(
             f"data parallel size: {gpc.data_parallel_size}, pipeline parallel size: {gpc.pipeline_parallel_size}, "
             f"tensor parallel size: {gpc.tensor_parallel_size}",
         )
+        logger.info(
+            f"Creating MoE with num_experts: {gpc.config.model.num_experts} | "
+            f"expert parallel size: {gpc.expert_parallel_size} | "
+            f"number of local experts: {gpc.config.model.num_experts//gpc.expert_parallel_size}"
+        )
 
 
 def launch_from_slurm(
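The new launch-time message reports how experts are sharded: they are split
evenly across the expert parallel group, so each rank owns
num_experts // expert_parallel_size local experts. A minimal sketch of that
arithmetic, with a hypothetical helper name that is not part of this commit:

    # Hypothetical helper; illustrates the integer division in the log line.
    def num_local_experts(num_experts: int, expert_parallel_size: int) -> int:
        # Experts must divide evenly across the expert parallel group,
        # otherwise some ranks would hold more experts than others.
        assert num_experts % expert_parallel_size == 0
        return num_experts // expert_parallel_size

    # For example, 8 experts with expert parallel size 2 -> 4 experts per rank.
    assert num_local_experts(8, 2) == 4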
@@ -69,11 +69,6 @@ class MoE(torch.nn.Module):
         self.num_experts = num_experts
         self.num_local_experts = num_experts // self.ep_size
 
-        if gpc.is_rank_for_log():
-            logger.info(  # pylint: disable=W1203
-                f"Creating MoE layer with num_experts: {num_experts} | num_local_experts:"
-                f"{self.num_local_experts} | expert_parallel_size: {self.ep_size}"
-            )
         assert noisy_gate_policy is None or noisy_gate_policy in ["None", "Jitter", "RSample"], (
             "Unsupported noisy_gate_policy: " + noisy_gate_policy
        )
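Net effect of the commit: the per-layer MoE log in MoE.__init__ is removed and
a single summary is emitted once at launch instead. A minimal runnable sketch
of the rank-gating pattern, assuming a RANK environment variable as a stand-in
for gpc.is_rank_for_log() (helper names here are illustrative, not InternLM's
API):

    import logging
    import os

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    def is_rank_for_log() -> bool:
        # Stand-in for gpc.is_rank_for_log(): only rank 0 emits the message,
        # so a multi-process job prints the summary once rather than per rank.
        return int(os.environ.get("RANK", "0")) == 0

    def log_moe_config(num_experts: int, expert_parallel_size: int) -> None:
        if is_rank_for_log():
            logger.info(
                "Creating MoE with num_experts: %d | expert parallel size: %d"
                " | number of local experts: %d",
                num_experts,
                expert_parallel_size,
                num_experts // expert_parallel_size,
            )

    log_moe_config(num_experts=8, expert_parallel_size=2)

Using %-style lazy formatting also sidesteps pylint's W1203
(logging-fstring-interpolation), the warning the removed code suppressed
inline.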
|