mirror of https://github.com/InternLM/InternLM
feat(docs): fix demo log
commit a09b4d7d00
@@ -145,3 +145,4 @@ core.*
llm_ckpts
events.*
memory_trace
RUN*/
@@ -199,4 +199,4 @@
2023-11-10T15:06:06.988+08:00 INFO [training_internlm.py, line 601, in record_current_batch_training_metrics] - pid=78690 : tflops=127.89743136367036 step=2 loss=10.111495971679688 tgs (tokens/gpu/second)=1031.72 tgs/last_tgs_1=1031.72 tgs/tgs_all=600.43 tgs/tgs_avg=767.12 tgs/tgs_SMA=600.43 tgs/last_tgs_10=0 tgs/last_tgs_50=0 lr=8.000000000000001e-07 loss_scale=65536.0 grad_norm={'0_default': 76.99318912653898, '1_fp32': 0.0} micro_num=4 num_consumed_tokens=196608 inf_nan_skip_batches=0 num_samples_in_batch=17 largest_length=2048 largest_batch=5 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.4 acc=0.0704 perplexity=25907.623 acc/en=0.0704 acc/cn=0.0 acc/code=0.0 tokens/en=60244 tokens/cn=0 tokens/code=0 loss_from_metric=10.1623 loss/en=10.1623 loss/cn=nan loss/code=nan
2023-11-10T15:06:10.994+08:00 INFO [training_internlm.py, line 601, in record_current_batch_training_metrics] - pid=78690 : tflops=127.89845291183941 step=3 loss=8.848427772521973 tgs (tokens/gpu/second)=1031.73 tgs/last_tgs_1=1031.73 tgs/tgs_all=670.5 tgs/tgs_avg=833.27 tgs/tgs_SMA=670.5 tgs/last_tgs_10=0 tgs/last_tgs_50=0 lr=1.0000000000000002e-06 loss_scale=65536.0 grad_norm={'0_default': 60.47092413727133, '1_fp32': 0.0} micro_num=4 num_consumed_tokens=262144 inf_nan_skip_batches=0 num_samples_in_batch=17 largest_length=2048 largest_batch=5 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.41 acc=0.0783 perplexity=7380.229 acc/en=0.0783 acc/cn=0.0 acc/code=0.0 tokens/en=60328 tokens/cn=0 tokens/code=0 loss_from_metric=8.9066 loss/en=8.9066 loss/cn=nan loss/code=nan
2023-11-10T15:06:15.041+08:00 INFO [training_internlm.py, line 601, in record_current_batch_training_metrics] - pid=78690 : tflops=126.55593705224216 step=4 loss=7.509810924530029 tgs (tokens/gpu/second)=1020.9 tgs/last_tgs_1=1020.9 tgs/tgs_all=719.92 tgs/tgs_avg=870.8 tgs/tgs_SMA=719.92 tgs/last_tgs_10=0 tgs/last_tgs_50=0 lr=1.2000000000000002e-06 loss_scale=65536.0 grad_norm={'0_default': 42.36608180721121, '1_fp32': 0.0} micro_num=4 num_consumed_tokens=327680 inf_nan_skip_batches=0 num_samples_in_batch=22 largest_length=1893 largest_batch=8 smallest_batch=4 adam_beta2=0.95 fwd_bwd_time=3.43 acc=0.0706 perplexity=2728.5764 acc/en=0.0706 acc/cn=0.0 acc/code=0.0 tokens/en=61028 tokens/cn=0 tokens/code=0 loss_from_metric=7.9115 loss/en=7.9115 loss/cn=nan loss/code=nan
2023-11-10T15:06:19.051+08:00 INFO [training_internlm.py, line 601, in record_current_batch_training_metrics] - pid=78690 : tflops=127.79902453659938 step=5 loss=7.049621105194092 tgs (tokens/gpu/second)=1030.92 tgs/last_tgs_1=1030.93 tgs/tgs_all=758.03 tgs/tgs_avg=897.49 tgs/tgs_SMA=758.03 tgs/last_tgs_10=0 tgs/last_tgs_50=0 lr=1.4000000000000001e-06 loss_scale=65536.0 grad_norm={'0_default': 32.49298677335042, '1_fp32': 0.0} micro_num=4 num_consumed_tokens=393216 inf_nan_skip_batches=0 num_samples_in_batch=13 largest_length=2048 largest_batch=4 smallest_batch=3 adam_beta2=0.95 fwd_bwd_time=3.42 acc=0.0726 perplexity=1169.7916 acc/en=0.0726 acc/cn=0.0 acc/code=0.0 tokens/en=61004 tokens/cn=0 tokens/code=0 loss_from_metric=7.0646 loss/en=7.0646 loss/cn=nan loss/code=nan
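The demo log above is internally consistent in a couple of ways worth checking. A minimal sanity check, using only values copied from the log (the GPU count is an inference from the numbers, not stated anywhere in it):

```python
import math

# 1) perplexity tracks exp(loss_from_metric), up to the 4-decimal rounding
#    of the logged loss (values taken from steps 2 and 3 above):
for step, loss in ((2, 10.1623), (3, 8.9066)):
    print(step, math.exp(loss))  # ~25894.7 and ~7380.3 vs logged 25907.623 / 7380.229

# 2) tgs means tokens/gpu/second: num_consumed_tokens grows by 65536 per
#    step, so at tgs ~= 1031.7 each step costs ~63.5 GPU-seconds -- i.e.
#    16 GPUs at the ~4 s/step visible in the timestamps (an inference,
#    not something the log states).
tokens_per_step = 262144 - 196608
print(tokens_per_step / 1031.72)  # ~63.52
```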
@@ -226,6 +226,7 @@ def get_train_data_loader(
    dataset_types = list(get_dataset_type_ids_map(train_folder).keys())

    if not train_folder:
        dataset_types = ["en", "cn", "code"]
        train_ds = RandomDataset(num_samples=1000000, max_len=data_cfg.seq_len)
        if data_cfg.pack_sample_into_one:
            train_ds = PackedDatasetWithoutCuSeqlen(
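When train_folder is unset, the loader above falls back to synthetic data via RandomDataset. The real class ships with InternLM; the following is only a minimal sketch of the idea (the class name, vocab-size default, and returned fields are illustrative assumptions, not InternLM's actual API):

```python
import torch
from torch.utils.data import Dataset

class RandomTokenDataset(Dataset):
    """Illustrative stand-in for a synthetic dataset: reproducible random
    token-id sequences capped at max_len. Not InternLM's RandomDataset."""

    def __init__(self, num_samples: int, max_len: int, vocab_size: int = 103168):
        self.num_samples = num_samples
        self.max_len = max_len
        self.vocab_size = vocab_size

    def __len__(self) -> int:
        return self.num_samples

    def __getitem__(self, index: int):
        # Seed per index so a given sample is stable across epochs/workers.
        g = torch.Generator().manual_seed(index)
        length = int(torch.randint(2, self.max_len + 1, (1,), generator=g))
        tokens = torch.randint(0, self.vocab_size, (length,), generator=g)
        return {"tokens": tokens.tolist(), "type_id": 0}

# Mirrors the call in the hunk, with data_cfg.seq_len assumed to be 2048:
train_ds = RandomTokenDataset(num_samples=1000000, max_len=2048)
```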