mirror of https://github.com/hpcaitech/ColossalAI
def print_perf_stats(latency_set, config, bs, warmup=3):
    # trim warmup queries: drop the first `warmup` measurements before aggregating
    latency_set = list(latency_set)
    latency_set = latency_set[warmup:]
    count = len(latency_set)

    if count > 0:
        latency_set.sort()
        avg = sum(latency_set) / count
        num_layers = (
            getattr(config, "num_layers") if hasattr(config, "num_layers") else getattr(config, "num_hidden_layers")
        )
        # rough transformer parameter count: ~12 * hidden_size^2 per layer
        # (4h^2 for the attention projections + 8h^2 for the MLP block)
        num_parameters = num_layers * config.hidden_size * config.hidden_size * 12
        num_bytes = 2  # bytes per parameter in float16

        print("Avg Per Token Latency: {0:8.2f} ms".format(avg * 1000))
        # bandwidth estimate: every parameter is read once per generated token
        print("Avg BW: {0:8.2f} GB/s".format(1 / avg * num_parameters * num_bytes / 1e9))
        # flops estimate: ~2 flops per parameter per token (one multiply-accumulate),
        # scaled by batch size; the factor of 2 here is reused from num_bytes
        print("Avg flops: {0:8.2f} TFlops/s".format(1 / avg * num_parameters * num_bytes * bs / 1e12))
        print("Avg Throughput: tokens/s: {}".format((1000 / (avg * 1000)) * bs))
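
A minimal usage sketch, not from the repository: it assumes a config object exposing hidden_size and one of num_layers / num_hidden_layers (as Hugging Face-style configs do). The SimpleNamespace config and the sleep-based timing loop below are hypothetical placeholders for a real model config and real decoding steps.

import time
from types import SimpleNamespace

# Hypothetical stand-in for a model config; print_perf_stats only reads
# hidden_size and num_layers / num_hidden_layers.
config = SimpleNamespace(num_hidden_layers=24, hidden_size=1024)

# Collect per-token latencies in seconds, timed around each decoding step.
latencies = []
for _ in range(10):
    start = time.time()
    time.sleep(0.01)  # placeholder for one batched decoding step
    latencies.append(time.time() - start)

# With warmup=3 (the default), the first three samples are discarded.
print_perf_stats(latencies, config, bs=8)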