[bug] workaround for idx fix

pull/5749/head
hxwang 6 months ago
parent e0dde8fda5
commit 936dd96dbb

@@ -57,6 +57,7 @@ class GeminiManager:
         self._comp_cuda_demand_time = 0
 
     def reset_attributes(self):
+        assert self._compute_idx + 1 == len(self._compute_list)
         self._compute_idx = -1
         self._h2d_volume = 0
         self._d2h_volume = 0
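The added assert is the workaround named in the commit title: before reset_attributes rewinds _compute_idx to -1 for the next iteration, it verifies the index actually consumed every entry in _compute_list, so a skipped or double-counted step fails loudly at iteration end instead of silently desynchronizing later decisions. A minimal sketch of the invariant, with a hypothetical StepTracker standing in for GeminiManager:

    class StepTracker:
        """Toy stand-in for GeminiManager's compute-list bookkeeping (illustrative, not the real class)."""

        def __init__(self, compute_list):
            self._compute_list = compute_list
            self._compute_idx = -1  # -1 means "before the first step"

        def step(self):
            # Each compute step advances the cursor by exactly one.
            self._compute_idx += 1
            return self._compute_list[self._compute_idx]

        def reset_attributes(self):
            # Same invariant the commit adds: the cursor must have consumed
            # every recorded step before we rewind for the next iteration.
            assert self._compute_idx + 1 == len(self._compute_list)
            self._compute_idx = -1


    tracker = StepTracker(["fwd.0", "fwd.1", "bwd.0"])
    for _ in range(3):
        tracker.step()
    tracker.reset_attributes()  # passes; skipping a step() would trip the assert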

@@ -145,6 +145,8 @@ class AutoPlacementPolicy(PlacementPolicy):
         self._warmup_non_model_data_ratio = warmup_non_model_data_ratio
         self._steady_cuda_cap_ratio = steady_cuda_cap_ratio
+
+        self.__avail_cuda_model_data_for_prefetch = None
 
     def evict_tensors(
         self,
         can_evict_chunks: List[Chunk],
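Initializing __avail_cuda_model_data_for_prefetch to None gives the prefetch path an unambiguous "not computed yet" sentinel; the two hunks below show where the value is produced and where it is consumed.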
@@ -204,6 +206,7 @@ class AutoPlacementPolicy(PlacementPolicy):
                 f"Adjust layout failed! No enough CUDA memory! "
                 f"Need {to_free_cuda_model_data}, freed {freed_cuda_model_data}"
             )
+        self.__avail_cuda_model_data_for_prefetch = avail_cuda_model_data - freed_cuda_model_data
         return freed_cuda_model_data, time() - start
 
     @staticmethod
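evict_tensors already knows exactly how much CUDA model-data headroom remains once eviction finishes, so the commit stashes avail_cuda_model_data - freed_cuda_model_data on the policy object for the prefetch path to read back. A minimal sketch of this produce-once/consume-once handoff, assuming simplified names (BudgetCache, evict, take_prefetch_budget are illustrative, not the ColossalAI API):

    class BudgetCache:
        """Illustrative producer/consumer handoff, not the real PlacementPolicy."""

        def __init__(self):
            self.__budget = None  # empty until the producer runs

        def evict(self, avail: int, freed: int) -> None:
            # Producer: record the headroom left after this eviction pass.
            self.__budget = avail - freed

        def take_prefetch_budget(self) -> int:
            # Consumer: read once, then invalidate so a stale value
            # can never be applied to a later step.
            assert self.__budget is not None, "evict() must run first"
            budget, self.__budget = self.__budget, None
            return budget


    cache = BudgetCache()
    cache.evict(avail=1024, freed=256)
    print(cache.take_prefetch_budget())  # 768; a second call would trip the assert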
@@ -234,14 +237,9 @@ class AutoPlacementPolicy(PlacementPolicy):
     ) -> List[Chunk]:
         if is_warmup:  # no prefetch during warmup since we need compute_list
             return []
-        # modified from self.evict_tensors
-        cuda_capacity = self._steady_cuda_cap_ratio * colo_device_memory_capacity(
-            get_accelerator().get_current_device()
-        )
-        max_cuda_non_model_data_per_period = self.mem_stats_collector.next_period_non_model_data_usage("cuda")
-        used_cuda_model_data = self.chunk_manager.total_mem["cuda"]
-        total_cuda_model_data = cuda_capacity - max_cuda_non_model_data_per_period
-        avail_cuda_model_data = total_cuda_model_data - used_cuda_model_data
+
+        avail_cuda_model_data = self.__avail_cuda_model_data_for_prefetch
+        self.__avail_cuda_model_data_for_prefetch = None  # in case of double use
 
         prefetch_chunk_memory = 0
         can_prefetch = self.max_prefetch - len(async_works)
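The deleted block re-derived availability from _steady_cuda_cap_ratio, the stats collector, and the chunk manager, which could disagree with the budget evict_tensors had just enforced; reading the cached value keeps the two paths consistent, and resetting it to None right away makes any read-before-evict or double read fail fast rather than prefetch against a stale budget. The surviving counters then gate chunk selection; a sketch of the loop they feed, with hypothetical (chunk size, chunk) pairs standing in for the real compute-list scan:

    def select_prefetch_chunks(upcoming, avail_cuda_model_data, max_prefetch, async_works):
        # Sketch only: the real get_prefetch_chunks walks the compute list
        # from the current index and skips chunks already resident on CUDA.
        prefetch = []
        prefetch_chunk_memory = 0
        can_prefetch = max_prefetch - len(async_works)  # in-flight async works count against the cap
        for chunk_mem, chunk in upcoming:
            if len(prefetch) >= can_prefetch:
                break  # prefetch queue is full
            if prefetch_chunk_memory + chunk_mem > avail_cuda_model_data:
                continue  # would overshoot the budget eviction left us
            prefetch_chunk_memory += chunk_mem
            prefetch.append(chunk)
        return prefetch


    print(select_prefetch_chunks([(256, "c0"), (512, "c1"), (128, "c2")],
                                 avail_cuda_model_data=640, max_prefetch=2, async_works=[]))
    # ['c0', 'c2']: c1 would exceed the 640-byte budget, c2 still fits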
