import copy

from torch.utils.data import Dataset, IterableDataset

from labml_helpers.module import M, TypedModuleList


def clone_module_list(module: M, n: int) -> TypedModuleList[M]:
    """Return a `TypedModuleList` of `n` independent deep copies of `module`.

    Each copy is created with `copy.deepcopy`, so the clones share no
    parameters or buffers with `module` or with each other.
    """
    return TypedModuleList([copy.deepcopy(module) for _ in range(n)])


def cycle_dataloader(data_loader):
    """Yield batches from `data_loader` forever.

    When the loader is exhausted, iteration restarts from the beginning,
    so the returned generator never terminates.
    """
    while True:
        for batch in data_loader:
            yield batch


# `MapStyleDataset` (below) converts an `IterableDataset` to a map-style
# dataset so that we can shuffle the dataset. This only works when the
# dataset size is small and can be held in memory.
class MapStyleDataset(Dataset):
    """Convert an `IterableDataset` into a map-style dataset.

    A map-style (indexable) dataset is required for shuffling. The entire
    dataset is materialized in memory, so this only works when the dataset
    is small enough to be held in memory.
    """

    def __init__(self, dataset: IterableDataset):
        # Materialize the iterable once so samples can be accessed by index.
        self.data = list(dataset)

    def __getitem__(self, idx: int):
        """Return the sample at index `idx`."""
        return self.data[idx]

    def __iter__(self):
        """Iterate over the samples in their original order."""
        return iter(self.data)

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.data)