Evaluate GPT-NeoX in half precision on a test suite

This code evaluates GPT-NeoX, in half precision, on a suite of tasks.

import torch
from torch import nn

from labml_nn.neox.evaluation import run_eval_harness
from labml_nn.neox.model import LayerGenerator
def main():
    """Evaluate GPT-NeoX in half precision (float16) on the `lambada` task.

    Loads all model layers onto a single CUDA device, wraps them in an
    `nn.Sequential`, and prints the results of the evaluation harness.
    """
    # Device to load the model and run evaluation on
    device = torch.device('cuda:0')

    # Load layers in float16; `is_clone_layers=True` reuses layer structure
    # while loading, and `filter_layers=None` keeps every layer.
    layers = list(LayerGenerator(is_clone_layers=True,
                                 filter_layers=None,
                                 dtype=torch.float16,
                                 device=device,
                                 ).load())

    # Create `nn.Sequential` model from the loaded layers
    model = nn.Sequential(*layers)

    # Run the evaluation harness on the `lambada` task and print the results
    print(run_eval_harness(model, 'half_precision', ['lambada'], device))

# Run the evaluation only when executed as a script (not on import)
if __name__ == '__main__':
    main()