16 KiB
1 | module | num_params | num_bytes | num_flops |
---|---|---|---|---|
2 | model.embed_tokens | 98500608 | 197001216 | 1 |
3 | model.embed_dropout | 0 | 0 | 0 |
4 | model.layers.0.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
5 | model.layers.0.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
6 | model.layers.0.self_attn.rotary_emb | 0 | 0 | 0 |
7 | model.layers.0.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
8 | model.layers.0.mlp.down_proj | 25165824 | 50331648 | 50331648 |
9 | model.layers.0.mlp.activation_fn | 0 | 0 | 18432 |
10 | model.layers.0.input_layernorm | 3072 | 6144 | 12288 |
11 | model.layers.0.resid_attn_dropout | 0 | 0 | 0 |
12 | model.layers.0.resid_mlp_dropout | 0 | 0 | 0 |
13 | model.layers.0.post_attention_layernorm | 3072 | 6144 | 12288 |
14 | model.layers.1.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
15 | model.layers.1.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
16 | model.layers.1.self_attn.rotary_emb | 0 | 0 | 0 |
17 | model.layers.1.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
18 | model.layers.1.mlp.down_proj | 25165824 | 50331648 | 50331648 |
19 | model.layers.1.mlp.activation_fn | 0 | 0 | 18432 |
20 | model.layers.1.input_layernorm | 3072 | 6144 | 12288 |
21 | model.layers.1.resid_attn_dropout | 0 | 0 | 0 |
22 | model.layers.1.resid_mlp_dropout | 0 | 0 | 0 |
23 | model.layers.1.post_attention_layernorm | 3072 | 6144 | 12288 |
24 | model.layers.2.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
25 | model.layers.2.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
26 | model.layers.2.self_attn.rotary_emb | 0 | 0 | 0 |
27 | model.layers.2.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
28 | model.layers.2.mlp.down_proj | 25165824 | 50331648 | 50331648 |
29 | model.layers.2.mlp.activation_fn | 0 | 0 | 18432 |
30 | model.layers.2.input_layernorm | 3072 | 6144 | 12288 |
31 | model.layers.2.resid_attn_dropout | 0 | 0 | 0 |
32 | model.layers.2.resid_mlp_dropout | 0 | 0 | 0 |
33 | model.layers.2.post_attention_layernorm | 3072 | 6144 | 12288 |
34 | model.layers.3.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
35 | model.layers.3.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
36 | model.layers.3.self_attn.rotary_emb | 0 | 0 | 0 |
37 | model.layers.3.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
38 | model.layers.3.mlp.down_proj | 25165824 | 50331648 | 50331648 |
39 | model.layers.3.mlp.activation_fn | 0 | 0 | 18432 |
40 | model.layers.3.input_layernorm | 3072 | 6144 | 12288 |
41 | model.layers.3.resid_attn_dropout | 0 | 0 | 0 |
42 | model.layers.3.resid_mlp_dropout | 0 | 0 | 0 |
43 | model.layers.3.post_attention_layernorm | 3072 | 6144 | 12288 |
44 | model.layers.4.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
45 | model.layers.4.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
46 | model.layers.4.self_attn.rotary_emb | 0 | 0 | 0 |
47 | model.layers.4.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
48 | model.layers.4.mlp.down_proj | 25165824 | 50331648 | 50331648 |
49 | model.layers.4.mlp.activation_fn | 0 | 0 | 18432 |
50 | model.layers.4.input_layernorm | 3072 | 6144 | 12288 |
51 | model.layers.4.resid_attn_dropout | 0 | 0 | 0 |
52 | model.layers.4.resid_mlp_dropout | 0 | 0 | 0 |
53 | model.layers.4.post_attention_layernorm | 3072 | 6144 | 12288 |
54 | model.layers.5.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
55 | model.layers.5.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
56 | model.layers.5.self_attn.rotary_emb | 0 | 0 | 0 |
57 | model.layers.5.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
58 | model.layers.5.mlp.down_proj | 25165824 | 50331648 | 50331648 |
59 | model.layers.5.mlp.activation_fn | 0 | 0 | 18432 |
60 | model.layers.5.input_layernorm | 3072 | 6144 | 12288 |
61 | model.layers.5.resid_attn_dropout | 0 | 0 | 0 |
62 | model.layers.5.resid_mlp_dropout | 0 | 0 | 0 |
63 | model.layers.5.post_attention_layernorm | 3072 | 6144 | 12288 |
64 | model.layers.6.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
65 | model.layers.6.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
66 | model.layers.6.self_attn.rotary_emb | 0 | 0 | 0 |
67 | model.layers.6.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
68 | model.layers.6.mlp.down_proj | 25165824 | 50331648 | 50331648 |
69 | model.layers.6.mlp.activation_fn | 0 | 0 | 18432 |
70 | model.layers.6.input_layernorm | 3072 | 6144 | 12288 |
71 | model.layers.6.resid_attn_dropout | 0 | 0 | 0 |
72 | model.layers.6.resid_mlp_dropout | 0 | 0 | 0 |
73 | model.layers.6.post_attention_layernorm | 3072 | 6144 | 12288 |
74 | model.layers.7.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
75 | model.layers.7.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
76 | model.layers.7.self_attn.rotary_emb | 0 | 0 | 0 |
77 | model.layers.7.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
78 | model.layers.7.mlp.down_proj | 25165824 | 50331648 | 50331648 |
79 | model.layers.7.mlp.activation_fn | 0 | 0 | 18432 |
80 | model.layers.7.input_layernorm | 3072 | 6144 | 12288 |
81 | model.layers.7.resid_attn_dropout | 0 | 0 | 0 |
82 | model.layers.7.resid_mlp_dropout | 0 | 0 | 0 |
83 | model.layers.7.post_attention_layernorm | 3072 | 6144 | 12288 |
84 | model.layers.8.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
85 | model.layers.8.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
86 | model.layers.8.self_attn.rotary_emb | 0 | 0 | 0 |
87 | model.layers.8.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
88 | model.layers.8.mlp.down_proj | 25165824 | 50331648 | 50331648 |
89 | model.layers.8.mlp.activation_fn | 0 | 0 | 18432 |
90 | model.layers.8.input_layernorm | 3072 | 6144 | 12288 |
91 | model.layers.8.resid_attn_dropout | 0 | 0 | 0 |
92 | model.layers.8.resid_mlp_dropout | 0 | 0 | 0 |
93 | model.layers.8.post_attention_layernorm | 3072 | 6144 | 12288 |
94 | model.layers.9.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
95 | model.layers.9.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
96 | model.layers.9.self_attn.rotary_emb | 0 | 0 | 0 |
97 | model.layers.9.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
98 | model.layers.9.mlp.down_proj | 25165824 | 50331648 | 50331648 |
99 | model.layers.9.mlp.activation_fn | 0 | 0 | 18432 |
100 | model.layers.9.input_layernorm | 3072 | 6144 | 12288 |
101 | model.layers.9.resid_attn_dropout | 0 | 0 | 0 |
102 | model.layers.9.resid_mlp_dropout | 0 | 0 | 0 |
103 | model.layers.9.post_attention_layernorm | 3072 | 6144 | 12288 |
104 | model.layers.10.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
105 | model.layers.10.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
106 | model.layers.10.self_attn.rotary_emb | 0 | 0 | 0 |
107 | model.layers.10.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
108 | model.layers.10.mlp.down_proj | 25165824 | 50331648 | 50331648 |
109 | model.layers.10.mlp.activation_fn | 0 | 0 | 18432 |
110 | model.layers.10.input_layernorm | 3072 | 6144 | 12288 |
111 | model.layers.10.resid_attn_dropout | 0 | 0 | 0 |
112 | model.layers.10.resid_mlp_dropout | 0 | 0 | 0 |
113 | model.layers.10.post_attention_layernorm | 3072 | 6144 | 12288 |
114 | model.layers.11.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
115 | model.layers.11.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
116 | model.layers.11.self_attn.rotary_emb | 0 | 0 | 0 |
117 | model.layers.11.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
118 | model.layers.11.mlp.down_proj | 25165824 | 50331648 | 50331648 |
119 | model.layers.11.mlp.activation_fn | 0 | 0 | 18432 |
120 | model.layers.11.input_layernorm | 3072 | 6144 | 12288 |
121 | model.layers.11.resid_attn_dropout | 0 | 0 | 0 |
122 | model.layers.11.resid_mlp_dropout | 0 | 0 | 0 |
123 | model.layers.11.post_attention_layernorm | 3072 | 6144 | 12288 |
124 | model.layers.12.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
125 | model.layers.12.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
126 | model.layers.12.self_attn.rotary_emb | 0 | 0 | 0 |
127 | model.layers.12.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
128 | model.layers.12.mlp.down_proj | 25165824 | 50331648 | 50331648 |
129 | model.layers.12.mlp.activation_fn | 0 | 0 | 18432 |
130 | model.layers.12.input_layernorm | 3072 | 6144 | 12288 |
131 | model.layers.12.resid_attn_dropout | 0 | 0 | 0 |
132 | model.layers.12.resid_mlp_dropout | 0 | 0 | 0 |
133 | model.layers.12.post_attention_layernorm | 3072 | 6144 | 12288 |
134 | model.layers.13.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
135 | model.layers.13.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
136 | model.layers.13.self_attn.rotary_emb | 0 | 0 | 0 |
137 | model.layers.13.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
138 | model.layers.13.mlp.down_proj | 25165824 | 50331648 | 50331648 |
139 | model.layers.13.mlp.activation_fn | 0 | 0 | 18432 |
140 | model.layers.13.input_layernorm | 3072 | 6144 | 12288 |
141 | model.layers.13.resid_attn_dropout | 0 | 0 | 0 |
142 | model.layers.13.resid_mlp_dropout | 0 | 0 | 0 |
143 | model.layers.13.post_attention_layernorm | 3072 | 6144 | 12288 |
144 | model.layers.14.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
145 | model.layers.14.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
146 | model.layers.14.self_attn.rotary_emb | 0 | 0 | 0 |
147 | model.layers.14.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
148 | model.layers.14.mlp.down_proj | 25165824 | 50331648 | 50331648 |
149 | model.layers.14.mlp.activation_fn | 0 | 0 | 18432 |
150 | model.layers.14.input_layernorm | 3072 | 6144 | 12288 |
151 | model.layers.14.resid_attn_dropout | 0 | 0 | 0 |
152 | model.layers.14.resid_mlp_dropout | 0 | 0 | 0 |
153 | model.layers.14.post_attention_layernorm | 3072 | 6144 | 12288 |
154 | model.layers.15.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
155 | model.layers.15.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
156 | model.layers.15.self_attn.rotary_emb | 0 | 0 | 0 |
157 | model.layers.15.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
158 | model.layers.15.mlp.down_proj | 25165824 | 50331648 | 50331648 |
159 | model.layers.15.mlp.activation_fn | 0 | 0 | 18432 |
160 | model.layers.15.input_layernorm | 3072 | 6144 | 12288 |
161 | model.layers.15.resid_attn_dropout | 0 | 0 | 0 |
162 | model.layers.15.resid_mlp_dropout | 0 | 0 | 0 |
163 | model.layers.15.post_attention_layernorm | 3072 | 6144 | 12288 |
164 | model.layers.16.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
165 | model.layers.16.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
166 | model.layers.16.self_attn.rotary_emb | 0 | 0 | 0 |
167 | model.layers.16.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
168 | model.layers.16.mlp.down_proj | 25165824 | 50331648 | 50331648 |
169 | model.layers.16.mlp.activation_fn | 0 | 0 | 18432 |
170 | model.layers.16.input_layernorm | 3072 | 6144 | 12288 |
171 | model.layers.16.resid_attn_dropout | 0 | 0 | 0 |
172 | model.layers.16.resid_mlp_dropout | 0 | 0 | 0 |
173 | model.layers.16.post_attention_layernorm | 3072 | 6144 | 12288 |
174 | model.layers.17.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
175 | model.layers.17.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
176 | model.layers.17.self_attn.rotary_emb | 0 | 0 | 0 |
177 | model.layers.17.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
178 | model.layers.17.mlp.down_proj | 25165824 | 50331648 | 50331648 |
179 | model.layers.17.mlp.activation_fn | 0 | 0 | 18432 |
180 | model.layers.17.input_layernorm | 3072 | 6144 | 12288 |
181 | model.layers.17.resid_attn_dropout | 0 | 0 | 0 |
182 | model.layers.17.resid_mlp_dropout | 0 | 0 | 0 |
183 | model.layers.17.post_attention_layernorm | 3072 | 6144 | 12288 |
184 | model.layers.18.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
185 | model.layers.18.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
186 | model.layers.18.self_attn.rotary_emb | 0 | 0 | 0 |
187 | model.layers.18.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
188 | model.layers.18.mlp.down_proj | 25165824 | 50331648 | 50331648 |
189 | model.layers.18.mlp.activation_fn | 0 | 0 | 18432 |
190 | model.layers.18.input_layernorm | 3072 | 6144 | 12288 |
191 | model.layers.18.resid_attn_dropout | 0 | 0 | 0 |
192 | model.layers.18.resid_mlp_dropout | 0 | 0 | 0 |
193 | model.layers.18.post_attention_layernorm | 3072 | 6144 | 12288 |
194 | model.layers.19.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
195 | model.layers.19.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
196 | model.layers.19.self_attn.rotary_emb | 0 | 0 | 0 |
197 | model.layers.19.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
198 | model.layers.19.mlp.down_proj | 25165824 | 50331648 | 50331648 |
199 | model.layers.19.mlp.activation_fn | 0 | 0 | 18432 |
200 | model.layers.19.input_layernorm | 3072 | 6144 | 12288 |
201 | model.layers.19.resid_attn_dropout | 0 | 0 | 0 |
202 | model.layers.19.resid_mlp_dropout | 0 | 0 | 0 |
203 | model.layers.19.post_attention_layernorm | 3072 | 6144 | 12288 |
204 | model.layers.20.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
205 | model.layers.20.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
206 | model.layers.20.self_attn.rotary_emb | 0 | 0 | 0 |
207 | model.layers.20.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
208 | model.layers.20.mlp.down_proj | 25165824 | 50331648 | 50331648 |
209 | model.layers.20.mlp.activation_fn | 0 | 0 | 18432 |
210 | model.layers.20.input_layernorm | 3072 | 6144 | 12288 |
211 | model.layers.20.resid_attn_dropout | 0 | 0 | 0 |
212 | model.layers.20.resid_mlp_dropout | 0 | 0 | 0 |
213 | model.layers.20.post_attention_layernorm | 3072 | 6144 | 12288 |
214 | model.layers.21.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
215 | model.layers.21.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
216 | model.layers.21.self_attn.rotary_emb | 0 | 0 | 0 |
217 | model.layers.21.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
218 | model.layers.21.mlp.down_proj | 25165824 | 50331648 | 50331648 |
219 | model.layers.21.mlp.activation_fn | 0 | 0 | 18432 |
220 | model.layers.21.input_layernorm | 3072 | 6144 | 12288 |
221 | model.layers.21.resid_attn_dropout | 0 | 0 | 0 |
222 | model.layers.21.resid_mlp_dropout | 0 | 0 | 0 |
223 | model.layers.21.post_attention_layernorm | 3072 | 6144 | 12288 |
224 | model.layers.22.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
225 | model.layers.22.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
226 | model.layers.22.self_attn.rotary_emb | 0 | 0 | 0 |
227 | model.layers.22.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
228 | model.layers.22.mlp.down_proj | 25165824 | 50331648 | 50331648 |
229 | model.layers.22.mlp.activation_fn | 0 | 0 | 18432 |
230 | model.layers.22.input_layernorm | 3072 | 6144 | 12288 |
231 | model.layers.22.resid_attn_dropout | 0 | 0 | 0 |
232 | model.layers.22.resid_mlp_dropout | 0 | 0 | 0 |
233 | model.layers.22.post_attention_layernorm | 3072 | 6144 | 12288 |
234 | model.layers.23.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
235 | model.layers.23.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
236 | model.layers.23.self_attn.rotary_emb | 0 | 0 | 0 |
237 | model.layers.23.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
238 | model.layers.23.mlp.down_proj | 25165824 | 50331648 | 50331648 |
239 | model.layers.23.mlp.activation_fn | 0 | 0 | 18432 |
240 | model.layers.23.input_layernorm | 3072 | 6144 | 12288 |
241 | model.layers.23.resid_attn_dropout | 0 | 0 | 0 |
242 | model.layers.23.resid_mlp_dropout | 0 | 0 | 0 |
243 | model.layers.23.post_attention_layernorm | 3072 | 6144 | 12288 |
244 | model.layers.24.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
245 | model.layers.24.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
246 | model.layers.24.self_attn.rotary_emb | 0 | 0 | 0 |
247 | model.layers.24.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
248 | model.layers.24.mlp.down_proj | 25165824 | 50331648 | 50331648 |
249 | model.layers.24.mlp.activation_fn | 0 | 0 | 18432 |
250 | model.layers.24.input_layernorm | 3072 | 6144 | 12288 |
251 | model.layers.24.resid_attn_dropout | 0 | 0 | 0 |
252 | model.layers.24.resid_mlp_dropout | 0 | 0 | 0 |
253 | model.layers.24.post_attention_layernorm | 3072 | 6144 | 12288 |
254 | model.layers.25.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
255 | model.layers.25.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
256 | model.layers.25.self_attn.rotary_emb | 0 | 0 | 0 |
257 | model.layers.25.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
258 | model.layers.25.mlp.down_proj | 25165824 | 50331648 | 50331648 |
259 | model.layers.25.mlp.activation_fn | 0 | 0 | 18432 |
260 | model.layers.25.input_layernorm | 3072 | 6144 | 12288 |
261 | model.layers.25.resid_attn_dropout | 0 | 0 | 0 |
262 | model.layers.25.resid_mlp_dropout | 0 | 0 | 0 |
263 | model.layers.25.post_attention_layernorm | 3072 | 6144 | 12288 |
264 | model.layers.26.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
265 | model.layers.26.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
266 | model.layers.26.self_attn.rotary_emb | 0 | 0 | 0 |
267 | model.layers.26.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
268 | model.layers.26.mlp.down_proj | 25165824 | 50331648 | 50331648 |
269 | model.layers.26.mlp.activation_fn | 0 | 0 | 18432 |
270 | model.layers.26.input_layernorm | 3072 | 6144 | 12288 |
271 | model.layers.26.resid_attn_dropout | 0 | 0 | 0 |
272 | model.layers.26.resid_mlp_dropout | 0 | 0 | 0 |
273 | model.layers.26.post_attention_layernorm | 3072 | 6144 | 12288 |
274 | model.layers.27.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
275 | model.layers.27.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
276 | model.layers.27.self_attn.rotary_emb | 0 | 0 | 0 |
277 | model.layers.27.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
278 | model.layers.27.mlp.down_proj | 25165824 | 50331648 | 50331648 |
279 | model.layers.27.mlp.activation_fn | 0 | 0 | 18432 |
280 | model.layers.27.input_layernorm | 3072 | 6144 | 12288 |
281 | model.layers.27.resid_attn_dropout | 0 | 0 | 0 |
282 | model.layers.27.resid_mlp_dropout | 0 | 0 | 0 |
283 | model.layers.27.post_attention_layernorm | 3072 | 6144 | 12288 |
284 | model.layers.28.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
285 | model.layers.28.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
286 | model.layers.28.self_attn.rotary_emb | 0 | 0 | 0 |
287 | model.layers.28.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
288 | model.layers.28.mlp.down_proj | 25165824 | 50331648 | 50331648 |
289 | model.layers.28.mlp.activation_fn | 0 | 0 | 18432 |
290 | model.layers.28.input_layernorm | 3072 | 6144 | 12288 |
291 | model.layers.28.resid_attn_dropout | 0 | 0 | 0 |
292 | model.layers.28.resid_mlp_dropout | 0 | 0 | 0 |
293 | model.layers.28.post_attention_layernorm | 3072 | 6144 | 12288 |
294 | model.layers.29.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
295 | model.layers.29.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
296 | model.layers.29.self_attn.rotary_emb | 0 | 0 | 0 |
297 | model.layers.29.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
298 | model.layers.29.mlp.down_proj | 25165824 | 50331648 | 50331648 |
299 | model.layers.29.mlp.activation_fn | 0 | 0 | 18432 |
300 | model.layers.29.input_layernorm | 3072 | 6144 | 12288 |
301 | model.layers.29.resid_attn_dropout | 0 | 0 | 0 |
302 | model.layers.29.resid_mlp_dropout | 0 | 0 | 0 |
303 | model.layers.29.post_attention_layernorm | 3072 | 6144 | 12288 |
304 | model.layers.30.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
305 | model.layers.30.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
306 | model.layers.30.self_attn.rotary_emb | 0 | 0 | 0 |
307 | model.layers.30.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
308 | model.layers.30.mlp.down_proj | 25165824 | 50331648 | 50331648 |
309 | model.layers.30.mlp.activation_fn | 0 | 0 | 18432 |
310 | model.layers.30.input_layernorm | 3072 | 6144 | 12288 |
311 | model.layers.30.resid_attn_dropout | 0 | 0 | 0 |
312 | model.layers.30.resid_mlp_dropout | 0 | 0 | 0 |
313 | model.layers.30.post_attention_layernorm | 3072 | 6144 | 12288 |
314 | model.layers.31.self_attn.o_proj | 9437184 | 18874368 | 18874368 |
315 | model.layers.31.self_attn.qkv_proj | 28311552 | 56623104 | 56623104 |
316 | model.layers.31.self_attn.rotary_emb | 0 | 0 | 0 |
317 | model.layers.31.mlp.gate_up_proj | 50331648 | 100663296 | 100663296 |
318 | model.layers.31.mlp.down_proj | 25165824 | 50331648 | 50331648 |
319 | model.layers.31.mlp.activation_fn | 0 | 0 | 18432 |
320 | model.layers.31.input_layernorm | 3072 | 6144 | 12288 |
321 | model.layers.31.resid_attn_dropout | 0 | 0 | 0 |
322 | model.layers.31.resid_mlp_dropout | 0 | 0 | 0 |
323 | model.layers.31.post_attention_layernorm | 3072 | 6144 | 12288 |
324 | model.norm | 3072 | 6144 | 12288 |
325 | lm_head | 98500608 | 197001216 | 197001216 |