diff --git a/graph_net/torch/dim_gen_passes/naive_call_method_expand_pass.py b/graph_net/torch/dim_gen_passes/naive_call_method_expand_pass.py index 99fb7f552..94261d256 100644 --- a/graph_net/torch/dim_gen_passes/naive_call_method_expand_pass.py +++ b/graph_net/torch/dim_gen_passes/naive_call_method_expand_pass.py @@ -13,10 +13,7 @@ def get_pass_name(cls) -> bool: def need_rewrite(self, traced_module: fx.GraphModule) -> bool: if 0 not in self.axes: return False - for node in traced_module.graph.nodes: - if node.op == "call_method" and node.target == "expand": - return True - return False + return any(self._node_need_rewrite(node) for node in traced_module.graph.nodes) def _node_need_rewrite(self, node) -> bool: if not (node.op == "call_method"): @@ -91,6 +88,10 @@ def rewrite(self, traced_module: fx.GraphModule) -> fx.GraphModule: else: # Do nothing. pass + elif axis_idx == 0 and input_dim_size == 1 and target_dim > 1: + matched_axis = 0 + else: + pass if matched_axis != -1: # Found a matching dynamic axis (matched_axis), replace it with a size() call diff --git a/samples/transformers-auto-model/mmarco-mMiniLMv2-L12-H384-v1/graph_net.json b/samples/transformers-auto-model/mmarco-mMiniLMv2-L12-H384-v1/graph_net.json index 4052bd42a..2f23d7491 100644 --- a/samples/transformers-auto-model/mmarco-mMiniLMv2-L12-H384-v1/graph_net.json +++ b/samples/transformers-auto-model/mmarco-mMiniLMv2-L12-H384-v1/graph_net.json @@ -1 +1,54 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1", "source": "huggingface_hub", "original_tag": ["sentence-transformers", "pytorch", "onnx", "safetensors", "openvino", "xlm-roberta", "text-classification", "transformers", "text-ranking", "en", "ar", "zh", "nl", "fr", "de", "hi", "in", "it", "ja", "pt", "ru", "es", "vi", "multilingual", "dataset:unicamp-dl/mmarco", "base_model:nreimers/mMiniLMv2-L12-H384-distilled-from-XLMR-Large", "base_model:quantized:nreimers/mMiniLMv2-L12-H384-distilled-from-XLMR-Large", "license:apache-2.0", "region:us"], "heuristic_tag": "nlp", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "onnx", + "safetensors", + "openvino", + "xlm-roberta", + "text-classification", + "transformers", + "text-ranking", + "en", + "ar", + "zh", + "nl", + "fr", + "de", + "hi", + "in", + "it", + "ja", + "pt", + "ru", + "es", + "vi", + "multilingual", + "dataset:unicamp-dl/mmarco", + "base_model:nreimers/mMiniLMv2-L12-H384-distilled-from-XLMR-Large", + "base_model:quantized:nreimers/mMiniLMv2-L12-H384-distilled-from-XLMR-Large", + "license:apache-2.0", + "region:us" + ], + "heuristic_tag": "nlp", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + 
"non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/mmarco-mMiniLMv2-L12-H384-v1/input_tensor_constraints.py b/samples/transformers-auto-model/mmarco-mMiniLMv2-L12-H384-v1/input_tensor_constraints.py index 33e6cb5b9..07f258661 100644 --- a/samples/transformers-auto-model/mmarco-mMiniLMv2-L12-H384-v1/input_tensor_constraints.py +++ b/samples/transformers-auto-model/mmarco-mMiniLMv2-L12-H384-v1/input_tensor_constraints.py @@ -1,60 +1,45 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 36} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 36} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), - ([384], "L_self_modules_classifier_modules_dense_parameters_bias_"), - ([384, 384], "L_self_modules_classifier_modules_dense_parameters_weight_"), - ([1], "L_self_modules_classifier_modules_out_proj_parameters_bias_"), - ([1, 384], "L_self_modules_classifier_modules_out_proj_parameters_weight_"), + ([S0, S1], "L_input_ids_"), ([1, 514], "L_self_modules_roberta_modules_embeddings_buffers_token_type_ids_"), ( - [384], - "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_bias_", - ), - ( - [384], - "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_weight_", - ), - ( - [514, 384], - "L_self_modules_roberta_modules_embeddings_modules_position_embeddings_parameters_weight_", + [250002, 384], + "L_self_modules_roberta_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( [1, 384], "L_self_modules_roberta_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [250002, 384], - "L_self_modules_roberta_modules_embeddings_modules_word_embeddings_parameters_weight_", - ), - ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + [514, 384], + "L_self_modules_roberta_modules_embeddings_modules_position_embeddings_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_bias_", ), + ([S0, S1], "L_attention_mask_"), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], @@ -62,11 +47,11 @@ ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], @@ -74,27 +59,27 @@ ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], @@ -102,706 +87,722 @@ ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [384], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [384], + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + 
[1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", + [384], + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + ), + ( + [1536, 384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + ), + ( + [1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + ), + ( + [384, 1536], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", ), ( - [384, 1536], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", + [384], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", ), + ([384, 384], "L_self_modules_classifier_modules_dense_parameters_weight_"), + ([384], "L_self_modules_classifier_modules_dense_parameters_bias_"), + ([1, 384], "L_self_modules_classifier_modules_out_proj_parameters_weight_"), + ([1], "L_self_modules_classifier_modules_out_proj_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/msmarco-MiniLM-L12-en-de-v1/graph_net.json b/samples/transformers-auto-model/msmarco-MiniLM-L12-en-de-v1/graph_net.json index 06eca9f09..8e8a82b66 100644 --- a/samples/transformers-auto-model/msmarco-MiniLM-L12-en-de-v1/graph_net.json +++ b/samples/transformers-auto-model/msmarco-MiniLM-L12-en-de-v1/graph_net.json @@ -1 +1,41 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "cross-encoder/msmarco-MiniLM-L12-en-de-v1", "source": "huggingface_hub", "original_tag": ["sentence-transformers", "pytorch", "onnx", "safetensors", "openvino", "bert", "text-classification", "transformers", "text-ranking", "en", "de", "dataset:sentence-transformers/msmarco", "base_model:microsoft/Multilingual-MiniLM-L12-H384", "base_model:quantized:microsoft/Multilingual-MiniLM-L12-H384", "license:apache-2.0", "region:us"], "heuristic_tag": "nlp", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "cross-encoder/msmarco-MiniLM-L12-en-de-v1", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "onnx", + "safetensors", + "openvino", + "bert", + "text-classification", + "transformers", + "text-ranking", + "en", + "de", 
+ "dataset:sentence-transformers/msmarco", + "base_model:microsoft/Multilingual-MiniLM-L12-H384", + "base_model:quantized:microsoft/Multilingual-MiniLM-L12-H384", + "license:apache-2.0", + "region:us" + ], + "heuristic_tag": "nlp", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + "non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/msmarco-MiniLM-L12-en-de-v1/input_tensor_constraints.py b/samples/transformers-auto-model/msmarco-MiniLM-L12-en-de-v1/input_tensor_constraints.py index 34ce6e531..e02f1e03e 100644 --- a/samples/transformers-auto-model/msmarco-MiniLM-L12-en-de-v1/input_tensor_constraints.py +++ b/samples/transformers-auto-model/msmarco-MiniLM-L12-en-de-v1/input_tensor_constraints.py @@ -1,57 +1,46 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 36} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 36} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), - ([1, 512], "L_self_modules_bert_modules_embeddings_buffers_position_ids_"), + ([S0, S1], "L_input_ids_"), ([1, 512], "L_self_modules_bert_modules_embeddings_buffers_token_type_ids_"), + ([1, 512], "L_self_modules_bert_modules_embeddings_buffers_position_ids_"), ( - [384], - "L_self_modules_bert_modules_embeddings_modules_LayerNorm_parameters_bias_", - ), - ( - [384], - "L_self_modules_bert_modules_embeddings_modules_LayerNorm_parameters_weight_", - ), - ( - [512, 384], - "L_self_modules_bert_modules_embeddings_modules_position_embeddings_parameters_weight_", + [250037, 384], + "L_self_modules_bert_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( [2, 384], "L_self_modules_bert_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [250037, 384], - "L_self_modules_bert_modules_embeddings_modules_word_embeddings_parameters_weight_", - ), - ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + [512, 384], + "L_self_modules_bert_modules_embeddings_modules_position_embeddings_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_embeddings_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_embeddings_modules_LayerNorm_parameters_bias_", ), + ([S0, S1], "L_attention_mask_"), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], @@ -59,11 +48,11 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], @@ -71,27 +60,27 @@ ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], @@ -99,710 +88,722 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - 
"L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", ), ( [384], - 
"L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - 
"L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + 
[384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [1536], + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", + [384], + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", ), ( 
[384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - 
"L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - 
"L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", ), ( [384], - 
"L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + ), + ( + [1536, 384], + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + ), + ( + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + ), + ( + [384, 1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", ), ( - [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", ), - ([384], "L_self_modules_bert_modules_pooler_modules_dense_parameters_bias_"), ([384, 384], "L_self_modules_bert_modules_pooler_modules_dense_parameters_weight_"), - ([1], "L_self_modules_classifier_parameters_bias_"), + ([384], "L_self_modules_bert_modules_pooler_modules_dense_parameters_bias_"), ([1, 384], "L_self_modules_classifier_parameters_weight_"), + ([1], "L_self_modules_classifier_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/msmarco-MiniLM-L6-en-de-v1/graph_net.json b/samples/transformers-auto-model/msmarco-MiniLM-L6-en-de-v1/graph_net.json index f831f2db4..27359a809 100644 --- a/samples/transformers-auto-model/msmarco-MiniLM-L6-en-de-v1/graph_net.json +++ b/samples/transformers-auto-model/msmarco-MiniLM-L6-en-de-v1/graph_net.json @@ -1 +1,24 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "NO_VALID_MATCH_FOUND", "source": "huggingface_hub", "original_tag": [], "heuristic_tag": "nlp", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "NO_VALID_MATCH_FOUND", + "source": "huggingface_hub", + "original_tag": [], + "heuristic_tag": "nlp", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + "non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/msmarco-MiniLM-L6-en-de-v1/input_tensor_constraints.py 
b/samples/transformers-auto-model/msmarco-MiniLM-L6-en-de-v1/input_tensor_constraints.py index fbc3abc9b..ce114f9c2 100644 --- a/samples/transformers-auto-model/msmarco-MiniLM-L6-en-de-v1/input_tensor_constraints.py +++ b/samples/transformers-auto-model/msmarco-MiniLM-L6-en-de-v1/input_tensor_constraints.py @@ -1,57 +1,46 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 36} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 36} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), - ([1, 512], "L_self_modules_bert_modules_embeddings_buffers_position_ids_"), + ([S0, S1], "L_input_ids_"), ([1, 512], "L_self_modules_bert_modules_embeddings_buffers_token_type_ids_"), + ([1, 512], "L_self_modules_bert_modules_embeddings_buffers_position_ids_"), ( - [384], - "L_self_modules_bert_modules_embeddings_modules_LayerNorm_parameters_bias_", - ), - ( - [384], - "L_self_modules_bert_modules_embeddings_modules_LayerNorm_parameters_weight_", - ), - ( - [512, 384], - "L_self_modules_bert_modules_embeddings_modules_position_embeddings_parameters_weight_", + [250037, 384], + "L_self_modules_bert_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( [2, 384], "L_self_modules_bert_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [250037, 384], - "L_self_modules_bert_modules_embeddings_modules_word_embeddings_parameters_weight_", - ), - ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + [512, 384], + "L_self_modules_bert_modules_embeddings_modules_position_embeddings_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_embeddings_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_embeddings_modules_LayerNorm_parameters_bias_", ), + ([S0, S1], "L_attention_mask_"), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], @@ -59,11 +48,11 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], @@ -71,27 +60,27 @@ ), ( 
[384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], @@ -99,23 +88,23 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], @@ -123,11 +112,11 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], @@ 
-135,27 +124,27 @@ ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], @@ -163,23 +152,23 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], @@ -187,11 +176,11 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], @@ -199,27 +188,27 @@ ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], @@ -227,23 +216,23 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], @@ -251,11 +240,11 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - 
"L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], @@ -263,27 +252,27 @@ ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], @@ -291,23 +280,23 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], @@ -315,11 +304,11 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + 
"L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], @@ -327,27 +316,27 @@ ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [1536, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [1536, 384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [384, 1536], @@ -355,23 +344,23 @@ ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [384, 384], @@ -379,11 +368,11 @@ ), ( [384], - 
"L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [384], @@ -391,34 +380,46 @@ ), ( [384, 384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + ), + ( + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + ), + ( + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1536, 384], "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + [1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( - [384], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [384, 1536], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [384], "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( - [384, 1536], - "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + ), + ( + [384], + "L_self_modules_bert_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), - ([384], "L_self_modules_bert_modules_pooler_modules_dense_parameters_bias_"), ([384, 384], "L_self_modules_bert_modules_pooler_modules_dense_parameters_weight_"), - ([1], "L_self_modules_classifier_parameters_bias_"), + ([384], "L_self_modules_bert_modules_pooler_modules_dense_parameters_bias_"), ([1, 384], "L_self_modules_classifier_parameters_weight_"), + ([1], "L_self_modules_classifier_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/nli-MiniLM2-L6-H768/graph_net.json b/samples/transformers-auto-model/nli-MiniLM2-L6-H768/graph_net.json index aaad183ec..e415779d1 100644 --- a/samples/transformers-auto-model/nli-MiniLM2-L6-H768/graph_net.json +++ b/samples/transformers-auto-model/nli-MiniLM2-L6-H768/graph_net.json @@ -1 +1,41 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "cross-encoder/nli-MiniLM2-L6-H768", "source": "huggingface_hub", "original_tag": ["sentence-transformers", 
"pytorch", "onnx", "safetensors", "openvino", "roberta", "text-classification", "transformers", "zero-shot-classification", "en", "dataset:nyu-mll/multi_nli", "dataset:stanfordnlp/snli", "base_model:nreimers/MiniLMv2-L6-H768-distilled-from-RoBERTa-Large", "base_model:quantized:nreimers/MiniLMv2-L6-H768-distilled-from-RoBERTa-Large", "license:apache-2.0", "region:us"], "heuristic_tag": "nlp", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "cross-encoder/nli-MiniLM2-L6-H768", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "onnx", + "safetensors", + "openvino", + "roberta", + "text-classification", + "transformers", + "zero-shot-classification", + "en", + "dataset:nyu-mll/multi_nli", + "dataset:stanfordnlp/snli", + "base_model:nreimers/MiniLMv2-L6-H768-distilled-from-RoBERTa-Large", + "base_model:quantized:nreimers/MiniLMv2-L6-H768-distilled-from-RoBERTa-Large", + "license:apache-2.0", + "region:us" + ], + "heuristic_tag": "nlp", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + "non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/nli-MiniLM2-L6-H768/input_tensor_constraints.py b/samples/transformers-auto-model/nli-MiniLM2-L6-H768/input_tensor_constraints.py index d2688c2ae..03963c3f8 100644 --- a/samples/transformers-auto-model/nli-MiniLM2-L6-H768/input_tensor_constraints.py +++ b/samples/transformers-auto-model/nli-MiniLM2-L6-H768/input_tensor_constraints.py @@ -1,60 +1,45 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 35} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 35} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), - ([768], "L_self_modules_classifier_modules_dense_parameters_bias_"), - ([768, 768], "L_self_modules_classifier_modules_dense_parameters_weight_"), - ([3], "L_self_modules_classifier_modules_out_proj_parameters_bias_"), - ([3, 768], "L_self_modules_classifier_modules_out_proj_parameters_weight_"), + ([S0, S1], "L_input_ids_"), ([1, 514], "L_self_modules_roberta_modules_embeddings_buffers_token_type_ids_"), ( - [768], - "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_bias_", - ), - ( - [768], - 
"L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_weight_", - ), - ( - [514, 768], - "L_self_modules_roberta_modules_embeddings_modules_position_embeddings_parameters_weight_", + [50265, 768], + "L_self_modules_roberta_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( [1, 768], "L_self_modules_roberta_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [50265, 768], - "L_self_modules_roberta_modules_embeddings_modules_word_embeddings_parameters_weight_", - ), - ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + [514, 768], + "L_self_modules_roberta_modules_embeddings_modules_position_embeddings_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_bias_", ), + ([S0, S1], "L_attention_mask_"), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -62,11 +47,11 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -74,27 +59,27 @@ ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -102,23 +87,23 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -126,11 +111,11 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -138,27 +123,27 @@ ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -166,23 +151,23 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -190,11 +175,11 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -202,27 +187,27 @@ ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [768], + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -230,23 +215,23 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -254,11 +239,11 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -266,27 +251,27 @@ ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -294,23 +279,23 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -318,11 +303,11 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -330,27 +315,27 @@ ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [768], + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -358,23 +343,23 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -382,11 +367,11 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -394,30 +379,46 @@ ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + ), + ( + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + ), + ( + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( [3072, 768], "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [768, 3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [768], "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( - [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", ), + ( + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + ), + ([768, 768], "L_self_modules_classifier_modules_dense_parameters_weight_"), + ([768], "L_self_modules_classifier_modules_dense_parameters_bias_"), + ([3, 768], "L_self_modules_classifier_modules_out_proj_parameters_weight_"), + ([3], "L_self_modules_classifier_modules_out_proj_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/nli-roberta-base/graph_net.json b/samples/transformers-auto-model/nli-roberta-base/graph_net.json index 3db583c35..243b0adc9 100644 --- a/samples/transformers-auto-model/nli-roberta-base/graph_net.json +++ b/samples/transformers-auto-model/nli-roberta-base/graph_net.json @@ -1 +1,42 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "sentence-transformers/roberta-base-nli-stsb-mean-tokens", "source": "huggingface_hub", "original_tag": ["sentence-transformers", "pytorch", "tf", "jax", "onnx", "safetensors", "openvino", "roberta", "feature-extraction", "sentence-similarity", "transformers", "arxiv:1908.10084", "license:apache-2.0", "autotrain_compatible", "text-embeddings-inference", "endpoints_compatible", "region:us"], "heuristic_tag": "other", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": 
"sentence-transformers/roberta-base-nli-stsb-mean-tokens", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "tf", + "jax", + "onnx", + "safetensors", + "openvino", + "roberta", + "feature-extraction", + "sentence-similarity", + "transformers", + "arxiv:1908.10084", + "license:apache-2.0", + "autotrain_compatible", + "text-embeddings-inference", + "endpoints_compatible", + "region:us" + ], + "heuristic_tag": "other", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + "non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/nli-roberta-base/input_tensor_constraints.py b/samples/transformers-auto-model/nli-roberta-base/input_tensor_constraints.py index fd7327f06..a14a2761c 100644 --- a/samples/transformers-auto-model/nli-roberta-base/input_tensor_constraints.py +++ b/samples/transformers-auto-model/nli-roberta-base/input_tensor_constraints.py @@ -1,60 +1,45 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 35} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 35} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), - ([768], "L_self_modules_classifier_modules_dense_parameters_bias_"), - ([768, 768], "L_self_modules_classifier_modules_dense_parameters_weight_"), - ([3], "L_self_modules_classifier_modules_out_proj_parameters_bias_"), - ([3, 768], "L_self_modules_classifier_modules_out_proj_parameters_weight_"), + ([S0, S1], "L_input_ids_"), ([1, 514], "L_self_modules_roberta_modules_embeddings_buffers_token_type_ids_"), ( - [768], - "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_bias_", - ), - ( - [768], - "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_weight_", - ), - ( - [514, 768], - "L_self_modules_roberta_modules_embeddings_modules_position_embeddings_parameters_weight_", + [50265, 768], + "L_self_modules_roberta_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( [1, 768], "L_self_modules_roberta_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [50265, 768], - "L_self_modules_roberta_modules_embeddings_modules_word_embeddings_parameters_weight_", - ), - ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + [514, 768], + "L_self_modules_roberta_modules_embeddings_modules_position_embeddings_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_weight_", ), ( [768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_bias_", ), + ([S0, S1], "L_attention_mask_"), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -62,11 +47,11 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -74,27 +59,27 @@ ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -102,706 +87,722 @@ ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 
3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", ), ( [768], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + ), + ( + [3072, 768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + ), + ( + [3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + ), + ( + [768, 3072], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", ), ( - [768, 3072], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", + [768], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", ), + ([768, 768], "L_self_modules_classifier_modules_dense_parameters_weight_"), + ([768], "L_self_modules_classifier_modules_dense_parameters_bias_"), + ([3, 768], "L_self_modules_classifier_modules_out_proj_parameters_weight_"), + ([3], "L_self_modules_classifier_modules_out_proj_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/quora-roberta-large/graph_net.json b/samples/transformers-auto-model/quora-roberta-large/graph_net.json index a8d7268bf..ec2a0437d 100644 --- a/samples/transformers-auto-model/quora-roberta-large/graph_net.json +++ b/samples/transformers-auto-model/quora-roberta-large/graph_net.json @@ -1 +1,41 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "cross-encoder/quora-roberta-large", "source": "huggingface_hub", "original_tag": ["sentence-transformers", "pytorch", "jax", "onnx", "safetensors", "openvino", "roberta", "text-classification", "transformers", "text-ranking", "en", "dataset:sentence-transformers/quora-duplicates", "base_model:FacebookAI/roberta-large", "base_model:quantized:FacebookAI/roberta-large", "license:apache-2.0", "region:us"], "heuristic_tag": "nlp", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", 
"non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "cross-encoder/quora-roberta-large", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "jax", + "onnx", + "safetensors", + "openvino", + "roberta", + "text-classification", + "transformers", + "text-ranking", + "en", + "dataset:sentence-transformers/quora-duplicates", + "base_model:FacebookAI/roberta-large", + "base_model:quantized:FacebookAI/roberta-large", + "license:apache-2.0", + "region:us" + ], + "heuristic_tag": "nlp", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + "non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/quora-roberta-large/input_tensor_constraints.py b/samples/transformers-auto-model/quora-roberta-large/input_tensor_constraints.py index 1e5e046ae..4caff4979 100644 --- a/samples/transformers-auto-model/quora-roberta-large/input_tensor_constraints.py +++ b/samples/transformers-auto-model/quora-roberta-large/input_tensor_constraints.py @@ -1,60 +1,45 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 35} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 35} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), - ([1024], "L_self_modules_classifier_modules_dense_parameters_bias_"), - ([1024, 1024], "L_self_modules_classifier_modules_dense_parameters_weight_"), - ([1], "L_self_modules_classifier_modules_out_proj_parameters_bias_"), - ([1, 1024], "L_self_modules_classifier_modules_out_proj_parameters_weight_"), + ([S0, S1], "L_input_ids_"), ([1, 514], "L_self_modules_roberta_modules_embeddings_buffers_token_type_ids_"), ( - [1024], - "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_bias_", - ), - ( - [1024], - "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_weight_", - ), - ( - [514, 1024], - "L_self_modules_roberta_modules_embeddings_modules_position_embeddings_parameters_weight_", + [50265, 1024], + "L_self_modules_roberta_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( [1, 1024], "L_self_modules_roberta_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [50265, 1024], - "L_self_modules_roberta_modules_embeddings_modules_word_embeddings_parameters_weight_", - ), - ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + [514, 1024], + 
"L_self_modules_roberta_modules_embeddings_modules_position_embeddings_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_embeddings_modules_LayerNorm_parameters_bias_", ), + ([S0, S1], "L_attention_mask_"), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], @@ -62,11 +47,11 @@ ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], @@ -74,27 +59,27 @@ ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], @@ -102,1474 +87,1490 @@ ), ( [1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_query_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_dense_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", ), ( [1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_value_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_query_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_dense_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", ), ( [1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_value_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_dense_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_output_modules_dense_parameters_weight_", ), ( [1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_12_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_13_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_value_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_14_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_query_parameters_bias_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_15_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_16_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_17_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_18_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_19_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", + 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_20_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_21_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_22_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - 
"L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + ), + ( + [4096, 1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_intermediate_modules_dense_parameters_weight_", + ), + ( + [4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_intermediate_modules_dense_parameters_bias_", + ), + ( + [1024, 4096], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_output_modules_LayerNorm_parameters_weight_", ), ( - [1024, 4096], - "L_self_modules_roberta_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", + [1024], + "L_self_modules_roberta_modules_encoder_modules_layer_modules_23_modules_output_modules_LayerNorm_parameters_bias_", ), + ([1024, 1024], "L_self_modules_classifier_modules_dense_parameters_weight_"), + ([1024], "L_self_modules_classifier_modules_dense_parameters_bias_"), + ([1, 1024], "L_self_modules_classifier_modules_out_proj_parameters_weight_"), + ([1], "L_self_modules_classifier_modules_out_proj_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/sentence-transformers/all-distilroberta-v1/graph_net.json 
b/samples/transformers-auto-model/sentence-transformers/all-distilroberta-v1/graph_net.json index 3d77b4a6f..859bc5ad9 100644 --- a/samples/transformers-auto-model/sentence-transformers/all-distilroberta-v1/graph_net.json +++ b/samples/transformers-auto-model/sentence-transformers/all-distilroberta-v1/graph_net.json @@ -1 +1,68 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "sentence-transformers/all-distilroberta-v1", "source": "huggingface_hub", "original_tag": ["sentence-transformers", "pytorch", "rust", "onnx", "safetensors", "openvino", "roberta", "fill-mask", "feature-extraction", "sentence-similarity", "transformers", "en", "dataset:s2orc", "dataset:flax-sentence-embeddings/stackexchange_xml", "dataset:ms_marco", "dataset:gooaq", "dataset:yahoo_answers_topics", "dataset:code_search_net", "dataset:search_qa", "dataset:eli5", "dataset:snli", "dataset:multi_nli", "dataset:wikihow", "dataset:natural_questions", "dataset:trivia_qa", "dataset:embedding-data/sentence-compression", "dataset:embedding-data/flickr30k-captions", "dataset:embedding-data/altlex", "dataset:embedding-data/simple-wiki", "dataset:embedding-data/QQP", "dataset:embedding-data/SPECTER", "dataset:embedding-data/PAQ_pairs", "dataset:embedding-data/WikiAnswers", "arxiv:1904.06472", "arxiv:2102.07033", "arxiv:2104.08727", "arxiv:1704.05179", "arxiv:1810.09305", "license:apache-2.0", "autotrain_compatible", "text-embeddings-inference", "endpoints_compatible", "region:us"], "heuristic_tag": "nlp", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "sentence-transformers/all-distilroberta-v1", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "rust", + "onnx", + "safetensors", + "openvino", + "roberta", + "fill-mask", + "feature-extraction", + "sentence-similarity", + "transformers", + "en", + "dataset:s2orc", + "dataset:flax-sentence-embeddings/stackexchange_xml", + "dataset:ms_marco", + "dataset:gooaq", + "dataset:yahoo_answers_topics", + "dataset:code_search_net", + "dataset:search_qa", + "dataset:eli5", + "dataset:snli", + "dataset:multi_nli", + "dataset:wikihow", + "dataset:natural_questions", + "dataset:trivia_qa", + "dataset:embedding-data/sentence-compression", + "dataset:embedding-data/flickr30k-captions", + "dataset:embedding-data/altlex", + "dataset:embedding-data/simple-wiki", + "dataset:embedding-data/QQP", + "dataset:embedding-data/SPECTER", + "dataset:embedding-data/PAQ_pairs", + "dataset:embedding-data/WikiAnswers", + "arxiv:1904.06472", + "arxiv:2102.07033", + "arxiv:2104.08727", + "arxiv:1704.05179", + "arxiv:1810.09305", + "license:apache-2.0", + "autotrain_compatible", + "text-embeddings-inference", + "endpoints_compatible", + "region:us" + ], + "heuristic_tag": "nlp", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + 
"non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/sentence-transformers/all-distilroberta-v1/input_tensor_constraints.py b/samples/transformers-auto-model/sentence-transformers/all-distilroberta-v1/input_tensor_constraints.py index c257a3b7e..6c7768ddb 100644 --- a/samples/transformers-auto-model/sentence-transformers/all-distilroberta-v1/input_tensor_constraints.py +++ b/samples/transformers-auto-model/sentence-transformers/all-distilroberta-v1/input_tensor_constraints.py @@ -1,50 +1,39 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 7} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 7} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), + ([S0, S1], "L_input_ids_"), ([1, 514], "L_self_modules_embeddings_buffers_token_type_ids_"), - ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_bias_"), - ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_weight_"), - ( - [514, 768], - "L_self_modules_embeddings_modules_position_embeddings_parameters_weight_", - ), - ( - [1, 768], - "L_self_modules_embeddings_modules_token_type_embeddings_parameters_weight_", - ), ( [50265, 768], "L_self_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", - ), - ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + [1, 768], + "L_self_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + [514, 768], + "L_self_modules_embeddings_modules_position_embeddings_parameters_weight_", ), + ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_weight_"), + ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_bias_"), + ([S0, S1], "L_attention_mask_"), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -52,11 +41,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -64,27 +53,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -92,23 +81,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -116,11 +105,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -128,27 +117,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -156,23 +145,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -180,11 +169,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -192,27 +181,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [768], + 
"L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -220,23 +209,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -244,11 +233,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -256,27 +245,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [768], + 
"L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -284,23 +273,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -308,11 +297,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -320,27 +309,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -348,23 +337,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -372,11 +361,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -384,32 +373,44 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + ), + ( + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + ), + ( + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( [3072, 768], "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( - [768], - 
"L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [768, 3072], + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [768], "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( - [768, 3072], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + ), + ( + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), - ([768], "L_self_modules_pooler_modules_dense_parameters_bias_"), ([768, 768], "L_self_modules_pooler_modules_dense_parameters_weight_"), + ([768], "L_self_modules_pooler_modules_dense_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/sentence-transformers/all-roberta-large-v1/graph_net.json b/samples/transformers-auto-model/sentence-transformers/all-roberta-large-v1/graph_net.json index ad24f9f4f..55ae2210d 100644 --- a/samples/transformers-auto-model/sentence-transformers/all-roberta-large-v1/graph_net.json +++ b/samples/transformers-auto-model/sentence-transformers/all-roberta-large-v1/graph_net.json @@ -1 +1,46 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "sentence-transformers/all-roberta-large-v1", "source": "huggingface_hub", "original_tag": ["sentence-transformers", "pytorch", "onnx", "safetensors", "openvino", "roberta", "fill-mask", "feature-extraction", "sentence-similarity", "transformers", "en", "arxiv:1904.06472", "arxiv:2102.07033", "arxiv:2104.08727", "arxiv:1704.05179", "arxiv:1810.09305", "license:apache-2.0", "autotrain_compatible", "text-embeddings-inference", "endpoints_compatible", "region:us"], "heuristic_tag": "nlp", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "sentence-transformers/all-roberta-large-v1", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "onnx", + "safetensors", + "openvino", + "roberta", + "fill-mask", + "feature-extraction", + "sentence-similarity", + "transformers", + "en", + "arxiv:1904.06472", + "arxiv:2102.07033", + "arxiv:2104.08727", + "arxiv:1704.05179", + "arxiv:1810.09305", + "license:apache-2.0", + "autotrain_compatible", + "text-embeddings-inference", + "endpoints_compatible", + "region:us" + ], + "heuristic_tag": "nlp", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + "non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + 
"non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/sentence-transformers/all-roberta-large-v1/input_tensor_constraints.py b/samples/transformers-auto-model/sentence-transformers/all-roberta-large-v1/input_tensor_constraints.py index a9bf7fb82..98962d7c6 100644 --- a/samples/transformers-auto-model/sentence-transformers/all-roberta-large-v1/input_tensor_constraints.py +++ b/samples/transformers-auto-model/sentence-transformers/all-roberta-large-v1/input_tensor_constraints.py @@ -1,50 +1,39 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 7} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 7} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), + ([S0, S1], "L_input_ids_"), ([1, 514], "L_self_modules_embeddings_buffers_token_type_ids_"), - ([1024], "L_self_modules_embeddings_modules_LayerNorm_parameters_bias_"), - ([1024], "L_self_modules_embeddings_modules_LayerNorm_parameters_weight_"), - ( - [514, 1024], - "L_self_modules_embeddings_modules_position_embeddings_parameters_weight_", - ), - ( - [1, 1024], - "L_self_modules_embeddings_modules_token_type_embeddings_parameters_weight_", - ), ( [50265, 1024], "L_self_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", - ), - ( - [1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + [1, 1024], + "L_self_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + [514, 1024], + "L_self_modules_embeddings_modules_position_embeddings_parameters_weight_", ), + ([1024], "L_self_modules_embeddings_modules_LayerNorm_parameters_weight_"), + ([1024], "L_self_modules_embeddings_modules_LayerNorm_parameters_bias_"), + ([S0, S1], "L_attention_mask_"), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], @@ -52,11 +41,11 @@ ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], @@ -64,27 +53,27 @@ ), ( [1024, 1024], - 
"L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], @@ -92,1476 +81,1488 @@ ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - 
"L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_key_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_12_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_12_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_12_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - 
"L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_13_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_13_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_13_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_dense_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_14_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_14_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_14_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( [1024], - 
"L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_15_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_15_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_15_modules_output_modules_dense_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_16_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_16_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - 
"L_self_modules_encoder_modules_layer_modules_16_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_16_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_17_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_17_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_17_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_18_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - 
"L_self_modules_encoder_modules_layer_modules_18_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_18_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_18_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_value_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_19_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_19_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_19_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - 
"L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_query_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_20_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_12_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_12_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_20_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_12_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_20_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_12_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_12_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_12_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_12_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - 
"L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_21_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_13_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_13_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_21_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_13_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_21_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_13_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_13_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_13_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_13_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_dense_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_22_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_14_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_14_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_22_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_14_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_22_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_14_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_14_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_14_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - 
"L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_14_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_23_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_15_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_15_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_23_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_15_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_23_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_15_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_15_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_15_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_15_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_16_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_16_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_16_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - 
"L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_16_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_16_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_16_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_16_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_17_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + 
"L_self_modules_encoder_modules_layer_modules_17_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_17_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_17_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_17_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_17_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_17_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - 
"L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_18_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_18_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_18_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_18_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_18_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_18_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_18_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [1024], + 
"L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_19_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_19_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_19_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_19_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_19_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_19_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_19_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - 
"L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_20_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_20_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_20_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_20_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_20_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_20_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_20_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_21_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_21_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_21_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_21_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_21_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_21_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_21_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - 
"L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_22_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_22_modules_intermediate_modules_dense_parameters_weight_", ), ( - [1024], - "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", + [4096], + "L_self_modules_encoder_modules_layer_modules_22_modules_intermediate_modules_dense_parameters_bias_", ), ( [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_22_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_22_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_22_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_22_modules_output_modules_LayerNorm_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [1024, 1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [4096], - "L_self_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", + [1024], + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [4096, 1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_23_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + ), + ( + [4096, 1024], + "L_self_modules_encoder_modules_layer_modules_23_modules_intermediate_modules_dense_parameters_weight_", + ), + ( + [4096], + "L_self_modules_encoder_modules_layer_modules_23_modules_intermediate_modules_dense_parameters_bias_", + ), + ( + [1024, 4096], + "L_self_modules_encoder_modules_layer_modules_23_modules_output_modules_dense_parameters_weight_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_23_modules_output_modules_dense_parameters_bias_", ), ( [1024], - "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_23_modules_output_modules_LayerNorm_parameters_weight_", ), ( - [1024, 4096], - "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", + [1024], + "L_self_modules_encoder_modules_layer_modules_23_modules_output_modules_LayerNorm_parameters_bias_", ), - ([1024], "L_self_modules_pooler_modules_dense_parameters_bias_"), ([1024, 1024], "L_self_modules_pooler_modules_dense_parameters_weight_"), + ([1024], "L_self_modules_pooler_modules_dense_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/sentence-transformers/msmarco-roberta-base-ance-firstp/graph_net.json 
b/samples/transformers-auto-model/sentence-transformers/msmarco-roberta-base-ance-firstp/graph_net.json index d21b34496..abca21e0b 100644 --- a/samples/transformers-auto-model/sentence-transformers/msmarco-roberta-base-ance-firstp/graph_net.json +++ b/samples/transformers-auto-model/sentence-transformers/msmarco-roberta-base-ance-firstp/graph_net.json @@ -1 +1,39 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "sentence-transformers/msmarco-roberta-base-ance-firstp", "source": "huggingface_hub", "original_tag": ["sentence-transformers", "pytorch", "tf", "onnx", "safetensors", "openvino", "roberta", "feature-extraction", "sentence-similarity", "license:apache-2.0", "autotrain_compatible", "text-embeddings-inference", "endpoints_compatible", "region:us"], "heuristic_tag": "other", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "sentence-transformers/msmarco-roberta-base-ance-firstp", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "tf", + "onnx", + "safetensors", + "openvino", + "roberta", + "feature-extraction", + "sentence-similarity", + "license:apache-2.0", + "autotrain_compatible", + "text-embeddings-inference", + "endpoints_compatible", + "region:us" + ], + "heuristic_tag": "other", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + "non_batch_call_method_expand_pass", + "non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/sentence-transformers/msmarco-roberta-base-ance-firstp/input_tensor_constraints.py b/samples/transformers-auto-model/sentence-transformers/msmarco-roberta-base-ance-firstp/input_tensor_constraints.py index 967de07ea..1f0a4a75b 100644 --- a/samples/transformers-auto-model/sentence-transformers/msmarco-roberta-base-ance-firstp/input_tensor_constraints.py +++ b/samples/transformers-auto-model/sentence-transformers/msmarco-roberta-base-ance-firstp/input_tensor_constraints.py @@ -1,50 +1,39 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 7} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 7} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), + ([S0, S1], "L_input_ids_"), ([1, 514], "L_self_modules_embeddings_buffers_token_type_ids_"), - ([768], 
"L_self_modules_embeddings_modules_LayerNorm_parameters_bias_"), - ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_weight_"), - ( - [514, 768], - "L_self_modules_embeddings_modules_position_embeddings_parameters_weight_", - ), - ( - [1, 768], - "L_self_modules_embeddings_modules_token_type_embeddings_parameters_weight_", - ), ( [50265, 768], "L_self_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", - ), - ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + [1, 768], + "L_self_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + [514, 768], + "L_self_modules_embeddings_modules_position_embeddings_parameters_weight_", ), + ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_weight_"), + ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_bias_"), + ([S0, S1], "L_attention_mask_"), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -52,11 +41,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -64,27 +53,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - 
"L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -92,708 +81,720 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + 
"L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - 
"L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [768], + 
"L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - 
"L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - 
"L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - 
"L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_6_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_6_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_6_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_7_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_7_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_7_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", ), ( [768], - 
"L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_8_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_8_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_bias_", + [3072], + 
"L_self_modules_encoder_modules_layer_modules_10_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], - "L_self_modules_encoder_modules_layer_modules_8_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_10_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_key_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_self_modules_value_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_9_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_9_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + ), + ( + [3072, 768], + 
"L_self_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_weight_", + ), + ( + [3072], + "L_self_modules_encoder_modules_layer_modules_11_modules_intermediate_modules_dense_parameters_bias_", + ), + ( + [768, 3072], + "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_weight_", ), ( - [768, 3072], - "L_self_modules_encoder_modules_layer_modules_9_modules_output_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_11_modules_output_modules_LayerNorm_parameters_bias_", ), - ([768], "L_self_modules_pooler_modules_dense_parameters_bias_"), ([768, 768], "L_self_modules_pooler_modules_dense_parameters_weight_"), + ([768], "L_self_modules_pooler_modules_dense_parameters_bias_"), ] diff --git a/samples/transformers-auto-model/sentence-transformers/paraphrase-distilroberta-base-v1/graph_net.json b/samples/transformers-auto-model/sentence-transformers/paraphrase-distilroberta-base-v1/graph_net.json index 014473beb..427cd6b3d 100644 --- a/samples/transformers-auto-model/sentence-transformers/paraphrase-distilroberta-base-v1/graph_net.json +++ b/samples/transformers-auto-model/sentence-transformers/paraphrase-distilroberta-base-v1/graph_net.json @@ -1 +1,42 @@ -{"framework": "torch", "num_devices_required": 1, "num_nodes_required": 1, "dynamic": false, "model_name": "sentence-transformers/paraphrase-distilroberta-base-v1", "source": "huggingface_hub", "original_tag": ["sentence-transformers", "pytorch", "tf", "jax", "onnx", "safetensors", "openvino", "roberta", "feature-extraction", "sentence-similarity", "transformers", "arxiv:1908.10084", "license:apache-2.0", "autotrain_compatible", "text-embeddings-inference", "endpoints_compatible", "region:us"], "heuristic_tag": "other", "dimension_generalization_passes": ["naive_call_method_view_pass", "tuple_arg_call_method_view_pass", "naive_call_method_reshape_pass", "naive_call_method_expand_pass", "non_batch_call_method_expand_pass", "non_batch_call_function_arange_pass", "non_batch_call_function_getitem_slice_pass", "non_batch_call_function_full_pass", "non_batch_call_function_full_plus_one_pass", "non_batch_call_function_zeros_pass", "non_batch_call_function_arange_plus_one_pass"]} \ No newline at end of file +{ + "framework": "torch", + "num_devices_required": 1, + "num_nodes_required": 1, + "dynamic": false, + "model_name": "sentence-transformers/paraphrase-distilroberta-base-v1", + "source": "huggingface_hub", + "original_tag": [ + "sentence-transformers", + "pytorch", + "tf", + "jax", + "onnx", + "safetensors", + "openvino", + "roberta", + "feature-extraction", + "sentence-similarity", + "transformers", + "arxiv:1908.10084", + "license:apache-2.0", + "autotrain_compatible", + "text-embeddings-inference", + "endpoints_compatible", + "region:us" + ], + "heuristic_tag": "other", + "dimension_generalization_passes": [ + "batch_call_method_view_pass", + "tuple_arg_call_method_view_pass", + "naive_call_method_reshape_pass", + "naive_call_method_expand_pass", + "non_batch_call_method_expand_pass", + 
"non_batch_call_function_arange_pass", + "non_batch_call_function_getitem_slice_pass", + "non_batch_call_function_full_pass", + "non_batch_call_function_full_plus_one_pass", + "non_batch_call_function_zeros_pass", + "non_batch_call_function_arange_plus_one_pass" + ], + "symbolic_dimension_reifier": "naive_nlp_sym_dim_reifier" +} \ No newline at end of file diff --git a/samples/transformers-auto-model/sentence-transformers/paraphrase-distilroberta-base-v1/input_tensor_constraints.py b/samples/transformers-auto-model/sentence-transformers/paraphrase-distilroberta-base-v1/input_tensor_constraints.py index c257a3b7e..6c7768ddb 100644 --- a/samples/transformers-auto-model/sentence-transformers/paraphrase-distilroberta-base-v1/input_tensor_constraints.py +++ b/samples/transformers-auto-model/sentence-transformers/paraphrase-distilroberta-base-v1/input_tensor_constraints.py @@ -1,50 +1,39 @@ -from sympy import Symbol +from sympy import Symbol, Expr, Rel, Eq S0 = Symbol("S0") +S1 = Symbol("S1") -dynamic_dim_constraint_symbols = [S0] +dynamic_dim_constraint_symbols = [S0, S1] -dynamic_dim_constraint_symbol2example_value = {S0: 7} +dynamic_dim_constraint_symbol2example_value = {S0: 2, S1: 7} dynamic_dim_constraint_relations = [] dynamic_dim_constraint_input_shapes = [ - ([2, S0], "L_attention_mask_"), - ([2, S0], "L_input_ids_"), + ([S0, S1], "L_input_ids_"), ([1, 514], "L_self_modules_embeddings_buffers_token_type_ids_"), - ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_bias_"), - ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_weight_"), - ( - [514, 768], - "L_self_modules_embeddings_modules_position_embeddings_parameters_weight_", - ), - ( - [1, 768], - "L_self_modules_embeddings_modules_token_type_embeddings_parameters_weight_", - ), ( [50265, 768], "L_self_modules_embeddings_modules_word_embeddings_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", - ), - ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + [1, 768], + "L_self_modules_embeddings_modules_token_type_embeddings_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", + [514, 768], + "L_self_modules_embeddings_modules_position_embeddings_parameters_weight_", ), + ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_weight_"), + ([768], "L_self_modules_embeddings_modules_LayerNorm_parameters_bias_"), + ([S0, S1], "L_attention_mask_"), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -52,11 +41,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_query_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -64,27 +53,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_0_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -92,23 +81,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_0_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -116,11 +105,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -128,27 +117,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_self_modules_value_parameters_weight_", + 
"L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_1_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -156,23 +145,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_1_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -180,11 +169,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -192,27 +181,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", + [768], + 
"L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_2_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -220,23 +209,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_2_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -244,11 +233,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -256,27 +245,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", + [768], + 
"L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_3_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -284,23 +273,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_3_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -308,11 +297,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -320,27 +309,27 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_dense_parameters_bias_", ), ( - [3072, 768], - "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", + 
"L_self_modules_encoder_modules_layer_modules_4_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", + [3072, 768], + "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_4_modules_intermediate_modules_dense_parameters_bias_", ), ( [768, 3072], @@ -348,23 +337,23 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_dense_parameters_bias_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_4_modules_output_modules_LayerNorm_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", ), ( [768, 768], @@ -372,11 +361,11 @@ ), ( [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_bias_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_key_parameters_bias_", ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_query_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", ), ( [768], @@ -384,32 +373,44 @@ ), ( [768, 768], - "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_self_modules_value_parameters_weight_", + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_weight_", ), ( - [3072], - "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_dense_parameters_bias_", + ), + ( + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_weight_", + ), + ( + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_attention_modules_output_modules_LayerNorm_parameters_bias_", ), ( [3072, 768], "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_weight_", ), ( - [768], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", + [3072], + "L_self_modules_encoder_modules_layer_modules_5_modules_intermediate_modules_dense_parameters_bias_", ), ( - [768], - 
"L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + [768, 3072], + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", ), ( [768], "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_bias_", ), ( - [768, 3072], - "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_dense_parameters_weight_", + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_weight_", + ), + ( + [768], + "L_self_modules_encoder_modules_layer_modules_5_modules_output_modules_LayerNorm_parameters_bias_", ), - ([768], "L_self_modules_pooler_modules_dense_parameters_bias_"), ([768, 768], "L_self_modules_pooler_modules_dense_parameters_weight_"), + ([768], "L_self_modules_pooler_modules_dense_parameters_bias_"), ]