
Commit

clean
RoberLopez committed Dec 26, 2024
1 parent 704860a · commit 1c16cfe
Showing 6 changed files with 121 additions and 19 deletions.
18 changes: 7 additions & 11 deletions opennn/multihead_attention_layer.cpp
@@ -20,9 +20,12 @@ MultiheadAttentionLayer::MultiheadAttentionLayer(const Index& new_input_size,
const bool& new_use_causal_mask,
const string& new_name) : Layer()
{
if (new_input_size == 0 || new_context_size == 0 || new_depth == 0 || new_heads_number == 0)
return;

set(new_input_size, new_context_size, new_depth, new_heads_number, new_name);

set_causal_mask(use_causal_mask);
set_causal_mask(new_use_causal_mask);

layer_type = Type::MultiheadAttention;

@@ -130,6 +133,9 @@ void MultiheadAttentionLayer::set(const Index& new_input_size,
const Index& new_heads_number,
const string& new_name)
{
if (new_input_size == 0 || new_context_size == 0 || new_depth == 0 || new_heads_number == 0)
return;

input_size = new_input_size;

context_size = new_context_size;
@@ -224,19 +230,12 @@ void MultiheadAttentionLayer::set_parameters_random()
const type maximum = type(0.2);

set_random(query_weights, minimum, maximum);

set_random(query_biases, minimum, maximum);

set_random(key_weights, minimum, maximum);

set_random(key_biases, minimum, maximum);

set_random(value_weights, minimum, maximum);

set_random(value_biases, minimum, maximum);

set_random(projection_weights, minimum, maximum);

set_random(projection_biases, minimum, maximum);
}

@@ -254,11 +253,8 @@ void MultiheadAttentionLayer::set_parameters_glorot()
const type maximum = limit;

set_random(query_weights, minimum, maximum);

set_random(key_weights, minimum, maximum);

set_random(value_weights, minimum, maximum);

set_random(projection_weights, minimum, maximum);
}

14 changes: 7 additions & 7 deletions opennn/transformer.cpp
@@ -46,7 +46,7 @@ void Transformer::set(const Index& new_input_length,
const Index& new_embedding_dimension,
const Index& new_perceptron_depth,
const Index& new_heads_number,
const Index& new_layers_number)
const Index& new_blocks_number)
{
name = "transformer";

@@ -79,7 +79,7 @@ void Transformer::set(const Index& new_input_length,

// Encoder

for(Index i = 0; i < new_layers_number; i++)
for(Index i = 0; i < new_blocks_number; i++)
{
add_layer(make_unique<MultiheadAttentionLayer>(new_context_length,
new_context_length,
@@ -92,8 +92,8 @@ void Transformer::set(const Index& new_input_length,
set_layer_inputs_indices("context_self_attention_1",
{"context_embedding", "context_embedding"});
else

set_layer_inputs_indices("context_self_attention_" + to_string(i+1), { "encoder_perceptron_normalization_" + to_string(i), "encoder_perceptron_normalization_" + to_string(i) });
set_layer_inputs_indices("context_self_attention_" + to_string(i+1),
{ "encoder_perceptron_normalization_" + to_string(i), "encoder_perceptron_normalization_" + to_string(i) });

//context_self_attention_layer->set_dropout_rate(dropout_rate);

@@ -153,7 +153,7 @@ void Transformer::set(const Index& new_input_length,

// Decoder

for(Index i = 0; i < new_layers_number; i++)
for(Index i = 0; i < new_blocks_number; i++)
{
add_layer(make_unique<MultiheadAttentionLayer>(new_input_length,
new_input_length,
@@ -190,7 +190,7 @@ void Transformer::set(const Index& new_input_length,
false,
"cross_attention_" + to_string(i+1)));

set_layer_inputs_indices("cross_attention_" + to_string(i+1), {"input_self_attention_normalization_" + to_string(i+1), "encoder_perceptron_normalization_" + to_string(new_layers_number)});
set_layer_inputs_indices("cross_attention_" + to_string(i+1), {"input_self_attention_normalization_" + to_string(i+1), "encoder_perceptron_normalization_" + to_string(new_blocks_number)});

//cross_attention_layer->set_dropout_rate(dropout_rate);

@@ -240,7 +240,7 @@ void Transformer::set(const Index& new_input_length,
new_input_dimensions,
"probabilistic"));

set_layer_inputs_indices("probabilistic", "decoder_perceptron_normalization_" + to_string(new_layers_number));
set_layer_inputs_indices("probabilistic", "decoder_perceptron_normalization_" + to_string(new_blocks_number));
}


52 changes: 52 additions & 0 deletions tests/embedding_layer_test.cpp
@@ -0,0 +1,52 @@
#include "pch.h"

#include "../opennn/embedding_layer.h"


TEST(EmbeddingLayer, DefaultConstructor)
{
EmbeddingLayer embedding_layer;

EXPECT_EQ(embedding_layer.get_vocabulary_size(), 0);
EXPECT_EQ(embedding_layer.get_sequence_length(), 0);
EXPECT_EQ(embedding_layer.get_embedding_dimension(), 0);
}


TEST(EmbeddingLayer, GeneralConstructor)
{
EmbeddingLayer embedding_layer(1,2,3);

EXPECT_EQ(embedding_layer.get_vocabulary_size(), 1);
EXPECT_EQ(embedding_layer.get_sequence_length(), 2);
EXPECT_EQ(embedding_layer.get_embedding_dimension(), 3);
}


TEST(EmbeddingLayer, ForwardPropagate)
{
const Index samples_number = get_random_index(1, 10);
const Index vocabulary_size = get_random_index(1, 10);
const Index sequence_length = get_random_index(1, 10);
const Index embedding_dimension = get_random_index(1, 10);

EmbeddingLayer embedding_layer(vocabulary_size, sequence_length, embedding_dimension);
embedding_layer.set_parameters_constant(type(0));

unique_ptr<LayerForwardPropagation> embedding_layer_forward_propagation
= make_unique<EmbeddingLayerForwardPropagation>(samples_number, &embedding_layer);

Tensor<type, 2> inputs(samples_number, sequence_length);
inputs.setConstant(type(0));

embedding_layer.forward_propagate({ make_pair(inputs.data(), dimensions{samples_number, sequence_length}) },
embedding_layer_forward_propagation,
true);

EXPECT_EQ(embedding_layer_forward_propagation->batch_samples_number, samples_number);
EXPECT_EQ(embedding_layer_forward_propagation->get_outputs_pair().second[0], samples_number);
EXPECT_EQ(embedding_layer_forward_propagation->get_outputs_pair().second[1], sequence_length);
EXPECT_EQ(embedding_layer_forward_propagation->get_outputs_pair().second[2], embedding_dimension);


}
52 changes: 52 additions & 0 deletions tests/multihead_attention_layer_test.cpp
@@ -0,0 +1,52 @@
#include "pch.h"

#include "../opennn/multihead_attention_layer.h"


TEST(MultiheadAttentionLayer, DefaultConstructor)
{
MultiheadAttentionLayer multihead_attention_layer;

// EXPECT_EQ(multihead_attention_layer.get_vocabulary_size(), 0);
// EXPECT_EQ(multihead_attention_layer.get_sequence_length(), 0);
// EXPECT_EQ(multihead_attention_layer.get_embedding_dimension(), 0);
}


TEST(MultiheadAttentionLayer, GeneralConstructor)
{
// MultiheadAttentionLayer multihead_attention_layer;

// EXPECT_EQ(embedding_layer.get_vocabulary_size(), 1);
// EXPECT_EQ(embedding_layer.get_sequence_length(), 2);
// EXPECT_EQ(embedding_layer.get_embedding_dimension(), 3);
}


TEST(MultiheadAttentionLayer, ForwardPropagate)
{
/*
const Index samples_number = get_random_index(1, 10);
const Index vocabulary_size = get_random_index(1, 10);
const Index sequence_length = get_random_index(1, 10);
const Index embedding_dimension = get_random_index(1, 10);
EmbeddingLayer embedding_layer(vocabulary_size, sequence_length, embedding_dimension);
embedding_layer.set_parameters_constant(type(0));
unique_ptr<LayerForwardPropagation> embedding_layer_forward_propagation
= make_unique<EmbeddingLayerForwardPropagation>(samples_number, &embedding_layer);
Tensor<type, 2> inputs(samples_number, sequence_length);
inputs.setConstant(type(0));
embedding_layer.forward_propagate({ make_pair(inputs.data(), dimensions{samples_number, sequence_length}) },
embedding_layer_forward_propagation,
true);
EXPECT_EQ(embedding_layer_forward_propagation->batch_samples_number, samples_number);
EXPECT_EQ(embedding_layer_forward_propagation->get_outputs_pair().second[0], samples_number);
EXPECT_EQ(embedding_layer_forward_propagation->get_outputs_pair().second[1], sequence_length);
EXPECT_EQ(embedding_layer_forward_propagation->get_outputs_pair().second[2], embedding_dimension);
*/
}
2 changes: 2 additions & 0 deletions tests/tests.vcxproj
@@ -113,6 +113,7 @@
<ClCompile Include="cross_entropy_error_3d_test.cpp" />
<ClCompile Include="cross_entropy_error_test.cpp" />
<ClCompile Include="data_set_test.cpp" />
<ClCompile Include="embedding_layer_test.cpp" />
<ClCompile Include="flatten_layer_test.cpp" />
<ClCompile Include="genetic_algorithm_test.cpp" />
<ClCompile Include="growing_inputs_test.cpp" />
@@ -124,6 +125,7 @@
<ClCompile Include="mean_squared_error_test.cpp" />
<ClCompile Include="minkowski_error_test.cpp" />
<ClCompile Include="model_selection_test.cpp" />
<ClCompile Include="multihead_attention_layer_test.cpp" />
<ClCompile Include="neural_network_test.cpp" />
<ClCompile Include="normalized_squared_error_test.cpp" />
<ClCompile Include="perceptron_layer_test.cpp" />
2 changes: 1 addition & 1 deletion tests/transformer_test.cpp
@@ -23,7 +23,7 @@ TEST(TransformerTest, GeneralConstructor)
const Index heads_number = 1;
const Index layers_number = 1;

// Transformer transformer(architecture);
Transformer transformer(1,1,1,1,1,1,1,1);

// EXPECT_EQ(transformer_1.get_layers_number() == 2 + 7 * layers_number + 10 * layers_number + 1);
}
